本節介紹了插入數據時與WAL相關的處理邏輯,主要包括heap_insert依賴的函數XLogBeginInsert/XLogRegisterData/XLogRegisterBuffer/XLogRegisterBufData/XLogSetRecordFlags。
一、數據結構
靜態變量
進程內全局共享
/*
 * An array of XLogRecData structs, to hold registered data.
 */
static XLogRecData *rdatas;
static int num_rdatas; /* entries currently used */
static int max_rdatas; /* allocated size */
/* Set to true by XLogBeginInsert(); the XLogRegister* functions assert it. */
static bool begininsert_called = false;
registered_buffer
對于使用XLogRegisterBuffer注冊的每個數據塊,都會填充一個registered_buffer結構體
/*
 * For each block reference registered with XLogRegisterBuffer, we fill in
 * a registered_buffer struct.
 */
typedef struct
{
bool in_use; /* is this slot in use? */
uint8 flags; /* REGBUF_* flags */
RelFileNode rnode; /* identifies the relation and block */
/* fork number of the registered block (filled in via BufferGetTag) */
ForkNumber forkno;
/* block number of the registered block (filled in via BufferGetTag) */
BlockNumber block;
Page page; /* page content */
uint32 rdata_len; /* total length of data in rdata chain */
XLogRecData *rdata_head; /* head of the chain of data registered with
 * this block */
XLogRecData *rdata_tail; /* last entry in the chain, or &rdata_head if
 * empty */
XLogRecData bkp_rdatas[2]; /* temporary rdatas used to hold references to
 * backup block data in XLogRecordAssemble() */
/* buffer to store a compressed version of backup block image */
char compressed_page[PGLZ_MAX_BLCKSZ];
} registered_buffer;
/* Array of registered_buffer slots, indexed by block_id. */
static registered_buffer *registered_buffers;
static int max_registered_buffers; /* allocated size */
static int max_registered_block_id = 0; /* highest block_id + 1 currently
 * registered */
XLogCtlInsert
WAL插入記錄時使用的共享數據結構
/*
 * Shared state data for WAL insertion.
 */
typedef struct XLogCtlInsert
{
slock_t insertpos_lck; /* protects CurrBytePos and PrevBytePos */
/*
 * CurrBytePos is the end of reserved WAL. The next record will be
 * inserted at that position. PrevBytePos is the start position of the
 * previously inserted (or rather, reserved) record - it is copied to the
 * prev-link of the next record. These are stored as "usable byte
 * positions" rather than XLogRecPtrs (see XLogBytePosToRecPtr()).
 */
uint64 CurrBytePos;
uint64 PrevBytePos;
/*
 * Make sure the above heavily-contended spinlock and byte positions are
 * on their own cache line. In particular, the RedoRecPtr and full page
 * write variables below should be on a different cache line. They are
 * read on every WAL insertion, but updated rarely, and we don't want
 * those reads to steal the cache line containing Curr/PrevBytePos.
 */
char pad[PG_CACHE_LINE_SIZE];
/*
 * fullPageWrites is the master copy used by all backends to determine
 * whether to write full-page to WAL, instead of using process-local one.
 * This is required because, when full_page_writes is changed by SIGHUP,
 * we must WAL-log it before it actually affects WAL-logging by backends.
 * Checkpointer sets at startup or after SIGHUP.
 *
 * To read these fields, you must hold an insertion lock. To modify them,
 * you must hold ALL the locks.
 */
XLogRecPtr RedoRecPtr; /* current redo point for insertions */
bool forcePageWrites; /* forcing full-page writes for PITR? */
/* master copy of the full-page-write setting; see the comment above */
bool fullPageWrites;
/*
 * exclusiveBackupState indicates the state of an exclusive backup (see
 * comments of ExclusiveBackupState for more details). nonExclusiveBackups
 * is a counter indicating the number of streaming base backups currently
 * in progress. forcePageWrites is set to true when either of these is
 * non-zero. lastBackupStart is the latest checkpoint redo location used
 * as a starting point for an online backup.
 */
ExclusiveBackupState exclusiveBackupState;
int nonExclusiveBackups;
XLogRecPtr lastBackupStart;
/*
 * WAL insertion locks.
 */
WALInsertLockPadded *WALInsertLocks;
} XLogCtlInsert;
XLogRecData
xloginsert.c中的函數構造一個XLogRecData結構體鏈,用于表示最終的WAL記錄
/*
 * The functions in xloginsert.c construct a chain of XLogRecData structs
 * to represent the final WAL record.
 *
 * Each entry only points at caller-supplied data (pointer plus length);
 * entries are linked through 'next'.
 */
typedef struct XLogRecData
{
struct XLogRecData *next; /* next struct in chain, or NULL */
char *data; /* start of rmgr data to include */
uint32 len; /* length of rmgr data to include */
} XLogRecData;
registered_buffer/registered_buffers
對于使用XLogRegisterBuffer注冊的每個數據塊,都會填充一個registered_buffer結構體
/*
 * For each block reference registered with XLogRegisterBuffer, we fill in
 * a registered_buffer struct.
 */
typedef struct
{
bool in_use; /* is this slot in use? */
uint8 flags; /* REGBUF_* flags */
RelFileNode rnode; /* identifies the relation and block */
/* fork number of the registered block (filled in via BufferGetTag) */
ForkNumber forkno;
/* block number of the registered block (filled in via BufferGetTag) */
BlockNumber block;
Page page; /* page content */
uint32 rdata_len; /* total length of data in rdata chain */
XLogRecData *rdata_head; /* head of the chain of data registered with
 * this block */
XLogRecData *rdata_tail; /* last entry in the chain, or &rdata_head if
 * empty */
XLogRecData bkp_rdatas[2]; /* temporary rdatas used to hold references to
 * backup block data in XLogRecordAssemble() */
/* buffer to store a compressed version of backup block image */
char compressed_page[PGLZ_MAX_BLCKSZ];
} registered_buffer;
/* Array of registered_buffer slots, indexed by block_id (file-level static). */
static registered_buffer *registered_buffers;
static int max_registered_buffers; /* allocated size */
static int max_registered_block_id = 0; /* highest block_id + 1 currently
 * registered */
二、源碼解讀
heap_insert
主要實現邏輯是插入元組到堆中,其中存在對WAL(XLog)進行處理的部分.
參見PostgreSQL 源碼解讀(104)- WAL#1(Insert & WAL-heap_insert函數#1)
XLogBeginInsert
開始構造WAL記錄.
必須在調用XLogRegister*和XLogInsert()函數前調用.
/*
 * XLogBeginInsert
 *		Start assembling a new WAL record.
 *
 * Callers must invoke this before any of the XLogRegister* functions and
 * before XLogInsert().  An ERROR is raised if WAL insertion is not allowed
 * right now, or if a record is already under construction.
 */
void
XLogBeginInsert(void)
{
	/* Nothing may be left over from a previously constructed record. */
	Assert(max_registered_block_id == 0);
	Assert(mainrdata_last == (XLogRecData *) &mainrdata_head);
	Assert(mainrdata_len == 0);

	/* Cross-check on whether we should be here or not. */
	if (!XLogInsertAllowed())
	{
		elog(ERROR, "cannot make new WAL entries during recovery");
	}

	/* Reject nested or repeated starts. */
	if (begininsert_called)
	{
		elog(ERROR, "XLogBeginInsert was already called");
	}

	/* From here on the XLogRegister* functions may be used. */
	begininsert_called = true;
}
/*
 * XLogInsertAllowed
 *		Report whether this process may insert new WAL records.
 *
 * Ordinarily this is essentially equivalent to !RecoveryInProgress().
 * However, LocalXLogInsertAllowed can force the answer to "true" or
 * "false" within a specific process regardless of the global state: any
 * non-negative value is returned as-is.
 */
bool
XLogInsertAllowed(void)
{
	if (LocalXLogInsertAllowed < 0)
	{
		/* No forced answer yet, so check whether we're still in recovery. */
		if (RecoveryInProgress())
		{
			return false;
		}

		/*
		 * Recovery is over.  Remember that as "unconditionally true" so
		 * later calls take the fast path below.
		 */
		LocalXLogInsertAllowed = 1;
		return true;
	}

	/* Fast path: the value is unconditionally true or unconditionally false. */
	return (bool) LocalXLogInsertAllowed;
}
XLogRegisterData
添加數據到正在構造的WAL記錄中
/*
* Add data to the WAL record that's being constructed.
* 添加數(shù)據(jù)到正在構(gòu)造的WAL記錄中
*
* The data is appended to the "main chunk", available at replay with
* XLogRecGetData().
* 數(shù)據(jù)追加到"main chunk"中,用于XLogRecGetData()函數(shù)回放
*/
void
XLogRegisterData(char *data, int len)
{
XLogRecData *rdata;//數(shù)據(jù)
//驗證是否已調(diào)用begin
Assert(begininsert_called);
//驗證大小
if (num_rdatas >= max_rdatas)
elog(ERROR, "too much WAL data");
rdata = &rdatas[num_rdatas++];
rdata->data = data;
rdata->len = len;
/*
* we use the mainrdata_last pointer to track the end of the chain, so no
* need to clear 'next' here.
* 使用mainrdata_last指針跟蹤鏈條的結(jié)束點,在這里不需要清除next變量
*/
mainrdata_last->next = rdata;
mainrdata_last = rdata;
mainrdata_len += len;
}
XLogRegisterBuffer
為正在構造的WAL記錄注冊一個緩沖區引用;WAL-logged操作修改的每一個page都必須調用此函數
/*
 * XLogRegisterBuffer
 *		Register a reference to a buffer with the WAL record being constructed.
 *
 * This must be called for every page that the WAL-logged operation modifies.
 *
 * block_id	slot of registered_buffers[] to fill; ERROR is raised when it
 *			is not below max_registered_buffers.
 * buffer	buffer whose tag (relfilenode, fork, block) and page are
 *			recorded in the slot.
 * flags	REGBUF_* flags; REGBUF_FORCE_IMAGE and REGBUF_NO_IMAGE must not
 *			be combined.
 *
 * Registering a block_id again resets that slot's data chain
 * (rdata_tail / rdata_len).
 */
void
XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
{
	registered_buffer *regbuf;

	/* NO_IMAGE doesn't make sense with FORCE_IMAGE */
	Assert(!((flags & REGBUF_FORCE_IMAGE) && (flags & (REGBUF_NO_IMAGE))));
	Assert(begininsert_called);

	/* Raise the high-water mark when a new, larger block_id shows up. */
	if (block_id >= max_registered_block_id)
	{
		if (block_id >= max_registered_buffers)
			elog(ERROR, "too many registered buffers");
		max_registered_block_id = block_id + 1;
	}

	regbuf = &registered_buffers[block_id];

	/* Record which relation/fork/block this buffer holds, and its page. */
	BufferGetTag(buffer, &regbuf->rnode, &regbuf->forkno, &regbuf->block);
	regbuf->page = BufferGetPage(buffer);
	regbuf->flags = flags;

	/* Start with an empty data chain: the tail points at the head pointer. */
	regbuf->rdata_tail = (XLogRecData *) &regbuf->rdata_head;
	regbuf->rdata_len = 0;

	/*
	 * Check that this page hasn't already been registered with some other
	 * block_id.
	 */
#ifdef USE_ASSERT_CHECKING
	{
		int			i;

		for (i = 0; i < max_registered_block_id; i++)
		{
			registered_buffer *regbuf_old = &registered_buffers[i];

			if (i == block_id || !regbuf_old->in_use)
				continue;

			Assert(!RelFileNodeEquals(regbuf_old->rnode, regbuf->rnode) ||
				   regbuf_old->forkno != regbuf->forkno ||
				   regbuf_old->block != regbuf->block);
		}
	}
#endif

	regbuf->in_use = true;
}
/*
 * BufferGetTag
 *		Fetch the relfilenode, fork number and block number associated
 *		with a buffer.
 *
 * The results are written through rnode, forknum and blknum.  The buffer
 * must be pinned by the caller (asserted).
 */
void
BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
			 BlockNumber *blknum)
{
	BufferDesc *desc;

	/* Do the same checks as BufferGetBlockNumber. */
	Assert(BufferIsPinned(buffer));

	/* Local buffers carry negative ids; map either kind to its descriptor. */
	desc = BufferIsLocal(buffer)
		? GetLocalBufferDescriptor(-buffer - 1)
		: GetBufferDescriptor(buffer - 1);

	/* pinned, so OK to read tag without spinlock */
	*rnode = desc->tag.rnode;
	*forknum = desc->tag.forkNum;
	*blknum = desc->tag.blockNum;
}
/*
 * BufferIsLocal
 *		True iff the buffer is local (not visible to other backends).
 *		Local buffers are identified by a negative buffer id.
 */
#define BufferIsLocal(buffer) ((buffer) < 0)
/* Look up a descriptor by array index in the non-local / local descriptor array. */
#define GetBufferDescriptor(id) (&BufferDescriptors[(id)].bufferdesc)
#define GetLocalBufferDescriptor(id) (&LocalBufferDescriptors[(id)])
/* Descriptor arrays accessed through the two lookup macros above. */
BufferDesc *LocalBufferDescriptors = NULL;
BufferDescPadded *BufferDescriptors;
XLogRegisterBufData
在正在構造的WAL記錄中添加buffer相關的數據.
/*
 * XLogRegisterBufData
 *		Add buffer-specific data to the WAL record that's being constructed.
 *
 * Block_id must reference a block previously registered with
 * XLogRegisterBuffer().  If this is called more than once for the same
 * block_id, the data is appended.
 *
 * The maximum amount of data that can be registered per block is 65535
 * bytes.  That should be plenty; if you need more than BLCKSZ bytes to
 * reconstruct the changes to the page, you might as well just log a full
 * copy of it.  (the "main data" that's not associated with a block is not
 * limited)
 *
 * Raises ERROR if block_id has not been registered, or if every slot of
 * the rdatas array is already in use.  Only the pointer and length are
 * recorded here; the bytes are not copied by this function.
 */
void
XLogRegisterBufData(uint8 block_id, char *data, int len)
{
	registered_buffer *regbuf;
	XLogRecData *rdata;

	Assert(begininsert_called);

	/* find the registered buffer struct */
	regbuf = &registered_buffers[block_id];
	if (!regbuf->in_use)
		elog(ERROR, "no block with id %d registered with WAL insertion",
			 block_id);

	if (num_rdatas >= max_rdatas)
		elog(ERROR, "too much WAL data");
	rdata = &rdatas[num_rdatas++];

	rdata->data = data;
	rdata->len = len;

	/* Append to this block's chain; rdata_tail always marks the last link. */
	regbuf->rdata_tail->next = rdata;
	regbuf->rdata_tail = rdata;
	regbuf->rdata_len += len;
}
XLogSetRecordFlags
為即將"到來"的WAL記錄設置插入狀態標記
XLOG_INCLUDE_ORIGIN 確定復制起點是否應該包含在記錄中
XLOG_MARK_UNIMPORTANT 表示記錄對于持久性并不重要,這可以避免觸發WAL歸檔和其他后臺活動
/*
 * Set insert status flags for the upcoming WAL record.
 *
 * The flags that can be used here are:
 * - XLOG_INCLUDE_ORIGIN, to determine if the replication origin should be
 *	 included in the record.
 * - XLOG_MARK_UNIMPORTANT, to signal that the record is not important for
 *	 durability, which allows to avoid triggering WAL archiving and other
 *	 background activity.
 *
 * The value is stored in curinsert_flags, overwriting any earlier setting.
 */
void
XLogSetRecordFlags(uint8 flags)
{
/* XLogBeginInsert() must have been called first. */
Assert(begininsert_called);
curinsert_flags = flags;
}
三、跟蹤分析
測試腳本如下
insert into t_wal_partition(c1,c2,c3) VALUES(0,'HASH0','HAHS0');
XLogBeginInsert
啟動gdb,設置斷點,進入XLogBeginInsert
(gdb) b XLogBeginInsert
Breakpoint 1 at 0x564897: file xloginsert.c, line 122.
(gdb) c
Continuing.
Breakpoint 1, XLogBeginInsert () at xloginsert.c:122
122 Assert(max_registered_block_id == 0);
校驗,調用XLogInsertAllowed
122 Assert(max_registered_block_id == 0);
(gdb) n
123 Assert(mainrdata_last == (XLogRecData *) &mainrdata_head);
(gdb)
124 Assert(mainrdata_len == 0);
(gdb)
127 if (!XLogInsertAllowed())
(gdb) step
XLogInsertAllowed () at xlog.c:8126
8126 if (LocalXLogInsertAllowed >= 0)
(gdb) n
8132 if (RecoveryInProgress())
(gdb)
8139 LocalXLogInsertAllowed = 1;
(gdb)
8140 return true;
(gdb)
8141 }
(gdb)
賦值,設置begininsert_called為true,返回
(gdb)
XLogBeginInsert () at xloginsert.c:130
130 if (begininsert_called)
(gdb) p begininsert_called
$1 = false
(gdb) n
133 begininsert_called = true;
(gdb)
134 }
(gdb)
heap_insert (relation=0x7f5cc0338228, tup=0x29b2440, cid=0, options=0, bistate=0x0) at heapam.c:2567
2567 XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
(gdb)
XLogRegisterData
進入XLogRegisterData函數
(gdb) step
XLogRegisterData (data=0x7fff03ba99e0 "\002", len=3) at xloginsert.c:327
327 Assert(begininsert_called);
(gdb) p *data
$2 = 2 '\002'
(gdb) p *(xl_heap_insert *)data
$3 = {offnum = 2, flags = 0 '\000'}
執行相關判斷,并賦值
rdatas是XLogRecData結構體指針,全局靜態變量:
static XLogRecData *rdatas;
(gdb) n
329 if (num_rdatas >= max_rdatas)
(gdb) p num_rdatas
$4 = 0
(gdb) p max_rdatas
$5 = 20
(gdb) n
331 rdata = &rdatas[num_rdatas++];
(gdb) p rdatas[0]
$6 = {next = 0x0, data = 0x0, len = 0}
(gdb) p rdatas[1]
$7 = {next = 0x0, data = 0x0, len = 0}
相關結構體賦值
其中mainrdata_last初始指向mainrdata_head的地址:
static XLogRecData *mainrdata_head;
static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head;
(gdb) n
333 rdata->data = data;
(gdb)
334 rdata->len = len;
(gdb)
341 mainrdata_last->next = rdata;
(gdb)
342 mainrdata_last = rdata;
(gdb)
344 mainrdata_len += len;
(gdb)
345 }
完成調用,回到heap_insert
(gdb) n
heap_insert (relation=0x7f5cc0338228, tup=0x29b2440, cid=0, options=0, bistate=0x0) at heapam.c:2569
2569 xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
XLogRegisterBuffer
進入XLogRegisterBuffer
(gdb) step
XLogRegisterBuffer (block_id=0 '\000', buffer=99, flags=8 '\b') at xloginsert.c:218
218 Assert(!((flags & REGBUF_FORCE_IMAGE) && (flags & (REGBUF_NO_IMAGE))));
判斷block_id,設置max_registered_block_id變量等.
注:max_registered_buffers初始化為5
(gdb) n
219 Assert(begininsert_called);
(gdb)
221 if (block_id >= max_registered_block_id)
(gdb) p max_registered_block_id
$14 = 0
(gdb) n
223 if (block_id >= max_registered_buffers)
(gdb) p max_registered_buffers
$15 = 5
(gdb) n
225 max_registered_block_id = block_id + 1;
(gdb)
228 regbuf = &registered_buffers[block_id];
(gdb) p max_registered_buffers
$16 = 5
(gdb) p max_registered_block_id
$17 = 1
(gdb) n
230 BufferGetTag(buffer, &regbuf->rnode, &regbuf->forkno, &regbuf->block);
(gdb) p *regbuf
$18 = {in_use = false, flags = 0 '\000', rnode = {spcNode = 0, dbNode = 0, relNode = 0}, forkno = MAIN_FORKNUM, block = 0,
page = 0x0, rdata_len = 0, rdata_head = 0x0, rdata_tail = 0x0, bkp_rdatas = {{next = 0x0, data = 0x0, len = 0}, {
next = 0x0, data = 0x0, len = 0}}, compressed_page = '\000' <repeats 8195 times>}
獲取buffer的tag
rnode/forkno/block
(gdb) n
231 regbuf->page = BufferGetPage(buffer);
(gdb) p *regbuf
$19 = {in_use = false, flags = 0 '\000', rnode = {spcNode = 1663, dbNode = 16402, relNode = 17034}, forkno = MAIN_FORKNUM,
block = 0, page = 0x0, rdata_len = 0, rdata_head = 0x0, rdata_tail = 0x0, bkp_rdatas = {{next = 0x0, data = 0x0,
len = 0}, {next = 0x0, data = 0x0, len = 0}}, compressed_page = '\000' <repeats 8195 times>}
設置flags等其他變量
(gdb) n
232 regbuf->flags = flags;
(gdb)
233 regbuf->rdata_tail = (XLogRecData *) &regbuf->rdata_head;
(gdb)
234 regbuf->rdata_len = 0;
(gdb)
244 for (i = 0; i < max_registered_block_id; i++)
(gdb) p regbuf->flags
$21 = 8 '\b'
(gdb) p *regbuf->rdata_tail
$23 = {next = 0x0, data = 0x292e1a8 "", len = 0}
(gdb) p regbuf->rdata_len
$24 = 0
檢查該page是否已被其他block_id注冊
最后設置in_use為true,返回heap_insert(下一步調用XLogRegisterBufData)
(gdb) n
246 registered_buffer *regbuf_old = &registered_buffers[i];
(gdb)
248 if (i == block_id || !regbuf_old->in_use)
(gdb)
249 continue;
(gdb)
244 for (i = 0; i < max_registered_block_id; i++)
(gdb)
258 regbuf->in_use = true;
(gdb)
259 }
(gdb)
heap_insert (relation=0x7f5cc0338228, tup=0x29b2440, cid=0, options=0, bistate=0x0) at heapam.c:2579
2579 XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
XLogRegisterBufData
進入XLogRegisterBufData函數
(gdb) step
XLogRegisterBufData (block_id=0 '\000', data=0x7fff03ba99d0 "\003", len=5) at xloginsert.c:366
366 Assert(begininsert_called);
尋找已注冊的緩存結構體
(gdb) n
369 regbuf = &registered_buffers[block_id];
(gdb)
370 if (!regbuf->in_use)
(gdb) p *regbuf
$25 = {in_use = true, flags = 8 '\b', rnode = {spcNode = 1663, dbNode = 16402, relNode = 17034}, forkno = MAIN_FORKNUM,
block = 0, page = 0x7f5c93854380 "\001", rdata_len = 0, rdata_head = 0x0, rdata_tail = 0x292e1a8, bkp_rdatas = {{
next = 0x0, data = 0x0, len = 0}, {next = 0x0, data = 0x0, len = 0}}, compressed_page = '\000' <repeats 8195 times>}
(gdb) p *regbuf->page
$26 = 1 '\001'
(gdb) n
374 if (num_rdatas >= max_rdatas)
(gdb)
在正在構造的WAL記錄中添加buffer相關的數據.
(gdb) n
376 rdata = &rdatas[num_rdatas++];
(gdb) p num_rdatas
$27 = 1
(gdb) p max_rdatas
$28 = 20
(gdb) n
378 rdata->data = data;
(gdb)
379 rdata->len = len;
(gdb)
381 regbuf->rdata_tail->next = rdata;
(gdb)
382 regbuf->rdata_tail = rdata;
(gdb)
383 regbuf->rdata_len += len;
(gdb)
384 }
(gdb) p *rdata
$29 = {next = 0x0, data = 0x7fff03ba99d0 "\003", len = 5}
(gdb)
完成調用,回到heap_insert
(gdb) n
heap_insert (relation=0x7f5cc0338228, tup=0x29b2440, cid=0, options=0, bistate=0x0) at heapam.c:2583
2583 heaptup->t_len - SizeofHeapTupleHeader);
繼續調用XLogRegisterBufData函數注冊tuple實際數據
2583 heaptup->t_len - SizeofHeapTupleHeader);
(gdb) n
2581 XLogRegisterBufData(0,
(gdb)
XLogSetRecordFlags
為即將"到來"的WAL記錄設置插入狀態標記
(gdb)
2586 XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
邏輯很簡單,設置標記位curinsert_flags
(gdb) step
XLogSetRecordFlags (flags=1 '\001') at xloginsert.c:399
399 Assert(begininsert_called);
(gdb) n
400 curinsert_flags = flags;
(gdb)
401 }
(gdb)
heap_insert (relation=0x7f5cc0338228, tup=0x29b2440, cid=0, options=0, bistate=0x0) at heapam.c:2588
2588 recptr = XLogInsert(RM_HEAP_ID, info);
(gdb)
調用XLogInsert,插入WAL
(gdb)
2590 PageSetLSN(page, recptr);
...
XLogInsert函數下節再行介紹.
四、參考資料
Write Ahead Logging — WAL
PostgreSQL 源碼解讀(4)- 插入數據#3(heap_insert)
PgSQL · 特性分析 · 數據庫崩潰恢復(上)
PgSQL · 特性分析 · 數據庫崩潰恢復(下)
PgSQL · 特性分析 · Write-Ahead Logging機制淺析
PostgreSQL WAL Buffers, Clog Buffers Deep Dive