In this figure, block0 represents the data block that stores the block descriptors. The block descriptors live in the first few data blocks at the very start of the buffer pool; for simplicity, the figure uses a single block0 to stand for all of them.
Each block descriptor contains a pointer to the location of its data block.
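As a rough, self-contained sketch (not InnoDB code: demo_block_t and its frame member are simplified stand-ins for buf_block_t and buf_block_t::frame, and page alignment is ignored), the descriptor-to-frame relationship can be pictured like this:
/* Minimal sketch: descriptors sit at the front of one allocation,
   each one pointing at a page-sized frame further back in the same memory. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 16384   /* assumed page size, like UNIV_PAGE_SIZE */
#define N_PAGES   4       /* arbitrary small example */

typedef struct {
    unsigned char *frame; /* points to the page data, like buf_block_t::frame */
} demo_block_t;

int main(void)
{
    /* one contiguous allocation: descriptors at the front, frames after them */
    unsigned char *mem = malloc(N_PAGES * sizeof(demo_block_t) + N_PAGES * PAGE_SIZE);
    if (mem == NULL) {
        return 1;
    }
    demo_block_t *blocks = (demo_block_t *) mem;
    unsigned char *frame = mem + N_PAGES * sizeof(demo_block_t);

    for (int i = 0; i < N_PAGES; i++) {
        blocks[i].frame = frame;   /* descriptor i -> its data page */
        frame += PAGE_SIZE;
    }
    for (int i = 0; i < N_PAGES; i++) {
        printf("descriptor %d -> frame at offset %ld\n",
               i, (long) (blocks[i].frame - mem));
    }
    free(mem);
    return 0;
}
Running it prints each descriptor together with the byte offset of the frame it owns. The buffer pool instance itself is set up by buf_pool_init_instance: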
/********************************************************************//**
Initialize a buffer pool instance.
@return DB_SUCCESS if all goes well. */
UNIV_INTERN
ulint
buf_pool_init_instance(
/*===================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
ulint buf_pool_size, /*!< in: size in bytes */
ulint instance_no) /*!< in: id of the instance */
{
ulint i;
buf_chunk_t* chunk;
/* 1. Initialize general fields
------------------------------- */
mutex_create(buf_pool_mutex_key,
&buf_pool->mutex, SYNC_BUF_POOL);
mutex_create(buf_pool_zip_mutex_key,
&buf_pool->zip_mutex, SYNC_BUF_BLOCK);
buf_pool_mutex_enter(buf_pool);
if (buf_pool_size > 0) {
buf_pool->n_chunks = 1;
buf_pool->chunks = chunk = mem_zalloc(sizeof *chunk);
UT_LIST_INIT(buf_pool->free);
if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
mem_free(chunk);
mem_free(buf_pool);
buf_pool_mutex_exit(buf_pool);
return(DB_ERROR);
}
buf_pool->instance_no = instance_no;
buf_pool->old_pool_size = buf_pool_size;
buf_pool->curr_size = chunk->size; /* number of blocks (pages), excluding the block descriptors at the start of the buffer pool */
buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
/* Create the hash tables; their capacity is twice the current number of data blocks */
buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
buf_pool->last_printout_time = ut_time();
}
/* 2. Initialize flushing fields
-------------------------------- */
mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
SYNC_BUF_FLUSH_LIST);
for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
buf_pool->no_flush[i] = os_event_create(NULL);
}
/* 3. Initialize LRU fields
--------------------------- */
/* All fields are initialized by mem_zalloc(). */
buf_pool_mutex_exit(buf_pool);
return(DB_SUCCESS);
}
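For context on why page_hash is sized against curr_size: it maps a (tablespace id, page number) pair to the buf_page_t of that page when the page is resident in the pool. The sketch below only illustrates the fold-and-lookup idea; the fold expression mirrors the one used by buf_page_address_fold(), but the plain modulo table is a stand-in for InnoDB's real hash_create()/hash_table_t machinery, not its implementation:
/* Illustrative only: how a (space, offset) pair could be folded into a cell. */
#include <stdio.h>

typedef unsigned long ulint;

static ulint
page_address_fold(ulint space, ulint offset)
{
    return ((space << 20) + space + offset); /* same shape as buf_page_address_fold() */
}

int main(void)
{
    ulint curr_size = 8192;          /* e.g. a 128MB pool of 16KB pages */
    ulint n_cells   = 2 * curr_size; /* same 2x sizing as the hash_create() calls above */

    ulint space  = 0;                /* tablespace id */
    ulint offset = 3;                /* page number */

    ulint fold = page_address_fold(space, offset);
    printf("page (%lu, %lu) folds to %lu, cell %lu of %lu\n",
           space, offset, fold, fold % n_cells, n_cells);
    return 0;
}
With curr_size = 8192 the table gets 16384 cells, which keeps the chains short even when the pool is full. buf_chunk_init, called above, is where the block descriptors and data frames are actually laid out: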
/********************************************************************//**
Allocates a chunk of buffer frames.
@return chunk, or NULL on failure */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_chunk_t* chunk, /*!< out: chunk of buffers */
ulint mem_size) /*!< in: requested size in bytes */
{
buf_block_t* block;
byte* frame;
ulint i;
/* Round down to a multiple of page size,
although it already should be. */
mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
/* Reserve space for the block descriptors. */
mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
+ (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
chunk->mem_size = mem_size;
/* Allocate the buffer pool's memory */
chunk->mem = os_mem_alloc_large(&chunk->mem_size);
if (UNIV_UNLIKELY(chunk->mem == NULL)) {
return(NULL);
}
/* Allocate the block descriptors from
the start of the memory block. */
chunk->blocks = chunk->mem;
/* Align a pointer to the first frame. Note that when
os_large_page_size is smaller than UNIV_PAGE_SIZE,
we may allocate one fewer block than requested. When
it is bigger, we may allocate more blocks than requested. */
/* UNIV_PAGE_SIZE is 16KB */
frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
- (frame != chunk->mem);
/* Subtract the space needed for block descriptors. */
/* The front of the buffer pool holds the so-called block descriptors, i.e. the
attributes of each block. chunk->blocks is of type buf_block_t*, that is,
buf_block_struct*; sizeof(buf_block_struct) is about 320 bytes (observed while
debugging on Windows; the value on Linux may differ).
Here we skip past the pages (blocks) occupied by the block descriptors, so that
frame ends up pointing at the blocks that will later hold table and index data. */
{
ulint size = chunk->size;
while (frame < (byte*) (chunk->blocks + size)) {
frame += UNIV_PAGE_SIZE;
size--;
}
chunk->size = size;
}
/* Init block structs and assign frames for them. Then we
assign the frames to the first blocks (we already mapped the
memory above). */
/* Loop page by page (block by block); what is actually being initialized is each block descriptor */
block = chunk->blocks;
for (i = chunk->size; i--; ) {
/* Initialize the fields of this block (a block descriptor): for example,
assign frame (the address of its data block) to it, and create its mutex, condition variable, etc. */
buf_block_init(buf_pool, block, frame);
UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
/* Add the block to the free list */
/* Add it to the famous free list. Note that the address of block->page can also
be used as a buf_block_t, because page is the very first member of buf_block_t. */
UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
ut_d(block->page.in_free_list = TRUE);
ut_ad(buf_pool_from_block(block) == buf_pool);
block++; /* next block descriptor */
frame += UNIV_PAGE_SIZE; /* next data block */
}
#ifdef PFS_GROUP_BUFFER_SYNC
pfs_register_buffer_block(chunk);
#endif
return(chunk);
}
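To make the size arithmetic above concrete, here is a small standalone sketch (assumptions: 16KB pages and a 320-byte descriptor, per the comment above; it also pretends the allocation returned exactly the requested size at a page-aligned address, which os_mem_alloc_large() does not guarantee):
/* Standalone sketch of buf_chunk_init()'s size arithmetic, not InnoDB code. */
#include <stdio.h>

typedef unsigned long ulint;

#define UNIV_PAGE_SIZE  16384UL
#define BLOCK_DESC_SIZE 320UL   /* assumed sizeof(buf_block_t); platform-dependent */

/* round n down to a multiple of m (m is a power of two), like ut_2pow_round() */
static ulint round_down(ulint n, ulint m) { return n & ~(m - 1); }

int main(void)
{
    ulint mem_size = 128UL * 1024 * 1024;   /* requested pool size */

    mem_size = round_down(mem_size, UNIV_PAGE_SIZE);

    /* reserve whole pages for the block descriptors, as the code above does */
    mem_size += round_down((mem_size / UNIV_PAGE_SIZE) * BLOCK_DESC_SIZE
                           + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);

    ulint size = mem_size / UNIV_PAGE_SIZE; /* assume the allocation is page-aligned */

    /* walk frame forward past the descriptor area, exactly like the loop above */
    unsigned long frame = 0;                 /* byte offset of the first frame */
    while (frame < size * BLOCK_DESC_SIZE) { /* i.e. frame < (byte*)(blocks + size) */
        frame += UNIV_PAGE_SIZE;
        size--;
    }

    printf("chunk of %lu bytes -> %lu usable pages, %lu bytes for descriptors\n",
           mem_size, size, frame);
    return 0;
}
For a 128MB request this prints 8192 usable pages, with 160 pages (2,621,440 bytes) at the front of the chunk consumed by descriptors. The UT_LIST_ADD_LAST macro used at the end of the initialization loop above is defined as follows: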
/*******************************************************************//**
Adds the node as the last element in a two-way linked list.
@param NAME list name
@param BASE the base node (not a pointer to it)
@param N pointer to the node to be added to the list
*/
#define UT_LIST_ADD_LAST(NAME, BASE, N)\
{\
ut_ad(N != NULL);\
((BASE).count)++;\
((N)->NAME).prev = (BASE).end;\
((N)->NAME).next = NULL;\
if ((BASE).end != NULL) {\
ut_ad((BASE).end != (N));\
(((BASE).end)->NAME).next = (N);\
}\
(BASE).end = (N);\
if ((BASE).start == NULL) {\
(BASE).start = (N);\
}\
}\
Substituting NAME = list, BASE = buf_pool->free and N = &block->page, the call in buf_chunk_init
UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
expands to:
{
ut_ad((&block->page) != NULL);
((buf_pool->free).count)++;
(((&block->page))->list).prev = (buf_pool->free).end;
(((&block->page))->list).next = NULL;
if ((buf_pool->free).end != NULL) {
ut_ad((buf_pool->free).end != ((&block->page)));
(((buf_pool->free).end)->list).next = ((&block->page));
}
(buf_pool->free).end = ((&block->page));
if ((buf_pool->free).start == NULL) {
(buf_pool->free).start = ((&block->page));
}
}
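The same intrusive-list pattern can be reproduced in a few lines of standalone C. The sketch below is purely illustrative (elem_t, elem_list_t and list_add_last are made-up names, not InnoDB code); the point is that the prev/next links live inside the element itself, just as the list node lives inside buf_page_t:
/* Minimal intrusive doubly-linked list, illustrating the UT_LIST idea. */
#include <stdio.h>
#include <stddef.h>

typedef struct elem elem_t;

struct elem {
    int id;
    struct {            /* plays the role of UT_LIST_NODE_T(buf_page_t) list */
        elem_t *prev;
        elem_t *next;
    } list;
};

typedef struct {        /* plays the role of the list base, like buf_pool->free */
    unsigned long count;
    elem_t       *start;
    elem_t       *end;
} elem_list_t;

static void
list_add_last(elem_list_t *base, elem_t *n)  /* mirrors UT_LIST_ADD_LAST */
{
    base->count++;
    n->list.prev = base->end;
    n->list.next = NULL;
    if (base->end != NULL) {
        base->end->list.next = n;
    }
    base->end = n;
    if (base->start == NULL) {
        base->start = n;
    }
}

int main(void)
{
    elem_t a = { 1, { NULL, NULL } }, b = { 2, { NULL, NULL } };
    elem_list_t free_list = { 0, NULL, NULL };

    list_add_last(&free_list, &a);
    list_add_last(&free_list, &b);

    for (elem_t *e = free_list.start; e != NULL; e = e->list.next) {
        printf("elem %d\n", e->id);
    }
    return 0;
}
Because the links are embedded in the element, putting a page on a list never allocates anything: the buf_page_t is simply relinked, and InnoDB uses the same pattern for its other buffer pool lists. buf_page_t, whose list member is the node being linked above, is declared as follows: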
/** Buffer page (uncompressed or compressed) */
typedef struct buf_page_struct buf_page_t;
struct buf_page_struct{
/** @name General fields
None of these bit-fields must be modified without holding
buf_page_get_mutex() [buf_block_struct::mutex or
buf_pool->zip_mutex], since they can be stored in the same
machine word. Some of these fields are additionally protected
by buf_pool->mutex. */
/* @{ */
unsigned space:32; /*!< tablespace id; also protected
by buf_pool->mutex. */
unsigned offset:32; /*!< page number; also protected
by buf_pool->mutex. */
unsigned state:BUF_PAGE_STATE_BITS;
/*!< state of the control block; also
protected by buf_pool->mutex.
State transitions from
BUF_BLOCK_READY_FOR_USE to
BUF_BLOCK_MEMORY need not be
protected by buf_page_get_mutex().
@see enum buf_page_state */
#ifndef UNIV_HOTBACKUP
unsigned flush_type:2; /*!< if this block is currently being
flushed to disk, this tells the
flush_type.
@see enum buf_flush */
unsigned io_fix:2; /*!< type of pending I/O operation;
also protected by buf_pool->mutex
@see enum buf_io_fix */
unsigned buf_fix_count:19;/*!< count of how manyfold this block
is currently bufferfixed */
unsigned buf_pool_index:6;/*!< index number of the buffer pool
that this block belongs to */
# if MAX_BUFFER_POOLS > 64
# error "MAX_BUFFER_POOLS > 64; redefine buf_pool_index:6"
# endif
/* @} */
#endif /* !UNIV_HOTBACKUP */
page_zip_des_t zip; /*!< compressed page; zip.data
(but not the data it points to) is
also protected by buf_pool->mutex;
state == BUF_BLOCK_ZIP_PAGE and
zip.data == NULL means an active
buf_pool->watch */
#ifndef UNIV_HOTBACKUP
buf_page_t* hash; /*!< node used in chaining to
buf_pool->page_hash or
buf_pool->zip_hash */
#ifdef UNIV_DEBUG
ibool in_page_hash; /*!< TRUE if in buf_pool->page_hash */
ibool in_zip_hash; /*!< TRUE if in buf_pool->zip_hash */
#endif /* UNIV_DEBUG */
/** @name Page flushing fields
All these are protected by buf_pool->mutex. */
/* @{ */
UT_LIST_NODE_T(buf_page_t) list;
/*!< based on state, this is a
list node, protected either by
buf_pool->mutex or by
buf_pool->flush_list_mutex,
in one of the following lists in
buf_pool:
- BUF_BLOCK_NOT_USED: free
- BUF_BLOCK_FILE_PAGE: flush_list
- BUF_BLOCK_ZIP_DIRTY: flush_list
- BUF_BLOCK_ZIP_PAGE: zip_clean
- BUF_BLOCK_ZIP_FREE: zip_free[]
If bpage is part of flush_list
then the node pointers are
covered by buf_pool->flush_list_mutex.
Otherwise these pointers are
protected by buf_pool->mutex.
The contents of the list node
is undefined if !in_flush_list
&& state == BUF_BLOCK_FILE_PAGE,
or if state is one of
BUF_BLOCK_MEMORY,
BUF_BLOCK_REMOVE_HASH or
BUF_BLOCK_READY_IN_USE. */
#ifdef UNIV_DEBUG
ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
when buf_pool->flush_list_mutex is
free, the following should hold:
in_flush_list
== (state == BUF_BLOCK_FILE_PAGE
|| state == BUF_BLOCK_ZIP_DIRTY)
Writes to this field must be
covered by both block->mutex
and buf_pool->flush_list_mutex. Hence
reads can happen while holding
any one of the two mutexes */
ibool in_free_list; /*!< TRUE if in buf_pool->free; when
buf_pool->mutex is free, the following
should hold: in_free_list
== (state == BUF_BLOCK_NOT_USED) */
#endif /* UNIV_DEBUG */
ib_uint64_t newest_modification;