innodb buffer pool初始化

990阅读 0评论2013-02-21 gladness
分类:Mysql/postgreSQL

本图中,block0表示存放block descriptor的数据块。block descriptor是存放在buffer pool最开始的若干数据块中。本图为了表示方便,只用了block0表示存放block descriptor的数据块。

每个block descriptor中,有一个指针指向数据块位置。

/********************************************************************//**
Initialize a buffer pool instance.

Creates the pool mutexes and, when buf_pool_size is non-zero, allocates a
single chunk, fills the free list through buf_chunk_init() and creates the
two hash tables.  The flush list mutex and the no_flush events are created
regardless of buf_pool_size.

On chunk initialization failure the chunk and buf_pool itself are freed, so
the caller must not touch buf_pool after DB_ERROR is returned.
NOTE(review): whether this function should free the caller-supplied buf_pool
at all, and who destroys the two mutexes created above on that path, cannot
be determined from this file -- confirm against the caller.

@return DB_SUCCESS if all goes well, DB_ERROR if the chunk could not be
initialized. */
UNIV_INTERN
ulint
buf_pool_init_instance(
/*===================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		buf_pool_size,	/*!< in: size in bytes */
	ulint		instance_no)	/*!< in: id of the instance */
{
	ulint		i;
	buf_chunk_t*	chunk;

	/* 1. Initialize general fields
	------------------------------- */
	mutex_create(buf_pool_mutex_key,
		     &buf_pool->mutex, SYNC_BUF_POOL);
	mutex_create(buf_pool_zip_mutex_key,
		     &buf_pool->zip_mutex, SYNC_BUF_BLOCK);

	buf_pool_mutex_enter(buf_pool);

	if (buf_pool_size > 0) {
		buf_pool->n_chunks = 1;
		buf_pool->chunks = chunk = mem_zalloc(sizeof *chunk);

		UT_LIST_INIT(buf_pool->free);

		if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
			mem_free(chunk);

			/* The mutex is a member of *buf_pool, so it has to
			be released before that memory is freed; doing it
			afterwards would access freed memory. */
			buf_pool_mutex_exit(buf_pool);

			mem_free(buf_pool);

			return(DB_ERROR);
		}

		buf_pool->instance_no = instance_no;
		buf_pool->old_pool_size = buf_pool_size;

		/* Number of blocks (pages) in the chunk; the pages at the
		start of the buffer pool that hold the block descriptors
		are not counted. */
		buf_pool->curr_size = chunk->size;
		buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;

		/* Create the hash tables, each sized at twice the current
		number of data blocks. */
		buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
		buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);

		buf_pool->last_printout_time = ut_time();
	}

	/* 2. Initialize flushing fields
	-------------------------------- */

	mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
		     SYNC_BUF_FLUSH_LIST);

	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
		buf_pool->no_flush[i] = os_event_create(NULL);
	}

	/* 3. Initialize LRU fields
	--------------------------- */

	/* All fields are initialized by mem_zalloc(). */

	buf_pool_mutex_exit(buf_pool);

	return(DB_SUCCESS);
}



/********************************************************************//**
Allocates a chunk of buffer frames.

The block descriptors are placed at the start of the allocated memory and
the page frames follow them; every descriptor that gets a frame is appended
to buf_pool->free.

@return	chunk, or NULL on failure */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk,		/*!< out: chunk of buffers */
	ulint		mem_size)	/*!< in: requested size in bytes */
{
	buf_block_t*	descr;
	byte*		page;
	ulint		n_pages;
	ulint		n_left;

	/* Round down to a multiple of page size,
	although it already should be. */
	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);

	/* Total to allocate: the frames themselves plus the space reserved
	for one block descriptor per requested page. */
	chunk->mem_size = mem_size
		+ ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *descr)
				+ (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);

	/* Allocate the memory of the buffer pool chunk; the allocator is
	handed a pointer to mem_size and may adjust it. */
	chunk->mem = os_mem_alloc_large(&chunk->mem_size);

	if (UNIV_UNLIKELY(chunk->mem == NULL)) {

		return(NULL);
	}

	/* The block descriptors live at the very start of the memory. */
	chunk->blocks = chunk->mem;

	/* Align a pointer to the first frame.  Note that when
	os_large_page_size is smaller than UNIV_PAGE_SIZE,
	we may allocate one fewer block than requested.  When
	it is bigger, we may allocate more blocks than requested.
	NOTE(review): the original annotation states UNIV_PAGE_SIZE is
	16K -- not visible in this file, confirm. */
	page = ut_align(chunk->mem, UNIV_PAGE_SIZE);
	chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
		- (page != chunk->mem);

	/* Subtract the space needed for block descriptors: step over every
	page that overlaps the descriptor array, so that page ends up at the
	first frame available for table and index data.
	NOTE(review): the original annotation reports sizeof(buf_block_t)
	as 320 bytes when traced on Windows; the value on other platforms
	is unverified. */
	n_pages = chunk->size;

	while (page < (byte*) (chunk->blocks + n_pages)) {
		page += UNIV_PAGE_SIZE;
		n_pages--;
	}

	chunk->size = n_pages;

	/* Init block structs and assign frames for them.  What is being
	initialized on each iteration is a block descriptor; the frame it
	is given is the next data page. */
	descr = chunk->blocks;

	for (n_left = chunk->size;
	     n_left > 0;
	     n_left--, descr++, page += UNIV_PAGE_SIZE) {

		/* Set up this descriptor and hand it its frame address. */
		buf_block_init(buf_pool, descr, page);
		UNIV_MEM_INVALID(descr->frame, UNIV_PAGE_SIZE);

		/* Add the block to the free list.  The list links the
		buf_page_t embedded in the descriptor.
		NOTE(review): the original annotation says page is the
		first member of buf_block_t, so its address can also be
		used as a buf_block_t pointer -- confirm against the
		struct definition. */
		UT_LIST_ADD_LAST(list, buf_pool->free, (&descr->page));

		ut_d(descr->page.in_free_list = TRUE);
		ut_ad(buf_pool_from_block(descr) == buf_pool);
	}

#ifdef PFS_GROUP_BUFFER_SYNC
	pfs_register_buffer_block(chunk);
#endif
	return(chunk);
}

 

 

/*******************************************************************//**
Adds the node as the last element in a two-way linked list.

Increments the element count of BASE, links N after the current end node
(if any), makes N the new end, and also makes it the start when the list
was empty.  Every argument is expanded more than once, so arguments must
not have side effects.  The expansion is a brace-enclosed block; ut_ad()
must be defined at the point of use.

@param NAME	list name
@param BASE	the base node (not a pointer to it)
@param N	pointer to the node to be added to the list
*/
#define UT_LIST_ADD_LAST(NAME, BASE, N)\
{\
	ut_ad((N) != NULL);\
	((BASE).count)++;\
	((N)->NAME).prev = (BASE).end;\
	((N)->NAME).next = NULL;\
	if ((BASE).end != NULL) {\
		ut_ad((BASE).end != (N));\
		(((BASE).end)->NAME).next = (N);\
	}\
	(BASE).end = (N);\
	if ((BASE).start == NULL) {\
		(BASE).start = (N);\
	}\
}

 

/* Illustration only: the macro call on the next line, followed by its
expansion written out by hand with NAME = list, BASE = buf_pool->free and
N = (&block->page).  The trailing backslashes are carried over from the
macro definition; this text is not meant to be compiled as it stands. */
UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));

{\

     ut_ad((&block->page) != NULL);\

     ((buf_pool->free).count)++;\

     (((&block->page))->list).prev = (buf_pool->free).end;\

     (((&block->page))->list).next = NULL;\

     if ((buf_pool->free).end != NULL) {\

         ut_ad((buf_pool->free).end != ((&block->page)));\

         (((buf_pool->free).end)->list).next = ((&block->page));\

     }\

     (buf_pool->free).end = ((&block->page));\

     if ((buf_pool->free).start == NULL) {\

         (buf_pool->free).start = ((&block->page));\

     }\

}\

 

/** Buffer page (uncompressed or compressed); buf_page_t is the type name
used for struct buf_page_struct. */

typedef  struct buf_page_struct      buf_page_t;

 

struct buf_page_struct{

     /** @name General fields

     None of these bit-fields must be modified without holding

     buf_page_get_mutex() [buf_block_struct::mutex or

     buf_pool->zip_mutex], since they can be stored in the same

     machine word.  Some of these fields are additionally protected

     by buf_pool->mutex. */

     /* @{ */

 

     unsigned space:32; /*!< tablespace id; also protected

                       by buf_pool->mutex. */

     unsigned offset:32;    /*!< page number; also protected

                       by buf_pool->mutex. */

 

     unsigned state:BUF_PAGE_STATE_BITS;

                       /*!< state of the control block; also

                       protected by buf_pool->mutex.

                       State transitions from

                       BUF_BLOCK_READY_FOR_USE to

                       BUF_BLOCK_MEMORY need not be

                       protected by buf_page_get_mutex().

                       @see enum buf_page_state */

#ifndef UNIV_HOTBACKUP

     unsigned flush_type:2; /*!< if this block is currently being

                       flushed to disk, this tells the

                       flush_type.

                       @see enum buf_flush */

     unsigned io_fix:2; /*!< type of pending I/O operation;

                       also protected by buf_pool->mutex

                       @see enum buf_io_fix */

     unsigned buf_fix_count:19;/*!< count of how manyfold this block

                       is currently bufferfixed */

     unsigned buf_pool_index:6;/*!< index number of the buffer pool

                       that this block belongs to */

# if MAX_BUFFER_POOLS > 64

#  error "MAX_BUFFER_POOLS > 64; redefine buf_pool_index:6"

# endif

     /* @} */

#endif /* !UNIV_HOTBACKUP */

     page_zip_des_t     zip;     /*!< compressed page; zip.data

                       (but not the data it points to) is

                       also protected by buf_pool->mutex;

                       state == BUF_BLOCK_ZIP_PAGE and

                       zip.data == NULL means an active

                       buf_pool->watch */

#ifndef UNIV_HOTBACKUP

     buf_page_t*   hash;         /*!< node used in chaining to

                       buf_pool->page_hash or

                       buf_pool->zip_hash */

#ifdef UNIV_DEBUG

     ibool         in_page_hash; /*!< TRUE if in buf_pool->page_hash */

     ibool         in_zip_hash;  /*!< TRUE if in buf_pool->zip_hash */

#endif /* UNIV_DEBUG */

 

     /** @name Page flushing fields

     All these are protected by buf_pool->mutex. */

     /* @{ */

 

     UT_LIST_NODE_T(buf_page_t) list;

                       /*!< based on state, this is a

                       list node, protected either by

                       buf_pool->mutex or by

                       buf_pool->flush_list_mutex,

                       in one of the following lists in

                       buf_pool:

 

                       - BUF_BLOCK_NOT_USED:  free

                       - BUF_BLOCK_FILE_PAGE: flush_list

                       - BUF_BLOCK_ZIP_DIRTY: flush_list

                       - BUF_BLOCK_ZIP_PAGE:  zip_clean

                       - BUF_BLOCK_ZIP_FREE:  zip_free[]

 

                       If bpage is part of flush_list

                       then the node pointers are

                       covered by buf_pool->flush_list_mutex.

                       Otherwise these pointers are

                       protected by buf_pool->mutex.

 

                       The contents of the list node

                       is undefined if !in_flush_list

                       && state == BUF_BLOCK_FILE_PAGE,

                       or if state is one of

                       BUF_BLOCK_MEMORY,

                       BUF_BLOCK_REMOVE_HASH or

                       BUF_BLOCK_READY_IN_USE. */

 

#ifdef UNIV_DEBUG

     ibool         in_flush_list;     /*!< TRUE if in buf_pool->flush_list;

                       when buf_pool->flush_list_mutex is

                       free, the following should hold:

                       in_flush_list

                       == (state == BUF_BLOCK_FILE_PAGE

                           || state == BUF_BLOCK_ZIP_DIRTY)

                       Writes to this field must be

                       covered by both block->mutex

                       and buf_pool->flush_list_mutex. Hence

                       reads can happen while holding

                       any one of the two mutexes */

     ibool         in_free_list; /*!< TRUE if in buf_pool->free; when

                       buf_pool->mutex is free, the following

                       should hold: in_free_list

                       == (state == BUF_BLOCK_NOT_USED) */

#endif /* UNIV_DEBUG */

     ib_uint64_t   newest_modification;

上一篇:innodb buffer pool中的page hash表
下一篇:innodb读取innodb buffer page的函数buf_page_get_gen