InnoDB 读取 buffer pool 中数据页的函数 buf_page_get_gen

1280阅读 0评论2013-02-25 gladness
分类:MySQL/PostgreSQL


/********************************************************************//**

This is the general function used to get access to a database page.

The page is located in the buffer pool (first through the caller's guess,

then through the page hash). If it is absent and the mode allows it, a

read from the data file is requested and the lookup is restarted. A page

that exists only in compressed form is relocated into a freshly allocated

uncompressed block and decompressed. Finally the block is buffer-fixed,

latched according to rw_latch, and pushed onto the memo of mtr.

NULL is returned when the page is not in the buffer pool and mode is

BUF_GET_IF_IN_POOL, BUF_PEEK_IF_IN_POOL or BUF_GET_IF_IN_POOL_OR_WATCH,

when the page is still being read in and mode is BUF_GET_IF_IN_POOL or

BUF_PEEK_IF_IN_POOL, and (debug builds with ibuf_debug set) when the

block was deliberately evicted.

@return  pointer to the block or NULL */

UNIV_INTERN

buf_block_t*

buf_page_get_gen(

/*=============*/

     ulint         space,   /*!< in: space id */

     ulint         zip_size,/*!< in: compressed page size in bytes

                   or 0 for uncompressed pages */

     ulint         offset,  /*!< in: page number */

     ulint         rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */

     buf_block_t*  guess,   /*!< in: guessed block or NULL */

     ulint         mode,    /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,

                   BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH,

                   BUF_GET_IF_IN_POOL_OR_WATCH, or

                   BUF_GET_POSSIBLY_FREED (also accepted by the

                   debug check below) */

     const char*   file,    /*!< in: file name */

     ulint         line,    /*!< in: line where called */

     mtr_t*        mtr) /*!< in: mini-transaction */

{

     buf_block_t*  block;

     ulint         fold;

     unsigned access_time;

     ulint         fix_type;

     ibool         must_read;

     ulint         retries = 0;

     buf_pool_t*   buf_pool = buf_pool_get(space, offset);

 

     /* Debug-only sanity checks on the arguments. */

     ut_ad(mtr);

     ut_ad(mtr->state == MTR_ACTIVE);

     ut_ad((rw_latch == RW_S_LATCH)

           || (rw_latch == RW_X_LATCH)

           || (rw_latch == RW_NO_LATCH));

#ifdef UNIV_DEBUG

     /* BUF_GET_NO_LATCH must be paired with RW_NO_LATCH; any mode

     outside the known set is a programming error. */

     switch (mode) {

     case BUF_GET_NO_LATCH:

         ut_ad(rw_latch == RW_NO_LATCH);

         break;

     case BUF_GET:

     case BUF_GET_IF_IN_POOL:

     case BUF_PEEK_IF_IN_POOL:

     case BUF_GET_IF_IN_POOL_OR_WATCH:

     case BUF_GET_POSSIBLY_FREED:

         break;

     default:

         ut_error;

     }

#endif /* UNIV_DEBUG */

     ut_ad(zip_size == fil_space_get_zip_size(space));

     ut_ad(ut_is_2pow(zip_size));

#ifndef UNIV_LOG_DEBUG

     ut_ad(!ibuf_inside(mtr)

           || ibuf_page_low(space, zip_size, offset,

                     FALSE, file, line, NULL));

#endif

     /* Count this request in the buffer pool statistics. */

     buf_pool->stat.n_page_gets++;

     /* Fold value of (space, offset); reused by every page hash

     lookup and watch request below. */

     fold = buf_page_address_fold(space, offset);

     /* Restart point: the guess is re-validated and the buffer pool

     mutex is re-acquired on every pass. All "goto loop" sites have

     released the buffer pool mutex beforehand, except the one after

     buf_flush_page_try() - presumably that call releases it on

     success; confirm against its definition. */

loop:

     block = guess;

     buf_pool_mutex_enter(buf_pool);

 

     if (block) {

         /* If the guess is a compressed page descriptor that

         has been allocated by buf_page_alloc_descriptor(),

         it may have been freed by buf_relocate(). */

 

         /* Drop the guess unless it is an uncompressed block that

         currently holds exactly this (space, offset) as a file

         page. */

         if (!buf_block_is_uncompressed(buf_pool, block)

             || offset != block->page.offset

             || space != block->page.space

             || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {

 

              block = guess = NULL;

         } else {

              ut_ad(!block->page.in_zip_hash);

              ut_ad(block->page.in_page_hash);

         }

     }

 

      /* No usable guess: look the page up in the page hash table. */

     if (block == NULL) {

         block = (buf_block_t*) buf_page_hash_get_low(

              buf_pool, space, offset, fold);

     }

 

     /* Re-entry point with the buffer pool mutex held and block

     already set from a fresh page hash lookup. */

loop2:

     /* A watch sentinel is handled the same way as a missing page. */

     if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {

         block = NULL;

     }

 

     if (block == NULL) {

         /* Page not in buf_pool: needs to be read from file */

 

         if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {

              /* Request a watch on the page. A non-NULL result

              is used as the block and processed normally. */

              block = (buf_block_t*) buf_pool_watch_set(

                   space, offset, fold);

 

              if (UNIV_LIKELY_NULL(block)) {

 

                   goto got_block;

              }

         }

 

         buf_pool_mutex_exit(buf_pool);

 

         /* The "if in pool" modes never trigger a read. */

         if (mode == BUF_GET_IF_IN_POOL

             || mode == BUF_PEEK_IF_IN_POOL

             || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {

 

              return(NULL);

         }

         /* Read the page from the data file into the buffer pool.

         On success also try random read-ahead and reset the retry

         counter; otherwise count the failed attempt and abort once

         BUF_PAGE_READ_MAX_RETRIES consecutive attempts have failed. */

         if (buf_read_page(space, zip_size, offset)) {

              buf_read_ahead_random(space, zip_size, offset,

                             ibuf_inside(mtr));

 

              retries = 0;

         } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {

              ++retries;

         } else {

              /* NOTE(review): all three values are printed with

              %lu; this assumes ulint and the type of

              BUF_PAGE_READ_MAX_RETRIES are both unsigned long -

              confirm against their definitions. */

              fprintf(stderr, "InnoDB: Error: Unable"

                   " to read tablespace %lu page no"

                   " %lu into the buffer pool after"

                   " %lu attempts\n"

                   "InnoDB: The most probable cause"

                   " of this error may be that the"

                   " table has been corrupted.\n"

                   "InnoDB: You can try to fix this"

                   " problem by using"

                   " innodb_force_recovery.\n"

                   "InnoDB: Please see reference manual"

                   " for more details.\n"

                   "InnoDB: Aborting...\n",

                   space, offset,

                   BUF_PAGE_READ_MAX_RETRIES);

 

              ut_error;

         }

 

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG

         /* Validate the buffer pool on every 37th pass through here. */

         ut_a(++buf_dbg_counter % 37 || buf_validate());

#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

         /* Search for the page again now that a read was attempted. */

         goto loop;

     }

 

     /* A block was found; the buffer pool mutex is held here. */

got_block:

     ut_ad(page_zip_get_size(&block->page.zip) == zip_size);

 

     /* TRUE when the block is currently I/O-fixed for a read. */

     must_read = buf_block_get_io_fix(block) == BUF_IO_READ;

 

     if (must_read && (mode == BUF_GET_IF_IN_POOL

                || mode == BUF_PEEK_IF_IN_POOL)) {

 

         /* The page is being read to buffer pool,

         but we cannot wait around for the read to

         complete. */

         buf_pool_mutex_exit(buf_pool);

 

         return(NULL);

     }

 

     /* Make sure the block is an uncompressed file page, converting

     a compressed-only page if necessary. */

     switch (buf_block_get_state(block)) {

         buf_page_t*   bpage;

         ibool         success;

 

     case BUF_BLOCK_FILE_PAGE:

         /* Already an uncompressed file page: nothing to do. */

         break;

 

     case BUF_BLOCK_ZIP_PAGE:

     case BUF_BLOCK_ZIP_DIRTY:

         /* Here "block" is really a compressed page descriptor;

         only its page member is used through bpage. */

         bpage = &block->page;

         /* Protect bpage->buf_fix_count. */

         mutex_enter(&buf_pool->zip_mutex);

 

         if (bpage->buf_fix_count

             || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {

              /* This condition often occurs when the buffer

              is not buffer-fixed, but I/O-fixed by

              buf_page_init_for_read(). */

              mutex_exit(&buf_pool->zip_mutex);

              /* Also entered by goto from below, with only the

              buffer pool mutex held. */

wait_until_unfixed:

              /* The block is buffer-fixed or I/O-fixed.

              Try again later. */

              buf_pool_mutex_exit(buf_pool);

              os_thread_sleep(WAIT_FOR_READ);

 

              goto loop;

         }

 

         /* Allocate an uncompressed page. */

         /* Both mutexes are released before asking for a free

         block, so the compressed page may change meanwhile; it is

         re-checked after the mutexes are re-acquired. */

         buf_pool_mutex_exit(buf_pool);

         mutex_exit(&buf_pool->zip_mutex);

 

         block = buf_LRU_get_free_block(buf_pool);

         ut_a(block);

 

         buf_pool_mutex_enter(buf_pool);

         mutex_enter(&block->mutex);

 

         {

              buf_page_t*   hash_bpage;

 

              /* Check that the page hash still maps this page

              to the same descriptor. */

              hash_bpage = buf_page_hash_get_low(

                   buf_pool, space, offset, fold);

 

              if (UNIV_UNLIKELY(bpage != hash_bpage)) {

                   /* The buf_pool->page_hash was modified

                   while buf_pool->mutex was released.

                   Free the block that was allocated. */

 

                   buf_LRU_block_free_non_file_page(block);

                   mutex_exit(&block->mutex);

 

                   /* Continue with whatever the hash holds

                   now (possibly NULL). */

                   block = (buf_block_t*) hash_bpage;

                   goto loop2;

              }

         }

 

         if (UNIV_UNLIKELY

             (bpage->buf_fix_count

              || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {

 

              /* The block was buffer-fixed or I/O-fixed

              while buf_pool->mutex was not held by this thread.

              Free the block that was allocated and try again.

              This should be extremely unlikely. */

 

              buf_LRU_block_free_non_file_page(block);

              mutex_exit(&block->mutex);

 

              goto wait_until_unfixed;

         }

 

         /* Move the compressed page from bpage to block,

         and uncompress it. */

 

         mutex_enter(&buf_pool->zip_mutex);

 

         buf_relocate(bpage, &block->page);

         buf_block_init_low(block);

         block->lock_hash_val = lock_rec_hash(space, offset);

 

         UNIV_MEM_DESC(&block->page.zip.data,

                    page_zip_get_size(&block->page.zip), block);

 

         if (buf_page_get_state(&block->page)

             == BUF_BLOCK_ZIP_PAGE) {

              /* Clean compressed page: in debug builds it is

              removed from the zip_clean list. */

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG

              UT_LIST_REMOVE(list, buf_pool->zip_clean,

                          &block->page);

#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

              ut_ad(!block->page.in_flush_list);

         } else {

              /* Relocate buf_pool->flush_list. */

              buf_flush_relocate_on_flush_list(bpage,

                                  &block->page);

         }

 

         /* Buffer-fix, I/O-fix, and X-latch the block

         for the duration of the decompression.

         Also add the block to the unzip_LRU list. */

         block->page.state = BUF_BLOCK_FILE_PAGE;

 

         /* Insert at the front of unzip_LRU list */

         buf_unzip_LRU_add_block(block, FALSE);

 

         block->page.buf_fix_count = 1;

         buf_block_set_io_fix(block, BUF_IO_READ);

         rw_lock_x_lock_inline(&block->lock, 0, file, line);

 

         UNIV_MEM_INVALID(bpage, sizeof *bpage);

 

         mutex_exit(&block->mutex);

         mutex_exit(&buf_pool->zip_mutex);

         /* Count the pending decompression; updated while the

         buffer pool mutex is still held. */

         buf_pool->n_pend_unzip++;

 

         buf_pool_mutex_exit(buf_pool);

 

         /* The contents were relocated into block, so the old

         descriptor can be freed. */

         buf_page_free_descriptor(bpage);

 

         /* Decompress the page and apply buffered operations

         while not holding buf_pool->mutex or block->mutex. */

         success = buf_zip_decompress(block, srv_use_checksums);

         ut_a(success);

 

         if (UNIV_LIKELY(!recv_no_ibuf_operations)) {

              ibuf_merge_or_delete_for_page(block, space, offset,

                                  zip_size, TRUE);

         }

 

         /* Unfix and unlatch the block. */

         /* The buffer pool mutex stays held after this case, as

         the code following the switch expects. */

         buf_pool_mutex_enter(buf_pool);

         mutex_enter(&block->mutex);

         block->page.buf_fix_count--;

         buf_block_set_io_fix(block, BUF_IO_NONE);

         mutex_exit(&block->mutex);

         buf_pool->n_pend_unzip--;

         rw_lock_x_unlock(&block->lock);

 

         break;

 

     /* Every other block state is treated as a fatal error here. */

     case BUF_BLOCK_ZIP_FREE:

     case BUF_BLOCK_NOT_USED:

     case BUF_BLOCK_READY_FOR_USE:

     case BUF_BLOCK_MEMORY:

     case BUF_BLOCK_REMOVE_HASH:

         ut_error;

         break;

     }

 

     ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

 

     /* block->mutex is held from here until after the buffer-fix

     below. */

     mutex_enter(&block->mutex);

#if UNIV_WORD_SIZE == 4

     /* On 32-bit systems, there is no padding in buf_page_t.  On

     other systems, Valgrind could complain about uninitialized pad

     bytes. */

     UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);

#endif

#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG

     if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)

         && ibuf_debug) {

         /* Try to evict the block from the buffer pool, to use the

         insert buffer (change buffer) as much as possible. */

 

         if (buf_LRU_free_block(&block->page, TRUE)) {

              mutex_exit(&block->mutex);

              if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {

                   /* Set the watch, as it would have

                   been set if the page were not in the

                   buffer pool in the first place. */

                   block = (buf_block_t*) buf_pool_watch_set(

                       space, offset, fold);

 

                   if (UNIV_LIKELY_NULL(block)) {

 

                       /* The page entered the buffer

                       pool for some reason. Try to

                       evict it again. */

                       goto got_block;

                   }

              }

              /* Evicted: report the page as not in the pool. */

              buf_pool_mutex_exit(buf_pool);

              fprintf(stderr,

                   "innodb_change_buffering_debug evict %u %u\n",

                   (unsigned) space, (unsigned) offset);

              return(NULL);

         } else if (buf_flush_page_try(buf_pool, block)) {

              /* A flush was started instead; restart the lookup

              using this block as the guess. */

              fprintf(stderr,

                   "innodb_change_buffering_debug flush %u %u\n",

                   (unsigned) space, (unsigned) offset);

              guess = block;

              goto loop;

         }

 

         /* Failed to evict the page; change it directly */

     }

#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */

 

     /* Buffer-fix the block on behalf of the caller. */

     buf_block_buf_fix_inc(block, file, line);

#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG

     /* Only BUF_GET_POSSIBLY_FREED may access a page that is

     flagged as freed. */

     ut_a(mode == BUF_GET_POSSIBLY_FREED

          || !block->page.file_page_was_freed);

#endif

     mutex_exit(&block->mutex);

 

     /* Check if this is the first access to the page */

 

     /* A zero access_time is taken to mean "first access" below. */

     access_time = buf_page_is_accessed(&block->page);

 

     buf_pool_mutex_exit(buf_pool);

 

     /* Every mode except BUF_PEEK_IF_IN_POOL updates the accessed

     state of the page. */

     if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {

         buf_page_set_accessed_make_young(&block->page, access_time);

     }

 

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG

     /* Validate the buffer pool on every 5771st pass through here. */

     ut_a(++buf_dbg_counter % 5771 || buf_validate());

     ut_a(block->page.buf_fix_count > 0);

     ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

 

     /* Take the requested latch and pick the matching memo type

     for the mini-transaction. */

     switch (rw_latch) {

     case RW_NO_LATCH:

         if (must_read) {

              /* Let us wait until the read operation

              completes */

 

              /* Poll the I/O-fix state under block->mutex,

              sleeping between checks. */

              for (;;) {

                   enum buf_io_fix    io_fix;

 

                   mutex_enter(&block->mutex);

                   io_fix = buf_block_get_io_fix(block);

                   mutex_exit(&block->mutex);

 

                   if (io_fix == BUF_IO_READ) {

 

                       os_thread_sleep(WAIT_FOR_READ);

                   } else {

                       break;

                   }

              }

         }

 

         fix_type = MTR_MEMO_BUF_FIX;

         break;

 

     case RW_S_LATCH:

         rw_lock_s_lock_inline(&(block->lock), 0, file, line);

 

         fix_type = MTR_MEMO_PAGE_S_FIX;

         break;

 

     default:

         ut_ad(rw_latch == RW_X_LATCH);

         rw_lock_x_lock_inline(&(block->lock), 0, file, line);

 

         fix_type = MTR_MEMO_PAGE_X_FIX;

         break;

     }

 

     /* Record the block and how it is held in the memo of mtr. */

     mtr_memo_push(mtr, block, fix_type);

 

     if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) {

         /* In the case of a first access, try to apply linear

         read-ahead */

 

         buf_read_ahead_linear(space, zip_size, offset,

                         ibuf_inside(mtr));

     }

 

#ifdef UNIV_IBUF_COUNT_DEBUG

     ut_a(ibuf_count_get(buf_block_get_space(block),

                  buf_block_get_page_no(block)) == 0);

#endif

     return(block);

}

上一篇:innodb buffer pool初始化
下一篇:从innodb buffer pool的freelist中获得空块buf_LRU_get_free_only