do_generic_file_read函数的详细分析-alloysystem-ChinaUnix博客

					 
/**
 * do_generic_file_read - generic file read routine
 * @filp:    the file to read
 * @ppos:    current file position; on return it is rewritten with the
 *           position reached by this call
 * @desc:    read_descriptor; desc->count is consulted to size the request
 *           and to decide whether more data is wanted, and desc->error
 *           receives the error code when the read fails
 * @actor:   read method, invoked as actor(desc, page, offset, nr) for each
 *           up-to-date page; its return value is used as the number of
 *           bytes consumed from that page
 *
 * This is a generic file read routine, and uses the
 * mapping->a_ops->readpage() function for the actual low-level stuff.
 *
 * The function returns nothing: progress is reported through *ppos and
 * failures through desc->error. Every page obtained here carries a
 * reference that is dropped with page_cache_release() before the page is
 * abandoned, whichever exit or retry path is taken.
 *
 * This is really ugly. But the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 */

static void do_generic_file_read(struct file *filp, loff_t *ppos, 

                                 read_descriptor_t *desc, read_actor_t actor) 

{ 

    struct address_space *mapping = filp->f_mapping; 

    struct inode *inode = mapping->host; 

    struct file_ra_state *ra = &filp->f_ra; 

    pgoff_t index; 

    pgoff_t last_index; 

    pgoff_t prev_index; 

    unsigned long offset;      /* offset into pagecache page */ 

    unsigned int prev_offset; 

    int error; 

    /* Page index within the file at which this read starts. */

    index = *ppos >> PAGE_CACHE_SHIFT; 

    /*
     * Page index and in-page offset of the previous read position, as
     * recorded in the readahead state (ra->prev_pos).
     */

    prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; 

    prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE - 1); 

    /*
     * End of the requested range in pages: the end position
     * (*ppos + desc->count) rounded up to a page boundary. It is only
     * used below as "last_index - index", the page count handed to the
     * readahead calls.
     */

    last_index = (*ppos + desc->count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 

    /* Byte offset of the start position inside its page. */

    offset = *ppos & ~PAGE_CACHE_MASK; 

    /* One iteration per page; the loop is only left through "goto out". */

    for (;;) 

    { 

        struct page *page; 

        pgoff_t end_index; 

        loff_t isize; 

        unsigned long nr, ret; 

        /*
         * Runs once per loop iteration. Note that the "goto find_page"
         * retries below jump past it, while the "continue" statements do
         * come back through it.
         */

        cond_resched(); 

find_page: 

        /* Look up the page for this index in the mapping's page cache. */

        page = find_get_page(mapping, index); 

        if (!page) 

        { 

            /*
             * Not found: the data has not been brought into memory yet,
             * so start with a synchronous readahead.
             */

            page_cache_sync_readahead(mapping, 

                                      ra, filp, 

                                      index, last_index - index); 

            /*
             * Readahead normally makes the page available, but the
             * lookup can still fail.
             */

            page = find_get_page(mapping, index); 

            if (unlikely(page == NULL)) 

                goto no_cached_page; 

        } 

        if (PageReadahead(page)) 

        { 

            /*
             * The page we found is flagged as a readahead page, so keep
             * the readahead going asynchronously. Readahead is a
             * heuristic guess about what will be needed next.
             */

            page_cache_async_readahead(mapping, 

                                       ra, filp, page, 

                                       index, last_index - index); 

        } 

        /* The page contents have not (all) been read in from disk yet. */

        if (!PageUptodate(page)) 

        { 

            /*
             * The partial-uptodate shortcut is skipped when i_blkbits
             * equals PAGE_CACHE_SHIFT (presumably one block per page, so
             * nothing "partial" can exist - confirm) or when the mapping
             * has no is_partially_uptodate hook.
             */

            if (inode->i_blkbits == PAGE_CACHE_SHIFT || 

                    !mapping->a_ops->is_partially_uptodate) 

                goto page_not_up_to_date; 

            /* The hook is called with the page locked; do not wait for the lock here. */

            if (!trylock_page(page)) 

                goto page_not_up_to_date; 

            /* Hook says no: continue on the slow path, page still locked. */

            if (!mapping->a_ops->is_partially_uptodate(page, 

                    desc, offset)) 

                goto page_not_up_to_date_locked; 

            unlock_page(page); 

        } 

        /* The page contents are usable; get ready to copy them to user space. */

page_ok: 

        /*
         * i_size must be checked after we know the page is Uptodate.
         *
         * Checking i_size after the check allows us to calculate
         * the correct value for "nr", which means the zero-filled
         * part of the page is not copied back to userspace (unless
         * another truncate extends the file - this is desired though).
         */

        /*
         * From here on the page is known to be good. The remaining work
         * is: validate the position against the file size, then copy.
         */

        /*
         * Validity check: stop if the file is empty or the page lies
         * beyond the last page of the file. "!isize" is tested
         * explicitly because end_index is derived from isize - 1.
         */

        isize = i_size_read(inode); 

        end_index = (isize - 1) >> PAGE_CACHE_SHIFT; 

        if (unlikely(!isize || index > end_index)) 

        { 

            page_cache_release(page); 

            goto out; 

        } 

        /* nr is the maximum number of bytes to copy from this page */ 

        /*
         * Validity check for the last page of the file: only the bytes
         * up to i_size are valid there, and if the start offset is
         * already at or past that point there is nothing to copy.
         */

        nr = PAGE_CACHE_SIZE; 

        if (index == end_index) 

        { 

            nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; 

            if (nr <= offset) 

            { 

                page_cache_release(page); 

                goto out; 

            } 

        } 

        /* Bytes available in this page starting at the current offset. */

        nr = nr - offset; 

        /* If users can be writing to this page using arbitrary
         * virtual addresses, take care about potential aliasing
         * before reading the page on the kernel side.
         */

        if (mapping_writably_mapped(mapping)) 

            flush_dcache_page(page); 

        /*
         * When a sequential read accesses a page several times,
         * only mark it as accessed the first time.
         */

        if (prev_index != index || offset != prev_offset) 

            mark_page_accessed(page); 

        prev_index = index; 

        /*
         * Ok, we have the page, and it's up-to-date, so
         * now we can copy it to user space...
         *
         * The actor routine returns how many bytes were actually used..
         * NOTE! This may not be the same as how much of a user buffer
         * we filled up (we may be padding etc), so we can only update
         * "pos" here (the actor routine has to update the user buffer
         * pointers and the remaining count).
         */

        /* The actual copy. */

        ret = actor(desc, page, offset, nr); 

        /*
         * Advance the position by the bytes consumed: any overflow past
         * the end of the page moves index forward, and offset is reduced
         * to the in-page remainder.
         */

        offset += ret; 

        index += offset >> PAGE_CACHE_SHIFT; 

        offset &= ~PAGE_CACHE_MASK; 

        prev_offset = offset; 

        page_cache_release(page); 

        /*
         * Go on to the next page only if the actor took everything that
         * was offered and the descriptor still wants more data;
         * otherwise this read is finished.
         */

        if (ret == nr && desc->count) 

            continue; 

        goto out; 

page_not_up_to_date: 

        /* Get exclusive access to the page ... */ 

        /* The page has to be locked before it can be read into. */

        error = lock_page_killable(page); 

        if (unlikely(error)) 

            goto readpage_error; 

page_not_up_to_date_locked: 

        /* Did it get truncated before we got the lock? */ 

        /*
         * With the lock held, the page turns out to have no mapping any
         * more - it may have been dropped by someone else before we got
         * the lock. Let go of it and start this page over from the top
         * of the loop.
         */

        if (!page->mapping) 

        { 

            unlock_page(page); 

            page_cache_release(page); 

            continue; 

        } 

        /*
         * With the lock held, the data turns out to be there already:
         * nothing left to read.
         */

        /* Did somebody else fill it already? */ 

        if (PageUptodate(page)) 

        { 

            unlock_page(page); 

            goto page_ok; 

        } 

readpage: 

        /* Start the actual read. The read will unlock the page. */ 

        /* This is where the data is really read in. */

        error = mapping->a_ops->readpage(filp, page); 

        if (unlikely(error)) 

        { 

            /*
             * AOP_TRUNCATED_PAGE is handled as "retry", not as a
             * failure: drop our reference and look the page up again.
             */

            if (error == AOP_TRUNCATED_PAGE) 

            { 

                page_cache_release(page); 

                goto find_page; 

            } 

            goto readpage_error; 

        } 

        if (!PageUptodate(page)) 

        { 

            /*
             * Take the page lock to wait for the data to arrive; this
             * may sleep.
             */

            error = lock_page_killable(page); 

            if (unlikely(error)) 

                goto readpage_error; 

            if (!PageUptodate(page)) 

            { 

                if (page->mapping == NULL) 

                { 

                    /*
                     * invalidate_inode_pages got it
                     */

                    unlock_page(page); 

                    page_cache_release(page); 

                    goto find_page; 

                } 

                /*
                 * Still attached to the mapping but not up to date after
                 * the read: report an I/O error.
                 */

                unlock_page(page); 

                shrink_readahead_size_eio(filp, ra); 

                error = -EIO; 

                goto readpage_error; 

            } 

            unlock_page(page); 

        } 

        /* The read succeeded; go and copy the data. */

        goto page_ok; 

readpage_error: 

        /* UHHUH! A synchronous read error occurred. Report it */ 

        desc->error = error; 

        page_cache_release(page); 

        goto out; 

no_cached_page: 

        /*
         * Ok, it wasn't cached, so we need to create a new
         * page..
         */

        /*
         * Nothing is cached for this index and readahead did not bring
         * it in either: allocate a page explicitly and read it.
         */

        page = page_cache_alloc_cold(mapping); 

        if (!page) 

        { 

            desc->error = -ENOMEM; 

            goto out; 

        } 

        error = add_to_page_cache_lru(page, mapping, 

                                      index, GFP_KERNEL); 

        if (error) 

        { 

            page_cache_release(page); 

            /*
             * -EEXIST is not an error for the caller: the slot is
             * already occupied, so retry the lookup. Any other code is
             * reported through desc->error.
             */

            if (error == -EEXIST) 

                goto find_page; 

            desc->error = error; 

            goto out; 

        } 

        goto readpage; 

    } 

out: 

    /*
     * Record where this read stopped: prev_pos is rebuilt in the
     * readahead state as (prev_index << PAGE_CACHE_SHIFT) | prev_offset.
     */

    ra->prev_pos = prev_index; 

    ra->prev_pos <<= PAGE_CACHE_SHIFT; 

    ra->prev_pos |= prev_offset; 

    /* Hand the new file position back to the caller. */

    *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; 

    /* NOTE(review): presumably updates the file's access time - confirm. */

    file_accessed(filp); 

}