Linux系统中,read文件过程分析

2810阅读 0评论2013-09-23 joepayne
分类:LINUX

read一个文件
首先是通过系统调用open一个文件
大家好,我是ChinaUnix的T-bagwell
然后通过系统调用去read这个文件。那么,为什么man 2 read或者man 2 write里看到的参数,与驱动里定义的read和write函数的参数看上去不同呢?

ssize_t read(int fd, void *buf, size_t count);
ssize_t write(int fd, const void *buf, size_t count);

下面是drivers/char/nvram.c里面的定义:

static ssize_t nvram_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
static ssize_t nvram_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)


下面就以read为例来说明。

391 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
392 {
393 struct file *file;
394 ssize_t ret = -EBADF;
395 int fput_needed;
396
397 file = fget_light(fd, &fput_needed);
398 if (file) {
399 loff_t pos = file_pos_read(file);
400 ret = vfs_read(file, buf, count, &pos);
401 file_pos_write(file, pos);
402 fput_light(file, fput_needed);
403 }
404
405 return ret;
406 }

通过阅读代码,发现这个系统调用read与man里看到的系统调用定义是相同的,这里没有疑问;但是它与nvram.c里的read有些不同。其实这中间的转换操作都是在这个系统调用里面完成的:struct file *file里面的file是通过fget_light来获得的,这个file结构如下:

918 struct file {
919 /*
920 * fu_list becomes invalid after file_free is called and queued via
921 * fu_rcuhead for RCU freeing
922 */
923 union {
924 struct list_head fu_list;
925 struct rcu_head fu_rcuhead;
926 } f_u;
927 struct path f_path;
928 #define f_dentry f_path.dentry
929 #define f_vfsmnt f_path.mnt
930 const struct file_operations *f_op;
931 spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */
932 atomic_long_t f_count;
933 unsigned int f_flags;
934 fmode_t f_mode;
935 loff_t f_pos;
936 struct fown_struct f_owner;
937 const struct cred *f_cred;
938 struct file_ra_state f_ra;
939
940 u64 f_version;
941 #ifdef CONFIG_SECURITY
942 void *f_security;
943 #endif
944 /* needed for tty driver, and maybe others */
945 void *private_data;
946
947 #ifdef CONFIG_EPOLL
948 /* Used by fs/eventpoll.c to link all the hooks to this file */
949 struct list_head f_ep_links;
950 #endif /* #ifdef CONFIG_EPOLL */
951 struct address_space *f_mapping;
952 #ifdef CONFIG_DEBUG_WRITECOUNT
953 unsigned long f_mnt_write_state;
954 #endif
955 };

从上面可以看到f_pos,它用来记录文件的偏移值,后面read的时候会用到。
大家好,我是Chinaunix的T-bagwell。下面继续说ppos,也就是这个loff_t *ppos,它的值是通过file_pos_read来获得的:

381 static inline loff_t file_pos_read(struct file *file)
382 {
383 return file->f_pos;
384 }

每一次read都有可能改变f_pos这个偏移量(vfs_read返回之后,由file_pos_write把新的偏移写回file)。下面继续进入vfs_read去读文件:

295 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
296 {
297 ssize_t ret;
298
299 if (!(file->f_mode & FMODE_READ))
300 return -EBADF;
301 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
302 return -EINVAL;
303 if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
304 return -EFAULT;
305
306 ret = rw_verify_area(READ, file, pos, count);
307 if (ret >= 0) {
308 count = ret;
309 if (file->f_op->read)
310 ret = file->f_op->read(file, buf, count, pos);
311 else
312 ret = do_sync_read(file, buf, count, pos);
313 if (ret > 0) {
314 fsnotify_access(file);
315 add_rchar(current, ret);
316 }
317 inc_syscr(current);
318 }
319
320 return ret;
321 }

大家好,我是ChinaUnix的T-bagwell
先确认一下要读的文件是否可以读:如果文件没有以可读方式打开、f_op里既没有read也没有aio_read,或者用户传入的缓冲区不可写,就只能直接出错退出了;否则可以继续。上面的代码里面有两个read接口,一个是file->f_op的read(大家好,我是ChinaUnix的T-bagwell,转载请注明出处),一个是do_sync_read。下面先说file->f_op里面的read,这个read是在写设备驱动的时候,或者文件系统加载的时候注册的read。
下面看设备驱动部分的read

231 static ssize_t nvram_read(struct file *file, char __user *buf,
232 size_t count, loff_t *ppos)
233 {
234 unsigned char contents[NVRAM_BYTES];
235 unsigned i = *ppos;
236 unsigned char *tmp;
237
238 spin_lock_irq(&rtc_lock);
239
240 if (!__nvram_check_checksum())
241 goto checksum_err;
242
243 for (tmp = contents; count-- > 0 && i < NVRAM_BYTES; ++i, ++tmp)
244 *tmp = __nvram_read_byte(i);
245
246 spin_unlock_irq(&rtc_lock);
247
248 if (copy_to_user(buf, contents, tmp - contents))
249 return -EFAULT;
250
251 *ppos = i;
252
253 return tmp - contents;
254
255 checksum_err:
256 spin_unlock_irq(&rtc_lock);
257 return -EIO;
258 }

这里就不用多说了,ppos是有需要的。当然,有些设备驱动里面可以不用这个ppos,比如keyboard驱动这一类只需要返回一个值的;但是如果想读取很大的一段buffer的话,这个估计就有必要了。
接下来说do_sync_read,这个就要回想一下注册文件系统时对fops的注册了。
比如ext4文件系统里面,在ext4_fill_super里面会调用ext4_iget:

struct inode *ext4_iget(struct super_block *sb, unsigned long ino)


在这个接口里面会有注册fops的操作:

5165 if (S_ISREG(inode->i_mode)) {
5166 inode->i_op = &ext4_file_inode_operations;
5167 inode->i_fop = &ext4_file_operations;
5168 ext4_set_aops(inode);
5169 } else if (S_ISDIR(inode->i_mode)) {
5170 inode->i_op = &ext4_dir_inode_operations;
5171 inode->i_fop = &ext4_dir_operations;
5172 } else if (S_ISLNK(inode->i_mode)) {
5173 if (ext4_inode_is_fast_symlink(inode)) {
5174 inode->i_op = &ext4_fast_symlink_inode_operations;
5175 nd_terminate_link(ei->i_data, inode->i_size,
5176 sizeof(ei->i_data) - 1);
5177 } else {
5178 inode->i_op = &ext4_symlink_inode_operations;
5179 ext4_set_aops(inode);
5180 }
5181 } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
5182 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
5183 inode->i_op = &ext4_special_inode_operations;
5184 if (raw_inode->i_block[0])
5185 init_special_inode(inode, inode->i_mode,
5186 old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
5187 else
5188 init_special_inode(inode, inode->i_mode,
5189 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
5190 } else {

从这里就可以知道fops是在哪里注册的了。至于为什么这里设置的是inode的i_fop呢?这个在open里面应该有对应的答案。接下来继续看文件操作部分:

133 const struct file_operations ext4_file_operations = {
134 .llseek = generic_file_llseek,
135 .read = do_sync_read,
136 .write = do_sync_write,
137 .aio_read = generic_file_aio_read,
138 .aio_write = ext4_file_write,
139 .unlocked_ioctl = ext4_ioctl,
140 #ifdef CONFIG_COMPAT
141 .compat_ioctl = ext4_compat_ioctl,
142 #endif
143 .mmap = ext4_file_mmap,
144 .open = ext4_file_open,
145 .release = ext4_release_file,
146 .fsync = ext4_sync_file,
147 .splice_read = generic_file_splice_read,
148 .splice_write = generic_file_splice_write,
149 };
150


其实ext4普通文件的read就是do_sync_read,它最终会进入到aio_read,也就是generic_file_aio_read里面。
generic_file_aio_read里面就是从块设备里面读取内容了。到这里,读文件的过程就分析结束了。
上一篇:应用层创建socket,内核模块通过该socket发送数据包
下一篇:整理几道算法题