read一个文件
首先是通过系统调用open一个文件
大家好,我是ChinaUnix的T-bagwell
然后通过系统调用去read一个文件。为什么man 2 read或者man 2 write时看到的参数,与驱动里面定义的read和write函数的参数看上去不同呢?
ssize_t read(int fd, void *buf, size_t count);
ssize_t write(int fd, const void *buf, size_t count);
|
下面是drivers/char/nvram.c里面的
static ssize_t nvram_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
static ssize_t nvram_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
|
下面就以read为例来说明
391 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
392 {
393 struct file *file;
394 ssize_t ret = -EBADF;
395 int fput_needed;
396
397 file = fget_light(fd, &fput_needed);
398 if (file) {
399 loff_t pos = file_pos_read(file);
400 ret = vfs_read(file, buf, count, &pos);
401 file_pos_write(file, pos);
402 fput_light(file, fput_needed);
403 }
404
405 return ret;
406 }
|
通过阅读代码,可以发现这个系统调用read与man里看到的read的定义是相同的,这里没有疑问;但是它与nvram.c里面的nvram_read有些不同。其实这些不同都是在这个系统调用里面处理的:
struct file *file是通过fget_light根据fd获得的,这个file结构如下:
918 struct file {
919 /*
920 * fu_list becomes invalid after file_free is called and queued via
921 * fu_rcuhead for RCU freeing
922 */
923 union {
924 struct list_head fu_list;
925 struct rcu_head fu_rcuhead;
926 } f_u;
927 struct path f_path;
928 #define f_dentry f_path.dentry
929 #define f_vfsmnt f_path.mnt
930 const struct file_operations *f_op;
931 spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */
932 atomic_long_t f_count;
933 unsigned int f_flags;
934 fmode_t f_mode;
935 loff_t f_pos;
936 struct fown_struct f_owner;
937 const struct cred *f_cred;
938 struct file_ra_state f_ra;
939
940 u64 f_version;
941 #ifdef CONFIG_SECURITY
942 void *f_security;
943 #endif
944 /* needed for tty driver, and maybe others */
945 void *private_data;
946
947 #ifdef CONFIG_EPOLL
948 /* Used by fs/eventpoll.c to link all the hooks to this file */
949 struct list_head f_ep_links;
950 #endif /* #ifdef CONFIG_EPOLL */
951 struct address_space *f_mapping;
952 #ifdef CONFIG_DEBUG_WRITECOUNT
953 unsigned long f_mnt_write_state;
954 #endif
955 };
|
从上面可以看到f_pos,它是记录偏移值的,后面read的时候会用到,大家好,我是Chinaunix的T-bagwell。下面继续说ppos,其实就是这个loff_t *ppos:系统调用里先通过file_pos_read取得file->f_pos,保存到局部变量pos里,再把&pos作为ppos传下去,读完之后由file_pos_write写回f_pos。file_pos_read如下:
381 static inline loff_t file_pos_read(struct file *file)
382 {
383 return file->f_pos;
384 }
|
这个f_pos在每一次read的时候,都有可能会改变偏移量,继续进入vfs_read去读文件:
295 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
296 {
297 ssize_t ret;
298
299 if (!(file->f_mode & FMODE_READ))
300 return -EBADF;
301 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
302 return -EINVAL;
303 if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
304 return -EFAULT;
305
306 ret = rw_verify_area(READ, file, pos, count);
307 if (ret >= 0) {
308 count = ret;
309 if (file->f_op->read)
310 ret = file->f_op->read(file, buf, count, pos);
311 else
312 ret = do_sync_read(file, buf, count, pos);
313 if (ret > 0) {
314 fsnotify_access(file);
315 add_rchar(current, ret);
316 }
317 inc_syscr(current);
318 }
319
320 return ret;
321 }
|
大家好,我是ChinaUnix的T-bagwell
先确认一下要读的文件是否可以去读:如果文件不是以可读方式打开的、没有注册read/aio_read方法,或者用户空间的buf不可写,就只能直接返回错误退出了;否则可以继续。上面的代码里面有两个read接口,一个是file->f_op的read(大家好,我是ChinaUnix的T-bagwell,转载请注明出处),一个是do_sync_read。下面先说file->f_op里面的read,这个read是在写设备驱动的时候,或者文件系统加载的时候注册的read
下面看设备驱动部分的read
231 static ssize_t nvram_read(struct file *file, char __user *buf,
232 size_t count, loff_t *ppos)
233 {
234 unsigned char contents[NVRAM_BYTES];
235 unsigned i = *ppos;
236 unsigned char *tmp;
237
238 spin_lock_irq(&rtc_lock);
239
240 if (!__nvram_check_checksum())
241 goto checksum_err;
242
243 for (tmp = contents; count-- > 0 && i < NVRAM_BYTES; ++i, ++tmp)
244 *tmp = __nvram_read_byte(i);
245
246 spin_unlock_irq(&rtc_lock);
247
248 if (copy_to_user(buf, contents, tmp - contents))
249 return -EFAULT;
250
251 *ppos = i;
252
253 return tmp - contents;
254
255 checksum_err:
256 spin_unlock_irq(&rtc_lock);
257 return -EIO;
258 }
|
这里就不用多说了,ppos是有需要的,当然,有些设备驱动里面可以不用这个ppos,比如keyboard的驱动一类的只要一个值的,但是如果想获得很大的一段buffer的话,这个估计就有必要了。
接下来说do_sync_read,这个就要回想一下注册文件系统时,对fops的注册了
比如ext4文件系统里面,在ext4_fill_super里面会调用ext4_iget
struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
在这个接口里面会有注册fops的操作:
5165 if (S_ISREG(inode->i_mode)) {
5166 inode->i_op = &ext4_file_inode_operations;
5167 inode->i_fop = &ext4_file_operations;
5168 ext4_set_aops(inode);
5169 } else if (S_ISDIR(inode->i_mode)) {
5170 inode->i_op = &ext4_dir_inode_operations;
5171 inode->i_fop = &ext4_dir_operations;
5172 } else if (S_ISLNK(inode->i_mode)) {
5173 if (ext4_inode_is_fast_symlink(inode)) {
5174 inode->i_op = &ext4_fast_symlink_inode_operations;
5175 nd_terminate_link(ei->i_data, inode->i_size,
5176 sizeof(ei->i_data) - 1);
5177 } else {
5178 inode->i_op = &ext4_symlink_inode_operations;
5179 ext4_set_aops(inode);
5180 }
5181 } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
5182 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
5183 inode->i_op = &ext4_special_inode_operations;
5184 if (raw_inode->i_block[0])
5185 init_special_inode(inode, inode->i_mode,
5186 old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
5187 else
5188 init_special_inode(inode, inode->i_mode,
5189 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
5190 } else {
|
从这里面就可以知道fops是在哪里注册的了。为什么这里用的是inode呢?这个在open里面应该有对应的答案,接下来继续看文件操作部分
133 const struct file_operations ext4_file_operations = {
134 .llseek = generic_file_llseek,
135 .read = do_sync_read,
136 .write = do_sync_write,
137 .aio_read = generic_file_aio_read,
138 .aio_write = ext4_file_write,
139 .unlocked_ioctl = ext4_ioctl,
140 #ifdef CONFIG_COMPAT
141 .compat_ioctl = ext4_compat_ioctl,
142 #endif
143 .mmap = ext4_file_mmap,
144 .open = ext4_file_open,
145 .release = ext4_release_file,
146 .fsync = ext4_sync_file,
147 .splice_read = generic_file_splice_read,
148 .splice_write = generic_file_splice_write,
149 };
150
|
其实ext4普通文件的read就是do_sync_read,它最终会通过f_op->aio_read进入到generic_file_aio_read里面。
generic_file_aio_read里面就是从块设备里面读取内容了,到这里,读文件的过程就结束了