select 系统调用分析

2472阅读 2评论2012-04-22 datao0907
分类:LINUX

select 系统调用简单分析:

点击(此处)折叠或打开

  1. 函数的入口定义如下:
  2. SYSCALL_DEFINES(select, int n,fd_set __user *,inp,fd_set __user *,outp,fd_set __user *,exp,struct timeval __user *,tvp)
  3. {
  4.     struct timespec end_time,*to = NULL;
  5.     struct timeval tv;
  6.     int ret;
  7.     
  8.     if(tvp) {
  9.         if(copy_from_user(&tv,tvp,sizeof(tv)))
  10.             return -FAULT;
  11.         to = &end_time;
  12.         //获取绝对超时时间
  13.         if(poll_select_set_timeout(to,
  14.             tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
  15.             (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
  16.         return -EINVAL;
  17.     }

  18.     ret = core_sys_select(n,inp,outp,exp,to);
  19.     ret = poll_select_copy_remaining(&end_time,tvp,1,ret);
  20.     
  21.     return ret;
  22. }

  23. int poll_select_set_timeout(struct timespec *to,long sec,long nsec)
  24. {
  25.     struct timespec ts = {.tv_sec = sec,.tv_nsec = nsec};
  26.     
  27.     if(!timespec_valid(&ts))
  28.         return – EINVAL;
  29.     if(!sec && !nsec) {
  30.         to->tv_sec = to->tv_nsec = 0;
  31.     } else {
  32.         ktime_get_ts(to);
  33.         *to = timespec_add_safe(*to,ts);
  34.     }

  35.     return 0;
  36. }
  37. //fd_set其实就是一个大小为32(32位系统)的长整性数组,这也就说明select最多只能注册32*32(1024)个fd
  38. int core_sys_select(int n,fd_set __user *inp,fd_set __user *outp,fd_set __user *exp,
  39.         struct timespec *end_time)
  40. {
  41.     /* typedef struct {
  42.                  unsigned long *in,*out,*ex;
  43.                  unsigned long *res_in,*res_out,*res_ex;
  44.                  }fd_set_bits;
  45.      */            
  46.     fd_set_bits fds;
  47.     void *bits;
  48.     int ret,max_fds;
  49.     unsigned int size;
  50.     /*
  51.      struct fdtable {
  52.             unsigned int max_fds;
  53.      struct file_rcu **fd; //current fd array
  54.              fd_set *close_on_exec;
  55.             fd_set *open_fdt;
  56.              struct rcu_head rcu;
  57.      struct fdtable *next;
  58.              };
  59.     
  60.      **/
  61.     struct fdtable *fdt;
  62.     long stack_fds[SELECT_STACK_ALLOC / sizeof(long)];
  63.     
  64.     ret = -EINVAL;
  65.     if(n < 0)
  66.         goto out_nofds;
  67.     
  68.     rcu_read_lock();
  69.     //获取对应进程的文件打开表
  70.     fdt = file_fdtable(current->files);
  71.     max_fds = fdt->max_fds;
  72.     rcu_read_unlock();
  73.     if(n > max_fds)
  74.         n = max_fds;
  75.     //通过最大的文件fd,找到总共需要的数组大小,并为指针开辟内存
  76.     size = FDS_BYTES(n);
  77.     bits = stack_fds;
  78.     if(size > sizeof(stack_fds) / 6) {
  79.         ret = -ENOMEM;
  80.         bits = kmalloc(6 * size,GFP_KERNEL);
  81.         if(!bits)
  82.             goto out_nofds;
  83.     }
  84.     fds.in = bits;
  85.     fds.out = bits + size;
  86.     fds.ex = bits + 2 * size;
  87.     fds.res_in = bits + 3 * size;
  88.     fds.res_out = bits + 4 * size;
  89.     fds.res_ex = bits + 5 * size;
  90.     //拷贝至内核态
  91.     if((ret = get_fd_set(n,inp,fds.in)) ||
  92.      (ret = get_fd_set(n,outp,fds.out)) ||
  93.      (ret = get_fd_set(n,exp,fds.ex)))
  94.             goto out;
  95.     
  96.     zero_fd_set(n,fds.res_in);
  97.     zero_fd_set(n,fds.res_out);
  98.     zero_fd_set(n,fds.res_ex);
  99.     //将用户态的fd拷贝至内核态后开始进行处理
  100.     ret = do_select(n,&fds,end_time);
  101.     
  102.     if(ret < 0)
  103.         goto out;
  104.     if(!ret) {
  105.         ret = -ERESTARTNOHAND;
  106.         if(signal_pending(current))
  107.             goto out;
  108.         ret = 0;
  109.     }
  110.     //将结果拷贝至用户态,只要出现错误就立即返回
  111.     if(set_fd_set(n,inp,fds.res_in) ||
  112.     set_fd_set(n,outp,fds.res_out) ||
  113.     set_fd_set(n,exp,fds.res_ex))
  114.         ret = -EFAULT;

  115. out:
  116.     if(bits != stack_fds) //通过kmalloc分配所得,就进行释放
  117.         kfree(bits);
  118. out_nofds:
  119.     retun ret;
  120. }
核心代码如下:

点击(此处)折叠或打开

  1. int do_select(int n,fd_set_bits *fds,struct timespec *end_time)
  2. {
  3.     ktime_t expire,*to = NULL;
  4.     /*
  5.     typedef struct poll_table_struct {
  6.         poll_queue_proc qproc;
  7.         unsigned long key;
  8.     } poll_table;
  9.     struct page_table_entry {
  10.         struct file *filp;
  11.         unsigned long key;
  12.         wait_queue_t wait;
  13.         wait_queue_head_t *wait_address;
  14.     }
  15.     struct poll_table_page {
  16.         struct poll_table_page *next;
  17.         struct poll_table_entry *entry;
  18.         struct poll_table_entry entries[0];
  19.     }
  20.      struct poll_wqueue {
  21.         poll_table pt;
  22.         struct poll_table_page *table;
  23.         struct task_struct *polling_task;
  24.         int triggered;
  25.         int error;
  26.         int inline_index;
  27.         //N_INLINE_POLL_ENTRIES 18(32位系统)
  28.         struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES];
  29.     }    
  30.      **/
  31.     struct poll_wqueue table;
  32.     poll_table *wait;
  33.     int retval,i,timed_out = 0;
  34.     unsigned long slack = 0;
  35.     
  36.     rcu_read_lock();
  37.     retval = max_select_fd(n,fds);
  38.     rcu_read_unlock();

  39.     if(retval < 0)
  40.         return retval;
  41.     n = retval;
  42.     
  43.     poll_initwait(&table);
  44.     wait = & table.pt;
  45.     if(end_time && !end_time->tv_sec && !end_time->tv_nsec) {
  46.             wait = NULL;
  47.             timed_out = 1;
  48.     }

  49.     if(end_time && !time_out)
  50.         slack = select_estimate_accuracy(end_time);
  51.     retval = 0;
  52.     for(;;) {
  53.         unsigned long *rinp,*routp,*rexp,*inp,*oup,*exp;
  54.         
  55.         inp = fds->in;outp = fds->out;exp = fds->ex;
  56.         rinp = fds->res_in;routp = fds->res_out;rexp = fds->res_ex;
  57.         
  58.         for(i = 0;i < n;++rinp,++routp,++rexp) {
  59.             unsigned long in,out,ex,all_bits,bit = 1,mask,j;
  60.             unsigned long res_in = 0,res_out = 0,res_ex = 0;
  61.             const struct file_operations *f_op = NULL;
  62.             struct file *file = NULL;
  63.             //获取一个fd
  64.             in = *inp ++; out = *oup++;ex = *exp ++;
  65.             all_bits = in | out | ex;
  66.             if(all_bits == 0) {    //三个都没有注册,则直接进行下一个long
  67.                 i += __NFDBITS;
  68.                 continue;
  69.             }
  70.             for(j = 0;j < __NFDBITS;++j,++i,bit << =1) {
  71.                 int fput_needed;
  72.                 if(i >= n)
  73.                         break;
  74.                 if(!(bit & all_bits))
  75.                     continue;
  76.                 //增加对fd的引用计数,获取文件对象
  77.                 file = fget_light(i,&fput_needed);
  78.                 if(file) {
  79.                     f_op = file->f_op;
  80.                     mask = DEFAULT_POLLMASK;
  81.                     if(f_op && f_op->poll) {
  82.                     //获取用户态注册的事件
  83.                     wait_key_set(wait,in,out,bit);
  84.                     //调用对应文件系统的poll驱动,如果没有事件产生就会进行睡眠等待,
  85.                     //所有的文件poll驱动都会睡眠在同一对象wait上面
  86.                     mask = (*f_op->poll)(file,wait);
  87.                 }
  88.                 fput_light(file,fput_needed);
  89.                 if((mask & POLLIN_SET) && (in & bit)) {
  90.                     res_in |= bit;
  91.                     retval ++;
  92.                     wait = NULL;
  93.                 }
  94.                 if((mask & POLLOUT_SET) && (out & bit)) {
  95.                     res_out |= bit;
  96.                     retval ++;
  97.                     wait = NULL;
  98.                 }
  99.                 if((mask & POLLEX_SET) && (ex & bit)) {
  100.                     res_ex |= bit;
  101.                     retval ++;
  102.                     wait = NULL;
  103.                 }
  104.                 }
  105.             }
  106.         //将结果返回至数组中
  107.         if(res_in)
  108.             *rinp = res_in;
  109.         if(res_out)
  110.             *routp = res_out;
  111.         if(res_ex)
  112.             *rexp = res_ex;
  113.         //如果有抢占进程,则让其运行
  114.         cond_resched();    
  115.         }
  116.     wait = NULL;
  117.     //一有返回事件或超时或被信号中断就立即返回
  118.     if(ret_val || timed_out || signal_pending(current))
  119.         break;
  120.     if(table.error) {
  121.         retal = table.error;
  122.         break;
  123.     }
  124.     if(end_time && !to) { //更新剩余过期时间
  125.         expire = timespec_to_ktime(*end_time);
  126.         to = &expire;
  127.     }
  128.     //睡眠(to,to+slack)之间直到超时
  129.     if(!poll_schedule_timeout(&table,TASK_INTERRUPTIBLE,
  130.         to,slack))
  131.         timed_out = 1;
  132.     }
  133.     poll_freewait(&table);//释放资源
  134.     return retval;
  135. }
参考资料:
1. Linux 3.3.2 内核源码(fs/select.c)
2. 《Linux 设备驱动程序》


上一篇:Fibonacci 堆
下一篇:C++ Slab分配器的实现

文章评论