recvfrom函数

13880阅读 0评论2013-12-14 liubangbo
分类:LINUX

内核版本2.6.21.5

1. recvfrom 函数原型
    ssize_t recvfrom(int socket, void *buffer, size_t size, int flags, struct sockaddr *addr, socklen_t *length_ptr)
    这个 void *buffer 是进程用户空间地址,可以是栈上的地址,也可以是你用 malloc 分配的堆地址。struct sockaddr *addr 和 socklen_t *length_ptr 分别用来存储对端的地址信息和地址长度;注意这个长度是用指针传递的(值-结果参数):调用前要设为 addr 缓冲区的大小,返回时被改写为实际的地址长度,别弄错了。

2. 看看这个函数的源代码

  

点击(此处)折叠或打开

  /*
   * sys_recvfrom - system-call service routine behind recvfrom().
   *
   * Resolves fd to a socket, wraps the user buffer in a one-element
   * iovec/msghdr, calls sock_recvmsg(), and on success copies the peer
   * address back to user space when addr is non-NULL.
   *
   * Returns the result of sock_recvmsg() (>= 0 on success), -EBADF when fd
   * has no file, the error set by sock_from_file(), or the error from
   * move_addr_to_user().
   */
  1. asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
  2.              unsigned flags, struct sockaddr __user *addr,
  3.              int __user *addr_len)
  4. {
  5.     struct socket *sock;
  6.     struct iovec iov; // buffer vector describing the user buffer
  7.     struct msghdr msg;
  8.     char address[MAX_SOCK_ADDR]; // kernel-side storage for the peer address
  9.     int err, err2;
  10.     struct file *sock_file;
  11.     int fput_needed;

  12.     sock_file = fget_light(fd, &fput_needed);
  13.     err = -EBADF;
  14.     if (!sock_file)
  15.         goto out;

  16.     sock = sock_from_file(sock_file, &err);
  17.     if (!sock)
  18.         goto out_put;

  19.     msg.msg_control = NULL;
  20.     msg.msg_controllen = 0;
  21.     msg.msg_iovlen = 1;
  22.     msg.msg_iov = &iov;
  23.     iov.iov_len = size;
  24.     iov.iov_base = ubuf;
  25.     msg.msg_name = address;
  26.     msg.msg_namelen = MAX_SOCK_ADDR;
  27.     if (sock->file->f_flags & O_NONBLOCK) // check the non-blocking attribute; per the original author the default is blocking, i.e. the process sleeps in this system call until a packet arrives
  28.         flags |= MSG_DONTWAIT;
  29.     err = sock_recvmsg(sock, &msg, size, flags);

  30.     if (err >= 0 && addr != NULL) {
  31.         err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);  // hand the peer address back to the caller's addr/addr_len
  32.         if (err2 < 0)
  33.             err = err2;
  34.     }
  35. out_put:
  36.     fput_light(sock_file, fput_needed);
  37. out:
  38.     return err;
  39. }
上面代码是与之对应的系统调用服务函数,直接看sock_recvmsg吧:


点击(此处)折叠或打开

  /*
   * sock_recvmsg - synchronous wrapper around __sock_recvmsg().
   *
   * Initialises an on-stack kiocb whose private pointer refers to an
   * on-stack sock_iocb, then issues the receive. If the receive reports
   * -EIOCBQUEUED, waits on the kiocb with wait_on_sync_kiocb() and returns
   * that result instead.
   */
  1. int sock_recvmsg(struct socket *sock, struct msghdr *msg,
  2.          size_t size, int flags)
  3. {
  4.     struct kiocb iocb;
  5.     struct sock_iocb siocb;
  6.     int ret;

  7.     init_sync_kiocb(&iocb, NULL);
  8.     iocb.private = &siocb;
  9.     ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
  10.     if (-EIOCBQUEUED == ret)
  11.         ret = wait_on_sync_kiocb(&iocb);
  12.     return ret;
  13. }


点击(此处)折叠或打开

  /*
   * __sock_recvmsg - record the request in the sock_iocb and dispatch it.
   *
   * Stores sock, msg, size and flags (and a NULL scm) in the sock_iocb
   * obtained from the kiocb, runs the security_socket_recvmsg() hook and
   * returns its value if non-zero, otherwise returns whatever the socket's
   * ops->recvmsg handler returns.
   */
  1. static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
  2.                  struct msghdr *msg, size_t size, int flags)
  3. {
  4.     int err;
  5.     struct sock_iocb *si = kiocb_to_siocb(iocb);

  6.     si->sock = sock;
  7.     si->scm = NULL;
  8.     si->msg = msg;
  9.     si->size = size;
  10.     si->flags = flags;

  11.     err = security_socket_recvmsg(sock, msg, size, flags);
  12.     if (err)
  13.         return err;

  14.     return sock->ops->recvmsg(iocb, sock, msg, size, flags);
  15. }

对于 UDP 套接字,sock->ops->recvmsg 指向 sock_common_recvmsg,它再通过 sk->sk_prot->recvmsg 调用 udp_recvmsg,还是直接看代码:

点击(此处)折叠或打开

  /*
   * sock_common_recvmsg - forward a receive to the protocol's recvmsg handler.
   *
   * Splits flags into the MSG_DONTWAIT bit (passed as the noblock argument)
   * and the remaining flags, calls sk->sk_prot->recvmsg(), and on a
   * non-negative result stores the address length the handler reported into
   * msg->msg_namelen. Returns the handler's result.
   */
  1. int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
  2.             struct msghdr *msg, size_t size, int flags)
  3. {
  4.     struct sock *sk = sock->sk;
  5.     int addr_len = 0;
  6.     int err;

  7.     err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
  8.                  flags & ~MSG_DONTWAIT, &addr_len);
  9.     if (err >= 0)
  10.         msg->msg_namelen = addr_len;
  11.     return err;
  12. }


点击(此处)折叠或打开

  /*
   * udp_recvmsg - receive one UDP (or UDP-Lite) datagram into msg.
   *
   * Reports sizeof(struct sockaddr_in) through addr_len when it is non-NULL.
   * With MSG_ERRQUEUE set the call is handed to ip_recv_error(). Otherwise it
   * obtains an skb via skb_recv_datagram(), copies at most len payload bytes
   * (setting MSG_TRUNC in msg->msg_flags when the payload is larger), fills
   * in the peer address when msg->msg_name is set, and frees the skb.
   *
   * Returns the number of bytes copied, or the full payload length when the
   * caller passed MSG_TRUNC in flags; on failure returns the error from the
   * failing step. On a checksum failure the skb is discarded and the receive
   * is retried, unless noblock is set, in which case -EAGAIN is returned.
   */
  1. int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
  2.         size_t len, int noblock, int flags, int *addr_len)
  3. {
  4.     struct inet_sock *inet = inet_sk(sk);
  5.     struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; //store peer address
  6.     struct sk_buff *skb;
  7.     int copied, err, copy_only, is_udplite = IS_UDPLITE(sk);

  8.     /*
  9.      *    Check any passed addresses
  10.      */
  11.     if (addr_len)
  12.         *addr_len=sizeof(*sin);

  13.     if (flags & MSG_ERRQUEUE) /* Fetch message from error queue. */
  14.         return ip_recv_error(sk, msg, len);

  15. try_again:
  16.     skb = skb_recv_datagram(sk, flags, noblock, &err); // this call obtains the skb
  17.     if (!skb)
  18.         goto out;

  19.     copied = skb->len - sizeof(struct udphdr); // payload length after the UDP header; truncated below if it exceeds what the caller asked for
  20.     if (copied > len) {
  21.         copied = len;
  22.         msg->msg_flags |= MSG_TRUNC; // e.g. the caller asks for 100 bytes but the skb holds 1000: only the first 100 are copied and MSG_TRUNC is set
  23.     }

  24.     /*
  25.      *     Decide whether to checksum and/or copy data.
  26.      *
  27.      *     UDP: checksum may have been computed in HW,
  28.      *      (re-)compute it if message is truncated.
  29.      *     UDP-Lite: always needs to checksum, no HW support.
  30.      */
  31.     copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);

  32.     if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) {
  33.         if (__udp_lib_checksum_complete(skb))
  34.             goto csum_copy_err;
  35.         copy_only = 1;
  36.     }

  37.     if (copy_only)
  38.         err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), // the payload is copied to the caller's iovec here
  39.                      msg->msg_iov, copied );
  40.     else {
  41.         err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);

  42.         if (err == -EINVAL)
  43.             goto csum_copy_err;
  44.     }

  45.     if (err)
  46.         goto out_free;

  47.     sock_recv_timestamp(msg, sk, skb); // timestamp the message (per the original author's note)

  48.     /* Copy the address. */
  49.     if (sin)
  50.     {
  51.         sin->sin_family = AF_INET;
  52.         sin->sin_port = skb->h.uh->source;
  53.         sin->sin_addr.s_addr = skb->nh.iph->saddr; // the peer's IP address and port are taken from the packet headers here
  54.         memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
  55.     }
  56.     if (inet->cmsg_flags)
  57.         ip_cmsg_recv(msg, skb);

  58.     err = copied;
  59.     if (flags & MSG_TRUNC)
  60.         err = skb->len - sizeof(struct udphdr);

  61. out_free:
  62.     skb_free_datagram(sk, skb);
  63. out:
  64.     return err;   // on success: bytes copied (or the full payload length when MSG_TRUNC was requested); otherwise the error set above

  65. csum_copy_err:
  66.     UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);

  67.     skb_kill_datagram(sk, skb, flags);

  68.     if (noblock)
  69.         return -EAGAIN;
  70.     goto try_again;
  71. }

3. 从上面的代码中我们看到有2个重要的函数:skb_recv_datagram和skb_copy_datagram_iovec,一个是取得skb, 一个是从skb中拷贝数据


点击(此处)折叠或打开

  /*
   * skb_recv_datagram - obtain the next skb from the socket's receive queue.
   *
   * With MSG_PEEK the head skb is looked up under the queue lock and its
   * user count is incremented; otherwise the skb is dequeued. When the queue
   * is empty the function either gives up (-EAGAIN when the timeout is 0) or
   * sleeps in wait_for_packet() and tries again.
   *
   * Returns the skb, or NULL when none was obtained; in that case *err is
   * set here (pending socket error or -EAGAIN) or by wait_for_packet().
   */
  1. struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
  2.                  int noblock, int *err)
  3. {
  4.     struct sk_buff *skb;
  5.     long timeo;
  6.     /*
  7.      * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
  8.      */
  9.     int error = sock_error(sk);

  10.     if (error)
  11.         goto no_packet;

  12.     timeo = sock_rcvtimeo(sk, noblock); // 0 in non-blocking mode, otherwise the receive timeout set via setsockopt; if none was set,
  13.                                         // the default is to block indefinitely (sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT at socket creation, per the original author)

  14.     do {
  15.         /* Again only user level code calls this function, so nothing
  16.          * interrupt level will suddenly eat the receive_queue.
  17.          *
  18.          * Look at current nfs client by the way...
  19.          * However, this function was corrent in any case. 8)
  20.          */
  21.         if (flags & MSG_PEEK) {
  22.             unsigned long cpu_flags;

  23.             spin_lock_irqsave(&sk->sk_receive_queue.lock,
  24.                      cpu_flags);
  25.             skb = skb_peek(&sk->sk_receive_queue);
  26.             if (skb)
  27.                 atomic_inc(&skb->users);
  28.             spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
  29.                      cpu_flags);
  30.         } else
  31.             skb = skb_dequeue(&sk->sk_receive_queue); // take the skb off the receive queue, where incoming skbs were queued earlier

  32.         if (skb)
  33.             return skb; // blocking or not, return right away when an skb is available

  34.         /* User doesn't want to wait */
  35.         error = -EAGAIN;
  36.         if (!timeo) // non-blocking, or the timeout has run down to 0: stop waiting
  37.             goto no_packet;

  38.     } while (!wait_for_packet(sk, err, &timeo)); // otherwise wait here, for the remaining timeout or indefinitely

  39.     return NULL;

  40. no_packet:
  41.     *err = error;
  42.     return NULL;
  43. }

点击(此处)折叠或打开

  /*
   * wait_for_packet - sleep until the socket may have a packet to receive.
   *
   * Returns 0 when the caller should retry the receive queue (a packet is
   * already queued, or the task slept and was woken or timed out). Returns
   * non-zero when the caller should stop waiting: a negative error is also
   * stored in *err, or 1 with *err = 0 when the socket was shut down for
   * receiving. *timeo_p is updated with the value schedule_timeout() returns.
   */
  1. /*
  2.  * Wait for a packet..
  3.  */
  4. static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
  5. {
  6.     int error;
  7.     DEFINE_WAIT(wait);

  8.     prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); // queue the current task on the socket wait queue sk->sk_sleep

  9.     /* Socket errors? */
  10.     error = sock_error(sk);
  11.     if (error)
  12.         goto out_err;

  13.     if (!skb_queue_empty(&sk->sk_receive_queue)) // an skb is already queued: return with error = 0 so the caller goes and receives it
  14.         goto out;

  15.     /* Socket shut down? */
  16.     if (sk->sk_shutdown & RCV_SHUTDOWN) // socket was shut down for receiving: return 1 with *err = 0, no skb will be received
  17.         goto out_noerr;

  18.     /* Sequenced packets can come disconnected.
  19.      * If so we report the problem
  20.      */
  21.     error = -ENOTCONN;
  22.     if (connection_based(sk) && // handling for connection-based sockets such as TCP; not relevant to the UDP path discussed here
  23.      !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
  24.         goto out_err;

  25.     /* handle signals */
  26.     if (signal_pending(current)) // a signal is pending for the current task
  27.         goto interrupted;

  28.     error = 0;
  29.     *timeo_p = schedule_timeout(*timeo_p); // sleep; the value returned by schedule_timeout() becomes the new remaining timeout
  30. out:
  31.     finish_wait(sk->sk_sleep, &wait);
  32.     return error;
  33. interrupted:
  34.     error = sock_intr_errno(*timeo_p); // ERESTARTSYS or EINTR is reported to the application, which normally just retries the system call:
  35.                                        // if (errno == EINTR) continue;
  36. out_err:
  37.     *err = error;
  38.     goto out;
  39. out_noerr:
  40.     *err = 0;
  41.     error = 1;
  42.     goto out;
  43. }
上面 2 个代码段是取得 skb 的过程,从中可以看到它涉及应用层设置的非阻塞标志、接收超时时间(用 setsockopt 设置)以及等待过程中被信号打断时的处理等问题,下面我们看看数据拷贝过程吧:

点击(此处)折叠或打开

  /*
   * skb_copy_datagram_iovec - copy len bytes of skb data, starting at
   * offset, into the iovec 'to'.
   *
   * Copies in three stages: the linear head area, then each page fragment
   * (mapped with kmap() for the duration of the copy), then each skb on the
   * frag_list (by recursing into this function).
   *
   * Returns 0 once len bytes have been copied, or -EFAULT when a copy fails
   * or the skb runs out of data before len reaches 0.
   */
  1. int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
  2.              struct iovec *to, int len)
  3. {
  4.     int start = skb_headlen(skb); // per the original author: the total packet length minus the length of the data held in fragments
  5.     int i, copy = start - offset;

  6.     /* Copy header. */
  7.     if (copy > 0) {     // without fragments everything is copied here in one go; with fragments only the first (linear) part is copied
  8.         if (copy > len)
  9.             copy = len;
  10.         if (memcpy_toiovec(to, skb->data + offset, copy))
  11.             goto fault;
  12.         if ((len -= copy) == 0)
  13.             return 0;
  14.         offset += copy;
  15.     }

  16.     /* Copy paged appendix. Hmm... why does this look so complicated? */ // data held in the page fragments is copied here
  17.     for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  18.         int end;

  19.         BUG_TRAP(start <= offset + len);

  20.         end = start + skb_shinfo(skb)->frags[i].size;
  21.         if ((copy = end - offset) > 0) {
  22.             int err;
  23.             u8 *vaddr;
  24.             skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  25.             struct page *page = frag->page;

  26.             if (copy > len)
  27.                 copy = len;
  28.             vaddr = kmap(page);
  29.             err = memcpy_toiovec(to, vaddr + frag->page_offset +
  30.                      offset - start, copy);
  31.             kunmap(page);
  32.             if (err)
  33.                 goto fault;
  34.             if (!(len -= copy))
  35.                 return 0;
  36.             offset += copy;
  37.         }
  38.         start = end;
  39.     }

  40.     if (skb_shinfo(skb)->frag_list) {
  41.         struct sk_buff *list = skb_shinfo(skb)->frag_list;

  42.         for (; list; list = list->next) {
  43.             int end;

  44.             BUG_TRAP(start <= offset + len);

  45.             end = start + list->len;
  46.             if ((copy = end - offset) > 0) {
  47.                 if (copy > len)
  48.                     copy = len;
  49.                 if (skb_copy_datagram_iovec(list,
  50.                              offset - start,
  51.                              to, copy))
  52.                     goto fault;
  53.                 if ((len -= copy) == 0)
  54.                     return 0;
  55.                 offset += copy;
  56.             }
  57.             start = end;
  58.         }
  59.     }
  60.     if (!len)
  61.         return 0;

  62. fault:
  63.     return -EFAULT;
  64. }



over ...

上一篇:bind函数
下一篇:setsockopt函数