bind函数

2820阅读 0评论2013-12-12 liubangbo
分类:LINUX

内核版本:2.6.21.5
相信接触过Linux网络编程的工程师对这个函数都很熟悉,下面我们从源代码的角度看看这个函数到底做了什么。函数注释是这么说的:
Bind a name to a socket. Nothing much to do here since it's the protocol's responsibility to handle the local address.  在看这个函数之前,先看一下怎么使用这个api:
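struct sockaddr_in servaddr;
sockfd = socket(AF_INET, SOCK_DGRAM, 0); /* create a socket */

/* init servaddr */
bzero(&servaddr, sizeof(servaddr));
servaddr.sin_family = AF_INET;
servaddr.sin_port = htons(SERV_PORT);
servaddr.sin_addr.s_addr = htonl(INADDR_ANY);  //通常我们用INADDR_ANY

bind(sockfd, (struct sockaddr *)&servaddr, sizeof(servaddr));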

下面来具体看一下这个函数吧
1.

点击(此处)折叠或打开

  1. /*
  2.  *    Bind a name to a socket. Nothing much to do here since it's
  3.  *    the protocol's responsibility to handle the local address.
  4.  *
  5.  *    We move the socket address to kernel space before we call
  6.  *    the protocol layer (having also checked the address is ok).
  7.  */

  8. asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
  9. {
  10.     struct socket *sock;
  11.     char address[MAX_SOCK_ADDR];
  12.     int err, fput_needed;

  13.     sock = sockfd_lookup_light(fd, &err, &fput_needed);    // look up the struct socket that belongs to fd (the helper itself is not examined here)
  14.     if(sock) {
  15.         err = move_addr_to_kernel(umyaddr, addrlen, address);  // copy the user-space address into the kernel buffer `address` (implementation left for a later article)
  16.         if (err >= 0) {
  17.             err = security_socket_bind(sock,
  18.                          (struct sockaddr *)address,
  19.                          addrlen);
  20.             if (!err)
  21.                 err = sock->ops->bind(sock,        // protocol callback installed when the socket was created; for the sockets discussed in this article it is inet_bind
  22.                          (struct sockaddr *)
  23.                          address, addrlen);
  24.         }
  25.         fput_light(sock->file, fput_needed);
  26.     }
  27.     return err;
  28. }
从上面的代码可以看到,进入bind系统调用后,根据fd得到socket,直接调用bind的回调函数,直接看inet_bind:
2.

点击(此处)折叠或打开

  1. int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)  // bind handler reached through sock->ops->bind; returns 0 on success, a negative errno on the failure paths shown here
  2. {
  3.     struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;  // the address the caller passed to the bind API
  4.                                                             
  5.     struct sock *sk = sock->sk;   // from struct socket we reach struct sock and, through inet_sk(), struct inet_sock
  6.     struct inet_sock *inet = inet_sk(sk);
  7.     unsigned short snum;
  8.     int chk_addr_ret;
  9.     int err;

  10.     /* If the socket has its own bind function then use it. (RAW) */
  11.     if (sk->sk_prot->bind) {                                 // the protocol supplies its own bind (e.g. a RAW socket): delegate and return its result
  12.         err = sk->sk_prot->bind(sk, uaddr, addr_len);
  13.         goto out;
  14.     }
  15.     err = -EINVAL;
  16.     if (addr_len < sizeof(struct sockaddr_in))
  17.         goto out;

  18.     chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); // classify the requested IP address (local, broadcast, multicast, ... there are many types)

  19.     /* Not specified by any standard per-se, however it breaks too
  20.      * many applications when removed. It is unfortunate since
  21.      * allowing applications to make a non-local bind solves
  22.      * several problems with systems using dynamic addressing.
  23.      * (ie. your servers still start up even if your ISDN link
  24.      * is temporarily down)
  25.      */
  26.     err = -EADDRNOTAVAIL;
  27.     if (!sysctl_ip_nonlocal_bind &&            // fail only when ALL conditions hold: neither sysctl_ip_nonlocal_bind nor the socket's freebind flag is set, and the address is not INADDR_ANY/local/multicast/broadcast; setting the sysctl or freebind therefore lets a non-local bind succeed
  28.      !inet->freebind &&
  29.      addr->sin_addr.s_addr != INADDR_ANY &&
  30.      chk_addr_ret != RTN_LOCAL &&
  31.      chk_addr_ret != RTN_MULTICAST &&
  32.      chk_addr_ret != RTN_BROADCAST)
  33.         goto out;

  34.     snum = ntohs(addr->sin_port); // requested port, converted to host byte order
  35.     err = -EACCES;
  36.     if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) // a nonzero port below PROT_SOCK needs CAP_NET_BIND_SERVICE: low (well-known) ports cannot be taken freely
  37.         goto out;

  38.     /* We keep a pair of addresses. rcv_saddr is the one
  39.      * used by hash lookups, and saddr is used for transmit.
  40.      *
  41.      * In the BSD API these are the same except where it
  42.      * would be illegal to use them (multicast/broadcast) in
  43.      * which case the sending device address is used.
  44.      */
  45.     lock_sock(sk);

  46.     /* Check these errors (active socket, double bind). */
  47.     err = -EINVAL;
  48.     if (sk->sk_state != TCP_CLOSE || inet->num) // the socket being bound must itself still be in TCP_CLOSE, and must not already hold a port
  49.                                                 // (inet->num != 0 means it was bound before: the "double bind" case). This tests THIS socket only, not other sockets.
  50.                                                 // NOTE(review): a port still held by another socket (e.g. one in TIME_WAIT) would presumably surface from get_port() below as -EADDRINUSE, not here - confirm against the TCP get_port code
  51.         goto out_release_sock;

  52.     inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;  // usually 0 here, because callers typically bind with servaddr.sin_addr.s_addr = htonl(INADDR_ANY) 
  53.                                                             // per the comment above: rcv_saddr is the one used by hash lookups, and saddr is used for transmit.
  54.                                                             // both become relevant once a specific local IP is bound; analysed where they are used
  55.     if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
  56.         inet->saddr = 0; /* Use device */

  57.     /* Make sure we are allowed to bind here. */
  58.     if (sk->sk_prot->get_port(sk, snum)) {   // this is where bind takes effect: get_port() claims the port so the socket can later be found by port number; a nonzero return means failure (callback analysed below)
  59.         inet->saddr = inet->rcv_saddr = 0;
  60.         err = -EADDRINUSE;
  61.         goto out_release_sock;
  62.     }

  63.     if (inet->rcv_saddr)
  64.         sk->sk_userlocks |= SOCK_BINDADDR_LOCK; 
  65.     if (snum)
  66.         sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
  67.     inet->sport = htons(inet->num); // the local port that was bound, stored in network byte order
  68.     inet->daddr = 0; // peer IP address cleared; per the article it is later taken from packets sent by the peer (not shown here)
  69.     inet->dport = 0; // peer port cleared; per the article it is later taken from packets sent by the peer (not shown here)
  70.     sk_dst_reset(sk);
  71.     err = 0;
  72. out_release_sock:
  73.     release_sock(sk);
  74. out:
  75.     return err;
  76. }
从上面的代码中,我们看到sk->sk_prot->get_port(sk, snum) 这里是关键。以UDP为例,这个回调函数是udp_v4_get_port:
3.
/*
 * IPv4 entry point for claiming a UDP port: forwards sk and snum to
 * udp_get_port(), passing ipv4_rcv_saddr_equal as the comparison
 * callback, and returns udp_get_port()'s result unchanged.
 */
static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
    return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
}

/*
 * Forwards to __udp_lib_get_port() with the global udp_hash table and
 * udp_port_rover, plus the caller-supplied comparison callback scmp.
 * The result of __udp_lib_get_port() is returned unchanged.
 */
__inline__ int udp_get_port(struct sock *sk, unsigned short snum,
            int (*scmp)(const struct sock *, const struct sock *))
{
    return  __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
}
还是直接分析__udp_lib_get_port函数,但我们要注意第3个参数udp_hash,先看看这个东东:
/* Number of buckets in the UDP hash table. */
#define UDP_HTABLE_SIZE        128
struct hlist_head udp_hash[UDP_HTABLE_SIZE];  // the hash table: an array of UDP_HTABLE_SIZE list heads
/* Read/write lock; __udp_lib_get_port holds it for writing while it works on the table. */
DEFINE_RWLOCK(udp_hash_lock);


点击(此处)折叠或打开

  1. /**
  2.  * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
  3.  *
  4.  * @sk: socket struct in question
  5.  * @snum: port number to look up
  6.  * @udptable: hash list table, must be of UDP_HTABLE_SIZE
  7.  * @port_rover: pointer to record of last unallocated port
  8.  * @saddr_comp: AF-dependent comparison of bound local IP addresses
  9.  */
  10. int __udp_lib_get_port(struct sock *sk, unsigned short snum,
  11.          struct hlist_head udptable[], int *port_rover,
  12.          int (*saddr_comp)(const struct sock *sk1,
  13.                      const struct sock *sk2 ) )
  14. {
  15.     struct hlist_node *node;
  16.     struct hlist_head *head;
  17.     struct sock *sk2;
  18.     int error = 1;    // return value: stays 1 on the fail paths, set to 0 on success

  19.     write_lock_bh(&udp_hash_lock);
  20.     if (snum == 0) {     // no port requested: pick one here (the article does not analyse this branch)
  21.         int best_size_so_far, best, result, i;

  22.         if (*port_rover > sysctl_local_port_range[1] ||
  23.          *port_rover < sysctl_local_port_range[0])
  24.             *port_rover = sysctl_local_port_range[0];
  25.         best_size_so_far = 32767;
  26.         best = result = *port_rover;
  27.         for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {    // scan UDP_HTABLE_SIZE consecutive candidates starting at *port_rover
  28.             int size;

  29.             head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
  30.             if (hlist_empty(head)) {    // empty bucket: take this candidate at once
  31.                 if (result > sysctl_local_port_range[1])
  32.                     result = sysctl_local_port_range[0] +
  33.                         ((result - sysctl_local_port_range[0]) &
  34.                          (UDP_HTABLE_SIZE - 1));
  35.                 goto gotit;
  36.             }
  37.             size = 0;
  38.             sk_for_each(sk2, node, head) {    // count the chain, giving up once it is no shorter than the best seen so far
  39.                 if (++size >= best_size_so_far)
  40.                     goto next;
  41.             }
  42.             best_size_so_far = size;
  43.             best = result;
  44.         next:
  45.             ;
  46.         }
  47.         result = best;
  48.         for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {    // step by UDP_HTABLE_SIZE from the best candidate until a port is not in use
  49.             if (result > sysctl_local_port_range[1])
  50.                 result = sysctl_local_port_range[0]
  51.                     + ((result - sysctl_local_port_range[0]) &
  52.                      (UDP_HTABLE_SIZE - 1));
  53.             if (! __udp_lib_lport_inuse(result, udptable))
  54.                 break;
  55.         }
  56.         if (i >= (1 << 16) / UDP_HTABLE_SIZE)
  57.             goto fail;
  58. gotit:
  59.         *port_rover = snum = result;    // record the pick in *port_rover and use it as the port
  60.     } else {  
  61.         head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; // find the bucket head, using the port number as the key

  62.         sk_for_each(sk2, node, head)
  63.             if (sk2->sk_hash == snum &&     // an entry with the same port already exists (a different socket was bound to the same port): check the remaining conditions, and goto fail if they all hold
  64.              sk2 != sk &&       // the existing entry is not this very socket
  65.              (!sk2->sk_reuse || !sk->sk_reuse) &&  // corresponds to the SO_REUSEADDR option: unless both sockets have sk_reuse set, the second bind to the same port fails
  66.              (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
  67.              || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
  68.              (*saddr_comp)(sk, sk2) )
  69.                 goto fail;
  70.     }
  71.     inet_sk(sk)->num = snum; // store the port in inet_sk(sk)->num: this is the "bound port"
  72.     sk->sk_hash = snum;      // and use it as the hash key
  73.     if (sk_unhashed(sk)) {
  74.         head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
  75.         sk_add_node(sk, head);       // link the sock into the bucket, tying the socket to its port; as earlier articles showed, the destination port of a received skb is used to find the socket that handles it,
  76.                                      // the skb is then queued on that socket's receive queue, and the application can read it with recvfrom().
  77.         sock_prot_inc_use(sk->sk_prot);
  78.     }
  79.     error = 0;
  80. fail:
  81.     write_unlock_bh(&udp_hash_lock);
  82.     return error;
  83. }
还是来张图吧:



over......


上一篇:socket创建过程
下一篇:recvfrom函数