LVS 内核实现分析(2)

5300阅读 0评论2017-01-06 frankzfz
分类:云计算

3 重要数据结构

  这里重点介绍其中用到的几个重要数据结构。

3.1 ip_vs_conn 

  1. /*
  2.  *    IP_VS structure allocated for each dynamically scheduled connection.
  3.  *    每一个动态调度的连接都有一个ip_vs_conn结构,它包含客户端、IPVS、RealServer的地址和端口信息 */
  4. struct ip_vs_conn {
  5.     struct list_head c_list; /* hashed list heads */

  6.     /* Protocol, addresses and port numbers */
  7.     u16 af;        /* address family */
  8.     union nf_inet_addr caddr; /* client address */
  9.     union nf_inet_addr vaddr; /* virtual address */
  10.     union nf_inet_addr daddr; /* destination address */
  11.     __be16 cport;             /*客户端的端口*/
  12.     __be16 vport;            /*IPVS的端口*/
  13.     __be16 dport;            /*RS的端口*/
  14.     __u16 protocol; /* Which protocol (TCP/UDP)协议号 */

  15.     /* counter and timer ip_vs_conn对象的使用计数。其初值为1,__ip_vs_conn_in_get/__ip_vs_conn_put成对调用 */
  16.     atomic_t        refcnt;        /* reference count */
  17.     struct timer_list    timer;        /* Expiration timer: ip_vs_conn对象的生存期,当timer到期时,对象被销毁 */
  18.     volatile unsigned long    timeout;    /* timeout: ip_vs_conn对象动态的超时时间,每当对象操作完毕,timeout值用来更新timer,以延长对象的生存期。timeout受连接状态等的影响 */

  19.     /* Flags and state transition */
  20.     spinlock_t lock; /* lock for state transition */
  21.     volatile __u16 flags; /* status flags */
  22.     volatile __u16 state; /* state info */
  23.     volatile __u16 old_state; /* old state, to be used for
  24.                          * state transition triggered
  25.                          * synchronization
  26.                          */

  27.     /* Control members */
  28.     struct ip_vs_conn *control; /* Master control connection */
  29.     atomic_t n_control; /* Number of controlled ones */
  30.     struct ip_vs_dest *dest; /* real server 指向此连接对象对应的ip_vs_dest对象*/
  31.     atomic_t in_pkts; /* incoming packet counter */

  32.     /* packet transmitter for different forwarding methods. If it
  33.      mangles the packet, it must return NF_DROP or better NF_STOLEN,
  34.      otherwise this must be changed to a sk_buff **.
  35.      */
  36.     int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
  37.              struct ip_vs_protocol *pp); /*不同的发包函数,三种模式对应三个不同的发包函数*/

  38.     /* Note: we can group the following members into a structure,
  39.      in order to save more space, and the following members are
  40.      only used in VS/NAT anyway */
  41.     struct ip_vs_app *app; /* bound ip_vs_app object */
  42.     void *app_data; /* Application private data */
  43.     struct ip_vs_seq in_seq; /* incoming seq. struct */
  44.     struct ip_vs_seq out_seq; /* outgoing seq. struct */
  45. };

3.2  ip_vs_dest

  1. /*
  2.  *    The real server destination forwarding entry
  3.  *    with ip address, port number, and so on. 该结构体主要描述real server的相关信息,也就是通过ipvsadm配置到内核的相关信息
  4.  */
  5. struct ip_vs_dest {
  6.     struct list_head    n_list; /* for the dests in the service */
  7.     struct list_head    d_list; /* for table with all the dests; d_list是全局hash链表ip_vs_rtable的一个节点 */

  8.     u16            af;        /* address family 地址的协议族 AF_INET/AF_INET6 */
  9.     union nf_inet_addr    addr;        /* IP address of the server Real Server 的地址 */
  10.     __be16            port;        /* port number of the server Real Server 端口号 */
  11. /*ip_vs_dest对象的状态标志位,IP_VS_DEST_F_AVAILABLE表示此真实服务器可用,IP_VS_DEST_F_OVERLOAD表示此真实服务器超负荷。*/
  12.     volatile unsigned    flags;        /* dest status flags */
  13. /*ip_vs_dest对象的连接标志位。这些标志位本身不是用来标示 ip_vs_dest对象的,而是由ip_vs_dest对象创建ip_vs_conn对象时,赋给后者的。IP_VS_CONN_F_MASQ、 IP_VS_CONN_F_TUNNEL和IP_VS_CONN_F_DROUTE,分别代表NAT、TUN和DR三种模式*/
  14.     atomic_t        conn_flags;    /* flags to copy to conn */
  15.     atomic_t        weight;        /* server weight 权重用于调度*/
  16. /*ip_vs_dest对象的引用计数,初值为0,当对象被加入链表或从链表删除,或者被ip_vs_conn对象引用时,refcnt相应地增或减1*/
  17.     atomic_t        refcnt;        /* reference counter */
  18.     struct ip_vs_stats stats; /* statistics */

  19.     /* connection counters and thresholds 连接的统计和阈值 */
  20.     atomic_t        activeconns;    /* active connections */
  21.     atomic_t        inactconns;    /* inactive connections */
  22.     atomic_t        persistconns;    /* persistent connections */
  23.     __u32            u_threshold;    /* upper threshold */
  24.     __u32            l_threshold;    /* lower threshold */

  25.     /* for destination cache */
  26.     spinlock_t        dst_lock;    /* lock of dst_cache */
  27.     struct dst_entry    *dst_cache;    /* destination cache entry */
  28.     u32            dst_rtos;    /* RT_TOS(tos) for dst */

  29.     /* for virtual service LVS的相关信息*/
  30.     struct ip_vs_service    *svc;        /* service it belongs to */
  31.     __u16            protocol;    /* which protocol (TCP/UDP) */
  32.     union nf_inet_addr    vaddr;        /* virtual IP address */
  33.     __be16            vport;        /* virtual port number */
  34.     __u32            vfwmark;    /* firewall mark of service 防火墙标志*/
  35. };

3.3  ip_vs_service_user_kern

下面这个结构体主要用于用户空间向内核空间传递IPVS虚拟服务(virtual service)的相关配置。

  1. struct ip_vs_service_user_kern {
  2.     /* virtual service addresses */
  3.     u16            af;
  4.     u16            protocol;
  5.     union nf_inet_addr    addr;        /* virtual ip address */
  6.     u16            port;
  7.     u32            fwmark;        /* firewall mark of service */

  8.     /* virtual service options */
  9.     char            *sched_name;
  10.     unsigned        flags;        /* virtual service flags */
  11.     unsigned        timeout;    /* persistent timeout in sec */
  12.     u32            netmask;    /* persistent netmask */
  13. };

3.4  ip_vs_dest_user_kern

下面这个结构体主要用于用户空间向内核空间传递Real Server的相关数据。

点击(此处)折叠或打开

  1. struct ip_vs_dest_user_kern {
  2.     /* destination server address */
  3.     union nf_inet_addr    addr;
  4.     u16            port;

  5.     /* real server options */
  6.     unsigned        conn_flags;    /* connection flags */
  7.     int            weight;        /* destination weight */

  8.     /* thresholds for active connections */
  9.     u32            u_threshold;    /* upper threshold */
  10.     u32            l_threshold;    /* lower threshold */
  11. };

3.5  ip_vs_service

点击(此处)折叠或打开

  1. /*
  2.  *    The information about the virtual service offered to the net
  3.  *    and the forwarding entries
  4.  */
  5. struct ip_vs_service {
  6. /*s_list是全局hash链表ip_vs_svc_table的一个节点 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 它是一个数组,每个成员是一个链表头。将ip_vs_service对象的协议类型、地址和端口进行hash,hash值作为数组下标,然后将此对象置入数组成员对应的链表中。*/
  7.     struct list_head    s_list; /* for normal service table */
  8.     struct list_head    f_list; /* for fwmark-based service table */
  9. /*refcnt和usecnt分别是ip_vs_service对象的引用计数和使用计数。它们是atomic_t类型的变量。refcnt在对象新建时为0,当ip_vs_service对象被加入链表或从链表删除,或者被 ip_vs_dest对象引用时,refcnt相应地增或减1。usecnt初始化为1*/
  10.     atomic_t        refcnt; /* reference counter */
  11.     atomic_t        usecnt; /* use counter */

  12.     u16            af; /* address family 地址族*/
  13.     __u16            protocol; /* which protocol (TCP/UDP) */
  14.     union nf_inet_addr    addr;     /* IP address for virtual service 虚拟服务器的地址 */
  15.     __be16            port;     /* port number for the service 虚拟服务器的端口号*/
  16.     __u32 fwmark; /* firewall mark of the service */
  17.     /*ip_vs_service对象的状态标志位,可以取IP_VS_SVC_F_PERSISTENT和IP_VS_SVC_F_HASHED。前者表示IPVS服务使用了基于IP地址的会话保持,即同一IP地址发起的连接将被负载到同一台真实服务器上。后者表示ip_vs_service对象已被加入到ip_vs_svc_table链表中*/
  18. unsigned        flags;     /* service status flags */
  19. /*timeout和netmask只有在IP_VS_SVC_F_PERSISTENT标志位被设置时才有效。timeout是会话的超时时间,超过此时间后,会话将不再有效。同一IP地址发起的两个连接,如果间隔超过此时间,则未必会被负载到同一台真实服务器上*/
  20.     unsigned        timeout; /* persistent timeout in ticks */
  21. /*netmask可以将会话保持设置成基于IP网段的,即同一网段发起的连接将被负载到同一台真实服务器上。*/
  22.     __be32            netmask; /* grouping granularity */
  23. /*destinations是ip_vs_dest对象链表,它代指了IPVS服务对应的真实服务器列表。num_dests是服务器个数*/
  24.     struct list_head    destinations; /* real server d-linked list */
  25.     __u32            num_dests; /* number of servers */
  26.     struct ip_vs_stats stats; /* statistics for the service */
  27.     struct ip_vs_app    *inc;     /* bind conns to this app inc */

  28.     /* for scheduling 指向了一个ip_vs_scheduler对象,它代指一种调度算法*/
  29.     struct ip_vs_scheduler    *scheduler; /* bound scheduler object */
  30.     rwlock_t        sched_lock; /* lock sched_data */
  31.     void            *sched_data; /* scheduler application data */
  32. };

3.6  ip_vs_dest_user_kern

用户空间真实服务器信息(与3.4节是同一结构体的另一版本定义:port改为__be16类型,并新增了af字段)

点击(此处)折叠或打开

  1. struct ip_vs_dest_user_kern {
  2.     /* destination server address */
  3.     union nf_inet_addr    addr;
  4.     __be16            port;

  5.     /* real server options */
  6.     unsigned int        conn_flags;    /* connection flags */
  7.     int            weight;        /* destination weight */

  8.     /* thresholds for active connections */
  9.     u32            u_threshold;    /* upper threshold */
  10.     u32            l_threshold;    /* lower threshold */

  11.     /* Address family of addr */
  12.     u16            af;
  13. };

3.7  ip_vs_service_user_kern

用户空间虚拟服务器信息(与3.3节是同一结构体的另一版本定义:port改为__be16类型,netmask改为__be32类型,并新增了pe_name字段)

点击(此处)折叠或打开

  1. struct ip_vs_service_user_kern {
  2.     /* virtual service addresses */
  3.     u16            af;
  4.     u16            protocol;
  5.     union nf_inet_addr    addr;        /* virtual ip address */
  6.     __be16            port;
  7.     u32            fwmark;        /* firewall mark of service */

  8.     /* virtual service options */
  9.     char            *sched_name;
  10.     char            *pe_name;
  11.     unsigned int        flags;        /* virtual service flags */
  12.     unsigned int        timeout;    /* persistent timeout in sec */
  13.     __be32            netmask;    /* persistent netmask or plen */
  14. };

3.8  ip_vs_protocol


点击(此处)折叠或打开

  1. struct ip_vs_protocol {
  2.     struct ip_vs_protocol    *next;
  3.  /*协议的名字*/
  4.     char            *name;
  5. /*协议号*/
  6.     u16            protocol;
  7.     u16            num_states;
  8.     int            dont_defrag;

  9.     void (*init)(struct ip_vs_protocol *pp);

  10.     void (*exit)(struct ip_vs_protocol *pp);

  11.     int (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);

  12.     void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);

  13. /*协议调度函数*/    
  14. int (*conn_schedule)(int af, struct sk_buff *skb,
  15.              struct ip_vs_proto_data *pd,
  16.              int *verdict, struct ip_vs_conn **cpp,
  17.              struct ip_vs_iphdr *iph);
  18.  /*查找in方向的IPVS连接(ip_vs_conn)*/
  19.     struct ip_vs_conn *
  20.     (*conn_in_get)(int af,
  21.          const struct sk_buff *skb,
  22.          const struct ip_vs_iphdr *iph,
  23.          int inverse);
  24.  /*查找out方向的IPVS连接(ip_vs_conn)*/
  25.     struct ip_vs_conn *
  26.     (*conn_out_get)(int af,
  27.             const struct sk_buff *skb,
  28.             const struct ip_vs_iphdr *iph,
  29.             int inverse);
  30.  /*SNAT处理函数*/
  31.     int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
  32.              struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);
  33.  /*DNAT处理函数*/
  34.     int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
  35.              struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);
  36.  /*校验和处理函数*/
  37.     int (*csum_check)(int af, struct sk_buff *skb,
  38.              struct ip_vs_protocol *pp);

  39.     const char *(*state_name)(int state);
  40.  /*状态转换函数*/
  41.     void (*state_transition)(struct ip_vs_conn *cp, int direction,
  42.                  const struct sk_buff *skb,
  43.                  struct ip_vs_proto_data *pd);

  44.     int (*register_app)(struct net *net, struct ip_vs_app *inc);

  45.     void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
  46.  /*多连接的应用绑定函数*/
  47.     int (*app_conn_bind)(struct ip_vs_conn *cp);

  48.     void (*debug_packet)(int af, struct ip_vs_protocol *pp,
  49.              const struct sk_buff *skb,
  50.              int offset,
  51.              const char *msg);

  52.     void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
  53. };
上一篇:关于TCP MSS的内核设置
下一篇:LVS 内核实现分析(3)