八、iptables配置的rule在内核中的转换-guanglongxishui-ChinaUnix博客

使用iptables配置rule时，rule里的扩展match是用match的名字来标识的，target同样是用target的名字来标识的。从前面的描述可以看出，rule下发到内核后，其中的扩展match需要与内核中已注册的对应实例关联起来，target也是一样。
我们来看一看xt_entry_match的定义
/*
 * Header of one extension match stored inside a rule.
 *
 * The same structure is used on both sides of the user/kernel boundary,
 * which is why the identifying part is a union: userspace fills in the
 * "user" view (match name + revision) and the kernel later switches to the
 * "kernel" view (pointer to the match object).
 */
struct xt_entry_match
{
    union {
        /* View written by userspace: the match is identified by name/revision. */
        struct {
            __u16 match_size;
            char name[XT_FUNCTION_MAXNAMELEN-1];
            __u8 revision;
        } user;
        /* View used inside the kernel: points at the struct xt_match instance. */
        struct {
            __u16 match_size;
            struct xt_match *match;
        } kernel;
        /*
         * match_size is the first member of both views, so it can also be
         * read directly here (presumably the size of this match record
         * including its trailing data -- confirm against the header).
         */
        __u16 match_size;
    } u;
    /* Zero-length trailing array: marks where the bytes after the header start. */
    unsigned char data[0];
};
用户空间和内核共用该结构体，所以结构体中的成员u被定义成了一个联合体。
iptables配置完match并把rule传给内核时，传下来的是match的名字，存放在xt_entry_match.u.user.name里。
内核需要根据这个名字找到在内核中注册的相应xt_match，并设置xt_entry_match.u.kernel.match=xt_match；

扩展target和扩展match是一样的处理流程。

我们从处理iptable set命令的函数开始看：

net/ipv4/netfilter/ip_tables.c

/*
 * Entry point for the "set" control commands shown in this excerpt.
 *
 * Callers without CAP_NET_ADMIN are rejected with -EPERM. Otherwise the
 * command word selects a handler; only IPT_SO_SET_REPLACE is shown here,
 * the remaining cases are elided in this excerpt (the "......" line below).
 */
static int do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)

{

int ret;

if (!capable(CAP_NET_ADMIN))

return -EPERM;

switch (cmd) {

/* This is the command word iptables uses when it pushes rules down. */

case IPT_SO_SET_REPLACE:

ret = do_replace(sock_net(sk), user, len);

break;
}
......

return ret;

}

/*
 * Replace the rule set of one table with a blob supplied by userspace.
 *
 * @net:  network namespace the table is looked up in (passed to __do_replace)
 * @user: userspace buffer: a struct ipt_replace header immediately followed
 *        by tmp.size bytes of rules
 * @len:  length supplied by the caller (not consulted in this function)
 *
 * Returns 0 on success, -EFAULT if copying from userspace fails, -ENOMEM on
 * the counter overflow check or allocation failure, or whatever error
 * translate_table() / __do_replace() report.
 */
static int do_replace(struct net *net, void __user *user, unsigned int len)
{
    int ret;
    struct ipt_replace tmp;
    struct xt_table_info *newinfo;
    void *loc_cpu_entry;

    /*
     * Copy the ipt_replace header from userspace; it records the size and
     * position of all the rules configured by iptables.
     */
    if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
        return -EFAULT;

    /* overflow check */
    if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
        return -ENOMEM;

    /*
     * The table name comes straight from userspace and is handed on below
     * as a C string. Force NUL termination so a name that fills the whole
     * array cannot make later string handling read past its end.
     */
    tmp.name[sizeof(tmp.name) - 1] = '\0';

    /* Allocate a fresh xt_table_info in the kernel to hold the rules. */
    newinfo = xt_alloc_table_info(tmp.size);
    if (!newinfo)
        return -ENOMEM;

    /*
     * Locate this CPU's rule area and copy the rules that follow the
     * header from userspace into it.
     */
    loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
    if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
      tmp.size) != 0) {
        ret = -EFAULT;
        goto free_newinfo;
    }

    /* Convert the rules from the userspace format to the kernel format. */
    ret = translate_table(tmp.name, tmp.valid_hooks,
                             newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
                             tmp.hook_entry, tmp.underflow);
    if (ret != 0)
        goto free_newinfo;

    duprintf("ip_tables: Translated table\n");

    /*
     * Find the matching xt_table, swap the new xt_table_info in for the
     * old one and release the old one's memory.
     */
    ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
                                  tmp.num_counters, tmp.counters);
    if (ret)
        goto free_newinfo_untrans;
    return 0;

free_newinfo_untrans:
    /* The entries were already translated, so run cleanup_entry on each one. */
    IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
free_newinfo:
    xt_free_table_info(newinfo);
    return ret;
}

/*
 * Validate a rule blob copied from userspace and convert it in place to the
 * kernel's representation, filling in newinfo along the way.
 *
 * name:         table name, forwarded to find_check_entry
 * valid_hooks:  hook mask forwarded to the checking helpers
 * newinfo:      table info being populated (size, number, hook_entry,
 *               underflow, per-CPU entries)
 * entry0:       start of the rule blob on the local CPU
 * size:         size of the rule blob in bytes
 * number:       number of rules, stored in newinfo->number
 * hook_entries, underflows: per-hook offsets supplied by userspace
 *
 * Returns -ELOOP when mark_source_chains() reports failure, otherwise the
 * value left in ret. Parts of the original function are elided in this
 * excerpt (the "。。。。。。" lines), so error handling between the steps
 * is not visible here.
 */
static int translate_table(const char *name,

unsigned int valid_hooks,

struct xt_table_info *newinfo,

void *entry0,

unsigned int size,

unsigned int number,

const unsigned int *hook_entries,

const unsigned int *underflows)

{

unsigned int i;

int ret;
/* Initialise some of the xt_table_info fields. */

newinfo->size = size;

newinfo->number = number;

/* Init all hooks to impossible value. */

for (i = 0; i < NF_INET_NUMHOOKS; i++) {

newinfo->hook_entry[i] = 0xFFFFFFFF;

newinfo->underflow[i] = 0xFFFFFFFF;

}

。。。。。。

i = 0;

    /* Walk through entries, checking offsets. */
    /*
     * Visit every rule of every chain in this table, check it, and
     * initialise the hook_entry and underflow offsets of xt_table_info.
     */

ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,

check_entry_size_and_hooks,

newinfo,

entry0,

entry0 + size,

hook_entries, underflows, valid_hooks, &i);

。。。。。。

if (!mark_source_chains(newinfo, valid_hooks, entry0))

return -ELOOP;

/* Finally, each sanity check must pass */

i = 0;

/*
 * Visit every rule of every chain in the table and call find_check_entry
 * on it to check the rule and convert it into a kernel rule.
 */
ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,

find_check_entry, name, size, &i);

。。。。。。
/* Give every CPU its own copy of the rules converted on the local CPU. */

/* And one copy for every other CPU */

for_each_possible_cpu(i) {

if (newinfo->entries[i] && newinfo->entries[i] != entry0)

memcpy(newinfo->entries[i], entry0, newinfo->size);

}

return ret;

}

static int find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,

unsigned int *i)

{

struct ipt_entry_target *t;

struct xt_target *target;

int ret;

unsigned int j;

struct xt_mtchk_param mtpar;

ret = check_entry(e, name);

if (ret)

return ret;

j = 0;

mtpar.table = name;

mtpar.entryinfo = &e->ip;

mtpar.hook_mask = e->comefrom;

mtpar.family = NFPROTO_IPV4;
/*把rule中存在的扩展match，从名字转换为内核指针*/

ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j);

if (ret != 0)

goto cleanup_matches;

t = ipt_get_target(e);
/*把rule中存在的扩展target，从名字转换为内核指针*/

target = try_then_request_module(xt_find_target(AF_INET,

t->u.user.name,

t->u.user.revision),

"ipt_%s", t->u.user.name);

if (IS_ERR(target) || !target) {

duprintf("find_check_entry: `%s' not found\n", t->u.user.name);

ret = target ? PTR_ERR(target) : -ENOENT;

goto cleanup_matches;

}

t->u.kernel.target = target;

ret = check_target(e, name);

if (ret)

goto err;

(*i)++;

return 0;

err:

module_put(t->u.kernel.target->me);

cleanup_matches:

IPT_MATCH_ITERATE(e, cleanup_match, &j);

return ret;

}

Netfilter把rule的排序交给配置工具来管理和完成：每次使用iptables下发一条rule时，iptables首先要从内核中把对应表的所有rule拷贝一份到用户空间，然后把新的rule插入并排好序，再把整张表下发到内核中，替换表中旧的rule。

这样如果在已存在大量rule的情况下，使用iptable下发一条rule时，速度是很慢的。

（未完待续）