作者:gfree.wind@gmail.com
博客:linuxfocus.blog.chinaunix.net
今天开始学习路由表的另一种实现方式trie。
先看关键的数据结构,然后通过查找函数,来理解trie的路由表组织形式。
1. 路由节点
- struct rt_trie_node {
- /*
- 该变量进行了复用。
- 对于32位机来说,每个节点的地址都是4字节对齐的。所以路由表中节点地址的后两位为0.
- 这样parent的最低位就用作了标志位,0为中间节点,1为叶子节点。
- */
-
unsigned long parent;
- /* t_key实际上为无符号整形变量,对于ipv4来说,就是IP地址 */
-
t_key key;
- };
/* 下面的宏就是用于获取父节点的类型 */
#define T_TNODE 0
#define T_LEAF 1
#define NODE_TYPE_MASK 0x1UL
#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
2. 叶子节点
- struct leaf {
- /* 同上 */
-
unsigned long parent;
-
t_key key;
-
struct hlist_head list;
-
struct rcu_head rcu;
- };
注意:每一个route条目都对应一个叶子节点。
3. 叶子信息
- struct leaf_info {
-
struct hlist_node hlist;
-
struct rcu_head rcu;
-
int plen;
-
struct list_head falh;
- };
4. 中间节点
- struct tnode {
-
unsigned long parent;
-
t_key key;
- /* pos表示从何处开始检查子节点的匹配,即pos之前的位的值为该节点的key有效位 */
-
unsigned char pos; /* 2log(KEYLENGTH) bits needed */
- /* bits表示子节点的个数 */
-
unsigned char bits; /* 2log(KEYLENGTH) bits needed */
-
unsigned int full_children; /* KEYLENGTH bits needed */
-
unsigned int empty_children; /* KEYLENGTH bits needed */
-
union {
-
struct rcu_head rcu;
-
struct work_struct work;
-
struct tnode *tnode_free;
-
};
/* 子节点的柔性数组 */
-
struct rt_trie_node *child[0];
- };
5. 根节点
- struct trie {
-
struct rt_trie_node *trie;
-
#ifdef CONFIG_IP_FIB_TRIE_STATS
-
struct trie_use_stats stats;
-
#endif
- };
下面看查找函数
- int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
-
struct fib_result *res, int fib_flags)
-
{
- /* 与hash方式相同,tb->tb_data为实际的trie */
-
struct trie *t = (struct trie *) tb->tb_data;
-
int ret;
-
struct rt_trie_node *n;
-
struct tnode *pn;
-
unsigned int pos, bits;
- /* 将目的地址转为主机序 */
-
t_key key = ntohl(flp->daddr);
-
unsigned int chopped_off;
-
t_key cindex = 0;
-
unsigned int current_prefix_length = KEYLENGTH;
-
struct tnode *cn;
-
t_key pref_mismatch;
-
-
rcu_read_lock();
-
-
n = rcu_dereference(t->trie);
-
if (!n)
-
goto failed;
-
-
#ifdef CONFIG_IP_FIB_TRIE_STATS
-
t->stats.gets ;
-
#endif
-
-
/* Just a leaf? */
-
if (IS_LEAF(n)) {
- /*
- 只有一个叶子节点
- 叶子节点保存到达某一地址的所有route信息,通过check_leaf判断地址是否匹配,并从中找出是否有匹配
- 的路由信息。
- */
-
ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
-
goto found;
-
}
-
-
pn = (struct tnode *) n;
-
chopped_off = 0;
-
- while (pn) {
-
pos = pn->pos;
-
bits = pn->bits;
-
-
if (!chopped_off) {
- /*
- mask_pfx对key取current_prefix_length的掩码
- tkey_extract_bits从第一个参数的第pos位,取bits位的值
- 并将这个值作为孩子节点的索引。
- */
-
cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
-
pos, bits);
- }
/* 以cindex为索引,取得pn该中间节点的孩子节点 */
-
n = tnode_get_child_rcu(pn, cindex);
-
-
if (n == NULL) {
- /* 没有子节点, 需要回溯 */
-
#ifdef CONFIG_IP_FIB_TRIE_STATS
-
t->stats.null_node_hit ;
-
#endif
-
goto backtrace;
-
}
-
-
if (IS_LEAF(n)) {
- /* 该节点为叶子节点,使用check_leaf来检测是否有对应的route */
-
ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
-
if (ret > 0)
-
goto backtrace; //没有找到,回溯
//找到了route
-
goto found;
-
}
/* 仍然为中间节点 */
-
cn = (struct tnode *)n;
-
-
/*
-
* It's a tnode, and we can do some extra checks here if we
-
* like, to avoid descending into a dead-end branch.
-
* This tnode is in the parent's child array at index
-
* key[p_pos..p_pos p_bits] but potentially with some bits
-
* chopped off, so in reality the index may be just a
-
* subprefix, padded with zero at the end.
-
* We can also take a look at any skipped bits in this
-
* tnode - everything up to p_pos is supposed to be ok,
-
* and the non-chopped bits of the index (se previous
-
* paragraph) are also guaranteed ok, but the rest is
-
* considered unknown.
-
*
-
* The skipped bits are key[pos bits..cn->pos].
-
*/
-
-
/* If current_prefix_length < pos bits, we are already doing
-
* actual prefix matching, which means everything from
-
* pos (bits-chopped_off) onward must be zero along some
-
* branch of this subtree - otherwise there is *no* valid
-
* prefix present. Here we can only check the skipped
-
* bits. Remember, since we have already indexed into the
-
* parent's child array, we know that the bits we chopped of
-
* *are* zero.
-
*/
-
-
/* NOTA BENE: Checking only skipped bits
-
for the new node here */
-
-
if (current_prefix_length < pos+bits) {
- /*
- 提前判断是否需要回溯
- 1. 相当于该节点对应的route的prefix都比要查找的目的地址的prefi小,所以肯定不匹配;
- 2. 该节点没有子节点,那么自然不匹配,需要回溯;
- */
-
if (tkey_extract_bits(cn->key, current_prefix_length,
-
cn->pos - current_prefix_length)
-
|| !(cn->child[0]))
-
goto backtrace;
-
}
-
-
/*
-
* If chopped_off=0, the index is fully validated and we
-
* only need to look at the skipped bits for this, the new,
-
* tnode. What we actually want to do is to find out if
-
* these skipped bits match our key perfectly, or if we will
-
* have to count on finding a matching prefix further down,
-
* because if we do, we would like to have some way of
-
* verifying the existence of such a prefix at this point.
-
*/
-
-
/* The only thing we can do at this point is to verify that
-
* any such matching prefix can indeed be a prefix to our
-
* key, and if the bits in the node we are inspecting that
-
* do not match our key are not ZERO, this cannot be true.
-
* Thus, find out where there is a mismatch (before cn->pos)
-
* and verify that all the mismatching bits are zero in the
-
* new tnode's key.
-
*/
-
-
/*
-
* Note: We aren't very concerned about the piece of
-
* the key that precede pn->pos pn->bits, since these
-
* have already been checked. The bits after cn->pos
-
* aren't checked since these are by definition
-
* "unknown" at this point. Thus, what we want to see
-
* is if we are about to enter the "prefix matching"
-
* state, and in that case verify that the skipped
-
* bits that will prevail throughout this subtree are
-
* zero, as they have to be if we are to find a
-
* matching prefix.
-
*/
/* 计算pos位以前的不匹配位 */
-
pref_mismatch = mask_pfx(cn->key ^ key, cn->pos);
-
-
/*
-
* In short: If skipped bits in this node do not match
-
* the search key, enter the "prefix matching"
-
* state.directly.
-
*/
-
if (pref_mismatch) {
- /*
- fls找到第一个为1的位,即第一个不匹配的位
- 那么mp即为所有前面匹配的位的个数。
- */
-
int mp = KEYLENGTH - fls(pref_mismatch);
/*
这个节点从mp位到pos位之间的bits,有为1的位,则这个节点以及子节点肯定不匹配,
需要回溯
*/
-
if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
-
goto backtrace;
/*
该节点mp位到pos位以后的位均为0,且当前的prefix大于pos,那么prefix需要缩短到mp。
即该节点仍然可能匹配
*/
-
if (current_prefix_length >= cn->pos)
-
current_prefix_length = mp;
-
}
/* 继续向下匹配 */
-
pn = (struct tnode *)n; /* Descend */
-
chopped_off = 0;
-
continue;
-
-
backtrace:
-
chopped_off++;
/*
放弃选定的子节点,寻找最长匹配的子节点。
如之前选择的子节点索引为7(111),那么尝试索引为6的子节点(110)
*/
-
/* As zero don't change the child key (cindex) */
-
while ((chopped_off <= pn->bits)
-
&& !(cindex & (1<<(chopped_off-1))))
- chopped_off++;
-
/* Decrease current_... with bits chopped off */
- /* 根据chopped_off,调整prefix长度*/
-
if (current_prefix_length > pn->pos+pn->bits - chopped_off)
-
current_prefix_length = pn->pos+pn->bits
-
- chopped_off;
-
-
/*
-
* Either we do the actual chop off according or if we have
-
* chopped off all bits in this tnode walk up to our parent.
-
*/
-
-
if (chopped_off <= pn->bits) {
- /* 找到了另外一个合适的子节点 */
-
cindex &= ~(1 << (chopped_off-1));
-
} else {
- /* 该节点的所有子节点都不合适,则向上回溯一个节点,然后重复最长匹配的操作 */
-
struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn);
-
if (!parent)
-
goto failed;
-
-
/* Get Child's index */
-
cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
-
pn = parent;
-
chopped_off = 0;
-
-
#ifdef CONFIG_IP_FIB_TRIE_STATS
-
t->stats.backtrack ;
-
#endif
-
goto backtrace;
-
}
-
}
-
failed:
-
ret = 1;
-
found:
-
rcu_read_unlock();
-
return ret;
- }
基本查找的函数就是这样,通过分析查找函数,对于kernel的路由表的trie实现结构,基本上有了比较清晰的结构。但是仅仅看上面的代码,还不能完全反映出tries的结构组织。
后文,我会详细的描写tries的路由表的数据组织结构
参考文章http://blog.csdn.net/dog250/article/details/6596046 该文虽然没有具体代码,但是对于trie的原理讲解的还是很清楚的,推荐一看。