* Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
* & Swedish University of Agricultural Sciences.
*
- * Jens Laas <jens.laas@data.slu.se> Swedish University of
+ * Jens Laas <jens.laas@data.slu.se> Swedish University of
* Agricultural Sciences.
- *
+ *
* Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
*
* This work is based on the LPC-trie which is originally descibed in:
- *
+ *
* An experimental study of compression methods for dynamic tries
* Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
* http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
* Patrick McHardy <kaber@trash.net>
*/
-#define VERSION "0.407"
+#define VERSION "0.408"
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#define MAX_STAT_DEPTH 32
#define KEYLENGTH (8*sizeof(t_key))
-#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
-#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
typedef unsigned int t_key;
#define T_TNODE 0
#define T_LEAF 1
#define NODE_TYPE_MASK 0x1UL
-#define NODE_PARENT(node) \
- ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK)))
-
#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
-#define NODE_SET_PARENT(node, ptr) \
- rcu_assign_pointer((node)->parent, \
- ((unsigned long)(ptr)) | NODE_TYPE(node))
-
#define IS_TNODE(n) (!(n->parent & T_LEAF))
#define IS_LEAF(n) (n->parent & T_LEAF)
static struct kmem_cache *fn_alias_kmem __read_mostly;
static struct trie *trie_local = NULL, *trie_main = NULL;
+static inline struct tnode *node_parent(struct node *node)
+{
+ struct tnode *ret;
+
+ ret = (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
+ return rcu_dereference(ret);
+}
+
+static inline void node_set_parent(struct node *node, struct tnode *ptr)
+{
+ rcu_assign_pointer(node->parent,
+ (unsigned long)ptr | NODE_TYPE(node));
+}
/* rcu_read_lock needs to be hold by caller from readside */
return 1 << tn->bits;
}
+static inline t_key mask_pfx(t_key k, unsigned short l)
+{
+ return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
+}
+
static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
{
if (offset < KEYLENGTH)
}
/*
- To understand this stuff, an understanding of keys and all their bits is
- necessary. Every node in the trie has a key associated with it, but not
+ To understand this stuff, an understanding of keys and all their bits is
+ necessary. Every node in the trie has a key associated with it, but not
all of the bits in that key are significant.
Consider a node 'n' and its parent 'tp'.
- If n is a leaf, every bit in its key is significant. Its presence is
- necessitated by path compression, since during a tree traversal (when
- searching for a leaf - unless we are doing an insertion) we will completely
- ignore all skipped bits we encounter. Thus we need to verify, at the end of
- a potentially successful search, that we have indeed been walking the
+ If n is a leaf, every bit in its key is significant. Its presence is
+ necessitated by path compression, since during a tree traversal (when
+ searching for a leaf - unless we are doing an insertion) we will completely
+ ignore all skipped bits we encounter. Thus we need to verify, at the end of
+ a potentially successful search, that we have indeed been walking the
correct key path.
- Note that we can never "miss" the correct key in the tree if present by
- following the wrong path. Path compression ensures that segments of the key
- that are the same for all keys with a given prefix are skipped, but the
- skipped part *is* identical for each node in the subtrie below the skipped
- bit! trie_insert() in this implementation takes care of that - note the
+ Note that we can never "miss" the correct key in the tree if present by
+ following the wrong path. Path compression ensures that segments of the key
+ that are the same for all keys with a given prefix are skipped, but the
+ skipped part *is* identical for each node in the subtrie below the skipped
+ bit! trie_insert() in this implementation takes care of that - note the
call to tkey_sub_equals() in trie_insert().
- if n is an internal node - a 'tnode' here, the various parts of its key
+ if n is an internal node - a 'tnode' here, the various parts of its key
have many different meanings.
- Example:
+ Example:
_________________________________________________________________
| i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
-----------------------------------------------------------------
- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
_________________________________________________________________
| C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
n->pos = 15
n->bits = 4
- First, let's just ignore the bits that come before the parent tp, that is
- the bits from 0 to (tp->pos-1). They are *known* but at this point we do
+ First, let's just ignore the bits that come before the parent tp, that is
+ the bits from 0 to (tp->pos-1). They are *known* but at this point we do
not use them for anything.
The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
- index into the parent's child array. That is, they will be used to find
+ index into the parent's child array. That is, they will be used to find
'n' among tp's children.
The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
for the node n.
- All the bits we have seen so far are significant to the node n. The rest
+ All the bits we have seen so far are significant to the node n. The rest
of the bits are really not needed or indeed known in n->key.
- The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
+ The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
n's child array, and will of course be different for each child.
-
+
The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
at this point.
static int halve_threshold = 25;
static int inflate_threshold = 50;
-static int halve_threshold_root = 15;
-static int inflate_threshold_root = 25;
+static int halve_threshold_root = 8;
+static int inflate_threshold_root = 15;
static void __alias_free_mem(struct rcu_head *head)
static inline void tnode_free(struct tnode *tn)
{
- if(IS_LEAF(tn)) {
+ if (IS_LEAF(tn)) {
struct leaf *l = (struct leaf *) tn;
call_rcu_bh(&l->rcu, __leaf_free_rcu);
- }
- else
+ } else
call_rcu(&tn->rcu, __tnode_free_rcu);
}
tn->full_children++;
if (n)
- NODE_SET_PARENT(n, tn);
+ node_set_parent(n, tn);
rcu_assign_pointer(tn->child[i], n);
}
struct tnode *old_tn;
int inflate_threshold_use;
int halve_threshold_use;
+ int max_resize;
- if (!tn)
+ if (!tn)
return NULL;
pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
continue;
/* compress one level */
- NODE_SET_PARENT(n, NULL);
+ node_set_parent(n, NULL);
tnode_free(tn);
return n;
}
/* Keep root node larger */
- if(!tn->parent)
+ if (!tn->parent)
inflate_threshold_use = inflate_threshold_root;
- else
+ else
inflate_threshold_use = inflate_threshold;
err = 0;
- while ((tn->full_children > 0 &&
+ max_resize = 10;
+ while ((tn->full_children > 0 && max_resize-- &&
50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
inflate_threshold_use * tnode_child_length(tn))) {
}
}
+ if (max_resize < 0) {
+ if (!tn->parent)
+ printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n",
+ inflate_threshold_root, tn->bits);
+ else
+ printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n",
+ inflate_threshold, tn->bits);
+ }
+
check_tnode(tn);
/*
/* Keep root node larger */
- if(!tn->parent)
+ if (!tn->parent)
halve_threshold_use = halve_threshold_root;
- else
+ else
halve_threshold_use = halve_threshold;
err = 0;
- while (tn->bits > 1 &&
+ max_resize = 10;
+ while (tn->bits > 1 && max_resize-- &&
100 * (tnode_child_length(tn) - tn->empty_children) <
halve_threshold_use * tnode_child_length(tn)) {
}
}
+ if (max_resize < 0) {
+ if (!tn->parent)
+ printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n",
+ halve_threshold_root, tn->bits);
+ else
+ printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n",
+ halve_threshold, tn->bits);
+ }
/* Only one child remains */
if (tn->empty_children == tnode_child_length(tn) - 1)
/* compress one level */
- NODE_SET_PARENT(n, NULL);
+ node_set_parent(n, NULL);
tnode_free(tn);
return n;
}
inode->pos == oldtnode->pos + oldtnode->bits &&
inode->bits > 1) {
struct tnode *left, *right;
- t_key m = TKEY_GET_MASK(inode->pos, 1);
+ t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
left = tnode_new(inode->key&(~m), inode->pos + 1,
inode->bits - 1);
right = tnode_new(inode->key|m, inode->pos + 1,
inode->bits - 1);
- if (!right) {
+ if (!right) {
tnode_free(left);
goto nomem;
- }
+ }
put_child(t, tn, 2*i, (struct node *) left);
put_child(t, tn, 2*i+1, (struct node *) right);
static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
{
- struct leaf_info *li = NULL, *last = NULL;
- struct hlist_node *node;
+ struct leaf_info *li = NULL, *last = NULL;
+ struct hlist_node *node;
- if (hlist_empty(head)) {
- hlist_add_head_rcu(&new->hlist, head);
- } else {
- hlist_for_each_entry(li, node, head, hlist) {
- if (new->plen > li->plen)
- break;
+ if (hlist_empty(head)) {
+ hlist_add_head_rcu(&new->hlist, head);
+ } else {
+ hlist_for_each_entry(li, node, head, hlist) {
+ if (new->plen > li->plen)
+ break;
- last = li;
- }
- if (last)
- hlist_add_after_rcu(&last->hlist, &new->hlist);
- else
- hlist_add_before_rcu(&new->hlist, &li->hlist);
- }
+ last = li;
+ }
+ if (last)
+ hlist_add_after_rcu(&last->hlist, &new->hlist);
+ else
+ hlist_add_before_rcu(&new->hlist, &li->hlist);
+ }
}
/* rcu_read_lock needs to be hold by caller from readside */
static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
{
int wasfull;
- t_key cindex, key;
- struct tnode *tp = NULL;
-
- key = tn->key;
+ t_key cindex, key = tn->key;
+ struct tnode *tp;
- while (tn != NULL && NODE_PARENT(tn) != NULL) {
-
- tp = NODE_PARENT(tn);
+ while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
tn = (struct tnode *) resize (t, (struct tnode *)tn);
tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
- if (!NODE_PARENT(tn))
+ tp = node_parent((struct node *) tn);
+ if (!tp)
break;
-
- tn = NODE_PARENT(tn);
+ tn = tp;
}
+
/* Handle last (top) tnode */
if (IS_TNODE(tn))
tn = (struct tnode*) resize(t, (struct tnode *)tn);
pos = tn->pos + tn->bits;
n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
- BUG_ON(n && NODE_PARENT(n) != tn);
+ BUG_ON(n && node_parent(n) != tn);
} else
break;
}
if (t->trie && n == NULL) {
/* Case 2: n is NULL, and will just insert a new leaf */
- NODE_SET_PARENT(l, tp);
+ node_set_parent((struct node *)l, tp);
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
goto err;
}
- NODE_SET_PARENT(tn, tp);
+ node_set_parent((struct node *)tn, tp);
missbit = tkey_extract_bits(key, newpos, 1);
put_child(t, tn, missbit, (struct node *)l);
return fa_head;
}
+/*
+ * Caller must hold RTNL.
+ */
static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
{
struct trie *t = (struct trie *) tb->tb_data;
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
rt_cache_flush(-1);
+ rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
+ tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
goto succeeded;
}
rt_cache_flush(-1);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
- &cfg->fc_nlinfo);
+ &cfg->fc_nlinfo, 0);
succeeded:
return 0;
bits = pn->bits;
if (!chopped_off)
- cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
+ cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
+ pos, bits);
n = tnode_get_child(pn, cindex);
* to find a matching prefix.
*/
- node_prefix = MASK_PFX(cn->key, cn->pos);
- key_prefix = MASK_PFX(key, cn->pos);
+ node_prefix = mask_pfx(cn->key, cn->pos);
+ key_prefix = mask_pfx(key, cn->pos);
pref_mismatch = key_prefix^node_prefix;
mp = 0;
if (chopped_off <= pn->bits) {
cindex &= ~(1 << (chopped_off-1));
} else {
- if (NODE_PARENT(pn) == NULL)
+ struct tnode *parent = node_parent((struct node *) pn);
+ if (!parent)
goto failed;
/* Get Child's index */
- cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
- pn = NODE_PARENT(pn);
+ cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
+ pn = parent;
chopped_off = 0;
#ifdef CONFIG_IP_FIB_TRIE_STATS
check_tnode(tn);
n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
- BUG_ON(n && NODE_PARENT(n) != tn);
+ BUG_ON(n && node_parent(n) != tn);
}
l = (struct leaf *) n;
t->revision++;
t->size--;
- preempt_disable();
- tp = NODE_PARENT(n);
+ tp = node_parent(n);
tnode_free((struct tnode *) n);
if (tp) {
rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
} else
rcu_assign_pointer(t->trie, NULL);
- preempt_enable();
return 1;
}
+/*
+ * Caller must hold RTNL.
+ */
static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
{
struct trie *t = (struct trie *) tb->tb_data;
fa = fa_to_delete;
rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
- &cfg->fc_nlinfo);
+ &cfg->fc_nlinfo, 0);
l = fib_find_node(t, key);
li = find_leaf_info(l, plen);
p = (struct tnode*) trie; /* Start */
} else
- p = (struct tnode *) NODE_PARENT(c);
+ p = node_parent(c);
while (p) {
int pos, last;
/* Decend if tnode */
while (IS_TNODE(c)) {
p = (struct tnode *) c;
- idx = 0;
+ idx = 0;
/* Rightmost non-NULL branch */
if (p && IS_TNODE(p))
up:
/* No more children go up one step */
c = (struct node *) p;
- p = (struct tnode *) NODE_PARENT(p);
+ p = node_parent(c);
}
return NULL; /* Ready. Root of trie */
}
+/*
+ * Caller must hold RTNL.
+ */
static int fn_trie_flush(struct fib_table *tb)
{
struct trie *t = (struct trie *) tb->tb_data;
fn_alias_kmem = kmem_cache_create("ip_fib_alias",
sizeof(struct fib_alias),
0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
GFP_KERNEL);
}
/* Current node exhausted, pop back up */
- p = NODE_PARENT(tn);
+ p = node_parent((struct node *)tn);
if (p) {
cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
tn = p;
{
struct node *n ;
- if(!t)
+ if (!t)
return NULL;
n = rcu_dereference(t->trie);
- if(!iter)
+ if (!iter)
return NULL;
if (n) {
int i;
s->tnodes++;
- if(tn->bits < MAX_STAT_DEPTH)
+ if (tn->bits < MAX_STAT_DEPTH)
s->nodesizes[tn->bits]++;
for (i = 0; i < (1<<tn->bits); i++)
return single_open(file, fib_triestat_seq_show, NULL);
}
-static struct file_operations fib_triestat_fops = {
+static const struct file_operations fib_triestat_fops = {
.owner = THIS_MODULE,
.open = fib_triestat_seq_open,
.read = seq_read,
{
static char buf[32];
- switch(s) {
+ switch (s) {
case RT_SCOPE_UNIVERSE: return "universe";
case RT_SCOPE_SITE: return "site";
case RT_SCOPE_LINK: return "link";
if (v == SEQ_START_TOKEN)
return 0;
- if (!NODE_PARENT(n)) {
+ if (!node_parent(n)) {
if (iter->trie == trie_local)
seq_puts(seq, "<local>:\n");
else
if (IS_TNODE(n)) {
struct tnode *tn = (struct tnode *) n;
- __be32 prf = htonl(MASK_PFX(tn->key, tn->pos));
+ __be32 prf = htonl(mask_pfx(tn->key, tn->pos));
seq_indent(seq, iter->depth-1);
seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
- NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
+ NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
tn->empty_children);
-
+
} else {
struct leaf *l = (struct leaf *) n;
int i;
return 0;
}
-static struct seq_operations fib_trie_seq_ops = {
+static const struct seq_operations fib_trie_seq_ops = {
.start = fib_trie_seq_start,
.next = fib_trie_seq_next,
.stop = fib_trie_seq_stop,
goto out;
}
-static struct file_operations fib_trie_fops = {
+static const struct file_operations fib_trie_fops = {
.owner = THIS_MODULE,
.open = fib_trie_seq_open,
.read = seq_read,
return 0;
}
-static struct seq_operations fib_route_seq_ops = {
+static const struct seq_operations fib_route_seq_ops = {
.start = fib_trie_seq_start,
.next = fib_trie_seq_next,
.stop = fib_trie_seq_stop,
goto out;
}
-static struct file_operations fib_route_fops = {
+static const struct file_operations fib_route_fops = {
.owner = THIS_MODULE,
.open = fib_route_seq_open,
.read = seq_read,