[TCP]: Update comment of SACK block validator
[deliverable/linux.git] / net / ipv4 / fib_trie.c
CommitLineData
19baf839
RO
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
e905a9ed 10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
19baf839 11 * Agricultural Sciences.
e905a9ed 12 *
19baf839
RO
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
e905a9ed 16 *
19baf839
RO
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
fd966255
RO
44 *
45 * Substantial contributions to this work comes from:
46 *
47 * David S. Miller, <davem@davemloft.net>
48 * Stephen Hemminger <shemminger@osdl.org>
49 * Paul E. McKenney <paulmck@us.ibm.com>
50 * Patrick McHardy <kaber@trash.net>
19baf839
RO
51 */
52
05eee48c 53#define VERSION "0.408"
19baf839 54
19baf839
RO
55#include <asm/uaccess.h>
56#include <asm/system.h>
57#include <asm/bitops.h>
58#include <linux/types.h>
59#include <linux/kernel.h>
19baf839
RO
60#include <linux/mm.h>
61#include <linux/string.h>
62#include <linux/socket.h>
63#include <linux/sockios.h>
64#include <linux/errno.h>
65#include <linux/in.h>
66#include <linux/inet.h>
cd8787ab 67#include <linux/inetdevice.h>
19baf839
RO
68#include <linux/netdevice.h>
69#include <linux/if_arp.h>
70#include <linux/proc_fs.h>
2373ce1c 71#include <linux/rcupdate.h>
19baf839
RO
72#include <linux/skbuff.h>
73#include <linux/netlink.h>
74#include <linux/init.h>
75#include <linux/list.h>
457c4cbc 76#include <net/net_namespace.h>
19baf839
RO
77#include <net/ip.h>
78#include <net/protocol.h>
79#include <net/route.h>
80#include <net/tcp.h>
81#include <net/sock.h>
82#include <net/ip_fib.h>
83#include "fib_lookup.h"
84
85#undef CONFIG_IP_FIB_TRIE_STATS
06ef921d 86#define MAX_STAT_DEPTH 32
19baf839 87
19baf839 88#define KEYLENGTH (8*sizeof(t_key))
19baf839 89
19baf839
RO
90typedef unsigned int t_key;
91
92#define T_TNODE 0
93#define T_LEAF 1
94#define NODE_TYPE_MASK 0x1UL
2373ce1c
RO
95#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
96
91b9a277
OJ
97#define IS_TNODE(n) (!(n->parent & T_LEAF))
98#define IS_LEAF(n) (n->parent & T_LEAF)
19baf839
RO
99
100struct node {
91b9a277
OJ
101 t_key key;
102 unsigned long parent;
19baf839
RO
103};
104
105struct leaf {
91b9a277
OJ
106 t_key key;
107 unsigned long parent;
19baf839 108 struct hlist_head list;
2373ce1c 109 struct rcu_head rcu;
19baf839
RO
110};
111
112struct leaf_info {
113 struct hlist_node hlist;
2373ce1c 114 struct rcu_head rcu;
19baf839
RO
115 int plen;
116 struct list_head falh;
117};
118
119struct tnode {
91b9a277
OJ
120 t_key key;
121 unsigned long parent;
122 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
123 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
124 unsigned short full_children; /* KEYLENGTH bits needed */
125 unsigned short empty_children; /* KEYLENGTH bits needed */
2373ce1c 126 struct rcu_head rcu;
91b9a277 127 struct node *child[0];
19baf839
RO
128};
129
130#ifdef CONFIG_IP_FIB_TRIE_STATS
131struct trie_use_stats {
132 unsigned int gets;
133 unsigned int backtrack;
134 unsigned int semantic_match_passed;
135 unsigned int semantic_match_miss;
136 unsigned int null_node_hit;
2f36895a 137 unsigned int resize_node_skipped;
19baf839
RO
138};
139#endif
140
141struct trie_stat {
142 unsigned int totdepth;
143 unsigned int maxdepth;
144 unsigned int tnodes;
145 unsigned int leaves;
146 unsigned int nullpointers;
06ef921d 147 unsigned int nodesizes[MAX_STAT_DEPTH];
c877efb2 148};
19baf839
RO
149
150struct trie {
91b9a277 151 struct node *trie;
19baf839
RO
152#ifdef CONFIG_IP_FIB_TRIE_STATS
153 struct trie_use_stats stats;
154#endif
91b9a277 155 int size;
19baf839
RO
156 unsigned int revision;
157};
158
19baf839
RO
159static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
160static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
19baf839 161static struct node *resize(struct trie *t, struct tnode *tn);
2f80b3c8
RO
162static struct tnode *inflate(struct trie *t, struct tnode *tn);
163static struct tnode *halve(struct trie *t, struct tnode *tn);
19baf839 164static void tnode_free(struct tnode *tn);
19baf839 165
e18b890b 166static struct kmem_cache *fn_alias_kmem __read_mostly;
19baf839
RO
167static struct trie *trie_local = NULL, *trie_main = NULL;
168
06801916
SH
169static inline struct tnode *node_parent(struct node *node)
170{
171 struct tnode *ret;
172
173 ret = (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
174 return rcu_dereference(ret);
175}
176
177static inline void node_set_parent(struct node *node, struct tnode *ptr)
178{
179 rcu_assign_pointer(node->parent,
180 (unsigned long)ptr | NODE_TYPE(node));
181}
2373ce1c
RO
182
183/* rcu_read_lock needs to be hold by caller from readside */
184
c877efb2 185static inline struct node *tnode_get_child(struct tnode *tn, int i)
19baf839 186{
91b9a277 187 BUG_ON(i >= 1 << tn->bits);
19baf839 188
2373ce1c 189 return rcu_dereference(tn->child[i]);
19baf839
RO
190}
191
bb435b8d 192static inline int tnode_child_length(const struct tnode *tn)
19baf839 193{
91b9a277 194 return 1 << tn->bits;
19baf839
RO
195}
196
ab66b4a7
SH
197static inline t_key mask_pfx(t_key k, unsigned short l)
198{
199 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
200}
201
19baf839
RO
202static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
203{
91b9a277 204 if (offset < KEYLENGTH)
19baf839 205 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
91b9a277 206 else
19baf839
RO
207 return 0;
208}
209
210static inline int tkey_equals(t_key a, t_key b)
211{
c877efb2 212 return a == b;
19baf839
RO
213}
214
215static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
216{
c877efb2
SH
217 if (bits == 0 || offset >= KEYLENGTH)
218 return 1;
91b9a277
OJ
219 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
220 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
c877efb2 221}
19baf839
RO
222
223static inline int tkey_mismatch(t_key a, int offset, t_key b)
224{
225 t_key diff = a ^ b;
226 int i = offset;
227
c877efb2
SH
228 if (!diff)
229 return 0;
230 while ((diff << i) >> (KEYLENGTH-1) == 0)
19baf839
RO
231 i++;
232 return i;
233}
234
19baf839 235/*
e905a9ed
YH
236 To understand this stuff, an understanding of keys and all their bits is
237 necessary. Every node in the trie has a key associated with it, but not
19baf839
RO
238 all of the bits in that key are significant.
239
240 Consider a node 'n' and its parent 'tp'.
241
e905a9ed
YH
242 If n is a leaf, every bit in its key is significant. Its presence is
243 necessitated by path compression, since during a tree traversal (when
244 searching for a leaf - unless we are doing an insertion) we will completely
245 ignore all skipped bits we encounter. Thus we need to verify, at the end of
246 a potentially successful search, that we have indeed been walking the
19baf839
RO
247 correct key path.
248
e905a9ed
YH
249 Note that we can never "miss" the correct key in the tree if present by
250 following the wrong path. Path compression ensures that segments of the key
251 that are the same for all keys with a given prefix are skipped, but the
252 skipped part *is* identical for each node in the subtrie below the skipped
253 bit! trie_insert() in this implementation takes care of that - note the
19baf839
RO
254 call to tkey_sub_equals() in trie_insert().
255
e905a9ed 256 if n is an internal node - a 'tnode' here, the various parts of its key
19baf839
RO
257 have many different meanings.
258
e905a9ed 259 Example:
19baf839
RO
260 _________________________________________________________________
261 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
262 -----------------------------------------------------------------
e905a9ed 263 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
19baf839
RO
264
265 _________________________________________________________________
266 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
267 -----------------------------------------------------------------
268 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
269
270 tp->pos = 7
271 tp->bits = 3
272 n->pos = 15
91b9a277 273 n->bits = 4
19baf839 274
e905a9ed
YH
275 First, let's just ignore the bits that come before the parent tp, that is
276 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
19baf839
RO
277 not use them for anything.
278
279 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
e905a9ed 280 index into the parent's child array. That is, they will be used to find
19baf839
RO
281 'n' among tp's children.
282
283 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
284 for the node n.
285
e905a9ed 286 All the bits we have seen so far are significant to the node n. The rest
19baf839
RO
287 of the bits are really not needed or indeed known in n->key.
288
e905a9ed 289 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
19baf839 290 n's child array, and will of course be different for each child.
e905a9ed 291
c877efb2 292
19baf839
RO
293 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
294 at this point.
295
296*/
297
0c7770c7 298static inline void check_tnode(const struct tnode *tn)
19baf839 299{
0c7770c7 300 WARN_ON(tn && tn->pos+tn->bits > 32);
19baf839
RO
301}
302
303static int halve_threshold = 25;
304static int inflate_threshold = 50;
965ffea4
RO
305static int halve_threshold_root = 8;
306static int inflate_threshold_root = 15;
19baf839 307
2373ce1c
RO
308
309static void __alias_free_mem(struct rcu_head *head)
19baf839 310{
2373ce1c
RO
311 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
312 kmem_cache_free(fn_alias_kmem, fa);
19baf839
RO
313}
314
2373ce1c 315static inline void alias_free_mem_rcu(struct fib_alias *fa)
19baf839 316{
2373ce1c
RO
317 call_rcu(&fa->rcu, __alias_free_mem);
318}
91b9a277 319
2373ce1c
RO
320static void __leaf_free_rcu(struct rcu_head *head)
321{
322 kfree(container_of(head, struct leaf, rcu));
323}
91b9a277 324
2373ce1c 325static void __leaf_info_free_rcu(struct rcu_head *head)
19baf839 326{
2373ce1c 327 kfree(container_of(head, struct leaf_info, rcu));
19baf839
RO
328}
329
2373ce1c 330static inline void free_leaf_info(struct leaf_info *leaf)
19baf839 331{
2373ce1c 332 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
19baf839
RO
333}
334
f0e36f8c
PM
335static struct tnode *tnode_alloc(unsigned int size)
336{
2373ce1c
RO
337 struct page *pages;
338
339 if (size <= PAGE_SIZE)
340 return kcalloc(size, 1, GFP_KERNEL);
341
342 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
343 if (!pages)
344 return NULL;
345
346 return page_address(pages);
f0e36f8c
PM
347}
348
2373ce1c 349static void __tnode_free_rcu(struct rcu_head *head)
f0e36f8c 350{
2373ce1c 351 struct tnode *tn = container_of(head, struct tnode, rcu);
f0e36f8c 352 unsigned int size = sizeof(struct tnode) +
2373ce1c 353 (1 << tn->bits) * sizeof(struct node *);
f0e36f8c
PM
354
355 if (size <= PAGE_SIZE)
356 kfree(tn);
357 else
358 free_pages((unsigned long)tn, get_order(size));
359}
360
2373ce1c
RO
361static inline void tnode_free(struct tnode *tn)
362{
132adf54 363 if (IS_LEAF(tn)) {
550e29bc
RO
364 struct leaf *l = (struct leaf *) tn;
365 call_rcu_bh(&l->rcu, __leaf_free_rcu);
132adf54 366 } else
550e29bc 367 call_rcu(&tn->rcu, __tnode_free_rcu);
2373ce1c
RO
368}
369
370static struct leaf *leaf_new(void)
371{
372 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
373 if (l) {
374 l->parent = T_LEAF;
375 INIT_HLIST_HEAD(&l->list);
376 }
377 return l;
378}
379
380static struct leaf_info *leaf_info_new(int plen)
381{
382 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
383 if (li) {
384 li->plen = plen;
385 INIT_LIST_HEAD(&li->falh);
386 }
387 return li;
388}
389
19baf839
RO
390static struct tnode* tnode_new(t_key key, int pos, int bits)
391{
392 int nchildren = 1<<bits;
393 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
f0e36f8c 394 struct tnode *tn = tnode_alloc(sz);
19baf839 395
91b9a277 396 if (tn) {
19baf839 397 memset(tn, 0, sz);
2373ce1c 398 tn->parent = T_TNODE;
19baf839
RO
399 tn->pos = pos;
400 tn->bits = bits;
401 tn->key = key;
402 tn->full_children = 0;
403 tn->empty_children = 1<<bits;
404 }
c877efb2 405
0c7770c7
SH
406 pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
407 (unsigned int) (sizeof(struct node) * 1<<bits));
19baf839
RO
408 return tn;
409}
410
19baf839
RO
411/*
412 * Check whether a tnode 'n' is "full", i.e. it is an internal node
413 * and no bits are skipped. See discussion in dyntree paper p. 6
414 */
415
bb435b8d 416static inline int tnode_full(const struct tnode *tn, const struct node *n)
19baf839 417{
c877efb2 418 if (n == NULL || IS_LEAF(n))
19baf839
RO
419 return 0;
420
421 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
422}
423
c877efb2 424static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
19baf839
RO
425{
426 tnode_put_child_reorg(tn, i, n, -1);
427}
428
c877efb2 429 /*
19baf839
RO
430 * Add a child at position i overwriting the old value.
431 * Update the value of full_children and empty_children.
432 */
433
c877efb2 434static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
19baf839 435{
2373ce1c 436 struct node *chi = tn->child[i];
19baf839
RO
437 int isfull;
438
0c7770c7
SH
439 BUG_ON(i >= 1<<tn->bits);
440
19baf839
RO
441
442 /* update emptyChildren */
443 if (n == NULL && chi != NULL)
444 tn->empty_children++;
445 else if (n != NULL && chi == NULL)
446 tn->empty_children--;
c877efb2 447
19baf839 448 /* update fullChildren */
91b9a277 449 if (wasfull == -1)
19baf839
RO
450 wasfull = tnode_full(tn, chi);
451
452 isfull = tnode_full(tn, n);
c877efb2 453 if (wasfull && !isfull)
19baf839 454 tn->full_children--;
c877efb2 455 else if (!wasfull && isfull)
19baf839 456 tn->full_children++;
91b9a277 457
c877efb2 458 if (n)
06801916 459 node_set_parent(n, tn);
19baf839 460
2373ce1c 461 rcu_assign_pointer(tn->child[i], n);
19baf839
RO
462}
463
c877efb2 464static struct node *resize(struct trie *t, struct tnode *tn)
19baf839
RO
465{
466 int i;
2f36895a 467 int err = 0;
2f80b3c8 468 struct tnode *old_tn;
e6308be8
RO
469 int inflate_threshold_use;
470 int halve_threshold_use;
05eee48c 471 int max_resize;
19baf839 472
e905a9ed 473 if (!tn)
19baf839
RO
474 return NULL;
475
0c7770c7
SH
476 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
477 tn, inflate_threshold, halve_threshold);
19baf839
RO
478
479 /* No children */
480 if (tn->empty_children == tnode_child_length(tn)) {
481 tnode_free(tn);
482 return NULL;
483 }
484 /* One child */
485 if (tn->empty_children == tnode_child_length(tn) - 1)
486 for (i = 0; i < tnode_child_length(tn); i++) {
91b9a277 487 struct node *n;
19baf839 488
91b9a277 489 n = tn->child[i];
2373ce1c 490 if (!n)
91b9a277 491 continue;
91b9a277
OJ
492
493 /* compress one level */
06801916 494 node_set_parent(n, NULL);
91b9a277
OJ
495 tnode_free(tn);
496 return n;
19baf839 497 }
c877efb2 498 /*
19baf839
RO
499 * Double as long as the resulting node has a number of
500 * nonempty nodes that are above the threshold.
501 */
502
503 /*
c877efb2
SH
504 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
505 * the Helsinki University of Technology and Matti Tikkanen of Nokia
19baf839 506 * Telecommunications, page 6:
c877efb2 507 * "A node is doubled if the ratio of non-empty children to all
19baf839
RO
508 * children in the *doubled* node is at least 'high'."
509 *
c877efb2
SH
510 * 'high' in this instance is the variable 'inflate_threshold'. It
511 * is expressed as a percentage, so we multiply it with
512 * tnode_child_length() and instead of multiplying by 2 (since the
513 * child array will be doubled by inflate()) and multiplying
514 * the left-hand side by 100 (to handle the percentage thing) we
19baf839 515 * multiply the left-hand side by 50.
c877efb2
SH
516 *
517 * The left-hand side may look a bit weird: tnode_child_length(tn)
518 * - tn->empty_children is of course the number of non-null children
519 * in the current node. tn->full_children is the number of "full"
19baf839 520 * children, that is non-null tnodes with a skip value of 0.
c877efb2 521 * All of those will be doubled in the resulting inflated tnode, so
19baf839 522 * we just count them one extra time here.
c877efb2 523 *
19baf839 524 * A clearer way to write this would be:
c877efb2 525 *
19baf839 526 * to_be_doubled = tn->full_children;
c877efb2 527 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
19baf839
RO
528 * tn->full_children;
529 *
530 * new_child_length = tnode_child_length(tn) * 2;
531 *
c877efb2 532 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
19baf839
RO
533 * new_child_length;
534 * if (new_fill_factor >= inflate_threshold)
c877efb2
SH
535 *
536 * ...and so on, tho it would mess up the while () loop.
537 *
19baf839
RO
538 * anyway,
539 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
540 * inflate_threshold
c877efb2 541 *
19baf839
RO
542 * avoid a division:
543 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
544 * inflate_threshold * new_child_length
c877efb2 545 *
19baf839 546 * expand not_to_be_doubled and to_be_doubled, and shorten:
c877efb2 547 * 100 * (tnode_child_length(tn) - tn->empty_children +
91b9a277 548 * tn->full_children) >= inflate_threshold * new_child_length
c877efb2 549 *
19baf839 550 * expand new_child_length:
c877efb2 551 * 100 * (tnode_child_length(tn) - tn->empty_children +
91b9a277 552 * tn->full_children) >=
19baf839 553 * inflate_threshold * tnode_child_length(tn) * 2
c877efb2 554 *
19baf839 555 * shorten again:
c877efb2 556 * 50 * (tn->full_children + tnode_child_length(tn) -
91b9a277 557 * tn->empty_children) >= inflate_threshold *
19baf839 558 * tnode_child_length(tn)
c877efb2 559 *
19baf839
RO
560 */
561
562 check_tnode(tn);
c877efb2 563
e6308be8
RO
564 /* Keep root node larger */
565
132adf54 566 if (!tn->parent)
e6308be8 567 inflate_threshold_use = inflate_threshold_root;
e905a9ed 568 else
e6308be8
RO
569 inflate_threshold_use = inflate_threshold;
570
2f36895a 571 err = 0;
05eee48c
RO
572 max_resize = 10;
573 while ((tn->full_children > 0 && max_resize-- &&
19baf839 574 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
e6308be8 575 inflate_threshold_use * tnode_child_length(tn))) {
19baf839 576
2f80b3c8
RO
577 old_tn = tn;
578 tn = inflate(t, tn);
579 if (IS_ERR(tn)) {
580 tn = old_tn;
2f36895a
RO
581#ifdef CONFIG_IP_FIB_TRIE_STATS
582 t->stats.resize_node_skipped++;
583#endif
584 break;
585 }
19baf839
RO
586 }
587
05eee48c
RO
588 if (max_resize < 0) {
589 if (!tn->parent)
590 printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n",
591 inflate_threshold_root, tn->bits);
592 else
593 printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n",
594 inflate_threshold, tn->bits);
595 }
596
19baf839
RO
597 check_tnode(tn);
598
599 /*
600 * Halve as long as the number of empty children in this
601 * node is above threshold.
602 */
2f36895a 603
e6308be8
RO
604
605 /* Keep root node larger */
606
132adf54 607 if (!tn->parent)
e6308be8 608 halve_threshold_use = halve_threshold_root;
e905a9ed 609 else
e6308be8
RO
610 halve_threshold_use = halve_threshold;
611
2f36895a 612 err = 0;
05eee48c
RO
613 max_resize = 10;
614 while (tn->bits > 1 && max_resize-- &&
19baf839 615 100 * (tnode_child_length(tn) - tn->empty_children) <
e6308be8 616 halve_threshold_use * tnode_child_length(tn)) {
2f36895a 617
2f80b3c8
RO
618 old_tn = tn;
619 tn = halve(t, tn);
620 if (IS_ERR(tn)) {
621 tn = old_tn;
2f36895a
RO
622#ifdef CONFIG_IP_FIB_TRIE_STATS
623 t->stats.resize_node_skipped++;
624#endif
625 break;
626 }
627 }
19baf839 628
05eee48c
RO
629 if (max_resize < 0) {
630 if (!tn->parent)
631 printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n",
632 halve_threshold_root, tn->bits);
633 else
634 printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n",
635 halve_threshold, tn->bits);
636 }
c877efb2 637
19baf839 638 /* Only one child remains */
19baf839
RO
639 if (tn->empty_children == tnode_child_length(tn) - 1)
640 for (i = 0; i < tnode_child_length(tn); i++) {
91b9a277 641 struct node *n;
19baf839 642
91b9a277 643 n = tn->child[i];
2373ce1c 644 if (!n)
91b9a277 645 continue;
91b9a277
OJ
646
647 /* compress one level */
648
06801916 649 node_set_parent(n, NULL);
91b9a277
OJ
650 tnode_free(tn);
651 return n;
19baf839
RO
652 }
653
654 return (struct node *) tn;
655}
656
2f80b3c8 657static struct tnode *inflate(struct trie *t, struct tnode *tn)
19baf839
RO
658{
659 struct tnode *inode;
660 struct tnode *oldtnode = tn;
661 int olen = tnode_child_length(tn);
662 int i;
663
0c7770c7 664 pr_debug("In inflate\n");
19baf839
RO
665
666 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
667
0c7770c7 668 if (!tn)
2f80b3c8 669 return ERR_PTR(-ENOMEM);
2f36895a
RO
670
671 /*
c877efb2
SH
672 * Preallocate and store tnodes before the actual work so we
673 * don't get into an inconsistent state if memory allocation
674 * fails. In case of failure we return the oldnode and inflate
2f36895a
RO
675 * of tnode is ignored.
676 */
91b9a277
OJ
677
678 for (i = 0; i < olen; i++) {
2f36895a
RO
679 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
680
681 if (inode &&
682 IS_TNODE(inode) &&
683 inode->pos == oldtnode->pos + oldtnode->bits &&
684 inode->bits > 1) {
685 struct tnode *left, *right;
ab66b4a7 686 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
c877efb2 687
2f36895a
RO
688 left = tnode_new(inode->key&(~m), inode->pos + 1,
689 inode->bits - 1);
2f80b3c8
RO
690 if (!left)
691 goto nomem;
91b9a277 692
2f36895a
RO
693 right = tnode_new(inode->key|m, inode->pos + 1,
694 inode->bits - 1);
695
e905a9ed 696 if (!right) {
2f80b3c8
RO
697 tnode_free(left);
698 goto nomem;
e905a9ed 699 }
2f36895a
RO
700
701 put_child(t, tn, 2*i, (struct node *) left);
702 put_child(t, tn, 2*i+1, (struct node *) right);
703 }
704 }
705
91b9a277 706 for (i = 0; i < olen; i++) {
19baf839 707 struct node *node = tnode_get_child(oldtnode, i);
91b9a277
OJ
708 struct tnode *left, *right;
709 int size, j;
c877efb2 710
19baf839
RO
711 /* An empty child */
712 if (node == NULL)
713 continue;
714
715 /* A leaf or an internal node with skipped bits */
716
c877efb2 717 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
19baf839 718 tn->pos + tn->bits - 1) {
c877efb2 719 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
19baf839
RO
720 1) == 0)
721 put_child(t, tn, 2*i, node);
722 else
723 put_child(t, tn, 2*i+1, node);
724 continue;
725 }
726
727 /* An internal node with two children */
728 inode = (struct tnode *) node;
729
730 if (inode->bits == 1) {
731 put_child(t, tn, 2*i, inode->child[0]);
732 put_child(t, tn, 2*i+1, inode->child[1]);
733
734 tnode_free(inode);
91b9a277 735 continue;
19baf839
RO
736 }
737
91b9a277
OJ
738 /* An internal node with more than two children */
739
740 /* We will replace this node 'inode' with two new
741 * ones, 'left' and 'right', each with half of the
742 * original children. The two new nodes will have
743 * a position one bit further down the key and this
744 * means that the "significant" part of their keys
745 * (see the discussion near the top of this file)
746 * will differ by one bit, which will be "0" in
747 * left's key and "1" in right's key. Since we are
748 * moving the key position by one step, the bit that
749 * we are moving away from - the bit at position
750 * (inode->pos) - is the one that will differ between
751 * left and right. So... we synthesize that bit in the
752 * two new keys.
753 * The mask 'm' below will be a single "one" bit at
754 * the position (inode->pos)
755 */
19baf839 756
91b9a277
OJ
757 /* Use the old key, but set the new significant
758 * bit to zero.
759 */
2f36895a 760
91b9a277
OJ
761 left = (struct tnode *) tnode_get_child(tn, 2*i);
762 put_child(t, tn, 2*i, NULL);
2f36895a 763
91b9a277 764 BUG_ON(!left);
2f36895a 765
91b9a277
OJ
766 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
767 put_child(t, tn, 2*i+1, NULL);
19baf839 768
91b9a277 769 BUG_ON(!right);
19baf839 770
91b9a277
OJ
771 size = tnode_child_length(left);
772 for (j = 0; j < size; j++) {
773 put_child(t, left, j, inode->child[j]);
774 put_child(t, right, j, inode->child[j + size]);
19baf839 775 }
91b9a277
OJ
776 put_child(t, tn, 2*i, resize(t, left));
777 put_child(t, tn, 2*i+1, resize(t, right));
778
779 tnode_free(inode);
19baf839
RO
780 }
781 tnode_free(oldtnode);
782 return tn;
2f80b3c8
RO
783nomem:
784 {
785 int size = tnode_child_length(tn);
786 int j;
787
0c7770c7 788 for (j = 0; j < size; j++)
2f80b3c8
RO
789 if (tn->child[j])
790 tnode_free((struct tnode *)tn->child[j]);
791
792 tnode_free(tn);
0c7770c7 793
2f80b3c8
RO
794 return ERR_PTR(-ENOMEM);
795 }
19baf839
RO
796}
797
2f80b3c8 798static struct tnode *halve(struct trie *t, struct tnode *tn)
19baf839
RO
799{
800 struct tnode *oldtnode = tn;
801 struct node *left, *right;
802 int i;
803 int olen = tnode_child_length(tn);
804
0c7770c7 805 pr_debug("In halve\n");
c877efb2
SH
806
807 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
19baf839 808
2f80b3c8
RO
809 if (!tn)
810 return ERR_PTR(-ENOMEM);
2f36895a
RO
811
812 /*
c877efb2
SH
813 * Preallocate and store tnodes before the actual work so we
814 * don't get into an inconsistent state if memory allocation
815 * fails. In case of failure we return the oldnode and halve
2f36895a
RO
816 * of tnode is ignored.
817 */
818
91b9a277 819 for (i = 0; i < olen; i += 2) {
2f36895a
RO
820 left = tnode_get_child(oldtnode, i);
821 right = tnode_get_child(oldtnode, i+1);
c877efb2 822
2f36895a 823 /* Two nonempty children */
0c7770c7 824 if (left && right) {
2f80b3c8 825 struct tnode *newn;
0c7770c7 826
2f80b3c8 827 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
0c7770c7
SH
828
829 if (!newn)
2f80b3c8 830 goto nomem;
0c7770c7 831
2f80b3c8 832 put_child(t, tn, i/2, (struct node *)newn);
2f36895a 833 }
2f36895a 834
2f36895a 835 }
19baf839 836
91b9a277
OJ
837 for (i = 0; i < olen; i += 2) {
838 struct tnode *newBinNode;
839
19baf839
RO
840 left = tnode_get_child(oldtnode, i);
841 right = tnode_get_child(oldtnode, i+1);
c877efb2 842
19baf839
RO
843 /* At least one of the children is empty */
844 if (left == NULL) {
845 if (right == NULL) /* Both are empty */
846 continue;
847 put_child(t, tn, i/2, right);
91b9a277 848 continue;
0c7770c7 849 }
91b9a277
OJ
850
851 if (right == NULL) {
19baf839 852 put_child(t, tn, i/2, left);
91b9a277
OJ
853 continue;
854 }
c877efb2 855
19baf839 856 /* Two nonempty children */
91b9a277
OJ
857 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
858 put_child(t, tn, i/2, NULL);
91b9a277
OJ
859 put_child(t, newBinNode, 0, left);
860 put_child(t, newBinNode, 1, right);
861 put_child(t, tn, i/2, resize(t, newBinNode));
19baf839
RO
862 }
863 tnode_free(oldtnode);
864 return tn;
2f80b3c8
RO
865nomem:
866 {
867 int size = tnode_child_length(tn);
868 int j;
869
0c7770c7 870 for (j = 0; j < size; j++)
2f80b3c8
RO
871 if (tn->child[j])
872 tnode_free((struct tnode *)tn->child[j]);
873
874 tnode_free(tn);
0c7770c7 875
2f80b3c8
RO
876 return ERR_PTR(-ENOMEM);
877 }
19baf839
RO
878}
879
91b9a277 880static void trie_init(struct trie *t)
19baf839 881{
91b9a277
OJ
882 if (!t)
883 return;
884
885 t->size = 0;
2373ce1c 886 rcu_assign_pointer(t->trie, NULL);
91b9a277 887 t->revision = 0;
19baf839 888#ifdef CONFIG_IP_FIB_TRIE_STATS
91b9a277 889 memset(&t->stats, 0, sizeof(struct trie_use_stats));
19baf839 890#endif
19baf839
RO
891}
892
772cb712 893/* readside must use rcu_read_lock currently dump routines
2373ce1c
RO
894 via get_fa_head and dump */
895
772cb712 896static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
19baf839 897{
772cb712 898 struct hlist_head *head = &l->list;
19baf839
RO
899 struct hlist_node *node;
900 struct leaf_info *li;
901
2373ce1c 902 hlist_for_each_entry_rcu(li, node, head, hlist)
c877efb2 903 if (li->plen == plen)
19baf839 904 return li;
91b9a277 905
19baf839
RO
906 return NULL;
907}
908
909static inline struct list_head * get_fa_head(struct leaf *l, int plen)
910{
772cb712 911 struct leaf_info *li = find_leaf_info(l, plen);
c877efb2 912
91b9a277
OJ
913 if (!li)
914 return NULL;
c877efb2 915
91b9a277 916 return &li->falh;
19baf839
RO
917}
918
919static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
920{
e905a9ed
YH
921 struct leaf_info *li = NULL, *last = NULL;
922 struct hlist_node *node;
923
924 if (hlist_empty(head)) {
925 hlist_add_head_rcu(&new->hlist, head);
926 } else {
927 hlist_for_each_entry(li, node, head, hlist) {
928 if (new->plen > li->plen)
929 break;
930
931 last = li;
932 }
933 if (last)
934 hlist_add_after_rcu(&last->hlist, &new->hlist);
935 else
936 hlist_add_before_rcu(&new->hlist, &li->hlist);
937 }
19baf839
RO
938}
939
2373ce1c
RO
940/* rcu_read_lock needs to be hold by caller from readside */
941
19baf839
RO
942static struct leaf *
943fib_find_node(struct trie *t, u32 key)
944{
945 int pos;
946 struct tnode *tn;
947 struct node *n;
948
949 pos = 0;
2373ce1c 950 n = rcu_dereference(t->trie);
19baf839
RO
951
952 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
953 tn = (struct tnode *) n;
91b9a277 954
19baf839 955 check_tnode(tn);
91b9a277 956
c877efb2 957 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
91b9a277 958 pos = tn->pos + tn->bits;
19baf839 959 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
91b9a277 960 } else
19baf839
RO
961 break;
962 }
963 /* Case we have found a leaf. Compare prefixes */
964
91b9a277
OJ
965 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
966 return (struct leaf *)n;
967
19baf839
RO
968 return NULL;
969}
970
971static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
972{
19baf839 973 int wasfull;
06801916
SH
974 t_key cindex, key = tn->key;
975 struct tnode *tp;
19baf839 976
06801916 977 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
19baf839
RO
978 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
979 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
980 tn = (struct tnode *) resize (t, (struct tnode *)tn);
981 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
91b9a277 982
06801916
SH
983 tp = node_parent((struct node *) tn);
984 if (!tp)
19baf839 985 break;
06801916 986 tn = tp;
19baf839 987 }
06801916 988
19baf839 989 /* Handle last (top) tnode */
c877efb2 990 if (IS_TNODE(tn))
19baf839
RO
991 tn = (struct tnode*) resize(t, (struct tnode *)tn);
992
993 return (struct node*) tn;
994}
995
2373ce1c
RO
996/* only used from updater-side */
997
f835e471
RO
998static struct list_head *
999fib_insert_node(struct trie *t, int *err, u32 key, int plen)
19baf839
RO
1000{
1001 int pos, newpos;
1002 struct tnode *tp = NULL, *tn = NULL;
1003 struct node *n;
1004 struct leaf *l;
1005 int missbit;
c877efb2 1006 struct list_head *fa_head = NULL;
19baf839
RO
1007 struct leaf_info *li;
1008 t_key cindex;
1009
1010 pos = 0;
c877efb2 1011 n = t->trie;
19baf839 1012
c877efb2
SH
1013 /* If we point to NULL, stop. Either the tree is empty and we should
1014 * just put a new leaf in if, or we have reached an empty child slot,
19baf839 1015 * and we should just put our new leaf in that.
c877efb2
SH
1016 * If we point to a T_TNODE, check if it matches our key. Note that
1017 * a T_TNODE might be skipping any number of bits - its 'pos' need
19baf839
RO
1018 * not be the parent's 'pos'+'bits'!
1019 *
c877efb2 1020 * If it does match the current key, get pos/bits from it, extract
19baf839
RO
1021 * the index from our key, push the T_TNODE and walk the tree.
1022 *
1023 * If it doesn't, we have to replace it with a new T_TNODE.
1024 *
c877efb2
SH
1025 * If we point to a T_LEAF, it might or might not have the same key
1026 * as we do. If it does, just change the value, update the T_LEAF's
1027 * value, and return it.
19baf839
RO
1028 * If it doesn't, we need to replace it with a T_TNODE.
1029 */
1030
1031 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1032 tn = (struct tnode *) n;
91b9a277 1033
c877efb2 1034 check_tnode(tn);
91b9a277 1035
c877efb2 1036 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
19baf839 1037 tp = tn;
91b9a277 1038 pos = tn->pos + tn->bits;
19baf839
RO
1039 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1040
06801916 1041 BUG_ON(n && node_parent(n) != tn);
91b9a277 1042 } else
19baf839
RO
1043 break;
1044 }
1045
1046 /*
1047 * n ----> NULL, LEAF or TNODE
1048 *
c877efb2 1049 * tp is n's (parent) ----> NULL or TNODE
19baf839
RO
1050 */
1051
91b9a277 1052 BUG_ON(tp && IS_LEAF(tp));
19baf839
RO
1053
1054 /* Case 1: n is a leaf. Compare prefixes */
1055
c877efb2 1056 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
91b9a277
OJ
1057 struct leaf *l = (struct leaf *) n;
1058
19baf839 1059 li = leaf_info_new(plen);
91b9a277 1060
c877efb2 1061 if (!li) {
f835e471
RO
1062 *err = -ENOMEM;
1063 goto err;
1064 }
19baf839
RO
1065
1066 fa_head = &li->falh;
1067 insert_leaf_info(&l->list, li);
1068 goto done;
1069 }
1070 t->size++;
1071 l = leaf_new();
1072
c877efb2 1073 if (!l) {
f835e471
RO
1074 *err = -ENOMEM;
1075 goto err;
1076 }
19baf839
RO
1077
1078 l->key = key;
1079 li = leaf_info_new(plen);
1080
c877efb2 1081 if (!li) {
f835e471
RO
1082 tnode_free((struct tnode *) l);
1083 *err = -ENOMEM;
1084 goto err;
1085 }
19baf839
RO
1086
1087 fa_head = &li->falh;
1088 insert_leaf_info(&l->list, li);
1089
19baf839 1090 if (t->trie && n == NULL) {
91b9a277 1091 /* Case 2: n is NULL, and will just insert a new leaf */
19baf839 1092
06801916 1093 node_set_parent((struct node *)l, tp);
19baf839 1094
91b9a277
OJ
1095 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1096 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1097 } else {
1098 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
c877efb2
SH
1099 /*
1100 * Add a new tnode here
19baf839
RO
1101 * first tnode need some special handling
1102 */
1103
1104 if (tp)
91b9a277 1105 pos = tp->pos+tp->bits;
19baf839 1106 else
91b9a277
OJ
1107 pos = 0;
1108
c877efb2 1109 if (n) {
19baf839
RO
1110 newpos = tkey_mismatch(key, pos, n->key);
1111 tn = tnode_new(n->key, newpos, 1);
91b9a277 1112 } else {
19baf839 1113 newpos = 0;
c877efb2 1114 tn = tnode_new(key, newpos, 1); /* First tnode */
19baf839 1115 }
19baf839 1116
c877efb2 1117 if (!tn) {
f835e471
RO
1118 free_leaf_info(li);
1119 tnode_free((struct tnode *) l);
1120 *err = -ENOMEM;
1121 goto err;
91b9a277
OJ
1122 }
1123
06801916 1124 node_set_parent((struct node *)tn, tp);
19baf839 1125
91b9a277 1126 missbit = tkey_extract_bits(key, newpos, 1);
19baf839
RO
1127 put_child(t, tn, missbit, (struct node *)l);
1128 put_child(t, tn, 1-missbit, n);
1129
c877efb2 1130 if (tp) {
19baf839
RO
1131 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1132 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
91b9a277 1133 } else {
2373ce1c 1134 rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
19baf839
RO
1135 tp = tn;
1136 }
1137 }
91b9a277
OJ
1138
1139 if (tp && tp->pos + tp->bits > 32)
78c6671a 1140 printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
19baf839 1141 tp, tp->pos, tp->bits, key, plen);
91b9a277 1142
19baf839 1143 /* Rebalance the trie */
2373ce1c
RO
1144
1145 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
f835e471
RO
1146done:
1147 t->revision++;
91b9a277 1148err:
19baf839
RO
1149 return fa_head;
1150}
1151
d562f1f8
RO
1152/*
1153 * Caller must hold RTNL.
1154 */
4e902c57 1155static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
19baf839
RO
1156{
1157 struct trie *t = (struct trie *) tb->tb_data;
1158 struct fib_alias *fa, *new_fa;
c877efb2 1159 struct list_head *fa_head = NULL;
19baf839 1160 struct fib_info *fi;
4e902c57
TG
1161 int plen = cfg->fc_dst_len;
1162 u8 tos = cfg->fc_tos;
19baf839
RO
1163 u32 key, mask;
1164 int err;
1165 struct leaf *l;
1166
1167 if (plen > 32)
1168 return -EINVAL;
1169
4e902c57 1170 key = ntohl(cfg->fc_dst);
19baf839 1171
2dfe55b4 1172 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
19baf839 1173
91b9a277 1174 mask = ntohl(inet_make_mask(plen));
19baf839 1175
c877efb2 1176 if (key & ~mask)
19baf839
RO
1177 return -EINVAL;
1178
1179 key = key & mask;
1180
4e902c57
TG
1181 fi = fib_create_info(cfg);
1182 if (IS_ERR(fi)) {
1183 err = PTR_ERR(fi);
19baf839 1184 goto err;
4e902c57 1185 }
19baf839
RO
1186
1187 l = fib_find_node(t, key);
c877efb2 1188 fa = NULL;
19baf839 1189
c877efb2 1190 if (l) {
19baf839
RO
1191 fa_head = get_fa_head(l, plen);
1192 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1193 }
1194
1195 /* Now fa, if non-NULL, points to the first fib alias
1196 * with the same keys [prefix,tos,priority], if such key already
1197 * exists or to the node before which we will insert new one.
1198 *
1199 * If fa is NULL, we will need to allocate a new one and
1200 * insert to the head of f.
1201 *
1202 * If f is NULL, no fib node matched the destination key
1203 * and we need to allocate a new one of those as well.
1204 */
1205
91b9a277 1206 if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
19baf839
RO
1207 struct fib_alias *fa_orig;
1208
1209 err = -EEXIST;
4e902c57 1210 if (cfg->fc_nlflags & NLM_F_EXCL)
19baf839
RO
1211 goto out;
1212
4e902c57 1213 if (cfg->fc_nlflags & NLM_F_REPLACE) {
19baf839
RO
1214 struct fib_info *fi_drop;
1215 u8 state;
1216
2373ce1c 1217 err = -ENOBUFS;
e94b1766 1218 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
2373ce1c
RO
1219 if (new_fa == NULL)
1220 goto out;
19baf839
RO
1221
1222 fi_drop = fa->fa_info;
2373ce1c
RO
1223 new_fa->fa_tos = fa->fa_tos;
1224 new_fa->fa_info = fi;
4e902c57
TG
1225 new_fa->fa_type = cfg->fc_type;
1226 new_fa->fa_scope = cfg->fc_scope;
19baf839 1227 state = fa->fa_state;
2373ce1c 1228 new_fa->fa_state &= ~FA_S_ACCESSED;
19baf839 1229
2373ce1c
RO
1230 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1231 alias_free_mem_rcu(fa);
19baf839
RO
1232
1233 fib_release_info(fi_drop);
1234 if (state & FA_S_ACCESSED)
91b9a277 1235 rt_cache_flush(-1);
b8f55831
MK
1236 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1237 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
19baf839 1238
91b9a277 1239 goto succeeded;
19baf839
RO
1240 }
1241 /* Error if we find a perfect match which
1242 * uses the same scope, type, and nexthop
1243 * information.
1244 */
1245 fa_orig = fa;
1246 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1247 if (fa->fa_tos != tos)
1248 break;
1249 if (fa->fa_info->fib_priority != fi->fib_priority)
1250 break;
4e902c57
TG
1251 if (fa->fa_type == cfg->fc_type &&
1252 fa->fa_scope == cfg->fc_scope &&
19baf839
RO
1253 fa->fa_info == fi) {
1254 goto out;
1255 }
1256 }
4e902c57 1257 if (!(cfg->fc_nlflags & NLM_F_APPEND))
19baf839
RO
1258 fa = fa_orig;
1259 }
1260 err = -ENOENT;
4e902c57 1261 if (!(cfg->fc_nlflags & NLM_F_CREATE))
19baf839
RO
1262 goto out;
1263
1264 err = -ENOBUFS;
e94b1766 1265 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
19baf839
RO
1266 if (new_fa == NULL)
1267 goto out;
1268
1269 new_fa->fa_info = fi;
1270 new_fa->fa_tos = tos;
4e902c57
TG
1271 new_fa->fa_type = cfg->fc_type;
1272 new_fa->fa_scope = cfg->fc_scope;
19baf839 1273 new_fa->fa_state = 0;
19baf839
RO
1274 /*
1275 * Insert new entry to the list.
1276 */
1277
c877efb2 1278 if (!fa_head) {
f835e471 1279 err = 0;
b47b2ec1 1280 fa_head = fib_insert_node(t, &err, key, plen);
c877efb2 1281 if (err)
f835e471
RO
1282 goto out_free_new_fa;
1283 }
19baf839 1284
2373ce1c
RO
1285 list_add_tail_rcu(&new_fa->fa_list,
1286 (fa ? &fa->fa_list : fa_head));
19baf839
RO
1287
1288 rt_cache_flush(-1);
4e902c57 1289 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
b8f55831 1290 &cfg->fc_nlinfo, 0);
19baf839
RO
1291succeeded:
1292 return 0;
f835e471
RO
1293
1294out_free_new_fa:
1295 kmem_cache_free(fn_alias_kmem, new_fa);
19baf839
RO
1296out:
1297 fib_release_info(fi);
91b9a277 1298err:
19baf839
RO
1299 return err;
1300}
1301
2373ce1c 1302
772cb712 1303/* should be called with rcu_read_lock */
0c7770c7
SH
1304static inline int check_leaf(struct trie *t, struct leaf *l,
1305 t_key key, int *plen, const struct flowi *flp,
06c74270 1306 struct fib_result *res)
19baf839 1307{
06c74270 1308 int err, i;
888454c5 1309 __be32 mask;
19baf839
RO
1310 struct leaf_info *li;
1311 struct hlist_head *hhead = &l->list;
1312 struct hlist_node *node;
c877efb2 1313
2373ce1c 1314 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
19baf839 1315 i = li->plen;
888454c5
AV
1316 mask = inet_make_mask(i);
1317 if (l->key != (key & ntohl(mask)))
19baf839
RO
1318 continue;
1319
888454c5 1320 if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) {
19baf839
RO
1321 *plen = i;
1322#ifdef CONFIG_IP_FIB_TRIE_STATS
1323 t->stats.semantic_match_passed++;
1324#endif
06c74270 1325 return err;
19baf839
RO
1326 }
1327#ifdef CONFIG_IP_FIB_TRIE_STATS
1328 t->stats.semantic_match_miss++;
1329#endif
1330 }
06c74270 1331 return 1;
19baf839
RO
1332}
1333
1334static int
1335fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1336{
1337 struct trie *t = (struct trie *) tb->tb_data;
1338 int plen, ret = 0;
1339 struct node *n;
1340 struct tnode *pn;
1341 int pos, bits;
91b9a277 1342 t_key key = ntohl(flp->fl4_dst);
19baf839
RO
1343 int chopped_off;
1344 t_key cindex = 0;
1345 int current_prefix_length = KEYLENGTH;
91b9a277
OJ
1346 struct tnode *cn;
1347 t_key node_prefix, key_prefix, pref_mismatch;
1348 int mp;
1349
2373ce1c 1350 rcu_read_lock();
91b9a277 1351
2373ce1c 1352 n = rcu_dereference(t->trie);
c877efb2 1353 if (!n)
19baf839
RO
1354 goto failed;
1355
1356#ifdef CONFIG_IP_FIB_TRIE_STATS
1357 t->stats.gets++;
1358#endif
1359
1360 /* Just a leaf? */
1361 if (IS_LEAF(n)) {
06c74270 1362 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
19baf839
RO
1363 goto found;
1364 goto failed;
1365 }
1366 pn = (struct tnode *) n;
1367 chopped_off = 0;
c877efb2 1368
91b9a277 1369 while (pn) {
19baf839
RO
1370 pos = pn->pos;
1371 bits = pn->bits;
1372
c877efb2 1373 if (!chopped_off)
ab66b4a7
SH
1374 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1375 pos, bits);
19baf839
RO
1376
1377 n = tnode_get_child(pn, cindex);
1378
1379 if (n == NULL) {
1380#ifdef CONFIG_IP_FIB_TRIE_STATS
1381 t->stats.null_node_hit++;
1382#endif
1383 goto backtrace;
1384 }
1385
91b9a277
OJ
1386 if (IS_LEAF(n)) {
1387 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
1388 goto found;
1389 else
1390 goto backtrace;
1391 }
1392
19baf839
RO
1393#define HL_OPTIMIZE
1394#ifdef HL_OPTIMIZE
91b9a277 1395 cn = (struct tnode *)n;
19baf839 1396
91b9a277
OJ
1397 /*
1398 * It's a tnode, and we can do some extra checks here if we
1399 * like, to avoid descending into a dead-end branch.
1400 * This tnode is in the parent's child array at index
1401 * key[p_pos..p_pos+p_bits] but potentially with some bits
1402 * chopped off, so in reality the index may be just a
1403 * subprefix, padded with zero at the end.
1404 * We can also take a look at any skipped bits in this
1405 * tnode - everything up to p_pos is supposed to be ok,
1406 * and the non-chopped bits of the index (se previous
1407 * paragraph) are also guaranteed ok, but the rest is
1408 * considered unknown.
1409 *
1410 * The skipped bits are key[pos+bits..cn->pos].
1411 */
19baf839 1412
91b9a277
OJ
1413 /* If current_prefix_length < pos+bits, we are already doing
1414 * actual prefix matching, which means everything from
1415 * pos+(bits-chopped_off) onward must be zero along some
1416 * branch of this subtree - otherwise there is *no* valid
1417 * prefix present. Here we can only check the skipped
1418 * bits. Remember, since we have already indexed into the
1419 * parent's child array, we know that the bits we chopped of
1420 * *are* zero.
1421 */
19baf839 1422
91b9a277 1423 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
19baf839 1424
91b9a277
OJ
1425 if (current_prefix_length < pos+bits) {
1426 if (tkey_extract_bits(cn->key, current_prefix_length,
1427 cn->pos - current_prefix_length) != 0 ||
1428 !(cn->child[0]))
1429 goto backtrace;
1430 }
19baf839 1431
91b9a277
OJ
1432 /*
1433 * If chopped_off=0, the index is fully validated and we
1434 * only need to look at the skipped bits for this, the new,
1435 * tnode. What we actually want to do is to find out if
1436 * these skipped bits match our key perfectly, or if we will
1437 * have to count on finding a matching prefix further down,
1438 * because if we do, we would like to have some way of
1439 * verifying the existence of such a prefix at this point.
1440 */
19baf839 1441
91b9a277
OJ
1442 /* The only thing we can do at this point is to verify that
1443 * any such matching prefix can indeed be a prefix to our
1444 * key, and if the bits in the node we are inspecting that
1445 * do not match our key are not ZERO, this cannot be true.
1446 * Thus, find out where there is a mismatch (before cn->pos)
1447 * and verify that all the mismatching bits are zero in the
1448 * new tnode's key.
1449 */
19baf839 1450
91b9a277
OJ
1451 /* Note: We aren't very concerned about the piece of the key
1452 * that precede pn->pos+pn->bits, since these have already been
1453 * checked. The bits after cn->pos aren't checked since these are
1454 * by definition "unknown" at this point. Thus, what we want to
1455 * see is if we are about to enter the "prefix matching" state,
1456 * and in that case verify that the skipped bits that will prevail
1457 * throughout this subtree are zero, as they have to be if we are
1458 * to find a matching prefix.
1459 */
1460
ab66b4a7
SH
1461 node_prefix = mask_pfx(cn->key, cn->pos);
1462 key_prefix = mask_pfx(key, cn->pos);
91b9a277
OJ
1463 pref_mismatch = key_prefix^node_prefix;
1464 mp = 0;
1465
1466 /* In short: If skipped bits in this node do not match the search
1467 * key, enter the "prefix matching" state.directly.
1468 */
1469 if (pref_mismatch) {
1470 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1471 mp++;
1472 pref_mismatch = pref_mismatch <<1;
1473 }
1474 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1475
1476 if (key_prefix != 0)
1477 goto backtrace;
1478
1479 if (current_prefix_length >= cn->pos)
1480 current_prefix_length = mp;
c877efb2 1481 }
91b9a277
OJ
1482#endif
1483 pn = (struct tnode *)n; /* Descend */
1484 chopped_off = 0;
1485 continue;
1486
19baf839
RO
1487backtrace:
1488 chopped_off++;
1489
1490 /* As zero don't change the child key (cindex) */
91b9a277 1491 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
19baf839 1492 chopped_off++;
19baf839
RO
1493
1494 /* Decrease current_... with bits chopped off */
1495 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1496 current_prefix_length = pn->pos + pn->bits - chopped_off;
91b9a277 1497
19baf839 1498 /*
c877efb2 1499 * Either we do the actual chop off according or if we have
19baf839
RO
1500 * chopped off all bits in this tnode walk up to our parent.
1501 */
1502
91b9a277 1503 if (chopped_off <= pn->bits) {
19baf839 1504 cindex &= ~(1 << (chopped_off-1));
91b9a277 1505 } else {
06801916
SH
1506 struct tnode *parent = node_parent((struct node *) pn);
1507 if (!parent)
19baf839 1508 goto failed;
91b9a277 1509
19baf839 1510 /* Get Child's index */
06801916
SH
1511 cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
1512 pn = parent;
19baf839
RO
1513 chopped_off = 0;
1514
1515#ifdef CONFIG_IP_FIB_TRIE_STATS
1516 t->stats.backtrack++;
1517#endif
1518 goto backtrace;
c877efb2 1519 }
19baf839
RO
1520 }
1521failed:
c877efb2 1522 ret = 1;
19baf839 1523found:
2373ce1c 1524 rcu_read_unlock();
19baf839
RO
1525 return ret;
1526}
1527
2373ce1c 1528/* only called from updater side */
19baf839
RO
1529static int trie_leaf_remove(struct trie *t, t_key key)
1530{
1531 t_key cindex;
1532 struct tnode *tp = NULL;
1533 struct node *n = t->trie;
1534 struct leaf *l;
1535
0c7770c7 1536 pr_debug("entering trie_leaf_remove(%p)\n", n);
19baf839
RO
1537
1538 /* Note that in the case skipped bits, those bits are *not* checked!
c877efb2 1539 * When we finish this, we will have NULL or a T_LEAF, and the
19baf839
RO
1540 * T_LEAF may or may not match our key.
1541 */
1542
91b9a277 1543 while (n != NULL && IS_TNODE(n)) {
19baf839
RO
1544 struct tnode *tn = (struct tnode *) n;
1545 check_tnode(tn);
1546 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1547
06801916 1548 BUG_ON(n && node_parent(n) != tn);
91b9a277 1549 }
19baf839
RO
1550 l = (struct leaf *) n;
1551
c877efb2 1552 if (!n || !tkey_equals(l->key, key))
19baf839 1553 return 0;
c877efb2
SH
1554
1555 /*
1556 * Key found.
1557 * Remove the leaf and rebalance the tree
19baf839
RO
1558 */
1559
1560 t->revision++;
1561 t->size--;
1562
06801916 1563 tp = node_parent(n);
19baf839
RO
1564 tnode_free((struct tnode *) n);
1565
c877efb2 1566 if (tp) {
19baf839
RO
1567 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1568 put_child(t, (struct tnode *)tp, cindex, NULL);
2373ce1c 1569 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
91b9a277 1570 } else
2373ce1c 1571 rcu_assign_pointer(t->trie, NULL);
19baf839
RO
1572
1573 return 1;
1574}
1575
d562f1f8
RO
1576/*
1577 * Caller must hold RTNL.
1578 */
4e902c57 1579static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
19baf839
RO
1580{
1581 struct trie *t = (struct trie *) tb->tb_data;
1582 u32 key, mask;
4e902c57
TG
1583 int plen = cfg->fc_dst_len;
1584 u8 tos = cfg->fc_tos;
19baf839
RO
1585 struct fib_alias *fa, *fa_to_delete;
1586 struct list_head *fa_head;
1587 struct leaf *l;
91b9a277
OJ
1588 struct leaf_info *li;
1589
c877efb2 1590 if (plen > 32)
19baf839
RO
1591 return -EINVAL;
1592
4e902c57 1593 key = ntohl(cfg->fc_dst);
91b9a277 1594 mask = ntohl(inet_make_mask(plen));
19baf839 1595
c877efb2 1596 if (key & ~mask)
19baf839
RO
1597 return -EINVAL;
1598
1599 key = key & mask;
1600 l = fib_find_node(t, key);
1601
c877efb2 1602 if (!l)
19baf839
RO
1603 return -ESRCH;
1604
1605 fa_head = get_fa_head(l, plen);
1606 fa = fib_find_alias(fa_head, tos, 0);
1607
1608 if (!fa)
1609 return -ESRCH;
1610
0c7770c7 1611 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
19baf839
RO
1612
1613 fa_to_delete = NULL;
1614 fa_head = fa->fa_list.prev;
2373ce1c 1615
19baf839
RO
1616 list_for_each_entry(fa, fa_head, fa_list) {
1617 struct fib_info *fi = fa->fa_info;
1618
1619 if (fa->fa_tos != tos)
1620 break;
1621
4e902c57
TG
1622 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1623 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1624 fa->fa_scope == cfg->fc_scope) &&
1625 (!cfg->fc_protocol ||
1626 fi->fib_protocol == cfg->fc_protocol) &&
1627 fib_nh_match(cfg, fi) == 0) {
19baf839
RO
1628 fa_to_delete = fa;
1629 break;
1630 }
1631 }
1632
91b9a277
OJ
1633 if (!fa_to_delete)
1634 return -ESRCH;
19baf839 1635
91b9a277 1636 fa = fa_to_delete;
4e902c57 1637 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
b8f55831 1638 &cfg->fc_nlinfo, 0);
91b9a277
OJ
1639
1640 l = fib_find_node(t, key);
772cb712 1641 li = find_leaf_info(l, plen);
19baf839 1642
2373ce1c 1643 list_del_rcu(&fa->fa_list);
19baf839 1644
91b9a277 1645 if (list_empty(fa_head)) {
2373ce1c 1646 hlist_del_rcu(&li->hlist);
91b9a277 1647 free_leaf_info(li);
2373ce1c 1648 }
19baf839 1649
91b9a277
OJ
1650 if (hlist_empty(&l->list))
1651 trie_leaf_remove(t, key);
19baf839 1652
91b9a277
OJ
1653 if (fa->fa_state & FA_S_ACCESSED)
1654 rt_cache_flush(-1);
19baf839 1655
2373ce1c
RO
1656 fib_release_info(fa->fa_info);
1657 alias_free_mem_rcu(fa);
91b9a277 1658 return 0;
19baf839
RO
1659}
1660
1661static int trie_flush_list(struct trie *t, struct list_head *head)
1662{
1663 struct fib_alias *fa, *fa_node;
1664 int found = 0;
1665
1666 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1667 struct fib_info *fi = fa->fa_info;
19baf839 1668
2373ce1c
RO
1669 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1670 list_del_rcu(&fa->fa_list);
1671 fib_release_info(fa->fa_info);
1672 alias_free_mem_rcu(fa);
19baf839
RO
1673 found++;
1674 }
1675 }
1676 return found;
1677}
1678
1679static int trie_flush_leaf(struct trie *t, struct leaf *l)
1680{
1681 int found = 0;
1682 struct hlist_head *lih = &l->list;
1683 struct hlist_node *node, *tmp;
1684 struct leaf_info *li = NULL;
1685
1686 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
19baf839
RO
1687 found += trie_flush_list(t, &li->falh);
1688
1689 if (list_empty(&li->falh)) {
2373ce1c 1690 hlist_del_rcu(&li->hlist);
19baf839
RO
1691 free_leaf_info(li);
1692 }
1693 }
1694 return found;
1695}
1696
2373ce1c
RO
1697/* rcu_read_lock needs to be hold by caller from readside */
1698
19baf839
RO
1699static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1700{
1701 struct node *c = (struct node *) thisleaf;
1702 struct tnode *p;
1703 int idx;
2373ce1c 1704 struct node *trie = rcu_dereference(t->trie);
19baf839 1705
c877efb2 1706 if (c == NULL) {
2373ce1c 1707 if (trie == NULL)
19baf839
RO
1708 return NULL;
1709
2373ce1c
RO
1710 if (IS_LEAF(trie)) /* trie w. just a leaf */
1711 return (struct leaf *) trie;
19baf839 1712
2373ce1c 1713 p = (struct tnode*) trie; /* Start */
91b9a277 1714 } else
06801916 1715 p = node_parent(c);
c877efb2 1716
19baf839
RO
1717 while (p) {
1718 int pos, last;
1719
1720 /* Find the next child of the parent */
c877efb2
SH
1721 if (c)
1722 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1723 else
19baf839
RO
1724 pos = 0;
1725
1726 last = 1 << p->bits;
91b9a277 1727 for (idx = pos; idx < last ; idx++) {
2373ce1c
RO
1728 c = rcu_dereference(p->child[idx]);
1729
1730 if (!c)
91b9a277
OJ
1731 continue;
1732
1733 /* Decend if tnode */
2373ce1c
RO
1734 while (IS_TNODE(c)) {
1735 p = (struct tnode *) c;
e905a9ed 1736 idx = 0;
91b9a277
OJ
1737
1738 /* Rightmost non-NULL branch */
1739 if (p && IS_TNODE(p))
2373ce1c
RO
1740 while (!(c = rcu_dereference(p->child[idx]))
1741 && idx < (1<<p->bits)) idx++;
91b9a277
OJ
1742
1743 /* Done with this tnode? */
2373ce1c 1744 if (idx >= (1 << p->bits) || !c)
91b9a277 1745 goto up;
19baf839 1746 }
2373ce1c 1747 return (struct leaf *) c;
19baf839
RO
1748 }
1749up:
1750 /* No more children go up one step */
91b9a277 1751 c = (struct node *) p;
06801916 1752 p = node_parent(c);
19baf839
RO
1753 }
1754 return NULL; /* Ready. Root of trie */
1755}
1756
d562f1f8
RO
1757/*
1758 * Caller must hold RTNL.
1759 */
19baf839
RO
1760static int fn_trie_flush(struct fib_table *tb)
1761{
1762 struct trie *t = (struct trie *) tb->tb_data;
1763 struct leaf *ll = NULL, *l = NULL;
1764 int found = 0, h;
1765
1766 t->revision++;
1767
91b9a277 1768 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
19baf839
RO
1769 found += trie_flush_leaf(t, l);
1770
1771 if (ll && hlist_empty(&ll->list))
1772 trie_leaf_remove(t, ll->key);
1773 ll = l;
1774 }
1775
1776 if (ll && hlist_empty(&ll->list))
1777 trie_leaf_remove(t, ll->key);
1778
0c7770c7 1779 pr_debug("trie_flush found=%d\n", found);
19baf839
RO
1780 return found;
1781}
1782
91b9a277 1783static int trie_last_dflt = -1;
19baf839
RO
1784
1785static void
1786fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1787{
1788 struct trie *t = (struct trie *) tb->tb_data;
1789 int order, last_idx;
1790 struct fib_info *fi = NULL;
1791 struct fib_info *last_resort;
1792 struct fib_alias *fa = NULL;
1793 struct list_head *fa_head;
1794 struct leaf *l;
1795
1796 last_idx = -1;
1797 last_resort = NULL;
1798 order = -1;
1799
2373ce1c 1800 rcu_read_lock();
c877efb2 1801
19baf839 1802 l = fib_find_node(t, 0);
c877efb2 1803 if (!l)
19baf839
RO
1804 goto out;
1805
1806 fa_head = get_fa_head(l, 0);
c877efb2 1807 if (!fa_head)
19baf839
RO
1808 goto out;
1809
c877efb2 1810 if (list_empty(fa_head))
19baf839
RO
1811 goto out;
1812
2373ce1c 1813 list_for_each_entry_rcu(fa, fa_head, fa_list) {
19baf839 1814 struct fib_info *next_fi = fa->fa_info;
91b9a277 1815
19baf839
RO
1816 if (fa->fa_scope != res->scope ||
1817 fa->fa_type != RTN_UNICAST)
1818 continue;
91b9a277 1819
19baf839
RO
1820 if (next_fi->fib_priority > res->fi->fib_priority)
1821 break;
1822 if (!next_fi->fib_nh[0].nh_gw ||
1823 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1824 continue;
1825 fa->fa_state |= FA_S_ACCESSED;
91b9a277 1826
19baf839
RO
1827 if (fi == NULL) {
1828 if (next_fi != res->fi)
1829 break;
1830 } else if (!fib_detect_death(fi, order, &last_resort,
1831 &last_idx, &trie_last_dflt)) {
1832 if (res->fi)
1833 fib_info_put(res->fi);
1834 res->fi = fi;
1835 atomic_inc(&fi->fib_clntref);
1836 trie_last_dflt = order;
1837 goto out;
1838 }
1839 fi = next_fi;
1840 order++;
1841 }
1842 if (order <= 0 || fi == NULL) {
1843 trie_last_dflt = -1;
1844 goto out;
1845 }
1846
1847 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1848 if (res->fi)
1849 fib_info_put(res->fi);
1850 res->fi = fi;
1851 atomic_inc(&fi->fib_clntref);
1852 trie_last_dflt = order;
1853 goto out;
1854 }
1855 if (last_idx >= 0) {
1856 if (res->fi)
1857 fib_info_put(res->fi);
1858 res->fi = last_resort;
1859 if (last_resort)
1860 atomic_inc(&last_resort->fib_clntref);
1861 }
1862 trie_last_dflt = last_idx;
1863 out:;
2373ce1c 1864 rcu_read_unlock();
19baf839
RO
1865}
1866
c877efb2 1867static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
19baf839
RO
1868 struct sk_buff *skb, struct netlink_callback *cb)
1869{
1870 int i, s_i;
1871 struct fib_alias *fa;
1872
32ab5f80 1873 __be32 xkey = htonl(key);
19baf839 1874
1af5a8c4 1875 s_i = cb->args[4];
19baf839
RO
1876 i = 0;
1877
2373ce1c
RO
1878 /* rcu_read_lock is hold by caller */
1879
1880 list_for_each_entry_rcu(fa, fah, fa_list) {
19baf839
RO
1881 if (i < s_i) {
1882 i++;
1883 continue;
1884 }
78c6671a 1885 BUG_ON(!fa->fa_info);
19baf839
RO
1886
1887 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1888 cb->nlh->nlmsg_seq,
1889 RTM_NEWROUTE,
1890 tb->tb_id,
1891 fa->fa_type,
1892 fa->fa_scope,
be403ea1 1893 xkey,
19baf839
RO
1894 plen,
1895 fa->fa_tos,
90f66914 1896 fa->fa_info, 0) < 0) {
1af5a8c4 1897 cb->args[4] = i;
19baf839 1898 return -1;
91b9a277 1899 }
19baf839
RO
1900 i++;
1901 }
1af5a8c4 1902 cb->args[4] = i;
19baf839
RO
1903 return skb->len;
1904}
1905
c877efb2 1906static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
19baf839
RO
1907 struct netlink_callback *cb)
1908{
1909 int h, s_h;
1910 struct list_head *fa_head;
1911 struct leaf *l = NULL;
19baf839 1912
1af5a8c4 1913 s_h = cb->args[3];
19baf839 1914
91b9a277 1915 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
19baf839
RO
1916 if (h < s_h)
1917 continue;
1918 if (h > s_h)
1af5a8c4
PM
1919 memset(&cb->args[4], 0,
1920 sizeof(cb->args) - 4*sizeof(cb->args[0]));
19baf839
RO
1921
1922 fa_head = get_fa_head(l, plen);
91b9a277 1923
c877efb2 1924 if (!fa_head)
19baf839
RO
1925 continue;
1926
c877efb2 1927 if (list_empty(fa_head))
19baf839
RO
1928 continue;
1929
1930 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1af5a8c4 1931 cb->args[3] = h;
19baf839
RO
1932 return -1;
1933 }
1934 }
1af5a8c4 1935 cb->args[3] = h;
19baf839
RO
1936 return skb->len;
1937}
1938
1939static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1940{
1941 int m, s_m;
1942 struct trie *t = (struct trie *) tb->tb_data;
1943
1af5a8c4 1944 s_m = cb->args[2];
19baf839 1945
2373ce1c 1946 rcu_read_lock();
91b9a277 1947 for (m = 0; m <= 32; m++) {
19baf839
RO
1948 if (m < s_m)
1949 continue;
1950 if (m > s_m)
1af5a8c4
PM
1951 memset(&cb->args[3], 0,
1952 sizeof(cb->args) - 3*sizeof(cb->args[0]));
19baf839
RO
1953
1954 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1af5a8c4 1955 cb->args[2] = m;
19baf839
RO
1956 goto out;
1957 }
1958 }
2373ce1c 1959 rcu_read_unlock();
1af5a8c4 1960 cb->args[2] = m;
19baf839 1961 return skb->len;
91b9a277 1962out:
2373ce1c 1963 rcu_read_unlock();
19baf839
RO
1964 return -1;
1965}
1966
1967/* Fix more generic FIB names for init later */
1968
1969#ifdef CONFIG_IP_MULTIPLE_TABLES
2dfe55b4 1970struct fib_table * fib_hash_init(u32 id)
19baf839 1971#else
2dfe55b4 1972struct fib_table * __init fib_hash_init(u32 id)
19baf839
RO
1973#endif
1974{
1975 struct fib_table *tb;
1976 struct trie *t;
1977
1978 if (fn_alias_kmem == NULL)
1979 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1980 sizeof(struct fib_alias),
1981 0, SLAB_HWCACHE_ALIGN,
20c2df83 1982 NULL);
19baf839
RO
1983
1984 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1985 GFP_KERNEL);
1986 if (tb == NULL)
1987 return NULL;
1988
1989 tb->tb_id = id;
1990 tb->tb_lookup = fn_trie_lookup;
1991 tb->tb_insert = fn_trie_insert;
1992 tb->tb_delete = fn_trie_delete;
1993 tb->tb_flush = fn_trie_flush;
1994 tb->tb_select_default = fn_trie_select_default;
1995 tb->tb_dump = fn_trie_dump;
1996 memset(tb->tb_data, 0, sizeof(struct trie));
1997
1998 t = (struct trie *) tb->tb_data;
1999
2000 trie_init(t);
2001
c877efb2 2002 if (id == RT_TABLE_LOCAL)
91b9a277 2003 trie_local = t;
c877efb2 2004 else if (id == RT_TABLE_MAIN)
91b9a277 2005 trie_main = t;
19baf839
RO
2006
2007 if (id == RT_TABLE_LOCAL)
78c6671a 2008 printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
19baf839
RO
2009
2010 return tb;
2011}
2012
cb7b593c
SH
2013#ifdef CONFIG_PROC_FS
2014/* Depth first Trie walk iterator */
2015struct fib_trie_iter {
2016 struct tnode *tnode;
2017 struct trie *trie;
2018 unsigned index;
2019 unsigned depth;
2020};
19baf839 2021
cb7b593c 2022static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
19baf839 2023{
cb7b593c
SH
2024 struct tnode *tn = iter->tnode;
2025 unsigned cindex = iter->index;
2026 struct tnode *p;
19baf839 2027
6640e697
EB
2028 /* A single entry routing table */
2029 if (!tn)
2030 return NULL;
2031
cb7b593c
SH
2032 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2033 iter->tnode, iter->index, iter->depth);
2034rescan:
2035 while (cindex < (1<<tn->bits)) {
2036 struct node *n = tnode_get_child(tn, cindex);
19baf839 2037
cb7b593c
SH
2038 if (n) {
2039 if (IS_LEAF(n)) {
2040 iter->tnode = tn;
2041 iter->index = cindex + 1;
2042 } else {
2043 /* push down one level */
2044 iter->tnode = (struct tnode *) n;
2045 iter->index = 0;
2046 ++iter->depth;
2047 }
2048 return n;
2049 }
19baf839 2050
cb7b593c
SH
2051 ++cindex;
2052 }
91b9a277 2053
cb7b593c 2054 /* Current node exhausted, pop back up */
06801916 2055 p = node_parent((struct node *)tn);
cb7b593c
SH
2056 if (p) {
2057 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2058 tn = p;
2059 --iter->depth;
2060 goto rescan;
19baf839 2061 }
cb7b593c
SH
2062
2063 /* got root? */
2064 return NULL;
19baf839
RO
2065}
2066
cb7b593c
SH
2067static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2068 struct trie *t)
19baf839 2069{
5ddf0eb2
RO
2070 struct node *n ;
2071
132adf54 2072 if (!t)
5ddf0eb2
RO
2073 return NULL;
2074
2075 n = rcu_dereference(t->trie);
2076
132adf54 2077 if (!iter)
5ddf0eb2 2078 return NULL;
19baf839 2079
6640e697
EB
2080 if (n) {
2081 if (IS_TNODE(n)) {
2082 iter->tnode = (struct tnode *) n;
2083 iter->trie = t;
2084 iter->index = 0;
2085 iter->depth = 1;
2086 } else {
2087 iter->tnode = NULL;
2088 iter->trie = t;
2089 iter->index = 0;
2090 iter->depth = 0;
2091 }
cb7b593c 2092 return n;
91b9a277 2093 }
cb7b593c
SH
2094 return NULL;
2095}
91b9a277 2096
cb7b593c
SH
2097static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2098{
2099 struct node *n;
2100 struct fib_trie_iter iter;
91b9a277 2101
cb7b593c 2102 memset(s, 0, sizeof(*s));
91b9a277 2103
cb7b593c
SH
2104 rcu_read_lock();
2105 for (n = fib_trie_get_first(&iter, t); n;
2106 n = fib_trie_get_next(&iter)) {
2107 if (IS_LEAF(n)) {
2108 s->leaves++;
2109 s->totdepth += iter.depth;
2110 if (iter.depth > s->maxdepth)
2111 s->maxdepth = iter.depth;
2112 } else {
2113 const struct tnode *tn = (const struct tnode *) n;
2114 int i;
2115
2116 s->tnodes++;
132adf54 2117 if (tn->bits < MAX_STAT_DEPTH)
06ef921d
RO
2118 s->nodesizes[tn->bits]++;
2119
cb7b593c
SH
2120 for (i = 0; i < (1<<tn->bits); i++)
2121 if (!tn->child[i])
2122 s->nullpointers++;
19baf839 2123 }
19baf839 2124 }
2373ce1c 2125 rcu_read_unlock();
19baf839
RO
2126}
2127
cb7b593c
SH
2128/*
2129 * This outputs /proc/net/fib_triestats
2130 */
2131static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
19baf839 2132{
cb7b593c 2133 unsigned i, max, pointers, bytes, avdepth;
c877efb2 2134
cb7b593c
SH
2135 if (stat->leaves)
2136 avdepth = stat->totdepth*100 / stat->leaves;
2137 else
2138 avdepth = 0;
91b9a277 2139
cb7b593c
SH
2140 seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2141 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
91b9a277 2142
cb7b593c 2143 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
91b9a277 2144
cb7b593c
SH
2145 bytes = sizeof(struct leaf) * stat->leaves;
2146 seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes);
2147 bytes += sizeof(struct tnode) * stat->tnodes;
19baf839 2148
06ef921d
RO
2149 max = MAX_STAT_DEPTH;
2150 while (max > 0 && stat->nodesizes[max-1] == 0)
cb7b593c 2151 max--;
19baf839 2152
cb7b593c
SH
2153 pointers = 0;
2154 for (i = 1; i <= max; i++)
2155 if (stat->nodesizes[i] != 0) {
2156 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2157 pointers += (1<<i) * stat->nodesizes[i];
2158 }
2159 seq_putc(seq, '\n');
2160 seq_printf(seq, "\tPointers: %d\n", pointers);
2373ce1c 2161
cb7b593c
SH
2162 bytes += sizeof(struct node *) * pointers;
2163 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2164 seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024);
2373ce1c 2165
cb7b593c
SH
2166#ifdef CONFIG_IP_FIB_TRIE_STATS
2167 seq_printf(seq, "Counters:\n---------\n");
2168 seq_printf(seq,"gets = %d\n", t->stats.gets);
2169 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2170 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2171 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2172 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2173 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
2174#ifdef CLEAR_STATS
2175 memset(&(t->stats), 0, sizeof(t->stats));
2176#endif
2177#endif /* CONFIG_IP_FIB_TRIE_STATS */
2178}
19baf839 2179
cb7b593c
SH
2180static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2181{
2182 struct trie_stat *stat;
91b9a277 2183
cb7b593c
SH
2184 stat = kmalloc(sizeof(*stat), GFP_KERNEL);
2185 if (!stat)
2186 return -ENOMEM;
91b9a277 2187
cb7b593c
SH
2188 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2189 sizeof(struct leaf), sizeof(struct tnode));
91b9a277 2190
cb7b593c
SH
2191 if (trie_local) {
2192 seq_printf(seq, "Local:\n");
2193 trie_collect_stats(trie_local, stat);
2194 trie_show_stats(seq, stat);
2195 }
91b9a277 2196
cb7b593c
SH
2197 if (trie_main) {
2198 seq_printf(seq, "Main:\n");
2199 trie_collect_stats(trie_main, stat);
2200 trie_show_stats(seq, stat);
19baf839 2201 }
cb7b593c 2202 kfree(stat);
19baf839 2203
cb7b593c 2204 return 0;
19baf839
RO
2205}
2206
cb7b593c 2207static int fib_triestat_seq_open(struct inode *inode, struct file *file)
19baf839 2208{
cb7b593c 2209 return single_open(file, fib_triestat_seq_show, NULL);
19baf839
RO
2210}
2211
9a32144e 2212static const struct file_operations fib_triestat_fops = {
cb7b593c
SH
2213 .owner = THIS_MODULE,
2214 .open = fib_triestat_seq_open,
2215 .read = seq_read,
2216 .llseek = seq_lseek,
2217 .release = single_release,
2218};
2219
2220static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2221 loff_t pos)
19baf839 2222{
cb7b593c
SH
2223 loff_t idx = 0;
2224 struct node *n;
2225
2226 for (n = fib_trie_get_first(iter, trie_local);
2227 n; ++idx, n = fib_trie_get_next(iter)) {
2228 if (pos == idx)
2229 return n;
2230 }
2231
2232 for (n = fib_trie_get_first(iter, trie_main);
2233 n; ++idx, n = fib_trie_get_next(iter)) {
2234 if (pos == idx)
2235 return n;
2236 }
19baf839
RO
2237 return NULL;
2238}
2239
cb7b593c 2240static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
19baf839 2241{
cb7b593c
SH
2242 rcu_read_lock();
2243 if (*pos == 0)
91b9a277 2244 return SEQ_START_TOKEN;
cb7b593c 2245 return fib_trie_get_idx(seq->private, *pos - 1);
19baf839
RO
2246}
2247
cb7b593c 2248static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
19baf839 2249{
cb7b593c
SH
2250 struct fib_trie_iter *iter = seq->private;
2251 void *l = v;
2252
19baf839 2253 ++*pos;
91b9a277 2254 if (v == SEQ_START_TOKEN)
cb7b593c 2255 return fib_trie_get_idx(iter, 0);
19baf839 2256
cb7b593c
SH
2257 v = fib_trie_get_next(iter);
2258 BUG_ON(v == l);
2259 if (v)
2260 return v;
19baf839 2261
cb7b593c
SH
2262 /* continue scan in next trie */
2263 if (iter->trie == trie_local)
2264 return fib_trie_get_first(iter, trie_main);
19baf839 2265
cb7b593c
SH
2266 return NULL;
2267}
19baf839 2268
cb7b593c 2269static void fib_trie_seq_stop(struct seq_file *seq, void *v)
19baf839 2270{
cb7b593c
SH
2271 rcu_read_unlock();
2272}
91b9a277 2273
cb7b593c
SH
2274static void seq_indent(struct seq_file *seq, int n)
2275{
2276 while (n-- > 0) seq_puts(seq, " ");
2277}
19baf839 2278
cb7b593c
SH
2279static inline const char *rtn_scope(enum rt_scope_t s)
2280{
2281 static char buf[32];
19baf839 2282
132adf54 2283 switch (s) {
cb7b593c
SH
2284 case RT_SCOPE_UNIVERSE: return "universe";
2285 case RT_SCOPE_SITE: return "site";
2286 case RT_SCOPE_LINK: return "link";
2287 case RT_SCOPE_HOST: return "host";
2288 case RT_SCOPE_NOWHERE: return "nowhere";
2289 default:
2290 snprintf(buf, sizeof(buf), "scope=%d", s);
2291 return buf;
2292 }
2293}
19baf839 2294
cb7b593c
SH
2295static const char *rtn_type_names[__RTN_MAX] = {
2296 [RTN_UNSPEC] = "UNSPEC",
2297 [RTN_UNICAST] = "UNICAST",
2298 [RTN_LOCAL] = "LOCAL",
2299 [RTN_BROADCAST] = "BROADCAST",
2300 [RTN_ANYCAST] = "ANYCAST",
2301 [RTN_MULTICAST] = "MULTICAST",
2302 [RTN_BLACKHOLE] = "BLACKHOLE",
2303 [RTN_UNREACHABLE] = "UNREACHABLE",
2304 [RTN_PROHIBIT] = "PROHIBIT",
2305 [RTN_THROW] = "THROW",
2306 [RTN_NAT] = "NAT",
2307 [RTN_XRESOLVE] = "XRESOLVE",
2308};
19baf839 2309
cb7b593c
SH
2310static inline const char *rtn_type(unsigned t)
2311{
2312 static char buf[32];
19baf839 2313
cb7b593c
SH
2314 if (t < __RTN_MAX && rtn_type_names[t])
2315 return rtn_type_names[t];
2316 snprintf(buf, sizeof(buf), "type %d", t);
2317 return buf;
19baf839
RO
2318}
2319
cb7b593c
SH
2320/* Pretty print the trie */
2321static int fib_trie_seq_show(struct seq_file *seq, void *v)
19baf839 2322{
cb7b593c
SH
2323 const struct fib_trie_iter *iter = seq->private;
2324 struct node *n = v;
c877efb2 2325
cb7b593c
SH
2326 if (v == SEQ_START_TOKEN)
2327 return 0;
19baf839 2328
06801916 2329 if (!node_parent(n)) {
095b8501
RO
2330 if (iter->trie == trie_local)
2331 seq_puts(seq, "<local>:\n");
2332 else
2333 seq_puts(seq, "<main>:\n");
2334 }
2335
cb7b593c
SH
2336 if (IS_TNODE(n)) {
2337 struct tnode *tn = (struct tnode *) n;
ab66b4a7 2338 __be32 prf = htonl(mask_pfx(tn->key, tn->pos));
91b9a277 2339
1d25cd6c
RO
2340 seq_indent(seq, iter->depth-1);
2341 seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
e905a9ed 2342 NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
1d25cd6c 2343 tn->empty_children);
e905a9ed 2344
cb7b593c
SH
2345 } else {
2346 struct leaf *l = (struct leaf *) n;
2347 int i;
32ab5f80 2348 __be32 val = htonl(l->key);
cb7b593c
SH
2349
2350 seq_indent(seq, iter->depth);
2351 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2352 for (i = 32; i >= 0; i--) {
772cb712 2353 struct leaf_info *li = find_leaf_info(l, i);
cb7b593c
SH
2354 if (li) {
2355 struct fib_alias *fa;
2356 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2357 seq_indent(seq, iter->depth+1);
2358 seq_printf(seq, " /%d %s %s", i,
2359 rtn_scope(fa->fa_scope),
2360 rtn_type(fa->fa_type));
2361 if (fa->fa_tos)
2362 seq_printf(seq, "tos =%d\n",
2363 fa->fa_tos);
2364 seq_putc(seq, '\n');
2365 }
2366 }
2367 }
19baf839 2368 }
cb7b593c 2369
19baf839
RO
2370 return 0;
2371}
2372
f690808e 2373static const struct seq_operations fib_trie_seq_ops = {
cb7b593c
SH
2374 .start = fib_trie_seq_start,
2375 .next = fib_trie_seq_next,
2376 .stop = fib_trie_seq_stop,
2377 .show = fib_trie_seq_show,
19baf839
RO
2378};
2379
cb7b593c 2380static int fib_trie_seq_open(struct inode *inode, struct file *file)
19baf839
RO
2381{
2382 struct seq_file *seq;
2383 int rc = -ENOMEM;
cb7b593c 2384 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
19baf839 2385
cb7b593c
SH
2386 if (!s)
2387 goto out;
2388
2389 rc = seq_open(file, &fib_trie_seq_ops);
19baf839
RO
2390 if (rc)
2391 goto out_kfree;
2392
cb7b593c
SH
2393 seq = file->private_data;
2394 seq->private = s;
2395 memset(s, 0, sizeof(*s));
19baf839
RO
2396out:
2397 return rc;
2398out_kfree:
cb7b593c 2399 kfree(s);
19baf839
RO
2400 goto out;
2401}
2402
9a32144e 2403static const struct file_operations fib_trie_fops = {
cb7b593c
SH
2404 .owner = THIS_MODULE,
2405 .open = fib_trie_seq_open,
2406 .read = seq_read,
2407 .llseek = seq_lseek,
c877efb2 2408 .release = seq_release_private,
19baf839
RO
2409};
2410
32ab5f80 2411static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
19baf839 2412{
cb7b593c
SH
2413 static unsigned type2flags[RTN_MAX + 1] = {
2414 [7] = RTF_REJECT, [8] = RTF_REJECT,
2415 };
2416 unsigned flags = type2flags[type];
19baf839 2417
cb7b593c
SH
2418 if (fi && fi->fib_nh->nh_gw)
2419 flags |= RTF_GATEWAY;
32ab5f80 2420 if (mask == htonl(0xFFFFFFFF))
cb7b593c
SH
2421 flags |= RTF_HOST;
2422 flags |= RTF_UP;
2423 return flags;
19baf839
RO
2424}
2425
cb7b593c
SH
2426/*
2427 * This outputs /proc/net/route.
2428 * The format of the file is not supposed to be changed
2429 * and needs to be same as fib_hash output to avoid breaking
2430 * legacy utilities
2431 */
2432static int fib_route_seq_show(struct seq_file *seq, void *v)
19baf839 2433{
c9e53cbe 2434 const struct fib_trie_iter *iter = seq->private;
cb7b593c
SH
2435 struct leaf *l = v;
2436 int i;
2437 char bf[128];
19baf839 2438
cb7b593c
SH
2439 if (v == SEQ_START_TOKEN) {
2440 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2441 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2442 "\tWindow\tIRTT");
2443 return 0;
2444 }
19baf839 2445
c9e53cbe
PM
2446 if (iter->trie == trie_local)
2447 return 0;
cb7b593c
SH
2448 if (IS_TNODE(l))
2449 return 0;
19baf839 2450
cb7b593c 2451 for (i=32; i>=0; i--) {
772cb712 2452 struct leaf_info *li = find_leaf_info(l, i);
cb7b593c 2453 struct fib_alias *fa;
32ab5f80 2454 __be32 mask, prefix;
91b9a277 2455
cb7b593c
SH
2456 if (!li)
2457 continue;
19baf839 2458
cb7b593c
SH
2459 mask = inet_make_mask(li->plen);
2460 prefix = htonl(l->key);
19baf839 2461
cb7b593c 2462 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
1371e37d 2463 const struct fib_info *fi = fa->fa_info;
cb7b593c 2464 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
19baf839 2465
cb7b593c
SH
2466 if (fa->fa_type == RTN_BROADCAST
2467 || fa->fa_type == RTN_MULTICAST)
2468 continue;
19baf839 2469
cb7b593c
SH
2470 if (fi)
2471 snprintf(bf, sizeof(bf),
2472 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2473 fi->fib_dev ? fi->fib_dev->name : "*",
2474 prefix,
2475 fi->fib_nh->nh_gw, flags, 0, 0,
2476 fi->fib_priority,
2477 mask,
2478 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
2479 fi->fib_window,
2480 fi->fib_rtt >> 3);
2481 else
2482 snprintf(bf, sizeof(bf),
2483 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2484 prefix, 0, flags, 0, 0, 0,
2485 mask, 0, 0, 0);
19baf839 2486
cb7b593c
SH
2487 seq_printf(seq, "%-127s\n", bf);
2488 }
19baf839
RO
2489 }
2490
2491 return 0;
2492}
2493
f690808e 2494static const struct seq_operations fib_route_seq_ops = {
cb7b593c
SH
2495 .start = fib_trie_seq_start,
2496 .next = fib_trie_seq_next,
2497 .stop = fib_trie_seq_stop,
2498 .show = fib_route_seq_show,
19baf839
RO
2499};
2500
cb7b593c 2501static int fib_route_seq_open(struct inode *inode, struct file *file)
19baf839
RO
2502{
2503 struct seq_file *seq;
2504 int rc = -ENOMEM;
cb7b593c 2505 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
19baf839 2506
cb7b593c
SH
2507 if (!s)
2508 goto out;
2509
2510 rc = seq_open(file, &fib_route_seq_ops);
19baf839
RO
2511 if (rc)
2512 goto out_kfree;
2513
cb7b593c
SH
2514 seq = file->private_data;
2515 seq->private = s;
2516 memset(s, 0, sizeof(*s));
19baf839
RO
2517out:
2518 return rc;
2519out_kfree:
cb7b593c 2520 kfree(s);
19baf839
RO
2521 goto out;
2522}
2523
9a32144e 2524static const struct file_operations fib_route_fops = {
cb7b593c
SH
2525 .owner = THIS_MODULE,
2526 .open = fib_route_seq_open,
2527 .read = seq_read,
2528 .llseek = seq_lseek,
2529 .release = seq_release_private,
19baf839
RO
2530};
2531
2532int __init fib_proc_init(void)
2533{
457c4cbc 2534 if (!proc_net_fops_create(&init_net, "fib_trie", S_IRUGO, &fib_trie_fops))
cb7b593c
SH
2535 goto out1;
2536
457c4cbc 2537 if (!proc_net_fops_create(&init_net, "fib_triestat", S_IRUGO, &fib_triestat_fops))
cb7b593c
SH
2538 goto out2;
2539
457c4cbc 2540 if (!proc_net_fops_create(&init_net, "route", S_IRUGO, &fib_route_fops))
cb7b593c
SH
2541 goto out3;
2542
19baf839 2543 return 0;
cb7b593c
SH
2544
2545out3:
457c4cbc 2546 proc_net_remove(&init_net, "fib_triestat");
cb7b593c 2547out2:
457c4cbc 2548 proc_net_remove(&init_net, "fib_trie");
cb7b593c
SH
2549out1:
2550 return -ENOMEM;
19baf839
RO
2551}
2552
2553void __init fib_proc_exit(void)
2554{
457c4cbc
EB
2555 proc_net_remove(&init_net, "fib_trie");
2556 proc_net_remove(&init_net, "fib_triestat");
2557 proc_net_remove(&init_net, "route");
19baf839
RO
2558}
2559
2560#endif /* CONFIG_PROC_FS */
This page took 0.451017 seconds and 5 git commands to generate.