]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_trie.c
[BRIDGE]: Fix faulty check in br_stp_recalculate_bridge_id()
[net-next-2.6.git] / net / ipv4 / fib_trie.c
CommitLineData
19baf839
RO
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
12 *
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which was originally described in:
16 *
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
fd966255
RO
44 *
45 * Substantial contributions to this work come from:
46 *
47 * David S. Miller, <davem@davemloft.net>
48 * Stephen Hemminger <shemminger@osdl.org>
49 * Paul E. McKenney <paulmck@us.ibm.com>
50 * Patrick McHardy <kaber@trash.net>
19baf839
RO
51 */
52
772cb712 53#define VERSION "0.404"
19baf839
RO
54
55#include <linux/config.h>
56#include <asm/uaccess.h>
57#include <asm/system.h>
58#include <asm/bitops.h>
59#include <linux/types.h>
60#include <linux/kernel.h>
61#include <linux/sched.h>
62#include <linux/mm.h>
63#include <linux/string.h>
64#include <linux/socket.h>
65#include <linux/sockios.h>
66#include <linux/errno.h>
67#include <linux/in.h>
68#include <linux/inet.h>
69#include <linux/netdevice.h>
70#include <linux/if_arp.h>
71#include <linux/proc_fs.h>
2373ce1c 72#include <linux/rcupdate.h>
19baf839
RO
73#include <linux/skbuff.h>
74#include <linux/netlink.h>
75#include <linux/init.h>
76#include <linux/list.h>
77#include <net/ip.h>
78#include <net/protocol.h>
79#include <net/route.h>
80#include <net/tcp.h>
81#include <net/sock.h>
82#include <net/ip_fib.h>
83#include "fib_lookup.h"
84
85#undef CONFIG_IP_FIB_TRIE_STATS
86#define MAX_CHILDS 16384
87
19baf839
RO
/* Number of bits in a trie key. */
#define KEYLENGTH (8*sizeof(t_key))

/*
 * MASK_PFX(k, l): keep the l most significant bits of k, clear the rest.
 * A length of 0 yields 0 (and avoids shifting by the full key width).
 * Every argument is parenthesized so expressions can be passed safely.
 */
#define MASK_PFX(k, l) \
	(((l) == 0) ? 0 : ((k) >> (KEYLENGTH - (l))) << (KEYLENGTH - (l)))

/*
 * TKEY_GET_MASK(offset, bits): a run of 'bits' one-bits that starts
 * 'offset' bits below the most significant bit; 0 when bits is 0.
 */
#define TKEY_GET_MASK(offset, bits) \
	(((bits) == 0) ? 0 : ((t_key)(-1) << (KEYLENGTH - (bits)) >> (offset)))

/* Key type used throughout the trie. */
typedef unsigned int t_key;
93
/*
 * The node type lives in the least significant bit of the 'parent' word;
 * the remaining bits hold the address of the parent tnode.
 */
#define T_TNODE 0
#define T_LEAF 1
#define NODE_TYPE_MASK 0x1UL

/* Parent pointer with the type bit stripped, read via rcu_dereference(). */
#define NODE_PARENT(node) \
	((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK)))

/* Type bit of a node: T_TNODE or T_LEAF. */
#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)

/* Publish a new parent pointer while preserving the node's type bit. */
#define NODE_SET_PARENT(node, ptr)		\
	rcu_assign_pointer((node)->parent,	\
			   ((unsigned long)(ptr)) | NODE_TYPE(node))

/* Type predicates; the argument is parenthesized so any expression works. */
#define IS_TNODE(n) (!((n)->parent & T_LEAF))
#define IS_LEAF(n) ((n)->parent & T_LEAF)
19baf839
RO
108
109struct node {
91b9a277
OJ
110 t_key key;
111 unsigned long parent;
19baf839
RO
112};
113
114struct leaf {
91b9a277
OJ
115 t_key key;
116 unsigned long parent;
19baf839 117 struct hlist_head list;
2373ce1c 118 struct rcu_head rcu;
19baf839
RO
119};
120
121struct leaf_info {
122 struct hlist_node hlist;
2373ce1c 123 struct rcu_head rcu;
19baf839
RO
124 int plen;
125 struct list_head falh;
126};
127
128struct tnode {
91b9a277
OJ
129 t_key key;
130 unsigned long parent;
131 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
132 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
133 unsigned short full_children; /* KEYLENGTH bits needed */
134 unsigned short empty_children; /* KEYLENGTH bits needed */
2373ce1c 135 struct rcu_head rcu;
91b9a277 136 struct node *child[0];
19baf839
RO
137};
138
#ifdef CONFIG_IP_FIB_TRIE_STATS
/* Usage counters, compiled in only with CONFIG_IP_FIB_TRIE_STATS. */
struct trie_use_stats {
	unsigned int gets;
	unsigned int backtrack;
	unsigned int semantic_match_passed;
	unsigned int semantic_match_miss;
	unsigned int null_node_hit;
	unsigned int resize_node_skipped;	/* inflate()/halve() failures in resize() */
};
#endif
149
150struct trie_stat {
151 unsigned int totdepth;
152 unsigned int maxdepth;
153 unsigned int tnodes;
154 unsigned int leaves;
155 unsigned int nullpointers;
156 unsigned int nodesizes[MAX_CHILDS];
c877efb2 157};
19baf839
RO
158
/* One routing trie: root pointer plus bookkeeping. */
struct trie {
	struct node *trie;		/* root node, NULL when empty */
#ifdef CONFIG_IP_FIB_TRIE_STATS
	struct trie_use_stats stats;
#endif
	int size;
	unsigned int revision;
};
167
19baf839
RO
168static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
169static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
19baf839 170static struct node *resize(struct trie *t, struct tnode *tn);
2f80b3c8
RO
171static struct tnode *inflate(struct trie *t, struct tnode *tn);
172static struct tnode *halve(struct trie *t, struct tnode *tn);
19baf839 173static void tnode_free(struct tnode *tn);
19baf839 174
ba89966c 175static kmem_cache_t *fn_alias_kmem __read_mostly;
19baf839
RO
176static struct trie *trie_local = NULL, *trie_main = NULL;
177
2373ce1c
RO
178
179/* rcu_read_lock needs to be held by the caller on the read side */
180
c877efb2 181static inline struct node *tnode_get_child(struct tnode *tn, int i)
19baf839 182{
91b9a277 183 BUG_ON(i >= 1 << tn->bits);
19baf839 184
2373ce1c 185 return rcu_dereference(tn->child[i]);
19baf839
RO
186}
187
bb435b8d 188static inline int tnode_child_length(const struct tnode *tn)
19baf839 189{
91b9a277 190 return 1 << tn->bits;
19baf839
RO
191}
192
19baf839
RO
193static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
194{
91b9a277 195 if (offset < KEYLENGTH)
19baf839 196 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
91b9a277 197 else
19baf839
RO
198 return 0;
199}
200
201static inline int tkey_equals(t_key a, t_key b)
202{
c877efb2 203 return a == b;
19baf839
RO
204}
205
206static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
207{
c877efb2
SH
208 if (bits == 0 || offset >= KEYLENGTH)
209 return 1;
91b9a277
OJ
210 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
211 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
c877efb2 212}
19baf839
RO
213
214static inline int tkey_mismatch(t_key a, int offset, t_key b)
215{
216 t_key diff = a ^ b;
217 int i = offset;
218
c877efb2
SH
219 if (!diff)
220 return 0;
221 while ((diff << i) >> (KEYLENGTH-1) == 0)
19baf839
RO
222 i++;
223 return i;
224}
225
19baf839
RO
226/*
227 To understand this stuff, an understanding of keys and all their bits is
228 necessary. Every node in the trie has a key associated with it, but not
229 all of the bits in that key are significant.
230
231 Consider a node 'n' and its parent 'tp'.
232
233 If n is a leaf, every bit in its key is significant. Its presence is
772cb712 234 necessitated by path compression, since during a tree traversal (when
19baf839
RO
235 searching for a leaf - unless we are doing an insertion) we will completely
236 ignore all skipped bits we encounter. Thus we need to verify, at the end of
237 a potentially successful search, that we have indeed been walking the
238 correct key path.
239
240 Note that we can never "miss" the correct key in the tree if present by
241 following the wrong path. Path compression ensures that segments of the key
242 that are the same for all keys with a given prefix are skipped, but the
243 skipped part *is* identical for each node in the subtrie below the skipped
244 bit! trie_insert() in this implementation takes care of that - note the
245 call to tkey_sub_equals() in trie_insert().
246
247 if n is an internal node - a 'tnode' here, the various parts of its key
248 have many different meanings.
249
250 Example:
251 _________________________________________________________________
252 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
253 -----------------------------------------------------------------
254 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
255
256 _________________________________________________________________
257 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
258 -----------------------------------------------------------------
259 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
260
261 tp->pos = 7
262 tp->bits = 3
263 n->pos = 15
91b9a277 264 n->bits = 4
19baf839
RO
265
266 First, let's just ignore the bits that come before the parent tp, that is
267 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
268 not use them for anything.
269
270 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
271 index into the parent's child array. That is, they will be used to find
272 'n' among tp's children.
273
274 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
275 for the node n.
276
277 All the bits we have seen so far are significant to the node n. The rest
278 of the bits are really not needed or indeed known in n->key.
279
280 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
281 n's child array, and will of course be different for each child.
282
c877efb2 283
19baf839
RO
284 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
285 at this point.
286
287*/
288
0c7770c7 289static inline void check_tnode(const struct tnode *tn)
19baf839 290{
0c7770c7 291 WARN_ON(tn && tn->pos+tn->bits > 32);
19baf839
RO
292}
293
/*
 * Fill-factor thresholds, in percent, used by resize().  The *_root
 * variants apply to a tnode without a parent.
 */
static int halve_threshold = 25;
static int inflate_threshold = 50;
static int halve_threshold_root = 15;
static int inflate_threshold_root = 25;
19baf839 298
2373ce1c
RO
299
300static void __alias_free_mem(struct rcu_head *head)
19baf839 301{
2373ce1c
RO
302 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
303 kmem_cache_free(fn_alias_kmem, fa);
19baf839
RO
304}
305
2373ce1c 306static inline void alias_free_mem_rcu(struct fib_alias *fa)
19baf839 307{
2373ce1c
RO
308 call_rcu(&fa->rcu, __alias_free_mem);
309}
91b9a277 310
2373ce1c
RO
311static void __leaf_free_rcu(struct rcu_head *head)
312{
313 kfree(container_of(head, struct leaf, rcu));
314}
91b9a277 315
2373ce1c
RO
316static inline void free_leaf(struct leaf *leaf)
317{
318 call_rcu(&leaf->rcu, __leaf_free_rcu);
19baf839
RO
319}
320
2373ce1c 321static void __leaf_info_free_rcu(struct rcu_head *head)
19baf839 322{
2373ce1c 323 kfree(container_of(head, struct leaf_info, rcu));
19baf839
RO
324}
325
2373ce1c 326static inline void free_leaf_info(struct leaf_info *leaf)
19baf839 327{
2373ce1c 328 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
19baf839
RO
329}
330
f0e36f8c
PM
331static struct tnode *tnode_alloc(unsigned int size)
332{
2373ce1c
RO
333 struct page *pages;
334
335 if (size <= PAGE_SIZE)
336 return kcalloc(size, 1, GFP_KERNEL);
337
338 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
339 if (!pages)
340 return NULL;
341
342 return page_address(pages);
f0e36f8c
PM
343}
344
2373ce1c 345static void __tnode_free_rcu(struct rcu_head *head)
f0e36f8c 346{
2373ce1c 347 struct tnode *tn = container_of(head, struct tnode, rcu);
f0e36f8c 348 unsigned int size = sizeof(struct tnode) +
2373ce1c 349 (1 << tn->bits) * sizeof(struct node *);
f0e36f8c
PM
350
351 if (size <= PAGE_SIZE)
352 kfree(tn);
353 else
354 free_pages((unsigned long)tn, get_order(size));
355}
356
2373ce1c
RO
357static inline void tnode_free(struct tnode *tn)
358{
359 call_rcu(&tn->rcu, __tnode_free_rcu);
360}
361
362static struct leaf *leaf_new(void)
363{
364 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
365 if (l) {
366 l->parent = T_LEAF;
367 INIT_HLIST_HEAD(&l->list);
368 }
369 return l;
370}
371
372static struct leaf_info *leaf_info_new(int plen)
373{
374 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
375 if (li) {
376 li->plen = plen;
377 INIT_LIST_HEAD(&li->falh);
378 }
379 return li;
380}
381
19baf839
RO
382static struct tnode* tnode_new(t_key key, int pos, int bits)
383{
384 int nchildren = 1<<bits;
385 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
f0e36f8c 386 struct tnode *tn = tnode_alloc(sz);
19baf839 387
91b9a277 388 if (tn) {
19baf839 389 memset(tn, 0, sz);
2373ce1c 390 tn->parent = T_TNODE;
19baf839
RO
391 tn->pos = pos;
392 tn->bits = bits;
393 tn->key = key;
394 tn->full_children = 0;
395 tn->empty_children = 1<<bits;
396 }
c877efb2 397
0c7770c7
SH
398 pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
399 (unsigned int) (sizeof(struct node) * 1<<bits));
19baf839
RO
400 return tn;
401}
402
19baf839
RO
403/*
404 * Check whether a tnode 'n' is "full", i.e. it is an internal node
405 * and no bits are skipped. See discussion in dyntree paper p. 6
406 */
407
bb435b8d 408static inline int tnode_full(const struct tnode *tn, const struct node *n)
19baf839 409{
c877efb2 410 if (n == NULL || IS_LEAF(n))
19baf839
RO
411 return 0;
412
413 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
414}
415
/*
 * Store @n in slot @i of @tn.  Passing -1 as 'wasfull' makes
 * tnode_put_child_reorg() work out itself whether the old occupant was
 * full.  @t is not used.
 */
static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
{
	const int wasfull_unknown = -1;

	tnode_put_child_reorg(tn, i, n, wasfull_unknown);
}
420
c877efb2 421 /*
19baf839
RO
422 * Add a child at position i overwriting the old value.
423 * Update the value of full_children and empty_children.
424 */
425
c877efb2 426static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
19baf839 427{
2373ce1c 428 struct node *chi = tn->child[i];
19baf839
RO
429 int isfull;
430
0c7770c7
SH
431 BUG_ON(i >= 1<<tn->bits);
432
19baf839
RO
433
434 /* update emptyChildren */
435 if (n == NULL && chi != NULL)
436 tn->empty_children++;
437 else if (n != NULL && chi == NULL)
438 tn->empty_children--;
c877efb2 439
19baf839 440 /* update fullChildren */
91b9a277 441 if (wasfull == -1)
19baf839
RO
442 wasfull = tnode_full(tn, chi);
443
444 isfull = tnode_full(tn, n);
c877efb2 445 if (wasfull && !isfull)
19baf839 446 tn->full_children--;
c877efb2 447 else if (!wasfull && isfull)
19baf839 448 tn->full_children++;
91b9a277 449
c877efb2
SH
450 if (n)
451 NODE_SET_PARENT(n, tn);
19baf839 452
2373ce1c 453 rcu_assign_pointer(tn->child[i], n);
19baf839
RO
454}
455
c877efb2 456static struct node *resize(struct trie *t, struct tnode *tn)
19baf839
RO
457{
458 int i;
2f36895a 459 int err = 0;
2f80b3c8 460 struct tnode *old_tn;
e6308be8
RO
461 int inflate_threshold_use;
462 int halve_threshold_use;
19baf839
RO
463
464 if (!tn)
465 return NULL;
466
0c7770c7
SH
467 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
468 tn, inflate_threshold, halve_threshold);
19baf839
RO
469
470 /* No children */
471 if (tn->empty_children == tnode_child_length(tn)) {
472 tnode_free(tn);
473 return NULL;
474 }
475 /* One child */
476 if (tn->empty_children == tnode_child_length(tn) - 1)
477 for (i = 0; i < tnode_child_length(tn); i++) {
91b9a277 478 struct node *n;
19baf839 479
91b9a277 480 n = tn->child[i];
2373ce1c 481 if (!n)
91b9a277 482 continue;
91b9a277
OJ
483
484 /* compress one level */
2373ce1c 485 NODE_SET_PARENT(n, NULL);
91b9a277
OJ
486 tnode_free(tn);
487 return n;
19baf839 488 }
c877efb2 489 /*
19baf839
RO
490 * Double as long as the resulting node has a number of
491 * nonempty nodes that are above the threshold.
492 */
493
494 /*
c877efb2
SH
495 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
496 * the Helsinki University of Technology and Matti Tikkanen of Nokia
19baf839 497 * Telecommunications, page 6:
c877efb2 498 * "A node is doubled if the ratio of non-empty children to all
19baf839
RO
499 * children in the *doubled* node is at least 'high'."
500 *
c877efb2
SH
501 * 'high' in this instance is the variable 'inflate_threshold'. It
502 * is expressed as a percentage, so we multiply it with
503 * tnode_child_length() and instead of multiplying by 2 (since the
504 * child array will be doubled by inflate()) and multiplying
505 * the left-hand side by 100 (to handle the percentage thing) we
19baf839 506 * multiply the left-hand side by 50.
c877efb2
SH
507 *
508 * The left-hand side may look a bit weird: tnode_child_length(tn)
509 * - tn->empty_children is of course the number of non-null children
510 * in the current node. tn->full_children is the number of "full"
19baf839 511 * children, that is non-null tnodes with a skip value of 0.
c877efb2 512 * All of those will be doubled in the resulting inflated tnode, so
19baf839 513 * we just count them one extra time here.
c877efb2 514 *
19baf839 515 * A clearer way to write this would be:
c877efb2 516 *
19baf839 517 * to_be_doubled = tn->full_children;
c877efb2 518 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
19baf839
RO
519 * tn->full_children;
520 *
521 * new_child_length = tnode_child_length(tn) * 2;
522 *
c877efb2 523 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
19baf839
RO
524 * new_child_length;
525 * if (new_fill_factor >= inflate_threshold)
c877efb2
SH
526 *
527 * ...and so on, tho it would mess up the while () loop.
528 *
19baf839
RO
529 * anyway,
530 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
531 * inflate_threshold
c877efb2 532 *
19baf839
RO
533 * avoid a division:
534 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
535 * inflate_threshold * new_child_length
c877efb2 536 *
19baf839 537 * expand not_to_be_doubled and to_be_doubled, and shorten:
c877efb2 538 * 100 * (tnode_child_length(tn) - tn->empty_children +
91b9a277 539 * tn->full_children) >= inflate_threshold * new_child_length
c877efb2 540 *
19baf839 541 * expand new_child_length:
c877efb2 542 * 100 * (tnode_child_length(tn) - tn->empty_children +
91b9a277 543 * tn->full_children) >=
19baf839 544 * inflate_threshold * tnode_child_length(tn) * 2
c877efb2 545 *
19baf839 546 * shorten again:
c877efb2 547 * 50 * (tn->full_children + tnode_child_length(tn) -
91b9a277 548 * tn->empty_children) >= inflate_threshold *
19baf839 549 * tnode_child_length(tn)
c877efb2 550 *
19baf839
RO
551 */
552
553 check_tnode(tn);
c877efb2 554
e6308be8
RO
555 /* Keep root node larger */
556
557 if(!tn->parent)
558 inflate_threshold_use = inflate_threshold_root;
559 else
560 inflate_threshold_use = inflate_threshold;
561
2f36895a 562 err = 0;
19baf839
RO
563 while ((tn->full_children > 0 &&
564 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
e6308be8 565 inflate_threshold_use * tnode_child_length(tn))) {
19baf839 566
2f80b3c8
RO
567 old_tn = tn;
568 tn = inflate(t, tn);
569 if (IS_ERR(tn)) {
570 tn = old_tn;
2f36895a
RO
571#ifdef CONFIG_IP_FIB_TRIE_STATS
572 t->stats.resize_node_skipped++;
573#endif
574 break;
575 }
19baf839
RO
576 }
577
578 check_tnode(tn);
579
580 /*
581 * Halve as long as the number of empty children in this
582 * node is above threshold.
583 */
2f36895a 584
e6308be8
RO
585
586 /* Keep root node larger */
587
588 if(!tn->parent)
589 halve_threshold_use = halve_threshold_root;
590 else
591 halve_threshold_use = halve_threshold;
592
2f36895a 593 err = 0;
19baf839
RO
594 while (tn->bits > 1 &&
595 100 * (tnode_child_length(tn) - tn->empty_children) <
e6308be8 596 halve_threshold_use * tnode_child_length(tn)) {
2f36895a 597
2f80b3c8
RO
598 old_tn = tn;
599 tn = halve(t, tn);
600 if (IS_ERR(tn)) {
601 tn = old_tn;
2f36895a
RO
602#ifdef CONFIG_IP_FIB_TRIE_STATS
603 t->stats.resize_node_skipped++;
604#endif
605 break;
606 }
607 }
19baf839 608
c877efb2 609
19baf839 610 /* Only one child remains */
19baf839
RO
611 if (tn->empty_children == tnode_child_length(tn) - 1)
612 for (i = 0; i < tnode_child_length(tn); i++) {
91b9a277 613 struct node *n;
19baf839 614
91b9a277 615 n = tn->child[i];
2373ce1c 616 if (!n)
91b9a277 617 continue;
91b9a277
OJ
618
619 /* compress one level */
620
2373ce1c 621 NODE_SET_PARENT(n, NULL);
91b9a277
OJ
622 tnode_free(tn);
623 return n;
19baf839
RO
624 }
625
626 return (struct node *) tn;
627}
628
2f80b3c8 629static struct tnode *inflate(struct trie *t, struct tnode *tn)
19baf839
RO
630{
631 struct tnode *inode;
632 struct tnode *oldtnode = tn;
633 int olen = tnode_child_length(tn);
634 int i;
635
0c7770c7 636 pr_debug("In inflate\n");
19baf839
RO
637
638 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
639
0c7770c7 640 if (!tn)
2f80b3c8 641 return ERR_PTR(-ENOMEM);
2f36895a
RO
642
643 /*
c877efb2
SH
644 * Preallocate and store tnodes before the actual work so we
645 * don't get into an inconsistent state if memory allocation
646 * fails. In case of failure we return the oldnode and inflate
2f36895a
RO
647 * of tnode is ignored.
648 */
91b9a277
OJ
649
650 for (i = 0; i < olen; i++) {
2f36895a
RO
651 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
652
653 if (inode &&
654 IS_TNODE(inode) &&
655 inode->pos == oldtnode->pos + oldtnode->bits &&
656 inode->bits > 1) {
657 struct tnode *left, *right;
2f36895a 658 t_key m = TKEY_GET_MASK(inode->pos, 1);
c877efb2 659
2f36895a
RO
660 left = tnode_new(inode->key&(~m), inode->pos + 1,
661 inode->bits - 1);
2f80b3c8
RO
662 if (!left)
663 goto nomem;
91b9a277 664
2f36895a
RO
665 right = tnode_new(inode->key|m, inode->pos + 1,
666 inode->bits - 1);
667
2f80b3c8
RO
668 if (!right) {
669 tnode_free(left);
670 goto nomem;
671 }
2f36895a
RO
672
673 put_child(t, tn, 2*i, (struct node *) left);
674 put_child(t, tn, 2*i+1, (struct node *) right);
675 }
676 }
677
91b9a277 678 for (i = 0; i < olen; i++) {
19baf839 679 struct node *node = tnode_get_child(oldtnode, i);
91b9a277
OJ
680 struct tnode *left, *right;
681 int size, j;
c877efb2 682
19baf839
RO
683 /* An empty child */
684 if (node == NULL)
685 continue;
686
687 /* A leaf or an internal node with skipped bits */
688
c877efb2 689 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
19baf839 690 tn->pos + tn->bits - 1) {
c877efb2 691 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
19baf839
RO
692 1) == 0)
693 put_child(t, tn, 2*i, node);
694 else
695 put_child(t, tn, 2*i+1, node);
696 continue;
697 }
698
699 /* An internal node with two children */
700 inode = (struct tnode *) node;
701
702 if (inode->bits == 1) {
703 put_child(t, tn, 2*i, inode->child[0]);
704 put_child(t, tn, 2*i+1, inode->child[1]);
705
706 tnode_free(inode);
91b9a277 707 continue;
19baf839
RO
708 }
709
91b9a277
OJ
710 /* An internal node with more than two children */
711
712 /* We will replace this node 'inode' with two new
713 * ones, 'left' and 'right', each with half of the
714 * original children. The two new nodes will have
715 * a position one bit further down the key and this
716 * means that the "significant" part of their keys
717 * (see the discussion near the top of this file)
718 * will differ by one bit, which will be "0" in
719 * left's key and "1" in right's key. Since we are
720 * moving the key position by one step, the bit that
721 * we are moving away from - the bit at position
722 * (inode->pos) - is the one that will differ between
723 * left and right. So... we synthesize that bit in the
724 * two new keys.
725 * The mask 'm' below will be a single "one" bit at
726 * the position (inode->pos)
727 */
19baf839 728
91b9a277
OJ
729 /* Use the old key, but set the new significant
730 * bit to zero.
731 */
2f36895a 732
91b9a277
OJ
733 left = (struct tnode *) tnode_get_child(tn, 2*i);
734 put_child(t, tn, 2*i, NULL);
2f36895a 735
91b9a277 736 BUG_ON(!left);
2f36895a 737
91b9a277
OJ
738 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
739 put_child(t, tn, 2*i+1, NULL);
19baf839 740
91b9a277 741 BUG_ON(!right);
19baf839 742
91b9a277
OJ
743 size = tnode_child_length(left);
744 for (j = 0; j < size; j++) {
745 put_child(t, left, j, inode->child[j]);
746 put_child(t, right, j, inode->child[j + size]);
19baf839 747 }
91b9a277
OJ
748 put_child(t, tn, 2*i, resize(t, left));
749 put_child(t, tn, 2*i+1, resize(t, right));
750
751 tnode_free(inode);
19baf839
RO
752 }
753 tnode_free(oldtnode);
754 return tn;
2f80b3c8
RO
755nomem:
756 {
757 int size = tnode_child_length(tn);
758 int j;
759
0c7770c7 760 for (j = 0; j < size; j++)
2f80b3c8
RO
761 if (tn->child[j])
762 tnode_free((struct tnode *)tn->child[j]);
763
764 tnode_free(tn);
0c7770c7 765
2f80b3c8
RO
766 return ERR_PTR(-ENOMEM);
767 }
19baf839
RO
768}
769
2f80b3c8 770static struct tnode *halve(struct trie *t, struct tnode *tn)
19baf839
RO
771{
772 struct tnode *oldtnode = tn;
773 struct node *left, *right;
774 int i;
775 int olen = tnode_child_length(tn);
776
0c7770c7 777 pr_debug("In halve\n");
c877efb2
SH
778
779 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
19baf839 780
2f80b3c8
RO
781 if (!tn)
782 return ERR_PTR(-ENOMEM);
2f36895a
RO
783
784 /*
c877efb2
SH
785 * Preallocate and store tnodes before the actual work so we
786 * don't get into an inconsistent state if memory allocation
787 * fails. In case of failure we return the oldnode and halve
2f36895a
RO
788 * of tnode is ignored.
789 */
790
91b9a277 791 for (i = 0; i < olen; i += 2) {
2f36895a
RO
792 left = tnode_get_child(oldtnode, i);
793 right = tnode_get_child(oldtnode, i+1);
c877efb2 794
2f36895a 795 /* Two nonempty children */
0c7770c7 796 if (left && right) {
2f80b3c8 797 struct tnode *newn;
0c7770c7 798
2f80b3c8 799 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
0c7770c7
SH
800
801 if (!newn)
2f80b3c8 802 goto nomem;
0c7770c7 803
2f80b3c8 804 put_child(t, tn, i/2, (struct node *)newn);
2f36895a 805 }
2f36895a 806
2f36895a 807 }
19baf839 808
91b9a277
OJ
809 for (i = 0; i < olen; i += 2) {
810 struct tnode *newBinNode;
811
19baf839
RO
812 left = tnode_get_child(oldtnode, i);
813 right = tnode_get_child(oldtnode, i+1);
c877efb2 814
19baf839
RO
815 /* At least one of the children is empty */
816 if (left == NULL) {
817 if (right == NULL) /* Both are empty */
818 continue;
819 put_child(t, tn, i/2, right);
91b9a277 820 continue;
0c7770c7 821 }
91b9a277
OJ
822
823 if (right == NULL) {
19baf839 824 put_child(t, tn, i/2, left);
91b9a277
OJ
825 continue;
826 }
c877efb2 827
19baf839 828 /* Two nonempty children */
91b9a277
OJ
829 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
830 put_child(t, tn, i/2, NULL);
91b9a277
OJ
831 put_child(t, newBinNode, 0, left);
832 put_child(t, newBinNode, 1, right);
833 put_child(t, tn, i/2, resize(t, newBinNode));
19baf839
RO
834 }
835 tnode_free(oldtnode);
836 return tn;
2f80b3c8
RO
837nomem:
838 {
839 int size = tnode_child_length(tn);
840 int j;
841
0c7770c7 842 for (j = 0; j < size; j++)
2f80b3c8
RO
843 if (tn->child[j])
844 tnode_free((struct tnode *)tn->child[j]);
845
846 tnode_free(tn);
0c7770c7 847
2f80b3c8
RO
848 return ERR_PTR(-ENOMEM);
849 }
19baf839
RO
850}
851
91b9a277 852static void trie_init(struct trie *t)
19baf839 853{
91b9a277
OJ
854 if (!t)
855 return;
856
857 t->size = 0;
2373ce1c 858 rcu_assign_pointer(t->trie, NULL);
91b9a277 859 t->revision = 0;
19baf839 860#ifdef CONFIG_IP_FIB_TRIE_STATS
91b9a277 861 memset(&t->stats, 0, sizeof(struct trie_use_stats));
19baf839 862#endif
19baf839
RO
863}
864
772cb712 865/* readside must use rcu_read_lock currently dump routines
2373ce1c
RO
866 via get_fa_head and dump */
867
772cb712 868static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
19baf839 869{
772cb712 870 struct hlist_head *head = &l->list;
19baf839
RO
871 struct hlist_node *node;
872 struct leaf_info *li;
873
2373ce1c 874 hlist_for_each_entry_rcu(li, node, head, hlist)
c877efb2 875 if (li->plen == plen)
19baf839 876 return li;
91b9a277 877
19baf839
RO
878 return NULL;
879}
880
881static inline struct list_head * get_fa_head(struct leaf *l, int plen)
882{
772cb712 883 struct leaf_info *li = find_leaf_info(l, plen);
c877efb2 884
91b9a277
OJ
885 if (!li)
886 return NULL;
c877efb2 887
91b9a277 888 return &li->falh;
19baf839
RO
889}
890
891static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
892{
2373ce1c
RO
893 struct leaf_info *li = NULL, *last = NULL;
894 struct hlist_node *node;
895
896 if (hlist_empty(head)) {
897 hlist_add_head_rcu(&new->hlist, head);
898 } else {
899 hlist_for_each_entry(li, node, head, hlist) {
900 if (new->plen > li->plen)
901 break;
902
903 last = li;
904 }
905 if (last)
906 hlist_add_after_rcu(&last->hlist, &new->hlist);
907 else
908 hlist_add_before_rcu(&new->hlist, &li->hlist);
909 }
19baf839
RO
910}
911
2373ce1c
RO
912/* rcu_read_lock needs to be held by the caller on the read side */
913
19baf839
RO
914static struct leaf *
915fib_find_node(struct trie *t, u32 key)
916{
917 int pos;
918 struct tnode *tn;
919 struct node *n;
920
921 pos = 0;
2373ce1c 922 n = rcu_dereference(t->trie);
19baf839
RO
923
924 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
925 tn = (struct tnode *) n;
91b9a277 926
19baf839 927 check_tnode(tn);
91b9a277 928
c877efb2 929 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
91b9a277 930 pos = tn->pos + tn->bits;
19baf839 931 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
91b9a277 932 } else
19baf839
RO
933 break;
934 }
935 /* Case we have found a leaf. Compare prefixes */
936
91b9a277
OJ
937 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
938 return (struct leaf *)n;
939
19baf839
RO
940 return NULL;
941}
942
943static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
944{
19baf839
RO
945 int wasfull;
946 t_key cindex, key;
947 struct tnode *tp = NULL;
948
19baf839 949 key = tn->key;
19baf839
RO
950
951 while (tn != NULL && NODE_PARENT(tn) != NULL) {
19baf839
RO
952
953 tp = NODE_PARENT(tn);
954 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
955 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
956 tn = (struct tnode *) resize (t, (struct tnode *)tn);
957 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
91b9a277 958
c877efb2 959 if (!NODE_PARENT(tn))
19baf839
RO
960 break;
961
962 tn = NODE_PARENT(tn);
963 }
964 /* Handle last (top) tnode */
c877efb2 965 if (IS_TNODE(tn))
19baf839
RO
966 tn = (struct tnode*) resize(t, (struct tnode *)tn);
967
968 return (struct node*) tn;
969}
970
2373ce1c
RO
971/* only used from updater-side */
972
f835e471
RO
973static struct list_head *
974fib_insert_node(struct trie *t, int *err, u32 key, int plen)
19baf839
RO
975{
976 int pos, newpos;
977 struct tnode *tp = NULL, *tn = NULL;
978 struct node *n;
979 struct leaf *l;
980 int missbit;
c877efb2 981 struct list_head *fa_head = NULL;
19baf839
RO
982 struct leaf_info *li;
983 t_key cindex;
984
985 pos = 0;
c877efb2 986 n = t->trie;
19baf839 987
c877efb2
SH
988 /* If we point to NULL, stop. Either the tree is empty and we should
989 * just put a new leaf in if, or we have reached an empty child slot,
19baf839 990 * and we should just put our new leaf in that.
c877efb2
SH
991 * If we point to a T_TNODE, check if it matches our key. Note that
992 * a T_TNODE might be skipping any number of bits - its 'pos' need
19baf839
RO
993 * not be the parent's 'pos'+'bits'!
994 *
c877efb2 995 * If it does match the current key, get pos/bits from it, extract
19baf839
RO
996 * the index from our key, push the T_TNODE and walk the tree.
997 *
998 * If it doesn't, we have to replace it with a new T_TNODE.
999 *
c877efb2
SH
1000 * If we point to a T_LEAF, it might or might not have the same key
1001 * as we do. If it does, just change the value, update the T_LEAF's
1002 * value, and return it.
19baf839
RO
1003 * If it doesn't, we need to replace it with a T_TNODE.
1004 */
1005
1006 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1007 tn = (struct tnode *) n;
91b9a277 1008
c877efb2 1009 check_tnode(tn);
91b9a277 1010
c877efb2 1011 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
19baf839 1012 tp = tn;
91b9a277 1013 pos = tn->pos + tn->bits;
19baf839
RO
1014 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1015
0c7770c7 1016 BUG_ON(n && NODE_PARENT(n) != tn);
91b9a277 1017 } else
19baf839
RO
1018 break;
1019 }
1020
1021 /*
1022 * n ----> NULL, LEAF or TNODE
1023 *
c877efb2 1024 * tp is n's (parent) ----> NULL or TNODE
19baf839
RO
1025 */
1026
91b9a277 1027 BUG_ON(tp && IS_LEAF(tp));
19baf839
RO
1028
1029 /* Case 1: n is a leaf. Compare prefixes */
1030
c877efb2 1031 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
91b9a277
OJ
1032 struct leaf *l = (struct leaf *) n;
1033
19baf839 1034 li = leaf_info_new(plen);
91b9a277 1035
c877efb2 1036 if (!li) {
f835e471
RO
1037 *err = -ENOMEM;
1038 goto err;
1039 }
19baf839
RO
1040
1041 fa_head = &li->falh;
1042 insert_leaf_info(&l->list, li);
1043 goto done;
1044 }
1045 t->size++;
1046 l = leaf_new();
1047
c877efb2 1048 if (!l) {
f835e471
RO
1049 *err = -ENOMEM;
1050 goto err;
1051 }
19baf839
RO
1052
1053 l->key = key;
1054 li = leaf_info_new(plen);
1055
c877efb2 1056 if (!li) {
f835e471
RO
1057 tnode_free((struct tnode *) l);
1058 *err = -ENOMEM;
1059 goto err;
1060 }
19baf839
RO
1061
1062 fa_head = &li->falh;
1063 insert_leaf_info(&l->list, li);
1064
19baf839 1065 if (t->trie && n == NULL) {
91b9a277 1066 /* Case 2: n is NULL, and will just insert a new leaf */
19baf839
RO
1067
1068 NODE_SET_PARENT(l, tp);
19baf839 1069
91b9a277
OJ
1070 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1071 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1072 } else {
1073 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
c877efb2
SH
1074 /*
1075 * Add a new tnode here
19baf839
RO
1076 * first tnode need some special handling
1077 */
1078
1079 if (tp)
91b9a277 1080 pos = tp->pos+tp->bits;
19baf839 1081 else
91b9a277
OJ
1082 pos = 0;
1083
c877efb2 1084 if (n) {
19baf839
RO
1085 newpos = tkey_mismatch(key, pos, n->key);
1086 tn = tnode_new(n->key, newpos, 1);
91b9a277 1087 } else {
19baf839 1088 newpos = 0;
c877efb2 1089 tn = tnode_new(key, newpos, 1); /* First tnode */
19baf839 1090 }
19baf839 1091
c877efb2 1092 if (!tn) {
f835e471
RO
1093 free_leaf_info(li);
1094 tnode_free((struct tnode *) l);
1095 *err = -ENOMEM;
1096 goto err;
91b9a277
OJ
1097 }
1098
19baf839
RO
1099 NODE_SET_PARENT(tn, tp);
1100
91b9a277 1101 missbit = tkey_extract_bits(key, newpos, 1);
19baf839
RO
1102 put_child(t, tn, missbit, (struct node *)l);
1103 put_child(t, tn, 1-missbit, n);
1104
c877efb2 1105 if (tp) {
19baf839
RO
1106 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1107 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
91b9a277 1108 } else {
2373ce1c 1109 rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
19baf839
RO
1110 tp = tn;
1111 }
1112 }
91b9a277
OJ
1113
1114 if (tp && tp->pos + tp->bits > 32)
78c6671a 1115 printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
19baf839 1116 tp, tp->pos, tp->bits, key, plen);
91b9a277 1117
19baf839 1118 /* Rebalance the trie */
2373ce1c
RO
1119
1120 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
f835e471
RO
1121done:
1122 t->revision++;
91b9a277 1123err:
19baf839
RO
1124 return fa_head;
1125}
1126
1127static int
1128fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1129 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1130{
1131 struct trie *t = (struct trie *) tb->tb_data;
1132 struct fib_alias *fa, *new_fa;
c877efb2 1133 struct list_head *fa_head = NULL;
19baf839
RO
1134 struct fib_info *fi;
1135 int plen = r->rtm_dst_len;
1136 int type = r->rtm_type;
1137 u8 tos = r->rtm_tos;
1138 u32 key, mask;
1139 int err;
1140 struct leaf *l;
1141
1142 if (plen > 32)
1143 return -EINVAL;
1144
1145 key = 0;
c877efb2 1146 if (rta->rta_dst)
19baf839
RO
1147 memcpy(&key, rta->rta_dst, 4);
1148
1149 key = ntohl(key);
1150
0c7770c7 1151 pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
19baf839 1152
91b9a277 1153 mask = ntohl(inet_make_mask(plen));
19baf839 1154
c877efb2 1155 if (key & ~mask)
19baf839
RO
1156 return -EINVAL;
1157
1158 key = key & mask;
1159
91b9a277
OJ
1160 fi = fib_create_info(r, rta, nlhdr, &err);
1161
1162 if (!fi)
19baf839
RO
1163 goto err;
1164
1165 l = fib_find_node(t, key);
c877efb2 1166 fa = NULL;
19baf839 1167
c877efb2 1168 if (l) {
19baf839
RO
1169 fa_head = get_fa_head(l, plen);
1170 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1171 }
1172
1173 /* Now fa, if non-NULL, points to the first fib alias
1174 * with the same keys [prefix,tos,priority], if such key already
1175 * exists or to the node before which we will insert new one.
1176 *
1177 * If fa is NULL, we will need to allocate a new one and
1178 * insert to the head of f.
1179 *
1180 * If f is NULL, no fib node matched the destination key
1181 * and we need to allocate a new one of those as well.
1182 */
1183
91b9a277 1184 if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
19baf839
RO
1185 struct fib_alias *fa_orig;
1186
1187 err = -EEXIST;
1188 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1189 goto out;
1190
1191 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1192 struct fib_info *fi_drop;
1193 u8 state;
1194
2373ce1c
RO
1195 err = -ENOBUFS;
1196 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1197 if (new_fa == NULL)
1198 goto out;
19baf839
RO
1199
1200 fi_drop = fa->fa_info;
2373ce1c
RO
1201 new_fa->fa_tos = fa->fa_tos;
1202 new_fa->fa_info = fi;
1203 new_fa->fa_type = type;
1204 new_fa->fa_scope = r->rtm_scope;
19baf839 1205 state = fa->fa_state;
2373ce1c 1206 new_fa->fa_state &= ~FA_S_ACCESSED;
19baf839 1207
2373ce1c
RO
1208 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1209 alias_free_mem_rcu(fa);
19baf839
RO
1210
1211 fib_release_info(fi_drop);
1212 if (state & FA_S_ACCESSED)
91b9a277 1213 rt_cache_flush(-1);
19baf839 1214
91b9a277 1215 goto succeeded;
19baf839
RO
1216 }
1217 /* Error if we find a perfect match which
1218 * uses the same scope, type, and nexthop
1219 * information.
1220 */
1221 fa_orig = fa;
1222 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1223 if (fa->fa_tos != tos)
1224 break;
1225 if (fa->fa_info->fib_priority != fi->fib_priority)
1226 break;
1227 if (fa->fa_type == type &&
1228 fa->fa_scope == r->rtm_scope &&
1229 fa->fa_info == fi) {
1230 goto out;
1231 }
1232 }
1233 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1234 fa = fa_orig;
1235 }
1236 err = -ENOENT;
91b9a277 1237 if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
19baf839
RO
1238 goto out;
1239
1240 err = -ENOBUFS;
1241 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1242 if (new_fa == NULL)
1243 goto out;
1244
1245 new_fa->fa_info = fi;
1246 new_fa->fa_tos = tos;
1247 new_fa->fa_type = type;
1248 new_fa->fa_scope = r->rtm_scope;
1249 new_fa->fa_state = 0;
19baf839
RO
1250 /*
1251 * Insert new entry to the list.
1252 */
1253
c877efb2 1254 if (!fa_head) {
f835e471
RO
1255 fa_head = fib_insert_node(t, &err, key, plen);
1256 err = 0;
c877efb2 1257 if (err)
f835e471
RO
1258 goto out_free_new_fa;
1259 }
19baf839 1260
2373ce1c
RO
1261 list_add_tail_rcu(&new_fa->fa_list,
1262 (fa ? &fa->fa_list : fa_head));
19baf839
RO
1263
1264 rt_cache_flush(-1);
1265 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1266succeeded:
1267 return 0;
f835e471
RO
1268
1269out_free_new_fa:
1270 kmem_cache_free(fn_alias_kmem, new_fa);
19baf839
RO
1271out:
1272 fib_release_info(fi);
91b9a277 1273err:
19baf839
RO
1274 return err;
1275}
1276
2373ce1c 1277
772cb712 1278/* should be called with rcu_read_lock */
0c7770c7
SH
1279static inline int check_leaf(struct trie *t, struct leaf *l,
1280 t_key key, int *plen, const struct flowi *flp,
06c74270 1281 struct fib_result *res)
19baf839 1282{
06c74270 1283 int err, i;
19baf839
RO
1284 t_key mask;
1285 struct leaf_info *li;
1286 struct hlist_head *hhead = &l->list;
1287 struct hlist_node *node;
c877efb2 1288
2373ce1c 1289 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
19baf839
RO
1290 i = li->plen;
1291 mask = ntohl(inet_make_mask(i));
c877efb2 1292 if (l->key != (key & mask))
19baf839
RO
1293 continue;
1294
06c74270 1295 if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
19baf839
RO
1296 *plen = i;
1297#ifdef CONFIG_IP_FIB_TRIE_STATS
1298 t->stats.semantic_match_passed++;
1299#endif
06c74270 1300 return err;
19baf839
RO
1301 }
1302#ifdef CONFIG_IP_FIB_TRIE_STATS
1303 t->stats.semantic_match_miss++;
1304#endif
1305 }
06c74270 1306 return 1;
19baf839
RO
1307}
1308
1309static int
1310fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1311{
1312 struct trie *t = (struct trie *) tb->tb_data;
1313 int plen, ret = 0;
1314 struct node *n;
1315 struct tnode *pn;
1316 int pos, bits;
91b9a277 1317 t_key key = ntohl(flp->fl4_dst);
19baf839
RO
1318 int chopped_off;
1319 t_key cindex = 0;
1320 int current_prefix_length = KEYLENGTH;
91b9a277
OJ
1321 struct tnode *cn;
1322 t_key node_prefix, key_prefix, pref_mismatch;
1323 int mp;
1324
2373ce1c 1325 rcu_read_lock();
91b9a277 1326
2373ce1c 1327 n = rcu_dereference(t->trie);
c877efb2 1328 if (!n)
19baf839
RO
1329 goto failed;
1330
1331#ifdef CONFIG_IP_FIB_TRIE_STATS
1332 t->stats.gets++;
1333#endif
1334
1335 /* Just a leaf? */
1336 if (IS_LEAF(n)) {
06c74270 1337 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
19baf839
RO
1338 goto found;
1339 goto failed;
1340 }
1341 pn = (struct tnode *) n;
1342 chopped_off = 0;
c877efb2 1343
91b9a277 1344 while (pn) {
19baf839
RO
1345 pos = pn->pos;
1346 bits = pn->bits;
1347
c877efb2 1348 if (!chopped_off)
19baf839
RO
1349 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1350
1351 n = tnode_get_child(pn, cindex);
1352
1353 if (n == NULL) {
1354#ifdef CONFIG_IP_FIB_TRIE_STATS
1355 t->stats.null_node_hit++;
1356#endif
1357 goto backtrace;
1358 }
1359
91b9a277
OJ
1360 if (IS_LEAF(n)) {
1361 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
1362 goto found;
1363 else
1364 goto backtrace;
1365 }
1366
19baf839
RO
1367#define HL_OPTIMIZE
1368#ifdef HL_OPTIMIZE
91b9a277 1369 cn = (struct tnode *)n;
19baf839 1370
91b9a277
OJ
1371 /*
1372 * It's a tnode, and we can do some extra checks here if we
1373 * like, to avoid descending into a dead-end branch.
1374 * This tnode is in the parent's child array at index
1375 * key[p_pos..p_pos+p_bits] but potentially with some bits
1376 * chopped off, so in reality the index may be just a
1377 * subprefix, padded with zero at the end.
1378 * We can also take a look at any skipped bits in this
1379 * tnode - everything up to p_pos is supposed to be ok,
1380 * and the non-chopped bits of the index (se previous
1381 * paragraph) are also guaranteed ok, but the rest is
1382 * considered unknown.
1383 *
1384 * The skipped bits are key[pos+bits..cn->pos].
1385 */
19baf839 1386
91b9a277
OJ
1387 /* If current_prefix_length < pos+bits, we are already doing
1388 * actual prefix matching, which means everything from
1389 * pos+(bits-chopped_off) onward must be zero along some
1390 * branch of this subtree - otherwise there is *no* valid
1391 * prefix present. Here we can only check the skipped
1392 * bits. Remember, since we have already indexed into the
1393 * parent's child array, we know that the bits we chopped of
1394 * *are* zero.
1395 */
19baf839 1396
91b9a277 1397 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
19baf839 1398
91b9a277
OJ
1399 if (current_prefix_length < pos+bits) {
1400 if (tkey_extract_bits(cn->key, current_prefix_length,
1401 cn->pos - current_prefix_length) != 0 ||
1402 !(cn->child[0]))
1403 goto backtrace;
1404 }
19baf839 1405
91b9a277
OJ
1406 /*
1407 * If chopped_off=0, the index is fully validated and we
1408 * only need to look at the skipped bits for this, the new,
1409 * tnode. What we actually want to do is to find out if
1410 * these skipped bits match our key perfectly, or if we will
1411 * have to count on finding a matching prefix further down,
1412 * because if we do, we would like to have some way of
1413 * verifying the existence of such a prefix at this point.
1414 */
19baf839 1415
91b9a277
OJ
1416 /* The only thing we can do at this point is to verify that
1417 * any such matching prefix can indeed be a prefix to our
1418 * key, and if the bits in the node we are inspecting that
1419 * do not match our key are not ZERO, this cannot be true.
1420 * Thus, find out where there is a mismatch (before cn->pos)
1421 * and verify that all the mismatching bits are zero in the
1422 * new tnode's key.
1423 */
19baf839 1424
91b9a277
OJ
1425 /* Note: We aren't very concerned about the piece of the key
1426 * that precede pn->pos+pn->bits, since these have already been
1427 * checked. The bits after cn->pos aren't checked since these are
1428 * by definition "unknown" at this point. Thus, what we want to
1429 * see is if we are about to enter the "prefix matching" state,
1430 * and in that case verify that the skipped bits that will prevail
1431 * throughout this subtree are zero, as they have to be if we are
1432 * to find a matching prefix.
1433 */
1434
1435 node_prefix = MASK_PFX(cn->key, cn->pos);
1436 key_prefix = MASK_PFX(key, cn->pos);
1437 pref_mismatch = key_prefix^node_prefix;
1438 mp = 0;
1439
1440 /* In short: If skipped bits in this node do not match the search
1441 * key, enter the "prefix matching" state.directly.
1442 */
1443 if (pref_mismatch) {
1444 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1445 mp++;
1446 pref_mismatch = pref_mismatch <<1;
1447 }
1448 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1449
1450 if (key_prefix != 0)
1451 goto backtrace;
1452
1453 if (current_prefix_length >= cn->pos)
1454 current_prefix_length = mp;
c877efb2 1455 }
91b9a277
OJ
1456#endif
1457 pn = (struct tnode *)n; /* Descend */
1458 chopped_off = 0;
1459 continue;
1460
19baf839
RO
1461backtrace:
1462 chopped_off++;
1463
1464 /* As zero don't change the child key (cindex) */
91b9a277 1465 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
19baf839 1466 chopped_off++;
19baf839
RO
1467
1468 /* Decrease current_... with bits chopped off */
1469 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1470 current_prefix_length = pn->pos + pn->bits - chopped_off;
91b9a277 1471
19baf839 1472 /*
c877efb2 1473 * Either we do the actual chop off according or if we have
19baf839
RO
1474 * chopped off all bits in this tnode walk up to our parent.
1475 */
1476
91b9a277 1477 if (chopped_off <= pn->bits) {
19baf839 1478 cindex &= ~(1 << (chopped_off-1));
91b9a277 1479 } else {
c877efb2 1480 if (NODE_PARENT(pn) == NULL)
19baf839 1481 goto failed;
91b9a277 1482
19baf839
RO
1483 /* Get Child's index */
1484 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1485 pn = NODE_PARENT(pn);
1486 chopped_off = 0;
1487
1488#ifdef CONFIG_IP_FIB_TRIE_STATS
1489 t->stats.backtrack++;
1490#endif
1491 goto backtrace;
c877efb2 1492 }
19baf839
RO
1493 }
1494failed:
c877efb2 1495 ret = 1;
19baf839 1496found:
2373ce1c 1497 rcu_read_unlock();
19baf839
RO
1498 return ret;
1499}
1500
2373ce1c 1501/* only called from updater side */
19baf839
RO
1502static int trie_leaf_remove(struct trie *t, t_key key)
1503{
1504 t_key cindex;
1505 struct tnode *tp = NULL;
1506 struct node *n = t->trie;
1507 struct leaf *l;
1508
0c7770c7 1509 pr_debug("entering trie_leaf_remove(%p)\n", n);
19baf839
RO
1510
1511 /* Note that in the case skipped bits, those bits are *not* checked!
c877efb2 1512 * When we finish this, we will have NULL or a T_LEAF, and the
19baf839
RO
1513 * T_LEAF may or may not match our key.
1514 */
1515
91b9a277 1516 while (n != NULL && IS_TNODE(n)) {
19baf839
RO
1517 struct tnode *tn = (struct tnode *) n;
1518 check_tnode(tn);
1519 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1520
0c7770c7 1521 BUG_ON(n && NODE_PARENT(n) != tn);
91b9a277 1522 }
19baf839
RO
1523 l = (struct leaf *) n;
1524
c877efb2 1525 if (!n || !tkey_equals(l->key, key))
19baf839 1526 return 0;
c877efb2
SH
1527
1528 /*
1529 * Key found.
1530 * Remove the leaf and rebalance the tree
19baf839
RO
1531 */
1532
1533 t->revision++;
1534 t->size--;
1535
2373ce1c 1536 preempt_disable();
19baf839
RO
1537 tp = NODE_PARENT(n);
1538 tnode_free((struct tnode *) n);
1539
c877efb2 1540 if (tp) {
19baf839
RO
1541 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1542 put_child(t, (struct tnode *)tp, cindex, NULL);
2373ce1c 1543 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
91b9a277 1544 } else
2373ce1c
RO
1545 rcu_assign_pointer(t->trie, NULL);
1546 preempt_enable();
19baf839
RO
1547
1548 return 1;
1549}
1550
1551static int
1552fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
91b9a277 1553 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
19baf839
RO
1554{
1555 struct trie *t = (struct trie *) tb->tb_data;
1556 u32 key, mask;
1557 int plen = r->rtm_dst_len;
1558 u8 tos = r->rtm_tos;
1559 struct fib_alias *fa, *fa_to_delete;
1560 struct list_head *fa_head;
1561 struct leaf *l;
91b9a277
OJ
1562 struct leaf_info *li;
1563
19baf839 1564
c877efb2 1565 if (plen > 32)
19baf839
RO
1566 return -EINVAL;
1567
1568 key = 0;
c877efb2 1569 if (rta->rta_dst)
19baf839
RO
1570 memcpy(&key, rta->rta_dst, 4);
1571
1572 key = ntohl(key);
91b9a277 1573 mask = ntohl(inet_make_mask(plen));
19baf839 1574
c877efb2 1575 if (key & ~mask)
19baf839
RO
1576 return -EINVAL;
1577
1578 key = key & mask;
1579 l = fib_find_node(t, key);
1580
c877efb2 1581 if (!l)
19baf839
RO
1582 return -ESRCH;
1583
1584 fa_head = get_fa_head(l, plen);
1585 fa = fib_find_alias(fa_head, tos, 0);
1586
1587 if (!fa)
1588 return -ESRCH;
1589
0c7770c7 1590 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
19baf839
RO
1591
1592 fa_to_delete = NULL;
1593 fa_head = fa->fa_list.prev;
2373ce1c 1594
19baf839
RO
1595 list_for_each_entry(fa, fa_head, fa_list) {
1596 struct fib_info *fi = fa->fa_info;
1597
1598 if (fa->fa_tos != tos)
1599 break;
1600
1601 if ((!r->rtm_type ||
1602 fa->fa_type == r->rtm_type) &&
1603 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1604 fa->fa_scope == r->rtm_scope) &&
1605 (!r->rtm_protocol ||
1606 fi->fib_protocol == r->rtm_protocol) &&
1607 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1608 fa_to_delete = fa;
1609 break;
1610 }
1611 }
1612
91b9a277
OJ
1613 if (!fa_to_delete)
1614 return -ESRCH;
19baf839 1615
91b9a277
OJ
1616 fa = fa_to_delete;
1617 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1618
1619 l = fib_find_node(t, key);
772cb712 1620 li = find_leaf_info(l, plen);
19baf839 1621
2373ce1c 1622 list_del_rcu(&fa->fa_list);
19baf839 1623
91b9a277 1624 if (list_empty(fa_head)) {
2373ce1c 1625 hlist_del_rcu(&li->hlist);
91b9a277 1626 free_leaf_info(li);
2373ce1c 1627 }
19baf839 1628
91b9a277
OJ
1629 if (hlist_empty(&l->list))
1630 trie_leaf_remove(t, key);
19baf839 1631
91b9a277
OJ
1632 if (fa->fa_state & FA_S_ACCESSED)
1633 rt_cache_flush(-1);
19baf839 1634
2373ce1c
RO
1635 fib_release_info(fa->fa_info);
1636 alias_free_mem_rcu(fa);
91b9a277 1637 return 0;
19baf839
RO
1638}
1639
1640static int trie_flush_list(struct trie *t, struct list_head *head)
1641{
1642 struct fib_alias *fa, *fa_node;
1643 int found = 0;
1644
1645 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1646 struct fib_info *fi = fa->fa_info;
19baf839 1647
2373ce1c
RO
1648 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1649 list_del_rcu(&fa->fa_list);
1650 fib_release_info(fa->fa_info);
1651 alias_free_mem_rcu(fa);
19baf839
RO
1652 found++;
1653 }
1654 }
1655 return found;
1656}
1657
1658static int trie_flush_leaf(struct trie *t, struct leaf *l)
1659{
1660 int found = 0;
1661 struct hlist_head *lih = &l->list;
1662 struct hlist_node *node, *tmp;
1663 struct leaf_info *li = NULL;
1664
1665 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
19baf839
RO
1666 found += trie_flush_list(t, &li->falh);
1667
1668 if (list_empty(&li->falh)) {
2373ce1c 1669 hlist_del_rcu(&li->hlist);
19baf839
RO
1670 free_leaf_info(li);
1671 }
1672 }
1673 return found;
1674}
1675
2373ce1c
RO
1676/* rcu_read_lock needs to be hold by caller from readside */
1677
19baf839
RO
1678static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1679{
1680 struct node *c = (struct node *) thisleaf;
1681 struct tnode *p;
1682 int idx;
2373ce1c 1683 struct node *trie = rcu_dereference(t->trie);
19baf839 1684
c877efb2 1685 if (c == NULL) {
2373ce1c 1686 if (trie == NULL)
19baf839
RO
1687 return NULL;
1688
2373ce1c
RO
1689 if (IS_LEAF(trie)) /* trie w. just a leaf */
1690 return (struct leaf *) trie;
19baf839 1691
2373ce1c 1692 p = (struct tnode*) trie; /* Start */
91b9a277 1693 } else
19baf839 1694 p = (struct tnode *) NODE_PARENT(c);
c877efb2 1695
19baf839
RO
1696 while (p) {
1697 int pos, last;
1698
1699 /* Find the next child of the parent */
c877efb2
SH
1700 if (c)
1701 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1702 else
19baf839
RO
1703 pos = 0;
1704
1705 last = 1 << p->bits;
91b9a277 1706 for (idx = pos; idx < last ; idx++) {
2373ce1c
RO
1707 c = rcu_dereference(p->child[idx]);
1708
1709 if (!c)
91b9a277
OJ
1710 continue;
1711
1712 /* Decend if tnode */
2373ce1c
RO
1713 while (IS_TNODE(c)) {
1714 p = (struct tnode *) c;
1715 idx = 0;
91b9a277
OJ
1716
1717 /* Rightmost non-NULL branch */
1718 if (p && IS_TNODE(p))
2373ce1c
RO
1719 while (!(c = rcu_dereference(p->child[idx]))
1720 && idx < (1<<p->bits)) idx++;
91b9a277
OJ
1721
1722 /* Done with this tnode? */
2373ce1c 1723 if (idx >= (1 << p->bits) || !c)
91b9a277 1724 goto up;
19baf839 1725 }
2373ce1c 1726 return (struct leaf *) c;
19baf839
RO
1727 }
1728up:
1729 /* No more children go up one step */
91b9a277 1730 c = (struct node *) p;
19baf839
RO
1731 p = (struct tnode *) NODE_PARENT(p);
1732 }
1733 return NULL; /* Ready. Root of trie */
1734}
1735
1736static int fn_trie_flush(struct fib_table *tb)
1737{
1738 struct trie *t = (struct trie *) tb->tb_data;
1739 struct leaf *ll = NULL, *l = NULL;
1740 int found = 0, h;
1741
1742 t->revision++;
1743
91b9a277 1744 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
19baf839
RO
1745 found += trie_flush_leaf(t, l);
1746
1747 if (ll && hlist_empty(&ll->list))
1748 trie_leaf_remove(t, ll->key);
1749 ll = l;
1750 }
1751
1752 if (ll && hlist_empty(&ll->list))
1753 trie_leaf_remove(t, ll->key);
1754
0c7770c7 1755 pr_debug("trie_flush found=%d\n", found);
19baf839
RO
1756 return found;
1757}
1758
91b9a277 1759static int trie_last_dflt = -1;
19baf839
RO
1760
1761static void
1762fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1763{
1764 struct trie *t = (struct trie *) tb->tb_data;
1765 int order, last_idx;
1766 struct fib_info *fi = NULL;
1767 struct fib_info *last_resort;
1768 struct fib_alias *fa = NULL;
1769 struct list_head *fa_head;
1770 struct leaf *l;
1771
1772 last_idx = -1;
1773 last_resort = NULL;
1774 order = -1;
1775
2373ce1c 1776 rcu_read_lock();
c877efb2 1777
19baf839 1778 l = fib_find_node(t, 0);
c877efb2 1779 if (!l)
19baf839
RO
1780 goto out;
1781
1782 fa_head = get_fa_head(l, 0);
c877efb2 1783 if (!fa_head)
19baf839
RO
1784 goto out;
1785
c877efb2 1786 if (list_empty(fa_head))
19baf839
RO
1787 goto out;
1788
2373ce1c 1789 list_for_each_entry_rcu(fa, fa_head, fa_list) {
19baf839 1790 struct fib_info *next_fi = fa->fa_info;
91b9a277 1791
19baf839
RO
1792 if (fa->fa_scope != res->scope ||
1793 fa->fa_type != RTN_UNICAST)
1794 continue;
91b9a277 1795
19baf839
RO
1796 if (next_fi->fib_priority > res->fi->fib_priority)
1797 break;
1798 if (!next_fi->fib_nh[0].nh_gw ||
1799 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1800 continue;
1801 fa->fa_state |= FA_S_ACCESSED;
91b9a277 1802
19baf839
RO
1803 if (fi == NULL) {
1804 if (next_fi != res->fi)
1805 break;
1806 } else if (!fib_detect_death(fi, order, &last_resort,
1807 &last_idx, &trie_last_dflt)) {
1808 if (res->fi)
1809 fib_info_put(res->fi);
1810 res->fi = fi;
1811 atomic_inc(&fi->fib_clntref);
1812 trie_last_dflt = order;
1813 goto out;
1814 }
1815 fi = next_fi;
1816 order++;
1817 }
1818 if (order <= 0 || fi == NULL) {
1819 trie_last_dflt = -1;
1820 goto out;
1821 }
1822
1823 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1824 if (res->fi)
1825 fib_info_put(res->fi);
1826 res->fi = fi;
1827 atomic_inc(&fi->fib_clntref);
1828 trie_last_dflt = order;
1829 goto out;
1830 }
1831 if (last_idx >= 0) {
1832 if (res->fi)
1833 fib_info_put(res->fi);
1834 res->fi = last_resort;
1835 if (last_resort)
1836 atomic_inc(&last_resort->fib_clntref);
1837 }
1838 trie_last_dflt = last_idx;
1839 out:;
2373ce1c 1840 rcu_read_unlock();
19baf839
RO
1841}
1842
c877efb2 1843static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
19baf839
RO
1844 struct sk_buff *skb, struct netlink_callback *cb)
1845{
1846 int i, s_i;
1847 struct fib_alias *fa;
1848
91b9a277 1849 u32 xkey = htonl(key);
19baf839 1850
91b9a277 1851 s_i = cb->args[3];
19baf839
RO
1852 i = 0;
1853
2373ce1c
RO
1854 /* rcu_read_lock is hold by caller */
1855
1856 list_for_each_entry_rcu(fa, fah, fa_list) {
19baf839
RO
1857 if (i < s_i) {
1858 i++;
1859 continue;
1860 }
78c6671a 1861 BUG_ON(!fa->fa_info);
19baf839
RO
1862
1863 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1864 cb->nlh->nlmsg_seq,
1865 RTM_NEWROUTE,
1866 tb->tb_id,
1867 fa->fa_type,
1868 fa->fa_scope,
1869 &xkey,
1870 plen,
1871 fa->fa_tos,
90f66914 1872 fa->fa_info, 0) < 0) {
19baf839
RO
1873 cb->args[3] = i;
1874 return -1;
91b9a277 1875 }
19baf839
RO
1876 i++;
1877 }
91b9a277 1878 cb->args[3] = i;
19baf839
RO
1879 return skb->len;
1880}
1881
c877efb2 1882static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
19baf839
RO
1883 struct netlink_callback *cb)
1884{
1885 int h, s_h;
1886 struct list_head *fa_head;
1887 struct leaf *l = NULL;
19baf839 1888
91b9a277 1889 s_h = cb->args[2];
19baf839 1890
91b9a277 1891 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
19baf839
RO
1892 if (h < s_h)
1893 continue;
1894 if (h > s_h)
1895 memset(&cb->args[3], 0,
1896 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1897
1898 fa_head = get_fa_head(l, plen);
91b9a277 1899
c877efb2 1900 if (!fa_head)
19baf839
RO
1901 continue;
1902
c877efb2 1903 if (list_empty(fa_head))
19baf839
RO
1904 continue;
1905
1906 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
91b9a277 1907 cb->args[2] = h;
19baf839
RO
1908 return -1;
1909 }
1910 }
91b9a277 1911 cb->args[2] = h;
19baf839
RO
1912 return skb->len;
1913}
1914
1915static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1916{
1917 int m, s_m;
1918 struct trie *t = (struct trie *) tb->tb_data;
1919
1920 s_m = cb->args[1];
1921
2373ce1c 1922 rcu_read_lock();
91b9a277 1923 for (m = 0; m <= 32; m++) {
19baf839
RO
1924 if (m < s_m)
1925 continue;
1926 if (m > s_m)
1927 memset(&cb->args[2], 0,
91b9a277 1928 sizeof(cb->args) - 2*sizeof(cb->args[0]));
19baf839
RO
1929
1930 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1931 cb->args[1] = m;
1932 goto out;
1933 }
1934 }
2373ce1c 1935 rcu_read_unlock();
19baf839
RO
1936 cb->args[1] = m;
1937 return skb->len;
91b9a277 1938out:
2373ce1c 1939 rcu_read_unlock();
19baf839
RO
1940 return -1;
1941}
1942
1943/* Fix more generic FIB names for init later */
1944
1945#ifdef CONFIG_IP_MULTIPLE_TABLES
1946struct fib_table * fib_hash_init(int id)
1947#else
1948struct fib_table * __init fib_hash_init(int id)
1949#endif
1950{
1951 struct fib_table *tb;
1952 struct trie *t;
1953
1954 if (fn_alias_kmem == NULL)
1955 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1956 sizeof(struct fib_alias),
1957 0, SLAB_HWCACHE_ALIGN,
1958 NULL, NULL);
1959
1960 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1961 GFP_KERNEL);
1962 if (tb == NULL)
1963 return NULL;
1964
1965 tb->tb_id = id;
1966 tb->tb_lookup = fn_trie_lookup;
1967 tb->tb_insert = fn_trie_insert;
1968 tb->tb_delete = fn_trie_delete;
1969 tb->tb_flush = fn_trie_flush;
1970 tb->tb_select_default = fn_trie_select_default;
1971 tb->tb_dump = fn_trie_dump;
1972 memset(tb->tb_data, 0, sizeof(struct trie));
1973
1974 t = (struct trie *) tb->tb_data;
1975
1976 trie_init(t);
1977
c877efb2 1978 if (id == RT_TABLE_LOCAL)
91b9a277 1979 trie_local = t;
c877efb2 1980 else if (id == RT_TABLE_MAIN)
91b9a277 1981 trie_main = t;
19baf839
RO
1982
1983 if (id == RT_TABLE_LOCAL)
78c6671a 1984 printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
19baf839
RO
1985
1986 return tb;
1987}
1988
cb7b593c
SH
1989#ifdef CONFIG_PROC_FS
/*
 * Depth first trie walk iterator.
 * Cursor state shared by fib_trie_get_first() and fib_trie_get_next().
 */
struct fib_trie_iter {
	struct tnode *tnode;	/* tnode whose children are being scanned */
	struct trie *trie;	/* trie being walked */
	unsigned int index;	/* next child index to look at in tnode */
	unsigned int depth;	/* depth of tnode; a root tnode has depth 1 */
};
19baf839 1997
cb7b593c 1998static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
19baf839 1999{
cb7b593c
SH
2000 struct tnode *tn = iter->tnode;
2001 unsigned cindex = iter->index;
2002 struct tnode *p;
19baf839 2003
cb7b593c
SH
2004 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2005 iter->tnode, iter->index, iter->depth);
2006rescan:
2007 while (cindex < (1<<tn->bits)) {
2008 struct node *n = tnode_get_child(tn, cindex);
19baf839 2009
cb7b593c
SH
2010 if (n) {
2011 if (IS_LEAF(n)) {
2012 iter->tnode = tn;
2013 iter->index = cindex + 1;
2014 } else {
2015 /* push down one level */
2016 iter->tnode = (struct tnode *) n;
2017 iter->index = 0;
2018 ++iter->depth;
2019 }
2020 return n;
2021 }
19baf839 2022
cb7b593c
SH
2023 ++cindex;
2024 }
91b9a277 2025
cb7b593c
SH
2026 /* Current node exhausted, pop back up */
2027 p = NODE_PARENT(tn);
2028 if (p) {
2029 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2030 tn = p;
2031 --iter->depth;
2032 goto rescan;
19baf839 2033 }
cb7b593c
SH
2034
2035 /* got root? */
2036 return NULL;
19baf839
RO
2037}
2038
cb7b593c
SH
2039static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2040 struct trie *t)
19baf839 2041{
cb7b593c 2042 struct node *n = rcu_dereference(t->trie);
19baf839 2043
cb7b593c
SH
2044 if (n && IS_TNODE(n)) {
2045 iter->tnode = (struct tnode *) n;
2046 iter->trie = t;
2047 iter->index = 0;
1d25cd6c 2048 iter->depth = 1;
cb7b593c 2049 return n;
91b9a277 2050 }
cb7b593c
SH
2051 return NULL;
2052}
91b9a277 2053
cb7b593c
SH
2054static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2055{
2056 struct node *n;
2057 struct fib_trie_iter iter;
91b9a277 2058
cb7b593c 2059 memset(s, 0, sizeof(*s));
91b9a277 2060
cb7b593c
SH
2061 rcu_read_lock();
2062 for (n = fib_trie_get_first(&iter, t); n;
2063 n = fib_trie_get_next(&iter)) {
2064 if (IS_LEAF(n)) {
2065 s->leaves++;
2066 s->totdepth += iter.depth;
2067 if (iter.depth > s->maxdepth)
2068 s->maxdepth = iter.depth;
2069 } else {
2070 const struct tnode *tn = (const struct tnode *) n;
2071 int i;
2072
2073 s->tnodes++;
2074 s->nodesizes[tn->bits]++;
2075 for (i = 0; i < (1<<tn->bits); i++)
2076 if (!tn->child[i])
2077 s->nullpointers++;
19baf839 2078 }
19baf839 2079 }
2373ce1c 2080 rcu_read_unlock();
19baf839
RO
2081}
2082
cb7b593c
SH
2083/*
2084 * This outputs /proc/net/fib_triestats
2085 */
2086static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
19baf839 2087{
cb7b593c 2088 unsigned i, max, pointers, bytes, avdepth;
c877efb2 2089
cb7b593c
SH
2090 if (stat->leaves)
2091 avdepth = stat->totdepth*100 / stat->leaves;
2092 else
2093 avdepth = 0;
91b9a277 2094
cb7b593c
SH
2095 seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2096 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
91b9a277 2097
cb7b593c 2098 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
91b9a277 2099
cb7b593c
SH
2100 bytes = sizeof(struct leaf) * stat->leaves;
2101 seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes);
2102 bytes += sizeof(struct tnode) * stat->tnodes;
19baf839 2103
cb7b593c
SH
2104 max = MAX_CHILDS-1;
2105 while (max >= 0 && stat->nodesizes[max] == 0)
2106 max--;
19baf839 2107
cb7b593c
SH
2108 pointers = 0;
2109 for (i = 1; i <= max; i++)
2110 if (stat->nodesizes[i] != 0) {
2111 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2112 pointers += (1<<i) * stat->nodesizes[i];
2113 }
2114 seq_putc(seq, '\n');
2115 seq_printf(seq, "\tPointers: %d\n", pointers);
2373ce1c 2116
cb7b593c
SH
2117 bytes += sizeof(struct node *) * pointers;
2118 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2119 seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024);
2373ce1c 2120
cb7b593c
SH
2121#ifdef CONFIG_IP_FIB_TRIE_STATS
2122 seq_printf(seq, "Counters:\n---------\n");
2123 seq_printf(seq,"gets = %d\n", t->stats.gets);
2124 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2125 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2126 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2127 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2128 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
2129#ifdef CLEAR_STATS
2130 memset(&(t->stats), 0, sizeof(t->stats));
2131#endif
2132#endif /* CONFIG_IP_FIB_TRIE_STATS */
2133}
19baf839 2134
cb7b593c
SH
2135static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2136{
2137 struct trie_stat *stat;
91b9a277 2138
cb7b593c
SH
2139 stat = kmalloc(sizeof(*stat), GFP_KERNEL);
2140 if (!stat)
2141 return -ENOMEM;
91b9a277 2142
cb7b593c
SH
2143 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2144 sizeof(struct leaf), sizeof(struct tnode));
91b9a277 2145
cb7b593c
SH
2146 if (trie_local) {
2147 seq_printf(seq, "Local:\n");
2148 trie_collect_stats(trie_local, stat);
2149 trie_show_stats(seq, stat);
2150 }
91b9a277 2151
cb7b593c
SH
2152 if (trie_main) {
2153 seq_printf(seq, "Main:\n");
2154 trie_collect_stats(trie_main, stat);
2155 trie_show_stats(seq, stat);
19baf839 2156 }
cb7b593c 2157 kfree(stat);
19baf839 2158
cb7b593c 2159 return 0;
19baf839
RO
2160}
2161
cb7b593c 2162static int fib_triestat_seq_open(struct inode *inode, struct file *file)
19baf839 2163{
cb7b593c 2164 return single_open(file, fib_triestat_seq_show, NULL);
19baf839
RO
2165}
2166
cb7b593c
SH
2167static struct file_operations fib_triestat_fops = {
2168 .owner = THIS_MODULE,
2169 .open = fib_triestat_seq_open,
2170 .read = seq_read,
2171 .llseek = seq_lseek,
2172 .release = single_release,
2173};
2174
2175static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2176 loff_t pos)
19baf839 2177{
cb7b593c
SH
2178 loff_t idx = 0;
2179 struct node *n;
2180
2181 for (n = fib_trie_get_first(iter, trie_local);
2182 n; ++idx, n = fib_trie_get_next(iter)) {
2183 if (pos == idx)
2184 return n;
2185 }
2186
2187 for (n = fib_trie_get_first(iter, trie_main);
2188 n; ++idx, n = fib_trie_get_next(iter)) {
2189 if (pos == idx)
2190 return n;
2191 }
19baf839
RO
2192 return NULL;
2193}
2194
cb7b593c 2195static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
19baf839 2196{
cb7b593c
SH
2197 rcu_read_lock();
2198 if (*pos == 0)
91b9a277 2199 return SEQ_START_TOKEN;
cb7b593c 2200 return fib_trie_get_idx(seq->private, *pos - 1);
19baf839
RO
2201}
2202
cb7b593c 2203static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
19baf839 2204{
cb7b593c
SH
2205 struct fib_trie_iter *iter = seq->private;
2206 void *l = v;
2207
19baf839 2208 ++*pos;
91b9a277 2209 if (v == SEQ_START_TOKEN)
cb7b593c 2210 return fib_trie_get_idx(iter, 0);
19baf839 2211
cb7b593c
SH
2212 v = fib_trie_get_next(iter);
2213 BUG_ON(v == l);
2214 if (v)
2215 return v;
19baf839 2216
cb7b593c
SH
2217 /* continue scan in next trie */
2218 if (iter->trie == trie_local)
2219 return fib_trie_get_first(iter, trie_main);
19baf839 2220
cb7b593c
SH
2221 return NULL;
2222}
19baf839 2223
/*
 * seq_file stop handler: drop the RCU read lock taken in
 * fib_trie_seq_start().  @seq and @v are unused.
 */
static void fib_trie_seq_stop(struct seq_file *seq, void *v)
{
	rcu_read_unlock();
}
91b9a277 2228
cb7b593c
SH
/* Emit @n indentation units into @seq; nothing is printed for @n <= 0. */
static void seq_indent(struct seq_file *seq, int n)
{
	int level;

	for (level = 0; level < n; level++)
		seq_puts(seq, " ");
}
19baf839 2233
cb7b593c
SH
2234static inline const char *rtn_scope(enum rt_scope_t s)
2235{
2236 static char buf[32];
19baf839 2237
cb7b593c
SH
2238 switch(s) {
2239 case RT_SCOPE_UNIVERSE: return "universe";
2240 case RT_SCOPE_SITE: return "site";
2241 case RT_SCOPE_LINK: return "link";
2242 case RT_SCOPE_HOST: return "host";
2243 case RT_SCOPE_NOWHERE: return "nowhere";
2244 default:
2245 snprintf(buf, sizeof(buf), "scope=%d", s);
2246 return buf;
2247 }
2248}
19baf839 2249
cb7b593c
SH
2250static const char *rtn_type_names[__RTN_MAX] = {
2251 [RTN_UNSPEC] = "UNSPEC",
2252 [RTN_UNICAST] = "UNICAST",
2253 [RTN_LOCAL] = "LOCAL",
2254 [RTN_BROADCAST] = "BROADCAST",
2255 [RTN_ANYCAST] = "ANYCAST",
2256 [RTN_MULTICAST] = "MULTICAST",
2257 [RTN_BLACKHOLE] = "BLACKHOLE",
2258 [RTN_UNREACHABLE] = "UNREACHABLE",
2259 [RTN_PROHIBIT] = "PROHIBIT",
2260 [RTN_THROW] = "THROW",
2261 [RTN_NAT] = "NAT",
2262 [RTN_XRESOLVE] = "XRESOLVE",
2263};
19baf839 2264
cb7b593c
SH
2265static inline const char *rtn_type(unsigned t)
2266{
2267 static char buf[32];
19baf839 2268
cb7b593c
SH
2269 if (t < __RTN_MAX && rtn_type_names[t])
2270 return rtn_type_names[t];
2271 snprintf(buf, sizeof(buf), "type %d", t);
2272 return buf;
19baf839
RO
2273}
2274
cb7b593c
SH
2275/* Pretty print the trie */
2276static int fib_trie_seq_show(struct seq_file *seq, void *v)
19baf839 2277{
cb7b593c
SH
2278 const struct fib_trie_iter *iter = seq->private;
2279 struct node *n = v;
c877efb2 2280
cb7b593c
SH
2281 if (v == SEQ_START_TOKEN)
2282 return 0;
19baf839 2283
cb7b593c
SH
2284 if (IS_TNODE(n)) {
2285 struct tnode *tn = (struct tnode *) n;
2286 t_key prf = ntohl(MASK_PFX(tn->key, tn->pos));
91b9a277 2287
cb7b593c
SH
2288 if (!NODE_PARENT(n)) {
2289 if (iter->trie == trie_local)
2290 seq_puts(seq, "<local>:\n");
2291 else
2292 seq_puts(seq, "<main>:\n");
1d25cd6c
RO
2293 }
2294 seq_indent(seq, iter->depth-1);
2295 seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
2296 NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
2297 tn->empty_children);
2298
cb7b593c
SH
2299 } else {
2300 struct leaf *l = (struct leaf *) n;
2301 int i;
2302 u32 val = ntohl(l->key);
2303
2304 seq_indent(seq, iter->depth);
2305 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2306 for (i = 32; i >= 0; i--) {
772cb712 2307 struct leaf_info *li = find_leaf_info(l, i);
cb7b593c
SH
2308 if (li) {
2309 struct fib_alias *fa;
2310 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2311 seq_indent(seq, iter->depth+1);
2312 seq_printf(seq, " /%d %s %s", i,
2313 rtn_scope(fa->fa_scope),
2314 rtn_type(fa->fa_type));
2315 if (fa->fa_tos)
2316 seq_printf(seq, "tos =%d\n",
2317 fa->fa_tos);
2318 seq_putc(seq, '\n');
2319 }
2320 }
2321 }
19baf839 2322 }
cb7b593c 2323
19baf839
RO
2324 return 0;
2325}
2326
cb7b593c
SH
2327static struct seq_operations fib_trie_seq_ops = {
2328 .start = fib_trie_seq_start,
2329 .next = fib_trie_seq_next,
2330 .stop = fib_trie_seq_stop,
2331 .show = fib_trie_seq_show,
19baf839
RO
2332};
2333
cb7b593c 2334static int fib_trie_seq_open(struct inode *inode, struct file *file)
19baf839
RO
2335{
2336 struct seq_file *seq;
2337 int rc = -ENOMEM;
cb7b593c 2338 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
19baf839 2339
cb7b593c
SH
2340 if (!s)
2341 goto out;
2342
2343 rc = seq_open(file, &fib_trie_seq_ops);
19baf839
RO
2344 if (rc)
2345 goto out_kfree;
2346
cb7b593c
SH
2347 seq = file->private_data;
2348 seq->private = s;
2349 memset(s, 0, sizeof(*s));
19baf839
RO
2350out:
2351 return rc;
2352out_kfree:
cb7b593c 2353 kfree(s);
19baf839
RO
2354 goto out;
2355}
2356
cb7b593c
SH
2357static struct file_operations fib_trie_fops = {
2358 .owner = THIS_MODULE,
2359 .open = fib_trie_seq_open,
2360 .read = seq_read,
2361 .llseek = seq_lseek,
c877efb2 2362 .release = seq_release_private,
19baf839
RO
2363};
2364
cb7b593c 2365static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi)
19baf839 2366{
cb7b593c
SH
2367 static unsigned type2flags[RTN_MAX + 1] = {
2368 [7] = RTF_REJECT, [8] = RTF_REJECT,
2369 };
2370 unsigned flags = type2flags[type];
19baf839 2371
cb7b593c
SH
2372 if (fi && fi->fib_nh->nh_gw)
2373 flags |= RTF_GATEWAY;
2374 if (mask == 0xFFFFFFFF)
2375 flags |= RTF_HOST;
2376 flags |= RTF_UP;
2377 return flags;
19baf839
RO
2378}
2379
cb7b593c
SH
2380/*
2381 * This outputs /proc/net/route.
2382 * The format of the file is not supposed to be changed
2383 * and needs to be same as fib_hash output to avoid breaking
2384 * legacy utilities
2385 */
2386static int fib_route_seq_show(struct seq_file *seq, void *v)
19baf839 2387{
c9e53cbe 2388 const struct fib_trie_iter *iter = seq->private;
cb7b593c
SH
2389 struct leaf *l = v;
2390 int i;
2391 char bf[128];
19baf839 2392
cb7b593c
SH
2393 if (v == SEQ_START_TOKEN) {
2394 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2395 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2396 "\tWindow\tIRTT");
2397 return 0;
2398 }
19baf839 2399
c9e53cbe
PM
2400 if (iter->trie == trie_local)
2401 return 0;
cb7b593c
SH
2402 if (IS_TNODE(l))
2403 return 0;
19baf839 2404
cb7b593c 2405 for (i=32; i>=0; i--) {
772cb712 2406 struct leaf_info *li = find_leaf_info(l, i);
cb7b593c
SH
2407 struct fib_alias *fa;
2408 u32 mask, prefix;
91b9a277 2409
cb7b593c
SH
2410 if (!li)
2411 continue;
19baf839 2412
cb7b593c
SH
2413 mask = inet_make_mask(li->plen);
2414 prefix = htonl(l->key);
19baf839 2415
cb7b593c 2416 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
1371e37d 2417 const struct fib_info *fi = fa->fa_info;
cb7b593c 2418 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
19baf839 2419
cb7b593c
SH
2420 if (fa->fa_type == RTN_BROADCAST
2421 || fa->fa_type == RTN_MULTICAST)
2422 continue;
19baf839 2423
cb7b593c
SH
2424 if (fi)
2425 snprintf(bf, sizeof(bf),
2426 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2427 fi->fib_dev ? fi->fib_dev->name : "*",
2428 prefix,
2429 fi->fib_nh->nh_gw, flags, 0, 0,
2430 fi->fib_priority,
2431 mask,
2432 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
2433 fi->fib_window,
2434 fi->fib_rtt >> 3);
2435 else
2436 snprintf(bf, sizeof(bf),
2437 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2438 prefix, 0, flags, 0, 0, 0,
2439 mask, 0, 0, 0);
19baf839 2440
cb7b593c
SH
2441 seq_printf(seq, "%-127s\n", bf);
2442 }
19baf839
RO
2443 }
2444
2445 return 0;
2446}
2447
cb7b593c
SH
2448static struct seq_operations fib_route_seq_ops = {
2449 .start = fib_trie_seq_start,
2450 .next = fib_trie_seq_next,
2451 .stop = fib_trie_seq_stop,
2452 .show = fib_route_seq_show,
19baf839
RO
2453};
2454
cb7b593c 2455static int fib_route_seq_open(struct inode *inode, struct file *file)
19baf839
RO
2456{
2457 struct seq_file *seq;
2458 int rc = -ENOMEM;
cb7b593c 2459 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
19baf839 2460
cb7b593c
SH
2461 if (!s)
2462 goto out;
2463
2464 rc = seq_open(file, &fib_route_seq_ops);
19baf839
RO
2465 if (rc)
2466 goto out_kfree;
2467
cb7b593c
SH
2468 seq = file->private_data;
2469 seq->private = s;
2470 memset(s, 0, sizeof(*s));
19baf839
RO
2471out:
2472 return rc;
2473out_kfree:
cb7b593c 2474 kfree(s);
19baf839
RO
2475 goto out;
2476}
2477
cb7b593c
SH
2478static struct file_operations fib_route_fops = {
2479 .owner = THIS_MODULE,
2480 .open = fib_route_seq_open,
2481 .read = seq_read,
2482 .llseek = seq_lseek,
2483 .release = seq_release_private,
19baf839
RO
2484};
2485
2486int __init fib_proc_init(void)
2487{
cb7b593c
SH
2488 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops))
2489 goto out1;
2490
2491 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops))
2492 goto out2;
2493
2494 if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops))
2495 goto out3;
2496
19baf839 2497 return 0;
cb7b593c
SH
2498
2499out3:
2500 proc_net_remove("fib_triestat");
2501out2:
2502 proc_net_remove("fib_trie");
2503out1:
2504 return -ENOMEM;
19baf839
RO
2505}
2506
2507void __init fib_proc_exit(void)
2508{
2509 proc_net_remove("fib_trie");
cb7b593c
SH
2510 proc_net_remove("fib_triestat");
2511 proc_net_remove("route");
19baf839
RO
2512}
2513
2514#endif /* CONFIG_PROC_FS */