]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_semantics.c
[TR]: Use menuconfig objects.
[net-next-2.6.git] / net / ipv4 / fib_semantics.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
1da177e4
LT
18#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
14c85021 31#include <linux/inetdevice.h>
1da177e4
LT
32#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
1da177e4
LT
36#include <linux/init.h>
37
14c85021 38#include <net/arp.h>
1da177e4
LT
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
45#include <net/ip_mp_alg.h>
f21c7bc5 46#include <net/netlink.h>
4e902c57 47#include <net/nexthop.h>
1da177e4
LT
48
49#include "fib_lookup.h"
50
51#define FSprintk(a...)
52
832b4c5e 53static DEFINE_SPINLOCK(fib_info_lock);
1da177e4
LT
54static struct hlist_head *fib_info_hash;
55static struct hlist_head *fib_info_laddrhash;
56static unsigned int fib_hash_size;
57static unsigned int fib_info_cnt;
58
59#define DEVINDEX_HASHBITS 8
60#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
61static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
62
63#ifdef CONFIG_IP_ROUTE_MULTIPATH
64
65static DEFINE_SPINLOCK(fib_multipath_lock);
66
67#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
68for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
69
70#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
71for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
72
73#else /* CONFIG_IP_ROUTE_MULTIPATH */
74
75/* Hope, that gcc will optimize it to get rid of dummy loop */
76
77#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
78for (nhsel=0; nhsel < 1; nhsel++)
79
80#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
81for (nhsel=0; nhsel < 1; nhsel++)
82
83#endif /* CONFIG_IP_ROUTE_MULTIPATH */
84
85#define endfor_nexthops(fi) }
86
87
e905a9ed 88static const struct
1da177e4
LT
89{
90 int error;
91 u8 scope;
a0ee18b9 92} fib_props[RTN_MAX + 1] = {
e905a9ed 93 {
1da177e4
LT
94 .error = 0,
95 .scope = RT_SCOPE_NOWHERE,
96 }, /* RTN_UNSPEC */
97 {
98 .error = 0,
99 .scope = RT_SCOPE_UNIVERSE,
100 }, /* RTN_UNICAST */
101 {
102 .error = 0,
103 .scope = RT_SCOPE_HOST,
104 }, /* RTN_LOCAL */
105 {
106 .error = 0,
107 .scope = RT_SCOPE_LINK,
108 }, /* RTN_BROADCAST */
109 {
110 .error = 0,
111 .scope = RT_SCOPE_LINK,
112 }, /* RTN_ANYCAST */
113 {
114 .error = 0,
115 .scope = RT_SCOPE_UNIVERSE,
116 }, /* RTN_MULTICAST */
117 {
118 .error = -EINVAL,
119 .scope = RT_SCOPE_UNIVERSE,
120 }, /* RTN_BLACKHOLE */
121 {
122 .error = -EHOSTUNREACH,
123 .scope = RT_SCOPE_UNIVERSE,
124 }, /* RTN_UNREACHABLE */
125 {
126 .error = -EACCES,
127 .scope = RT_SCOPE_UNIVERSE,
128 }, /* RTN_PROHIBIT */
129 {
130 .error = -EAGAIN,
131 .scope = RT_SCOPE_UNIVERSE,
132 }, /* RTN_THROW */
133 {
134 .error = -EINVAL,
135 .scope = RT_SCOPE_NOWHERE,
136 }, /* RTN_NAT */
137 {
138 .error = -EINVAL,
139 .scope = RT_SCOPE_NOWHERE,
140 }, /* RTN_XRESOLVE */
141};
142
143
144/* Release a nexthop info record */
145
146void free_fib_info(struct fib_info *fi)
147{
148 if (fi->fib_dead == 0) {
149 printk("Freeing alive fib_info %p\n", fi);
150 return;
151 }
152 change_nexthops(fi) {
153 if (nh->nh_dev)
154 dev_put(nh->nh_dev);
155 nh->nh_dev = NULL;
156 } endfor_nexthops(fi);
157 fib_info_cnt--;
158 kfree(fi);
159}
160
161void fib_release_info(struct fib_info *fi)
162{
832b4c5e 163 spin_lock_bh(&fib_info_lock);
1da177e4
LT
164 if (fi && --fi->fib_treeref == 0) {
165 hlist_del(&fi->fib_hash);
166 if (fi->fib_prefsrc)
167 hlist_del(&fi->fib_lhash);
168 change_nexthops(fi) {
169 if (!nh->nh_dev)
170 continue;
171 hlist_del(&nh->nh_hash);
172 } endfor_nexthops(fi)
173 fi->fib_dead = 1;
174 fib_info_put(fi);
175 }
832b4c5e 176 spin_unlock_bh(&fib_info_lock);
1da177e4
LT
177}
178
179static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
180{
181 const struct fib_nh *onh = ofi->fib_nh;
182
183 for_nexthops(fi) {
184 if (nh->nh_oif != onh->nh_oif ||
185 nh->nh_gw != onh->nh_gw ||
186 nh->nh_scope != onh->nh_scope ||
187#ifdef CONFIG_IP_ROUTE_MULTIPATH
188 nh->nh_weight != onh->nh_weight ||
189#endif
190#ifdef CONFIG_NET_CLS_ROUTE
191 nh->nh_tclassid != onh->nh_tclassid ||
192#endif
193 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
194 return -1;
195 onh++;
196 } endfor_nexthops(fi);
197 return 0;
198}
199
200static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
201{
202 unsigned int mask = (fib_hash_size - 1);
203 unsigned int val = fi->fib_nhs;
204
205 val ^= fi->fib_protocol;
81f7bf6c 206 val ^= (__force u32)fi->fib_prefsrc;
1da177e4
LT
207 val ^= fi->fib_priority;
208
209 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
210}
211
212static struct fib_info *fib_find_info(const struct fib_info *nfi)
213{
214 struct hlist_head *head;
215 struct hlist_node *node;
216 struct fib_info *fi;
217 unsigned int hash;
218
219 hash = fib_info_hashfn(nfi);
220 head = &fib_info_hash[hash];
221
222 hlist_for_each_entry(fi, node, head, fib_hash) {
223 if (fi->fib_nhs != nfi->fib_nhs)
224 continue;
225 if (nfi->fib_protocol == fi->fib_protocol &&
226 nfi->fib_prefsrc == fi->fib_prefsrc &&
227 nfi->fib_priority == fi->fib_priority &&
228 memcmp(nfi->fib_metrics, fi->fib_metrics,
229 sizeof(fi->fib_metrics)) == 0 &&
230 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
231 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
232 return fi;
233 }
234
235 return NULL;
236}
237
238static inline unsigned int fib_devindex_hashfn(unsigned int val)
239{
240 unsigned int mask = DEVINDEX_HASHSIZE - 1;
241
242 return (val ^
243 (val >> DEVINDEX_HASHBITS) ^
244 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
245}
246
247/* Check, that the gateway is already configured.
248 Used only by redirect accept routine.
249 */
250
d878e72e 251int ip_fib_check_default(__be32 gw, struct net_device *dev)
1da177e4
LT
252{
253 struct hlist_head *head;
254 struct hlist_node *node;
255 struct fib_nh *nh;
256 unsigned int hash;
257
832b4c5e 258 spin_lock(&fib_info_lock);
1da177e4
LT
259
260 hash = fib_devindex_hashfn(dev->ifindex);
261 head = &fib_info_devhash[hash];
262 hlist_for_each_entry(nh, node, head, nh_hash) {
263 if (nh->nh_dev == dev &&
264 nh->nh_gw == gw &&
265 !(nh->nh_flags&RTNH_F_DEAD)) {
832b4c5e 266 spin_unlock(&fib_info_lock);
1da177e4
LT
267 return 0;
268 }
269 }
270
832b4c5e 271 spin_unlock(&fib_info_lock);
1da177e4
LT
272
273 return -1;
274}
275
339bf98f
TG
276static inline size_t fib_nlmsg_size(struct fib_info *fi)
277{
278 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
279 + nla_total_size(4) /* RTA_TABLE */
280 + nla_total_size(4) /* RTA_DST */
281 + nla_total_size(4) /* RTA_PRIORITY */
282 + nla_total_size(4); /* RTA_PREFSRC */
283
284 /* space for nested metrics */
285 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
286
287 if (fi->fib_nhs) {
288 /* Also handles the special case fib_nhs == 1 */
289
290 /* each nexthop is packed in an attribute */
291 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
292
293 /* may contain flow and gateway attribute */
294 nhsize += 2 * nla_total_size(4);
295
296 /* all nexthops are packed in a nested attribute */
297 payload += nla_total_size(fi->fib_nhs * nhsize);
298 }
299
300 return payload;
301}
302
81f7bf6c 303void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
4e902c57 304 int dst_len, u32 tb_id, struct nl_info *info)
1da177e4
LT
305{
306 struct sk_buff *skb;
4e902c57 307 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
f21c7bc5 308 int err = -ENOBUFS;
1da177e4 309
339bf98f 310 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
f21c7bc5
TG
311 if (skb == NULL)
312 goto errout;
1da177e4 313
4e902c57 314 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
be403ea1 315 fa->fa_type, fa->fa_scope, key, dst_len,
4e902c57 316 fa->fa_tos, fa->fa_info, 0);
26932566
PM
317 if (err < 0) {
318 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
319 WARN_ON(err == -EMSGSIZE);
320 kfree_skb(skb);
321 goto errout;
322 }
4e902c57
TG
323 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
324 info->nlh, GFP_KERNEL);
f21c7bc5
TG
325errout:
326 if (err < 0)
327 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
1da177e4
LT
328}
329
330/* Return the first fib alias matching TOS with
331 * priority less than or equal to PRIO.
332 */
333struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
334{
335 if (fah) {
336 struct fib_alias *fa;
337 list_for_each_entry(fa, fah, fa_list) {
338 if (fa->fa_tos > tos)
339 continue;
340 if (fa->fa_info->fib_priority >= prio ||
341 fa->fa_tos < tos)
342 return fa;
343 }
344 }
345 return NULL;
346}
347
348int fib_detect_death(struct fib_info *fi, int order,
349 struct fib_info **last_resort, int *last_idx, int *dflt)
350{
351 struct neighbour *n;
352 int state = NUD_NONE;
353
354 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
355 if (n) {
356 state = n->nud_state;
357 neigh_release(n);
358 }
359 if (state==NUD_REACHABLE)
360 return 0;
361 if ((state&NUD_VALID) && order != *dflt)
362 return 0;
363 if ((state&NUD_VALID) ||
364 (*last_idx<0 && order > *dflt)) {
365 *last_resort = fi;
366 *last_idx = order;
367 }
368 return 1;
369}
370
371#ifdef CONFIG_IP_ROUTE_MULTIPATH
372
/*
 * Count the rtnexthop records in the @remaining bytes starting at @rtnh.
 * Returns 0 if bytes are left over after the last well-formed record.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
385
4e902c57
TG
386static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
387 int remaining, struct fib_config *cfg)
1da177e4 388{
1da177e4 389 change_nexthops(fi) {
4e902c57
TG
390 int attrlen;
391
392 if (!rtnh_ok(rtnh, remaining))
1da177e4 393 return -EINVAL;
4e902c57
TG
394
395 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
396 nh->nh_oif = rtnh->rtnh_ifindex;
397 nh->nh_weight = rtnh->rtnh_hops + 1;
398
399 attrlen = rtnh_attrlen(rtnh);
400 if (attrlen > 0) {
401 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
402
403 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
17fb2c64 404 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
1da177e4 405#ifdef CONFIG_NET_CLS_ROUTE
4e902c57
TG
406 nla = nla_find(attrs, attrlen, RTA_FLOW);
407 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
1da177e4
LT
408#endif
409 }
4e902c57
TG
410
411 rtnh = rtnh_next(rtnh, &remaining);
1da177e4 412 } endfor_nexthops(fi);
4e902c57 413
1da177e4
LT
414 return 0;
415}
416
417#endif
418
4e902c57 419int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
1da177e4
LT
420{
421#ifdef CONFIG_IP_ROUTE_MULTIPATH
4e902c57
TG
422 struct rtnexthop *rtnh;
423 int remaining;
1da177e4
LT
424#endif
425
4e902c57 426 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
1da177e4
LT
427 return 1;
428
4e902c57
TG
429 if (cfg->fc_oif || cfg->fc_gw) {
430 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
431 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
1da177e4
LT
432 return 0;
433 return 1;
434 }
435
436#ifdef CONFIG_IP_ROUTE_MULTIPATH
4e902c57 437 if (cfg->fc_mp == NULL)
1da177e4 438 return 0;
4e902c57
TG
439
440 rtnh = cfg->fc_mp;
441 remaining = cfg->fc_mp_len;
e905a9ed 442
1da177e4 443 for_nexthops(fi) {
4e902c57 444 int attrlen;
1da177e4 445
4e902c57 446 if (!rtnh_ok(rtnh, remaining))
1da177e4 447 return -EINVAL;
4e902c57
TG
448
449 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
1da177e4 450 return 1;
4e902c57
TG
451
452 attrlen = rtnh_attrlen(rtnh);
453 if (attrlen < 0) {
454 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
455
456 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
17fb2c64 457 if (nla && nla_get_be32(nla) != nh->nh_gw)
1da177e4
LT
458 return 1;
459#ifdef CONFIG_NET_CLS_ROUTE
4e902c57
TG
460 nla = nla_find(attrs, attrlen, RTA_FLOW);
461 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
1da177e4
LT
462 return 1;
463#endif
464 }
4e902c57
TG
465
466 rtnh = rtnh_next(rtnh, &remaining);
1da177e4
LT
467 } endfor_nexthops(fi);
468#endif
469 return 0;
470}
471
472
/*
   Picture
   -------

   Semantics of nexthop is very messy for historical reasons.
   We have to take into account that:
   a) gateway can actually be a local interface address,
      so that a gatewayed route is direct.
   b) gateway must be an on-link address, possibly
      described not by an ifaddr, but also by a direct route.
   c) If both gateway and interface are specified, they should not
      contradict.
   d) If we use tunnel routes, gateway could be not on-link.

   An attempt to reconcile all of these (alas, self-contradictory)
   conditions results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size
   of the code does not increase practically, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is a local address,
   "link" is a direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is a true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if the "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes
   to coexist in peace.

   Normally it looks as follows.

   {universe prefix}  -> (gw, oif) [scope link]
                          |
                          |-> {link prefix} -> (gw, oif) [scope local]
                                                |
                                                |-> {local prefix} (terminal node)
 */
516
4e902c57
TG
517static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
518 struct fib_nh *nh)
1da177e4
LT
519{
520 int err;
521
522 if (nh->nh_gw) {
523 struct fib_result res;
524
525#ifdef CONFIG_IP_ROUTE_PERVASIVE
526 if (nh->nh_flags&RTNH_F_PERVASIVE)
527 return 0;
528#endif
529 if (nh->nh_flags&RTNH_F_ONLINK) {
530 struct net_device *dev;
531
4e902c57 532 if (cfg->fc_scope >= RT_SCOPE_LINK)
1da177e4
LT
533 return -EINVAL;
534 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
535 return -EINVAL;
536 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
537 return -ENODEV;
538 if (!(dev->flags&IFF_UP))
539 return -ENETDOWN;
540 nh->nh_dev = dev;
541 dev_hold(dev);
542 nh->nh_scope = RT_SCOPE_LINK;
543 return 0;
544 }
545 {
4e902c57
TG
546 struct flowi fl = {
547 .nl_u = {
548 .ip4_u = {
549 .daddr = nh->nh_gw,
550 .scope = cfg->fc_scope + 1,
551 },
552 },
553 .oif = nh->nh_oif,
554 };
1da177e4
LT
555
556 /* It is not necessary, but requires a bit of thinking */
557 if (fl.fl4_scope < RT_SCOPE_LINK)
558 fl.fl4_scope = RT_SCOPE_LINK;
559 if ((err = fib_lookup(&fl, &res)) != 0)
560 return err;
561 }
562 err = -EINVAL;
563 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
564 goto out;
565 nh->nh_scope = res.scope;
566 nh->nh_oif = FIB_RES_OIF(res);
567 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
568 goto out;
569 dev_hold(nh->nh_dev);
570 err = -ENETDOWN;
571 if (!(nh->nh_dev->flags & IFF_UP))
572 goto out;
573 err = 0;
574out:
575 fib_res_put(&res);
576 return err;
577 } else {
578 struct in_device *in_dev;
579
580 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
581 return -EINVAL;
582
583 in_dev = inetdev_by_index(nh->nh_oif);
584 if (in_dev == NULL)
585 return -ENODEV;
586 if (!(in_dev->dev->flags&IFF_UP)) {
587 in_dev_put(in_dev);
588 return -ENETDOWN;
589 }
590 nh->nh_dev = in_dev->dev;
591 dev_hold(nh->nh_dev);
592 nh->nh_scope = RT_SCOPE_HOST;
593 in_dev_put(in_dev);
594 }
595 return 0;
596}
597
81f7bf6c 598static inline unsigned int fib_laddr_hashfn(__be32 val)
1da177e4
LT
599{
600 unsigned int mask = (fib_hash_size - 1);
601
81f7bf6c 602 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
1da177e4
LT
603}
604
605static struct hlist_head *fib_hash_alloc(int bytes)
606{
607 if (bytes <= PAGE_SIZE)
608 return kmalloc(bytes, GFP_KERNEL);
609 else
610 return (struct hlist_head *)
611 __get_free_pages(GFP_KERNEL, get_order(bytes));
612}
613
614static void fib_hash_free(struct hlist_head *hash, int bytes)
615{
616 if (!hash)
617 return;
618
619 if (bytes <= PAGE_SIZE)
620 kfree(hash);
621 else
622 free_pages((unsigned long) hash, get_order(bytes));
623}
624
625static void fib_hash_move(struct hlist_head *new_info_hash,
626 struct hlist_head *new_laddrhash,
627 unsigned int new_size)
628{
b7656e7f 629 struct hlist_head *old_info_hash, *old_laddrhash;
1da177e4 630 unsigned int old_size = fib_hash_size;
b7656e7f 631 unsigned int i, bytes;
1da177e4 632
832b4c5e 633 spin_lock_bh(&fib_info_lock);
b7656e7f
DM
634 old_info_hash = fib_info_hash;
635 old_laddrhash = fib_info_laddrhash;
1da177e4
LT
636 fib_hash_size = new_size;
637
638 for (i = 0; i < old_size; i++) {
639 struct hlist_head *head = &fib_info_hash[i];
640 struct hlist_node *node, *n;
641 struct fib_info *fi;
642
643 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
644 struct hlist_head *dest;
645 unsigned int new_hash;
646
647 hlist_del(&fi->fib_hash);
648
649 new_hash = fib_info_hashfn(fi);
650 dest = &new_info_hash[new_hash];
651 hlist_add_head(&fi->fib_hash, dest);
652 }
653 }
654 fib_info_hash = new_info_hash;
655
656 for (i = 0; i < old_size; i++) {
657 struct hlist_head *lhead = &fib_info_laddrhash[i];
658 struct hlist_node *node, *n;
659 struct fib_info *fi;
660
661 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
662 struct hlist_head *ldest;
663 unsigned int new_hash;
664
665 hlist_del(&fi->fib_lhash);
666
667 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
668 ldest = &new_laddrhash[new_hash];
669 hlist_add_head(&fi->fib_lhash, ldest);
670 }
671 }
672 fib_info_laddrhash = new_laddrhash;
673
832b4c5e 674 spin_unlock_bh(&fib_info_lock);
b7656e7f
DM
675
676 bytes = old_size * sizeof(struct hlist_head *);
677 fib_hash_free(old_info_hash, bytes);
678 fib_hash_free(old_laddrhash, bytes);
1da177e4
LT
679}
680
4e902c57 681struct fib_info *fib_create_info(struct fib_config *cfg)
1da177e4
LT
682{
683 int err;
684 struct fib_info *fi = NULL;
685 struct fib_info *ofi;
1da177e4 686 int nhs = 1;
1da177e4
LT
687
688 /* Fast check to catch the most weird cases */
4e902c57 689 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
1da177e4
LT
690 goto err_inval;
691
692#ifdef CONFIG_IP_ROUTE_MULTIPATH
4e902c57
TG
693 if (cfg->fc_mp) {
694 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
1da177e4
LT
695 if (nhs == 0)
696 goto err_inval;
697 }
698#endif
699#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
4e902c57
TG
700 if (cfg->fc_mp_alg) {
701 if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
702 cfg->fc_mp_alg > IP_MP_ALG_MAX)
1da177e4
LT
703 goto err_inval;
704 }
705#endif
706
707 err = -ENOBUFS;
708 if (fib_info_cnt >= fib_hash_size) {
709 unsigned int new_size = fib_hash_size << 1;
710 struct hlist_head *new_info_hash;
711 struct hlist_head *new_laddrhash;
712 unsigned int bytes;
713
714 if (!new_size)
715 new_size = 1;
716 bytes = new_size * sizeof(struct hlist_head *);
717 new_info_hash = fib_hash_alloc(bytes);
718 new_laddrhash = fib_hash_alloc(bytes);
719 if (!new_info_hash || !new_laddrhash) {
720 fib_hash_free(new_info_hash, bytes);
721 fib_hash_free(new_laddrhash, bytes);
722 } else {
723 memset(new_info_hash, 0, bytes);
724 memset(new_laddrhash, 0, bytes);
725
726 fib_hash_move(new_info_hash, new_laddrhash, new_size);
727 }
728
729 if (!fib_hash_size)
730 goto failure;
731 }
732
0da974f4 733 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
1da177e4
LT
734 if (fi == NULL)
735 goto failure;
736 fib_info_cnt++;
1da177e4 737
4e902c57
TG
738 fi->fib_protocol = cfg->fc_protocol;
739 fi->fib_flags = cfg->fc_flags;
740 fi->fib_priority = cfg->fc_priority;
741 fi->fib_prefsrc = cfg->fc_prefsrc;
1da177e4
LT
742
743 fi->fib_nhs = nhs;
744 change_nexthops(fi) {
745 nh->nh_parent = fi;
746 } endfor_nexthops(fi)
747
4e902c57
TG
748 if (cfg->fc_mx) {
749 struct nlattr *nla;
750 int remaining;
751
752 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
753 int type = nla->nla_type;
754
755 if (type) {
756 if (type > RTAX_MAX)
1da177e4 757 goto err_inval;
4e902c57 758 fi->fib_metrics[type - 1] = nla_get_u32(nla);
1da177e4 759 }
1da177e4
LT
760 }
761 }
1da177e4 762
4e902c57 763 if (cfg->fc_mp) {
1da177e4 764#ifdef CONFIG_IP_ROUTE_MULTIPATH
4e902c57
TG
765 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
766 if (err != 0)
1da177e4 767 goto failure;
4e902c57 768 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
1da177e4 769 goto err_inval;
4e902c57 770 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
1da177e4
LT
771 goto err_inval;
772#ifdef CONFIG_NET_CLS_ROUTE
4e902c57 773 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
1da177e4
LT
774 goto err_inval;
775#endif
776#else
777 goto err_inval;
778#endif
779 } else {
780 struct fib_nh *nh = fi->fib_nh;
4e902c57
TG
781
782 nh->nh_oif = cfg->fc_oif;
783 nh->nh_gw = cfg->fc_gw;
784 nh->nh_flags = cfg->fc_flags;
1da177e4 785#ifdef CONFIG_NET_CLS_ROUTE
4e902c57 786 nh->nh_tclassid = cfg->fc_flow;
1da177e4 787#endif
1da177e4
LT
788#ifdef CONFIG_IP_ROUTE_MULTIPATH
789 nh->nh_weight = 1;
790#endif
791 }
792
793#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
4e902c57 794 fi->fib_mp_alg = cfg->fc_mp_alg;
1da177e4
LT
795#endif
796
4e902c57
TG
797 if (fib_props[cfg->fc_type].error) {
798 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
1da177e4
LT
799 goto err_inval;
800 goto link_it;
801 }
802
4e902c57 803 if (cfg->fc_scope > RT_SCOPE_HOST)
1da177e4
LT
804 goto err_inval;
805
4e902c57 806 if (cfg->fc_scope == RT_SCOPE_HOST) {
1da177e4
LT
807 struct fib_nh *nh = fi->fib_nh;
808
809 /* Local address is added. */
810 if (nhs != 1 || nh->nh_gw)
811 goto err_inval;
812 nh->nh_scope = RT_SCOPE_NOWHERE;
813 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
814 err = -ENODEV;
815 if (nh->nh_dev == NULL)
816 goto failure;
817 } else {
818 change_nexthops(fi) {
4e902c57 819 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
1da177e4
LT
820 goto failure;
821 } endfor_nexthops(fi)
822 }
823
824 if (fi->fib_prefsrc) {
4e902c57
TG
825 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
826 fi->fib_prefsrc != cfg->fc_dst)
1da177e4
LT
827 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
828 goto err_inval;
829 }
830
831link_it:
832 if ((ofi = fib_find_info(fi)) != NULL) {
833 fi->fib_dead = 1;
834 free_fib_info(fi);
835 ofi->fib_treeref++;
836 return ofi;
837 }
838
839 fi->fib_treeref++;
840 atomic_inc(&fi->fib_clntref);
832b4c5e 841 spin_lock_bh(&fib_info_lock);
1da177e4
LT
842 hlist_add_head(&fi->fib_hash,
843 &fib_info_hash[fib_info_hashfn(fi)]);
844 if (fi->fib_prefsrc) {
845 struct hlist_head *head;
846
847 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
848 hlist_add_head(&fi->fib_lhash, head);
849 }
850 change_nexthops(fi) {
851 struct hlist_head *head;
852 unsigned int hash;
853
854 if (!nh->nh_dev)
855 continue;
856 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
857 head = &fib_info_devhash[hash];
858 hlist_add_head(&nh->nh_hash, head);
859 } endfor_nexthops(fi)
832b4c5e 860 spin_unlock_bh(&fib_info_lock);
1da177e4
LT
861 return fi;
862
863err_inval:
864 err = -EINVAL;
865
866failure:
e905a9ed 867 if (fi) {
1da177e4
LT
868 fi->fib_dead = 1;
869 free_fib_info(fi);
870 }
4e902c57
TG
871
872 return ERR_PTR(err);
1da177e4
LT
873}
874
e5b43760 875/* Note! fib_semantic_match intentionally uses RCU list functions. */
1da177e4 876int fib_semantic_match(struct list_head *head, const struct flowi *flp,
1ef1b8c8 877 struct fib_result *res, __be32 zone, __be32 mask,
1da177e4
LT
878 int prefixlen)
879{
880 struct fib_alias *fa;
881 int nh_sel = 0;
882
e5b43760 883 list_for_each_entry_rcu(fa, head, fa_list) {
1da177e4
LT
884 int err;
885
886 if (fa->fa_tos &&
887 fa->fa_tos != flp->fl4_tos)
888 continue;
889
890 if (fa->fa_scope < flp->fl4_scope)
891 continue;
892
893 fa->fa_state |= FA_S_ACCESSED;
894
895 err = fib_props[fa->fa_type].error;
896 if (err == 0) {
897 struct fib_info *fi = fa->fa_info;
898
899 if (fi->fib_flags & RTNH_F_DEAD)
900 continue;
901
902 switch (fa->fa_type) {
903 case RTN_UNICAST:
904 case RTN_LOCAL:
905 case RTN_BROADCAST:
906 case RTN_ANYCAST:
907 case RTN_MULTICAST:
908 for_nexthops(fi) {
909 if (nh->nh_flags&RTNH_F_DEAD)
910 continue;
911 if (!flp->oif || flp->oif == nh->nh_oif)
912 break;
913 }
914#ifdef CONFIG_IP_ROUTE_MULTIPATH
915 if (nhsel < fi->fib_nhs) {
916 nh_sel = nhsel;
917 goto out_fill_res;
918 }
919#else
920 if (nhsel < 1) {
921 goto out_fill_res;
922 }
923#endif
924 endfor_nexthops(fi);
925 continue;
926
927 default:
928 printk(KERN_DEBUG "impossible 102\n");
929 return -EINVAL;
3ff50b79 930 }
1da177e4
LT
931 }
932 return err;
933 }
934 return 1;
935
936out_fill_res:
937 res->prefixlen = prefixlen;
938 res->nh_sel = nh_sel;
939 res->type = fa->fa_type;
940 res->scope = fa->fa_scope;
941 res->fi = fa->fa_info;
942#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
943 res->netmask = mask;
1e8aa6f1 944 res->network = zone & inet_make_mask(prefixlen);
1da177e4
LT
945#endif
946 atomic_inc(&res->fi->fib_clntref);
947 return 0;
948}
949
950/* Find appropriate source address to this destination */
951
b83738ae 952__be32 __fib_res_prefsrc(struct fib_result *res)
1da177e4
LT
953{
954 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
955}
956
be403ea1 957int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
81f7bf6c 958 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
be403ea1 959 struct fib_info *fi, unsigned int flags)
1da177e4 960{
be403ea1 961 struct nlmsghdr *nlh;
1da177e4 962 struct rtmsg *rtm;
1da177e4 963
be403ea1
TG
964 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
965 if (nlh == NULL)
26932566 966 return -EMSGSIZE;
be403ea1
TG
967
968 rtm = nlmsg_data(nlh);
1da177e4
LT
969 rtm->rtm_family = AF_INET;
970 rtm->rtm_dst_len = dst_len;
971 rtm->rtm_src_len = 0;
972 rtm->rtm_tos = tos;
973 rtm->rtm_table = tb_id;
be403ea1 974 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
1da177e4
LT
975 rtm->rtm_type = type;
976 rtm->rtm_flags = fi->fib_flags;
977 rtm->rtm_scope = scope;
1da177e4 978 rtm->rtm_protocol = fi->fib_protocol;
be403ea1
TG
979
980 if (rtm->rtm_dst_len)
17fb2c64 981 NLA_PUT_BE32(skb, RTA_DST, dst);
be403ea1 982
1da177e4 983 if (fi->fib_priority)
be403ea1
TG
984 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
985
1da177e4 986 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
be403ea1
TG
987 goto nla_put_failure;
988
1da177e4 989 if (fi->fib_prefsrc)
17fb2c64 990 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
be403ea1 991
1da177e4
LT
992 if (fi->fib_nhs == 1) {
993 if (fi->fib_nh->nh_gw)
17fb2c64 994 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
be403ea1 995
1da177e4 996 if (fi->fib_nh->nh_oif)
be403ea1 997 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
8265abc0
PM
998#ifdef CONFIG_NET_CLS_ROUTE
999 if (fi->fib_nh[0].nh_tclassid)
be403ea1 1000 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
8265abc0 1001#endif
1da177e4
LT
1002 }
1003#ifdef CONFIG_IP_ROUTE_MULTIPATH
1004 if (fi->fib_nhs > 1) {
be403ea1
TG
1005 struct rtnexthop *rtnh;
1006 struct nlattr *mp;
1007
1008 mp = nla_nest_start(skb, RTA_MULTIPATH);
1009 if (mp == NULL)
1010 goto nla_put_failure;
1da177e4
LT
1011
1012 for_nexthops(fi) {
be403ea1
TG
1013 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1014 if (rtnh == NULL)
1015 goto nla_put_failure;
1016
1017 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1018 rtnh->rtnh_hops = nh->nh_weight - 1;
1019 rtnh->rtnh_ifindex = nh->nh_oif;
1020
1da177e4 1021 if (nh->nh_gw)
17fb2c64 1022 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
8265abc0
PM
1023#ifdef CONFIG_NET_CLS_ROUTE
1024 if (nh->nh_tclassid)
be403ea1 1025 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
8265abc0 1026#endif
be403ea1
TG
1027 /* length of rtnetlink header + attributes */
1028 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1da177e4 1029 } endfor_nexthops(fi);
be403ea1
TG
1030
1031 nla_nest_end(skb, mp);
1da177e4
LT
1032 }
1033#endif
be403ea1 1034 return nlmsg_end(skb, nlh);
1da177e4 1035
be403ea1 1036nla_put_failure:
26932566
PM
1037 nlmsg_cancel(skb, nlh);
1038 return -EMSGSIZE;
1da177e4
LT
1039}
1040
1da177e4
LT
1041/*
1042 Update FIB if:
1043 - local address disappeared -> we must delete all the entries
1044 referring to it.
1045 - device went down -> we must shutdown all nexthops going via it.
1046 */
1047
81f7bf6c 1048int fib_sync_down(__be32 local, struct net_device *dev, int force)
1da177e4
LT
1049{
1050 int ret = 0;
1051 int scope = RT_SCOPE_NOWHERE;
e905a9ed 1052
1da177e4
LT
1053 if (force)
1054 scope = -1;
1055
1056 if (local && fib_info_laddrhash) {
1057 unsigned int hash = fib_laddr_hashfn(local);
1058 struct hlist_head *head = &fib_info_laddrhash[hash];
1059 struct hlist_node *node;
1060 struct fib_info *fi;
1061
1062 hlist_for_each_entry(fi, node, head, fib_lhash) {
1063 if (fi->fib_prefsrc == local) {
1064 fi->fib_flags |= RTNH_F_DEAD;
1065 ret++;
1066 }
1067 }
1068 }
1069
1070 if (dev) {
1071 struct fib_info *prev_fi = NULL;
1072 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1073 struct hlist_head *head = &fib_info_devhash[hash];
1074 struct hlist_node *node;
1075 struct fib_nh *nh;
1076
1077 hlist_for_each_entry(nh, node, head, nh_hash) {
1078 struct fib_info *fi = nh->nh_parent;
1079 int dead;
1080
1081 BUG_ON(!fi->fib_nhs);
1082 if (nh->nh_dev != dev || fi == prev_fi)
1083 continue;
1084 prev_fi = fi;
1085 dead = 0;
1086 change_nexthops(fi) {
1087 if (nh->nh_flags&RTNH_F_DEAD)
1088 dead++;
1089 else if (nh->nh_dev == dev &&
1090 nh->nh_scope != scope) {
1091 nh->nh_flags |= RTNH_F_DEAD;
1092#ifdef CONFIG_IP_ROUTE_MULTIPATH
1093 spin_lock_bh(&fib_multipath_lock);
1094 fi->fib_power -= nh->nh_power;
1095 nh->nh_power = 0;
1096 spin_unlock_bh(&fib_multipath_lock);
1097#endif
1098 dead++;
1099 }
1100#ifdef CONFIG_IP_ROUTE_MULTIPATH
1101 if (force > 1 && nh->nh_dev == dev) {
1102 dead = fi->fib_nhs;
1103 break;
1104 }
1105#endif
1106 } endfor_nexthops(fi)
1107 if (dead == fi->fib_nhs) {
1108 fi->fib_flags |= RTNH_F_DEAD;
1109 ret++;
1110 }
1111 }
1112 }
1113
1114 return ret;
1115}
1116
1117#ifdef CONFIG_IP_ROUTE_MULTIPATH
1118
1119/*
1120 Dead device goes up. We wake up dead nexthops.
1121 It takes sense only on multipath routes.
1122 */
1123
1124int fib_sync_up(struct net_device *dev)
1125{
1126 struct fib_info *prev_fi;
1127 unsigned int hash;
1128 struct hlist_head *head;
1129 struct hlist_node *node;
1130 struct fib_nh *nh;
1131 int ret;
1132
1133 if (!(dev->flags&IFF_UP))
1134 return 0;
1135
1136 prev_fi = NULL;
1137 hash = fib_devindex_hashfn(dev->ifindex);
1138 head = &fib_info_devhash[hash];
1139 ret = 0;
1140
1141 hlist_for_each_entry(nh, node, head, nh_hash) {
1142 struct fib_info *fi = nh->nh_parent;
1143 int alive;
1144
1145 BUG_ON(!fi->fib_nhs);
1146 if (nh->nh_dev != dev || fi == prev_fi)
1147 continue;
1148
1149 prev_fi = fi;
1150 alive = 0;
1151 change_nexthops(fi) {
1152 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1153 alive++;
1154 continue;
1155 }
1156 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1157 continue;
e5ed6399 1158 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1da177e4
LT
1159 continue;
1160 alive++;
1161 spin_lock_bh(&fib_multipath_lock);
1162 nh->nh_power = 0;
1163 nh->nh_flags &= ~RTNH_F_DEAD;
1164 spin_unlock_bh(&fib_multipath_lock);
1165 } endfor_nexthops(fi)
1166
1167 if (alive > 0) {
1168 fi->fib_flags &= ~RTNH_F_DEAD;
1169 ret++;
1170 }
1171 }
1172
1173 return ret;
1174}
1175
1176/*
1177 The algorithm is suboptimal, but it provides really
1178 fair weighted route distribution.
1179 */
1180
1181void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1182{
1183 struct fib_info *fi = res->fi;
1184 int w;
1185
1186 spin_lock_bh(&fib_multipath_lock);
1187 if (fi->fib_power <= 0) {
1188 int power = 0;
1189 change_nexthops(fi) {
1190 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1191 power += nh->nh_weight;
1192 nh->nh_power = nh->nh_weight;
1193 }
1194 } endfor_nexthops(fi);
1195 fi->fib_power = power;
1196 if (power <= 0) {
1197 spin_unlock_bh(&fib_multipath_lock);
1198 /* Race condition: route has just become dead. */
1199 res->nh_sel = 0;
1200 return;
1201 }
1202 }
1203
1204
1205 /* w should be random number [0..fi->fib_power-1],
1206 it is pretty bad approximation.
1207 */
1208
1209 w = jiffies % fi->fib_power;
1210
1211 change_nexthops(fi) {
1212 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1213 if ((w -= nh->nh_power) <= 0) {
1214 nh->nh_power--;
1215 fi->fib_power--;
1216 res->nh_sel = nhsel;
1217 spin_unlock_bh(&fib_multipath_lock);
1218 return;
1219 }
1220 }
1221 } endfor_nexthops(fi);
1222
1223 /* Race condition: route has just become dead. */
1224 res->nh_sel = 0;
1225 spin_unlock_bh(&fib_multipath_lock);
1226}
1227#endif