]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_semantics.c
[IPSEC]: kmalloc + memset conversion to kzalloc
[net-next-2.6.git] / net / ipv4 / fib_semantics.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
1da177e4
LT
18#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
14c85021 31#include <linux/inetdevice.h>
1da177e4
LT
32#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
1da177e4
LT
36#include <linux/init.h>
37
14c85021 38#include <net/arp.h>
1da177e4
LT
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
f21c7bc5 45#include <net/netlink.h>
4e902c57 46#include <net/nexthop.h>
1da177e4
LT
47
48#include "fib_lookup.h"
49
50#define FSprintk(a...)
51
832b4c5e 52static DEFINE_SPINLOCK(fib_info_lock);
1da177e4
LT
53static struct hlist_head *fib_info_hash;
54static struct hlist_head *fib_info_laddrhash;
55static unsigned int fib_hash_size;
56static unsigned int fib_info_cnt;
57
58#define DEVINDEX_HASHBITS 8
59#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
60static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
61
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a new scope that declares
 * "nhsel" (index of the current nexthop) and "nh" (pointer to it; const
 * for for_nexthops, writable for change_nexthops) and start a for loop
 * whose body the caller supplies.  The scope opened here MUST be closed
 * with endfor_nexthops(fi).
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh * nh;				\
	for (nhsel=0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh * nh;					\
	for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh);		\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Single-nexthop build: the loop body runs exactly once.  The hope is
 * that gcc optimizes the dummy loop away.
 */

#define for_nexthops(fi) {						\
	int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh;		\
	for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
	for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }
85
86
e905a9ed 87static const struct
1da177e4
LT
88{
89 int error;
90 u8 scope;
a0ee18b9 91} fib_props[RTN_MAX + 1] = {
e905a9ed 92 {
1da177e4
LT
93 .error = 0,
94 .scope = RT_SCOPE_NOWHERE,
95 }, /* RTN_UNSPEC */
96 {
97 .error = 0,
98 .scope = RT_SCOPE_UNIVERSE,
99 }, /* RTN_UNICAST */
100 {
101 .error = 0,
102 .scope = RT_SCOPE_HOST,
103 }, /* RTN_LOCAL */
104 {
105 .error = 0,
106 .scope = RT_SCOPE_LINK,
107 }, /* RTN_BROADCAST */
108 {
109 .error = 0,
110 .scope = RT_SCOPE_LINK,
111 }, /* RTN_ANYCAST */
112 {
113 .error = 0,
114 .scope = RT_SCOPE_UNIVERSE,
115 }, /* RTN_MULTICAST */
116 {
117 .error = -EINVAL,
118 .scope = RT_SCOPE_UNIVERSE,
119 }, /* RTN_BLACKHOLE */
120 {
121 .error = -EHOSTUNREACH,
122 .scope = RT_SCOPE_UNIVERSE,
123 }, /* RTN_UNREACHABLE */
124 {
125 .error = -EACCES,
126 .scope = RT_SCOPE_UNIVERSE,
127 }, /* RTN_PROHIBIT */
128 {
129 .error = -EAGAIN,
130 .scope = RT_SCOPE_UNIVERSE,
131 }, /* RTN_THROW */
132 {
133 .error = -EINVAL,
134 .scope = RT_SCOPE_NOWHERE,
135 }, /* RTN_NAT */
136 {
137 .error = -EINVAL,
138 .scope = RT_SCOPE_NOWHERE,
139 }, /* RTN_XRESOLVE */
140};
141
142
143/* Release a nexthop info record */
144
145void free_fib_info(struct fib_info *fi)
146{
147 if (fi->fib_dead == 0) {
148 printk("Freeing alive fib_info %p\n", fi);
149 return;
150 }
151 change_nexthops(fi) {
152 if (nh->nh_dev)
153 dev_put(nh->nh_dev);
154 nh->nh_dev = NULL;
155 } endfor_nexthops(fi);
156 fib_info_cnt--;
157 kfree(fi);
158}
159
160void fib_release_info(struct fib_info *fi)
161{
832b4c5e 162 spin_lock_bh(&fib_info_lock);
1da177e4
LT
163 if (fi && --fi->fib_treeref == 0) {
164 hlist_del(&fi->fib_hash);
165 if (fi->fib_prefsrc)
166 hlist_del(&fi->fib_lhash);
167 change_nexthops(fi) {
168 if (!nh->nh_dev)
169 continue;
170 hlist_del(&nh->nh_hash);
171 } endfor_nexthops(fi)
172 fi->fib_dead = 1;
173 fib_info_put(fi);
174 }
832b4c5e 175 spin_unlock_bh(&fib_info_lock);
1da177e4
LT
176}
177
178static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
179{
180 const struct fib_nh *onh = ofi->fib_nh;
181
182 for_nexthops(fi) {
183 if (nh->nh_oif != onh->nh_oif ||
184 nh->nh_gw != onh->nh_gw ||
185 nh->nh_scope != onh->nh_scope ||
186#ifdef CONFIG_IP_ROUTE_MULTIPATH
187 nh->nh_weight != onh->nh_weight ||
188#endif
189#ifdef CONFIG_NET_CLS_ROUTE
190 nh->nh_tclassid != onh->nh_tclassid ||
191#endif
192 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
193 return -1;
194 onh++;
195 } endfor_nexthops(fi);
196 return 0;
197}
198
199static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
200{
201 unsigned int mask = (fib_hash_size - 1);
202 unsigned int val = fi->fib_nhs;
203
204 val ^= fi->fib_protocol;
81f7bf6c 205 val ^= (__force u32)fi->fib_prefsrc;
1da177e4
LT
206 val ^= fi->fib_priority;
207
208 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
209}
210
211static struct fib_info *fib_find_info(const struct fib_info *nfi)
212{
213 struct hlist_head *head;
214 struct hlist_node *node;
215 struct fib_info *fi;
216 unsigned int hash;
217
218 hash = fib_info_hashfn(nfi);
219 head = &fib_info_hash[hash];
220
221 hlist_for_each_entry(fi, node, head, fib_hash) {
222 if (fi->fib_nhs != nfi->fib_nhs)
223 continue;
224 if (nfi->fib_protocol == fi->fib_protocol &&
225 nfi->fib_prefsrc == fi->fib_prefsrc &&
226 nfi->fib_priority == fi->fib_priority &&
227 memcmp(nfi->fib_metrics, fi->fib_metrics,
228 sizeof(fi->fib_metrics)) == 0 &&
229 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
230 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
231 return fi;
232 }
233
234 return NULL;
235}
236
237static inline unsigned int fib_devindex_hashfn(unsigned int val)
238{
239 unsigned int mask = DEVINDEX_HASHSIZE - 1;
240
241 return (val ^
242 (val >> DEVINDEX_HASHBITS) ^
243 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
244}
245
246/* Check, that the gateway is already configured.
247 Used only by redirect accept routine.
248 */
249
d878e72e 250int ip_fib_check_default(__be32 gw, struct net_device *dev)
1da177e4
LT
251{
252 struct hlist_head *head;
253 struct hlist_node *node;
254 struct fib_nh *nh;
255 unsigned int hash;
256
832b4c5e 257 spin_lock(&fib_info_lock);
1da177e4
LT
258
259 hash = fib_devindex_hashfn(dev->ifindex);
260 head = &fib_info_devhash[hash];
261 hlist_for_each_entry(nh, node, head, nh_hash) {
262 if (nh->nh_dev == dev &&
263 nh->nh_gw == gw &&
264 !(nh->nh_flags&RTNH_F_DEAD)) {
832b4c5e 265 spin_unlock(&fib_info_lock);
1da177e4
LT
266 return 0;
267 }
268 }
269
832b4c5e 270 spin_unlock(&fib_info_lock);
1da177e4
LT
271
272 return -1;
273}
274
339bf98f
TG
275static inline size_t fib_nlmsg_size(struct fib_info *fi)
276{
277 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
278 + nla_total_size(4) /* RTA_TABLE */
279 + nla_total_size(4) /* RTA_DST */
280 + nla_total_size(4) /* RTA_PRIORITY */
281 + nla_total_size(4); /* RTA_PREFSRC */
282
283 /* space for nested metrics */
284 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
285
286 if (fi->fib_nhs) {
287 /* Also handles the special case fib_nhs == 1 */
288
289 /* each nexthop is packed in an attribute */
290 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
291
292 /* may contain flow and gateway attribute */
293 nhsize += 2 * nla_total_size(4);
294
295 /* all nexthops are packed in a nested attribute */
296 payload += nla_total_size(fi->fib_nhs * nhsize);
297 }
298
299 return payload;
300}
301
81f7bf6c 302void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
b8f55831
MK
303 int dst_len, u32 tb_id, struct nl_info *info,
304 unsigned int nlm_flags)
1da177e4
LT
305{
306 struct sk_buff *skb;
4e902c57 307 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
f21c7bc5 308 int err = -ENOBUFS;
1da177e4 309
339bf98f 310 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
f21c7bc5
TG
311 if (skb == NULL)
312 goto errout;
1da177e4 313
4e902c57 314 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
be403ea1 315 fa->fa_type, fa->fa_scope, key, dst_len,
b8f55831 316 fa->fa_tos, fa->fa_info, nlm_flags);
26932566
PM
317 if (err < 0) {
318 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
319 WARN_ON(err == -EMSGSIZE);
320 kfree_skb(skb);
321 goto errout;
322 }
97c53cac 323 err = rtnl_notify(skb, &init_net, info->pid, RTNLGRP_IPV4_ROUTE,
4e902c57 324 info->nlh, GFP_KERNEL);
f21c7bc5
TG
325errout:
326 if (err < 0)
97c53cac 327 rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_ROUTE, err);
1da177e4
LT
328}
329
330/* Return the first fib alias matching TOS with
331 * priority less than or equal to PRIO.
332 */
333struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
334{
335 if (fah) {
336 struct fib_alias *fa;
337 list_for_each_entry(fa, fah, fa_list) {
338 if (fa->fa_tos > tos)
339 continue;
340 if (fa->fa_info->fib_priority >= prio ||
341 fa->fa_tos < tos)
342 return fa;
343 }
344 }
345 return NULL;
346}
347
348int fib_detect_death(struct fib_info *fi, int order,
349 struct fib_info **last_resort, int *last_idx, int *dflt)
350{
351 struct neighbour *n;
352 int state = NUD_NONE;
353
354 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
355 if (n) {
356 state = n->nud_state;
357 neigh_release(n);
358 }
359 if (state==NUD_REACHABLE)
360 return 0;
361 if ((state&NUD_VALID) && order != *dflt)
362 return 0;
363 if ((state&NUD_VALID) ||
364 (*last_idx<0 && order > *dflt)) {
365 *last_resort = fi;
366 *last_idx = order;
367 }
368 return 1;
369}
370
371#ifdef CONFIG_IP_ROUTE_MULTIPATH
372
/*
 * Count the rtnexthop entries in a buffer of @remaining bytes starting
 * at @rtnh.  Returns 0 if bytes are left over after the last
 * well-formed entry.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
385
4e902c57
TG
386static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
387 int remaining, struct fib_config *cfg)
1da177e4 388{
1da177e4 389 change_nexthops(fi) {
4e902c57
TG
390 int attrlen;
391
392 if (!rtnh_ok(rtnh, remaining))
1da177e4 393 return -EINVAL;
4e902c57
TG
394
395 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
396 nh->nh_oif = rtnh->rtnh_ifindex;
397 nh->nh_weight = rtnh->rtnh_hops + 1;
398
399 attrlen = rtnh_attrlen(rtnh);
400 if (attrlen > 0) {
401 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
402
403 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
17fb2c64 404 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
1da177e4 405#ifdef CONFIG_NET_CLS_ROUTE
4e902c57
TG
406 nla = nla_find(attrs, attrlen, RTA_FLOW);
407 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
1da177e4
LT
408#endif
409 }
4e902c57
TG
410
411 rtnh = rtnh_next(rtnh, &remaining);
1da177e4 412 } endfor_nexthops(fi);
4e902c57 413
1da177e4
LT
414 return 0;
415}
416
417#endif
418
4e902c57 419int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
1da177e4
LT
420{
421#ifdef CONFIG_IP_ROUTE_MULTIPATH
4e902c57
TG
422 struct rtnexthop *rtnh;
423 int remaining;
1da177e4
LT
424#endif
425
4e902c57 426 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
1da177e4
LT
427 return 1;
428
4e902c57
TG
429 if (cfg->fc_oif || cfg->fc_gw) {
430 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
431 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
1da177e4
LT
432 return 0;
433 return 1;
434 }
435
436#ifdef CONFIG_IP_ROUTE_MULTIPATH
4e902c57 437 if (cfg->fc_mp == NULL)
1da177e4 438 return 0;
4e902c57
TG
439
440 rtnh = cfg->fc_mp;
441 remaining = cfg->fc_mp_len;
e905a9ed 442
1da177e4 443 for_nexthops(fi) {
4e902c57 444 int attrlen;
1da177e4 445
4e902c57 446 if (!rtnh_ok(rtnh, remaining))
1da177e4 447 return -EINVAL;
4e902c57
TG
448
449 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
1da177e4 450 return 1;
4e902c57
TG
451
452 attrlen = rtnh_attrlen(rtnh);
453 if (attrlen < 0) {
454 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
455
456 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
17fb2c64 457 if (nla && nla_get_be32(nla) != nh->nh_gw)
1da177e4
LT
458 return 1;
459#ifdef CONFIG_NET_CLS_ROUTE
4e902c57
TG
460 nla = nla_find(attrs, attrlen, RTA_FLOW);
461 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
1da177e4
LT
462 return 1;
463#endif
464 }
4e902c57
TG
465
466 rtnh = rtnh_next(rtnh, &remaining);
1da177e4
LT
467 } endfor_nexthops(fi);
468#endif
469 return 0;
470}
471
472
473/*
474 Picture
475 -------
476
   Semantics of nexthop is very messy for historical reasons.
478 We have to take into account, that:
479 a) gateway can be actually local interface address,
480 so that gatewayed route is direct.
481 b) gateway must be on-link address, possibly
482 described not by an ifaddr, but also by a direct route.
483 c) If both gateway and interface are specified, they should not
484 contradict.
485 d) If we use tunnel routes, gateway could be not on-link.
486
487 Attempt to reconcile all of these (alas, self-contradictory) conditions
488 results in pretty ugly and hairy code with obscure logic.
489
   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
493 Every prefix is assigned a "scope" value: "host" is local address,
494 "link" is direct route,
495 [ ... "site" ... "interior" ... ]
496 and "universe" is true gateway route with global meaning.
497
498 Every prefix refers to a set of "nexthop"s (gw, oif),
499 where gw must have narrower scope. This recursion stops
500 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
501 which means that gw is forced to be on link.
502
   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes
   to coexist in peace.
507
508 Normally it looks as following.
509
510 {universe prefix} -> (gw, oif) [scope link]
e905a9ed 511 |
1da177e4 512 |-> {link prefix} -> (gw, oif) [scope local]
e905a9ed 513 |
1da177e4
LT
514 |-> {local prefix} (terminal node)
515 */
516
4e902c57
TG
517static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
518 struct fib_nh *nh)
1da177e4
LT
519{
520 int err;
521
522 if (nh->nh_gw) {
523 struct fib_result res;
524
525#ifdef CONFIG_IP_ROUTE_PERVASIVE
526 if (nh->nh_flags&RTNH_F_PERVASIVE)
527 return 0;
528#endif
529 if (nh->nh_flags&RTNH_F_ONLINK) {
530 struct net_device *dev;
531
4e902c57 532 if (cfg->fc_scope >= RT_SCOPE_LINK)
1da177e4
LT
533 return -EINVAL;
534 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
535 return -EINVAL;
881d966b 536 if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
1da177e4
LT
537 return -ENODEV;
538 if (!(dev->flags&IFF_UP))
539 return -ENETDOWN;
540 nh->nh_dev = dev;
541 dev_hold(dev);
542 nh->nh_scope = RT_SCOPE_LINK;
543 return 0;
544 }
545 {
4e902c57
TG
546 struct flowi fl = {
547 .nl_u = {
548 .ip4_u = {
549 .daddr = nh->nh_gw,
550 .scope = cfg->fc_scope + 1,
551 },
552 },
553 .oif = nh->nh_oif,
554 };
1da177e4
LT
555
556 /* It is not necessary, but requires a bit of thinking */
557 if (fl.fl4_scope < RT_SCOPE_LINK)
558 fl.fl4_scope = RT_SCOPE_LINK;
559 if ((err = fib_lookup(&fl, &res)) != 0)
560 return err;
561 }
562 err = -EINVAL;
563 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
564 goto out;
565 nh->nh_scope = res.scope;
566 nh->nh_oif = FIB_RES_OIF(res);
567 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
568 goto out;
569 dev_hold(nh->nh_dev);
570 err = -ENETDOWN;
571 if (!(nh->nh_dev->flags & IFF_UP))
572 goto out;
573 err = 0;
574out:
575 fib_res_put(&res);
576 return err;
577 } else {
578 struct in_device *in_dev;
579
580 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
581 return -EINVAL;
582
583 in_dev = inetdev_by_index(nh->nh_oif);
584 if (in_dev == NULL)
585 return -ENODEV;
586 if (!(in_dev->dev->flags&IFF_UP)) {
587 in_dev_put(in_dev);
588 return -ENETDOWN;
589 }
590 nh->nh_dev = in_dev->dev;
591 dev_hold(nh->nh_dev);
592 nh->nh_scope = RT_SCOPE_HOST;
593 in_dev_put(in_dev);
594 }
595 return 0;
596}
597
81f7bf6c 598static inline unsigned int fib_laddr_hashfn(__be32 val)
1da177e4
LT
599{
600 unsigned int mask = (fib_hash_size - 1);
601
81f7bf6c 602 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
1da177e4
LT
603}
604
605static struct hlist_head *fib_hash_alloc(int bytes)
606{
607 if (bytes <= PAGE_SIZE)
608 return kmalloc(bytes, GFP_KERNEL);
609 else
610 return (struct hlist_head *)
611 __get_free_pages(GFP_KERNEL, get_order(bytes));
612}
613
614static void fib_hash_free(struct hlist_head *hash, int bytes)
615{
616 if (!hash)
617 return;
618
619 if (bytes <= PAGE_SIZE)
620 kfree(hash);
621 else
622 free_pages((unsigned long) hash, get_order(bytes));
623}
624
625static void fib_hash_move(struct hlist_head *new_info_hash,
626 struct hlist_head *new_laddrhash,
627 unsigned int new_size)
628{
b7656e7f 629 struct hlist_head *old_info_hash, *old_laddrhash;
1da177e4 630 unsigned int old_size = fib_hash_size;
b7656e7f 631 unsigned int i, bytes;
1da177e4 632
832b4c5e 633 spin_lock_bh(&fib_info_lock);
b7656e7f
DM
634 old_info_hash = fib_info_hash;
635 old_laddrhash = fib_info_laddrhash;
1da177e4
LT
636 fib_hash_size = new_size;
637
638 for (i = 0; i < old_size; i++) {
639 struct hlist_head *head = &fib_info_hash[i];
640 struct hlist_node *node, *n;
641 struct fib_info *fi;
642
643 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
644 struct hlist_head *dest;
645 unsigned int new_hash;
646
647 hlist_del(&fi->fib_hash);
648
649 new_hash = fib_info_hashfn(fi);
650 dest = &new_info_hash[new_hash];
651 hlist_add_head(&fi->fib_hash, dest);
652 }
653 }
654 fib_info_hash = new_info_hash;
655
656 for (i = 0; i < old_size; i++) {
657 struct hlist_head *lhead = &fib_info_laddrhash[i];
658 struct hlist_node *node, *n;
659 struct fib_info *fi;
660
661 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
662 struct hlist_head *ldest;
663 unsigned int new_hash;
664
665 hlist_del(&fi->fib_lhash);
666
667 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
668 ldest = &new_laddrhash[new_hash];
669 hlist_add_head(&fi->fib_lhash, ldest);
670 }
671 }
672 fib_info_laddrhash = new_laddrhash;
673
832b4c5e 674 spin_unlock_bh(&fib_info_lock);
b7656e7f
DM
675
676 bytes = old_size * sizeof(struct hlist_head *);
677 fib_hash_free(old_info_hash, bytes);
678 fib_hash_free(old_laddrhash, bytes);
1da177e4
LT
679}
680
4e902c57 681struct fib_info *fib_create_info(struct fib_config *cfg)
1da177e4
LT
682{
683 int err;
684 struct fib_info *fi = NULL;
685 struct fib_info *ofi;
1da177e4 686 int nhs = 1;
1da177e4
LT
687
688 /* Fast check to catch the most weird cases */
4e902c57 689 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
1da177e4
LT
690 goto err_inval;
691
692#ifdef CONFIG_IP_ROUTE_MULTIPATH
4e902c57
TG
693 if (cfg->fc_mp) {
694 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
1da177e4
LT
695 if (nhs == 0)
696 goto err_inval;
697 }
698#endif
1da177e4
LT
699
700 err = -ENOBUFS;
701 if (fib_info_cnt >= fib_hash_size) {
702 unsigned int new_size = fib_hash_size << 1;
703 struct hlist_head *new_info_hash;
704 struct hlist_head *new_laddrhash;
705 unsigned int bytes;
706
707 if (!new_size)
708 new_size = 1;
709 bytes = new_size * sizeof(struct hlist_head *);
710 new_info_hash = fib_hash_alloc(bytes);
711 new_laddrhash = fib_hash_alloc(bytes);
712 if (!new_info_hash || !new_laddrhash) {
713 fib_hash_free(new_info_hash, bytes);
714 fib_hash_free(new_laddrhash, bytes);
715 } else {
716 memset(new_info_hash, 0, bytes);
717 memset(new_laddrhash, 0, bytes);
718
719 fib_hash_move(new_info_hash, new_laddrhash, new_size);
720 }
721
722 if (!fib_hash_size)
723 goto failure;
724 }
725
0da974f4 726 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
1da177e4
LT
727 if (fi == NULL)
728 goto failure;
729 fib_info_cnt++;
1da177e4 730
4e902c57
TG
731 fi->fib_protocol = cfg->fc_protocol;
732 fi->fib_flags = cfg->fc_flags;
733 fi->fib_priority = cfg->fc_priority;
734 fi->fib_prefsrc = cfg->fc_prefsrc;
1da177e4
LT
735
736 fi->fib_nhs = nhs;
737 change_nexthops(fi) {
738 nh->nh_parent = fi;
739 } endfor_nexthops(fi)
740
4e902c57
TG
741 if (cfg->fc_mx) {
742 struct nlattr *nla;
743 int remaining;
744
745 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 746 int type = nla_type(nla);
4e902c57
TG
747
748 if (type) {
749 if (type > RTAX_MAX)
1da177e4 750 goto err_inval;
4e902c57 751 fi->fib_metrics[type - 1] = nla_get_u32(nla);
1da177e4 752 }
1da177e4
LT
753 }
754 }
1da177e4 755
4e902c57 756 if (cfg->fc_mp) {
1da177e4 757#ifdef CONFIG_IP_ROUTE_MULTIPATH
4e902c57
TG
758 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
759 if (err != 0)
1da177e4 760 goto failure;
4e902c57 761 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
1da177e4 762 goto err_inval;
4e902c57 763 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
1da177e4
LT
764 goto err_inval;
765#ifdef CONFIG_NET_CLS_ROUTE
4e902c57 766 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
1da177e4
LT
767 goto err_inval;
768#endif
769#else
770 goto err_inval;
771#endif
772 } else {
773 struct fib_nh *nh = fi->fib_nh;
4e902c57
TG
774
775 nh->nh_oif = cfg->fc_oif;
776 nh->nh_gw = cfg->fc_gw;
777 nh->nh_flags = cfg->fc_flags;
1da177e4 778#ifdef CONFIG_NET_CLS_ROUTE
4e902c57 779 nh->nh_tclassid = cfg->fc_flow;
1da177e4 780#endif
1da177e4
LT
781#ifdef CONFIG_IP_ROUTE_MULTIPATH
782 nh->nh_weight = 1;
783#endif
784 }
785
4e902c57
TG
786 if (fib_props[cfg->fc_type].error) {
787 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
1da177e4
LT
788 goto err_inval;
789 goto link_it;
790 }
791
4e902c57 792 if (cfg->fc_scope > RT_SCOPE_HOST)
1da177e4
LT
793 goto err_inval;
794
4e902c57 795 if (cfg->fc_scope == RT_SCOPE_HOST) {
1da177e4
LT
796 struct fib_nh *nh = fi->fib_nh;
797
798 /* Local address is added. */
799 if (nhs != 1 || nh->nh_gw)
800 goto err_inval;
801 nh->nh_scope = RT_SCOPE_NOWHERE;
881d966b 802 nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif);
1da177e4
LT
803 err = -ENODEV;
804 if (nh->nh_dev == NULL)
805 goto failure;
806 } else {
807 change_nexthops(fi) {
4e902c57 808 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
1da177e4
LT
809 goto failure;
810 } endfor_nexthops(fi)
811 }
812
813 if (fi->fib_prefsrc) {
4e902c57
TG
814 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
815 fi->fib_prefsrc != cfg->fc_dst)
1da177e4
LT
816 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
817 goto err_inval;
818 }
819
820link_it:
821 if ((ofi = fib_find_info(fi)) != NULL) {
822 fi->fib_dead = 1;
823 free_fib_info(fi);
824 ofi->fib_treeref++;
825 return ofi;
826 }
827
828 fi->fib_treeref++;
829 atomic_inc(&fi->fib_clntref);
832b4c5e 830 spin_lock_bh(&fib_info_lock);
1da177e4
LT
831 hlist_add_head(&fi->fib_hash,
832 &fib_info_hash[fib_info_hashfn(fi)]);
833 if (fi->fib_prefsrc) {
834 struct hlist_head *head;
835
836 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
837 hlist_add_head(&fi->fib_lhash, head);
838 }
839 change_nexthops(fi) {
840 struct hlist_head *head;
841 unsigned int hash;
842
843 if (!nh->nh_dev)
844 continue;
845 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
846 head = &fib_info_devhash[hash];
847 hlist_add_head(&nh->nh_hash, head);
848 } endfor_nexthops(fi)
832b4c5e 849 spin_unlock_bh(&fib_info_lock);
1da177e4
LT
850 return fi;
851
852err_inval:
853 err = -EINVAL;
854
855failure:
e905a9ed 856 if (fi) {
1da177e4
LT
857 fi->fib_dead = 1;
858 free_fib_info(fi);
859 }
4e902c57
TG
860
861 return ERR_PTR(err);
1da177e4
LT
862}
863
e5b43760 864/* Note! fib_semantic_match intentionally uses RCU list functions. */
1da177e4 865int fib_semantic_match(struct list_head *head, const struct flowi *flp,
1ef1b8c8 866 struct fib_result *res, __be32 zone, __be32 mask,
1da177e4
LT
867 int prefixlen)
868{
869 struct fib_alias *fa;
870 int nh_sel = 0;
871
e5b43760 872 list_for_each_entry_rcu(fa, head, fa_list) {
1da177e4
LT
873 int err;
874
875 if (fa->fa_tos &&
876 fa->fa_tos != flp->fl4_tos)
877 continue;
878
879 if (fa->fa_scope < flp->fl4_scope)
880 continue;
881
882 fa->fa_state |= FA_S_ACCESSED;
883
884 err = fib_props[fa->fa_type].error;
885 if (err == 0) {
886 struct fib_info *fi = fa->fa_info;
887
888 if (fi->fib_flags & RTNH_F_DEAD)
889 continue;
890
891 switch (fa->fa_type) {
892 case RTN_UNICAST:
893 case RTN_LOCAL:
894 case RTN_BROADCAST:
895 case RTN_ANYCAST:
896 case RTN_MULTICAST:
897 for_nexthops(fi) {
898 if (nh->nh_flags&RTNH_F_DEAD)
899 continue;
900 if (!flp->oif || flp->oif == nh->nh_oif)
901 break;
902 }
903#ifdef CONFIG_IP_ROUTE_MULTIPATH
904 if (nhsel < fi->fib_nhs) {
905 nh_sel = nhsel;
906 goto out_fill_res;
907 }
908#else
909 if (nhsel < 1) {
910 goto out_fill_res;
911 }
912#endif
913 endfor_nexthops(fi);
914 continue;
915
916 default:
917 printk(KERN_DEBUG "impossible 102\n");
918 return -EINVAL;
3ff50b79 919 }
1da177e4
LT
920 }
921 return err;
922 }
923 return 1;
924
925out_fill_res:
926 res->prefixlen = prefixlen;
927 res->nh_sel = nh_sel;
928 res->type = fa->fa_type;
929 res->scope = fa->fa_scope;
930 res->fi = fa->fa_info;
1da177e4
LT
931 atomic_inc(&res->fi->fib_clntref);
932 return 0;
933}
934
935/* Find appropriate source address to this destination */
936
b83738ae 937__be32 __fib_res_prefsrc(struct fib_result *res)
1da177e4
LT
938{
939 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
940}
941
be403ea1 942int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
81f7bf6c 943 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
be403ea1 944 struct fib_info *fi, unsigned int flags)
1da177e4 945{
be403ea1 946 struct nlmsghdr *nlh;
1da177e4 947 struct rtmsg *rtm;
1da177e4 948
be403ea1
TG
949 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
950 if (nlh == NULL)
26932566 951 return -EMSGSIZE;
be403ea1
TG
952
953 rtm = nlmsg_data(nlh);
1da177e4
LT
954 rtm->rtm_family = AF_INET;
955 rtm->rtm_dst_len = dst_len;
956 rtm->rtm_src_len = 0;
957 rtm->rtm_tos = tos;
958 rtm->rtm_table = tb_id;
be403ea1 959 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
1da177e4
LT
960 rtm->rtm_type = type;
961 rtm->rtm_flags = fi->fib_flags;
962 rtm->rtm_scope = scope;
1da177e4 963 rtm->rtm_protocol = fi->fib_protocol;
be403ea1
TG
964
965 if (rtm->rtm_dst_len)
17fb2c64 966 NLA_PUT_BE32(skb, RTA_DST, dst);
be403ea1 967
1da177e4 968 if (fi->fib_priority)
be403ea1
TG
969 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
970
1da177e4 971 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
be403ea1
TG
972 goto nla_put_failure;
973
1da177e4 974 if (fi->fib_prefsrc)
17fb2c64 975 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
be403ea1 976
1da177e4
LT
977 if (fi->fib_nhs == 1) {
978 if (fi->fib_nh->nh_gw)
17fb2c64 979 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
be403ea1 980
1da177e4 981 if (fi->fib_nh->nh_oif)
be403ea1 982 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
8265abc0
PM
983#ifdef CONFIG_NET_CLS_ROUTE
984 if (fi->fib_nh[0].nh_tclassid)
be403ea1 985 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
8265abc0 986#endif
1da177e4
LT
987 }
988#ifdef CONFIG_IP_ROUTE_MULTIPATH
989 if (fi->fib_nhs > 1) {
be403ea1
TG
990 struct rtnexthop *rtnh;
991 struct nlattr *mp;
992
993 mp = nla_nest_start(skb, RTA_MULTIPATH);
994 if (mp == NULL)
995 goto nla_put_failure;
1da177e4
LT
996
997 for_nexthops(fi) {
be403ea1
TG
998 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
999 if (rtnh == NULL)
1000 goto nla_put_failure;
1001
1002 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1003 rtnh->rtnh_hops = nh->nh_weight - 1;
1004 rtnh->rtnh_ifindex = nh->nh_oif;
1005
1da177e4 1006 if (nh->nh_gw)
17fb2c64 1007 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
8265abc0
PM
1008#ifdef CONFIG_NET_CLS_ROUTE
1009 if (nh->nh_tclassid)
be403ea1 1010 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
8265abc0 1011#endif
be403ea1
TG
1012 /* length of rtnetlink header + attributes */
1013 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1da177e4 1014 } endfor_nexthops(fi);
be403ea1
TG
1015
1016 nla_nest_end(skb, mp);
1da177e4
LT
1017 }
1018#endif
be403ea1 1019 return nlmsg_end(skb, nlh);
1da177e4 1020
be403ea1 1021nla_put_failure:
26932566
PM
1022 nlmsg_cancel(skb, nlh);
1023 return -EMSGSIZE;
1da177e4
LT
1024}
1025
1da177e4
LT
1026/*
1027 Update FIB if:
1028 - local address disappeared -> we must delete all the entries
1029 referring to it.
1030 - device went down -> we must shutdown all nexthops going via it.
1031 */
1032
81f7bf6c 1033int fib_sync_down(__be32 local, struct net_device *dev, int force)
1da177e4
LT
1034{
1035 int ret = 0;
1036 int scope = RT_SCOPE_NOWHERE;
e905a9ed 1037
1da177e4
LT
1038 if (force)
1039 scope = -1;
1040
1041 if (local && fib_info_laddrhash) {
1042 unsigned int hash = fib_laddr_hashfn(local);
1043 struct hlist_head *head = &fib_info_laddrhash[hash];
1044 struct hlist_node *node;
1045 struct fib_info *fi;
1046
1047 hlist_for_each_entry(fi, node, head, fib_lhash) {
1048 if (fi->fib_prefsrc == local) {
1049 fi->fib_flags |= RTNH_F_DEAD;
1050 ret++;
1051 }
1052 }
1053 }
1054
1055 if (dev) {
1056 struct fib_info *prev_fi = NULL;
1057 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1058 struct hlist_head *head = &fib_info_devhash[hash];
1059 struct hlist_node *node;
1060 struct fib_nh *nh;
1061
1062 hlist_for_each_entry(nh, node, head, nh_hash) {
1063 struct fib_info *fi = nh->nh_parent;
1064 int dead;
1065
1066 BUG_ON(!fi->fib_nhs);
1067 if (nh->nh_dev != dev || fi == prev_fi)
1068 continue;
1069 prev_fi = fi;
1070 dead = 0;
1071 change_nexthops(fi) {
1072 if (nh->nh_flags&RTNH_F_DEAD)
1073 dead++;
1074 else if (nh->nh_dev == dev &&
1075 nh->nh_scope != scope) {
1076 nh->nh_flags |= RTNH_F_DEAD;
1077#ifdef CONFIG_IP_ROUTE_MULTIPATH
1078 spin_lock_bh(&fib_multipath_lock);
1079 fi->fib_power -= nh->nh_power;
1080 nh->nh_power = 0;
1081 spin_unlock_bh(&fib_multipath_lock);
1082#endif
1083 dead++;
1084 }
1085#ifdef CONFIG_IP_ROUTE_MULTIPATH
1086 if (force > 1 && nh->nh_dev == dev) {
1087 dead = fi->fib_nhs;
1088 break;
1089 }
1090#endif
1091 } endfor_nexthops(fi)
1092 if (dead == fi->fib_nhs) {
1093 fi->fib_flags |= RTNH_F_DEAD;
1094 ret++;
1095 }
1096 }
1097 }
1098
1099 return ret;
1100}
1101
1102#ifdef CONFIG_IP_ROUTE_MULTIPATH
1103
1104/*
1105 Dead device goes up. We wake up dead nexthops.
1106 It takes sense only on multipath routes.
1107 */
1108
1109int fib_sync_up(struct net_device *dev)
1110{
1111 struct fib_info *prev_fi;
1112 unsigned int hash;
1113 struct hlist_head *head;
1114 struct hlist_node *node;
1115 struct fib_nh *nh;
1116 int ret;
1117
1118 if (!(dev->flags&IFF_UP))
1119 return 0;
1120
1121 prev_fi = NULL;
1122 hash = fib_devindex_hashfn(dev->ifindex);
1123 head = &fib_info_devhash[hash];
1124 ret = 0;
1125
1126 hlist_for_each_entry(nh, node, head, nh_hash) {
1127 struct fib_info *fi = nh->nh_parent;
1128 int alive;
1129
1130 BUG_ON(!fi->fib_nhs);
1131 if (nh->nh_dev != dev || fi == prev_fi)
1132 continue;
1133
1134 prev_fi = fi;
1135 alive = 0;
1136 change_nexthops(fi) {
1137 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1138 alive++;
1139 continue;
1140 }
1141 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1142 continue;
e5ed6399 1143 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1da177e4
LT
1144 continue;
1145 alive++;
1146 spin_lock_bh(&fib_multipath_lock);
1147 nh->nh_power = 0;
1148 nh->nh_flags &= ~RTNH_F_DEAD;
1149 spin_unlock_bh(&fib_multipath_lock);
1150 } endfor_nexthops(fi)
1151
1152 if (alive > 0) {
1153 fi->fib_flags &= ~RTNH_F_DEAD;
1154 ret++;
1155 }
1156 }
1157
1158 return ret;
1159}
1160
1161/*
1162 The algorithm is suboptimal, but it provides really
1163 fair weighted route distribution.
1164 */
1165
1166void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1167{
1168 struct fib_info *fi = res->fi;
1169 int w;
1170
1171 spin_lock_bh(&fib_multipath_lock);
1172 if (fi->fib_power <= 0) {
1173 int power = 0;
1174 change_nexthops(fi) {
1175 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1176 power += nh->nh_weight;
1177 nh->nh_power = nh->nh_weight;
1178 }
1179 } endfor_nexthops(fi);
1180 fi->fib_power = power;
1181 if (power <= 0) {
1182 spin_unlock_bh(&fib_multipath_lock);
1183 /* Race condition: route has just become dead. */
1184 res->nh_sel = 0;
1185 return;
1186 }
1187 }
1188
1189
1190 /* w should be random number [0..fi->fib_power-1],
1191 it is pretty bad approximation.
1192 */
1193
1194 w = jiffies % fi->fib_power;
1195
1196 change_nexthops(fi) {
1197 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1198 if ((w -= nh->nh_power) <= 0) {
1199 nh->nh_power--;
1200 fi->fib_power--;
1201 res->nh_sel = nhsel;
1202 spin_unlock_bh(&fib_multipath_lock);
1203 return;
1204 }
1205 }
1206 } endfor_nexthops(fi);
1207
1208 /* Race condition: route has just become dead. */
1209 res->nh_sel = 0;
1210 spin_unlock_bh(&fib_multipath_lock);
1211}
1212#endif