]> bbs.cooldavid.org Git - net-next-2.6.git/blame_incremental - net/ipv4/fib_frontend.c
wimax: make functions local
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
... / ...
CommitLineData
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
20#include <linux/capability.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
30#include <linux/inetdevice.h>
31#include <linux/netdevice.h>
32#include <linux/if_addr.h>
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
35#include <linux/init.h>
36#include <linux/list.h>
37#include <linux/slab.h>
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/arp.h>
45#include <net/ip_fib.h>
46#include <net/rtnetlink.h>
47
48#ifndef CONFIG_IP_MULTIPLE_TABLES
49
50static int __net_init fib4_rules_init(struct net *net)
51{
52 struct fib_table *local_table, *main_table;
53
54 local_table = fib_hash_table(RT_TABLE_LOCAL);
55 if (local_table == NULL)
56 return -ENOMEM;
57
58 main_table = fib_hash_table(RT_TABLE_MAIN);
59 if (main_table == NULL)
60 goto fail;
61
62 hlist_add_head_rcu(&local_table->tb_hlist,
63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
64 hlist_add_head_rcu(&main_table->tb_hlist,
65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
66 return 0;
67
68fail:
69 kfree(local_table);
70 return -ENOMEM;
71}
72#else
73
74struct fib_table *fib_new_table(struct net *net, u32 id)
75{
76 struct fib_table *tb;
77 unsigned int h;
78
79 if (id == 0)
80 id = RT_TABLE_MAIN;
81 tb = fib_get_table(net, id);
82 if (tb)
83 return tb;
84
85 tb = fib_hash_table(id);
86 if (!tb)
87 return NULL;
88 h = id & (FIB_TABLE_HASHSZ - 1);
89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
90 return tb;
91}
92
93struct fib_table *fib_get_table(struct net *net, u32 id)
94{
95 struct fib_table *tb;
96 struct hlist_node *node;
97 struct hlist_head *head;
98 unsigned int h;
99
100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
103
104 rcu_read_lock();
105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
127 tb = fib_get_table(net, table);
128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129 fib_table_select_default(tb, flp, res);
130}
131
132static void fib_flush(struct net *net)
133{
134 int flushed = 0;
135 struct fib_table *tb;
136 struct hlist_node *node;
137 struct hlist_head *head;
138 unsigned int h;
139
140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
143 flushed += fib_table_flush(tb);
144 }
145
146 if (flushed)
147 rt_cache_flush(net, -1);
148}
149
150/**
151 * __ip_dev_find - find the first device with a given source address.
152 * @net: the net namespace
153 * @addr: the source address
154 * @devref: if true, take a reference on the found device
155 *
156 * If a caller uses devref=false, it should be protected by RCU
157 */
158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
159{
160 struct flowi fl = {
161 .nl_u = {
162 .ip4_u = {
163 .daddr = addr
164 }
165 },
166 .flags = FLOWI_FLAG_MATCH_ANY_IIF
167 };
168 struct fib_result res = { 0 };
169 struct net_device *dev = NULL;
170
171 if (fib_lookup(net, &fl, &res))
172 return NULL;
173 if (res.type != RTN_LOCAL)
174 goto out;
175 dev = FIB_RES_DEV(res);
176
177 if (dev && devref)
178 dev_hold(dev);
179out:
180 fib_res_put(&res);
181 return dev;
182}
183EXPORT_SYMBOL(__ip_dev_find);
184
185/*
186 * Find address type as if only "dev" was present in the system. If
187 * on_dev is NULL then all interfaces are taken into consideration.
188 */
189static inline unsigned __inet_dev_addr_type(struct net *net,
190 const struct net_device *dev,
191 __be32 addr)
192{
193 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
194 struct fib_result res;
195 unsigned ret = RTN_BROADCAST;
196 struct fib_table *local_table;
197
198 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
199 return RTN_BROADCAST;
200 if (ipv4_is_multicast(addr))
201 return RTN_MULTICAST;
202
203#ifdef CONFIG_IP_MULTIPLE_TABLES
204 res.r = NULL;
205#endif
206
207 local_table = fib_get_table(net, RT_TABLE_LOCAL);
208 if (local_table) {
209 ret = RTN_UNICAST;
210 if (!fib_table_lookup(local_table, &fl, &res)) {
211 if (!dev || dev == res.fi->fib_dev)
212 ret = res.type;
213 fib_res_put(&res);
214 }
215 }
216 return ret;
217}
218
219unsigned int inet_addr_type(struct net *net, __be32 addr)
220{
221 return __inet_dev_addr_type(net, NULL, addr);
222}
223EXPORT_SYMBOL(inet_addr_type);
224
225unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
226 __be32 addr)
227{
228 return __inet_dev_addr_type(net, dev, addr);
229}
230EXPORT_SYMBOL(inet_dev_addr_type);
231
232/* Given (packet source, input interface) and optional (dst, oif, tos):
233 - (main) check, that source is valid i.e. not broadcast or our local
234 address.
235 - figure out what "logical" interface this packet arrived
236 and calculate "specific destination" address.
237 - check, that packet arrived from expected physical interface.
238 */
239
240int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
241 struct net_device *dev, __be32 *spec_dst,
242 u32 *itag, u32 mark)
243{
244 struct in_device *in_dev;
245 struct flowi fl = { .nl_u = { .ip4_u =
246 { .daddr = src,
247 .saddr = dst,
248 .tos = tos } },
249 .mark = mark,
250 .iif = oif };
251
252 struct fib_result res;
253 int no_addr, rpf, accept_local;
254 bool dev_match;
255 int ret;
256 struct net *net;
257
258 no_addr = rpf = accept_local = 0;
259 rcu_read_lock();
260 in_dev = __in_dev_get_rcu(dev);
261 if (in_dev) {
262 no_addr = in_dev->ifa_list == NULL;
263 rpf = IN_DEV_RPFILTER(in_dev);
264 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
265 if (mark && !IN_DEV_SRC_VMARK(in_dev))
266 fl.mark = 0;
267 }
268 rcu_read_unlock();
269
270 if (in_dev == NULL)
271 goto e_inval;
272
273 net = dev_net(dev);
274 if (fib_lookup(net, &fl, &res))
275 goto last_resort;
276 if (res.type != RTN_UNICAST) {
277 if (res.type != RTN_LOCAL || !accept_local)
278 goto e_inval_res;
279 }
280 *spec_dst = FIB_RES_PREFSRC(res);
281 fib_combine_itag(itag, &res);
282 dev_match = false;
283
284#ifdef CONFIG_IP_ROUTE_MULTIPATH
285 for (ret = 0; ret < res.fi->fib_nhs; ret++) {
286 struct fib_nh *nh = &res.fi->fib_nh[ret];
287
288 if (nh->nh_dev == dev) {
289 dev_match = true;
290 break;
291 }
292 }
293#else
294 if (FIB_RES_DEV(res) == dev)
295 dev_match = true;
296#endif
297 if (dev_match) {
298 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
299 fib_res_put(&res);
300 return ret;
301 }
302 fib_res_put(&res);
303 if (no_addr)
304 goto last_resort;
305 if (rpf == 1)
306 goto e_rpf;
307 fl.oif = dev->ifindex;
308
309 ret = 0;
310 if (fib_lookup(net, &fl, &res) == 0) {
311 if (res.type == RTN_UNICAST) {
312 *spec_dst = FIB_RES_PREFSRC(res);
313 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
314 }
315 fib_res_put(&res);
316 }
317 return ret;
318
319last_resort:
320 if (rpf)
321 goto e_rpf;
322 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
323 *itag = 0;
324 return 0;
325
326e_inval_res:
327 fib_res_put(&res);
328e_inval:
329 return -EINVAL;
330e_rpf:
331 return -EXDEV;
332}
333
334static inline __be32 sk_extract_addr(struct sockaddr *addr)
335{
336 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
337}
338
339static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
340{
341 struct nlattr *nla;
342
343 nla = (struct nlattr *) ((char *) mx + len);
344 nla->nla_type = type;
345 nla->nla_len = nla_attr_size(4);
346 *(u32 *) nla_data(nla) = value;
347
348 return len + nla_total_size(4);
349}
350
351static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
352 struct fib_config *cfg)
353{
354 __be32 addr;
355 int plen;
356
357 memset(cfg, 0, sizeof(*cfg));
358 cfg->fc_nlinfo.nl_net = net;
359
360 if (rt->rt_dst.sa_family != AF_INET)
361 return -EAFNOSUPPORT;
362
363 /*
364 * Check mask for validity:
365 * a) it must be contiguous.
366 * b) destination must have all host bits clear.
367 * c) if application forgot to set correct family (AF_INET),
368 * reject request unless it is absolutely clear i.e.
369 * both family and mask are zero.
370 */
371 plen = 32;
372 addr = sk_extract_addr(&rt->rt_dst);
373 if (!(rt->rt_flags & RTF_HOST)) {
374 __be32 mask = sk_extract_addr(&rt->rt_genmask);
375
376 if (rt->rt_genmask.sa_family != AF_INET) {
377 if (mask || rt->rt_genmask.sa_family)
378 return -EAFNOSUPPORT;
379 }
380
381 if (bad_mask(mask, addr))
382 return -EINVAL;
383
384 plen = inet_mask_len(mask);
385 }
386
387 cfg->fc_dst_len = plen;
388 cfg->fc_dst = addr;
389
390 if (cmd != SIOCDELRT) {
391 cfg->fc_nlflags = NLM_F_CREATE;
392 cfg->fc_protocol = RTPROT_BOOT;
393 }
394
395 if (rt->rt_metric)
396 cfg->fc_priority = rt->rt_metric - 1;
397
398 if (rt->rt_flags & RTF_REJECT) {
399 cfg->fc_scope = RT_SCOPE_HOST;
400 cfg->fc_type = RTN_UNREACHABLE;
401 return 0;
402 }
403
404 cfg->fc_scope = RT_SCOPE_NOWHERE;
405 cfg->fc_type = RTN_UNICAST;
406
407 if (rt->rt_dev) {
408 char *colon;
409 struct net_device *dev;
410 char devname[IFNAMSIZ];
411
412 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
413 return -EFAULT;
414
415 devname[IFNAMSIZ-1] = 0;
416 colon = strchr(devname, ':');
417 if (colon)
418 *colon = 0;
419 dev = __dev_get_by_name(net, devname);
420 if (!dev)
421 return -ENODEV;
422 cfg->fc_oif = dev->ifindex;
423 if (colon) {
424 struct in_ifaddr *ifa;
425 struct in_device *in_dev = __in_dev_get_rtnl(dev);
426 if (!in_dev)
427 return -ENODEV;
428 *colon = ':';
429 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
430 if (strcmp(ifa->ifa_label, devname) == 0)
431 break;
432 if (ifa == NULL)
433 return -ENODEV;
434 cfg->fc_prefsrc = ifa->ifa_local;
435 }
436 }
437
438 addr = sk_extract_addr(&rt->rt_gateway);
439 if (rt->rt_gateway.sa_family == AF_INET && addr) {
440 cfg->fc_gw = addr;
441 if (rt->rt_flags & RTF_GATEWAY &&
442 inet_addr_type(net, addr) == RTN_UNICAST)
443 cfg->fc_scope = RT_SCOPE_UNIVERSE;
444 }
445
446 if (cmd == SIOCDELRT)
447 return 0;
448
449 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
450 return -EINVAL;
451
452 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
453 cfg->fc_scope = RT_SCOPE_LINK;
454
455 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
456 struct nlattr *mx;
457 int len = 0;
458
459 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
460 if (mx == NULL)
461 return -ENOMEM;
462
463 if (rt->rt_flags & RTF_MTU)
464 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
465
466 if (rt->rt_flags & RTF_WINDOW)
467 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
468
469 if (rt->rt_flags & RTF_IRTT)
470 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
471
472 cfg->fc_mx = mx;
473 cfg->fc_mx_len = len;
474 }
475
476 return 0;
477}
478
479/*
480 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
481 */
482
483int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
484{
485 struct fib_config cfg;
486 struct rtentry rt;
487 int err;
488
489 switch (cmd) {
490 case SIOCADDRT: /* Add a route */
491 case SIOCDELRT: /* Delete a route */
492 if (!capable(CAP_NET_ADMIN))
493 return -EPERM;
494
495 if (copy_from_user(&rt, arg, sizeof(rt)))
496 return -EFAULT;
497
498 rtnl_lock();
499 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
500 if (err == 0) {
501 struct fib_table *tb;
502
503 if (cmd == SIOCDELRT) {
504 tb = fib_get_table(net, cfg.fc_table);
505 if (tb)
506 err = fib_table_delete(tb, &cfg);
507 else
508 err = -ESRCH;
509 } else {
510 tb = fib_new_table(net, cfg.fc_table);
511 if (tb)
512 err = fib_table_insert(tb, &cfg);
513 else
514 err = -ENOBUFS;
515 }
516
517 /* allocated by rtentry_to_fib_config() */
518 kfree(cfg.fc_mx);
519 }
520 rtnl_unlock();
521 return err;
522 }
523 return -EINVAL;
524}
525
526const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
527 [RTA_DST] = { .type = NLA_U32 },
528 [RTA_SRC] = { .type = NLA_U32 },
529 [RTA_IIF] = { .type = NLA_U32 },
530 [RTA_OIF] = { .type = NLA_U32 },
531 [RTA_GATEWAY] = { .type = NLA_U32 },
532 [RTA_PRIORITY] = { .type = NLA_U32 },
533 [RTA_PREFSRC] = { .type = NLA_U32 },
534 [RTA_METRICS] = { .type = NLA_NESTED },
535 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
536 [RTA_FLOW] = { .type = NLA_U32 },
537};
538
539static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
540 struct nlmsghdr *nlh, struct fib_config *cfg)
541{
542 struct nlattr *attr;
543 int err, remaining;
544 struct rtmsg *rtm;
545
546 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
547 if (err < 0)
548 goto errout;
549
550 memset(cfg, 0, sizeof(*cfg));
551
552 rtm = nlmsg_data(nlh);
553 cfg->fc_dst_len = rtm->rtm_dst_len;
554 cfg->fc_tos = rtm->rtm_tos;
555 cfg->fc_table = rtm->rtm_table;
556 cfg->fc_protocol = rtm->rtm_protocol;
557 cfg->fc_scope = rtm->rtm_scope;
558 cfg->fc_type = rtm->rtm_type;
559 cfg->fc_flags = rtm->rtm_flags;
560 cfg->fc_nlflags = nlh->nlmsg_flags;
561
562 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
563 cfg->fc_nlinfo.nlh = nlh;
564 cfg->fc_nlinfo.nl_net = net;
565
566 if (cfg->fc_type > RTN_MAX) {
567 err = -EINVAL;
568 goto errout;
569 }
570
571 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
572 switch (nla_type(attr)) {
573 case RTA_DST:
574 cfg->fc_dst = nla_get_be32(attr);
575 break;
576 case RTA_OIF:
577 cfg->fc_oif = nla_get_u32(attr);
578 break;
579 case RTA_GATEWAY:
580 cfg->fc_gw = nla_get_be32(attr);
581 break;
582 case RTA_PRIORITY:
583 cfg->fc_priority = nla_get_u32(attr);
584 break;
585 case RTA_PREFSRC:
586 cfg->fc_prefsrc = nla_get_be32(attr);
587 break;
588 case RTA_METRICS:
589 cfg->fc_mx = nla_data(attr);
590 cfg->fc_mx_len = nla_len(attr);
591 break;
592 case RTA_MULTIPATH:
593 cfg->fc_mp = nla_data(attr);
594 cfg->fc_mp_len = nla_len(attr);
595 break;
596 case RTA_FLOW:
597 cfg->fc_flow = nla_get_u32(attr);
598 break;
599 case RTA_TABLE:
600 cfg->fc_table = nla_get_u32(attr);
601 break;
602 }
603 }
604
605 return 0;
606errout:
607 return err;
608}
609
610static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
611{
612 struct net *net = sock_net(skb->sk);
613 struct fib_config cfg;
614 struct fib_table *tb;
615 int err;
616
617 err = rtm_to_fib_config(net, skb, nlh, &cfg);
618 if (err < 0)
619 goto errout;
620
621 tb = fib_get_table(net, cfg.fc_table);
622 if (tb == NULL) {
623 err = -ESRCH;
624 goto errout;
625 }
626
627 err = fib_table_delete(tb, &cfg);
628errout:
629 return err;
630}
631
632static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
633{
634 struct net *net = sock_net(skb->sk);
635 struct fib_config cfg;
636 struct fib_table *tb;
637 int err;
638
639 err = rtm_to_fib_config(net, skb, nlh, &cfg);
640 if (err < 0)
641 goto errout;
642
643 tb = fib_new_table(net, cfg.fc_table);
644 if (tb == NULL) {
645 err = -ENOBUFS;
646 goto errout;
647 }
648
649 err = fib_table_insert(tb, &cfg);
650errout:
651 return err;
652}
653
654static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
655{
656 struct net *net = sock_net(skb->sk);
657 unsigned int h, s_h;
658 unsigned int e = 0, s_e;
659 struct fib_table *tb;
660 struct hlist_node *node;
661 struct hlist_head *head;
662 int dumped = 0;
663
664 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
665 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
666 return ip_rt_dump(skb, cb);
667
668 s_h = cb->args[0];
669 s_e = cb->args[1];
670
671 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
672 e = 0;
673 head = &net->ipv4.fib_table_hash[h];
674 hlist_for_each_entry(tb, node, head, tb_hlist) {
675 if (e < s_e)
676 goto next;
677 if (dumped)
678 memset(&cb->args[2], 0, sizeof(cb->args) -
679 2 * sizeof(cb->args[0]));
680 if (fib_table_dump(tb, skb, cb) < 0)
681 goto out;
682 dumped = 1;
683next:
684 e++;
685 }
686 }
687out:
688 cb->args[1] = e;
689 cb->args[0] = h;
690
691 return skb->len;
692}
693
694/* Prepare and feed intra-kernel routing request.
695 Really, it should be netlink message, but :-( netlink
696 can be not configured, so that we feed it directly
697 to fib engine. It is legal, because all events occur
698 only when netlink is already locked.
699 */
700
701static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
702{
703 struct net *net = dev_net(ifa->ifa_dev->dev);
704 struct fib_table *tb;
705 struct fib_config cfg = {
706 .fc_protocol = RTPROT_KERNEL,
707 .fc_type = type,
708 .fc_dst = dst,
709 .fc_dst_len = dst_len,
710 .fc_prefsrc = ifa->ifa_local,
711 .fc_oif = ifa->ifa_dev->dev->ifindex,
712 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
713 .fc_nlinfo = {
714 .nl_net = net,
715 },
716 };
717
718 if (type == RTN_UNICAST)
719 tb = fib_new_table(net, RT_TABLE_MAIN);
720 else
721 tb = fib_new_table(net, RT_TABLE_LOCAL);
722
723 if (tb == NULL)
724 return;
725
726 cfg.fc_table = tb->tb_id;
727
728 if (type != RTN_LOCAL)
729 cfg.fc_scope = RT_SCOPE_LINK;
730 else
731 cfg.fc_scope = RT_SCOPE_HOST;
732
733 if (cmd == RTM_NEWROUTE)
734 fib_table_insert(tb, &cfg);
735 else
736 fib_table_delete(tb, &cfg);
737}
738
739void fib_add_ifaddr(struct in_ifaddr *ifa)
740{
741 struct in_device *in_dev = ifa->ifa_dev;
742 struct net_device *dev = in_dev->dev;
743 struct in_ifaddr *prim = ifa;
744 __be32 mask = ifa->ifa_mask;
745 __be32 addr = ifa->ifa_local;
746 __be32 prefix = ifa->ifa_address&mask;
747
748 if (ifa->ifa_flags&IFA_F_SECONDARY) {
749 prim = inet_ifa_byprefix(in_dev, prefix, mask);
750 if (prim == NULL) {
751 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
752 return;
753 }
754 }
755
756 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
757
758 if (!(dev->flags&IFF_UP))
759 return;
760
761 /* Add broadcast address, if it is explicitly assigned. */
762 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
763 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
764
765 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
766 (prefix != addr || ifa->ifa_prefixlen < 32)) {
767 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
768 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
769
770 /* Add network specific broadcasts, when it takes a sense */
771 if (ifa->ifa_prefixlen < 31) {
772 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
773 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
774 }
775 }
776}
777
778static void fib_del_ifaddr(struct in_ifaddr *ifa)
779{
780 struct in_device *in_dev = ifa->ifa_dev;
781 struct net_device *dev = in_dev->dev;
782 struct in_ifaddr *ifa1;
783 struct in_ifaddr *prim = ifa;
784 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
785 __be32 any = ifa->ifa_address&ifa->ifa_mask;
786#define LOCAL_OK 1
787#define BRD_OK 2
788#define BRD0_OK 4
789#define BRD1_OK 8
790 unsigned ok = 0;
791
792 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
793 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
794 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
795 else {
796 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
797 if (prim == NULL) {
798 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
799 return;
800 }
801 }
802
803 /* Deletion is more complicated than add.
804 We should take care of not to delete too much :-)
805
806 Scan address list to be sure that addresses are really gone.
807 */
808
809 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
810 if (ifa->ifa_local == ifa1->ifa_local)
811 ok |= LOCAL_OK;
812 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
813 ok |= BRD_OK;
814 if (brd == ifa1->ifa_broadcast)
815 ok |= BRD1_OK;
816 if (any == ifa1->ifa_broadcast)
817 ok |= BRD0_OK;
818 }
819
820 if (!(ok&BRD_OK))
821 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
822 if (!(ok&BRD1_OK))
823 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
824 if (!(ok&BRD0_OK))
825 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
826 if (!(ok&LOCAL_OK)) {
827 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
828
829 /* Check, that this local address finally disappeared. */
830 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
831 /* And the last, but not the least thing.
832 We must flush stray FIB entries.
833
834 First of all, we scan fib_info list searching
835 for stray nexthop entries, then ignite fib_flush.
836 */
837 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
838 fib_flush(dev_net(dev));
839 }
840 }
841#undef LOCAL_OK
842#undef BRD_OK
843#undef BRD0_OK
844#undef BRD1_OK
845}
846
847static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
848{
849
850 struct fib_result res;
851 struct flowi fl = { .mark = frn->fl_mark,
852 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
853 .tos = frn->fl_tos,
854 .scope = frn->fl_scope } } };
855
856#ifdef CONFIG_IP_MULTIPLE_TABLES
857 res.r = NULL;
858#endif
859
860 frn->err = -ENOENT;
861 if (tb) {
862 local_bh_disable();
863
864 frn->tb_id = tb->tb_id;
865 frn->err = fib_table_lookup(tb, &fl, &res);
866
867 if (!frn->err) {
868 frn->prefixlen = res.prefixlen;
869 frn->nh_sel = res.nh_sel;
870 frn->type = res.type;
871 frn->scope = res.scope;
872 fib_res_put(&res);
873 }
874 local_bh_enable();
875 }
876}
877
878static void nl_fib_input(struct sk_buff *skb)
879{
880 struct net *net;
881 struct fib_result_nl *frn;
882 struct nlmsghdr *nlh;
883 struct fib_table *tb;
884 u32 pid;
885
886 net = sock_net(skb->sk);
887 nlh = nlmsg_hdr(skb);
888 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
889 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
890 return;
891
892 skb = skb_clone(skb, GFP_KERNEL);
893 if (skb == NULL)
894 return;
895 nlh = nlmsg_hdr(skb);
896
897 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
898 tb = fib_get_table(net, frn->tb_id_in);
899
900 nl_fib_lookup(frn, tb);
901
902 pid = NETLINK_CB(skb).pid; /* pid of sending process */
903 NETLINK_CB(skb).pid = 0; /* from kernel */
904 NETLINK_CB(skb).dst_group = 0; /* unicast */
905 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
906}
907
908static int __net_init nl_fib_lookup_init(struct net *net)
909{
910 struct sock *sk;
911 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
912 nl_fib_input, NULL, THIS_MODULE);
913 if (sk == NULL)
914 return -EAFNOSUPPORT;
915 net->ipv4.fibnl = sk;
916 return 0;
917}
918
919static void nl_fib_lookup_exit(struct net *net)
920{
921 netlink_kernel_release(net->ipv4.fibnl);
922 net->ipv4.fibnl = NULL;
923}
924
/*
 * Take IPv4 routing down on @dev: sync down the routes using the device
 * (flushing the tables if fib_sync_down_dev() reports a change), flush
 * the routing cache with the given @delay and bring down ARP on @dev.
 * @force is passed through to fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int changed = fib_sync_down_dev(dev, force);

	if (changed)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
932
933static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
934{
935 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
936 struct net_device *dev = ifa->ifa_dev->dev;
937
938 switch (event) {
939 case NETDEV_UP:
940 fib_add_ifaddr(ifa);
941#ifdef CONFIG_IP_ROUTE_MULTIPATH
942 fib_sync_up(dev);
943#endif
944 rt_cache_flush(dev_net(dev), -1);
945 break;
946 case NETDEV_DOWN:
947 fib_del_ifaddr(ifa);
948 if (ifa->ifa_dev->ifa_list == NULL) {
949 /* Last address was deleted from this interface.
950 Disable IP.
951 */
952 fib_disable_ip(dev, 1, 0);
953 } else {
954 rt_cache_flush(dev_net(dev), -1);
955 }
956 break;
957 }
958 return NOTIFY_DONE;
959}
960
961static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
962{
963 struct net_device *dev = ptr;
964 struct in_device *in_dev = __in_dev_get_rtnl(dev);
965
966 if (event == NETDEV_UNREGISTER) {
967 fib_disable_ip(dev, 2, -1);
968 return NOTIFY_DONE;
969 }
970
971 if (!in_dev)
972 return NOTIFY_DONE;
973
974 switch (event) {
975 case NETDEV_UP:
976 for_ifa(in_dev) {
977 fib_add_ifaddr(ifa);
978 } endfor_ifa(in_dev);
979#ifdef CONFIG_IP_ROUTE_MULTIPATH
980 fib_sync_up(dev);
981#endif
982 rt_cache_flush(dev_net(dev), -1);
983 break;
984 case NETDEV_DOWN:
985 fib_disable_ip(dev, 0, 0);
986 break;
987 case NETDEV_CHANGEMTU:
988 case NETDEV_CHANGE:
989 rt_cache_flush(dev_net(dev), 0);
990 break;
991 case NETDEV_UNREGISTER_BATCH:
992 rt_cache_flush_batch();
993 break;
994 }
995 return NOTIFY_DONE;
996}
997
998static struct notifier_block fib_inetaddr_notifier = {
999 .notifier_call = fib_inetaddr_event,
1000};
1001
1002static struct notifier_block fib_netdev_notifier = {
1003 .notifier_call = fib_netdev_event,
1004};
1005
1006static int __net_init ip_fib_net_init(struct net *net)
1007{
1008 int err;
1009 unsigned int i;
1010
1011 net->ipv4.fib_table_hash = kzalloc(
1012 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
1013 if (net->ipv4.fib_table_hash == NULL)
1014 return -ENOMEM;
1015
1016 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
1017 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
1018
1019 err = fib4_rules_init(net);
1020 if (err < 0)
1021 goto fail;
1022 return 0;
1023
1024fail:
1025 kfree(net->ipv4.fib_table_hash);
1026 return err;
1027}
1028
1029static void ip_fib_net_exit(struct net *net)
1030{
1031 unsigned int i;
1032
1033#ifdef CONFIG_IP_MULTIPLE_TABLES
1034 fib4_rules_exit(net);
1035#endif
1036
1037 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1038 struct fib_table *tb;
1039 struct hlist_head *head;
1040 struct hlist_node *node, *tmp;
1041
1042 head = &net->ipv4.fib_table_hash[i];
1043 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1044 hlist_del(node);
1045 fib_table_flush(tb);
1046 kfree(tb);
1047 }
1048 }
1049 kfree(net->ipv4.fib_table_hash);
1050}
1051
1052static int __net_init fib_net_init(struct net *net)
1053{
1054 int error;
1055
1056 error = ip_fib_net_init(net);
1057 if (error < 0)
1058 goto out;
1059 error = nl_fib_lookup_init(net);
1060 if (error < 0)
1061 goto out_nlfl;
1062 error = fib_proc_init(net);
1063 if (error < 0)
1064 goto out_proc;
1065out:
1066 return error;
1067
1068out_proc:
1069 nl_fib_lookup_exit(net);
1070out_nlfl:
1071 ip_fib_net_exit(net);
1072 goto out;
1073}
1074
1075static void __net_exit fib_net_exit(struct net *net)
1076{
1077 fib_proc_exit(net);
1078 nl_fib_lookup_exit(net);
1079 ip_fib_net_exit(net);
1080}
1081
1082static struct pernet_operations fib_net_ops = {
1083 .init = fib_net_init,
1084 .exit = fib_net_exit,
1085};
1086
1087void __init ip_fib_init(void)
1088{
1089 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1090 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1091 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1092
1093 register_pernet_subsys(&fib_net_ops);
1094 register_netdevice_notifier(&fib_netdev_notifier);
1095 register_inetaddr_notifier(&fib_inetaddr_notifier);
1096
1097 fib_hash_init();
1098}