]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
[NETNS]: Refactor fib initialization so it can handle multiple namespaces.
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
4fc268d2 22#include <linux/capability.h>
1da177e4
LT
23#include <linux/types.h>
24#include <linux/kernel.h>
1da177e4
LT
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
14c85021 32#include <linux/inetdevice.h>
1da177e4 33#include <linux/netdevice.h>
1823730f 34#include <linux/if_addr.h>
1da177e4
LT
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
1da177e4 37#include <linux/init.h>
1af5a8c4 38#include <linux/list.h>
1da177e4
LT
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
63f3444f 48#include <net/rtnetlink.h>
1da177e4
LT
49
50#define FFprint(a...) printk(KERN_DEBUG a)
51
28f7b036
DM
52static struct sock *fibnl;
53
1da177e4
LT
54#ifndef CONFIG_IP_MULTIPLE_TABLES
55
1da177e4
LT
56struct fib_table *ip_fib_local_table;
57struct fib_table *ip_fib_main_table;
58
1af5a8c4
PM
59#define FIB_TABLE_HASHSZ 1
60static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
1da177e4 61
7b1a74fd 62static int __net_init fib4_rules_init(struct net *net)
c3e9a353
PE
63{
64 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
dbb50165
DL
65 if (ip_fib_local_table == NULL)
66 return -ENOMEM;
67
c3e9a353 68 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
dbb50165
DL
69 if (ip_fib_main_table == NULL)
70 goto fail;
71
72 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
c3e9a353 73 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
dbb50165
DL
74 return 0;
75
76fail:
77 kfree(ip_fib_local_table);
78 ip_fib_local_table = NULL;
79 return -ENOMEM;
c3e9a353 80}
1af5a8c4 81#else
1da177e4 82
1af5a8c4
PM
83#define FIB_TABLE_HASHSZ 256
84static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
1da177e4 85
1af5a8c4 86struct fib_table *fib_new_table(u32 id)
1da177e4
LT
87{
88 struct fib_table *tb;
1af5a8c4 89 unsigned int h;
1da177e4 90
1af5a8c4
PM
91 if (id == 0)
92 id = RT_TABLE_MAIN;
93 tb = fib_get_table(id);
94 if (tb)
95 return tb;
1da177e4
LT
96 tb = fib_hash_init(id);
97 if (!tb)
98 return NULL;
1af5a8c4
PM
99 h = id & (FIB_TABLE_HASHSZ - 1);
100 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
1da177e4
LT
101 return tb;
102}
103
1af5a8c4
PM
104struct fib_table *fib_get_table(u32 id)
105{
106 struct fib_table *tb;
107 struct hlist_node *node;
108 unsigned int h;
1da177e4 109
1af5a8c4
PM
110 if (id == 0)
111 id = RT_TABLE_MAIN;
112 h = id & (FIB_TABLE_HASHSZ - 1);
113 rcu_read_lock();
114 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
115 if (tb->tb_id == id) {
116 rcu_read_unlock();
117 return tb;
118 }
119 }
120 rcu_read_unlock();
121 return NULL;
122}
1da177e4
LT
123#endif /* CONFIG_IP_MULTIPLE_TABLES */
124
1da177e4
LT
125static void fib_flush(void)
126{
127 int flushed = 0;
1da177e4 128 struct fib_table *tb;
1af5a8c4
PM
129 struct hlist_node *node;
130 unsigned int h;
1da177e4 131
1af5a8c4
PM
132 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
133 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
134 flushed += tb->tb_flush(tb);
1da177e4 135 }
1da177e4
LT
136
137 if (flushed)
138 rt_cache_flush(-1);
139}
140
141/*
142 * Find the first device with a given source address.
143 */
144
60cad5da 145struct net_device * ip_dev_find(__be32 addr)
1da177e4
LT
146{
147 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
148 struct fib_result res;
149 struct net_device *dev = NULL;
03cf786c 150 struct fib_table *local_table;
1da177e4
LT
151
152#ifdef CONFIG_IP_MULTIPLE_TABLES
153 res.r = NULL;
154#endif
155
03cf786c
PE
156 local_table = fib_get_table(RT_TABLE_LOCAL);
157 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
1da177e4
LT
158 return NULL;
159 if (res.type != RTN_LOCAL)
160 goto out;
161 dev = FIB_RES_DEV(res);
162
163 if (dev)
164 dev_hold(dev);
165out:
166 fib_res_put(&res);
167 return dev;
168}
169
05538116
LAT
170/*
171 * Find address type as if only "dev" was present in the system. If
172 * on_dev is NULL then all interfaces are taken into consideration.
173 */
174static inline unsigned __inet_dev_addr_type(const struct net_device *dev,
175 __be32 addr)
1da177e4
LT
176{
177 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
178 struct fib_result res;
179 unsigned ret = RTN_BROADCAST;
03cf786c 180 struct fib_table *local_table;
1da177e4 181
f97c1e0c 182 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
1da177e4 183 return RTN_BROADCAST;
f97c1e0c 184 if (ipv4_is_multicast(addr))
1da177e4
LT
185 return RTN_MULTICAST;
186
187#ifdef CONFIG_IP_MULTIPLE_TABLES
188 res.r = NULL;
189#endif
e905a9ed 190
03cf786c
PE
191 local_table = fib_get_table(RT_TABLE_LOCAL);
192 if (local_table) {
1da177e4 193 ret = RTN_UNICAST;
03cf786c 194 if (!local_table->tb_lookup(local_table, &fl, &res)) {
05538116
LAT
195 if (!dev || dev == res.fi->fib_dev)
196 ret = res.type;
1da177e4
LT
197 fib_res_put(&res);
198 }
199 }
200 return ret;
201}
202
05538116
LAT
203unsigned int inet_addr_type(__be32 addr)
204{
205 return __inet_dev_addr_type(NULL, addr);
206}
207
208unsigned int inet_dev_addr_type(const struct net_device *dev, __be32 addr)
209{
210 return __inet_dev_addr_type(dev, addr);
211}
212
1da177e4
LT
213/* Given (packet source, input interface) and optional (dst, oif, tos):
214 - (main) check, that source is valid i.e. not broadcast or our local
215 address.
216 - figure out what "logical" interface this packet arrived
217 and calculate "specific destination" address.
218 - check, that packet arrived from expected physical interface.
219 */
220
d9c9df8c
AV
221int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
222 struct net_device *dev, __be32 *spec_dst, u32 *itag)
1da177e4
LT
223{
224 struct in_device *in_dev;
225 struct flowi fl = { .nl_u = { .ip4_u =
226 { .daddr = src,
227 .saddr = dst,
228 .tos = tos } },
229 .iif = oif };
230 struct fib_result res;
231 int no_addr, rpf;
232 int ret;
233
234 no_addr = rpf = 0;
235 rcu_read_lock();
e5ed6399 236 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
237 if (in_dev) {
238 no_addr = in_dev->ifa_list == NULL;
239 rpf = IN_DEV_RPFILTER(in_dev);
240 }
241 rcu_read_unlock();
242
243 if (in_dev == NULL)
244 goto e_inval;
245
246 if (fib_lookup(&fl, &res))
247 goto last_resort;
248 if (res.type != RTN_UNICAST)
249 goto e_inval_res;
250 *spec_dst = FIB_RES_PREFSRC(res);
251 fib_combine_itag(itag, &res);
252#ifdef CONFIG_IP_ROUTE_MULTIPATH
253 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
254#else
255 if (FIB_RES_DEV(res) == dev)
256#endif
257 {
258 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
259 fib_res_put(&res);
260 return ret;
261 }
262 fib_res_put(&res);
263 if (no_addr)
264 goto last_resort;
265 if (rpf)
266 goto e_inval;
267 fl.oif = dev->ifindex;
268
269 ret = 0;
270 if (fib_lookup(&fl, &res) == 0) {
271 if (res.type == RTN_UNICAST) {
272 *spec_dst = FIB_RES_PREFSRC(res);
273 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
274 }
275 fib_res_put(&res);
276 }
277 return ret;
278
279last_resort:
280 if (rpf)
281 goto e_inval;
282 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
283 *itag = 0;
284 return 0;
285
286e_inval_res:
287 fib_res_put(&res);
288e_inval:
289 return -EINVAL;
290}
291
81f7bf6c 292static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
293{
294 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
295}
296
297static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
298{
299 struct nlattr *nla;
300
301 nla = (struct nlattr *) ((char *) mx + len);
302 nla->nla_type = type;
303 nla->nla_len = nla_attr_size(4);
304 *(u32 *) nla_data(nla) = value;
305
306 return len + nla_total_size(4);
307}
308
309static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
310 struct fib_config *cfg)
311{
6d85c10a 312 __be32 addr;
4e902c57
TG
313 int plen;
314
315 memset(cfg, 0, sizeof(*cfg));
316
317 if (rt->rt_dst.sa_family != AF_INET)
318 return -EAFNOSUPPORT;
319
320 /*
321 * Check mask for validity:
322 * a) it must be contiguous.
323 * b) destination must have all host bits clear.
324 * c) if application forgot to set correct family (AF_INET),
325 * reject request unless it is absolutely clear i.e.
326 * both family and mask are zero.
327 */
328 plen = 32;
329 addr = sk_extract_addr(&rt->rt_dst);
330 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 331 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
332
333 if (rt->rt_genmask.sa_family != AF_INET) {
334 if (mask || rt->rt_genmask.sa_family)
335 return -EAFNOSUPPORT;
336 }
337
338 if (bad_mask(mask, addr))
339 return -EINVAL;
340
341 plen = inet_mask_len(mask);
342 }
343
344 cfg->fc_dst_len = plen;
345 cfg->fc_dst = addr;
346
347 if (cmd != SIOCDELRT) {
348 cfg->fc_nlflags = NLM_F_CREATE;
349 cfg->fc_protocol = RTPROT_BOOT;
350 }
351
352 if (rt->rt_metric)
353 cfg->fc_priority = rt->rt_metric - 1;
354
355 if (rt->rt_flags & RTF_REJECT) {
356 cfg->fc_scope = RT_SCOPE_HOST;
357 cfg->fc_type = RTN_UNREACHABLE;
358 return 0;
359 }
360
361 cfg->fc_scope = RT_SCOPE_NOWHERE;
362 cfg->fc_type = RTN_UNICAST;
363
364 if (rt->rt_dev) {
365 char *colon;
366 struct net_device *dev;
367 char devname[IFNAMSIZ];
368
369 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
370 return -EFAULT;
371
372 devname[IFNAMSIZ-1] = 0;
373 colon = strchr(devname, ':');
374 if (colon)
375 *colon = 0;
881d966b 376 dev = __dev_get_by_name(&init_net, devname);
4e902c57
TG
377 if (!dev)
378 return -ENODEV;
379 cfg->fc_oif = dev->ifindex;
380 if (colon) {
381 struct in_ifaddr *ifa;
382 struct in_device *in_dev = __in_dev_get_rtnl(dev);
383 if (!in_dev)
384 return -ENODEV;
385 *colon = ':';
386 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
387 if (strcmp(ifa->ifa_label, devname) == 0)
388 break;
389 if (ifa == NULL)
390 return -ENODEV;
391 cfg->fc_prefsrc = ifa->ifa_local;
392 }
393 }
394
395 addr = sk_extract_addr(&rt->rt_gateway);
396 if (rt->rt_gateway.sa_family == AF_INET && addr) {
397 cfg->fc_gw = addr;
398 if (rt->rt_flags & RTF_GATEWAY &&
399 inet_addr_type(addr) == RTN_UNICAST)
400 cfg->fc_scope = RT_SCOPE_UNIVERSE;
401 }
402
403 if (cmd == SIOCDELRT)
404 return 0;
405
406 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
407 return -EINVAL;
408
409 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
410 cfg->fc_scope = RT_SCOPE_LINK;
411
412 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
413 struct nlattr *mx;
414 int len = 0;
415
416 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 417 if (mx == NULL)
4e902c57
TG
418 return -ENOMEM;
419
420 if (rt->rt_flags & RTF_MTU)
421 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
422
423 if (rt->rt_flags & RTF_WINDOW)
424 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
425
426 if (rt->rt_flags & RTF_IRTT)
427 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
428
429 cfg->fc_mx = mx;
430 cfg->fc_mx_len = len;
431 }
432
433 return 0;
434}
435
1da177e4
LT
436/*
437 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
438 */
e905a9ed 439
1da177e4
LT
440int ip_rt_ioctl(unsigned int cmd, void __user *arg)
441{
4e902c57
TG
442 struct fib_config cfg;
443 struct rtentry rt;
1da177e4 444 int err;
1da177e4
LT
445
446 switch (cmd) {
447 case SIOCADDRT: /* Add a route */
448 case SIOCDELRT: /* Delete a route */
449 if (!capable(CAP_NET_ADMIN))
450 return -EPERM;
4e902c57
TG
451
452 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 453 return -EFAULT;
4e902c57 454
1da177e4 455 rtnl_lock();
4e902c57 456 err = rtentry_to_fib_config(cmd, &rt, &cfg);
1da177e4 457 if (err == 0) {
4e902c57
TG
458 struct fib_table *tb;
459
1da177e4 460 if (cmd == SIOCDELRT) {
4e902c57 461 tb = fib_get_table(cfg.fc_table);
1da177e4 462 if (tb)
4e902c57
TG
463 err = tb->tb_delete(tb, &cfg);
464 else
465 err = -ESRCH;
1da177e4 466 } else {
4e902c57 467 tb = fib_new_table(cfg.fc_table);
1da177e4 468 if (tb)
4e902c57
TG
469 err = tb->tb_insert(tb, &cfg);
470 else
471 err = -ENOBUFS;
1da177e4 472 }
4e902c57
TG
473
474 /* allocated by rtentry_to_fib_config() */
475 kfree(cfg.fc_mx);
1da177e4
LT
476 }
477 rtnl_unlock();
478 return err;
479 }
480 return -EINVAL;
481}
482
ef7c79ed 483const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
484 [RTA_DST] = { .type = NLA_U32 },
485 [RTA_SRC] = { .type = NLA_U32 },
486 [RTA_IIF] = { .type = NLA_U32 },
487 [RTA_OIF] = { .type = NLA_U32 },
488 [RTA_GATEWAY] = { .type = NLA_U32 },
489 [RTA_PRIORITY] = { .type = NLA_U32 },
490 [RTA_PREFSRC] = { .type = NLA_U32 },
491 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 492 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57
TG
493 [RTA_PROTOINFO] = { .type = NLA_U32 },
494 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
495};
496
497static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
498 struct fib_config *cfg)
1da177e4 499{
4e902c57
TG
500 struct nlattr *attr;
501 int err, remaining;
502 struct rtmsg *rtm;
503
504 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
505 if (err < 0)
506 goto errout;
507
508 memset(cfg, 0, sizeof(*cfg));
509
510 rtm = nlmsg_data(nlh);
4e902c57 511 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
512 cfg->fc_tos = rtm->rtm_tos;
513 cfg->fc_table = rtm->rtm_table;
514 cfg->fc_protocol = rtm->rtm_protocol;
515 cfg->fc_scope = rtm->rtm_scope;
516 cfg->fc_type = rtm->rtm_type;
517 cfg->fc_flags = rtm->rtm_flags;
518 cfg->fc_nlflags = nlh->nlmsg_flags;
519
520 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
521 cfg->fc_nlinfo.nlh = nlh;
522
a0ee18b9
TG
523 if (cfg->fc_type > RTN_MAX) {
524 err = -EINVAL;
525 goto errout;
526 }
527
4e902c57 528 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 529 switch (nla_type(attr)) {
4e902c57 530 case RTA_DST:
17fb2c64 531 cfg->fc_dst = nla_get_be32(attr);
4e902c57 532 break;
4e902c57
TG
533 case RTA_OIF:
534 cfg->fc_oif = nla_get_u32(attr);
535 break;
536 case RTA_GATEWAY:
17fb2c64 537 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
538 break;
539 case RTA_PRIORITY:
540 cfg->fc_priority = nla_get_u32(attr);
541 break;
542 case RTA_PREFSRC:
17fb2c64 543 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
544 break;
545 case RTA_METRICS:
546 cfg->fc_mx = nla_data(attr);
547 cfg->fc_mx_len = nla_len(attr);
548 break;
549 case RTA_MULTIPATH:
550 cfg->fc_mp = nla_data(attr);
551 cfg->fc_mp_len = nla_len(attr);
552 break;
553 case RTA_FLOW:
554 cfg->fc_flow = nla_get_u32(attr);
555 break;
4e902c57
TG
556 case RTA_TABLE:
557 cfg->fc_table = nla_get_u32(attr);
558 break;
1da177e4
LT
559 }
560 }
4e902c57 561
1da177e4 562 return 0;
4e902c57
TG
563errout:
564 return err;
1da177e4
LT
565}
566
63f3444f 567static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 568{
b854272b 569 struct net *net = skb->sk->sk_net;
4e902c57
TG
570 struct fib_config cfg;
571 struct fib_table *tb;
572 int err;
1da177e4 573
b854272b
DL
574 if (net != &init_net)
575 return -EINVAL;
576
4e902c57
TG
577 err = rtm_to_fib_config(skb, nlh, &cfg);
578 if (err < 0)
579 goto errout;
1da177e4 580
4e902c57
TG
581 tb = fib_get_table(cfg.fc_table);
582 if (tb == NULL) {
583 err = -ESRCH;
584 goto errout;
585 }
586
587 err = tb->tb_delete(tb, &cfg);
588errout:
589 return err;
1da177e4
LT
590}
591
63f3444f 592static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 593{
b854272b 594 struct net *net = skb->sk->sk_net;
4e902c57
TG
595 struct fib_config cfg;
596 struct fib_table *tb;
597 int err;
1da177e4 598
b854272b
DL
599 if (net != &init_net)
600 return -EINVAL;
601
4e902c57
TG
602 err = rtm_to_fib_config(skb, nlh, &cfg);
603 if (err < 0)
604 goto errout;
1da177e4 605
4e902c57
TG
606 tb = fib_new_table(cfg.fc_table);
607 if (tb == NULL) {
608 err = -ENOBUFS;
609 goto errout;
610 }
611
612 err = tb->tb_insert(tb, &cfg);
613errout:
614 return err;
1da177e4
LT
615}
616
63f3444f 617static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 618{
b854272b 619 struct net *net = skb->sk->sk_net;
1af5a8c4
PM
620 unsigned int h, s_h;
621 unsigned int e = 0, s_e;
1da177e4 622 struct fib_table *tb;
1af5a8c4
PM
623 struct hlist_node *node;
624 int dumped = 0;
1da177e4 625
b854272b
DL
626 if (net != &init_net)
627 return 0;
628
be403ea1
TG
629 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
630 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
631 return ip_rt_dump(skb, cb);
632
1af5a8c4
PM
633 s_h = cb->args[0];
634 s_e = cb->args[1];
635
636 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
637 e = 0;
638 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
639 if (e < s_e)
640 goto next;
641 if (dumped)
642 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 643 2 * sizeof(cb->args[0]));
1af5a8c4
PM
644 if (tb->tb_dump(tb, skb, cb) < 0)
645 goto out;
646 dumped = 1;
647next:
648 e++;
649 }
1da177e4 650 }
1af5a8c4
PM
651out:
652 cb->args[1] = e;
653 cb->args[0] = h;
1da177e4
LT
654
655 return skb->len;
656}
657
658/* Prepare and feed intra-kernel routing request.
659 Really, it should be netlink message, but :-( netlink
660 can be not configured, so that we feed it directly
661 to fib engine. It is legal, because all events occur
662 only when netlink is already locked.
663 */
664
81f7bf6c 665static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 666{
4e902c57
TG
667 struct fib_table *tb;
668 struct fib_config cfg = {
669 .fc_protocol = RTPROT_KERNEL,
670 .fc_type = type,
671 .fc_dst = dst,
672 .fc_dst_len = dst_len,
673 .fc_prefsrc = ifa->ifa_local,
674 .fc_oif = ifa->ifa_dev->dev->ifindex,
675 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
676 };
1da177e4
LT
677
678 if (type == RTN_UNICAST)
679 tb = fib_new_table(RT_TABLE_MAIN);
680 else
681 tb = fib_new_table(RT_TABLE_LOCAL);
682
683 if (tb == NULL)
684 return;
685
4e902c57 686 cfg.fc_table = tb->tb_id;
1da177e4 687
4e902c57
TG
688 if (type != RTN_LOCAL)
689 cfg.fc_scope = RT_SCOPE_LINK;
690 else
691 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
692
693 if (cmd == RTM_NEWROUTE)
4e902c57 694 tb->tb_insert(tb, &cfg);
1da177e4 695 else
4e902c57 696 tb->tb_delete(tb, &cfg);
1da177e4
LT
697}
698
0ff60a45 699void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
700{
701 struct in_device *in_dev = ifa->ifa_dev;
702 struct net_device *dev = in_dev->dev;
703 struct in_ifaddr *prim = ifa;
a144ea4b
AV
704 __be32 mask = ifa->ifa_mask;
705 __be32 addr = ifa->ifa_local;
706 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
707
708 if (ifa->ifa_flags&IFA_F_SECONDARY) {
709 prim = inet_ifa_byprefix(in_dev, prefix, mask);
710 if (prim == NULL) {
711 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
712 return;
713 }
714 }
715
716 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
717
718 if (!(dev->flags&IFF_UP))
719 return;
720
721 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 722 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
723 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
724
f97c1e0c 725 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
726 (prefix != addr || ifa->ifa_prefixlen < 32)) {
727 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
728 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
729
730 /* Add network specific broadcasts, when it takes a sense */
731 if (ifa->ifa_prefixlen < 31) {
732 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
733 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
734 }
735 }
736}
737
738static void fib_del_ifaddr(struct in_ifaddr *ifa)
739{
740 struct in_device *in_dev = ifa->ifa_dev;
741 struct net_device *dev = in_dev->dev;
742 struct in_ifaddr *ifa1;
743 struct in_ifaddr *prim = ifa;
a144ea4b
AV
744 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
745 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
746#define LOCAL_OK 1
747#define BRD_OK 2
748#define BRD0_OK 4
749#define BRD1_OK 8
750 unsigned ok = 0;
751
752 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
753 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
754 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
755 else {
756 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
757 if (prim == NULL) {
758 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
759 return;
760 }
761 }
762
763 /* Deletion is more complicated than add.
764 We should take care of not to delete too much :-)
765
766 Scan address list to be sure that addresses are really gone.
767 */
768
769 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
770 if (ifa->ifa_local == ifa1->ifa_local)
771 ok |= LOCAL_OK;
772 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
773 ok |= BRD_OK;
774 if (brd == ifa1->ifa_broadcast)
775 ok |= BRD1_OK;
776 if (any == ifa1->ifa_broadcast)
777 ok |= BRD0_OK;
778 }
779
780 if (!(ok&BRD_OK))
781 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
782 if (!(ok&BRD1_OK))
783 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
784 if (!(ok&BRD0_OK))
785 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
786 if (!(ok&LOCAL_OK)) {
787 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
788
789 /* Check, that this local address finally disappeared. */
790 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
791 /* And the last, but not the least thing.
792 We must flush stray FIB entries.
793
794 First of all, we scan fib_info list searching
795 for stray nexthop entries, then ignite fib_flush.
796 */
797 if (fib_sync_down(ifa->ifa_local, NULL, 0))
798 fib_flush();
799 }
800 }
801#undef LOCAL_OK
802#undef BRD_OK
803#undef BRD0_OK
804#undef BRD1_OK
805}
806
246955fe
RO
807static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
808{
e905a9ed 809
246955fe 810 struct fib_result res;
5f300893 811 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 812 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
813 .tos = frn->fl_tos,
814 .scope = frn->fl_scope } } };
1194ed0a 815
912a41a4
SV
816#ifdef CONFIG_IP_MULTIPLE_TABLES
817 res.r = NULL;
818#endif
819
1194ed0a 820 frn->err = -ENOENT;
246955fe
RO
821 if (tb) {
822 local_bh_disable();
823
824 frn->tb_id = tb->tb_id;
825 frn->err = tb->tb_lookup(tb, &fl, &res);
826
827 if (!frn->err) {
828 frn->prefixlen = res.prefixlen;
829 frn->nh_sel = res.nh_sel;
830 frn->type = res.type;
831 frn->scope = res.scope;
1194ed0a 832 fib_res_put(&res);
246955fe
RO
833 }
834 local_bh_enable();
835 }
836}
837
28f7b036 838static void nl_fib_input(struct sk_buff *skb)
246955fe 839{
246955fe 840 struct fib_result_nl *frn;
28f7b036 841 struct nlmsghdr *nlh;
246955fe 842 struct fib_table *tb;
28f7b036 843 u32 pid;
1194ed0a 844
b529ccf2 845 nlh = nlmsg_hdr(skb);
ea86575e 846 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 847 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 848 return;
d883a036
DL
849
850 skb = skb_clone(skb, GFP_KERNEL);
851 if (skb == NULL)
852 return;
853 nlh = nlmsg_hdr(skb);
e905a9ed 854
246955fe
RO
855 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
856 tb = fib_get_table(frn->tb_id_in);
857
858 nl_fib_lookup(frn, tb);
e905a9ed 859
1194ed0a 860 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 861 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 862 NETLINK_CB(skb).dst_group = 0; /* unicast */
cd40b7d3 863 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 864}
246955fe 865
7b1a74fd 866static int nl_fib_lookup_init(struct net *net)
246955fe 867{
7b1a74fd 868 fibnl = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
cd40b7d3 869 nl_fib_input, NULL, THIS_MODULE);
7b1a74fd
DL
870 if (fibnl == NULL)
871 return -EAFNOSUPPORT;
872 return 0;
873}
874
875static void nl_fib_lookup_exit(struct net *net)
876{
877 sock_put(fibnl);
246955fe
RO
878}
879
1da177e4
LT
/*
 * Stop routing through dev: sync down its nexthops (flushing the tables
 * if anything changed), flush the route cache and take down ARP state
 * for the device.  force is passed straight on to fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int changed = fib_sync_down(0, dev, force);

	if (changed)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
887
888static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
889{
890 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
891
892 switch (event) {
893 case NETDEV_UP:
894 fib_add_ifaddr(ifa);
895#ifdef CONFIG_IP_ROUTE_MULTIPATH
896 fib_sync_up(ifa->ifa_dev->dev);
897#endif
898 rt_cache_flush(-1);
899 break;
900 case NETDEV_DOWN:
901 fib_del_ifaddr(ifa);
9fcc2e8a 902 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
903 /* Last address was deleted from this interface.
904 Disable IP.
905 */
906 fib_disable_ip(ifa->ifa_dev->dev, 1);
907 } else {
908 rt_cache_flush(-1);
909 }
910 break;
911 }
912 return NOTIFY_DONE;
913}
914
915static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
916{
917 struct net_device *dev = ptr;
e5ed6399 918 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4 919
e9dc8653
EB
920 if (dev->nd_net != &init_net)
921 return NOTIFY_DONE;
922
1da177e4
LT
923 if (event == NETDEV_UNREGISTER) {
924 fib_disable_ip(dev, 2);
925 return NOTIFY_DONE;
926 }
927
928 if (!in_dev)
929 return NOTIFY_DONE;
930
931 switch (event) {
932 case NETDEV_UP:
933 for_ifa(in_dev) {
934 fib_add_ifaddr(ifa);
935 } endfor_ifa(in_dev);
936#ifdef CONFIG_IP_ROUTE_MULTIPATH
937 fib_sync_up(dev);
938#endif
939 rt_cache_flush(-1);
940 break;
941 case NETDEV_DOWN:
942 fib_disable_ip(dev, 0);
943 break;
944 case NETDEV_CHANGEMTU:
945 case NETDEV_CHANGE:
946 rt_cache_flush(0);
947 break;
948 }
949 return NOTIFY_DONE;
950}
951
952static struct notifier_block fib_inetaddr_notifier = {
953 .notifier_call =fib_inetaddr_event,
954};
955
956static struct notifier_block fib_netdev_notifier = {
957 .notifier_call =fib_netdev_event,
958};
959
7b1a74fd 960static int __net_init ip_fib_net_init(struct net *net)
1da177e4 961{
1af5a8c4
PM
962 unsigned int i;
963
964 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
965 INIT_HLIST_HEAD(&fib_table_hash[i]);
c3e9a353 966
7b1a74fd
DL
967 return fib4_rules_init(net);
968}
1da177e4 969
7b1a74fd
DL
970static void __net_exit ip_fib_net_exit(struct net *net)
971{
972 unsigned int i;
973
974#ifdef CONFIG_IP_MULTIPLE_TABLES
975 fib4_rules_exit(net);
976#endif
977
978 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
979 struct fib_table *tb;
980 struct hlist_head *head;
981 struct hlist_node *node, *tmp;
63f3444f 982
7b1a74fd
DL
983 head = &fib_table_hash[i];
984 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
985 hlist_del(node);
986 tb->tb_flush(tb);
987 kfree(tb);
988 }
989 }
990}
991
992static int __net_init fib_net_init(struct net *net)
993{
994 int error;
995
996 error = 0;
997 if (net != &init_net)
998 goto out;
999
1000 error = ip_fib_net_init(net);
1001 if (error < 0)
1002 goto out;
1003 error = nl_fib_lookup_init(net);
1004 if (error < 0)
1005 goto out_nlfl;
1006 error = fib_proc_init(net);
1007 if (error < 0)
1008 goto out_proc;
1009out:
1010 return error;
1011
1012out_proc:
1013 nl_fib_lookup_exit(net);
1014out_nlfl:
1015 ip_fib_net_exit(net);
1016 goto out;
1017}
1018
1019static void __net_exit fib_net_exit(struct net *net)
1020{
1021 fib_proc_exit(net);
1022 nl_fib_lookup_exit(net);
1023 ip_fib_net_exit(net);
1024}
1025
1026static struct pernet_operations fib_net_ops = {
1027 .init = fib_net_init,
1028 .exit = fib_net_exit,
1029};
1030
1031void __init ip_fib_init(void)
1032{
63f3444f
TG
1033 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1034 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1035 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1036
1037 register_pernet_subsys(&fib_net_ops);
1038 register_netdevice_notifier(&fib_netdev_notifier);
1039 register_inetaddr_notifier(&fib_inetaddr_notifier);
1da177e4
LT
1040}
1041
1042EXPORT_SYMBOL(inet_addr_type);
05538116 1043EXPORT_SYMBOL(inet_dev_addr_type);
a1e8733e 1044EXPORT_SYMBOL(ip_dev_find);