]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
[NETNS]: Add netns parameter to fib_get_table/fib_new_table.
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
4fc268d2 22#include <linux/capability.h>
1da177e4
LT
23#include <linux/types.h>
24#include <linux/kernel.h>
1da177e4
LT
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
14c85021 32#include <linux/inetdevice.h>
1da177e4 33#include <linux/netdevice.h>
1823730f 34#include <linux/if_addr.h>
1da177e4
LT
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
1da177e4 37#include <linux/init.h>
1af5a8c4 38#include <linux/list.h>
1da177e4
LT
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
63f3444f 48#include <net/rtnetlink.h>
1da177e4
LT
49
50#define FFprint(a...) printk(KERN_DEBUG a)
51
28f7b036 52static struct sock *fibnl;
93456b6d 53struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
28f7b036 54
1da177e4
LT
55#ifndef CONFIG_IP_MULTIPLE_TABLES
56
7b1a74fd 57static int __net_init fib4_rules_init(struct net *net)
c3e9a353 58{
93456b6d
DL
59 struct fib_table *local_table, *main_table;
60
61 local_table = fib_hash_init(RT_TABLE_LOCAL);
62 if (local_table == NULL)
dbb50165
DL
63 return -ENOMEM;
64
93456b6d
DL
65 main_table = fib_hash_init(RT_TABLE_MAIN);
66 if (main_table == NULL)
dbb50165
DL
67 goto fail;
68
93456b6d
DL
69 hlist_add_head_rcu(&local_table->tb_hlist,
70 &fib_table_hash[TABLE_LOCAL_INDEX]);
71 hlist_add_head_rcu(&main_table->tb_hlist,
72 &fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
73 return 0;
74
75fail:
93456b6d 76 kfree(local_table);
dbb50165 77 return -ENOMEM;
c3e9a353 78}
1af5a8c4 79#else
1da177e4 80
8ad4942c 81struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
82{
83 struct fib_table *tb;
1af5a8c4 84 unsigned int h;
1da177e4 85
1af5a8c4
PM
86 if (id == 0)
87 id = RT_TABLE_MAIN;
8ad4942c 88 tb = fib_get_table(net, id);
1af5a8c4
PM
89 if (tb)
90 return tb;
1da177e4
LT
91 tb = fib_hash_init(id);
92 if (!tb)
93 return NULL;
1af5a8c4
PM
94 h = id & (FIB_TABLE_HASHSZ - 1);
95 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
1da177e4
LT
96 return tb;
97}
98
8ad4942c 99struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
100{
101 struct fib_table *tb;
102 struct hlist_node *node;
103 unsigned int h;
1da177e4 104
1af5a8c4
PM
105 if (id == 0)
106 id = RT_TABLE_MAIN;
107 h = id & (FIB_TABLE_HASHSZ - 1);
108 rcu_read_lock();
109 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
110 if (tb->tb_id == id) {
111 rcu_read_unlock();
112 return tb;
113 }
114 }
115 rcu_read_unlock();
116 return NULL;
117}
1da177e4
LT
118#endif /* CONFIG_IP_MULTIPLE_TABLES */
119
1da177e4
LT
120static void fib_flush(void)
121{
122 int flushed = 0;
1da177e4 123 struct fib_table *tb;
1af5a8c4
PM
124 struct hlist_node *node;
125 unsigned int h;
1da177e4 126
1af5a8c4
PM
127 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
128 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
129 flushed += tb->tb_flush(tb);
1da177e4 130 }
1da177e4
LT
131
132 if (flushed)
133 rt_cache_flush(-1);
134}
135
136/*
137 * Find the first device with a given source address.
138 */
139
60cad5da 140struct net_device * ip_dev_find(__be32 addr)
1da177e4
LT
141{
142 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
143 struct fib_result res;
144 struct net_device *dev = NULL;
03cf786c 145 struct fib_table *local_table;
1da177e4
LT
146
147#ifdef CONFIG_IP_MULTIPLE_TABLES
148 res.r = NULL;
149#endif
150
8ad4942c 151 local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
03cf786c 152 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
1da177e4
LT
153 return NULL;
154 if (res.type != RTN_LOCAL)
155 goto out;
156 dev = FIB_RES_DEV(res);
157
158 if (dev)
159 dev_hold(dev);
160out:
161 fib_res_put(&res);
162 return dev;
163}
164
05538116
LAT
165/*
166 * Find address type as if only "dev" was present in the system. If
167 * dev is NULL then all interfaces are taken into consideration.
168 */
169static inline unsigned __inet_dev_addr_type(const struct net_device *dev,
170 __be32 addr)
1da177e4
LT
171{
172 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
173 struct fib_result res;
174 unsigned ret = RTN_BROADCAST;
03cf786c 175 struct fib_table *local_table;
1da177e4 176
f97c1e0c 177 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
1da177e4 178 return RTN_BROADCAST;
f97c1e0c 179 if (ipv4_is_multicast(addr))
1da177e4
LT
180 return RTN_MULTICAST;
181
182#ifdef CONFIG_IP_MULTIPLE_TABLES
183 res.r = NULL;
184#endif
e905a9ed 185
8ad4942c 186 local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
03cf786c 187 if (local_table) {
1da177e4 188 ret = RTN_UNICAST;
03cf786c 189 if (!local_table->tb_lookup(local_table, &fl, &res)) {
05538116
LAT
190 if (!dev || dev == res.fi->fib_dev)
191 ret = res.type;
1da177e4
LT
192 fib_res_put(&res);
193 }
194 }
195 return ret;
196}
197
05538116
LAT
198unsigned int inet_addr_type(__be32 addr)
199{
200 return __inet_dev_addr_type(NULL, addr);
201}
202
203unsigned int inet_dev_addr_type(const struct net_device *dev, __be32 addr)
204{
205 return __inet_dev_addr_type(dev, addr);
206}
207
1da177e4
LT
208/* Given (packet source, input interface) and optional (dst, oif, tos):
209 - (main) check, that source is valid i.e. not broadcast or our local
210 address.
211 - figure out what "logical" interface this packet arrived
212 and calculate "specific destination" address.
213 - check, that packet arrived from expected physical interface.
214 */
215
d9c9df8c
AV
216int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
217 struct net_device *dev, __be32 *spec_dst, u32 *itag)
1da177e4
LT
218{
219 struct in_device *in_dev;
220 struct flowi fl = { .nl_u = { .ip4_u =
221 { .daddr = src,
222 .saddr = dst,
223 .tos = tos } },
224 .iif = oif };
225 struct fib_result res;
226 int no_addr, rpf;
227 int ret;
228
229 no_addr = rpf = 0;
230 rcu_read_lock();
e5ed6399 231 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
232 if (in_dev) {
233 no_addr = in_dev->ifa_list == NULL;
234 rpf = IN_DEV_RPFILTER(in_dev);
235 }
236 rcu_read_unlock();
237
238 if (in_dev == NULL)
239 goto e_inval;
240
241 if (fib_lookup(&fl, &res))
242 goto last_resort;
243 if (res.type != RTN_UNICAST)
244 goto e_inval_res;
245 *spec_dst = FIB_RES_PREFSRC(res);
246 fib_combine_itag(itag, &res);
247#ifdef CONFIG_IP_ROUTE_MULTIPATH
248 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
249#else
250 if (FIB_RES_DEV(res) == dev)
251#endif
252 {
253 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
254 fib_res_put(&res);
255 return ret;
256 }
257 fib_res_put(&res);
258 if (no_addr)
259 goto last_resort;
260 if (rpf)
261 goto e_inval;
262 fl.oif = dev->ifindex;
263
264 ret = 0;
265 if (fib_lookup(&fl, &res) == 0) {
266 if (res.type == RTN_UNICAST) {
267 *spec_dst = FIB_RES_PREFSRC(res);
268 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
269 }
270 fib_res_put(&res);
271 }
272 return ret;
273
274last_resort:
275 if (rpf)
276 goto e_inval;
277 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
278 *itag = 0;
279 return 0;
280
281e_inval_res:
282 fib_res_put(&res);
283e_inval:
284 return -EINVAL;
285}
286
81f7bf6c 287static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
288{
289 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
290}
291
292static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
293{
294 struct nlattr *nla;
295
296 nla = (struct nlattr *) ((char *) mx + len);
297 nla->nla_type = type;
298 nla->nla_len = nla_attr_size(4);
299 *(u32 *) nla_data(nla) = value;
300
301 return len + nla_total_size(4);
302}
303
304static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
305 struct fib_config *cfg)
306{
6d85c10a 307 __be32 addr;
4e902c57
TG
308 int plen;
309
310 memset(cfg, 0, sizeof(*cfg));
311
312 if (rt->rt_dst.sa_family != AF_INET)
313 return -EAFNOSUPPORT;
314
315 /*
316 * Check mask for validity:
317 * a) it must be contiguous.
318 * b) destination must have all host bits clear.
319 * c) if application forgot to set correct family (AF_INET),
320 * reject request unless it is absolutely clear i.e.
321 * both family and mask are zero.
322 */
323 plen = 32;
324 addr = sk_extract_addr(&rt->rt_dst);
325 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 326 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
327
328 if (rt->rt_genmask.sa_family != AF_INET) {
329 if (mask || rt->rt_genmask.sa_family)
330 return -EAFNOSUPPORT;
331 }
332
333 if (bad_mask(mask, addr))
334 return -EINVAL;
335
336 plen = inet_mask_len(mask);
337 }
338
339 cfg->fc_dst_len = plen;
340 cfg->fc_dst = addr;
341
342 if (cmd != SIOCDELRT) {
343 cfg->fc_nlflags = NLM_F_CREATE;
344 cfg->fc_protocol = RTPROT_BOOT;
345 }
346
347 if (rt->rt_metric)
348 cfg->fc_priority = rt->rt_metric - 1;
349
350 if (rt->rt_flags & RTF_REJECT) {
351 cfg->fc_scope = RT_SCOPE_HOST;
352 cfg->fc_type = RTN_UNREACHABLE;
353 return 0;
354 }
355
356 cfg->fc_scope = RT_SCOPE_NOWHERE;
357 cfg->fc_type = RTN_UNICAST;
358
359 if (rt->rt_dev) {
360 char *colon;
361 struct net_device *dev;
362 char devname[IFNAMSIZ];
363
364 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
365 return -EFAULT;
366
367 devname[IFNAMSIZ-1] = 0;
368 colon = strchr(devname, ':');
369 if (colon)
370 *colon = 0;
881d966b 371 dev = __dev_get_by_name(&init_net, devname);
4e902c57
TG
372 if (!dev)
373 return -ENODEV;
374 cfg->fc_oif = dev->ifindex;
375 if (colon) {
376 struct in_ifaddr *ifa;
377 struct in_device *in_dev = __in_dev_get_rtnl(dev);
378 if (!in_dev)
379 return -ENODEV;
380 *colon = ':';
381 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
382 if (strcmp(ifa->ifa_label, devname) == 0)
383 break;
384 if (ifa == NULL)
385 return -ENODEV;
386 cfg->fc_prefsrc = ifa->ifa_local;
387 }
388 }
389
390 addr = sk_extract_addr(&rt->rt_gateway);
391 if (rt->rt_gateway.sa_family == AF_INET && addr) {
392 cfg->fc_gw = addr;
393 if (rt->rt_flags & RTF_GATEWAY &&
394 inet_addr_type(addr) == RTN_UNICAST)
395 cfg->fc_scope = RT_SCOPE_UNIVERSE;
396 }
397
398 if (cmd == SIOCDELRT)
399 return 0;
400
401 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
402 return -EINVAL;
403
404 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
405 cfg->fc_scope = RT_SCOPE_LINK;
406
407 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
408 struct nlattr *mx;
409 int len = 0;
410
411 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 412 if (mx == NULL)
4e902c57
TG
413 return -ENOMEM;
414
415 if (rt->rt_flags & RTF_MTU)
416 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
417
418 if (rt->rt_flags & RTF_WINDOW)
419 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
420
421 if (rt->rt_flags & RTF_IRTT)
422 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
423
424 cfg->fc_mx = mx;
425 cfg->fc_mx_len = len;
426 }
427
428 return 0;
429}
430
1da177e4
LT
431/*
432 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
433 */
e905a9ed 434
1da177e4
LT
435int ip_rt_ioctl(unsigned int cmd, void __user *arg)
436{
4e902c57
TG
437 struct fib_config cfg;
438 struct rtentry rt;
1da177e4 439 int err;
1da177e4
LT
440
441 switch (cmd) {
442 case SIOCADDRT: /* Add a route */
443 case SIOCDELRT: /* Delete a route */
444 if (!capable(CAP_NET_ADMIN))
445 return -EPERM;
4e902c57
TG
446
447 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 448 return -EFAULT;
4e902c57 449
1da177e4 450 rtnl_lock();
4e902c57 451 err = rtentry_to_fib_config(cmd, &rt, &cfg);
1da177e4 452 if (err == 0) {
4e902c57
TG
453 struct fib_table *tb;
454
1da177e4 455 if (cmd == SIOCDELRT) {
8ad4942c 456 tb = fib_get_table(&init_net, cfg.fc_table);
1da177e4 457 if (tb)
4e902c57
TG
458 err = tb->tb_delete(tb, &cfg);
459 else
460 err = -ESRCH;
1da177e4 461 } else {
8ad4942c 462 tb = fib_new_table(&init_net, cfg.fc_table);
1da177e4 463 if (tb)
4e902c57
TG
464 err = tb->tb_insert(tb, &cfg);
465 else
466 err = -ENOBUFS;
1da177e4 467 }
4e902c57
TG
468
469 /* allocated by rtentry_to_fib_config() */
470 kfree(cfg.fc_mx);
1da177e4
LT
471 }
472 rtnl_unlock();
473 return err;
474 }
475 return -EINVAL;
476}
477
ef7c79ed 478const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
479 [RTA_DST] = { .type = NLA_U32 },
480 [RTA_SRC] = { .type = NLA_U32 },
481 [RTA_IIF] = { .type = NLA_U32 },
482 [RTA_OIF] = { .type = NLA_U32 },
483 [RTA_GATEWAY] = { .type = NLA_U32 },
484 [RTA_PRIORITY] = { .type = NLA_U32 },
485 [RTA_PREFSRC] = { .type = NLA_U32 },
486 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 487 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57
TG
488 [RTA_PROTOINFO] = { .type = NLA_U32 },
489 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
490};
491
492static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
493 struct fib_config *cfg)
1da177e4 494{
4e902c57
TG
495 struct nlattr *attr;
496 int err, remaining;
497 struct rtmsg *rtm;
498
499 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
500 if (err < 0)
501 goto errout;
502
503 memset(cfg, 0, sizeof(*cfg));
504
505 rtm = nlmsg_data(nlh);
4e902c57 506 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
507 cfg->fc_tos = rtm->rtm_tos;
508 cfg->fc_table = rtm->rtm_table;
509 cfg->fc_protocol = rtm->rtm_protocol;
510 cfg->fc_scope = rtm->rtm_scope;
511 cfg->fc_type = rtm->rtm_type;
512 cfg->fc_flags = rtm->rtm_flags;
513 cfg->fc_nlflags = nlh->nlmsg_flags;
514
515 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
516 cfg->fc_nlinfo.nlh = nlh;
517
a0ee18b9
TG
518 if (cfg->fc_type > RTN_MAX) {
519 err = -EINVAL;
520 goto errout;
521 }
522
4e902c57 523 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 524 switch (nla_type(attr)) {
4e902c57 525 case RTA_DST:
17fb2c64 526 cfg->fc_dst = nla_get_be32(attr);
4e902c57 527 break;
4e902c57
TG
528 case RTA_OIF:
529 cfg->fc_oif = nla_get_u32(attr);
530 break;
531 case RTA_GATEWAY:
17fb2c64 532 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
533 break;
534 case RTA_PRIORITY:
535 cfg->fc_priority = nla_get_u32(attr);
536 break;
537 case RTA_PREFSRC:
17fb2c64 538 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
539 break;
540 case RTA_METRICS:
541 cfg->fc_mx = nla_data(attr);
542 cfg->fc_mx_len = nla_len(attr);
543 break;
544 case RTA_MULTIPATH:
545 cfg->fc_mp = nla_data(attr);
546 cfg->fc_mp_len = nla_len(attr);
547 break;
548 case RTA_FLOW:
549 cfg->fc_flow = nla_get_u32(attr);
550 break;
4e902c57
TG
551 case RTA_TABLE:
552 cfg->fc_table = nla_get_u32(attr);
553 break;
1da177e4
LT
554 }
555 }
4e902c57 556
1da177e4 557 return 0;
4e902c57
TG
558errout:
559 return err;
1da177e4
LT
560}
561
63f3444f 562static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 563{
b854272b 564 struct net *net = skb->sk->sk_net;
4e902c57
TG
565 struct fib_config cfg;
566 struct fib_table *tb;
567 int err;
1da177e4 568
b854272b
DL
569 if (net != &init_net)
570 return -EINVAL;
571
4e902c57
TG
572 err = rtm_to_fib_config(skb, nlh, &cfg);
573 if (err < 0)
574 goto errout;
1da177e4 575
8ad4942c 576 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
577 if (tb == NULL) {
578 err = -ESRCH;
579 goto errout;
580 }
581
582 err = tb->tb_delete(tb, &cfg);
583errout:
584 return err;
1da177e4
LT
585}
586
63f3444f 587static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 588{
b854272b 589 struct net *net = skb->sk->sk_net;
4e902c57
TG
590 struct fib_config cfg;
591 struct fib_table *tb;
592 int err;
1da177e4 593
b854272b
DL
594 if (net != &init_net)
595 return -EINVAL;
596
4e902c57
TG
597 err = rtm_to_fib_config(skb, nlh, &cfg);
598 if (err < 0)
599 goto errout;
1da177e4 600
8ad4942c 601 tb = fib_new_table(&init_net, cfg.fc_table);
4e902c57
TG
602 if (tb == NULL) {
603 err = -ENOBUFS;
604 goto errout;
605 }
606
607 err = tb->tb_insert(tb, &cfg);
608errout:
609 return err;
1da177e4
LT
610}
611
63f3444f 612static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 613{
b854272b 614 struct net *net = skb->sk->sk_net;
1af5a8c4
PM
615 unsigned int h, s_h;
616 unsigned int e = 0, s_e;
1da177e4 617 struct fib_table *tb;
1af5a8c4
PM
618 struct hlist_node *node;
619 int dumped = 0;
1da177e4 620
b854272b
DL
621 if (net != &init_net)
622 return 0;
623
be403ea1
TG
624 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
625 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
626 return ip_rt_dump(skb, cb);
627
1af5a8c4
PM
628 s_h = cb->args[0];
629 s_e = cb->args[1];
630
631 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
632 e = 0;
633 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
634 if (e < s_e)
635 goto next;
636 if (dumped)
637 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 638 2 * sizeof(cb->args[0]));
1af5a8c4
PM
639 if (tb->tb_dump(tb, skb, cb) < 0)
640 goto out;
641 dumped = 1;
642next:
643 e++;
644 }
1da177e4 645 }
1af5a8c4
PM
646out:
647 cb->args[1] = e;
648 cb->args[0] = h;
1da177e4
LT
649
650 return skb->len;
651}
652
653/* Prepare and feed an intra-kernel routing request.
654 Really, it should be a netlink message, but :-( netlink
655 may not be configured, so we feed it directly
656 to the fib engine. It is legal, because all events occur
657 only when netlink is already locked.
658 */
659
81f7bf6c 660static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 661{
4e902c57
TG
662 struct fib_table *tb;
663 struct fib_config cfg = {
664 .fc_protocol = RTPROT_KERNEL,
665 .fc_type = type,
666 .fc_dst = dst,
667 .fc_dst_len = dst_len,
668 .fc_prefsrc = ifa->ifa_local,
669 .fc_oif = ifa->ifa_dev->dev->ifindex,
670 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
671 };
1da177e4
LT
672
673 if (type == RTN_UNICAST)
8ad4942c 674 tb = fib_new_table(&init_net, RT_TABLE_MAIN);
1da177e4 675 else
8ad4942c 676 tb = fib_new_table(&init_net, RT_TABLE_LOCAL);
1da177e4
LT
677
678 if (tb == NULL)
679 return;
680
4e902c57 681 cfg.fc_table = tb->tb_id;
1da177e4 682
4e902c57
TG
683 if (type != RTN_LOCAL)
684 cfg.fc_scope = RT_SCOPE_LINK;
685 else
686 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
687
688 if (cmd == RTM_NEWROUTE)
4e902c57 689 tb->tb_insert(tb, &cfg);
1da177e4 690 else
4e902c57 691 tb->tb_delete(tb, &cfg);
1da177e4
LT
692}
693
0ff60a45 694void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
695{
696 struct in_device *in_dev = ifa->ifa_dev;
697 struct net_device *dev = in_dev->dev;
698 struct in_ifaddr *prim = ifa;
a144ea4b
AV
699 __be32 mask = ifa->ifa_mask;
700 __be32 addr = ifa->ifa_local;
701 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
702
703 if (ifa->ifa_flags&IFA_F_SECONDARY) {
704 prim = inet_ifa_byprefix(in_dev, prefix, mask);
705 if (prim == NULL) {
706 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
707 return;
708 }
709 }
710
711 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
712
713 if (!(dev->flags&IFF_UP))
714 return;
715
716 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 717 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
718 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
719
f97c1e0c 720 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
721 (prefix != addr || ifa->ifa_prefixlen < 32)) {
722 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
723 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
724
725 /* Add network specific broadcasts, when it takes a sense */
726 if (ifa->ifa_prefixlen < 31) {
727 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
728 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
729 }
730 }
731}
732
733static void fib_del_ifaddr(struct in_ifaddr *ifa)
734{
735 struct in_device *in_dev = ifa->ifa_dev;
736 struct net_device *dev = in_dev->dev;
737 struct in_ifaddr *ifa1;
738 struct in_ifaddr *prim = ifa;
a144ea4b
AV
739 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
740 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
741#define LOCAL_OK 1
742#define BRD_OK 2
743#define BRD0_OK 4
744#define BRD1_OK 8
745 unsigned ok = 0;
746
747 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
748 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
749 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
750 else {
751 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
752 if (prim == NULL) {
753 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
754 return;
755 }
756 }
757
758 /* Deletion is more complicated than add.
759 We should take care of not to delete too much :-)
760
761 Scan address list to be sure that addresses are really gone.
762 */
763
764 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
765 if (ifa->ifa_local == ifa1->ifa_local)
766 ok |= LOCAL_OK;
767 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
768 ok |= BRD_OK;
769 if (brd == ifa1->ifa_broadcast)
770 ok |= BRD1_OK;
771 if (any == ifa1->ifa_broadcast)
772 ok |= BRD0_OK;
773 }
774
775 if (!(ok&BRD_OK))
776 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
777 if (!(ok&BRD1_OK))
778 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
779 if (!(ok&BRD0_OK))
780 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
781 if (!(ok&LOCAL_OK)) {
782 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
783
784 /* Check, that this local address finally disappeared. */
785 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
786 /* And the last, but not the least thing.
787 We must flush stray FIB entries.
788
789 First of all, we scan fib_info list searching
790 for stray nexthop entries, then ignite fib_flush.
791 */
792 if (fib_sync_down(ifa->ifa_local, NULL, 0))
793 fib_flush();
794 }
795 }
796#undef LOCAL_OK
797#undef BRD_OK
798#undef BRD0_OK
799#undef BRD1_OK
800}
801
246955fe
RO
802static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
803{
e905a9ed 804
246955fe 805 struct fib_result res;
5f300893 806 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 807 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
808 .tos = frn->fl_tos,
809 .scope = frn->fl_scope } } };
1194ed0a 810
912a41a4
SV
811#ifdef CONFIG_IP_MULTIPLE_TABLES
812 res.r = NULL;
813#endif
814
1194ed0a 815 frn->err = -ENOENT;
246955fe
RO
816 if (tb) {
817 local_bh_disable();
818
819 frn->tb_id = tb->tb_id;
820 frn->err = tb->tb_lookup(tb, &fl, &res);
821
822 if (!frn->err) {
823 frn->prefixlen = res.prefixlen;
824 frn->nh_sel = res.nh_sel;
825 frn->type = res.type;
826 frn->scope = res.scope;
1194ed0a 827 fib_res_put(&res);
246955fe
RO
828 }
829 local_bh_enable();
830 }
831}
832
28f7b036 833static void nl_fib_input(struct sk_buff *skb)
246955fe 834{
246955fe 835 struct fib_result_nl *frn;
28f7b036 836 struct nlmsghdr *nlh;
246955fe 837 struct fib_table *tb;
28f7b036 838 u32 pid;
1194ed0a 839
b529ccf2 840 nlh = nlmsg_hdr(skb);
ea86575e 841 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 842 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 843 return;
d883a036
DL
844
845 skb = skb_clone(skb, GFP_KERNEL);
846 if (skb == NULL)
847 return;
848 nlh = nlmsg_hdr(skb);
e905a9ed 849
246955fe 850 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
8ad4942c 851 tb = fib_get_table(&init_net, frn->tb_id_in);
246955fe
RO
852
853 nl_fib_lookup(frn, tb);
e905a9ed 854
1194ed0a 855 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 856 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 857 NETLINK_CB(skb).dst_group = 0; /* unicast */
cd40b7d3 858 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 859}
246955fe 860
7b1a74fd 861static int nl_fib_lookup_init(struct net *net)
246955fe 862{
7b1a74fd 863 fibnl = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
cd40b7d3 864 nl_fib_input, NULL, THIS_MODULE);
7b1a74fd
DL
865 if (fibnl == NULL)
866 return -EAFNOSUPPORT;
867 return 0;
868}
869
870static void nl_fib_lookup_exit(struct net *net)
871{
872 sock_put(fibnl);
246955fe
RO
873}
874
1da177e4
LT
/*
 * Take routing state for dev down: sync the FIB (flushing the tables if
 * fib_sync_down() reports changes), flush the route cache and notify ARP.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int changed = fib_sync_down(0, dev, force);

	if (changed)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
882
883static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
884{
885 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
886
887 switch (event) {
888 case NETDEV_UP:
889 fib_add_ifaddr(ifa);
890#ifdef CONFIG_IP_ROUTE_MULTIPATH
891 fib_sync_up(ifa->ifa_dev->dev);
892#endif
893 rt_cache_flush(-1);
894 break;
895 case NETDEV_DOWN:
896 fib_del_ifaddr(ifa);
9fcc2e8a 897 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
898 /* Last address was deleted from this interface.
899 Disable IP.
900 */
901 fib_disable_ip(ifa->ifa_dev->dev, 1);
902 } else {
903 rt_cache_flush(-1);
904 }
905 break;
906 }
907 return NOTIFY_DONE;
908}
909
910static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
911{
912 struct net_device *dev = ptr;
e5ed6399 913 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4 914
e9dc8653
EB
915 if (dev->nd_net != &init_net)
916 return NOTIFY_DONE;
917
1da177e4
LT
918 if (event == NETDEV_UNREGISTER) {
919 fib_disable_ip(dev, 2);
920 return NOTIFY_DONE;
921 }
922
923 if (!in_dev)
924 return NOTIFY_DONE;
925
926 switch (event) {
927 case NETDEV_UP:
928 for_ifa(in_dev) {
929 fib_add_ifaddr(ifa);
930 } endfor_ifa(in_dev);
931#ifdef CONFIG_IP_ROUTE_MULTIPATH
932 fib_sync_up(dev);
933#endif
934 rt_cache_flush(-1);
935 break;
936 case NETDEV_DOWN:
937 fib_disable_ip(dev, 0);
938 break;
939 case NETDEV_CHANGEMTU:
940 case NETDEV_CHANGE:
941 rt_cache_flush(0);
942 break;
943 }
944 return NOTIFY_DONE;
945}
946
947static struct notifier_block fib_inetaddr_notifier = {
948 .notifier_call =fib_inetaddr_event,
949};
950
951static struct notifier_block fib_netdev_notifier = {
952 .notifier_call =fib_netdev_event,
953};
954
7b1a74fd 955static int __net_init ip_fib_net_init(struct net *net)
1da177e4 956{
1af5a8c4
PM
957 unsigned int i;
958
959 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
960 INIT_HLIST_HEAD(&fib_table_hash[i]);
c3e9a353 961
7b1a74fd
DL
962 return fib4_rules_init(net);
963}
1da177e4 964
7b1a74fd
DL
965static void __net_exit ip_fib_net_exit(struct net *net)
966{
967 unsigned int i;
968
969#ifdef CONFIG_IP_MULTIPLE_TABLES
970 fib4_rules_exit(net);
971#endif
972
973 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
974 struct fib_table *tb;
975 struct hlist_head *head;
976 struct hlist_node *node, *tmp;
63f3444f 977
7b1a74fd
DL
978 head = &fib_table_hash[i];
979 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
980 hlist_del(node);
981 tb->tb_flush(tb);
982 kfree(tb);
983 }
984 }
985}
986
987static int __net_init fib_net_init(struct net *net)
988{
989 int error;
990
991 error = 0;
992 if (net != &init_net)
993 goto out;
994
995 error = ip_fib_net_init(net);
996 if (error < 0)
997 goto out;
998 error = nl_fib_lookup_init(net);
999 if (error < 0)
1000 goto out_nlfl;
1001 error = fib_proc_init(net);
1002 if (error < 0)
1003 goto out_proc;
1004out:
1005 return error;
1006
1007out_proc:
1008 nl_fib_lookup_exit(net);
1009out_nlfl:
1010 ip_fib_net_exit(net);
1011 goto out;
1012}
1013
1014static void __net_exit fib_net_exit(struct net *net)
1015{
1016 fib_proc_exit(net);
1017 nl_fib_lookup_exit(net);
1018 ip_fib_net_exit(net);
1019}
1020
1021static struct pernet_operations fib_net_ops = {
1022 .init = fib_net_init,
1023 .exit = fib_net_exit,
1024};
1025
1026void __init ip_fib_init(void)
1027{
63f3444f
TG
1028 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1029 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1030 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1031
1032 register_pernet_subsys(&fib_net_ops);
1033 register_netdevice_notifier(&fib_netdev_notifier);
1034 register_inetaddr_notifier(&fib_inetaddr_notifier);
1da177e4
LT
1035}
1036
1037EXPORT_SYMBOL(inet_addr_type);
05538116 1038EXPORT_SYMBOL(inet_dev_addr_type);
a1e8733e 1039EXPORT_SYMBOL(ip_dev_find);