]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
[NETNS]: Namespacing IPv4 fib rules.
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
4fc268d2 22#include <linux/capability.h>
1da177e4
LT
23#include <linux/types.h>
24#include <linux/kernel.h>
1da177e4
LT
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
14c85021 32#include <linux/inetdevice.h>
1da177e4 33#include <linux/netdevice.h>
1823730f 34#include <linux/if_addr.h>
1da177e4
LT
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
1da177e4 37#include <linux/init.h>
1af5a8c4 38#include <linux/list.h>
1da177e4
LT
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
63f3444f 48#include <net/rtnetlink.h>
1da177e4
LT
49
50#define FFprint(a...) printk(KERN_DEBUG a)
51
28f7b036 52static struct sock *fibnl;
93456b6d 53struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
28f7b036 54
1da177e4
LT
55#ifndef CONFIG_IP_MULTIPLE_TABLES
56
7b1a74fd 57static int __net_init fib4_rules_init(struct net *net)
c3e9a353 58{
93456b6d
DL
59 struct fib_table *local_table, *main_table;
60
61 local_table = fib_hash_init(RT_TABLE_LOCAL);
62 if (local_table == NULL)
dbb50165
DL
63 return -ENOMEM;
64
93456b6d
DL
65 main_table = fib_hash_init(RT_TABLE_MAIN);
66 if (main_table == NULL)
dbb50165
DL
67 goto fail;
68
93456b6d
DL
69 hlist_add_head_rcu(&local_table->tb_hlist,
70 &fib_table_hash[TABLE_LOCAL_INDEX]);
71 hlist_add_head_rcu(&main_table->tb_hlist,
72 &fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
73 return 0;
74
75fail:
93456b6d 76 kfree(local_table);
dbb50165 77 return -ENOMEM;
c3e9a353 78}
1af5a8c4 79#else
1da177e4 80
8ad4942c 81struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
82{
83 struct fib_table *tb;
1af5a8c4 84 unsigned int h;
1da177e4 85
1af5a8c4
PM
86 if (id == 0)
87 id = RT_TABLE_MAIN;
8ad4942c 88 tb = fib_get_table(net, id);
1af5a8c4
PM
89 if (tb)
90 return tb;
1da177e4
LT
91 tb = fib_hash_init(id);
92 if (!tb)
93 return NULL;
1af5a8c4
PM
94 h = id & (FIB_TABLE_HASHSZ - 1);
95 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
1da177e4
LT
96 return tb;
97}
98
8ad4942c 99struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
100{
101 struct fib_table *tb;
102 struct hlist_node *node;
103 unsigned int h;
1da177e4 104
1af5a8c4
PM
105 if (id == 0)
106 id = RT_TABLE_MAIN;
107 h = id & (FIB_TABLE_HASHSZ - 1);
108 rcu_read_lock();
109 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
110 if (tb->tb_id == id) {
111 rcu_read_unlock();
112 return tb;
113 }
114 }
115 rcu_read_unlock();
116 return NULL;
117}
1da177e4
LT
118#endif /* CONFIG_IP_MULTIPLE_TABLES */
119
1da177e4
LT
120static void fib_flush(void)
121{
122 int flushed = 0;
1da177e4 123 struct fib_table *tb;
1af5a8c4
PM
124 struct hlist_node *node;
125 unsigned int h;
1da177e4 126
1af5a8c4
PM
127 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
128 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
129 flushed += tb->tb_flush(tb);
1da177e4 130 }
1da177e4
LT
131
132 if (flushed)
133 rt_cache_flush(-1);
134}
135
136/*
137 * Find the first device with a given source address.
138 */
139
60cad5da 140struct net_device * ip_dev_find(__be32 addr)
1da177e4
LT
141{
142 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
143 struct fib_result res;
144 struct net_device *dev = NULL;
03cf786c 145 struct fib_table *local_table;
1da177e4
LT
146
147#ifdef CONFIG_IP_MULTIPLE_TABLES
148 res.r = NULL;
149#endif
150
8ad4942c 151 local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
03cf786c 152 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
1da177e4
LT
153 return NULL;
154 if (res.type != RTN_LOCAL)
155 goto out;
156 dev = FIB_RES_DEV(res);
157
158 if (dev)
159 dev_hold(dev);
160out:
161 fib_res_put(&res);
162 return dev;
163}
164
05538116
LAT
/*
 * Find address type as if only "dev" was present in the system. If
 * dev is NULL then all interfaces are taken into consideration.
 */
6b175b26
EB
169static inline unsigned __inet_dev_addr_type(struct net *net,
170 const struct net_device *dev,
05538116 171 __be32 addr)
1da177e4
LT
172{
173 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
174 struct fib_result res;
175 unsigned ret = RTN_BROADCAST;
03cf786c 176 struct fib_table *local_table;
1da177e4 177
f97c1e0c 178 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
1da177e4 179 return RTN_BROADCAST;
f97c1e0c 180 if (ipv4_is_multicast(addr))
1da177e4
LT
181 return RTN_MULTICAST;
182
183#ifdef CONFIG_IP_MULTIPLE_TABLES
184 res.r = NULL;
185#endif
e905a9ed 186
6b175b26 187 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 188 if (local_table) {
1da177e4 189 ret = RTN_UNICAST;
03cf786c 190 if (!local_table->tb_lookup(local_table, &fl, &res)) {
05538116
LAT
191 if (!dev || dev == res.fi->fib_dev)
192 ret = res.type;
1da177e4
LT
193 fib_res_put(&res);
194 }
195 }
196 return ret;
197}
198
6b175b26 199unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 200{
6b175b26 201 return __inet_dev_addr_type(net, NULL, addr);
05538116
LAT
202}
203
6b175b26
EB
204unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
205 __be32 addr)
05538116 206{
6b175b26 207 return __inet_dev_addr_type(net, dev, addr);
05538116
LAT
208}
209
1da177e4
LT
210/* Given (packet source, input interface) and optional (dst, oif, tos):
211 - (main) check, that source is valid i.e. not broadcast or our local
212 address.
213 - figure out what "logical" interface this packet arrived
214 and calculate "specific destination" address.
215 - check, that packet arrived from expected physical interface.
216 */
217
d9c9df8c
AV
218int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
219 struct net_device *dev, __be32 *spec_dst, u32 *itag)
1da177e4
LT
220{
221 struct in_device *in_dev;
222 struct flowi fl = { .nl_u = { .ip4_u =
223 { .daddr = src,
224 .saddr = dst,
225 .tos = tos } },
226 .iif = oif };
227 struct fib_result res;
228 int no_addr, rpf;
229 int ret;
230
231 no_addr = rpf = 0;
232 rcu_read_lock();
e5ed6399 233 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
234 if (in_dev) {
235 no_addr = in_dev->ifa_list == NULL;
236 rpf = IN_DEV_RPFILTER(in_dev);
237 }
238 rcu_read_unlock();
239
240 if (in_dev == NULL)
241 goto e_inval;
242
243 if (fib_lookup(&fl, &res))
244 goto last_resort;
245 if (res.type != RTN_UNICAST)
246 goto e_inval_res;
247 *spec_dst = FIB_RES_PREFSRC(res);
248 fib_combine_itag(itag, &res);
249#ifdef CONFIG_IP_ROUTE_MULTIPATH
250 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
251#else
252 if (FIB_RES_DEV(res) == dev)
253#endif
254 {
255 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
256 fib_res_put(&res);
257 return ret;
258 }
259 fib_res_put(&res);
260 if (no_addr)
261 goto last_resort;
262 if (rpf)
263 goto e_inval;
264 fl.oif = dev->ifindex;
265
266 ret = 0;
267 if (fib_lookup(&fl, &res) == 0) {
268 if (res.type == RTN_UNICAST) {
269 *spec_dst = FIB_RES_PREFSRC(res);
270 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
271 }
272 fib_res_put(&res);
273 }
274 return ret;
275
276last_resort:
277 if (rpf)
278 goto e_inval;
279 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
280 *itag = 0;
281 return 0;
282
283e_inval_res:
284 fib_res_put(&res);
285e_inval:
286 return -EINVAL;
287}
288
81f7bf6c 289static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
290{
291 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
292}
293
294static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
295{
296 struct nlattr *nla;
297
298 nla = (struct nlattr *) ((char *) mx + len);
299 nla->nla_type = type;
300 nla->nla_len = nla_attr_size(4);
301 *(u32 *) nla_data(nla) = value;
302
303 return len + nla_total_size(4);
304}
305
306static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
307 struct fib_config *cfg)
308{
6d85c10a 309 __be32 addr;
4e902c57
TG
310 int plen;
311
312 memset(cfg, 0, sizeof(*cfg));
4d1169c1 313 cfg->fc_nlinfo.nl_net = &init_net;
4e902c57
TG
314
315 if (rt->rt_dst.sa_family != AF_INET)
316 return -EAFNOSUPPORT;
317
318 /*
319 * Check mask for validity:
320 * a) it must be contiguous.
321 * b) destination must have all host bits clear.
322 * c) if application forgot to set correct family (AF_INET),
323 * reject request unless it is absolutely clear i.e.
324 * both family and mask are zero.
325 */
326 plen = 32;
327 addr = sk_extract_addr(&rt->rt_dst);
328 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 329 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
330
331 if (rt->rt_genmask.sa_family != AF_INET) {
332 if (mask || rt->rt_genmask.sa_family)
333 return -EAFNOSUPPORT;
334 }
335
336 if (bad_mask(mask, addr))
337 return -EINVAL;
338
339 plen = inet_mask_len(mask);
340 }
341
342 cfg->fc_dst_len = plen;
343 cfg->fc_dst = addr;
344
345 if (cmd != SIOCDELRT) {
346 cfg->fc_nlflags = NLM_F_CREATE;
347 cfg->fc_protocol = RTPROT_BOOT;
348 }
349
350 if (rt->rt_metric)
351 cfg->fc_priority = rt->rt_metric - 1;
352
353 if (rt->rt_flags & RTF_REJECT) {
354 cfg->fc_scope = RT_SCOPE_HOST;
355 cfg->fc_type = RTN_UNREACHABLE;
356 return 0;
357 }
358
359 cfg->fc_scope = RT_SCOPE_NOWHERE;
360 cfg->fc_type = RTN_UNICAST;
361
362 if (rt->rt_dev) {
363 char *colon;
364 struct net_device *dev;
365 char devname[IFNAMSIZ];
366
367 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
368 return -EFAULT;
369
370 devname[IFNAMSIZ-1] = 0;
371 colon = strchr(devname, ':');
372 if (colon)
373 *colon = 0;
881d966b 374 dev = __dev_get_by_name(&init_net, devname);
4e902c57
TG
375 if (!dev)
376 return -ENODEV;
377 cfg->fc_oif = dev->ifindex;
378 if (colon) {
379 struct in_ifaddr *ifa;
380 struct in_device *in_dev = __in_dev_get_rtnl(dev);
381 if (!in_dev)
382 return -ENODEV;
383 *colon = ':';
384 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
385 if (strcmp(ifa->ifa_label, devname) == 0)
386 break;
387 if (ifa == NULL)
388 return -ENODEV;
389 cfg->fc_prefsrc = ifa->ifa_local;
390 }
391 }
392
393 addr = sk_extract_addr(&rt->rt_gateway);
394 if (rt->rt_gateway.sa_family == AF_INET && addr) {
395 cfg->fc_gw = addr;
396 if (rt->rt_flags & RTF_GATEWAY &&
6b175b26 397 inet_addr_type(&init_net, addr) == RTN_UNICAST)
4e902c57
TG
398 cfg->fc_scope = RT_SCOPE_UNIVERSE;
399 }
400
401 if (cmd == SIOCDELRT)
402 return 0;
403
404 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
405 return -EINVAL;
406
407 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
408 cfg->fc_scope = RT_SCOPE_LINK;
409
410 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
411 struct nlattr *mx;
412 int len = 0;
413
414 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 415 if (mx == NULL)
4e902c57
TG
416 return -ENOMEM;
417
418 if (rt->rt_flags & RTF_MTU)
419 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
420
421 if (rt->rt_flags & RTF_WINDOW)
422 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
423
424 if (rt->rt_flags & RTF_IRTT)
425 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
426
427 cfg->fc_mx = mx;
428 cfg->fc_mx_len = len;
429 }
430
431 return 0;
432}
433
1da177e4
LT
434/*
435 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
436 */
e905a9ed 437
1da177e4
LT
438int ip_rt_ioctl(unsigned int cmd, void __user *arg)
439{
4e902c57
TG
440 struct fib_config cfg;
441 struct rtentry rt;
1da177e4 442 int err;
1da177e4
LT
443
444 switch (cmd) {
445 case SIOCADDRT: /* Add a route */
446 case SIOCDELRT: /* Delete a route */
447 if (!capable(CAP_NET_ADMIN))
448 return -EPERM;
4e902c57
TG
449
450 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 451 return -EFAULT;
4e902c57 452
1da177e4 453 rtnl_lock();
4e902c57 454 err = rtentry_to_fib_config(cmd, &rt, &cfg);
1da177e4 455 if (err == 0) {
4e902c57
TG
456 struct fib_table *tb;
457
1da177e4 458 if (cmd == SIOCDELRT) {
8ad4942c 459 tb = fib_get_table(&init_net, cfg.fc_table);
1da177e4 460 if (tb)
4e902c57
TG
461 err = tb->tb_delete(tb, &cfg);
462 else
463 err = -ESRCH;
1da177e4 464 } else {
8ad4942c 465 tb = fib_new_table(&init_net, cfg.fc_table);
1da177e4 466 if (tb)
4e902c57
TG
467 err = tb->tb_insert(tb, &cfg);
468 else
469 err = -ENOBUFS;
1da177e4 470 }
4e902c57
TG
471
472 /* allocated by rtentry_to_fib_config() */
473 kfree(cfg.fc_mx);
1da177e4
LT
474 }
475 rtnl_unlock();
476 return err;
477 }
478 return -EINVAL;
479}
480
ef7c79ed 481const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
482 [RTA_DST] = { .type = NLA_U32 },
483 [RTA_SRC] = { .type = NLA_U32 },
484 [RTA_IIF] = { .type = NLA_U32 },
485 [RTA_OIF] = { .type = NLA_U32 },
486 [RTA_GATEWAY] = { .type = NLA_U32 },
487 [RTA_PRIORITY] = { .type = NLA_U32 },
488 [RTA_PREFSRC] = { .type = NLA_U32 },
489 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 490 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57
TG
491 [RTA_PROTOINFO] = { .type = NLA_U32 },
492 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
493};
494
495static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
496 struct fib_config *cfg)
1da177e4 497{
4e902c57
TG
498 struct nlattr *attr;
499 int err, remaining;
500 struct rtmsg *rtm;
501
502 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
503 if (err < 0)
504 goto errout;
505
506 memset(cfg, 0, sizeof(*cfg));
507
508 rtm = nlmsg_data(nlh);
4e902c57 509 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
510 cfg->fc_tos = rtm->rtm_tos;
511 cfg->fc_table = rtm->rtm_table;
512 cfg->fc_protocol = rtm->rtm_protocol;
513 cfg->fc_scope = rtm->rtm_scope;
514 cfg->fc_type = rtm->rtm_type;
515 cfg->fc_flags = rtm->rtm_flags;
516 cfg->fc_nlflags = nlh->nlmsg_flags;
517
518 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
519 cfg->fc_nlinfo.nlh = nlh;
4d1169c1 520 cfg->fc_nlinfo.nl_net = &init_net;
4e902c57 521
a0ee18b9
TG
522 if (cfg->fc_type > RTN_MAX) {
523 err = -EINVAL;
524 goto errout;
525 }
526
4e902c57 527 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 528 switch (nla_type(attr)) {
4e902c57 529 case RTA_DST:
17fb2c64 530 cfg->fc_dst = nla_get_be32(attr);
4e902c57 531 break;
4e902c57
TG
532 case RTA_OIF:
533 cfg->fc_oif = nla_get_u32(attr);
534 break;
535 case RTA_GATEWAY:
17fb2c64 536 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
537 break;
538 case RTA_PRIORITY:
539 cfg->fc_priority = nla_get_u32(attr);
540 break;
541 case RTA_PREFSRC:
17fb2c64 542 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
543 break;
544 case RTA_METRICS:
545 cfg->fc_mx = nla_data(attr);
546 cfg->fc_mx_len = nla_len(attr);
547 break;
548 case RTA_MULTIPATH:
549 cfg->fc_mp = nla_data(attr);
550 cfg->fc_mp_len = nla_len(attr);
551 break;
552 case RTA_FLOW:
553 cfg->fc_flow = nla_get_u32(attr);
554 break;
4e902c57
TG
555 case RTA_TABLE:
556 cfg->fc_table = nla_get_u32(attr);
557 break;
1da177e4
LT
558 }
559 }
4e902c57 560
1da177e4 561 return 0;
4e902c57
TG
562errout:
563 return err;
1da177e4
LT
564}
565
63f3444f 566static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 567{
b854272b 568 struct net *net = skb->sk->sk_net;
4e902c57
TG
569 struct fib_config cfg;
570 struct fib_table *tb;
571 int err;
1da177e4 572
b854272b
DL
573 if (net != &init_net)
574 return -EINVAL;
575
4e902c57
TG
576 err = rtm_to_fib_config(skb, nlh, &cfg);
577 if (err < 0)
578 goto errout;
1da177e4 579
8ad4942c 580 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
581 if (tb == NULL) {
582 err = -ESRCH;
583 goto errout;
584 }
585
586 err = tb->tb_delete(tb, &cfg);
587errout:
588 return err;
1da177e4
LT
589}
590
63f3444f 591static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 592{
b854272b 593 struct net *net = skb->sk->sk_net;
4e902c57
TG
594 struct fib_config cfg;
595 struct fib_table *tb;
596 int err;
1da177e4 597
b854272b
DL
598 if (net != &init_net)
599 return -EINVAL;
600
4e902c57
TG
601 err = rtm_to_fib_config(skb, nlh, &cfg);
602 if (err < 0)
603 goto errout;
1da177e4 604
8ad4942c 605 tb = fib_new_table(&init_net, cfg.fc_table);
4e902c57
TG
606 if (tb == NULL) {
607 err = -ENOBUFS;
608 goto errout;
609 }
610
611 err = tb->tb_insert(tb, &cfg);
612errout:
613 return err;
1da177e4
LT
614}
615
63f3444f 616static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 617{
b854272b 618 struct net *net = skb->sk->sk_net;
1af5a8c4
PM
619 unsigned int h, s_h;
620 unsigned int e = 0, s_e;
1da177e4 621 struct fib_table *tb;
1af5a8c4
PM
622 struct hlist_node *node;
623 int dumped = 0;
1da177e4 624
b854272b
DL
625 if (net != &init_net)
626 return 0;
627
be403ea1
TG
628 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
629 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
630 return ip_rt_dump(skb, cb);
631
1af5a8c4
PM
632 s_h = cb->args[0];
633 s_e = cb->args[1];
634
635 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
636 e = 0;
637 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
638 if (e < s_e)
639 goto next;
640 if (dumped)
641 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 642 2 * sizeof(cb->args[0]));
1af5a8c4
PM
643 if (tb->tb_dump(tb, skb, cb) < 0)
644 goto out;
645 dumped = 1;
646next:
647 e++;
648 }
1da177e4 649 }
1af5a8c4
PM
650out:
651 cb->args[1] = e;
652 cb->args[0] = h;
1da177e4
LT
653
654 return skb->len;
655}
656
/* Prepare and feed an intra-kernel routing request.
   Ideally this would be a netlink message, but netlink may not be
   configured, so the request is fed directly to the fib engine.
   This is legal because all events occur only when netlink is
   already locked.
 */
663
81f7bf6c 664static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 665{
4e902c57
TG
666 struct fib_table *tb;
667 struct fib_config cfg = {
668 .fc_protocol = RTPROT_KERNEL,
669 .fc_type = type,
670 .fc_dst = dst,
671 .fc_dst_len = dst_len,
672 .fc_prefsrc = ifa->ifa_local,
673 .fc_oif = ifa->ifa_dev->dev->ifindex,
674 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1
DL
675 .fc_nlinfo = {
676 .nl_net = &init_net,
677 },
4e902c57 678 };
1da177e4
LT
679
680 if (type == RTN_UNICAST)
8ad4942c 681 tb = fib_new_table(&init_net, RT_TABLE_MAIN);
1da177e4 682 else
8ad4942c 683 tb = fib_new_table(&init_net, RT_TABLE_LOCAL);
1da177e4
LT
684
685 if (tb == NULL)
686 return;
687
4e902c57 688 cfg.fc_table = tb->tb_id;
1da177e4 689
4e902c57
TG
690 if (type != RTN_LOCAL)
691 cfg.fc_scope = RT_SCOPE_LINK;
692 else
693 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
694
695 if (cmd == RTM_NEWROUTE)
4e902c57 696 tb->tb_insert(tb, &cfg);
1da177e4 697 else
4e902c57 698 tb->tb_delete(tb, &cfg);
1da177e4
LT
699}
700
0ff60a45 701void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
702{
703 struct in_device *in_dev = ifa->ifa_dev;
704 struct net_device *dev = in_dev->dev;
705 struct in_ifaddr *prim = ifa;
a144ea4b
AV
706 __be32 mask = ifa->ifa_mask;
707 __be32 addr = ifa->ifa_local;
708 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
709
710 if (ifa->ifa_flags&IFA_F_SECONDARY) {
711 prim = inet_ifa_byprefix(in_dev, prefix, mask);
712 if (prim == NULL) {
713 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
714 return;
715 }
716 }
717
718 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
719
720 if (!(dev->flags&IFF_UP))
721 return;
722
723 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 724 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
725 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
726
f97c1e0c 727 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
728 (prefix != addr || ifa->ifa_prefixlen < 32)) {
729 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
730 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
731
732 /* Add network specific broadcasts, when it takes a sense */
733 if (ifa->ifa_prefixlen < 31) {
734 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
735 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
736 }
737 }
738}
739
740static void fib_del_ifaddr(struct in_ifaddr *ifa)
741{
742 struct in_device *in_dev = ifa->ifa_dev;
743 struct net_device *dev = in_dev->dev;
744 struct in_ifaddr *ifa1;
745 struct in_ifaddr *prim = ifa;
a144ea4b
AV
746 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
747 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
748#define LOCAL_OK 1
749#define BRD_OK 2
750#define BRD0_OK 4
751#define BRD1_OK 8
752 unsigned ok = 0;
753
754 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
755 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
756 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
757 else {
758 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
759 if (prim == NULL) {
760 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
761 return;
762 }
763 }
764
765 /* Deletion is more complicated than add.
766 We should take care of not to delete too much :-)
767
768 Scan address list to be sure that addresses are really gone.
769 */
770
771 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
772 if (ifa->ifa_local == ifa1->ifa_local)
773 ok |= LOCAL_OK;
774 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
775 ok |= BRD_OK;
776 if (brd == ifa1->ifa_broadcast)
777 ok |= BRD1_OK;
778 if (any == ifa1->ifa_broadcast)
779 ok |= BRD0_OK;
780 }
781
782 if (!(ok&BRD_OK))
783 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
784 if (!(ok&BRD1_OK))
785 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
786 if (!(ok&BRD0_OK))
787 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
788 if (!(ok&LOCAL_OK)) {
789 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
790
791 /* Check, that this local address finally disappeared. */
6b175b26 792 if (inet_addr_type(&init_net, ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
793 /* And the last, but not the least thing.
794 We must flush stray FIB entries.
795
796 First of all, we scan fib_info list searching
797 for stray nexthop entries, then ignite fib_flush.
798 */
799 if (fib_sync_down(ifa->ifa_local, NULL, 0))
800 fib_flush();
801 }
802 }
803#undef LOCAL_OK
804#undef BRD_OK
805#undef BRD0_OK
806#undef BRD1_OK
807}
808
246955fe
RO
809static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
810{
e905a9ed 811
246955fe 812 struct fib_result res;
5f300893 813 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 814 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
815 .tos = frn->fl_tos,
816 .scope = frn->fl_scope } } };
1194ed0a 817
912a41a4
SV
818#ifdef CONFIG_IP_MULTIPLE_TABLES
819 res.r = NULL;
820#endif
821
1194ed0a 822 frn->err = -ENOENT;
246955fe
RO
823 if (tb) {
824 local_bh_disable();
825
826 frn->tb_id = tb->tb_id;
827 frn->err = tb->tb_lookup(tb, &fl, &res);
828
829 if (!frn->err) {
830 frn->prefixlen = res.prefixlen;
831 frn->nh_sel = res.nh_sel;
832 frn->type = res.type;
833 frn->scope = res.scope;
1194ed0a 834 fib_res_put(&res);
246955fe
RO
835 }
836 local_bh_enable();
837 }
838}
839
28f7b036 840static void nl_fib_input(struct sk_buff *skb)
246955fe 841{
246955fe 842 struct fib_result_nl *frn;
28f7b036 843 struct nlmsghdr *nlh;
246955fe 844 struct fib_table *tb;
28f7b036 845 u32 pid;
1194ed0a 846
b529ccf2 847 nlh = nlmsg_hdr(skb);
ea86575e 848 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 849 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 850 return;
d883a036
DL
851
852 skb = skb_clone(skb, GFP_KERNEL);
853 if (skb == NULL)
854 return;
855 nlh = nlmsg_hdr(skb);
e905a9ed 856
246955fe 857 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
8ad4942c 858 tb = fib_get_table(&init_net, frn->tb_id_in);
246955fe
RO
859
860 nl_fib_lookup(frn, tb);
e905a9ed 861
1194ed0a 862 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 863 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 864 NETLINK_CB(skb).dst_group = 0; /* unicast */
cd40b7d3 865 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 866}
246955fe 867
7b1a74fd 868static int nl_fib_lookup_init(struct net *net)
246955fe 869{
7b1a74fd 870 fibnl = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
cd40b7d3 871 nl_fib_input, NULL, THIS_MODULE);
7b1a74fd
DL
872 if (fibnl == NULL)
873 return -EAFNOSUPPORT;
874 return 0;
875}
876
877static void nl_fib_lookup_exit(struct net *net)
878{
879 sock_put(fibnl);
246955fe
RO
880}
881
1da177e4
LT
/*
 * Called when IP goes away on dev: sync FIB state down for the device
 * (flushing the tables if fib_sync_down() reports anything), flush the
 * routing cache, and notify ARP.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int changed = fib_sync_down(0, dev, force);

	if (changed)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
889
890static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
891{
892 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
893
894 switch (event) {
895 case NETDEV_UP:
896 fib_add_ifaddr(ifa);
897#ifdef CONFIG_IP_ROUTE_MULTIPATH
898 fib_sync_up(ifa->ifa_dev->dev);
899#endif
900 rt_cache_flush(-1);
901 break;
902 case NETDEV_DOWN:
903 fib_del_ifaddr(ifa);
9fcc2e8a 904 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
905 /* Last address was deleted from this interface.
906 Disable IP.
907 */
908 fib_disable_ip(ifa->ifa_dev->dev, 1);
909 } else {
910 rt_cache_flush(-1);
911 }
912 break;
913 }
914 return NOTIFY_DONE;
915}
916
917static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
918{
919 struct net_device *dev = ptr;
e5ed6399 920 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4 921
e9dc8653
EB
922 if (dev->nd_net != &init_net)
923 return NOTIFY_DONE;
924
1da177e4
LT
925 if (event == NETDEV_UNREGISTER) {
926 fib_disable_ip(dev, 2);
927 return NOTIFY_DONE;
928 }
929
930 if (!in_dev)
931 return NOTIFY_DONE;
932
933 switch (event) {
934 case NETDEV_UP:
935 for_ifa(in_dev) {
936 fib_add_ifaddr(ifa);
937 } endfor_ifa(in_dev);
938#ifdef CONFIG_IP_ROUTE_MULTIPATH
939 fib_sync_up(dev);
940#endif
941 rt_cache_flush(-1);
942 break;
943 case NETDEV_DOWN:
944 fib_disable_ip(dev, 0);
945 break;
946 case NETDEV_CHANGEMTU:
947 case NETDEV_CHANGE:
948 rt_cache_flush(0);
949 break;
950 }
951 return NOTIFY_DONE;
952}
953
954static struct notifier_block fib_inetaddr_notifier = {
955 .notifier_call =fib_inetaddr_event,
956};
957
958static struct notifier_block fib_netdev_notifier = {
959 .notifier_call =fib_netdev_event,
960};
961
7b1a74fd 962static int __net_init ip_fib_net_init(struct net *net)
1da177e4 963{
1af5a8c4
PM
964 unsigned int i;
965
966 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
967 INIT_HLIST_HEAD(&fib_table_hash[i]);
c3e9a353 968
7b1a74fd
DL
969 return fib4_rules_init(net);
970}
1da177e4 971
7b1a74fd
DL
972static void __net_exit ip_fib_net_exit(struct net *net)
973{
974 unsigned int i;
975
976#ifdef CONFIG_IP_MULTIPLE_TABLES
977 fib4_rules_exit(net);
978#endif
979
980 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
981 struct fib_table *tb;
982 struct hlist_head *head;
983 struct hlist_node *node, *tmp;
63f3444f 984
7b1a74fd
DL
985 head = &fib_table_hash[i];
986 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
987 hlist_del(node);
988 tb->tb_flush(tb);
989 kfree(tb);
990 }
991 }
992}
993
994static int __net_init fib_net_init(struct net *net)
995{
996 int error;
997
998 error = 0;
999 if (net != &init_net)
1000 goto out;
1001
1002 error = ip_fib_net_init(net);
1003 if (error < 0)
1004 goto out;
1005 error = nl_fib_lookup_init(net);
1006 if (error < 0)
1007 goto out_nlfl;
1008 error = fib_proc_init(net);
1009 if (error < 0)
1010 goto out_proc;
1011out:
1012 return error;
1013
1014out_proc:
1015 nl_fib_lookup_exit(net);
1016out_nlfl:
1017 ip_fib_net_exit(net);
1018 goto out;
1019}
1020
1021static void __net_exit fib_net_exit(struct net *net)
1022{
1023 fib_proc_exit(net);
1024 nl_fib_lookup_exit(net);
1025 ip_fib_net_exit(net);
1026}
1027
1028static struct pernet_operations fib_net_ops = {
1029 .init = fib_net_init,
1030 .exit = fib_net_exit,
1031};
1032
1033void __init ip_fib_init(void)
1034{
63f3444f
TG
1035 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1036 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1037 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1038
1039 register_pernet_subsys(&fib_net_ops);
1040 register_netdevice_notifier(&fib_netdev_notifier);
1041 register_inetaddr_notifier(&fib_inetaddr_notifier);
1da177e4
LT
1042}
1043
/* Symbols exported from this file. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);