]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
[NETNS]: Place fib tables into netns.
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
4fc268d2 22#include <linux/capability.h>
1da177e4
LT
23#include <linux/types.h>
24#include <linux/kernel.h>
1da177e4
LT
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
14c85021 32#include <linux/inetdevice.h>
1da177e4 33#include <linux/netdevice.h>
1823730f 34#include <linux/if_addr.h>
1da177e4
LT
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
1da177e4 37#include <linux/init.h>
1af5a8c4 38#include <linux/list.h>
1da177e4
LT
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
63f3444f 48#include <net/rtnetlink.h>
1da177e4
LT
49
/* Debug helper: forwards its arguments to printk() at KERN_DEBUG level. */
#define FFprint(args...) printk(KERN_DEBUG args)
51
28f7b036
DM
/*
 * Kernel-side NETLINK_FIB_LOOKUP socket; created in nl_fib_lookup_init()
 * and used by nl_fib_input() to send replies.
 */
static struct sock *fibnl;
53
1da177e4
LT
54#ifndef CONFIG_IP_MULTIPLE_TABLES
55
7b1a74fd 56static int __net_init fib4_rules_init(struct net *net)
c3e9a353 57{
93456b6d
DL
58 struct fib_table *local_table, *main_table;
59
60 local_table = fib_hash_init(RT_TABLE_LOCAL);
61 if (local_table == NULL)
dbb50165
DL
62 return -ENOMEM;
63
93456b6d
DL
64 main_table = fib_hash_init(RT_TABLE_MAIN);
65 if (main_table == NULL)
dbb50165
DL
66 goto fail;
67
93456b6d 68 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 69 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 70 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 71 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
72 return 0;
73
74fail:
93456b6d 75 kfree(local_table);
dbb50165 76 return -ENOMEM;
c3e9a353 77}
1af5a8c4 78#else
1da177e4 79
8ad4942c 80struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
81{
82 struct fib_table *tb;
1af5a8c4 83 unsigned int h;
1da177e4 84
1af5a8c4
PM
85 if (id == 0)
86 id = RT_TABLE_MAIN;
8ad4942c 87 tb = fib_get_table(net, id);
1af5a8c4
PM
88 if (tb)
89 return tb;
1da177e4
LT
90 tb = fib_hash_init(id);
91 if (!tb)
92 return NULL;
1af5a8c4 93 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 94 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
95 return tb;
96}
97
8ad4942c 98struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
99{
100 struct fib_table *tb;
101 struct hlist_node *node;
e4aef8ae 102 struct hlist_head *head;
1af5a8c4 103 unsigned int h;
1da177e4 104
1af5a8c4
PM
105 if (id == 0)
106 id = RT_TABLE_MAIN;
107 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 108
1af5a8c4 109 rcu_read_lock();
e4aef8ae
DL
110 head = &net->ipv4.fib_table_hash[h];
111 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
112 if (tb->tb_id == id) {
113 rcu_read_unlock();
114 return tb;
115 }
116 }
117 rcu_read_unlock();
118 return NULL;
119}
1da177e4
LT
120#endif /* CONFIG_IP_MULTIPLE_TABLES */
121
e4aef8ae 122static void fib_flush(struct net *net)
1da177e4
LT
123{
124 int flushed = 0;
1da177e4 125 struct fib_table *tb;
1af5a8c4 126 struct hlist_node *node;
e4aef8ae 127 struct hlist_head *head;
1af5a8c4 128 unsigned int h;
1da177e4 129
1af5a8c4 130 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
131 head = &net->ipv4.fib_table_hash[h];
132 hlist_for_each_entry(tb, node, head, tb_hlist)
1af5a8c4 133 flushed += tb->tb_flush(tb);
1da177e4 134 }
1da177e4
LT
135
136 if (flushed)
137 rt_cache_flush(-1);
138}
139
140/*
141 * Find the first device with a given source address.
142 */
143
60cad5da 144struct net_device * ip_dev_find(__be32 addr)
1da177e4
LT
145{
146 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
147 struct fib_result res;
148 struct net_device *dev = NULL;
03cf786c 149 struct fib_table *local_table;
1da177e4
LT
150
151#ifdef CONFIG_IP_MULTIPLE_TABLES
152 res.r = NULL;
153#endif
154
8ad4942c 155 local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
03cf786c 156 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
1da177e4
LT
157 return NULL;
158 if (res.type != RTN_LOCAL)
159 goto out;
160 dev = FIB_RES_DEV(res);
161
162 if (dev)
163 dev_hold(dev);
164out:
165 fib_res_put(&res);
166 return dev;
167}
168
05538116
LAT
169/*
170 * Find address type as if only "dev" was present in the system. If
171 * on_dev is NULL then all interfaces are taken into consideration.
172 */
6b175b26
EB
173static inline unsigned __inet_dev_addr_type(struct net *net,
174 const struct net_device *dev,
05538116 175 __be32 addr)
1da177e4
LT
176{
177 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
178 struct fib_result res;
179 unsigned ret = RTN_BROADCAST;
03cf786c 180 struct fib_table *local_table;
1da177e4 181
f97c1e0c 182 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
1da177e4 183 return RTN_BROADCAST;
f97c1e0c 184 if (ipv4_is_multicast(addr))
1da177e4
LT
185 return RTN_MULTICAST;
186
187#ifdef CONFIG_IP_MULTIPLE_TABLES
188 res.r = NULL;
189#endif
e905a9ed 190
6b175b26 191 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 192 if (local_table) {
1da177e4 193 ret = RTN_UNICAST;
03cf786c 194 if (!local_table->tb_lookup(local_table, &fl, &res)) {
05538116
LAT
195 if (!dev || dev == res.fi->fib_dev)
196 ret = res.type;
1da177e4
LT
197 fib_res_put(&res);
198 }
199 }
200 return ret;
201}
202
6b175b26 203unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 204{
6b175b26 205 return __inet_dev_addr_type(net, NULL, addr);
05538116
LAT
206}
207
6b175b26
EB
208unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
209 __be32 addr)
05538116 210{
6b175b26 211 return __inet_dev_addr_type(net, dev, addr);
05538116
LAT
212}
213
1da177e4
LT
214/* Given (packet source, input interface) and optional (dst, oif, tos):
215 - (main) check, that source is valid i.e. not broadcast or our local
216 address.
217 - figure out what "logical" interface this packet arrived
218 and calculate "specific destination" address.
219 - check, that packet arrived from expected physical interface.
220 */
221
d9c9df8c
AV
222int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
223 struct net_device *dev, __be32 *spec_dst, u32 *itag)
1da177e4
LT
224{
225 struct in_device *in_dev;
226 struct flowi fl = { .nl_u = { .ip4_u =
227 { .daddr = src,
228 .saddr = dst,
229 .tos = tos } },
230 .iif = oif };
231 struct fib_result res;
232 int no_addr, rpf;
233 int ret;
234
235 no_addr = rpf = 0;
236 rcu_read_lock();
e5ed6399 237 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
238 if (in_dev) {
239 no_addr = in_dev->ifa_list == NULL;
240 rpf = IN_DEV_RPFILTER(in_dev);
241 }
242 rcu_read_unlock();
243
244 if (in_dev == NULL)
245 goto e_inval;
246
247 if (fib_lookup(&fl, &res))
248 goto last_resort;
249 if (res.type != RTN_UNICAST)
250 goto e_inval_res;
251 *spec_dst = FIB_RES_PREFSRC(res);
252 fib_combine_itag(itag, &res);
253#ifdef CONFIG_IP_ROUTE_MULTIPATH
254 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
255#else
256 if (FIB_RES_DEV(res) == dev)
257#endif
258 {
259 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
260 fib_res_put(&res);
261 return ret;
262 }
263 fib_res_put(&res);
264 if (no_addr)
265 goto last_resort;
266 if (rpf)
267 goto e_inval;
268 fl.oif = dev->ifindex;
269
270 ret = 0;
271 if (fib_lookup(&fl, &res) == 0) {
272 if (res.type == RTN_UNICAST) {
273 *spec_dst = FIB_RES_PREFSRC(res);
274 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
275 }
276 fib_res_put(&res);
277 }
278 return ret;
279
280last_resort:
281 if (rpf)
282 goto e_inval;
283 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
284 *itag = 0;
285 return 0;
286
287e_inval_res:
288 fib_res_put(&res);
289e_inval:
290 return -EINVAL;
291}
292
81f7bf6c 293static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
294{
295 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
296}
297
298static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
299{
300 struct nlattr *nla;
301
302 nla = (struct nlattr *) ((char *) mx + len);
303 nla->nla_type = type;
304 nla->nla_len = nla_attr_size(4);
305 *(u32 *) nla_data(nla) = value;
306
307 return len + nla_total_size(4);
308}
309
310static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
311 struct fib_config *cfg)
312{
6d85c10a 313 __be32 addr;
4e902c57
TG
314 int plen;
315
316 memset(cfg, 0, sizeof(*cfg));
4d1169c1 317 cfg->fc_nlinfo.nl_net = &init_net;
4e902c57
TG
318
319 if (rt->rt_dst.sa_family != AF_INET)
320 return -EAFNOSUPPORT;
321
322 /*
323 * Check mask for validity:
324 * a) it must be contiguous.
325 * b) destination must have all host bits clear.
326 * c) if application forgot to set correct family (AF_INET),
327 * reject request unless it is absolutely clear i.e.
328 * both family and mask are zero.
329 */
330 plen = 32;
331 addr = sk_extract_addr(&rt->rt_dst);
332 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 333 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
334
335 if (rt->rt_genmask.sa_family != AF_INET) {
336 if (mask || rt->rt_genmask.sa_family)
337 return -EAFNOSUPPORT;
338 }
339
340 if (bad_mask(mask, addr))
341 return -EINVAL;
342
343 plen = inet_mask_len(mask);
344 }
345
346 cfg->fc_dst_len = plen;
347 cfg->fc_dst = addr;
348
349 if (cmd != SIOCDELRT) {
350 cfg->fc_nlflags = NLM_F_CREATE;
351 cfg->fc_protocol = RTPROT_BOOT;
352 }
353
354 if (rt->rt_metric)
355 cfg->fc_priority = rt->rt_metric - 1;
356
357 if (rt->rt_flags & RTF_REJECT) {
358 cfg->fc_scope = RT_SCOPE_HOST;
359 cfg->fc_type = RTN_UNREACHABLE;
360 return 0;
361 }
362
363 cfg->fc_scope = RT_SCOPE_NOWHERE;
364 cfg->fc_type = RTN_UNICAST;
365
366 if (rt->rt_dev) {
367 char *colon;
368 struct net_device *dev;
369 char devname[IFNAMSIZ];
370
371 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
372 return -EFAULT;
373
374 devname[IFNAMSIZ-1] = 0;
375 colon = strchr(devname, ':');
376 if (colon)
377 *colon = 0;
881d966b 378 dev = __dev_get_by_name(&init_net, devname);
4e902c57
TG
379 if (!dev)
380 return -ENODEV;
381 cfg->fc_oif = dev->ifindex;
382 if (colon) {
383 struct in_ifaddr *ifa;
384 struct in_device *in_dev = __in_dev_get_rtnl(dev);
385 if (!in_dev)
386 return -ENODEV;
387 *colon = ':';
388 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
389 if (strcmp(ifa->ifa_label, devname) == 0)
390 break;
391 if (ifa == NULL)
392 return -ENODEV;
393 cfg->fc_prefsrc = ifa->ifa_local;
394 }
395 }
396
397 addr = sk_extract_addr(&rt->rt_gateway);
398 if (rt->rt_gateway.sa_family == AF_INET && addr) {
399 cfg->fc_gw = addr;
400 if (rt->rt_flags & RTF_GATEWAY &&
6b175b26 401 inet_addr_type(&init_net, addr) == RTN_UNICAST)
4e902c57
TG
402 cfg->fc_scope = RT_SCOPE_UNIVERSE;
403 }
404
405 if (cmd == SIOCDELRT)
406 return 0;
407
408 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
409 return -EINVAL;
410
411 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
412 cfg->fc_scope = RT_SCOPE_LINK;
413
414 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
415 struct nlattr *mx;
416 int len = 0;
417
418 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 419 if (mx == NULL)
4e902c57
TG
420 return -ENOMEM;
421
422 if (rt->rt_flags & RTF_MTU)
423 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
424
425 if (rt->rt_flags & RTF_WINDOW)
426 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
427
428 if (rt->rt_flags & RTF_IRTT)
429 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
430
431 cfg->fc_mx = mx;
432 cfg->fc_mx_len = len;
433 }
434
435 return 0;
436}
437
1da177e4
LT
438/*
439 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
440 */
e905a9ed 441
1da177e4
LT
442int ip_rt_ioctl(unsigned int cmd, void __user *arg)
443{
4e902c57
TG
444 struct fib_config cfg;
445 struct rtentry rt;
1da177e4 446 int err;
1da177e4
LT
447
448 switch (cmd) {
449 case SIOCADDRT: /* Add a route */
450 case SIOCDELRT: /* Delete a route */
451 if (!capable(CAP_NET_ADMIN))
452 return -EPERM;
4e902c57
TG
453
454 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 455 return -EFAULT;
4e902c57 456
1da177e4 457 rtnl_lock();
4e902c57 458 err = rtentry_to_fib_config(cmd, &rt, &cfg);
1da177e4 459 if (err == 0) {
4e902c57
TG
460 struct fib_table *tb;
461
1da177e4 462 if (cmd == SIOCDELRT) {
8ad4942c 463 tb = fib_get_table(&init_net, cfg.fc_table);
1da177e4 464 if (tb)
4e902c57
TG
465 err = tb->tb_delete(tb, &cfg);
466 else
467 err = -ESRCH;
1da177e4 468 } else {
8ad4942c 469 tb = fib_new_table(&init_net, cfg.fc_table);
1da177e4 470 if (tb)
4e902c57
TG
471 err = tb->tb_insert(tb, &cfg);
472 else
473 err = -ENOBUFS;
1da177e4 474 }
4e902c57
TG
475
476 /* allocated by rtentry_to_fib_config() */
477 kfree(cfg.fc_mx);
1da177e4
LT
478 }
479 rtnl_unlock();
480 return err;
481 }
482 return -EINVAL;
483}
484
ef7c79ed 485const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
486 [RTA_DST] = { .type = NLA_U32 },
487 [RTA_SRC] = { .type = NLA_U32 },
488 [RTA_IIF] = { .type = NLA_U32 },
489 [RTA_OIF] = { .type = NLA_U32 },
490 [RTA_GATEWAY] = { .type = NLA_U32 },
491 [RTA_PRIORITY] = { .type = NLA_U32 },
492 [RTA_PREFSRC] = { .type = NLA_U32 },
493 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 494 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57
TG
495 [RTA_PROTOINFO] = { .type = NLA_U32 },
496 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
497};
498
499static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
500 struct fib_config *cfg)
1da177e4 501{
4e902c57
TG
502 struct nlattr *attr;
503 int err, remaining;
504 struct rtmsg *rtm;
505
506 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
507 if (err < 0)
508 goto errout;
509
510 memset(cfg, 0, sizeof(*cfg));
511
512 rtm = nlmsg_data(nlh);
4e902c57 513 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
514 cfg->fc_tos = rtm->rtm_tos;
515 cfg->fc_table = rtm->rtm_table;
516 cfg->fc_protocol = rtm->rtm_protocol;
517 cfg->fc_scope = rtm->rtm_scope;
518 cfg->fc_type = rtm->rtm_type;
519 cfg->fc_flags = rtm->rtm_flags;
520 cfg->fc_nlflags = nlh->nlmsg_flags;
521
522 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
523 cfg->fc_nlinfo.nlh = nlh;
4d1169c1 524 cfg->fc_nlinfo.nl_net = &init_net;
4e902c57 525
a0ee18b9
TG
526 if (cfg->fc_type > RTN_MAX) {
527 err = -EINVAL;
528 goto errout;
529 }
530
4e902c57 531 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 532 switch (nla_type(attr)) {
4e902c57 533 case RTA_DST:
17fb2c64 534 cfg->fc_dst = nla_get_be32(attr);
4e902c57 535 break;
4e902c57
TG
536 case RTA_OIF:
537 cfg->fc_oif = nla_get_u32(attr);
538 break;
539 case RTA_GATEWAY:
17fb2c64 540 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
541 break;
542 case RTA_PRIORITY:
543 cfg->fc_priority = nla_get_u32(attr);
544 break;
545 case RTA_PREFSRC:
17fb2c64 546 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
547 break;
548 case RTA_METRICS:
549 cfg->fc_mx = nla_data(attr);
550 cfg->fc_mx_len = nla_len(attr);
551 break;
552 case RTA_MULTIPATH:
553 cfg->fc_mp = nla_data(attr);
554 cfg->fc_mp_len = nla_len(attr);
555 break;
556 case RTA_FLOW:
557 cfg->fc_flow = nla_get_u32(attr);
558 break;
4e902c57
TG
559 case RTA_TABLE:
560 cfg->fc_table = nla_get_u32(attr);
561 break;
1da177e4
LT
562 }
563 }
4e902c57 564
1da177e4 565 return 0;
4e902c57
TG
566errout:
567 return err;
1da177e4
LT
568}
569
63f3444f 570static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 571{
b854272b 572 struct net *net = skb->sk->sk_net;
4e902c57
TG
573 struct fib_config cfg;
574 struct fib_table *tb;
575 int err;
1da177e4 576
b854272b
DL
577 if (net != &init_net)
578 return -EINVAL;
579
4e902c57
TG
580 err = rtm_to_fib_config(skb, nlh, &cfg);
581 if (err < 0)
582 goto errout;
1da177e4 583
8ad4942c 584 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
585 if (tb == NULL) {
586 err = -ESRCH;
587 goto errout;
588 }
589
590 err = tb->tb_delete(tb, &cfg);
591errout:
592 return err;
1da177e4
LT
593}
594
63f3444f 595static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 596{
b854272b 597 struct net *net = skb->sk->sk_net;
4e902c57
TG
598 struct fib_config cfg;
599 struct fib_table *tb;
600 int err;
1da177e4 601
b854272b
DL
602 if (net != &init_net)
603 return -EINVAL;
604
4e902c57
TG
605 err = rtm_to_fib_config(skb, nlh, &cfg);
606 if (err < 0)
607 goto errout;
1da177e4 608
8ad4942c 609 tb = fib_new_table(&init_net, cfg.fc_table);
4e902c57
TG
610 if (tb == NULL) {
611 err = -ENOBUFS;
612 goto errout;
613 }
614
615 err = tb->tb_insert(tb, &cfg);
616errout:
617 return err;
1da177e4
LT
618}
619
63f3444f 620static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 621{
b854272b 622 struct net *net = skb->sk->sk_net;
1af5a8c4
PM
623 unsigned int h, s_h;
624 unsigned int e = 0, s_e;
1da177e4 625 struct fib_table *tb;
1af5a8c4 626 struct hlist_node *node;
e4aef8ae 627 struct hlist_head *head;
1af5a8c4 628 int dumped = 0;
1da177e4 629
b854272b
DL
630 if (net != &init_net)
631 return 0;
632
be403ea1
TG
633 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
634 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
635 return ip_rt_dump(skb, cb);
636
1af5a8c4
PM
637 s_h = cb->args[0];
638 s_e = cb->args[1];
639
640 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
641 e = 0;
e4aef8ae
DL
642 head = &net->ipv4.fib_table_hash[h];
643 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
644 if (e < s_e)
645 goto next;
646 if (dumped)
647 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 648 2 * sizeof(cb->args[0]));
1af5a8c4
PM
649 if (tb->tb_dump(tb, skb, cb) < 0)
650 goto out;
651 dumped = 1;
652next:
653 e++;
654 }
1da177e4 655 }
1af5a8c4
PM
656out:
657 cb->args[1] = e;
658 cb->args[0] = h;
1da177e4
LT
659
660 return skb->len;
661}
662
663/* Prepare and feed intra-kernel routing request.
664 Really, it should be netlink message, but :-( netlink
665 can be not configured, so that we feed it directly
666 to fib engine. It is legal, because all events occur
667 only when netlink is already locked.
668 */
669
81f7bf6c 670static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 671{
4e902c57
TG
672 struct fib_table *tb;
673 struct fib_config cfg = {
674 .fc_protocol = RTPROT_KERNEL,
675 .fc_type = type,
676 .fc_dst = dst,
677 .fc_dst_len = dst_len,
678 .fc_prefsrc = ifa->ifa_local,
679 .fc_oif = ifa->ifa_dev->dev->ifindex,
680 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1
DL
681 .fc_nlinfo = {
682 .nl_net = &init_net,
683 },
4e902c57 684 };
1da177e4
LT
685
686 if (type == RTN_UNICAST)
8ad4942c 687 tb = fib_new_table(&init_net, RT_TABLE_MAIN);
1da177e4 688 else
8ad4942c 689 tb = fib_new_table(&init_net, RT_TABLE_LOCAL);
1da177e4
LT
690
691 if (tb == NULL)
692 return;
693
4e902c57 694 cfg.fc_table = tb->tb_id;
1da177e4 695
4e902c57
TG
696 if (type != RTN_LOCAL)
697 cfg.fc_scope = RT_SCOPE_LINK;
698 else
699 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
700
701 if (cmd == RTM_NEWROUTE)
4e902c57 702 tb->tb_insert(tb, &cfg);
1da177e4 703 else
4e902c57 704 tb->tb_delete(tb, &cfg);
1da177e4
LT
705}
706
0ff60a45 707void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
708{
709 struct in_device *in_dev = ifa->ifa_dev;
710 struct net_device *dev = in_dev->dev;
711 struct in_ifaddr *prim = ifa;
a144ea4b
AV
712 __be32 mask = ifa->ifa_mask;
713 __be32 addr = ifa->ifa_local;
714 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
715
716 if (ifa->ifa_flags&IFA_F_SECONDARY) {
717 prim = inet_ifa_byprefix(in_dev, prefix, mask);
718 if (prim == NULL) {
719 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
720 return;
721 }
722 }
723
724 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
725
726 if (!(dev->flags&IFF_UP))
727 return;
728
729 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 730 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
731 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
732
f97c1e0c 733 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
734 (prefix != addr || ifa->ifa_prefixlen < 32)) {
735 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
736 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
737
738 /* Add network specific broadcasts, when it takes a sense */
739 if (ifa->ifa_prefixlen < 31) {
740 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
741 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
742 }
743 }
744}
745
746static void fib_del_ifaddr(struct in_ifaddr *ifa)
747{
748 struct in_device *in_dev = ifa->ifa_dev;
749 struct net_device *dev = in_dev->dev;
750 struct in_ifaddr *ifa1;
751 struct in_ifaddr *prim = ifa;
a144ea4b
AV
752 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
753 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
754#define LOCAL_OK 1
755#define BRD_OK 2
756#define BRD0_OK 4
757#define BRD1_OK 8
758 unsigned ok = 0;
759
760 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
761 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
762 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
763 else {
764 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
765 if (prim == NULL) {
766 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
767 return;
768 }
769 }
770
771 /* Deletion is more complicated than add.
772 We should take care of not to delete too much :-)
773
774 Scan address list to be sure that addresses are really gone.
775 */
776
777 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
778 if (ifa->ifa_local == ifa1->ifa_local)
779 ok |= LOCAL_OK;
780 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
781 ok |= BRD_OK;
782 if (brd == ifa1->ifa_broadcast)
783 ok |= BRD1_OK;
784 if (any == ifa1->ifa_broadcast)
785 ok |= BRD0_OK;
786 }
787
788 if (!(ok&BRD_OK))
789 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
790 if (!(ok&BRD1_OK))
791 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
792 if (!(ok&BRD0_OK))
793 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
794 if (!(ok&LOCAL_OK)) {
795 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
796
797 /* Check, that this local address finally disappeared. */
6b175b26 798 if (inet_addr_type(&init_net, ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
799 /* And the last, but not the least thing.
800 We must flush stray FIB entries.
801
802 First of all, we scan fib_info list searching
803 for stray nexthop entries, then ignite fib_flush.
804 */
805 if (fib_sync_down(ifa->ifa_local, NULL, 0))
e4aef8ae 806 fib_flush(&init_net);
1da177e4
LT
807 }
808 }
809#undef LOCAL_OK
810#undef BRD_OK
811#undef BRD0_OK
812#undef BRD1_OK
813}
814
246955fe
RO
815static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
816{
e905a9ed 817
246955fe 818 struct fib_result res;
5f300893 819 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 820 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
821 .tos = frn->fl_tos,
822 .scope = frn->fl_scope } } };
1194ed0a 823
912a41a4
SV
824#ifdef CONFIG_IP_MULTIPLE_TABLES
825 res.r = NULL;
826#endif
827
1194ed0a 828 frn->err = -ENOENT;
246955fe
RO
829 if (tb) {
830 local_bh_disable();
831
832 frn->tb_id = tb->tb_id;
833 frn->err = tb->tb_lookup(tb, &fl, &res);
834
835 if (!frn->err) {
836 frn->prefixlen = res.prefixlen;
837 frn->nh_sel = res.nh_sel;
838 frn->type = res.type;
839 frn->scope = res.scope;
1194ed0a 840 fib_res_put(&res);
246955fe
RO
841 }
842 local_bh_enable();
843 }
844}
845
28f7b036 846static void nl_fib_input(struct sk_buff *skb)
246955fe 847{
246955fe 848 struct fib_result_nl *frn;
28f7b036 849 struct nlmsghdr *nlh;
246955fe 850 struct fib_table *tb;
28f7b036 851 u32 pid;
1194ed0a 852
b529ccf2 853 nlh = nlmsg_hdr(skb);
ea86575e 854 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 855 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 856 return;
d883a036
DL
857
858 skb = skb_clone(skb, GFP_KERNEL);
859 if (skb == NULL)
860 return;
861 nlh = nlmsg_hdr(skb);
e905a9ed 862
246955fe 863 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
8ad4942c 864 tb = fib_get_table(&init_net, frn->tb_id_in);
246955fe
RO
865
866 nl_fib_lookup(frn, tb);
e905a9ed 867
1194ed0a 868 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 869 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 870 NETLINK_CB(skb).dst_group = 0; /* unicast */
cd40b7d3 871 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 872}
246955fe 873
7b1a74fd 874static int nl_fib_lookup_init(struct net *net)
246955fe 875{
7b1a74fd 876 fibnl = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
cd40b7d3 877 nl_fib_input, NULL, THIS_MODULE);
7b1a74fd
DL
878 if (fibnl == NULL)
879 return -EAFNOSUPPORT;
880 return 0;
881}
882
883static void nl_fib_lookup_exit(struct net *net)
884{
885 sock_put(fibnl);
246955fe
RO
886}
887
1da177e4
LT
888static void fib_disable_ip(struct net_device *dev, int force)
889{
890 if (fib_sync_down(0, dev, force))
e4aef8ae 891 fib_flush(&init_net);
1da177e4
LT
892 rt_cache_flush(0);
893 arp_ifdown(dev);
894}
895
896static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
897{
898 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
899
900 switch (event) {
901 case NETDEV_UP:
902 fib_add_ifaddr(ifa);
903#ifdef CONFIG_IP_ROUTE_MULTIPATH
904 fib_sync_up(ifa->ifa_dev->dev);
905#endif
906 rt_cache_flush(-1);
907 break;
908 case NETDEV_DOWN:
909 fib_del_ifaddr(ifa);
9fcc2e8a 910 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
911 /* Last address was deleted from this interface.
912 Disable IP.
913 */
914 fib_disable_ip(ifa->ifa_dev->dev, 1);
915 } else {
916 rt_cache_flush(-1);
917 }
918 break;
919 }
920 return NOTIFY_DONE;
921}
922
923static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
924{
925 struct net_device *dev = ptr;
e5ed6399 926 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4 927
e9dc8653
EB
928 if (dev->nd_net != &init_net)
929 return NOTIFY_DONE;
930
1da177e4
LT
931 if (event == NETDEV_UNREGISTER) {
932 fib_disable_ip(dev, 2);
933 return NOTIFY_DONE;
934 }
935
936 if (!in_dev)
937 return NOTIFY_DONE;
938
939 switch (event) {
940 case NETDEV_UP:
941 for_ifa(in_dev) {
942 fib_add_ifaddr(ifa);
943 } endfor_ifa(in_dev);
944#ifdef CONFIG_IP_ROUTE_MULTIPATH
945 fib_sync_up(dev);
946#endif
947 rt_cache_flush(-1);
948 break;
949 case NETDEV_DOWN:
950 fib_disable_ip(dev, 0);
951 break;
952 case NETDEV_CHANGEMTU:
953 case NETDEV_CHANGE:
954 rt_cache_flush(0);
955 break;
956 }
957 return NOTIFY_DONE;
958}
959
960static struct notifier_block fib_inetaddr_notifier = {
961 .notifier_call =fib_inetaddr_event,
962};
963
964static struct notifier_block fib_netdev_notifier = {
965 .notifier_call =fib_netdev_event,
966};
967
7b1a74fd 968static int __net_init ip_fib_net_init(struct net *net)
1da177e4 969{
1af5a8c4
PM
970 unsigned int i;
971
e4aef8ae
DL
972 net->ipv4.fib_table_hash = kzalloc(
973 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
974 if (net->ipv4.fib_table_hash == NULL)
975 return -ENOMEM;
976
1af5a8c4 977 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 978 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 979
7b1a74fd
DL
980 return fib4_rules_init(net);
981}
1da177e4 982
7b1a74fd
DL
983static void __net_exit ip_fib_net_exit(struct net *net)
984{
985 unsigned int i;
986
987#ifdef CONFIG_IP_MULTIPLE_TABLES
988 fib4_rules_exit(net);
989#endif
990
991 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
992 struct fib_table *tb;
993 struct hlist_head *head;
994 struct hlist_node *node, *tmp;
63f3444f 995
e4aef8ae 996 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
997 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
998 hlist_del(node);
999 tb->tb_flush(tb);
1000 kfree(tb);
1001 }
1002 }
e4aef8ae 1003 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1004}
1005
1006static int __net_init fib_net_init(struct net *net)
1007{
1008 int error;
1009
1010 error = 0;
1011 if (net != &init_net)
1012 goto out;
1013
1014 error = ip_fib_net_init(net);
1015 if (error < 0)
1016 goto out;
1017 error = nl_fib_lookup_init(net);
1018 if (error < 0)
1019 goto out_nlfl;
1020 error = fib_proc_init(net);
1021 if (error < 0)
1022 goto out_proc;
1023out:
1024 return error;
1025
1026out_proc:
1027 nl_fib_lookup_exit(net);
1028out_nlfl:
1029 ip_fib_net_exit(net);
1030 goto out;
1031}
1032
1033static void __net_exit fib_net_exit(struct net *net)
1034{
1035 fib_proc_exit(net);
1036 nl_fib_lookup_exit(net);
1037 ip_fib_net_exit(net);
1038}
1039
1040static struct pernet_operations fib_net_ops = {
1041 .init = fib_net_init,
1042 .exit = fib_net_exit,
1043};
1044
1045void __init ip_fib_init(void)
1046{
63f3444f
TG
1047 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1048 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1049 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1050
1051 register_pernet_subsys(&fib_net_ops);
1052 register_netdevice_notifier(&fib_netdev_notifier);
1053 register_inetaddr_notifier(&fib_inetaddr_notifier);
1da177e4
LT
1054}
1055
1056EXPORT_SYMBOL(inet_addr_type);
05538116 1057EXPORT_SYMBOL(inet_dev_addr_type);
a1e8733e 1058EXPORT_SYMBOL(ip_dev_find);