]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
[IPV4] fib_trie: size and statistics
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
4fc268d2 22#include <linux/capability.h>
1da177e4
LT
23#include <linux/types.h>
24#include <linux/kernel.h>
1da177e4
LT
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
14c85021 32#include <linux/inetdevice.h>
1da177e4 33#include <linux/netdevice.h>
1823730f 34#include <linux/if_addr.h>
1da177e4
LT
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
1da177e4 37#include <linux/init.h>
1af5a8c4 38#include <linux/list.h>
1da177e4
LT
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
63f3444f 48#include <net/rtnetlink.h>
1da177e4 49
1da177e4
LT
50#ifndef CONFIG_IP_MULTIPLE_TABLES
51
7b1a74fd 52static int __net_init fib4_rules_init(struct net *net)
c3e9a353 53{
93456b6d
DL
54 struct fib_table *local_table, *main_table;
55
56 local_table = fib_hash_init(RT_TABLE_LOCAL);
57 if (local_table == NULL)
dbb50165
DL
58 return -ENOMEM;
59
93456b6d
DL
60 main_table = fib_hash_init(RT_TABLE_MAIN);
61 if (main_table == NULL)
dbb50165
DL
62 goto fail;
63
93456b6d 64 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 65 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 66 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 67 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
68 return 0;
69
70fail:
93456b6d 71 kfree(local_table);
dbb50165 72 return -ENOMEM;
c3e9a353 73}
1af5a8c4 74#else
1da177e4 75
8ad4942c 76struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
77{
78 struct fib_table *tb;
1af5a8c4 79 unsigned int h;
1da177e4 80
1af5a8c4
PM
81 if (id == 0)
82 id = RT_TABLE_MAIN;
8ad4942c 83 tb = fib_get_table(net, id);
1af5a8c4
PM
84 if (tb)
85 return tb;
1da177e4
LT
86 tb = fib_hash_init(id);
87 if (!tb)
88 return NULL;
1af5a8c4 89 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 90 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
91 return tb;
92}
93
8ad4942c 94struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
95{
96 struct fib_table *tb;
97 struct hlist_node *node;
e4aef8ae 98 struct hlist_head *head;
1af5a8c4 99 unsigned int h;
1da177e4 100
1af5a8c4
PM
101 if (id == 0)
102 id = RT_TABLE_MAIN;
103 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 104
1af5a8c4 105 rcu_read_lock();
e4aef8ae
DL
106 head = &net->ipv4.fib_table_hash[h];
107 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
108 if (tb->tb_id == id) {
109 rcu_read_unlock();
110 return tb;
111 }
112 }
113 rcu_read_unlock();
114 return NULL;
115}
1da177e4
LT
116#endif /* CONFIG_IP_MULTIPLE_TABLES */
117
e4aef8ae 118static void fib_flush(struct net *net)
1da177e4
LT
119{
120 int flushed = 0;
1da177e4 121 struct fib_table *tb;
1af5a8c4 122 struct hlist_node *node;
e4aef8ae 123 struct hlist_head *head;
1af5a8c4 124 unsigned int h;
1da177e4 125
1af5a8c4 126 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
127 head = &net->ipv4.fib_table_hash[h];
128 hlist_for_each_entry(tb, node, head, tb_hlist)
1af5a8c4 129 flushed += tb->tb_flush(tb);
1da177e4 130 }
1da177e4
LT
131
132 if (flushed)
133 rt_cache_flush(-1);
134}
135
136/*
137 * Find the first device with a given source address.
138 */
139
60cad5da 140struct net_device * ip_dev_find(__be32 addr)
1da177e4
LT
141{
142 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
143 struct fib_result res;
144 struct net_device *dev = NULL;
03cf786c 145 struct fib_table *local_table;
1da177e4
LT
146
147#ifdef CONFIG_IP_MULTIPLE_TABLES
148 res.r = NULL;
149#endif
150
8ad4942c 151 local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
03cf786c 152 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
1da177e4
LT
153 return NULL;
154 if (res.type != RTN_LOCAL)
155 goto out;
156 dev = FIB_RES_DEV(res);
157
158 if (dev)
159 dev_hold(dev);
160out:
161 fib_res_put(&res);
162 return dev;
163}
164
05538116
LAT
165/*
166 * Find address type as if only "dev" was present in the system. If
167 * on_dev is NULL then all interfaces are taken into consideration.
168 */
6b175b26
EB
169static inline unsigned __inet_dev_addr_type(struct net *net,
170 const struct net_device *dev,
05538116 171 __be32 addr)
1da177e4
LT
172{
173 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
174 struct fib_result res;
175 unsigned ret = RTN_BROADCAST;
03cf786c 176 struct fib_table *local_table;
1da177e4 177
f97c1e0c 178 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
1da177e4 179 return RTN_BROADCAST;
f97c1e0c 180 if (ipv4_is_multicast(addr))
1da177e4
LT
181 return RTN_MULTICAST;
182
183#ifdef CONFIG_IP_MULTIPLE_TABLES
184 res.r = NULL;
185#endif
e905a9ed 186
6b175b26 187 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 188 if (local_table) {
1da177e4 189 ret = RTN_UNICAST;
03cf786c 190 if (!local_table->tb_lookup(local_table, &fl, &res)) {
05538116
LAT
191 if (!dev || dev == res.fi->fib_dev)
192 ret = res.type;
1da177e4
LT
193 fib_res_put(&res);
194 }
195 }
196 return ret;
197}
198
6b175b26 199unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 200{
6b175b26 201 return __inet_dev_addr_type(net, NULL, addr);
05538116
LAT
202}
203
6b175b26
EB
204unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
205 __be32 addr)
05538116 206{
6b175b26 207 return __inet_dev_addr_type(net, dev, addr);
05538116
LAT
208}
209
1da177e4
LT
210/* Given (packet source, input interface) and optional (dst, oif, tos):
211 - (main) check, that source is valid i.e. not broadcast or our local
212 address.
213 - figure out what "logical" interface this packet arrived
214 and calculate "specific destination" address.
215 - check, that packet arrived from expected physical interface.
216 */
217
d9c9df8c
AV
218int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
219 struct net_device *dev, __be32 *spec_dst, u32 *itag)
1da177e4
LT
220{
221 struct in_device *in_dev;
222 struct flowi fl = { .nl_u = { .ip4_u =
223 { .daddr = src,
224 .saddr = dst,
225 .tos = tos } },
226 .iif = oif };
227 struct fib_result res;
228 int no_addr, rpf;
229 int ret;
230
231 no_addr = rpf = 0;
232 rcu_read_lock();
e5ed6399 233 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
234 if (in_dev) {
235 no_addr = in_dev->ifa_list == NULL;
236 rpf = IN_DEV_RPFILTER(in_dev);
237 }
238 rcu_read_unlock();
239
240 if (in_dev == NULL)
241 goto e_inval;
242
243 if (fib_lookup(&fl, &res))
244 goto last_resort;
245 if (res.type != RTN_UNICAST)
246 goto e_inval_res;
247 *spec_dst = FIB_RES_PREFSRC(res);
248 fib_combine_itag(itag, &res);
249#ifdef CONFIG_IP_ROUTE_MULTIPATH
250 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
251#else
252 if (FIB_RES_DEV(res) == dev)
253#endif
254 {
255 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
256 fib_res_put(&res);
257 return ret;
258 }
259 fib_res_put(&res);
260 if (no_addr)
261 goto last_resort;
262 if (rpf)
263 goto e_inval;
264 fl.oif = dev->ifindex;
265
266 ret = 0;
267 if (fib_lookup(&fl, &res) == 0) {
268 if (res.type == RTN_UNICAST) {
269 *spec_dst = FIB_RES_PREFSRC(res);
270 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
271 }
272 fib_res_put(&res);
273 }
274 return ret;
275
276last_resort:
277 if (rpf)
278 goto e_inval;
279 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
280 *itag = 0;
281 return 0;
282
283e_inval_res:
284 fib_res_put(&res);
285e_inval:
286 return -EINVAL;
287}
288
81f7bf6c 289static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
290{
291 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
292}
293
294static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
295{
296 struct nlattr *nla;
297
298 nla = (struct nlattr *) ((char *) mx + len);
299 nla->nla_type = type;
300 nla->nla_len = nla_attr_size(4);
301 *(u32 *) nla_data(nla) = value;
302
303 return len + nla_total_size(4);
304}
305
4b5d47d4 306static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
307 struct fib_config *cfg)
308{
6d85c10a 309 __be32 addr;
4e902c57
TG
310 int plen;
311
312 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 313 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
314
315 if (rt->rt_dst.sa_family != AF_INET)
316 return -EAFNOSUPPORT;
317
318 /*
319 * Check mask for validity:
320 * a) it must be contiguous.
321 * b) destination must have all host bits clear.
322 * c) if application forgot to set correct family (AF_INET),
323 * reject request unless it is absolutely clear i.e.
324 * both family and mask are zero.
325 */
326 plen = 32;
327 addr = sk_extract_addr(&rt->rt_dst);
328 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 329 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
330
331 if (rt->rt_genmask.sa_family != AF_INET) {
332 if (mask || rt->rt_genmask.sa_family)
333 return -EAFNOSUPPORT;
334 }
335
336 if (bad_mask(mask, addr))
337 return -EINVAL;
338
339 plen = inet_mask_len(mask);
340 }
341
342 cfg->fc_dst_len = plen;
343 cfg->fc_dst = addr;
344
345 if (cmd != SIOCDELRT) {
346 cfg->fc_nlflags = NLM_F_CREATE;
347 cfg->fc_protocol = RTPROT_BOOT;
348 }
349
350 if (rt->rt_metric)
351 cfg->fc_priority = rt->rt_metric - 1;
352
353 if (rt->rt_flags & RTF_REJECT) {
354 cfg->fc_scope = RT_SCOPE_HOST;
355 cfg->fc_type = RTN_UNREACHABLE;
356 return 0;
357 }
358
359 cfg->fc_scope = RT_SCOPE_NOWHERE;
360 cfg->fc_type = RTN_UNICAST;
361
362 if (rt->rt_dev) {
363 char *colon;
364 struct net_device *dev;
365 char devname[IFNAMSIZ];
366
367 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
368 return -EFAULT;
369
370 devname[IFNAMSIZ-1] = 0;
371 colon = strchr(devname, ':');
372 if (colon)
373 *colon = 0;
4b5d47d4 374 dev = __dev_get_by_name(net, devname);
4e902c57
TG
375 if (!dev)
376 return -ENODEV;
377 cfg->fc_oif = dev->ifindex;
378 if (colon) {
379 struct in_ifaddr *ifa;
380 struct in_device *in_dev = __in_dev_get_rtnl(dev);
381 if (!in_dev)
382 return -ENODEV;
383 *colon = ':';
384 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
385 if (strcmp(ifa->ifa_label, devname) == 0)
386 break;
387 if (ifa == NULL)
388 return -ENODEV;
389 cfg->fc_prefsrc = ifa->ifa_local;
390 }
391 }
392
393 addr = sk_extract_addr(&rt->rt_gateway);
394 if (rt->rt_gateway.sa_family == AF_INET && addr) {
395 cfg->fc_gw = addr;
396 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 397 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
398 cfg->fc_scope = RT_SCOPE_UNIVERSE;
399 }
400
401 if (cmd == SIOCDELRT)
402 return 0;
403
404 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
405 return -EINVAL;
406
407 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
408 cfg->fc_scope = RT_SCOPE_LINK;
409
410 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
411 struct nlattr *mx;
412 int len = 0;
413
414 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 415 if (mx == NULL)
4e902c57
TG
416 return -ENOMEM;
417
418 if (rt->rt_flags & RTF_MTU)
419 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
420
421 if (rt->rt_flags & RTF_WINDOW)
422 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
423
424 if (rt->rt_flags & RTF_IRTT)
425 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
426
427 cfg->fc_mx = mx;
428 cfg->fc_mx_len = len;
429 }
430
431 return 0;
432}
433
1da177e4
LT
434/*
435 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
436 */
e905a9ed 437
1bad118a 438int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 439{
4e902c57
TG
440 struct fib_config cfg;
441 struct rtentry rt;
1da177e4 442 int err;
1da177e4
LT
443
444 switch (cmd) {
445 case SIOCADDRT: /* Add a route */
446 case SIOCDELRT: /* Delete a route */
447 if (!capable(CAP_NET_ADMIN))
448 return -EPERM;
4e902c57
TG
449
450 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 451 return -EFAULT;
4e902c57 452
1da177e4 453 rtnl_lock();
1bad118a 454 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 455 if (err == 0) {
4e902c57
TG
456 struct fib_table *tb;
457
1da177e4 458 if (cmd == SIOCDELRT) {
1bad118a 459 tb = fib_get_table(net, cfg.fc_table);
1da177e4 460 if (tb)
4e902c57
TG
461 err = tb->tb_delete(tb, &cfg);
462 else
463 err = -ESRCH;
1da177e4 464 } else {
1bad118a 465 tb = fib_new_table(net, cfg.fc_table);
1da177e4 466 if (tb)
4e902c57
TG
467 err = tb->tb_insert(tb, &cfg);
468 else
469 err = -ENOBUFS;
1da177e4 470 }
4e902c57
TG
471
472 /* allocated by rtentry_to_fib_config() */
473 kfree(cfg.fc_mx);
1da177e4
LT
474 }
475 rtnl_unlock();
476 return err;
477 }
478 return -EINVAL;
479}
480
ef7c79ed 481const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
482 [RTA_DST] = { .type = NLA_U32 },
483 [RTA_SRC] = { .type = NLA_U32 },
484 [RTA_IIF] = { .type = NLA_U32 },
485 [RTA_OIF] = { .type = NLA_U32 },
486 [RTA_GATEWAY] = { .type = NLA_U32 },
487 [RTA_PRIORITY] = { .type = NLA_U32 },
488 [RTA_PREFSRC] = { .type = NLA_U32 },
489 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 490 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57
TG
491 [RTA_PROTOINFO] = { .type = NLA_U32 },
492 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
493};
494
4b5d47d4
DL
495static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
496 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 497{
4e902c57
TG
498 struct nlattr *attr;
499 int err, remaining;
500 struct rtmsg *rtm;
501
502 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
503 if (err < 0)
504 goto errout;
505
506 memset(cfg, 0, sizeof(*cfg));
507
508 rtm = nlmsg_data(nlh);
4e902c57 509 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
510 cfg->fc_tos = rtm->rtm_tos;
511 cfg->fc_table = rtm->rtm_table;
512 cfg->fc_protocol = rtm->rtm_protocol;
513 cfg->fc_scope = rtm->rtm_scope;
514 cfg->fc_type = rtm->rtm_type;
515 cfg->fc_flags = rtm->rtm_flags;
516 cfg->fc_nlflags = nlh->nlmsg_flags;
517
518 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
519 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 520 cfg->fc_nlinfo.nl_net = net;
4e902c57 521
a0ee18b9
TG
522 if (cfg->fc_type > RTN_MAX) {
523 err = -EINVAL;
524 goto errout;
525 }
526
4e902c57 527 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 528 switch (nla_type(attr)) {
4e902c57 529 case RTA_DST:
17fb2c64 530 cfg->fc_dst = nla_get_be32(attr);
4e902c57 531 break;
4e902c57
TG
532 case RTA_OIF:
533 cfg->fc_oif = nla_get_u32(attr);
534 break;
535 case RTA_GATEWAY:
17fb2c64 536 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
537 break;
538 case RTA_PRIORITY:
539 cfg->fc_priority = nla_get_u32(attr);
540 break;
541 case RTA_PREFSRC:
17fb2c64 542 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
543 break;
544 case RTA_METRICS:
545 cfg->fc_mx = nla_data(attr);
546 cfg->fc_mx_len = nla_len(attr);
547 break;
548 case RTA_MULTIPATH:
549 cfg->fc_mp = nla_data(attr);
550 cfg->fc_mp_len = nla_len(attr);
551 break;
552 case RTA_FLOW:
553 cfg->fc_flow = nla_get_u32(attr);
554 break;
4e902c57
TG
555 case RTA_TABLE:
556 cfg->fc_table = nla_get_u32(attr);
557 break;
1da177e4
LT
558 }
559 }
4e902c57 560
1da177e4 561 return 0;
4e902c57
TG
562errout:
563 return err;
1da177e4
LT
564}
565
63f3444f 566static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 567{
b854272b 568 struct net *net = skb->sk->sk_net;
4e902c57
TG
569 struct fib_config cfg;
570 struct fib_table *tb;
571 int err;
1da177e4 572
4b5d47d4 573 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
574 if (err < 0)
575 goto errout;
1da177e4 576
8ad4942c 577 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
578 if (tb == NULL) {
579 err = -ESRCH;
580 goto errout;
581 }
582
583 err = tb->tb_delete(tb, &cfg);
584errout:
585 return err;
1da177e4
LT
586}
587
63f3444f 588static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 589{
b854272b 590 struct net *net = skb->sk->sk_net;
4e902c57
TG
591 struct fib_config cfg;
592 struct fib_table *tb;
593 int err;
1da177e4 594
4b5d47d4 595 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
596 if (err < 0)
597 goto errout;
1da177e4 598
226b0b4a 599 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
600 if (tb == NULL) {
601 err = -ENOBUFS;
602 goto errout;
603 }
604
605 err = tb->tb_insert(tb, &cfg);
606errout:
607 return err;
1da177e4
LT
608}
609
63f3444f 610static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 611{
b854272b 612 struct net *net = skb->sk->sk_net;
1af5a8c4
PM
613 unsigned int h, s_h;
614 unsigned int e = 0, s_e;
1da177e4 615 struct fib_table *tb;
1af5a8c4 616 struct hlist_node *node;
e4aef8ae 617 struct hlist_head *head;
1af5a8c4 618 int dumped = 0;
1da177e4 619
be403ea1
TG
620 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
621 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
622 return ip_rt_dump(skb, cb);
623
1af5a8c4
PM
624 s_h = cb->args[0];
625 s_e = cb->args[1];
626
627 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
628 e = 0;
e4aef8ae
DL
629 head = &net->ipv4.fib_table_hash[h];
630 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
631 if (e < s_e)
632 goto next;
633 if (dumped)
634 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 635 2 * sizeof(cb->args[0]));
1af5a8c4
PM
636 if (tb->tb_dump(tb, skb, cb) < 0)
637 goto out;
638 dumped = 1;
639next:
640 e++;
641 }
1da177e4 642 }
1af5a8c4
PM
643out:
644 cb->args[1] = e;
645 cb->args[0] = h;
1da177e4
LT
646
647 return skb->len;
648}
649
650/* Prepare and feed intra-kernel routing request.
651 Really, it should be netlink message, but :-( netlink
652 can be not configured, so that we feed it directly
653 to fib engine. It is legal, because all events occur
654 only when netlink is already locked.
655 */
656
81f7bf6c 657static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 658{
4b5d47d4 659 struct net *net = ifa->ifa_dev->dev->nd_net;
4e902c57
TG
660 struct fib_table *tb;
661 struct fib_config cfg = {
662 .fc_protocol = RTPROT_KERNEL,
663 .fc_type = type,
664 .fc_dst = dst,
665 .fc_dst_len = dst_len,
666 .fc_prefsrc = ifa->ifa_local,
667 .fc_oif = ifa->ifa_dev->dev->ifindex,
668 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 669 .fc_nlinfo = {
4b5d47d4 670 .nl_net = net,
4d1169c1 671 },
4e902c57 672 };
1da177e4
LT
673
674 if (type == RTN_UNICAST)
4b5d47d4 675 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 676 else
4b5d47d4 677 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
678
679 if (tb == NULL)
680 return;
681
4e902c57 682 cfg.fc_table = tb->tb_id;
1da177e4 683
4e902c57
TG
684 if (type != RTN_LOCAL)
685 cfg.fc_scope = RT_SCOPE_LINK;
686 else
687 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
688
689 if (cmd == RTM_NEWROUTE)
4e902c57 690 tb->tb_insert(tb, &cfg);
1da177e4 691 else
4e902c57 692 tb->tb_delete(tb, &cfg);
1da177e4
LT
693}
694
0ff60a45 695void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
696{
697 struct in_device *in_dev = ifa->ifa_dev;
698 struct net_device *dev = in_dev->dev;
699 struct in_ifaddr *prim = ifa;
a144ea4b
AV
700 __be32 mask = ifa->ifa_mask;
701 __be32 addr = ifa->ifa_local;
702 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
703
704 if (ifa->ifa_flags&IFA_F_SECONDARY) {
705 prim = inet_ifa_byprefix(in_dev, prefix, mask);
706 if (prim == NULL) {
a6db9010 707 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
708 return;
709 }
710 }
711
712 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
713
714 if (!(dev->flags&IFF_UP))
715 return;
716
717 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 718 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
719 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
720
f97c1e0c 721 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
722 (prefix != addr || ifa->ifa_prefixlen < 32)) {
723 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
724 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
725
726 /* Add network specific broadcasts, when it takes a sense */
727 if (ifa->ifa_prefixlen < 31) {
728 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
729 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
730 }
731 }
732}
733
734static void fib_del_ifaddr(struct in_ifaddr *ifa)
735{
736 struct in_device *in_dev = ifa->ifa_dev;
737 struct net_device *dev = in_dev->dev;
738 struct in_ifaddr *ifa1;
739 struct in_ifaddr *prim = ifa;
a144ea4b
AV
740 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
741 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
742#define LOCAL_OK 1
743#define BRD_OK 2
744#define BRD0_OK 4
745#define BRD1_OK 8
746 unsigned ok = 0;
747
748 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
749 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
750 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
751 else {
752 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
753 if (prim == NULL) {
a6db9010 754 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
755 return;
756 }
757 }
758
759 /* Deletion is more complicated than add.
760 We should take care of not to delete too much :-)
761
762 Scan address list to be sure that addresses are really gone.
763 */
764
765 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
766 if (ifa->ifa_local == ifa1->ifa_local)
767 ok |= LOCAL_OK;
768 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
769 ok |= BRD_OK;
770 if (brd == ifa1->ifa_broadcast)
771 ok |= BRD1_OK;
772 if (any == ifa1->ifa_broadcast)
773 ok |= BRD0_OK;
774 }
775
776 if (!(ok&BRD_OK))
777 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
778 if (!(ok&BRD1_OK))
779 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
780 if (!(ok&BRD0_OK))
781 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
782 if (!(ok&LOCAL_OK)) {
783 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
784
785 /* Check, that this local address finally disappeared. */
226b0b4a 786 if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
787 /* And the last, but not the least thing.
788 We must flush stray FIB entries.
789
790 First of all, we scan fib_info list searching
791 for stray nexthop entries, then ignite fib_flush.
792 */
793 if (fib_sync_down(ifa->ifa_local, NULL, 0))
226b0b4a 794 fib_flush(dev->nd_net);
1da177e4
LT
795 }
796 }
797#undef LOCAL_OK
798#undef BRD_OK
799#undef BRD0_OK
800#undef BRD1_OK
801}
802
246955fe
RO
803static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
804{
e905a9ed 805
246955fe 806 struct fib_result res;
5f300893 807 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 808 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
809 .tos = frn->fl_tos,
810 .scope = frn->fl_scope } } };
1194ed0a 811
912a41a4
SV
812#ifdef CONFIG_IP_MULTIPLE_TABLES
813 res.r = NULL;
814#endif
815
1194ed0a 816 frn->err = -ENOENT;
246955fe
RO
817 if (tb) {
818 local_bh_disable();
819
820 frn->tb_id = tb->tb_id;
821 frn->err = tb->tb_lookup(tb, &fl, &res);
822
823 if (!frn->err) {
824 frn->prefixlen = res.prefixlen;
825 frn->nh_sel = res.nh_sel;
826 frn->type = res.type;
827 frn->scope = res.scope;
1194ed0a 828 fib_res_put(&res);
246955fe
RO
829 }
830 local_bh_enable();
831 }
832}
833
28f7b036 834static void nl_fib_input(struct sk_buff *skb)
246955fe 835{
6bd48fcf 836 struct net *net;
246955fe 837 struct fib_result_nl *frn;
28f7b036 838 struct nlmsghdr *nlh;
246955fe 839 struct fib_table *tb;
28f7b036 840 u32 pid;
1194ed0a 841
6bd48fcf 842 net = skb->sk->sk_net;
b529ccf2 843 nlh = nlmsg_hdr(skb);
ea86575e 844 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 845 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 846 return;
d883a036
DL
847
848 skb = skb_clone(skb, GFP_KERNEL);
849 if (skb == NULL)
850 return;
851 nlh = nlmsg_hdr(skb);
e905a9ed 852
246955fe 853 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 854 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
855
856 nl_fib_lookup(frn, tb);
e905a9ed 857
1194ed0a 858 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 859 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 860 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 861 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 862}
246955fe 863
7b1a74fd 864static int nl_fib_lookup_init(struct net *net)
246955fe 865{
6bd48fcf
DL
866 struct sock *sk;
867 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
868 nl_fib_input, NULL, THIS_MODULE);
869 if (sk == NULL)
7b1a74fd 870 return -EAFNOSUPPORT;
6bd48fcf
DL
871 /* Don't hold an extra reference on the namespace */
872 put_net(sk->sk_net);
873 net->ipv4.fibnl = sk;
7b1a74fd
DL
874 return 0;
875}
876
877static void nl_fib_lookup_exit(struct net *net)
878{
6bd48fcf
DL
879 /* At the last minute lie and say this is a socket for the
880 * initial network namespace. So the socket will be safe to free.
881 */
882 net->ipv4.fibnl->sk_net = get_net(&init_net);
883 sock_put(net->ipv4.fibnl);
246955fe
RO
884}
885
1da177e4
LT
886static void fib_disable_ip(struct net_device *dev, int force)
887{
888 if (fib_sync_down(0, dev, force))
226b0b4a 889 fib_flush(dev->nd_net);
1da177e4
LT
890 rt_cache_flush(0);
891 arp_ifdown(dev);
892}
893
894static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
895{
896 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
897
898 switch (event) {
899 case NETDEV_UP:
900 fib_add_ifaddr(ifa);
901#ifdef CONFIG_IP_ROUTE_MULTIPATH
902 fib_sync_up(ifa->ifa_dev->dev);
903#endif
904 rt_cache_flush(-1);
905 break;
906 case NETDEV_DOWN:
907 fib_del_ifaddr(ifa);
9fcc2e8a 908 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
909 /* Last address was deleted from this interface.
910 Disable IP.
911 */
912 fib_disable_ip(ifa->ifa_dev->dev, 1);
913 } else {
914 rt_cache_flush(-1);
915 }
916 break;
917 }
918 return NOTIFY_DONE;
919}
920
921static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
922{
923 struct net_device *dev = ptr;
e5ed6399 924 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
925
926 if (event == NETDEV_UNREGISTER) {
927 fib_disable_ip(dev, 2);
928 return NOTIFY_DONE;
929 }
930
931 if (!in_dev)
932 return NOTIFY_DONE;
933
934 switch (event) {
935 case NETDEV_UP:
936 for_ifa(in_dev) {
937 fib_add_ifaddr(ifa);
938 } endfor_ifa(in_dev);
939#ifdef CONFIG_IP_ROUTE_MULTIPATH
940 fib_sync_up(dev);
941#endif
942 rt_cache_flush(-1);
943 break;
944 case NETDEV_DOWN:
945 fib_disable_ip(dev, 0);
946 break;
947 case NETDEV_CHANGEMTU:
948 case NETDEV_CHANGE:
949 rt_cache_flush(0);
950 break;
951 }
952 return NOTIFY_DONE;
953}
954
955static struct notifier_block fib_inetaddr_notifier = {
956 .notifier_call =fib_inetaddr_event,
957};
958
959static struct notifier_block fib_netdev_notifier = {
960 .notifier_call =fib_netdev_event,
961};
962
7b1a74fd 963static int __net_init ip_fib_net_init(struct net *net)
1da177e4 964{
1af5a8c4
PM
965 unsigned int i;
966
e4aef8ae
DL
967 net->ipv4.fib_table_hash = kzalloc(
968 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
969 if (net->ipv4.fib_table_hash == NULL)
970 return -ENOMEM;
971
1af5a8c4 972 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 973 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 974
7b1a74fd
DL
975 return fib4_rules_init(net);
976}
1da177e4 977
7b1a74fd
DL
978static void __net_exit ip_fib_net_exit(struct net *net)
979{
980 unsigned int i;
981
982#ifdef CONFIG_IP_MULTIPLE_TABLES
983 fib4_rules_exit(net);
984#endif
985
986 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
987 struct fib_table *tb;
988 struct hlist_head *head;
989 struct hlist_node *node, *tmp;
63f3444f 990
e4aef8ae 991 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
992 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
993 hlist_del(node);
994 tb->tb_flush(tb);
995 kfree(tb);
996 }
997 }
e4aef8ae 998 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
999}
1000
1001static int __net_init fib_net_init(struct net *net)
1002{
1003 int error;
1004
7b1a74fd
DL
1005 error = ip_fib_net_init(net);
1006 if (error < 0)
1007 goto out;
1008 error = nl_fib_lookup_init(net);
1009 if (error < 0)
1010 goto out_nlfl;
1011 error = fib_proc_init(net);
1012 if (error < 0)
1013 goto out_proc;
1014out:
1015 return error;
1016
1017out_proc:
1018 nl_fib_lookup_exit(net);
1019out_nlfl:
1020 ip_fib_net_exit(net);
1021 goto out;
1022}
1023
1024static void __net_exit fib_net_exit(struct net *net)
1025{
1026 fib_proc_exit(net);
1027 nl_fib_lookup_exit(net);
1028 ip_fib_net_exit(net);
1029}
1030
1031static struct pernet_operations fib_net_ops = {
1032 .init = fib_net_init,
1033 .exit = fib_net_exit,
1034};
1035
1036void __init ip_fib_init(void)
1037{
63f3444f
TG
1038 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1039 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1040 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1041
1042 register_pernet_subsys(&fib_net_ops);
1043 register_netdevice_notifier(&fib_netdev_notifier);
1044 register_inetaddr_notifier(&fib_inetaddr_notifier);
1da177e4
LT
1045}
1046
1047EXPORT_SYMBOL(inet_addr_type);
05538116 1048EXPORT_SYMBOL(inet_dev_addr_type);
a1e8733e 1049EXPORT_SYMBOL(ip_dev_find);