]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
[NETNS]: Memory leak on network namespace stop.
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
4fc268d2 22#include <linux/capability.h>
1da177e4
LT
23#include <linux/types.h>
24#include <linux/kernel.h>
1da177e4
LT
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
14c85021 32#include <linux/inetdevice.h>
1da177e4 33#include <linux/netdevice.h>
1823730f 34#include <linux/if_addr.h>
1da177e4
LT
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
1da177e4 37#include <linux/init.h>
1af5a8c4 38#include <linux/list.h>
1da177e4
LT
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
63f3444f 48#include <net/rtnetlink.h>
1da177e4 49
1da177e4
LT
50#ifndef CONFIG_IP_MULTIPLE_TABLES
51
7b1a74fd 52static int __net_init fib4_rules_init(struct net *net)
c3e9a353 53{
93456b6d
DL
54 struct fib_table *local_table, *main_table;
55
7f9b8052 56 local_table = fib_hash_table(RT_TABLE_LOCAL);
93456b6d 57 if (local_table == NULL)
dbb50165
DL
58 return -ENOMEM;
59
7f9b8052 60 main_table = fib_hash_table(RT_TABLE_MAIN);
93456b6d 61 if (main_table == NULL)
dbb50165
DL
62 goto fail;
63
93456b6d 64 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 65 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 66 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 67 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
68 return 0;
69
70fail:
93456b6d 71 kfree(local_table);
dbb50165 72 return -ENOMEM;
c3e9a353 73}
1af5a8c4 74#else
1da177e4 75
8ad4942c 76struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
77{
78 struct fib_table *tb;
1af5a8c4 79 unsigned int h;
1da177e4 80
1af5a8c4
PM
81 if (id == 0)
82 id = RT_TABLE_MAIN;
8ad4942c 83 tb = fib_get_table(net, id);
1af5a8c4
PM
84 if (tb)
85 return tb;
7f9b8052
SH
86
87 tb = fib_hash_table(id);
1da177e4
LT
88 if (!tb)
89 return NULL;
1af5a8c4 90 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 91 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
92 return tb;
93}
94
8ad4942c 95struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
96{
97 struct fib_table *tb;
98 struct hlist_node *node;
e4aef8ae 99 struct hlist_head *head;
1af5a8c4 100 unsigned int h;
1da177e4 101
1af5a8c4
PM
102 if (id == 0)
103 id = RT_TABLE_MAIN;
104 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 105
1af5a8c4 106 rcu_read_lock();
e4aef8ae
DL
107 head = &net->ipv4.fib_table_hash[h];
108 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
109 if (tb->tb_id == id) {
110 rcu_read_unlock();
111 return tb;
112 }
113 }
114 rcu_read_unlock();
115 return NULL;
116}
1da177e4
LT
117#endif /* CONFIG_IP_MULTIPLE_TABLES */
118
e4aef8ae 119static void fib_flush(struct net *net)
1da177e4
LT
120{
121 int flushed = 0;
1da177e4 122 struct fib_table *tb;
1af5a8c4 123 struct hlist_node *node;
e4aef8ae 124 struct hlist_head *head;
1af5a8c4 125 unsigned int h;
1da177e4 126
1af5a8c4 127 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
128 head = &net->ipv4.fib_table_hash[h];
129 hlist_for_each_entry(tb, node, head, tb_hlist)
1af5a8c4 130 flushed += tb->tb_flush(tb);
1da177e4 131 }
1da177e4
LT
132
133 if (flushed)
134 rt_cache_flush(-1);
135}
136
137/*
138 * Find the first device with a given source address.
139 */
140
60cad5da 141struct net_device * ip_dev_find(__be32 addr)
1da177e4
LT
142{
143 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
144 struct fib_result res;
145 struct net_device *dev = NULL;
03cf786c 146 struct fib_table *local_table;
1da177e4
LT
147
148#ifdef CONFIG_IP_MULTIPLE_TABLES
149 res.r = NULL;
150#endif
151
8ad4942c 152 local_table = fib_get_table(&init_net, RT_TABLE_LOCAL);
03cf786c 153 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
1da177e4
LT
154 return NULL;
155 if (res.type != RTN_LOCAL)
156 goto out;
157 dev = FIB_RES_DEV(res);
158
159 if (dev)
160 dev_hold(dev);
161out:
162 fib_res_put(&res);
163 return dev;
164}
165
05538116
LAT
166/*
167 * Find address type as if only "dev" was present in the system. If
168 * on_dev is NULL then all interfaces are taken into consideration.
169 */
6b175b26
EB
170static inline unsigned __inet_dev_addr_type(struct net *net,
171 const struct net_device *dev,
05538116 172 __be32 addr)
1da177e4
LT
173{
174 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
175 struct fib_result res;
176 unsigned ret = RTN_BROADCAST;
03cf786c 177 struct fib_table *local_table;
1da177e4 178
f97c1e0c 179 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
1da177e4 180 return RTN_BROADCAST;
f97c1e0c 181 if (ipv4_is_multicast(addr))
1da177e4
LT
182 return RTN_MULTICAST;
183
184#ifdef CONFIG_IP_MULTIPLE_TABLES
185 res.r = NULL;
186#endif
e905a9ed 187
6b175b26 188 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 189 if (local_table) {
1da177e4 190 ret = RTN_UNICAST;
03cf786c 191 if (!local_table->tb_lookup(local_table, &fl, &res)) {
05538116
LAT
192 if (!dev || dev == res.fi->fib_dev)
193 ret = res.type;
1da177e4
LT
194 fib_res_put(&res);
195 }
196 }
197 return ret;
198}
199
6b175b26 200unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 201{
6b175b26 202 return __inet_dev_addr_type(net, NULL, addr);
05538116
LAT
203}
204
6b175b26
EB
205unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
206 __be32 addr)
05538116 207{
6b175b26 208 return __inet_dev_addr_type(net, dev, addr);
05538116
LAT
209}
210
1da177e4
LT
211/* Given (packet source, input interface) and optional (dst, oif, tos):
212 - (main) check, that source is valid i.e. not broadcast or our local
213 address.
214 - figure out what "logical" interface this packet arrived
215 and calculate "specific destination" address.
216 - check, that packet arrived from expected physical interface.
217 */
218
d9c9df8c
AV
219int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
220 struct net_device *dev, __be32 *spec_dst, u32 *itag)
1da177e4
LT
221{
222 struct in_device *in_dev;
223 struct flowi fl = { .nl_u = { .ip4_u =
224 { .daddr = src,
225 .saddr = dst,
226 .tos = tos } },
227 .iif = oif };
228 struct fib_result res;
229 int no_addr, rpf;
230 int ret;
231
232 no_addr = rpf = 0;
233 rcu_read_lock();
e5ed6399 234 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
235 if (in_dev) {
236 no_addr = in_dev->ifa_list == NULL;
237 rpf = IN_DEV_RPFILTER(in_dev);
238 }
239 rcu_read_unlock();
240
241 if (in_dev == NULL)
242 goto e_inval;
243
244 if (fib_lookup(&fl, &res))
245 goto last_resort;
246 if (res.type != RTN_UNICAST)
247 goto e_inval_res;
248 *spec_dst = FIB_RES_PREFSRC(res);
249 fib_combine_itag(itag, &res);
250#ifdef CONFIG_IP_ROUTE_MULTIPATH
251 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
252#else
253 if (FIB_RES_DEV(res) == dev)
254#endif
255 {
256 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
257 fib_res_put(&res);
258 return ret;
259 }
260 fib_res_put(&res);
261 if (no_addr)
262 goto last_resort;
263 if (rpf)
264 goto e_inval;
265 fl.oif = dev->ifindex;
266
267 ret = 0;
268 if (fib_lookup(&fl, &res) == 0) {
269 if (res.type == RTN_UNICAST) {
270 *spec_dst = FIB_RES_PREFSRC(res);
271 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
272 }
273 fib_res_put(&res);
274 }
275 return ret;
276
277last_resort:
278 if (rpf)
279 goto e_inval;
280 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
281 *itag = 0;
282 return 0;
283
284e_inval_res:
285 fib_res_put(&res);
286e_inval:
287 return -EINVAL;
288}
289
81f7bf6c 290static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
291{
292 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
293}
294
295static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
296{
297 struct nlattr *nla;
298
299 nla = (struct nlattr *) ((char *) mx + len);
300 nla->nla_type = type;
301 nla->nla_len = nla_attr_size(4);
302 *(u32 *) nla_data(nla) = value;
303
304 return len + nla_total_size(4);
305}
306
4b5d47d4 307static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
308 struct fib_config *cfg)
309{
6d85c10a 310 __be32 addr;
4e902c57
TG
311 int plen;
312
313 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 314 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
315
316 if (rt->rt_dst.sa_family != AF_INET)
317 return -EAFNOSUPPORT;
318
319 /*
320 * Check mask for validity:
321 * a) it must be contiguous.
322 * b) destination must have all host bits clear.
323 * c) if application forgot to set correct family (AF_INET),
324 * reject request unless it is absolutely clear i.e.
325 * both family and mask are zero.
326 */
327 plen = 32;
328 addr = sk_extract_addr(&rt->rt_dst);
329 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 330 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
331
332 if (rt->rt_genmask.sa_family != AF_INET) {
333 if (mask || rt->rt_genmask.sa_family)
334 return -EAFNOSUPPORT;
335 }
336
337 if (bad_mask(mask, addr))
338 return -EINVAL;
339
340 plen = inet_mask_len(mask);
341 }
342
343 cfg->fc_dst_len = plen;
344 cfg->fc_dst = addr;
345
346 if (cmd != SIOCDELRT) {
347 cfg->fc_nlflags = NLM_F_CREATE;
348 cfg->fc_protocol = RTPROT_BOOT;
349 }
350
351 if (rt->rt_metric)
352 cfg->fc_priority = rt->rt_metric - 1;
353
354 if (rt->rt_flags & RTF_REJECT) {
355 cfg->fc_scope = RT_SCOPE_HOST;
356 cfg->fc_type = RTN_UNREACHABLE;
357 return 0;
358 }
359
360 cfg->fc_scope = RT_SCOPE_NOWHERE;
361 cfg->fc_type = RTN_UNICAST;
362
363 if (rt->rt_dev) {
364 char *colon;
365 struct net_device *dev;
366 char devname[IFNAMSIZ];
367
368 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
369 return -EFAULT;
370
371 devname[IFNAMSIZ-1] = 0;
372 colon = strchr(devname, ':');
373 if (colon)
374 *colon = 0;
4b5d47d4 375 dev = __dev_get_by_name(net, devname);
4e902c57
TG
376 if (!dev)
377 return -ENODEV;
378 cfg->fc_oif = dev->ifindex;
379 if (colon) {
380 struct in_ifaddr *ifa;
381 struct in_device *in_dev = __in_dev_get_rtnl(dev);
382 if (!in_dev)
383 return -ENODEV;
384 *colon = ':';
385 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
386 if (strcmp(ifa->ifa_label, devname) == 0)
387 break;
388 if (ifa == NULL)
389 return -ENODEV;
390 cfg->fc_prefsrc = ifa->ifa_local;
391 }
392 }
393
394 addr = sk_extract_addr(&rt->rt_gateway);
395 if (rt->rt_gateway.sa_family == AF_INET && addr) {
396 cfg->fc_gw = addr;
397 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 398 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
399 cfg->fc_scope = RT_SCOPE_UNIVERSE;
400 }
401
402 if (cmd == SIOCDELRT)
403 return 0;
404
405 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
406 return -EINVAL;
407
408 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
409 cfg->fc_scope = RT_SCOPE_LINK;
410
411 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
412 struct nlattr *mx;
413 int len = 0;
414
415 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 416 if (mx == NULL)
4e902c57
TG
417 return -ENOMEM;
418
419 if (rt->rt_flags & RTF_MTU)
420 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
421
422 if (rt->rt_flags & RTF_WINDOW)
423 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
424
425 if (rt->rt_flags & RTF_IRTT)
426 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
427
428 cfg->fc_mx = mx;
429 cfg->fc_mx_len = len;
430 }
431
432 return 0;
433}
434
1da177e4
LT
435/*
436 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
437 */
e905a9ed 438
1bad118a 439int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 440{
4e902c57
TG
441 struct fib_config cfg;
442 struct rtentry rt;
1da177e4 443 int err;
1da177e4
LT
444
445 switch (cmd) {
446 case SIOCADDRT: /* Add a route */
447 case SIOCDELRT: /* Delete a route */
448 if (!capable(CAP_NET_ADMIN))
449 return -EPERM;
4e902c57
TG
450
451 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 452 return -EFAULT;
4e902c57 453
1da177e4 454 rtnl_lock();
1bad118a 455 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 456 if (err == 0) {
4e902c57
TG
457 struct fib_table *tb;
458
1da177e4 459 if (cmd == SIOCDELRT) {
1bad118a 460 tb = fib_get_table(net, cfg.fc_table);
1da177e4 461 if (tb)
4e902c57
TG
462 err = tb->tb_delete(tb, &cfg);
463 else
464 err = -ESRCH;
1da177e4 465 } else {
1bad118a 466 tb = fib_new_table(net, cfg.fc_table);
1da177e4 467 if (tb)
4e902c57
TG
468 err = tb->tb_insert(tb, &cfg);
469 else
470 err = -ENOBUFS;
1da177e4 471 }
4e902c57
TG
472
473 /* allocated by rtentry_to_fib_config() */
474 kfree(cfg.fc_mx);
1da177e4
LT
475 }
476 rtnl_unlock();
477 return err;
478 }
479 return -EINVAL;
480}
481
ef7c79ed 482const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
483 [RTA_DST] = { .type = NLA_U32 },
484 [RTA_SRC] = { .type = NLA_U32 },
485 [RTA_IIF] = { .type = NLA_U32 },
486 [RTA_OIF] = { .type = NLA_U32 },
487 [RTA_GATEWAY] = { .type = NLA_U32 },
488 [RTA_PRIORITY] = { .type = NLA_U32 },
489 [RTA_PREFSRC] = { .type = NLA_U32 },
490 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 491 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57
TG
492 [RTA_PROTOINFO] = { .type = NLA_U32 },
493 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
494};
495
4b5d47d4
DL
496static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
497 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 498{
4e902c57
TG
499 struct nlattr *attr;
500 int err, remaining;
501 struct rtmsg *rtm;
502
503 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
504 if (err < 0)
505 goto errout;
506
507 memset(cfg, 0, sizeof(*cfg));
508
509 rtm = nlmsg_data(nlh);
4e902c57 510 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
511 cfg->fc_tos = rtm->rtm_tos;
512 cfg->fc_table = rtm->rtm_table;
513 cfg->fc_protocol = rtm->rtm_protocol;
514 cfg->fc_scope = rtm->rtm_scope;
515 cfg->fc_type = rtm->rtm_type;
516 cfg->fc_flags = rtm->rtm_flags;
517 cfg->fc_nlflags = nlh->nlmsg_flags;
518
519 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
520 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 521 cfg->fc_nlinfo.nl_net = net;
4e902c57 522
a0ee18b9
TG
523 if (cfg->fc_type > RTN_MAX) {
524 err = -EINVAL;
525 goto errout;
526 }
527
4e902c57 528 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 529 switch (nla_type(attr)) {
4e902c57 530 case RTA_DST:
17fb2c64 531 cfg->fc_dst = nla_get_be32(attr);
4e902c57 532 break;
4e902c57
TG
533 case RTA_OIF:
534 cfg->fc_oif = nla_get_u32(attr);
535 break;
536 case RTA_GATEWAY:
17fb2c64 537 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
538 break;
539 case RTA_PRIORITY:
540 cfg->fc_priority = nla_get_u32(attr);
541 break;
542 case RTA_PREFSRC:
17fb2c64 543 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
544 break;
545 case RTA_METRICS:
546 cfg->fc_mx = nla_data(attr);
547 cfg->fc_mx_len = nla_len(attr);
548 break;
549 case RTA_MULTIPATH:
550 cfg->fc_mp = nla_data(attr);
551 cfg->fc_mp_len = nla_len(attr);
552 break;
553 case RTA_FLOW:
554 cfg->fc_flow = nla_get_u32(attr);
555 break;
4e902c57
TG
556 case RTA_TABLE:
557 cfg->fc_table = nla_get_u32(attr);
558 break;
1da177e4
LT
559 }
560 }
4e902c57 561
1da177e4 562 return 0;
4e902c57
TG
563errout:
564 return err;
1da177e4
LT
565}
566
63f3444f 567static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 568{
b854272b 569 struct net *net = skb->sk->sk_net;
4e902c57
TG
570 struct fib_config cfg;
571 struct fib_table *tb;
572 int err;
1da177e4 573
4b5d47d4 574 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
575 if (err < 0)
576 goto errout;
1da177e4 577
8ad4942c 578 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
579 if (tb == NULL) {
580 err = -ESRCH;
581 goto errout;
582 }
583
584 err = tb->tb_delete(tb, &cfg);
585errout:
586 return err;
1da177e4
LT
587}
588
63f3444f 589static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 590{
b854272b 591 struct net *net = skb->sk->sk_net;
4e902c57
TG
592 struct fib_config cfg;
593 struct fib_table *tb;
594 int err;
1da177e4 595
4b5d47d4 596 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
597 if (err < 0)
598 goto errout;
1da177e4 599
226b0b4a 600 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
601 if (tb == NULL) {
602 err = -ENOBUFS;
603 goto errout;
604 }
605
606 err = tb->tb_insert(tb, &cfg);
607errout:
608 return err;
1da177e4
LT
609}
610
63f3444f 611static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 612{
b854272b 613 struct net *net = skb->sk->sk_net;
1af5a8c4
PM
614 unsigned int h, s_h;
615 unsigned int e = 0, s_e;
1da177e4 616 struct fib_table *tb;
1af5a8c4 617 struct hlist_node *node;
e4aef8ae 618 struct hlist_head *head;
1af5a8c4 619 int dumped = 0;
1da177e4 620
be403ea1
TG
621 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
622 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
623 return ip_rt_dump(skb, cb);
624
1af5a8c4
PM
625 s_h = cb->args[0];
626 s_e = cb->args[1];
627
628 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
629 e = 0;
e4aef8ae
DL
630 head = &net->ipv4.fib_table_hash[h];
631 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
632 if (e < s_e)
633 goto next;
634 if (dumped)
635 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 636 2 * sizeof(cb->args[0]));
1af5a8c4
PM
637 if (tb->tb_dump(tb, skb, cb) < 0)
638 goto out;
639 dumped = 1;
640next:
641 e++;
642 }
1da177e4 643 }
1af5a8c4
PM
644out:
645 cb->args[1] = e;
646 cb->args[0] = h;
1da177e4
LT
647
648 return skb->len;
649}
650
651/* Prepare and feed intra-kernel routing request.
652 Really, it should be netlink message, but :-( netlink
653 can be not configured, so that we feed it directly
654 to fib engine. It is legal, because all events occur
655 only when netlink is already locked.
656 */
657
81f7bf6c 658static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 659{
4b5d47d4 660 struct net *net = ifa->ifa_dev->dev->nd_net;
4e902c57
TG
661 struct fib_table *tb;
662 struct fib_config cfg = {
663 .fc_protocol = RTPROT_KERNEL,
664 .fc_type = type,
665 .fc_dst = dst,
666 .fc_dst_len = dst_len,
667 .fc_prefsrc = ifa->ifa_local,
668 .fc_oif = ifa->ifa_dev->dev->ifindex,
669 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 670 .fc_nlinfo = {
4b5d47d4 671 .nl_net = net,
4d1169c1 672 },
4e902c57 673 };
1da177e4
LT
674
675 if (type == RTN_UNICAST)
4b5d47d4 676 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 677 else
4b5d47d4 678 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
679
680 if (tb == NULL)
681 return;
682
4e902c57 683 cfg.fc_table = tb->tb_id;
1da177e4 684
4e902c57
TG
685 if (type != RTN_LOCAL)
686 cfg.fc_scope = RT_SCOPE_LINK;
687 else
688 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
689
690 if (cmd == RTM_NEWROUTE)
4e902c57 691 tb->tb_insert(tb, &cfg);
1da177e4 692 else
4e902c57 693 tb->tb_delete(tb, &cfg);
1da177e4
LT
694}
695
0ff60a45 696void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
697{
698 struct in_device *in_dev = ifa->ifa_dev;
699 struct net_device *dev = in_dev->dev;
700 struct in_ifaddr *prim = ifa;
a144ea4b
AV
701 __be32 mask = ifa->ifa_mask;
702 __be32 addr = ifa->ifa_local;
703 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
704
705 if (ifa->ifa_flags&IFA_F_SECONDARY) {
706 prim = inet_ifa_byprefix(in_dev, prefix, mask);
707 if (prim == NULL) {
a6db9010 708 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
709 return;
710 }
711 }
712
713 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
714
715 if (!(dev->flags&IFF_UP))
716 return;
717
718 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 719 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
720 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
721
f97c1e0c 722 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
723 (prefix != addr || ifa->ifa_prefixlen < 32)) {
724 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
725 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
726
727 /* Add network specific broadcasts, when it takes a sense */
728 if (ifa->ifa_prefixlen < 31) {
729 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
730 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
731 }
732 }
733}
734
735static void fib_del_ifaddr(struct in_ifaddr *ifa)
736{
737 struct in_device *in_dev = ifa->ifa_dev;
738 struct net_device *dev = in_dev->dev;
739 struct in_ifaddr *ifa1;
740 struct in_ifaddr *prim = ifa;
a144ea4b
AV
741 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
742 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
743#define LOCAL_OK 1
744#define BRD_OK 2
745#define BRD0_OK 4
746#define BRD1_OK 8
747 unsigned ok = 0;
748
749 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
750 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
751 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
752 else {
753 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
754 if (prim == NULL) {
a6db9010 755 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
756 return;
757 }
758 }
759
760 /* Deletion is more complicated than add.
761 We should take care of not to delete too much :-)
762
763 Scan address list to be sure that addresses are really gone.
764 */
765
766 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
767 if (ifa->ifa_local == ifa1->ifa_local)
768 ok |= LOCAL_OK;
769 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
770 ok |= BRD_OK;
771 if (brd == ifa1->ifa_broadcast)
772 ok |= BRD1_OK;
773 if (any == ifa1->ifa_broadcast)
774 ok |= BRD0_OK;
775 }
776
777 if (!(ok&BRD_OK))
778 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
779 if (!(ok&BRD1_OK))
780 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
781 if (!(ok&BRD0_OK))
782 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
783 if (!(ok&LOCAL_OK)) {
784 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
785
786 /* Check, that this local address finally disappeared. */
226b0b4a 787 if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
788 /* And the last, but not the least thing.
789 We must flush stray FIB entries.
790
791 First of all, we scan fib_info list searching
792 for stray nexthop entries, then ignite fib_flush.
793 */
794 if (fib_sync_down(ifa->ifa_local, NULL, 0))
226b0b4a 795 fib_flush(dev->nd_net);
1da177e4
LT
796 }
797 }
798#undef LOCAL_OK
799#undef BRD_OK
800#undef BRD0_OK
801#undef BRD1_OK
802}
803
246955fe
RO
804static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
805{
e905a9ed 806
246955fe 807 struct fib_result res;
5f300893 808 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 809 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
810 .tos = frn->fl_tos,
811 .scope = frn->fl_scope } } };
1194ed0a 812
912a41a4
SV
813#ifdef CONFIG_IP_MULTIPLE_TABLES
814 res.r = NULL;
815#endif
816
1194ed0a 817 frn->err = -ENOENT;
246955fe
RO
818 if (tb) {
819 local_bh_disable();
820
821 frn->tb_id = tb->tb_id;
822 frn->err = tb->tb_lookup(tb, &fl, &res);
823
824 if (!frn->err) {
825 frn->prefixlen = res.prefixlen;
826 frn->nh_sel = res.nh_sel;
827 frn->type = res.type;
828 frn->scope = res.scope;
1194ed0a 829 fib_res_put(&res);
246955fe
RO
830 }
831 local_bh_enable();
832 }
833}
834
28f7b036 835static void nl_fib_input(struct sk_buff *skb)
246955fe 836{
6bd48fcf 837 struct net *net;
246955fe 838 struct fib_result_nl *frn;
28f7b036 839 struct nlmsghdr *nlh;
246955fe 840 struct fib_table *tb;
28f7b036 841 u32 pid;
1194ed0a 842
6bd48fcf 843 net = skb->sk->sk_net;
b529ccf2 844 nlh = nlmsg_hdr(skb);
ea86575e 845 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 846 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 847 return;
d883a036
DL
848
849 skb = skb_clone(skb, GFP_KERNEL);
850 if (skb == NULL)
851 return;
852 nlh = nlmsg_hdr(skb);
e905a9ed 853
246955fe 854 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 855 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
856
857 nl_fib_lookup(frn, tb);
e905a9ed 858
1194ed0a 859 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 860 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 861 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 862 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 863}
246955fe 864
7b1a74fd 865static int nl_fib_lookup_init(struct net *net)
246955fe 866{
6bd48fcf
DL
867 struct sock *sk;
868 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
869 nl_fib_input, NULL, THIS_MODULE);
870 if (sk == NULL)
7b1a74fd 871 return -EAFNOSUPPORT;
6bd48fcf
DL
872 /* Don't hold an extra reference on the namespace */
873 put_net(sk->sk_net);
874 net->ipv4.fibnl = sk;
7b1a74fd
DL
875 return 0;
876}
877
878static void nl_fib_lookup_exit(struct net *net)
879{
6bd48fcf
DL
880 /* At the last minute lie and say this is a socket for the
881 * initial network namespace. So the socket will be safe to free.
882 */
883 net->ipv4.fibnl->sk_net = get_net(&init_net);
4f84d82f 884 sock_release(net->ipv4.fibnl->sk_socket);
246955fe
RO
885}
886
1da177e4
LT
887static void fib_disable_ip(struct net_device *dev, int force)
888{
889 if (fib_sync_down(0, dev, force))
226b0b4a 890 fib_flush(dev->nd_net);
1da177e4
LT
891 rt_cache_flush(0);
892 arp_ifdown(dev);
893}
894
895static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
896{
897 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
898
899 switch (event) {
900 case NETDEV_UP:
901 fib_add_ifaddr(ifa);
902#ifdef CONFIG_IP_ROUTE_MULTIPATH
903 fib_sync_up(ifa->ifa_dev->dev);
904#endif
905 rt_cache_flush(-1);
906 break;
907 case NETDEV_DOWN:
908 fib_del_ifaddr(ifa);
9fcc2e8a 909 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
910 /* Last address was deleted from this interface.
911 Disable IP.
912 */
913 fib_disable_ip(ifa->ifa_dev->dev, 1);
914 } else {
915 rt_cache_flush(-1);
916 }
917 break;
918 }
919 return NOTIFY_DONE;
920}
921
922static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
923{
924 struct net_device *dev = ptr;
e5ed6399 925 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
926
927 if (event == NETDEV_UNREGISTER) {
928 fib_disable_ip(dev, 2);
929 return NOTIFY_DONE;
930 }
931
932 if (!in_dev)
933 return NOTIFY_DONE;
934
935 switch (event) {
936 case NETDEV_UP:
937 for_ifa(in_dev) {
938 fib_add_ifaddr(ifa);
939 } endfor_ifa(in_dev);
940#ifdef CONFIG_IP_ROUTE_MULTIPATH
941 fib_sync_up(dev);
942#endif
943 rt_cache_flush(-1);
944 break;
945 case NETDEV_DOWN:
946 fib_disable_ip(dev, 0);
947 break;
948 case NETDEV_CHANGEMTU:
949 case NETDEV_CHANGE:
950 rt_cache_flush(0);
951 break;
952 }
953 return NOTIFY_DONE;
954}
955
956static struct notifier_block fib_inetaddr_notifier = {
957 .notifier_call =fib_inetaddr_event,
958};
959
960static struct notifier_block fib_netdev_notifier = {
961 .notifier_call =fib_netdev_event,
962};
963
7b1a74fd 964static int __net_init ip_fib_net_init(struct net *net)
1da177e4 965{
1af5a8c4
PM
966 unsigned int i;
967
e4aef8ae
DL
968 net->ipv4.fib_table_hash = kzalloc(
969 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
970 if (net->ipv4.fib_table_hash == NULL)
971 return -ENOMEM;
972
1af5a8c4 973 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 974 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 975
7b1a74fd
DL
976 return fib4_rules_init(net);
977}
1da177e4 978
7b1a74fd
DL
979static void __net_exit ip_fib_net_exit(struct net *net)
980{
981 unsigned int i;
982
983#ifdef CONFIG_IP_MULTIPLE_TABLES
984 fib4_rules_exit(net);
985#endif
986
987 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
988 struct fib_table *tb;
989 struct hlist_head *head;
990 struct hlist_node *node, *tmp;
63f3444f 991
e4aef8ae 992 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
993 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
994 hlist_del(node);
995 tb->tb_flush(tb);
996 kfree(tb);
997 }
998 }
e4aef8ae 999 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1000}
1001
1002static int __net_init fib_net_init(struct net *net)
1003{
1004 int error;
1005
7b1a74fd
DL
1006 error = ip_fib_net_init(net);
1007 if (error < 0)
1008 goto out;
1009 error = nl_fib_lookup_init(net);
1010 if (error < 0)
1011 goto out_nlfl;
1012 error = fib_proc_init(net);
1013 if (error < 0)
1014 goto out_proc;
1015out:
1016 return error;
1017
1018out_proc:
1019 nl_fib_lookup_exit(net);
1020out_nlfl:
1021 ip_fib_net_exit(net);
1022 goto out;
1023}
1024
1025static void __net_exit fib_net_exit(struct net *net)
1026{
1027 fib_proc_exit(net);
1028 nl_fib_lookup_exit(net);
1029 ip_fib_net_exit(net);
1030}
1031
1032static struct pernet_operations fib_net_ops = {
1033 .init = fib_net_init,
1034 .exit = fib_net_exit,
1035};
1036
1037void __init ip_fib_init(void)
1038{
63f3444f
TG
1039 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1040 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1041 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1042
1043 register_pernet_subsys(&fib_net_ops);
1044 register_netdevice_notifier(&fib_netdev_notifier);
1045 register_inetaddr_notifier(&fib_inetaddr_notifier);
7f9b8052
SH
1046
1047 fib_hash_init();
1da177e4
LT
1048}
1049
1050EXPORT_SYMBOL(inet_addr_type);
05538116 1051EXPORT_SYMBOL(inet_dev_addr_type);
a1e8733e 1052EXPORT_SYMBOL(ip_dev_find);