]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
[NETNS]: Add namespace to API for routing /proc entries creation.
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
4fc268d2 22#include <linux/capability.h>
1da177e4
LT
23#include <linux/types.h>
24#include <linux/kernel.h>
1da177e4
LT
25#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
14c85021 32#include <linux/inetdevice.h>
1da177e4 33#include <linux/netdevice.h>
1823730f 34#include <linux/if_addr.h>
1da177e4
LT
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
1da177e4 37#include <linux/init.h>
1af5a8c4 38#include <linux/list.h>
1da177e4
LT
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
63f3444f 48#include <net/rtnetlink.h>
1da177e4
LT
49
/* Debug helper: hands its arguments to printk() prefixed with KERN_DEBUG. */
#define FFprint(a...) printk(KERN_DEBUG a)
51
28f7b036
DM
/*
 * Kernel-side NETLINK_FIB_LOOKUP socket.  Created by nl_fib_lookup_init()
 * and used by nl_fib_input() to unicast lookup replies.
 */
static struct sock *fibnl;
53
1da177e4
LT
54#ifndef CONFIG_IP_MULTIPLE_TABLES
55
1da177e4
LT
/*
 * The two fixed tables used when CONFIG_IP_MULTIPLE_TABLES is off.
 * Both are assigned by fib4_rules_init().
 */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;
58
1af5a8c4
PM
59#define FIB_TABLE_HASHSZ 1
60static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
1da177e4 61
c3e9a353
PE
62static void __init fib4_rules_init(void)
63{
64 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
65 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
66 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
67 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
68}
1af5a8c4 69#else
1da177e4 70
1af5a8c4
PM
71#define FIB_TABLE_HASHSZ 256
72static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
1da177e4 73
1af5a8c4 74struct fib_table *fib_new_table(u32 id)
1da177e4
LT
75{
76 struct fib_table *tb;
1af5a8c4 77 unsigned int h;
1da177e4 78
1af5a8c4
PM
79 if (id == 0)
80 id = RT_TABLE_MAIN;
81 tb = fib_get_table(id);
82 if (tb)
83 return tb;
1da177e4
LT
84 tb = fib_hash_init(id);
85 if (!tb)
86 return NULL;
1af5a8c4
PM
87 h = id & (FIB_TABLE_HASHSZ - 1);
88 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
1da177e4
LT
89 return tb;
90}
91
1af5a8c4
PM
92struct fib_table *fib_get_table(u32 id)
93{
94 struct fib_table *tb;
95 struct hlist_node *node;
96 unsigned int h;
1da177e4 97
1af5a8c4
PM
98 if (id == 0)
99 id = RT_TABLE_MAIN;
100 h = id & (FIB_TABLE_HASHSZ - 1);
101 rcu_read_lock();
102 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
103 if (tb->tb_id == id) {
104 rcu_read_unlock();
105 return tb;
106 }
107 }
108 rcu_read_unlock();
109 return NULL;
110}
1da177e4
LT
111#endif /* CONFIG_IP_MULTIPLE_TABLES */
112
1da177e4
LT
113static void fib_flush(void)
114{
115 int flushed = 0;
1da177e4 116 struct fib_table *tb;
1af5a8c4
PM
117 struct hlist_node *node;
118 unsigned int h;
1da177e4 119
1af5a8c4
PM
120 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
121 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
122 flushed += tb->tb_flush(tb);
1da177e4 123 }
1da177e4
LT
124
125 if (flushed)
126 rt_cache_flush(-1);
127}
128
129/*
130 * Find the first device with a given source address.
131 */
132
60cad5da 133struct net_device * ip_dev_find(__be32 addr)
1da177e4
LT
134{
135 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
136 struct fib_result res;
137 struct net_device *dev = NULL;
03cf786c 138 struct fib_table *local_table;
1da177e4
LT
139
140#ifdef CONFIG_IP_MULTIPLE_TABLES
141 res.r = NULL;
142#endif
143
03cf786c
PE
144 local_table = fib_get_table(RT_TABLE_LOCAL);
145 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
1da177e4
LT
146 return NULL;
147 if (res.type != RTN_LOCAL)
148 goto out;
149 dev = FIB_RES_DEV(res);
150
151 if (dev)
152 dev_hold(dev);
153out:
154 fib_res_put(&res);
155 return dev;
156}
157
05538116
LAT
158/*
159 * Find address type as if only "dev" was present in the system. If
160 * dev is NULL then all interfaces are taken into consideration.
161 */
162static inline unsigned __inet_dev_addr_type(const struct net_device *dev,
163 __be32 addr)
1da177e4
LT
164{
165 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
166 struct fib_result res;
167 unsigned ret = RTN_BROADCAST;
03cf786c 168 struct fib_table *local_table;
1da177e4 169
f97c1e0c 170 if (ipv4_is_zeronet(addr) || ipv4_is_badclass(addr))
1da177e4 171 return RTN_BROADCAST;
f97c1e0c 172 if (ipv4_is_multicast(addr))
1da177e4
LT
173 return RTN_MULTICAST;
174
175#ifdef CONFIG_IP_MULTIPLE_TABLES
176 res.r = NULL;
177#endif
e905a9ed 178
03cf786c
PE
179 local_table = fib_get_table(RT_TABLE_LOCAL);
180 if (local_table) {
1da177e4 181 ret = RTN_UNICAST;
03cf786c 182 if (!local_table->tb_lookup(local_table, &fl, &res)) {
05538116
LAT
183 if (!dev || dev == res.fi->fib_dev)
184 ret = res.type;
1da177e4
LT
185 fib_res_put(&res);
186 }
187 }
188 return ret;
189}
190
05538116
LAT
191unsigned int inet_addr_type(__be32 addr)
192{
193 return __inet_dev_addr_type(NULL, addr);
194}
195
196unsigned int inet_dev_addr_type(const struct net_device *dev, __be32 addr)
197{
198 return __inet_dev_addr_type(dev, addr);
199}
200
1da177e4
LT
201/* Given (packet source, input interface) and optional (dst, oif, tos):
202 - (main) check, that source is valid i.e. not broadcast or our local
203 address.
204 - figure out what "logical" interface this packet arrived
205 and calculate "specific destination" address.
206 - check, that packet arrived from expected physical interface.
207 */
208
d9c9df8c
AV
209int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
210 struct net_device *dev, __be32 *spec_dst, u32 *itag)
1da177e4
LT
211{
212 struct in_device *in_dev;
213 struct flowi fl = { .nl_u = { .ip4_u =
214 { .daddr = src,
215 .saddr = dst,
216 .tos = tos } },
217 .iif = oif };
218 struct fib_result res;
219 int no_addr, rpf;
220 int ret;
221
222 no_addr = rpf = 0;
223 rcu_read_lock();
e5ed6399 224 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
225 if (in_dev) {
226 no_addr = in_dev->ifa_list == NULL;
227 rpf = IN_DEV_RPFILTER(in_dev);
228 }
229 rcu_read_unlock();
230
231 if (in_dev == NULL)
232 goto e_inval;
233
234 if (fib_lookup(&fl, &res))
235 goto last_resort;
236 if (res.type != RTN_UNICAST)
237 goto e_inval_res;
238 *spec_dst = FIB_RES_PREFSRC(res);
239 fib_combine_itag(itag, &res);
240#ifdef CONFIG_IP_ROUTE_MULTIPATH
241 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
242#else
243 if (FIB_RES_DEV(res) == dev)
244#endif
245 {
246 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
247 fib_res_put(&res);
248 return ret;
249 }
250 fib_res_put(&res);
251 if (no_addr)
252 goto last_resort;
253 if (rpf)
254 goto e_inval;
255 fl.oif = dev->ifindex;
256
257 ret = 0;
258 if (fib_lookup(&fl, &res) == 0) {
259 if (res.type == RTN_UNICAST) {
260 *spec_dst = FIB_RES_PREFSRC(res);
261 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
262 }
263 fib_res_put(&res);
264 }
265 return ret;
266
267last_resort:
268 if (rpf)
269 goto e_inval;
270 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
271 *itag = 0;
272 return 0;
273
274e_inval_res:
275 fib_res_put(&res);
276e_inval:
277 return -EINVAL;
278}
279
81f7bf6c 280static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
281{
282 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
283}
284
285static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
286{
287 struct nlattr *nla;
288
289 nla = (struct nlattr *) ((char *) mx + len);
290 nla->nla_type = type;
291 nla->nla_len = nla_attr_size(4);
292 *(u32 *) nla_data(nla) = value;
293
294 return len + nla_total_size(4);
295}
296
297static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
298 struct fib_config *cfg)
299{
6d85c10a 300 __be32 addr;
4e902c57
TG
301 int plen;
302
303 memset(cfg, 0, sizeof(*cfg));
304
305 if (rt->rt_dst.sa_family != AF_INET)
306 return -EAFNOSUPPORT;
307
308 /*
309 * Check mask for validity:
310 * a) it must be contiguous.
311 * b) destination must have all host bits clear.
312 * c) if application forgot to set correct family (AF_INET),
313 * reject request unless it is absolutely clear i.e.
314 * both family and mask are zero.
315 */
316 plen = 32;
317 addr = sk_extract_addr(&rt->rt_dst);
318 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 319 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
320
321 if (rt->rt_genmask.sa_family != AF_INET) {
322 if (mask || rt->rt_genmask.sa_family)
323 return -EAFNOSUPPORT;
324 }
325
326 if (bad_mask(mask, addr))
327 return -EINVAL;
328
329 plen = inet_mask_len(mask);
330 }
331
332 cfg->fc_dst_len = plen;
333 cfg->fc_dst = addr;
334
335 if (cmd != SIOCDELRT) {
336 cfg->fc_nlflags = NLM_F_CREATE;
337 cfg->fc_protocol = RTPROT_BOOT;
338 }
339
340 if (rt->rt_metric)
341 cfg->fc_priority = rt->rt_metric - 1;
342
343 if (rt->rt_flags & RTF_REJECT) {
344 cfg->fc_scope = RT_SCOPE_HOST;
345 cfg->fc_type = RTN_UNREACHABLE;
346 return 0;
347 }
348
349 cfg->fc_scope = RT_SCOPE_NOWHERE;
350 cfg->fc_type = RTN_UNICAST;
351
352 if (rt->rt_dev) {
353 char *colon;
354 struct net_device *dev;
355 char devname[IFNAMSIZ];
356
357 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
358 return -EFAULT;
359
360 devname[IFNAMSIZ-1] = 0;
361 colon = strchr(devname, ':');
362 if (colon)
363 *colon = 0;
881d966b 364 dev = __dev_get_by_name(&init_net, devname);
4e902c57
TG
365 if (!dev)
366 return -ENODEV;
367 cfg->fc_oif = dev->ifindex;
368 if (colon) {
369 struct in_ifaddr *ifa;
370 struct in_device *in_dev = __in_dev_get_rtnl(dev);
371 if (!in_dev)
372 return -ENODEV;
373 *colon = ':';
374 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
375 if (strcmp(ifa->ifa_label, devname) == 0)
376 break;
377 if (ifa == NULL)
378 return -ENODEV;
379 cfg->fc_prefsrc = ifa->ifa_local;
380 }
381 }
382
383 addr = sk_extract_addr(&rt->rt_gateway);
384 if (rt->rt_gateway.sa_family == AF_INET && addr) {
385 cfg->fc_gw = addr;
386 if (rt->rt_flags & RTF_GATEWAY &&
387 inet_addr_type(addr) == RTN_UNICAST)
388 cfg->fc_scope = RT_SCOPE_UNIVERSE;
389 }
390
391 if (cmd == SIOCDELRT)
392 return 0;
393
394 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
395 return -EINVAL;
396
397 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
398 cfg->fc_scope = RT_SCOPE_LINK;
399
400 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
401 struct nlattr *mx;
402 int len = 0;
403
404 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 405 if (mx == NULL)
4e902c57
TG
406 return -ENOMEM;
407
408 if (rt->rt_flags & RTF_MTU)
409 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
410
411 if (rt->rt_flags & RTF_WINDOW)
412 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
413
414 if (rt->rt_flags & RTF_IRTT)
415 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
416
417 cfg->fc_mx = mx;
418 cfg->fc_mx_len = len;
419 }
420
421 return 0;
422}
423
1da177e4
LT
424/*
425 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
426 */
e905a9ed 427
1da177e4
LT
428int ip_rt_ioctl(unsigned int cmd, void __user *arg)
429{
4e902c57
TG
430 struct fib_config cfg;
431 struct rtentry rt;
1da177e4 432 int err;
1da177e4
LT
433
434 switch (cmd) {
435 case SIOCADDRT: /* Add a route */
436 case SIOCDELRT: /* Delete a route */
437 if (!capable(CAP_NET_ADMIN))
438 return -EPERM;
4e902c57
TG
439
440 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 441 return -EFAULT;
4e902c57 442
1da177e4 443 rtnl_lock();
4e902c57 444 err = rtentry_to_fib_config(cmd, &rt, &cfg);
1da177e4 445 if (err == 0) {
4e902c57
TG
446 struct fib_table *tb;
447
1da177e4 448 if (cmd == SIOCDELRT) {
4e902c57 449 tb = fib_get_table(cfg.fc_table);
1da177e4 450 if (tb)
4e902c57
TG
451 err = tb->tb_delete(tb, &cfg);
452 else
453 err = -ESRCH;
1da177e4 454 } else {
4e902c57 455 tb = fib_new_table(cfg.fc_table);
1da177e4 456 if (tb)
4e902c57
TG
457 err = tb->tb_insert(tb, &cfg);
458 else
459 err = -ENOBUFS;
1da177e4 460 }
4e902c57
TG
461
462 /* allocated by rtentry_to_fib_config() */
463 kfree(cfg.fc_mx);
1da177e4
LT
464 }
465 rtnl_unlock();
466 return err;
467 }
468 return -EINVAL;
469}
470
ef7c79ed 471const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
472 [RTA_DST] = { .type = NLA_U32 },
473 [RTA_SRC] = { .type = NLA_U32 },
474 [RTA_IIF] = { .type = NLA_U32 },
475 [RTA_OIF] = { .type = NLA_U32 },
476 [RTA_GATEWAY] = { .type = NLA_U32 },
477 [RTA_PRIORITY] = { .type = NLA_U32 },
478 [RTA_PREFSRC] = { .type = NLA_U32 },
479 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 480 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57
TG
481 [RTA_PROTOINFO] = { .type = NLA_U32 },
482 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
483};
484
485static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
486 struct fib_config *cfg)
1da177e4 487{
4e902c57
TG
488 struct nlattr *attr;
489 int err, remaining;
490 struct rtmsg *rtm;
491
492 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
493 if (err < 0)
494 goto errout;
495
496 memset(cfg, 0, sizeof(*cfg));
497
498 rtm = nlmsg_data(nlh);
4e902c57 499 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
500 cfg->fc_tos = rtm->rtm_tos;
501 cfg->fc_table = rtm->rtm_table;
502 cfg->fc_protocol = rtm->rtm_protocol;
503 cfg->fc_scope = rtm->rtm_scope;
504 cfg->fc_type = rtm->rtm_type;
505 cfg->fc_flags = rtm->rtm_flags;
506 cfg->fc_nlflags = nlh->nlmsg_flags;
507
508 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
509 cfg->fc_nlinfo.nlh = nlh;
510
a0ee18b9
TG
511 if (cfg->fc_type > RTN_MAX) {
512 err = -EINVAL;
513 goto errout;
514 }
515
4e902c57 516 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 517 switch (nla_type(attr)) {
4e902c57 518 case RTA_DST:
17fb2c64 519 cfg->fc_dst = nla_get_be32(attr);
4e902c57 520 break;
4e902c57
TG
521 case RTA_OIF:
522 cfg->fc_oif = nla_get_u32(attr);
523 break;
524 case RTA_GATEWAY:
17fb2c64 525 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
526 break;
527 case RTA_PRIORITY:
528 cfg->fc_priority = nla_get_u32(attr);
529 break;
530 case RTA_PREFSRC:
17fb2c64 531 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
532 break;
533 case RTA_METRICS:
534 cfg->fc_mx = nla_data(attr);
535 cfg->fc_mx_len = nla_len(attr);
536 break;
537 case RTA_MULTIPATH:
538 cfg->fc_mp = nla_data(attr);
539 cfg->fc_mp_len = nla_len(attr);
540 break;
541 case RTA_FLOW:
542 cfg->fc_flow = nla_get_u32(attr);
543 break;
4e902c57
TG
544 case RTA_TABLE:
545 cfg->fc_table = nla_get_u32(attr);
546 break;
1da177e4
LT
547 }
548 }
4e902c57 549
1da177e4 550 return 0;
4e902c57
TG
551errout:
552 return err;
1da177e4
LT
553}
554
63f3444f 555static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 556{
b854272b 557 struct net *net = skb->sk->sk_net;
4e902c57
TG
558 struct fib_config cfg;
559 struct fib_table *tb;
560 int err;
1da177e4 561
b854272b
DL
562 if (net != &init_net)
563 return -EINVAL;
564
4e902c57
TG
565 err = rtm_to_fib_config(skb, nlh, &cfg);
566 if (err < 0)
567 goto errout;
1da177e4 568
4e902c57
TG
569 tb = fib_get_table(cfg.fc_table);
570 if (tb == NULL) {
571 err = -ESRCH;
572 goto errout;
573 }
574
575 err = tb->tb_delete(tb, &cfg);
576errout:
577 return err;
1da177e4
LT
578}
579
63f3444f 580static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 581{
b854272b 582 struct net *net = skb->sk->sk_net;
4e902c57
TG
583 struct fib_config cfg;
584 struct fib_table *tb;
585 int err;
1da177e4 586
b854272b
DL
587 if (net != &init_net)
588 return -EINVAL;
589
4e902c57
TG
590 err = rtm_to_fib_config(skb, nlh, &cfg);
591 if (err < 0)
592 goto errout;
1da177e4 593
4e902c57
TG
594 tb = fib_new_table(cfg.fc_table);
595 if (tb == NULL) {
596 err = -ENOBUFS;
597 goto errout;
598 }
599
600 err = tb->tb_insert(tb, &cfg);
601errout:
602 return err;
1da177e4
LT
603}
604
63f3444f 605static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 606{
b854272b 607 struct net *net = skb->sk->sk_net;
1af5a8c4
PM
608 unsigned int h, s_h;
609 unsigned int e = 0, s_e;
1da177e4 610 struct fib_table *tb;
1af5a8c4
PM
611 struct hlist_node *node;
612 int dumped = 0;
1da177e4 613
b854272b
DL
614 if (net != &init_net)
615 return 0;
616
be403ea1
TG
617 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
618 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
619 return ip_rt_dump(skb, cb);
620
1af5a8c4
PM
621 s_h = cb->args[0];
622 s_e = cb->args[1];
623
624 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
625 e = 0;
626 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
627 if (e < s_e)
628 goto next;
629 if (dumped)
630 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 631 2 * sizeof(cb->args[0]));
1af5a8c4
PM
632 if (tb->tb_dump(tb, skb, cb) < 0)
633 goto out;
634 dumped = 1;
635next:
636 e++;
637 }
1da177e4 638 }
1af5a8c4
PM
639out:
640 cb->args[1] = e;
641 cb->args[0] = h;
1da177e4
LT
642
643 return skb->len;
644}
645
646/* Prepare and feed intra-kernel routing request.
647 Really, it should be a netlink message, but :-( netlink
648 may not be configured, so we feed it directly
649 to the fib engine. This is legal, because all events occur
650 only when netlink is already locked.
651 */
652
81f7bf6c 653static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 654{
4e902c57
TG
655 struct fib_table *tb;
656 struct fib_config cfg = {
657 .fc_protocol = RTPROT_KERNEL,
658 .fc_type = type,
659 .fc_dst = dst,
660 .fc_dst_len = dst_len,
661 .fc_prefsrc = ifa->ifa_local,
662 .fc_oif = ifa->ifa_dev->dev->ifindex,
663 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
664 };
1da177e4
LT
665
666 if (type == RTN_UNICAST)
667 tb = fib_new_table(RT_TABLE_MAIN);
668 else
669 tb = fib_new_table(RT_TABLE_LOCAL);
670
671 if (tb == NULL)
672 return;
673
4e902c57 674 cfg.fc_table = tb->tb_id;
1da177e4 675
4e902c57
TG
676 if (type != RTN_LOCAL)
677 cfg.fc_scope = RT_SCOPE_LINK;
678 else
679 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
680
681 if (cmd == RTM_NEWROUTE)
4e902c57 682 tb->tb_insert(tb, &cfg);
1da177e4 683 else
4e902c57 684 tb->tb_delete(tb, &cfg);
1da177e4
LT
685}
686
0ff60a45 687void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
688{
689 struct in_device *in_dev = ifa->ifa_dev;
690 struct net_device *dev = in_dev->dev;
691 struct in_ifaddr *prim = ifa;
a144ea4b
AV
692 __be32 mask = ifa->ifa_mask;
693 __be32 addr = ifa->ifa_local;
694 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
695
696 if (ifa->ifa_flags&IFA_F_SECONDARY) {
697 prim = inet_ifa_byprefix(in_dev, prefix, mask);
698 if (prim == NULL) {
699 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
700 return;
701 }
702 }
703
704 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
705
706 if (!(dev->flags&IFF_UP))
707 return;
708
709 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 710 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
711 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
712
f97c1e0c 713 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
714 (prefix != addr || ifa->ifa_prefixlen < 32)) {
715 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
716 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
717
718 /* Add network specific broadcasts, when it takes a sense */
719 if (ifa->ifa_prefixlen < 31) {
720 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
721 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
722 }
723 }
724}
725
726static void fib_del_ifaddr(struct in_ifaddr *ifa)
727{
728 struct in_device *in_dev = ifa->ifa_dev;
729 struct net_device *dev = in_dev->dev;
730 struct in_ifaddr *ifa1;
731 struct in_ifaddr *prim = ifa;
a144ea4b
AV
732 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
733 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
734#define LOCAL_OK 1
735#define BRD_OK 2
736#define BRD0_OK 4
737#define BRD1_OK 8
738 unsigned ok = 0;
739
740 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
741 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
742 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
743 else {
744 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
745 if (prim == NULL) {
746 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
747 return;
748 }
749 }
750
751 /* Deletion is more complicated than add.
752 We should take care of not to delete too much :-)
753
754 Scan address list to be sure that addresses are really gone.
755 */
756
757 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
758 if (ifa->ifa_local == ifa1->ifa_local)
759 ok |= LOCAL_OK;
760 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
761 ok |= BRD_OK;
762 if (brd == ifa1->ifa_broadcast)
763 ok |= BRD1_OK;
764 if (any == ifa1->ifa_broadcast)
765 ok |= BRD0_OK;
766 }
767
768 if (!(ok&BRD_OK))
769 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
770 if (!(ok&BRD1_OK))
771 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
772 if (!(ok&BRD0_OK))
773 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
774 if (!(ok&LOCAL_OK)) {
775 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
776
777 /* Check, that this local address finally disappeared. */
778 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
779 /* And the last, but not the least thing.
780 We must flush stray FIB entries.
781
782 First of all, we scan fib_info list searching
783 for stray nexthop entries, then ignite fib_flush.
784 */
785 if (fib_sync_down(ifa->ifa_local, NULL, 0))
786 fib_flush();
787 }
788 }
789#undef LOCAL_OK
790#undef BRD_OK
791#undef BRD0_OK
792#undef BRD1_OK
793}
794
246955fe
RO
795static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
796{
e905a9ed 797
246955fe 798 struct fib_result res;
5f300893 799 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 800 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
801 .tos = frn->fl_tos,
802 .scope = frn->fl_scope } } };
1194ed0a 803
912a41a4
SV
804#ifdef CONFIG_IP_MULTIPLE_TABLES
805 res.r = NULL;
806#endif
807
1194ed0a 808 frn->err = -ENOENT;
246955fe
RO
809 if (tb) {
810 local_bh_disable();
811
812 frn->tb_id = tb->tb_id;
813 frn->err = tb->tb_lookup(tb, &fl, &res);
814
815 if (!frn->err) {
816 frn->prefixlen = res.prefixlen;
817 frn->nh_sel = res.nh_sel;
818 frn->type = res.type;
819 frn->scope = res.scope;
1194ed0a 820 fib_res_put(&res);
246955fe
RO
821 }
822 local_bh_enable();
823 }
824}
825
28f7b036 826static void nl_fib_input(struct sk_buff *skb)
246955fe 827{
246955fe 828 struct fib_result_nl *frn;
28f7b036 829 struct nlmsghdr *nlh;
246955fe 830 struct fib_table *tb;
28f7b036 831 u32 pid;
1194ed0a 832
b529ccf2 833 nlh = nlmsg_hdr(skb);
ea86575e 834 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 835 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 836 return;
d883a036
DL
837
838 skb = skb_clone(skb, GFP_KERNEL);
839 if (skb == NULL)
840 return;
841 nlh = nlmsg_hdr(skb);
e905a9ed 842
246955fe
RO
843 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
844 tb = fib_get_table(frn->tb_id_in);
845
846 nl_fib_lookup(frn, tb);
e905a9ed 847
1194ed0a 848 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 849 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 850 NETLINK_CB(skb).dst_group = 0; /* unicast */
cd40b7d3 851 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 852}
246955fe
RO
853
854static void nl_fib_lookup_init(void)
855{
cd40b7d3
DL
856 fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
857 nl_fib_input, NULL, THIS_MODULE);
246955fe
RO
858}
859
1da177e4
LT
/*
 * Tear down IPv4 routing state for @dev: sync down the routes using
 * the device (flushing the tables if that reports work), then flush
 * the routing cache and take the device's ARP entries down.  @force
 * is handed to fib_sync_down() unchanged.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
867
868static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
869{
870 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
871
872 switch (event) {
873 case NETDEV_UP:
874 fib_add_ifaddr(ifa);
875#ifdef CONFIG_IP_ROUTE_MULTIPATH
876 fib_sync_up(ifa->ifa_dev->dev);
877#endif
878 rt_cache_flush(-1);
879 break;
880 case NETDEV_DOWN:
881 fib_del_ifaddr(ifa);
9fcc2e8a 882 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
883 /* Last address was deleted from this interface.
884 Disable IP.
885 */
886 fib_disable_ip(ifa->ifa_dev->dev, 1);
887 } else {
888 rt_cache_flush(-1);
889 }
890 break;
891 }
892 return NOTIFY_DONE;
893}
894
895static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
896{
897 struct net_device *dev = ptr;
e5ed6399 898 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4 899
e9dc8653
EB
900 if (dev->nd_net != &init_net)
901 return NOTIFY_DONE;
902
1da177e4
LT
903 if (event == NETDEV_UNREGISTER) {
904 fib_disable_ip(dev, 2);
905 return NOTIFY_DONE;
906 }
907
908 if (!in_dev)
909 return NOTIFY_DONE;
910
911 switch (event) {
912 case NETDEV_UP:
913 for_ifa(in_dev) {
914 fib_add_ifaddr(ifa);
915 } endfor_ifa(in_dev);
916#ifdef CONFIG_IP_ROUTE_MULTIPATH
917 fib_sync_up(dev);
918#endif
919 rt_cache_flush(-1);
920 break;
921 case NETDEV_DOWN:
922 fib_disable_ip(dev, 0);
923 break;
924 case NETDEV_CHANGEMTU:
925 case NETDEV_CHANGE:
926 rt_cache_flush(0);
927 break;
928 }
929 return NOTIFY_DONE;
930}
931
932static struct notifier_block fib_inetaddr_notifier = {
933 .notifier_call =fib_inetaddr_event,
934};
935
936static struct notifier_block fib_netdev_notifier = {
937 .notifier_call =fib_netdev_event,
938};
939
940void __init ip_fib_init(void)
941{
1af5a8c4
PM
942 unsigned int i;
943
944 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
945 INIT_HLIST_HEAD(&fib_table_hash[i]);
c3e9a353 946
e1ef4bf2 947 fib4_rules_init();
1da177e4
LT
948
949 register_netdevice_notifier(&fib_netdev_notifier);
950 register_inetaddr_notifier(&fib_inetaddr_notifier);
246955fe 951 nl_fib_lookup_init();
63f3444f
TG
952
953 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
954 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
955 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1da177e4
LT
956}
957
/* Symbols made available to other kernel code. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);