]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
UNIX: Do not loop forever at unix_autobind().
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
1da177e4
LT
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
1da177e4
LT
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
4fc268d2 20#include <linux/capability.h>
1da177e4
LT
21#include <linux/types.h>
22#include <linux/kernel.h>
1da177e4
LT
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
14c85021 30#include <linux/inetdevice.h>
1da177e4 31#include <linux/netdevice.h>
1823730f 32#include <linux/if_addr.h>
1da177e4
LT
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
1da177e4 35#include <linux/init.h>
1af5a8c4 36#include <linux/list.h>
5a0e3ad6 37#include <linux/slab.h>
1da177e4
LT
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
1da177e4
LT
44#include <net/arp.h>
45#include <net/ip_fib.h>
63f3444f 46#include <net/rtnetlink.h>
1da177e4 47
1da177e4
LT
48#ifndef CONFIG_IP_MULTIPLE_TABLES
49
7b1a74fd 50static int __net_init fib4_rules_init(struct net *net)
c3e9a353 51{
93456b6d
DL
52 struct fib_table *local_table, *main_table;
53
7f9b8052 54 local_table = fib_hash_table(RT_TABLE_LOCAL);
93456b6d 55 if (local_table == NULL)
dbb50165
DL
56 return -ENOMEM;
57
7f9b8052 58 main_table = fib_hash_table(RT_TABLE_MAIN);
93456b6d 59 if (main_table == NULL)
dbb50165
DL
60 goto fail;
61
93456b6d 62 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 64 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
66 return 0;
67
68fail:
93456b6d 69 kfree(local_table);
dbb50165 70 return -ENOMEM;
c3e9a353 71}
1af5a8c4 72#else
1da177e4 73
8ad4942c 74struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
75{
76 struct fib_table *tb;
1af5a8c4 77 unsigned int h;
1da177e4 78
1af5a8c4
PM
79 if (id == 0)
80 id = RT_TABLE_MAIN;
8ad4942c 81 tb = fib_get_table(net, id);
1af5a8c4
PM
82 if (tb)
83 return tb;
7f9b8052
SH
84
85 tb = fib_hash_table(id);
1da177e4
LT
86 if (!tb)
87 return NULL;
1af5a8c4 88 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
90 return tb;
91}
92
8ad4942c 93struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
94{
95 struct fib_table *tb;
96 struct hlist_node *node;
e4aef8ae 97 struct hlist_head *head;
1af5a8c4 98 unsigned int h;
1da177e4 99
1af5a8c4
PM
100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 103
1af5a8c4 104 rcu_read_lock();
e4aef8ae
DL
105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
1da177e4
LT
115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
010278ec
DL
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
64c2d538
DL
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
010278ec 127 tb = fib_get_table(net, table);
64c2d538 128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
16c6cf8b 129 fib_table_select_default(tb, flp, res);
64c2d538
DL
130}
131
e4aef8ae 132static void fib_flush(struct net *net)
1da177e4
LT
133{
134 int flushed = 0;
1da177e4 135 struct fib_table *tb;
1af5a8c4 136 struct hlist_node *node;
e4aef8ae 137 struct hlist_head *head;
1af5a8c4 138 unsigned int h;
1da177e4 139
1af5a8c4 140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
16c6cf8b 143 flushed += fib_table_flush(tb);
1da177e4 144 }
1da177e4
LT
145
146 if (flushed)
76e6ebfb 147 rt_cache_flush(net, -1);
1da177e4
LT
148}
149
150/*
151 * Find the first device with a given source address.
152 */
153
1ab35276 154struct net_device * ip_dev_find(struct net *net, __be32 addr)
1da177e4
LT
155{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
03cf786c 159 struct fib_table *local_table;
1da177e4
LT
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
1ab35276 165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
16c6cf8b 166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
1da177e4
LT
167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
171
172 if (dev)
173 dev_hold(dev);
174out:
175 fib_res_put(&res);
176 return dev;
177}
4bc2f18b 178EXPORT_SYMBOL(ip_dev_find);
1da177e4 179
05538116
LAT
180/*
181 * Find address type as if only "dev" was present in the system. If
182 * on_dev is NULL then all interfaces are taken into consideration.
183 */
6b175b26
EB
184static inline unsigned __inet_dev_addr_type(struct net *net,
185 const struct net_device *dev,
05538116 186 __be32 addr)
1da177e4
LT
187{
188 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
189 struct fib_result res;
190 unsigned ret = RTN_BROADCAST;
03cf786c 191 struct fib_table *local_table;
1da177e4 192
1e637c74 193 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1da177e4 194 return RTN_BROADCAST;
f97c1e0c 195 if (ipv4_is_multicast(addr))
1da177e4
LT
196 return RTN_MULTICAST;
197
198#ifdef CONFIG_IP_MULTIPLE_TABLES
199 res.r = NULL;
200#endif
e905a9ed 201
6b175b26 202 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 203 if (local_table) {
1da177e4 204 ret = RTN_UNICAST;
16c6cf8b 205 if (!fib_table_lookup(local_table, &fl, &res)) {
05538116
LAT
206 if (!dev || dev == res.fi->fib_dev)
207 ret = res.type;
1da177e4
LT
208 fib_res_put(&res);
209 }
210 }
211 return ret;
212}
213
6b175b26 214unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 215{
6b175b26 216 return __inet_dev_addr_type(net, NULL, addr);
05538116 217}
4bc2f18b 218EXPORT_SYMBOL(inet_addr_type);
05538116 219
6b175b26
EB
220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 __be32 addr)
05538116 222{
6b175b26 223 return __inet_dev_addr_type(net, dev, addr);
05538116 224}
4bc2f18b 225EXPORT_SYMBOL(inet_dev_addr_type);
05538116 226
1da177e4
LT
227/* Given (packet source, input interface) and optional (dst, oif, tos):
228 - (main) check, that source is valid i.e. not broadcast or our local
229 address.
230 - figure out what "logical" interface this packet arrived
231 and calculate "specific destination" address.
232 - check, that packet arrived from expected physical interface.
233 */
234
d9c9df8c 235int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
b0c110ca 236 struct net_device *dev, __be32 *spec_dst,
237 u32 *itag, u32 mark)
1da177e4
LT
238{
239 struct in_device *in_dev;
240 struct flowi fl = { .nl_u = { .ip4_u =
241 { .daddr = src,
242 .saddr = dst,
243 .tos = tos } },
b0c110ca 244 .mark = mark,
1da177e4 245 .iif = oif };
b0c110ca 246
1da177e4 247 struct fib_result res;
8153a10c 248 int no_addr, rpf, accept_local;
1da177e4 249 int ret;
5b707aaa 250 struct net *net;
1da177e4 251
8153a10c 252 no_addr = rpf = accept_local = 0;
1da177e4 253 rcu_read_lock();
e5ed6399 254 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
255 if (in_dev) {
256 no_addr = in_dev->ifa_list == NULL;
257 rpf = IN_DEV_RPFILTER(in_dev);
8153a10c 258 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
28f6aeea
JHS
259 if (mark && !IN_DEV_SRC_VMARK(in_dev))
260 fl.mark = 0;
1da177e4
LT
261 }
262 rcu_read_unlock();
263
264 if (in_dev == NULL)
265 goto e_inval;
266
c346dca1 267 net = dev_net(dev);
5b707aaa 268 if (fib_lookup(net, &fl, &res))
1da177e4 269 goto last_resort;
8153a10c
PM
270 if (res.type != RTN_UNICAST) {
271 if (res.type != RTN_LOCAL || !accept_local)
272 goto e_inval_res;
273 }
1da177e4
LT
274 *spec_dst = FIB_RES_PREFSRC(res);
275 fib_combine_itag(itag, &res);
276#ifdef CONFIG_IP_ROUTE_MULTIPATH
277 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
278#else
279 if (FIB_RES_DEV(res) == dev)
280#endif
281 {
282 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
283 fib_res_put(&res);
284 return ret;
285 }
286 fib_res_put(&res);
287 if (no_addr)
288 goto last_resort;
c1cf8422 289 if (rpf == 1)
b5f7e755 290 goto e_rpf;
1da177e4
LT
291 fl.oif = dev->ifindex;
292
293 ret = 0;
5b707aaa 294 if (fib_lookup(net, &fl, &res) == 0) {
1da177e4
LT
295 if (res.type == RTN_UNICAST) {
296 *spec_dst = FIB_RES_PREFSRC(res);
297 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
298 }
299 fib_res_put(&res);
300 }
301 return ret;
302
303last_resort:
304 if (rpf)
b5f7e755 305 goto e_rpf;
1da177e4
LT
306 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
307 *itag = 0;
308 return 0;
309
310e_inval_res:
311 fib_res_put(&res);
312e_inval:
313 return -EINVAL;
b5f7e755
ED
314e_rpf:
315 return -EXDEV;
1da177e4
LT
316}
317
81f7bf6c 318static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
319{
320 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
321}
322
323static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
324{
325 struct nlattr *nla;
326
327 nla = (struct nlattr *) ((char *) mx + len);
328 nla->nla_type = type;
329 nla->nla_len = nla_attr_size(4);
330 *(u32 *) nla_data(nla) = value;
331
332 return len + nla_total_size(4);
333}
334
4b5d47d4 335static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
336 struct fib_config *cfg)
337{
6d85c10a 338 __be32 addr;
4e902c57
TG
339 int plen;
340
341 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 342 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
343
344 if (rt->rt_dst.sa_family != AF_INET)
345 return -EAFNOSUPPORT;
346
347 /*
348 * Check mask for validity:
349 * a) it must be contiguous.
350 * b) destination must have all host bits clear.
351 * c) if application forgot to set correct family (AF_INET),
352 * reject request unless it is absolutely clear i.e.
353 * both family and mask are zero.
354 */
355 plen = 32;
356 addr = sk_extract_addr(&rt->rt_dst);
357 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 358 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
359
360 if (rt->rt_genmask.sa_family != AF_INET) {
361 if (mask || rt->rt_genmask.sa_family)
362 return -EAFNOSUPPORT;
363 }
364
365 if (bad_mask(mask, addr))
366 return -EINVAL;
367
368 plen = inet_mask_len(mask);
369 }
370
371 cfg->fc_dst_len = plen;
372 cfg->fc_dst = addr;
373
374 if (cmd != SIOCDELRT) {
375 cfg->fc_nlflags = NLM_F_CREATE;
376 cfg->fc_protocol = RTPROT_BOOT;
377 }
378
379 if (rt->rt_metric)
380 cfg->fc_priority = rt->rt_metric - 1;
381
382 if (rt->rt_flags & RTF_REJECT) {
383 cfg->fc_scope = RT_SCOPE_HOST;
384 cfg->fc_type = RTN_UNREACHABLE;
385 return 0;
386 }
387
388 cfg->fc_scope = RT_SCOPE_NOWHERE;
389 cfg->fc_type = RTN_UNICAST;
390
391 if (rt->rt_dev) {
392 char *colon;
393 struct net_device *dev;
394 char devname[IFNAMSIZ];
395
396 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
397 return -EFAULT;
398
399 devname[IFNAMSIZ-1] = 0;
400 colon = strchr(devname, ':');
401 if (colon)
402 *colon = 0;
4b5d47d4 403 dev = __dev_get_by_name(net, devname);
4e902c57
TG
404 if (!dev)
405 return -ENODEV;
406 cfg->fc_oif = dev->ifindex;
407 if (colon) {
408 struct in_ifaddr *ifa;
409 struct in_device *in_dev = __in_dev_get_rtnl(dev);
410 if (!in_dev)
411 return -ENODEV;
412 *colon = ':';
413 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
414 if (strcmp(ifa->ifa_label, devname) == 0)
415 break;
416 if (ifa == NULL)
417 return -ENODEV;
418 cfg->fc_prefsrc = ifa->ifa_local;
419 }
420 }
421
422 addr = sk_extract_addr(&rt->rt_gateway);
423 if (rt->rt_gateway.sa_family == AF_INET && addr) {
424 cfg->fc_gw = addr;
425 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 426 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
427 cfg->fc_scope = RT_SCOPE_UNIVERSE;
428 }
429
430 if (cmd == SIOCDELRT)
431 return 0;
432
433 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
434 return -EINVAL;
435
436 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
437 cfg->fc_scope = RT_SCOPE_LINK;
438
439 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
440 struct nlattr *mx;
441 int len = 0;
442
443 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 444 if (mx == NULL)
4e902c57
TG
445 return -ENOMEM;
446
447 if (rt->rt_flags & RTF_MTU)
448 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
449
450 if (rt->rt_flags & RTF_WINDOW)
451 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
452
453 if (rt->rt_flags & RTF_IRTT)
454 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
455
456 cfg->fc_mx = mx;
457 cfg->fc_mx_len = len;
458 }
459
460 return 0;
461}
462
1da177e4
LT
463/*
464 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
465 */
e905a9ed 466
1bad118a 467int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 468{
4e902c57
TG
469 struct fib_config cfg;
470 struct rtentry rt;
1da177e4 471 int err;
1da177e4
LT
472
473 switch (cmd) {
474 case SIOCADDRT: /* Add a route */
475 case SIOCDELRT: /* Delete a route */
476 if (!capable(CAP_NET_ADMIN))
477 return -EPERM;
4e902c57
TG
478
479 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 480 return -EFAULT;
4e902c57 481
1da177e4 482 rtnl_lock();
1bad118a 483 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 484 if (err == 0) {
4e902c57
TG
485 struct fib_table *tb;
486
1da177e4 487 if (cmd == SIOCDELRT) {
1bad118a 488 tb = fib_get_table(net, cfg.fc_table);
1da177e4 489 if (tb)
16c6cf8b 490 err = fib_table_delete(tb, &cfg);
4e902c57
TG
491 else
492 err = -ESRCH;
1da177e4 493 } else {
1bad118a 494 tb = fib_new_table(net, cfg.fc_table);
1da177e4 495 if (tb)
16c6cf8b 496 err = fib_table_insert(tb, &cfg);
4e902c57
TG
497 else
498 err = -ENOBUFS;
1da177e4 499 }
4e902c57
TG
500
501 /* allocated by rtentry_to_fib_config() */
502 kfree(cfg.fc_mx);
1da177e4
LT
503 }
504 rtnl_unlock();
505 return err;
506 }
507 return -EINVAL;
508}
509
ef7c79ed 510const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
511 [RTA_DST] = { .type = NLA_U32 },
512 [RTA_SRC] = { .type = NLA_U32 },
513 [RTA_IIF] = { .type = NLA_U32 },
514 [RTA_OIF] = { .type = NLA_U32 },
515 [RTA_GATEWAY] = { .type = NLA_U32 },
516 [RTA_PRIORITY] = { .type = NLA_U32 },
517 [RTA_PREFSRC] = { .type = NLA_U32 },
518 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 519 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57 520 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
521};
522
4b5d47d4
DL
523static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
524 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 525{
4e902c57
TG
526 struct nlattr *attr;
527 int err, remaining;
528 struct rtmsg *rtm;
529
530 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
531 if (err < 0)
532 goto errout;
533
534 memset(cfg, 0, sizeof(*cfg));
535
536 rtm = nlmsg_data(nlh);
4e902c57 537 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
538 cfg->fc_tos = rtm->rtm_tos;
539 cfg->fc_table = rtm->rtm_table;
540 cfg->fc_protocol = rtm->rtm_protocol;
541 cfg->fc_scope = rtm->rtm_scope;
542 cfg->fc_type = rtm->rtm_type;
543 cfg->fc_flags = rtm->rtm_flags;
544 cfg->fc_nlflags = nlh->nlmsg_flags;
545
546 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
547 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 548 cfg->fc_nlinfo.nl_net = net;
4e902c57 549
a0ee18b9
TG
550 if (cfg->fc_type > RTN_MAX) {
551 err = -EINVAL;
552 goto errout;
553 }
554
4e902c57 555 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 556 switch (nla_type(attr)) {
4e902c57 557 case RTA_DST:
17fb2c64 558 cfg->fc_dst = nla_get_be32(attr);
4e902c57 559 break;
4e902c57
TG
560 case RTA_OIF:
561 cfg->fc_oif = nla_get_u32(attr);
562 break;
563 case RTA_GATEWAY:
17fb2c64 564 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
565 break;
566 case RTA_PRIORITY:
567 cfg->fc_priority = nla_get_u32(attr);
568 break;
569 case RTA_PREFSRC:
17fb2c64 570 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
571 break;
572 case RTA_METRICS:
573 cfg->fc_mx = nla_data(attr);
574 cfg->fc_mx_len = nla_len(attr);
575 break;
576 case RTA_MULTIPATH:
577 cfg->fc_mp = nla_data(attr);
578 cfg->fc_mp_len = nla_len(attr);
579 break;
580 case RTA_FLOW:
581 cfg->fc_flow = nla_get_u32(attr);
582 break;
4e902c57
TG
583 case RTA_TABLE:
584 cfg->fc_table = nla_get_u32(attr);
585 break;
1da177e4
LT
586 }
587 }
4e902c57 588
1da177e4 589 return 0;
4e902c57
TG
590errout:
591 return err;
1da177e4
LT
592}
593
6ed2533e 594static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 595{
3b1e0a65 596 struct net *net = sock_net(skb->sk);
4e902c57
TG
597 struct fib_config cfg;
598 struct fib_table *tb;
599 int err;
1da177e4 600
4b5d47d4 601 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
602 if (err < 0)
603 goto errout;
1da177e4 604
8ad4942c 605 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
606 if (tb == NULL) {
607 err = -ESRCH;
608 goto errout;
609 }
610
16c6cf8b 611 err = fib_table_delete(tb, &cfg);
4e902c57
TG
612errout:
613 return err;
1da177e4
LT
614}
615
6ed2533e 616static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 617{
3b1e0a65 618 struct net *net = sock_net(skb->sk);
4e902c57
TG
619 struct fib_config cfg;
620 struct fib_table *tb;
621 int err;
1da177e4 622
4b5d47d4 623 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
624 if (err < 0)
625 goto errout;
1da177e4 626
226b0b4a 627 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
628 if (tb == NULL) {
629 err = -ENOBUFS;
630 goto errout;
631 }
632
16c6cf8b 633 err = fib_table_insert(tb, &cfg);
4e902c57
TG
634errout:
635 return err;
1da177e4
LT
636}
637
63f3444f 638static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 639{
3b1e0a65 640 struct net *net = sock_net(skb->sk);
1af5a8c4
PM
641 unsigned int h, s_h;
642 unsigned int e = 0, s_e;
1da177e4 643 struct fib_table *tb;
1af5a8c4 644 struct hlist_node *node;
e4aef8ae 645 struct hlist_head *head;
1af5a8c4 646 int dumped = 0;
1da177e4 647
be403ea1
TG
648 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
649 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
650 return ip_rt_dump(skb, cb);
651
1af5a8c4
PM
652 s_h = cb->args[0];
653 s_e = cb->args[1];
654
655 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
656 e = 0;
e4aef8ae
DL
657 head = &net->ipv4.fib_table_hash[h];
658 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
659 if (e < s_e)
660 goto next;
661 if (dumped)
662 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 663 2 * sizeof(cb->args[0]));
16c6cf8b 664 if (fib_table_dump(tb, skb, cb) < 0)
1af5a8c4
PM
665 goto out;
666 dumped = 1;
667next:
668 e++;
669 }
1da177e4 670 }
1af5a8c4
PM
671out:
672 cb->args[1] = e;
673 cb->args[0] = h;
1da177e4
LT
674
675 return skb->len;
676}
677
678/* Prepare and feed intra-kernel routing request.
679 Really, it should be netlink message, but :-( netlink
680 can be not configured, so that we feed it directly
681 to fib engine. It is legal, because all events occur
682 only when netlink is already locked.
683 */
684
81f7bf6c 685static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 686{
c346dca1 687 struct net *net = dev_net(ifa->ifa_dev->dev);
4e902c57
TG
688 struct fib_table *tb;
689 struct fib_config cfg = {
690 .fc_protocol = RTPROT_KERNEL,
691 .fc_type = type,
692 .fc_dst = dst,
693 .fc_dst_len = dst_len,
694 .fc_prefsrc = ifa->ifa_local,
695 .fc_oif = ifa->ifa_dev->dev->ifindex,
696 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 697 .fc_nlinfo = {
4b5d47d4 698 .nl_net = net,
4d1169c1 699 },
4e902c57 700 };
1da177e4
LT
701
702 if (type == RTN_UNICAST)
4b5d47d4 703 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 704 else
4b5d47d4 705 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
706
707 if (tb == NULL)
708 return;
709
4e902c57 710 cfg.fc_table = tb->tb_id;
1da177e4 711
4e902c57
TG
712 if (type != RTN_LOCAL)
713 cfg.fc_scope = RT_SCOPE_LINK;
714 else
715 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
716
717 if (cmd == RTM_NEWROUTE)
16c6cf8b 718 fib_table_insert(tb, &cfg);
1da177e4 719 else
16c6cf8b 720 fib_table_delete(tb, &cfg);
1da177e4
LT
721}
722
0ff60a45 723void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
724{
725 struct in_device *in_dev = ifa->ifa_dev;
726 struct net_device *dev = in_dev->dev;
727 struct in_ifaddr *prim = ifa;
a144ea4b
AV
728 __be32 mask = ifa->ifa_mask;
729 __be32 addr = ifa->ifa_local;
730 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
731
732 if (ifa->ifa_flags&IFA_F_SECONDARY) {
733 prim = inet_ifa_byprefix(in_dev, prefix, mask);
734 if (prim == NULL) {
a6db9010 735 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
736 return;
737 }
738 }
739
740 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
741
742 if (!(dev->flags&IFF_UP))
743 return;
744
745 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 746 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
747 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
748
f97c1e0c 749 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
750 (prefix != addr || ifa->ifa_prefixlen < 32)) {
751 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
752 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
753
754 /* Add network specific broadcasts, when it takes a sense */
755 if (ifa->ifa_prefixlen < 31) {
756 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
757 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
758 }
759 }
760}
761
762static void fib_del_ifaddr(struct in_ifaddr *ifa)
763{
764 struct in_device *in_dev = ifa->ifa_dev;
765 struct net_device *dev = in_dev->dev;
766 struct in_ifaddr *ifa1;
767 struct in_ifaddr *prim = ifa;
a144ea4b
AV
768 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
769 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
770#define LOCAL_OK 1
771#define BRD_OK 2
772#define BRD0_OK 4
773#define BRD1_OK 8
774 unsigned ok = 0;
775
776 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
777 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
778 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
779 else {
780 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
781 if (prim == NULL) {
a6db9010 782 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
783 return;
784 }
785 }
786
787 /* Deletion is more complicated than add.
788 We should take care of not to delete too much :-)
789
790 Scan address list to be sure that addresses are really gone.
791 */
792
793 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
794 if (ifa->ifa_local == ifa1->ifa_local)
795 ok |= LOCAL_OK;
796 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
797 ok |= BRD_OK;
798 if (brd == ifa1->ifa_broadcast)
799 ok |= BRD1_OK;
800 if (any == ifa1->ifa_broadcast)
801 ok |= BRD0_OK;
802 }
803
804 if (!(ok&BRD_OK))
805 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
806 if (!(ok&BRD1_OK))
807 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
808 if (!(ok&BRD0_OK))
809 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
810 if (!(ok&LOCAL_OK)) {
811 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
812
813 /* Check, that this local address finally disappeared. */
c346dca1 814 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
815 /* And the last, but not the least thing.
816 We must flush stray FIB entries.
817
818 First of all, we scan fib_info list searching
819 for stray nexthop entries, then ignite fib_flush.
820 */
c346dca1
YH
821 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
822 fib_flush(dev_net(dev));
1da177e4
LT
823 }
824 }
825#undef LOCAL_OK
826#undef BRD_OK
827#undef BRD0_OK
828#undef BRD1_OK
829}
830
246955fe
RO
831static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
832{
e905a9ed 833
246955fe 834 struct fib_result res;
5f300893 835 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 836 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
837 .tos = frn->fl_tos,
838 .scope = frn->fl_scope } } };
1194ed0a 839
912a41a4
SV
840#ifdef CONFIG_IP_MULTIPLE_TABLES
841 res.r = NULL;
842#endif
843
1194ed0a 844 frn->err = -ENOENT;
246955fe
RO
845 if (tb) {
846 local_bh_disable();
847
848 frn->tb_id = tb->tb_id;
16c6cf8b 849 frn->err = fib_table_lookup(tb, &fl, &res);
246955fe
RO
850
851 if (!frn->err) {
852 frn->prefixlen = res.prefixlen;
853 frn->nh_sel = res.nh_sel;
854 frn->type = res.type;
855 frn->scope = res.scope;
1194ed0a 856 fib_res_put(&res);
246955fe
RO
857 }
858 local_bh_enable();
859 }
860}
861
28f7b036 862static void nl_fib_input(struct sk_buff *skb)
246955fe 863{
6bd48fcf 864 struct net *net;
246955fe 865 struct fib_result_nl *frn;
28f7b036 866 struct nlmsghdr *nlh;
246955fe 867 struct fib_table *tb;
28f7b036 868 u32 pid;
1194ed0a 869
3b1e0a65 870 net = sock_net(skb->sk);
b529ccf2 871 nlh = nlmsg_hdr(skb);
ea86575e 872 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 873 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 874 return;
d883a036
DL
875
876 skb = skb_clone(skb, GFP_KERNEL);
877 if (skb == NULL)
878 return;
879 nlh = nlmsg_hdr(skb);
e905a9ed 880
246955fe 881 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 882 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
883
884 nl_fib_lookup(frn, tb);
e905a9ed 885
1194ed0a 886 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 887 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 888 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 889 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 890}
246955fe 891
2c8c1e72 892static int __net_init nl_fib_lookup_init(struct net *net)
246955fe 893{
6bd48fcf
DL
894 struct sock *sk;
895 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
896 nl_fib_input, NULL, THIS_MODULE);
897 if (sk == NULL)
7b1a74fd 898 return -EAFNOSUPPORT;
6bd48fcf 899 net->ipv4.fibnl = sk;
7b1a74fd
DL
900 return 0;
901}
902
903static void nl_fib_lookup_exit(struct net *net)
904{
b7c6ba6e 905 netlink_kernel_release(net->ipv4.fibnl);
775516bf 906 net->ipv4.fibnl = NULL;
246955fe
RO
907}
908
/*
 * Take IPv4 routing down on @dev: sync the FIB with the device going
 * down (flushing the tables if that changed anything), flush the route
 * cache with the given @delay and notify ARP.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);

	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
916
917static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
918{
6ed2533e 919 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
76e6ebfb 920 struct net_device *dev = ifa->ifa_dev->dev;
1da177e4
LT
921
922 switch (event) {
923 case NETDEV_UP:
924 fib_add_ifaddr(ifa);
925#ifdef CONFIG_IP_ROUTE_MULTIPATH
76e6ebfb 926 fib_sync_up(dev);
1da177e4 927#endif
76e6ebfb 928 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
929 break;
930 case NETDEV_DOWN:
931 fib_del_ifaddr(ifa);
9fcc2e8a 932 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
933 /* Last address was deleted from this interface.
934 Disable IP.
935 */
e2ce1468 936 fib_disable_ip(dev, 1, 0);
1da177e4 937 } else {
76e6ebfb 938 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
939 }
940 break;
941 }
942 return NOTIFY_DONE;
943}
944
945static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
946{
947 struct net_device *dev = ptr;
e5ed6399 948 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
949
950 if (event == NETDEV_UNREGISTER) {
e2ce1468 951 fib_disable_ip(dev, 2, -1);
1da177e4
LT
952 return NOTIFY_DONE;
953 }
954
955 if (!in_dev)
956 return NOTIFY_DONE;
957
958 switch (event) {
959 case NETDEV_UP:
960 for_ifa(in_dev) {
961 fib_add_ifaddr(ifa);
962 } endfor_ifa(in_dev);
963#ifdef CONFIG_IP_ROUTE_MULTIPATH
964 fib_sync_up(dev);
965#endif
76e6ebfb 966 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
967 break;
968 case NETDEV_DOWN:
e2ce1468 969 fib_disable_ip(dev, 0, 0);
1da177e4
LT
970 break;
971 case NETDEV_CHANGEMTU:
972 case NETDEV_CHANGE:
76e6ebfb 973 rt_cache_flush(dev_net(dev), 0);
1da177e4 974 break;
a5ee1551
EB
975 case NETDEV_UNREGISTER_BATCH:
976 rt_cache_flush_batch();
977 break;
1da177e4
LT
978 }
979 return NOTIFY_DONE;
980}
981
982static struct notifier_block fib_inetaddr_notifier = {
6ed2533e 983 .notifier_call = fib_inetaddr_event,
1da177e4
LT
984};
985
986static struct notifier_block fib_netdev_notifier = {
6ed2533e 987 .notifier_call = fib_netdev_event,
1da177e4
LT
988};
989
7b1a74fd 990static int __net_init ip_fib_net_init(struct net *net)
1da177e4 991{
dce5cbee 992 int err;
1af5a8c4
PM
993 unsigned int i;
994
e4aef8ae
DL
995 net->ipv4.fib_table_hash = kzalloc(
996 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
997 if (net->ipv4.fib_table_hash == NULL)
998 return -ENOMEM;
999
1af5a8c4 1000 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 1001 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 1002
dce5cbee
DL
1003 err = fib4_rules_init(net);
1004 if (err < 0)
1005 goto fail;
1006 return 0;
1007
1008fail:
1009 kfree(net->ipv4.fib_table_hash);
1010 return err;
7b1a74fd 1011}
1da177e4 1012
2c8c1e72 1013static void ip_fib_net_exit(struct net *net)
7b1a74fd
DL
1014{
1015 unsigned int i;
1016
1017#ifdef CONFIG_IP_MULTIPLE_TABLES
1018 fib4_rules_exit(net);
1019#endif
1020
1021 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1022 struct fib_table *tb;
1023 struct hlist_head *head;
1024 struct hlist_node *node, *tmp;
63f3444f 1025
e4aef8ae 1026 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
1027 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1028 hlist_del(node);
16c6cf8b 1029 fib_table_flush(tb);
7b1a74fd
DL
1030 kfree(tb);
1031 }
1032 }
e4aef8ae 1033 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1034}
1035
1036static int __net_init fib_net_init(struct net *net)
1037{
1038 int error;
1039
7b1a74fd
DL
1040 error = ip_fib_net_init(net);
1041 if (error < 0)
1042 goto out;
1043 error = nl_fib_lookup_init(net);
1044 if (error < 0)
1045 goto out_nlfl;
1046 error = fib_proc_init(net);
1047 if (error < 0)
1048 goto out_proc;
1049out:
1050 return error;
1051
1052out_proc:
1053 nl_fib_lookup_exit(net);
1054out_nlfl:
1055 ip_fib_net_exit(net);
1056 goto out;
1057}
1058
1059static void __net_exit fib_net_exit(struct net *net)
1060{
1061 fib_proc_exit(net);
1062 nl_fib_lookup_exit(net);
1063 ip_fib_net_exit(net);
1064}
1065
1066static struct pernet_operations fib_net_ops = {
1067 .init = fib_net_init,
1068 .exit = fib_net_exit,
1069};
1070
1071void __init ip_fib_init(void)
1072{
63f3444f
TG
1073 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1074 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1075 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1076
1077 register_pernet_subsys(&fib_net_ops);
1078 register_netdevice_notifier(&fib_netdev_notifier);
1079 register_inetaddr_notifier(&fib_inetaddr_notifier);
7f9b8052
SH
1080
1081 fib_hash_init();
1da177e4 1082}