]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
e1000e: 82579 performance improvements
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
1da177e4
LT
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
1da177e4
LT
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
4fc268d2 20#include <linux/capability.h>
1da177e4
LT
21#include <linux/types.h>
22#include <linux/kernel.h>
1da177e4
LT
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
14c85021 30#include <linux/inetdevice.h>
1da177e4 31#include <linux/netdevice.h>
1823730f 32#include <linux/if_addr.h>
1da177e4
LT
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
1da177e4 35#include <linux/init.h>
1af5a8c4 36#include <linux/list.h>
5a0e3ad6 37#include <linux/slab.h>
1da177e4
LT
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
1da177e4
LT
44#include <net/arp.h>
45#include <net/ip_fib.h>
63f3444f 46#include <net/rtnetlink.h>
1da177e4 47
1da177e4
LT
48#ifndef CONFIG_IP_MULTIPLE_TABLES
49
7b1a74fd 50static int __net_init fib4_rules_init(struct net *net)
c3e9a353 51{
93456b6d
DL
52 struct fib_table *local_table, *main_table;
53
7f9b8052 54 local_table = fib_hash_table(RT_TABLE_LOCAL);
93456b6d 55 if (local_table == NULL)
dbb50165
DL
56 return -ENOMEM;
57
7f9b8052 58 main_table = fib_hash_table(RT_TABLE_MAIN);
93456b6d 59 if (main_table == NULL)
dbb50165
DL
60 goto fail;
61
93456b6d 62 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 64 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
66 return 0;
67
68fail:
93456b6d 69 kfree(local_table);
dbb50165 70 return -ENOMEM;
c3e9a353 71}
1af5a8c4 72#else
1da177e4 73
8ad4942c 74struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
75{
76 struct fib_table *tb;
1af5a8c4 77 unsigned int h;
1da177e4 78
1af5a8c4
PM
79 if (id == 0)
80 id = RT_TABLE_MAIN;
8ad4942c 81 tb = fib_get_table(net, id);
1af5a8c4
PM
82 if (tb)
83 return tb;
7f9b8052
SH
84
85 tb = fib_hash_table(id);
1da177e4
LT
86 if (!tb)
87 return NULL;
1af5a8c4 88 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
90 return tb;
91}
92
8ad4942c 93struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
94{
95 struct fib_table *tb;
96 struct hlist_node *node;
e4aef8ae 97 struct hlist_head *head;
1af5a8c4 98 unsigned int h;
1da177e4 99
1af5a8c4
PM
100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 103
1af5a8c4 104 rcu_read_lock();
e4aef8ae
DL
105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
1da177e4
LT
115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
010278ec
DL
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
64c2d538
DL
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
010278ec 127 tb = fib_get_table(net, table);
64c2d538 128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
16c6cf8b 129 fib_table_select_default(tb, flp, res);
64c2d538
DL
130}
131
e4aef8ae 132static void fib_flush(struct net *net)
1da177e4
LT
133{
134 int flushed = 0;
1da177e4 135 struct fib_table *tb;
1af5a8c4 136 struct hlist_node *node;
e4aef8ae 137 struct hlist_head *head;
1af5a8c4 138 unsigned int h;
1da177e4 139
1af5a8c4 140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
16c6cf8b 143 flushed += fib_table_flush(tb);
1da177e4 144 }
1da177e4
LT
145
146 if (flushed)
76e6ebfb 147 rt_cache_flush(net, -1);
1da177e4
LT
148}
149
150/*
151 * Find the first device with a given source address.
152 */
153
1ab35276 154struct net_device * ip_dev_find(struct net *net, __be32 addr)
1da177e4 155{
4465b469
TH
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } },
157 .flags = FLOWI_FLAG_MATCH_ANY_IIF };
1da177e4
LT
158 struct fib_result res;
159 struct net_device *dev = NULL;
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
4465b469 165 if (fib_lookup(net, &fl, &res))
1da177e4
LT
166 return NULL;
167 if (res.type != RTN_LOCAL)
168 goto out;
169 dev = FIB_RES_DEV(res);
170
171 if (dev)
172 dev_hold(dev);
173out:
174 fib_res_put(&res);
175 return dev;
176}
4bc2f18b 177EXPORT_SYMBOL(ip_dev_find);
1da177e4 178
05538116
LAT
179/*
180 * Find address type as if only "dev" was present in the system. If
181 * dev is NULL then all interfaces are taken into consideration.
182 */
6b175b26
EB
183static inline unsigned __inet_dev_addr_type(struct net *net,
184 const struct net_device *dev,
05538116 185 __be32 addr)
1da177e4
LT
186{
187 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
188 struct fib_result res;
189 unsigned ret = RTN_BROADCAST;
03cf786c 190 struct fib_table *local_table;
1da177e4 191
1e637c74 192 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1da177e4 193 return RTN_BROADCAST;
f97c1e0c 194 if (ipv4_is_multicast(addr))
1da177e4
LT
195 return RTN_MULTICAST;
196
197#ifdef CONFIG_IP_MULTIPLE_TABLES
198 res.r = NULL;
199#endif
e905a9ed 200
6b175b26 201 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 202 if (local_table) {
1da177e4 203 ret = RTN_UNICAST;
16c6cf8b 204 if (!fib_table_lookup(local_table, &fl, &res)) {
05538116
LAT
205 if (!dev || dev == res.fi->fib_dev)
206 ret = res.type;
1da177e4
LT
207 fib_res_put(&res);
208 }
209 }
210 return ret;
211}
212
6b175b26 213unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 214{
6b175b26 215 return __inet_dev_addr_type(net, NULL, addr);
05538116 216}
4bc2f18b 217EXPORT_SYMBOL(inet_addr_type);
05538116 218
6b175b26
EB
219unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
220 __be32 addr)
05538116 221{
6b175b26 222 return __inet_dev_addr_type(net, dev, addr);
05538116 223}
4bc2f18b 224EXPORT_SYMBOL(inet_dev_addr_type);
05538116 225
1da177e4
LT
226/* Given (packet source, input interface) and optional (dst, oif, tos):
227 - (main) check, that source is valid i.e. not broadcast or our local
228 address.
229 - figure out what "logical" interface this packet arrived
230 and calculate "specific destination" address.
231 - check, that packet arrived from expected physical interface.
232 */
233
d9c9df8c 234int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
b0c110ca 235 struct net_device *dev, __be32 *spec_dst,
236 u32 *itag, u32 mark)
1da177e4
LT
237{
238 struct in_device *in_dev;
239 struct flowi fl = { .nl_u = { .ip4_u =
240 { .daddr = src,
241 .saddr = dst,
242 .tos = tos } },
b0c110ca 243 .mark = mark,
1da177e4 244 .iif = oif };
b0c110ca 245
1da177e4 246 struct fib_result res;
8153a10c 247 int no_addr, rpf, accept_local;
6f86b325 248 bool dev_match;
1da177e4 249 int ret;
5b707aaa 250 struct net *net;
1da177e4 251
8153a10c 252 no_addr = rpf = accept_local = 0;
1da177e4 253 rcu_read_lock();
e5ed6399 254 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
255 if (in_dev) {
256 no_addr = in_dev->ifa_list == NULL;
257 rpf = IN_DEV_RPFILTER(in_dev);
8153a10c 258 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
28f6aeea
JHS
259 if (mark && !IN_DEV_SRC_VMARK(in_dev))
260 fl.mark = 0;
1da177e4
LT
261 }
262 rcu_read_unlock();
263
264 if (in_dev == NULL)
265 goto e_inval;
266
c346dca1 267 net = dev_net(dev);
5b707aaa 268 if (fib_lookup(net, &fl, &res))
1da177e4 269 goto last_resort;
8153a10c
PM
270 if (res.type != RTN_UNICAST) {
271 if (res.type != RTN_LOCAL || !accept_local)
272 goto e_inval_res;
273 }
1da177e4
LT
274 *spec_dst = FIB_RES_PREFSRC(res);
275 fib_combine_itag(itag, &res);
6f86b325
DM
276 dev_match = false;
277
1da177e4 278#ifdef CONFIG_IP_ROUTE_MULTIPATH
6f86b325
DM
279 for (ret = 0; ret < res.fi->fib_nhs; ret++) {
280 struct fib_nh *nh = &res.fi->fib_nh[ret];
281
282 if (nh->nh_dev == dev) {
283 dev_match = true;
284 break;
285 }
286 }
1da177e4
LT
287#else
288 if (FIB_RES_DEV(res) == dev)
6f86b325 289 dev_match = true;
1da177e4 290#endif
6f86b325 291 if (dev_match) {
1da177e4
LT
292 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
293 fib_res_put(&res);
294 return ret;
295 }
296 fib_res_put(&res);
297 if (no_addr)
298 goto last_resort;
c1cf8422 299 if (rpf == 1)
b5f7e755 300 goto e_rpf;
1da177e4
LT
301 fl.oif = dev->ifindex;
302
303 ret = 0;
5b707aaa 304 if (fib_lookup(net, &fl, &res) == 0) {
1da177e4
LT
305 if (res.type == RTN_UNICAST) {
306 *spec_dst = FIB_RES_PREFSRC(res);
307 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
308 }
309 fib_res_put(&res);
310 }
311 return ret;
312
313last_resort:
314 if (rpf)
b5f7e755 315 goto e_rpf;
1da177e4
LT
316 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
317 *itag = 0;
318 return 0;
319
320e_inval_res:
321 fib_res_put(&res);
322e_inval:
323 return -EINVAL;
b5f7e755
ED
324e_rpf:
325 return -EXDEV;
1da177e4
LT
326}
327
81f7bf6c 328static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
329{
330 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
331}
332
333static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
334{
335 struct nlattr *nla;
336
337 nla = (struct nlattr *) ((char *) mx + len);
338 nla->nla_type = type;
339 nla->nla_len = nla_attr_size(4);
340 *(u32 *) nla_data(nla) = value;
341
342 return len + nla_total_size(4);
343}
344
4b5d47d4 345static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
346 struct fib_config *cfg)
347{
6d85c10a 348 __be32 addr;
4e902c57
TG
349 int plen;
350
351 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 352 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
353
354 if (rt->rt_dst.sa_family != AF_INET)
355 return -EAFNOSUPPORT;
356
357 /*
358 * Check mask for validity:
359 * a) it must be contiguous.
360 * b) destination must have all host bits clear.
361 * c) if application forgot to set correct family (AF_INET),
362 * reject request unless it is absolutely clear i.e.
363 * both family and mask are zero.
364 */
365 plen = 32;
366 addr = sk_extract_addr(&rt->rt_dst);
367 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 368 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
369
370 if (rt->rt_genmask.sa_family != AF_INET) {
371 if (mask || rt->rt_genmask.sa_family)
372 return -EAFNOSUPPORT;
373 }
374
375 if (bad_mask(mask, addr))
376 return -EINVAL;
377
378 plen = inet_mask_len(mask);
379 }
380
381 cfg->fc_dst_len = plen;
382 cfg->fc_dst = addr;
383
384 if (cmd != SIOCDELRT) {
385 cfg->fc_nlflags = NLM_F_CREATE;
386 cfg->fc_protocol = RTPROT_BOOT;
387 }
388
389 if (rt->rt_metric)
390 cfg->fc_priority = rt->rt_metric - 1;
391
392 if (rt->rt_flags & RTF_REJECT) {
393 cfg->fc_scope = RT_SCOPE_HOST;
394 cfg->fc_type = RTN_UNREACHABLE;
395 return 0;
396 }
397
398 cfg->fc_scope = RT_SCOPE_NOWHERE;
399 cfg->fc_type = RTN_UNICAST;
400
401 if (rt->rt_dev) {
402 char *colon;
403 struct net_device *dev;
404 char devname[IFNAMSIZ];
405
406 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
407 return -EFAULT;
408
409 devname[IFNAMSIZ-1] = 0;
410 colon = strchr(devname, ':');
411 if (colon)
412 *colon = 0;
4b5d47d4 413 dev = __dev_get_by_name(net, devname);
4e902c57
TG
414 if (!dev)
415 return -ENODEV;
416 cfg->fc_oif = dev->ifindex;
417 if (colon) {
418 struct in_ifaddr *ifa;
419 struct in_device *in_dev = __in_dev_get_rtnl(dev);
420 if (!in_dev)
421 return -ENODEV;
422 *colon = ':';
423 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
424 if (strcmp(ifa->ifa_label, devname) == 0)
425 break;
426 if (ifa == NULL)
427 return -ENODEV;
428 cfg->fc_prefsrc = ifa->ifa_local;
429 }
430 }
431
432 addr = sk_extract_addr(&rt->rt_gateway);
433 if (rt->rt_gateway.sa_family == AF_INET && addr) {
434 cfg->fc_gw = addr;
435 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 436 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
437 cfg->fc_scope = RT_SCOPE_UNIVERSE;
438 }
439
440 if (cmd == SIOCDELRT)
441 return 0;
442
443 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
444 return -EINVAL;
445
446 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
447 cfg->fc_scope = RT_SCOPE_LINK;
448
449 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
450 struct nlattr *mx;
451 int len = 0;
452
453 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 454 if (mx == NULL)
4e902c57
TG
455 return -ENOMEM;
456
457 if (rt->rt_flags & RTF_MTU)
458 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
459
460 if (rt->rt_flags & RTF_WINDOW)
461 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
462
463 if (rt->rt_flags & RTF_IRTT)
464 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
465
466 cfg->fc_mx = mx;
467 cfg->fc_mx_len = len;
468 }
469
470 return 0;
471}
472
1da177e4
LT
473/*
474 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
475 */
e905a9ed 476
1bad118a 477int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 478{
4e902c57
TG
479 struct fib_config cfg;
480 struct rtentry rt;
1da177e4 481 int err;
1da177e4
LT
482
483 switch (cmd) {
484 case SIOCADDRT: /* Add a route */
485 case SIOCDELRT: /* Delete a route */
486 if (!capable(CAP_NET_ADMIN))
487 return -EPERM;
4e902c57
TG
488
489 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 490 return -EFAULT;
4e902c57 491
1da177e4 492 rtnl_lock();
1bad118a 493 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 494 if (err == 0) {
4e902c57
TG
495 struct fib_table *tb;
496
1da177e4 497 if (cmd == SIOCDELRT) {
1bad118a 498 tb = fib_get_table(net, cfg.fc_table);
1da177e4 499 if (tb)
16c6cf8b 500 err = fib_table_delete(tb, &cfg);
4e902c57
TG
501 else
502 err = -ESRCH;
1da177e4 503 } else {
1bad118a 504 tb = fib_new_table(net, cfg.fc_table);
1da177e4 505 if (tb)
16c6cf8b 506 err = fib_table_insert(tb, &cfg);
4e902c57
TG
507 else
508 err = -ENOBUFS;
1da177e4 509 }
4e902c57
TG
510
511 /* allocated by rtentry_to_fib_config() */
512 kfree(cfg.fc_mx);
1da177e4
LT
513 }
514 rtnl_unlock();
515 return err;
516 }
517 return -EINVAL;
518}
519
ef7c79ed 520const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
521 [RTA_DST] = { .type = NLA_U32 },
522 [RTA_SRC] = { .type = NLA_U32 },
523 [RTA_IIF] = { .type = NLA_U32 },
524 [RTA_OIF] = { .type = NLA_U32 },
525 [RTA_GATEWAY] = { .type = NLA_U32 },
526 [RTA_PRIORITY] = { .type = NLA_U32 },
527 [RTA_PREFSRC] = { .type = NLA_U32 },
528 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 529 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57 530 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
531};
532
4b5d47d4
DL
533static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
534 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 535{
4e902c57
TG
536 struct nlattr *attr;
537 int err, remaining;
538 struct rtmsg *rtm;
539
540 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
541 if (err < 0)
542 goto errout;
543
544 memset(cfg, 0, sizeof(*cfg));
545
546 rtm = nlmsg_data(nlh);
4e902c57 547 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
548 cfg->fc_tos = rtm->rtm_tos;
549 cfg->fc_table = rtm->rtm_table;
550 cfg->fc_protocol = rtm->rtm_protocol;
551 cfg->fc_scope = rtm->rtm_scope;
552 cfg->fc_type = rtm->rtm_type;
553 cfg->fc_flags = rtm->rtm_flags;
554 cfg->fc_nlflags = nlh->nlmsg_flags;
555
556 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
557 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 558 cfg->fc_nlinfo.nl_net = net;
4e902c57 559
a0ee18b9
TG
560 if (cfg->fc_type > RTN_MAX) {
561 err = -EINVAL;
562 goto errout;
563 }
564
4e902c57 565 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 566 switch (nla_type(attr)) {
4e902c57 567 case RTA_DST:
17fb2c64 568 cfg->fc_dst = nla_get_be32(attr);
4e902c57 569 break;
4e902c57
TG
570 case RTA_OIF:
571 cfg->fc_oif = nla_get_u32(attr);
572 break;
573 case RTA_GATEWAY:
17fb2c64 574 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
575 break;
576 case RTA_PRIORITY:
577 cfg->fc_priority = nla_get_u32(attr);
578 break;
579 case RTA_PREFSRC:
17fb2c64 580 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
581 break;
582 case RTA_METRICS:
583 cfg->fc_mx = nla_data(attr);
584 cfg->fc_mx_len = nla_len(attr);
585 break;
586 case RTA_MULTIPATH:
587 cfg->fc_mp = nla_data(attr);
588 cfg->fc_mp_len = nla_len(attr);
589 break;
590 case RTA_FLOW:
591 cfg->fc_flow = nla_get_u32(attr);
592 break;
4e902c57
TG
593 case RTA_TABLE:
594 cfg->fc_table = nla_get_u32(attr);
595 break;
1da177e4
LT
596 }
597 }
4e902c57 598
1da177e4 599 return 0;
4e902c57
TG
600errout:
601 return err;
1da177e4
LT
602}
603
6ed2533e 604static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 605{
3b1e0a65 606 struct net *net = sock_net(skb->sk);
4e902c57
TG
607 struct fib_config cfg;
608 struct fib_table *tb;
609 int err;
1da177e4 610
4b5d47d4 611 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
612 if (err < 0)
613 goto errout;
1da177e4 614
8ad4942c 615 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
616 if (tb == NULL) {
617 err = -ESRCH;
618 goto errout;
619 }
620
16c6cf8b 621 err = fib_table_delete(tb, &cfg);
4e902c57
TG
622errout:
623 return err;
1da177e4
LT
624}
625
6ed2533e 626static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 627{
3b1e0a65 628 struct net *net = sock_net(skb->sk);
4e902c57
TG
629 struct fib_config cfg;
630 struct fib_table *tb;
631 int err;
1da177e4 632
4b5d47d4 633 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
634 if (err < 0)
635 goto errout;
1da177e4 636
226b0b4a 637 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
638 if (tb == NULL) {
639 err = -ENOBUFS;
640 goto errout;
641 }
642
16c6cf8b 643 err = fib_table_insert(tb, &cfg);
4e902c57
TG
644errout:
645 return err;
1da177e4
LT
646}
647
63f3444f 648static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 649{
3b1e0a65 650 struct net *net = sock_net(skb->sk);
1af5a8c4
PM
651 unsigned int h, s_h;
652 unsigned int e = 0, s_e;
1da177e4 653 struct fib_table *tb;
1af5a8c4 654 struct hlist_node *node;
e4aef8ae 655 struct hlist_head *head;
1af5a8c4 656 int dumped = 0;
1da177e4 657
be403ea1
TG
658 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
659 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
660 return ip_rt_dump(skb, cb);
661
1af5a8c4
PM
662 s_h = cb->args[0];
663 s_e = cb->args[1];
664
665 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
666 e = 0;
e4aef8ae
DL
667 head = &net->ipv4.fib_table_hash[h];
668 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
669 if (e < s_e)
670 goto next;
671 if (dumped)
672 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 673 2 * sizeof(cb->args[0]));
16c6cf8b 674 if (fib_table_dump(tb, skb, cb) < 0)
1af5a8c4
PM
675 goto out;
676 dumped = 1;
677next:
678 e++;
679 }
1da177e4 680 }
1af5a8c4
PM
681out:
682 cb->args[1] = e;
683 cb->args[0] = h;
1da177e4
LT
684
685 return skb->len;
686}
687
688/* Prepare and feed intra-kernel routing request.
689 Really, it should be netlink message, but :-( netlink
690 can be not configured, so that we feed it directly
691 to fib engine. It is legal, because all events occur
692 only when netlink is already locked.
693 */
694
81f7bf6c 695static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 696{
c346dca1 697 struct net *net = dev_net(ifa->ifa_dev->dev);
4e902c57
TG
698 struct fib_table *tb;
699 struct fib_config cfg = {
700 .fc_protocol = RTPROT_KERNEL,
701 .fc_type = type,
702 .fc_dst = dst,
703 .fc_dst_len = dst_len,
704 .fc_prefsrc = ifa->ifa_local,
705 .fc_oif = ifa->ifa_dev->dev->ifindex,
706 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 707 .fc_nlinfo = {
4b5d47d4 708 .nl_net = net,
4d1169c1 709 },
4e902c57 710 };
1da177e4
LT
711
712 if (type == RTN_UNICAST)
4b5d47d4 713 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 714 else
4b5d47d4 715 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
716
717 if (tb == NULL)
718 return;
719
4e902c57 720 cfg.fc_table = tb->tb_id;
1da177e4 721
4e902c57
TG
722 if (type != RTN_LOCAL)
723 cfg.fc_scope = RT_SCOPE_LINK;
724 else
725 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
726
727 if (cmd == RTM_NEWROUTE)
16c6cf8b 728 fib_table_insert(tb, &cfg);
1da177e4 729 else
16c6cf8b 730 fib_table_delete(tb, &cfg);
1da177e4
LT
731}
732
0ff60a45 733void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
734{
735 struct in_device *in_dev = ifa->ifa_dev;
736 struct net_device *dev = in_dev->dev;
737 struct in_ifaddr *prim = ifa;
a144ea4b
AV
738 __be32 mask = ifa->ifa_mask;
739 __be32 addr = ifa->ifa_local;
740 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
741
742 if (ifa->ifa_flags&IFA_F_SECONDARY) {
743 prim = inet_ifa_byprefix(in_dev, prefix, mask);
744 if (prim == NULL) {
a6db9010 745 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
746 return;
747 }
748 }
749
750 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
751
752 if (!(dev->flags&IFF_UP))
753 return;
754
755 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 756 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
757 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
758
f97c1e0c 759 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
760 (prefix != addr || ifa->ifa_prefixlen < 32)) {
761 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
762 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
763
764 /* Add network specific broadcasts, when it takes a sense */
765 if (ifa->ifa_prefixlen < 31) {
766 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
767 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
768 }
769 }
770}
771
772static void fib_del_ifaddr(struct in_ifaddr *ifa)
773{
774 struct in_device *in_dev = ifa->ifa_dev;
775 struct net_device *dev = in_dev->dev;
776 struct in_ifaddr *ifa1;
777 struct in_ifaddr *prim = ifa;
a144ea4b
AV
778 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
779 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
780#define LOCAL_OK 1
781#define BRD_OK 2
782#define BRD0_OK 4
783#define BRD1_OK 8
784 unsigned ok = 0;
785
786 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
787 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
788 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
789 else {
790 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
791 if (prim == NULL) {
a6db9010 792 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
793 return;
794 }
795 }
796
797 /* Deletion is more complicated than add.
798 We should take care of not to delete too much :-)
799
800 Scan address list to be sure that addresses are really gone.
801 */
802
803 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
804 if (ifa->ifa_local == ifa1->ifa_local)
805 ok |= LOCAL_OK;
806 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
807 ok |= BRD_OK;
808 if (brd == ifa1->ifa_broadcast)
809 ok |= BRD1_OK;
810 if (any == ifa1->ifa_broadcast)
811 ok |= BRD0_OK;
812 }
813
814 if (!(ok&BRD_OK))
815 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
816 if (!(ok&BRD1_OK))
817 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
818 if (!(ok&BRD0_OK))
819 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
820 if (!(ok&LOCAL_OK)) {
821 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
822
823 /* Check, that this local address finally disappeared. */
c346dca1 824 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
825 /* And the last, but not the least thing.
826 We must flush stray FIB entries.
827
828 First of all, we scan fib_info list searching
829 for stray nexthop entries, then ignite fib_flush.
830 */
c346dca1
YH
831 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
832 fib_flush(dev_net(dev));
1da177e4
LT
833 }
834 }
835#undef LOCAL_OK
836#undef BRD_OK
837#undef BRD0_OK
838#undef BRD1_OK
839}
840
246955fe
RO
841static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
842{
e905a9ed 843
246955fe 844 struct fib_result res;
5f300893 845 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 846 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
847 .tos = frn->fl_tos,
848 .scope = frn->fl_scope } } };
1194ed0a 849
912a41a4
SV
850#ifdef CONFIG_IP_MULTIPLE_TABLES
851 res.r = NULL;
852#endif
853
1194ed0a 854 frn->err = -ENOENT;
246955fe
RO
855 if (tb) {
856 local_bh_disable();
857
858 frn->tb_id = tb->tb_id;
16c6cf8b 859 frn->err = fib_table_lookup(tb, &fl, &res);
246955fe
RO
860
861 if (!frn->err) {
862 frn->prefixlen = res.prefixlen;
863 frn->nh_sel = res.nh_sel;
864 frn->type = res.type;
865 frn->scope = res.scope;
1194ed0a 866 fib_res_put(&res);
246955fe
RO
867 }
868 local_bh_enable();
869 }
870}
871
28f7b036 872static void nl_fib_input(struct sk_buff *skb)
246955fe 873{
6bd48fcf 874 struct net *net;
246955fe 875 struct fib_result_nl *frn;
28f7b036 876 struct nlmsghdr *nlh;
246955fe 877 struct fib_table *tb;
28f7b036 878 u32 pid;
1194ed0a 879
3b1e0a65 880 net = sock_net(skb->sk);
b529ccf2 881 nlh = nlmsg_hdr(skb);
ea86575e 882 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 883 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 884 return;
d883a036
DL
885
886 skb = skb_clone(skb, GFP_KERNEL);
887 if (skb == NULL)
888 return;
889 nlh = nlmsg_hdr(skb);
e905a9ed 890
246955fe 891 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 892 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
893
894 nl_fib_lookup(frn, tb);
e905a9ed 895
1194ed0a 896 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 897 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 898 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 899 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 900}
246955fe 901
2c8c1e72 902static int __net_init nl_fib_lookup_init(struct net *net)
246955fe 903{
6bd48fcf
DL
904 struct sock *sk;
905 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
906 nl_fib_input, NULL, THIS_MODULE);
907 if (sk == NULL)
7b1a74fd 908 return -EAFNOSUPPORT;
6bd48fcf 909 net->ipv4.fibnl = sk;
7b1a74fd
DL
910 return 0;
911}
912
913static void nl_fib_lookup_exit(struct net *net)
914{
b7c6ba6e 915 netlink_kernel_release(net->ipv4.fibnl);
775516bf 916 net->ipv4.fibnl = NULL;
246955fe
RO
917}
918
/*
 * Take the routes through @dev down.  @force is handed to
 * fib_sync_down_dev() and @delay to rt_cache_flush(); the FIB tables are
 * flushed only if fib_sync_down_dev() reports a change.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int changed = fib_sync_down_dev(dev, force);

	if (changed)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
926
927static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
928{
6ed2533e 929 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
76e6ebfb 930 struct net_device *dev = ifa->ifa_dev->dev;
1da177e4
LT
931
932 switch (event) {
933 case NETDEV_UP:
934 fib_add_ifaddr(ifa);
935#ifdef CONFIG_IP_ROUTE_MULTIPATH
76e6ebfb 936 fib_sync_up(dev);
1da177e4 937#endif
76e6ebfb 938 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
939 break;
940 case NETDEV_DOWN:
941 fib_del_ifaddr(ifa);
9fcc2e8a 942 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
943 /* Last address was deleted from this interface.
944 Disable IP.
945 */
e2ce1468 946 fib_disable_ip(dev, 1, 0);
1da177e4 947 } else {
76e6ebfb 948 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
949 }
950 break;
951 }
952 return NOTIFY_DONE;
953}
954
955static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
956{
957 struct net_device *dev = ptr;
e5ed6399 958 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
959
960 if (event == NETDEV_UNREGISTER) {
e2ce1468 961 fib_disable_ip(dev, 2, -1);
1da177e4
LT
962 return NOTIFY_DONE;
963 }
964
965 if (!in_dev)
966 return NOTIFY_DONE;
967
968 switch (event) {
969 case NETDEV_UP:
970 for_ifa(in_dev) {
971 fib_add_ifaddr(ifa);
972 } endfor_ifa(in_dev);
973#ifdef CONFIG_IP_ROUTE_MULTIPATH
974 fib_sync_up(dev);
975#endif
76e6ebfb 976 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
977 break;
978 case NETDEV_DOWN:
e2ce1468 979 fib_disable_ip(dev, 0, 0);
1da177e4
LT
980 break;
981 case NETDEV_CHANGEMTU:
982 case NETDEV_CHANGE:
76e6ebfb 983 rt_cache_flush(dev_net(dev), 0);
1da177e4 984 break;
a5ee1551
EB
985 case NETDEV_UNREGISTER_BATCH:
986 rt_cache_flush_batch();
987 break;
1da177e4
LT
988 }
989 return NOTIFY_DONE;
990}
991
992static struct notifier_block fib_inetaddr_notifier = {
6ed2533e 993 .notifier_call = fib_inetaddr_event,
1da177e4
LT
994};
995
996static struct notifier_block fib_netdev_notifier = {
6ed2533e 997 .notifier_call = fib_netdev_event,
1da177e4
LT
998};
999
7b1a74fd 1000static int __net_init ip_fib_net_init(struct net *net)
1da177e4 1001{
dce5cbee 1002 int err;
1af5a8c4
PM
1003 unsigned int i;
1004
e4aef8ae
DL
1005 net->ipv4.fib_table_hash = kzalloc(
1006 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
1007 if (net->ipv4.fib_table_hash == NULL)
1008 return -ENOMEM;
1009
1af5a8c4 1010 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 1011 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 1012
dce5cbee
DL
1013 err = fib4_rules_init(net);
1014 if (err < 0)
1015 goto fail;
1016 return 0;
1017
1018fail:
1019 kfree(net->ipv4.fib_table_hash);
1020 return err;
7b1a74fd 1021}
1da177e4 1022
2c8c1e72 1023static void ip_fib_net_exit(struct net *net)
7b1a74fd
DL
1024{
1025 unsigned int i;
1026
1027#ifdef CONFIG_IP_MULTIPLE_TABLES
1028 fib4_rules_exit(net);
1029#endif
1030
1031 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1032 struct fib_table *tb;
1033 struct hlist_head *head;
1034 struct hlist_node *node, *tmp;
63f3444f 1035
e4aef8ae 1036 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
1037 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1038 hlist_del(node);
16c6cf8b 1039 fib_table_flush(tb);
7b1a74fd
DL
1040 kfree(tb);
1041 }
1042 }
e4aef8ae 1043 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1044}
1045
1046static int __net_init fib_net_init(struct net *net)
1047{
1048 int error;
1049
7b1a74fd
DL
1050 error = ip_fib_net_init(net);
1051 if (error < 0)
1052 goto out;
1053 error = nl_fib_lookup_init(net);
1054 if (error < 0)
1055 goto out_nlfl;
1056 error = fib_proc_init(net);
1057 if (error < 0)
1058 goto out_proc;
1059out:
1060 return error;
1061
1062out_proc:
1063 nl_fib_lookup_exit(net);
1064out_nlfl:
1065 ip_fib_net_exit(net);
1066 goto out;
1067}
1068
1069static void __net_exit fib_net_exit(struct net *net)
1070{
1071 fib_proc_exit(net);
1072 nl_fib_lookup_exit(net);
1073 ip_fib_net_exit(net);
1074}
1075
1076static struct pernet_operations fib_net_ops = {
1077 .init = fib_net_init,
1078 .exit = fib_net_exit,
1079};
1080
1081void __init ip_fib_init(void)
1082{
63f3444f
TG
1083 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1084 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1085 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1086
1087 register_pernet_subsys(&fib_net_ops);
1088 register_netdevice_notifier(&fib_netdev_notifier);
1089 register_inetaddr_notifier(&fib_inetaddr_notifier);
7f9b8052
SH
1090
1091 fib_hash_init();
1da177e4 1092}