]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
ipv4: Fix reverse path filtering with multipath routing.
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
1da177e4
LT
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
1da177e4
LT
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
4fc268d2 20#include <linux/capability.h>
1da177e4
LT
21#include <linux/types.h>
22#include <linux/kernel.h>
1da177e4
LT
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
14c85021 30#include <linux/inetdevice.h>
1da177e4 31#include <linux/netdevice.h>
1823730f 32#include <linux/if_addr.h>
1da177e4
LT
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
1da177e4 35#include <linux/init.h>
1af5a8c4 36#include <linux/list.h>
5a0e3ad6 37#include <linux/slab.h>
1da177e4
LT
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
1da177e4
LT
44#include <net/arp.h>
45#include <net/ip_fib.h>
63f3444f 46#include <net/rtnetlink.h>
1da177e4 47
1da177e4
LT
48#ifndef CONFIG_IP_MULTIPLE_TABLES
49
7b1a74fd 50static int __net_init fib4_rules_init(struct net *net)
c3e9a353 51{
93456b6d
DL
52 struct fib_table *local_table, *main_table;
53
7f9b8052 54 local_table = fib_hash_table(RT_TABLE_LOCAL);
93456b6d 55 if (local_table == NULL)
dbb50165
DL
56 return -ENOMEM;
57
7f9b8052 58 main_table = fib_hash_table(RT_TABLE_MAIN);
93456b6d 59 if (main_table == NULL)
dbb50165
DL
60 goto fail;
61
93456b6d 62 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 64 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
66 return 0;
67
68fail:
93456b6d 69 kfree(local_table);
dbb50165 70 return -ENOMEM;
c3e9a353 71}
1af5a8c4 72#else
1da177e4 73
8ad4942c 74struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
75{
76 struct fib_table *tb;
1af5a8c4 77 unsigned int h;
1da177e4 78
1af5a8c4
PM
79 if (id == 0)
80 id = RT_TABLE_MAIN;
8ad4942c 81 tb = fib_get_table(net, id);
1af5a8c4
PM
82 if (tb)
83 return tb;
7f9b8052
SH
84
85 tb = fib_hash_table(id);
1da177e4
LT
86 if (!tb)
87 return NULL;
1af5a8c4 88 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
90 return tb;
91}
92
8ad4942c 93struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
94{
95 struct fib_table *tb;
96 struct hlist_node *node;
e4aef8ae 97 struct hlist_head *head;
1af5a8c4 98 unsigned int h;
1da177e4 99
1af5a8c4
PM
100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 103
1af5a8c4 104 rcu_read_lock();
e4aef8ae
DL
105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
1da177e4
LT
115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
010278ec
DL
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
64c2d538
DL
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
010278ec 127 tb = fib_get_table(net, table);
64c2d538 128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
16c6cf8b 129 fib_table_select_default(tb, flp, res);
64c2d538
DL
130}
131
e4aef8ae 132static void fib_flush(struct net *net)
1da177e4
LT
133{
134 int flushed = 0;
1da177e4 135 struct fib_table *tb;
1af5a8c4 136 struct hlist_node *node;
e4aef8ae 137 struct hlist_head *head;
1af5a8c4 138 unsigned int h;
1da177e4 139
1af5a8c4 140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
16c6cf8b 143 flushed += fib_table_flush(tb);
1da177e4 144 }
1da177e4
LT
145
146 if (flushed)
76e6ebfb 147 rt_cache_flush(net, -1);
1da177e4
LT
148}
149
150/*
151 * Find the first device with a given source address.
152 */
153
1ab35276 154struct net_device * ip_dev_find(struct net *net, __be32 addr)
1da177e4
LT
155{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
03cf786c 159 struct fib_table *local_table;
1da177e4
LT
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
1ab35276 165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
16c6cf8b 166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
1da177e4
LT
167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
171
172 if (dev)
173 dev_hold(dev);
174out:
175 fib_res_put(&res);
176 return dev;
177}
4bc2f18b 178EXPORT_SYMBOL(ip_dev_find);
1da177e4 179
05538116
LAT
180/*
181 * Find address type as if only "dev" was present in the system. If
182 * on_dev is NULL then all interfaces are taken into consideration.
183 */
6b175b26
EB
184static inline unsigned __inet_dev_addr_type(struct net *net,
185 const struct net_device *dev,
05538116 186 __be32 addr)
1da177e4
LT
187{
188 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
189 struct fib_result res;
190 unsigned ret = RTN_BROADCAST;
03cf786c 191 struct fib_table *local_table;
1da177e4 192
1e637c74 193 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1da177e4 194 return RTN_BROADCAST;
f97c1e0c 195 if (ipv4_is_multicast(addr))
1da177e4
LT
196 return RTN_MULTICAST;
197
198#ifdef CONFIG_IP_MULTIPLE_TABLES
199 res.r = NULL;
200#endif
e905a9ed 201
6b175b26 202 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 203 if (local_table) {
1da177e4 204 ret = RTN_UNICAST;
16c6cf8b 205 if (!fib_table_lookup(local_table, &fl, &res)) {
05538116
LAT
206 if (!dev || dev == res.fi->fib_dev)
207 ret = res.type;
1da177e4
LT
208 fib_res_put(&res);
209 }
210 }
211 return ret;
212}
213
6b175b26 214unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 215{
6b175b26 216 return __inet_dev_addr_type(net, NULL, addr);
05538116 217}
4bc2f18b 218EXPORT_SYMBOL(inet_addr_type);
05538116 219
6b175b26
EB
220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 __be32 addr)
05538116 222{
6b175b26 223 return __inet_dev_addr_type(net, dev, addr);
05538116 224}
4bc2f18b 225EXPORT_SYMBOL(inet_dev_addr_type);
05538116 226
1da177e4
LT
227/* Given (packet source, input interface) and optional (dst, oif, tos):
228 - (main) check, that source is valid i.e. not broadcast or our local
229 address.
230 - figure out what "logical" interface this packet arrived
231 and calculate "specific destination" address.
232 - check, that packet arrived from expected physical interface.
233 */
234
d9c9df8c 235int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
b0c110ca 236 struct net_device *dev, __be32 *spec_dst,
237 u32 *itag, u32 mark)
1da177e4
LT
238{
239 struct in_device *in_dev;
240 struct flowi fl = { .nl_u = { .ip4_u =
241 { .daddr = src,
242 .saddr = dst,
243 .tos = tos } },
b0c110ca 244 .mark = mark,
1da177e4 245 .iif = oif };
b0c110ca 246
1da177e4 247 struct fib_result res;
8153a10c 248 int no_addr, rpf, accept_local;
6f86b325 249 bool dev_match;
1da177e4 250 int ret;
5b707aaa 251 struct net *net;
1da177e4 252
8153a10c 253 no_addr = rpf = accept_local = 0;
1da177e4 254 rcu_read_lock();
e5ed6399 255 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
256 if (in_dev) {
257 no_addr = in_dev->ifa_list == NULL;
258 rpf = IN_DEV_RPFILTER(in_dev);
8153a10c 259 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
28f6aeea
JHS
260 if (mark && !IN_DEV_SRC_VMARK(in_dev))
261 fl.mark = 0;
1da177e4
LT
262 }
263 rcu_read_unlock();
264
265 if (in_dev == NULL)
266 goto e_inval;
267
c346dca1 268 net = dev_net(dev);
5b707aaa 269 if (fib_lookup(net, &fl, &res))
1da177e4 270 goto last_resort;
8153a10c
PM
271 if (res.type != RTN_UNICAST) {
272 if (res.type != RTN_LOCAL || !accept_local)
273 goto e_inval_res;
274 }
1da177e4
LT
275 *spec_dst = FIB_RES_PREFSRC(res);
276 fib_combine_itag(itag, &res);
6f86b325
DM
277 dev_match = false;
278
1da177e4 279#ifdef CONFIG_IP_ROUTE_MULTIPATH
6f86b325
DM
280 for (ret = 0; ret < res.fi->fib_nhs; ret++) {
281 struct fib_nh *nh = &res.fi->fib_nh[ret];
282
283 if (nh->nh_dev == dev) {
284 dev_match = true;
285 break;
286 }
287 }
1da177e4
LT
288#else
289 if (FIB_RES_DEV(res) == dev)
6f86b325 290 dev_match = true;
1da177e4 291#endif
6f86b325 292 if (dev_match) {
1da177e4
LT
293 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
294 fib_res_put(&res);
295 return ret;
296 }
297 fib_res_put(&res);
298 if (no_addr)
299 goto last_resort;
c1cf8422 300 if (rpf == 1)
b5f7e755 301 goto e_rpf;
1da177e4
LT
302 fl.oif = dev->ifindex;
303
304 ret = 0;
5b707aaa 305 if (fib_lookup(net, &fl, &res) == 0) {
1da177e4
LT
306 if (res.type == RTN_UNICAST) {
307 *spec_dst = FIB_RES_PREFSRC(res);
308 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
309 }
310 fib_res_put(&res);
311 }
312 return ret;
313
314last_resort:
315 if (rpf)
b5f7e755 316 goto e_rpf;
1da177e4
LT
317 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
318 *itag = 0;
319 return 0;
320
321e_inval_res:
322 fib_res_put(&res);
323e_inval:
324 return -EINVAL;
b5f7e755
ED
325e_rpf:
326 return -EXDEV;
1da177e4
LT
327}
328
81f7bf6c 329static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
330{
331 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
332}
333
334static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
335{
336 struct nlattr *nla;
337
338 nla = (struct nlattr *) ((char *) mx + len);
339 nla->nla_type = type;
340 nla->nla_len = nla_attr_size(4);
341 *(u32 *) nla_data(nla) = value;
342
343 return len + nla_total_size(4);
344}
345
4b5d47d4 346static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
347 struct fib_config *cfg)
348{
6d85c10a 349 __be32 addr;
4e902c57
TG
350 int plen;
351
352 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 353 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
354
355 if (rt->rt_dst.sa_family != AF_INET)
356 return -EAFNOSUPPORT;
357
358 /*
359 * Check mask for validity:
360 * a) it must be contiguous.
361 * b) destination must have all host bits clear.
362 * c) if application forgot to set correct family (AF_INET),
363 * reject request unless it is absolutely clear i.e.
364 * both family and mask are zero.
365 */
366 plen = 32;
367 addr = sk_extract_addr(&rt->rt_dst);
368 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 369 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
370
371 if (rt->rt_genmask.sa_family != AF_INET) {
372 if (mask || rt->rt_genmask.sa_family)
373 return -EAFNOSUPPORT;
374 }
375
376 if (bad_mask(mask, addr))
377 return -EINVAL;
378
379 plen = inet_mask_len(mask);
380 }
381
382 cfg->fc_dst_len = plen;
383 cfg->fc_dst = addr;
384
385 if (cmd != SIOCDELRT) {
386 cfg->fc_nlflags = NLM_F_CREATE;
387 cfg->fc_protocol = RTPROT_BOOT;
388 }
389
390 if (rt->rt_metric)
391 cfg->fc_priority = rt->rt_metric - 1;
392
393 if (rt->rt_flags & RTF_REJECT) {
394 cfg->fc_scope = RT_SCOPE_HOST;
395 cfg->fc_type = RTN_UNREACHABLE;
396 return 0;
397 }
398
399 cfg->fc_scope = RT_SCOPE_NOWHERE;
400 cfg->fc_type = RTN_UNICAST;
401
402 if (rt->rt_dev) {
403 char *colon;
404 struct net_device *dev;
405 char devname[IFNAMSIZ];
406
407 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
408 return -EFAULT;
409
410 devname[IFNAMSIZ-1] = 0;
411 colon = strchr(devname, ':');
412 if (colon)
413 *colon = 0;
4b5d47d4 414 dev = __dev_get_by_name(net, devname);
4e902c57
TG
415 if (!dev)
416 return -ENODEV;
417 cfg->fc_oif = dev->ifindex;
418 if (colon) {
419 struct in_ifaddr *ifa;
420 struct in_device *in_dev = __in_dev_get_rtnl(dev);
421 if (!in_dev)
422 return -ENODEV;
423 *colon = ':';
424 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
425 if (strcmp(ifa->ifa_label, devname) == 0)
426 break;
427 if (ifa == NULL)
428 return -ENODEV;
429 cfg->fc_prefsrc = ifa->ifa_local;
430 }
431 }
432
433 addr = sk_extract_addr(&rt->rt_gateway);
434 if (rt->rt_gateway.sa_family == AF_INET && addr) {
435 cfg->fc_gw = addr;
436 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 437 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
438 cfg->fc_scope = RT_SCOPE_UNIVERSE;
439 }
440
441 if (cmd == SIOCDELRT)
442 return 0;
443
444 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
445 return -EINVAL;
446
447 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
448 cfg->fc_scope = RT_SCOPE_LINK;
449
450 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
451 struct nlattr *mx;
452 int len = 0;
453
454 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 455 if (mx == NULL)
4e902c57
TG
456 return -ENOMEM;
457
458 if (rt->rt_flags & RTF_MTU)
459 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
460
461 if (rt->rt_flags & RTF_WINDOW)
462 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
463
464 if (rt->rt_flags & RTF_IRTT)
465 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
466
467 cfg->fc_mx = mx;
468 cfg->fc_mx_len = len;
469 }
470
471 return 0;
472}
473
1da177e4
LT
474/*
475 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
476 */
e905a9ed 477
1bad118a 478int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 479{
4e902c57
TG
480 struct fib_config cfg;
481 struct rtentry rt;
1da177e4 482 int err;
1da177e4
LT
483
484 switch (cmd) {
485 case SIOCADDRT: /* Add a route */
486 case SIOCDELRT: /* Delete a route */
487 if (!capable(CAP_NET_ADMIN))
488 return -EPERM;
4e902c57
TG
489
490 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 491 return -EFAULT;
4e902c57 492
1da177e4 493 rtnl_lock();
1bad118a 494 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 495 if (err == 0) {
4e902c57
TG
496 struct fib_table *tb;
497
1da177e4 498 if (cmd == SIOCDELRT) {
1bad118a 499 tb = fib_get_table(net, cfg.fc_table);
1da177e4 500 if (tb)
16c6cf8b 501 err = fib_table_delete(tb, &cfg);
4e902c57
TG
502 else
503 err = -ESRCH;
1da177e4 504 } else {
1bad118a 505 tb = fib_new_table(net, cfg.fc_table);
1da177e4 506 if (tb)
16c6cf8b 507 err = fib_table_insert(tb, &cfg);
4e902c57
TG
508 else
509 err = -ENOBUFS;
1da177e4 510 }
4e902c57
TG
511
512 /* allocated by rtentry_to_fib_config() */
513 kfree(cfg.fc_mx);
1da177e4
LT
514 }
515 rtnl_unlock();
516 return err;
517 }
518 return -EINVAL;
519}
520
ef7c79ed 521const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
522 [RTA_DST] = { .type = NLA_U32 },
523 [RTA_SRC] = { .type = NLA_U32 },
524 [RTA_IIF] = { .type = NLA_U32 },
525 [RTA_OIF] = { .type = NLA_U32 },
526 [RTA_GATEWAY] = { .type = NLA_U32 },
527 [RTA_PRIORITY] = { .type = NLA_U32 },
528 [RTA_PREFSRC] = { .type = NLA_U32 },
529 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 530 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57 531 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
532};
533
4b5d47d4
DL
534static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
535 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 536{
4e902c57
TG
537 struct nlattr *attr;
538 int err, remaining;
539 struct rtmsg *rtm;
540
541 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
542 if (err < 0)
543 goto errout;
544
545 memset(cfg, 0, sizeof(*cfg));
546
547 rtm = nlmsg_data(nlh);
4e902c57 548 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
549 cfg->fc_tos = rtm->rtm_tos;
550 cfg->fc_table = rtm->rtm_table;
551 cfg->fc_protocol = rtm->rtm_protocol;
552 cfg->fc_scope = rtm->rtm_scope;
553 cfg->fc_type = rtm->rtm_type;
554 cfg->fc_flags = rtm->rtm_flags;
555 cfg->fc_nlflags = nlh->nlmsg_flags;
556
557 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
558 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 559 cfg->fc_nlinfo.nl_net = net;
4e902c57 560
a0ee18b9
TG
561 if (cfg->fc_type > RTN_MAX) {
562 err = -EINVAL;
563 goto errout;
564 }
565
4e902c57 566 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 567 switch (nla_type(attr)) {
4e902c57 568 case RTA_DST:
17fb2c64 569 cfg->fc_dst = nla_get_be32(attr);
4e902c57 570 break;
4e902c57
TG
571 case RTA_OIF:
572 cfg->fc_oif = nla_get_u32(attr);
573 break;
574 case RTA_GATEWAY:
17fb2c64 575 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
576 break;
577 case RTA_PRIORITY:
578 cfg->fc_priority = nla_get_u32(attr);
579 break;
580 case RTA_PREFSRC:
17fb2c64 581 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
582 break;
583 case RTA_METRICS:
584 cfg->fc_mx = nla_data(attr);
585 cfg->fc_mx_len = nla_len(attr);
586 break;
587 case RTA_MULTIPATH:
588 cfg->fc_mp = nla_data(attr);
589 cfg->fc_mp_len = nla_len(attr);
590 break;
591 case RTA_FLOW:
592 cfg->fc_flow = nla_get_u32(attr);
593 break;
4e902c57
TG
594 case RTA_TABLE:
595 cfg->fc_table = nla_get_u32(attr);
596 break;
1da177e4
LT
597 }
598 }
4e902c57 599
1da177e4 600 return 0;
4e902c57
TG
601errout:
602 return err;
1da177e4
LT
603}
604
6ed2533e 605static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 606{
3b1e0a65 607 struct net *net = sock_net(skb->sk);
4e902c57
TG
608 struct fib_config cfg;
609 struct fib_table *tb;
610 int err;
1da177e4 611
4b5d47d4 612 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
613 if (err < 0)
614 goto errout;
1da177e4 615
8ad4942c 616 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
617 if (tb == NULL) {
618 err = -ESRCH;
619 goto errout;
620 }
621
16c6cf8b 622 err = fib_table_delete(tb, &cfg);
4e902c57
TG
623errout:
624 return err;
1da177e4
LT
625}
626
6ed2533e 627static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 628{
3b1e0a65 629 struct net *net = sock_net(skb->sk);
4e902c57
TG
630 struct fib_config cfg;
631 struct fib_table *tb;
632 int err;
1da177e4 633
4b5d47d4 634 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
635 if (err < 0)
636 goto errout;
1da177e4 637
226b0b4a 638 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
639 if (tb == NULL) {
640 err = -ENOBUFS;
641 goto errout;
642 }
643
16c6cf8b 644 err = fib_table_insert(tb, &cfg);
4e902c57
TG
645errout:
646 return err;
1da177e4
LT
647}
648
63f3444f 649static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 650{
3b1e0a65 651 struct net *net = sock_net(skb->sk);
1af5a8c4
PM
652 unsigned int h, s_h;
653 unsigned int e = 0, s_e;
1da177e4 654 struct fib_table *tb;
1af5a8c4 655 struct hlist_node *node;
e4aef8ae 656 struct hlist_head *head;
1af5a8c4 657 int dumped = 0;
1da177e4 658
be403ea1
TG
659 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
660 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
661 return ip_rt_dump(skb, cb);
662
1af5a8c4
PM
663 s_h = cb->args[0];
664 s_e = cb->args[1];
665
666 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
667 e = 0;
e4aef8ae
DL
668 head = &net->ipv4.fib_table_hash[h];
669 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
670 if (e < s_e)
671 goto next;
672 if (dumped)
673 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 674 2 * sizeof(cb->args[0]));
16c6cf8b 675 if (fib_table_dump(tb, skb, cb) < 0)
1af5a8c4
PM
676 goto out;
677 dumped = 1;
678next:
679 e++;
680 }
1da177e4 681 }
1af5a8c4
PM
682out:
683 cb->args[1] = e;
684 cb->args[0] = h;
1da177e4
LT
685
686 return skb->len;
687}
688
689/* Prepare and feed intra-kernel routing request.
690 Really, it should be netlink message, but :-( netlink
691 can be not configured, so that we feed it directly
692 to fib engine. It is legal, because all events occur
693 only when netlink is already locked.
694 */
695
81f7bf6c 696static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 697{
c346dca1 698 struct net *net = dev_net(ifa->ifa_dev->dev);
4e902c57
TG
699 struct fib_table *tb;
700 struct fib_config cfg = {
701 .fc_protocol = RTPROT_KERNEL,
702 .fc_type = type,
703 .fc_dst = dst,
704 .fc_dst_len = dst_len,
705 .fc_prefsrc = ifa->ifa_local,
706 .fc_oif = ifa->ifa_dev->dev->ifindex,
707 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 708 .fc_nlinfo = {
4b5d47d4 709 .nl_net = net,
4d1169c1 710 },
4e902c57 711 };
1da177e4
LT
712
713 if (type == RTN_UNICAST)
4b5d47d4 714 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 715 else
4b5d47d4 716 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
717
718 if (tb == NULL)
719 return;
720
4e902c57 721 cfg.fc_table = tb->tb_id;
1da177e4 722
4e902c57
TG
723 if (type != RTN_LOCAL)
724 cfg.fc_scope = RT_SCOPE_LINK;
725 else
726 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
727
728 if (cmd == RTM_NEWROUTE)
16c6cf8b 729 fib_table_insert(tb, &cfg);
1da177e4 730 else
16c6cf8b 731 fib_table_delete(tb, &cfg);
1da177e4
LT
732}
733
0ff60a45 734void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
735{
736 struct in_device *in_dev = ifa->ifa_dev;
737 struct net_device *dev = in_dev->dev;
738 struct in_ifaddr *prim = ifa;
a144ea4b
AV
739 __be32 mask = ifa->ifa_mask;
740 __be32 addr = ifa->ifa_local;
741 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
742
743 if (ifa->ifa_flags&IFA_F_SECONDARY) {
744 prim = inet_ifa_byprefix(in_dev, prefix, mask);
745 if (prim == NULL) {
a6db9010 746 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
747 return;
748 }
749 }
750
751 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
752
753 if (!(dev->flags&IFF_UP))
754 return;
755
756 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 757 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
758 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
759
f97c1e0c 760 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
761 (prefix != addr || ifa->ifa_prefixlen < 32)) {
762 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
763 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
764
765 /* Add network specific broadcasts, when it takes a sense */
766 if (ifa->ifa_prefixlen < 31) {
767 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
768 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
769 }
770 }
771}
772
773static void fib_del_ifaddr(struct in_ifaddr *ifa)
774{
775 struct in_device *in_dev = ifa->ifa_dev;
776 struct net_device *dev = in_dev->dev;
777 struct in_ifaddr *ifa1;
778 struct in_ifaddr *prim = ifa;
a144ea4b
AV
779 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
780 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
781#define LOCAL_OK 1
782#define BRD_OK 2
783#define BRD0_OK 4
784#define BRD1_OK 8
785 unsigned ok = 0;
786
787 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
788 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
789 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
790 else {
791 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
792 if (prim == NULL) {
a6db9010 793 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
794 return;
795 }
796 }
797
798 /* Deletion is more complicated than add.
799 We should take care of not to delete too much :-)
800
801 Scan address list to be sure that addresses are really gone.
802 */
803
804 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
805 if (ifa->ifa_local == ifa1->ifa_local)
806 ok |= LOCAL_OK;
807 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
808 ok |= BRD_OK;
809 if (brd == ifa1->ifa_broadcast)
810 ok |= BRD1_OK;
811 if (any == ifa1->ifa_broadcast)
812 ok |= BRD0_OK;
813 }
814
815 if (!(ok&BRD_OK))
816 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
817 if (!(ok&BRD1_OK))
818 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
819 if (!(ok&BRD0_OK))
820 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
821 if (!(ok&LOCAL_OK)) {
822 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
823
824 /* Check, that this local address finally disappeared. */
c346dca1 825 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
826 /* And the last, but not the least thing.
827 We must flush stray FIB entries.
828
829 First of all, we scan fib_info list searching
830 for stray nexthop entries, then ignite fib_flush.
831 */
c346dca1
YH
832 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
833 fib_flush(dev_net(dev));
1da177e4
LT
834 }
835 }
836#undef LOCAL_OK
837#undef BRD_OK
838#undef BRD0_OK
839#undef BRD1_OK
840}
841
246955fe
RO
842static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
843{
e905a9ed 844
246955fe 845 struct fib_result res;
5f300893 846 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 847 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
848 .tos = frn->fl_tos,
849 .scope = frn->fl_scope } } };
1194ed0a 850
912a41a4
SV
851#ifdef CONFIG_IP_MULTIPLE_TABLES
852 res.r = NULL;
853#endif
854
1194ed0a 855 frn->err = -ENOENT;
246955fe
RO
856 if (tb) {
857 local_bh_disable();
858
859 frn->tb_id = tb->tb_id;
16c6cf8b 860 frn->err = fib_table_lookup(tb, &fl, &res);
246955fe
RO
861
862 if (!frn->err) {
863 frn->prefixlen = res.prefixlen;
864 frn->nh_sel = res.nh_sel;
865 frn->type = res.type;
866 frn->scope = res.scope;
1194ed0a 867 fib_res_put(&res);
246955fe
RO
868 }
869 local_bh_enable();
870 }
871}
872
28f7b036 873static void nl_fib_input(struct sk_buff *skb)
246955fe 874{
6bd48fcf 875 struct net *net;
246955fe 876 struct fib_result_nl *frn;
28f7b036 877 struct nlmsghdr *nlh;
246955fe 878 struct fib_table *tb;
28f7b036 879 u32 pid;
1194ed0a 880
3b1e0a65 881 net = sock_net(skb->sk);
b529ccf2 882 nlh = nlmsg_hdr(skb);
ea86575e 883 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 884 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 885 return;
d883a036
DL
886
887 skb = skb_clone(skb, GFP_KERNEL);
888 if (skb == NULL)
889 return;
890 nlh = nlmsg_hdr(skb);
e905a9ed 891
246955fe 892 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 893 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
894
895 nl_fib_lookup(frn, tb);
e905a9ed 896
1194ed0a 897 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 898 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 899 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 900 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 901}
246955fe 902
2c8c1e72 903static int __net_init nl_fib_lookup_init(struct net *net)
246955fe 904{
6bd48fcf
DL
905 struct sock *sk;
906 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
907 nl_fib_input, NULL, THIS_MODULE);
908 if (sk == NULL)
7b1a74fd 909 return -EAFNOSUPPORT;
6bd48fcf 910 net->ipv4.fibnl = sk;
7b1a74fd
DL
911 return 0;
912}
913
914static void nl_fib_lookup_exit(struct net *net)
915{
b7c6ba6e 916 netlink_kernel_release(net->ipv4.fibnl);
775516bf 917 net->ipv4.fibnl = NULL;
246955fe
RO
918}
919
/*
 * Take IP down on @dev: sync down the routes using the device (flushing
 * the tables if that reports anything), flush the route cache with the
 * given @delay, and notify ARP that the interface went down.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int stale = fib_sync_down_dev(dev, force);

	if (stale)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
927
928static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
929{
6ed2533e 930 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
76e6ebfb 931 struct net_device *dev = ifa->ifa_dev->dev;
1da177e4
LT
932
933 switch (event) {
934 case NETDEV_UP:
935 fib_add_ifaddr(ifa);
936#ifdef CONFIG_IP_ROUTE_MULTIPATH
76e6ebfb 937 fib_sync_up(dev);
1da177e4 938#endif
76e6ebfb 939 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
940 break;
941 case NETDEV_DOWN:
942 fib_del_ifaddr(ifa);
9fcc2e8a 943 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
944 /* Last address was deleted from this interface.
945 Disable IP.
946 */
e2ce1468 947 fib_disable_ip(dev, 1, 0);
1da177e4 948 } else {
76e6ebfb 949 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
950 }
951 break;
952 }
953 return NOTIFY_DONE;
954}
955
956static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
957{
958 struct net_device *dev = ptr;
e5ed6399 959 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
960
961 if (event == NETDEV_UNREGISTER) {
e2ce1468 962 fib_disable_ip(dev, 2, -1);
1da177e4
LT
963 return NOTIFY_DONE;
964 }
965
966 if (!in_dev)
967 return NOTIFY_DONE;
968
969 switch (event) {
970 case NETDEV_UP:
971 for_ifa(in_dev) {
972 fib_add_ifaddr(ifa);
973 } endfor_ifa(in_dev);
974#ifdef CONFIG_IP_ROUTE_MULTIPATH
975 fib_sync_up(dev);
976#endif
76e6ebfb 977 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
978 break;
979 case NETDEV_DOWN:
e2ce1468 980 fib_disable_ip(dev, 0, 0);
1da177e4
LT
981 break;
982 case NETDEV_CHANGEMTU:
983 case NETDEV_CHANGE:
76e6ebfb 984 rt_cache_flush(dev_net(dev), 0);
1da177e4 985 break;
a5ee1551
EB
986 case NETDEV_UNREGISTER_BATCH:
987 rt_cache_flush_batch();
988 break;
1da177e4
LT
989 }
990 return NOTIFY_DONE;
991}
992
993static struct notifier_block fib_inetaddr_notifier = {
6ed2533e 994 .notifier_call = fib_inetaddr_event,
1da177e4
LT
995};
996
997static struct notifier_block fib_netdev_notifier = {
6ed2533e 998 .notifier_call = fib_netdev_event,
1da177e4
LT
999};
1000
7b1a74fd 1001static int __net_init ip_fib_net_init(struct net *net)
1da177e4 1002{
dce5cbee 1003 int err;
1af5a8c4
PM
1004 unsigned int i;
1005
e4aef8ae
DL
1006 net->ipv4.fib_table_hash = kzalloc(
1007 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
1008 if (net->ipv4.fib_table_hash == NULL)
1009 return -ENOMEM;
1010
1af5a8c4 1011 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 1012 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 1013
dce5cbee
DL
1014 err = fib4_rules_init(net);
1015 if (err < 0)
1016 goto fail;
1017 return 0;
1018
1019fail:
1020 kfree(net->ipv4.fib_table_hash);
1021 return err;
7b1a74fd 1022}
1da177e4 1023
2c8c1e72 1024static void ip_fib_net_exit(struct net *net)
7b1a74fd
DL
1025{
1026 unsigned int i;
1027
1028#ifdef CONFIG_IP_MULTIPLE_TABLES
1029 fib4_rules_exit(net);
1030#endif
1031
1032 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1033 struct fib_table *tb;
1034 struct hlist_head *head;
1035 struct hlist_node *node, *tmp;
63f3444f 1036
e4aef8ae 1037 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
1038 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1039 hlist_del(node);
16c6cf8b 1040 fib_table_flush(tb);
7b1a74fd
DL
1041 kfree(tb);
1042 }
1043 }
e4aef8ae 1044 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1045}
1046
1047static int __net_init fib_net_init(struct net *net)
1048{
1049 int error;
1050
7b1a74fd
DL
1051 error = ip_fib_net_init(net);
1052 if (error < 0)
1053 goto out;
1054 error = nl_fib_lookup_init(net);
1055 if (error < 0)
1056 goto out_nlfl;
1057 error = fib_proc_init(net);
1058 if (error < 0)
1059 goto out_proc;
1060out:
1061 return error;
1062
1063out_proc:
1064 nl_fib_lookup_exit(net);
1065out_nlfl:
1066 ip_fib_net_exit(net);
1067 goto out;
1068}
1069
1070static void __net_exit fib_net_exit(struct net *net)
1071{
1072 fib_proc_exit(net);
1073 nl_fib_lookup_exit(net);
1074 ip_fib_net_exit(net);
1075}
1076
1077static struct pernet_operations fib_net_ops = {
1078 .init = fib_net_init,
1079 .exit = fib_net_exit,
1080};
1081
1082void __init ip_fib_init(void)
1083{
63f3444f
TG
1084 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1085 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1086 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1087
1088 register_pernet_subsys(&fib_net_ops);
1089 register_netdevice_notifier(&fib_netdev_notifier);
1090 register_inetaddr_notifier(&fib_inetaddr_notifier);
7f9b8052
SH
1091
1092 fib_hash_init();
1da177e4 1093}