]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
wimax: make functions local
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
1da177e4
LT
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
1da177e4
LT
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
4fc268d2 20#include <linux/capability.h>
1da177e4
LT
21#include <linux/types.h>
22#include <linux/kernel.h>
1da177e4
LT
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
14c85021 30#include <linux/inetdevice.h>
1da177e4 31#include <linux/netdevice.h>
1823730f 32#include <linux/if_addr.h>
1da177e4
LT
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
1da177e4 35#include <linux/init.h>
1af5a8c4 36#include <linux/list.h>
5a0e3ad6 37#include <linux/slab.h>
1da177e4
LT
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
1da177e4
LT
44#include <net/arp.h>
45#include <net/ip_fib.h>
63f3444f 46#include <net/rtnetlink.h>
1da177e4 47
1da177e4
LT
48#ifndef CONFIG_IP_MULTIPLE_TABLES
49
7b1a74fd 50static int __net_init fib4_rules_init(struct net *net)
c3e9a353 51{
93456b6d
DL
52 struct fib_table *local_table, *main_table;
53
7f9b8052 54 local_table = fib_hash_table(RT_TABLE_LOCAL);
93456b6d 55 if (local_table == NULL)
dbb50165
DL
56 return -ENOMEM;
57
7f9b8052 58 main_table = fib_hash_table(RT_TABLE_MAIN);
93456b6d 59 if (main_table == NULL)
dbb50165
DL
60 goto fail;
61
93456b6d 62 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 64 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
66 return 0;
67
68fail:
93456b6d 69 kfree(local_table);
dbb50165 70 return -ENOMEM;
c3e9a353 71}
1af5a8c4 72#else
1da177e4 73
8ad4942c 74struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
75{
76 struct fib_table *tb;
1af5a8c4 77 unsigned int h;
1da177e4 78
1af5a8c4
PM
79 if (id == 0)
80 id = RT_TABLE_MAIN;
8ad4942c 81 tb = fib_get_table(net, id);
1af5a8c4
PM
82 if (tb)
83 return tb;
7f9b8052
SH
84
85 tb = fib_hash_table(id);
1da177e4
LT
86 if (!tb)
87 return NULL;
1af5a8c4 88 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
90 return tb;
91}
92
8ad4942c 93struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
94{
95 struct fib_table *tb;
96 struct hlist_node *node;
e4aef8ae 97 struct hlist_head *head;
1af5a8c4 98 unsigned int h;
1da177e4 99
1af5a8c4
PM
100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 103
1af5a8c4 104 rcu_read_lock();
e4aef8ae
DL
105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
1da177e4
LT
115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
010278ec
DL
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
64c2d538
DL
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
010278ec 127 tb = fib_get_table(net, table);
64c2d538 128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
16c6cf8b 129 fib_table_select_default(tb, flp, res);
64c2d538
DL
130}
131
e4aef8ae 132static void fib_flush(struct net *net)
1da177e4
LT
133{
134 int flushed = 0;
1da177e4 135 struct fib_table *tb;
1af5a8c4 136 struct hlist_node *node;
e4aef8ae 137 struct hlist_head *head;
1af5a8c4 138 unsigned int h;
1da177e4 139
1af5a8c4 140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
16c6cf8b 143 flushed += fib_table_flush(tb);
1da177e4 144 }
1da177e4
LT
145
146 if (flushed)
76e6ebfb 147 rt_cache_flush(net, -1);
1da177e4
LT
148}
149
82efee14
ED
150/**
151 * __ip_dev_find - find the first device with a given source address.
152 * @net: the net namespace
153 * @addr: the source address
154 * @devref: if true, take a reference on the found device
155 *
156 * If a caller uses devref=false, it should be protected by RCU
1da177e4 157 */
82efee14 158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
1da177e4 159{
82efee14
ED
160 struct flowi fl = {
161 .nl_u = {
162 .ip4_u = {
163 .daddr = addr
164 }
165 },
166 .flags = FLOWI_FLAG_MATCH_ANY_IIF
167 };
168 struct fib_result res = { 0 };
1da177e4
LT
169 struct net_device *dev = NULL;
170
4465b469 171 if (fib_lookup(net, &fl, &res))
1da177e4
LT
172 return NULL;
173 if (res.type != RTN_LOCAL)
174 goto out;
175 dev = FIB_RES_DEV(res);
176
82efee14 177 if (dev && devref)
1da177e4
LT
178 dev_hold(dev);
179out:
180 fib_res_put(&res);
181 return dev;
182}
82efee14 183EXPORT_SYMBOL(__ip_dev_find);
1da177e4 184
05538116
LAT
185/*
186 * Find address type as if only "dev" was present in the system. If
187 * on_dev is NULL then all interfaces are taken into consideration.
188 */
6b175b26
EB
189static inline unsigned __inet_dev_addr_type(struct net *net,
190 const struct net_device *dev,
05538116 191 __be32 addr)
1da177e4
LT
192{
193 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
194 struct fib_result res;
195 unsigned ret = RTN_BROADCAST;
03cf786c 196 struct fib_table *local_table;
1da177e4 197
1e637c74 198 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1da177e4 199 return RTN_BROADCAST;
f97c1e0c 200 if (ipv4_is_multicast(addr))
1da177e4
LT
201 return RTN_MULTICAST;
202
203#ifdef CONFIG_IP_MULTIPLE_TABLES
204 res.r = NULL;
205#endif
e905a9ed 206
6b175b26 207 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 208 if (local_table) {
1da177e4 209 ret = RTN_UNICAST;
16c6cf8b 210 if (!fib_table_lookup(local_table, &fl, &res)) {
05538116
LAT
211 if (!dev || dev == res.fi->fib_dev)
212 ret = res.type;
1da177e4
LT
213 fib_res_put(&res);
214 }
215 }
216 return ret;
217}
218
6b175b26 219unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 220{
6b175b26 221 return __inet_dev_addr_type(net, NULL, addr);
05538116 222}
4bc2f18b 223EXPORT_SYMBOL(inet_addr_type);
05538116 224
6b175b26
EB
225unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
226 __be32 addr)
05538116 227{
6b175b26 228 return __inet_dev_addr_type(net, dev, addr);
05538116 229}
4bc2f18b 230EXPORT_SYMBOL(inet_dev_addr_type);
05538116 231
1da177e4
LT
232/* Given (packet source, input interface) and optional (dst, oif, tos):
233 - (main) check, that source is valid i.e. not broadcast or our local
234 address.
235 - figure out what "logical" interface this packet arrived
236 and calculate "specific destination" address.
237 - check, that packet arrived from expected physical interface.
238 */
239
d9c9df8c 240int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
b0c110ca 241 struct net_device *dev, __be32 *spec_dst,
242 u32 *itag, u32 mark)
1da177e4
LT
243{
244 struct in_device *in_dev;
245 struct flowi fl = { .nl_u = { .ip4_u =
246 { .daddr = src,
247 .saddr = dst,
248 .tos = tos } },
b0c110ca 249 .mark = mark,
1da177e4 250 .iif = oif };
b0c110ca 251
1da177e4 252 struct fib_result res;
8153a10c 253 int no_addr, rpf, accept_local;
6f86b325 254 bool dev_match;
1da177e4 255 int ret;
5b707aaa 256 struct net *net;
1da177e4 257
8153a10c 258 no_addr = rpf = accept_local = 0;
1da177e4 259 rcu_read_lock();
e5ed6399 260 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
261 if (in_dev) {
262 no_addr = in_dev->ifa_list == NULL;
263 rpf = IN_DEV_RPFILTER(in_dev);
8153a10c 264 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
28f6aeea
JHS
265 if (mark && !IN_DEV_SRC_VMARK(in_dev))
266 fl.mark = 0;
1da177e4
LT
267 }
268 rcu_read_unlock();
269
270 if (in_dev == NULL)
271 goto e_inval;
272
c346dca1 273 net = dev_net(dev);
5b707aaa 274 if (fib_lookup(net, &fl, &res))
1da177e4 275 goto last_resort;
8153a10c
PM
276 if (res.type != RTN_UNICAST) {
277 if (res.type != RTN_LOCAL || !accept_local)
278 goto e_inval_res;
279 }
1da177e4
LT
280 *spec_dst = FIB_RES_PREFSRC(res);
281 fib_combine_itag(itag, &res);
6f86b325
DM
282 dev_match = false;
283
1da177e4 284#ifdef CONFIG_IP_ROUTE_MULTIPATH
6f86b325
DM
285 for (ret = 0; ret < res.fi->fib_nhs; ret++) {
286 struct fib_nh *nh = &res.fi->fib_nh[ret];
287
288 if (nh->nh_dev == dev) {
289 dev_match = true;
290 break;
291 }
292 }
1da177e4
LT
293#else
294 if (FIB_RES_DEV(res) == dev)
6f86b325 295 dev_match = true;
1da177e4 296#endif
6f86b325 297 if (dev_match) {
1da177e4
LT
298 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
299 fib_res_put(&res);
300 return ret;
301 }
302 fib_res_put(&res);
303 if (no_addr)
304 goto last_resort;
c1cf8422 305 if (rpf == 1)
b5f7e755 306 goto e_rpf;
1da177e4
LT
307 fl.oif = dev->ifindex;
308
309 ret = 0;
5b707aaa 310 if (fib_lookup(net, &fl, &res) == 0) {
1da177e4
LT
311 if (res.type == RTN_UNICAST) {
312 *spec_dst = FIB_RES_PREFSRC(res);
313 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
314 }
315 fib_res_put(&res);
316 }
317 return ret;
318
319last_resort:
320 if (rpf)
b5f7e755 321 goto e_rpf;
1da177e4
LT
322 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
323 *itag = 0;
324 return 0;
325
326e_inval_res:
327 fib_res_put(&res);
328e_inval:
329 return -EINVAL;
b5f7e755
ED
330e_rpf:
331 return -EXDEV;
1da177e4
LT
332}
333
81f7bf6c 334static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
335{
336 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
337}
338
339static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
340{
341 struct nlattr *nla;
342
343 nla = (struct nlattr *) ((char *) mx + len);
344 nla->nla_type = type;
345 nla->nla_len = nla_attr_size(4);
346 *(u32 *) nla_data(nla) = value;
347
348 return len + nla_total_size(4);
349}
350
4b5d47d4 351static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
352 struct fib_config *cfg)
353{
6d85c10a 354 __be32 addr;
4e902c57
TG
355 int plen;
356
357 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 358 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
359
360 if (rt->rt_dst.sa_family != AF_INET)
361 return -EAFNOSUPPORT;
362
363 /*
364 * Check mask for validity:
365 * a) it must be contiguous.
366 * b) destination must have all host bits clear.
367 * c) if application forgot to set correct family (AF_INET),
368 * reject request unless it is absolutely clear i.e.
369 * both family and mask are zero.
370 */
371 plen = 32;
372 addr = sk_extract_addr(&rt->rt_dst);
373 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 374 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
375
376 if (rt->rt_genmask.sa_family != AF_INET) {
377 if (mask || rt->rt_genmask.sa_family)
378 return -EAFNOSUPPORT;
379 }
380
381 if (bad_mask(mask, addr))
382 return -EINVAL;
383
384 plen = inet_mask_len(mask);
385 }
386
387 cfg->fc_dst_len = plen;
388 cfg->fc_dst = addr;
389
390 if (cmd != SIOCDELRT) {
391 cfg->fc_nlflags = NLM_F_CREATE;
392 cfg->fc_protocol = RTPROT_BOOT;
393 }
394
395 if (rt->rt_metric)
396 cfg->fc_priority = rt->rt_metric - 1;
397
398 if (rt->rt_flags & RTF_REJECT) {
399 cfg->fc_scope = RT_SCOPE_HOST;
400 cfg->fc_type = RTN_UNREACHABLE;
401 return 0;
402 }
403
404 cfg->fc_scope = RT_SCOPE_NOWHERE;
405 cfg->fc_type = RTN_UNICAST;
406
407 if (rt->rt_dev) {
408 char *colon;
409 struct net_device *dev;
410 char devname[IFNAMSIZ];
411
412 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
413 return -EFAULT;
414
415 devname[IFNAMSIZ-1] = 0;
416 colon = strchr(devname, ':');
417 if (colon)
418 *colon = 0;
4b5d47d4 419 dev = __dev_get_by_name(net, devname);
4e902c57
TG
420 if (!dev)
421 return -ENODEV;
422 cfg->fc_oif = dev->ifindex;
423 if (colon) {
424 struct in_ifaddr *ifa;
425 struct in_device *in_dev = __in_dev_get_rtnl(dev);
426 if (!in_dev)
427 return -ENODEV;
428 *colon = ':';
429 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
430 if (strcmp(ifa->ifa_label, devname) == 0)
431 break;
432 if (ifa == NULL)
433 return -ENODEV;
434 cfg->fc_prefsrc = ifa->ifa_local;
435 }
436 }
437
438 addr = sk_extract_addr(&rt->rt_gateway);
439 if (rt->rt_gateway.sa_family == AF_INET && addr) {
440 cfg->fc_gw = addr;
441 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 442 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
443 cfg->fc_scope = RT_SCOPE_UNIVERSE;
444 }
445
446 if (cmd == SIOCDELRT)
447 return 0;
448
449 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
450 return -EINVAL;
451
452 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
453 cfg->fc_scope = RT_SCOPE_LINK;
454
455 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
456 struct nlattr *mx;
457 int len = 0;
458
459 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 460 if (mx == NULL)
4e902c57
TG
461 return -ENOMEM;
462
463 if (rt->rt_flags & RTF_MTU)
464 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
465
466 if (rt->rt_flags & RTF_WINDOW)
467 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
468
469 if (rt->rt_flags & RTF_IRTT)
470 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
471
472 cfg->fc_mx = mx;
473 cfg->fc_mx_len = len;
474 }
475
476 return 0;
477}
478
1da177e4
LT
479/*
480 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
481 */
e905a9ed 482
1bad118a 483int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 484{
4e902c57
TG
485 struct fib_config cfg;
486 struct rtentry rt;
1da177e4 487 int err;
1da177e4
LT
488
489 switch (cmd) {
490 case SIOCADDRT: /* Add a route */
491 case SIOCDELRT: /* Delete a route */
492 if (!capable(CAP_NET_ADMIN))
493 return -EPERM;
4e902c57
TG
494
495 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 496 return -EFAULT;
4e902c57 497
1da177e4 498 rtnl_lock();
1bad118a 499 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 500 if (err == 0) {
4e902c57
TG
501 struct fib_table *tb;
502
1da177e4 503 if (cmd == SIOCDELRT) {
1bad118a 504 tb = fib_get_table(net, cfg.fc_table);
1da177e4 505 if (tb)
16c6cf8b 506 err = fib_table_delete(tb, &cfg);
4e902c57
TG
507 else
508 err = -ESRCH;
1da177e4 509 } else {
1bad118a 510 tb = fib_new_table(net, cfg.fc_table);
1da177e4 511 if (tb)
16c6cf8b 512 err = fib_table_insert(tb, &cfg);
4e902c57
TG
513 else
514 err = -ENOBUFS;
1da177e4 515 }
4e902c57
TG
516
517 /* allocated by rtentry_to_fib_config() */
518 kfree(cfg.fc_mx);
1da177e4
LT
519 }
520 rtnl_unlock();
521 return err;
522 }
523 return -EINVAL;
524}
525
ef7c79ed 526const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
527 [RTA_DST] = { .type = NLA_U32 },
528 [RTA_SRC] = { .type = NLA_U32 },
529 [RTA_IIF] = { .type = NLA_U32 },
530 [RTA_OIF] = { .type = NLA_U32 },
531 [RTA_GATEWAY] = { .type = NLA_U32 },
532 [RTA_PRIORITY] = { .type = NLA_U32 },
533 [RTA_PREFSRC] = { .type = NLA_U32 },
534 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 535 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57 536 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
537};
538
4b5d47d4
DL
539static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
540 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 541{
4e902c57
TG
542 struct nlattr *attr;
543 int err, remaining;
544 struct rtmsg *rtm;
545
546 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
547 if (err < 0)
548 goto errout;
549
550 memset(cfg, 0, sizeof(*cfg));
551
552 rtm = nlmsg_data(nlh);
4e902c57 553 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
554 cfg->fc_tos = rtm->rtm_tos;
555 cfg->fc_table = rtm->rtm_table;
556 cfg->fc_protocol = rtm->rtm_protocol;
557 cfg->fc_scope = rtm->rtm_scope;
558 cfg->fc_type = rtm->rtm_type;
559 cfg->fc_flags = rtm->rtm_flags;
560 cfg->fc_nlflags = nlh->nlmsg_flags;
561
562 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
563 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 564 cfg->fc_nlinfo.nl_net = net;
4e902c57 565
a0ee18b9
TG
566 if (cfg->fc_type > RTN_MAX) {
567 err = -EINVAL;
568 goto errout;
569 }
570
4e902c57 571 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 572 switch (nla_type(attr)) {
4e902c57 573 case RTA_DST:
17fb2c64 574 cfg->fc_dst = nla_get_be32(attr);
4e902c57 575 break;
4e902c57
TG
576 case RTA_OIF:
577 cfg->fc_oif = nla_get_u32(attr);
578 break;
579 case RTA_GATEWAY:
17fb2c64 580 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
581 break;
582 case RTA_PRIORITY:
583 cfg->fc_priority = nla_get_u32(attr);
584 break;
585 case RTA_PREFSRC:
17fb2c64 586 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
587 break;
588 case RTA_METRICS:
589 cfg->fc_mx = nla_data(attr);
590 cfg->fc_mx_len = nla_len(attr);
591 break;
592 case RTA_MULTIPATH:
593 cfg->fc_mp = nla_data(attr);
594 cfg->fc_mp_len = nla_len(attr);
595 break;
596 case RTA_FLOW:
597 cfg->fc_flow = nla_get_u32(attr);
598 break;
4e902c57
TG
599 case RTA_TABLE:
600 cfg->fc_table = nla_get_u32(attr);
601 break;
1da177e4
LT
602 }
603 }
4e902c57 604
1da177e4 605 return 0;
4e902c57
TG
606errout:
607 return err;
1da177e4
LT
608}
609
6ed2533e 610static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 611{
3b1e0a65 612 struct net *net = sock_net(skb->sk);
4e902c57
TG
613 struct fib_config cfg;
614 struct fib_table *tb;
615 int err;
1da177e4 616
4b5d47d4 617 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
618 if (err < 0)
619 goto errout;
1da177e4 620
8ad4942c 621 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
622 if (tb == NULL) {
623 err = -ESRCH;
624 goto errout;
625 }
626
16c6cf8b 627 err = fib_table_delete(tb, &cfg);
4e902c57
TG
628errout:
629 return err;
1da177e4
LT
630}
631
6ed2533e 632static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 633{
3b1e0a65 634 struct net *net = sock_net(skb->sk);
4e902c57
TG
635 struct fib_config cfg;
636 struct fib_table *tb;
637 int err;
1da177e4 638
4b5d47d4 639 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
640 if (err < 0)
641 goto errout;
1da177e4 642
226b0b4a 643 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
644 if (tb == NULL) {
645 err = -ENOBUFS;
646 goto errout;
647 }
648
16c6cf8b 649 err = fib_table_insert(tb, &cfg);
4e902c57
TG
650errout:
651 return err;
1da177e4
LT
652}
653
63f3444f 654static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 655{
3b1e0a65 656 struct net *net = sock_net(skb->sk);
1af5a8c4
PM
657 unsigned int h, s_h;
658 unsigned int e = 0, s_e;
1da177e4 659 struct fib_table *tb;
1af5a8c4 660 struct hlist_node *node;
e4aef8ae 661 struct hlist_head *head;
1af5a8c4 662 int dumped = 0;
1da177e4 663
be403ea1
TG
664 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
665 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
666 return ip_rt_dump(skb, cb);
667
1af5a8c4
PM
668 s_h = cb->args[0];
669 s_e = cb->args[1];
670
671 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
672 e = 0;
e4aef8ae
DL
673 head = &net->ipv4.fib_table_hash[h];
674 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
675 if (e < s_e)
676 goto next;
677 if (dumped)
678 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 679 2 * sizeof(cb->args[0]));
16c6cf8b 680 if (fib_table_dump(tb, skb, cb) < 0)
1af5a8c4
PM
681 goto out;
682 dumped = 1;
683next:
684 e++;
685 }
1da177e4 686 }
1af5a8c4
PM
687out:
688 cb->args[1] = e;
689 cb->args[0] = h;
1da177e4
LT
690
691 return skb->len;
692}
693
694/* Prepare and feed intra-kernel routing request.
695 Really, it should be netlink message, but :-( netlink
696 can be not configured, so that we feed it directly
697 to fib engine. It is legal, because all events occur
698 only when netlink is already locked.
699 */
700
81f7bf6c 701static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 702{
c346dca1 703 struct net *net = dev_net(ifa->ifa_dev->dev);
4e902c57
TG
704 struct fib_table *tb;
705 struct fib_config cfg = {
706 .fc_protocol = RTPROT_KERNEL,
707 .fc_type = type,
708 .fc_dst = dst,
709 .fc_dst_len = dst_len,
710 .fc_prefsrc = ifa->ifa_local,
711 .fc_oif = ifa->ifa_dev->dev->ifindex,
712 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 713 .fc_nlinfo = {
4b5d47d4 714 .nl_net = net,
4d1169c1 715 },
4e902c57 716 };
1da177e4
LT
717
718 if (type == RTN_UNICAST)
4b5d47d4 719 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 720 else
4b5d47d4 721 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
722
723 if (tb == NULL)
724 return;
725
4e902c57 726 cfg.fc_table = tb->tb_id;
1da177e4 727
4e902c57
TG
728 if (type != RTN_LOCAL)
729 cfg.fc_scope = RT_SCOPE_LINK;
730 else
731 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
732
733 if (cmd == RTM_NEWROUTE)
16c6cf8b 734 fib_table_insert(tb, &cfg);
1da177e4 735 else
16c6cf8b 736 fib_table_delete(tb, &cfg);
1da177e4
LT
737}
738
0ff60a45 739void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
740{
741 struct in_device *in_dev = ifa->ifa_dev;
742 struct net_device *dev = in_dev->dev;
743 struct in_ifaddr *prim = ifa;
a144ea4b
AV
744 __be32 mask = ifa->ifa_mask;
745 __be32 addr = ifa->ifa_local;
746 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
747
748 if (ifa->ifa_flags&IFA_F_SECONDARY) {
749 prim = inet_ifa_byprefix(in_dev, prefix, mask);
750 if (prim == NULL) {
a6db9010 751 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
752 return;
753 }
754 }
755
756 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
757
758 if (!(dev->flags&IFF_UP))
759 return;
760
761 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 762 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
763 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
764
f97c1e0c 765 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
766 (prefix != addr || ifa->ifa_prefixlen < 32)) {
767 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
768 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
769
770 /* Add network specific broadcasts, when it takes a sense */
771 if (ifa->ifa_prefixlen < 31) {
772 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
773 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
774 }
775 }
776}
777
778static void fib_del_ifaddr(struct in_ifaddr *ifa)
779{
780 struct in_device *in_dev = ifa->ifa_dev;
781 struct net_device *dev = in_dev->dev;
782 struct in_ifaddr *ifa1;
783 struct in_ifaddr *prim = ifa;
a144ea4b
AV
784 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
785 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
786#define LOCAL_OK 1
787#define BRD_OK 2
788#define BRD0_OK 4
789#define BRD1_OK 8
790 unsigned ok = 0;
791
792 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
793 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
794 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
795 else {
796 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
797 if (prim == NULL) {
a6db9010 798 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
799 return;
800 }
801 }
802
803 /* Deletion is more complicated than add.
804 We should take care of not to delete too much :-)
805
806 Scan address list to be sure that addresses are really gone.
807 */
808
809 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
810 if (ifa->ifa_local == ifa1->ifa_local)
811 ok |= LOCAL_OK;
812 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
813 ok |= BRD_OK;
814 if (brd == ifa1->ifa_broadcast)
815 ok |= BRD1_OK;
816 if (any == ifa1->ifa_broadcast)
817 ok |= BRD0_OK;
818 }
819
820 if (!(ok&BRD_OK))
821 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
822 if (!(ok&BRD1_OK))
823 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
824 if (!(ok&BRD0_OK))
825 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
826 if (!(ok&LOCAL_OK)) {
827 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
828
829 /* Check, that this local address finally disappeared. */
c346dca1 830 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
831 /* And the last, but not the least thing.
832 We must flush stray FIB entries.
833
834 First of all, we scan fib_info list searching
835 for stray nexthop entries, then ignite fib_flush.
836 */
c346dca1
YH
837 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
838 fib_flush(dev_net(dev));
1da177e4
LT
839 }
840 }
841#undef LOCAL_OK
842#undef BRD_OK
843#undef BRD0_OK
844#undef BRD1_OK
845}
846
246955fe
RO
847static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
848{
e905a9ed 849
246955fe 850 struct fib_result res;
5f300893 851 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 852 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
853 .tos = frn->fl_tos,
854 .scope = frn->fl_scope } } };
1194ed0a 855
912a41a4
SV
856#ifdef CONFIG_IP_MULTIPLE_TABLES
857 res.r = NULL;
858#endif
859
1194ed0a 860 frn->err = -ENOENT;
246955fe
RO
861 if (tb) {
862 local_bh_disable();
863
864 frn->tb_id = tb->tb_id;
16c6cf8b 865 frn->err = fib_table_lookup(tb, &fl, &res);
246955fe
RO
866
867 if (!frn->err) {
868 frn->prefixlen = res.prefixlen;
869 frn->nh_sel = res.nh_sel;
870 frn->type = res.type;
871 frn->scope = res.scope;
1194ed0a 872 fib_res_put(&res);
246955fe
RO
873 }
874 local_bh_enable();
875 }
876}
877
28f7b036 878static void nl_fib_input(struct sk_buff *skb)
246955fe 879{
6bd48fcf 880 struct net *net;
246955fe 881 struct fib_result_nl *frn;
28f7b036 882 struct nlmsghdr *nlh;
246955fe 883 struct fib_table *tb;
28f7b036 884 u32 pid;
1194ed0a 885
3b1e0a65 886 net = sock_net(skb->sk);
b529ccf2 887 nlh = nlmsg_hdr(skb);
ea86575e 888 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 889 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 890 return;
d883a036
DL
891
892 skb = skb_clone(skb, GFP_KERNEL);
893 if (skb == NULL)
894 return;
895 nlh = nlmsg_hdr(skb);
e905a9ed 896
246955fe 897 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 898 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
899
900 nl_fib_lookup(frn, tb);
e905a9ed 901
1194ed0a 902 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 903 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 904 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 905 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 906}
246955fe 907
2c8c1e72 908static int __net_init nl_fib_lookup_init(struct net *net)
246955fe 909{
6bd48fcf
DL
910 struct sock *sk;
911 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
912 nl_fib_input, NULL, THIS_MODULE);
913 if (sk == NULL)
7b1a74fd 914 return -EAFNOSUPPORT;
6bd48fcf 915 net->ipv4.fibnl = sk;
7b1a74fd
DL
916 return 0;
917}
918
919static void nl_fib_lookup_exit(struct net *net)
920{
b7c6ba6e 921 netlink_kernel_release(net->ipv4.fibnl);
775516bf 922 net->ipv4.fibnl = NULL;
246955fe
RO
923}
924
/*
 * Stop using @dev for IPv4 routing: sync FIB state for the device going
 * down (flushing the tables if that reports changes), flush the routing
 * cache with the given @delay, and bring ARP state for the device down.
 * @force is passed through to fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int changed = fib_sync_down_dev(dev, force);

	if (changed)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
932
933static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
934{
6ed2533e 935 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
76e6ebfb 936 struct net_device *dev = ifa->ifa_dev->dev;
1da177e4
LT
937
938 switch (event) {
939 case NETDEV_UP:
940 fib_add_ifaddr(ifa);
941#ifdef CONFIG_IP_ROUTE_MULTIPATH
76e6ebfb 942 fib_sync_up(dev);
1da177e4 943#endif
76e6ebfb 944 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
945 break;
946 case NETDEV_DOWN:
947 fib_del_ifaddr(ifa);
9fcc2e8a 948 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
949 /* Last address was deleted from this interface.
950 Disable IP.
951 */
e2ce1468 952 fib_disable_ip(dev, 1, 0);
1da177e4 953 } else {
76e6ebfb 954 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
955 }
956 break;
957 }
958 return NOTIFY_DONE;
959}
960
961static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
962{
963 struct net_device *dev = ptr;
e5ed6399 964 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
965
966 if (event == NETDEV_UNREGISTER) {
e2ce1468 967 fib_disable_ip(dev, 2, -1);
1da177e4
LT
968 return NOTIFY_DONE;
969 }
970
971 if (!in_dev)
972 return NOTIFY_DONE;
973
974 switch (event) {
975 case NETDEV_UP:
976 for_ifa(in_dev) {
977 fib_add_ifaddr(ifa);
978 } endfor_ifa(in_dev);
979#ifdef CONFIG_IP_ROUTE_MULTIPATH
980 fib_sync_up(dev);
981#endif
76e6ebfb 982 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
983 break;
984 case NETDEV_DOWN:
e2ce1468 985 fib_disable_ip(dev, 0, 0);
1da177e4
LT
986 break;
987 case NETDEV_CHANGEMTU:
988 case NETDEV_CHANGE:
76e6ebfb 989 rt_cache_flush(dev_net(dev), 0);
1da177e4 990 break;
a5ee1551
EB
991 case NETDEV_UNREGISTER_BATCH:
992 rt_cache_flush_batch();
993 break;
1da177e4
LT
994 }
995 return NOTIFY_DONE;
996}
997
998static struct notifier_block fib_inetaddr_notifier = {
6ed2533e 999 .notifier_call = fib_inetaddr_event,
1da177e4
LT
1000};
1001
1002static struct notifier_block fib_netdev_notifier = {
6ed2533e 1003 .notifier_call = fib_netdev_event,
1da177e4
LT
1004};
1005
7b1a74fd 1006static int __net_init ip_fib_net_init(struct net *net)
1da177e4 1007{
dce5cbee 1008 int err;
1af5a8c4
PM
1009 unsigned int i;
1010
e4aef8ae
DL
1011 net->ipv4.fib_table_hash = kzalloc(
1012 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
1013 if (net->ipv4.fib_table_hash == NULL)
1014 return -ENOMEM;
1015
1af5a8c4 1016 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 1017 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 1018
dce5cbee
DL
1019 err = fib4_rules_init(net);
1020 if (err < 0)
1021 goto fail;
1022 return 0;
1023
1024fail:
1025 kfree(net->ipv4.fib_table_hash);
1026 return err;
7b1a74fd 1027}
1da177e4 1028
2c8c1e72 1029static void ip_fib_net_exit(struct net *net)
7b1a74fd
DL
1030{
1031 unsigned int i;
1032
1033#ifdef CONFIG_IP_MULTIPLE_TABLES
1034 fib4_rules_exit(net);
1035#endif
1036
1037 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1038 struct fib_table *tb;
1039 struct hlist_head *head;
1040 struct hlist_node *node, *tmp;
63f3444f 1041
e4aef8ae 1042 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
1043 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1044 hlist_del(node);
16c6cf8b 1045 fib_table_flush(tb);
7b1a74fd
DL
1046 kfree(tb);
1047 }
1048 }
e4aef8ae 1049 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1050}
1051
1052static int __net_init fib_net_init(struct net *net)
1053{
1054 int error;
1055
7b1a74fd
DL
1056 error = ip_fib_net_init(net);
1057 if (error < 0)
1058 goto out;
1059 error = nl_fib_lookup_init(net);
1060 if (error < 0)
1061 goto out_nlfl;
1062 error = fib_proc_init(net);
1063 if (error < 0)
1064 goto out_proc;
1065out:
1066 return error;
1067
1068out_proc:
1069 nl_fib_lookup_exit(net);
1070out_nlfl:
1071 ip_fib_net_exit(net);
1072 goto out;
1073}
1074
1075static void __net_exit fib_net_exit(struct net *net)
1076{
1077 fib_proc_exit(net);
1078 nl_fib_lookup_exit(net);
1079 ip_fib_net_exit(net);
1080}
1081
1082static struct pernet_operations fib_net_ops = {
1083 .init = fib_net_init,
1084 .exit = fib_net_exit,
1085};
1086
1087void __init ip_fib_init(void)
1088{
63f3444f
TG
1089 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1090 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1091 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1092
1093 register_pernet_subsys(&fib_net_ops);
1094 register_netdevice_notifier(&fib_netdev_notifier);
1095 register_inetaddr_notifier(&fib_inetaddr_notifier);
7f9b8052
SH
1096
1097 fib_hash_init();
1da177e4 1098}