]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
bnx2x: Initialize cnic status block during chip reset
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
1da177e4
LT
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
1da177e4
LT
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
4fc268d2 20#include <linux/capability.h>
1da177e4
LT
21#include <linux/types.h>
22#include <linux/kernel.h>
1da177e4
LT
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
14c85021 30#include <linux/inetdevice.h>
1da177e4 31#include <linux/netdevice.h>
1823730f 32#include <linux/if_addr.h>
1da177e4
LT
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
1da177e4 35#include <linux/init.h>
1af5a8c4 36#include <linux/list.h>
1da177e4
LT
37
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
1da177e4
LT
43#include <net/arp.h>
44#include <net/ip_fib.h>
63f3444f 45#include <net/rtnetlink.h>
1da177e4 46
1da177e4
LT
47#ifndef CONFIG_IP_MULTIPLE_TABLES
48
7b1a74fd 49static int __net_init fib4_rules_init(struct net *net)
c3e9a353 50{
93456b6d
DL
51 struct fib_table *local_table, *main_table;
52
7f9b8052 53 local_table = fib_hash_table(RT_TABLE_LOCAL);
93456b6d 54 if (local_table == NULL)
dbb50165
DL
55 return -ENOMEM;
56
7f9b8052 57 main_table = fib_hash_table(RT_TABLE_MAIN);
93456b6d 58 if (main_table == NULL)
dbb50165
DL
59 goto fail;
60
93456b6d 61 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 62 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 63 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 64 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
65 return 0;
66
67fail:
93456b6d 68 kfree(local_table);
dbb50165 69 return -ENOMEM;
c3e9a353 70}
1af5a8c4 71#else
1da177e4 72
8ad4942c 73struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
74{
75 struct fib_table *tb;
1af5a8c4 76 unsigned int h;
1da177e4 77
1af5a8c4
PM
78 if (id == 0)
79 id = RT_TABLE_MAIN;
8ad4942c 80 tb = fib_get_table(net, id);
1af5a8c4
PM
81 if (tb)
82 return tb;
7f9b8052
SH
83
84 tb = fib_hash_table(id);
1da177e4
LT
85 if (!tb)
86 return NULL;
1af5a8c4 87 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 88 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
89 return tb;
90}
91
8ad4942c 92struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
93{
94 struct fib_table *tb;
95 struct hlist_node *node;
e4aef8ae 96 struct hlist_head *head;
1af5a8c4 97 unsigned int h;
1da177e4 98
1af5a8c4
PM
99 if (id == 0)
100 id = RT_TABLE_MAIN;
101 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 102
1af5a8c4 103 rcu_read_lock();
e4aef8ae
DL
104 head = &net->ipv4.fib_table_hash[h];
105 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
106 if (tb->tb_id == id) {
107 rcu_read_unlock();
108 return tb;
109 }
110 }
111 rcu_read_unlock();
112 return NULL;
113}
1da177e4
LT
114#endif /* CONFIG_IP_MULTIPLE_TABLES */
115
010278ec
DL
116void fib_select_default(struct net *net,
117 const struct flowi *flp, struct fib_result *res)
64c2d538
DL
118{
119 struct fib_table *tb;
120 int table = RT_TABLE_MAIN;
121#ifdef CONFIG_IP_MULTIPLE_TABLES
122 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123 return;
124 table = res->r->table;
125#endif
010278ec 126 tb = fib_get_table(net, table);
64c2d538 127 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
16c6cf8b 128 fib_table_select_default(tb, flp, res);
64c2d538
DL
129}
130
e4aef8ae 131static void fib_flush(struct net *net)
1da177e4
LT
132{
133 int flushed = 0;
1da177e4 134 struct fib_table *tb;
1af5a8c4 135 struct hlist_node *node;
e4aef8ae 136 struct hlist_head *head;
1af5a8c4 137 unsigned int h;
1da177e4 138
1af5a8c4 139 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
140 head = &net->ipv4.fib_table_hash[h];
141 hlist_for_each_entry(tb, node, head, tb_hlist)
16c6cf8b 142 flushed += fib_table_flush(tb);
1da177e4 143 }
1da177e4
LT
144
145 if (flushed)
76e6ebfb 146 rt_cache_flush(net, -1);
1da177e4
LT
147}
148
149/*
150 * Find the first device with a given source address.
151 */
152
1ab35276 153struct net_device * ip_dev_find(struct net *net, __be32 addr)
1da177e4
LT
154{
155 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156 struct fib_result res;
157 struct net_device *dev = NULL;
03cf786c 158 struct fib_table *local_table;
1da177e4
LT
159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161 res.r = NULL;
162#endif
163
1ab35276 164 local_table = fib_get_table(net, RT_TABLE_LOCAL);
16c6cf8b 165 if (!local_table || fib_table_lookup(local_table, &fl, &res))
1da177e4
LT
166 return NULL;
167 if (res.type != RTN_LOCAL)
168 goto out;
169 dev = FIB_RES_DEV(res);
170
171 if (dev)
172 dev_hold(dev);
173out:
174 fib_res_put(&res);
175 return dev;
176}
177
05538116
LAT
178/*
179 * Find address type as if only "dev" was present in the system. If
180 * on_dev is NULL then all interfaces are taken into consideration.
181 */
6b175b26
EB
182static inline unsigned __inet_dev_addr_type(struct net *net,
183 const struct net_device *dev,
05538116 184 __be32 addr)
1da177e4
LT
185{
186 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187 struct fib_result res;
188 unsigned ret = RTN_BROADCAST;
03cf786c 189 struct fib_table *local_table;
1da177e4 190
1e637c74 191 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1da177e4 192 return RTN_BROADCAST;
f97c1e0c 193 if (ipv4_is_multicast(addr))
1da177e4
LT
194 return RTN_MULTICAST;
195
196#ifdef CONFIG_IP_MULTIPLE_TABLES
197 res.r = NULL;
198#endif
e905a9ed 199
6b175b26 200 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 201 if (local_table) {
1da177e4 202 ret = RTN_UNICAST;
16c6cf8b 203 if (!fib_table_lookup(local_table, &fl, &res)) {
05538116
LAT
204 if (!dev || dev == res.fi->fib_dev)
205 ret = res.type;
1da177e4
LT
206 fib_res_put(&res);
207 }
208 }
209 return ret;
210}
211
6b175b26 212unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 213{
6b175b26 214 return __inet_dev_addr_type(net, NULL, addr);
05538116
LAT
215}
216
6b175b26
EB
217unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218 __be32 addr)
05538116 219{
6b175b26 220 return __inet_dev_addr_type(net, dev, addr);
05538116
LAT
221}
222
1da177e4
LT
223/* Given (packet source, input interface) and optional (dst, oif, tos):
224 - (main) check, that source is valid i.e. not broadcast or our local
225 address.
226 - figure out what "logical" interface this packet arrived
227 and calculate "specific destination" address.
228 - check, that packet arrived from expected physical interface.
229 */
230
d9c9df8c 231int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
b0c110ca 232 struct net_device *dev, __be32 *spec_dst,
233 u32 *itag, u32 mark)
1da177e4
LT
234{
235 struct in_device *in_dev;
236 struct flowi fl = { .nl_u = { .ip4_u =
237 { .daddr = src,
238 .saddr = dst,
239 .tos = tos } },
b0c110ca 240 .mark = mark,
1da177e4 241 .iif = oif };
b0c110ca 242
1da177e4 243 struct fib_result res;
8153a10c 244 int no_addr, rpf, accept_local;
1da177e4 245 int ret;
5b707aaa 246 struct net *net;
1da177e4 247
8153a10c 248 no_addr = rpf = accept_local = 0;
1da177e4 249 rcu_read_lock();
e5ed6399 250 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
251 if (in_dev) {
252 no_addr = in_dev->ifa_list == NULL;
253 rpf = IN_DEV_RPFILTER(in_dev);
8153a10c 254 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
1da177e4
LT
255 }
256 rcu_read_unlock();
257
258 if (in_dev == NULL)
259 goto e_inval;
260
c346dca1 261 net = dev_net(dev);
5b707aaa 262 if (fib_lookup(net, &fl, &res))
1da177e4 263 goto last_resort;
8153a10c
PM
264 if (res.type != RTN_UNICAST) {
265 if (res.type != RTN_LOCAL || !accept_local)
266 goto e_inval_res;
267 }
1da177e4
LT
268 *spec_dst = FIB_RES_PREFSRC(res);
269 fib_combine_itag(itag, &res);
270#ifdef CONFIG_IP_ROUTE_MULTIPATH
271 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
272#else
273 if (FIB_RES_DEV(res) == dev)
274#endif
275 {
276 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
277 fib_res_put(&res);
278 return ret;
279 }
280 fib_res_put(&res);
281 if (no_addr)
282 goto last_resort;
c1cf8422 283 if (rpf == 1)
1da177e4
LT
284 goto e_inval;
285 fl.oif = dev->ifindex;
286
287 ret = 0;
5b707aaa 288 if (fib_lookup(net, &fl, &res) == 0) {
1da177e4
LT
289 if (res.type == RTN_UNICAST) {
290 *spec_dst = FIB_RES_PREFSRC(res);
291 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
292 }
293 fib_res_put(&res);
294 }
295 return ret;
296
297last_resort:
298 if (rpf)
299 goto e_inval;
300 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
301 *itag = 0;
302 return 0;
303
304e_inval_res:
305 fib_res_put(&res);
306e_inval:
307 return -EINVAL;
308}
309
81f7bf6c 310static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
311{
312 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
313}
314
315static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
316{
317 struct nlattr *nla;
318
319 nla = (struct nlattr *) ((char *) mx + len);
320 nla->nla_type = type;
321 nla->nla_len = nla_attr_size(4);
322 *(u32 *) nla_data(nla) = value;
323
324 return len + nla_total_size(4);
325}
326
4b5d47d4 327static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
328 struct fib_config *cfg)
329{
6d85c10a 330 __be32 addr;
4e902c57
TG
331 int plen;
332
333 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 334 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
335
336 if (rt->rt_dst.sa_family != AF_INET)
337 return -EAFNOSUPPORT;
338
339 /*
340 * Check mask for validity:
341 * a) it must be contiguous.
342 * b) destination must have all host bits clear.
343 * c) if application forgot to set correct family (AF_INET),
344 * reject request unless it is absolutely clear i.e.
345 * both family and mask are zero.
346 */
347 plen = 32;
348 addr = sk_extract_addr(&rt->rt_dst);
349 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 350 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
351
352 if (rt->rt_genmask.sa_family != AF_INET) {
353 if (mask || rt->rt_genmask.sa_family)
354 return -EAFNOSUPPORT;
355 }
356
357 if (bad_mask(mask, addr))
358 return -EINVAL;
359
360 plen = inet_mask_len(mask);
361 }
362
363 cfg->fc_dst_len = plen;
364 cfg->fc_dst = addr;
365
366 if (cmd != SIOCDELRT) {
367 cfg->fc_nlflags = NLM_F_CREATE;
368 cfg->fc_protocol = RTPROT_BOOT;
369 }
370
371 if (rt->rt_metric)
372 cfg->fc_priority = rt->rt_metric - 1;
373
374 if (rt->rt_flags & RTF_REJECT) {
375 cfg->fc_scope = RT_SCOPE_HOST;
376 cfg->fc_type = RTN_UNREACHABLE;
377 return 0;
378 }
379
380 cfg->fc_scope = RT_SCOPE_NOWHERE;
381 cfg->fc_type = RTN_UNICAST;
382
383 if (rt->rt_dev) {
384 char *colon;
385 struct net_device *dev;
386 char devname[IFNAMSIZ];
387
388 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
389 return -EFAULT;
390
391 devname[IFNAMSIZ-1] = 0;
392 colon = strchr(devname, ':');
393 if (colon)
394 *colon = 0;
4b5d47d4 395 dev = __dev_get_by_name(net, devname);
4e902c57
TG
396 if (!dev)
397 return -ENODEV;
398 cfg->fc_oif = dev->ifindex;
399 if (colon) {
400 struct in_ifaddr *ifa;
401 struct in_device *in_dev = __in_dev_get_rtnl(dev);
402 if (!in_dev)
403 return -ENODEV;
404 *colon = ':';
405 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
406 if (strcmp(ifa->ifa_label, devname) == 0)
407 break;
408 if (ifa == NULL)
409 return -ENODEV;
410 cfg->fc_prefsrc = ifa->ifa_local;
411 }
412 }
413
414 addr = sk_extract_addr(&rt->rt_gateway);
415 if (rt->rt_gateway.sa_family == AF_INET && addr) {
416 cfg->fc_gw = addr;
417 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 418 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
419 cfg->fc_scope = RT_SCOPE_UNIVERSE;
420 }
421
422 if (cmd == SIOCDELRT)
423 return 0;
424
425 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
426 return -EINVAL;
427
428 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
429 cfg->fc_scope = RT_SCOPE_LINK;
430
431 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
432 struct nlattr *mx;
433 int len = 0;
434
435 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 436 if (mx == NULL)
4e902c57
TG
437 return -ENOMEM;
438
439 if (rt->rt_flags & RTF_MTU)
440 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
441
442 if (rt->rt_flags & RTF_WINDOW)
443 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
444
445 if (rt->rt_flags & RTF_IRTT)
446 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
447
448 cfg->fc_mx = mx;
449 cfg->fc_mx_len = len;
450 }
451
452 return 0;
453}
454
1da177e4
LT
455/*
456 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
457 */
e905a9ed 458
1bad118a 459int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 460{
4e902c57
TG
461 struct fib_config cfg;
462 struct rtentry rt;
1da177e4 463 int err;
1da177e4
LT
464
465 switch (cmd) {
466 case SIOCADDRT: /* Add a route */
467 case SIOCDELRT: /* Delete a route */
468 if (!capable(CAP_NET_ADMIN))
469 return -EPERM;
4e902c57
TG
470
471 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 472 return -EFAULT;
4e902c57 473
1da177e4 474 rtnl_lock();
1bad118a 475 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 476 if (err == 0) {
4e902c57
TG
477 struct fib_table *tb;
478
1da177e4 479 if (cmd == SIOCDELRT) {
1bad118a 480 tb = fib_get_table(net, cfg.fc_table);
1da177e4 481 if (tb)
16c6cf8b 482 err = fib_table_delete(tb, &cfg);
4e902c57
TG
483 else
484 err = -ESRCH;
1da177e4 485 } else {
1bad118a 486 tb = fib_new_table(net, cfg.fc_table);
1da177e4 487 if (tb)
16c6cf8b 488 err = fib_table_insert(tb, &cfg);
4e902c57
TG
489 else
490 err = -ENOBUFS;
1da177e4 491 }
4e902c57
TG
492
493 /* allocated by rtentry_to_fib_config() */
494 kfree(cfg.fc_mx);
1da177e4
LT
495 }
496 rtnl_unlock();
497 return err;
498 }
499 return -EINVAL;
500}
501
ef7c79ed 502const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
503 [RTA_DST] = { .type = NLA_U32 },
504 [RTA_SRC] = { .type = NLA_U32 },
505 [RTA_IIF] = { .type = NLA_U32 },
506 [RTA_OIF] = { .type = NLA_U32 },
507 [RTA_GATEWAY] = { .type = NLA_U32 },
508 [RTA_PRIORITY] = { .type = NLA_U32 },
509 [RTA_PREFSRC] = { .type = NLA_U32 },
510 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 511 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57 512 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
513};
514
4b5d47d4
DL
515static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
516 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 517{
4e902c57
TG
518 struct nlattr *attr;
519 int err, remaining;
520 struct rtmsg *rtm;
521
522 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
523 if (err < 0)
524 goto errout;
525
526 memset(cfg, 0, sizeof(*cfg));
527
528 rtm = nlmsg_data(nlh);
4e902c57 529 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
530 cfg->fc_tos = rtm->rtm_tos;
531 cfg->fc_table = rtm->rtm_table;
532 cfg->fc_protocol = rtm->rtm_protocol;
533 cfg->fc_scope = rtm->rtm_scope;
534 cfg->fc_type = rtm->rtm_type;
535 cfg->fc_flags = rtm->rtm_flags;
536 cfg->fc_nlflags = nlh->nlmsg_flags;
537
538 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
539 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 540 cfg->fc_nlinfo.nl_net = net;
4e902c57 541
a0ee18b9
TG
542 if (cfg->fc_type > RTN_MAX) {
543 err = -EINVAL;
544 goto errout;
545 }
546
4e902c57 547 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 548 switch (nla_type(attr)) {
4e902c57 549 case RTA_DST:
17fb2c64 550 cfg->fc_dst = nla_get_be32(attr);
4e902c57 551 break;
4e902c57
TG
552 case RTA_OIF:
553 cfg->fc_oif = nla_get_u32(attr);
554 break;
555 case RTA_GATEWAY:
17fb2c64 556 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
557 break;
558 case RTA_PRIORITY:
559 cfg->fc_priority = nla_get_u32(attr);
560 break;
561 case RTA_PREFSRC:
17fb2c64 562 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
563 break;
564 case RTA_METRICS:
565 cfg->fc_mx = nla_data(attr);
566 cfg->fc_mx_len = nla_len(attr);
567 break;
568 case RTA_MULTIPATH:
569 cfg->fc_mp = nla_data(attr);
570 cfg->fc_mp_len = nla_len(attr);
571 break;
572 case RTA_FLOW:
573 cfg->fc_flow = nla_get_u32(attr);
574 break;
4e902c57
TG
575 case RTA_TABLE:
576 cfg->fc_table = nla_get_u32(attr);
577 break;
1da177e4
LT
578 }
579 }
4e902c57 580
1da177e4 581 return 0;
4e902c57
TG
582errout:
583 return err;
1da177e4
LT
584}
585
6ed2533e 586static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 587{
3b1e0a65 588 struct net *net = sock_net(skb->sk);
4e902c57
TG
589 struct fib_config cfg;
590 struct fib_table *tb;
591 int err;
1da177e4 592
4b5d47d4 593 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
594 if (err < 0)
595 goto errout;
1da177e4 596
8ad4942c 597 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
598 if (tb == NULL) {
599 err = -ESRCH;
600 goto errout;
601 }
602
16c6cf8b 603 err = fib_table_delete(tb, &cfg);
4e902c57
TG
604errout:
605 return err;
1da177e4
LT
606}
607
6ed2533e 608static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 609{
3b1e0a65 610 struct net *net = sock_net(skb->sk);
4e902c57
TG
611 struct fib_config cfg;
612 struct fib_table *tb;
613 int err;
1da177e4 614
4b5d47d4 615 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
616 if (err < 0)
617 goto errout;
1da177e4 618
226b0b4a 619 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
620 if (tb == NULL) {
621 err = -ENOBUFS;
622 goto errout;
623 }
624
16c6cf8b 625 err = fib_table_insert(tb, &cfg);
4e902c57
TG
626errout:
627 return err;
1da177e4
LT
628}
629
63f3444f 630static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 631{
3b1e0a65 632 struct net *net = sock_net(skb->sk);
1af5a8c4
PM
633 unsigned int h, s_h;
634 unsigned int e = 0, s_e;
1da177e4 635 struct fib_table *tb;
1af5a8c4 636 struct hlist_node *node;
e4aef8ae 637 struct hlist_head *head;
1af5a8c4 638 int dumped = 0;
1da177e4 639
be403ea1
TG
640 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
641 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
642 return ip_rt_dump(skb, cb);
643
1af5a8c4
PM
644 s_h = cb->args[0];
645 s_e = cb->args[1];
646
647 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
648 e = 0;
e4aef8ae
DL
649 head = &net->ipv4.fib_table_hash[h];
650 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
651 if (e < s_e)
652 goto next;
653 if (dumped)
654 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 655 2 * sizeof(cb->args[0]));
16c6cf8b 656 if (fib_table_dump(tb, skb, cb) < 0)
1af5a8c4
PM
657 goto out;
658 dumped = 1;
659next:
660 e++;
661 }
1da177e4 662 }
1af5a8c4
PM
663out:
664 cb->args[1] = e;
665 cb->args[0] = h;
1da177e4
LT
666
667 return skb->len;
668}
669
670/* Prepare and feed intra-kernel routing request.
671 Really, it should be netlink message, but :-( netlink
672 can be not configured, so that we feed it directly
673 to fib engine. It is legal, because all events occur
674 only when netlink is already locked.
675 */
676
81f7bf6c 677static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 678{
c346dca1 679 struct net *net = dev_net(ifa->ifa_dev->dev);
4e902c57
TG
680 struct fib_table *tb;
681 struct fib_config cfg = {
682 .fc_protocol = RTPROT_KERNEL,
683 .fc_type = type,
684 .fc_dst = dst,
685 .fc_dst_len = dst_len,
686 .fc_prefsrc = ifa->ifa_local,
687 .fc_oif = ifa->ifa_dev->dev->ifindex,
688 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 689 .fc_nlinfo = {
4b5d47d4 690 .nl_net = net,
4d1169c1 691 },
4e902c57 692 };
1da177e4
LT
693
694 if (type == RTN_UNICAST)
4b5d47d4 695 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 696 else
4b5d47d4 697 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
698
699 if (tb == NULL)
700 return;
701
4e902c57 702 cfg.fc_table = tb->tb_id;
1da177e4 703
4e902c57
TG
704 if (type != RTN_LOCAL)
705 cfg.fc_scope = RT_SCOPE_LINK;
706 else
707 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
708
709 if (cmd == RTM_NEWROUTE)
16c6cf8b 710 fib_table_insert(tb, &cfg);
1da177e4 711 else
16c6cf8b 712 fib_table_delete(tb, &cfg);
1da177e4
LT
713}
714
0ff60a45 715void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
716{
717 struct in_device *in_dev = ifa->ifa_dev;
718 struct net_device *dev = in_dev->dev;
719 struct in_ifaddr *prim = ifa;
a144ea4b
AV
720 __be32 mask = ifa->ifa_mask;
721 __be32 addr = ifa->ifa_local;
722 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
723
724 if (ifa->ifa_flags&IFA_F_SECONDARY) {
725 prim = inet_ifa_byprefix(in_dev, prefix, mask);
726 if (prim == NULL) {
a6db9010 727 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
728 return;
729 }
730 }
731
732 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
733
734 if (!(dev->flags&IFF_UP))
735 return;
736
737 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 738 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
739 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
740
f97c1e0c 741 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
742 (prefix != addr || ifa->ifa_prefixlen < 32)) {
743 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
744 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
745
746 /* Add network specific broadcasts, when it takes a sense */
747 if (ifa->ifa_prefixlen < 31) {
748 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
749 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
750 }
751 }
752}
753
754static void fib_del_ifaddr(struct in_ifaddr *ifa)
755{
756 struct in_device *in_dev = ifa->ifa_dev;
757 struct net_device *dev = in_dev->dev;
758 struct in_ifaddr *ifa1;
759 struct in_ifaddr *prim = ifa;
a144ea4b
AV
760 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
761 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
762#define LOCAL_OK 1
763#define BRD_OK 2
764#define BRD0_OK 4
765#define BRD1_OK 8
766 unsigned ok = 0;
767
768 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
769 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
770 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
771 else {
772 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
773 if (prim == NULL) {
a6db9010 774 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
775 return;
776 }
777 }
778
779 /* Deletion is more complicated than add.
780 We should take care of not to delete too much :-)
781
782 Scan address list to be sure that addresses are really gone.
783 */
784
785 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
786 if (ifa->ifa_local == ifa1->ifa_local)
787 ok |= LOCAL_OK;
788 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
789 ok |= BRD_OK;
790 if (brd == ifa1->ifa_broadcast)
791 ok |= BRD1_OK;
792 if (any == ifa1->ifa_broadcast)
793 ok |= BRD0_OK;
794 }
795
796 if (!(ok&BRD_OK))
797 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
798 if (!(ok&BRD1_OK))
799 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
800 if (!(ok&BRD0_OK))
801 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
802 if (!(ok&LOCAL_OK)) {
803 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
804
805 /* Check, that this local address finally disappeared. */
c346dca1 806 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
807 /* And the last, but not the least thing.
808 We must flush stray FIB entries.
809
810 First of all, we scan fib_info list searching
811 for stray nexthop entries, then ignite fib_flush.
812 */
c346dca1
YH
813 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
814 fib_flush(dev_net(dev));
1da177e4
LT
815 }
816 }
817#undef LOCAL_OK
818#undef BRD_OK
819#undef BRD0_OK
820#undef BRD1_OK
821}
822
246955fe
RO
823static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
824{
e905a9ed 825
246955fe 826 struct fib_result res;
5f300893 827 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 828 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
829 .tos = frn->fl_tos,
830 .scope = frn->fl_scope } } };
1194ed0a 831
912a41a4
SV
832#ifdef CONFIG_IP_MULTIPLE_TABLES
833 res.r = NULL;
834#endif
835
1194ed0a 836 frn->err = -ENOENT;
246955fe
RO
837 if (tb) {
838 local_bh_disable();
839
840 frn->tb_id = tb->tb_id;
16c6cf8b 841 frn->err = fib_table_lookup(tb, &fl, &res);
246955fe
RO
842
843 if (!frn->err) {
844 frn->prefixlen = res.prefixlen;
845 frn->nh_sel = res.nh_sel;
846 frn->type = res.type;
847 frn->scope = res.scope;
1194ed0a 848 fib_res_put(&res);
246955fe
RO
849 }
850 local_bh_enable();
851 }
852}
853
28f7b036 854static void nl_fib_input(struct sk_buff *skb)
246955fe 855{
6bd48fcf 856 struct net *net;
246955fe 857 struct fib_result_nl *frn;
28f7b036 858 struct nlmsghdr *nlh;
246955fe 859 struct fib_table *tb;
28f7b036 860 u32 pid;
1194ed0a 861
3b1e0a65 862 net = sock_net(skb->sk);
b529ccf2 863 nlh = nlmsg_hdr(skb);
ea86575e 864 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 865 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 866 return;
d883a036
DL
867
868 skb = skb_clone(skb, GFP_KERNEL);
869 if (skb == NULL)
870 return;
871 nlh = nlmsg_hdr(skb);
e905a9ed 872
246955fe 873 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 874 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
875
876 nl_fib_lookup(frn, tb);
e905a9ed 877
1194ed0a 878 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 879 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 880 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 881 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 882}
246955fe 883
7b1a74fd 884static int nl_fib_lookup_init(struct net *net)
246955fe 885{
6bd48fcf
DL
886 struct sock *sk;
887 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
888 nl_fib_input, NULL, THIS_MODULE);
889 if (sk == NULL)
7b1a74fd 890 return -EAFNOSUPPORT;
6bd48fcf 891 net->ipv4.fibnl = sk;
7b1a74fd
DL
892 return 0;
893}
894
895static void nl_fib_lookup_exit(struct net *net)
896{
b7c6ba6e 897 netlink_kernel_release(net->ipv4.fibnl);
775516bf 898 net->ipv4.fibnl = NULL;
246955fe
RO
899}
900
/*
 * Take IPv4 routing down on a device: flush the FIB tables if
 * fib_sync_down_dev() returns non-zero, then flush the routing cache
 * with the given delay and call arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int changed = fib_sync_down_dev(dev, force);

	if (changed)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
908
909static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
910{
6ed2533e 911 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
76e6ebfb 912 struct net_device *dev = ifa->ifa_dev->dev;
1da177e4
LT
913
914 switch (event) {
915 case NETDEV_UP:
916 fib_add_ifaddr(ifa);
917#ifdef CONFIG_IP_ROUTE_MULTIPATH
76e6ebfb 918 fib_sync_up(dev);
1da177e4 919#endif
76e6ebfb 920 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
921 break;
922 case NETDEV_DOWN:
923 fib_del_ifaddr(ifa);
9fcc2e8a 924 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
925 /* Last address was deleted from this interface.
926 Disable IP.
927 */
e2ce1468 928 fib_disable_ip(dev, 1, 0);
1da177e4 929 } else {
76e6ebfb 930 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
931 }
932 break;
933 }
934 return NOTIFY_DONE;
935}
936
937static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
938{
939 struct net_device *dev = ptr;
e5ed6399 940 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
941
942 if (event == NETDEV_UNREGISTER) {
e2ce1468 943 fib_disable_ip(dev, 2, -1);
1da177e4
LT
944 return NOTIFY_DONE;
945 }
946
947 if (!in_dev)
948 return NOTIFY_DONE;
949
950 switch (event) {
951 case NETDEV_UP:
952 for_ifa(in_dev) {
953 fib_add_ifaddr(ifa);
954 } endfor_ifa(in_dev);
955#ifdef CONFIG_IP_ROUTE_MULTIPATH
956 fib_sync_up(dev);
957#endif
76e6ebfb 958 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
959 break;
960 case NETDEV_DOWN:
e2ce1468 961 fib_disable_ip(dev, 0, 0);
1da177e4
LT
962 break;
963 case NETDEV_CHANGEMTU:
964 case NETDEV_CHANGE:
76e6ebfb 965 rt_cache_flush(dev_net(dev), 0);
1da177e4 966 break;
a5ee1551
EB
967 case NETDEV_UNREGISTER_BATCH:
968 rt_cache_flush_batch();
969 break;
1da177e4
LT
970 }
971 return NOTIFY_DONE;
972}
973
974static struct notifier_block fib_inetaddr_notifier = {
6ed2533e 975 .notifier_call = fib_inetaddr_event,
1da177e4
LT
976};
977
978static struct notifier_block fib_netdev_notifier = {
6ed2533e 979 .notifier_call = fib_netdev_event,
1da177e4
LT
980};
981
7b1a74fd 982static int __net_init ip_fib_net_init(struct net *net)
1da177e4 983{
dce5cbee 984 int err;
1af5a8c4
PM
985 unsigned int i;
986
e4aef8ae
DL
987 net->ipv4.fib_table_hash = kzalloc(
988 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
989 if (net->ipv4.fib_table_hash == NULL)
990 return -ENOMEM;
991
1af5a8c4 992 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 993 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 994
dce5cbee
DL
995 err = fib4_rules_init(net);
996 if (err < 0)
997 goto fail;
998 return 0;
999
1000fail:
1001 kfree(net->ipv4.fib_table_hash);
1002 return err;
7b1a74fd 1003}
1da177e4 1004
7b1a74fd
DL
1005static void __net_exit ip_fib_net_exit(struct net *net)
1006{
1007 unsigned int i;
1008
1009#ifdef CONFIG_IP_MULTIPLE_TABLES
1010 fib4_rules_exit(net);
1011#endif
1012
1013 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1014 struct fib_table *tb;
1015 struct hlist_head *head;
1016 struct hlist_node *node, *tmp;
63f3444f 1017
e4aef8ae 1018 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
1019 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1020 hlist_del(node);
16c6cf8b 1021 fib_table_flush(tb);
7b1a74fd
DL
1022 kfree(tb);
1023 }
1024 }
e4aef8ae 1025 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1026}
1027
1028static int __net_init fib_net_init(struct net *net)
1029{
1030 int error;
1031
7b1a74fd
DL
1032 error = ip_fib_net_init(net);
1033 if (error < 0)
1034 goto out;
1035 error = nl_fib_lookup_init(net);
1036 if (error < 0)
1037 goto out_nlfl;
1038 error = fib_proc_init(net);
1039 if (error < 0)
1040 goto out_proc;
1041out:
1042 return error;
1043
1044out_proc:
1045 nl_fib_lookup_exit(net);
1046out_nlfl:
1047 ip_fib_net_exit(net);
1048 goto out;
1049}
1050
1051static void __net_exit fib_net_exit(struct net *net)
1052{
1053 fib_proc_exit(net);
1054 nl_fib_lookup_exit(net);
1055 ip_fib_net_exit(net);
1056}
1057
1058static struct pernet_operations fib_net_ops = {
1059 .init = fib_net_init,
1060 .exit = fib_net_exit,
1061};
1062
1063void __init ip_fib_init(void)
1064{
63f3444f
TG
1065 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1066 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1067 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1068
1069 register_pernet_subsys(&fib_net_ops);
1070 register_netdevice_notifier(&fib_netdev_notifier);
1071 register_inetaddr_notifier(&fib_inetaddr_notifier);
7f9b8052
SH
1072
1073 fib_hash_init();
1da177e4
LT
1074}
1075
/* Symbols exported from this file. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);