]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
ac3200: fix error path
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
1da177e4
LT
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
1da177e4
LT
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
4fc268d2 20#include <linux/capability.h>
1da177e4
LT
21#include <linux/types.h>
22#include <linux/kernel.h>
1da177e4
LT
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
14c85021 30#include <linux/inetdevice.h>
1da177e4 31#include <linux/netdevice.h>
1823730f 32#include <linux/if_addr.h>
1da177e4
LT
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
1da177e4 35#include <linux/init.h>
1af5a8c4 36#include <linux/list.h>
5a0e3ad6 37#include <linux/slab.h>
1da177e4
LT
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
1da177e4
LT
44#include <net/arp.h>
45#include <net/ip_fib.h>
63f3444f 46#include <net/rtnetlink.h>
1da177e4 47
1da177e4
LT
48#ifndef CONFIG_IP_MULTIPLE_TABLES
49
7b1a74fd 50static int __net_init fib4_rules_init(struct net *net)
c3e9a353 51{
93456b6d
DL
52 struct fib_table *local_table, *main_table;
53
7f9b8052 54 local_table = fib_hash_table(RT_TABLE_LOCAL);
93456b6d 55 if (local_table == NULL)
dbb50165
DL
56 return -ENOMEM;
57
7f9b8052 58 main_table = fib_hash_table(RT_TABLE_MAIN);
93456b6d 59 if (main_table == NULL)
dbb50165
DL
60 goto fail;
61
93456b6d 62 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 64 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
66 return 0;
67
68fail:
93456b6d 69 kfree(local_table);
dbb50165 70 return -ENOMEM;
c3e9a353 71}
1af5a8c4 72#else
1da177e4 73
8ad4942c 74struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
75{
76 struct fib_table *tb;
1af5a8c4 77 unsigned int h;
1da177e4 78
1af5a8c4
PM
79 if (id == 0)
80 id = RT_TABLE_MAIN;
8ad4942c 81 tb = fib_get_table(net, id);
1af5a8c4
PM
82 if (tb)
83 return tb;
7f9b8052
SH
84
85 tb = fib_hash_table(id);
1da177e4
LT
86 if (!tb)
87 return NULL;
1af5a8c4 88 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
90 return tb;
91}
92
8ad4942c 93struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
94{
95 struct fib_table *tb;
96 struct hlist_node *node;
e4aef8ae 97 struct hlist_head *head;
1af5a8c4 98 unsigned int h;
1da177e4 99
1af5a8c4
PM
100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 103
1af5a8c4 104 rcu_read_lock();
e4aef8ae
DL
105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
1da177e4
LT
115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
010278ec
DL
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
64c2d538
DL
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
010278ec 127 tb = fib_get_table(net, table);
64c2d538 128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
16c6cf8b 129 fib_table_select_default(tb, flp, res);
64c2d538
DL
130}
131
e4aef8ae 132static void fib_flush(struct net *net)
1da177e4
LT
133{
134 int flushed = 0;
1da177e4 135 struct fib_table *tb;
1af5a8c4 136 struct hlist_node *node;
e4aef8ae 137 struct hlist_head *head;
1af5a8c4 138 unsigned int h;
1da177e4 139
1af5a8c4 140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
16c6cf8b 143 flushed += fib_table_flush(tb);
1da177e4 144 }
1da177e4
LT
145
146 if (flushed)
76e6ebfb 147 rt_cache_flush(net, -1);
1da177e4
LT
148}
149
150/*
151 * Find the first device with a given source address.
152 */
153
1ab35276 154struct net_device * ip_dev_find(struct net *net, __be32 addr)
1da177e4
LT
155{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
03cf786c 159 struct fib_table *local_table;
1da177e4
LT
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
1ab35276 165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
16c6cf8b 166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
1da177e4
LT
167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
171
172 if (dev)
173 dev_hold(dev);
174out:
175 fib_res_put(&res);
176 return dev;
177}
178
05538116
LAT
179/*
180 * Find address type as if only "dev" was present in the system. If
181 * dev is NULL then all interfaces are taken into consideration.
182 */
6b175b26
EB
183static inline unsigned __inet_dev_addr_type(struct net *net,
184 const struct net_device *dev,
05538116 185 __be32 addr)
1da177e4
LT
186{
187 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
188 struct fib_result res;
189 unsigned ret = RTN_BROADCAST;
03cf786c 190 struct fib_table *local_table;
1da177e4 191
1e637c74 192 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1da177e4 193 return RTN_BROADCAST;
f97c1e0c 194 if (ipv4_is_multicast(addr))
1da177e4
LT
195 return RTN_MULTICAST;
196
197#ifdef CONFIG_IP_MULTIPLE_TABLES
198 res.r = NULL;
199#endif
e905a9ed 200
6b175b26 201 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 202 if (local_table) {
1da177e4 203 ret = RTN_UNICAST;
16c6cf8b 204 if (!fib_table_lookup(local_table, &fl, &res)) {
05538116
LAT
205 if (!dev || dev == res.fi->fib_dev)
206 ret = res.type;
1da177e4
LT
207 fib_res_put(&res);
208 }
209 }
210 return ret;
211}
212
6b175b26 213unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 214{
6b175b26 215 return __inet_dev_addr_type(net, NULL, addr);
05538116
LAT
216}
217
6b175b26
EB
218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
219 __be32 addr)
05538116 220{
6b175b26 221 return __inet_dev_addr_type(net, dev, addr);
05538116
LAT
222}
223
1da177e4
LT
224/* Given (packet source, input interface) and optional (dst, oif, tos):
225 - (main) check, that source is valid i.e. not broadcast or our local
226 address.
227 - figure out what "logical" interface this packet arrived
228 and calculate "specific destination" address.
229 - check, that packet arrived from expected physical interface.
230 */
231
d9c9df8c 232int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
b0c110ca 233 struct net_device *dev, __be32 *spec_dst,
234 u32 *itag, u32 mark)
1da177e4
LT
235{
236 struct in_device *in_dev;
237 struct flowi fl = { .nl_u = { .ip4_u =
238 { .daddr = src,
239 .saddr = dst,
240 .tos = tos } },
b0c110ca 241 .mark = mark,
1da177e4 242 .iif = oif };
b0c110ca 243
1da177e4 244 struct fib_result res;
8153a10c 245 int no_addr, rpf, accept_local;
1da177e4 246 int ret;
5b707aaa 247 struct net *net;
1da177e4 248
8153a10c 249 no_addr = rpf = accept_local = 0;
1da177e4 250 rcu_read_lock();
e5ed6399 251 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
252 if (in_dev) {
253 no_addr = in_dev->ifa_list == NULL;
254 rpf = IN_DEV_RPFILTER(in_dev);
8153a10c 255 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
28f6aeea
JHS
256 if (mark && !IN_DEV_SRC_VMARK(in_dev))
257 fl.mark = 0;
1da177e4
LT
258 }
259 rcu_read_unlock();
260
261 if (in_dev == NULL)
262 goto e_inval;
263
c346dca1 264 net = dev_net(dev);
5b707aaa 265 if (fib_lookup(net, &fl, &res))
1da177e4 266 goto last_resort;
8153a10c
PM
267 if (res.type != RTN_UNICAST) {
268 if (res.type != RTN_LOCAL || !accept_local)
269 goto e_inval_res;
270 }
1da177e4
LT
271 *spec_dst = FIB_RES_PREFSRC(res);
272 fib_combine_itag(itag, &res);
273#ifdef CONFIG_IP_ROUTE_MULTIPATH
274 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
275#else
276 if (FIB_RES_DEV(res) == dev)
277#endif
278 {
279 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
280 fib_res_put(&res);
281 return ret;
282 }
283 fib_res_put(&res);
284 if (no_addr)
285 goto last_resort;
c1cf8422 286 if (rpf == 1)
b5f7e755 287 goto e_rpf;
1da177e4
LT
288 fl.oif = dev->ifindex;
289
290 ret = 0;
5b707aaa 291 if (fib_lookup(net, &fl, &res) == 0) {
1da177e4
LT
292 if (res.type == RTN_UNICAST) {
293 *spec_dst = FIB_RES_PREFSRC(res);
294 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
295 }
296 fib_res_put(&res);
297 }
298 return ret;
299
300last_resort:
301 if (rpf)
b5f7e755 302 goto e_rpf;
1da177e4
LT
303 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
304 *itag = 0;
305 return 0;
306
307e_inval_res:
308 fib_res_put(&res);
309e_inval:
310 return -EINVAL;
b5f7e755
ED
311e_rpf:
312 return -EXDEV;
1da177e4
LT
313}
314
81f7bf6c 315static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
316{
317 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
318}
319
320static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
321{
322 struct nlattr *nla;
323
324 nla = (struct nlattr *) ((char *) mx + len);
325 nla->nla_type = type;
326 nla->nla_len = nla_attr_size(4);
327 *(u32 *) nla_data(nla) = value;
328
329 return len + nla_total_size(4);
330}
331
4b5d47d4 332static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
333 struct fib_config *cfg)
334{
6d85c10a 335 __be32 addr;
4e902c57
TG
336 int plen;
337
338 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 339 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
340
341 if (rt->rt_dst.sa_family != AF_INET)
342 return -EAFNOSUPPORT;
343
344 /*
345 * Check mask for validity:
346 * a) it must be contiguous.
347 * b) destination must have all host bits clear.
348 * c) if application forgot to set correct family (AF_INET),
349 * reject request unless it is absolutely clear i.e.
350 * both family and mask are zero.
351 */
352 plen = 32;
353 addr = sk_extract_addr(&rt->rt_dst);
354 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 355 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
356
357 if (rt->rt_genmask.sa_family != AF_INET) {
358 if (mask || rt->rt_genmask.sa_family)
359 return -EAFNOSUPPORT;
360 }
361
362 if (bad_mask(mask, addr))
363 return -EINVAL;
364
365 plen = inet_mask_len(mask);
366 }
367
368 cfg->fc_dst_len = plen;
369 cfg->fc_dst = addr;
370
371 if (cmd != SIOCDELRT) {
372 cfg->fc_nlflags = NLM_F_CREATE;
373 cfg->fc_protocol = RTPROT_BOOT;
374 }
375
376 if (rt->rt_metric)
377 cfg->fc_priority = rt->rt_metric - 1;
378
379 if (rt->rt_flags & RTF_REJECT) {
380 cfg->fc_scope = RT_SCOPE_HOST;
381 cfg->fc_type = RTN_UNREACHABLE;
382 return 0;
383 }
384
385 cfg->fc_scope = RT_SCOPE_NOWHERE;
386 cfg->fc_type = RTN_UNICAST;
387
388 if (rt->rt_dev) {
389 char *colon;
390 struct net_device *dev;
391 char devname[IFNAMSIZ];
392
393 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
394 return -EFAULT;
395
396 devname[IFNAMSIZ-1] = 0;
397 colon = strchr(devname, ':');
398 if (colon)
399 *colon = 0;
4b5d47d4 400 dev = __dev_get_by_name(net, devname);
4e902c57
TG
401 if (!dev)
402 return -ENODEV;
403 cfg->fc_oif = dev->ifindex;
404 if (colon) {
405 struct in_ifaddr *ifa;
406 struct in_device *in_dev = __in_dev_get_rtnl(dev);
407 if (!in_dev)
408 return -ENODEV;
409 *colon = ':';
410 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
411 if (strcmp(ifa->ifa_label, devname) == 0)
412 break;
413 if (ifa == NULL)
414 return -ENODEV;
415 cfg->fc_prefsrc = ifa->ifa_local;
416 }
417 }
418
419 addr = sk_extract_addr(&rt->rt_gateway);
420 if (rt->rt_gateway.sa_family == AF_INET && addr) {
421 cfg->fc_gw = addr;
422 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 423 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
424 cfg->fc_scope = RT_SCOPE_UNIVERSE;
425 }
426
427 if (cmd == SIOCDELRT)
428 return 0;
429
430 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
431 return -EINVAL;
432
433 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
434 cfg->fc_scope = RT_SCOPE_LINK;
435
436 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
437 struct nlattr *mx;
438 int len = 0;
439
440 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 441 if (mx == NULL)
4e902c57
TG
442 return -ENOMEM;
443
444 if (rt->rt_flags & RTF_MTU)
445 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
446
447 if (rt->rt_flags & RTF_WINDOW)
448 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
449
450 if (rt->rt_flags & RTF_IRTT)
451 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
452
453 cfg->fc_mx = mx;
454 cfg->fc_mx_len = len;
455 }
456
457 return 0;
458}
459
1da177e4
LT
460/*
461 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
462 */
e905a9ed 463
1bad118a 464int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 465{
4e902c57
TG
466 struct fib_config cfg;
467 struct rtentry rt;
1da177e4 468 int err;
1da177e4
LT
469
470 switch (cmd) {
471 case SIOCADDRT: /* Add a route */
472 case SIOCDELRT: /* Delete a route */
473 if (!capable(CAP_NET_ADMIN))
474 return -EPERM;
4e902c57
TG
475
476 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 477 return -EFAULT;
4e902c57 478
1da177e4 479 rtnl_lock();
1bad118a 480 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 481 if (err == 0) {
4e902c57
TG
482 struct fib_table *tb;
483
1da177e4 484 if (cmd == SIOCDELRT) {
1bad118a 485 tb = fib_get_table(net, cfg.fc_table);
1da177e4 486 if (tb)
16c6cf8b 487 err = fib_table_delete(tb, &cfg);
4e902c57
TG
488 else
489 err = -ESRCH;
1da177e4 490 } else {
1bad118a 491 tb = fib_new_table(net, cfg.fc_table);
1da177e4 492 if (tb)
16c6cf8b 493 err = fib_table_insert(tb, &cfg);
4e902c57
TG
494 else
495 err = -ENOBUFS;
1da177e4 496 }
4e902c57
TG
497
498 /* allocated by rtentry_to_fib_config() */
499 kfree(cfg.fc_mx);
1da177e4
LT
500 }
501 rtnl_unlock();
502 return err;
503 }
504 return -EINVAL;
505}
506
ef7c79ed 507const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
508 [RTA_DST] = { .type = NLA_U32 },
509 [RTA_SRC] = { .type = NLA_U32 },
510 [RTA_IIF] = { .type = NLA_U32 },
511 [RTA_OIF] = { .type = NLA_U32 },
512 [RTA_GATEWAY] = { .type = NLA_U32 },
513 [RTA_PRIORITY] = { .type = NLA_U32 },
514 [RTA_PREFSRC] = { .type = NLA_U32 },
515 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 516 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57 517 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
518};
519
4b5d47d4
DL
520static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
521 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 522{
4e902c57
TG
523 struct nlattr *attr;
524 int err, remaining;
525 struct rtmsg *rtm;
526
527 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
528 if (err < 0)
529 goto errout;
530
531 memset(cfg, 0, sizeof(*cfg));
532
533 rtm = nlmsg_data(nlh);
4e902c57 534 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
535 cfg->fc_tos = rtm->rtm_tos;
536 cfg->fc_table = rtm->rtm_table;
537 cfg->fc_protocol = rtm->rtm_protocol;
538 cfg->fc_scope = rtm->rtm_scope;
539 cfg->fc_type = rtm->rtm_type;
540 cfg->fc_flags = rtm->rtm_flags;
541 cfg->fc_nlflags = nlh->nlmsg_flags;
542
543 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
544 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 545 cfg->fc_nlinfo.nl_net = net;
4e902c57 546
a0ee18b9
TG
547 if (cfg->fc_type > RTN_MAX) {
548 err = -EINVAL;
549 goto errout;
550 }
551
4e902c57 552 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 553 switch (nla_type(attr)) {
4e902c57 554 case RTA_DST:
17fb2c64 555 cfg->fc_dst = nla_get_be32(attr);
4e902c57 556 break;
4e902c57
TG
557 case RTA_OIF:
558 cfg->fc_oif = nla_get_u32(attr);
559 break;
560 case RTA_GATEWAY:
17fb2c64 561 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
562 break;
563 case RTA_PRIORITY:
564 cfg->fc_priority = nla_get_u32(attr);
565 break;
566 case RTA_PREFSRC:
17fb2c64 567 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
568 break;
569 case RTA_METRICS:
570 cfg->fc_mx = nla_data(attr);
571 cfg->fc_mx_len = nla_len(attr);
572 break;
573 case RTA_MULTIPATH:
574 cfg->fc_mp = nla_data(attr);
575 cfg->fc_mp_len = nla_len(attr);
576 break;
577 case RTA_FLOW:
578 cfg->fc_flow = nla_get_u32(attr);
579 break;
4e902c57
TG
580 case RTA_TABLE:
581 cfg->fc_table = nla_get_u32(attr);
582 break;
1da177e4
LT
583 }
584 }
4e902c57 585
1da177e4 586 return 0;
4e902c57
TG
587errout:
588 return err;
1da177e4
LT
589}
590
6ed2533e 591static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 592{
3b1e0a65 593 struct net *net = sock_net(skb->sk);
4e902c57
TG
594 struct fib_config cfg;
595 struct fib_table *tb;
596 int err;
1da177e4 597
4b5d47d4 598 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
599 if (err < 0)
600 goto errout;
1da177e4 601
8ad4942c 602 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
603 if (tb == NULL) {
604 err = -ESRCH;
605 goto errout;
606 }
607
16c6cf8b 608 err = fib_table_delete(tb, &cfg);
4e902c57
TG
609errout:
610 return err;
1da177e4
LT
611}
612
6ed2533e 613static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 614{
3b1e0a65 615 struct net *net = sock_net(skb->sk);
4e902c57
TG
616 struct fib_config cfg;
617 struct fib_table *tb;
618 int err;
1da177e4 619
4b5d47d4 620 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
621 if (err < 0)
622 goto errout;
1da177e4 623
226b0b4a 624 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
625 if (tb == NULL) {
626 err = -ENOBUFS;
627 goto errout;
628 }
629
16c6cf8b 630 err = fib_table_insert(tb, &cfg);
4e902c57
TG
631errout:
632 return err;
1da177e4
LT
633}
634
63f3444f 635static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 636{
3b1e0a65 637 struct net *net = sock_net(skb->sk);
1af5a8c4
PM
638 unsigned int h, s_h;
639 unsigned int e = 0, s_e;
1da177e4 640 struct fib_table *tb;
1af5a8c4 641 struct hlist_node *node;
e4aef8ae 642 struct hlist_head *head;
1af5a8c4 643 int dumped = 0;
1da177e4 644
be403ea1
TG
645 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
646 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
647 return ip_rt_dump(skb, cb);
648
1af5a8c4
PM
649 s_h = cb->args[0];
650 s_e = cb->args[1];
651
652 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
653 e = 0;
e4aef8ae
DL
654 head = &net->ipv4.fib_table_hash[h];
655 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
656 if (e < s_e)
657 goto next;
658 if (dumped)
659 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 660 2 * sizeof(cb->args[0]));
16c6cf8b 661 if (fib_table_dump(tb, skb, cb) < 0)
1af5a8c4
PM
662 goto out;
663 dumped = 1;
664next:
665 e++;
666 }
1da177e4 667 }
1af5a8c4
PM
668out:
669 cb->args[1] = e;
670 cb->args[0] = h;
1da177e4
LT
671
672 return skb->len;
673}
674
675/* Prepare and feed intra-kernel routing request.
676 Really, it should be a netlink message, but :-( netlink
677 may not be configured, so we feed it directly
678 to fib engine. It is legal, because all events occur
679 only when netlink is already locked.
680 */
681
81f7bf6c 682static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 683{
c346dca1 684 struct net *net = dev_net(ifa->ifa_dev->dev);
4e902c57
TG
685 struct fib_table *tb;
686 struct fib_config cfg = {
687 .fc_protocol = RTPROT_KERNEL,
688 .fc_type = type,
689 .fc_dst = dst,
690 .fc_dst_len = dst_len,
691 .fc_prefsrc = ifa->ifa_local,
692 .fc_oif = ifa->ifa_dev->dev->ifindex,
693 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 694 .fc_nlinfo = {
4b5d47d4 695 .nl_net = net,
4d1169c1 696 },
4e902c57 697 };
1da177e4
LT
698
699 if (type == RTN_UNICAST)
4b5d47d4 700 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 701 else
4b5d47d4 702 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
703
704 if (tb == NULL)
705 return;
706
4e902c57 707 cfg.fc_table = tb->tb_id;
1da177e4 708
4e902c57
TG
709 if (type != RTN_LOCAL)
710 cfg.fc_scope = RT_SCOPE_LINK;
711 else
712 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
713
714 if (cmd == RTM_NEWROUTE)
16c6cf8b 715 fib_table_insert(tb, &cfg);
1da177e4 716 else
16c6cf8b 717 fib_table_delete(tb, &cfg);
1da177e4
LT
718}
719
0ff60a45 720void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
721{
722 struct in_device *in_dev = ifa->ifa_dev;
723 struct net_device *dev = in_dev->dev;
724 struct in_ifaddr *prim = ifa;
a144ea4b
AV
725 __be32 mask = ifa->ifa_mask;
726 __be32 addr = ifa->ifa_local;
727 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
728
729 if (ifa->ifa_flags&IFA_F_SECONDARY) {
730 prim = inet_ifa_byprefix(in_dev, prefix, mask);
731 if (prim == NULL) {
a6db9010 732 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
733 return;
734 }
735 }
736
737 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
738
739 if (!(dev->flags&IFF_UP))
740 return;
741
742 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 743 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
744 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
745
f97c1e0c 746 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
747 (prefix != addr || ifa->ifa_prefixlen < 32)) {
748 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
749 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
750
751 /* Add network specific broadcasts, when it takes a sense */
752 if (ifa->ifa_prefixlen < 31) {
753 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
754 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
755 }
756 }
757}
758
759static void fib_del_ifaddr(struct in_ifaddr *ifa)
760{
761 struct in_device *in_dev = ifa->ifa_dev;
762 struct net_device *dev = in_dev->dev;
763 struct in_ifaddr *ifa1;
764 struct in_ifaddr *prim = ifa;
a144ea4b
AV
765 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
766 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
767#define LOCAL_OK 1
768#define BRD_OK 2
769#define BRD0_OK 4
770#define BRD1_OK 8
771 unsigned ok = 0;
772
773 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
774 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
775 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
776 else {
777 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
778 if (prim == NULL) {
a6db9010 779 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
780 return;
781 }
782 }
783
784 /* Deletion is more complicated than add.
785 We should take care of not to delete too much :-)
786
787 Scan address list to be sure that addresses are really gone.
788 */
789
790 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
791 if (ifa->ifa_local == ifa1->ifa_local)
792 ok |= LOCAL_OK;
793 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
794 ok |= BRD_OK;
795 if (brd == ifa1->ifa_broadcast)
796 ok |= BRD1_OK;
797 if (any == ifa1->ifa_broadcast)
798 ok |= BRD0_OK;
799 }
800
801 if (!(ok&BRD_OK))
802 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
803 if (!(ok&BRD1_OK))
804 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
805 if (!(ok&BRD0_OK))
806 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
807 if (!(ok&LOCAL_OK)) {
808 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
809
810 /* Check, that this local address finally disappeared. */
c346dca1 811 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
812 /* And the last, but not the least thing.
813 We must flush stray FIB entries.
814
815 First of all, we scan fib_info list searching
816 for stray nexthop entries, then ignite fib_flush.
817 */
c346dca1
YH
818 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
819 fib_flush(dev_net(dev));
1da177e4
LT
820 }
821 }
822#undef LOCAL_OK
823#undef BRD_OK
824#undef BRD0_OK
825#undef BRD1_OK
826}
827
246955fe
RO
828static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
829{
e905a9ed 830
246955fe 831 struct fib_result res;
5f300893 832 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 833 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
834 .tos = frn->fl_tos,
835 .scope = frn->fl_scope } } };
1194ed0a 836
912a41a4
SV
837#ifdef CONFIG_IP_MULTIPLE_TABLES
838 res.r = NULL;
839#endif
840
1194ed0a 841 frn->err = -ENOENT;
246955fe
RO
842 if (tb) {
843 local_bh_disable();
844
845 frn->tb_id = tb->tb_id;
16c6cf8b 846 frn->err = fib_table_lookup(tb, &fl, &res);
246955fe
RO
847
848 if (!frn->err) {
849 frn->prefixlen = res.prefixlen;
850 frn->nh_sel = res.nh_sel;
851 frn->type = res.type;
852 frn->scope = res.scope;
1194ed0a 853 fib_res_put(&res);
246955fe
RO
854 }
855 local_bh_enable();
856 }
857}
858
28f7b036 859static void nl_fib_input(struct sk_buff *skb)
246955fe 860{
6bd48fcf 861 struct net *net;
246955fe 862 struct fib_result_nl *frn;
28f7b036 863 struct nlmsghdr *nlh;
246955fe 864 struct fib_table *tb;
28f7b036 865 u32 pid;
1194ed0a 866
3b1e0a65 867 net = sock_net(skb->sk);
b529ccf2 868 nlh = nlmsg_hdr(skb);
ea86575e 869 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 870 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 871 return;
d883a036
DL
872
873 skb = skb_clone(skb, GFP_KERNEL);
874 if (skb == NULL)
875 return;
876 nlh = nlmsg_hdr(skb);
e905a9ed 877
246955fe 878 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 879 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
880
881 nl_fib_lookup(frn, tb);
e905a9ed 882
1194ed0a 883 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 884 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 885 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 886 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 887}
246955fe 888
2c8c1e72 889static int __net_init nl_fib_lookup_init(struct net *net)
246955fe 890{
6bd48fcf
DL
891 struct sock *sk;
892 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
893 nl_fib_input, NULL, THIS_MODULE);
894 if (sk == NULL)
7b1a74fd 895 return -EAFNOSUPPORT;
6bd48fcf 896 net->ipv4.fibnl = sk;
7b1a74fd
DL
897 return 0;
898}
899
900static void nl_fib_lookup_exit(struct net *net)
901{
b7c6ba6e 902 netlink_kernel_release(net->ipv4.fibnl);
775516bf 903 net->ipv4.fibnl = NULL;
246955fe
RO
904}
905
/*
 * Take IPv4 routing down on @dev: sync down the routes using the device
 * (flushing the FIB if fib_sync_down_dev() reports a non-zero result),
 * flush the routing cache with the given @delay, and call arp_ifdown()
 * for the device.  @force is passed through to fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int need_flush = fib_sync_down_dev(dev, force);

	if (need_flush)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
913
914static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
915{
6ed2533e 916 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
76e6ebfb 917 struct net_device *dev = ifa->ifa_dev->dev;
1da177e4
LT
918
919 switch (event) {
920 case NETDEV_UP:
921 fib_add_ifaddr(ifa);
922#ifdef CONFIG_IP_ROUTE_MULTIPATH
76e6ebfb 923 fib_sync_up(dev);
1da177e4 924#endif
76e6ebfb 925 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
926 break;
927 case NETDEV_DOWN:
928 fib_del_ifaddr(ifa);
9fcc2e8a 929 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
930 /* Last address was deleted from this interface.
931 Disable IP.
932 */
e2ce1468 933 fib_disable_ip(dev, 1, 0);
1da177e4 934 } else {
76e6ebfb 935 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
936 }
937 break;
938 }
939 return NOTIFY_DONE;
940}
941
942static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
943{
944 struct net_device *dev = ptr;
e5ed6399 945 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
946
947 if (event == NETDEV_UNREGISTER) {
e2ce1468 948 fib_disable_ip(dev, 2, -1);
1da177e4
LT
949 return NOTIFY_DONE;
950 }
951
952 if (!in_dev)
953 return NOTIFY_DONE;
954
955 switch (event) {
956 case NETDEV_UP:
957 for_ifa(in_dev) {
958 fib_add_ifaddr(ifa);
959 } endfor_ifa(in_dev);
960#ifdef CONFIG_IP_ROUTE_MULTIPATH
961 fib_sync_up(dev);
962#endif
76e6ebfb 963 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
964 break;
965 case NETDEV_DOWN:
e2ce1468 966 fib_disable_ip(dev, 0, 0);
1da177e4
LT
967 break;
968 case NETDEV_CHANGEMTU:
969 case NETDEV_CHANGE:
76e6ebfb 970 rt_cache_flush(dev_net(dev), 0);
1da177e4 971 break;
a5ee1551
EB
972 case NETDEV_UNREGISTER_BATCH:
973 rt_cache_flush_batch();
974 break;
1da177e4
LT
975 }
976 return NOTIFY_DONE;
977}
978
979static struct notifier_block fib_inetaddr_notifier = {
6ed2533e 980 .notifier_call = fib_inetaddr_event,
1da177e4
LT
981};
982
983static struct notifier_block fib_netdev_notifier = {
6ed2533e 984 .notifier_call = fib_netdev_event,
1da177e4
LT
985};
986
7b1a74fd 987static int __net_init ip_fib_net_init(struct net *net)
1da177e4 988{
dce5cbee 989 int err;
1af5a8c4
PM
990 unsigned int i;
991
e4aef8ae
DL
992 net->ipv4.fib_table_hash = kzalloc(
993 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
994 if (net->ipv4.fib_table_hash == NULL)
995 return -ENOMEM;
996
1af5a8c4 997 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 998 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 999
dce5cbee
DL
1000 err = fib4_rules_init(net);
1001 if (err < 0)
1002 goto fail;
1003 return 0;
1004
1005fail:
1006 kfree(net->ipv4.fib_table_hash);
1007 return err;
7b1a74fd 1008}
1da177e4 1009
2c8c1e72 1010static void ip_fib_net_exit(struct net *net)
7b1a74fd
DL
1011{
1012 unsigned int i;
1013
1014#ifdef CONFIG_IP_MULTIPLE_TABLES
1015 fib4_rules_exit(net);
1016#endif
1017
1018 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1019 struct fib_table *tb;
1020 struct hlist_head *head;
1021 struct hlist_node *node, *tmp;
63f3444f 1022
e4aef8ae 1023 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
1024 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1025 hlist_del(node);
16c6cf8b 1026 fib_table_flush(tb);
7b1a74fd
DL
1027 kfree(tb);
1028 }
1029 }
e4aef8ae 1030 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1031}
1032
1033static int __net_init fib_net_init(struct net *net)
1034{
1035 int error;
1036
7b1a74fd
DL
1037 error = ip_fib_net_init(net);
1038 if (error < 0)
1039 goto out;
1040 error = nl_fib_lookup_init(net);
1041 if (error < 0)
1042 goto out_nlfl;
1043 error = fib_proc_init(net);
1044 if (error < 0)
1045 goto out_proc;
1046out:
1047 return error;
1048
1049out_proc:
1050 nl_fib_lookup_exit(net);
1051out_nlfl:
1052 ip_fib_net_exit(net);
1053 goto out;
1054}
1055
1056static void __net_exit fib_net_exit(struct net *net)
1057{
1058 fib_proc_exit(net);
1059 nl_fib_lookup_exit(net);
1060 ip_fib_net_exit(net);
1061}
1062
1063static struct pernet_operations fib_net_ops = {
1064 .init = fib_net_init,
1065 .exit = fib_net_exit,
1066};
1067
1068void __init ip_fib_init(void)
1069{
63f3444f
TG
1070 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1071 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1072 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1073
1074 register_pernet_subsys(&fib_net_ops);
1075 register_netdevice_notifier(&fib_netdev_notifier);
1076 register_inetaddr_notifier(&fib_inetaddr_notifier);
7f9b8052
SH
1077
1078 fib_hash_init();
1da177e4
LT
1079}
1080
1081EXPORT_SYMBOL(inet_addr_type);
05538116 1082EXPORT_SYMBOL(inet_dev_addr_type);
a1e8733e 1083EXPORT_SYMBOL(ip_dev_find);