]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
net: fix kmemcheck annotations
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
1da177e4
LT
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
1da177e4
LT
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
4fc268d2 20#include <linux/capability.h>
1da177e4
LT
21#include <linux/types.h>
22#include <linux/kernel.h>
1da177e4
LT
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
14c85021 30#include <linux/inetdevice.h>
1da177e4 31#include <linux/netdevice.h>
1823730f 32#include <linux/if_addr.h>
1da177e4
LT
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
1da177e4 35#include <linux/init.h>
1af5a8c4 36#include <linux/list.h>
1da177e4
LT
37
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
1da177e4
LT
43#include <net/arp.h>
44#include <net/ip_fib.h>
63f3444f 45#include <net/rtnetlink.h>
1da177e4 46
1da177e4
LT
47#ifndef CONFIG_IP_MULTIPLE_TABLES
48
7b1a74fd 49static int __net_init fib4_rules_init(struct net *net)
c3e9a353 50{
93456b6d
DL
51 struct fib_table *local_table, *main_table;
52
7f9b8052 53 local_table = fib_hash_table(RT_TABLE_LOCAL);
93456b6d 54 if (local_table == NULL)
dbb50165
DL
55 return -ENOMEM;
56
7f9b8052 57 main_table = fib_hash_table(RT_TABLE_MAIN);
93456b6d 58 if (main_table == NULL)
dbb50165
DL
59 goto fail;
60
93456b6d 61 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 62 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 63 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 64 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
65 return 0;
66
67fail:
93456b6d 68 kfree(local_table);
dbb50165 69 return -ENOMEM;
c3e9a353 70}
1af5a8c4 71#else
1da177e4 72
8ad4942c 73struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
74{
75 struct fib_table *tb;
1af5a8c4 76 unsigned int h;
1da177e4 77
1af5a8c4
PM
78 if (id == 0)
79 id = RT_TABLE_MAIN;
8ad4942c 80 tb = fib_get_table(net, id);
1af5a8c4
PM
81 if (tb)
82 return tb;
7f9b8052
SH
83
84 tb = fib_hash_table(id);
1da177e4
LT
85 if (!tb)
86 return NULL;
1af5a8c4 87 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 88 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
89 return tb;
90}
91
8ad4942c 92struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
93{
94 struct fib_table *tb;
95 struct hlist_node *node;
e4aef8ae 96 struct hlist_head *head;
1af5a8c4 97 unsigned int h;
1da177e4 98
1af5a8c4
PM
99 if (id == 0)
100 id = RT_TABLE_MAIN;
101 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 102
1af5a8c4 103 rcu_read_lock();
e4aef8ae
DL
104 head = &net->ipv4.fib_table_hash[h];
105 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
106 if (tb->tb_id == id) {
107 rcu_read_unlock();
108 return tb;
109 }
110 }
111 rcu_read_unlock();
112 return NULL;
113}
1da177e4
LT
114#endif /* CONFIG_IP_MULTIPLE_TABLES */
115
010278ec
DL
116void fib_select_default(struct net *net,
117 const struct flowi *flp, struct fib_result *res)
64c2d538
DL
118{
119 struct fib_table *tb;
120 int table = RT_TABLE_MAIN;
121#ifdef CONFIG_IP_MULTIPLE_TABLES
122 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123 return;
124 table = res->r->table;
125#endif
010278ec 126 tb = fib_get_table(net, table);
64c2d538
DL
127 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
128 tb->tb_select_default(tb, flp, res);
129}
130
e4aef8ae 131static void fib_flush(struct net *net)
1da177e4
LT
132{
133 int flushed = 0;
1da177e4 134 struct fib_table *tb;
1af5a8c4 135 struct hlist_node *node;
e4aef8ae 136 struct hlist_head *head;
1af5a8c4 137 unsigned int h;
1da177e4 138
1af5a8c4 139 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
140 head = &net->ipv4.fib_table_hash[h];
141 hlist_for_each_entry(tb, node, head, tb_hlist)
1af5a8c4 142 flushed += tb->tb_flush(tb);
1da177e4 143 }
1da177e4
LT
144
145 if (flushed)
76e6ebfb 146 rt_cache_flush(net, -1);
1da177e4
LT
147}
148
149/*
150 * Find the first device with a given source address.
151 */
152
1ab35276 153struct net_device * ip_dev_find(struct net *net, __be32 addr)
1da177e4
LT
154{
155 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156 struct fib_result res;
157 struct net_device *dev = NULL;
03cf786c 158 struct fib_table *local_table;
1da177e4
LT
159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161 res.r = NULL;
162#endif
163
1ab35276 164 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 165 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
1da177e4
LT
166 return NULL;
167 if (res.type != RTN_LOCAL)
168 goto out;
169 dev = FIB_RES_DEV(res);
170
171 if (dev)
172 dev_hold(dev);
173out:
174 fib_res_put(&res);
175 return dev;
176}
177
05538116
LAT
178/*
179 * Find address type as if only "dev" was present in the system. If
180 * on_dev is NULL then all interfaces are taken into consideration.
181 */
6b175b26
EB
182static inline unsigned __inet_dev_addr_type(struct net *net,
183 const struct net_device *dev,
05538116 184 __be32 addr)
1da177e4
LT
185{
186 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187 struct fib_result res;
188 unsigned ret = RTN_BROADCAST;
03cf786c 189 struct fib_table *local_table;
1da177e4 190
1e637c74 191 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1da177e4 192 return RTN_BROADCAST;
f97c1e0c 193 if (ipv4_is_multicast(addr))
1da177e4
LT
194 return RTN_MULTICAST;
195
196#ifdef CONFIG_IP_MULTIPLE_TABLES
197 res.r = NULL;
198#endif
e905a9ed 199
6b175b26 200 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 201 if (local_table) {
1da177e4 202 ret = RTN_UNICAST;
03cf786c 203 if (!local_table->tb_lookup(local_table, &fl, &res)) {
05538116
LAT
204 if (!dev || dev == res.fi->fib_dev)
205 ret = res.type;
1da177e4
LT
206 fib_res_put(&res);
207 }
208 }
209 return ret;
210}
211
6b175b26 212unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 213{
6b175b26 214 return __inet_dev_addr_type(net, NULL, addr);
05538116
LAT
215}
216
6b175b26
EB
217unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218 __be32 addr)
05538116 219{
6b175b26 220 return __inet_dev_addr_type(net, dev, addr);
05538116
LAT
221}
222
1da177e4
LT
223/* Given (packet source, input interface) and optional (dst, oif, tos):
224 - (main) check, that source is valid i.e. not broadcast or our local
225 address.
226 - figure out what "logical" interface this packet arrived
227 and calculate "specific destination" address.
228 - check, that packet arrived from expected physical interface.
229 */
230
d9c9df8c
AV
231int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
232 struct net_device *dev, __be32 *spec_dst, u32 *itag)
1da177e4
LT
233{
234 struct in_device *in_dev;
235 struct flowi fl = { .nl_u = { .ip4_u =
236 { .daddr = src,
237 .saddr = dst,
238 .tos = tos } },
239 .iif = oif };
240 struct fib_result res;
241 int no_addr, rpf;
242 int ret;
5b707aaa 243 struct net *net;
1da177e4
LT
244
245 no_addr = rpf = 0;
246 rcu_read_lock();
e5ed6399 247 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
248 if (in_dev) {
249 no_addr = in_dev->ifa_list == NULL;
250 rpf = IN_DEV_RPFILTER(in_dev);
251 }
252 rcu_read_unlock();
253
254 if (in_dev == NULL)
255 goto e_inval;
256
c346dca1 257 net = dev_net(dev);
5b707aaa 258 if (fib_lookup(net, &fl, &res))
1da177e4
LT
259 goto last_resort;
260 if (res.type != RTN_UNICAST)
261 goto e_inval_res;
262 *spec_dst = FIB_RES_PREFSRC(res);
263 fib_combine_itag(itag, &res);
264#ifdef CONFIG_IP_ROUTE_MULTIPATH
265 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
266#else
267 if (FIB_RES_DEV(res) == dev)
268#endif
269 {
270 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
271 fib_res_put(&res);
272 return ret;
273 }
274 fib_res_put(&res);
275 if (no_addr)
276 goto last_resort;
c1cf8422 277 if (rpf == 1)
1da177e4
LT
278 goto e_inval;
279 fl.oif = dev->ifindex;
280
281 ret = 0;
5b707aaa 282 if (fib_lookup(net, &fl, &res) == 0) {
1da177e4
LT
283 if (res.type == RTN_UNICAST) {
284 *spec_dst = FIB_RES_PREFSRC(res);
285 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
286 }
287 fib_res_put(&res);
288 }
289 return ret;
290
291last_resort:
292 if (rpf)
293 goto e_inval;
294 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
295 *itag = 0;
296 return 0;
297
298e_inval_res:
299 fib_res_put(&res);
300e_inval:
301 return -EINVAL;
302}
303
81f7bf6c 304static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
305{
306 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
307}
308
309static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
310{
311 struct nlattr *nla;
312
313 nla = (struct nlattr *) ((char *) mx + len);
314 nla->nla_type = type;
315 nla->nla_len = nla_attr_size(4);
316 *(u32 *) nla_data(nla) = value;
317
318 return len + nla_total_size(4);
319}
320
4b5d47d4 321static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
322 struct fib_config *cfg)
323{
6d85c10a 324 __be32 addr;
4e902c57
TG
325 int plen;
326
327 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 328 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
329
330 if (rt->rt_dst.sa_family != AF_INET)
331 return -EAFNOSUPPORT;
332
333 /*
334 * Check mask for validity:
335 * a) it must be contiguous.
336 * b) destination must have all host bits clear.
337 * c) if application forgot to set correct family (AF_INET),
338 * reject request unless it is absolutely clear i.e.
339 * both family and mask are zero.
340 */
341 plen = 32;
342 addr = sk_extract_addr(&rt->rt_dst);
343 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 344 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
345
346 if (rt->rt_genmask.sa_family != AF_INET) {
347 if (mask || rt->rt_genmask.sa_family)
348 return -EAFNOSUPPORT;
349 }
350
351 if (bad_mask(mask, addr))
352 return -EINVAL;
353
354 plen = inet_mask_len(mask);
355 }
356
357 cfg->fc_dst_len = plen;
358 cfg->fc_dst = addr;
359
360 if (cmd != SIOCDELRT) {
361 cfg->fc_nlflags = NLM_F_CREATE;
362 cfg->fc_protocol = RTPROT_BOOT;
363 }
364
365 if (rt->rt_metric)
366 cfg->fc_priority = rt->rt_metric - 1;
367
368 if (rt->rt_flags & RTF_REJECT) {
369 cfg->fc_scope = RT_SCOPE_HOST;
370 cfg->fc_type = RTN_UNREACHABLE;
371 return 0;
372 }
373
374 cfg->fc_scope = RT_SCOPE_NOWHERE;
375 cfg->fc_type = RTN_UNICAST;
376
377 if (rt->rt_dev) {
378 char *colon;
379 struct net_device *dev;
380 char devname[IFNAMSIZ];
381
382 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
383 return -EFAULT;
384
385 devname[IFNAMSIZ-1] = 0;
386 colon = strchr(devname, ':');
387 if (colon)
388 *colon = 0;
4b5d47d4 389 dev = __dev_get_by_name(net, devname);
4e902c57
TG
390 if (!dev)
391 return -ENODEV;
392 cfg->fc_oif = dev->ifindex;
393 if (colon) {
394 struct in_ifaddr *ifa;
395 struct in_device *in_dev = __in_dev_get_rtnl(dev);
396 if (!in_dev)
397 return -ENODEV;
398 *colon = ':';
399 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
400 if (strcmp(ifa->ifa_label, devname) == 0)
401 break;
402 if (ifa == NULL)
403 return -ENODEV;
404 cfg->fc_prefsrc = ifa->ifa_local;
405 }
406 }
407
408 addr = sk_extract_addr(&rt->rt_gateway);
409 if (rt->rt_gateway.sa_family == AF_INET && addr) {
410 cfg->fc_gw = addr;
411 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 412 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
413 cfg->fc_scope = RT_SCOPE_UNIVERSE;
414 }
415
416 if (cmd == SIOCDELRT)
417 return 0;
418
419 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
420 return -EINVAL;
421
422 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
423 cfg->fc_scope = RT_SCOPE_LINK;
424
425 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
426 struct nlattr *mx;
427 int len = 0;
428
429 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 430 if (mx == NULL)
4e902c57
TG
431 return -ENOMEM;
432
433 if (rt->rt_flags & RTF_MTU)
434 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
435
436 if (rt->rt_flags & RTF_WINDOW)
437 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
438
439 if (rt->rt_flags & RTF_IRTT)
440 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
441
442 cfg->fc_mx = mx;
443 cfg->fc_mx_len = len;
444 }
445
446 return 0;
447}
448
1da177e4
LT
449/*
450 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
451 */
e905a9ed 452
1bad118a 453int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 454{
4e902c57
TG
455 struct fib_config cfg;
456 struct rtentry rt;
1da177e4 457 int err;
1da177e4
LT
458
459 switch (cmd) {
460 case SIOCADDRT: /* Add a route */
461 case SIOCDELRT: /* Delete a route */
462 if (!capable(CAP_NET_ADMIN))
463 return -EPERM;
4e902c57
TG
464
465 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 466 return -EFAULT;
4e902c57 467
1da177e4 468 rtnl_lock();
1bad118a 469 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 470 if (err == 0) {
4e902c57
TG
471 struct fib_table *tb;
472
1da177e4 473 if (cmd == SIOCDELRT) {
1bad118a 474 tb = fib_get_table(net, cfg.fc_table);
1da177e4 475 if (tb)
4e902c57
TG
476 err = tb->tb_delete(tb, &cfg);
477 else
478 err = -ESRCH;
1da177e4 479 } else {
1bad118a 480 tb = fib_new_table(net, cfg.fc_table);
1da177e4 481 if (tb)
4e902c57
TG
482 err = tb->tb_insert(tb, &cfg);
483 else
484 err = -ENOBUFS;
1da177e4 485 }
4e902c57
TG
486
487 /* allocated by rtentry_to_fib_config() */
488 kfree(cfg.fc_mx);
1da177e4
LT
489 }
490 rtnl_unlock();
491 return err;
492 }
493 return -EINVAL;
494}
495
ef7c79ed 496const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
497 [RTA_DST] = { .type = NLA_U32 },
498 [RTA_SRC] = { .type = NLA_U32 },
499 [RTA_IIF] = { .type = NLA_U32 },
500 [RTA_OIF] = { .type = NLA_U32 },
501 [RTA_GATEWAY] = { .type = NLA_U32 },
502 [RTA_PRIORITY] = { .type = NLA_U32 },
503 [RTA_PREFSRC] = { .type = NLA_U32 },
504 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 505 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57 506 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
507};
508
4b5d47d4
DL
509static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
510 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 511{
4e902c57
TG
512 struct nlattr *attr;
513 int err, remaining;
514 struct rtmsg *rtm;
515
516 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
517 if (err < 0)
518 goto errout;
519
520 memset(cfg, 0, sizeof(*cfg));
521
522 rtm = nlmsg_data(nlh);
4e902c57 523 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
524 cfg->fc_tos = rtm->rtm_tos;
525 cfg->fc_table = rtm->rtm_table;
526 cfg->fc_protocol = rtm->rtm_protocol;
527 cfg->fc_scope = rtm->rtm_scope;
528 cfg->fc_type = rtm->rtm_type;
529 cfg->fc_flags = rtm->rtm_flags;
530 cfg->fc_nlflags = nlh->nlmsg_flags;
531
532 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
533 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 534 cfg->fc_nlinfo.nl_net = net;
4e902c57 535
a0ee18b9
TG
536 if (cfg->fc_type > RTN_MAX) {
537 err = -EINVAL;
538 goto errout;
539 }
540
4e902c57 541 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 542 switch (nla_type(attr)) {
4e902c57 543 case RTA_DST:
17fb2c64 544 cfg->fc_dst = nla_get_be32(attr);
4e902c57 545 break;
4e902c57
TG
546 case RTA_OIF:
547 cfg->fc_oif = nla_get_u32(attr);
548 break;
549 case RTA_GATEWAY:
17fb2c64 550 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
551 break;
552 case RTA_PRIORITY:
553 cfg->fc_priority = nla_get_u32(attr);
554 break;
555 case RTA_PREFSRC:
17fb2c64 556 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
557 break;
558 case RTA_METRICS:
559 cfg->fc_mx = nla_data(attr);
560 cfg->fc_mx_len = nla_len(attr);
561 break;
562 case RTA_MULTIPATH:
563 cfg->fc_mp = nla_data(attr);
564 cfg->fc_mp_len = nla_len(attr);
565 break;
566 case RTA_FLOW:
567 cfg->fc_flow = nla_get_u32(attr);
568 break;
4e902c57
TG
569 case RTA_TABLE:
570 cfg->fc_table = nla_get_u32(attr);
571 break;
1da177e4
LT
572 }
573 }
4e902c57 574
1da177e4 575 return 0;
4e902c57
TG
576errout:
577 return err;
1da177e4
LT
578}
579
6ed2533e 580static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 581{
3b1e0a65 582 struct net *net = sock_net(skb->sk);
4e902c57
TG
583 struct fib_config cfg;
584 struct fib_table *tb;
585 int err;
1da177e4 586
4b5d47d4 587 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
588 if (err < 0)
589 goto errout;
1da177e4 590
8ad4942c 591 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
592 if (tb == NULL) {
593 err = -ESRCH;
594 goto errout;
595 }
596
597 err = tb->tb_delete(tb, &cfg);
598errout:
599 return err;
1da177e4
LT
600}
601
6ed2533e 602static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 603{
3b1e0a65 604 struct net *net = sock_net(skb->sk);
4e902c57
TG
605 struct fib_config cfg;
606 struct fib_table *tb;
607 int err;
1da177e4 608
4b5d47d4 609 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
610 if (err < 0)
611 goto errout;
1da177e4 612
226b0b4a 613 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
614 if (tb == NULL) {
615 err = -ENOBUFS;
616 goto errout;
617 }
618
619 err = tb->tb_insert(tb, &cfg);
620errout:
621 return err;
1da177e4
LT
622}
623
63f3444f 624static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 625{
3b1e0a65 626 struct net *net = sock_net(skb->sk);
1af5a8c4
PM
627 unsigned int h, s_h;
628 unsigned int e = 0, s_e;
1da177e4 629 struct fib_table *tb;
1af5a8c4 630 struct hlist_node *node;
e4aef8ae 631 struct hlist_head *head;
1af5a8c4 632 int dumped = 0;
1da177e4 633
be403ea1
TG
634 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
635 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
636 return ip_rt_dump(skb, cb);
637
1af5a8c4
PM
638 s_h = cb->args[0];
639 s_e = cb->args[1];
640
641 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
642 e = 0;
e4aef8ae
DL
643 head = &net->ipv4.fib_table_hash[h];
644 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
645 if (e < s_e)
646 goto next;
647 if (dumped)
648 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 649 2 * sizeof(cb->args[0]));
1af5a8c4
PM
650 if (tb->tb_dump(tb, skb, cb) < 0)
651 goto out;
652 dumped = 1;
653next:
654 e++;
655 }
1da177e4 656 }
1af5a8c4
PM
657out:
658 cb->args[1] = e;
659 cb->args[0] = h;
1da177e4
LT
660
661 return skb->len;
662}
663
664/* Prepare and feed intra-kernel routing request.
665 Really, it should be netlink message, but :-( netlink
666 can be not configured, so that we feed it directly
667 to fib engine. It is legal, because all events occur
668 only when netlink is already locked.
669 */
670
81f7bf6c 671static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 672{
c346dca1 673 struct net *net = dev_net(ifa->ifa_dev->dev);
4e902c57
TG
674 struct fib_table *tb;
675 struct fib_config cfg = {
676 .fc_protocol = RTPROT_KERNEL,
677 .fc_type = type,
678 .fc_dst = dst,
679 .fc_dst_len = dst_len,
680 .fc_prefsrc = ifa->ifa_local,
681 .fc_oif = ifa->ifa_dev->dev->ifindex,
682 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 683 .fc_nlinfo = {
4b5d47d4 684 .nl_net = net,
4d1169c1 685 },
4e902c57 686 };
1da177e4
LT
687
688 if (type == RTN_UNICAST)
4b5d47d4 689 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 690 else
4b5d47d4 691 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
692
693 if (tb == NULL)
694 return;
695
4e902c57 696 cfg.fc_table = tb->tb_id;
1da177e4 697
4e902c57
TG
698 if (type != RTN_LOCAL)
699 cfg.fc_scope = RT_SCOPE_LINK;
700 else
701 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
702
703 if (cmd == RTM_NEWROUTE)
4e902c57 704 tb->tb_insert(tb, &cfg);
1da177e4 705 else
4e902c57 706 tb->tb_delete(tb, &cfg);
1da177e4
LT
707}
708
0ff60a45 709void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
710{
711 struct in_device *in_dev = ifa->ifa_dev;
712 struct net_device *dev = in_dev->dev;
713 struct in_ifaddr *prim = ifa;
a144ea4b
AV
714 __be32 mask = ifa->ifa_mask;
715 __be32 addr = ifa->ifa_local;
716 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
717
718 if (ifa->ifa_flags&IFA_F_SECONDARY) {
719 prim = inet_ifa_byprefix(in_dev, prefix, mask);
720 if (prim == NULL) {
a6db9010 721 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
722 return;
723 }
724 }
725
726 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
727
728 if (!(dev->flags&IFF_UP))
729 return;
730
731 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 732 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
733 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
734
f97c1e0c 735 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
736 (prefix != addr || ifa->ifa_prefixlen < 32)) {
737 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
738 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
739
740 /* Add network specific broadcasts, when it takes a sense */
741 if (ifa->ifa_prefixlen < 31) {
742 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
743 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
744 }
745 }
746}
747
748static void fib_del_ifaddr(struct in_ifaddr *ifa)
749{
750 struct in_device *in_dev = ifa->ifa_dev;
751 struct net_device *dev = in_dev->dev;
752 struct in_ifaddr *ifa1;
753 struct in_ifaddr *prim = ifa;
a144ea4b
AV
754 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
755 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
756#define LOCAL_OK 1
757#define BRD_OK 2
758#define BRD0_OK 4
759#define BRD1_OK 8
760 unsigned ok = 0;
761
762 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
763 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
764 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
765 else {
766 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
767 if (prim == NULL) {
a6db9010 768 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
769 return;
770 }
771 }
772
773 /* Deletion is more complicated than add.
774 We should take care of not to delete too much :-)
775
776 Scan address list to be sure that addresses are really gone.
777 */
778
779 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
780 if (ifa->ifa_local == ifa1->ifa_local)
781 ok |= LOCAL_OK;
782 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
783 ok |= BRD_OK;
784 if (brd == ifa1->ifa_broadcast)
785 ok |= BRD1_OK;
786 if (any == ifa1->ifa_broadcast)
787 ok |= BRD0_OK;
788 }
789
790 if (!(ok&BRD_OK))
791 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
792 if (!(ok&BRD1_OK))
793 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
794 if (!(ok&BRD0_OK))
795 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
796 if (!(ok&LOCAL_OK)) {
797 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
798
799 /* Check, that this local address finally disappeared. */
c346dca1 800 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
801 /* And the last, but not the least thing.
802 We must flush stray FIB entries.
803
804 First of all, we scan fib_info list searching
805 for stray nexthop entries, then ignite fib_flush.
806 */
c346dca1
YH
807 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
808 fib_flush(dev_net(dev));
1da177e4
LT
809 }
810 }
811#undef LOCAL_OK
812#undef BRD_OK
813#undef BRD0_OK
814#undef BRD1_OK
815}
816
246955fe
RO
817static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
818{
e905a9ed 819
246955fe 820 struct fib_result res;
5f300893 821 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 822 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
823 .tos = frn->fl_tos,
824 .scope = frn->fl_scope } } };
1194ed0a 825
912a41a4
SV
826#ifdef CONFIG_IP_MULTIPLE_TABLES
827 res.r = NULL;
828#endif
829
1194ed0a 830 frn->err = -ENOENT;
246955fe
RO
831 if (tb) {
832 local_bh_disable();
833
834 frn->tb_id = tb->tb_id;
835 frn->err = tb->tb_lookup(tb, &fl, &res);
836
837 if (!frn->err) {
838 frn->prefixlen = res.prefixlen;
839 frn->nh_sel = res.nh_sel;
840 frn->type = res.type;
841 frn->scope = res.scope;
1194ed0a 842 fib_res_put(&res);
246955fe
RO
843 }
844 local_bh_enable();
845 }
846}
847
28f7b036 848static void nl_fib_input(struct sk_buff *skb)
246955fe 849{
6bd48fcf 850 struct net *net;
246955fe 851 struct fib_result_nl *frn;
28f7b036 852 struct nlmsghdr *nlh;
246955fe 853 struct fib_table *tb;
28f7b036 854 u32 pid;
1194ed0a 855
3b1e0a65 856 net = sock_net(skb->sk);
b529ccf2 857 nlh = nlmsg_hdr(skb);
ea86575e 858 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 859 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 860 return;
d883a036
DL
861
862 skb = skb_clone(skb, GFP_KERNEL);
863 if (skb == NULL)
864 return;
865 nlh = nlmsg_hdr(skb);
e905a9ed 866
246955fe 867 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 868 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
869
870 nl_fib_lookup(frn, tb);
e905a9ed 871
1194ed0a 872 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 873 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 874 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 875 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 876}
246955fe 877
7b1a74fd 878static int nl_fib_lookup_init(struct net *net)
246955fe 879{
6bd48fcf
DL
880 struct sock *sk;
881 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
882 nl_fib_input, NULL, THIS_MODULE);
883 if (sk == NULL)
7b1a74fd 884 return -EAFNOSUPPORT;
6bd48fcf 885 net->ipv4.fibnl = sk;
7b1a74fd
DL
886 return 0;
887}
888
889static void nl_fib_lookup_exit(struct net *net)
890{
b7c6ba6e 891 netlink_kernel_release(net->ipv4.fibnl);
775516bf 892 net->ipv4.fibnl = NULL;
246955fe
RO
893}
894
1da177e4
LT
/*
 * Take IP down on a device: sync the FIB for the device going down (and
 * flush the FIB if that reports a non-zero result), then unconditionally
 * flush the routing cache and call arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down_dev(dev, force);

	if (need_flush)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), 0);
	arp_ifdown(dev);
}
902
903static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
904{
6ed2533e 905 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
76e6ebfb 906 struct net_device *dev = ifa->ifa_dev->dev;
1da177e4
LT
907
908 switch (event) {
909 case NETDEV_UP:
910 fib_add_ifaddr(ifa);
911#ifdef CONFIG_IP_ROUTE_MULTIPATH
76e6ebfb 912 fib_sync_up(dev);
1da177e4 913#endif
76e6ebfb 914 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
915 break;
916 case NETDEV_DOWN:
917 fib_del_ifaddr(ifa);
9fcc2e8a 918 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
919 /* Last address was deleted from this interface.
920 Disable IP.
921 */
76e6ebfb 922 fib_disable_ip(dev, 1);
1da177e4 923 } else {
76e6ebfb 924 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
925 }
926 break;
927 }
928 return NOTIFY_DONE;
929}
930
931static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
932{
933 struct net_device *dev = ptr;
e5ed6399 934 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
935
936 if (event == NETDEV_UNREGISTER) {
937 fib_disable_ip(dev, 2);
938 return NOTIFY_DONE;
939 }
940
941 if (!in_dev)
942 return NOTIFY_DONE;
943
944 switch (event) {
945 case NETDEV_UP:
946 for_ifa(in_dev) {
947 fib_add_ifaddr(ifa);
948 } endfor_ifa(in_dev);
949#ifdef CONFIG_IP_ROUTE_MULTIPATH
950 fib_sync_up(dev);
951#endif
76e6ebfb 952 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
953 break;
954 case NETDEV_DOWN:
955 fib_disable_ip(dev, 0);
956 break;
957 case NETDEV_CHANGEMTU:
958 case NETDEV_CHANGE:
76e6ebfb 959 rt_cache_flush(dev_net(dev), 0);
1da177e4
LT
960 break;
961 }
962 return NOTIFY_DONE;
963}
964
965static struct notifier_block fib_inetaddr_notifier = {
6ed2533e 966 .notifier_call = fib_inetaddr_event,
1da177e4
LT
967};
968
969static struct notifier_block fib_netdev_notifier = {
6ed2533e 970 .notifier_call = fib_netdev_event,
1da177e4
LT
971};
972
7b1a74fd 973static int __net_init ip_fib_net_init(struct net *net)
1da177e4 974{
dce5cbee 975 int err;
1af5a8c4
PM
976 unsigned int i;
977
e4aef8ae
DL
978 net->ipv4.fib_table_hash = kzalloc(
979 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
980 if (net->ipv4.fib_table_hash == NULL)
981 return -ENOMEM;
982
1af5a8c4 983 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 984 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 985
dce5cbee
DL
986 err = fib4_rules_init(net);
987 if (err < 0)
988 goto fail;
989 return 0;
990
991fail:
992 kfree(net->ipv4.fib_table_hash);
993 return err;
7b1a74fd 994}
1da177e4 995
7b1a74fd
DL
996static void __net_exit ip_fib_net_exit(struct net *net)
997{
998 unsigned int i;
999
1000#ifdef CONFIG_IP_MULTIPLE_TABLES
1001 fib4_rules_exit(net);
1002#endif
1003
1004 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1005 struct fib_table *tb;
1006 struct hlist_head *head;
1007 struct hlist_node *node, *tmp;
63f3444f 1008
e4aef8ae 1009 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
1010 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1011 hlist_del(node);
1012 tb->tb_flush(tb);
1013 kfree(tb);
1014 }
1015 }
e4aef8ae 1016 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1017}
1018
1019static int __net_init fib_net_init(struct net *net)
1020{
1021 int error;
1022
7b1a74fd
DL
1023 error = ip_fib_net_init(net);
1024 if (error < 0)
1025 goto out;
1026 error = nl_fib_lookup_init(net);
1027 if (error < 0)
1028 goto out_nlfl;
1029 error = fib_proc_init(net);
1030 if (error < 0)
1031 goto out_proc;
1032out:
1033 return error;
1034
1035out_proc:
1036 nl_fib_lookup_exit(net);
1037out_nlfl:
1038 ip_fib_net_exit(net);
1039 goto out;
1040}
1041
1042static void __net_exit fib_net_exit(struct net *net)
1043{
1044 fib_proc_exit(net);
1045 nl_fib_lookup_exit(net);
1046 ip_fib_net_exit(net);
1047}
1048
1049static struct pernet_operations fib_net_ops = {
1050 .init = fib_net_init,
1051 .exit = fib_net_exit,
1052};
1053
1054void __init ip_fib_init(void)
1055{
63f3444f
TG
1056 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1057 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1058 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1059
1060 register_pernet_subsys(&fib_net_ops);
1061 register_netdevice_notifier(&fib_netdev_notifier);
1062 register_inetaddr_notifier(&fib_inetaddr_notifier);
7f9b8052
SH
1063
1064 fib_hash_init();
1da177e4
LT
1065}
1066
1067EXPORT_SYMBOL(inet_addr_type);
05538116 1068EXPORT_SYMBOL(inet_dev_addr_type);
a1e8733e 1069EXPORT_SYMBOL(ip_dev_find);