]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
tun: Allow GSO using virtio_net_hdr
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
1da177e4
LT
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
1da177e4
LT
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
4fc268d2 20#include <linux/capability.h>
1da177e4
LT
21#include <linux/types.h>
22#include <linux/kernel.h>
1da177e4
LT
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
14c85021 30#include <linux/inetdevice.h>
1da177e4 31#include <linux/netdevice.h>
1823730f 32#include <linux/if_addr.h>
1da177e4
LT
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
1da177e4 35#include <linux/init.h>
1af5a8c4 36#include <linux/list.h>
1da177e4
LT
37
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/icmp.h>
44#include <net/arp.h>
45#include <net/ip_fib.h>
63f3444f 46#include <net/rtnetlink.h>
1da177e4 47
1da177e4
LT
48#ifndef CONFIG_IP_MULTIPLE_TABLES
49
7b1a74fd 50static int __net_init fib4_rules_init(struct net *net)
c3e9a353 51{
93456b6d
DL
52 struct fib_table *local_table, *main_table;
53
7f9b8052 54 local_table = fib_hash_table(RT_TABLE_LOCAL);
93456b6d 55 if (local_table == NULL)
dbb50165
DL
56 return -ENOMEM;
57
7f9b8052 58 main_table = fib_hash_table(RT_TABLE_MAIN);
93456b6d 59 if (main_table == NULL)
dbb50165
DL
60 goto fail;
61
93456b6d 62 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 64 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
66 return 0;
67
68fail:
93456b6d 69 kfree(local_table);
dbb50165 70 return -ENOMEM;
c3e9a353 71}
1af5a8c4 72#else
1da177e4 73
8ad4942c 74struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
75{
76 struct fib_table *tb;
1af5a8c4 77 unsigned int h;
1da177e4 78
1af5a8c4
PM
79 if (id == 0)
80 id = RT_TABLE_MAIN;
8ad4942c 81 tb = fib_get_table(net, id);
1af5a8c4
PM
82 if (tb)
83 return tb;
7f9b8052
SH
84
85 tb = fib_hash_table(id);
1da177e4
LT
86 if (!tb)
87 return NULL;
1af5a8c4 88 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
90 return tb;
91}
92
8ad4942c 93struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
94{
95 struct fib_table *tb;
96 struct hlist_node *node;
e4aef8ae 97 struct hlist_head *head;
1af5a8c4 98 unsigned int h;
1da177e4 99
1af5a8c4
PM
100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 103
1af5a8c4 104 rcu_read_lock();
e4aef8ae
DL
105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
1da177e4
LT
115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
010278ec
DL
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
64c2d538
DL
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
010278ec 127 tb = fib_get_table(net, table);
64c2d538
DL
128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129 tb->tb_select_default(tb, flp, res);
130}
131
e4aef8ae 132static void fib_flush(struct net *net)
1da177e4
LT
133{
134 int flushed = 0;
1da177e4 135 struct fib_table *tb;
1af5a8c4 136 struct hlist_node *node;
e4aef8ae 137 struct hlist_head *head;
1af5a8c4 138 unsigned int h;
1da177e4 139
1af5a8c4 140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
1af5a8c4 143 flushed += tb->tb_flush(tb);
1da177e4 144 }
1da177e4
LT
145
146 if (flushed)
147 rt_cache_flush(-1);
148}
149
150/*
151 * Find the first device with a given source address.
152 */
153
1ab35276 154struct net_device * ip_dev_find(struct net *net, __be32 addr)
1da177e4
LT
155{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
03cf786c 159 struct fib_table *local_table;
1da177e4
LT
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
1ab35276 165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 166 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
1da177e4
LT
167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
171
172 if (dev)
173 dev_hold(dev);
174out:
175 fib_res_put(&res);
176 return dev;
177}
178
05538116
LAT
179/*
180 * Find address type as if only "dev" was present in the system. If
181 * dev is NULL then all interfaces are taken into consideration.
182 */
6b175b26
EB
183static inline unsigned __inet_dev_addr_type(struct net *net,
184 const struct net_device *dev,
05538116 185 __be32 addr)
1da177e4
LT
186{
187 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
188 struct fib_result res;
189 unsigned ret = RTN_BROADCAST;
03cf786c 190 struct fib_table *local_table;
1da177e4 191
1e637c74 192 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1da177e4 193 return RTN_BROADCAST;
f97c1e0c 194 if (ipv4_is_multicast(addr))
1da177e4
LT
195 return RTN_MULTICAST;
196
197#ifdef CONFIG_IP_MULTIPLE_TABLES
198 res.r = NULL;
199#endif
e905a9ed 200
6b175b26 201 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 202 if (local_table) {
1da177e4 203 ret = RTN_UNICAST;
03cf786c 204 if (!local_table->tb_lookup(local_table, &fl, &res)) {
05538116
LAT
205 if (!dev || dev == res.fi->fib_dev)
206 ret = res.type;
1da177e4
LT
207 fib_res_put(&res);
208 }
209 }
210 return ret;
211}
212
6b175b26 213unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 214{
6b175b26 215 return __inet_dev_addr_type(net, NULL, addr);
05538116
LAT
216}
217
6b175b26
EB
218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
219 __be32 addr)
05538116 220{
6b175b26 221 return __inet_dev_addr_type(net, dev, addr);
05538116
LAT
222}
223
1da177e4
LT
224/* Given (packet source, input interface) and optional (dst, oif, tos):
225 - (main) check, that source is valid i.e. not broadcast or our local
226 address.
227 - figure out what "logical" interface this packet arrived
228 and calculate "specific destination" address.
229 - check, that packet arrived from expected physical interface.
230 */
231
d9c9df8c
AV
232int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
233 struct net_device *dev, __be32 *spec_dst, u32 *itag)
1da177e4
LT
234{
235 struct in_device *in_dev;
236 struct flowi fl = { .nl_u = { .ip4_u =
237 { .daddr = src,
238 .saddr = dst,
239 .tos = tos } },
240 .iif = oif };
241 struct fib_result res;
242 int no_addr, rpf;
243 int ret;
5b707aaa 244 struct net *net;
1da177e4
LT
245
246 no_addr = rpf = 0;
247 rcu_read_lock();
e5ed6399 248 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
249 if (in_dev) {
250 no_addr = in_dev->ifa_list == NULL;
251 rpf = IN_DEV_RPFILTER(in_dev);
252 }
253 rcu_read_unlock();
254
255 if (in_dev == NULL)
256 goto e_inval;
257
c346dca1 258 net = dev_net(dev);
5b707aaa 259 if (fib_lookup(net, &fl, &res))
1da177e4
LT
260 goto last_resort;
261 if (res.type != RTN_UNICAST)
262 goto e_inval_res;
263 *spec_dst = FIB_RES_PREFSRC(res);
264 fib_combine_itag(itag, &res);
265#ifdef CONFIG_IP_ROUTE_MULTIPATH
266 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
267#else
268 if (FIB_RES_DEV(res) == dev)
269#endif
270 {
271 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
272 fib_res_put(&res);
273 return ret;
274 }
275 fib_res_put(&res);
276 if (no_addr)
277 goto last_resort;
278 if (rpf)
279 goto e_inval;
280 fl.oif = dev->ifindex;
281
282 ret = 0;
5b707aaa 283 if (fib_lookup(net, &fl, &res) == 0) {
1da177e4
LT
284 if (res.type == RTN_UNICAST) {
285 *spec_dst = FIB_RES_PREFSRC(res);
286 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
287 }
288 fib_res_put(&res);
289 }
290 return ret;
291
292last_resort:
293 if (rpf)
294 goto e_inval;
295 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
296 *itag = 0;
297 return 0;
298
299e_inval_res:
300 fib_res_put(&res);
301e_inval:
302 return -EINVAL;
303}
304
81f7bf6c 305static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
306{
307 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
308}
309
310static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
311{
312 struct nlattr *nla;
313
314 nla = (struct nlattr *) ((char *) mx + len);
315 nla->nla_type = type;
316 nla->nla_len = nla_attr_size(4);
317 *(u32 *) nla_data(nla) = value;
318
319 return len + nla_total_size(4);
320}
321
4b5d47d4 322static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
323 struct fib_config *cfg)
324{
6d85c10a 325 __be32 addr;
4e902c57
TG
326 int plen;
327
328 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 329 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
330
331 if (rt->rt_dst.sa_family != AF_INET)
332 return -EAFNOSUPPORT;
333
334 /*
335 * Check mask for validity:
336 * a) it must be contiguous.
337 * b) destination must have all host bits clear.
338 * c) if application forgot to set correct family (AF_INET),
339 * reject request unless it is absolutely clear i.e.
340 * both family and mask are zero.
341 */
342 plen = 32;
343 addr = sk_extract_addr(&rt->rt_dst);
344 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 345 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
346
347 if (rt->rt_genmask.sa_family != AF_INET) {
348 if (mask || rt->rt_genmask.sa_family)
349 return -EAFNOSUPPORT;
350 }
351
352 if (bad_mask(mask, addr))
353 return -EINVAL;
354
355 plen = inet_mask_len(mask);
356 }
357
358 cfg->fc_dst_len = plen;
359 cfg->fc_dst = addr;
360
361 if (cmd != SIOCDELRT) {
362 cfg->fc_nlflags = NLM_F_CREATE;
363 cfg->fc_protocol = RTPROT_BOOT;
364 }
365
366 if (rt->rt_metric)
367 cfg->fc_priority = rt->rt_metric - 1;
368
369 if (rt->rt_flags & RTF_REJECT) {
370 cfg->fc_scope = RT_SCOPE_HOST;
371 cfg->fc_type = RTN_UNREACHABLE;
372 return 0;
373 }
374
375 cfg->fc_scope = RT_SCOPE_NOWHERE;
376 cfg->fc_type = RTN_UNICAST;
377
378 if (rt->rt_dev) {
379 char *colon;
380 struct net_device *dev;
381 char devname[IFNAMSIZ];
382
383 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
384 return -EFAULT;
385
386 devname[IFNAMSIZ-1] = 0;
387 colon = strchr(devname, ':');
388 if (colon)
389 *colon = 0;
4b5d47d4 390 dev = __dev_get_by_name(net, devname);
4e902c57
TG
391 if (!dev)
392 return -ENODEV;
393 cfg->fc_oif = dev->ifindex;
394 if (colon) {
395 struct in_ifaddr *ifa;
396 struct in_device *in_dev = __in_dev_get_rtnl(dev);
397 if (!in_dev)
398 return -ENODEV;
399 *colon = ':';
400 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
401 if (strcmp(ifa->ifa_label, devname) == 0)
402 break;
403 if (ifa == NULL)
404 return -ENODEV;
405 cfg->fc_prefsrc = ifa->ifa_local;
406 }
407 }
408
409 addr = sk_extract_addr(&rt->rt_gateway);
410 if (rt->rt_gateway.sa_family == AF_INET && addr) {
411 cfg->fc_gw = addr;
412 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 413 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
414 cfg->fc_scope = RT_SCOPE_UNIVERSE;
415 }
416
417 if (cmd == SIOCDELRT)
418 return 0;
419
420 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
421 return -EINVAL;
422
423 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
424 cfg->fc_scope = RT_SCOPE_LINK;
425
426 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
427 struct nlattr *mx;
428 int len = 0;
429
430 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 431 if (mx == NULL)
4e902c57
TG
432 return -ENOMEM;
433
434 if (rt->rt_flags & RTF_MTU)
435 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
436
437 if (rt->rt_flags & RTF_WINDOW)
438 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
439
440 if (rt->rt_flags & RTF_IRTT)
441 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
442
443 cfg->fc_mx = mx;
444 cfg->fc_mx_len = len;
445 }
446
447 return 0;
448}
449
1da177e4
LT
450/*
451 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
452 */
e905a9ed 453
1bad118a 454int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 455{
4e902c57
TG
456 struct fib_config cfg;
457 struct rtentry rt;
1da177e4 458 int err;
1da177e4
LT
459
460 switch (cmd) {
461 case SIOCADDRT: /* Add a route */
462 case SIOCDELRT: /* Delete a route */
463 if (!capable(CAP_NET_ADMIN))
464 return -EPERM;
4e902c57
TG
465
466 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 467 return -EFAULT;
4e902c57 468
1da177e4 469 rtnl_lock();
1bad118a 470 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 471 if (err == 0) {
4e902c57
TG
472 struct fib_table *tb;
473
1da177e4 474 if (cmd == SIOCDELRT) {
1bad118a 475 tb = fib_get_table(net, cfg.fc_table);
1da177e4 476 if (tb)
4e902c57
TG
477 err = tb->tb_delete(tb, &cfg);
478 else
479 err = -ESRCH;
1da177e4 480 } else {
1bad118a 481 tb = fib_new_table(net, cfg.fc_table);
1da177e4 482 if (tb)
4e902c57
TG
483 err = tb->tb_insert(tb, &cfg);
484 else
485 err = -ENOBUFS;
1da177e4 486 }
4e902c57
TG
487
488 /* allocated by rtentry_to_fib_config() */
489 kfree(cfg.fc_mx);
1da177e4
LT
490 }
491 rtnl_unlock();
492 return err;
493 }
494 return -EINVAL;
495}
496
ef7c79ed 497const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
498 [RTA_DST] = { .type = NLA_U32 },
499 [RTA_SRC] = { .type = NLA_U32 },
500 [RTA_IIF] = { .type = NLA_U32 },
501 [RTA_OIF] = { .type = NLA_U32 },
502 [RTA_GATEWAY] = { .type = NLA_U32 },
503 [RTA_PRIORITY] = { .type = NLA_U32 },
504 [RTA_PREFSRC] = { .type = NLA_U32 },
505 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 506 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57 507 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
508};
509
4b5d47d4
DL
510static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
511 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 512{
4e902c57
TG
513 struct nlattr *attr;
514 int err, remaining;
515 struct rtmsg *rtm;
516
517 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
518 if (err < 0)
519 goto errout;
520
521 memset(cfg, 0, sizeof(*cfg));
522
523 rtm = nlmsg_data(nlh);
4e902c57 524 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
525 cfg->fc_tos = rtm->rtm_tos;
526 cfg->fc_table = rtm->rtm_table;
527 cfg->fc_protocol = rtm->rtm_protocol;
528 cfg->fc_scope = rtm->rtm_scope;
529 cfg->fc_type = rtm->rtm_type;
530 cfg->fc_flags = rtm->rtm_flags;
531 cfg->fc_nlflags = nlh->nlmsg_flags;
532
533 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
534 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 535 cfg->fc_nlinfo.nl_net = net;
4e902c57 536
a0ee18b9
TG
537 if (cfg->fc_type > RTN_MAX) {
538 err = -EINVAL;
539 goto errout;
540 }
541
4e902c57 542 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 543 switch (nla_type(attr)) {
4e902c57 544 case RTA_DST:
17fb2c64 545 cfg->fc_dst = nla_get_be32(attr);
4e902c57 546 break;
4e902c57
TG
547 case RTA_OIF:
548 cfg->fc_oif = nla_get_u32(attr);
549 break;
550 case RTA_GATEWAY:
17fb2c64 551 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
552 break;
553 case RTA_PRIORITY:
554 cfg->fc_priority = nla_get_u32(attr);
555 break;
556 case RTA_PREFSRC:
17fb2c64 557 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
558 break;
559 case RTA_METRICS:
560 cfg->fc_mx = nla_data(attr);
561 cfg->fc_mx_len = nla_len(attr);
562 break;
563 case RTA_MULTIPATH:
564 cfg->fc_mp = nla_data(attr);
565 cfg->fc_mp_len = nla_len(attr);
566 break;
567 case RTA_FLOW:
568 cfg->fc_flow = nla_get_u32(attr);
569 break;
4e902c57
TG
570 case RTA_TABLE:
571 cfg->fc_table = nla_get_u32(attr);
572 break;
1da177e4
LT
573 }
574 }
4e902c57 575
1da177e4 576 return 0;
4e902c57
TG
577errout:
578 return err;
1da177e4
LT
579}
580
63f3444f 581static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 582{
3b1e0a65 583 struct net *net = sock_net(skb->sk);
4e902c57
TG
584 struct fib_config cfg;
585 struct fib_table *tb;
586 int err;
1da177e4 587
4b5d47d4 588 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
589 if (err < 0)
590 goto errout;
1da177e4 591
8ad4942c 592 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
593 if (tb == NULL) {
594 err = -ESRCH;
595 goto errout;
596 }
597
598 err = tb->tb_delete(tb, &cfg);
599errout:
600 return err;
1da177e4
LT
601}
602
63f3444f 603static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 604{
3b1e0a65 605 struct net *net = sock_net(skb->sk);
4e902c57
TG
606 struct fib_config cfg;
607 struct fib_table *tb;
608 int err;
1da177e4 609
4b5d47d4 610 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
611 if (err < 0)
612 goto errout;
1da177e4 613
226b0b4a 614 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
615 if (tb == NULL) {
616 err = -ENOBUFS;
617 goto errout;
618 }
619
620 err = tb->tb_insert(tb, &cfg);
621errout:
622 return err;
1da177e4
LT
623}
624
63f3444f 625static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 626{
3b1e0a65 627 struct net *net = sock_net(skb->sk);
1af5a8c4
PM
628 unsigned int h, s_h;
629 unsigned int e = 0, s_e;
1da177e4 630 struct fib_table *tb;
1af5a8c4 631 struct hlist_node *node;
e4aef8ae 632 struct hlist_head *head;
1af5a8c4 633 int dumped = 0;
1da177e4 634
be403ea1
TG
635 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
636 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
637 return ip_rt_dump(skb, cb);
638
1af5a8c4
PM
639 s_h = cb->args[0];
640 s_e = cb->args[1];
641
642 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
643 e = 0;
e4aef8ae
DL
644 head = &net->ipv4.fib_table_hash[h];
645 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
646 if (e < s_e)
647 goto next;
648 if (dumped)
649 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 650 2 * sizeof(cb->args[0]));
1af5a8c4
PM
651 if (tb->tb_dump(tb, skb, cb) < 0)
652 goto out;
653 dumped = 1;
654next:
655 e++;
656 }
1da177e4 657 }
1af5a8c4
PM
658out:
659 cb->args[1] = e;
660 cb->args[0] = h;
1da177e4
LT
661
662 return skb->len;
663}
664
665/* Prepare and feed intra-kernel routing request.
666 Really, it should be netlink message, but :-( netlink
667 can be not configured, so that we feed it directly
668 to fib engine. It is legal, because all events occur
669 only when netlink is already locked.
670 */
671
81f7bf6c 672static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 673{
c346dca1 674 struct net *net = dev_net(ifa->ifa_dev->dev);
4e902c57
TG
675 struct fib_table *tb;
676 struct fib_config cfg = {
677 .fc_protocol = RTPROT_KERNEL,
678 .fc_type = type,
679 .fc_dst = dst,
680 .fc_dst_len = dst_len,
681 .fc_prefsrc = ifa->ifa_local,
682 .fc_oif = ifa->ifa_dev->dev->ifindex,
683 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 684 .fc_nlinfo = {
4b5d47d4 685 .nl_net = net,
4d1169c1 686 },
4e902c57 687 };
1da177e4
LT
688
689 if (type == RTN_UNICAST)
4b5d47d4 690 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 691 else
4b5d47d4 692 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
693
694 if (tb == NULL)
695 return;
696
4e902c57 697 cfg.fc_table = tb->tb_id;
1da177e4 698
4e902c57
TG
699 if (type != RTN_LOCAL)
700 cfg.fc_scope = RT_SCOPE_LINK;
701 else
702 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
703
704 if (cmd == RTM_NEWROUTE)
4e902c57 705 tb->tb_insert(tb, &cfg);
1da177e4 706 else
4e902c57 707 tb->tb_delete(tb, &cfg);
1da177e4
LT
708}
709
0ff60a45 710void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
711{
712 struct in_device *in_dev = ifa->ifa_dev;
713 struct net_device *dev = in_dev->dev;
714 struct in_ifaddr *prim = ifa;
a144ea4b
AV
715 __be32 mask = ifa->ifa_mask;
716 __be32 addr = ifa->ifa_local;
717 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
718
719 if (ifa->ifa_flags&IFA_F_SECONDARY) {
720 prim = inet_ifa_byprefix(in_dev, prefix, mask);
721 if (prim == NULL) {
a6db9010 722 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
723 return;
724 }
725 }
726
727 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
728
729 if (!(dev->flags&IFF_UP))
730 return;
731
732 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 733 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
734 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
735
f97c1e0c 736 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
737 (prefix != addr || ifa->ifa_prefixlen < 32)) {
738 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
739 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
740
741 /* Add network specific broadcasts, when it takes a sense */
742 if (ifa->ifa_prefixlen < 31) {
743 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
744 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
745 }
746 }
747}
748
749static void fib_del_ifaddr(struct in_ifaddr *ifa)
750{
751 struct in_device *in_dev = ifa->ifa_dev;
752 struct net_device *dev = in_dev->dev;
753 struct in_ifaddr *ifa1;
754 struct in_ifaddr *prim = ifa;
a144ea4b
AV
755 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
756 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
757#define LOCAL_OK 1
758#define BRD_OK 2
759#define BRD0_OK 4
760#define BRD1_OK 8
761 unsigned ok = 0;
762
763 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
764 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
765 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
766 else {
767 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
768 if (prim == NULL) {
a6db9010 769 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
770 return;
771 }
772 }
773
774 /* Deletion is more complicated than add.
775 We should take care of not to delete too much :-)
776
777 Scan address list to be sure that addresses are really gone.
778 */
779
780 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
781 if (ifa->ifa_local == ifa1->ifa_local)
782 ok |= LOCAL_OK;
783 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
784 ok |= BRD_OK;
785 if (brd == ifa1->ifa_broadcast)
786 ok |= BRD1_OK;
787 if (any == ifa1->ifa_broadcast)
788 ok |= BRD0_OK;
789 }
790
791 if (!(ok&BRD_OK))
792 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
793 if (!(ok&BRD1_OK))
794 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
795 if (!(ok&BRD0_OK))
796 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
797 if (!(ok&LOCAL_OK)) {
798 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
799
800 /* Check, that this local address finally disappeared. */
c346dca1 801 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
802 /* And the last, but not the least thing.
803 We must flush stray FIB entries.
804
805 First of all, we scan fib_info list searching
806 for stray nexthop entries, then ignite fib_flush.
807 */
c346dca1
YH
808 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
809 fib_flush(dev_net(dev));
1da177e4
LT
810 }
811 }
812#undef LOCAL_OK
813#undef BRD_OK
814#undef BRD0_OK
815#undef BRD1_OK
816}
817
246955fe
RO
818static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
819{
e905a9ed 820
246955fe 821 struct fib_result res;
5f300893 822 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 823 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
824 .tos = frn->fl_tos,
825 .scope = frn->fl_scope } } };
1194ed0a 826
912a41a4
SV
827#ifdef CONFIG_IP_MULTIPLE_TABLES
828 res.r = NULL;
829#endif
830
1194ed0a 831 frn->err = -ENOENT;
246955fe
RO
832 if (tb) {
833 local_bh_disable();
834
835 frn->tb_id = tb->tb_id;
836 frn->err = tb->tb_lookup(tb, &fl, &res);
837
838 if (!frn->err) {
839 frn->prefixlen = res.prefixlen;
840 frn->nh_sel = res.nh_sel;
841 frn->type = res.type;
842 frn->scope = res.scope;
1194ed0a 843 fib_res_put(&res);
246955fe
RO
844 }
845 local_bh_enable();
846 }
847}
848
28f7b036 849static void nl_fib_input(struct sk_buff *skb)
246955fe 850{
6bd48fcf 851 struct net *net;
246955fe 852 struct fib_result_nl *frn;
28f7b036 853 struct nlmsghdr *nlh;
246955fe 854 struct fib_table *tb;
28f7b036 855 u32 pid;
1194ed0a 856
3b1e0a65 857 net = sock_net(skb->sk);
b529ccf2 858 nlh = nlmsg_hdr(skb);
ea86575e 859 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 860 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 861 return;
d883a036
DL
862
863 skb = skb_clone(skb, GFP_KERNEL);
864 if (skb == NULL)
865 return;
866 nlh = nlmsg_hdr(skb);
e905a9ed 867
246955fe 868 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 869 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
870
871 nl_fib_lookup(frn, tb);
e905a9ed 872
1194ed0a 873 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 874 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 875 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 876 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 877}
246955fe 878
7b1a74fd 879static int nl_fib_lookup_init(struct net *net)
246955fe 880{
6bd48fcf
DL
881 struct sock *sk;
882 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
883 nl_fib_input, NULL, THIS_MODULE);
884 if (sk == NULL)
7b1a74fd 885 return -EAFNOSUPPORT;
6bd48fcf 886 net->ipv4.fibnl = sk;
7b1a74fd
DL
887 return 0;
888}
889
890static void nl_fib_lookup_exit(struct net *net)
891{
b7c6ba6e 892 netlink_kernel_release(net->ipv4.fibnl);
775516bf 893 net->ipv4.fibnl = NULL;
246955fe
RO
894}
895
1da177e4
LT
/*
 * Take IPv4 routing down on @dev: sync the FIB for the device (flushing the
 * tables if that reports changes), then flush the route cache and call
 * arp_ifdown() for the device.  @force is passed through to
 * fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int changed = fib_sync_down_dev(dev, force);

	if (changed)
		fib_flush(dev_net(dev));

	rt_cache_flush(0);
	arp_ifdown(dev);
}
903
904static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
905{
906 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
907
908 switch (event) {
909 case NETDEV_UP:
910 fib_add_ifaddr(ifa);
911#ifdef CONFIG_IP_ROUTE_MULTIPATH
912 fib_sync_up(ifa->ifa_dev->dev);
913#endif
914 rt_cache_flush(-1);
915 break;
916 case NETDEV_DOWN:
917 fib_del_ifaddr(ifa);
9fcc2e8a 918 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
919 /* Last address was deleted from this interface.
920 Disable IP.
921 */
922 fib_disable_ip(ifa->ifa_dev->dev, 1);
923 } else {
924 rt_cache_flush(-1);
925 }
926 break;
927 }
928 return NOTIFY_DONE;
929}
930
931static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
932{
933 struct net_device *dev = ptr;
e5ed6399 934 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
935
936 if (event == NETDEV_UNREGISTER) {
937 fib_disable_ip(dev, 2);
938 return NOTIFY_DONE;
939 }
940
941 if (!in_dev)
942 return NOTIFY_DONE;
943
944 switch (event) {
945 case NETDEV_UP:
946 for_ifa(in_dev) {
947 fib_add_ifaddr(ifa);
948 } endfor_ifa(in_dev);
949#ifdef CONFIG_IP_ROUTE_MULTIPATH
950 fib_sync_up(dev);
951#endif
952 rt_cache_flush(-1);
953 break;
954 case NETDEV_DOWN:
955 fib_disable_ip(dev, 0);
956 break;
957 case NETDEV_CHANGEMTU:
958 case NETDEV_CHANGE:
959 rt_cache_flush(0);
960 break;
961 }
962 return NOTIFY_DONE;
963}
964
965static struct notifier_block fib_inetaddr_notifier = {
966 .notifier_call =fib_inetaddr_event,
967};
968
969static struct notifier_block fib_netdev_notifier = {
970 .notifier_call =fib_netdev_event,
971};
972
7b1a74fd 973static int __net_init ip_fib_net_init(struct net *net)
1da177e4 974{
dce5cbee 975 int err;
1af5a8c4
PM
976 unsigned int i;
977
e4aef8ae
DL
978 net->ipv4.fib_table_hash = kzalloc(
979 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
980 if (net->ipv4.fib_table_hash == NULL)
981 return -ENOMEM;
982
1af5a8c4 983 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 984 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 985
dce5cbee
DL
986 err = fib4_rules_init(net);
987 if (err < 0)
988 goto fail;
989 return 0;
990
991fail:
992 kfree(net->ipv4.fib_table_hash);
993 return err;
7b1a74fd 994}
1da177e4 995
7b1a74fd
DL
996static void __net_exit ip_fib_net_exit(struct net *net)
997{
998 unsigned int i;
999
1000#ifdef CONFIG_IP_MULTIPLE_TABLES
1001 fib4_rules_exit(net);
1002#endif
1003
1004 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1005 struct fib_table *tb;
1006 struct hlist_head *head;
1007 struct hlist_node *node, *tmp;
63f3444f 1008
e4aef8ae 1009 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
1010 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1011 hlist_del(node);
1012 tb->tb_flush(tb);
1013 kfree(tb);
1014 }
1015 }
e4aef8ae 1016 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1017}
1018
1019static int __net_init fib_net_init(struct net *net)
1020{
1021 int error;
1022
7b1a74fd
DL
1023 error = ip_fib_net_init(net);
1024 if (error < 0)
1025 goto out;
1026 error = nl_fib_lookup_init(net);
1027 if (error < 0)
1028 goto out_nlfl;
1029 error = fib_proc_init(net);
1030 if (error < 0)
1031 goto out_proc;
1032out:
1033 return error;
1034
1035out_proc:
1036 nl_fib_lookup_exit(net);
1037out_nlfl:
1038 ip_fib_net_exit(net);
1039 goto out;
1040}
1041
1042static void __net_exit fib_net_exit(struct net *net)
1043{
1044 fib_proc_exit(net);
1045 nl_fib_lookup_exit(net);
1046 ip_fib_net_exit(net);
1047}
1048
1049static struct pernet_operations fib_net_ops = {
1050 .init = fib_net_init,
1051 .exit = fib_net_exit,
1052};
1053
1054void __init ip_fib_init(void)
1055{
63f3444f
TG
1056 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1057 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1058 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1059
1060 register_pernet_subsys(&fib_net_ops);
1061 register_netdevice_notifier(&fib_netdev_notifier);
1062 register_inetaddr_notifier(&fib_inetaddr_notifier);
7f9b8052
SH
1063
1064 fib_hash_init();
1da177e4
LT
1065}
1066
/* Symbols from this file made available through EXPORT_SYMBOL(). */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);