]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
tcp: account SYN-ACK timeouts & retransmissions
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
1da177e4
LT
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
1da177e4
LT
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
4fc268d2 20#include <linux/capability.h>
1da177e4
LT
21#include <linux/types.h>
22#include <linux/kernel.h>
1da177e4
LT
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
14c85021 30#include <linux/inetdevice.h>
1da177e4 31#include <linux/netdevice.h>
1823730f 32#include <linux/if_addr.h>
1da177e4
LT
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
1da177e4 35#include <linux/init.h>
1af5a8c4 36#include <linux/list.h>
1da177e4
LT
37
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
1da177e4
LT
43#include <net/arp.h>
44#include <net/ip_fib.h>
63f3444f 45#include <net/rtnetlink.h>
1da177e4 46
1da177e4
LT
47#ifndef CONFIG_IP_MULTIPLE_TABLES
48
7b1a74fd 49static int __net_init fib4_rules_init(struct net *net)
c3e9a353 50{
93456b6d
DL
51 struct fib_table *local_table, *main_table;
52
7f9b8052 53 local_table = fib_hash_table(RT_TABLE_LOCAL);
93456b6d 54 if (local_table == NULL)
dbb50165
DL
55 return -ENOMEM;
56
7f9b8052 57 main_table = fib_hash_table(RT_TABLE_MAIN);
93456b6d 58 if (main_table == NULL)
dbb50165
DL
59 goto fail;
60
93456b6d 61 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 62 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 63 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 64 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
65 return 0;
66
67fail:
93456b6d 68 kfree(local_table);
dbb50165 69 return -ENOMEM;
c3e9a353 70}
1af5a8c4 71#else
1da177e4 72
8ad4942c 73struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
74{
75 struct fib_table *tb;
1af5a8c4 76 unsigned int h;
1da177e4 77
1af5a8c4
PM
78 if (id == 0)
79 id = RT_TABLE_MAIN;
8ad4942c 80 tb = fib_get_table(net, id);
1af5a8c4
PM
81 if (tb)
82 return tb;
7f9b8052
SH
83
84 tb = fib_hash_table(id);
1da177e4
LT
85 if (!tb)
86 return NULL;
1af5a8c4 87 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 88 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
89 return tb;
90}
91
8ad4942c 92struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
93{
94 struct fib_table *tb;
95 struct hlist_node *node;
e4aef8ae 96 struct hlist_head *head;
1af5a8c4 97 unsigned int h;
1da177e4 98
1af5a8c4
PM
99 if (id == 0)
100 id = RT_TABLE_MAIN;
101 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 102
1af5a8c4 103 rcu_read_lock();
e4aef8ae
DL
104 head = &net->ipv4.fib_table_hash[h];
105 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
106 if (tb->tb_id == id) {
107 rcu_read_unlock();
108 return tb;
109 }
110 }
111 rcu_read_unlock();
112 return NULL;
113}
1da177e4
LT
114#endif /* CONFIG_IP_MULTIPLE_TABLES */
115
010278ec
DL
116void fib_select_default(struct net *net,
117 const struct flowi *flp, struct fib_result *res)
64c2d538
DL
118{
119 struct fib_table *tb;
120 int table = RT_TABLE_MAIN;
121#ifdef CONFIG_IP_MULTIPLE_TABLES
122 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123 return;
124 table = res->r->table;
125#endif
010278ec 126 tb = fib_get_table(net, table);
64c2d538 127 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
16c6cf8b 128 fib_table_select_default(tb, flp, res);
64c2d538
DL
129}
130
e4aef8ae 131static void fib_flush(struct net *net)
1da177e4
LT
132{
133 int flushed = 0;
1da177e4 134 struct fib_table *tb;
1af5a8c4 135 struct hlist_node *node;
e4aef8ae 136 struct hlist_head *head;
1af5a8c4 137 unsigned int h;
1da177e4 138
1af5a8c4 139 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
140 head = &net->ipv4.fib_table_hash[h];
141 hlist_for_each_entry(tb, node, head, tb_hlist)
16c6cf8b 142 flushed += fib_table_flush(tb);
1da177e4 143 }
1da177e4
LT
144
145 if (flushed)
76e6ebfb 146 rt_cache_flush(net, -1);
1da177e4
LT
147}
148
149/*
150 * Find the first device with a given source address.
151 */
152
1ab35276 153struct net_device * ip_dev_find(struct net *net, __be32 addr)
1da177e4
LT
154{
155 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156 struct fib_result res;
157 struct net_device *dev = NULL;
03cf786c 158 struct fib_table *local_table;
1da177e4
LT
159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161 res.r = NULL;
162#endif
163
1ab35276 164 local_table = fib_get_table(net, RT_TABLE_LOCAL);
16c6cf8b 165 if (!local_table || fib_table_lookup(local_table, &fl, &res))
1da177e4
LT
166 return NULL;
167 if (res.type != RTN_LOCAL)
168 goto out;
169 dev = FIB_RES_DEV(res);
170
171 if (dev)
172 dev_hold(dev);
173out:
174 fib_res_put(&res);
175 return dev;
176}
177
05538116
LAT
178/*
179 * Find address type as if only "dev" was present in the system. If
180 * on_dev is NULL then all interfaces are taken into consideration.
181 */
6b175b26
EB
182static inline unsigned __inet_dev_addr_type(struct net *net,
183 const struct net_device *dev,
05538116 184 __be32 addr)
1da177e4
LT
185{
186 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187 struct fib_result res;
188 unsigned ret = RTN_BROADCAST;
03cf786c 189 struct fib_table *local_table;
1da177e4 190
1e637c74 191 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1da177e4 192 return RTN_BROADCAST;
f97c1e0c 193 if (ipv4_is_multicast(addr))
1da177e4
LT
194 return RTN_MULTICAST;
195
196#ifdef CONFIG_IP_MULTIPLE_TABLES
197 res.r = NULL;
198#endif
e905a9ed 199
6b175b26 200 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 201 if (local_table) {
1da177e4 202 ret = RTN_UNICAST;
16c6cf8b 203 if (!fib_table_lookup(local_table, &fl, &res)) {
05538116
LAT
204 if (!dev || dev == res.fi->fib_dev)
205 ret = res.type;
1da177e4
LT
206 fib_res_put(&res);
207 }
208 }
209 return ret;
210}
211
6b175b26 212unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 213{
6b175b26 214 return __inet_dev_addr_type(net, NULL, addr);
05538116
LAT
215}
216
6b175b26
EB
217unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218 __be32 addr)
05538116 219{
6b175b26 220 return __inet_dev_addr_type(net, dev, addr);
05538116
LAT
221}
222
1da177e4
LT
223/* Given (packet source, input interface) and optional (dst, oif, tos):
224 - (main) check, that source is valid i.e. not broadcast or our local
225 address.
226 - figure out what "logical" interface this packet arrived
227 and calculate "specific destination" address.
228 - check, that packet arrived from expected physical interface.
229 */
230
d9c9df8c 231int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
b0c110ca 232 struct net_device *dev, __be32 *spec_dst,
233 u32 *itag, u32 mark)
1da177e4
LT
234{
235 struct in_device *in_dev;
236 struct flowi fl = { .nl_u = { .ip4_u =
237 { .daddr = src,
238 .saddr = dst,
239 .tos = tos } },
b0c110ca 240 .mark = mark,
1da177e4 241 .iif = oif };
b0c110ca 242
1da177e4 243 struct fib_result res;
8153a10c 244 int no_addr, rpf, accept_local;
1da177e4 245 int ret;
5b707aaa 246 struct net *net;
1da177e4 247
8153a10c 248 no_addr = rpf = accept_local = 0;
1da177e4 249 rcu_read_lock();
e5ed6399 250 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
251 if (in_dev) {
252 no_addr = in_dev->ifa_list == NULL;
253 rpf = IN_DEV_RPFILTER(in_dev);
8153a10c 254 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
28f6aeea
JHS
255 if (mark && !IN_DEV_SRC_VMARK(in_dev))
256 fl.mark = 0;
1da177e4
LT
257 }
258 rcu_read_unlock();
259
260 if (in_dev == NULL)
261 goto e_inval;
262
c346dca1 263 net = dev_net(dev);
5b707aaa 264 if (fib_lookup(net, &fl, &res))
1da177e4 265 goto last_resort;
8153a10c
PM
266 if (res.type != RTN_UNICAST) {
267 if (res.type != RTN_LOCAL || !accept_local)
268 goto e_inval_res;
269 }
1da177e4
LT
270 *spec_dst = FIB_RES_PREFSRC(res);
271 fib_combine_itag(itag, &res);
272#ifdef CONFIG_IP_ROUTE_MULTIPATH
273 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
274#else
275 if (FIB_RES_DEV(res) == dev)
276#endif
277 {
278 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
279 fib_res_put(&res);
280 return ret;
281 }
282 fib_res_put(&res);
283 if (no_addr)
284 goto last_resort;
c1cf8422 285 if (rpf == 1)
1da177e4
LT
286 goto e_inval;
287 fl.oif = dev->ifindex;
288
289 ret = 0;
5b707aaa 290 if (fib_lookup(net, &fl, &res) == 0) {
1da177e4
LT
291 if (res.type == RTN_UNICAST) {
292 *spec_dst = FIB_RES_PREFSRC(res);
293 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
294 }
295 fib_res_put(&res);
296 }
297 return ret;
298
299last_resort:
300 if (rpf)
301 goto e_inval;
302 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
303 *itag = 0;
304 return 0;
305
306e_inval_res:
307 fib_res_put(&res);
308e_inval:
309 return -EINVAL;
310}
311
81f7bf6c 312static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
313{
314 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
315}
316
317static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
318{
319 struct nlattr *nla;
320
321 nla = (struct nlattr *) ((char *) mx + len);
322 nla->nla_type = type;
323 nla->nla_len = nla_attr_size(4);
324 *(u32 *) nla_data(nla) = value;
325
326 return len + nla_total_size(4);
327}
328
4b5d47d4 329static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
330 struct fib_config *cfg)
331{
6d85c10a 332 __be32 addr;
4e902c57
TG
333 int plen;
334
335 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 336 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
337
338 if (rt->rt_dst.sa_family != AF_INET)
339 return -EAFNOSUPPORT;
340
341 /*
342 * Check mask for validity:
343 * a) it must be contiguous.
344 * b) destination must have all host bits clear.
345 * c) if application forgot to set correct family (AF_INET),
346 * reject request unless it is absolutely clear i.e.
347 * both family and mask are zero.
348 */
349 plen = 32;
350 addr = sk_extract_addr(&rt->rt_dst);
351 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 352 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
353
354 if (rt->rt_genmask.sa_family != AF_INET) {
355 if (mask || rt->rt_genmask.sa_family)
356 return -EAFNOSUPPORT;
357 }
358
359 if (bad_mask(mask, addr))
360 return -EINVAL;
361
362 plen = inet_mask_len(mask);
363 }
364
365 cfg->fc_dst_len = plen;
366 cfg->fc_dst = addr;
367
368 if (cmd != SIOCDELRT) {
369 cfg->fc_nlflags = NLM_F_CREATE;
370 cfg->fc_protocol = RTPROT_BOOT;
371 }
372
373 if (rt->rt_metric)
374 cfg->fc_priority = rt->rt_metric - 1;
375
376 if (rt->rt_flags & RTF_REJECT) {
377 cfg->fc_scope = RT_SCOPE_HOST;
378 cfg->fc_type = RTN_UNREACHABLE;
379 return 0;
380 }
381
382 cfg->fc_scope = RT_SCOPE_NOWHERE;
383 cfg->fc_type = RTN_UNICAST;
384
385 if (rt->rt_dev) {
386 char *colon;
387 struct net_device *dev;
388 char devname[IFNAMSIZ];
389
390 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
391 return -EFAULT;
392
393 devname[IFNAMSIZ-1] = 0;
394 colon = strchr(devname, ':');
395 if (colon)
396 *colon = 0;
4b5d47d4 397 dev = __dev_get_by_name(net, devname);
4e902c57
TG
398 if (!dev)
399 return -ENODEV;
400 cfg->fc_oif = dev->ifindex;
401 if (colon) {
402 struct in_ifaddr *ifa;
403 struct in_device *in_dev = __in_dev_get_rtnl(dev);
404 if (!in_dev)
405 return -ENODEV;
406 *colon = ':';
407 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
408 if (strcmp(ifa->ifa_label, devname) == 0)
409 break;
410 if (ifa == NULL)
411 return -ENODEV;
412 cfg->fc_prefsrc = ifa->ifa_local;
413 }
414 }
415
416 addr = sk_extract_addr(&rt->rt_gateway);
417 if (rt->rt_gateway.sa_family == AF_INET && addr) {
418 cfg->fc_gw = addr;
419 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 420 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
421 cfg->fc_scope = RT_SCOPE_UNIVERSE;
422 }
423
424 if (cmd == SIOCDELRT)
425 return 0;
426
427 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
428 return -EINVAL;
429
430 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
431 cfg->fc_scope = RT_SCOPE_LINK;
432
433 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
434 struct nlattr *mx;
435 int len = 0;
436
437 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 438 if (mx == NULL)
4e902c57
TG
439 return -ENOMEM;
440
441 if (rt->rt_flags & RTF_MTU)
442 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
443
444 if (rt->rt_flags & RTF_WINDOW)
445 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
446
447 if (rt->rt_flags & RTF_IRTT)
448 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
449
450 cfg->fc_mx = mx;
451 cfg->fc_mx_len = len;
452 }
453
454 return 0;
455}
456
1da177e4
LT
457/*
458 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
459 */
e905a9ed 460
1bad118a 461int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 462{
4e902c57
TG
463 struct fib_config cfg;
464 struct rtentry rt;
1da177e4 465 int err;
1da177e4
LT
466
467 switch (cmd) {
468 case SIOCADDRT: /* Add a route */
469 case SIOCDELRT: /* Delete a route */
470 if (!capable(CAP_NET_ADMIN))
471 return -EPERM;
4e902c57
TG
472
473 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 474 return -EFAULT;
4e902c57 475
1da177e4 476 rtnl_lock();
1bad118a 477 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 478 if (err == 0) {
4e902c57
TG
479 struct fib_table *tb;
480
1da177e4 481 if (cmd == SIOCDELRT) {
1bad118a 482 tb = fib_get_table(net, cfg.fc_table);
1da177e4 483 if (tb)
16c6cf8b 484 err = fib_table_delete(tb, &cfg);
4e902c57
TG
485 else
486 err = -ESRCH;
1da177e4 487 } else {
1bad118a 488 tb = fib_new_table(net, cfg.fc_table);
1da177e4 489 if (tb)
16c6cf8b 490 err = fib_table_insert(tb, &cfg);
4e902c57
TG
491 else
492 err = -ENOBUFS;
1da177e4 493 }
4e902c57
TG
494
495 /* allocated by rtentry_to_fib_config() */
496 kfree(cfg.fc_mx);
1da177e4
LT
497 }
498 rtnl_unlock();
499 return err;
500 }
501 return -EINVAL;
502}
503
ef7c79ed 504const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
505 [RTA_DST] = { .type = NLA_U32 },
506 [RTA_SRC] = { .type = NLA_U32 },
507 [RTA_IIF] = { .type = NLA_U32 },
508 [RTA_OIF] = { .type = NLA_U32 },
509 [RTA_GATEWAY] = { .type = NLA_U32 },
510 [RTA_PRIORITY] = { .type = NLA_U32 },
511 [RTA_PREFSRC] = { .type = NLA_U32 },
512 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 513 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57 514 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
515};
516
4b5d47d4
DL
517static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
518 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 519{
4e902c57
TG
520 struct nlattr *attr;
521 int err, remaining;
522 struct rtmsg *rtm;
523
524 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
525 if (err < 0)
526 goto errout;
527
528 memset(cfg, 0, sizeof(*cfg));
529
530 rtm = nlmsg_data(nlh);
4e902c57 531 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
532 cfg->fc_tos = rtm->rtm_tos;
533 cfg->fc_table = rtm->rtm_table;
534 cfg->fc_protocol = rtm->rtm_protocol;
535 cfg->fc_scope = rtm->rtm_scope;
536 cfg->fc_type = rtm->rtm_type;
537 cfg->fc_flags = rtm->rtm_flags;
538 cfg->fc_nlflags = nlh->nlmsg_flags;
539
540 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
541 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 542 cfg->fc_nlinfo.nl_net = net;
4e902c57 543
a0ee18b9
TG
544 if (cfg->fc_type > RTN_MAX) {
545 err = -EINVAL;
546 goto errout;
547 }
548
4e902c57 549 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 550 switch (nla_type(attr)) {
4e902c57 551 case RTA_DST:
17fb2c64 552 cfg->fc_dst = nla_get_be32(attr);
4e902c57 553 break;
4e902c57
TG
554 case RTA_OIF:
555 cfg->fc_oif = nla_get_u32(attr);
556 break;
557 case RTA_GATEWAY:
17fb2c64 558 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
559 break;
560 case RTA_PRIORITY:
561 cfg->fc_priority = nla_get_u32(attr);
562 break;
563 case RTA_PREFSRC:
17fb2c64 564 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
565 break;
566 case RTA_METRICS:
567 cfg->fc_mx = nla_data(attr);
568 cfg->fc_mx_len = nla_len(attr);
569 break;
570 case RTA_MULTIPATH:
571 cfg->fc_mp = nla_data(attr);
572 cfg->fc_mp_len = nla_len(attr);
573 break;
574 case RTA_FLOW:
575 cfg->fc_flow = nla_get_u32(attr);
576 break;
4e902c57
TG
577 case RTA_TABLE:
578 cfg->fc_table = nla_get_u32(attr);
579 break;
1da177e4
LT
580 }
581 }
4e902c57 582
1da177e4 583 return 0;
4e902c57
TG
584errout:
585 return err;
1da177e4
LT
586}
587
6ed2533e 588static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 589{
3b1e0a65 590 struct net *net = sock_net(skb->sk);
4e902c57
TG
591 struct fib_config cfg;
592 struct fib_table *tb;
593 int err;
1da177e4 594
4b5d47d4 595 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
596 if (err < 0)
597 goto errout;
1da177e4 598
8ad4942c 599 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
600 if (tb == NULL) {
601 err = -ESRCH;
602 goto errout;
603 }
604
16c6cf8b 605 err = fib_table_delete(tb, &cfg);
4e902c57
TG
606errout:
607 return err;
1da177e4
LT
608}
609
6ed2533e 610static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 611{
3b1e0a65 612 struct net *net = sock_net(skb->sk);
4e902c57
TG
613 struct fib_config cfg;
614 struct fib_table *tb;
615 int err;
1da177e4 616
4b5d47d4 617 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
618 if (err < 0)
619 goto errout;
1da177e4 620
226b0b4a 621 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
622 if (tb == NULL) {
623 err = -ENOBUFS;
624 goto errout;
625 }
626
16c6cf8b 627 err = fib_table_insert(tb, &cfg);
4e902c57
TG
628errout:
629 return err;
1da177e4
LT
630}
631
63f3444f 632static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 633{
3b1e0a65 634 struct net *net = sock_net(skb->sk);
1af5a8c4
PM
635 unsigned int h, s_h;
636 unsigned int e = 0, s_e;
1da177e4 637 struct fib_table *tb;
1af5a8c4 638 struct hlist_node *node;
e4aef8ae 639 struct hlist_head *head;
1af5a8c4 640 int dumped = 0;
1da177e4 641
be403ea1
TG
642 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
643 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
644 return ip_rt_dump(skb, cb);
645
1af5a8c4
PM
646 s_h = cb->args[0];
647 s_e = cb->args[1];
648
649 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
650 e = 0;
e4aef8ae
DL
651 head = &net->ipv4.fib_table_hash[h];
652 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
653 if (e < s_e)
654 goto next;
655 if (dumped)
656 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 657 2 * sizeof(cb->args[0]));
16c6cf8b 658 if (fib_table_dump(tb, skb, cb) < 0)
1af5a8c4
PM
659 goto out;
660 dumped = 1;
661next:
662 e++;
663 }
1da177e4 664 }
1af5a8c4
PM
665out:
666 cb->args[1] = e;
667 cb->args[0] = h;
1da177e4
LT
668
669 return skb->len;
670}
671
672/* Prepare and feed intra-kernel routing request.
673 Really, it should be netlink message, but :-( netlink
674 can be not configured, so that we feed it directly
675 to fib engine. It is legal, because all events occur
676 only when netlink is already locked.
677 */
678
81f7bf6c 679static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 680{
c346dca1 681 struct net *net = dev_net(ifa->ifa_dev->dev);
4e902c57
TG
682 struct fib_table *tb;
683 struct fib_config cfg = {
684 .fc_protocol = RTPROT_KERNEL,
685 .fc_type = type,
686 .fc_dst = dst,
687 .fc_dst_len = dst_len,
688 .fc_prefsrc = ifa->ifa_local,
689 .fc_oif = ifa->ifa_dev->dev->ifindex,
690 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 691 .fc_nlinfo = {
4b5d47d4 692 .nl_net = net,
4d1169c1 693 },
4e902c57 694 };
1da177e4
LT
695
696 if (type == RTN_UNICAST)
4b5d47d4 697 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 698 else
4b5d47d4 699 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
700
701 if (tb == NULL)
702 return;
703
4e902c57 704 cfg.fc_table = tb->tb_id;
1da177e4 705
4e902c57
TG
706 if (type != RTN_LOCAL)
707 cfg.fc_scope = RT_SCOPE_LINK;
708 else
709 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
710
711 if (cmd == RTM_NEWROUTE)
16c6cf8b 712 fib_table_insert(tb, &cfg);
1da177e4 713 else
16c6cf8b 714 fib_table_delete(tb, &cfg);
1da177e4
LT
715}
716
0ff60a45 717void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
718{
719 struct in_device *in_dev = ifa->ifa_dev;
720 struct net_device *dev = in_dev->dev;
721 struct in_ifaddr *prim = ifa;
a144ea4b
AV
722 __be32 mask = ifa->ifa_mask;
723 __be32 addr = ifa->ifa_local;
724 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
725
726 if (ifa->ifa_flags&IFA_F_SECONDARY) {
727 prim = inet_ifa_byprefix(in_dev, prefix, mask);
728 if (prim == NULL) {
a6db9010 729 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
730 return;
731 }
732 }
733
734 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
735
736 if (!(dev->flags&IFF_UP))
737 return;
738
739 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 740 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
741 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
742
f97c1e0c 743 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
744 (prefix != addr || ifa->ifa_prefixlen < 32)) {
745 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
746 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
747
748 /* Add network specific broadcasts, when it takes a sense */
749 if (ifa->ifa_prefixlen < 31) {
750 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
751 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
752 }
753 }
754}
755
756static void fib_del_ifaddr(struct in_ifaddr *ifa)
757{
758 struct in_device *in_dev = ifa->ifa_dev;
759 struct net_device *dev = in_dev->dev;
760 struct in_ifaddr *ifa1;
761 struct in_ifaddr *prim = ifa;
a144ea4b
AV
762 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
763 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
764#define LOCAL_OK 1
765#define BRD_OK 2
766#define BRD0_OK 4
767#define BRD1_OK 8
768 unsigned ok = 0;
769
770 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
771 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
772 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
773 else {
774 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
775 if (prim == NULL) {
a6db9010 776 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
777 return;
778 }
779 }
780
781 /* Deletion is more complicated than add.
782 We should take care of not to delete too much :-)
783
784 Scan address list to be sure that addresses are really gone.
785 */
786
787 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
788 if (ifa->ifa_local == ifa1->ifa_local)
789 ok |= LOCAL_OK;
790 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
791 ok |= BRD_OK;
792 if (brd == ifa1->ifa_broadcast)
793 ok |= BRD1_OK;
794 if (any == ifa1->ifa_broadcast)
795 ok |= BRD0_OK;
796 }
797
798 if (!(ok&BRD_OK))
799 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
800 if (!(ok&BRD1_OK))
801 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
802 if (!(ok&BRD0_OK))
803 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
804 if (!(ok&LOCAL_OK)) {
805 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
806
807 /* Check, that this local address finally disappeared. */
c346dca1 808 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
809 /* And the last, but not the least thing.
810 We must flush stray FIB entries.
811
812 First of all, we scan fib_info list searching
813 for stray nexthop entries, then ignite fib_flush.
814 */
c346dca1
YH
815 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
816 fib_flush(dev_net(dev));
1da177e4
LT
817 }
818 }
819#undef LOCAL_OK
820#undef BRD_OK
821#undef BRD0_OK
822#undef BRD1_OK
823}
824
246955fe
RO
825static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
826{
e905a9ed 827
246955fe 828 struct fib_result res;
5f300893 829 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 830 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
831 .tos = frn->fl_tos,
832 .scope = frn->fl_scope } } };
1194ed0a 833
912a41a4
SV
834#ifdef CONFIG_IP_MULTIPLE_TABLES
835 res.r = NULL;
836#endif
837
1194ed0a 838 frn->err = -ENOENT;
246955fe
RO
839 if (tb) {
840 local_bh_disable();
841
842 frn->tb_id = tb->tb_id;
16c6cf8b 843 frn->err = fib_table_lookup(tb, &fl, &res);
246955fe
RO
844
845 if (!frn->err) {
846 frn->prefixlen = res.prefixlen;
847 frn->nh_sel = res.nh_sel;
848 frn->type = res.type;
849 frn->scope = res.scope;
1194ed0a 850 fib_res_put(&res);
246955fe
RO
851 }
852 local_bh_enable();
853 }
854}
855
28f7b036 856static void nl_fib_input(struct sk_buff *skb)
246955fe 857{
6bd48fcf 858 struct net *net;
246955fe 859 struct fib_result_nl *frn;
28f7b036 860 struct nlmsghdr *nlh;
246955fe 861 struct fib_table *tb;
28f7b036 862 u32 pid;
1194ed0a 863
3b1e0a65 864 net = sock_net(skb->sk);
b529ccf2 865 nlh = nlmsg_hdr(skb);
ea86575e 866 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 867 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 868 return;
d883a036
DL
869
870 skb = skb_clone(skb, GFP_KERNEL);
871 if (skb == NULL)
872 return;
873 nlh = nlmsg_hdr(skb);
e905a9ed 874
246955fe 875 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 876 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
877
878 nl_fib_lookup(frn, tb);
e905a9ed 879
1194ed0a 880 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 881 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 882 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 883 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 884}
246955fe 885
7b1a74fd 886static int nl_fib_lookup_init(struct net *net)
246955fe 887{
6bd48fcf
DL
888 struct sock *sk;
889 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
890 nl_fib_input, NULL, THIS_MODULE);
891 if (sk == NULL)
7b1a74fd 892 return -EAFNOSUPPORT;
6bd48fcf 893 net->ipv4.fibnl = sk;
7b1a74fd
DL
894 return 0;
895}
896
897static void nl_fib_lookup_exit(struct net *net)
898{
b7c6ba6e 899 netlink_kernel_release(net->ipv4.fibnl);
775516bf 900 net->ipv4.fibnl = NULL;
246955fe
RO
901}
902
/*
 * Take IP down on a device: sync the FIB for the device (flushing the
 * tables if fib_sync_down_dev() returns nonzero), flush the routing cache
 * with the given delay, and notify ARP.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int need_flush = fib_sync_down_dev(dev, force);

	if (need_flush)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
910
911static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
912{
6ed2533e 913 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
76e6ebfb 914 struct net_device *dev = ifa->ifa_dev->dev;
1da177e4
LT
915
916 switch (event) {
917 case NETDEV_UP:
918 fib_add_ifaddr(ifa);
919#ifdef CONFIG_IP_ROUTE_MULTIPATH
76e6ebfb 920 fib_sync_up(dev);
1da177e4 921#endif
76e6ebfb 922 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
923 break;
924 case NETDEV_DOWN:
925 fib_del_ifaddr(ifa);
9fcc2e8a 926 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
927 /* Last address was deleted from this interface.
928 Disable IP.
929 */
e2ce1468 930 fib_disable_ip(dev, 1, 0);
1da177e4 931 } else {
76e6ebfb 932 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
933 }
934 break;
935 }
936 return NOTIFY_DONE;
937}
938
939static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
940{
941 struct net_device *dev = ptr;
e5ed6399 942 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
943
944 if (event == NETDEV_UNREGISTER) {
e2ce1468 945 fib_disable_ip(dev, 2, -1);
1da177e4
LT
946 return NOTIFY_DONE;
947 }
948
949 if (!in_dev)
950 return NOTIFY_DONE;
951
952 switch (event) {
953 case NETDEV_UP:
954 for_ifa(in_dev) {
955 fib_add_ifaddr(ifa);
956 } endfor_ifa(in_dev);
957#ifdef CONFIG_IP_ROUTE_MULTIPATH
958 fib_sync_up(dev);
959#endif
76e6ebfb 960 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
961 break;
962 case NETDEV_DOWN:
e2ce1468 963 fib_disable_ip(dev, 0, 0);
1da177e4
LT
964 break;
965 case NETDEV_CHANGEMTU:
966 case NETDEV_CHANGE:
76e6ebfb 967 rt_cache_flush(dev_net(dev), 0);
1da177e4 968 break;
a5ee1551
EB
969 case NETDEV_UNREGISTER_BATCH:
970 rt_cache_flush_batch();
971 break;
1da177e4
LT
972 }
973 return NOTIFY_DONE;
974}
975
976static struct notifier_block fib_inetaddr_notifier = {
6ed2533e 977 .notifier_call = fib_inetaddr_event,
1da177e4
LT
978};
979
980static struct notifier_block fib_netdev_notifier = {
6ed2533e 981 .notifier_call = fib_netdev_event,
1da177e4
LT
982};
983
7b1a74fd 984static int __net_init ip_fib_net_init(struct net *net)
1da177e4 985{
dce5cbee 986 int err;
1af5a8c4
PM
987 unsigned int i;
988
e4aef8ae
DL
989 net->ipv4.fib_table_hash = kzalloc(
990 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
991 if (net->ipv4.fib_table_hash == NULL)
992 return -ENOMEM;
993
1af5a8c4 994 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 995 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 996
dce5cbee
DL
997 err = fib4_rules_init(net);
998 if (err < 0)
999 goto fail;
1000 return 0;
1001
1002fail:
1003 kfree(net->ipv4.fib_table_hash);
1004 return err;
7b1a74fd 1005}
1da177e4 1006
7b1a74fd
DL
1007static void __net_exit ip_fib_net_exit(struct net *net)
1008{
1009 unsigned int i;
1010
1011#ifdef CONFIG_IP_MULTIPLE_TABLES
1012 fib4_rules_exit(net);
1013#endif
1014
1015 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1016 struct fib_table *tb;
1017 struct hlist_head *head;
1018 struct hlist_node *node, *tmp;
63f3444f 1019
e4aef8ae 1020 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
1021 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1022 hlist_del(node);
16c6cf8b 1023 fib_table_flush(tb);
7b1a74fd
DL
1024 kfree(tb);
1025 }
1026 }
e4aef8ae 1027 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1028}
1029
1030static int __net_init fib_net_init(struct net *net)
1031{
1032 int error;
1033
7b1a74fd
DL
1034 error = ip_fib_net_init(net);
1035 if (error < 0)
1036 goto out;
1037 error = nl_fib_lookup_init(net);
1038 if (error < 0)
1039 goto out_nlfl;
1040 error = fib_proc_init(net);
1041 if (error < 0)
1042 goto out_proc;
1043out:
1044 return error;
1045
1046out_proc:
1047 nl_fib_lookup_exit(net);
1048out_nlfl:
1049 ip_fib_net_exit(net);
1050 goto out;
1051}
1052
1053static void __net_exit fib_net_exit(struct net *net)
1054{
1055 fib_proc_exit(net);
1056 nl_fib_lookup_exit(net);
1057 ip_fib_net_exit(net);
1058}
1059
1060static struct pernet_operations fib_net_ops = {
1061 .init = fib_net_init,
1062 .exit = fib_net_exit,
1063};
1064
1065void __init ip_fib_init(void)
1066{
63f3444f
TG
1067 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1068 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1069 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1070
1071 register_pernet_subsys(&fib_net_ops);
1072 register_netdevice_notifier(&fib_netdev_notifier);
1073 register_inetaddr_notifier(&fib_inetaddr_notifier);
7f9b8052
SH
1074
1075 fib_hash_init();
1da177e4
LT
1076}
1077
/* Symbols made available to other kernel code. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);