]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
ipconfig: document DHCP hostname and DNS record
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
1da177e4
LT
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
1da177e4
LT
16#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
4fc268d2 20#include <linux/capability.h>
1da177e4
LT
21#include <linux/types.h>
22#include <linux/kernel.h>
1da177e4
LT
23#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
14c85021 30#include <linux/inetdevice.h>
1da177e4 31#include <linux/netdevice.h>
1823730f 32#include <linux/if_addr.h>
1da177e4
LT
33#include <linux/if_arp.h>
34#include <linux/skbuff.h>
1da177e4 35#include <linux/init.h>
1af5a8c4 36#include <linux/list.h>
5a0e3ad6 37#include <linux/slab.h>
1da177e4
LT
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
1da177e4
LT
44#include <net/arp.h>
45#include <net/ip_fib.h>
63f3444f 46#include <net/rtnetlink.h>
1da177e4 47
1da177e4
LT
48#ifndef CONFIG_IP_MULTIPLE_TABLES
49
7b1a74fd 50static int __net_init fib4_rules_init(struct net *net)
c3e9a353 51{
93456b6d
DL
52 struct fib_table *local_table, *main_table;
53
7f9b8052 54 local_table = fib_hash_table(RT_TABLE_LOCAL);
93456b6d 55 if (local_table == NULL)
dbb50165
DL
56 return -ENOMEM;
57
7f9b8052 58 main_table = fib_hash_table(RT_TABLE_MAIN);
93456b6d 59 if (main_table == NULL)
dbb50165
DL
60 goto fail;
61
93456b6d 62 hlist_add_head_rcu(&local_table->tb_hlist,
e4aef8ae 63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
93456b6d 64 hlist_add_head_rcu(&main_table->tb_hlist,
e4aef8ae 65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
dbb50165
DL
66 return 0;
67
68fail:
93456b6d 69 kfree(local_table);
dbb50165 70 return -ENOMEM;
c3e9a353 71}
1af5a8c4 72#else
1da177e4 73
8ad4942c 74struct fib_table *fib_new_table(struct net *net, u32 id)
1da177e4
LT
75{
76 struct fib_table *tb;
1af5a8c4 77 unsigned int h;
1da177e4 78
1af5a8c4
PM
79 if (id == 0)
80 id = RT_TABLE_MAIN;
8ad4942c 81 tb = fib_get_table(net, id);
1af5a8c4
PM
82 if (tb)
83 return tb;
7f9b8052
SH
84
85 tb = fib_hash_table(id);
1da177e4
LT
86 if (!tb)
87 return NULL;
1af5a8c4 88 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
1da177e4
LT
90 return tb;
91}
92
8ad4942c 93struct fib_table *fib_get_table(struct net *net, u32 id)
1af5a8c4
PM
94{
95 struct fib_table *tb;
96 struct hlist_node *node;
e4aef8ae 97 struct hlist_head *head;
1af5a8c4 98 unsigned int h;
1da177e4 99
1af5a8c4
PM
100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
e4aef8ae 103
1af5a8c4 104 rcu_read_lock();
e4aef8ae
DL
105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
1af5a8c4
PM
107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
1da177e4
LT
115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
010278ec
DL
117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
64c2d538
DL
119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
010278ec 127 tb = fib_get_table(net, table);
64c2d538 128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
16c6cf8b 129 fib_table_select_default(tb, flp, res);
64c2d538
DL
130}
131
e4aef8ae 132static void fib_flush(struct net *net)
1da177e4
LT
133{
134 int flushed = 0;
1da177e4 135 struct fib_table *tb;
1af5a8c4 136 struct hlist_node *node;
e4aef8ae 137 struct hlist_head *head;
1af5a8c4 138 unsigned int h;
1da177e4 139
1af5a8c4 140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
e4aef8ae
DL
141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
16c6cf8b 143 flushed += fib_table_flush(tb);
1da177e4 144 }
1da177e4
LT
145
146 if (flushed)
76e6ebfb 147 rt_cache_flush(net, -1);
1da177e4
LT
148}
149
150/*
151 * Find the first device with a given source address.
152 */
153
1ab35276 154struct net_device * ip_dev_find(struct net *net, __be32 addr)
1da177e4
LT
155{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
03cf786c 159 struct fib_table *local_table;
1da177e4
LT
160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
1ab35276 165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
16c6cf8b 166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
1da177e4
LT
167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
171
172 if (dev)
173 dev_hold(dev);
174out:
175 fib_res_put(&res);
176 return dev;
177}
178
05538116
LAT
179/*
180 * Find address type as if only "dev" was present in the system. If
181 * on_dev is NULL then all interfaces are taken into consideration.
182 */
6b175b26
EB
183static inline unsigned __inet_dev_addr_type(struct net *net,
184 const struct net_device *dev,
05538116 185 __be32 addr)
1da177e4
LT
186{
187 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
188 struct fib_result res;
189 unsigned ret = RTN_BROADCAST;
03cf786c 190 struct fib_table *local_table;
1da177e4 191
1e637c74 192 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1da177e4 193 return RTN_BROADCAST;
f97c1e0c 194 if (ipv4_is_multicast(addr))
1da177e4
LT
195 return RTN_MULTICAST;
196
197#ifdef CONFIG_IP_MULTIPLE_TABLES
198 res.r = NULL;
199#endif
e905a9ed 200
6b175b26 201 local_table = fib_get_table(net, RT_TABLE_LOCAL);
03cf786c 202 if (local_table) {
1da177e4 203 ret = RTN_UNICAST;
16c6cf8b 204 if (!fib_table_lookup(local_table, &fl, &res)) {
05538116
LAT
205 if (!dev || dev == res.fi->fib_dev)
206 ret = res.type;
1da177e4
LT
207 fib_res_put(&res);
208 }
209 }
210 return ret;
211}
212
6b175b26 213unsigned int inet_addr_type(struct net *net, __be32 addr)
05538116 214{
6b175b26 215 return __inet_dev_addr_type(net, NULL, addr);
05538116
LAT
216}
217
6b175b26
EB
218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
219 __be32 addr)
05538116 220{
6b175b26 221 return __inet_dev_addr_type(net, dev, addr);
05538116
LAT
222}
223
1da177e4
LT
224/* Given (packet source, input interface) and optional (dst, oif, tos):
225 - (main) check, that source is valid i.e. not broadcast or our local
226 address.
227 - figure out what "logical" interface this packet arrived
228 and calculate "specific destination" address.
229 - check, that packet arrived from expected physical interface.
230 */
231
d9c9df8c 232int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
b0c110ca 233 struct net_device *dev, __be32 *spec_dst,
234 u32 *itag, u32 mark)
1da177e4
LT
235{
236 struct in_device *in_dev;
237 struct flowi fl = { .nl_u = { .ip4_u =
238 { .daddr = src,
239 .saddr = dst,
240 .tos = tos } },
b0c110ca 241 .mark = mark,
1da177e4 242 .iif = oif };
b0c110ca 243
1da177e4 244 struct fib_result res;
8153a10c 245 int no_addr, rpf, accept_local;
1da177e4 246 int ret;
5b707aaa 247 struct net *net;
1da177e4 248
8153a10c 249 no_addr = rpf = accept_local = 0;
1da177e4 250 rcu_read_lock();
e5ed6399 251 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
252 if (in_dev) {
253 no_addr = in_dev->ifa_list == NULL;
254 rpf = IN_DEV_RPFILTER(in_dev);
8153a10c 255 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
28f6aeea
JHS
256 if (mark && !IN_DEV_SRC_VMARK(in_dev))
257 fl.mark = 0;
1da177e4
LT
258 }
259 rcu_read_unlock();
260
261 if (in_dev == NULL)
262 goto e_inval;
263
c346dca1 264 net = dev_net(dev);
5b707aaa 265 if (fib_lookup(net, &fl, &res))
1da177e4 266 goto last_resort;
8153a10c
PM
267 if (res.type != RTN_UNICAST) {
268 if (res.type != RTN_LOCAL || !accept_local)
269 goto e_inval_res;
270 }
1da177e4
LT
271 *spec_dst = FIB_RES_PREFSRC(res);
272 fib_combine_itag(itag, &res);
273#ifdef CONFIG_IP_ROUTE_MULTIPATH
274 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
275#else
276 if (FIB_RES_DEV(res) == dev)
277#endif
278 {
279 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
280 fib_res_put(&res);
281 return ret;
282 }
283 fib_res_put(&res);
284 if (no_addr)
285 goto last_resort;
c1cf8422 286 if (rpf == 1)
1da177e4
LT
287 goto e_inval;
288 fl.oif = dev->ifindex;
289
290 ret = 0;
5b707aaa 291 if (fib_lookup(net, &fl, &res) == 0) {
1da177e4
LT
292 if (res.type == RTN_UNICAST) {
293 *spec_dst = FIB_RES_PREFSRC(res);
294 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
295 }
296 fib_res_put(&res);
297 }
298 return ret;
299
300last_resort:
301 if (rpf)
302 goto e_inval;
303 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
304 *itag = 0;
305 return 0;
306
307e_inval_res:
308 fib_res_put(&res);
309e_inval:
310 return -EINVAL;
311}
312
81f7bf6c 313static inline __be32 sk_extract_addr(struct sockaddr *addr)
4e902c57
TG
314{
315 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
316}
317
318static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
319{
320 struct nlattr *nla;
321
322 nla = (struct nlattr *) ((char *) mx + len);
323 nla->nla_type = type;
324 nla->nla_len = nla_attr_size(4);
325 *(u32 *) nla_data(nla) = value;
326
327 return len + nla_total_size(4);
328}
329
4b5d47d4 330static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
4e902c57
TG
331 struct fib_config *cfg)
332{
6d85c10a 333 __be32 addr;
4e902c57
TG
334 int plen;
335
336 memset(cfg, 0, sizeof(*cfg));
4b5d47d4 337 cfg->fc_nlinfo.nl_net = net;
4e902c57
TG
338
339 if (rt->rt_dst.sa_family != AF_INET)
340 return -EAFNOSUPPORT;
341
342 /*
343 * Check mask for validity:
344 * a) it must be contiguous.
345 * b) destination must have all host bits clear.
346 * c) if application forgot to set correct family (AF_INET),
347 * reject request unless it is absolutely clear i.e.
348 * both family and mask are zero.
349 */
350 plen = 32;
351 addr = sk_extract_addr(&rt->rt_dst);
352 if (!(rt->rt_flags & RTF_HOST)) {
81f7bf6c 353 __be32 mask = sk_extract_addr(&rt->rt_genmask);
4e902c57
TG
354
355 if (rt->rt_genmask.sa_family != AF_INET) {
356 if (mask || rt->rt_genmask.sa_family)
357 return -EAFNOSUPPORT;
358 }
359
360 if (bad_mask(mask, addr))
361 return -EINVAL;
362
363 plen = inet_mask_len(mask);
364 }
365
366 cfg->fc_dst_len = plen;
367 cfg->fc_dst = addr;
368
369 if (cmd != SIOCDELRT) {
370 cfg->fc_nlflags = NLM_F_CREATE;
371 cfg->fc_protocol = RTPROT_BOOT;
372 }
373
374 if (rt->rt_metric)
375 cfg->fc_priority = rt->rt_metric - 1;
376
377 if (rt->rt_flags & RTF_REJECT) {
378 cfg->fc_scope = RT_SCOPE_HOST;
379 cfg->fc_type = RTN_UNREACHABLE;
380 return 0;
381 }
382
383 cfg->fc_scope = RT_SCOPE_NOWHERE;
384 cfg->fc_type = RTN_UNICAST;
385
386 if (rt->rt_dev) {
387 char *colon;
388 struct net_device *dev;
389 char devname[IFNAMSIZ];
390
391 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
392 return -EFAULT;
393
394 devname[IFNAMSIZ-1] = 0;
395 colon = strchr(devname, ':');
396 if (colon)
397 *colon = 0;
4b5d47d4 398 dev = __dev_get_by_name(net, devname);
4e902c57
TG
399 if (!dev)
400 return -ENODEV;
401 cfg->fc_oif = dev->ifindex;
402 if (colon) {
403 struct in_ifaddr *ifa;
404 struct in_device *in_dev = __in_dev_get_rtnl(dev);
405 if (!in_dev)
406 return -ENODEV;
407 *colon = ':';
408 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
409 if (strcmp(ifa->ifa_label, devname) == 0)
410 break;
411 if (ifa == NULL)
412 return -ENODEV;
413 cfg->fc_prefsrc = ifa->ifa_local;
414 }
415 }
416
417 addr = sk_extract_addr(&rt->rt_gateway);
418 if (rt->rt_gateway.sa_family == AF_INET && addr) {
419 cfg->fc_gw = addr;
420 if (rt->rt_flags & RTF_GATEWAY &&
4b5d47d4 421 inet_addr_type(net, addr) == RTN_UNICAST)
4e902c57
TG
422 cfg->fc_scope = RT_SCOPE_UNIVERSE;
423 }
424
425 if (cmd == SIOCDELRT)
426 return 0;
427
428 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
429 return -EINVAL;
430
431 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
432 cfg->fc_scope = RT_SCOPE_LINK;
433
434 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
435 struct nlattr *mx;
436 int len = 0;
437
438 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
e905a9ed 439 if (mx == NULL)
4e902c57
TG
440 return -ENOMEM;
441
442 if (rt->rt_flags & RTF_MTU)
443 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
444
445 if (rt->rt_flags & RTF_WINDOW)
446 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
447
448 if (rt->rt_flags & RTF_IRTT)
449 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
450
451 cfg->fc_mx = mx;
452 cfg->fc_mx_len = len;
453 }
454
455 return 0;
456}
457
1da177e4
LT
458/*
459 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
460 */
e905a9ed 461
1bad118a 462int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 463{
4e902c57
TG
464 struct fib_config cfg;
465 struct rtentry rt;
1da177e4 466 int err;
1da177e4
LT
467
468 switch (cmd) {
469 case SIOCADDRT: /* Add a route */
470 case SIOCDELRT: /* Delete a route */
471 if (!capable(CAP_NET_ADMIN))
472 return -EPERM;
4e902c57
TG
473
474 if (copy_from_user(&rt, arg, sizeof(rt)))
1da177e4 475 return -EFAULT;
4e902c57 476
1da177e4 477 rtnl_lock();
1bad118a 478 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
1da177e4 479 if (err == 0) {
4e902c57
TG
480 struct fib_table *tb;
481
1da177e4 482 if (cmd == SIOCDELRT) {
1bad118a 483 tb = fib_get_table(net, cfg.fc_table);
1da177e4 484 if (tb)
16c6cf8b 485 err = fib_table_delete(tb, &cfg);
4e902c57
TG
486 else
487 err = -ESRCH;
1da177e4 488 } else {
1bad118a 489 tb = fib_new_table(net, cfg.fc_table);
1da177e4 490 if (tb)
16c6cf8b 491 err = fib_table_insert(tb, &cfg);
4e902c57
TG
492 else
493 err = -ENOBUFS;
1da177e4 494 }
4e902c57
TG
495
496 /* allocated by rtentry_to_fib_config() */
497 kfree(cfg.fc_mx);
1da177e4
LT
498 }
499 rtnl_unlock();
500 return err;
501 }
502 return -EINVAL;
503}
504
ef7c79ed 505const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
4e902c57
TG
506 [RTA_DST] = { .type = NLA_U32 },
507 [RTA_SRC] = { .type = NLA_U32 },
508 [RTA_IIF] = { .type = NLA_U32 },
509 [RTA_OIF] = { .type = NLA_U32 },
510 [RTA_GATEWAY] = { .type = NLA_U32 },
511 [RTA_PRIORITY] = { .type = NLA_U32 },
512 [RTA_PREFSRC] = { .type = NLA_U32 },
513 [RTA_METRICS] = { .type = NLA_NESTED },
5176f91e 514 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4e902c57 515 [RTA_FLOW] = { .type = NLA_U32 },
4e902c57
TG
516};
517
4b5d47d4
DL
518static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
519 struct nlmsghdr *nlh, struct fib_config *cfg)
1da177e4 520{
4e902c57
TG
521 struct nlattr *attr;
522 int err, remaining;
523 struct rtmsg *rtm;
524
525 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
526 if (err < 0)
527 goto errout;
528
529 memset(cfg, 0, sizeof(*cfg));
530
531 rtm = nlmsg_data(nlh);
4e902c57 532 cfg->fc_dst_len = rtm->rtm_dst_len;
4e902c57
TG
533 cfg->fc_tos = rtm->rtm_tos;
534 cfg->fc_table = rtm->rtm_table;
535 cfg->fc_protocol = rtm->rtm_protocol;
536 cfg->fc_scope = rtm->rtm_scope;
537 cfg->fc_type = rtm->rtm_type;
538 cfg->fc_flags = rtm->rtm_flags;
539 cfg->fc_nlflags = nlh->nlmsg_flags;
540
541 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
542 cfg->fc_nlinfo.nlh = nlh;
4b5d47d4 543 cfg->fc_nlinfo.nl_net = net;
4e902c57 544
a0ee18b9
TG
545 if (cfg->fc_type > RTN_MAX) {
546 err = -EINVAL;
547 goto errout;
548 }
549
4e902c57 550 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
8f4c1f9b 551 switch (nla_type(attr)) {
4e902c57 552 case RTA_DST:
17fb2c64 553 cfg->fc_dst = nla_get_be32(attr);
4e902c57 554 break;
4e902c57
TG
555 case RTA_OIF:
556 cfg->fc_oif = nla_get_u32(attr);
557 break;
558 case RTA_GATEWAY:
17fb2c64 559 cfg->fc_gw = nla_get_be32(attr);
4e902c57
TG
560 break;
561 case RTA_PRIORITY:
562 cfg->fc_priority = nla_get_u32(attr);
563 break;
564 case RTA_PREFSRC:
17fb2c64 565 cfg->fc_prefsrc = nla_get_be32(attr);
4e902c57
TG
566 break;
567 case RTA_METRICS:
568 cfg->fc_mx = nla_data(attr);
569 cfg->fc_mx_len = nla_len(attr);
570 break;
571 case RTA_MULTIPATH:
572 cfg->fc_mp = nla_data(attr);
573 cfg->fc_mp_len = nla_len(attr);
574 break;
575 case RTA_FLOW:
576 cfg->fc_flow = nla_get_u32(attr);
577 break;
4e902c57
TG
578 case RTA_TABLE:
579 cfg->fc_table = nla_get_u32(attr);
580 break;
1da177e4
LT
581 }
582 }
4e902c57 583
1da177e4 584 return 0;
4e902c57
TG
585errout:
586 return err;
1da177e4
LT
587}
588
6ed2533e 589static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 590{
3b1e0a65 591 struct net *net = sock_net(skb->sk);
4e902c57
TG
592 struct fib_config cfg;
593 struct fib_table *tb;
594 int err;
1da177e4 595
4b5d47d4 596 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
597 if (err < 0)
598 goto errout;
1da177e4 599
8ad4942c 600 tb = fib_get_table(net, cfg.fc_table);
4e902c57
TG
601 if (tb == NULL) {
602 err = -ESRCH;
603 goto errout;
604 }
605
16c6cf8b 606 err = fib_table_delete(tb, &cfg);
4e902c57
TG
607errout:
608 return err;
1da177e4
LT
609}
610
6ed2533e 611static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1da177e4 612{
3b1e0a65 613 struct net *net = sock_net(skb->sk);
4e902c57
TG
614 struct fib_config cfg;
615 struct fib_table *tb;
616 int err;
1da177e4 617
4b5d47d4 618 err = rtm_to_fib_config(net, skb, nlh, &cfg);
4e902c57
TG
619 if (err < 0)
620 goto errout;
1da177e4 621
226b0b4a 622 tb = fib_new_table(net, cfg.fc_table);
4e902c57
TG
623 if (tb == NULL) {
624 err = -ENOBUFS;
625 goto errout;
626 }
627
16c6cf8b 628 err = fib_table_insert(tb, &cfg);
4e902c57
TG
629errout:
630 return err;
1da177e4
LT
631}
632
63f3444f 633static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1da177e4 634{
3b1e0a65 635 struct net *net = sock_net(skb->sk);
1af5a8c4
PM
636 unsigned int h, s_h;
637 unsigned int e = 0, s_e;
1da177e4 638 struct fib_table *tb;
1af5a8c4 639 struct hlist_node *node;
e4aef8ae 640 struct hlist_head *head;
1af5a8c4 641 int dumped = 0;
1da177e4 642
be403ea1
TG
643 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
644 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
1da177e4
LT
645 return ip_rt_dump(skb, cb);
646
1af5a8c4
PM
647 s_h = cb->args[0];
648 s_e = cb->args[1];
649
650 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
651 e = 0;
e4aef8ae
DL
652 head = &net->ipv4.fib_table_hash[h];
653 hlist_for_each_entry(tb, node, head, tb_hlist) {
1af5a8c4
PM
654 if (e < s_e)
655 goto next;
656 if (dumped)
657 memset(&cb->args[2], 0, sizeof(cb->args) -
e905a9ed 658 2 * sizeof(cb->args[0]));
16c6cf8b 659 if (fib_table_dump(tb, skb, cb) < 0)
1af5a8c4
PM
660 goto out;
661 dumped = 1;
662next:
663 e++;
664 }
1da177e4 665 }
1af5a8c4
PM
666out:
667 cb->args[1] = e;
668 cb->args[0] = h;
1da177e4
LT
669
670 return skb->len;
671}
672
673/* Prepare and feed intra-kernel routing request.
674 Really, it should be netlink message, but :-( netlink
675 can be not configured, so that we feed it directly
676 to fib engine. It is legal, because all events occur
677 only when netlink is already locked.
678 */
679
81f7bf6c 680static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
1da177e4 681{
c346dca1 682 struct net *net = dev_net(ifa->ifa_dev->dev);
4e902c57
TG
683 struct fib_table *tb;
684 struct fib_config cfg = {
685 .fc_protocol = RTPROT_KERNEL,
686 .fc_type = type,
687 .fc_dst = dst,
688 .fc_dst_len = dst_len,
689 .fc_prefsrc = ifa->ifa_local,
690 .fc_oif = ifa->ifa_dev->dev->ifindex,
691 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
4d1169c1 692 .fc_nlinfo = {
4b5d47d4 693 .nl_net = net,
4d1169c1 694 },
4e902c57 695 };
1da177e4
LT
696
697 if (type == RTN_UNICAST)
4b5d47d4 698 tb = fib_new_table(net, RT_TABLE_MAIN);
1da177e4 699 else
4b5d47d4 700 tb = fib_new_table(net, RT_TABLE_LOCAL);
1da177e4
LT
701
702 if (tb == NULL)
703 return;
704
4e902c57 705 cfg.fc_table = tb->tb_id;
1da177e4 706
4e902c57
TG
707 if (type != RTN_LOCAL)
708 cfg.fc_scope = RT_SCOPE_LINK;
709 else
710 cfg.fc_scope = RT_SCOPE_HOST;
1da177e4
LT
711
712 if (cmd == RTM_NEWROUTE)
16c6cf8b 713 fib_table_insert(tb, &cfg);
1da177e4 714 else
16c6cf8b 715 fib_table_delete(tb, &cfg);
1da177e4
LT
716}
717
0ff60a45 718void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
719{
720 struct in_device *in_dev = ifa->ifa_dev;
721 struct net_device *dev = in_dev->dev;
722 struct in_ifaddr *prim = ifa;
a144ea4b
AV
723 __be32 mask = ifa->ifa_mask;
724 __be32 addr = ifa->ifa_local;
725 __be32 prefix = ifa->ifa_address&mask;
1da177e4
LT
726
727 if (ifa->ifa_flags&IFA_F_SECONDARY) {
728 prim = inet_ifa_byprefix(in_dev, prefix, mask);
729 if (prim == NULL) {
a6db9010 730 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
1da177e4
LT
731 return;
732 }
733 }
734
735 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
736
737 if (!(dev->flags&IFF_UP))
738 return;
739
740 /* Add broadcast address, if it is explicitly assigned. */
a144ea4b 741 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
1da177e4
LT
742 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
743
f97c1e0c 744 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
1da177e4
LT
745 (prefix != addr || ifa->ifa_prefixlen < 32)) {
746 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
747 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
748
749 /* Add network specific broadcasts, when it takes a sense */
750 if (ifa->ifa_prefixlen < 31) {
751 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
752 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
753 }
754 }
755}
756
757static void fib_del_ifaddr(struct in_ifaddr *ifa)
758{
759 struct in_device *in_dev = ifa->ifa_dev;
760 struct net_device *dev = in_dev->dev;
761 struct in_ifaddr *ifa1;
762 struct in_ifaddr *prim = ifa;
a144ea4b
AV
763 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
764 __be32 any = ifa->ifa_address&ifa->ifa_mask;
1da177e4
LT
765#define LOCAL_OK 1
766#define BRD_OK 2
767#define BRD0_OK 4
768#define BRD1_OK 8
769 unsigned ok = 0;
770
771 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
772 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
773 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
774 else {
775 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
776 if (prim == NULL) {
a6db9010 777 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
1da177e4
LT
778 return;
779 }
780 }
781
782 /* Deletion is more complicated than add.
783 We should take care of not to delete too much :-)
784
785 Scan address list to be sure that addresses are really gone.
786 */
787
788 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
789 if (ifa->ifa_local == ifa1->ifa_local)
790 ok |= LOCAL_OK;
791 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
792 ok |= BRD_OK;
793 if (brd == ifa1->ifa_broadcast)
794 ok |= BRD1_OK;
795 if (any == ifa1->ifa_broadcast)
796 ok |= BRD0_OK;
797 }
798
799 if (!(ok&BRD_OK))
800 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
801 if (!(ok&BRD1_OK))
802 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
803 if (!(ok&BRD0_OK))
804 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
805 if (!(ok&LOCAL_OK)) {
806 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
807
808 /* Check, that this local address finally disappeared. */
c346dca1 809 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
1da177e4
LT
810 /* And the last, but not the least thing.
811 We must flush stray FIB entries.
812
813 First of all, we scan fib_info list searching
814 for stray nexthop entries, then ignite fib_flush.
815 */
c346dca1
YH
816 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
817 fib_flush(dev_net(dev));
1da177e4
LT
818 }
819 }
820#undef LOCAL_OK
821#undef BRD_OK
822#undef BRD0_OK
823#undef BRD1_OK
824}
825
246955fe
RO
826static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
827{
e905a9ed 828
246955fe 829 struct fib_result res;
5f300893 830 struct flowi fl = { .mark = frn->fl_mark,
47dcf0cb 831 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
246955fe
RO
832 .tos = frn->fl_tos,
833 .scope = frn->fl_scope } } };
1194ed0a 834
912a41a4
SV
835#ifdef CONFIG_IP_MULTIPLE_TABLES
836 res.r = NULL;
837#endif
838
1194ed0a 839 frn->err = -ENOENT;
246955fe
RO
840 if (tb) {
841 local_bh_disable();
842
843 frn->tb_id = tb->tb_id;
16c6cf8b 844 frn->err = fib_table_lookup(tb, &fl, &res);
246955fe
RO
845
846 if (!frn->err) {
847 frn->prefixlen = res.prefixlen;
848 frn->nh_sel = res.nh_sel;
849 frn->type = res.type;
850 frn->scope = res.scope;
1194ed0a 851 fib_res_put(&res);
246955fe
RO
852 }
853 local_bh_enable();
854 }
855}
856
28f7b036 857static void nl_fib_input(struct sk_buff *skb)
246955fe 858{
6bd48fcf 859 struct net *net;
246955fe 860 struct fib_result_nl *frn;
28f7b036 861 struct nlmsghdr *nlh;
246955fe 862 struct fib_table *tb;
28f7b036 863 u32 pid;
1194ed0a 864
3b1e0a65 865 net = sock_net(skb->sk);
b529ccf2 866 nlh = nlmsg_hdr(skb);
ea86575e 867 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
d883a036 868 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
ea86575e 869 return;
d883a036
DL
870
871 skb = skb_clone(skb, GFP_KERNEL);
872 if (skb == NULL)
873 return;
874 nlh = nlmsg_hdr(skb);
e905a9ed 875
246955fe 876 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
6bd48fcf 877 tb = fib_get_table(net, frn->tb_id_in);
246955fe
RO
878
879 nl_fib_lookup(frn, tb);
e905a9ed 880
1194ed0a 881 pid = NETLINK_CB(skb).pid; /* pid of sending process */
246955fe 882 NETLINK_CB(skb).pid = 0; /* from kernel */
ac6d439d 883 NETLINK_CB(skb).dst_group = 0; /* unicast */
6bd48fcf 884 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
e905a9ed 885}
246955fe 886
2c8c1e72 887static int __net_init nl_fib_lookup_init(struct net *net)
246955fe 888{
6bd48fcf
DL
889 struct sock *sk;
890 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
891 nl_fib_input, NULL, THIS_MODULE);
892 if (sk == NULL)
7b1a74fd 893 return -EAFNOSUPPORT;
6bd48fcf 894 net->ipv4.fibnl = sk;
7b1a74fd
DL
895 return 0;
896}
897
898static void nl_fib_lookup_exit(struct net *net)
899{
b7c6ba6e 900 netlink_kernel_release(net->ipv4.fibnl);
775516bf 901 net->ipv4.fibnl = NULL;
246955fe
RO
902}
903
/*
 * Take IPv4 routing down on a device: sync down the routes using it
 * (flushing the tables if that reported anything), flush the routing cache
 * with the given delay and notify ARP.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int need_flush = fib_sync_down_dev(dev, force);

	if (need_flush)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
911
912static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
913{
6ed2533e 914 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
76e6ebfb 915 struct net_device *dev = ifa->ifa_dev->dev;
1da177e4
LT
916
917 switch (event) {
918 case NETDEV_UP:
919 fib_add_ifaddr(ifa);
920#ifdef CONFIG_IP_ROUTE_MULTIPATH
76e6ebfb 921 fib_sync_up(dev);
1da177e4 922#endif
76e6ebfb 923 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
924 break;
925 case NETDEV_DOWN:
926 fib_del_ifaddr(ifa);
9fcc2e8a 927 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
928 /* Last address was deleted from this interface.
929 Disable IP.
930 */
e2ce1468 931 fib_disable_ip(dev, 1, 0);
1da177e4 932 } else {
76e6ebfb 933 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
934 }
935 break;
936 }
937 return NOTIFY_DONE;
938}
939
940static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
941{
942 struct net_device *dev = ptr;
e5ed6399 943 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
944
945 if (event == NETDEV_UNREGISTER) {
e2ce1468 946 fib_disable_ip(dev, 2, -1);
1da177e4
LT
947 return NOTIFY_DONE;
948 }
949
950 if (!in_dev)
951 return NOTIFY_DONE;
952
953 switch (event) {
954 case NETDEV_UP:
955 for_ifa(in_dev) {
956 fib_add_ifaddr(ifa);
957 } endfor_ifa(in_dev);
958#ifdef CONFIG_IP_ROUTE_MULTIPATH
959 fib_sync_up(dev);
960#endif
76e6ebfb 961 rt_cache_flush(dev_net(dev), -1);
1da177e4
LT
962 break;
963 case NETDEV_DOWN:
e2ce1468 964 fib_disable_ip(dev, 0, 0);
1da177e4
LT
965 break;
966 case NETDEV_CHANGEMTU:
967 case NETDEV_CHANGE:
76e6ebfb 968 rt_cache_flush(dev_net(dev), 0);
1da177e4 969 break;
a5ee1551
EB
970 case NETDEV_UNREGISTER_BATCH:
971 rt_cache_flush_batch();
972 break;
1da177e4
LT
973 }
974 return NOTIFY_DONE;
975}
976
977static struct notifier_block fib_inetaddr_notifier = {
6ed2533e 978 .notifier_call = fib_inetaddr_event,
1da177e4
LT
979};
980
981static struct notifier_block fib_netdev_notifier = {
6ed2533e 982 .notifier_call = fib_netdev_event,
1da177e4
LT
983};
984
7b1a74fd 985static int __net_init ip_fib_net_init(struct net *net)
1da177e4 986{
dce5cbee 987 int err;
1af5a8c4
PM
988 unsigned int i;
989
e4aef8ae
DL
990 net->ipv4.fib_table_hash = kzalloc(
991 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
992 if (net->ipv4.fib_table_hash == NULL)
993 return -ENOMEM;
994
1af5a8c4 995 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
e4aef8ae 996 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
c3e9a353 997
dce5cbee
DL
998 err = fib4_rules_init(net);
999 if (err < 0)
1000 goto fail;
1001 return 0;
1002
1003fail:
1004 kfree(net->ipv4.fib_table_hash);
1005 return err;
7b1a74fd 1006}
1da177e4 1007
2c8c1e72 1008static void ip_fib_net_exit(struct net *net)
7b1a74fd
DL
1009{
1010 unsigned int i;
1011
1012#ifdef CONFIG_IP_MULTIPLE_TABLES
1013 fib4_rules_exit(net);
1014#endif
1015
1016 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1017 struct fib_table *tb;
1018 struct hlist_head *head;
1019 struct hlist_node *node, *tmp;
63f3444f 1020
e4aef8ae 1021 head = &net->ipv4.fib_table_hash[i];
7b1a74fd
DL
1022 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1023 hlist_del(node);
16c6cf8b 1024 fib_table_flush(tb);
7b1a74fd
DL
1025 kfree(tb);
1026 }
1027 }
e4aef8ae 1028 kfree(net->ipv4.fib_table_hash);
7b1a74fd
DL
1029}
1030
1031static int __net_init fib_net_init(struct net *net)
1032{
1033 int error;
1034
7b1a74fd
DL
1035 error = ip_fib_net_init(net);
1036 if (error < 0)
1037 goto out;
1038 error = nl_fib_lookup_init(net);
1039 if (error < 0)
1040 goto out_nlfl;
1041 error = fib_proc_init(net);
1042 if (error < 0)
1043 goto out_proc;
1044out:
1045 return error;
1046
1047out_proc:
1048 nl_fib_lookup_exit(net);
1049out_nlfl:
1050 ip_fib_net_exit(net);
1051 goto out;
1052}
1053
1054static void __net_exit fib_net_exit(struct net *net)
1055{
1056 fib_proc_exit(net);
1057 nl_fib_lookup_exit(net);
1058 ip_fib_net_exit(net);
1059}
1060
1061static struct pernet_operations fib_net_ops = {
1062 .init = fib_net_init,
1063 .exit = fib_net_exit,
1064};
1065
1066void __init ip_fib_init(void)
1067{
63f3444f
TG
1068 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1069 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1070 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
7b1a74fd
DL
1071
1072 register_pernet_subsys(&fib_net_ops);
1073 register_netdevice_notifier(&fib_netdev_notifier);
1074 register_inetaddr_notifier(&fib_inetaddr_notifier);
7f9b8052
SH
1075
1076 fib_hash_init();
1da177e4
LT
1077}
1078
/* Symbols made available to other kernel code via EXPORT_SYMBOL. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);