]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
[PATCH] capable/capability.h (fs/)
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19#include <linux/module.h>
20#include <asm/uaccess.h>
21#include <asm/system.h>
22#include <linux/bitops.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/sched.h>
26#include <linux/mm.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/sockios.h>
30#include <linux/errno.h>
31#include <linux/in.h>
32#include <linux/inet.h>
14c85021 33#include <linux/inetdevice.h>
1da177e4
LT
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/skbuff.h>
37#include <linux/netlink.h>
38#include <linux/init.h>
39
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
48
49#define FFprint(a...) printk(KERN_DEBUG a)
50
51#ifndef CONFIG_IP_MULTIPLE_TABLES
52
53#define RT_TABLE_MIN RT_TABLE_MAIN
54
55struct fib_table *ip_fib_local_table;
56struct fib_table *ip_fib_main_table;
57
58#else
59
60#define RT_TABLE_MIN 1
61
62struct fib_table *fib_tables[RT_TABLE_MAX+1];
63
64struct fib_table *__fib_new_table(int id)
65{
66 struct fib_table *tb;
67
68 tb = fib_hash_init(id);
69 if (!tb)
70 return NULL;
71 fib_tables[id] = tb;
72 return tb;
73}
74
75
76#endif /* CONFIG_IP_MULTIPLE_TABLES */
77
78
79static void fib_flush(void)
80{
81 int flushed = 0;
82#ifdef CONFIG_IP_MULTIPLE_TABLES
83 struct fib_table *tb;
84 int id;
85
86 for (id = RT_TABLE_MAX; id>0; id--) {
87 if ((tb = fib_get_table(id))==NULL)
88 continue;
89 flushed += tb->tb_flush(tb);
90 }
91#else /* CONFIG_IP_MULTIPLE_TABLES */
92 flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
93 flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
94#endif /* CONFIG_IP_MULTIPLE_TABLES */
95
96 if (flushed)
97 rt_cache_flush(-1);
98}
99
100/*
101 * Find the first device with a given source address.
102 */
103
104struct net_device * ip_dev_find(u32 addr)
105{
106 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
107 struct fib_result res;
108 struct net_device *dev = NULL;
109
110#ifdef CONFIG_IP_MULTIPLE_TABLES
111 res.r = NULL;
112#endif
113
114 if (!ip_fib_local_table ||
115 ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
116 return NULL;
117 if (res.type != RTN_LOCAL)
118 goto out;
119 dev = FIB_RES_DEV(res);
120
121 if (dev)
122 dev_hold(dev);
123out:
124 fib_res_put(&res);
125 return dev;
126}
127
128unsigned inet_addr_type(u32 addr)
129{
130 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
131 struct fib_result res;
132 unsigned ret = RTN_BROADCAST;
133
134 if (ZERONET(addr) || BADCLASS(addr))
135 return RTN_BROADCAST;
136 if (MULTICAST(addr))
137 return RTN_MULTICAST;
138
139#ifdef CONFIG_IP_MULTIPLE_TABLES
140 res.r = NULL;
141#endif
142
143 if (ip_fib_local_table) {
144 ret = RTN_UNICAST;
145 if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
146 &fl, &res)) {
147 ret = res.type;
148 fib_res_put(&res);
149 }
150 }
151 return ret;
152}
153
154/* Given (packet source, input interface) and optional (dst, oif, tos):
155 - (main) check, that source is valid i.e. not broadcast or our local
156 address.
157 - figure out what "logical" interface this packet arrived
158 and calculate "specific destination" address.
159 - check, that packet arrived from expected physical interface.
160 */
161
162int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
163 struct net_device *dev, u32 *spec_dst, u32 *itag)
164{
165 struct in_device *in_dev;
166 struct flowi fl = { .nl_u = { .ip4_u =
167 { .daddr = src,
168 .saddr = dst,
169 .tos = tos } },
170 .iif = oif };
171 struct fib_result res;
172 int no_addr, rpf;
173 int ret;
174
175 no_addr = rpf = 0;
176 rcu_read_lock();
e5ed6399 177 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
178 if (in_dev) {
179 no_addr = in_dev->ifa_list == NULL;
180 rpf = IN_DEV_RPFILTER(in_dev);
181 }
182 rcu_read_unlock();
183
184 if (in_dev == NULL)
185 goto e_inval;
186
187 if (fib_lookup(&fl, &res))
188 goto last_resort;
189 if (res.type != RTN_UNICAST)
190 goto e_inval_res;
191 *spec_dst = FIB_RES_PREFSRC(res);
192 fib_combine_itag(itag, &res);
193#ifdef CONFIG_IP_ROUTE_MULTIPATH
194 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
195#else
196 if (FIB_RES_DEV(res) == dev)
197#endif
198 {
199 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
200 fib_res_put(&res);
201 return ret;
202 }
203 fib_res_put(&res);
204 if (no_addr)
205 goto last_resort;
206 if (rpf)
207 goto e_inval;
208 fl.oif = dev->ifindex;
209
210 ret = 0;
211 if (fib_lookup(&fl, &res) == 0) {
212 if (res.type == RTN_UNICAST) {
213 *spec_dst = FIB_RES_PREFSRC(res);
214 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
215 }
216 fib_res_put(&res);
217 }
218 return ret;
219
220last_resort:
221 if (rpf)
222 goto e_inval;
223 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
224 *itag = 0;
225 return 0;
226
227e_inval_res:
228 fib_res_put(&res);
229e_inval:
230 return -EINVAL;
231}
232
233#ifndef CONFIG_IP_NOSIOCRT
234
235/*
236 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
237 */
238
239int ip_rt_ioctl(unsigned int cmd, void __user *arg)
240{
241 int err;
242 struct kern_rta rta;
243 struct rtentry r;
244 struct {
245 struct nlmsghdr nlh;
246 struct rtmsg rtm;
247 } req;
248
249 switch (cmd) {
250 case SIOCADDRT: /* Add a route */
251 case SIOCDELRT: /* Delete a route */
252 if (!capable(CAP_NET_ADMIN))
253 return -EPERM;
254 if (copy_from_user(&r, arg, sizeof(struct rtentry)))
255 return -EFAULT;
256 rtnl_lock();
257 err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
258 if (err == 0) {
259 if (cmd == SIOCDELRT) {
260 struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
261 err = -ESRCH;
262 if (tb)
263 err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
264 } else {
265 struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
266 err = -ENOBUFS;
267 if (tb)
268 err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
269 }
a51482bd 270 kfree(rta.rta_mx);
1da177e4
LT
271 }
272 rtnl_unlock();
273 return err;
274 }
275 return -EINVAL;
276}
277
278#else
279
280int ip_rt_ioctl(unsigned int cmd, void *arg)
281{
282 return -EINVAL;
283}
284
285#endif
286
287static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
288{
289 int i;
290
291 for (i=1; i<=RTA_MAX; i++) {
292 struct rtattr *attr = rta[i-1];
293 if (attr) {
294 if (RTA_PAYLOAD(attr) < 4)
295 return -EINVAL;
296 if (i != RTA_MULTIPATH && i != RTA_METRICS)
297 rta[i-1] = (struct rtattr*)RTA_DATA(attr);
298 }
299 }
300 return 0;
301}
302
303int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
304{
305 struct fib_table * tb;
306 struct rtattr **rta = arg;
307 struct rtmsg *r = NLMSG_DATA(nlh);
308
309 if (inet_check_attr(r, rta))
310 return -EINVAL;
311
312 tb = fib_get_table(r->rtm_table);
313 if (tb)
314 return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
315 return -ESRCH;
316}
317
318int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
319{
320 struct fib_table * tb;
321 struct rtattr **rta = arg;
322 struct rtmsg *r = NLMSG_DATA(nlh);
323
324 if (inet_check_attr(r, rta))
325 return -EINVAL;
326
327 tb = fib_new_table(r->rtm_table);
328 if (tb)
329 return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
330 return -ENOBUFS;
331}
332
333int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
334{
335 int t;
336 int s_t;
337 struct fib_table *tb;
338
339 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
340 ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
341 return ip_rt_dump(skb, cb);
342
343 s_t = cb->args[0];
344 if (s_t == 0)
345 s_t = cb->args[0] = RT_TABLE_MIN;
346
347 for (t=s_t; t<=RT_TABLE_MAX; t++) {
348 if (t < s_t) continue;
349 if (t > s_t)
350 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
351 if ((tb = fib_get_table(t))==NULL)
352 continue;
353 if (tb->tb_dump(tb, skb, cb) < 0)
354 break;
355 }
356
357 cb->args[0] = t;
358
359 return skb->len;
360}
361
/* Prepare and feed an intra-kernel routing request.
   Ideally this would be sent as a netlink message, but netlink
   may not be configured, so the request is fed directly to the
   FIB engine instead. This is legal because all such events occur
   only while netlink is already locked.
 */
368
369static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
370{
371 struct fib_table * tb;
372 struct {
373 struct nlmsghdr nlh;
374 struct rtmsg rtm;
375 } req;
376 struct kern_rta rta;
377
378 memset(&req.rtm, 0, sizeof(req.rtm));
379 memset(&rta, 0, sizeof(rta));
380
381 if (type == RTN_UNICAST)
382 tb = fib_new_table(RT_TABLE_MAIN);
383 else
384 tb = fib_new_table(RT_TABLE_LOCAL);
385
386 if (tb == NULL)
387 return;
388
389 req.nlh.nlmsg_len = sizeof(req);
390 req.nlh.nlmsg_type = cmd;
391 req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
392 req.nlh.nlmsg_pid = 0;
393 req.nlh.nlmsg_seq = 0;
394
395 req.rtm.rtm_dst_len = dst_len;
396 req.rtm.rtm_table = tb->tb_id;
397 req.rtm.rtm_protocol = RTPROT_KERNEL;
398 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
399 req.rtm.rtm_type = type;
400
401 rta.rta_dst = &dst;
402 rta.rta_prefsrc = &ifa->ifa_local;
403 rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
404
405 if (cmd == RTM_NEWROUTE)
406 tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
407 else
408 tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
409}
410
0ff60a45 411void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
412{
413 struct in_device *in_dev = ifa->ifa_dev;
414 struct net_device *dev = in_dev->dev;
415 struct in_ifaddr *prim = ifa;
416 u32 mask = ifa->ifa_mask;
417 u32 addr = ifa->ifa_local;
418 u32 prefix = ifa->ifa_address&mask;
419
420 if (ifa->ifa_flags&IFA_F_SECONDARY) {
421 prim = inet_ifa_byprefix(in_dev, prefix, mask);
422 if (prim == NULL) {
423 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
424 return;
425 }
426 }
427
428 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
429
430 if (!(dev->flags&IFF_UP))
431 return;
432
433 /* Add broadcast address, if it is explicitly assigned. */
434 if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
435 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
436
437 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
438 (prefix != addr || ifa->ifa_prefixlen < 32)) {
439 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
440 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
441
442 /* Add network specific broadcasts, when it takes a sense */
443 if (ifa->ifa_prefixlen < 31) {
444 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
445 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
446 }
447 }
448}
449
450static void fib_del_ifaddr(struct in_ifaddr *ifa)
451{
452 struct in_device *in_dev = ifa->ifa_dev;
453 struct net_device *dev = in_dev->dev;
454 struct in_ifaddr *ifa1;
455 struct in_ifaddr *prim = ifa;
456 u32 brd = ifa->ifa_address|~ifa->ifa_mask;
457 u32 any = ifa->ifa_address&ifa->ifa_mask;
458#define LOCAL_OK 1
459#define BRD_OK 2
460#define BRD0_OK 4
461#define BRD1_OK 8
462 unsigned ok = 0;
463
464 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
465 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
466 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
467 else {
468 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
469 if (prim == NULL) {
470 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
471 return;
472 }
473 }
474
475 /* Deletion is more complicated than add.
476 We should take care of not to delete too much :-)
477
478 Scan address list to be sure that addresses are really gone.
479 */
480
481 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
482 if (ifa->ifa_local == ifa1->ifa_local)
483 ok |= LOCAL_OK;
484 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
485 ok |= BRD_OK;
486 if (brd == ifa1->ifa_broadcast)
487 ok |= BRD1_OK;
488 if (any == ifa1->ifa_broadcast)
489 ok |= BRD0_OK;
490 }
491
492 if (!(ok&BRD_OK))
493 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
494 if (!(ok&BRD1_OK))
495 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
496 if (!(ok&BRD0_OK))
497 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
498 if (!(ok&LOCAL_OK)) {
499 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
500
501 /* Check, that this local address finally disappeared. */
502 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
503 /* And the last, but not the least thing.
504 We must flush stray FIB entries.
505
506 First of all, we scan fib_info list searching
507 for stray nexthop entries, then ignite fib_flush.
508 */
509 if (fib_sync_down(ifa->ifa_local, NULL, 0))
510 fib_flush();
511 }
512 }
513#undef LOCAL_OK
514#undef BRD_OK
515#undef BRD0_OK
516#undef BRD1_OK
517}
518
246955fe
RO
519static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
520{
521
522 struct fib_result res;
523 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
524 .fwmark = frn->fl_fwmark,
525 .tos = frn->fl_tos,
526 .scope = frn->fl_scope } } };
527 if (tb) {
528 local_bh_disable();
529
530 frn->tb_id = tb->tb_id;
531 frn->err = tb->tb_lookup(tb, &fl, &res);
532
533 if (!frn->err) {
534 frn->prefixlen = res.prefixlen;
535 frn->nh_sel = res.nh_sel;
536 frn->type = res.type;
537 frn->scope = res.scope;
538 }
539 local_bh_enable();
540 }
541}
542
543static void nl_fib_input(struct sock *sk, int len)
544{
545 struct sk_buff *skb = NULL;
546 struct nlmsghdr *nlh = NULL;
547 struct fib_result_nl *frn;
246955fe
RO
548 u32 pid;
549 struct fib_table *tb;
550
ea86575e 551 skb = skb_dequeue(&sk->sk_receive_queue);
246955fe 552 nlh = (struct nlmsghdr *)skb->data;
ea86575e
TG
553 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
554 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
555 kfree_skb(skb);
556 return;
557 }
246955fe
RO
558
559 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
560 tb = fib_get_table(frn->tb_id_in);
561
562 nl_fib_lookup(frn, tb);
563
564 pid = nlh->nlmsg_pid; /*pid of sending process */
246955fe
RO
565 NETLINK_CB(skb).pid = 0; /* from kernel */
566 NETLINK_CB(skb).dst_pid = pid;
ac6d439d 567 NETLINK_CB(skb).dst_group = 0; /* unicast */
246955fe
RO
568 netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
569}
570
571static void nl_fib_lookup_init(void)
572{
06628607 573 netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
246955fe
RO
574}
575
1da177e4
LT
/*
 *	Take IP routing state down for @dev: sync the FIB down (flushing
 *	it when fib_sync_down() reports a non-zero result), flush the
 *	routing cache and finally call arp_ifdown() for the device.
 *	@force is passed through to fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale != 0)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
583
584static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
585{
586 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
587
588 switch (event) {
589 case NETDEV_UP:
590 fib_add_ifaddr(ifa);
591#ifdef CONFIG_IP_ROUTE_MULTIPATH
592 fib_sync_up(ifa->ifa_dev->dev);
593#endif
594 rt_cache_flush(-1);
595 break;
596 case NETDEV_DOWN:
597 fib_del_ifaddr(ifa);
9fcc2e8a 598 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
599 /* Last address was deleted from this interface.
600 Disable IP.
601 */
602 fib_disable_ip(ifa->ifa_dev->dev, 1);
603 } else {
604 rt_cache_flush(-1);
605 }
606 break;
607 }
608 return NOTIFY_DONE;
609}
610
611static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
612{
613 struct net_device *dev = ptr;
e5ed6399 614 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
615
616 if (event == NETDEV_UNREGISTER) {
617 fib_disable_ip(dev, 2);
618 return NOTIFY_DONE;
619 }
620
621 if (!in_dev)
622 return NOTIFY_DONE;
623
624 switch (event) {
625 case NETDEV_UP:
626 for_ifa(in_dev) {
627 fib_add_ifaddr(ifa);
628 } endfor_ifa(in_dev);
629#ifdef CONFIG_IP_ROUTE_MULTIPATH
630 fib_sync_up(dev);
631#endif
632 rt_cache_flush(-1);
633 break;
634 case NETDEV_DOWN:
635 fib_disable_ip(dev, 0);
636 break;
637 case NETDEV_CHANGEMTU:
638 case NETDEV_CHANGE:
639 rt_cache_flush(0);
640 break;
641 }
642 return NOTIFY_DONE;
643}
644
645static struct notifier_block fib_inetaddr_notifier = {
646 .notifier_call =fib_inetaddr_event,
647};
648
649static struct notifier_block fib_netdev_notifier = {
650 .notifier_call =fib_netdev_event,
651};
652
653void __init ip_fib_init(void)
654{
655#ifndef CONFIG_IP_MULTIPLE_TABLES
656 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
657 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
658#else
659 fib_rules_init();
660#endif
661
662 register_netdevice_notifier(&fib_netdev_notifier);
663 register_inetaddr_notifier(&fib_inetaddr_notifier);
246955fe 664 nl_fib_lookup_init();
1da177e4
LT
665}
666
667EXPORT_SYMBOL(inet_addr_type);
1da177e4 668EXPORT_SYMBOL(ip_rt_ioctl);