]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/fib_frontend.c
[SOCK]: Introduce sk_receive_skb
[net-next-2.6.git] / net / ipv4 / fib_frontend.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19#include <linux/module.h>
20#include <asm/uaccess.h>
21#include <asm/system.h>
22#include <linux/bitops.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/sched.h>
26#include <linux/mm.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/sockios.h>
30#include <linux/errno.h>
31#include <linux/in.h>
32#include <linux/inet.h>
33#include <linux/netdevice.h>
34#include <linux/if_arp.h>
35#include <linux/skbuff.h>
36#include <linux/netlink.h>
37#include <linux/init.h>
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/icmp.h>
45#include <net/arp.h>
46#include <net/ip_fib.h>
47
48#define FFprint(a...) printk(KERN_DEBUG a)
49
#ifndef CONFIG_IP_MULTIPLE_TABLES

/* Single-table build: table walks start at the main table. */
#define RT_TABLE_MIN RT_TABLE_MAIN

struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

#else

/* Multi-table build: table walks start at table id 1. */
#define RT_TABLE_MIN 1

/* One slot per table id, 0..RT_TABLE_MAX inclusive. */
struct fib_table *fib_tables[RT_TABLE_MAX+1];

/*
 * Create the hash-based FIB table numbered @id and record it in
 * fib_tables[id].
 *
 * Returns the new table, or NULL when fib_hash_init() fails; in that
 * case fib_tables[] is left untouched.
 */
struct fib_table *__fib_new_table(int id)
{
	struct fib_table *table = fib_hash_init(id);

	if (table != NULL)
		fib_tables[id] = table;
	return table;
}

#endif /* CONFIG_IP_MULTIPLE_TABLES */
76
77
78static void fib_flush(void)
79{
80 int flushed = 0;
81#ifdef CONFIG_IP_MULTIPLE_TABLES
82 struct fib_table *tb;
83 int id;
84
85 for (id = RT_TABLE_MAX; id>0; id--) {
86 if ((tb = fib_get_table(id))==NULL)
87 continue;
88 flushed += tb->tb_flush(tb);
89 }
90#else /* CONFIG_IP_MULTIPLE_TABLES */
91 flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
92 flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
93#endif /* CONFIG_IP_MULTIPLE_TABLES */
94
95 if (flushed)
96 rt_cache_flush(-1);
97}
98
99/*
100 * Find the first device with a given source address.
101 */
102
103struct net_device * ip_dev_find(u32 addr)
104{
105 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
106 struct fib_result res;
107 struct net_device *dev = NULL;
108
109#ifdef CONFIG_IP_MULTIPLE_TABLES
110 res.r = NULL;
111#endif
112
113 if (!ip_fib_local_table ||
114 ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
115 return NULL;
116 if (res.type != RTN_LOCAL)
117 goto out;
118 dev = FIB_RES_DEV(res);
119
120 if (dev)
121 dev_hold(dev);
122out:
123 fib_res_put(&res);
124 return dev;
125}
126
127unsigned inet_addr_type(u32 addr)
128{
129 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
130 struct fib_result res;
131 unsigned ret = RTN_BROADCAST;
132
133 if (ZERONET(addr) || BADCLASS(addr))
134 return RTN_BROADCAST;
135 if (MULTICAST(addr))
136 return RTN_MULTICAST;
137
138#ifdef CONFIG_IP_MULTIPLE_TABLES
139 res.r = NULL;
140#endif
141
142 if (ip_fib_local_table) {
143 ret = RTN_UNICAST;
144 if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
145 &fl, &res)) {
146 ret = res.type;
147 fib_res_put(&res);
148 }
149 }
150 return ret;
151}
152
153/* Given (packet source, input interface) and optional (dst, oif, tos):
154 - (main) check, that source is valid i.e. not broadcast or our local
155 address.
156 - figure out what "logical" interface this packet arrived
157 and calculate "specific destination" address.
158 - check, that packet arrived from expected physical interface.
159 */
160
161int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
162 struct net_device *dev, u32 *spec_dst, u32 *itag)
163{
164 struct in_device *in_dev;
165 struct flowi fl = { .nl_u = { .ip4_u =
166 { .daddr = src,
167 .saddr = dst,
168 .tos = tos } },
169 .iif = oif };
170 struct fib_result res;
171 int no_addr, rpf;
172 int ret;
173
174 no_addr = rpf = 0;
175 rcu_read_lock();
e5ed6399 176 in_dev = __in_dev_get_rcu(dev);
1da177e4
LT
177 if (in_dev) {
178 no_addr = in_dev->ifa_list == NULL;
179 rpf = IN_DEV_RPFILTER(in_dev);
180 }
181 rcu_read_unlock();
182
183 if (in_dev == NULL)
184 goto e_inval;
185
186 if (fib_lookup(&fl, &res))
187 goto last_resort;
188 if (res.type != RTN_UNICAST)
189 goto e_inval_res;
190 *spec_dst = FIB_RES_PREFSRC(res);
191 fib_combine_itag(itag, &res);
192#ifdef CONFIG_IP_ROUTE_MULTIPATH
193 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
194#else
195 if (FIB_RES_DEV(res) == dev)
196#endif
197 {
198 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
199 fib_res_put(&res);
200 return ret;
201 }
202 fib_res_put(&res);
203 if (no_addr)
204 goto last_resort;
205 if (rpf)
206 goto e_inval;
207 fl.oif = dev->ifindex;
208
209 ret = 0;
210 if (fib_lookup(&fl, &res) == 0) {
211 if (res.type == RTN_UNICAST) {
212 *spec_dst = FIB_RES_PREFSRC(res);
213 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
214 }
215 fib_res_put(&res);
216 }
217 return ret;
218
219last_resort:
220 if (rpf)
221 goto e_inval;
222 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
223 *itag = 0;
224 return 0;
225
226e_inval_res:
227 fib_res_put(&res);
228e_inval:
229 return -EINVAL;
230}
231
232#ifndef CONFIG_IP_NOSIOCRT
233
234/*
235 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
236 */
237
238int ip_rt_ioctl(unsigned int cmd, void __user *arg)
239{
240 int err;
241 struct kern_rta rta;
242 struct rtentry r;
243 struct {
244 struct nlmsghdr nlh;
245 struct rtmsg rtm;
246 } req;
247
248 switch (cmd) {
249 case SIOCADDRT: /* Add a route */
250 case SIOCDELRT: /* Delete a route */
251 if (!capable(CAP_NET_ADMIN))
252 return -EPERM;
253 if (copy_from_user(&r, arg, sizeof(struct rtentry)))
254 return -EFAULT;
255 rtnl_lock();
256 err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
257 if (err == 0) {
258 if (cmd == SIOCDELRT) {
259 struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
260 err = -ESRCH;
261 if (tb)
262 err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
263 } else {
264 struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
265 err = -ENOBUFS;
266 if (tb)
267 err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
268 }
a51482bd 269 kfree(rta.rta_mx);
1da177e4
LT
270 }
271 rtnl_unlock();
272 return err;
273 }
274 return -EINVAL;
275}
276
277#else
278
279int ip_rt_ioctl(unsigned int cmd, void *arg)
280{
281 return -EINVAL;
282}
283
284#endif
285
286static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
287{
288 int i;
289
290 for (i=1; i<=RTA_MAX; i++) {
291 struct rtattr *attr = rta[i-1];
292 if (attr) {
293 if (RTA_PAYLOAD(attr) < 4)
294 return -EINVAL;
295 if (i != RTA_MULTIPATH && i != RTA_METRICS)
296 rta[i-1] = (struct rtattr*)RTA_DATA(attr);
297 }
298 }
299 return 0;
300}
301
302int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
303{
304 struct fib_table * tb;
305 struct rtattr **rta = arg;
306 struct rtmsg *r = NLMSG_DATA(nlh);
307
308 if (inet_check_attr(r, rta))
309 return -EINVAL;
310
311 tb = fib_get_table(r->rtm_table);
312 if (tb)
313 return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
314 return -ESRCH;
315}
316
317int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
318{
319 struct fib_table * tb;
320 struct rtattr **rta = arg;
321 struct rtmsg *r = NLMSG_DATA(nlh);
322
323 if (inet_check_attr(r, rta))
324 return -EINVAL;
325
326 tb = fib_new_table(r->rtm_table);
327 if (tb)
328 return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
329 return -ENOBUFS;
330}
331
332int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
333{
334 int t;
335 int s_t;
336 struct fib_table *tb;
337
338 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
339 ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
340 return ip_rt_dump(skb, cb);
341
342 s_t = cb->args[0];
343 if (s_t == 0)
344 s_t = cb->args[0] = RT_TABLE_MIN;
345
346 for (t=s_t; t<=RT_TABLE_MAX; t++) {
347 if (t < s_t) continue;
348 if (t > s_t)
349 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
350 if ((tb = fib_get_table(t))==NULL)
351 continue;
352 if (tb->tb_dump(tb, skb, cb) < 0)
353 break;
354 }
355
356 cb->args[0] = t;
357
358 return skb->len;
359}
360
361/* Prepare and feed intra-kernel routing request.
362 Really, it should be netlink message, but :-( netlink
363 can be not configured, so that we feed it directly
364 to fib engine. It is legal, because all events occur
365 only when netlink is already locked.
366 */
367
368static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
369{
370 struct fib_table * tb;
371 struct {
372 struct nlmsghdr nlh;
373 struct rtmsg rtm;
374 } req;
375 struct kern_rta rta;
376
377 memset(&req.rtm, 0, sizeof(req.rtm));
378 memset(&rta, 0, sizeof(rta));
379
380 if (type == RTN_UNICAST)
381 tb = fib_new_table(RT_TABLE_MAIN);
382 else
383 tb = fib_new_table(RT_TABLE_LOCAL);
384
385 if (tb == NULL)
386 return;
387
388 req.nlh.nlmsg_len = sizeof(req);
389 req.nlh.nlmsg_type = cmd;
390 req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
391 req.nlh.nlmsg_pid = 0;
392 req.nlh.nlmsg_seq = 0;
393
394 req.rtm.rtm_dst_len = dst_len;
395 req.rtm.rtm_table = tb->tb_id;
396 req.rtm.rtm_protocol = RTPROT_KERNEL;
397 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
398 req.rtm.rtm_type = type;
399
400 rta.rta_dst = &dst;
401 rta.rta_prefsrc = &ifa->ifa_local;
402 rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
403
404 if (cmd == RTM_NEWROUTE)
405 tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
406 else
407 tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
408}
409
0ff60a45 410void fib_add_ifaddr(struct in_ifaddr *ifa)
1da177e4
LT
411{
412 struct in_device *in_dev = ifa->ifa_dev;
413 struct net_device *dev = in_dev->dev;
414 struct in_ifaddr *prim = ifa;
415 u32 mask = ifa->ifa_mask;
416 u32 addr = ifa->ifa_local;
417 u32 prefix = ifa->ifa_address&mask;
418
419 if (ifa->ifa_flags&IFA_F_SECONDARY) {
420 prim = inet_ifa_byprefix(in_dev, prefix, mask);
421 if (prim == NULL) {
422 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
423 return;
424 }
425 }
426
427 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
428
429 if (!(dev->flags&IFF_UP))
430 return;
431
432 /* Add broadcast address, if it is explicitly assigned. */
433 if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
434 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
435
436 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
437 (prefix != addr || ifa->ifa_prefixlen < 32)) {
438 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
439 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
440
441 /* Add network specific broadcasts, when it takes a sense */
442 if (ifa->ifa_prefixlen < 31) {
443 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
444 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
445 }
446 }
447}
448
449static void fib_del_ifaddr(struct in_ifaddr *ifa)
450{
451 struct in_device *in_dev = ifa->ifa_dev;
452 struct net_device *dev = in_dev->dev;
453 struct in_ifaddr *ifa1;
454 struct in_ifaddr *prim = ifa;
455 u32 brd = ifa->ifa_address|~ifa->ifa_mask;
456 u32 any = ifa->ifa_address&ifa->ifa_mask;
457#define LOCAL_OK 1
458#define BRD_OK 2
459#define BRD0_OK 4
460#define BRD1_OK 8
461 unsigned ok = 0;
462
463 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
464 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
465 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
466 else {
467 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
468 if (prim == NULL) {
469 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
470 return;
471 }
472 }
473
474 /* Deletion is more complicated than add.
475 We should take care of not to delete too much :-)
476
477 Scan address list to be sure that addresses are really gone.
478 */
479
480 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
481 if (ifa->ifa_local == ifa1->ifa_local)
482 ok |= LOCAL_OK;
483 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
484 ok |= BRD_OK;
485 if (brd == ifa1->ifa_broadcast)
486 ok |= BRD1_OK;
487 if (any == ifa1->ifa_broadcast)
488 ok |= BRD0_OK;
489 }
490
491 if (!(ok&BRD_OK))
492 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
493 if (!(ok&BRD1_OK))
494 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
495 if (!(ok&BRD0_OK))
496 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
497 if (!(ok&LOCAL_OK)) {
498 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
499
500 /* Check, that this local address finally disappeared. */
501 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
502 /* And the last, but not the least thing.
503 We must flush stray FIB entries.
504
505 First of all, we scan fib_info list searching
506 for stray nexthop entries, then ignite fib_flush.
507 */
508 if (fib_sync_down(ifa->ifa_local, NULL, 0))
509 fib_flush();
510 }
511 }
512#undef LOCAL_OK
513#undef BRD_OK
514#undef BRD0_OK
515#undef BRD1_OK
516}
517
246955fe
RO
518static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
519{
520
521 struct fib_result res;
522 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
523 .fwmark = frn->fl_fwmark,
524 .tos = frn->fl_tos,
525 .scope = frn->fl_scope } } };
526 if (tb) {
527 local_bh_disable();
528
529 frn->tb_id = tb->tb_id;
530 frn->err = tb->tb_lookup(tb, &fl, &res);
531
532 if (!frn->err) {
533 frn->prefixlen = res.prefixlen;
534 frn->nh_sel = res.nh_sel;
535 frn->type = res.type;
536 frn->scope = res.scope;
537 }
538 local_bh_enable();
539 }
540}
541
542static void nl_fib_input(struct sock *sk, int len)
543{
544 struct sk_buff *skb = NULL;
545 struct nlmsghdr *nlh = NULL;
546 struct fib_result_nl *frn;
246955fe
RO
547 u32 pid;
548 struct fib_table *tb;
549
ea86575e 550 skb = skb_dequeue(&sk->sk_receive_queue);
246955fe 551 nlh = (struct nlmsghdr *)skb->data;
ea86575e
TG
552 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
553 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
554 kfree_skb(skb);
555 return;
556 }
246955fe
RO
557
558 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
559 tb = fib_get_table(frn->tb_id_in);
560
561 nl_fib_lookup(frn, tb);
562
563 pid = nlh->nlmsg_pid; /*pid of sending process */
246955fe
RO
564 NETLINK_CB(skb).pid = 0; /* from kernel */
565 NETLINK_CB(skb).dst_pid = pid;
ac6d439d 566 NETLINK_CB(skb).dst_group = 0; /* unicast */
246955fe
RO
567 netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
568}
569
570static void nl_fib_lookup_init(void)
571{
06628607 572 netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
246955fe
RO
573}
574
1da177e4
LT
/*
 * Take IP routing state for @dev out of service: sync down the FIB
 * entries that use the device (flushing the tables if fib_sync_down()
 * reports a non-zero result), flush the routing cache and shut down
 * ARP on the device.  @force is passed through to fib_sync_down().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale != 0)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
582
583static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
584{
585 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
586
587 switch (event) {
588 case NETDEV_UP:
589 fib_add_ifaddr(ifa);
590#ifdef CONFIG_IP_ROUTE_MULTIPATH
591 fib_sync_up(ifa->ifa_dev->dev);
592#endif
593 rt_cache_flush(-1);
594 break;
595 case NETDEV_DOWN:
596 fib_del_ifaddr(ifa);
9fcc2e8a 597 if (ifa->ifa_dev->ifa_list == NULL) {
1da177e4
LT
598 /* Last address was deleted from this interface.
599 Disable IP.
600 */
601 fib_disable_ip(ifa->ifa_dev->dev, 1);
602 } else {
603 rt_cache_flush(-1);
604 }
605 break;
606 }
607 return NOTIFY_DONE;
608}
609
610static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
611{
612 struct net_device *dev = ptr;
e5ed6399 613 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1da177e4
LT
614
615 if (event == NETDEV_UNREGISTER) {
616 fib_disable_ip(dev, 2);
617 return NOTIFY_DONE;
618 }
619
620 if (!in_dev)
621 return NOTIFY_DONE;
622
623 switch (event) {
624 case NETDEV_UP:
625 for_ifa(in_dev) {
626 fib_add_ifaddr(ifa);
627 } endfor_ifa(in_dev);
628#ifdef CONFIG_IP_ROUTE_MULTIPATH
629 fib_sync_up(dev);
630#endif
631 rt_cache_flush(-1);
632 break;
633 case NETDEV_DOWN:
634 fib_disable_ip(dev, 0);
635 break;
636 case NETDEV_CHANGEMTU:
637 case NETDEV_CHANGE:
638 rt_cache_flush(0);
639 break;
640 }
641 return NOTIFY_DONE;
642}
643
644static struct notifier_block fib_inetaddr_notifier = {
645 .notifier_call =fib_inetaddr_event,
646};
647
648static struct notifier_block fib_netdev_notifier = {
649 .notifier_call =fib_netdev_event,
650};
651
652void __init ip_fib_init(void)
653{
654#ifndef CONFIG_IP_MULTIPLE_TABLES
655 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
656 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
657#else
658 fib_rules_init();
659#endif
660
661 register_netdevice_notifier(&fib_netdev_notifier);
662 register_inetaddr_notifier(&fib_inetaddr_notifier);
246955fe 663 nl_fib_lookup_init();
1da177e4
LT
664}
665
666EXPORT_SYMBOL(inet_addr_type);
1da177e4 667EXPORT_SYMBOL(ip_rt_ioctl);