]> bbs.cooldavid.org Git - net-next-2.6.git/blame_incremental - net/ipv4/devinet.c
ipv4: AF_INET link address family
[net-next-2.6.git] / net / ipv4 / devinet.c
... / ...
CommitLineData
1/*
2 * NET3 IP device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17 *
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
26 */
27
28
29#include <asm/uaccess.h>
30#include <asm/system.h>
31#include <linux/bitops.h>
32#include <linux/capability.h>
33#include <linux/module.h>
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/string.h>
37#include <linux/mm.h>
38#include <linux/socket.h>
39#include <linux/sockios.h>
40#include <linux/in.h>
41#include <linux/errno.h>
42#include <linux/interrupt.h>
43#include <linux/if_addr.h>
44#include <linux/if_ether.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/etherdevice.h>
48#include <linux/skbuff.h>
49#include <linux/init.h>
50#include <linux/notifier.h>
51#include <linux/inetdevice.h>
52#include <linux/igmp.h>
53#include <linux/slab.h>
54#ifdef CONFIG_SYSCTL
55#include <linux/sysctl.h>
56#endif
57#include <linux/kmod.h>
58
59#include <net/arp.h>
60#include <net/ip.h>
61#include <net/route.h>
62#include <net/ip_fib.h>
63#include <net/rtnetlink.h>
64#include <net/net_namespace.h>
65
66static struct ipv4_devconf ipv4_devconf = {
67 .data = {
68 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
72 },
73};
74
75static struct ipv4_devconf ipv4_devconf_dflt = {
76 .data = {
77 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82 },
83};
84
/*
 * Access entry @attr of the default devconf table of namespace @net.
 * @net is parenthesized so that any pointer-valued expression (not just a
 * plain identifier) expands with the intended precedence.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
87
88static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89 [IFA_LOCAL] = { .type = NLA_U32 },
90 [IFA_ADDRESS] = { .type = NLA_U32 },
91 [IFA_BROADCAST] = { .type = NLA_U32 },
92 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93};
94
95static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96
97static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99 int destroy);
100#ifdef CONFIG_SYSCTL
101static void devinet_sysctl_register(struct in_device *idev);
102static void devinet_sysctl_unregister(struct in_device *idev);
103#else
104static inline void devinet_sysctl_register(struct in_device *idev)
105{
106}
107static inline void devinet_sysctl_unregister(struct in_device *idev)
108{
109}
110#endif
111
112/* Locks all the inet devices. */
113
114static struct in_ifaddr *inet_alloc_ifa(void)
115{
116 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
117}
118
119static void inet_rcu_free_ifa(struct rcu_head *head)
120{
121 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122 if (ifa->ifa_dev)
123 in_dev_put(ifa->ifa_dev);
124 kfree(ifa);
125}
126
127static inline void inet_free_ifa(struct in_ifaddr *ifa)
128{
129 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130}
131
132void in_dev_finish_destroy(struct in_device *idev)
133{
134 struct net_device *dev = idev->dev;
135
136 WARN_ON(idev->ifa_list);
137 WARN_ON(idev->mc_list);
138#ifdef NET_REFCNT_DEBUG
139 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140 idev, dev ? dev->name : "NIL");
141#endif
142 dev_put(dev);
143 if (!idev->dead)
144 pr_err("Freeing alive in_device %p\n", idev);
145 else
146 kfree(idev);
147}
148EXPORT_SYMBOL(in_dev_finish_destroy);
149
150static struct in_device *inetdev_init(struct net_device *dev)
151{
152 struct in_device *in_dev;
153
154 ASSERT_RTNL();
155
156 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157 if (!in_dev)
158 goto out;
159 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160 sizeof(in_dev->cnf));
161 in_dev->cnf.sysctl = NULL;
162 in_dev->dev = dev;
163 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164 if (!in_dev->arp_parms)
165 goto out_kfree;
166 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167 dev_disable_lro(dev);
168 /* Reference in_dev->dev */
169 dev_hold(dev);
170 /* Account for reference dev->ip_ptr (below) */
171 in_dev_hold(in_dev);
172
173 devinet_sysctl_register(in_dev);
174 ip_mc_init_dev(in_dev);
175 if (dev->flags & IFF_UP)
176 ip_mc_up(in_dev);
177
178 /* we can receive as soon as ip_ptr is set -- do this last */
179 rcu_assign_pointer(dev->ip_ptr, in_dev);
180out:
181 return in_dev;
182out_kfree:
183 kfree(in_dev);
184 in_dev = NULL;
185 goto out;
186}
187
188static void in_dev_rcu_put(struct rcu_head *head)
189{
190 struct in_device *idev = container_of(head, struct in_device, rcu_head);
191 in_dev_put(idev);
192}
193
194static void inetdev_destroy(struct in_device *in_dev)
195{
196 struct in_ifaddr *ifa;
197 struct net_device *dev;
198
199 ASSERT_RTNL();
200
201 dev = in_dev->dev;
202
203 in_dev->dead = 1;
204
205 ip_mc_destroy_dev(in_dev);
206
207 while ((ifa = in_dev->ifa_list) != NULL) {
208 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209 inet_free_ifa(ifa);
210 }
211
212 rcu_assign_pointer(dev->ip_ptr, NULL);
213
214 devinet_sysctl_unregister(in_dev);
215 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216 arp_ifdown(dev);
217
218 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219}
220
221int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222{
223 rcu_read_lock();
224 for_primary_ifa(in_dev) {
225 if (inet_ifa_match(a, ifa)) {
226 if (!b || inet_ifa_match(b, ifa)) {
227 rcu_read_unlock();
228 return 1;
229 }
230 }
231 } endfor_ifa(in_dev);
232 rcu_read_unlock();
233 return 0;
234}
235
236static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
237 int destroy, struct nlmsghdr *nlh, u32 pid)
238{
239 struct in_ifaddr *promote = NULL;
240 struct in_ifaddr *ifa, *ifa1 = *ifap;
241 struct in_ifaddr *last_prim = in_dev->ifa_list;
242 struct in_ifaddr *prev_prom = NULL;
243 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
244
245 ASSERT_RTNL();
246
247 /* 1. Deleting primary ifaddr forces deletion all secondaries
248 * unless alias promotion is set
249 **/
250
251 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
252 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
253
254 while ((ifa = *ifap1) != NULL) {
255 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
256 ifa1->ifa_scope <= ifa->ifa_scope)
257 last_prim = ifa;
258
259 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
260 ifa1->ifa_mask != ifa->ifa_mask ||
261 !inet_ifa_match(ifa1->ifa_address, ifa)) {
262 ifap1 = &ifa->ifa_next;
263 prev_prom = ifa;
264 continue;
265 }
266
267 if (!do_promote) {
268 *ifap1 = ifa->ifa_next;
269
270 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
271 blocking_notifier_call_chain(&inetaddr_chain,
272 NETDEV_DOWN, ifa);
273 inet_free_ifa(ifa);
274 } else {
275 promote = ifa;
276 break;
277 }
278 }
279 }
280
281 /* 2. Unlink it */
282
283 *ifap = ifa1->ifa_next;
284
285 /* 3. Announce address deletion */
286
287 /* Send message first, then call notifier.
288 At first sight, FIB update triggered by notifier
289 will refer to already deleted ifaddr, that could confuse
290 netlink listeners. It is not true: look, gated sees
291 that route deleted and if it still thinks that ifaddr
292 is valid, it will try to restore deleted routes... Grr.
293 So that, this order is correct.
294 */
295 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
296 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
297
298 if (promote) {
299
300 if (prev_prom) {
301 prev_prom->ifa_next = promote->ifa_next;
302 promote->ifa_next = last_prim->ifa_next;
303 last_prim->ifa_next = promote;
304 }
305
306 promote->ifa_flags &= ~IFA_F_SECONDARY;
307 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
308 blocking_notifier_call_chain(&inetaddr_chain,
309 NETDEV_UP, promote);
310 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
311 if (ifa1->ifa_mask != ifa->ifa_mask ||
312 !inet_ifa_match(ifa1->ifa_address, ifa))
313 continue;
314 fib_add_ifaddr(ifa);
315 }
316
317 }
318 if (destroy)
319 inet_free_ifa(ifa1);
320}
321
322static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
323 int destroy)
324{
325 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
326}
327
328static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
329 u32 pid)
330{
331 struct in_device *in_dev = ifa->ifa_dev;
332 struct in_ifaddr *ifa1, **ifap, **last_primary;
333
334 ASSERT_RTNL();
335
336 if (!ifa->ifa_local) {
337 inet_free_ifa(ifa);
338 return 0;
339 }
340
341 ifa->ifa_flags &= ~IFA_F_SECONDARY;
342 last_primary = &in_dev->ifa_list;
343
344 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
345 ifap = &ifa1->ifa_next) {
346 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
347 ifa->ifa_scope <= ifa1->ifa_scope)
348 last_primary = &ifa1->ifa_next;
349 if (ifa1->ifa_mask == ifa->ifa_mask &&
350 inet_ifa_match(ifa1->ifa_address, ifa)) {
351 if (ifa1->ifa_local == ifa->ifa_local) {
352 inet_free_ifa(ifa);
353 return -EEXIST;
354 }
355 if (ifa1->ifa_scope != ifa->ifa_scope) {
356 inet_free_ifa(ifa);
357 return -EINVAL;
358 }
359 ifa->ifa_flags |= IFA_F_SECONDARY;
360 }
361 }
362
363 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
364 net_srandom(ifa->ifa_local);
365 ifap = last_primary;
366 }
367
368 ifa->ifa_next = *ifap;
369 *ifap = ifa;
370
371 /* Send message first, then call notifier.
372 Notifier will trigger FIB update, so that
373 listeners of netlink will know about new ifaddr */
374 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
375 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
376
377 return 0;
378}
379
380static int inet_insert_ifa(struct in_ifaddr *ifa)
381{
382 return __inet_insert_ifa(ifa, NULL, 0);
383}
384
385static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386{
387 struct in_device *in_dev = __in_dev_get_rtnl(dev);
388
389 ASSERT_RTNL();
390
391 if (!in_dev) {
392 inet_free_ifa(ifa);
393 return -ENOBUFS;
394 }
395 ipv4_devconf_setall(in_dev);
396 if (ifa->ifa_dev != in_dev) {
397 WARN_ON(ifa->ifa_dev);
398 in_dev_hold(in_dev);
399 ifa->ifa_dev = in_dev;
400 }
401 if (ipv4_is_loopback(ifa->ifa_local))
402 ifa->ifa_scope = RT_SCOPE_HOST;
403 return inet_insert_ifa(ifa);
404}
405
406/* Caller must hold RCU or RTNL :
407 * We dont take a reference on found in_device
408 */
409struct in_device *inetdev_by_index(struct net *net, int ifindex)
410{
411 struct net_device *dev;
412 struct in_device *in_dev = NULL;
413
414 rcu_read_lock();
415 dev = dev_get_by_index_rcu(net, ifindex);
416 if (dev)
417 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
418 rcu_read_unlock();
419 return in_dev;
420}
421EXPORT_SYMBOL(inetdev_by_index);
422
423/* Called only from RTNL semaphored context. No locks. */
424
425struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426 __be32 mask)
427{
428 ASSERT_RTNL();
429
430 for_primary_ifa(in_dev) {
431 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432 return ifa;
433 } endfor_ifa(in_dev);
434 return NULL;
435}
436
437static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438{
439 struct net *net = sock_net(skb->sk);
440 struct nlattr *tb[IFA_MAX+1];
441 struct in_device *in_dev;
442 struct ifaddrmsg *ifm;
443 struct in_ifaddr *ifa, **ifap;
444 int err = -EINVAL;
445
446 ASSERT_RTNL();
447
448 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449 if (err < 0)
450 goto errout;
451
452 ifm = nlmsg_data(nlh);
453 in_dev = inetdev_by_index(net, ifm->ifa_index);
454 if (in_dev == NULL) {
455 err = -ENODEV;
456 goto errout;
457 }
458
459 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
460 ifap = &ifa->ifa_next) {
461 if (tb[IFA_LOCAL] &&
462 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
463 continue;
464
465 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
466 continue;
467
468 if (tb[IFA_ADDRESS] &&
469 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
470 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
471 continue;
472
473 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
474 return 0;
475 }
476
477 err = -EADDRNOTAVAIL;
478errout:
479 return err;
480}
481
482static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
483{
484 struct nlattr *tb[IFA_MAX+1];
485 struct in_ifaddr *ifa;
486 struct ifaddrmsg *ifm;
487 struct net_device *dev;
488 struct in_device *in_dev;
489 int err;
490
491 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
492 if (err < 0)
493 goto errout;
494
495 ifm = nlmsg_data(nlh);
496 err = -EINVAL;
497 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
498 goto errout;
499
500 dev = __dev_get_by_index(net, ifm->ifa_index);
501 err = -ENODEV;
502 if (dev == NULL)
503 goto errout;
504
505 in_dev = __in_dev_get_rtnl(dev);
506 err = -ENOBUFS;
507 if (in_dev == NULL)
508 goto errout;
509
510 ifa = inet_alloc_ifa();
511 if (ifa == NULL)
512 /*
513 * A potential indev allocation can be left alive, it stays
514 * assigned to its device and is destroy with it.
515 */
516 goto errout;
517
518 ipv4_devconf_setall(in_dev);
519 in_dev_hold(in_dev);
520
521 if (tb[IFA_ADDRESS] == NULL)
522 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
523
524 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526 ifa->ifa_flags = ifm->ifa_flags;
527 ifa->ifa_scope = ifm->ifa_scope;
528 ifa->ifa_dev = in_dev;
529
530 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
531 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
532
533 if (tb[IFA_BROADCAST])
534 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
535
536 if (tb[IFA_LABEL])
537 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
538 else
539 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
540
541 return ifa;
542
543errout:
544 return ERR_PTR(err);
545}
546
547static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
548{
549 struct net *net = sock_net(skb->sk);
550 struct in_ifaddr *ifa;
551
552 ASSERT_RTNL();
553
554 ifa = rtm_to_ifaddr(net, nlh);
555 if (IS_ERR(ifa))
556 return PTR_ERR(ifa);
557
558 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
559}
560
561/*
562 * Determine a default network mask, based on the IP address.
563 */
564
565static inline int inet_abc_len(__be32 addr)
566{
567 int rc = -1; /* Something else, probably a multicast. */
568
569 if (ipv4_is_zeronet(addr))
570 rc = 0;
571 else {
572 __u32 haddr = ntohl(addr);
573
574 if (IN_CLASSA(haddr))
575 rc = 8;
576 else if (IN_CLASSB(haddr))
577 rc = 16;
578 else if (IN_CLASSC(haddr))
579 rc = 24;
580 }
581
582 return rc;
583}
584
585
586int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
587{
588 struct ifreq ifr;
589 struct sockaddr_in sin_orig;
590 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
591 struct in_device *in_dev;
592 struct in_ifaddr **ifap = NULL;
593 struct in_ifaddr *ifa = NULL;
594 struct net_device *dev;
595 char *colon;
596 int ret = -EFAULT;
597 int tryaddrmatch = 0;
598
599 /*
600 * Fetch the caller's info block into kernel space
601 */
602
603 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
604 goto out;
605 ifr.ifr_name[IFNAMSIZ - 1] = 0;
606
607 /* save original address for comparison */
608 memcpy(&sin_orig, sin, sizeof(*sin));
609
610 colon = strchr(ifr.ifr_name, ':');
611 if (colon)
612 *colon = 0;
613
614 dev_load(net, ifr.ifr_name);
615
616 switch (cmd) {
617 case SIOCGIFADDR: /* Get interface address */
618 case SIOCGIFBRDADDR: /* Get the broadcast address */
619 case SIOCGIFDSTADDR: /* Get the destination address */
620 case SIOCGIFNETMASK: /* Get the netmask for the interface */
621 /* Note that these ioctls will not sleep,
622 so that we do not impose a lock.
623 One day we will be forced to put shlock here (I mean SMP)
624 */
625 tryaddrmatch = (sin_orig.sin_family == AF_INET);
626 memset(sin, 0, sizeof(*sin));
627 sin->sin_family = AF_INET;
628 break;
629
630 case SIOCSIFFLAGS:
631 ret = -EACCES;
632 if (!capable(CAP_NET_ADMIN))
633 goto out;
634 break;
635 case SIOCSIFADDR: /* Set interface address (and family) */
636 case SIOCSIFBRDADDR: /* Set the broadcast address */
637 case SIOCSIFDSTADDR: /* Set the destination address */
638 case SIOCSIFNETMASK: /* Set the netmask for the interface */
639 ret = -EACCES;
640 if (!capable(CAP_NET_ADMIN))
641 goto out;
642 ret = -EINVAL;
643 if (sin->sin_family != AF_INET)
644 goto out;
645 break;
646 default:
647 ret = -EINVAL;
648 goto out;
649 }
650
651 rtnl_lock();
652
653 ret = -ENODEV;
654 dev = __dev_get_by_name(net, ifr.ifr_name);
655 if (!dev)
656 goto done;
657
658 if (colon)
659 *colon = ':';
660
661 in_dev = __in_dev_get_rtnl(dev);
662 if (in_dev) {
663 if (tryaddrmatch) {
664 /* Matthias Andree */
665 /* compare label and address (4.4BSD style) */
666 /* note: we only do this for a limited set of ioctls
667 and only if the original address family was AF_INET.
668 This is checked above. */
669 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
670 ifap = &ifa->ifa_next) {
671 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672 sin_orig.sin_addr.s_addr ==
673 ifa->ifa_address) {
674 break; /* found */
675 }
676 }
677 }
678 /* we didn't get a match, maybe the application is
679 4.3BSD-style and passed in junk so we fall back to
680 comparing just the label */
681 if (!ifa) {
682 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
683 ifap = &ifa->ifa_next)
684 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
685 break;
686 }
687 }
688
689 ret = -EADDRNOTAVAIL;
690 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
691 goto done;
692
693 switch (cmd) {
694 case SIOCGIFADDR: /* Get interface address */
695 sin->sin_addr.s_addr = ifa->ifa_local;
696 goto rarok;
697
698 case SIOCGIFBRDADDR: /* Get the broadcast address */
699 sin->sin_addr.s_addr = ifa->ifa_broadcast;
700 goto rarok;
701
702 case SIOCGIFDSTADDR: /* Get the destination address */
703 sin->sin_addr.s_addr = ifa->ifa_address;
704 goto rarok;
705
706 case SIOCGIFNETMASK: /* Get the netmask for the interface */
707 sin->sin_addr.s_addr = ifa->ifa_mask;
708 goto rarok;
709
710 case SIOCSIFFLAGS:
711 if (colon) {
712 ret = -EADDRNOTAVAIL;
713 if (!ifa)
714 break;
715 ret = 0;
716 if (!(ifr.ifr_flags & IFF_UP))
717 inet_del_ifa(in_dev, ifap, 1);
718 break;
719 }
720 ret = dev_change_flags(dev, ifr.ifr_flags);
721 break;
722
723 case SIOCSIFADDR: /* Set interface address (and family) */
724 ret = -EINVAL;
725 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
726 break;
727
728 if (!ifa) {
729 ret = -ENOBUFS;
730 ifa = inet_alloc_ifa();
731 if (!ifa)
732 break;
733 if (colon)
734 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
735 else
736 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
737 } else {
738 ret = 0;
739 if (ifa->ifa_local == sin->sin_addr.s_addr)
740 break;
741 inet_del_ifa(in_dev, ifap, 0);
742 ifa->ifa_broadcast = 0;
743 ifa->ifa_scope = 0;
744 }
745
746 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
747
748 if (!(dev->flags & IFF_POINTOPOINT)) {
749 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
750 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
751 if ((dev->flags & IFF_BROADCAST) &&
752 ifa->ifa_prefixlen < 31)
753 ifa->ifa_broadcast = ifa->ifa_address |
754 ~ifa->ifa_mask;
755 } else {
756 ifa->ifa_prefixlen = 32;
757 ifa->ifa_mask = inet_make_mask(32);
758 }
759 ret = inet_set_ifa(dev, ifa);
760 break;
761
762 case SIOCSIFBRDADDR: /* Set the broadcast address */
763 ret = 0;
764 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
765 inet_del_ifa(in_dev, ifap, 0);
766 ifa->ifa_broadcast = sin->sin_addr.s_addr;
767 inet_insert_ifa(ifa);
768 }
769 break;
770
771 case SIOCSIFDSTADDR: /* Set the destination address */
772 ret = 0;
773 if (ifa->ifa_address == sin->sin_addr.s_addr)
774 break;
775 ret = -EINVAL;
776 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
777 break;
778 ret = 0;
779 inet_del_ifa(in_dev, ifap, 0);
780 ifa->ifa_address = sin->sin_addr.s_addr;
781 inet_insert_ifa(ifa);
782 break;
783
784 case SIOCSIFNETMASK: /* Set the netmask for the interface */
785
786 /*
787 * The mask we set must be legal.
788 */
789 ret = -EINVAL;
790 if (bad_mask(sin->sin_addr.s_addr, 0))
791 break;
792 ret = 0;
793 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
794 __be32 old_mask = ifa->ifa_mask;
795 inet_del_ifa(in_dev, ifap, 0);
796 ifa->ifa_mask = sin->sin_addr.s_addr;
797 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
798
799 /* See if current broadcast address matches
800 * with current netmask, then recalculate
801 * the broadcast address. Otherwise it's a
802 * funny address, so don't touch it since
803 * the user seems to know what (s)he's doing...
804 */
805 if ((dev->flags & IFF_BROADCAST) &&
806 (ifa->ifa_prefixlen < 31) &&
807 (ifa->ifa_broadcast ==
808 (ifa->ifa_local|~old_mask))) {
809 ifa->ifa_broadcast = (ifa->ifa_local |
810 ~sin->sin_addr.s_addr);
811 }
812 inet_insert_ifa(ifa);
813 }
814 break;
815 }
816done:
817 rtnl_unlock();
818out:
819 return ret;
820rarok:
821 rtnl_unlock();
822 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
823 goto out;
824}
825
826static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
827{
828 struct in_device *in_dev = __in_dev_get_rtnl(dev);
829 struct in_ifaddr *ifa;
830 struct ifreq ifr;
831 int done = 0;
832
833 if (!in_dev)
834 goto out;
835
836 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
837 if (!buf) {
838 done += sizeof(ifr);
839 continue;
840 }
841 if (len < (int) sizeof(ifr))
842 break;
843 memset(&ifr, 0, sizeof(struct ifreq));
844 if (ifa->ifa_label)
845 strcpy(ifr.ifr_name, ifa->ifa_label);
846 else
847 strcpy(ifr.ifr_name, dev->name);
848
849 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
850 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
851 ifa->ifa_local;
852
853 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
854 done = -EFAULT;
855 break;
856 }
857 buf += sizeof(struct ifreq);
858 len -= sizeof(struct ifreq);
859 done += sizeof(struct ifreq);
860 }
861out:
862 return done;
863}
864
865__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
866{
867 __be32 addr = 0;
868 struct in_device *in_dev;
869 struct net *net = dev_net(dev);
870
871 rcu_read_lock();
872 in_dev = __in_dev_get_rcu(dev);
873 if (!in_dev)
874 goto no_in_dev;
875
876 for_primary_ifa(in_dev) {
877 if (ifa->ifa_scope > scope)
878 continue;
879 if (!dst || inet_ifa_match(dst, ifa)) {
880 addr = ifa->ifa_local;
881 break;
882 }
883 if (!addr)
884 addr = ifa->ifa_local;
885 } endfor_ifa(in_dev);
886
887 if (addr)
888 goto out_unlock;
889no_in_dev:
890
891 /* Not loopback addresses on loopback should be preferred
892 in this case. It is importnat that lo is the first interface
893 in dev_base list.
894 */
895 for_each_netdev_rcu(net, dev) {
896 in_dev = __in_dev_get_rcu(dev);
897 if (!in_dev)
898 continue;
899
900 for_primary_ifa(in_dev) {
901 if (ifa->ifa_scope != RT_SCOPE_LINK &&
902 ifa->ifa_scope <= scope) {
903 addr = ifa->ifa_local;
904 goto out_unlock;
905 }
906 } endfor_ifa(in_dev);
907 }
908out_unlock:
909 rcu_read_unlock();
910 return addr;
911}
912EXPORT_SYMBOL(inet_select_addr);
913
914static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
915 __be32 local, int scope)
916{
917 int same = 0;
918 __be32 addr = 0;
919
920 for_ifa(in_dev) {
921 if (!addr &&
922 (local == ifa->ifa_local || !local) &&
923 ifa->ifa_scope <= scope) {
924 addr = ifa->ifa_local;
925 if (same)
926 break;
927 }
928 if (!same) {
929 same = (!local || inet_ifa_match(local, ifa)) &&
930 (!dst || inet_ifa_match(dst, ifa));
931 if (same && addr) {
932 if (local || !dst)
933 break;
934 /* Is the selected addr into dst subnet? */
935 if (inet_ifa_match(addr, ifa))
936 break;
937 /* No, then can we use new local src? */
938 if (ifa->ifa_scope <= scope) {
939 addr = ifa->ifa_local;
940 break;
941 }
942 /* search for large dst subnet for addr */
943 same = 0;
944 }
945 }
946 } endfor_ifa(in_dev);
947
948 return same ? addr : 0;
949}
950
951/*
952 * Confirm that local IP address exists using wildcards:
953 * - in_dev: only on this interface, 0=any interface
954 * - dst: only in the same subnet as dst, 0=any dst
955 * - local: address, 0=autoselect the local address
956 * - scope: maximum allowed scope value for the local address
957 */
958__be32 inet_confirm_addr(struct in_device *in_dev,
959 __be32 dst, __be32 local, int scope)
960{
961 __be32 addr = 0;
962 struct net_device *dev;
963 struct net *net;
964
965 if (scope != RT_SCOPE_LINK)
966 return confirm_addr_indev(in_dev, dst, local, scope);
967
968 net = dev_net(in_dev->dev);
969 rcu_read_lock();
970 for_each_netdev_rcu(net, dev) {
971 in_dev = __in_dev_get_rcu(dev);
972 if (in_dev) {
973 addr = confirm_addr_indev(in_dev, dst, local, scope);
974 if (addr)
975 break;
976 }
977 }
978 rcu_read_unlock();
979
980 return addr;
981}
982
983/*
984 * Device notifier
985 */
986
987int register_inetaddr_notifier(struct notifier_block *nb)
988{
989 return blocking_notifier_chain_register(&inetaddr_chain, nb);
990}
991EXPORT_SYMBOL(register_inetaddr_notifier);
992
993int unregister_inetaddr_notifier(struct notifier_block *nb)
994{
995 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
996}
997EXPORT_SYMBOL(unregister_inetaddr_notifier);
998
999/* Rename ifa_labels for a device name change. Make some effort to preserve
1000 * existing alias numbering and to create unique labels if possible.
1001*/
1002static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1003{
1004 struct in_ifaddr *ifa;
1005 int named = 0;
1006
1007 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1008 char old[IFNAMSIZ], *dot;
1009
1010 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1011 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1012 if (named++ == 0)
1013 goto skip;
1014 dot = strchr(old, ':');
1015 if (dot == NULL) {
1016 sprintf(old, ":%d", named);
1017 dot = old;
1018 }
1019 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1020 strcat(ifa->ifa_label, dot);
1021 else
1022 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1023skip:
1024 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1025 }
1026}
1027
/* True when @mtu is large enough for IPv4, i.e. at least 68 bytes. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	/* RFC 791: every internet module must be able to forward 68 octets. */
	const unsigned min_ipv4_mtu = 68;

	return !(mtu < min_ipv4_mtu);
}
1032
1033/* Called only under RTNL semaphore */
1034
1035static int inetdev_event(struct notifier_block *this, unsigned long event,
1036 void *ptr)
1037{
1038 struct net_device *dev = ptr;
1039 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1040
1041 ASSERT_RTNL();
1042
1043 if (!in_dev) {
1044 if (event == NETDEV_REGISTER) {
1045 in_dev = inetdev_init(dev);
1046 if (!in_dev)
1047 return notifier_from_errno(-ENOMEM);
1048 if (dev->flags & IFF_LOOPBACK) {
1049 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1050 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1051 }
1052 } else if (event == NETDEV_CHANGEMTU) {
1053 /* Re-enabling IP */
1054 if (inetdev_valid_mtu(dev->mtu))
1055 in_dev = inetdev_init(dev);
1056 }
1057 goto out;
1058 }
1059
1060 switch (event) {
1061 case NETDEV_REGISTER:
1062 printk(KERN_DEBUG "inetdev_event: bug\n");
1063 rcu_assign_pointer(dev->ip_ptr, NULL);
1064 break;
1065 case NETDEV_UP:
1066 if (!inetdev_valid_mtu(dev->mtu))
1067 break;
1068 if (dev->flags & IFF_LOOPBACK) {
1069 struct in_ifaddr *ifa = inet_alloc_ifa();
1070
1071 if (ifa) {
1072 ifa->ifa_local =
1073 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1074 ifa->ifa_prefixlen = 8;
1075 ifa->ifa_mask = inet_make_mask(8);
1076 in_dev_hold(in_dev);
1077 ifa->ifa_dev = in_dev;
1078 ifa->ifa_scope = RT_SCOPE_HOST;
1079 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1080 inet_insert_ifa(ifa);
1081 }
1082 }
1083 ip_mc_up(in_dev);
1084 /* fall through */
1085 case NETDEV_NOTIFY_PEERS:
1086 case NETDEV_CHANGEADDR:
1087 /* Send gratuitous ARP to notify of link change */
1088 if (IN_DEV_ARP_NOTIFY(in_dev)) {
1089 struct in_ifaddr *ifa = in_dev->ifa_list;
1090
1091 if (ifa)
1092 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1093 ifa->ifa_address, dev,
1094 ifa->ifa_address, NULL,
1095 dev->dev_addr, NULL);
1096 }
1097 break;
1098 case NETDEV_DOWN:
1099 ip_mc_down(in_dev);
1100 break;
1101 case NETDEV_PRE_TYPE_CHANGE:
1102 ip_mc_unmap(in_dev);
1103 break;
1104 case NETDEV_POST_TYPE_CHANGE:
1105 ip_mc_remap(in_dev);
1106 break;
1107 case NETDEV_CHANGEMTU:
1108 if (inetdev_valid_mtu(dev->mtu))
1109 break;
1110 /* disable IP when MTU is not enough */
1111 case NETDEV_UNREGISTER:
1112 inetdev_destroy(in_dev);
1113 break;
1114 case NETDEV_CHANGENAME:
1115 /* Do not notify about label change, this event is
1116 * not interesting to applications using netlink.
1117 */
1118 inetdev_changename(dev, in_dev);
1119
1120 devinet_sysctl_unregister(in_dev);
1121 devinet_sysctl_register(in_dev);
1122 break;
1123 }
1124out:
1125 return NOTIFY_DONE;
1126}
1127
1128static struct notifier_block ip_netdev_notifier = {
1129 .notifier_call = inetdev_event,
1130};
1131
1132static inline size_t inet_nlmsg_size(void)
1133{
1134 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1135 + nla_total_size(4) /* IFA_ADDRESS */
1136 + nla_total_size(4) /* IFA_LOCAL */
1137 + nla_total_size(4) /* IFA_BROADCAST */
1138 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1139}
1140
1141static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1142 u32 pid, u32 seq, int event, unsigned int flags)
1143{
1144 struct ifaddrmsg *ifm;
1145 struct nlmsghdr *nlh;
1146
1147 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1148 if (nlh == NULL)
1149 return -EMSGSIZE;
1150
1151 ifm = nlmsg_data(nlh);
1152 ifm->ifa_family = AF_INET;
1153 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1154 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1155 ifm->ifa_scope = ifa->ifa_scope;
1156 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1157
1158 if (ifa->ifa_address)
1159 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1160
1161 if (ifa->ifa_local)
1162 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1163
1164 if (ifa->ifa_broadcast)
1165 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1166
1167 if (ifa->ifa_label[0])
1168 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1169
1170 return nlmsg_end(skb, nlh);
1171
1172nla_put_failure:
1173 nlmsg_cancel(skb, nlh);
1174 return -EMSGSIZE;
1175}
1176
1177static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1178{
1179 struct net *net = sock_net(skb->sk);
1180 int h, s_h;
1181 int idx, s_idx;
1182 int ip_idx, s_ip_idx;
1183 struct net_device *dev;
1184 struct in_device *in_dev;
1185 struct in_ifaddr *ifa;
1186 struct hlist_head *head;
1187 struct hlist_node *node;
1188
1189 s_h = cb->args[0];
1190 s_idx = idx = cb->args[1];
1191 s_ip_idx = ip_idx = cb->args[2];
1192
1193 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1194 idx = 0;
1195 head = &net->dev_index_head[h];
1196 rcu_read_lock();
1197 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1198 if (idx < s_idx)
1199 goto cont;
1200 if (h > s_h || idx > s_idx)
1201 s_ip_idx = 0;
1202 in_dev = __in_dev_get_rcu(dev);
1203 if (!in_dev)
1204 goto cont;
1205
1206 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1207 ifa = ifa->ifa_next, ip_idx++) {
1208 if (ip_idx < s_ip_idx)
1209 continue;
1210 if (inet_fill_ifaddr(skb, ifa,
1211 NETLINK_CB(cb->skb).pid,
1212 cb->nlh->nlmsg_seq,
1213 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1214 rcu_read_unlock();
1215 goto done;
1216 }
1217 }
1218cont:
1219 idx++;
1220 }
1221 rcu_read_unlock();
1222 }
1223
1224done:
1225 cb->args[0] = h;
1226 cb->args[1] = idx;
1227 cb->args[2] = ip_idx;
1228
1229 return skb->len;
1230}
1231
1232static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1233 u32 pid)
1234{
1235 struct sk_buff *skb;
1236 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1237 int err = -ENOBUFS;
1238 struct net *net;
1239
1240 net = dev_net(ifa->ifa_dev->dev);
1241 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1242 if (skb == NULL)
1243 goto errout;
1244
1245 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1246 if (err < 0) {
1247 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1248 WARN_ON(err == -EMSGSIZE);
1249 kfree_skb(skb);
1250 goto errout;
1251 }
1252 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1253 return;
1254errout:
1255 if (err < 0)
1256 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1257}
1258
1259static size_t inet_get_link_af_size(const struct net_device *dev)
1260{
1261 struct in_device *in_dev = __in_dev_get_rcu(dev);
1262
1263 if (!in_dev)
1264 return 0;
1265
1266 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1267}
1268
1269static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1270{
1271 struct in_device *in_dev = __in_dev_get_rcu(dev);
1272 struct nlattr *nla;
1273 int i;
1274
1275 if (!in_dev)
1276 return -ENODATA;
1277
1278 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1279 if (nla == NULL)
1280 return -EMSGSIZE;
1281
1282 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1283 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1284
1285 return 0;
1286}
1287
1288static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1289 [IFLA_INET_CONF] = { .type = NLA_NESTED },
1290};
1291
1292static int inet_parse_link_af(struct net_device *dev, const struct nlattr *nla)
1293{
1294 struct in_device *in_dev = __in_dev_get_rcu(dev);
1295 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1296 int err, rem;
1297
1298 if (!in_dev)
1299 return -EOPNOTSUPP;
1300
1301 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1302 if (err < 0)
1303 return err;
1304
1305 if (tb[IFLA_INET_CONF]) {
1306 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1307 int cfgid = nla_type(a);
1308
1309 if (nla_len(a) < 4)
1310 return -EINVAL;
1311
1312 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1313 return -EINVAL;
1314 }
1315 }
1316
1317 if (tb[IFLA_INET_CONF]) {
1318 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1319 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1320 }
1321
1322 return 0;
1323}
1324
1325#ifdef CONFIG_SYSCTL
1326
1327static void devinet_copy_dflt_conf(struct net *net, int i)
1328{
1329 struct net_device *dev;
1330
1331 rcu_read_lock();
1332 for_each_netdev_rcu(net, dev) {
1333 struct in_device *in_dev;
1334
1335 in_dev = __in_dev_get_rcu(dev);
1336 if (in_dev && !test_bit(i, in_dev->cnf.state))
1337 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1338 }
1339 rcu_read_unlock();
1340}
1341
1342/* called with RTNL locked */
1343static void inet_forward_change(struct net *net)
1344{
1345 struct net_device *dev;
1346 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1347
1348 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1349 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1350
1351 for_each_netdev(net, dev) {
1352 struct in_device *in_dev;
1353 if (on)
1354 dev_disable_lro(dev);
1355 rcu_read_lock();
1356 in_dev = __in_dev_get_rcu(dev);
1357 if (in_dev)
1358 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1359 rcu_read_unlock();
1360 }
1361}
1362
1363static int devinet_conf_proc(ctl_table *ctl, int write,
1364 void __user *buffer,
1365 size_t *lenp, loff_t *ppos)
1366{
1367 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1368
1369 if (write) {
1370 struct ipv4_devconf *cnf = ctl->extra1;
1371 struct net *net = ctl->extra2;
1372 int i = (int *)ctl->data - cnf->data;
1373
1374 set_bit(i, cnf->state);
1375
1376 if (cnf == net->ipv4.devconf_dflt)
1377 devinet_copy_dflt_conf(net, i);
1378 }
1379
1380 return ret;
1381}
1382
1383static int devinet_sysctl_forward(ctl_table *ctl, int write,
1384 void __user *buffer,
1385 size_t *lenp, loff_t *ppos)
1386{
1387 int *valp = ctl->data;
1388 int val = *valp;
1389 loff_t pos = *ppos;
1390 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1391
1392 if (write && *valp != val) {
1393 struct net *net = ctl->extra2;
1394
1395 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1396 if (!rtnl_trylock()) {
1397 /* Restore the original values before restarting */
1398 *valp = val;
1399 *ppos = pos;
1400 return restart_syscall();
1401 }
1402 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1403 inet_forward_change(net);
1404 } else if (*valp) {
1405 struct ipv4_devconf *cnf = ctl->extra1;
1406 struct in_device *idev =
1407 container_of(cnf, struct in_device, cnf);
1408 dev_disable_lro(idev->dev);
1409 }
1410 rtnl_unlock();
1411 rt_cache_flush(net, 0);
1412 }
1413 }
1414
1415 return ret;
1416}
1417
1418int ipv4_doint_and_flush(ctl_table *ctl, int write,
1419 void __user *buffer,
1420 size_t *lenp, loff_t *ppos)
1421{
1422 int *valp = ctl->data;
1423 int val = *valp;
1424 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1425 struct net *net = ctl->extra2;
1426
1427 if (write && *valp != val)
1428 rt_cache_flush(net, 0);
1429
1430 return ret;
1431}
1432
/*
 * Builders for the entries of the devinet_vars[] template.
 *
 * Each entry refers to ipv4_devconf: .data points at slot
 * IPV4_DEVCONF_<attr> - 1 of its data[] array and .extra1 at the
 * structure itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1455
1456static struct devinet_sysctl_table {
1457 struct ctl_table_header *sysctl_header;
1458 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1459 char *dev_name;
1460} devinet_sysctl = {
1461 .devinet_vars = {
1462 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1463 devinet_sysctl_forward),
1464 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1465
1466 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1467 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1468 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1469 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1470 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1471 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1472 "accept_source_route"),
1473 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1474 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1475 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1476 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1477 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1478 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1479 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1480 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1481 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1482 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1483 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1484 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1485 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1486
1487 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1488 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1489 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1490 "force_igmp_version"),
1491 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1492 "promote_secondaries"),
1493 },
1494};
1495
1496static int __devinet_sysctl_register(struct net *net, char *dev_name,
1497 struct ipv4_devconf *p)
1498{
1499 int i;
1500 struct devinet_sysctl_table *t;
1501
1502#define DEVINET_CTL_PATH_DEV 3
1503
1504 struct ctl_path devinet_ctl_path[] = {
1505 { .procname = "net", },
1506 { .procname = "ipv4", },
1507 { .procname = "conf", },
1508 { /* to be set */ },
1509 { },
1510 };
1511
1512 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1513 if (!t)
1514 goto out;
1515
1516 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1517 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1518 t->devinet_vars[i].extra1 = p;
1519 t->devinet_vars[i].extra2 = net;
1520 }
1521
1522 /*
1523 * Make a copy of dev_name, because '.procname' is regarded as const
1524 * by sysctl and we wouldn't want anyone to change it under our feet
1525 * (see SIOCSIFNAME).
1526 */
1527 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1528 if (!t->dev_name)
1529 goto free;
1530
1531 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1532
1533 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1534 t->devinet_vars);
1535 if (!t->sysctl_header)
1536 goto free_procname;
1537
1538 p->sysctl = t;
1539 return 0;
1540
1541free_procname:
1542 kfree(t->dev_name);
1543free:
1544 kfree(t);
1545out:
1546 return -ENOBUFS;
1547}
1548
1549static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1550{
1551 struct devinet_sysctl_table *t = cnf->sysctl;
1552
1553 if (t == NULL)
1554 return;
1555
1556 cnf->sysctl = NULL;
1557 unregister_sysctl_table(t->sysctl_header);
1558 kfree(t->dev_name);
1559 kfree(t);
1560}
1561
1562static void devinet_sysctl_register(struct in_device *idev)
1563{
1564 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1565 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1566 &idev->cnf);
1567}
1568
1569static void devinet_sysctl_unregister(struct in_device *idev)
1570{
1571 __devinet_sysctl_unregister(&idev->cnf);
1572 neigh_sysctl_unregister(idev->arp_parms);
1573}
1574
1575static struct ctl_table ctl_forward_entry[] = {
1576 {
1577 .procname = "ip_forward",
1578 .data = &ipv4_devconf.data[
1579 IPV4_DEVCONF_FORWARDING - 1],
1580 .maxlen = sizeof(int),
1581 .mode = 0644,
1582 .proc_handler = devinet_sysctl_forward,
1583 .extra1 = &ipv4_devconf,
1584 .extra2 = &init_net,
1585 },
1586 { },
1587};
1588
1589static __net_initdata struct ctl_path net_ipv4_path[] = {
1590 { .procname = "net", },
1591 { .procname = "ipv4", },
1592 { },
1593};
1594#endif
1595
1596static __net_init int devinet_init_net(struct net *net)
1597{
1598 int err;
1599 struct ipv4_devconf *all, *dflt;
1600#ifdef CONFIG_SYSCTL
1601 struct ctl_table *tbl = ctl_forward_entry;
1602 struct ctl_table_header *forw_hdr;
1603#endif
1604
1605 err = -ENOMEM;
1606 all = &ipv4_devconf;
1607 dflt = &ipv4_devconf_dflt;
1608
1609 if (!net_eq(net, &init_net)) {
1610 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1611 if (all == NULL)
1612 goto err_alloc_all;
1613
1614 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1615 if (dflt == NULL)
1616 goto err_alloc_dflt;
1617
1618#ifdef CONFIG_SYSCTL
1619 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1620 if (tbl == NULL)
1621 goto err_alloc_ctl;
1622
1623 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1624 tbl[0].extra1 = all;
1625 tbl[0].extra2 = net;
1626#endif
1627 }
1628
1629#ifdef CONFIG_SYSCTL
1630 err = __devinet_sysctl_register(net, "all", all);
1631 if (err < 0)
1632 goto err_reg_all;
1633
1634 err = __devinet_sysctl_register(net, "default", dflt);
1635 if (err < 0)
1636 goto err_reg_dflt;
1637
1638 err = -ENOMEM;
1639 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1640 if (forw_hdr == NULL)
1641 goto err_reg_ctl;
1642 net->ipv4.forw_hdr = forw_hdr;
1643#endif
1644
1645 net->ipv4.devconf_all = all;
1646 net->ipv4.devconf_dflt = dflt;
1647 return 0;
1648
1649#ifdef CONFIG_SYSCTL
1650err_reg_ctl:
1651 __devinet_sysctl_unregister(dflt);
1652err_reg_dflt:
1653 __devinet_sysctl_unregister(all);
1654err_reg_all:
1655 if (tbl != ctl_forward_entry)
1656 kfree(tbl);
1657err_alloc_ctl:
1658#endif
1659 if (dflt != &ipv4_devconf_dflt)
1660 kfree(dflt);
1661err_alloc_dflt:
1662 if (all != &ipv4_devconf)
1663 kfree(all);
1664err_alloc_all:
1665 return err;
1666}
1667
1668static __net_exit void devinet_exit_net(struct net *net)
1669{
1670#ifdef CONFIG_SYSCTL
1671 struct ctl_table *tbl;
1672
1673 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1674 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1675 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1676 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1677 kfree(tbl);
1678#endif
1679 kfree(net->ipv4.devconf_dflt);
1680 kfree(net->ipv4.devconf_all);
1681}
1682
1683static __net_initdata struct pernet_operations devinet_ops = {
1684 .init = devinet_init_net,
1685 .exit = devinet_exit_net,
1686};
1687
1688static struct rtnl_af_ops inet_af_ops = {
1689 .family = AF_INET,
1690 .fill_link_af = inet_fill_link_af,
1691 .get_link_af_size = inet_get_link_af_size,
1692 .parse_link_af = inet_parse_link_af,
1693};
1694
1695void __init devinet_init(void)
1696{
1697 register_pernet_subsys(&devinet_ops);
1698
1699 register_gifconf(PF_INET, inet_gifconf);
1700 register_netdevice_notifier(&ip_netdev_notifier);
1701
1702 rtnl_af_register(&inet_af_ops);
1703
1704 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1705 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1706 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1707}
1708