]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv4/devinet.c
em_meta: avoid one dev_put()
[net-next-2.6.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #ifdef CONFIG_SYSCTL
54 #include <linux/sysctl.h>
55 #endif
56 #include <linux/kmod.h>
57
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64
65 static struct ipv4_devconf ipv4_devconf = {
66         .data = {
67                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71         },
72 };
73
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75         .data = {
76                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81         },
82 };
83
/*
 * Read attribute @attr from the default IPv4 device configuration that
 * @net points at (net->ipv4.devconf_dflt).
 *
 * @net is parenthesized so that an arbitrary pointer expression (for
 * example "base + 1" or "cond ? a : b") binds correctly to "->".
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88         [IFA_LOCAL]             = { .type = NLA_U32 },
89         [IFA_ADDRESS]           = { .type = NLA_U32 },
90         [IFA_BROADCAST]         = { .type = NLA_U32 },
91         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92 };
93
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98                          int destroy);
#ifdef CONFIG_SYSCTL
/* Real implementations are provided elsewhere when sysctl is built in. */
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the per-device sysctl hooks do nothing. */
static inline void devinet_sysctl_register(struct in_device *idev) { }
static inline void devinet_sysctl_unregister(struct in_device *idev) { }
#endif
110
111 /* Locks all the inet devices. */
112
113 static struct in_ifaddr *inet_alloc_ifa(void)
114 {
115         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
116 }
117
118 static void inet_rcu_free_ifa(struct rcu_head *head)
119 {
120         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
121         if (ifa->ifa_dev)
122                 in_dev_put(ifa->ifa_dev);
123         kfree(ifa);
124 }
125
126 static inline void inet_free_ifa(struct in_ifaddr *ifa)
127 {
128         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
129 }
130
131 void in_dev_finish_destroy(struct in_device *idev)
132 {
133         struct net_device *dev = idev->dev;
134
135         WARN_ON(idev->ifa_list);
136         WARN_ON(idev->mc_list);
137 #ifdef NET_REFCNT_DEBUG
138         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
139                idev, dev ? dev->name : "NIL");
140 #endif
141         dev_put(dev);
142         if (!idev->dead)
143                 printk("Freeing alive in_device %p\n", idev);
144         else {
145                 kfree(idev);
146         }
147 }
148
149 static struct in_device *inetdev_init(struct net_device *dev)
150 {
151         struct in_device *in_dev;
152
153         ASSERT_RTNL();
154
155         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
156         if (!in_dev)
157                 goto out;
158         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
159                         sizeof(in_dev->cnf));
160         in_dev->cnf.sysctl = NULL;
161         in_dev->dev = dev;
162         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
163                 goto out_kfree;
164         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
165                 dev_disable_lro(dev);
166         /* Reference in_dev->dev */
167         dev_hold(dev);
168         /* Account for reference dev->ip_ptr (below) */
169         in_dev_hold(in_dev);
170
171         devinet_sysctl_register(in_dev);
172         ip_mc_init_dev(in_dev);
173         if (dev->flags & IFF_UP)
174                 ip_mc_up(in_dev);
175
176         /* we can receive as soon as ip_ptr is set -- do this last */
177         rcu_assign_pointer(dev->ip_ptr, in_dev);
178 out:
179         return in_dev;
180 out_kfree:
181         kfree(in_dev);
182         in_dev = NULL;
183         goto out;
184 }
185
186 static void in_dev_rcu_put(struct rcu_head *head)
187 {
188         struct in_device *idev = container_of(head, struct in_device, rcu_head);
189         in_dev_put(idev);
190 }
191
192 static void inetdev_destroy(struct in_device *in_dev)
193 {
194         struct in_ifaddr *ifa;
195         struct net_device *dev;
196
197         ASSERT_RTNL();
198
199         dev = in_dev->dev;
200
201         in_dev->dead = 1;
202
203         ip_mc_destroy_dev(in_dev);
204
205         while ((ifa = in_dev->ifa_list) != NULL) {
206                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
207                 inet_free_ifa(ifa);
208         }
209
210         dev->ip_ptr = NULL;
211
212         devinet_sysctl_unregister(in_dev);
213         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
214         arp_ifdown(dev);
215
216         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
217 }
218
219 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
220 {
221         rcu_read_lock();
222         for_primary_ifa(in_dev) {
223                 if (inet_ifa_match(a, ifa)) {
224                         if (!b || inet_ifa_match(b, ifa)) {
225                                 rcu_read_unlock();
226                                 return 1;
227                         }
228                 }
229         } endfor_ifa(in_dev);
230         rcu_read_unlock();
231         return 0;
232 }
233
234 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
235                          int destroy, struct nlmsghdr *nlh, u32 pid)
236 {
237         struct in_ifaddr *promote = NULL;
238         struct in_ifaddr *ifa, *ifa1 = *ifap;
239         struct in_ifaddr *last_prim = in_dev->ifa_list;
240         struct in_ifaddr *prev_prom = NULL;
241         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
242
243         ASSERT_RTNL();
244
245         /* 1. Deleting primary ifaddr forces deletion all secondaries
246          * unless alias promotion is set
247          **/
248
249         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
250                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
251
252                 while ((ifa = *ifap1) != NULL) {
253                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
254                             ifa1->ifa_scope <= ifa->ifa_scope)
255                                 last_prim = ifa;
256
257                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
258                             ifa1->ifa_mask != ifa->ifa_mask ||
259                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
260                                 ifap1 = &ifa->ifa_next;
261                                 prev_prom = ifa;
262                                 continue;
263                         }
264
265                         if (!do_promote) {
266                                 *ifap1 = ifa->ifa_next;
267
268                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
269                                 blocking_notifier_call_chain(&inetaddr_chain,
270                                                 NETDEV_DOWN, ifa);
271                                 inet_free_ifa(ifa);
272                         } else {
273                                 promote = ifa;
274                                 break;
275                         }
276                 }
277         }
278
279         /* 2. Unlink it */
280
281         *ifap = ifa1->ifa_next;
282
283         /* 3. Announce address deletion */
284
285         /* Send message first, then call notifier.
286            At first sight, FIB update triggered by notifier
287            will refer to already deleted ifaddr, that could confuse
288            netlink listeners. It is not true: look, gated sees
289            that route deleted and if it still thinks that ifaddr
290            is valid, it will try to restore deleted routes... Grr.
291            So that, this order is correct.
292          */
293         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
294         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
295
296         if (promote) {
297
298                 if (prev_prom) {
299                         prev_prom->ifa_next = promote->ifa_next;
300                         promote->ifa_next = last_prim->ifa_next;
301                         last_prim->ifa_next = promote;
302                 }
303
304                 promote->ifa_flags &= ~IFA_F_SECONDARY;
305                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
306                 blocking_notifier_call_chain(&inetaddr_chain,
307                                 NETDEV_UP, promote);
308                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
309                         if (ifa1->ifa_mask != ifa->ifa_mask ||
310                             !inet_ifa_match(ifa1->ifa_address, ifa))
311                                         continue;
312                         fib_add_ifaddr(ifa);
313                 }
314
315         }
316         if (destroy)
317                 inet_free_ifa(ifa1);
318 }
319
320 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
321                          int destroy)
322 {
323         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
324 }
325
326 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
327                              u32 pid)
328 {
329         struct in_device *in_dev = ifa->ifa_dev;
330         struct in_ifaddr *ifa1, **ifap, **last_primary;
331
332         ASSERT_RTNL();
333
334         if (!ifa->ifa_local) {
335                 inet_free_ifa(ifa);
336                 return 0;
337         }
338
339         ifa->ifa_flags &= ~IFA_F_SECONDARY;
340         last_primary = &in_dev->ifa_list;
341
342         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
343              ifap = &ifa1->ifa_next) {
344                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
345                     ifa->ifa_scope <= ifa1->ifa_scope)
346                         last_primary = &ifa1->ifa_next;
347                 if (ifa1->ifa_mask == ifa->ifa_mask &&
348                     inet_ifa_match(ifa1->ifa_address, ifa)) {
349                         if (ifa1->ifa_local == ifa->ifa_local) {
350                                 inet_free_ifa(ifa);
351                                 return -EEXIST;
352                         }
353                         if (ifa1->ifa_scope != ifa->ifa_scope) {
354                                 inet_free_ifa(ifa);
355                                 return -EINVAL;
356                         }
357                         ifa->ifa_flags |= IFA_F_SECONDARY;
358                 }
359         }
360
361         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
362                 net_srandom(ifa->ifa_local);
363                 ifap = last_primary;
364         }
365
366         ifa->ifa_next = *ifap;
367         *ifap = ifa;
368
369         /* Send message first, then call notifier.
370            Notifier will trigger FIB update, so that
371            listeners of netlink will know about new ifaddr */
372         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
373         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
374
375         return 0;
376 }
377
378 static int inet_insert_ifa(struct in_ifaddr *ifa)
379 {
380         return __inet_insert_ifa(ifa, NULL, 0);
381 }
382
383 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
384 {
385         struct in_device *in_dev = __in_dev_get_rtnl(dev);
386
387         ASSERT_RTNL();
388
389         if (!in_dev) {
390                 inet_free_ifa(ifa);
391                 return -ENOBUFS;
392         }
393         ipv4_devconf_setall(in_dev);
394         if (ifa->ifa_dev != in_dev) {
395                 WARN_ON(ifa->ifa_dev);
396                 in_dev_hold(in_dev);
397                 ifa->ifa_dev = in_dev;
398         }
399         if (ipv4_is_loopback(ifa->ifa_local))
400                 ifa->ifa_scope = RT_SCOPE_HOST;
401         return inet_insert_ifa(ifa);
402 }
403
404 struct in_device *inetdev_by_index(struct net *net, int ifindex)
405 {
406         struct net_device *dev;
407         struct in_device *in_dev = NULL;
408
409         rcu_read_lock();
410         dev = dev_get_by_index_rcu(net, ifindex);
411         if (dev)
412                 in_dev = in_dev_get(dev);
413         rcu_read_unlock();
414         return in_dev;
415 }
416
417 /* Called only from RTNL semaphored context. No locks. */
418
419 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
420                                     __be32 mask)
421 {
422         ASSERT_RTNL();
423
424         for_primary_ifa(in_dev) {
425                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
426                         return ifa;
427         } endfor_ifa(in_dev);
428         return NULL;
429 }
430
431 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
432 {
433         struct net *net = sock_net(skb->sk);
434         struct nlattr *tb[IFA_MAX+1];
435         struct in_device *in_dev;
436         struct ifaddrmsg *ifm;
437         struct in_ifaddr *ifa, **ifap;
438         int err = -EINVAL;
439
440         ASSERT_RTNL();
441
442         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
443         if (err < 0)
444                 goto errout;
445
446         ifm = nlmsg_data(nlh);
447         in_dev = inetdev_by_index(net, ifm->ifa_index);
448         if (in_dev == NULL) {
449                 err = -ENODEV;
450                 goto errout;
451         }
452
453         __in_dev_put(in_dev);
454
455         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
456              ifap = &ifa->ifa_next) {
457                 if (tb[IFA_LOCAL] &&
458                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
459                         continue;
460
461                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
462                         continue;
463
464                 if (tb[IFA_ADDRESS] &&
465                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
466                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
467                         continue;
468
469                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
470                 return 0;
471         }
472
473         err = -EADDRNOTAVAIL;
474 errout:
475         return err;
476 }
477
478 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
479 {
480         struct nlattr *tb[IFA_MAX+1];
481         struct in_ifaddr *ifa;
482         struct ifaddrmsg *ifm;
483         struct net_device *dev;
484         struct in_device *in_dev;
485         int err;
486
487         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
488         if (err < 0)
489                 goto errout;
490
491         ifm = nlmsg_data(nlh);
492         err = -EINVAL;
493         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
494                 goto errout;
495
496         dev = __dev_get_by_index(net, ifm->ifa_index);
497         err = -ENODEV;
498         if (dev == NULL)
499                 goto errout;
500
501         in_dev = __in_dev_get_rtnl(dev);
502         err = -ENOBUFS;
503         if (in_dev == NULL)
504                 goto errout;
505
506         ifa = inet_alloc_ifa();
507         if (ifa == NULL)
508                 /*
509                  * A potential indev allocation can be left alive, it stays
510                  * assigned to its device and is destroy with it.
511                  */
512                 goto errout;
513
514         ipv4_devconf_setall(in_dev);
515         in_dev_hold(in_dev);
516
517         if (tb[IFA_ADDRESS] == NULL)
518                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
519
520         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
521         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
522         ifa->ifa_flags = ifm->ifa_flags;
523         ifa->ifa_scope = ifm->ifa_scope;
524         ifa->ifa_dev = in_dev;
525
526         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
527         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
528
529         if (tb[IFA_BROADCAST])
530                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
531
532         if (tb[IFA_LABEL])
533                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
534         else
535                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
536
537         return ifa;
538
539 errout:
540         return ERR_PTR(err);
541 }
542
543 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
544 {
545         struct net *net = sock_net(skb->sk);
546         struct in_ifaddr *ifa;
547
548         ASSERT_RTNL();
549
550         ifa = rtm_to_ifaddr(net, nlh);
551         if (IS_ERR(ifa))
552                 return PTR_ERR(ifa);
553
554         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
555 }
556
557 /*
558  *      Determine a default network mask, based on the IP address.
559  */
560
561 static __inline__ int inet_abc_len(__be32 addr)
562 {
563         int rc = -1;    /* Something else, probably a multicast. */
564
565         if (ipv4_is_zeronet(addr))
566                 rc = 0;
567         else {
568                 __u32 haddr = ntohl(addr);
569
570                 if (IN_CLASSA(haddr))
571                         rc = 8;
572                 else if (IN_CLASSB(haddr))
573                         rc = 16;
574                 else if (IN_CLASSC(haddr))
575                         rc = 24;
576         }
577
578         return rc;
579 }
580
581
582 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
583 {
584         struct ifreq ifr;
585         struct sockaddr_in sin_orig;
586         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
587         struct in_device *in_dev;
588         struct in_ifaddr **ifap = NULL;
589         struct in_ifaddr *ifa = NULL;
590         struct net_device *dev;
591         char *colon;
592         int ret = -EFAULT;
593         int tryaddrmatch = 0;
594
595         /*
596          *      Fetch the caller's info block into kernel space
597          */
598
599         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
600                 goto out;
601         ifr.ifr_name[IFNAMSIZ - 1] = 0;
602
603         /* save original address for comparison */
604         memcpy(&sin_orig, sin, sizeof(*sin));
605
606         colon = strchr(ifr.ifr_name, ':');
607         if (colon)
608                 *colon = 0;
609
610         dev_load(net, ifr.ifr_name);
611
612         switch (cmd) {
613         case SIOCGIFADDR:       /* Get interface address */
614         case SIOCGIFBRDADDR:    /* Get the broadcast address */
615         case SIOCGIFDSTADDR:    /* Get the destination address */
616         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
617                 /* Note that these ioctls will not sleep,
618                    so that we do not impose a lock.
619                    One day we will be forced to put shlock here (I mean SMP)
620                  */
621                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
622                 memset(sin, 0, sizeof(*sin));
623                 sin->sin_family = AF_INET;
624                 break;
625
626         case SIOCSIFFLAGS:
627                 ret = -EACCES;
628                 if (!capable(CAP_NET_ADMIN))
629                         goto out;
630                 break;
631         case SIOCSIFADDR:       /* Set interface address (and family) */
632         case SIOCSIFBRDADDR:    /* Set the broadcast address */
633         case SIOCSIFDSTADDR:    /* Set the destination address */
634         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
635                 ret = -EACCES;
636                 if (!capable(CAP_NET_ADMIN))
637                         goto out;
638                 ret = -EINVAL;
639                 if (sin->sin_family != AF_INET)
640                         goto out;
641                 break;
642         default:
643                 ret = -EINVAL;
644                 goto out;
645         }
646
647         rtnl_lock();
648
649         ret = -ENODEV;
650         if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
651                 goto done;
652
653         if (colon)
654                 *colon = ':';
655
656         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
657                 if (tryaddrmatch) {
658                         /* Matthias Andree */
659                         /* compare label and address (4.4BSD style) */
660                         /* note: we only do this for a limited set of ioctls
661                            and only if the original address family was AF_INET.
662                            This is checked above. */
663                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
664                              ifap = &ifa->ifa_next) {
665                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
666                                     sin_orig.sin_addr.s_addr ==
667                                                         ifa->ifa_address) {
668                                         break; /* found */
669                                 }
670                         }
671                 }
672                 /* we didn't get a match, maybe the application is
673                    4.3BSD-style and passed in junk so we fall back to
674                    comparing just the label */
675                 if (!ifa) {
676                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
677                              ifap = &ifa->ifa_next)
678                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
679                                         break;
680                 }
681         }
682
683         ret = -EADDRNOTAVAIL;
684         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
685                 goto done;
686
687         switch (cmd) {
688         case SIOCGIFADDR:       /* Get interface address */
689                 sin->sin_addr.s_addr = ifa->ifa_local;
690                 goto rarok;
691
692         case SIOCGIFBRDADDR:    /* Get the broadcast address */
693                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
694                 goto rarok;
695
696         case SIOCGIFDSTADDR:    /* Get the destination address */
697                 sin->sin_addr.s_addr = ifa->ifa_address;
698                 goto rarok;
699
700         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
701                 sin->sin_addr.s_addr = ifa->ifa_mask;
702                 goto rarok;
703
704         case SIOCSIFFLAGS:
705                 if (colon) {
706                         ret = -EADDRNOTAVAIL;
707                         if (!ifa)
708                                 break;
709                         ret = 0;
710                         if (!(ifr.ifr_flags & IFF_UP))
711                                 inet_del_ifa(in_dev, ifap, 1);
712                         break;
713                 }
714                 ret = dev_change_flags(dev, ifr.ifr_flags);
715                 break;
716
717         case SIOCSIFADDR:       /* Set interface address (and family) */
718                 ret = -EINVAL;
719                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
720                         break;
721
722                 if (!ifa) {
723                         ret = -ENOBUFS;
724                         if ((ifa = inet_alloc_ifa()) == NULL)
725                                 break;
726                         if (colon)
727                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
728                         else
729                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
730                 } else {
731                         ret = 0;
732                         if (ifa->ifa_local == sin->sin_addr.s_addr)
733                                 break;
734                         inet_del_ifa(in_dev, ifap, 0);
735                         ifa->ifa_broadcast = 0;
736                         ifa->ifa_scope = 0;
737                 }
738
739                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
740
741                 if (!(dev->flags & IFF_POINTOPOINT)) {
742                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
743                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
744                         if ((dev->flags & IFF_BROADCAST) &&
745                             ifa->ifa_prefixlen < 31)
746                                 ifa->ifa_broadcast = ifa->ifa_address |
747                                                      ~ifa->ifa_mask;
748                 } else {
749                         ifa->ifa_prefixlen = 32;
750                         ifa->ifa_mask = inet_make_mask(32);
751                 }
752                 ret = inet_set_ifa(dev, ifa);
753                 break;
754
755         case SIOCSIFBRDADDR:    /* Set the broadcast address */
756                 ret = 0;
757                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
758                         inet_del_ifa(in_dev, ifap, 0);
759                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
760                         inet_insert_ifa(ifa);
761                 }
762                 break;
763
764         case SIOCSIFDSTADDR:    /* Set the destination address */
765                 ret = 0;
766                 if (ifa->ifa_address == sin->sin_addr.s_addr)
767                         break;
768                 ret = -EINVAL;
769                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
770                         break;
771                 ret = 0;
772                 inet_del_ifa(in_dev, ifap, 0);
773                 ifa->ifa_address = sin->sin_addr.s_addr;
774                 inet_insert_ifa(ifa);
775                 break;
776
777         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
778
779                 /*
780                  *      The mask we set must be legal.
781                  */
782                 ret = -EINVAL;
783                 if (bad_mask(sin->sin_addr.s_addr, 0))
784                         break;
785                 ret = 0;
786                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
787                         __be32 old_mask = ifa->ifa_mask;
788                         inet_del_ifa(in_dev, ifap, 0);
789                         ifa->ifa_mask = sin->sin_addr.s_addr;
790                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
791
792                         /* See if current broadcast address matches
793                          * with current netmask, then recalculate
794                          * the broadcast address. Otherwise it's a
795                          * funny address, so don't touch it since
796                          * the user seems to know what (s)he's doing...
797                          */
798                         if ((dev->flags & IFF_BROADCAST) &&
799                             (ifa->ifa_prefixlen < 31) &&
800                             (ifa->ifa_broadcast ==
801                              (ifa->ifa_local|~old_mask))) {
802                                 ifa->ifa_broadcast = (ifa->ifa_local |
803                                                       ~sin->sin_addr.s_addr);
804                         }
805                         inet_insert_ifa(ifa);
806                 }
807                 break;
808         }
809 done:
810         rtnl_unlock();
811 out:
812         return ret;
813 rarok:
814         rtnl_unlock();
815         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
816         goto out;
817 }
818
819 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
820 {
821         struct in_device *in_dev = __in_dev_get_rtnl(dev);
822         struct in_ifaddr *ifa;
823         struct ifreq ifr;
824         int done = 0;
825
826         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
827                 goto out;
828
829         for (; ifa; ifa = ifa->ifa_next) {
830                 if (!buf) {
831                         done += sizeof(ifr);
832                         continue;
833                 }
834                 if (len < (int) sizeof(ifr))
835                         break;
836                 memset(&ifr, 0, sizeof(struct ifreq));
837                 if (ifa->ifa_label)
838                         strcpy(ifr.ifr_name, ifa->ifa_label);
839                 else
840                         strcpy(ifr.ifr_name, dev->name);
841
842                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
843                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
844                                                                 ifa->ifa_local;
845
846                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
847                         done = -EFAULT;
848                         break;
849                 }
850                 buf  += sizeof(struct ifreq);
851                 len  -= sizeof(struct ifreq);
852                 done += sizeof(struct ifreq);
853         }
854 out:
855         return done;
856 }
857
858 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
859 {
860         __be32 addr = 0;
861         struct in_device *in_dev;
862         struct net *net = dev_net(dev);
863
864         rcu_read_lock();
865         in_dev = __in_dev_get_rcu(dev);
866         if (!in_dev)
867                 goto no_in_dev;
868
869         for_primary_ifa(in_dev) {
870                 if (ifa->ifa_scope > scope)
871                         continue;
872                 if (!dst || inet_ifa_match(dst, ifa)) {
873                         addr = ifa->ifa_local;
874                         break;
875                 }
876                 if (!addr)
877                         addr = ifa->ifa_local;
878         } endfor_ifa(in_dev);
879 no_in_dev:
880         rcu_read_unlock();
881
882         if (addr)
883                 goto out;
884
885         /* Not loopback addresses on loopback should be preferred
886            in this case. It is importnat that lo is the first interface
887            in dev_base list.
888          */
889         read_lock(&dev_base_lock);
890         rcu_read_lock();
891         for_each_netdev(net, dev) {
892                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
893                         continue;
894
895                 for_primary_ifa(in_dev) {
896                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
897                             ifa->ifa_scope <= scope) {
898                                 addr = ifa->ifa_local;
899                                 goto out_unlock_both;
900                         }
901                 } endfor_ifa(in_dev);
902         }
903 out_unlock_both:
904         read_unlock(&dev_base_lock);
905         rcu_read_unlock();
906 out:
907         return addr;
908 }
909
/*
 * Check one in_device for a local address satisfying the wildcard query
 * described by @dst, @local and @scope (0 means "any" for @dst/@local).
 *
 * Two pieces of state are tracked while walking the address list:
 *   addr - candidate result: the first address equal to @local (or any
 *          address when @local is 0) whose scope does not exceed @scope;
 *   same - set once an entry is found whose subnet (inet_ifa_match())
 *          covers @local and @dst, each test being skipped when the
 *          corresponding argument is 0.
 *
 * Returns addr only when same ended up set, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* Pick the first acceptable candidate address. */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* Subnet condition already met earlier: done. */
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /*
                                 * With an explicit local address, or no dst
                                 * constraint, the candidate is final.
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        /* A candidate without a matching subnet entry does not count. */
        return same? addr : 0;
}
946
947 /*
948  * Confirm that local IP address exists using wildcards:
949  * - in_dev: only on this interface, 0=any interface
950  * - dst: only in the same subnet as dst, 0=any dst
951  * - local: address, 0=autoselect the local address
952  * - scope: maximum allowed scope value for the local address
953  */
954 __be32 inet_confirm_addr(struct in_device *in_dev,
955                          __be32 dst, __be32 local, int scope)
956 {
957         __be32 addr = 0;
958         struct net_device *dev;
959         struct net *net;
960
961         if (scope != RT_SCOPE_LINK)
962                 return confirm_addr_indev(in_dev, dst, local, scope);
963
964         net = dev_net(in_dev->dev);
965         read_lock(&dev_base_lock);
966         rcu_read_lock();
967         for_each_netdev(net, dev) {
968                 if ((in_dev = __in_dev_get_rcu(dev))) {
969                         addr = confirm_addr_indev(in_dev, dst, local, scope);
970                         if (addr)
971                                 break;
972                 }
973         }
974         rcu_read_unlock();
975         read_unlock(&dev_base_lock);
976
977         return addr;
978 }
979
980 /*
981  *      Device notifier
982  */
983
/*
 * Add @nb to the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
988
/*
 * Remove @nb from the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
993
994 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
995  * alias numbering and to create unique labels if possible.
996 */
997 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
998 {
999         struct in_ifaddr *ifa;
1000         int named = 0;
1001
1002         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1003                 char old[IFNAMSIZ], *dot;
1004
1005                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1006                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1007                 if (named++ == 0)
1008                         goto skip;
1009                 dot = strchr(old, ':');
1010                 if (dot == NULL) {
1011                         sprintf(old, ":%d", named);
1012                         dot = old;
1013                 }
1014                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1015                         strcat(ifa->ifa_label, dot);
1016                 } else {
1017                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1018                 }
1019 skip:
1020                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1021         }
1022 }
1023
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device.
 * The threshold used throughout this file is 68 bytes.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
        const unsigned min_mtu = 68;

        return !(mtu < min_mtu);
}
1028
/*
 * Netdevice notifier callback for IPv4 (installed via ip_netdev_notifier).
 * Called only under RTNL semaphore.
 *
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Returns NOTIFY_DONE, except when creating the in_device on
 * NETDEV_REGISTER fails, in which case -ENOMEM is reported through
 * notifier_from_errno().
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = ptr;
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        /*
         * No IPv4 state on this device yet: only REGISTER and a CHANGEMTU
         * to a usable MTU create it; every other event is ignored.
         */
        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (!in_dev)
                                return notifier_from_errno(-ENOMEM);
                        /* Loopback starts with NOXFRM and NOPOLICY set. */
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        /* NOTE(review): an inetdev_init() failure is not reported here. */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* REGISTER for a device that already has an in_device. */
                printk(KERN_DEBUG "inetdev_event: bug\n");
                dev->ip_ptr = NULL;
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                if (dev->flags & IFF_LOOPBACK) {
                        /*
                         * Give loopback INADDR_LOOPBACK with an 8-bit
                         * prefix and host scope; allocation failure is
                         * silently tolerated.
                         */
                        struct in_ifaddr *ifa;
                        if ((ifa = inet_alloc_ifa()) != NULL) {
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                /* fall through */
        case NETDEV_CHANGEADDR:
                /* Send gratuitous ARP to notify of link change */
                if (IN_DEV_ARP_NOTIFY(in_dev)) {
                        /* Only the first address in the list is announced. */
                        struct in_ifaddr *ifa = in_dev->ifa_list;

                        if (ifa)
                                arp_send(ARPOP_REQUEST, ETH_P_ARP,
                                         ifa->ifa_address, dev,
                                         ifa->ifa_address, NULL,
                                         dev->dev_addr, NULL);
                }
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_BONDING_OLDTYPE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_BONDING_NEWTYPE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough */
                /* fall through */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Relabel the addresses for the new device name.
                 * NOTE(review): inetdev_changename() sends RTM_NEWADDR for
                 * each address, so netlink listeners do see label changes.
                 */
                inetdev_changename(dev, in_dev);

                /* Re-register so the sysctl entries use the new name. */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1121
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1125
1126 static inline size_t inet_nlmsg_size(void)
1127 {
1128         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1129                + nla_total_size(4) /* IFA_ADDRESS */
1130                + nla_total_size(4) /* IFA_LOCAL */
1131                + nla_total_size(4) /* IFA_BROADCAST */
1132                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1133 }
1134
1135 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1136                             u32 pid, u32 seq, int event, unsigned int flags)
1137 {
1138         struct ifaddrmsg *ifm;
1139         struct nlmsghdr  *nlh;
1140
1141         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1142         if (nlh == NULL)
1143                 return -EMSGSIZE;
1144
1145         ifm = nlmsg_data(nlh);
1146         ifm->ifa_family = AF_INET;
1147         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1148         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1149         ifm->ifa_scope = ifa->ifa_scope;
1150         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1151
1152         if (ifa->ifa_address)
1153                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1154
1155         if (ifa->ifa_local)
1156                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1157
1158         if (ifa->ifa_broadcast)
1159                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1160
1161         if (ifa->ifa_label[0])
1162                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1163
1164         return nlmsg_end(skb, nlh);
1165
1166 nla_put_failure:
1167         nlmsg_cancel(skb, nlh);
1168         return -EMSGSIZE;
1169 }
1170
/*
 * Netlink dump callback for RTM_GETADDR (registered in devinet_init()).
 *
 * Walks every device of the requesting socket's namespace and appends one
 * RTM_NEWADDR message per IPv4 address to @skb.  The position reached is
 * saved in cb->args[0] (device index) and cb->args[1] (address index
 * within that device) so that the next invocation resumes from there.
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int idx, ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
        int s_ip_idx, s_idx = cb->args[0];

        s_ip_idx = ip_idx = cb->args[1];
        idx = 0;
        for_each_netdev(net, dev) {
                /* Skip devices fully handled by earlier invocations. */
                if (idx < s_idx)
                        goto cont;
                /* Past the resume device: start at its first address. */
                if (idx > s_idx)
                        s_ip_idx = 0;
                if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
                        goto cont;

                for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                     ifa = ifa->ifa_next, ip_idx++) {
                        if (ip_idx < s_ip_idx)
                                continue;
                        /* Stop at the first address that could not be added. */
                        if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR, NLM_F_MULTI) <= 0)
                                goto done;
                }
cont:
                idx++;
        }

done:
        /* Remember where to resume. */
        cb->args[0] = idx;
        cb->args[1] = ip_idx;

        return skb->len;
}
1209
1210 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1211                       u32 pid)
1212 {
1213         struct sk_buff *skb;
1214         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1215         int err = -ENOBUFS;
1216         struct net *net;
1217
1218         net = dev_net(ifa->ifa_dev->dev);
1219         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1220         if (skb == NULL)
1221                 goto errout;
1222
1223         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1224         if (err < 0) {
1225                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1226                 WARN_ON(err == -EMSGSIZE);
1227                 kfree_skb(skb);
1228                 goto errout;
1229         }
1230         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1231         return;
1232 errout:
1233         if (err < 0)
1234                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1235 }
1236
1237 #ifdef CONFIG_SYSCTL
1238
1239 static void devinet_copy_dflt_conf(struct net *net, int i)
1240 {
1241         struct net_device *dev;
1242
1243         read_lock(&dev_base_lock);
1244         for_each_netdev(net, dev) {
1245                 struct in_device *in_dev;
1246                 rcu_read_lock();
1247                 in_dev = __in_dev_get_rcu(dev);
1248                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1249                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1250                 rcu_read_unlock();
1251         }
1252         read_unlock(&dev_base_lock);
1253 }
1254
1255 static void inet_forward_change(struct net *net)
1256 {
1257         struct net_device *dev;
1258         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1259
1260         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1261         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1262
1263         read_lock(&dev_base_lock);
1264         for_each_netdev(net, dev) {
1265                 struct in_device *in_dev;
1266                 if (on)
1267                         dev_disable_lro(dev);
1268                 rcu_read_lock();
1269                 in_dev = __in_dev_get_rcu(dev);
1270                 if (in_dev)
1271                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1272                 rcu_read_unlock();
1273         }
1274         read_unlock(&dev_base_lock);
1275 }
1276
1277 static int devinet_conf_proc(ctl_table *ctl, int write,
1278                              void __user *buffer,
1279                              size_t *lenp, loff_t *ppos)
1280 {
1281         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1282
1283         if (write) {
1284                 struct ipv4_devconf *cnf = ctl->extra1;
1285                 struct net *net = ctl->extra2;
1286                 int i = (int *)ctl->data - cnf->data;
1287
1288                 set_bit(i, cnf->state);
1289
1290                 if (cnf == net->ipv4.devconf_dflt)
1291                         devinet_copy_dflt_conf(net, i);
1292         }
1293
1294         return ret;
1295 }
1296
1297 static int devinet_conf_sysctl(ctl_table *table,
1298                                void __user *oldval, size_t __user *oldlenp,
1299                                void __user *newval, size_t newlen)
1300 {
1301         struct ipv4_devconf *cnf;
1302         struct net *net;
1303         int *valp = table->data;
1304         int new;
1305         int i;
1306
1307         if (!newval || !newlen)
1308                 return 0;
1309
1310         if (newlen != sizeof(int))
1311                 return -EINVAL;
1312
1313         if (get_user(new, (int __user *)newval))
1314                 return -EFAULT;
1315
1316         if (new == *valp)
1317                 return 0;
1318
1319         if (oldval && oldlenp) {
1320                 size_t len;
1321
1322                 if (get_user(len, oldlenp))
1323                         return -EFAULT;
1324
1325                 if (len) {
1326                         if (len > table->maxlen)
1327                                 len = table->maxlen;
1328                         if (copy_to_user(oldval, valp, len))
1329                                 return -EFAULT;
1330                         if (put_user(len, oldlenp))
1331                                 return -EFAULT;
1332                 }
1333         }
1334
1335         *valp = new;
1336
1337         cnf = table->extra1;
1338         net = table->extra2;
1339         i = (int *)table->data - cnf->data;
1340
1341         set_bit(i, cnf->state);
1342
1343         if (cnf == net->ipv4.devconf_dflt)
1344                 devinet_copy_dflt_conf(net, i);
1345
1346         return 1;
1347 }
1348
1349 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1350                                   void __user *buffer,
1351                                   size_t *lenp, loff_t *ppos)
1352 {
1353         int *valp = ctl->data;
1354         int val = *valp;
1355         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1356
1357         if (write && *valp != val) {
1358                 struct net *net = ctl->extra2;
1359
1360                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1361                         if (!rtnl_trylock())
1362                                 return restart_syscall();
1363                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1364                                 inet_forward_change(net);
1365                         } else if (*valp) {
1366                                 struct ipv4_devconf *cnf = ctl->extra1;
1367                                 struct in_device *idev =
1368                                         container_of(cnf, struct in_device, cnf);
1369                                 dev_disable_lro(idev->dev);
1370                         }
1371                         rtnl_unlock();
1372                         rt_cache_flush(net, 0);
1373                 }
1374         }
1375
1376         return ret;
1377 }
1378
1379 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1380                          void __user *buffer,
1381                          size_t *lenp, loff_t *ppos)
1382 {
1383         int *valp = ctl->data;
1384         int val = *valp;
1385         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1386         struct net *net = ctl->extra2;
1387
1388         if (write && *valp != val)
1389                 rt_cache_flush(net, 0);
1390
1391         return ret;
1392 }
1393
1394 int ipv4_doint_and_flush_strategy(ctl_table *table,
1395                                   void __user *oldval, size_t __user *oldlenp,
1396                                   void __user *newval, size_t newlen)
1397 {
1398         int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
1399         struct net *net = table->extra2;
1400
1401         if (ret == 1)
1402                 rt_cache_flush(net, 0);
1403
1404         return ret;
1405 }
1406
1407
/*
 * Build one ctl_table initializer for the devconf value NET_IPV4_CONF_<attr>.
 *   attr   - suffix of the NET_IPV4_CONF_* constant
 *   name   - procname of the entry
 *   mval   - file mode
 *   proc   - /proc handler
 *   sysctl - sysctl(2) strategy routine
 * .data and .extra1 point at the global ipv4_devconf here;
 * __devinet_sysctl_register() rebases them onto the table it is given.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
        { \
                .ctl_name       = NET_IPV4_CONF_ ## attr, \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  NET_IPV4_CONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .strategy       = sysctl, \
                .extra1         = &ipv4_devconf, \
        }

/* Read-write (0644) entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
                             devinet_conf_sysctl)

/* Read-only (0444) entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
                             devinet_conf_sysctl)

/* Read-write (0644) entry with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Read-write entry whose handlers also flush the route cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
                                     ipv4_doint_and_flush_strategy)
1435
/*
 * Template for the sysctl entries of one ipv4_devconf instance.
 * __devinet_sysctl_register() duplicates it with kmemdup() for each
 * registration and fills in the per-copy fields:
 *   sysctl_header - handle returned by register_net_sysctl_table()
 *   devinet_vars  - the entries themselves
 *   dev_name      - private copy of the directory name
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
        char *dev_name;
} devinet_sysctl = {
        .devinet_vars = {
                /* forwarding has its own /proc handler */
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward,
                                             devinet_conf_sysctl),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),

                /* changing these also flushes the route cache */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
                                              "force_igmp_version"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
        },
};
1473
1474 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1475                 int ctl_name, struct ipv4_devconf *p)
1476 {
1477         int i;
1478         struct devinet_sysctl_table *t;
1479
1480 #define DEVINET_CTL_PATH_DEV    3
1481
1482         struct ctl_path devinet_ctl_path[] = {
1483                 { .procname = "net", .ctl_name = CTL_NET, },
1484                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1485                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1486                 { /* to be set */ },
1487                 { },
1488         };
1489
1490         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1491         if (!t)
1492                 goto out;
1493
1494         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1495                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1496                 t->devinet_vars[i].extra1 = p;
1497                 t->devinet_vars[i].extra2 = net;
1498         }
1499
1500         /*
1501          * Make a copy of dev_name, because '.procname' is regarded as const
1502          * by sysctl and we wouldn't want anyone to change it under our feet
1503          * (see SIOCSIFNAME).
1504          */
1505         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1506         if (!t->dev_name)
1507                 goto free;
1508
1509         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1510         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1511
1512         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1513                         t->devinet_vars);
1514         if (!t->sysctl_header)
1515                 goto free_procname;
1516
1517         p->sysctl = t;
1518         return 0;
1519
1520 free_procname:
1521         kfree(t->dev_name);
1522 free:
1523         kfree(t);
1524 out:
1525         return -ENOBUFS;
1526 }
1527
1528 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1529 {
1530         struct devinet_sysctl_table *t = cnf->sysctl;
1531
1532         if (t == NULL)
1533                 return;
1534
1535         cnf->sysctl = NULL;
1536         unregister_sysctl_table(t->sysctl_header);
1537         kfree(t->dev_name);
1538         kfree(t);
1539 }
1540
/*
 * Register the sysctl entries of @idev: first the neighbour parameters
 * (idev->arp_parms), then the devconf directory named after the device,
 * using the device's ifindex as binary sysctl id.
 * NOTE(review): the return values of both registrations are ignored.
 */
static void devinet_sysctl_register(struct in_device *idev)
{
        neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
                        NET_IPV4_NEIGH, "ipv4", NULL, NULL);
        __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
                        idev->dev->ifindex, &idev->cnf);
}
1548
/*
 * Remove the sysctl entries of @idev, in the reverse order of
 * devinet_sysctl_register(): devconf directory first, then the neighbour
 * parameters.
 */
static void devinet_sysctl_unregister(struct in_device *idev)
{
        __devinet_sysctl_unregister(&idev->cnf);
        neigh_sysctl_unregister(idev->arp_parms);
}
1554
/*
 * Table holding the single "ip_forward" entry, registered under
 * net_ipv4_path.  It is backed by the FORWARDING slot of ipv4_devconf and
 * bound to init_net; devinet_init_net() duplicates and rebinds it for
 * other namespaces.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .ctl_name       = NET_IPV4_FORWARD,
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        NET_IPV4_CONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .strategy       = devinet_conf_sysctl,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },    /* terminator */
};
1570
/* sysctl path "net/ipv4" under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
        { .procname = "net", .ctl_name = CTL_NET, },
        { .procname = "ipv4", .ctl_name = NET_IPV4, },
        { },    /* terminator */
};
1576 #endif
1577
/*
 * Per-namespace initialisation (the .init hook of devinet_ops).
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt blocks and the
 * static ctl_forward_entry table; every other namespace gets kmemdup()ed
 * copies of all three.  With CONFIG_SYSCTL the "all" and "default" conf
 * directories and the ip_forward entry are then registered.
 *
 * Returns 0 on success or a negative errno, after releasing whatever had
 * been set up so far.
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        if (net != &init_net) {
                /* Private copies, seeded from the current global values. */
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (all == NULL)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (dflt == NULL)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (tbl == NULL)
                        goto err_alloc_ctl;

                /* Point the copied ip_forward entry at this namespace. */
                tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all",
                        NET_PROTO_CONF_ALL, all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default",
                        NET_PROTO_CONF_DEFAULT, dflt);
        if (err < 0)
                goto err_reg_dflt;

        err = -ENOMEM;
        forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
        if (forw_hdr == NULL)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /* Unwind in reverse order; never free the static init_net objects. */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(dflt);
err_reg_dflt:
        __devinet_sysctl_unregister(all);
err_reg_all:
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
1651
/*
 * Per-namespace teardown (the .exit hook of devinet_ops): unregister the
 * ip_forward table and the "default"/"all" conf directories, then free the
 * forward table and both configuration blocks.
 * NOTE(review): everything is kfree()d unconditionally, which presumes this
 * never runs for init_net, whose objects are static -- confirm.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl;

        /* Fetch the table pointer before its header is unregistered. */
        tbl = net->ipv4.forw_hdr->ctl_table_arg;
        unregister_net_sysctl_table(net->ipv4.forw_hdr);
        __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
        __devinet_sysctl_unregister(net->ipv4.devconf_all);
        kfree(tbl);
#endif
        kfree(net->ipv4.devconf_dflt);
        kfree(net->ipv4.devconf_all);
}
1666
/* Per-namespace setup/teardown hooks, registered in devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
1671
/*
 * Boot-time initialisation: register the per-namespace operations, the
 * PF_INET gifconf handler, the netdevice notifier and the rtnetlink
 * handlers for RTM_NEWADDR, RTM_DELADDR and RTM_GETADDR (dump only).
 * NOTE(review): none of the registration return values are checked.
 */
void __init devinet_init(void)
{
        register_pernet_subsys(&devinet_ops);

        register_gifconf(PF_INET, inet_gifconf);
        register_netdevice_notifier(&ip_netdev_notifier);

        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1683
/* Symbols from this file exported with EXPORT_SYMBOL(). */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);