]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv4/devinet.c
dc94b0316b783fd1c1985e407fb0339782c66df5
[net-next-2.6.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65
66 static struct ipv4_devconf ipv4_devconf = {
67         .data = {
68                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
72         },
73 };
74
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76         .data = {
77                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82         },
83 };
84
/*
 * Read attribute @attr from the default devconf of network namespace @net.
 * @net is parenthesized so that any pointer-valued expression (not only a
 * plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
87
88 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89         [IFA_LOCAL]             = { .type = NLA_U32 },
90         [IFA_ADDRESS]           = { .type = NLA_U32 },
91         [IFA_BROADCAST]         = { .type = NLA_U32 },
92         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99                          int destroy);
100 #ifdef CONFIG_SYSCTL
101 static void devinet_sysctl_register(struct in_device *idev);
102 static void devinet_sysctl_unregister(struct in_device *idev);
103 #else
104 static inline void devinet_sysctl_register(struct in_device *idev)
105 {
106 }
107 static inline void devinet_sysctl_unregister(struct in_device *idev)
108 {
109 }
110 #endif
111
112 /* Locks all the inet devices. */
113
114 static struct in_ifaddr *inet_alloc_ifa(void)
115 {
116         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
117 }
118
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122         if (ifa->ifa_dev)
123                 in_dev_put(ifa->ifa_dev);
124         kfree(ifa);
125 }
126
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 {
129         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130 }
131
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134         struct net_device *dev = idev->dev;
135
136         WARN_ON(idev->ifa_list);
137         WARN_ON(idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140                idev, dev ? dev->name : "NIL");
141 #endif
142         dev_put(dev);
143         if (!idev->dead)
144                 pr_err("Freeing alive in_device %p\n", idev);
145         else
146                 kfree(idev);
147 }
148 EXPORT_SYMBOL(in_dev_finish_destroy);
149
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152         struct in_device *in_dev;
153
154         ASSERT_RTNL();
155
156         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157         if (!in_dev)
158                 goto out;
159         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160                         sizeof(in_dev->cnf));
161         in_dev->cnf.sysctl = NULL;
162         in_dev->dev = dev;
163         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164         if (!in_dev->arp_parms)
165                 goto out_kfree;
166         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167                 dev_disable_lro(dev);
168         /* Reference in_dev->dev */
169         dev_hold(dev);
170         /* Account for reference dev->ip_ptr (below) */
171         in_dev_hold(in_dev);
172
173         devinet_sysctl_register(in_dev);
174         ip_mc_init_dev(in_dev);
175         if (dev->flags & IFF_UP)
176                 ip_mc_up(in_dev);
177
178         /* we can receive as soon as ip_ptr is set -- do this last */
179         rcu_assign_pointer(dev->ip_ptr, in_dev);
180 out:
181         return in_dev;
182 out_kfree:
183         kfree(in_dev);
184         in_dev = NULL;
185         goto out;
186 }
187
188 static void in_dev_rcu_put(struct rcu_head *head)
189 {
190         struct in_device *idev = container_of(head, struct in_device, rcu_head);
191         in_dev_put(idev);
192 }
193
194 static void inetdev_destroy(struct in_device *in_dev)
195 {
196         struct in_ifaddr *ifa;
197         struct net_device *dev;
198
199         ASSERT_RTNL();
200
201         dev = in_dev->dev;
202
203         in_dev->dead = 1;
204
205         ip_mc_destroy_dev(in_dev);
206
207         while ((ifa = in_dev->ifa_list) != NULL) {
208                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209                 inet_free_ifa(ifa);
210         }
211
212         rcu_assign_pointer(dev->ip_ptr, NULL);
213
214         devinet_sysctl_unregister(in_dev);
215         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216         arp_ifdown(dev);
217
218         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219 }
220
221 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222 {
223         rcu_read_lock();
224         for_primary_ifa(in_dev) {
225                 if (inet_ifa_match(a, ifa)) {
226                         if (!b || inet_ifa_match(b, ifa)) {
227                                 rcu_read_unlock();
228                                 return 1;
229                         }
230                 }
231         } endfor_ifa(in_dev);
232         rcu_read_unlock();
233         return 0;
234 }
235
236 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
237                          int destroy, struct nlmsghdr *nlh, u32 pid)
238 {
239         struct in_ifaddr *promote = NULL;
240         struct in_ifaddr *ifa, *ifa1 = *ifap;
241         struct in_ifaddr *last_prim = in_dev->ifa_list;
242         struct in_ifaddr *prev_prom = NULL;
243         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
244
245         ASSERT_RTNL();
246
247         /* 1. Deleting primary ifaddr forces deletion all secondaries
248          * unless alias promotion is set
249          **/
250
251         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
252                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
253
254                 while ((ifa = *ifap1) != NULL) {
255                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
256                             ifa1->ifa_scope <= ifa->ifa_scope)
257                                 last_prim = ifa;
258
259                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
260                             ifa1->ifa_mask != ifa->ifa_mask ||
261                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
262                                 ifap1 = &ifa->ifa_next;
263                                 prev_prom = ifa;
264                                 continue;
265                         }
266
267                         if (!do_promote) {
268                                 *ifap1 = ifa->ifa_next;
269
270                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
271                                 blocking_notifier_call_chain(&inetaddr_chain,
272                                                 NETDEV_DOWN, ifa);
273                                 inet_free_ifa(ifa);
274                         } else {
275                                 promote = ifa;
276                                 break;
277                         }
278                 }
279         }
280
281         /* 2. Unlink it */
282
283         *ifap = ifa1->ifa_next;
284
285         /* 3. Announce address deletion */
286
287         /* Send message first, then call notifier.
288            At first sight, FIB update triggered by notifier
289            will refer to already deleted ifaddr, that could confuse
290            netlink listeners. It is not true: look, gated sees
291            that route deleted and if it still thinks that ifaddr
292            is valid, it will try to restore deleted routes... Grr.
293            So that, this order is correct.
294          */
295         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
296         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
297
298         if (promote) {
299
300                 if (prev_prom) {
301                         prev_prom->ifa_next = promote->ifa_next;
302                         promote->ifa_next = last_prim->ifa_next;
303                         last_prim->ifa_next = promote;
304                 }
305
306                 promote->ifa_flags &= ~IFA_F_SECONDARY;
307                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
308                 blocking_notifier_call_chain(&inetaddr_chain,
309                                 NETDEV_UP, promote);
310                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
311                         if (ifa1->ifa_mask != ifa->ifa_mask ||
312                             !inet_ifa_match(ifa1->ifa_address, ifa))
313                                         continue;
314                         fib_add_ifaddr(ifa);
315                 }
316
317         }
318         if (destroy)
319                 inet_free_ifa(ifa1);
320 }
321
322 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
323                          int destroy)
324 {
325         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
326 }
327
328 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
329                              u32 pid)
330 {
331         struct in_device *in_dev = ifa->ifa_dev;
332         struct in_ifaddr *ifa1, **ifap, **last_primary;
333
334         ASSERT_RTNL();
335
336         if (!ifa->ifa_local) {
337                 inet_free_ifa(ifa);
338                 return 0;
339         }
340
341         ifa->ifa_flags &= ~IFA_F_SECONDARY;
342         last_primary = &in_dev->ifa_list;
343
344         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
345              ifap = &ifa1->ifa_next) {
346                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
347                     ifa->ifa_scope <= ifa1->ifa_scope)
348                         last_primary = &ifa1->ifa_next;
349                 if (ifa1->ifa_mask == ifa->ifa_mask &&
350                     inet_ifa_match(ifa1->ifa_address, ifa)) {
351                         if (ifa1->ifa_local == ifa->ifa_local) {
352                                 inet_free_ifa(ifa);
353                                 return -EEXIST;
354                         }
355                         if (ifa1->ifa_scope != ifa->ifa_scope) {
356                                 inet_free_ifa(ifa);
357                                 return -EINVAL;
358                         }
359                         ifa->ifa_flags |= IFA_F_SECONDARY;
360                 }
361         }
362
363         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
364                 net_srandom(ifa->ifa_local);
365                 ifap = last_primary;
366         }
367
368         ifa->ifa_next = *ifap;
369         *ifap = ifa;
370
371         /* Send message first, then call notifier.
372            Notifier will trigger FIB update, so that
373            listeners of netlink will know about new ifaddr */
374         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
375         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
376
377         return 0;
378 }
379
380 static int inet_insert_ifa(struct in_ifaddr *ifa)
381 {
382         return __inet_insert_ifa(ifa, NULL, 0);
383 }
384
385 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386 {
387         struct in_device *in_dev = __in_dev_get_rtnl(dev);
388
389         ASSERT_RTNL();
390
391         if (!in_dev) {
392                 inet_free_ifa(ifa);
393                 return -ENOBUFS;
394         }
395         ipv4_devconf_setall(in_dev);
396         if (ifa->ifa_dev != in_dev) {
397                 WARN_ON(ifa->ifa_dev);
398                 in_dev_hold(in_dev);
399                 ifa->ifa_dev = in_dev;
400         }
401         if (ipv4_is_loopback(ifa->ifa_local))
402                 ifa->ifa_scope = RT_SCOPE_HOST;
403         return inet_insert_ifa(ifa);
404 }
405
406 /* Caller must hold RCU or RTNL :
407  * We dont take a reference on found in_device
408  */
409 struct in_device *inetdev_by_index(struct net *net, int ifindex)
410 {
411         struct net_device *dev;
412         struct in_device *in_dev = NULL;
413
414         rcu_read_lock();
415         dev = dev_get_by_index_rcu(net, ifindex);
416         if (dev)
417                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
418         rcu_read_unlock();
419         return in_dev;
420 }
421 EXPORT_SYMBOL(inetdev_by_index);
422
423 /* Called only from RTNL semaphored context. No locks. */
424
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426                                     __be32 mask)
427 {
428         ASSERT_RTNL();
429
430         for_primary_ifa(in_dev) {
431                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432                         return ifa;
433         } endfor_ifa(in_dev);
434         return NULL;
435 }
436
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439         struct net *net = sock_net(skb->sk);
440         struct nlattr *tb[IFA_MAX+1];
441         struct in_device *in_dev;
442         struct ifaddrmsg *ifm;
443         struct in_ifaddr *ifa, **ifap;
444         int err = -EINVAL;
445
446         ASSERT_RTNL();
447
448         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449         if (err < 0)
450                 goto errout;
451
452         ifm = nlmsg_data(nlh);
453         in_dev = inetdev_by_index(net, ifm->ifa_index);
454         if (in_dev == NULL) {
455                 err = -ENODEV;
456                 goto errout;
457         }
458
459         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
460              ifap = &ifa->ifa_next) {
461                 if (tb[IFA_LOCAL] &&
462                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
463                         continue;
464
465                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
466                         continue;
467
468                 if (tb[IFA_ADDRESS] &&
469                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
470                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
471                         continue;
472
473                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
474                 return 0;
475         }
476
477         err = -EADDRNOTAVAIL;
478 errout:
479         return err;
480 }
481
482 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
483 {
484         struct nlattr *tb[IFA_MAX+1];
485         struct in_ifaddr *ifa;
486         struct ifaddrmsg *ifm;
487         struct net_device *dev;
488         struct in_device *in_dev;
489         int err;
490
491         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
492         if (err < 0)
493                 goto errout;
494
495         ifm = nlmsg_data(nlh);
496         err = -EINVAL;
497         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
498                 goto errout;
499
500         dev = __dev_get_by_index(net, ifm->ifa_index);
501         err = -ENODEV;
502         if (dev == NULL)
503                 goto errout;
504
505         in_dev = __in_dev_get_rtnl(dev);
506         err = -ENOBUFS;
507         if (in_dev == NULL)
508                 goto errout;
509
510         ifa = inet_alloc_ifa();
511         if (ifa == NULL)
512                 /*
513                  * A potential indev allocation can be left alive, it stays
514                  * assigned to its device and is destroy with it.
515                  */
516                 goto errout;
517
518         ipv4_devconf_setall(in_dev);
519         in_dev_hold(in_dev);
520
521         if (tb[IFA_ADDRESS] == NULL)
522                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
523
524         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526         ifa->ifa_flags = ifm->ifa_flags;
527         ifa->ifa_scope = ifm->ifa_scope;
528         ifa->ifa_dev = in_dev;
529
530         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
531         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
532
533         if (tb[IFA_BROADCAST])
534                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
535
536         if (tb[IFA_LABEL])
537                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
538         else
539                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
540
541         return ifa;
542
543 errout:
544         return ERR_PTR(err);
545 }
546
547 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
548 {
549         struct net *net = sock_net(skb->sk);
550         struct in_ifaddr *ifa;
551
552         ASSERT_RTNL();
553
554         ifa = rtm_to_ifaddr(net, nlh);
555         if (IS_ERR(ifa))
556                 return PTR_ERR(ifa);
557
558         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
559 }
560
561 /*
562  *      Determine a default network mask, based on the IP address.
563  */
564
565 static inline int inet_abc_len(__be32 addr)
566 {
567         int rc = -1;    /* Something else, probably a multicast. */
568
569         if (ipv4_is_zeronet(addr))
570                 rc = 0;
571         else {
572                 __u32 haddr = ntohl(addr);
573
574                 if (IN_CLASSA(haddr))
575                         rc = 8;
576                 else if (IN_CLASSB(haddr))
577                         rc = 16;
578                 else if (IN_CLASSC(haddr))
579                         rc = 24;
580         }
581
582         return rc;
583 }
584
585
586 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
587 {
588         struct ifreq ifr;
589         struct sockaddr_in sin_orig;
590         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
591         struct in_device *in_dev;
592         struct in_ifaddr **ifap = NULL;
593         struct in_ifaddr *ifa = NULL;
594         struct net_device *dev;
595         char *colon;
596         int ret = -EFAULT;
597         int tryaddrmatch = 0;
598
599         /*
600          *      Fetch the caller's info block into kernel space
601          */
602
603         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
604                 goto out;
605         ifr.ifr_name[IFNAMSIZ - 1] = 0;
606
607         /* save original address for comparison */
608         memcpy(&sin_orig, sin, sizeof(*sin));
609
610         colon = strchr(ifr.ifr_name, ':');
611         if (colon)
612                 *colon = 0;
613
614         dev_load(net, ifr.ifr_name);
615
616         switch (cmd) {
617         case SIOCGIFADDR:       /* Get interface address */
618         case SIOCGIFBRDADDR:    /* Get the broadcast address */
619         case SIOCGIFDSTADDR:    /* Get the destination address */
620         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
621                 /* Note that these ioctls will not sleep,
622                    so that we do not impose a lock.
623                    One day we will be forced to put shlock here (I mean SMP)
624                  */
625                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
626                 memset(sin, 0, sizeof(*sin));
627                 sin->sin_family = AF_INET;
628                 break;
629
630         case SIOCSIFFLAGS:
631                 ret = -EACCES;
632                 if (!capable(CAP_NET_ADMIN))
633                         goto out;
634                 break;
635         case SIOCSIFADDR:       /* Set interface address (and family) */
636         case SIOCSIFBRDADDR:    /* Set the broadcast address */
637         case SIOCSIFDSTADDR:    /* Set the destination address */
638         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
639                 ret = -EACCES;
640                 if (!capable(CAP_NET_ADMIN))
641                         goto out;
642                 ret = -EINVAL;
643                 if (sin->sin_family != AF_INET)
644                         goto out;
645                 break;
646         default:
647                 ret = -EINVAL;
648                 goto out;
649         }
650
651         rtnl_lock();
652
653         ret = -ENODEV;
654         dev = __dev_get_by_name(net, ifr.ifr_name);
655         if (!dev)
656                 goto done;
657
658         if (colon)
659                 *colon = ':';
660
661         in_dev = __in_dev_get_rtnl(dev);
662         if (in_dev) {
663                 if (tryaddrmatch) {
664                         /* Matthias Andree */
665                         /* compare label and address (4.4BSD style) */
666                         /* note: we only do this for a limited set of ioctls
667                            and only if the original address family was AF_INET.
668                            This is checked above. */
669                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
670                              ifap = &ifa->ifa_next) {
671                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672                                     sin_orig.sin_addr.s_addr ==
673                                                         ifa->ifa_address) {
674                                         break; /* found */
675                                 }
676                         }
677                 }
678                 /* we didn't get a match, maybe the application is
679                    4.3BSD-style and passed in junk so we fall back to
680                    comparing just the label */
681                 if (!ifa) {
682                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
683                              ifap = &ifa->ifa_next)
684                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
685                                         break;
686                 }
687         }
688
689         ret = -EADDRNOTAVAIL;
690         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
691                 goto done;
692
693         switch (cmd) {
694         case SIOCGIFADDR:       /* Get interface address */
695                 sin->sin_addr.s_addr = ifa->ifa_local;
696                 goto rarok;
697
698         case SIOCGIFBRDADDR:    /* Get the broadcast address */
699                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
700                 goto rarok;
701
702         case SIOCGIFDSTADDR:    /* Get the destination address */
703                 sin->sin_addr.s_addr = ifa->ifa_address;
704                 goto rarok;
705
706         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
707                 sin->sin_addr.s_addr = ifa->ifa_mask;
708                 goto rarok;
709
710         case SIOCSIFFLAGS:
711                 if (colon) {
712                         ret = -EADDRNOTAVAIL;
713                         if (!ifa)
714                                 break;
715                         ret = 0;
716                         if (!(ifr.ifr_flags & IFF_UP))
717                                 inet_del_ifa(in_dev, ifap, 1);
718                         break;
719                 }
720                 ret = dev_change_flags(dev, ifr.ifr_flags);
721                 break;
722
723         case SIOCSIFADDR:       /* Set interface address (and family) */
724                 ret = -EINVAL;
725                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
726                         break;
727
728                 if (!ifa) {
729                         ret = -ENOBUFS;
730                         ifa = inet_alloc_ifa();
731                         if (!ifa)
732                                 break;
733                         if (colon)
734                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
735                         else
736                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
737                 } else {
738                         ret = 0;
739                         if (ifa->ifa_local == sin->sin_addr.s_addr)
740                                 break;
741                         inet_del_ifa(in_dev, ifap, 0);
742                         ifa->ifa_broadcast = 0;
743                         ifa->ifa_scope = 0;
744                 }
745
746                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
747
748                 if (!(dev->flags & IFF_POINTOPOINT)) {
749                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
750                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
751                         if ((dev->flags & IFF_BROADCAST) &&
752                             ifa->ifa_prefixlen < 31)
753                                 ifa->ifa_broadcast = ifa->ifa_address |
754                                                      ~ifa->ifa_mask;
755                 } else {
756                         ifa->ifa_prefixlen = 32;
757                         ifa->ifa_mask = inet_make_mask(32);
758                 }
759                 ret = inet_set_ifa(dev, ifa);
760                 break;
761
762         case SIOCSIFBRDADDR:    /* Set the broadcast address */
763                 ret = 0;
764                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
765                         inet_del_ifa(in_dev, ifap, 0);
766                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
767                         inet_insert_ifa(ifa);
768                 }
769                 break;
770
771         case SIOCSIFDSTADDR:    /* Set the destination address */
772                 ret = 0;
773                 if (ifa->ifa_address == sin->sin_addr.s_addr)
774                         break;
775                 ret = -EINVAL;
776                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
777                         break;
778                 ret = 0;
779                 inet_del_ifa(in_dev, ifap, 0);
780                 ifa->ifa_address = sin->sin_addr.s_addr;
781                 inet_insert_ifa(ifa);
782                 break;
783
784         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
785
786                 /*
787                  *      The mask we set must be legal.
788                  */
789                 ret = -EINVAL;
790                 if (bad_mask(sin->sin_addr.s_addr, 0))
791                         break;
792                 ret = 0;
793                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
794                         __be32 old_mask = ifa->ifa_mask;
795                         inet_del_ifa(in_dev, ifap, 0);
796                         ifa->ifa_mask = sin->sin_addr.s_addr;
797                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
798
799                         /* See if current broadcast address matches
800                          * with current netmask, then recalculate
801                          * the broadcast address. Otherwise it's a
802                          * funny address, so don't touch it since
803                          * the user seems to know what (s)he's doing...
804                          */
805                         if ((dev->flags & IFF_BROADCAST) &&
806                             (ifa->ifa_prefixlen < 31) &&
807                             (ifa->ifa_broadcast ==
808                              (ifa->ifa_local|~old_mask))) {
809                                 ifa->ifa_broadcast = (ifa->ifa_local |
810                                                       ~sin->sin_addr.s_addr);
811                         }
812                         inet_insert_ifa(ifa);
813                 }
814                 break;
815         }
816 done:
817         rtnl_unlock();
818 out:
819         return ret;
820 rarok:
821         rtnl_unlock();
822         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
823         goto out;
824 }
825
826 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
827 {
828         struct in_device *in_dev = __in_dev_get_rtnl(dev);
829         struct in_ifaddr *ifa;
830         struct ifreq ifr;
831         int done = 0;
832
833         if (!in_dev)
834                 goto out;
835
836         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
837                 if (!buf) {
838                         done += sizeof(ifr);
839                         continue;
840                 }
841                 if (len < (int) sizeof(ifr))
842                         break;
843                 memset(&ifr, 0, sizeof(struct ifreq));
844                 if (ifa->ifa_label)
845                         strcpy(ifr.ifr_name, ifa->ifa_label);
846                 else
847                         strcpy(ifr.ifr_name, dev->name);
848
849                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
850                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
851                                                                 ifa->ifa_local;
852
853                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
854                         done = -EFAULT;
855                         break;
856                 }
857                 buf  += sizeof(struct ifreq);
858                 len  -= sizeof(struct ifreq);
859                 done += sizeof(struct ifreq);
860         }
861 out:
862         return done;
863 }
864
865 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
866 {
867         __be32 addr = 0;
868         struct in_device *in_dev;
869         struct net *net = dev_net(dev);
870
871         rcu_read_lock();
872         in_dev = __in_dev_get_rcu(dev);
873         if (!in_dev)
874                 goto no_in_dev;
875
876         for_primary_ifa(in_dev) {
877                 if (ifa->ifa_scope > scope)
878                         continue;
879                 if (!dst || inet_ifa_match(dst, ifa)) {
880                         addr = ifa->ifa_local;
881                         break;
882                 }
883                 if (!addr)
884                         addr = ifa->ifa_local;
885         } endfor_ifa(in_dev);
886
887         if (addr)
888                 goto out_unlock;
889 no_in_dev:
890
891         /* Not loopback addresses on loopback should be preferred
892            in this case. It is importnat that lo is the first interface
893            in dev_base list.
894          */
895         for_each_netdev_rcu(net, dev) {
896                 in_dev = __in_dev_get_rcu(dev);
897                 if (!in_dev)
898                         continue;
899
900                 for_primary_ifa(in_dev) {
901                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
902                             ifa->ifa_scope <= scope) {
903                                 addr = ifa->ifa_local;
904                                 goto out_unlock;
905                         }
906                 } endfor_ifa(in_dev);
907         }
908 out_unlock:
909         rcu_read_unlock();
910         return addr;
911 }
912 EXPORT_SYMBOL(inet_select_addr);
913
914 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
915                               __be32 local, int scope)
916 {
917         int same = 0;
918         __be32 addr = 0;
919
920         for_ifa(in_dev) {
921                 if (!addr &&
922                     (local == ifa->ifa_local || !local) &&
923                     ifa->ifa_scope <= scope) {
924                         addr = ifa->ifa_local;
925                         if (same)
926                                 break;
927                 }
928                 if (!same) {
929                         same = (!local || inet_ifa_match(local, ifa)) &&
930                                 (!dst || inet_ifa_match(dst, ifa));
931                         if (same && addr) {
932                                 if (local || !dst)
933                                         break;
934                                 /* Is the selected addr into dst subnet? */
935                                 if (inet_ifa_match(addr, ifa))
936                                         break;
937                                 /* No, then can we use new local src? */
938                                 if (ifa->ifa_scope <= scope) {
939                                         addr = ifa->ifa_local;
940                                         break;
941                                 }
942                                 /* search for large dst subnet for addr */
943                                 same = 0;
944                         }
945                 }
946         } endfor_ifa(in_dev);
947
948         return same ? addr : 0;
949 }
950
951 /*
952  * Confirm that local IP address exists using wildcards:
953  * - in_dev: only on this interface, 0=any interface
954  * - dst: only in the same subnet as dst, 0=any dst
955  * - local: address, 0=autoselect the local address
956  * - scope: maximum allowed scope value for the local address
957  */
958 __be32 inet_confirm_addr(struct in_device *in_dev,
959                          __be32 dst, __be32 local, int scope)
960 {
961         __be32 addr = 0;
962         struct net_device *dev;
963         struct net *net;
964
965         if (scope != RT_SCOPE_LINK)
966                 return confirm_addr_indev(in_dev, dst, local, scope);
967
968         net = dev_net(in_dev->dev);
969         rcu_read_lock();
970         for_each_netdev_rcu(net, dev) {
971                 in_dev = __in_dev_get_rcu(dev);
972                 if (in_dev) {
973                         addr = confirm_addr_indev(in_dev, dst, local, scope);
974                         if (addr)
975                                 break;
976                 }
977         }
978         rcu_read_unlock();
979
980         return addr;
981 }
982
983 /*
984  *      Device notifier
985  */
986
/*
 * Add @nb to inetaddr_chain.  Returns whatever
 * blocking_notifier_chain_register() returns.
 */
987 int register_inetaddr_notifier(struct notifier_block *nb)
988 {
989         return blocking_notifier_chain_register(&inetaddr_chain, nb);
990 }
991 EXPORT_SYMBOL(register_inetaddr_notifier);
992
/*
 * Remove @nb from inetaddr_chain.  Returns whatever
 * blocking_notifier_chain_unregister() returns.
 */
993 int unregister_inetaddr_notifier(struct notifier_block *nb)
994 {
995         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
996 }
997 EXPORT_SYMBOL(unregister_inetaddr_notifier);
998
999 /* Rename ifa_labels for a device name change. Make some effort to preserve
1000  * existing alias numbering and to create unique labels if possible.
1001 */
1002 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1003 {
1004         struct in_ifaddr *ifa;
1005         int named = 0;
1006
1007         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1008                 char old[IFNAMSIZ], *dot;
1009
1010                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1011                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1012                 if (named++ == 0)
1013                         goto skip;
1014                 dot = strchr(old, ':');
1015                 if (dot == NULL) {
1016                         sprintf(old, ":%d", named);
1017                         dot = old;
1018                 }
1019                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1020                         strcat(ifa->ifa_label, dot);
1021                 else
1022                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1023 skip:
1024                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1025         }
1026 }
1027
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device.
 * The threshold is 68 bytes (presumably the RFC 791 minimum datagram size
 * every module must forward unfragmented).
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned min_mtu = 68;

	return !(mtu < min_mtu);
}
1032
1033 /* Called only under RTNL semaphore */
1034
/*
 * Netdevice notifier callback (installed through ip_netdev_notifier).
 *
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Creates, updates or destroys the device's IPv4 state (struct in_device)
 * in response to the event.  Returns NOTIFY_DONE, except when
 * inetdev_init() fails on NETDEV_REGISTER, which is reported as
 * notifier_from_errno(-ENOMEM).
 */
1035 static int inetdev_event(struct notifier_block *this, unsigned long event,
1036                          void *ptr)
1037 {
1038         struct net_device *dev = ptr;
1039         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1040
1041         ASSERT_RTNL();
1042
             /* No IPv4 state yet: only REGISTER and CHANGEMTU can create it. */
1043         if (!in_dev) {
1044                 if (event == NETDEV_REGISTER) {
1045                         in_dev = inetdev_init(dev);
1046                         if (!in_dev)
1047                                 return notifier_from_errno(-ENOMEM);
                             /* Loopback starts with NOXFRM and NOPOLICY set. */
1048                         if (dev->flags & IFF_LOOPBACK) {
1049                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1050                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1051                         }
1052                 } else if (event == NETDEV_CHANGEMTU) {
1053                         /* Re-enabling IP */
1054                         if (inetdev_valid_mtu(dev->mtu))
1055                                 in_dev = inetdev_init(dev);
1056                 }
1057                 goto out;
1058         }
1059
1060         switch (event) {
1061         case NETDEV_REGISTER:
                     /* REGISTER for a device that already has an in_device. */
1062                 printk(KERN_DEBUG "inetdev_event: bug\n");
1063                 rcu_assign_pointer(dev->ip_ptr, NULL);
1064                 break;
1065         case NETDEV_UP:
1066                 if (!inetdev_valid_mtu(dev->mtu))
1067                         break;
                     /*
                      * Loopback: add INADDR_LOOPBACK with an 8-bit prefix and
                      * host scope, labelled with the device name.  Allocation
                      * failure is silently ignored.
                      */
1068                 if (dev->flags & IFF_LOOPBACK) {
1069                         struct in_ifaddr *ifa = inet_alloc_ifa();
1070
1071                         if (ifa) {
1072                                 ifa->ifa_local =
1073                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1074                                 ifa->ifa_prefixlen = 8;
1075                                 ifa->ifa_mask = inet_make_mask(8);
1076                                 in_dev_hold(in_dev);
1077                                 ifa->ifa_dev = in_dev;
1078                                 ifa->ifa_scope = RT_SCOPE_HOST;
1079                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1080                                 inet_insert_ifa(ifa);
1081                         }
1082                 }
1083                 ip_mc_up(in_dev);
1084                 /* fall through */
1085         case NETDEV_NOTIFY_PEERS:
1086         case NETDEV_CHANGEADDR:
1087                 /* Send gratuitous ARP to notify of link change */
                     /* Only the first address on the list is announced. */
1088                 if (IN_DEV_ARP_NOTIFY(in_dev)) {
1089                         struct in_ifaddr *ifa = in_dev->ifa_list;
1090
1091                         if (ifa)
1092                                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1093                                          ifa->ifa_address, dev,
1094                                          ifa->ifa_address, NULL,
1095                                          dev->dev_addr, NULL);
1096                 }
1097                 break;
1098         case NETDEV_DOWN:
1099                 ip_mc_down(in_dev);
1100                 break;
1101         case NETDEV_PRE_TYPE_CHANGE:
1102                 ip_mc_unmap(in_dev);
1103                 break;
1104         case NETDEV_POST_TYPE_CHANGE:
1105                 ip_mc_remap(in_dev);
1106                 break;
1107         case NETDEV_CHANGEMTU:
                     /* MTU still acceptable: nothing to do. */
1108                 if (inetdev_valid_mtu(dev->mtu))
1109                         break;
1110                 /* disable IP when MTU is not enough */
                     /* deliberate fall through into the teardown below */
1111         case NETDEV_UNREGISTER:
1112                 inetdev_destroy(in_dev);
1113                 break;
1114         case NETDEV_CHANGENAME:
1115                 /* Do not notify about label change, this event is
1116                  * not interesting to applications using netlink.
1117                  */
1118                 inetdev_changename(dev, in_dev);
1119
                     /* Re-register so the sysctl entries use the new name. */
1120                 devinet_sysctl_unregister(in_dev);
1121                 devinet_sysctl_register(in_dev);
1122                 break;
1123         }
1124 out:
1125         return NOTIFY_DONE;
1126 }
1127
/*
 * Notifier block that routes netdevice events to inetdev_event(); it is
 * registered with register_netdevice_notifier() in devinet_init().
 */
1128 static struct notifier_block ip_netdev_notifier = {
1129         .notifier_call = inetdev_event,
1130 };
1131
1132 static inline size_t inet_nlmsg_size(void)
1133 {
1134         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1135                + nla_total_size(4) /* IFA_ADDRESS */
1136                + nla_total_size(4) /* IFA_LOCAL */
1137                + nla_total_size(4) /* IFA_BROADCAST */
1138                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1139 }
1140
1141 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1142                             u32 pid, u32 seq, int event, unsigned int flags)
1143 {
1144         struct ifaddrmsg *ifm;
1145         struct nlmsghdr  *nlh;
1146
1147         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1148         if (nlh == NULL)
1149                 return -EMSGSIZE;
1150
1151         ifm = nlmsg_data(nlh);
1152         ifm->ifa_family = AF_INET;
1153         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1154         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1155         ifm->ifa_scope = ifa->ifa_scope;
1156         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1157
1158         if (ifa->ifa_address)
1159                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1160
1161         if (ifa->ifa_local)
1162                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1163
1164         if (ifa->ifa_broadcast)
1165                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1166
1167         if (ifa->ifa_label[0])
1168                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1169
1170         return nlmsg_end(skb, nlh);
1171
1172 nla_put_failure:
1173         nlmsg_cancel(skb, nlh);
1174         return -EMSGSIZE;
1175 }
1176
/*
 * Netlink dump callback (registered for PF_INET RTM_GETADDR in
 * devinet_init()): emits one RTM_NEWADDR message per IPv4 address of every
 * device in the socket's network namespace.
 *
 * The dump is resumable.  The position is kept in cb->args[]:
 *   args[0] - bucket index into net->dev_index_head
 *   args[1] - position of the device within that bucket
 *   args[2] - position of the address within that device's ifa_list
 *
 * Returns skb->len.
 */
1177 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1178 {
1179         struct net *net = sock_net(skb->sk);
1180         int h, s_h;
1181         int idx, s_idx;
1182         int ip_idx, s_ip_idx;
1183         struct net_device *dev;
1184         struct in_device *in_dev;
1185         struct in_ifaddr *ifa;
1186         struct hlist_head *head;
1187         struct hlist_node *node;
1188
             /* Pick up where the previous invocation stopped. */
1189         s_h = cb->args[0];
1190         s_idx = idx = cb->args[1];
1191         s_ip_idx = ip_idx = cb->args[2];
1192
             /* s_idx only applies to the starting bucket; later ones begin at 0. */
1193         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1194                 idx = 0;
1195                 head = &net->dev_index_head[h];
1196                 rcu_read_lock();
1197                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1198                         if (idx < s_idx)
1199                                 goto cont;
                             /* Past the resume device: start at its first address. */
1200                         if (h > s_h || idx > s_idx)
1201                                 s_ip_idx = 0;
1202                         in_dev = __in_dev_get_rcu(dev);
1203                         if (!in_dev)
1204                                 goto cont;
1205
1206                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1207                              ifa = ifa->ifa_next, ip_idx++) {
1208                                 if (ip_idx < s_ip_idx)
1209                                         continue;
                                     /* Fill failed: stop and record the position. */
1210                                 if (inet_fill_ifaddr(skb, ifa,
1211                                              NETLINK_CB(cb->skb).pid,
1212                                              cb->nlh->nlmsg_seq,
1213                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1214                                         rcu_read_unlock();
1215                                         goto done;
1216                                 }
1217                         }
1218 cont:
1219                         idx++;
1220                 }
1221                 rcu_read_unlock();
1222         }
1223
1224 done:
             /* Save the cursor for the next call. */
1225         cb->args[0] = h;
1226         cb->args[1] = idx;
1227         cb->args[2] = ip_idx;
1228
1229         return skb->len;
1230 }
1231
1232 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1233                       u32 pid)
1234 {
1235         struct sk_buff *skb;
1236         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1237         int err = -ENOBUFS;
1238         struct net *net;
1239
1240         net = dev_net(ifa->ifa_dev->dev);
1241         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1242         if (skb == NULL)
1243                 goto errout;
1244
1245         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1246         if (err < 0) {
1247                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1248                 WARN_ON(err == -EMSGSIZE);
1249                 kfree_skb(skb);
1250                 goto errout;
1251         }
1252         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1253         return;
1254 errout:
1255         if (err < 0)
1256                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1257 }
1258
1259 #ifdef CONFIG_SYSCTL
1260
1261 static void devinet_copy_dflt_conf(struct net *net, int i)
1262 {
1263         struct net_device *dev;
1264
1265         rcu_read_lock();
1266         for_each_netdev_rcu(net, dev) {
1267                 struct in_device *in_dev;
1268
1269                 in_dev = __in_dev_get_rcu(dev);
1270                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1271                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1272         }
1273         rcu_read_unlock();
1274 }
1275
1276 /* called with RTNL locked */
1277 static void inet_forward_change(struct net *net)
1278 {
1279         struct net_device *dev;
1280         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1281
1282         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1283         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1284
1285         for_each_netdev(net, dev) {
1286                 struct in_device *in_dev;
1287                 if (on)
1288                         dev_disable_lro(dev);
1289                 rcu_read_lock();
1290                 in_dev = __in_dev_get_rcu(dev);
1291                 if (in_dev)
1292                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1293                 rcu_read_unlock();
1294         }
1295 }
1296
1297 static int devinet_conf_proc(ctl_table *ctl, int write,
1298                              void __user *buffer,
1299                              size_t *lenp, loff_t *ppos)
1300 {
1301         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1302
1303         if (write) {
1304                 struct ipv4_devconf *cnf = ctl->extra1;
1305                 struct net *net = ctl->extra2;
1306                 int i = (int *)ctl->data - cnf->data;
1307
1308                 set_bit(i, cnf->state);
1309
1310                 if (cnf == net->ipv4.devconf_dflt)
1311                         devinet_copy_dflt_conf(net, i);
1312         }
1313
1314         return ret;
1315 }
1316
1317 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1318                                   void __user *buffer,
1319                                   size_t *lenp, loff_t *ppos)
1320 {
1321         int *valp = ctl->data;
1322         int val = *valp;
1323         loff_t pos = *ppos;
1324         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1325
1326         if (write && *valp != val) {
1327                 struct net *net = ctl->extra2;
1328
1329                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1330                         if (!rtnl_trylock()) {
1331                                 /* Restore the original values before restarting */
1332                                 *valp = val;
1333                                 *ppos = pos;
1334                                 return restart_syscall();
1335                         }
1336                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1337                                 inet_forward_change(net);
1338                         } else if (*valp) {
1339                                 struct ipv4_devconf *cnf = ctl->extra1;
1340                                 struct in_device *idev =
1341                                         container_of(cnf, struct in_device, cnf);
1342                                 dev_disable_lro(idev->dev);
1343                         }
1344                         rtnl_unlock();
1345                         rt_cache_flush(net, 0);
1346                 }
1347         }
1348
1349         return ret;
1350 }
1351
1352 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1353                          void __user *buffer,
1354                          size_t *lenp, loff_t *ppos)
1355 {
1356         int *valp = ctl->data;
1357         int val = *valp;
1358         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1359         struct net *net = ctl->extra2;
1360
1361         if (write && *valp != val)
1362                 rt_cache_flush(net, 0);
1363
1364         return ret;
1365 }
1366
/*
 * Initializer for one ctl_table entry of the devinet template table.
 *   attr - IPV4_DEVCONF_<attr> selects the slot; .data points at
 *          ipv4_devconf.data[IPV4_DEVCONF_<attr> - 1]
 *   name - procname of the entry
 *   mval - file mode
 *   proc - proc_handler
 * .extra1 points at the global ipv4_devconf.
 */
1367 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1368         { \
1369                 .procname       = name, \
1370                 .data           = ipv4_devconf.data + \
1371                                   IPV4_DEVCONF_ ## attr - 1, \
1372                 .maxlen         = sizeof(int), \
1373                 .mode           = mval, \
1374                 .proc_handler   = proc, \
1375                 .extra1         = &ipv4_devconf, \
1376         }
1377
/* Mode 0644 entry handled by devinet_conf_proc(). */
1378 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1379         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1380
/* Mode 0444 entry handled by devinet_conf_proc(). */
1381 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1382         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1383
/* Mode 0644 entry with a caller-supplied handler. */
1384 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1385         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1386
/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
1387 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1388         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1389
/*
 * Template for the per-config sysctl table.  __devinet_sysctl_register()
 * duplicates it with kmemdup() and rebases each entry's .data/.extra1 onto
 * the target struct ipv4_devconf.
 *
 *   sysctl_header - header returned by register_net_sysctl_table()
 *   devinet_vars  - the entries; the array has __IPV4_DEVCONF_MAX slots
 *                   (the last one is presumably left empty as terminator,
 *                   since the register loop skips it)
 *   dev_name      - private copy of the directory name
 */
1390 static struct devinet_sysctl_table {
1391         struct ctl_table_header *sysctl_header;
1392         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1393         char *dev_name;
1394 } devinet_sysctl = {
1395         .devinet_vars = {
1396                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1397                                              devinet_sysctl_forward),
1398                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1399
1400                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1401                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1402                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1403                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1404                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1405                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1406                                         "accept_source_route"),
1407                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1408                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1409                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1410                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1411                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1412                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1413                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1414                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1415                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1416                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1417                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1418                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1419                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1420
                     /* Writes that change these also flush the route cache. */
1421                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1422                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1423                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1424                                               "force_igmp_version"),
1425                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1426                                               "promote_secondaries"),
1427         },
1428 };
1429
1430 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1431                                         struct ipv4_devconf *p)
1432 {
1433         int i;
1434         struct devinet_sysctl_table *t;
1435
1436 #define DEVINET_CTL_PATH_DEV    3
1437
1438         struct ctl_path devinet_ctl_path[] = {
1439                 { .procname = "net",  },
1440                 { .procname = "ipv4", },
1441                 { .procname = "conf", },
1442                 { /* to be set */ },
1443                 { },
1444         };
1445
1446         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1447         if (!t)
1448                 goto out;
1449
1450         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1451                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1452                 t->devinet_vars[i].extra1 = p;
1453                 t->devinet_vars[i].extra2 = net;
1454         }
1455
1456         /*
1457          * Make a copy of dev_name, because '.procname' is regarded as const
1458          * by sysctl and we wouldn't want anyone to change it under our feet
1459          * (see SIOCSIFNAME).
1460          */
1461         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1462         if (!t->dev_name)
1463                 goto free;
1464
1465         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1466
1467         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1468                         t->devinet_vars);
1469         if (!t->sysctl_header)
1470                 goto free_procname;
1471
1472         p->sysctl = t;
1473         return 0;
1474
1475 free_procname:
1476         kfree(t->dev_name);
1477 free:
1478         kfree(t);
1479 out:
1480         return -ENOBUFS;
1481 }
1482
1483 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1484 {
1485         struct devinet_sysctl_table *t = cnf->sysctl;
1486
1487         if (t == NULL)
1488                 return;
1489
1490         cnf->sysctl = NULL;
1491         unregister_sysctl_table(t->sysctl_header);
1492         kfree(t->dev_name);
1493         kfree(t);
1494 }
1495
/*
 * Register the sysctl entries of @idev: the neighbour parameters under
 * "ipv4" and the devconf table under the device's name.  The return value
 * of __devinet_sysctl_register() is not checked.
 */
1496 static void devinet_sysctl_register(struct in_device *idev)
1497 {
1498         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1499         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1500                                         &idev->cnf);
1501 }
1502
/*
 * Remove the sysctl entries of @idev, in the reverse order of
 * devinet_sysctl_register(): devconf table first, then neighbour parameters.
 */
1503 static void devinet_sysctl_unregister(struct in_device *idev)
1504 {
1505         __devinet_sysctl_unregister(&idev->cnf);
1506         neigh_sysctl_unregister(idev->arp_parms);
1507 }
1508
/*
 * The "ip_forward" sysctl entry, bound to the FORWARDING slot of the global
 * ipv4_devconf and to init_net.  devinet_init_net() registers it under
 * net_ipv4_path; for other namespaces it registers a copy rebased onto that
 * namespace's own config.
 */
1509 static struct ctl_table ctl_forward_entry[] = {
1510         {
1511                 .procname       = "ip_forward",
1512                 .data           = &ipv4_devconf.data[
1513                                         IPV4_DEVCONF_FORWARDING - 1],
1514                 .maxlen         = sizeof(int),
1515                 .mode           = 0644,
1516                 .proc_handler   = devinet_sysctl_forward,
1517                 .extra1         = &ipv4_devconf,
1518                 .extra2         = &init_net,
1519         },
1520         { },
1521 };
1522
/* sysctl path "net/ipv4", under which the ip_forward table is registered. */
1523 static __net_initdata struct ctl_path net_ipv4_path[] = {
1524         { .procname = "net", },
1525         { .procname = "ipv4", },
1526         { },
1527 };
1528 #endif
1529
1530 static __net_init int devinet_init_net(struct net *net)
1531 {
1532         int err;
1533         struct ipv4_devconf *all, *dflt;
1534 #ifdef CONFIG_SYSCTL
1535         struct ctl_table *tbl = ctl_forward_entry;
1536         struct ctl_table_header *forw_hdr;
1537 #endif
1538
1539         err = -ENOMEM;
1540         all = &ipv4_devconf;
1541         dflt = &ipv4_devconf_dflt;
1542
1543         if (!net_eq(net, &init_net)) {
1544                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1545                 if (all == NULL)
1546                         goto err_alloc_all;
1547
1548                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1549                 if (dflt == NULL)
1550                         goto err_alloc_dflt;
1551
1552 #ifdef CONFIG_SYSCTL
1553                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1554                 if (tbl == NULL)
1555                         goto err_alloc_ctl;
1556
1557                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1558                 tbl[0].extra1 = all;
1559                 tbl[0].extra2 = net;
1560 #endif
1561         }
1562
1563 #ifdef CONFIG_SYSCTL
1564         err = __devinet_sysctl_register(net, "all", all);
1565         if (err < 0)
1566                 goto err_reg_all;
1567
1568         err = __devinet_sysctl_register(net, "default", dflt);
1569         if (err < 0)
1570                 goto err_reg_dflt;
1571
1572         err = -ENOMEM;
1573         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1574         if (forw_hdr == NULL)
1575                 goto err_reg_ctl;
1576         net->ipv4.forw_hdr = forw_hdr;
1577 #endif
1578
1579         net->ipv4.devconf_all = all;
1580         net->ipv4.devconf_dflt = dflt;
1581         return 0;
1582
1583 #ifdef CONFIG_SYSCTL
1584 err_reg_ctl:
1585         __devinet_sysctl_unregister(dflt);
1586 err_reg_dflt:
1587         __devinet_sysctl_unregister(all);
1588 err_reg_all:
1589         if (tbl != ctl_forward_entry)
1590                 kfree(tbl);
1591 err_alloc_ctl:
1592 #endif
1593         if (dflt != &ipv4_devconf_dflt)
1594                 kfree(dflt);
1595 err_alloc_dflt:
1596         if (all != &ipv4_devconf)
1597                 kfree(all);
1598 err_alloc_all:
1599         return err;
1600 }
1601
1602 static __net_exit void devinet_exit_net(struct net *net)
1603 {
1604 #ifdef CONFIG_SYSCTL
1605         struct ctl_table *tbl;
1606
1607         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1608         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1609         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1610         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1611         kfree(tbl);
1612 #endif
1613         kfree(net->ipv4.devconf_dflt);
1614         kfree(net->ipv4.devconf_all);
1615 }
1616
/*
 * Per-namespace init/exit hooks, registered with register_pernet_subsys()
 * in devinet_init().
 */
1617 static __net_initdata struct pernet_operations devinet_ops = {
1618         .init = devinet_init_net,
1619         .exit = devinet_exit_net,
1620 };
1621
/*
 * Boot-time initialisation of this file: registers the per-namespace
 * operations, the PF_INET gifconf handler, the netdevice notifier and the
 * rtnetlink handlers for RTM_NEWADDR, RTM_DELADDR and RTM_GETADDR (dump
 * only).  None of the registration return values are checked.
 */
1622 void __init devinet_init(void)
1623 {
1624         register_pernet_subsys(&devinet_ops);
1625
1626         register_gifconf(PF_INET, inet_gifconf);
1627         register_netdevice_notifier(&ip_netdev_notifier);
1628
1629         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1630         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1631         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1632 }
1633