]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv4/devinet.c
Merge branches 'release' and 'gpe-ack' into release
[net-next-2.6.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *      Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  *      Derived from the IP parts of dev.c 1.0.19
12  *              Authors:        Ross Biro
13  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *      Additional Authors:
17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *      Changes:
21  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
22  *                                      lists.
23  *              Cyrus Durgin:           updated for kmod
24  *              Matthias Andree:        in devinet_ioctl, compare label and
25  *                                      address (4.4BSD alias style support),
26  *                                      fall back to comparing just the label
27  *                                      if no match found.
28  */
29
30
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 static struct ipv4_devconf ipv4_devconf = {
68         .data = {
69                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
73         },
74 };
75
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77         .data = {
78                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83         },
84 };
85
/*
 * Apply IPV4_DEVCONF() to the default devconf block of namespace @net.
 * @net is parenthesised before being dereferenced so that callers may
 * pass any pointer expression, not just a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90         [IFA_LOCAL]             = { .type = NLA_U32 },
91         [IFA_ADDRESS]           = { .type = NLA_U32 },
92         [IFA_BROADCAST]         = { .type = NLA_U32 },
93         [IFA_ANYCAST]           = { .type = NLA_U32 },
94         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96
97 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
98
99 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
100 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
101                          int destroy);
102 #ifdef CONFIG_SYSCTL
103 static void devinet_sysctl_register(struct in_device *idev);
104 static void devinet_sysctl_unregister(struct in_device *idev);
105 #else
106 static inline void devinet_sysctl_register(struct in_device *idev)
107 {
108 }
109 static inline void devinet_sysctl_unregister(struct in_device *idev)
110 {
111 }
112 #endif
113
114 /* Locks all the inet devices. */
115
116 static struct in_ifaddr *inet_alloc_ifa(void)
117 {
118         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
119
120         if (ifa) {
121                 INIT_RCU_HEAD(&ifa->rcu_head);
122         }
123
124         return ifa;
125 }
126
127 static void inet_rcu_free_ifa(struct rcu_head *head)
128 {
129         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
130         if (ifa->ifa_dev)
131                 in_dev_put(ifa->ifa_dev);
132         kfree(ifa);
133 }
134
135 static inline void inet_free_ifa(struct in_ifaddr *ifa)
136 {
137         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
138 }
139
140 void in_dev_finish_destroy(struct in_device *idev)
141 {
142         struct net_device *dev = idev->dev;
143
144         BUG_TRAP(!idev->ifa_list);
145         BUG_TRAP(!idev->mc_list);
146 #ifdef NET_REFCNT_DEBUG
147         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
148                idev, dev ? dev->name : "NIL");
149 #endif
150         dev_put(dev);
151         if (!idev->dead)
152                 printk("Freeing alive in_device %p\n", idev);
153         else {
154                 kfree(idev);
155         }
156 }
157
158 static struct in_device *inetdev_init(struct net_device *dev)
159 {
160         struct in_device *in_dev;
161
162         ASSERT_RTNL();
163
164         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
165         if (!in_dev)
166                 goto out;
167         INIT_RCU_HEAD(&in_dev->rcu_head);
168         memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
169                         sizeof(in_dev->cnf));
170         in_dev->cnf.sysctl = NULL;
171         in_dev->dev = dev;
172         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
173                 goto out_kfree;
174         /* Reference in_dev->dev */
175         dev_hold(dev);
176         /* Account for reference dev->ip_ptr (below) */
177         in_dev_hold(in_dev);
178
179         devinet_sysctl_register(in_dev);
180         ip_mc_init_dev(in_dev);
181         if (dev->flags & IFF_UP)
182                 ip_mc_up(in_dev);
183
184         /* we can receive as soon as ip_ptr is set -- do this last */
185         rcu_assign_pointer(dev->ip_ptr, in_dev);
186 out:
187         return in_dev;
188 out_kfree:
189         kfree(in_dev);
190         in_dev = NULL;
191         goto out;
192 }
193
194 static void in_dev_rcu_put(struct rcu_head *head)
195 {
196         struct in_device *idev = container_of(head, struct in_device, rcu_head);
197         in_dev_put(idev);
198 }
199
200 static void inetdev_destroy(struct in_device *in_dev)
201 {
202         struct in_ifaddr *ifa;
203         struct net_device *dev;
204
205         ASSERT_RTNL();
206
207         dev = in_dev->dev;
208
209         in_dev->dead = 1;
210
211         ip_mc_destroy_dev(in_dev);
212
213         while ((ifa = in_dev->ifa_list) != NULL) {
214                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215                 inet_free_ifa(ifa);
216         }
217
218         dev->ip_ptr = NULL;
219
220         devinet_sysctl_unregister(in_dev);
221         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
222         arp_ifdown(dev);
223
224         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
225 }
226
227 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
228 {
229         rcu_read_lock();
230         for_primary_ifa(in_dev) {
231                 if (inet_ifa_match(a, ifa)) {
232                         if (!b || inet_ifa_match(b, ifa)) {
233                                 rcu_read_unlock();
234                                 return 1;
235                         }
236                 }
237         } endfor_ifa(in_dev);
238         rcu_read_unlock();
239         return 0;
240 }
241
/*
 * Unlink the address referenced by *ifap from in_dev's address list and
 * announce the removal.
 *
 * in_dev:  device whose ifa_list contains the address
 * ifap:    the link (pointer to the list pointer) that currently
 *          references the address being removed
 * destroy: when non-zero the removed address is passed to inet_free_ifa()
 * nlh/pid: handed through unchanged to rtmsg_ifa()
 *
 * When the removed address is a primary, its secondaries (same mask and
 * matching subnet) are either deleted too or, if
 * IN_DEV_PROMOTE_SECONDARIES(in_dev) is set, the first one is promoted
 * to primary.  Caller must hold RTNL.
 */
242 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
243                          int destroy, struct nlmsghdr *nlh, u32 pid)
244 {
245         struct in_ifaddr *promote = NULL;
246         struct in_ifaddr *ifa, *ifa1 = *ifap;
247         struct in_ifaddr *last_prim = in_dev->ifa_list;
248         struct in_ifaddr *prev_prom = NULL;
249         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
250
251         ASSERT_RTNL();
252
253         /* 1. Deleting primary ifaddr forces deletion all secondaries
254          * unless alias promotion is set
255          **/
256
257         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
258                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
259
260                 while ((ifa = *ifap1) != NULL) {
261                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
262                             ifa1->ifa_scope <= ifa->ifa_scope)
263                                 last_prim = ifa;
264
                        /* Skip entries that are not secondaries of ifa1. */
265                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
266                             ifa1->ifa_mask != ifa->ifa_mask ||
267                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
268                                 ifap1 = &ifa->ifa_next;
269                                 prev_prom = ifa;
270                                 continue;
271                         }
272
273                         if (!do_promote) {
274                                 *ifap1 = ifa->ifa_next;
275
276                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
277                                 blocking_notifier_call_chain(&inetaddr_chain,
278                                                 NETDEV_DOWN, ifa);
279                                 inet_free_ifa(ifa);
280                         } else {
281                                 promote = ifa;
282                                 break;
283                         }
284                 }
285         }
286
287         /* 2. Unlink it */
288
289         *ifap = ifa1->ifa_next;
290
291         /* 3. Announce address deletion */
292
293         /* Send message first, then call notifier.
294            At first sight, FIB update triggered by notifier
295            will refer to already deleted ifaddr, that could confuse
296            netlink listeners. It is not true: look, gated sees
297            that route deleted and if it still thinks that ifaddr
298            is valid, it will try to restore deleted routes... Grr.
299            So that, this order is correct.
300          */
301         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
302         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
303
304         if (promote) {
305
                /* Relink the promoted entry directly after last_prim. */
306                 if (prev_prom) {
307                         prev_prom->ifa_next = promote->ifa_next;
308                         promote->ifa_next = last_prim->ifa_next;
309                         last_prim->ifa_next = promote;
310                 }
311
312                 promote->ifa_flags &= ~IFA_F_SECONDARY;
313                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
314                 blocking_notifier_call_chain(&inetaddr_chain,
315                                 NETDEV_UP, promote);
                /* Call fib_add_ifaddr() for later entries in ifa1's subnet. */
316                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
317                         if (ifa1->ifa_mask != ifa->ifa_mask ||
318                             !inet_ifa_match(ifa1->ifa_address, ifa))
319                                         continue;
320                         fib_add_ifaddr(ifa);
321                 }
322
323         }
324         if (destroy)
325                 inet_free_ifa(ifa1);
326 }
327
328 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
329                          int destroy)
330 {
331         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
332 }
333
334 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
335                              u32 pid)
336 {
337         struct in_device *in_dev = ifa->ifa_dev;
338         struct in_ifaddr *ifa1, **ifap, **last_primary;
339
340         ASSERT_RTNL();
341
342         if (!ifa->ifa_local) {
343                 inet_free_ifa(ifa);
344                 return 0;
345         }
346
347         ifa->ifa_flags &= ~IFA_F_SECONDARY;
348         last_primary = &in_dev->ifa_list;
349
350         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
351              ifap = &ifa1->ifa_next) {
352                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
353                     ifa->ifa_scope <= ifa1->ifa_scope)
354                         last_primary = &ifa1->ifa_next;
355                 if (ifa1->ifa_mask == ifa->ifa_mask &&
356                     inet_ifa_match(ifa1->ifa_address, ifa)) {
357                         if (ifa1->ifa_local == ifa->ifa_local) {
358                                 inet_free_ifa(ifa);
359                                 return -EEXIST;
360                         }
361                         if (ifa1->ifa_scope != ifa->ifa_scope) {
362                                 inet_free_ifa(ifa);
363                                 return -EINVAL;
364                         }
365                         ifa->ifa_flags |= IFA_F_SECONDARY;
366                 }
367         }
368
369         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
370                 net_srandom(ifa->ifa_local);
371                 ifap = last_primary;
372         }
373
374         ifa->ifa_next = *ifap;
375         *ifap = ifa;
376
377         /* Send message first, then call notifier.
378            Notifier will trigger FIB update, so that
379            listeners of netlink will know about new ifaddr */
380         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
381         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
382
383         return 0;
384 }
385
386 static int inet_insert_ifa(struct in_ifaddr *ifa)
387 {
388         return __inet_insert_ifa(ifa, NULL, 0);
389 }
390
391 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
392 {
393         struct in_device *in_dev = __in_dev_get_rtnl(dev);
394
395         ASSERT_RTNL();
396
397         if (!in_dev) {
398                 inet_free_ifa(ifa);
399                 return -ENOBUFS;
400         }
401         ipv4_devconf_setall(in_dev);
402         if (ifa->ifa_dev != in_dev) {
403                 BUG_TRAP(!ifa->ifa_dev);
404                 in_dev_hold(in_dev);
405                 ifa->ifa_dev = in_dev;
406         }
407         if (ipv4_is_loopback(ifa->ifa_local))
408                 ifa->ifa_scope = RT_SCOPE_HOST;
409         return inet_insert_ifa(ifa);
410 }
411
412 struct in_device *inetdev_by_index(struct net *net, int ifindex)
413 {
414         struct net_device *dev;
415         struct in_device *in_dev = NULL;
416         read_lock(&dev_base_lock);
417         dev = __dev_get_by_index(net, ifindex);
418         if (dev)
419                 in_dev = in_dev_get(dev);
420         read_unlock(&dev_base_lock);
421         return in_dev;
422 }
423
424 /* Called only from RTNL semaphored context. No locks. */
425
426 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
427                                     __be32 mask)
428 {
429         ASSERT_RTNL();
430
431         for_primary_ifa(in_dev) {
432                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
433                         return ifa;
434         } endfor_ifa(in_dev);
435         return NULL;
436 }
437
438 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
439 {
440         struct net *net = skb->sk->sk_net;
441         struct nlattr *tb[IFA_MAX+1];
442         struct in_device *in_dev;
443         struct ifaddrmsg *ifm;
444         struct in_ifaddr *ifa, **ifap;
445         int err = -EINVAL;
446
447         ASSERT_RTNL();
448
449         if (net != &init_net)
450                 return -EINVAL;
451
452         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453         if (err < 0)
454                 goto errout;
455
456         ifm = nlmsg_data(nlh);
457         in_dev = inetdev_by_index(net, ifm->ifa_index);
458         if (in_dev == NULL) {
459                 err = -ENODEV;
460                 goto errout;
461         }
462
463         __in_dev_put(in_dev);
464
465         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
466              ifap = &ifa->ifa_next) {
467                 if (tb[IFA_LOCAL] &&
468                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
469                         continue;
470
471                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
472                         continue;
473
474                 if (tb[IFA_ADDRESS] &&
475                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
476                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
477                         continue;
478
479                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
480                 return 0;
481         }
482
483         err = -EADDRNOTAVAIL;
484 errout:
485         return err;
486 }
487
488 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
489 {
490         struct nlattr *tb[IFA_MAX+1];
491         struct in_ifaddr *ifa;
492         struct ifaddrmsg *ifm;
493         struct net_device *dev;
494         struct in_device *in_dev;
495         int err;
496
497         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498         if (err < 0)
499                 goto errout;
500
501         ifm = nlmsg_data(nlh);
502         err = -EINVAL;
503         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
504                 goto errout;
505
506         dev = __dev_get_by_index(net, ifm->ifa_index);
507         err = -ENODEV;
508         if (dev == NULL)
509                 goto errout;
510
511         in_dev = __in_dev_get_rtnl(dev);
512         err = -ENOBUFS;
513         if (in_dev == NULL)
514                 goto errout;
515
516         ifa = inet_alloc_ifa();
517         if (ifa == NULL)
518                 /*
519                  * A potential indev allocation can be left alive, it stays
520                  * assigned to its device and is destroy with it.
521                  */
522                 goto errout;
523
524         ipv4_devconf_setall(in_dev);
525         in_dev_hold(in_dev);
526
527         if (tb[IFA_ADDRESS] == NULL)
528                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
529
530         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
531         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
532         ifa->ifa_flags = ifm->ifa_flags;
533         ifa->ifa_scope = ifm->ifa_scope;
534         ifa->ifa_dev = in_dev;
535
536         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
537         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
538
539         if (tb[IFA_BROADCAST])
540                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
541
542         if (tb[IFA_ANYCAST])
543                 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
544
545         if (tb[IFA_LABEL])
546                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
547         else
548                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
549
550         return ifa;
551
552 errout:
553         return ERR_PTR(err);
554 }
555
556 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
557 {
558         struct net *net = skb->sk->sk_net;
559         struct in_ifaddr *ifa;
560
561         ASSERT_RTNL();
562
563         if (net != &init_net)
564                 return -EINVAL;
565
566         ifa = rtm_to_ifaddr(net, nlh);
567         if (IS_ERR(ifa))
568                 return PTR_ERR(ifa);
569
570         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
571 }
572
573 /*
574  *      Determine a default network mask, based on the IP address.
575  */
576
577 static __inline__ int inet_abc_len(__be32 addr)
578 {
579         int rc = -1;    /* Something else, probably a multicast. */
580
581         if (ipv4_is_zeronet(addr))
582                 rc = 0;
583         else {
584                 __u32 haddr = ntohl(addr);
585
586                 if (IN_CLASSA(haddr))
587                         rc = 8;
588                 else if (IN_CLASSB(haddr))
589                         rc = 16;
590                 else if (IN_CLASSC(haddr))
591                         rc = 24;
592         }
593
594         return rc;
595 }
596
597
/*
 * Handle the SIOC[GS]IF{ADDR,BRDADDR,DSTADDR,NETMASK} and SIOCSIFFLAGS
 * ioctls.
 *
 * cmd: ioctl number; anything not listed in the first switch yields
 *      -EINVAL
 * arg: user pointer to a struct ifreq, copied in at entry and, for the
 *      "get" commands, copied back out with the result
 *
 * A ':' in ifr_name is cut off for the device lookup and restored
 * afterwards so that the full name can be compared with ifa_label.
 * The "set" commands and SIOCSIFFLAGS require CAP_NET_ADMIN.
 * Everything after the first switch runs under rtnl_lock().
 *
 * Returns 0 on success or a negative errno value.
 */
598 int devinet_ioctl(unsigned int cmd, void __user *arg)
599 {
600         struct ifreq ifr;
601         struct sockaddr_in sin_orig;
602         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
603         struct in_device *in_dev;
604         struct in_ifaddr **ifap = NULL;
605         struct in_ifaddr *ifa = NULL;
606         struct net_device *dev;
607         char *colon;
608         int ret = -EFAULT;
609         int tryaddrmatch = 0;
610
611         /*
612          *      Fetch the caller's info block into kernel space
613          */
614
615         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
616                 goto out;
617         ifr.ifr_name[IFNAMSIZ - 1] = 0;
618
619         /* save original address for comparison */
620         memcpy(&sin_orig, sin, sizeof(*sin));
621
622         colon = strchr(ifr.ifr_name, ':');
623         if (colon)
624                 *colon = 0;
625
626 #ifdef CONFIG_KMOD
627         dev_load(&init_net, ifr.ifr_name);
628 #endif
629
        /* First pass: permission and argument checks, no lock held yet. */
630         switch (cmd) {
631         case SIOCGIFADDR:       /* Get interface address */
632         case SIOCGIFBRDADDR:    /* Get the broadcast address */
633         case SIOCGIFDSTADDR:    /* Get the destination address */
634         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
635                 /* Note that these ioctls will not sleep,
636                    so that we do not impose a lock.
637                    One day we will be forced to put shlock here (I mean SMP)
638                  */
639                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
640                 memset(sin, 0, sizeof(*sin));
641                 sin->sin_family = AF_INET;
642                 break;
643
644         case SIOCSIFFLAGS:
645                 ret = -EACCES;
646                 if (!capable(CAP_NET_ADMIN))
647                         goto out;
648                 break;
649         case SIOCSIFADDR:       /* Set interface address (and family) */
650         case SIOCSIFBRDADDR:    /* Set the broadcast address */
651         case SIOCSIFDSTADDR:    /* Set the destination address */
652         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
653                 ret = -EACCES;
654                 if (!capable(CAP_NET_ADMIN))
655                         goto out;
656                 ret = -EINVAL;
657                 if (sin->sin_family != AF_INET)
658                         goto out;
659                 break;
660         default:
661                 ret = -EINVAL;
662                 goto out;
663         }
664
665         rtnl_lock();
666
667         ret = -ENODEV;
668         if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
669                 goto done;
670
671         if (colon)
672                 *colon = ':';
673
674         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
675                 if (tryaddrmatch) {
676                         /* Matthias Andree */
677                         /* compare label and address (4.4BSD style) */
678                         /* note: we only do this for a limited set of ioctls
679                            and only if the original address family was AF_INET.
680                            This is checked above. */
681                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
682                              ifap = &ifa->ifa_next) {
683                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
684                                     sin_orig.sin_addr.s_addr ==
685                                                         ifa->ifa_address) {
686                                         break; /* found */
687                                 }
688                         }
689                 }
690                 /* we didn't get a match, maybe the application is
691                    4.3BSD-style and passed in junk so we fall back to
692                    comparing just the label */
693                 if (!ifa) {
694                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
695                              ifap = &ifa->ifa_next)
696                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
697                                         break;
698                 }
699         }
700
        /* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without a match. */
701         ret = -EADDRNOTAVAIL;
702         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
703                 goto done;
704
705         switch (cmd) {
706         case SIOCGIFADDR:       /* Get interface address */
707                 sin->sin_addr.s_addr = ifa->ifa_local;
708                 goto rarok;
709
710         case SIOCGIFBRDADDR:    /* Get the broadcast address */
711                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
712                 goto rarok;
713
714         case SIOCGIFDSTADDR:    /* Get the destination address */
715                 sin->sin_addr.s_addr = ifa->ifa_address;
716                 goto rarok;
717
718         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
719                 sin->sin_addr.s_addr = ifa->ifa_mask;
720                 goto rarok;
721
722         case SIOCSIFFLAGS:
                /* With a ':' label, clearing IFF_UP deletes that address
                   instead of changing the device flags. */
723                 if (colon) {
724                         ret = -EADDRNOTAVAIL;
725                         if (!ifa)
726                                 break;
727                         ret = 0;
728                         if (!(ifr.ifr_flags & IFF_UP))
729                                 inet_del_ifa(in_dev, ifap, 1);
730                         break;
731                 }
732                 ret = dev_change_flags(dev, ifr.ifr_flags);
733                 break;
734
735         case SIOCSIFADDR:       /* Set interface address (and family) */
736                 ret = -EINVAL;
737                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
738                         break;
739
740                 if (!ifa) {
741                         ret = -ENOBUFS;
742                         if ((ifa = inet_alloc_ifa()) == NULL)
743                                 break;
744                         if (colon)
745                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
746                         else
747                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
748                 } else {
749                         ret = 0;
750                         if (ifa->ifa_local == sin->sin_addr.s_addr)
751                                 break;
                        /* Unlink without freeing; re-inserted below. */
752                         inet_del_ifa(in_dev, ifap, 0);
753                         ifa->ifa_broadcast = 0;
754                         ifa->ifa_anycast = 0;
755                 }
756
757                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
758
759                 if (!(dev->flags & IFF_POINTOPOINT)) {
760                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
761                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
762                         if ((dev->flags & IFF_BROADCAST) &&
763                             ifa->ifa_prefixlen < 31)
764                                 ifa->ifa_broadcast = ifa->ifa_address |
765                                                      ~ifa->ifa_mask;
766                 } else {
767                         ifa->ifa_prefixlen = 32;
768                         ifa->ifa_mask = inet_make_mask(32);
769                 }
770                 ret = inet_set_ifa(dev, ifa);
771                 break;
772
773         case SIOCSIFBRDADDR:    /* Set the broadcast address */
774                 ret = 0;
775                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
776                         inet_del_ifa(in_dev, ifap, 0);
777                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
778                         inet_insert_ifa(ifa);
779                 }
780                 break;
781
782         case SIOCSIFDSTADDR:    /* Set the destination address */
783                 ret = 0;
784                 if (ifa->ifa_address == sin->sin_addr.s_addr)
785                         break;
786                 ret = -EINVAL;
787                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
788                         break;
789                 ret = 0;
790                 inet_del_ifa(in_dev, ifap, 0);
791                 ifa->ifa_address = sin->sin_addr.s_addr;
792                 inet_insert_ifa(ifa);
793                 break;
794
795         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
796
797                 /*
798                  *      The mask we set must be legal.
799                  */
800                 ret = -EINVAL;
801                 if (bad_mask(sin->sin_addr.s_addr, 0))
802                         break;
803                 ret = 0;
804                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
805                         __be32 old_mask = ifa->ifa_mask;
806                         inet_del_ifa(in_dev, ifap, 0);
807                         ifa->ifa_mask = sin->sin_addr.s_addr;
808                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
809
810                         /* See if current broadcast address matches
811                          * with current netmask, then recalculate
812                          * the broadcast address. Otherwise it's a
813                          * funny address, so don't touch it since
814                          * the user seems to know what (s)he's doing...
815                          */
816                         if ((dev->flags & IFF_BROADCAST) &&
817                             (ifa->ifa_prefixlen < 31) &&
818                             (ifa->ifa_broadcast ==
819                              (ifa->ifa_local|~old_mask))) {
820                                 ifa->ifa_broadcast = (ifa->ifa_local |
821                                                       ~sin->sin_addr.s_addr);
822                         }
823                         inet_insert_ifa(ifa);
824                 }
825                 break;
826         }
827 done:
828         rtnl_unlock();
829 out:
830         return ret;
        /* "get" commands: drop RTNL, then copy the filled ifreq to user. */
831 rarok:
832         rtnl_unlock();
833         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
834         goto out;
835 }
836
837 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
838 {
839         struct in_device *in_dev = __in_dev_get_rtnl(dev);
840         struct in_ifaddr *ifa;
841         struct ifreq ifr;
842         int done = 0;
843
844         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
845                 goto out;
846
847         for (; ifa; ifa = ifa->ifa_next) {
848                 if (!buf) {
849                         done += sizeof(ifr);
850                         continue;
851                 }
852                 if (len < (int) sizeof(ifr))
853                         break;
854                 memset(&ifr, 0, sizeof(struct ifreq));
855                 if (ifa->ifa_label)
856                         strcpy(ifr.ifr_name, ifa->ifa_label);
857                 else
858                         strcpy(ifr.ifr_name, dev->name);
859
860                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
861                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
862                                                                 ifa->ifa_local;
863
864                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
865                         done = -EFAULT;
866                         break;
867                 }
868                 buf  += sizeof(struct ifreq);
869                 len  -= sizeof(struct ifreq);
870                 done += sizeof(struct ifreq);
871         }
872 out:
873         return done;
874 }
875
876 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
877 {
878         __be32 addr = 0;
879         struct in_device *in_dev;
880
881         rcu_read_lock();
882         in_dev = __in_dev_get_rcu(dev);
883         if (!in_dev)
884                 goto no_in_dev;
885
886         for_primary_ifa(in_dev) {
887                 if (ifa->ifa_scope > scope)
888                         continue;
889                 if (!dst || inet_ifa_match(dst, ifa)) {
890                         addr = ifa->ifa_local;
891                         break;
892                 }
893                 if (!addr)
894                         addr = ifa->ifa_local;
895         } endfor_ifa(in_dev);
896 no_in_dev:
897         rcu_read_unlock();
898
899         if (addr)
900                 goto out;
901
902         /* Not loopback addresses on loopback should be preferred
903            in this case. It is importnat that lo is the first interface
904            in dev_base list.
905          */
906         read_lock(&dev_base_lock);
907         rcu_read_lock();
908         for_each_netdev(&init_net, dev) {
909                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
910                         continue;
911
912                 for_primary_ifa(in_dev) {
913                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
914                             ifa->ifa_scope <= scope) {
915                                 addr = ifa->ifa_local;
916                                 goto out_unlock_both;
917                         }
918                 } endfor_ifa(in_dev);
919         }
920 out_unlock_both:
921         read_unlock(&dev_base_lock);
922         rcu_read_unlock();
923 out:
924         return addr;
925 }
926
927 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
928                               __be32 local, int scope)
929 {
930         int same = 0;
931         __be32 addr = 0;
932
933         for_ifa(in_dev) {
934                 if (!addr &&
935                     (local == ifa->ifa_local || !local) &&
936                     ifa->ifa_scope <= scope) {
937                         addr = ifa->ifa_local;
938                         if (same)
939                                 break;
940                 }
941                 if (!same) {
942                         same = (!local || inet_ifa_match(local, ifa)) &&
943                                 (!dst || inet_ifa_match(dst, ifa));
944                         if (same && addr) {
945                                 if (local || !dst)
946                                         break;
947                                 /* Is the selected addr into dst subnet? */
948                                 if (inet_ifa_match(addr, ifa))
949                                         break;
950                                 /* No, then can we use new local src? */
951                                 if (ifa->ifa_scope <= scope) {
952                                         addr = ifa->ifa_local;
953                                         break;
954                                 }
955                                 /* search for large dst subnet for addr */
956                                 same = 0;
957                         }
958                 }
959         } endfor_ifa(in_dev);
960
961         return same? addr : 0;
962 }
963
964 /*
965  * Confirm that local IP address exists using wildcards:
966  * - in_dev: only on this interface, 0=any interface
967  * - dst: only in the same subnet as dst, 0=any dst
968  * - local: address, 0=autoselect the local address
969  * - scope: maximum allowed scope value for the local address
970  */
971 __be32 inet_confirm_addr(struct in_device *in_dev,
972                          __be32 dst, __be32 local, int scope)
973 {
974         __be32 addr = 0;
975         struct net_device *dev;
976         struct net *net;
977
978         if (scope != RT_SCOPE_LINK)
979                 return confirm_addr_indev(in_dev, dst, local, scope);
980
981         net = in_dev->dev->nd_net;
982         read_lock(&dev_base_lock);
983         rcu_read_lock();
984         for_each_netdev(net, dev) {
985                 if ((in_dev = __in_dev_get_rcu(dev))) {
986                         addr = confirm_addr_indev(in_dev, dst, local, scope);
987                         if (addr)
988                                 break;
989                 }
990         }
991         rcu_read_unlock();
992         read_unlock(&dev_base_lock);
993
994         return addr;
995 }
996
997 /*
998  *      Device notifier
999  */
1000
1001 int register_inetaddr_notifier(struct notifier_block *nb)
1002 {
1003         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1004 }
1005
1006 int unregister_inetaddr_notifier(struct notifier_block *nb)
1007 {
1008         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1009 }
1010
1011 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1012  * alias numbering and to create unique labels if possible.
1013 */
1014 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1015 {
1016         struct in_ifaddr *ifa;
1017         int named = 0;
1018
1019         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1020                 char old[IFNAMSIZ], *dot;
1021
1022                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1023                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1024                 if (named++ == 0)
1025                         continue;
1026                 dot = strchr(old, ':');
1027                 if (dot == NULL) {
1028                         sprintf(old, ":%d", named);
1029                         dot = old;
1030                 }
1031                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1032                         strcat(ifa->ifa_label, dot);
1033                 } else {
1034                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1035                 }
1036         }
1037 }
1038
1039 /* Called only under RTNL semaphore */
1040
1041 static int inetdev_event(struct notifier_block *this, unsigned long event,
1042                          void *ptr)
1043 {
1044         struct net_device *dev = ptr;
1045         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1046
1047         if (dev->nd_net != &init_net)
1048                 return NOTIFY_DONE;
1049
1050         ASSERT_RTNL();
1051
1052         if (!in_dev) {
1053                 if (event == NETDEV_REGISTER) {
1054                         in_dev = inetdev_init(dev);
1055                         if (!in_dev)
1056                                 return notifier_from_errno(-ENOMEM);
1057                         if (dev->flags & IFF_LOOPBACK) {
1058                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1059                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1060                         }
1061                 }
1062                 goto out;
1063         }
1064
1065         switch (event) {
1066         case NETDEV_REGISTER:
1067                 printk(KERN_DEBUG "inetdev_event: bug\n");
1068                 dev->ip_ptr = NULL;
1069                 break;
1070         case NETDEV_UP:
1071                 if (dev->mtu < 68)
1072                         break;
1073                 if (dev->flags & IFF_LOOPBACK) {
1074                         struct in_ifaddr *ifa;
1075                         if ((ifa = inet_alloc_ifa()) != NULL) {
1076                                 ifa->ifa_local =
1077                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1078                                 ifa->ifa_prefixlen = 8;
1079                                 ifa->ifa_mask = inet_make_mask(8);
1080                                 in_dev_hold(in_dev);
1081                                 ifa->ifa_dev = in_dev;
1082                                 ifa->ifa_scope = RT_SCOPE_HOST;
1083                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1084                                 inet_insert_ifa(ifa);
1085                         }
1086                 }
1087                 ip_mc_up(in_dev);
1088                 break;
1089         case NETDEV_DOWN:
1090                 ip_mc_down(in_dev);
1091                 break;
1092         case NETDEV_CHANGEMTU:
1093                 if (dev->mtu >= 68)
1094                         break;
1095                 /* MTU falled under 68, disable IP */
1096         case NETDEV_UNREGISTER:
1097                 inetdev_destroy(in_dev);
1098                 break;
1099         case NETDEV_CHANGENAME:
1100                 /* Do not notify about label change, this event is
1101                  * not interesting to applications using netlink.
1102                  */
1103                 inetdev_changename(dev, in_dev);
1104
1105                 devinet_sysctl_unregister(in_dev);
1106                 devinet_sysctl_register(in_dev);
1107                 break;
1108         }
1109 out:
1110         return NOTIFY_DONE;
1111 }
1112
1113 static struct notifier_block ip_netdev_notifier = {
1114         .notifier_call =inetdev_event,
1115 };
1116
1117 static inline size_t inet_nlmsg_size(void)
1118 {
1119         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1120                + nla_total_size(4) /* IFA_ADDRESS */
1121                + nla_total_size(4) /* IFA_LOCAL */
1122                + nla_total_size(4) /* IFA_BROADCAST */
1123                + nla_total_size(4) /* IFA_ANYCAST */
1124                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1125 }
1126
1127 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1128                             u32 pid, u32 seq, int event, unsigned int flags)
1129 {
1130         struct ifaddrmsg *ifm;
1131         struct nlmsghdr  *nlh;
1132
1133         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1134         if (nlh == NULL)
1135                 return -EMSGSIZE;
1136
1137         ifm = nlmsg_data(nlh);
1138         ifm->ifa_family = AF_INET;
1139         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1140         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1141         ifm->ifa_scope = ifa->ifa_scope;
1142         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1143
1144         if (ifa->ifa_address)
1145                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1146
1147         if (ifa->ifa_local)
1148                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1149
1150         if (ifa->ifa_broadcast)
1151                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1152
1153         if (ifa->ifa_anycast)
1154                 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1155
1156         if (ifa->ifa_label[0])
1157                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1158
1159         return nlmsg_end(skb, nlh);
1160
1161 nla_put_failure:
1162         nlmsg_cancel(skb, nlh);
1163         return -EMSGSIZE;
1164 }
1165
1166 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1167 {
1168         struct net *net = skb->sk->sk_net;
1169         int idx, ip_idx;
1170         struct net_device *dev;
1171         struct in_device *in_dev;
1172         struct in_ifaddr *ifa;
1173         int s_ip_idx, s_idx = cb->args[0];
1174
1175         if (net != &init_net)
1176                 return 0;
1177
1178         s_ip_idx = ip_idx = cb->args[1];
1179         idx = 0;
1180         for_each_netdev(net, dev) {
1181                 if (idx < s_idx)
1182                         goto cont;
1183                 if (idx > s_idx)
1184                         s_ip_idx = 0;
1185                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1186                         goto cont;
1187
1188                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1189                      ifa = ifa->ifa_next, ip_idx++) {
1190                         if (ip_idx < s_ip_idx)
1191                                 continue;
1192                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1193                                              cb->nlh->nlmsg_seq,
1194                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1195                                 goto done;
1196                 }
1197 cont:
1198                 idx++;
1199         }
1200
1201 done:
1202         cb->args[0] = idx;
1203         cb->args[1] = ip_idx;
1204
1205         return skb->len;
1206 }
1207
1208 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1209                       u32 pid)
1210 {
1211         struct sk_buff *skb;
1212         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1213         int err = -ENOBUFS;
1214         struct net *net;
1215
1216         net = ifa->ifa_dev->dev->nd_net;
1217         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1218         if (skb == NULL)
1219                 goto errout;
1220
1221         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1222         if (err < 0) {
1223                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1224                 WARN_ON(err == -EMSGSIZE);
1225                 kfree_skb(skb);
1226                 goto errout;
1227         }
1228         err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1229 errout:
1230         if (err < 0)
1231                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1232 }
1233
1234 #ifdef CONFIG_SYSCTL
1235
1236 static void devinet_copy_dflt_conf(struct net *net, int i)
1237 {
1238         struct net_device *dev;
1239
1240         read_lock(&dev_base_lock);
1241         for_each_netdev(net, dev) {
1242                 struct in_device *in_dev;
1243                 rcu_read_lock();
1244                 in_dev = __in_dev_get_rcu(dev);
1245                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1246                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1247                 rcu_read_unlock();
1248         }
1249         read_unlock(&dev_base_lock);
1250 }
1251
1252 static void inet_forward_change(struct net *net)
1253 {
1254         struct net_device *dev;
1255         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1256
1257         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1258         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1259
1260         read_lock(&dev_base_lock);
1261         for_each_netdev(net, dev) {
1262                 struct in_device *in_dev;
1263                 rcu_read_lock();
1264                 in_dev = __in_dev_get_rcu(dev);
1265                 if (in_dev)
1266                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1267                 rcu_read_unlock();
1268         }
1269         read_unlock(&dev_base_lock);
1270
1271         rt_cache_flush(0);
1272 }
1273
1274 static int devinet_conf_proc(ctl_table *ctl, int write,
1275                              struct file* filp, void __user *buffer,
1276                              size_t *lenp, loff_t *ppos)
1277 {
1278         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1279
1280         if (write) {
1281                 struct ipv4_devconf *cnf = ctl->extra1;
1282                 struct net *net = ctl->extra2;
1283                 int i = (int *)ctl->data - cnf->data;
1284
1285                 set_bit(i, cnf->state);
1286
1287                 if (cnf == net->ipv4.devconf_dflt)
1288                         devinet_copy_dflt_conf(net, i);
1289         }
1290
1291         return ret;
1292 }
1293
1294 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1295                                void __user *oldval, size_t __user *oldlenp,
1296                                void __user *newval, size_t newlen)
1297 {
1298         struct ipv4_devconf *cnf;
1299         struct net *net;
1300         int *valp = table->data;
1301         int new;
1302         int i;
1303
1304         if (!newval || !newlen)
1305                 return 0;
1306
1307         if (newlen != sizeof(int))
1308                 return -EINVAL;
1309
1310         if (get_user(new, (int __user *)newval))
1311                 return -EFAULT;
1312
1313         if (new == *valp)
1314                 return 0;
1315
1316         if (oldval && oldlenp) {
1317                 size_t len;
1318
1319                 if (get_user(len, oldlenp))
1320                         return -EFAULT;
1321
1322                 if (len) {
1323                         if (len > table->maxlen)
1324                                 len = table->maxlen;
1325                         if (copy_to_user(oldval, valp, len))
1326                                 return -EFAULT;
1327                         if (put_user(len, oldlenp))
1328                                 return -EFAULT;
1329                 }
1330         }
1331
1332         *valp = new;
1333
1334         cnf = table->extra1;
1335         net = table->extra2;
1336         i = (int *)table->data - cnf->data;
1337
1338         set_bit(i, cnf->state);
1339
1340         if (cnf == net->ipv4.devconf_dflt)
1341                 devinet_copy_dflt_conf(net, i);
1342
1343         return 1;
1344 }
1345
1346 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1347                                   struct file* filp, void __user *buffer,
1348                                   size_t *lenp, loff_t *ppos)
1349 {
1350         int *valp = ctl->data;
1351         int val = *valp;
1352         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1353
1354         if (write && *valp != val) {
1355                 struct net *net = ctl->extra2;
1356
1357                 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1358                         inet_forward_change(net);
1359                 else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1360                         rt_cache_flush(0);
1361         }
1362
1363         return ret;
1364 }
1365
1366 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1367                          struct file* filp, void __user *buffer,
1368                          size_t *lenp, loff_t *ppos)
1369 {
1370         int *valp = ctl->data;
1371         int val = *valp;
1372         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1373
1374         if (write && *valp != val)
1375                 rt_cache_flush(0);
1376
1377         return ret;
1378 }
1379
1380 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1381                                   void __user *oldval, size_t __user *oldlenp,
1382                                   void __user *newval, size_t newlen)
1383 {
1384         int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1385                                       newval, newlen);
1386
1387         if (ret == 1)
1388                 rt_cache_flush(0);
1389
1390         return ret;
1391 }
1392
1393
/*
 * Initializer helpers for struct ctl_table entries describing IPv4 device
 * settings. Each entry points at the matching slot of the global
 * ipv4_devconf (attribute number minus one).
 *
 *   DEVINET_SYSCTL_ENTRY          - generic form: mode and both handlers given
 *   DEVINET_SYSCTL_RW_ENTRY       - mode 0644, default handlers
 *   DEVINET_SYSCTL_RO_ENTRY       - mode 0444, default handlers
 *   DEVINET_SYSCTL_COMPLEX_ENTRY  - mode 0644, caller-supplied handlers
 *   DEVINET_SYSCTL_FLUSHING_ENTRY - mode 0644, handlers that also flush the
 *                                   routing cache on change
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= name, \
		.data		= &ipv4_devconf.data[NET_IPV4_CONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.strategy	= sysctl, \
		.extra1		= &ipv4_devconf, \
	}

#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, \
			     devinet_conf_proc, devinet_conf_sysctl)

#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, \
			     devinet_conf_proc, devinet_conf_sysctl)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, \
				     ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1421
1422 static struct devinet_sysctl_table {
1423         struct ctl_table_header *sysctl_header;
1424         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1425         char *dev_name;
1426 } devinet_sysctl = {
1427         .devinet_vars = {
1428                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1429                                              devinet_sysctl_forward,
1430                                              devinet_conf_sysctl),
1431                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1432
1433                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1434                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1435                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1436                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1437                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1438                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1439                                         "accept_source_route"),
1440                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1441                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1442                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1443                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1444                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1445                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1446                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1447                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1448                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1449
1450                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1451                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1452                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1453                                               "force_igmp_version"),
1454                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1455                                               "promote_secondaries"),
1456         },
1457 };
1458
1459 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1460                 int ctl_name, struct ipv4_devconf *p)
1461 {
1462         int i;
1463         struct devinet_sysctl_table *t;
1464
1465 #define DEVINET_CTL_PATH_DEV    3
1466
1467         struct ctl_path devinet_ctl_path[] = {
1468                 { .procname = "net", .ctl_name = CTL_NET, },
1469                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1470                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1471                 { /* to be set */ },
1472                 { },
1473         };
1474
1475         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1476         if (!t)
1477                 goto out;
1478
1479         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1480                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1481                 t->devinet_vars[i].extra1 = p;
1482                 t->devinet_vars[i].extra2 = net;
1483         }
1484
1485         /*
1486          * Make a copy of dev_name, because '.procname' is regarded as const
1487          * by sysctl and we wouldn't want anyone to change it under our feet
1488          * (see SIOCSIFNAME).
1489          */
1490         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1491         if (!t->dev_name)
1492                 goto free;
1493
1494         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1495         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1496
1497         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1498                         t->devinet_vars);
1499         if (!t->sysctl_header)
1500                 goto free_procname;
1501
1502         p->sysctl = t;
1503         return 0;
1504
1505 free_procname:
1506         kfree(t->dev_name);
1507 free:
1508         kfree(t);
1509 out:
1510         return -ENOBUFS;
1511 }
1512
1513 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1514 {
1515         struct devinet_sysctl_table *t = cnf->sysctl;
1516
1517         if (t == NULL)
1518                 return;
1519
1520         cnf->sysctl = NULL;
1521         unregister_sysctl_table(t->sysctl_header);
1522         kfree(t->dev_name);
1523         kfree(t);
1524 }
1525
1526 static void devinet_sysctl_register(struct in_device *idev)
1527 {
1528         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1529                         NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1530         __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1531                         idev->dev->ifindex, &idev->cnf);
1532 }
1533
1534 static void devinet_sysctl_unregister(struct in_device *idev)
1535 {
1536         __devinet_sysctl_unregister(&idev->cnf);
1537         neigh_sysctl_unregister(idev->arp_parms);
1538 }
1539
1540 static struct ctl_table ctl_forward_entry[] = {
1541         {
1542                 .ctl_name       = NET_IPV4_FORWARD,
1543                 .procname       = "ip_forward",
1544                 .data           = &ipv4_devconf.data[
1545                                         NET_IPV4_CONF_FORWARDING - 1],
1546                 .maxlen         = sizeof(int),
1547                 .mode           = 0644,
1548                 .proc_handler   = devinet_sysctl_forward,
1549                 .strategy       = devinet_conf_sysctl,
1550                 .extra1         = &ipv4_devconf,
1551                 .extra2         = &init_net,
1552         },
1553         { },
1554 };
1555
1556 static __net_initdata struct ctl_path net_ipv4_path[] = {
1557         { .procname = "net", .ctl_name = CTL_NET, },
1558         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1559         { },
1560 };
1561 #endif
1562
1563 static __net_init int devinet_init_net(struct net *net)
1564 {
1565         int err;
1566         struct ipv4_devconf *all, *dflt;
1567 #ifdef CONFIG_SYSCTL
1568         struct ctl_table *tbl = ctl_forward_entry;
1569         struct ctl_table_header *forw_hdr;
1570 #endif
1571
1572         err = -ENOMEM;
1573         all = &ipv4_devconf;
1574         dflt = &ipv4_devconf_dflt;
1575
1576         if (net != &init_net) {
1577                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1578                 if (all == NULL)
1579                         goto err_alloc_all;
1580
1581                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1582                 if (dflt == NULL)
1583                         goto err_alloc_dflt;
1584
1585 #ifdef CONFIG_SYSCTL
1586                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1587                 if (tbl == NULL)
1588                         goto err_alloc_ctl;
1589
1590                 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1591                 tbl[0].extra1 = all;
1592                 tbl[0].extra2 = net;
1593 #endif
1594         }
1595
1596 #ifdef CONFIG_SYSCTL
1597         err = __devinet_sysctl_register(net, "all",
1598                         NET_PROTO_CONF_ALL, all);
1599         if (err < 0)
1600                 goto err_reg_all;
1601
1602         err = __devinet_sysctl_register(net, "default",
1603                         NET_PROTO_CONF_DEFAULT, dflt);
1604         if (err < 0)
1605                 goto err_reg_dflt;
1606
1607         err = -ENOMEM;
1608         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1609         if (forw_hdr == NULL)
1610                 goto err_reg_ctl;
1611         net->ipv4.forw_hdr = forw_hdr;
1612 #endif
1613
1614         net->ipv4.devconf_all = all;
1615         net->ipv4.devconf_dflt = dflt;
1616         return 0;
1617
1618 #ifdef CONFIG_SYSCTL
1619 err_reg_ctl:
1620         __devinet_sysctl_unregister(dflt);
1621 err_reg_dflt:
1622         __devinet_sysctl_unregister(all);
1623 err_reg_all:
1624         if (tbl != ctl_forward_entry)
1625                 kfree(tbl);
1626 err_alloc_ctl:
1627 #endif
1628         if (dflt != &ipv4_devconf_dflt)
1629                 kfree(dflt);
1630 err_alloc_dflt:
1631         if (all != &ipv4_devconf)
1632                 kfree(all);
1633 err_alloc_all:
1634         return err;
1635 }
1636
1637 static __net_exit void devinet_exit_net(struct net *net)
1638 {
1639 #ifdef CONFIG_SYSCTL
1640         struct ctl_table *tbl;
1641
1642         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1643         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1644         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1645         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1646         kfree(tbl);
1647 #endif
1648         kfree(net->ipv4.devconf_dflt);
1649         kfree(net->ipv4.devconf_all);
1650 }
1651
1652 static __net_initdata struct pernet_operations devinet_ops = {
1653         .init = devinet_init_net,
1654         .exit = devinet_exit_net,
1655 };
1656
1657 void __init devinet_init(void)
1658 {
1659         register_pernet_subsys(&devinet_ops);
1660
1661         register_gifconf(PF_INET, inet_gifconf);
1662         register_netdevice_notifier(&ip_netdev_notifier);
1663
1664         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1665         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1666         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1667 }
1668
1669 EXPORT_SYMBOL(in_dev_finish_destroy);
1670 EXPORT_SYMBOL(inet_select_addr);
1671 EXPORT_SYMBOL(inetdev_by_index);
1672 EXPORT_SYMBOL(register_inetaddr_notifier);
1673 EXPORT_SYMBOL(unregister_inetaddr_notifier);