]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/core/neighbour.c
scm: lower SCM_MAX_FD
[net-next-2.6.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
/*
 * Compile-time debug verbosity.  NEIGH_PRINTK<n>() expands to printk()
 * when NEIGH_DEBUG >= n and to an empty statement otherwise; level 0
 * is always enabled.
 */
#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)

#define NEIGH_PRINTK0 NEIGH_PRINTK

#if NEIGH_DEBUG >= 1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#else
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#endif

#if NEIGH_DEBUG >= 2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#else
#define NEIGH_PRINTK2 NEIGH_NOPRINTK
#endif
56
57 #define PNEIGH_HASHMASK         0xF
58
59 static void neigh_timer_handler(unsigned long arg);
60 static void __neigh_notify(struct neighbour *n, int type, int flags);
61 static void neigh_update_notify(struct neighbour *neigh);
62 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
63
64 static struct neigh_table *neigh_tables;
65 #ifdef CONFIG_PROC_FS
66 static const struct file_operations neigh_stat_seq_fops;
67 #endif
68
69 /*
70    Neighbour hash table buckets are protected with rwlock tbl->lock.
71
72    - All the scans/updates to hash buckets MUST be made under this lock.
73    - NOTHING clever should be made under this lock: no callbacks
74      to protocol backends, no attempts to send something to network.
75      It will result in deadlocks, if backend/driver wants to use neighbour
76      cache.
77    - If the entry requires some non-trivial actions, increase
78      its reference count and release table lock.
79
80    Neighbour entries are protected:
81    - with reference count.
82    - with rwlock neigh->lock
83
84    Reference count prevents destruction.
85
86    neigh->lock mainly serializes ll address data and its validity state.
87    However, the same lock is used to protect another entry fields:
88     - timer
89     - resolution queue
90
91    Again, nothing clever shall be made under neigh->lock,
92    the most complicated procedure, which we allow is dev->hard_header.
93    It is supposed, that dev->hard_header is simplistic and does
94    not make callbacks to neighbour tables.
95
96    The last lock is neigh_tbl_lock. It is a pure SMP lock protecting the
97    list of neighbour tables. This list is used only in process context.
98  */
99
100 static DEFINE_RWLOCK(neigh_tbl_lock);
101
102 static int neigh_blackhole(struct sk_buff *skb)
103 {
104         kfree_skb(skb);
105         return -ENETDOWN;
106 }
107
108 static void neigh_cleanup_and_release(struct neighbour *neigh)
109 {
110         if (neigh->parms->neigh_cleanup)
111                 neigh->parms->neigh_cleanup(neigh);
112
113         __neigh_notify(neigh, RTM_DELNEIGH, 0);
114         neigh_release(neigh);
115 }
116
/*
 * Pick a random reachable time in the interval (1/2)*base...(3/2)*base.
 * This corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 *
 * A base of 0 yields 0 (and avoids a modulo by zero).
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (net_random() % base) + (base >> 1);
}
127 EXPORT_SYMBOL(neigh_rand_reach_time);
128
129
130 static int neigh_forced_gc(struct neigh_table *tbl)
131 {
132         int shrunk = 0;
133         int i;
134         struct neigh_hash_table *nht;
135
136         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
137
138         write_lock_bh(&tbl->lock);
139         nht = rcu_dereference_protected(tbl->nht,
140                                         lockdep_is_held(&tbl->lock));
141         for (i = 0; i <= nht->hash_mask; i++) {
142                 struct neighbour *n;
143                 struct neighbour __rcu **np;
144
145                 np = &nht->hash_buckets[i];
146                 while ((n = rcu_dereference_protected(*np,
147                                         lockdep_is_held(&tbl->lock))) != NULL) {
148                         /* Neighbour record may be discarded if:
149                          * - nobody refers to it.
150                          * - it is not permanent
151                          */
152                         write_lock(&n->lock);
153                         if (atomic_read(&n->refcnt) == 1 &&
154                             !(n->nud_state & NUD_PERMANENT)) {
155                                 rcu_assign_pointer(*np,
156                                         rcu_dereference_protected(n->next,
157                                                   lockdep_is_held(&tbl->lock)));
158                                 n->dead = 1;
159                                 shrunk  = 1;
160                                 write_unlock(&n->lock);
161                                 neigh_cleanup_and_release(n);
162                                 continue;
163                         }
164                         write_unlock(&n->lock);
165                         np = &n->next;
166                 }
167         }
168
169         tbl->last_flush = jiffies;
170
171         write_unlock_bh(&tbl->lock);
172
173         return shrunk;
174 }
175
176 static void neigh_add_timer(struct neighbour *n, unsigned long when)
177 {
178         neigh_hold(n);
179         if (unlikely(mod_timer(&n->timer, when))) {
180                 printk("NEIGH: BUG, double timer add, state is %x\n",
181                        n->nud_state);
182                 dump_stack();
183         }
184 }
185
186 static int neigh_del_timer(struct neighbour *n)
187 {
188         if ((n->nud_state & NUD_IN_TIMER) &&
189             del_timer(&n->timer)) {
190                 neigh_release(n);
191                 return 1;
192         }
193         return 0;
194 }
195
196 static void pneigh_queue_purge(struct sk_buff_head *list)
197 {
198         struct sk_buff *skb;
199
200         while ((skb = skb_dequeue(list)) != NULL) {
201                 dev_put(skb->dev);
202                 kfree_skb(skb);
203         }
204 }
205
206 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
207 {
208         int i;
209         struct neigh_hash_table *nht;
210
211         nht = rcu_dereference_protected(tbl->nht,
212                                         lockdep_is_held(&tbl->lock));
213
214         for (i = 0; i <= nht->hash_mask; i++) {
215                 struct neighbour *n;
216                 struct neighbour __rcu **np = &nht->hash_buckets[i];
217
218                 while ((n = rcu_dereference_protected(*np,
219                                         lockdep_is_held(&tbl->lock))) != NULL) {
220                         if (dev && n->dev != dev) {
221                                 np = &n->next;
222                                 continue;
223                         }
224                         rcu_assign_pointer(*np,
225                                    rcu_dereference_protected(n->next,
226                                                 lockdep_is_held(&tbl->lock)));
227                         write_lock(&n->lock);
228                         neigh_del_timer(n);
229                         n->dead = 1;
230
231                         if (atomic_read(&n->refcnt) != 1) {
232                                 /* The most unpleasant situation.
233                                    We must destroy neighbour entry,
234                                    but someone still uses it.
235
236                                    The destroy will be delayed until
237                                    the last user releases us, but
238                                    we must kill timers etc. and move
239                                    it to safe state.
240                                  */
241                                 skb_queue_purge(&n->arp_queue);
242                                 n->output = neigh_blackhole;
243                                 if (n->nud_state & NUD_VALID)
244                                         n->nud_state = NUD_NOARP;
245                                 else
246                                         n->nud_state = NUD_NONE;
247                                 NEIGH_PRINTK2("neigh %p is stray.\n", n);
248                         }
249                         write_unlock(&n->lock);
250                         neigh_cleanup_and_release(n);
251                 }
252         }
253 }
254
255 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
256 {
257         write_lock_bh(&tbl->lock);
258         neigh_flush_dev(tbl, dev);
259         write_unlock_bh(&tbl->lock);
260 }
261 EXPORT_SYMBOL(neigh_changeaddr);
262
263 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
264 {
265         write_lock_bh(&tbl->lock);
266         neigh_flush_dev(tbl, dev);
267         pneigh_ifdown(tbl, dev);
268         write_unlock_bh(&tbl->lock);
269
270         del_timer_sync(&tbl->proxy_timer);
271         pneigh_queue_purge(&tbl->proxy_queue);
272         return 0;
273 }
274 EXPORT_SYMBOL(neigh_ifdown);
275
276 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
277 {
278         struct neighbour *n = NULL;
279         unsigned long now = jiffies;
280         int entries;
281
282         entries = atomic_inc_return(&tbl->entries) - 1;
283         if (entries >= tbl->gc_thresh3 ||
284             (entries >= tbl->gc_thresh2 &&
285              time_after(now, tbl->last_flush + 5 * HZ))) {
286                 if (!neigh_forced_gc(tbl) &&
287                     entries >= tbl->gc_thresh3)
288                         goto out_entries;
289         }
290
291         n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
292         if (!n)
293                 goto out_entries;
294
295         skb_queue_head_init(&n->arp_queue);
296         rwlock_init(&n->lock);
297         seqlock_init(&n->ha_lock);
298         n->updated        = n->used = now;
299         n->nud_state      = NUD_NONE;
300         n->output         = neigh_blackhole;
301         n->parms          = neigh_parms_clone(&tbl->parms);
302         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
303
304         NEIGH_CACHE_STAT_INC(tbl, allocs);
305         n->tbl            = tbl;
306         atomic_set(&n->refcnt, 1);
307         n->dead           = 1;
308 out:
309         return n;
310
311 out_entries:
312         atomic_dec(&tbl->entries);
313         goto out;
314 }
315
316 static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
317 {
318         size_t size = entries * sizeof(struct neighbour *);
319         struct neigh_hash_table *ret;
320         struct neighbour **buckets;
321
322         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
323         if (!ret)
324                 return NULL;
325         if (size <= PAGE_SIZE)
326                 buckets = kzalloc(size, GFP_ATOMIC);
327         else
328                 buckets = (struct neighbour **)
329                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
330                                            get_order(size));
331         if (!buckets) {
332                 kfree(ret);
333                 return NULL;
334         }
335         rcu_assign_pointer(ret->hash_buckets, buckets);
336         ret->hash_mask = entries - 1;
337         get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
338         return ret;
339 }
340
341 static void neigh_hash_free_rcu(struct rcu_head *head)
342 {
343         struct neigh_hash_table *nht = container_of(head,
344                                                     struct neigh_hash_table,
345                                                     rcu);
346         size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
347         struct neighbour **buckets = nht->hash_buckets;
348
349         if (size <= PAGE_SIZE)
350                 kfree(buckets);
351         else
352                 free_pages((unsigned long)buckets, get_order(size));
353         kfree(nht);
354 }
355
356 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
357                                                 unsigned long new_entries)
358 {
359         unsigned int i, hash;
360         struct neigh_hash_table *new_nht, *old_nht;
361
362         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
363
364         BUG_ON(!is_power_of_2(new_entries));
365         old_nht = rcu_dereference_protected(tbl->nht,
366                                             lockdep_is_held(&tbl->lock));
367         new_nht = neigh_hash_alloc(new_entries);
368         if (!new_nht)
369                 return old_nht;
370
371         for (i = 0; i <= old_nht->hash_mask; i++) {
372                 struct neighbour *n, *next;
373
374                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
375                                                    lockdep_is_held(&tbl->lock));
376                      n != NULL;
377                      n = next) {
378                         hash = tbl->hash(n->primary_key, n->dev,
379                                          new_nht->hash_rnd);
380
381                         hash &= new_nht->hash_mask;
382                         next = rcu_dereference_protected(n->next,
383                                                 lockdep_is_held(&tbl->lock));
384
385                         rcu_assign_pointer(n->next,
386                                            rcu_dereference_protected(
387                                                 new_nht->hash_buckets[hash],
388                                                 lockdep_is_held(&tbl->lock)));
389                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
390                 }
391         }
392
393         rcu_assign_pointer(tbl->nht, new_nht);
394         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
395         return new_nht;
396 }
397
398 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
399                                struct net_device *dev)
400 {
401         struct neighbour *n;
402         int key_len = tbl->key_len;
403         u32 hash_val;
404         struct neigh_hash_table *nht;
405
406         NEIGH_CACHE_STAT_INC(tbl, lookups);
407
408         rcu_read_lock_bh();
409         nht = rcu_dereference_bh(tbl->nht);
410         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
411
412         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
413              n != NULL;
414              n = rcu_dereference_bh(n->next)) {
415                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
416                         if (!atomic_inc_not_zero(&n->refcnt))
417                                 n = NULL;
418                         NEIGH_CACHE_STAT_INC(tbl, hits);
419                         break;
420                 }
421         }
422
423         rcu_read_unlock_bh();
424         return n;
425 }
426 EXPORT_SYMBOL(neigh_lookup);
427
428 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
429                                      const void *pkey)
430 {
431         struct neighbour *n;
432         int key_len = tbl->key_len;
433         u32 hash_val;
434         struct neigh_hash_table *nht;
435
436         NEIGH_CACHE_STAT_INC(tbl, lookups);
437
438         rcu_read_lock_bh();
439         nht = rcu_dereference_bh(tbl->nht);
440         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
441
442         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
443              n != NULL;
444              n = rcu_dereference_bh(n->next)) {
445                 if (!memcmp(n->primary_key, pkey, key_len) &&
446                     net_eq(dev_net(n->dev), net)) {
447                         if (!atomic_inc_not_zero(&n->refcnt))
448                                 n = NULL;
449                         NEIGH_CACHE_STAT_INC(tbl, hits);
450                         break;
451                 }
452         }
453
454         rcu_read_unlock_bh();
455         return n;
456 }
457 EXPORT_SYMBOL(neigh_lookup_nodev);
458
459 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
460                                struct net_device *dev)
461 {
462         u32 hash_val;
463         int key_len = tbl->key_len;
464         int error;
465         struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
466         struct neigh_hash_table *nht;
467
468         if (!n) {
469                 rc = ERR_PTR(-ENOBUFS);
470                 goto out;
471         }
472
473         memcpy(n->primary_key, pkey, key_len);
474         n->dev = dev;
475         dev_hold(dev);
476
477         /* Protocol specific setup. */
478         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
479                 rc = ERR_PTR(error);
480                 goto out_neigh_release;
481         }
482
483         /* Device specific setup. */
484         if (n->parms->neigh_setup &&
485             (error = n->parms->neigh_setup(n)) < 0) {
486                 rc = ERR_PTR(error);
487                 goto out_neigh_release;
488         }
489
490         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
491
492         write_lock_bh(&tbl->lock);
493         nht = rcu_dereference_protected(tbl->nht,
494                                         lockdep_is_held(&tbl->lock));
495
496         if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
497                 nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
498
499         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
500
501         if (n->parms->dead) {
502                 rc = ERR_PTR(-EINVAL);
503                 goto out_tbl_unlock;
504         }
505
506         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
507                                             lockdep_is_held(&tbl->lock));
508              n1 != NULL;
509              n1 = rcu_dereference_protected(n1->next,
510                         lockdep_is_held(&tbl->lock))) {
511                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
512                         neigh_hold(n1);
513                         rc = n1;
514                         goto out_tbl_unlock;
515                 }
516         }
517
518         n->dead = 0;
519         neigh_hold(n);
520         rcu_assign_pointer(n->next,
521                            rcu_dereference_protected(nht->hash_buckets[hash_val],
522                                                      lockdep_is_held(&tbl->lock)));
523         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
524         write_unlock_bh(&tbl->lock);
525         NEIGH_PRINTK2("neigh %p is created.\n", n);
526         rc = n;
527 out:
528         return rc;
529 out_tbl_unlock:
530         write_unlock_bh(&tbl->lock);
531 out_neigh_release:
532         neigh_release(n);
533         goto out;
534 }
535 EXPORT_SYMBOL(neigh_create);
536
537 static u32 pneigh_hash(const void *pkey, int key_len)
538 {
539         u32 hash_val = *(u32 *)(pkey + key_len - 4);
540         hash_val ^= (hash_val >> 16);
541         hash_val ^= hash_val >> 8;
542         hash_val ^= hash_val >> 4;
543         hash_val &= PNEIGH_HASHMASK;
544         return hash_val;
545 }
546
547 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
548                                               struct net *net,
549                                               const void *pkey,
550                                               int key_len,
551                                               struct net_device *dev)
552 {
553         while (n) {
554                 if (!memcmp(n->key, pkey, key_len) &&
555                     net_eq(pneigh_net(n), net) &&
556                     (n->dev == dev || !n->dev))
557                         return n;
558                 n = n->next;
559         }
560         return NULL;
561 }
562
563 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
564                 struct net *net, const void *pkey, struct net_device *dev)
565 {
566         int key_len = tbl->key_len;
567         u32 hash_val = pneigh_hash(pkey, key_len);
568
569         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
570                                  net, pkey, key_len, dev);
571 }
572 EXPORT_SYMBOL_GPL(__pneigh_lookup);
573
574 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
575                                     struct net *net, const void *pkey,
576                                     struct net_device *dev, int creat)
577 {
578         struct pneigh_entry *n;
579         int key_len = tbl->key_len;
580         u32 hash_val = pneigh_hash(pkey, key_len);
581
582         read_lock_bh(&tbl->lock);
583         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
584                               net, pkey, key_len, dev);
585         read_unlock_bh(&tbl->lock);
586
587         if (n || !creat)
588                 goto out;
589
590         ASSERT_RTNL();
591
592         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
593         if (!n)
594                 goto out;
595
596         write_pnet(&n->net, hold_net(net));
597         memcpy(n->key, pkey, key_len);
598         n->dev = dev;
599         if (dev)
600                 dev_hold(dev);
601
602         if (tbl->pconstructor && tbl->pconstructor(n)) {
603                 if (dev)
604                         dev_put(dev);
605                 release_net(net);
606                 kfree(n);
607                 n = NULL;
608                 goto out;
609         }
610
611         write_lock_bh(&tbl->lock);
612         n->next = tbl->phash_buckets[hash_val];
613         tbl->phash_buckets[hash_val] = n;
614         write_unlock_bh(&tbl->lock);
615 out:
616         return n;
617 }
618 EXPORT_SYMBOL(pneigh_lookup);
619
620
621 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
622                   struct net_device *dev)
623 {
624         struct pneigh_entry *n, **np;
625         int key_len = tbl->key_len;
626         u32 hash_val = pneigh_hash(pkey, key_len);
627
628         write_lock_bh(&tbl->lock);
629         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
630              np = &n->next) {
631                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
632                     net_eq(pneigh_net(n), net)) {
633                         *np = n->next;
634                         write_unlock_bh(&tbl->lock);
635                         if (tbl->pdestructor)
636                                 tbl->pdestructor(n);
637                         if (n->dev)
638                                 dev_put(n->dev);
639                         release_net(pneigh_net(n));
640                         kfree(n);
641                         return 0;
642                 }
643         }
644         write_unlock_bh(&tbl->lock);
645         return -ENOENT;
646 }
647
648 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
649 {
650         struct pneigh_entry *n, **np;
651         u32 h;
652
653         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
654                 np = &tbl->phash_buckets[h];
655                 while ((n = *np) != NULL) {
656                         if (!dev || n->dev == dev) {
657                                 *np = n->next;
658                                 if (tbl->pdestructor)
659                                         tbl->pdestructor(n);
660                                 if (n->dev)
661                                         dev_put(n->dev);
662                                 release_net(pneigh_net(n));
663                                 kfree(n);
664                                 continue;
665                         }
666                         np = &n->next;
667                 }
668         }
669         return -ENOENT;
670 }
671
672 static void neigh_parms_destroy(struct neigh_parms *parms);
673
674 static inline void neigh_parms_put(struct neigh_parms *parms)
675 {
676         if (atomic_dec_and_test(&parms->refcnt))
677                 neigh_parms_destroy(parms);
678 }
679
680 static void neigh_destroy_rcu(struct rcu_head *head)
681 {
682         struct neighbour *neigh = container_of(head, struct neighbour, rcu);
683
684         kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
685 }
686 /*
687  *      neighbour must already be out of the table;
688  *
689  */
690 void neigh_destroy(struct neighbour *neigh)
691 {
692         struct hh_cache *hh;
693
694         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
695
696         if (!neigh->dead) {
697                 printk(KERN_WARNING
698                        "Destroying alive neighbour %p\n", neigh);
699                 dump_stack();
700                 return;
701         }
702
703         if (neigh_del_timer(neigh))
704                 printk(KERN_WARNING "Impossible event.\n");
705
706         while ((hh = neigh->hh) != NULL) {
707                 neigh->hh = hh->hh_next;
708                 hh->hh_next = NULL;
709
710                 write_seqlock_bh(&hh->hh_lock);
711                 hh->hh_output = neigh_blackhole;
712                 write_sequnlock_bh(&hh->hh_lock);
713                 hh_cache_put(hh);
714         }
715
716         skb_queue_purge(&neigh->arp_queue);
717
718         dev_put(neigh->dev);
719         neigh_parms_put(neigh->parms);
720
721         NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
722
723         atomic_dec(&neigh->tbl->entries);
724         call_rcu(&neigh->rcu, neigh_destroy_rcu);
725 }
726 EXPORT_SYMBOL(neigh_destroy);
727
728 /* Neighbour state is suspicious;
729    disable fast path.
730
731    Called with write_locked neigh.
732  */
733 static void neigh_suspect(struct neighbour *neigh)
734 {
735         struct hh_cache *hh;
736
737         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
738
739         neigh->output = neigh->ops->output;
740
741         for (hh = neigh->hh; hh; hh = hh->hh_next)
742                 hh->hh_output = neigh->ops->output;
743 }
744
745 /* Neighbour state is OK;
746    enable fast path.
747
748    Called with write_locked neigh.
749  */
750 static void neigh_connect(struct neighbour *neigh)
751 {
752         struct hh_cache *hh;
753
754         NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
755
756         neigh->output = neigh->ops->connected_output;
757
758         for (hh = neigh->hh; hh; hh = hh->hh_next)
759                 hh->hh_output = neigh->ops->hh_output;
760 }
761
762 static void neigh_periodic_work(struct work_struct *work)
763 {
764         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
765         struct neighbour *n;
766         struct neighbour __rcu **np;
767         unsigned int i;
768         struct neigh_hash_table *nht;
769
770         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
771
772         write_lock_bh(&tbl->lock);
773         nht = rcu_dereference_protected(tbl->nht,
774                                         lockdep_is_held(&tbl->lock));
775
776         /*
777          *      periodically recompute ReachableTime from random function
778          */
779
780         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
781                 struct neigh_parms *p;
782                 tbl->last_rand = jiffies;
783                 for (p = &tbl->parms; p; p = p->next)
784                         p->reachable_time =
785                                 neigh_rand_reach_time(p->base_reachable_time);
786         }
787
788         for (i = 0 ; i <= nht->hash_mask; i++) {
789                 np = &nht->hash_buckets[i];
790
791                 while ((n = rcu_dereference_protected(*np,
792                                 lockdep_is_held(&tbl->lock))) != NULL) {
793                         unsigned int state;
794
795                         write_lock(&n->lock);
796
797                         state = n->nud_state;
798                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
799                                 write_unlock(&n->lock);
800                                 goto next_elt;
801                         }
802
803                         if (time_before(n->used, n->confirmed))
804                                 n->used = n->confirmed;
805
806                         if (atomic_read(&n->refcnt) == 1 &&
807                             (state == NUD_FAILED ||
808                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
809                                 *np = n->next;
810                                 n->dead = 1;
811                                 write_unlock(&n->lock);
812                                 neigh_cleanup_and_release(n);
813                                 continue;
814                         }
815                         write_unlock(&n->lock);
816
817 next_elt:
818                         np = &n->next;
819                 }
820                 /*
821                  * It's fine to release lock here, even if hash table
822                  * grows while we are preempted.
823                  */
824                 write_unlock_bh(&tbl->lock);
825                 cond_resched();
826                 write_lock_bh(&tbl->lock);
827         }
828         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
829          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
830          * base_reachable_time.
831          */
832         schedule_delayed_work(&tbl->gc_work,
833                               tbl->parms.base_reachable_time >> 1);
834         write_unlock_bh(&tbl->lock);
835 }
836
837 static __inline__ int neigh_max_probes(struct neighbour *n)
838 {
839         struct neigh_parms *p = n->parms;
840         return (n->nud_state & NUD_PROBE) ?
841                 p->ucast_probes :
842                 p->ucast_probes + p->app_probes + p->mcast_probes;
843 }
844
845 static void neigh_invalidate(struct neighbour *neigh)
846         __releases(neigh->lock)
847         __acquires(neigh->lock)
848 {
849         struct sk_buff *skb;
850
851         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
852         NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
853         neigh->updated = jiffies;
854
855         /* It is very thin place. report_unreachable is very complicated
856            routine. Particularly, it can hit the same neighbour entry!
857
858            So that, we try to be accurate and avoid dead loop. --ANK
859          */
860         while (neigh->nud_state == NUD_FAILED &&
861                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
862                 write_unlock(&neigh->lock);
863                 neigh->ops->error_report(neigh, skb);
864                 write_lock(&neigh->lock);
865         }
866         skb_queue_purge(&neigh->arp_queue);
867 }
868
869 /* Called when a timer expires for a neighbour entry. */
870
871 static void neigh_timer_handler(unsigned long arg)
872 {
873         unsigned long now, next;
874         struct neighbour *neigh = (struct neighbour *)arg;
875         unsigned state;
876         int notify = 0;
877
878         write_lock(&neigh->lock);
879
880         state = neigh->nud_state;
881         now = jiffies;
882         next = now + HZ;
883
884         if (!(state & NUD_IN_TIMER)) {
885 #ifndef CONFIG_SMP
886                 printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
887 #endif
888                 goto out;
889         }
890
891         if (state & NUD_REACHABLE) {
892                 if (time_before_eq(now,
893                                    neigh->confirmed + neigh->parms->reachable_time)) {
894                         NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
895                         next = neigh->confirmed + neigh->parms->reachable_time;
896                 } else if (time_before_eq(now,
897                                           neigh->used + neigh->parms->delay_probe_time)) {
898                         NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
899                         neigh->nud_state = NUD_DELAY;
900                         neigh->updated = jiffies;
901                         neigh_suspect(neigh);
902                         next = now + neigh->parms->delay_probe_time;
903                 } else {
904                         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
905                         neigh->nud_state = NUD_STALE;
906                         neigh->updated = jiffies;
907                         neigh_suspect(neigh);
908                         notify = 1;
909                 }
910         } else if (state & NUD_DELAY) {
911                 if (time_before_eq(now,
912                                    neigh->confirmed + neigh->parms->delay_probe_time)) {
913                         NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
914                         neigh->nud_state = NUD_REACHABLE;
915                         neigh->updated = jiffies;
916                         neigh_connect(neigh);
917                         notify = 1;
918                         next = neigh->confirmed + neigh->parms->reachable_time;
919                 } else {
920                         NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
921                         neigh->nud_state = NUD_PROBE;
922                         neigh->updated = jiffies;
923                         atomic_set(&neigh->probes, 0);
924                         next = now + neigh->parms->retrans_time;
925                 }
926         } else {
927                 /* NUD_PROBE|NUD_INCOMPLETE */
928                 next = now + neigh->parms->retrans_time;
929         }
930
931         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
932             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
933                 neigh->nud_state = NUD_FAILED;
934                 notify = 1;
935                 neigh_invalidate(neigh);
936         }
937
938         if (neigh->nud_state & NUD_IN_TIMER) {
939                 if (time_before(next, jiffies + HZ/2))
940                         next = jiffies + HZ/2;
941                 if (!mod_timer(&neigh->timer, next))
942                         neigh_hold(neigh);
943         }
944         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
945                 struct sk_buff *skb = skb_peek(&neigh->arp_queue);
946                 /* keep skb alive even if arp_queue overflows */
947                 if (skb)
948                         skb = skb_copy(skb, GFP_ATOMIC);
949                 write_unlock(&neigh->lock);
950                 neigh->ops->solicit(neigh, skb);
951                 atomic_inc(&neigh->probes);
952                 kfree_skb(skb);
953         } else {
954 out:
955                 write_unlock(&neigh->lock);
956         }
957
958         if (notify)
959                 neigh_update_notify(neigh);
960
961         neigh_release(neigh);
962 }
963
964 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
965 {
966         int rc;
967         unsigned long now;
968
969         write_lock_bh(&neigh->lock);
970
971         rc = 0;
972         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
973                 goto out_unlock_bh;
974
975         now = jiffies;
976
977         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
978                 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
979                         atomic_set(&neigh->probes, neigh->parms->ucast_probes);
980                         neigh->nud_state     = NUD_INCOMPLETE;
981                         neigh->updated = jiffies;
982                         neigh_add_timer(neigh, now + 1);
983                 } else {
984                         neigh->nud_state = NUD_FAILED;
985                         neigh->updated = jiffies;
986                         write_unlock_bh(&neigh->lock);
987
988                         kfree_skb(skb);
989                         return 1;
990                 }
991         } else if (neigh->nud_state & NUD_STALE) {
992                 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
993                 neigh->nud_state = NUD_DELAY;
994                 neigh->updated = jiffies;
995                 neigh_add_timer(neigh,
996                                 jiffies + neigh->parms->delay_probe_time);
997         }
998
999         if (neigh->nud_state == NUD_INCOMPLETE) {
1000                 if (skb) {
1001                         if (skb_queue_len(&neigh->arp_queue) >=
1002                             neigh->parms->queue_len) {
1003                                 struct sk_buff *buff;
1004                                 buff = __skb_dequeue(&neigh->arp_queue);
1005                                 kfree_skb(buff);
1006                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1007                         }
1008                         skb_dst_force(skb);
1009                         __skb_queue_tail(&neigh->arp_queue, skb);
1010                 }
1011                 rc = 1;
1012         }
1013 out_unlock_bh:
1014         write_unlock_bh(&neigh->lock);
1015         return rc;
1016 }
1017 EXPORT_SYMBOL(__neigh_event_send);
1018
1019 static void neigh_update_hhs(const struct neighbour *neigh)
1020 {
1021         struct hh_cache *hh;
1022         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1023                 = NULL;
1024
1025         if (neigh->dev->header_ops)
1026                 update = neigh->dev->header_ops->cache_update;
1027
1028         if (update) {
1029                 for (hh = neigh->hh; hh; hh = hh->hh_next) {
1030                         write_seqlock_bh(&hh->hh_lock);
1031                         update(hh, neigh->dev, neigh->ha);
1032                         write_sequnlock_bh(&hh->hh_lock);
1033                 }
1034         }
1035 }
1036
1037
1038
1039 /* Generic update routine.
1040    -- lladdr is new lladdr or NULL, if it is not supplied.
1041    -- new    is new state.
1042    -- flags
1043         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1044                                 if it is different.
1045         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1046                                 lladdr instead of overriding it
1047                                 if it is different.
1048                                 It also allows to retain current state
1049                                 if lladdr is unchanged.
1050         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1051
1052         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1053                                 NTF_ROUTER flag.
1054         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1055                                 a router.
1056
1057    Caller MUST hold reference count on the entry.
1058  */
1059
1060 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1061                  u32 flags)
1062 {
1063         u8 old;
1064         int err;
1065         int notify = 0;
1066         struct net_device *dev;
1067         int update_isrouter = 0;
1068
1069         write_lock_bh(&neigh->lock);
1070
1071         dev    = neigh->dev;
1072         old    = neigh->nud_state;
1073         err    = -EPERM;
1074
1075         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1076             (old & (NUD_NOARP | NUD_PERMANENT)))
1077                 goto out;
1078
1079         if (!(new & NUD_VALID)) {
1080                 neigh_del_timer(neigh);
1081                 if (old & NUD_CONNECTED)
1082                         neigh_suspect(neigh);
1083                 neigh->nud_state = new;
1084                 err = 0;
1085                 notify = old & NUD_VALID;
1086                 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1087                     (new & NUD_FAILED)) {
1088                         neigh_invalidate(neigh);
1089                         notify = 1;
1090                 }
1091                 goto out;
1092         }
1093
1094         /* Compare new lladdr with cached one */
1095         if (!dev->addr_len) {
1096                 /* First case: device needs no address. */
1097                 lladdr = neigh->ha;
1098         } else if (lladdr) {
1099                 /* The second case: if something is already cached
1100                    and a new address is proposed:
1101                    - compare new & old
1102                    - if they are different, check override flag
1103                  */
1104                 if ((old & NUD_VALID) &&
1105                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1106                         lladdr = neigh->ha;
1107         } else {
1108                 /* No address is supplied; if we know something,
1109                    use it, otherwise discard the request.
1110                  */
1111                 err = -EINVAL;
1112                 if (!(old & NUD_VALID))
1113                         goto out;
1114                 lladdr = neigh->ha;
1115         }
1116
1117         if (new & NUD_CONNECTED)
1118                 neigh->confirmed = jiffies;
1119         neigh->updated = jiffies;
1120
1121         /* If entry was valid and address is not changed,
1122            do not change entry state, if new one is STALE.
1123          */
1124         err = 0;
1125         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1126         if (old & NUD_VALID) {
1127                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1128                         update_isrouter = 0;
1129                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1130                             (old & NUD_CONNECTED)) {
1131                                 lladdr = neigh->ha;
1132                                 new = NUD_STALE;
1133                         } else
1134                                 goto out;
1135                 } else {
1136                         if (lladdr == neigh->ha && new == NUD_STALE &&
1137                             ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1138                              (old & NUD_CONNECTED))
1139                             )
1140                                 new = old;
1141                 }
1142         }
1143
1144         if (new != old) {
1145                 neigh_del_timer(neigh);
1146                 if (new & NUD_IN_TIMER)
1147                         neigh_add_timer(neigh, (jiffies +
1148                                                 ((new & NUD_REACHABLE) ?
1149                                                  neigh->parms->reachable_time :
1150                                                  0)));
1151                 neigh->nud_state = new;
1152         }
1153
1154         if (lladdr != neigh->ha) {
1155                 write_seqlock(&neigh->ha_lock);
1156                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1157                 write_sequnlock(&neigh->ha_lock);
1158                 neigh_update_hhs(neigh);
1159                 if (!(new & NUD_CONNECTED))
1160                         neigh->confirmed = jiffies -
1161                                       (neigh->parms->base_reachable_time << 1);
1162                 notify = 1;
1163         }
1164         if (new == old)
1165                 goto out;
1166         if (new & NUD_CONNECTED)
1167                 neigh_connect(neigh);
1168         else
1169                 neigh_suspect(neigh);
1170         if (!(old & NUD_VALID)) {
1171                 struct sk_buff *skb;
1172
1173                 /* Again: avoid dead loop if something went wrong */
1174
1175                 while (neigh->nud_state & NUD_VALID &&
1176                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1177                         struct neighbour *n1 = neigh;
1178                         write_unlock_bh(&neigh->lock);
1179                         /* On shaper/eql skb->dst->neighbour != neigh :( */
1180                         if (skb_dst(skb) && skb_dst(skb)->neighbour)
1181                                 n1 = skb_dst(skb)->neighbour;
1182                         n1->output(skb);
1183                         write_lock_bh(&neigh->lock);
1184                 }
1185                 skb_queue_purge(&neigh->arp_queue);
1186         }
1187 out:
1188         if (update_isrouter) {
1189                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1190                         (neigh->flags | NTF_ROUTER) :
1191                         (neigh->flags & ~NTF_ROUTER);
1192         }
1193         write_unlock_bh(&neigh->lock);
1194
1195         if (notify)
1196                 neigh_update_notify(neigh);
1197
1198         return err;
1199 }
1200 EXPORT_SYMBOL(neigh_update);
1201
1202 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1203                                  u8 *lladdr, void *saddr,
1204                                  struct net_device *dev)
1205 {
1206         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1207                                                  lladdr || !dev->addr_len);
1208         if (neigh)
1209                 neigh_update(neigh, lladdr, NUD_STALE,
1210                              NEIGH_UPDATE_F_OVERRIDE);
1211         return neigh;
1212 }
1213 EXPORT_SYMBOL(neigh_event_ns);
1214
1215 static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
1216                                    __be16 protocol)
1217 {
1218         struct hh_cache *hh;
1219
1220         smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
1221         for (hh = n->hh; hh; hh = hh->hh_next) {
1222                 if (hh->hh_type == protocol) {
1223                         atomic_inc(&hh->hh_refcnt);
1224                         if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
1225                                 hh_cache_put(hh);
1226                         return true;
1227                 }
1228         }
1229         return false;
1230 }
1231
1232 /* called with read_lock_bh(&n->lock); */
1233 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1234                           __be16 protocol)
1235 {
1236         struct hh_cache *hh;
1237         struct net_device *dev = dst->dev;
1238
1239         if (likely(neigh_hh_lookup(n, dst, protocol)))
1240                 return;
1241
1242         /* slow path */
1243         hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
1244         if (!hh)
1245                 return;
1246
1247         seqlock_init(&hh->hh_lock);
1248         hh->hh_type = protocol;
1249         atomic_set(&hh->hh_refcnt, 2);
1250
1251         if (dev->header_ops->cache(n, hh)) {
1252                 kfree(hh);
1253                 return;
1254         }
1255
1256         write_lock_bh(&n->lock);
1257
1258         /* must check if another thread already did the insert */
1259         if (neigh_hh_lookup(n, dst, protocol)) {
1260                 kfree(hh);
1261                 goto end;
1262         }
1263
1264         if (n->nud_state & NUD_CONNECTED)
1265                 hh->hh_output = n->ops->hh_output;
1266         else
1267                 hh->hh_output = n->ops->output;
1268
1269         hh->hh_next = n->hh;
1270         smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
1271         n->hh       = hh;
1272
1273         if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
1274                 hh_cache_put(hh);
1275 end:
1276         write_unlock_bh(&n->lock);
1277 }
1278
1279 /* This function can be used in contexts, where only old dev_queue_xmit
1280  * worked, f.e. if you want to override normal output path (eql, shaper),
1281  * but resolution is not made yet.
1282  */
1283
1284 int neigh_compat_output(struct sk_buff *skb)
1285 {
1286         struct net_device *dev = skb->dev;
1287
1288         __skb_pull(skb, skb_network_offset(skb));
1289
1290         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1291                             skb->len) < 0 &&
1292             dev->header_ops->rebuild(skb))
1293                 return 0;
1294
1295         return dev_queue_xmit(skb);
1296 }
1297 EXPORT_SYMBOL(neigh_compat_output);
1298
1299 /* Slow and careful. */
1300
1301 int neigh_resolve_output(struct sk_buff *skb)
1302 {
1303         struct dst_entry *dst = skb_dst(skb);
1304         struct neighbour *neigh;
1305         int rc = 0;
1306
1307         if (!dst || !(neigh = dst->neighbour))
1308                 goto discard;
1309
1310         __skb_pull(skb, skb_network_offset(skb));
1311
1312         if (!neigh_event_send(neigh, skb)) {
1313                 int err;
1314                 struct net_device *dev = neigh->dev;
1315                 unsigned int seq;
1316
1317                 if (dev->header_ops->cache &&
1318                     !dst->hh &&
1319                     !(dst->flags & DST_NOCACHE))
1320                         neigh_hh_init(neigh, dst, dst->ops->protocol);
1321
1322                 do {
1323                         seq = read_seqbegin(&neigh->ha_lock);
1324                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1325                                               neigh->ha, NULL, skb->len);
1326                 } while (read_seqretry(&neigh->ha_lock, seq));
1327
1328                 if (err >= 0)
1329                         rc = neigh->ops->queue_xmit(skb);
1330                 else
1331                         goto out_kfree_skb;
1332         }
1333 out:
1334         return rc;
1335 discard:
1336         NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1337                       dst, dst ? dst->neighbour : NULL);
1338 out_kfree_skb:
1339         rc = -EINVAL;
1340         kfree_skb(skb);
1341         goto out;
1342 }
1343 EXPORT_SYMBOL(neigh_resolve_output);
1344
1345 /* As fast as possible without hh cache */
1346
1347 int neigh_connected_output(struct sk_buff *skb)
1348 {
1349         int err;
1350         struct dst_entry *dst = skb_dst(skb);
1351         struct neighbour *neigh = dst->neighbour;
1352         struct net_device *dev = neigh->dev;
1353         unsigned int seq;
1354
1355         __skb_pull(skb, skb_network_offset(skb));
1356
1357         do {
1358                 seq = read_seqbegin(&neigh->ha_lock);
1359                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1360                                       neigh->ha, NULL, skb->len);
1361         } while (read_seqretry(&neigh->ha_lock, seq));
1362
1363         if (err >= 0)
1364                 err = neigh->ops->queue_xmit(skb);
1365         else {
1366                 err = -EINVAL;
1367                 kfree_skb(skb);
1368         }
1369         return err;
1370 }
1371 EXPORT_SYMBOL(neigh_connected_output);
1372
1373 static void neigh_proxy_process(unsigned long arg)
1374 {
1375         struct neigh_table *tbl = (struct neigh_table *)arg;
1376         long sched_next = 0;
1377         unsigned long now = jiffies;
1378         struct sk_buff *skb, *n;
1379
1380         spin_lock(&tbl->proxy_queue.lock);
1381
1382         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1383                 long tdif = NEIGH_CB(skb)->sched_next - now;
1384
1385                 if (tdif <= 0) {
1386                         struct net_device *dev = skb->dev;
1387                         __skb_unlink(skb, &tbl->proxy_queue);
1388                         if (tbl->proxy_redo && netif_running(dev))
1389                                 tbl->proxy_redo(skb);
1390                         else
1391                                 kfree_skb(skb);
1392
1393                         dev_put(dev);
1394                 } else if (!sched_next || tdif < sched_next)
1395                         sched_next = tdif;
1396         }
1397         del_timer(&tbl->proxy_timer);
1398         if (sched_next)
1399                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1400         spin_unlock(&tbl->proxy_queue.lock);
1401 }
1402
1403 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1404                     struct sk_buff *skb)
1405 {
1406         unsigned long now = jiffies;
1407         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1408
1409         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1410                 kfree_skb(skb);
1411                 return;
1412         }
1413
1414         NEIGH_CB(skb)->sched_next = sched_next;
1415         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1416
1417         spin_lock(&tbl->proxy_queue.lock);
1418         if (del_timer(&tbl->proxy_timer)) {
1419                 if (time_before(tbl->proxy_timer.expires, sched_next))
1420                         sched_next = tbl->proxy_timer.expires;
1421         }
1422         skb_dst_drop(skb);
1423         dev_hold(skb->dev);
1424         __skb_queue_tail(&tbl->proxy_queue, skb);
1425         mod_timer(&tbl->proxy_timer, sched_next);
1426         spin_unlock(&tbl->proxy_queue.lock);
1427 }
1428 EXPORT_SYMBOL(pneigh_enqueue);
1429
1430 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1431                                                       struct net *net, int ifindex)
1432 {
1433         struct neigh_parms *p;
1434
1435         for (p = &tbl->parms; p; p = p->next) {
1436                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1437                     (!p->dev && !ifindex))
1438                         return p;
1439         }
1440
1441         return NULL;
1442 }
1443
1444 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1445                                       struct neigh_table *tbl)
1446 {
1447         struct neigh_parms *p, *ref;
1448         struct net *net = dev_net(dev);
1449         const struct net_device_ops *ops = dev->netdev_ops;
1450
1451         ref = lookup_neigh_parms(tbl, net, 0);
1452         if (!ref)
1453                 return NULL;
1454
1455         p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1456         if (p) {
1457                 p->tbl            = tbl;
1458                 atomic_set(&p->refcnt, 1);
1459                 p->reachable_time =
1460                                 neigh_rand_reach_time(p->base_reachable_time);
1461
1462                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1463                         kfree(p);
1464                         return NULL;
1465                 }
1466
1467                 dev_hold(dev);
1468                 p->dev = dev;
1469                 write_pnet(&p->net, hold_net(net));
1470                 p->sysctl_table = NULL;
1471                 write_lock_bh(&tbl->lock);
1472                 p->next         = tbl->parms.next;
1473                 tbl->parms.next = p;
1474                 write_unlock_bh(&tbl->lock);
1475         }
1476         return p;
1477 }
1478 EXPORT_SYMBOL(neigh_parms_alloc);
1479
1480 static void neigh_rcu_free_parms(struct rcu_head *head)
1481 {
1482         struct neigh_parms *parms =
1483                 container_of(head, struct neigh_parms, rcu_head);
1484
1485         neigh_parms_put(parms);
1486 }
1487
1488 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1489 {
1490         struct neigh_parms **p;
1491
1492         if (!parms || parms == &tbl->parms)
1493                 return;
1494         write_lock_bh(&tbl->lock);
1495         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1496                 if (*p == parms) {
1497                         *p = parms->next;
1498                         parms->dead = 1;
1499                         write_unlock_bh(&tbl->lock);
1500                         if (parms->dev)
1501                                 dev_put(parms->dev);
1502                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1503                         return;
1504                 }
1505         }
1506         write_unlock_bh(&tbl->lock);
1507         NEIGH_PRINTK1("neigh_parms_release: not found\n");
1508 }
1509 EXPORT_SYMBOL(neigh_parms_release);
1510
/* Release the namespace reference held by @parms and free the structure. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	struct net *net = neigh_parms_net(parms);

	release_net(net);
	kfree(parms);
}
1516
1517 static struct lock_class_key neigh_table_proxy_queue_class;
1518
1519 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1520 {
1521         unsigned long now = jiffies;
1522         unsigned long phsize;
1523
1524         write_pnet(&tbl->parms.net, &init_net);
1525         atomic_set(&tbl->parms.refcnt, 1);
1526         tbl->parms.reachable_time =
1527                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1528
1529         if (!tbl->kmem_cachep)
1530                 tbl->kmem_cachep =
1531                         kmem_cache_create(tbl->id, tbl->entry_size, 0,
1532                                           SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1533                                           NULL);
1534         tbl->stats = alloc_percpu(struct neigh_statistics);
1535         if (!tbl->stats)
1536                 panic("cannot create neighbour cache statistics");
1537
1538 #ifdef CONFIG_PROC_FS
1539         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1540                               &neigh_stat_seq_fops, tbl))
1541                 panic("cannot create neighbour proc dir entry");
1542 #endif
1543
1544         tbl->nht = neigh_hash_alloc(8);
1545
1546         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1547         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1548
1549         if (!tbl->nht || !tbl->phash_buckets)
1550                 panic("cannot allocate neighbour cache hashes");
1551
1552         rwlock_init(&tbl->lock);
1553         INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1554         schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1555         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1556         skb_queue_head_init_class(&tbl->proxy_queue,
1557                         &neigh_table_proxy_queue_class);
1558
1559         tbl->last_flush = now;
1560         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1561 }
1562 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1563
1564 void neigh_table_init(struct neigh_table *tbl)
1565 {
1566         struct neigh_table *tmp;
1567
1568         neigh_table_init_no_netlink(tbl);
1569         write_lock(&neigh_tbl_lock);
1570         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1571                 if (tmp->family == tbl->family)
1572                         break;
1573         }
1574         tbl->next       = neigh_tables;
1575         neigh_tables    = tbl;
1576         write_unlock(&neigh_tbl_lock);
1577
1578         if (unlikely(tmp)) {
1579                 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1580                        "family %d\n", tbl->family);
1581                 dump_stack();
1582         }
1583 }
1584 EXPORT_SYMBOL(neigh_table_init);
1585
1586 int neigh_table_clear(struct neigh_table *tbl)
1587 {
1588         struct neigh_table **tp;
1589
1590         /* It is not clean... Fix it to unload IPv6 module safely */
1591         cancel_delayed_work_sync(&tbl->gc_work);
1592         del_timer_sync(&tbl->proxy_timer);
1593         pneigh_queue_purge(&tbl->proxy_queue);
1594         neigh_ifdown(tbl, NULL);
1595         if (atomic_read(&tbl->entries))
1596                 printk(KERN_CRIT "neighbour leakage\n");
1597         write_lock(&neigh_tbl_lock);
1598         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1599                 if (*tp == tbl) {
1600                         *tp = tbl->next;
1601                         break;
1602                 }
1603         }
1604         write_unlock(&neigh_tbl_lock);
1605
1606         call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
1607         tbl->nht = NULL;
1608
1609         kfree(tbl->phash_buckets);
1610         tbl->phash_buckets = NULL;
1611
1612         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1613
1614         free_percpu(tbl->stats);
1615         tbl->stats = NULL;
1616
1617         kmem_cache_destroy(tbl->kmem_cachep);
1618         tbl->kmem_cachep = NULL;
1619
1620         return 0;
1621 }
1622 EXPORT_SYMBOL(neigh_table_clear);
1623
1624 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1625 {
1626         struct net *net = sock_net(skb->sk);
1627         struct ndmsg *ndm;
1628         struct nlattr *dst_attr;
1629         struct neigh_table *tbl;
1630         struct net_device *dev = NULL;
1631         int err = -EINVAL;
1632
1633         ASSERT_RTNL();
1634         if (nlmsg_len(nlh) < sizeof(*ndm))
1635                 goto out;
1636
1637         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1638         if (dst_attr == NULL)
1639                 goto out;
1640
1641         ndm = nlmsg_data(nlh);
1642         if (ndm->ndm_ifindex) {
1643                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1644                 if (dev == NULL) {
1645                         err = -ENODEV;
1646                         goto out;
1647                 }
1648         }
1649
1650         read_lock(&neigh_tbl_lock);
1651         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1652                 struct neighbour *neigh;
1653
1654                 if (tbl->family != ndm->ndm_family)
1655                         continue;
1656                 read_unlock(&neigh_tbl_lock);
1657
1658                 if (nla_len(dst_attr) < tbl->key_len)
1659                         goto out;
1660
1661                 if (ndm->ndm_flags & NTF_PROXY) {
1662                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1663                         goto out;
1664                 }
1665
1666                 if (dev == NULL)
1667                         goto out;
1668
1669                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1670                 if (neigh == NULL) {
1671                         err = -ENOENT;
1672                         goto out;
1673                 }
1674
1675                 err = neigh_update(neigh, NULL, NUD_FAILED,
1676                                    NEIGH_UPDATE_F_OVERRIDE |
1677                                    NEIGH_UPDATE_F_ADMIN);
1678                 neigh_release(neigh);
1679                 goto out;
1680         }
1681         read_unlock(&neigh_tbl_lock);
1682         err = -EAFNOSUPPORT;
1683
1684 out:
1685         return err;
1686 }
1687
1688 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1689 {
1690         struct net *net = sock_net(skb->sk);
1691         struct ndmsg *ndm;
1692         struct nlattr *tb[NDA_MAX+1];
1693         struct neigh_table *tbl;
1694         struct net_device *dev = NULL;
1695         int err;
1696
1697         ASSERT_RTNL();
1698         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1699         if (err < 0)
1700                 goto out;
1701
1702         err = -EINVAL;
1703         if (tb[NDA_DST] == NULL)
1704                 goto out;
1705
1706         ndm = nlmsg_data(nlh);
1707         if (ndm->ndm_ifindex) {
1708                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1709                 if (dev == NULL) {
1710                         err = -ENODEV;
1711                         goto out;
1712                 }
1713
1714                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1715                         goto out;
1716         }
1717
1718         read_lock(&neigh_tbl_lock);
1719         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1720                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1721                 struct neighbour *neigh;
1722                 void *dst, *lladdr;
1723
1724                 if (tbl->family != ndm->ndm_family)
1725                         continue;
1726                 read_unlock(&neigh_tbl_lock);
1727
1728                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1729                         goto out;
1730                 dst = nla_data(tb[NDA_DST]);
1731                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1732
1733                 if (ndm->ndm_flags & NTF_PROXY) {
1734                         struct pneigh_entry *pn;
1735
1736                         err = -ENOBUFS;
1737                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1738                         if (pn) {
1739                                 pn->flags = ndm->ndm_flags;
1740                                 err = 0;
1741                         }
1742                         goto out;
1743                 }
1744
1745                 if (dev == NULL)
1746                         goto out;
1747
1748                 neigh = neigh_lookup(tbl, dst, dev);
1749                 if (neigh == NULL) {
1750                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1751                                 err = -ENOENT;
1752                                 goto out;
1753                         }
1754
1755                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1756                         if (IS_ERR(neigh)) {
1757                                 err = PTR_ERR(neigh);
1758                                 goto out;
1759                         }
1760                 } else {
1761                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1762                                 err = -EEXIST;
1763                                 neigh_release(neigh);
1764                                 goto out;
1765                         }
1766
1767                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1768                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1769                 }
1770
1771                 if (ndm->ndm_flags & NTF_USE) {
1772                         neigh_event_send(neigh, NULL);
1773                         err = 0;
1774                 } else
1775                         err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1776                 neigh_release(neigh);
1777                 goto out;
1778         }
1779
1780         read_unlock(&neigh_tbl_lock);
1781         err = -EAFNOSUPPORT;
1782 out:
1783         return err;
1784 }
1785
1786 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1787 {
1788         struct nlattr *nest;
1789
1790         nest = nla_nest_start(skb, NDTA_PARMS);
1791         if (nest == NULL)
1792                 return -ENOBUFS;
1793
1794         if (parms->dev)
1795                 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1796
1797         NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1798         NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1799         NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1800         NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1801         NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1802         NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1803         NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1804         NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1805                       parms->base_reachable_time);
1806         NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1807         NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1808         NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1809         NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1810         NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1811         NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1812
1813         return nla_nest_end(skb, nest);
1814
1815 nla_put_failure:
1816         nla_nest_cancel(skb, nest);
1817         return -EMSGSIZE;
1818 }
1819
1820 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1821                               u32 pid, u32 seq, int type, int flags)
1822 {
1823         struct nlmsghdr *nlh;
1824         struct ndtmsg *ndtmsg;
1825
1826         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1827         if (nlh == NULL)
1828                 return -EMSGSIZE;
1829
1830         ndtmsg = nlmsg_data(nlh);
1831
1832         read_lock_bh(&tbl->lock);
1833         ndtmsg->ndtm_family = tbl->family;
1834         ndtmsg->ndtm_pad1   = 0;
1835         ndtmsg->ndtm_pad2   = 0;
1836
1837         NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1838         NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1839         NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1840         NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1841         NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1842
1843         {
1844                 unsigned long now = jiffies;
1845                 unsigned int flush_delta = now - tbl->last_flush;
1846                 unsigned int rand_delta = now - tbl->last_rand;
1847                 struct neigh_hash_table *nht;
1848                 struct ndt_config ndc = {
1849                         .ndtc_key_len           = tbl->key_len,
1850                         .ndtc_entry_size        = tbl->entry_size,
1851                         .ndtc_entries           = atomic_read(&tbl->entries),
1852                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1853                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1854                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1855                 };
1856
1857                 rcu_read_lock_bh();
1858                 nht = rcu_dereference_bh(tbl->nht);
1859                 ndc.ndtc_hash_rnd = nht->hash_rnd;
1860                 ndc.ndtc_hash_mask = nht->hash_mask;
1861                 rcu_read_unlock_bh();
1862
1863                 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1864         }
1865
1866         {
1867                 int cpu;
1868                 struct ndt_stats ndst;
1869
1870                 memset(&ndst, 0, sizeof(ndst));
1871
1872                 for_each_possible_cpu(cpu) {
1873                         struct neigh_statistics *st;
1874
1875                         st = per_cpu_ptr(tbl->stats, cpu);
1876                         ndst.ndts_allocs                += st->allocs;
1877                         ndst.ndts_destroys              += st->destroys;
1878                         ndst.ndts_hash_grows            += st->hash_grows;
1879                         ndst.ndts_res_failed            += st->res_failed;
1880                         ndst.ndts_lookups               += st->lookups;
1881                         ndst.ndts_hits                  += st->hits;
1882                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1883                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1884                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1885                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1886                 }
1887
1888                 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1889         }
1890
1891         BUG_ON(tbl->parms.dev);
1892         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1893                 goto nla_put_failure;
1894
1895         read_unlock_bh(&tbl->lock);
1896         return nlmsg_end(skb, nlh);
1897
1898 nla_put_failure:
1899         read_unlock_bh(&tbl->lock);
1900         nlmsg_cancel(skb, nlh);
1901         return -EMSGSIZE;
1902 }
1903
1904 static int neightbl_fill_param_info(struct sk_buff *skb,
1905                                     struct neigh_table *tbl,
1906                                     struct neigh_parms *parms,
1907                                     u32 pid, u32 seq, int type,
1908                                     unsigned int flags)
1909 {
1910         struct ndtmsg *ndtmsg;
1911         struct nlmsghdr *nlh;
1912
1913         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1914         if (nlh == NULL)
1915                 return -EMSGSIZE;
1916
1917         ndtmsg = nlmsg_data(nlh);
1918
1919         read_lock_bh(&tbl->lock);
1920         ndtmsg->ndtm_family = tbl->family;
1921         ndtmsg->ndtm_pad1   = 0;
1922         ndtmsg->ndtm_pad2   = 0;
1923
1924         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1925             neightbl_fill_parms(skb, parms) < 0)
1926                 goto errout;
1927
1928         read_unlock_bh(&tbl->lock);
1929         return nlmsg_end(skb, nlh);
1930 errout:
1931         read_unlock_bh(&tbl->lock);
1932         nlmsg_cancel(skb, nlh);
1933         return -EMSGSIZE;
1934 }
1935
1936 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1937         [NDTA_NAME]             = { .type = NLA_STRING },
1938         [NDTA_THRESH1]          = { .type = NLA_U32 },
1939         [NDTA_THRESH2]          = { .type = NLA_U32 },
1940         [NDTA_THRESH3]          = { .type = NLA_U32 },
1941         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1942         [NDTA_PARMS]            = { .type = NLA_NESTED },
1943 };
1944
1945 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1946         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1947         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1948         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1949         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1950         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1951         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1952         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1953         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1954         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1955         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1956         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1957         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1958         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1959 };
1960
1961 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1962 {
1963         struct net *net = sock_net(skb->sk);
1964         struct neigh_table *tbl;
1965         struct ndtmsg *ndtmsg;
1966         struct nlattr *tb[NDTA_MAX+1];
1967         int err;
1968
1969         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1970                           nl_neightbl_policy);
1971         if (err < 0)
1972                 goto errout;
1973
1974         if (tb[NDTA_NAME] == NULL) {
1975                 err = -EINVAL;
1976                 goto errout;
1977         }
1978
1979         ndtmsg = nlmsg_data(nlh);
1980         read_lock(&neigh_tbl_lock);
1981         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1982                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1983                         continue;
1984
1985                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1986                         break;
1987         }
1988
1989         if (tbl == NULL) {
1990                 err = -ENOENT;
1991                 goto errout_locked;
1992         }
1993
1994         /*
1995          * We acquire tbl->lock to be nice to the periodic timers and
1996          * make sure they always see a consistent set of values.
1997          */
1998         write_lock_bh(&tbl->lock);
1999
2000         if (tb[NDTA_PARMS]) {
2001                 struct nlattr *tbp[NDTPA_MAX+1];
2002                 struct neigh_parms *p;
2003                 int i, ifindex = 0;
2004
2005                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2006                                        nl_ntbl_parm_policy);
2007                 if (err < 0)
2008                         goto errout_tbl_lock;
2009
2010                 if (tbp[NDTPA_IFINDEX])
2011                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2012
2013                 p = lookup_neigh_parms(tbl, net, ifindex);
2014                 if (p == NULL) {
2015                         err = -ENOENT;
2016                         goto errout_tbl_lock;
2017                 }
2018
2019                 for (i = 1; i <= NDTPA_MAX; i++) {
2020                         if (tbp[i] == NULL)
2021                                 continue;
2022
2023                         switch (i) {
2024                         case NDTPA_QUEUE_LEN:
2025                                 p->queue_len = nla_get_u32(tbp[i]);
2026                                 break;
2027                         case NDTPA_PROXY_QLEN:
2028                                 p->proxy_qlen = nla_get_u32(tbp[i]);
2029                                 break;
2030                         case NDTPA_APP_PROBES:
2031                                 p->app_probes = nla_get_u32(tbp[i]);
2032                                 break;
2033                         case NDTPA_UCAST_PROBES:
2034                                 p->ucast_probes = nla_get_u32(tbp[i]);
2035                                 break;
2036                         case NDTPA_MCAST_PROBES:
2037                                 p->mcast_probes = nla_get_u32(tbp[i]);
2038                                 break;
2039                         case NDTPA_BASE_REACHABLE_TIME:
2040                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
2041                                 break;
2042                         case NDTPA_GC_STALETIME:
2043                                 p->gc_staletime = nla_get_msecs(tbp[i]);
2044                                 break;
2045                         case NDTPA_DELAY_PROBE_TIME:
2046                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
2047                                 break;
2048                         case NDTPA_RETRANS_TIME:
2049                                 p->retrans_time = nla_get_msecs(tbp[i]);
2050                                 break;
2051                         case NDTPA_ANYCAST_DELAY:
2052                                 p->anycast_delay = nla_get_msecs(tbp[i]);
2053                                 break;
2054                         case NDTPA_PROXY_DELAY:
2055                                 p->proxy_delay = nla_get_msecs(tbp[i]);
2056                                 break;
2057                         case NDTPA_LOCKTIME:
2058                                 p->locktime = nla_get_msecs(tbp[i]);
2059                                 break;
2060                         }
2061                 }
2062         }
2063
2064         if (tb[NDTA_THRESH1])
2065                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2066
2067         if (tb[NDTA_THRESH2])
2068                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2069
2070         if (tb[NDTA_THRESH3])
2071                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2072
2073         if (tb[NDTA_GC_INTERVAL])
2074                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2075
2076         err = 0;
2077
2078 errout_tbl_lock:
2079         write_unlock_bh(&tbl->lock);
2080 errout_locked:
2081         read_unlock(&neigh_tbl_lock);
2082 errout:
2083         return err;
2084 }
2085
2086 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2087 {
2088         struct net *net = sock_net(skb->sk);
2089         int family, tidx, nidx = 0;
2090         int tbl_skip = cb->args[0];
2091         int neigh_skip = cb->args[1];
2092         struct neigh_table *tbl;
2093
2094         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2095
2096         read_lock(&neigh_tbl_lock);
2097         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2098                 struct neigh_parms *p;
2099
2100                 if (tidx < tbl_skip || (family && tbl->family != family))
2101                         continue;
2102
2103                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2104                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2105                                        NLM_F_MULTI) <= 0)
2106                         break;
2107
2108                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2109                         if (!net_eq(neigh_parms_net(p), net))
2110                                 continue;
2111
2112                         if (nidx < neigh_skip)
2113                                 goto next;
2114
2115                         if (neightbl_fill_param_info(skb, tbl, p,
2116                                                      NETLINK_CB(cb->skb).pid,
2117                                                      cb->nlh->nlmsg_seq,
2118                                                      RTM_NEWNEIGHTBL,
2119                                                      NLM_F_MULTI) <= 0)
2120                                 goto out;
2121                 next:
2122                         nidx++;
2123                 }
2124
2125                 neigh_skip = 0;
2126         }
2127 out:
2128         read_unlock(&neigh_tbl_lock);
2129         cb->args[0] = tidx;
2130         cb->args[1] = nidx;
2131
2132         return skb->len;
2133 }
2134
2135 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2136                            u32 pid, u32 seq, int type, unsigned int flags)
2137 {
2138         unsigned long now = jiffies;
2139         struct nda_cacheinfo ci;
2140         struct nlmsghdr *nlh;
2141         struct ndmsg *ndm;
2142
2143         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2144         if (nlh == NULL)
2145                 return -EMSGSIZE;
2146
2147         ndm = nlmsg_data(nlh);
2148         ndm->ndm_family  = neigh->ops->family;
2149         ndm->ndm_pad1    = 0;
2150         ndm->ndm_pad2    = 0;
2151         ndm->ndm_flags   = neigh->flags;
2152         ndm->ndm_type    = neigh->type;
2153         ndm->ndm_ifindex = neigh->dev->ifindex;
2154
2155         NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2156
2157         read_lock_bh(&neigh->lock);
2158         ndm->ndm_state   = neigh->nud_state;
2159         if (neigh->nud_state & NUD_VALID) {
2160                 char haddr[MAX_ADDR_LEN];
2161
2162                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2163                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2164                         read_unlock_bh(&neigh->lock);
2165                         goto nla_put_failure;
2166                 }
2167         }
2168
2169         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2170         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2171         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2172         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2173         read_unlock_bh(&neigh->lock);
2174
2175         NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2176         NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2177
2178         return nlmsg_end(skb, nlh);
2179
2180 nla_put_failure:
2181         nlmsg_cancel(skb, nlh);
2182         return -EMSGSIZE;
2183 }
2184
2185 static void neigh_update_notify(struct neighbour *neigh)
2186 {
2187         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2188         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2189 }
2190
2191 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2192                             struct netlink_callback *cb)
2193 {
2194         struct net *net = sock_net(skb->sk);
2195         struct neighbour *n;
2196         int rc, h, s_h = cb->args[1];
2197         int idx, s_idx = idx = cb->args[2];
2198         struct neigh_hash_table *nht;
2199
2200         rcu_read_lock_bh();
2201         nht = rcu_dereference_bh(tbl->nht);
2202
2203         for (h = 0; h <= nht->hash_mask; h++) {
2204                 if (h < s_h)
2205                         continue;
2206                 if (h > s_h)
2207                         s_idx = 0;
2208                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2209                      n != NULL;
2210                      n = rcu_dereference_bh(n->next)) {
2211                         if (!net_eq(dev_net(n->dev), net))
2212                                 continue;
2213                         if (idx < s_idx)
2214                                 goto next;
2215                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2216                                             cb->nlh->nlmsg_seq,
2217                                             RTM_NEWNEIGH,
2218                                             NLM_F_MULTI) <= 0) {
2219                                 rc = -1;
2220                                 goto out;
2221                         }
2222 next:
2223                         idx++;
2224                 }
2225         }
2226         rc = skb->len;
2227 out:
2228         rcu_read_unlock_bh();
2229         cb->args[1] = h;
2230         cb->args[2] = idx;
2231         return rc;
2232 }
2233
2234 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2235 {
2236         struct neigh_table *tbl;
2237         int t, family, s_t;
2238
2239         read_lock(&neigh_tbl_lock);
2240         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2241         s_t = cb->args[0];
2242
2243         for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2244                 if (t < s_t || (family && tbl->family != family))
2245                         continue;
2246                 if (t > s_t)
2247                         memset(&cb->args[1], 0, sizeof(cb->args) -
2248                                                 sizeof(cb->args[0]));
2249                 if (neigh_dump_table(tbl, skb, cb) < 0)
2250                         break;
2251         }
2252         read_unlock(&neigh_tbl_lock);
2253
2254         cb->args[0] = t;
2255         return skb->len;
2256 }
2257
2258 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2259 {
2260         int chain;
2261         struct neigh_hash_table *nht;
2262
2263         rcu_read_lock_bh();
2264         nht = rcu_dereference_bh(tbl->nht);
2265
2266         read_lock(&tbl->lock); /* avoid resizes */
2267         for (chain = 0; chain <= nht->hash_mask; chain++) {
2268                 struct neighbour *n;
2269
2270                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2271                      n != NULL;
2272                      n = rcu_dereference_bh(n->next))
2273                         cb(n, cookie);
2274         }
2275         read_unlock(&tbl->lock);
2276         rcu_read_unlock_bh();
2277 }
2278 EXPORT_SYMBOL(neigh_for_each);
2279
2280 /* The tbl->lock must be held as a writer and BH disabled. */
2281 void __neigh_for_each_release(struct neigh_table *tbl,
2282                               int (*cb)(struct neighbour *))
2283 {
2284         int chain;
2285         struct neigh_hash_table *nht;
2286
2287         nht = rcu_dereference_protected(tbl->nht,
2288                                         lockdep_is_held(&tbl->lock));
2289         for (chain = 0; chain <= nht->hash_mask; chain++) {
2290                 struct neighbour *n;
2291                 struct neighbour __rcu **np;
2292
2293                 np = &nht->hash_buckets[chain];
2294                 while ((n = rcu_dereference_protected(*np,
2295                                         lockdep_is_held(&tbl->lock))) != NULL) {
2296                         int release;
2297
2298                         write_lock(&n->lock);
2299                         release = cb(n);
2300                         if (release) {
2301                                 rcu_assign_pointer(*np,
2302                                         rcu_dereference_protected(n->next,
2303                                                 lockdep_is_held(&tbl->lock)));
2304                                 n->dead = 1;
2305                         } else
2306                                 np = &n->next;
2307                         write_unlock(&n->lock);
2308                         if (release)
2309                                 neigh_cleanup_and_release(n);
2310                 }
2311         }
2312 }
2313 EXPORT_SYMBOL(__neigh_for_each_release);
2314
2315 #ifdef CONFIG_PROC_FS
2316
2317 static struct neighbour *neigh_get_first(struct seq_file *seq)
2318 {
2319         struct neigh_seq_state *state = seq->private;
2320         struct net *net = seq_file_net(seq);
2321         struct neigh_hash_table *nht = state->nht;
2322         struct neighbour *n = NULL;
2323         int bucket = state->bucket;
2324
2325         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2326         for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
2327                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2328
2329                 while (n) {
2330                         if (!net_eq(dev_net(n->dev), net))
2331                                 goto next;
2332                         if (state->neigh_sub_iter) {
2333                                 loff_t fakep = 0;
2334                                 void *v;
2335
2336                                 v = state->neigh_sub_iter(state, n, &fakep);
2337                                 if (!v)
2338                                         goto next;
2339                         }
2340                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2341                                 break;
2342                         if (n->nud_state & ~NUD_NOARP)
2343                                 break;
2344 next:
2345                         n = rcu_dereference_bh(n->next);
2346                 }
2347
2348                 if (n)
2349                         break;
2350         }
2351         state->bucket = bucket;
2352
2353         return n;
2354 }
2355
2356 static struct neighbour *neigh_get_next(struct seq_file *seq,
2357                                         struct neighbour *n,
2358                                         loff_t *pos)
2359 {
2360         struct neigh_seq_state *state = seq->private;
2361         struct net *net = seq_file_net(seq);
2362         struct neigh_hash_table *nht = state->nht;
2363
2364         if (state->neigh_sub_iter) {
2365                 void *v = state->neigh_sub_iter(state, n, pos);
2366                 if (v)
2367                         return n;
2368         }
2369         n = rcu_dereference_bh(n->next);
2370
2371         while (1) {
2372                 while (n) {
2373                         if (!net_eq(dev_net(n->dev), net))
2374                                 goto next;
2375                         if (state->neigh_sub_iter) {
2376                                 void *v = state->neigh_sub_iter(state, n, pos);
2377                                 if (v)
2378                                         return n;
2379                                 goto next;
2380                         }
2381                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2382                                 break;
2383
2384                         if (n->nud_state & ~NUD_NOARP)
2385                                 break;
2386 next:
2387                         n = rcu_dereference_bh(n->next);
2388                 }
2389
2390                 if (n)
2391                         break;
2392
2393                 if (++state->bucket > nht->hash_mask)
2394                         break;
2395
2396                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2397         }
2398
2399         if (n && pos)
2400                 --(*pos);
2401         return n;
2402 }
2403
2404 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2405 {
2406         struct neighbour *n = neigh_get_first(seq);
2407
2408         if (n) {
2409                 --(*pos);
2410                 while (*pos) {
2411                         n = neigh_get_next(seq, n, pos);
2412                         if (!n)
2413                                 break;
2414                 }
2415         }
2416         return *pos ? NULL : n;
2417 }
2418
2419 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2420 {
2421         struct neigh_seq_state *state = seq->private;
2422         struct net *net = seq_file_net(seq);
2423         struct neigh_table *tbl = state->tbl;
2424         struct pneigh_entry *pn = NULL;
2425         int bucket = state->bucket;
2426
2427         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2428         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2429                 pn = tbl->phash_buckets[bucket];
2430                 while (pn && !net_eq(pneigh_net(pn), net))
2431                         pn = pn->next;
2432                 if (pn)
2433                         break;
2434         }
2435         state->bucket = bucket;
2436
2437         return pn;
2438 }
2439
2440 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2441                                             struct pneigh_entry *pn,
2442                                             loff_t *pos)
2443 {
2444         struct neigh_seq_state *state = seq->private;
2445         struct net *net = seq_file_net(seq);
2446         struct neigh_table *tbl = state->tbl;
2447
2448         pn = pn->next;
2449         while (!pn) {
2450                 if (++state->bucket > PNEIGH_HASHMASK)
2451                         break;
2452                 pn = tbl->phash_buckets[state->bucket];
2453                 while (pn && !net_eq(pneigh_net(pn), net))
2454                         pn = pn->next;
2455                 if (pn)
2456                         break;
2457         }
2458
2459         if (pn && pos)
2460                 --(*pos);
2461
2462         return pn;
2463 }
2464
2465 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2466 {
2467         struct pneigh_entry *pn = pneigh_get_first(seq);
2468
2469         if (pn) {
2470                 --(*pos);
2471                 while (*pos) {
2472                         pn = pneigh_get_next(seq, pn, pos);
2473                         if (!pn)
2474                                 break;
2475                 }
2476         }
2477         return *pos ? NULL : pn;
2478 }
2479
2480 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2481 {
2482         struct neigh_seq_state *state = seq->private;
2483         void *rc;
2484         loff_t idxpos = *pos;
2485
2486         rc = neigh_get_idx(seq, &idxpos);
2487         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2488                 rc = pneigh_get_idx(seq, &idxpos);
2489
2490         return rc;
2491 }
2492
2493 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2494         __acquires(rcu_bh)
2495 {
2496         struct neigh_seq_state *state = seq->private;
2497
2498         state->tbl = tbl;
2499         state->bucket = 0;
2500         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2501
2502         rcu_read_lock_bh();
2503         state->nht = rcu_dereference_bh(tbl->nht);
2504
2505         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2506 }
2507 EXPORT_SYMBOL(neigh_seq_start);
2508
2509 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2510 {
2511         struct neigh_seq_state *state;
2512         void *rc;
2513
2514         if (v == SEQ_START_TOKEN) {
2515                 rc = neigh_get_first(seq);
2516                 goto out;
2517         }
2518
2519         state = seq->private;
2520         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2521                 rc = neigh_get_next(seq, v, NULL);
2522                 if (rc)
2523                         goto out;
2524                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2525                         rc = pneigh_get_first(seq);
2526         } else {
2527                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2528                 rc = pneigh_get_next(seq, v, NULL);
2529         }
2530 out:
2531         ++(*pos);
2532         return rc;
2533 }
2534 EXPORT_SYMBOL(neigh_seq_next);
2535
2536 void neigh_seq_stop(struct seq_file *seq, void *v)
2537         __releases(rcu_bh)
2538 {
2539         rcu_read_unlock_bh();
2540 }
2541 EXPORT_SYMBOL(neigh_seq_stop);
2542
2543 /* statistics via seq_file */
2544
2545 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2546 {
2547         struct neigh_table *tbl = seq->private;
2548         int cpu;
2549
2550         if (*pos == 0)
2551                 return SEQ_START_TOKEN;
2552
2553         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2554                 if (!cpu_possible(cpu))
2555                         continue;
2556                 *pos = cpu+1;
2557                 return per_cpu_ptr(tbl->stats, cpu);
2558         }
2559         return NULL;
2560 }
2561
2562 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2563 {
2564         struct neigh_table *tbl = seq->private;
2565         int cpu;
2566
2567         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2568                 if (!cpu_possible(cpu))
2569                         continue;
2570                 *pos = cpu+1;
2571                 return per_cpu_ptr(tbl->stats, cpu);
2572         }
2573         return NULL;
2574 }
2575
/* ->stop for the per-CPU statistics file: start/next take nothing to undo. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2580
2581 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2582 {
2583         struct neigh_table *tbl = seq->private;
2584         struct neigh_statistics *st = v;
2585
2586         if (v == SEQ_START_TOKEN) {
2587                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2588                 return 0;
2589         }
2590
2591         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2592                         "%08lx %08lx  %08lx %08lx %08lx\n",
2593                    atomic_read(&tbl->entries),
2594
2595                    st->allocs,
2596                    st->destroys,
2597                    st->hash_grows,
2598
2599                    st->lookups,
2600                    st->hits,
2601
2602                    st->res_failed,
2603
2604                    st->rcv_probes_mcast,
2605                    st->rcv_probes_ucast,
2606
2607                    st->periodic_gc_runs,
2608                    st->forced_gc_runs,
2609                    st->unres_discards
2610                    );
2611
2612         return 0;
2613 }
2614
2615 static const struct seq_operations neigh_stat_seq_ops = {
2616         .start  = neigh_stat_seq_start,
2617         .next   = neigh_stat_seq_next,
2618         .stop   = neigh_stat_seq_stop,
2619         .show   = neigh_stat_seq_show,
2620 };
2621
2622 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2623 {
2624         int ret = seq_open(file, &neigh_stat_seq_ops);
2625
2626         if (!ret) {
2627                 struct seq_file *sf = file->private_data;
2628                 sf->private = PDE(inode)->data;
2629         }
2630         return ret;
2631 };
2632
2633 static const struct file_operations neigh_stat_seq_fops = {
2634         .owner   = THIS_MODULE,
2635         .open    = neigh_stat_seq_open,
2636         .read    = seq_read,
2637         .llseek  = seq_lseek,
2638         .release = seq_release,
2639 };
2640
2641 #endif /* CONFIG_PROC_FS */
2642
2643 static inline size_t neigh_nlmsg_size(void)
2644 {
2645         return NLMSG_ALIGN(sizeof(struct ndmsg))
2646                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2647                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2648                + nla_total_size(sizeof(struct nda_cacheinfo))
2649                + nla_total_size(4); /* NDA_PROBES */
2650 }
2651
2652 static void __neigh_notify(struct neighbour *n, int type, int flags)
2653 {
2654         struct net *net = dev_net(n->dev);
2655         struct sk_buff *skb;
2656         int err = -ENOBUFS;
2657
2658         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2659         if (skb == NULL)
2660                 goto errout;
2661
2662         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2663         if (err < 0) {
2664                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2665                 WARN_ON(err == -EMSGSIZE);
2666                 kfree_skb(skb);
2667                 goto errout;
2668         }
2669         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2670         return;
2671 errout:
2672         if (err < 0)
2673                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2674 }
2675
2676 #ifdef CONFIG_ARPD
2677 void neigh_app_ns(struct neighbour *n)
2678 {
2679         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2680 }
2681 EXPORT_SYMBOL(neigh_app_ns);
2682 #endif /* CONFIG_ARPD */
2683
2684 #ifdef CONFIG_SYSCTL
2685
2686 #define NEIGH_VARS_MAX 19
2687
2688 static struct neigh_sysctl_table {
2689         struct ctl_table_header *sysctl_header;
2690         struct ctl_table neigh_vars[NEIGH_VARS_MAX];
2691         char *dev_name;
2692 } neigh_sysctl_template __read_mostly = {
2693         .neigh_vars = {
2694                 {
2695                         .procname       = "mcast_solicit",
2696                         .maxlen         = sizeof(int),
2697                         .mode           = 0644,
2698                         .proc_handler   = proc_dointvec,
2699                 },
2700                 {
2701                         .procname       = "ucast_solicit",
2702                         .maxlen         = sizeof(int),
2703                         .mode           = 0644,
2704                         .proc_handler   = proc_dointvec,
2705                 },
2706                 {
2707                         .procname       = "app_solicit",
2708                         .maxlen         = sizeof(int),
2709                         .mode           = 0644,
2710                         .proc_handler   = proc_dointvec,
2711                 },
2712                 {
2713                         .procname       = "retrans_time",
2714                         .maxlen         = sizeof(int),
2715                         .mode           = 0644,
2716                         .proc_handler   = proc_dointvec_userhz_jiffies,
2717                 },
2718                 {
2719                         .procname       = "base_reachable_time",
2720                         .maxlen         = sizeof(int),
2721                         .mode           = 0644,
2722                         .proc_handler   = proc_dointvec_jiffies,
2723                 },
2724                 {
2725                         .procname       = "delay_first_probe_time",
2726                         .maxlen         = sizeof(int),
2727                         .mode           = 0644,
2728                         .proc_handler   = proc_dointvec_jiffies,
2729                 },
2730                 {
2731                         .procname       = "gc_stale_time",
2732                         .maxlen         = sizeof(int),
2733                         .mode           = 0644,
2734                         .proc_handler   = proc_dointvec_jiffies,
2735                 },
2736                 {
2737                         .procname       = "unres_qlen",
2738                         .maxlen         = sizeof(int),
2739                         .mode           = 0644,
2740                         .proc_handler   = proc_dointvec,
2741                 },
2742                 {
2743                         .procname       = "proxy_qlen",
2744                         .maxlen         = sizeof(int),
2745                         .mode           = 0644,
2746                         .proc_handler   = proc_dointvec,
2747                 },
2748                 {
2749                         .procname       = "anycast_delay",
2750                         .maxlen         = sizeof(int),
2751                         .mode           = 0644,
2752                         .proc_handler   = proc_dointvec_userhz_jiffies,
2753                 },
2754                 {
2755                         .procname       = "proxy_delay",
2756                         .maxlen         = sizeof(int),
2757                         .mode           = 0644,
2758                         .proc_handler   = proc_dointvec_userhz_jiffies,
2759                 },
2760                 {
2761                         .procname       = "locktime",
2762                         .maxlen         = sizeof(int),
2763                         .mode           = 0644,
2764                         .proc_handler   = proc_dointvec_userhz_jiffies,
2765                 },
2766                 {
2767                         .procname       = "retrans_time_ms",
2768                         .maxlen         = sizeof(int),
2769                         .mode           = 0644,
2770                         .proc_handler   = proc_dointvec_ms_jiffies,
2771                 },
2772                 {
2773                         .procname       = "base_reachable_time_ms",
2774                         .maxlen         = sizeof(int),
2775                         .mode           = 0644,
2776                         .proc_handler   = proc_dointvec_ms_jiffies,
2777                 },
2778                 {
2779                         .procname       = "gc_interval",
2780                         .maxlen         = sizeof(int),
2781                         .mode           = 0644,
2782                         .proc_handler   = proc_dointvec_jiffies,
2783                 },
2784                 {
2785                         .procname       = "gc_thresh1",
2786                         .maxlen         = sizeof(int),
2787                         .mode           = 0644,
2788                         .proc_handler   = proc_dointvec,
2789                 },
2790                 {
2791                         .procname       = "gc_thresh2",
2792                         .maxlen         = sizeof(int),
2793                         .mode           = 0644,
2794                         .proc_handler   = proc_dointvec,
2795                 },
2796                 {
2797                         .procname       = "gc_thresh3",
2798                         .maxlen         = sizeof(int),
2799                         .mode           = 0644,
2800                         .proc_handler   = proc_dointvec,
2801                 },
2802                 {},
2803         },
2804 };
2805
2806 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2807                           char *p_name, proc_handler *handler)
2808 {
2809         struct neigh_sysctl_table *t;
2810         const char *dev_name_source = NULL;
2811
2812 #define NEIGH_CTL_PATH_ROOT     0
2813 #define NEIGH_CTL_PATH_PROTO    1
2814 #define NEIGH_CTL_PATH_NEIGH    2
2815 #define NEIGH_CTL_PATH_DEV      3
2816
2817         struct ctl_path neigh_path[] = {
2818                 { .procname = "net",     },
2819                 { .procname = "proto",   },
2820                 { .procname = "neigh",   },
2821                 { .procname = "default", },
2822                 { },
2823         };
2824
2825         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2826         if (!t)
2827                 goto err;
2828
2829         t->neigh_vars[0].data  = &p->mcast_probes;
2830         t->neigh_vars[1].data  = &p->ucast_probes;
2831         t->neigh_vars[2].data  = &p->app_probes;
2832         t->neigh_vars[3].data  = &p->retrans_time;
2833         t->neigh_vars[4].data  = &p->base_reachable_time;
2834         t->neigh_vars[5].data  = &p->delay_probe_time;
2835         t->neigh_vars[6].data  = &p->gc_staletime;
2836         t->neigh_vars[7].data  = &p->queue_len;
2837         t->neigh_vars[8].data  = &p->proxy_qlen;
2838         t->neigh_vars[9].data  = &p->anycast_delay;
2839         t->neigh_vars[10].data = &p->proxy_delay;
2840         t->neigh_vars[11].data = &p->locktime;
2841         t->neigh_vars[12].data  = &p->retrans_time;
2842         t->neigh_vars[13].data  = &p->base_reachable_time;
2843
2844         if (dev) {
2845                 dev_name_source = dev->name;
2846                 /* Terminate the table early */
2847                 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2848         } else {
2849                 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2850                 t->neigh_vars[14].data = (int *)(p + 1);
2851                 t->neigh_vars[15].data = (int *)(p + 1) + 1;
2852                 t->neigh_vars[16].data = (int *)(p + 1) + 2;
2853                 t->neigh_vars[17].data = (int *)(p + 1) + 3;
2854         }
2855
2856
2857         if (handler) {
2858                 /* RetransTime */
2859                 t->neigh_vars[3].proc_handler = handler;
2860                 t->neigh_vars[3].extra1 = dev;
2861                 /* ReachableTime */
2862                 t->neigh_vars[4].proc_handler = handler;
2863                 t->neigh_vars[4].extra1 = dev;
2864                 /* RetransTime (in milliseconds)*/
2865                 t->neigh_vars[12].proc_handler = handler;
2866                 t->neigh_vars[12].extra1 = dev;
2867                 /* ReachableTime (in milliseconds) */
2868                 t->neigh_vars[13].proc_handler = handler;
2869                 t->neigh_vars[13].extra1 = dev;
2870         }
2871
2872         t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2873         if (!t->dev_name)
2874                 goto free;
2875
2876         neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2877         neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2878
2879         t->sysctl_header =
2880                 register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2881         if (!t->sysctl_header)
2882                 goto free_procname;
2883
2884         p->sysctl_table = t;
2885         return 0;
2886
2887 free_procname:
2888         kfree(t->dev_name);
2889 free:
2890         kfree(t);
2891 err:
2892         return -ENOBUFS;
2893 }
2894 EXPORT_SYMBOL(neigh_sysctl_register);
2895
2896 void neigh_sysctl_unregister(struct neigh_parms *p)
2897 {
2898         if (p->sysctl_table) {
2899                 struct neigh_sysctl_table *t = p->sysctl_table;
2900                 p->sysctl_table = NULL;
2901                 unregister_sysctl_table(t->sysctl_header);
2902                 kfree(t->dev_name);
2903                 kfree(t);
2904         }
2905 }
2906 EXPORT_SYMBOL(neigh_sysctl_unregister);
2907
2908 #endif  /* CONFIG_SYSCTL */
2909
2910 static int __init neigh_init(void)
2911 {
2912         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
2913         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
2914         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
2915
2916         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
2917         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
2918
2919         return 0;
2920 }
2921
2922 subsys_initcall(neigh_init);
2923