]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/core/neighbour.c
neigh: Protect neigh->ha[] with a seqlock
[net-next-2.6.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
40 #define NEIGH_DEBUG 1
41
42 #define NEIGH_PRINTK(x...) printk(x)
43 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
44 #define NEIGH_PRINTK0 NEIGH_PRINTK
45 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
46 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
47
48 #if NEIGH_DEBUG >= 1
49 #undef NEIGH_PRINTK1
50 #define NEIGH_PRINTK1 NEIGH_PRINTK
51 #endif
52 #if NEIGH_DEBUG >= 2
53 #undef NEIGH_PRINTK2
54 #define NEIGH_PRINTK2 NEIGH_PRINTK
55 #endif
56
57 #define PNEIGH_HASHMASK         0xF
58
59 static void neigh_timer_handler(unsigned long arg);
60 static void __neigh_notify(struct neighbour *n, int type, int flags);
61 static void neigh_update_notify(struct neighbour *neigh);
62 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
63
64 static struct neigh_table *neigh_tables;
65 #ifdef CONFIG_PROC_FS
66 static const struct file_operations neigh_stat_seq_fops;
67 #endif
68
69 /*
70    Neighbour hash table buckets are protected with rwlock tbl->lock.
71
72    - All the scans/updates to hash buckets MUST be made under this lock.
73    - NOTHING clever should be made under this lock: no callbacks
74      to protocol backends, no attempts to send something to network.
75      It will result in deadlocks, if backend/driver wants to use neighbour
76      cache.
77    - If the entry requires some non-trivial actions, increase
78      its reference count and release table lock.
79
80    Neighbour entries are protected:
81    - with reference count.
82    - with rwlock neigh->lock
83
84    Reference count prevents destruction.
85
86    neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
88     - timer
89     - resolution queue
90
91    Again, nothing clever shall be made under neigh->lock,
92    the most complicated procedure, which we allow is dev->hard_header.
93    It is supposed, that dev->hard_header is simplistic and does
94    not make callbacks to neighbour tables.
95
96    The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
   list of neighbour tables. This list is used only in process context.
98  */
99
100 static DEFINE_RWLOCK(neigh_tbl_lock);
101
/* Drop-path output handler installed on dead or unusable entries:
 * frees the skb and reports the network as down.
 */
static int neigh_blackhole(struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
107
/* Final teardown for an entry already unlinked from its hash chain:
 * run the optional per-parms cleanup hook, send an RTM_DELNEIGH
 * netlink notification, then drop the table's reference.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
116
117 /*
118  * It is random distribution in the interval (1/2)*base...(3/2)*base.
119  * It corresponds to default IPv6 settings and is not overridable,
120  * because it is really reasonable choice.
121  */
122
unsigned long neigh_rand_reach_time(unsigned long base)
{
	/* base == 0 disables randomization; otherwise the result lies
	 * in [base/2, 3*base/2), per the comment above.
	 */
	if (!base)
		return 0;

	return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
128
129
/* Synchronous garbage collection, invoked from neigh_alloc() when the
 * table is over its gc thresholds.  Walks every bucket under the table
 * write lock and unlinks entries that are unreferenced and not
 * permanent.  Returns 1 if at least one entry was freed, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i <= nht->hash_mask; i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink with rcu_assign_pointer so
				 * lockless readers walking this chain
				 * see a properly published next pointer.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	/* Record the flush time; neigh_alloc() uses it to rate-limit
	 * forced GC attempts.
	 */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
175
/* Arm the per-entry state machine timer for @when.  The timer holds
 * its own reference on @n; mod_timer() returning nonzero means a timer
 * was already pending, which indicates a state-machine bug and is
 * reported loudly.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}
185
186 static int neigh_del_timer(struct neighbour *n)
187 {
188         if ((n->nud_state & NUD_IN_TIMER) &&
189             del_timer(&n->timer)) {
190                 neigh_release(n);
191                 return 1;
192         }
193         return 0;
194 }
195
196 static void pneigh_queue_purge(struct sk_buff_head *list)
197 {
198         struct sk_buff *skb;
199
200         while ((skb = skb_dequeue(list)) != NULL) {
201                 dev_put(skb->dev);
202                 kfree_skb(skb);
203         }
204 }
205
/* Unlink every entry for @dev (or every entry when @dev is NULL) from
 * the hash table.  Caller holds tbl->lock for writing.  Entries still
 * referenced elsewhere cannot be freed yet, so they are neutered in
 * place; the final destroy happens at the last neigh_release().
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i <= nht->hash_mask; i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* RCU-safe unlink: readers may still be walking
			 * this chain without the table lock.
			 */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&n->arp_queue);
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
254
/* Flush all cached entries for @dev after its link-layer address
 * changed; they will be re-resolved on demand.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
262
/* Device is going down: flush its neighbour and proxy entries, then
 * stop the proxy timer and drain the proxy queue.  Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
275
/* Allocate and minimally initialize a neighbour entry for @tbl.
 * May trigger a synchronous forced GC when the table is over its
 * thresholds.  Returns NULL if memory or table limits forbid the
 * allocation.  The new entry starts with refcnt 1 and dead == 1; it
 * only becomes "alive" once hashed into the table (see neigh_create).
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/* Over gc_thresh3, or over gc_thresh2 with no flush in the last
	 * five seconds: attempt a forced GC before giving up.
	 */
	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;	/* not yet visible in the hash table */
out:
	return n;

out_entries:
	/* Undo the optimistic entries increment on failure. */
	atomic_dec(&tbl->entries);
	goto out;
}
315
/* Allocate a hash table with @entries buckets (hash_mask implies a
 * power-of-two count; neigh_hash_grow() asserts this).  Small bucket
 * arrays come from kzalloc, large ones from whole pages —
 * neigh_hash_free_rcu() mirrors this split.  A fresh random hash seed
 * is drawn for each table.  Returns NULL on allocation failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
{
	size_t size = entries * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour **buckets;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	rcu_assign_pointer(ret->hash_buckets, buckets);
	ret->hash_mask = entries - 1;
	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
	return ret;
}
340
/* RCU callback: free a retired hash table once no lockless readers
 * can still reference it.  The kfree-vs-free_pages choice must mirror
 * the allocation split in neigh_hash_alloc().
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
	struct neighbour **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}
355
/* Replace tbl->nht with a table of @new_entries buckets, rehashing
 * every entry with the new table's random seed.  Caller holds
 * tbl->lock for writing.  On allocation failure the old table is kept
 * and returned.  Entries are relinked with rcu_assign_pointer and the
 * old table is freed via RCU, so lockless lookups that are still
 * traversing the old chains remain safe.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_entries)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	BUG_ON(!is_power_of_2(new_entries));
	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_entries);
	if (!new_nht)
		return old_nht;

	for (i = 0; i <= old_nht->hash_mask; i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash &= new_nht->hash_mask;
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push onto the head of the new bucket's chain. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
397
/* Lockless lookup of the entry for (@pkey, @dev).  Runs the hash
 * chain under rcu_read_lock_bh(); on a match a reference is taken
 * with atomic_inc_not_zero(), which the caller must drop with
 * neigh_release().  Returns NULL if no entry matches or if the match
 * is already being torn down (refcnt reached zero).
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
427
/* Like neigh_lookup(), but matches by key and network namespace only,
 * ignoring the device (the key is hashed with dev == NULL).  Takes a
 * reference on success; caller must neigh_release() it.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
458
/* Create and insert a neighbour entry for (@pkey, @dev), or return the
 * existing one if another CPU inserted it first.  The returned entry
 * carries an extra reference for the caller.  On failure an ERR_PTR is
 * returned: -ENOBUFS when allocation/GC limits were hit, a constructor
 * or neigh_setup error code, or -EINVAL when the parms block died.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate confirmed by 2 * base_reachable_time. */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* Grow the hash table once the load factor exceeds one. */
	if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
		nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Re-scan under the lock: someone may have inserted the same
	 * key while we were constructing our entry.
	 */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;	/* publish: hold a table reference and link in */
	neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(neigh_create);
536
537 static u32 pneigh_hash(const void *pkey, int key_len)
538 {
539         u32 hash_val = *(u32 *)(pkey + key_len - 4);
540         hash_val ^= (hash_val >> 16);
541         hash_val ^= hash_val >> 8;
542         hash_val ^= hash_val >> 4;
543         hash_val &= PNEIGH_HASHMASK;
544         return hash_val;
545 }
546
547 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
548                                               struct net *net,
549                                               const void *pkey,
550                                               int key_len,
551                                               struct net_device *dev)
552 {
553         while (n) {
554                 if (!memcmp(n->key, pkey, key_len) &&
555                     net_eq(pneigh_net(n), net) &&
556                     (n->dev == dev || !n->dev))
557                         return n;
558                 n = n->next;
559         }
560         return NULL;
561 }
562
/* Proxy-entry lookup without taking tbl->lock.
 * NOTE(review): unlike pneigh_lookup(), no lock is taken here, so the
 * caller presumably must hold tbl->lock or otherwise exclude chain
 * modifications — verify against callers.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
573
/* Look up the proxy entry for (@net, @pkey, @dev).  When @creat is
 * nonzero and nothing is found, allocate and insert a new entry —
 * that path requires the RTNL and may sleep (GFP_KERNEL).  Returns
 * NULL on a miss with !creat, or on allocation/pconstructor failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor failed: undo the references we took. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
619
620
/* Remove and free the proxy entry exactly matching (@net, @pkey,
 * @dev).  Returns 0 on success, -ENOENT if no such entry exists.
 * The destructor and kfree run after the entry is unlinked and the
 * table lock has been dropped.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
647
/* Remove every proxy entry for @dev (all entries when @dev is NULL).
 * Called from neigh_ifdown() with tbl->lock held for writing.
 * Always returns -ENOENT; the caller here ignores the result.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
671
672 static void neigh_parms_destroy(struct neigh_parms *parms);
673
/* Drop a reference on @parms, destroying it when the count hits zero. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
679
/* RCU callback: free the neighbour's memory once no lockless readers
 * can still be referencing it.
 */
static void neigh_destroy_rcu(struct rcu_head *head)
{
	struct neighbour *neigh = container_of(head, struct neighbour, rcu);

	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
/*
 *	neighbour must already be out of the table (neigh->dead set);
 *	kills any pending timer, detaches cached hardware-header
 *	entries, purges the arp queue and drops the device and parms
 *	references.  The memory itself is freed via RCU.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");

	/* Detach each cached hardware header; its output is redirected
	 * to the blackhole under the hh seqlock so fast-path users fail
	 * safely instead of using stale state.
	 */
	while ((hh = neigh->hh) != NULL) {
		neigh->hh = hh->hh_next;
		hh->hh_next = NULL;

		write_seqlock_bh(&hh->hh_lock);
		hh->hh_output = neigh_blackhole;
		write_sequnlock_bh(&hh->hh_lock);
		hh_cache_put(hh);
	}

	skb_queue_purge(&neigh->arp_queue);

	dev_put(neigh->dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	call_rcu(&neigh->rcu, neigh_destroy_rcu);
}
EXPORT_SYMBOL(neigh_destroy);
727
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	/* Route all output — including every cached hardware-header
	 * entry — through the slow, resolving path.
	 */
	neigh->output = neigh->ops->output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->output;
}
744
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	/* Switch output — and every cached hardware-header entry — to
	 * the fast connected path.
	 */
	neigh->output = neigh->ops->connected_output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->hh_output;
}
761
762 static void neigh_periodic_work(struct work_struct *work)
763 {
764         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
765         struct neighbour *n;
766         struct neighbour __rcu **np;
767         unsigned int i;
768         struct neigh_hash_table *nht;
769
770         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
771
772         write_lock_bh(&tbl->lock);
773         nht = rcu_dereference_protected(tbl->nht,
774                                         lockdep_is_held(&tbl->lock));
775
776         /*
777          *      periodically recompute ReachableTime from random function
778          */
779
780         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
781                 struct neigh_parms *p;
782                 tbl->last_rand = jiffies;
783                 for (p = &tbl->parms; p; p = p->next)
784                         p->reachable_time =
785                                 neigh_rand_reach_time(p->base_reachable_time);
786         }
787
788         for (i = 0 ; i <= nht->hash_mask; i++) {
789                 np = &nht->hash_buckets[i];
790
791                 while ((n = rcu_dereference_protected(*np,
792                                 lockdep_is_held(&tbl->lock))) != NULL) {
793                         unsigned int state;
794
795                         write_lock(&n->lock);
796
797                         state = n->nud_state;
798                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
799                                 write_unlock(&n->lock);
800                                 goto next_elt;
801                         }
802
803                         if (time_before(n->used, n->confirmed))
804                                 n->used = n->confirmed;
805
806                         if (atomic_read(&n->refcnt) == 1 &&
807                             (state == NUD_FAILED ||
808                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
809                                 *np = n->next;
810                                 n->dead = 1;
811                                 write_unlock(&n->lock);
812                                 neigh_cleanup_and_release(n);
813                                 continue;
814                         }
815                         write_unlock(&n->lock);
816
817 next_elt:
818                         np = &n->next;
819                 }
820                 /*
821                  * It's fine to release lock here, even if hash table
822                  * grows while we are preempted.
823                  */
824                 write_unlock_bh(&tbl->lock);
825                 cond_resched();
826                 write_lock_bh(&tbl->lock);
827         }
828         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
829          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
830          * base_reachable_time.
831          */
832         schedule_delayed_work(&tbl->gc_work,
833                               tbl->parms.base_reachable_time >> 1);
834         write_unlock_bh(&tbl->lock);
835 }
836
837 static __inline__ int neigh_max_probes(struct neighbour *n)
838 {
839         struct neigh_parms *p = n->parms;
840         return (n->nud_state & NUD_PROBE) ?
841                 p->ucast_probes :
842                 p->ucast_probes + p->app_probes + p->mcast_probes;
843 }
844
/* Resolution failed: report every queued skb through
 * ops->error_report and purge the queue.  Called with neigh->lock
 * write-held; the lock is dropped around each callback because the
 * report routine may re-enter the neighbour code (see comment below).
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	skb_queue_purge(&neigh->arp_queue);
}
868
/* Called when a timer expires for a neighbour entry. */

/* Drives the NUD state machine for one entry: REACHABLE may decay to
 * DELAY or STALE, DELAY either returns to REACHABLE (if traffic
 * confirmed the entry) or advances to PROBE, and INCOMPLETE/PROBE
 * retransmit solicitations until neigh_max_probes() is exhausted, at
 * which point the entry is failed and invalidated.
 *
 * Runs in timer (softirq) context; the timer holds one reference on
 * the entry which is released at the end.
 */
static void neigh_timer_handler(unsigned long arg)
{
        unsigned long now, next;
        struct neighbour *neigh = (struct neighbour *)arg;
        unsigned state;
        int notify = 0;

        write_lock(&neigh->lock);

        state = neigh->nud_state;
        now = jiffies;
        next = now + HZ;

        if (!(state & NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
                printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
                goto out;
        }

        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->reachable_time)) {
                        /* Still within the confirmation window; just
                         * rearm for when it expires.
                         */
                        NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else if (time_before_eq(now,
                                          neigh->used + neigh->parms->delay_probe_time)) {
                        /* Recently used but unconfirmed: give upper
                         * layers delay_probe_time to confirm before
                         * probing (RFC 2461 DELAY).
                         */
                        NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                        neigh->nud_state = NUD_DELAY;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        next = now + neigh->parms->delay_probe_time;
                } else {
                        /* Idle and unconfirmed: demote to STALE; the
                         * next transmission will trigger verification.
                         */
                        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
                        neigh->nud_state = NUD_STALE;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        notify = 1;
                }
        } else if (state & NUD_DELAY) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->delay_probe_time)) {
                        /* Traffic confirmed the entry while we waited. */
                        NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
                        neigh->nud_state = NUD_REACHABLE;
                        neigh->updated = jiffies;
                        neigh_connect(neigh);
                        notify = 1;
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else {
                        /* No confirmation: start active probing. */
                        NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
                        neigh->nud_state = NUD_PROBE;
                        neigh->updated = jiffies;
                        atomic_set(&neigh->probes, 0);
                        next = now + neigh->parms->retrans_time;
                }
        } else {
                /* NUD_PROBE|NUD_INCOMPLETE */
                next = now + neigh->parms->retrans_time;
        }

        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh_invalidate(neigh);
        }

        if (neigh->nud_state & NUD_IN_TIMER) {
                /* Clamp the rearm so a state change is never delayed by
                 * more than ~HZ/2 past its due time.
                 */
                if (time_before(next, jiffies + HZ/2))
                        next = jiffies + HZ/2;
                if (!mod_timer(&neigh->timer, next))
                        neigh_hold(neigh);
        }
        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                struct sk_buff *skb = skb_peek(&neigh->arp_queue);
                /* keep skb alive even if arp_queue overflows */
                if (skb)
                        skb = skb_copy(skb, GFP_ATOMIC);
                /* solicit() may sleep in ->output paths of some stacks
                 * of callers; drop the entry lock before calling it.
                 */
                write_unlock(&neigh->lock);
                neigh->ops->solicit(neigh, skb);
                atomic_inc(&neigh->probes);
                kfree_skb(skb);
        } else {
out:
                write_unlock(&neigh->lock);
        }

        if (notify)
                neigh_update_notify(neigh);

        /* Drop the reference held by the (now expired) timer. */
        neigh_release(neigh);
}
963
/* Kick the resolution machinery for an entry that is not currently
 * usable for output.
 *
 * Returns 0 when the caller may transmit immediately (entry already in
 * a CONNECTED/DELAY/PROBE state), 1 when the skb was consumed: either
 * queued on arp_queue pending resolution, or freed because resolution
 * is impossible (no probe types configured -> NUD_FAILED).
 *
 * Takes and releases neigh->lock itself; callers hold a reference.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;
        unsigned long now;

        write_lock_bh(&neigh->lock);

        rc = 0;
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;

        now = jiffies;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                        /* Start resolution: prime the probe counter so
                         * unicast probes are skipped straight to
                         * mcast/app probes, and fire the timer at once.
                         */
                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh->updated = jiffies;
                        neigh_add_timer(neigh, now + 1);
                } else {
                        /* No way to probe at all: fail immediately. */
                        neigh->nud_state = NUD_FAILED;
                        neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);

                        kfree_skb(skb);
                        return 1;
                }
        } else if (neigh->nud_state & NUD_STALE) {
                /* STALE -> DELAY: usable address, but schedule
                 * reverification after delay_probe_time.
                 */
                NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->updated = jiffies;
                neigh_add_timer(neigh,
                                jiffies + neigh->parms->delay_probe_time);
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        /* Bounded queue: discard the oldest pending skb
                         * once queue_len is reached.
                         */
                        if (skb_queue_len(&neigh->arp_queue) >=
                            neigh->parms->queue_len) {
                                struct sk_buff *buff;
                                buff = __skb_dequeue(&neigh->arp_queue);
                                kfree_skb(buff);
                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                        }
                        skb_dst_force(skb);
                        __skb_queue_tail(&neigh->arp_queue, skb);
                }
                rc = 1;
        }
out_unlock_bh:
        write_unlock_bh(&neigh->lock);
        return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1018
1019 static void neigh_update_hhs(const struct neighbour *neigh)
1020 {
1021         struct hh_cache *hh;
1022         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1023                 = NULL;
1024
1025         if (neigh->dev->header_ops)
1026                 update = neigh->dev->header_ops->cache_update;
1027
1028         if (update) {
1029                 for (hh = neigh->hh; hh; hh = hh->hh_next) {
1030                         write_seqlock_bh(&hh->hh_lock);
1031                         update(hh, neigh->dev, neigh->ha);
1032                         write_sequnlock_bh(&hh->hh_lock);
1033                 }
1034         }
1035 }
1036
1037
1038
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if none is supplied.
   -- new    is the new state.
   -- flags
        NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
                                if it is different.
        NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
                                lladdr instead of overriding it
                                if it is different.
                                It also allows retaining the current state
                                if lladdr is unchanged.
        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
                                NTF_ROUTER flag.
        NEIGH_UPDATE_F_ISROUTER indicates whether the neighbour is known
                                to be a router.

   Caller MUST hold a reference count on the entry.
 */
1059
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                 u32 flags)
{
        u8 old;
        int err;
        int notify = 0;
        struct net_device *dev;
        int update_isrouter = 0;

        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        /* Only administrative updates may touch NOARP/PERMANENT
         * entries.
         */
        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
            (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;

        if (!(new & NUD_VALID)) {
                /* Transition to an invalid state: stop any pending
                 * timer and mark the entry suspect; FAILED additionally
                 * flushes the pending-skb queue via neigh_invalidate().
                 */
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
                }
                goto out;
        }

        /* Compare new lladdr with cached one */
        if (!dev->addr_len) {
                /* First case: device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check override flag
                 */
                /* Aliasing lladdr to neigh->ha encodes "address is
                 * unchanged"; the pointer comparisons below rely on it.
                 */
                if ((old & NUD_VALID) &&
                    !memcmp(lladdr, neigh->ha, dev->addr_len))
                        lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID))
                        goto out;
                lladdr = neigh->ha;
        }

        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;
        neigh->updated = jiffies;

        /* If entry was valid and address is not changed,
           do not change entry state, if new one is STALE.
         */
        err = 0;
        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
        if (old & NUD_VALID) {
                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                        update_isrouter = 0;
                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
                            (old & NUD_CONNECTED)) {
                                /* Weak override: keep the old address
                                 * but demote the entry to STALE.
                                 */
                                lladdr = neigh->ha;
                                new = NUD_STALE;
                        } else
                                goto out;
                } else {
                        if (lladdr == neigh->ha && new == NUD_STALE &&
                            ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
                             (old & NUD_CONNECTED))
                            )
                                new = old;
                }
        }

        if (new != old) {
                neigh_del_timer(neigh);
                if (new & NUD_IN_TIMER)
                        neigh_add_timer(neigh, (jiffies +
                                                ((new & NUD_REACHABLE) ?
                                                 neigh->parms->reachable_time :
                                                 0)));
                neigh->nud_state = new;
        }

        if (lladdr != neigh->ha) {
                /* Publish the new hardware address under ha_lock so
                 * output paths never read a half-written address, then
                 * refresh all cached hardware headers.
                 */
                write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (neigh->parms->base_reachable_time << 1);
                notify = 1;
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* Again: avoid dead loop if something went wrong */

                /* The entry just became valid: flush the arp_queue by
                 * transmitting each skb, dropping the lock around
                 * ->output() since it may recurse into this entry.
                 */
                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct neighbour *n1 = neigh;
                        write_unlock_bh(&neigh->lock);
                        /* On shaper/eql skb->dst->neighbour != neigh :( */
                        if (skb_dst(skb) && skb_dst(skb)->neighbour)
                                n1 = skb_dst(skb)->neighbour;
                        n1->output(skb);
                        write_lock_bh(&neigh->lock);
                }
                skb_queue_purge(&neigh->arp_queue);
        }
out:
        if (update_isrouter) {
                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
                        (neigh->flags | NTF_ROUTER) :
                        (neigh->flags & ~NTF_ROUTER);
        }
        write_unlock_bh(&neigh->lock);

        if (notify)
                neigh_update_notify(neigh);

        return err;
}
EXPORT_SYMBOL(neigh_update);
1201
1202 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1203                                  u8 *lladdr, void *saddr,
1204                                  struct net_device *dev)
1205 {
1206         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1207                                                  lladdr || !dev->addr_len);
1208         if (neigh)
1209                 neigh_update(neigh, lladdr, NUD_STALE,
1210                              NEIGH_UPDATE_F_OVERRIDE);
1211         return neigh;
1212 }
1213 EXPORT_SYMBOL(neigh_event_ns);
1214
/* Lockless scan of n->hh for a cached hardware header of @protocol.
 * On a hit, take a reference and try to install the entry into
 * dst->hh.  Returns true when a matching hh_cache entry exists, even
 * if another CPU won the race to install it first.
 */
static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
                                   __be16 protocol)
{
        struct hh_cache *hh;

        smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
        for (hh = n->hh; hh; hh = hh->hh_next) {
                if (hh->hh_type == protocol) {
                        atomic_inc(&hh->hh_refcnt);
                        /* Lost the cmpxchg race: dst->hh was set by
                         * someone else, so drop our extra reference.
                         */
                        if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
                                hh_cache_put(hh);
                        return true;
                }
        }
        return false;
}
1231
/* called with read_lock_bh(&n->lock); */
/* Attach a hardware-header cache entry for @protocol to neighbour @n
 * and to @dst.  Fast path: reuse an existing entry via
 * neigh_hh_lookup().  Slow path: allocate and fill a new hh_cache,
 * then re-check under the write lock in case another thread inserted
 * one concurrently.
 */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
                          __be16 protocol)
{
        struct hh_cache *hh;
        struct net_device *dev = dst->dev;

        if (likely(neigh_hh_lookup(n, dst, protocol)))
                return;

        /* slow path */
        hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
        if (!hh)
                return;

        seqlock_init(&hh->hh_lock);
        hh->hh_type = protocol;
        /* Two references: one for n->hh chain, one for dst->hh. */
        atomic_set(&hh->hh_refcnt, 2);

        if (dev->header_ops->cache(n, hh)) {
                kfree(hh);
                return;
        }

        write_lock_bh(&n->lock);

        /* must check if another thread already did the insert */
        if (neigh_hh_lookup(n, dst, protocol)) {
                kfree(hh);
                goto end;
        }

        if (n->nud_state & NUD_CONNECTED)
                hh->hh_output = n->ops->hh_output;
        else
                hh->hh_output = n->ops->output;

        /* Make the fully-initialized entry visible before linking it
         * so lockless readers (neigh_hh_lookup) never see a partially
         * constructed hh_cache.
         */
        hh->hh_next = n->hh;
        smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
        n->hh       = hh;

        if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
                hh_cache_put(hh);
end:
        write_unlock_bh(&n->lock);
}
1278
1279 /* This function can be used in contexts, where only old dev_queue_xmit
1280  * worked, f.e. if you want to override normal output path (eql, shaper),
1281  * but resolution is not made yet.
1282  */
1283
1284 int neigh_compat_output(struct sk_buff *skb)
1285 {
1286         struct net_device *dev = skb->dev;
1287
1288         __skb_pull(skb, skb_network_offset(skb));
1289
1290         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1291                             skb->len) < 0 &&
1292             dev->header_ops->rebuild(skb))
1293                 return 0;
1294
1295         return dev_queue_xmit(skb);
1296 }
1297 EXPORT_SYMBOL(neigh_compat_output);
1298
/* Slow and careful. */

/* Output path used while the neighbour may still need resolution.
 * Queues the skb via neigh_event_send() if the entry is not usable;
 * otherwise builds the hardware header (reading neigh->ha under the
 * ha_lock seqlock to avoid torn addresses) and transmits.
 */
int neigh_resolve_output(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct neighbour *neigh;
        int rc = 0;

        if (!dst || !(neigh = dst->neighbour))
                goto discard;

        __skb_pull(skb, skb_network_offset(skb));

        if (!neigh_event_send(neigh, skb)) {
                int err;
                struct net_device *dev = neigh->dev;
                unsigned int seq;

                /* Populate the hh cache on first use, unless the dst
                 * opted out with DST_NOCACHE.
                 */
                if (dev->header_ops->cache &&
                    !dst->hh &&
                    !(dst->flags & DST_NOCACHE))
                        neigh_hh_init(neigh, dst, dst->ops->protocol);

                /* Retry if neigh->ha changed mid-copy (seqlock reader). */
                do {
                        seq = read_seqbegin(&neigh->ha_lock);
                        err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                              neigh->ha, NULL, skb->len);
                } while (read_seqretry(&neigh->ha_lock, seq));

                if (err >= 0)
                        rc = neigh->ops->queue_xmit(skb);
                else
                        goto out_kfree_skb;
        }
out:
        return rc;
discard:
        NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
                      dst, dst ? dst->neighbour : NULL);
out_kfree_skb:
        rc = -EINVAL;
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1344
/* As fast as possible without hh cache */

/* Output path for entries known to be reachable: no resolution step,
 * just build the hardware header (reading neigh->ha under the ha_lock
 * seqlock so a concurrent address update cannot be observed torn) and
 * hand the skb to the device queue.
 */
int neigh_connected_output(struct sk_buff *skb)
{
        int err;
        struct dst_entry *dst = skb_dst(skb);
        struct neighbour *neigh = dst->neighbour;
        struct net_device *dev = neigh->dev;
        unsigned int seq;

        __skb_pull(skb, skb_network_offset(skb));

        /* Retry if neigh->ha changed mid-copy (seqlock reader). */
        do {
                seq = read_seqbegin(&neigh->ha_lock);
                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                      neigh->ha, NULL, skb->len);
        } while (read_seqretry(&neigh->ha_lock, seq));

        if (err >= 0)
                err = neigh->ops->queue_xmit(skb);
        else {
                err = -EINVAL;
                kfree_skb(skb);
        }
        return err;
}
EXPORT_SYMBOL(neigh_connected_output);
1372
/* Timer callback for the per-table proxy queue: answer (via
 * tbl->proxy_redo) every queued request whose randomized delay has
 * elapsed, and rearm the timer for the earliest one still pending.
 */
static void neigh_proxy_process(unsigned long arg)
{
        struct neigh_table *tbl = (struct neigh_table *)arg;
        long sched_next = 0;
        unsigned long now = jiffies;
        struct sk_buff *skb, *n;

        spin_lock(&tbl->proxy_queue.lock);

        skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
                long tdif = NEIGH_CB(skb)->sched_next - now;

                if (tdif <= 0) {
                        /* Due: replay the request, or drop it if the
                         * device went down since it was queued.
                         */
                        struct net_device *dev = skb->dev;
                        __skb_unlink(skb, &tbl->proxy_queue);
                        if (tbl->proxy_redo && netif_running(dev))
                                tbl->proxy_redo(skb);
                        else
                                kfree_skb(skb);

                        /* Drop the device ref taken in pneigh_enqueue(). */
                        dev_put(dev);
                } else if (!sched_next || tdif < sched_next)
                        sched_next = tdif;
        }
        del_timer(&tbl->proxy_timer);
        if (sched_next)
                mod_timer(&tbl->proxy_timer, jiffies + sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
}
1402
1403 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1404                     struct sk_buff *skb)
1405 {
1406         unsigned long now = jiffies;
1407         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1408
1409         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1410                 kfree_skb(skb);
1411                 return;
1412         }
1413
1414         NEIGH_CB(skb)->sched_next = sched_next;
1415         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1416
1417         spin_lock(&tbl->proxy_queue.lock);
1418         if (del_timer(&tbl->proxy_timer)) {
1419                 if (time_before(tbl->proxy_timer.expires, sched_next))
1420                         sched_next = tbl->proxy_timer.expires;
1421         }
1422         skb_dst_drop(skb);
1423         dev_hold(skb->dev);
1424         __skb_queue_tail(&tbl->proxy_queue, skb);
1425         mod_timer(&tbl->proxy_timer, sched_next);
1426         spin_unlock(&tbl->proxy_queue.lock);
1427 }
1428 EXPORT_SYMBOL(pneigh_enqueue);
1429
1430 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1431                                                       struct net *net, int ifindex)
1432 {
1433         struct neigh_parms *p;
1434
1435         for (p = &tbl->parms; p; p = p->next) {
1436                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1437                     (!p->dev && !ifindex))
1438                         return p;
1439         }
1440
1441         return NULL;
1442 }
1443
/* Clone the table's default neigh_parms for @dev, run the driver's
 * ndo_neigh_setup hook, and link the new parms into the table's list.
 * Returns the new parms, or NULL on allocation/setup failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
                                      struct neigh_table *tbl)
{
        struct neigh_parms *p, *ref;
        struct net *net = dev_net(dev);
        const struct net_device_ops *ops = dev->netdev_ops;

        /* Use the table-wide default parms as the template. */
        ref = lookup_neigh_parms(tbl, net, 0);
        if (!ref)
                return NULL;

        p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
        if (p) {
                p->tbl            = tbl;
                atomic_set(&p->refcnt, 1);
                p->reachable_time =
                                neigh_rand_reach_time(p->base_reachable_time);

                /* Let the driver veto/adjust before taking any refs. */
                if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
                        kfree(p);
                        return NULL;
                }

                dev_hold(dev);
                p->dev = dev;
                write_pnet(&p->net, hold_net(net));
                p->sysctl_table = NULL;
                write_lock_bh(&tbl->lock);
                p->next         = tbl->parms.next;
                tbl->parms.next = p;
                write_unlock_bh(&tbl->lock);
        }
        return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1479
1480 static void neigh_rcu_free_parms(struct rcu_head *head)
1481 {
1482         struct neigh_parms *parms =
1483                 container_of(head, struct neigh_parms, rcu_head);
1484
1485         neigh_parms_put(parms);
1486 }
1487
/* Unlink @parms from the table's list and schedule its final release
 * after an RCU grace period (readers may still be traversing the
 * list).  The table's built-in default parms are never released.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
        struct neigh_parms **p;

        if (!parms || parms == &tbl->parms)
                return;
        write_lock_bh(&tbl->lock);
        for (p = &tbl->parms.next; *p; p = &(*p)->next) {
                if (*p == parms) {
                        *p = parms->next;
                        parms->dead = 1;
                        write_unlock_bh(&tbl->lock);
                        /* Drop the device ref taken in neigh_parms_alloc(). */
                        if (parms->dev)
                                dev_put(parms->dev);
                        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
                        return;
                }
        }
        write_unlock_bh(&tbl->lock);
        NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
EXPORT_SYMBOL(neigh_parms_release);
1510
/* Final teardown once the last reference is gone: release the netns
 * reference taken at allocation time, then free the object itself.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        struct net *net = neigh_parms_net(parms);

        release_net(net);
        kfree(parms);
}
1516
1517 static struct lock_class_key neigh_table_proxy_queue_class;
1518
/* One-time initialization of a neighbour table: default parms, entry
 * slab cache, per-cpu statistics, /proc entry, hash tables, locks, the
 * periodic GC work and the proxy timer/queue.  Panics on allocation
 * failure (tables are created at boot / module init).
 */
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
        unsigned long now = jiffies;
        unsigned long phsize;

        write_pnet(&tbl->parms.net, &init_net);
        atomic_set(&tbl->parms.refcnt, 1);
        /* Randomize to de-synchronize timers across entries. */
        tbl->parms.reachable_time =
                          neigh_rand_reach_time(tbl->parms.base_reachable_time);

        if (!tbl->kmem_cachep)
                tbl->kmem_cachep =
                        kmem_cache_create(tbl->id, tbl->entry_size, 0,
                                          SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                                          NULL);
        tbl->stats = alloc_percpu(struct neigh_statistics);
        if (!tbl->stats)
                panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
        if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
                              &neigh_stat_seq_fops, tbl))
                panic("cannot create neighbour proc dir entry");
#endif

        tbl->nht = neigh_hash_alloc(8);

        phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
        tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

        if (!tbl->nht || !tbl->phash_buckets)
                panic("cannot allocate neighbour cache hashes");

        rwlock_init(&tbl->lock);
        INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
        schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
        setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
        skb_queue_head_init_class(&tbl->proxy_queue,
                        &neigh_table_proxy_queue_class);

        tbl->last_flush = now;
        tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
EXPORT_SYMBOL(neigh_table_init_no_netlink);
1563
1564 void neigh_table_init(struct neigh_table *tbl)
1565 {
1566         struct neigh_table *tmp;
1567
1568         neigh_table_init_no_netlink(tbl);
1569         write_lock(&neigh_tbl_lock);
1570         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1571                 if (tmp->family == tbl->family)
1572                         break;
1573         }
1574         tbl->next       = neigh_tables;
1575         neigh_tables    = tbl;
1576         write_unlock(&neigh_tbl_lock);
1577
1578         if (unlikely(tmp)) {
1579                 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1580                        "family %d\n", tbl->family);
1581                 dump_stack();
1582         }
1583 }
1584 EXPORT_SYMBOL(neigh_table_init);
1585
/* Tear down a neighbour table on module unload: stop GC work and
 * timers, flush queues and entries, unlink from the global list, then
 * free the hashes, stats, proc entry and slab cache.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
        struct neigh_table **tp;

        /* It is not clean... Fix it to unload IPv6 module safely */
        cancel_delayed_work(&tbl->gc_work);
        flush_scheduled_work();
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        neigh_ifdown(tbl, NULL);
        /* All entries should be gone by now; leaking means someone
         * still holds references.
         */
        if (atomic_read(&tbl->entries))
                printk(KERN_CRIT "neighbour leakage\n");
        write_lock(&neigh_tbl_lock);
        for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
                if (*tp == tbl) {
                        *tp = tbl->next;
                        break;
                }
        }
        write_unlock(&neigh_tbl_lock);

        /* The hash table may still have lockless readers in flight. */
        call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
        tbl->nht = NULL;

        kfree(tbl->phash_buckets);
        tbl->phash_buckets = NULL;

        remove_proc_entry(tbl->id, init_net.proc_net_stat);

        free_percpu(tbl->stats);
        tbl->stats = NULL;

        kmem_cache_destroy(tbl->kmem_cachep);
        tbl->kmem_cachep = NULL;

        return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1624
/* RTM_DELNEIGH netlink handler: delete a (p)neighbour entry identified
 * by family, NDA_DST and optional ifindex.  Proxy entries are removed
 * outright; normal entries are forced to NUD_FAILED via an
 * administrative neigh_update().  Runs under RTNL.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *dst_attr;
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err = -EINVAL;

        ASSERT_RTNL();
        if (nlmsg_len(nlh) < sizeof(*ndm))
                goto out;

        dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
        if (dst_attr == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }
        }

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                struct neighbour *neigh;

                if (tbl->family != ndm->ndm_family)
                        continue;
                /* Found the family's table; tables are never removed
                 * while we run under RTNL, so the list lock can be
                 * dropped before the (possibly sleeping) work below.
                 */
                read_unlock(&neigh_tbl_lock);

                if (nla_len(dst_attr) < tbl->key_len)
                        goto out;

                if (ndm->ndm_flags & NTF_PROXY) {
                        err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
                        goto out;
                }

                if (dev == NULL)
                        goto out;

                neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
                if (neigh == NULL) {
                        err = -ENOENT;
                        goto out;
                }

                err = neigh_update(neigh, NULL, NUD_FAILED,
                                   NEIGH_UPDATE_F_OVERRIDE |
                                   NEIGH_UPDATE_F_ADMIN);
                /* Drop the ref taken by neigh_lookup(). */
                neigh_release(neigh);
                goto out;
        }
        read_unlock(&neigh_tbl_lock);
        err = -EAFNOSUPPORT;

out:
        return err;
}
1688
/* RTM_NEWNEIGH handler: create or update a neighbour entry, or a proxy
 * entry when NTF_PROXY is set.  Runs under RTNL.  Returns 0 on success
 * or a negative errno. */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	/* a destination address attribute is mandatory */
	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* a supplied link-layer address must be long enough for
		 * this device; err is still -EINVAL here */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	/* locate the table for the requested address family */
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* table found: drop the list lock before the real work
		 * (every path below leaves via "goto out") */
		read_unlock(&neigh_tbl_lock);

		/* key must be at least tbl->key_len; err is still -EINVAL */
		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			/* proxy entries live in a separate hash; create
			 * on demand (last arg = 1) and refresh the flags */
			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		/* non-proxy entries require a device; err stays -EINVAL */
		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* only an explicit NLM_F_REPLACE may override the
			 * link-layer address of an existing entry */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE: just kick resolution, no admin update */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1786
/* Dump one neigh_parms set as a nested NDTA_PARMS attribute.
 * The NLA_PUT* macros jump to nla_put_failure when the skb runs out of
 * space, in which case the half-built nest is cancelled.
 * Returns the nest length on success, -ENOBUFS/-EMSGSIZE on failure. */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	/* per-device parms carry an ifindex; the table default does not */
	if (parms->dev)
		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);

	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
		      parms->base_reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1820
/* Fill one RTM_NEWNEIGHTBL message describing table @tbl: tunables,
 * configuration snapshot, summed per-CPU statistics and the default
 * parms.  tbl->lock is read-held across the dump for a consistent
 * view; both exit paths drop it. */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	/* NLA_PUT* jump to nla_put_failure when the skb runs out of room */
	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);

	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* the hash parameters live in an RCU-managed structure */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd;
		ndc.ndtc_hash_mask = nht->hash_mask;
		rcu_read_unlock_bh();

		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* sum per-CPU counters into a single ndt_stats */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
	}

	/* only the table's device-independent default parms go here */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1904
/* Fill an RTM_NEWNEIGHTBL message that carries only the table name and
 * one specific (typically per-device) parms set.  tbl->lock is
 * read-held while the attributes are copied; both exits drop it. */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1936
/* Validation policy for top-level RTM_SETNEIGHTBL attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1945
/* Validation policy for the nested NDTA_PARMS attributes; the U64
 * entries are millisecond values (see nla_get_msecs in neightbl_set). */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1961
/* RTM_SETNEIGHTBL handler: update tunables of the neigh table selected
 * by family + NDTA_NAME, and optionally one of its parms sets (selected
 * by NDTPA_IFINDEX inside NDTA_PARMS; 0 means the table default).
 * Returns 0 on success or a negative errno. */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	/* find the target table; family 0 acts as a wildcard */
	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* apply every parm attribute that was supplied */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				p->queue_len = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	/* table-wide GC tunables */
	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2086
/* Netlink dump callback for RTM_GETNEIGHTBL: for every table emit the
 * table info followed by its per-device parms.  Resume positions are
 * kept in cb->args[0] (table index) and cb->args[1] (parms index). */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* the list starts at parms.next: the device-independent
		 * default parms were already emitted with the table */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* skipping only applies to the table we resumed inside */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2135
/* Fill one RTM_NEWNEIGH message for @neigh.  The volatile fields
 * (state, cache times, hardware address) are read under neigh->lock so
 * they form a consistent snapshot. */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		/* take a consistent copy of the hardware address */
		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			/* drop the lock before the cancel path */
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2185
/* Announce a neighbour change: first to in-kernel netevent listeners,
 * then to userspace via an RTM_NEWNEIGH netlink notification. */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2191
/* Dump the entries of one neigh table into @skb under RCU-bh.  Resume
 * positions are kept in cb->args[1] (hash chain) and cb->args[2]
 * (index within the chain).  Returns skb->len, or -1 when the skb
 * filled up and the dump must continue in the next callback. */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = 0; h <= nht->hash_mask; h++) {
		if (h < s_h)
			continue;
		if (h > s_h)
			s_idx = 0;	/* only skip inside the resumed chain */
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2234
/* Netlink dump callback for RTM_GETNEIGH: walk all tables matching the
 * requested family.  cb->args[0] is the table resume index;
 * args[1..] belong to neigh_dump_table and are reset when moving on to
 * a new table. */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (neigh_dump_table(tbl, skb, cb) < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2258
/* Invoke @cb on every neighbour in @tbl.  RCU-bh protects the entries
 * themselves; tbl->lock (read) additionally pins the hash array so it
 * cannot be resized while we walk it. */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain <= nht->hash_mask; chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2280
/* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every chain and unlink each neighbour for which @cb returns
 * non-zero, marking it dead and dropping the table's reference. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain <= nht->hash_mask; chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* unlink n; np stays put so the next
				 * iteration re-reads the same slot */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2315
2316 #ifdef CONFIG_PROC_FS
2317
/* seq_file helper: return the first neighbour that passes the state's
 * filters (namespace, optional sub-iterator, NEIGH_SEQ_SKIP_NOARP),
 * scanning from bucket 0.  Caller holds rcu_read_lock_bh (taken in
 * neigh_seq_start). */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2356
/* seq_file helper: advance from @n to the next neighbour that passes
 * the filters, moving to later buckets as needed.  When @pos is
 * non-NULL it is decremented for each entry found (positioning mode).
 * Caller holds rcu_read_lock_bh. */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	/* let the protocol sub-iterator consume its sub-entries first */
	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket > nht->hash_mask)
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2404
2405 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2406 {
2407         struct neighbour *n = neigh_get_first(seq);
2408
2409         if (n) {
2410                 --(*pos);
2411                 while (*pos) {
2412                         n = neigh_get_next(seq, n, pos);
2413                         if (!n)
2414                                 break;
2415                 }
2416         }
2417         return *pos ? NULL : n;
2418 }
2419
2420 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2421 {
2422         struct neigh_seq_state *state = seq->private;
2423         struct net *net = seq_file_net(seq);
2424         struct neigh_table *tbl = state->tbl;
2425         struct pneigh_entry *pn = NULL;
2426         int bucket = state->bucket;
2427
2428         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2429         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2430                 pn = tbl->phash_buckets[bucket];
2431                 while (pn && !net_eq(pneigh_net(pn), net))
2432                         pn = pn->next;
2433                 if (pn)
2434                         break;
2435         }
2436         state->bucket = bucket;
2437
2438         return pn;
2439 }
2440
2441 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2442                                             struct pneigh_entry *pn,
2443                                             loff_t *pos)
2444 {
2445         struct neigh_seq_state *state = seq->private;
2446         struct net *net = seq_file_net(seq);
2447         struct neigh_table *tbl = state->tbl;
2448
2449         pn = pn->next;
2450         while (!pn) {
2451                 if (++state->bucket > PNEIGH_HASHMASK)
2452                         break;
2453                 pn = tbl->phash_buckets[state->bucket];
2454                 while (pn && !net_eq(pneigh_net(pn), net))
2455                         pn = pn->next;
2456                 if (pn)
2457                         break;
2458         }
2459
2460         if (pn && pos)
2461                 --(*pos);
2462
2463         return pn;
2464 }
2465
2466 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2467 {
2468         struct pneigh_entry *pn = pneigh_get_first(seq);
2469
2470         if (pn) {
2471                 --(*pos);
2472                 while (*pos) {
2473                         pn = pneigh_get_next(seq, pn, pos);
2474                         if (!pn)
2475                                 break;
2476                 }
2477         }
2478         return *pos ? NULL : pn;
2479 }
2480
2481 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2482 {
2483         struct neigh_seq_state *state = seq->private;
2484         void *rc;
2485         loff_t idxpos = *pos;
2486
2487         rc = neigh_get_idx(seq, &idxpos);
2488         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2489                 rc = pneigh_get_idx(seq, &idxpos);
2490
2491         return rc;
2492 }
2493
/* Common seq_file ->start for protocol neighbour /proc files: set up
 * the iteration state, enter the RCU-bh read side (left again in
 * neigh_seq_stop) and position at *pos. */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	/* position 0 is the conventional header token */
	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2509
/* Advance the walk started by neigh_seq_start(): neighbour entries
 * first, then (unless NEIGH_SEQ_NEIGH_ONLY) the proxy-neighbour
 * entries.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		/* Still in the neighbour hash: try the next entry ... */
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* ... and cross over to the proxy table when exhausted. */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		/* Already in the proxy phase, which NEIGH_ONLY forbids. */
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2536
/* Finish the walk: drop the lock taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2543
2544 /* statistics via seq_file */
2545
2546 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2547 {
2548         struct neigh_table *tbl = seq->private;
2549         int cpu;
2550
2551         if (*pos == 0)
2552                 return SEQ_START_TOKEN;
2553
2554         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2555                 if (!cpu_possible(cpu))
2556                         continue;
2557                 *pos = cpu+1;
2558                 return per_cpu_ptr(tbl->stats, cpu);
2559         }
2560         return NULL;
2561 }
2562
2563 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2564 {
2565         struct neigh_table *tbl = seq->private;
2566         int cpu;
2567
2568         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2569                 if (!cpu_possible(cpu))
2570                         continue;
2571                 *pos = cpu+1;
2572                 return per_cpu_ptr(tbl->stats, cpu);
2573         }
2574         return NULL;
2575 }
2576
/* Nothing to release: the statistics walk takes no locks. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2581
/* Emit one line of the statistics file: the column header for
 * SEQ_START_TOKEN, otherwise the counters of the CPU whose
 * neigh_statistics @v points at.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	/* ->entries is table-global; every other column is per-CPU. */
	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards
		   );

	return 0;
}
2615
/* seq_file iterator for the per-table statistics proc file. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2622
2623 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2624 {
2625         int ret = seq_open(file, &neigh_stat_seq_ops);
2626
2627         if (!ret) {
2628                 struct seq_file *sf = file->private_data;
2629                 sf->private = PDE(inode)->data;
2630         }
2631         return ret;
2632 };
2633
/* File operations for the per-table statistics proc file. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
2641
2642 #endif /* CONFIG_PROC_FS */
2643
/* Worst-case netlink message size for one neighbour notification:
 * header plus every attribute neigh_fill_info() may emit.
 */
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}
2652
/* Send an rtnetlink neighbour event (@type, @flags as in the RTM_*NEIGH
 * messages) to the RTNLGRP_NEIGH multicast group.  On any failure the
 * error is recorded against the group via rtnl_set_sk_err() so
 * listeners learn they missed an event.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2676
2677 #ifdef CONFIG_ARPD
/* Ask the user-space ARP daemon to resolve @n by multicasting an
 * RTM_GETNEIGH request to RTNLGRP_NEIGH.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2683 #endif /* CONFIG_ARPD */
2684
2685 #ifdef CONFIG_SYSCTL
2686
2687 #define NEIGH_VARS_MAX 19
2688
/* Template for the per-device and default neighbour sysctl tables.
 * Entry ORDER IS SIGNIFICANT: neigh_sysctl_register() wires up ->data
 * (and some ->proc_handler/->extra1 fields) by numeric index, so new
 * variables must be appended just before the terminator and
 * NEIGH_VARS_MAX kept in sync.  The last four entries (gc_interval,
 * gc_thresh1-3) exist only in the per-protocol default table.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
	char *dev_name;		/* kstrdup'ed directory name, freed on unregister */
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		{
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		{
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		{
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		{
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		{
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};
2806
/**
 * neigh_sysctl_register - create the sysctl tree for a neigh_parms
 * @dev: device the parms belong to, or NULL for a protocol's defaults
 * @p: neighbour parameters backing the sysctl variables
 * @p_name: protocol directory name (e.g. "ipv4")
 * @handler: optional override for the retrans/reachable time handlers
 *
 * Registers net.<p_name>.neigh.<dev-or-"default">.* backed by @p.
 * Returns 0 on success, -ENOBUFS on any allocation/registration
 * failure.  The indices used below must match the entry order of
 * neigh_sysctl_template.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;

#define NEIGH_CTL_PATH_ROOT	0
#define NEIGH_CTL_PATH_PROTO	1
#define NEIGH_CTL_PATH_NEIGH	2
#define NEIGH_CTL_PATH_DEV	3

	struct ctl_path neigh_path[] = {
		{ .procname = "net",	 },
		{ .procname = "proto",	 },
		{ .procname = "neigh",	 },
		{ .procname = "default", },
		{ },
	};

	/* Each registration gets its own writable copy of the template. */
	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	t->neigh_vars[0].data  = &p->mcast_probes;
	t->neigh_vars[1].data  = &p->ucast_probes;
	t->neigh_vars[2].data  = &p->app_probes;
	t->neigh_vars[3].data  = &p->retrans_time;
	t->neigh_vars[4].data  = &p->base_reachable_time;
	t->neigh_vars[5].data  = &p->delay_probe_time;
	t->neigh_vars[6].data  = &p->gc_staletime;
	t->neigh_vars[7].data  = &p->queue_len;
	t->neigh_vars[8].data  = &p->proxy_qlen;
	t->neigh_vars[9].data  = &p->anycast_delay;
	t->neigh_vars[10].data = &p->proxy_delay;
	t->neigh_vars[11].data = &p->locktime;
	/* ms-granularity aliases share storage with entries 3 and 4. */
	t->neigh_vars[12].data  = &p->retrans_time;
	t->neigh_vars[13].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early: the gc_* knobs are
		 * table-global and only appear under "default".
		 */
		memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
	} else {
		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
		/* NOTE(review): gc_interval/gc_thresh1-3 are assumed to
		 * be laid out as four ints directly after the
		 * neigh_parms structure — confirm against the
		 * neigh_table layout before touching this.
		 */
		t->neigh_vars[14].data = (int *)(p + 1);
		t->neigh_vars[15].data = (int *)(p + 1) + 1;
		t->neigh_vars[16].data = (int *)(p + 1) + 2;
		t->neigh_vars[17].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[3].proc_handler = handler;
		t->neigh_vars[3].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[4].proc_handler = handler;
		t->neigh_vars[4].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[12].proc_handler = handler;
		t->neigh_vars[12].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[13].proc_handler = handler;
		t->neigh_vars[13].extra1 = dev;
	}

	/* Own copy of the directory name: it outlives dev->name changes. */
	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
	if (!t->dev_name)
		goto free;

	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;

	t->sysctl_header =
		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free_procname;

	p->sysctl_table = t;
	return 0;

free_procname:
	kfree(t->dev_name);
free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
2896
2897 void neigh_sysctl_unregister(struct neigh_parms *p)
2898 {
2899         if (p->sysctl_table) {
2900                 struct neigh_sysctl_table *t = p->sysctl_table;
2901                 p->sysctl_table = NULL;
2902                 unregister_sysctl_table(t->sysctl_header);
2903                 kfree(t->dev_name);
2904                 kfree(t);
2905         }
2906 }
2907 EXPORT_SYMBOL(neigh_sysctl_unregister);
2908
2909 #endif  /* CONFIG_SYSCTL */
2910
/* Register the PF_UNSPEC rtnetlink handlers for neighbour entries and
 * neighbour tables.  Runs at subsys_initcall time.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);

	return 0;
}

subsys_initcall(neigh_init);
2924