]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/core/net_namespace.c
filter: Optimize instruction revalidation code.
[net-next-2.6.git] / net / core / net_namespace.c
CommitLineData
5f256bec
EB
1#include <linux/workqueue.h>
2#include <linux/rtnetlink.h>
3#include <linux/cache.h>
4#include <linux/slab.h>
5#include <linux/list.h>
6#include <linux/delay.h>
9dd776b6 7#include <linux/sched.h>
c93cf61f 8#include <linux/idr.h>
11a28d37 9#include <linux/rculist.h>
30ffee84 10#include <linux/nsproxy.h>
5f256bec 11#include <net/net_namespace.h>
dec827d1 12#include <net/netns/generic.h>
5f256bec
EB
13
14/*
15 * Our network namespace constructor/destructor lists
16 */
17
18static LIST_HEAD(pernet_list);
19static struct list_head *first_device = &pernet_list;
20static DEFINE_MUTEX(net_mutex);
21
5f256bec 22LIST_HEAD(net_namespace_list);
b76a461f 23EXPORT_SYMBOL_GPL(net_namespace_list);
5f256bec 24
5f256bec 25struct net init_net;
ff4b9502 26EXPORT_SYMBOL(init_net);
5f256bec 27
dec827d1
PE
28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
29
05fceb4a
JP
30static void net_generic_release(struct rcu_head *rcu)
31{
32 struct net_generic *ng;
33
34 ng = container_of(rcu, struct net_generic, rcu);
35 kfree(ng);
36}
37
38static int net_assign_generic(struct net *net, int id, void *data)
39{
40 struct net_generic *ng, *old_ng;
41
42 BUG_ON(!mutex_is_locked(&net_mutex));
43 BUG_ON(id == 0);
44
1c87733d
ED
45 old_ng = rcu_dereference_protected(net->gen,
46 lockdep_is_held(&net_mutex));
47 ng = old_ng;
05fceb4a
JP
48 if (old_ng->len >= id)
49 goto assign;
50
51 ng = kzalloc(sizeof(struct net_generic) +
52 id * sizeof(void *), GFP_KERNEL);
53 if (ng == NULL)
54 return -ENOMEM;
55
56 /*
57 * Some synchronisation notes:
58 *
59 * The net_generic explores the net->gen array inside rcu
60 * read section. Besides once set the net->gen->ptr[x]
61 * pointer never changes (see rules in netns/generic.h).
62 *
63 * That said, we simply duplicate this array and schedule
64 * the old copy for kfree after a grace period.
65 */
66
67 ng->len = id;
68 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
69
70 rcu_assign_pointer(net->gen, ng);
71 call_rcu(&old_ng->rcu, net_generic_release);
72assign:
73 ng->ptr[id - 1] = data;
74 return 0;
75}
76
f875bae0
EB
77static int ops_init(const struct pernet_operations *ops, struct net *net)
78{
79 int err;
80 if (ops->id && ops->size) {
81 void *data = kzalloc(ops->size, GFP_KERNEL);
82 if (!data)
83 return -ENOMEM;
84
85 err = net_assign_generic(net, *ops->id, data);
86 if (err) {
87 kfree(data);
88 return err;
89 }
90 }
91 if (ops->init)
92 return ops->init(net);
93 return 0;
94}
95
96static void ops_free(const struct pernet_operations *ops, struct net *net)
97{
98 if (ops->id && ops->size) {
99 int id = *ops->id;
100 kfree(net_generic(net, id));
101 }
102}
103
72ad937a
EB
104static void ops_exit_list(const struct pernet_operations *ops,
105 struct list_head *net_exit_list)
106{
107 struct net *net;
108 if (ops->exit) {
109 list_for_each_entry(net, net_exit_list, exit_list)
110 ops->exit(net);
111 }
72ad937a
EB
112 if (ops->exit_batch)
113 ops->exit_batch(net_exit_list);
114}
115
116static void ops_free_list(const struct pernet_operations *ops,
117 struct list_head *net_exit_list)
118{
119 struct net *net;
120 if (ops->size && ops->id) {
121 list_for_each_entry(net, net_exit_list, exit_list)
122 ops_free(ops, net);
123 }
124}
125
5f256bec
EB
126/*
127 * setup_net runs the initializers for the network namespace object.
128 */
1a2ee93d 129static __net_init int setup_net(struct net *net)
5f256bec
EB
130{
131 /* Must be called with net_mutex held */
f875bae0 132 const struct pernet_operations *ops, *saved_ops;
486a87f1 133 int error = 0;
72ad937a 134 LIST_HEAD(net_exit_list);
5f256bec 135
5f256bec 136 atomic_set(&net->count, 1);
486a87f1 137
5d1e4468 138#ifdef NETNS_REFCNT_DEBUG
5f256bec 139 atomic_set(&net->use_count, 0);
5d1e4468 140#endif
5f256bec 141
768f3591 142 list_for_each_entry(ops, &pernet_list, list) {
f875bae0
EB
143 error = ops_init(ops, net);
144 if (error < 0)
145 goto out_undo;
5f256bec
EB
146 }
147out:
148 return error;
768f3591 149
5f256bec
EB
150out_undo:
151 /* Walk through the list backwards calling the exit functions
152 * for the pernet modules whose init functions did not fail.
153 */
72ad937a 154 list_add(&net->exit_list, &net_exit_list);
f875bae0 155 saved_ops = ops;
72ad937a
EB
156 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
157 ops_exit_list(ops, &net_exit_list);
158
f875bae0
EB
159 ops = saved_ops;
160 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
72ad937a 161 ops_free_list(ops, &net_exit_list);
310928d9
DL
162
163 rcu_barrier();
5f256bec
EB
164 goto out;
165}
166
486a87f1 167static struct net_generic *net_alloc_generic(void)
6a1a3b9f 168{
486a87f1
DL
169 struct net_generic *ng;
170 size_t generic_size = sizeof(struct net_generic) +
171 INITIAL_NET_GEN_PTRS * sizeof(void *);
172
173 ng = kzalloc(generic_size, GFP_KERNEL);
174 if (ng)
175 ng->len = INITIAL_NET_GEN_PTRS;
176
177 return ng;
6a1a3b9f
PE
178}
179
ebe47d47
CN
180#ifdef CONFIG_NET_NS
181static struct kmem_cache *net_cachep;
182static struct workqueue_struct *netns_wq;
183
486a87f1 184static struct net *net_alloc(void)
45a19b0a 185{
486a87f1
DL
186 struct net *net = NULL;
187 struct net_generic *ng;
188
189 ng = net_alloc_generic();
190 if (!ng)
191 goto out;
192
193 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
45a19b0a 194 if (!net)
486a87f1 195 goto out_free;
45a19b0a 196
486a87f1
DL
197 rcu_assign_pointer(net->gen, ng);
198out:
199 return net;
200
201out_free:
202 kfree(ng);
203 goto out;
204}
205
206static void net_free(struct net *net)
207{
5d1e4468 208#ifdef NETNS_REFCNT_DEBUG
45a19b0a
JFS
209 if (unlikely(atomic_read(&net->use_count) != 0)) {
210 printk(KERN_EMERG "network namespace not free! Usage: %d\n",
211 atomic_read(&net->use_count));
212 return;
213 }
5d1e4468 214#endif
4ef079cc 215 kfree(net->gen);
45a19b0a
JFS
216 kmem_cache_free(net_cachep, net);
217}
218
088eb2d9 219static struct net *net_create(void)
9dd776b6 220{
088eb2d9
AD
221 struct net *net;
222 int rv;
9dd776b6 223
088eb2d9
AD
224 net = net_alloc();
225 if (!net)
226 return ERR_PTR(-ENOMEM);
9dd776b6 227 mutex_lock(&net_mutex);
088eb2d9
AD
228 rv = setup_net(net);
229 if (rv == 0) {
486a87f1 230 rtnl_lock();
11a28d37 231 list_add_tail_rcu(&net->list, &net_namespace_list);
486a87f1
DL
232 rtnl_unlock();
233 }
9dd776b6 234 mutex_unlock(&net_mutex);
088eb2d9
AD
235 if (rv < 0) {
236 net_free(net);
237 return ERR_PTR(rv);
238 }
239 return net;
240}
486a87f1 241
088eb2d9
AD
242struct net *copy_net_ns(unsigned long flags, struct net *old_net)
243{
244 if (!(flags & CLONE_NEWNET))
245 return get_net(old_net);
246 return net_create();
9dd776b6
EB
247}
248
2b035b39
EB
249static DEFINE_SPINLOCK(cleanup_list_lock);
250static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
251
6a1a3b9f
PE
252static void cleanup_net(struct work_struct *work)
253{
f875bae0 254 const struct pernet_operations *ops;
2b035b39
EB
255 struct net *net, *tmp;
256 LIST_HEAD(net_kill_list);
72ad937a 257 LIST_HEAD(net_exit_list);
6a1a3b9f 258
2b035b39
EB
259 /* Atomically snapshot the list of namespaces to cleanup */
260 spin_lock_irq(&cleanup_list_lock);
261 list_replace_init(&cleanup_list, &net_kill_list);
262 spin_unlock_irq(&cleanup_list_lock);
6a1a3b9f
PE
263
264 mutex_lock(&net_mutex);
265
266 /* Don't let anyone else find us. */
267 rtnl_lock();
72ad937a 268 list_for_each_entry(net, &net_kill_list, cleanup_list) {
2b035b39 269 list_del_rcu(&net->list);
72ad937a
EB
270 list_add_tail(&net->exit_list, &net_exit_list);
271 }
6a1a3b9f
PE
272 rtnl_unlock();
273
11a28d37
JB
274 /*
275 * Another CPU might be rcu-iterating the list, wait for it.
276 * This needs to be before calling the exit() notifiers, so
277 * the rcu_barrier() below isn't sufficient alone.
278 */
279 synchronize_rcu();
280
6a1a3b9f 281 /* Run all of the network namespace exit methods */
72ad937a
EB
282 list_for_each_entry_reverse(ops, &pernet_list, list)
283 ops_exit_list(ops, &net_exit_list);
284
f875bae0 285 /* Free the net generic variables */
72ad937a
EB
286 list_for_each_entry_reverse(ops, &pernet_list, list)
287 ops_free_list(ops, &net_exit_list);
6a1a3b9f
PE
288
289 mutex_unlock(&net_mutex);
290
291 /* Ensure there are no outstanding rcu callbacks using this
292 * network namespace.
293 */
294 rcu_barrier();
295
296 /* Finally it is safe to free my network namespace structure */
72ad937a
EB
297 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
298 list_del_init(&net->exit_list);
2b035b39
EB
299 net_free(net);
300 }
6a1a3b9f 301}
2b035b39 302static DECLARE_WORK(net_cleanup_work, cleanup_net);
6a1a3b9f
PE
303
304void __put_net(struct net *net)
305{
306 /* Cleanup the network namespace in process context */
2b035b39
EB
307 unsigned long flags;
308
309 spin_lock_irqsave(&cleanup_list_lock, flags);
310 list_add(&net->cleanup_list, &cleanup_list);
311 spin_unlock_irqrestore(&cleanup_list_lock, flags);
312
313 queue_work(netns_wq, &net_cleanup_work);
6a1a3b9f
PE
314}
315EXPORT_SYMBOL_GPL(__put_net);
316
317#else
318struct net *copy_net_ns(unsigned long flags, struct net *old_net)
319{
320 if (flags & CLONE_NEWNET)
321 return ERR_PTR(-EINVAL);
322 return old_net;
323}
324#endif
325
30ffee84
JB
326struct net *get_net_ns_by_pid(pid_t pid)
327{
328 struct task_struct *tsk;
329 struct net *net;
330
331 /* Lookup the network namespace */
332 net = ERR_PTR(-ESRCH);
333 rcu_read_lock();
334 tsk = find_task_by_vpid(pid);
335 if (tsk) {
336 struct nsproxy *nsproxy;
337 nsproxy = task_nsproxy(tsk);
338 if (nsproxy)
339 net = get_net(nsproxy->net_ns);
340 }
341 rcu_read_unlock();
342 return net;
343}
344EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
345
5f256bec
EB
346static int __init net_ns_init(void)
347{
486a87f1 348 struct net_generic *ng;
5f256bec 349
d57a9212 350#ifdef CONFIG_NET_NS
5f256bec
EB
351 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
352 SMP_CACHE_BYTES,
353 SLAB_PANIC, NULL);
3ef1355d
BT
354
355 /* Create workqueue for cleanup */
356 netns_wq = create_singlethread_workqueue("netns");
357 if (!netns_wq)
358 panic("Could not create netns workq");
d57a9212 359#endif
3ef1355d 360
486a87f1
DL
361 ng = net_alloc_generic();
362 if (!ng)
363 panic("Could not allocate generic netns");
364
365 rcu_assign_pointer(init_net.gen, ng);
366
5f256bec 367 mutex_lock(&net_mutex);
ca0f3112
SH
368 if (setup_net(&init_net))
369 panic("Could not setup the initial network namespace");
5f256bec 370
f4618d39 371 rtnl_lock();
11a28d37 372 list_add_tail_rcu(&init_net.list, &net_namespace_list);
f4618d39 373 rtnl_unlock();
5f256bec
EB
374
375 mutex_unlock(&net_mutex);
5f256bec
EB
376
377 return 0;
378}
379
380pure_initcall(net_ns_init);
381
ed160e83 382#ifdef CONFIG_NET_NS
f875bae0
EB
383static int __register_pernet_operations(struct list_head *list,
384 struct pernet_operations *ops)
5f256bec 385{
72ad937a 386 struct net *net;
5f256bec 387 int error;
72ad937a 388 LIST_HEAD(net_exit_list);
5f256bec 389
5f256bec 390 list_add_tail(&ops->list, list);
f875bae0 391 if (ops->init || (ops->id && ops->size)) {
1dba323b 392 for_each_net(net) {
f875bae0 393 error = ops_init(ops, net);
5f256bec
EB
394 if (error)
395 goto out_undo;
72ad937a 396 list_add_tail(&net->exit_list, &net_exit_list);
5f256bec
EB
397 }
398 }
1dba323b 399 return 0;
5f256bec
EB
400
401out_undo:
402 /* If I have an error cleanup all namespaces I initialized */
403 list_del(&ops->list);
72ad937a
EB
404 ops_exit_list(ops, &net_exit_list);
405 ops_free_list(ops, &net_exit_list);
1dba323b 406 return error;
5f256bec
EB
407}
408
f875bae0 409static void __unregister_pernet_operations(struct pernet_operations *ops)
5f256bec
EB
410{
411 struct net *net;
72ad937a 412 LIST_HEAD(net_exit_list);
5f256bec
EB
413
414 list_del(&ops->list);
72ad937a
EB
415 for_each_net(net)
416 list_add_tail(&net->exit_list, &net_exit_list);
417 ops_exit_list(ops, &net_exit_list);
418 ops_free_list(ops, &net_exit_list);
5f256bec
EB
419}
420
ed160e83
DL
421#else
422
f875bae0
EB
423static int __register_pernet_operations(struct list_head *list,
424 struct pernet_operations *ops)
ed160e83 425{
f875bae0
EB
426 int err = 0;
427 err = ops_init(ops, &init_net);
428 if (err)
429 ops_free(ops, &init_net);
430 return err;
431
ed160e83
DL
432}
433
f875bae0 434static void __unregister_pernet_operations(struct pernet_operations *ops)
ed160e83 435{
72ad937a
EB
436 LIST_HEAD(net_exit_list);
437 list_add(&init_net.exit_list, &net_exit_list);
438 ops_exit_list(ops, &net_exit_list);
439 ops_free_list(ops, &net_exit_list);
ed160e83 440}
f875bae0
EB
441
442#endif /* CONFIG_NET_NS */
ed160e83 443
c93cf61f
PE
444static DEFINE_IDA(net_generic_ids);
445
f875bae0
EB
446static int register_pernet_operations(struct list_head *list,
447 struct pernet_operations *ops)
448{
449 int error;
450
451 if (ops->id) {
452again:
453 error = ida_get_new_above(&net_generic_ids, 1, ops->id);
454 if (error < 0) {
455 if (error == -EAGAIN) {
456 ida_pre_get(&net_generic_ids, GFP_KERNEL);
457 goto again;
458 }
459 return error;
460 }
461 }
462 error = __register_pernet_operations(list, ops);
3a765eda
EB
463 if (error) {
464 rcu_barrier();
465 if (ops->id)
466 ida_remove(&net_generic_ids, *ops->id);
467 }
f875bae0
EB
468
469 return error;
470}
471
472static void unregister_pernet_operations(struct pernet_operations *ops)
473{
474
475 __unregister_pernet_operations(ops);
3a765eda 476 rcu_barrier();
f875bae0
EB
477 if (ops->id)
478 ida_remove(&net_generic_ids, *ops->id);
479}
480
5f256bec
EB
481/**
482 * register_pernet_subsys - register a network namespace subsystem
483 * @ops: pernet operations structure for the subsystem
484 *
485 * Register a subsystem which has init and exit functions
486 * that are called when network namespaces are created and
487 * destroyed respectively.
488 *
489 * When registered all network namespace init functions are
490 * called for every existing network namespace. Allowing kernel
491 * modules to have a race free view of the set of network namespaces.
492 *
493 * When a new network namespace is created all of the init
494 * methods are called in the order in which they were registered.
495 *
496 * When a network namespace is destroyed all of the exit methods
497 * are called in the reverse of the order with which they were
498 * registered.
499 */
500int register_pernet_subsys(struct pernet_operations *ops)
501{
502 int error;
503 mutex_lock(&net_mutex);
504 error = register_pernet_operations(first_device, ops);
505 mutex_unlock(&net_mutex);
506 return error;
507}
508EXPORT_SYMBOL_GPL(register_pernet_subsys);
509
510/**
511 * unregister_pernet_subsys - unregister a network namespace subsystem
512 * @ops: pernet operations structure to manipulate
513 *
514 * Remove the pernet operations structure from the list to be
53379e57 515 * used when network namespaces are created or destroyed. In
5f256bec
EB
516 * addition run the exit method for all existing network
517 * namespaces.
518 */
b3c981d2 519void unregister_pernet_subsys(struct pernet_operations *ops)
5f256bec
EB
520{
521 mutex_lock(&net_mutex);
b3c981d2 522 unregister_pernet_operations(ops);
5f256bec
EB
523 mutex_unlock(&net_mutex);
524}
525EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
526
527/**
528 * register_pernet_device - register a network namespace device
529 * @ops: pernet operations structure for the subsystem
530 *
531 * Register a device which has init and exit functions
532 * that are called when network namespaces are created and
533 * destroyed respectively.
534 *
535 * When registered all network namespace init functions are
536 * called for every existing network namespace. Allowing kernel
537 * modules to have a race free view of the set of network namespaces.
538 *
539 * When a new network namespace is created all of the init
540 * methods are called in the order in which they were registered.
541 *
542 * When a network namespace is destroyed all of the exit methods
543 * are called in the reverse of the order with which they were
544 * registered.
545 */
546int register_pernet_device(struct pernet_operations *ops)
547{
548 int error;
549 mutex_lock(&net_mutex);
550 error = register_pernet_operations(&pernet_list, ops);
551 if (!error && (first_device == &pernet_list))
552 first_device = &ops->list;
553 mutex_unlock(&net_mutex);
554 return error;
555}
556EXPORT_SYMBOL_GPL(register_pernet_device);
557
558/**
559 * unregister_pernet_device - unregister a network namespace netdevice
560 * @ops: pernet operations structure to manipulate
561 *
562 * Remove the pernet operations structure from the list to be
53379e57 563 * used when network namespaces are created or destroyed. In
5f256bec
EB
564 * addition run the exit method for all existing network
565 * namespaces.
566 */
567void unregister_pernet_device(struct pernet_operations *ops)
568{
569 mutex_lock(&net_mutex);
570 if (&ops->list == first_device)
571 first_device = first_device->next;
572 unregister_pernet_operations(ops);
573 mutex_unlock(&net_mutex);
574}
575EXPORT_SYMBOL_GPL(unregister_pernet_device);