/*
 * linux/kernel/softirq.c
 *
 * Copyright (C) 1992 Linus Torvalds
 *
 * Distribute under GPLv2.
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 *
 * Remote softirq infrastructure is by Jens Axboe.
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/tick.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if a softirq is serialized, only the local cpu is marked for
     execution. Hence, we get a sort of weak cpu binding. Though it is
     still not clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

char *softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * We cannot loop indefinitely here, to avoid starving userspace,
 * but we also don't want to introduce a worst-case 1/HZ latency
 * for pending events, so let the scheduler balance the softirq
 * load for us.
 */
void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __get_cpu_var(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}

/*
 * preempt_count and SOFTIRQ_OFFSET usage:
 * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
 *   softirq processing.
 * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable or local_bh_enable.
 * This lets us distinguish between whether we are currently processing
 * softirq and whether we just have bh disabled.
 */

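/*
 * Illustrative note (editor's sketch, not part of the original file):
 * with the split above, softirq_count() carries two separate facts, so
 * callers can tell "bottom halves merely disabled" apart from "a softirq
 * handler is running right now", e.g. via in_softirq() versus the
 * in_serving_softirq() helper introduced alongside SOFTIRQ_DISABLE_OFFSET:
 *
 *	local_bh_disable();
 *	WARN_ON(!in_softirq());          // counts as softirq context
 *	WARN_ON(in_serving_softirq());   // but no handler is executing
 *	local_bh_enable();
 */
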
/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into add_preempt_count and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	preempt_count() += cnt;
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == cnt)
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt)
		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
	add_preempt_count(cnt);
	barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */

void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0),
				SOFTIRQ_DISABLE_OFFSET);
}

EXPORT_SYMBOL(local_bh_disable);

static void __local_bh_enable(unsigned int cnt)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == cnt)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	sub_preempt_count(cnt);
}

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);

static inline void _local_bh_enable_ip(unsigned long ip)
{
	WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}

void local_bh_enable(void)
{
	_local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);

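/*
 * Usage sketch (editor's illustration, not part of the original file):
 * local_bh_disable()/local_bh_enable() bracket process-context code that
 * must not race with softirq handlers on this CPU, e.g. when touching
 * per-CPU data that a softirq also modifies.  example_counter is a
 * hypothetical per-CPU variable:
 *
 *	local_bh_disable();
 *	__get_cpu_var(example_counter)++;  // softirqs on this CPU are held off
 *	local_bh_enable();                 // runs pending softirqs, if any
 */
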
/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;

	pending = local_softirq_pending();
	account_system_vtime(current);

	__local_bh_disable((unsigned long)__builtin_return_address(0),
				SOFTIRQ_OFFSET);
	lockdep_softirq_enter();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	do {
		if (pending & 1) {
			int prev_count = preempt_count();
			kstat_incr_softirqs_this_cpu(h - softirq_vec);

			trace_softirq_entry(h, softirq_vec);
			h->action(h);
			trace_softirq_exit(h, softirq_vec);
			if (unlikely(prev_count != preempt_count())) {
				printk(KERN_ERR "huh, entered softirq %td %s %p "
				       "with preempt_count %08x,"
				       " exited with %08x?\n", h - softirq_vec,
				       softirq_to_name[h - softirq_vec],
				       h->action, prev_count, preempt_count());
				preempt_count() = prev_count;
			}

			rcu_bh_qs(cpu);
		}
		h++;
		pending >>= 1;
	} while (pending);

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending && --max_restart)
		goto restart;

	if (pending)
		wakeup_softirqd();

	lockdep_softirq_exit();

	account_system_vtime(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		__do_softirq();

	local_irq_restore(flags);
}

#endif

/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	int cpu = smp_processor_id();

	rcu_irq_enter();
	if (idle_cpu(cpu) && !in_interrupt()) {
		__irq_enter();
		tick_check_idle(cpu);
	} else
		__irq_enter();
}

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	rcu_irq_exit();
#ifdef CONFIG_NO_HZ
	/* Make sure that timer wheel updates are propagated */
	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
		tick_nohz_stop_sched_tick(0);
#endif
	preempt_enable_no_resched();
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}

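/*
 * Usage sketch (editor's illustration, not part of the original file):
 * a subsystem owning one of the fixed NR_SOFTIRQS slots registers its
 * handler once at init time and raises the softirq whenever there is
 * work.  EXAMPLE_SOFTIRQ and example_softirq_action are hypothetical
 * names; the real users are the entries listed in softirq_to_name[].
 *
 *	static void example_softirq_action(struct softirq_action *h)
 *	{
 *		// runs with interrupts enabled, in softirq context
 *	}
 *
 *	open_softirq(EXAMPLE_SOFTIRQ, example_softirq_action);
 *	...
 *	raise_softirq(EXAMPLE_SOFTIRQ);	// e.g. from an interrupt handler
 */
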
/*
 * Tasklets
 */
struct tasklet_head
{
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_vec).tail = t;
	__get_cpu_var(tasklet_vec).tail = &(t->next);
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_hi_vec).tail = t;
	__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

void __tasklet_hi_schedule_first(struct tasklet_struct *t)
{
	BUG_ON(!irqs_disabled());

	t->next = __get_cpu_var(tasklet_hi_vec).head;
	__get_cpu_var(tasklet_hi_vec).head = t;
	__raise_softirq_irqoff(HI_SOFTIRQ);
}

EXPORT_SYMBOL(__tasklet_hi_schedule_first);

static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_vec).head;
	__get_cpu_var(tasklet_vec).head = NULL;
	__get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_vec).tail = t;
		__get_cpu_var(tasklet_vec).tail = &(t->next);
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}

static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_hi_vec).head;
	__get_cpu_var(tasklet_hi_vec).head = NULL;
	__get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_hi_vec).tail = t;
		__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}


void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

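/*
 * Usage sketch (editor's illustration, not part of the original file):
 * a driver defers work from its hard interrupt handler to a tasklet.
 * example_dev and example_tasklet_fn are hypothetical.
 *
 *	static void example_tasklet_fn(unsigned long data)
 *	{
 *		struct example_dev *dev = (struct example_dev *)data;
 *		// softirq context: interrupts enabled, no sleeping
 *	}
 *
 *	tasklet_init(&dev->tasklet, example_tasklet_fn, (unsigned long)dev);
 *	...
 *	tasklet_schedule(&dev->tasklet);	// from the irq handler
 *	...
 *	tasklet_kill(&dev->tasklet);		// before freeing dev
 */
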
void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do {
			yield();
		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);

/*
 * tasklet_hrtimer
 */

/*
 * The trampoline is called when the hrtimer expires. It schedules a tasklet
 * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
 * hrtimer callback, but from softirq context.
 */
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
	struct tasklet_hrtimer *ttimer =
		container_of(timer, struct tasklet_hrtimer, timer);

	tasklet_hi_schedule(&ttimer->tasklet);
	return HRTIMER_NORESTART;
}

/*
 * Helper function which calls the hrtimer callback from
 * tasklet/softirq context
 */
static void __tasklet_hrtimer_trampoline(unsigned long data)
{
	struct tasklet_hrtimer *ttimer = (void *)data;
	enum hrtimer_restart restart;

	restart = ttimer->function(&ttimer->timer);
	if (restart != HRTIMER_NORESTART)
		hrtimer_restart(&ttimer->timer);
}

/**
 * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
 * @ttimer:	 tasklet_hrtimer which is initialized
 * @function:	 hrtimer callback function which gets called from softirq context
 * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
 * @mode:	 hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
 */
void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
			  enum hrtimer_restart (*function)(struct hrtimer *),
			  clockid_t which_clock, enum hrtimer_mode mode)
{
	hrtimer_init(&ttimer->timer, which_clock, mode);
	ttimer->timer.function = __hrtimer_tasklet_trampoline;
	tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
		     (unsigned long)ttimer);
	ttimer->function = function;
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);

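/*
 * Usage sketch (editor's illustration, not part of the original file):
 * run an hrtimer callback from softirq context instead of hard interrupt
 * context.  example_timer_fn and example_hrt are hypothetical, and the
 * tasklet_hrtimer_start() helper is assumed from <linux/interrupt.h>.
 *
 *	static enum hrtimer_restart example_timer_fn(struct hrtimer *t)
 *	{
 *		// called from the HI_SOFTIRQ tasklet, not from hardirq
 *		return HRTIMER_NORESTART;
 *	}
 *
 *	tasklet_hrtimer_init(&example_hrt, example_timer_fn,
 *			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	tasklet_hrtimer_start(&example_hrt, ktime_set(0, 1000000),
 *			      HRTIMER_MODE_REL);
 */
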
/*
 * Remote softirq bits
 */

DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
EXPORT_PER_CPU_SYMBOL(softirq_work_list);

static void __local_trigger(struct call_single_data *cp, int softirq)
{
	struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);

	list_add_tail(&cp->list, head);

	/* Trigger the softirq only if the list was previously empty. */
	if (head->next == &cp->list)
		raise_softirq_irqoff(softirq);
}

#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
static void remote_softirq_receive(void *data)
{
	struct call_single_data *cp = data;
	unsigned long flags;
	int softirq;

	softirq = cp->priv;

	local_irq_save(flags);
	__local_trigger(cp, softirq);
	local_irq_restore(flags);
}

static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	if (cpu_online(cpu)) {
		cp->func = remote_softirq_receive;
		cp->info = cp;
		cp->flags = 0;
		cp->priv = softirq;

		__smp_call_function_single(cpu, cp, 0);
		return 0;
	}
	return 1;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	return 1;
}
#endif

/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu.  If this cannot be
 * done, the work is instead queued up on the local cpu.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
		__local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);

/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	unsigned long flags;
	int this_cpu;

	local_irq_save(flags);
	this_cpu = smp_processor_id();
	__send_remote_softirq(cp, cpu, this_cpu, softirq);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(send_remote_softirq);

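/*
 * Usage sketch (editor's illustration, not part of the original file):
 * a subsystem that owns a softirq slot can complete work on the CPU that
 * originated it.  It embeds a struct call_single_data in its per-request
 * structure, and its softirq handler drains this CPU's
 * softirq_work_list[nr].  example_req, req->submit_cpu and EXAMPLE_SOFTIRQ
 * are hypothetical names.
 *
 *	// queue completion of req on the CPU that submitted it
 *	send_remote_softirq(&req->csd, req->submit_cpu, EXAMPLE_SOFTIRQ);
 *
 *	// in example_softirq_action(), with irqs disabled while splicing:
 *	list_replace_init(&__get_cpu_var(softirq_work_list[EXAMPLE_SOFTIRQ]),
 *			  &local_list);
 */
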
static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
					       unsigned long action, void *hcpu)
{
	/*
	 * If a CPU goes away, splice its entries to the current CPU
	 * and trigger a run of the softirq
	 */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		int cpu = (unsigned long) hcpu;
		int i;

		local_irq_disable();
		for (i = 0; i < NR_SOFTIRQS; i++) {
			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
			struct list_head *local_head;

			if (list_empty(head))
				continue;

			local_head = &__get_cpu_var(softirq_work_list[i]);
			list_splice_init(head, local_head);
			raise_softirq_irqoff(i);
		}
		local_irq_enable();
	}

	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
	.notifier_call	= remote_softirq_cpu_notify,
};

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		int i;

		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
		for (i = 0; i < NR_SOFTIRQS; i++)
			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
	}

	register_hotcpu_notifier(&remote_softirq_cpu_notifier);

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int run_ksoftirqd(void * __bind_cpu)
{
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		preempt_disable();
		if (!local_softirq_pending()) {
			preempt_enable_no_resched();
			schedule();
			preempt_disable();
		}

		__set_current_state(TASK_RUNNING);

		while (local_softirq_pending()) {
			/* Preempt disable stops cpu going offline.
			   If already offline, we'll be on wrong CPU:
			   don't process */
			if (cpu_is_offline((long)__bind_cpu))
				goto wait_to_die;
			do_softirq();
			preempt_enable_no_resched();
			cond_resched();
			preempt_disable();
			rcu_note_context_switch((long)__bind_cpu);
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;

wait_to_die:
	preempt_enable();
	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
		__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
		__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("ksoftirqd for %i failed\n", hotcpu);
			return notifier_from_errno(PTR_ERR(p));
		}
		kthread_bind(p, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = p;
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		wake_up_process(per_cpu(ksoftirqd, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		if (!per_cpu(ksoftirqd, hotcpu))
			break;
		/* Unbind so it can run.  Fall thru. */
		kthread_bind(per_cpu(ksoftirqd, hotcpu),
			     cpumask_any(cpu_online_mask));
	case CPU_DEAD:
	case CPU_DEAD_FROZEN: {
		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

		p = per_cpu(ksoftirqd, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = NULL;
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
		kthread_stop(p);
		takeover_tasklets(hotcpu);
		break;
	}
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};

static __init int spawn_ksoftirqd(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

	BUG_ON(err != NOTIFY_OK);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	return 0;
}
early_initcall(spawn_ksoftirqd);

#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, wait);
	local_irq_disable();
	func(info);
	local_irq_enable();
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
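
/*
 * Usage sketch (editor's illustration, not part of the original file):
 * run a short, non-sleeping function on every online CPU and wait for
 * completion.  example_flush_local() is a hypothetical helper.
 *
 *	static void example_flush_local(void *info)
 *	{
 *		// runs with interrupts disabled on each CPU
 *	}
 *
 *	on_each_cpu(example_flush_local, NULL, 1);	// wait == 1
 */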

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return 0;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}

int __weak arch_init_chip_data(struct irq_desc *desc, int node)
{
	return 0;
}