]> bbs.cooldavid.org Git - net-next-2.6.git/blame - kernel/softirq.c
[PATCH] Mark TSC on GeodeLX reliable
[net-next-2.6.git] / kernel / softirq.c
CommitLineData
1da177e4
LT
1/*
2 * linux/kernel/softirq.c
3 *
4 * Copyright (C) 1992 Linus Torvalds
5 *
6 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
7 */
8
9#include <linux/module.h>
10#include <linux/kernel_stat.h>
11#include <linux/interrupt.h>
12#include <linux/init.h>
13#include <linux/mm.h>
14#include <linux/notifier.h>
15#include <linux/percpu.h>
16#include <linux/cpu.h>
17#include <linux/kthread.h>
18#include <linux/rcupdate.h>
78eef01b 19#include <linux/smp.h>
1da177e4
LT
20
21#include <asm/irq.h>
22/*
23 - No shared variables, all the data are CPU local.
24 - If a softirq needs serialization, let it serialize itself
25 by its own spinlocks.
26 - Even if softirq is serialized, only local cpu is marked for
27 execution. Hence, we get something sort of weak cpu binding.
28 Though it is still not clear, will it result in better locality
29 or will not.
30
31 Examples:
32 - NET RX softirq. It is multithreaded and does not require
33 any global serialization.
34 - NET TX softirq. It kicks software netdevice queues, hence
35 it is logically serialized per device, but this serialization
36 is invisible to common code.
37 - Tasklets: serialized wrt itself.
38 */
39
/*
 * Generic definition of irq_stat[]: NR_CPUS entries of irq_cpustat_t,
 * compiled (and exported) only when the architecture has not defined
 * __ARCH_IRQ_STAT.
 */
40#ifndef __ARCH_IRQ_STAT
41irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
42EXPORT_SYMBOL(irq_stat);
43#endif
44
/*
 * Softirq handler table. open_softirq() fills in a slot; __do_softirq()
 * walks it one slot per bit of the pending mask.
 */
45static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
46
/*
 * Per-CPU pointer to that CPU's ksoftirqd thread. Set and cleared by
 * cpu_callback(); wakeup_softirqd() tolerates it being NULL.
 */
47static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
48
49/*
50 * we cannot loop indefinitely here to avoid userspace starvation,
51 * but we also don't want to introduce a worst case 1/HZ latency
52 * to the pending events, so we let the scheduler balance
53 * the softirq load for us.
54 */
55static inline void wakeup_softirqd(void)
56{
57 /* Interrupts are disabled: no need to stop preemption */
58 struct task_struct *tsk = __get_cpu_var(ksoftirqd);
59
60 if (tsk && tsk->state != TASK_RUNNING)
61 wake_up_process(tsk);
62}
63
de30a2b3
IM
64/*
65 * This one is for softirq.c-internal use,
66 * where hardirqs are disabled legitimately:
67 */
3c829c36 68#ifdef CONFIG_TRACE_IRQFLAGS
de30a2b3
IM
69static void __local_bh_disable(unsigned long ip)
70{
71 unsigned long flags;
72
73 WARN_ON_ONCE(in_irq());
74
75 raw_local_irq_save(flags);
76 add_preempt_count(SOFTIRQ_OFFSET);
77 /*
78 * Were softirqs turned off above:
79 */
80 if (softirq_count() == SOFTIRQ_OFFSET)
81 trace_softirqs_off(ip);
82 raw_local_irq_restore(flags);
83}
3c829c36
TC
84#else /* !CONFIG_TRACE_IRQFLAGS */
85static inline void __local_bh_disable(unsigned long ip)
86{
87 add_preempt_count(SOFTIRQ_OFFSET);
88 barrier();
89}
90#endif /* CONFIG_TRACE_IRQFLAGS */
de30a2b3
IM
91
/*
 * Disable softirq processing on this CPU, recording our caller's
 * address as the location passed down to __local_bh_disable().
 */
void local_bh_disable(void)
{
	unsigned long caller = (unsigned long)__builtin_return_address(0);

	__local_bh_disable(caller);
}

EXPORT_SYMBOL(local_bh_disable);
98
/*
 * Drop one SOFTIRQ_OFFSET from the preempt count without running any
 * pending softirqs (do_softirq() is never called from here).
 * Warns once if used from hardirq context.
 */
99void __local_bh_enable(void)
100{
101 WARN_ON_ONCE(in_irq());
102
103 /*
104 * softirqs should never be enabled by __local_bh_enable(),
105 * it always nests inside local_bh_enable() sections:
106 */
107 WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
108
/* The check above must precede the decrement: it inspects the count before it is lowered. */
109 sub_preempt_count(SOFTIRQ_OFFSET);
110}
111EXPORT_SYMBOL_GPL(__local_bh_enable);
112
113/*
114 * Special-case - softirqs can safely be enabled in
115 * cond_resched_softirq(), or by __do_softirq(),
116 * without processing still-pending softirqs:
117 */
118void _local_bh_enable(void)
119{
/* Expected context: not in hardirq, and with interrupts disabled; either violation warns once. */
120 WARN_ON_ONCE(in_irq());
121 WARN_ON_ONCE(!irqs_disabled());
122
/* If only one SOFTIRQ_OFFSET is held, the decrement below re-enables softirqs: tell the tracer, passing our caller's address. */
123 if (softirq_count() == SOFTIRQ_OFFSET)
124 trace_softirqs_on((unsigned long)__builtin_return_address(0));
/* Lower the count; unlike local_bh_enable(), do_softirq() is not called here. */
125 sub_preempt_count(SOFTIRQ_OFFSET);
126}
127
128EXPORT_SYMBOL(_local_bh_enable);
129
130void local_bh_enable(void)
131{
3c829c36 132#ifdef CONFIG_TRACE_IRQFLAGS
de30a2b3
IM
133 unsigned long flags;
134
135 WARN_ON_ONCE(in_irq());
3c829c36 136#endif
de30a2b3
IM
137 WARN_ON_ONCE(irqs_disabled());
138
3c829c36 139#ifdef CONFIG_TRACE_IRQFLAGS
de30a2b3 140 local_irq_save(flags);
3c829c36 141#endif
de30a2b3
IM
142 /*
143 * Are softirqs going to be turned on now:
144 */
145 if (softirq_count() == SOFTIRQ_OFFSET)
146 trace_softirqs_on((unsigned long)__builtin_return_address(0));
147 /*
148 * Keep preemption disabled until we are done with
149 * softirq processing:
150 */
151 sub_preempt_count(SOFTIRQ_OFFSET - 1);
152
153 if (unlikely(!in_interrupt() && local_softirq_pending()))
154 do_softirq();
155
156 dec_preempt_count();
3c829c36 157#ifdef CONFIG_TRACE_IRQFLAGS
de30a2b3 158 local_irq_restore(flags);
3c829c36 159#endif
de30a2b3
IM
160 preempt_check_resched();
161}
162EXPORT_SYMBOL(local_bh_enable);
163
164void local_bh_enable_ip(unsigned long ip)
165{
3c829c36 166#ifdef CONFIG_TRACE_IRQFLAGS
de30a2b3
IM
167 unsigned long flags;
168
169 WARN_ON_ONCE(in_irq());
170
171 local_irq_save(flags);
3c829c36 172#endif
de30a2b3
IM
173 /*
174 * Are softirqs going to be turned on now:
175 */
176 if (softirq_count() == SOFTIRQ_OFFSET)
177 trace_softirqs_on(ip);
178 /*
179 * Keep preemption disabled until we are done with
180 * softirq processing:
181 */
182 sub_preempt_count(SOFTIRQ_OFFSET - 1);
183
184 if (unlikely(!in_interrupt() && local_softirq_pending()))
185 do_softirq();
186
187 dec_preempt_count();
3c829c36 188#ifdef CONFIG_TRACE_IRQFLAGS
de30a2b3 189 local_irq_restore(flags);
3c829c36 190#endif
de30a2b3
IM
191 preempt_check_resched();
192}
193EXPORT_SYMBOL(local_bh_enable_ip);
194
1da177e4
LT
195/*
196 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
197 * and we fall back to softirqd after that.
198 *
199 * This number has been established via experimentation.
200 * The two things to balance are latency against fairness -
201 * we want to handle softirqs as soon as possible, but they
202 * should not be able to lock up the box.
203 */
204#define MAX_SOFTIRQ_RESTART 10
205
206asmlinkage void __do_softirq(void)
207{
208 struct softirq_action *h;
209 __u32 pending;
210 int max_restart = MAX_SOFTIRQ_RESTART;
211 int cpu;
212
213 pending = local_softirq_pending();
829035fd
PM
214 account_system_vtime(current);
215
de30a2b3
IM
216 __local_bh_disable((unsigned long)__builtin_return_address(0));
217 trace_softirq_enter();
1da177e4 218
1da177e4
LT
219 cpu = smp_processor_id();
220restart:
221 /* Reset the pending bitmask before enabling irqs */
3f74478b 222 set_softirq_pending(0);
1da177e4 223
c70f5d66 224 local_irq_enable();
1da177e4
LT
225
226 h = softirq_vec;
227
228 do {
229 if (pending & 1) {
230 h->action(h);
231 rcu_bh_qsctr_inc(cpu);
232 }
233 h++;
234 pending >>= 1;
235 } while (pending);
236
c70f5d66 237 local_irq_disable();
1da177e4
LT
238
239 pending = local_softirq_pending();
240 if (pending && --max_restart)
241 goto restart;
242
243 if (pending)
244 wakeup_softirqd();
245
de30a2b3 246 trace_softirq_exit();
829035fd
PM
247
248 account_system_vtime(current);
de30a2b3 249 _local_bh_enable();
1da177e4
LT
250}
251
252#ifndef __ARCH_HAS_DO_SOFTIRQ
253
254asmlinkage void do_softirq(void)
255{
256 __u32 pending;
257 unsigned long flags;
258
259 if (in_interrupt())
260 return;
261
262 local_irq_save(flags);
263
264 pending = local_softirq_pending();
265
266 if (pending)
267 __do_softirq();
268
269 local_irq_restore(flags);
270}
271
272EXPORT_SYMBOL(do_softirq);
273
274#endif
275
1da177e4
LT
/*
 * invoke_softirq(): how irq_exit() kicks off softirq processing.
 * When the architecture defines __ARCH_IRQ_EXIT_IRQS_DISABLED it expands
 * to a direct __do_softirq() call; otherwise it goes through
 * do_softirq(), whose generic version in this file saves and disables
 * interrupts itself before calling __do_softirq().
 */
276#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
277# define invoke_softirq() __do_softirq()
278#else
279# define invoke_softirq() do_softirq()
280#endif
281
282/*
283 * Exit an interrupt context. Process softirqs if needed and possible:
284 */
285void irq_exit(void)
286{
287 account_system_vtime(current);
de30a2b3 288 trace_hardirq_exit();
1da177e4
LT
289 sub_preempt_count(IRQ_EXIT_OFFSET);
290 if (!in_interrupt() && local_softirq_pending())
291 invoke_softirq();
292 preempt_enable_no_resched();
293}
294
295/*
296 * This function must run with irqs disabled!
297 */
298inline fastcall void raise_softirq_irqoff(unsigned int nr)
299{
300 __raise_softirq_irqoff(nr);
301
302 /*
303 * If we're in an interrupt or softirq, we're done
304 * (this also catches softirq-disabled code). We will
305 * actually run the softirq once we return from
306 * the irq or softirq.
307 *
308 * Otherwise we wake up ksoftirqd to make sure we
309 * schedule the softirq soon.
310 */
311 if (!in_interrupt())
312 wakeup_softirqd();
313}
314
315EXPORT_SYMBOL(raise_softirq_irqoff);
316
317void fastcall raise_softirq(unsigned int nr)
318{
319 unsigned long flags;
320
321 local_irq_save(flags);
322 raise_softirq_irqoff(nr);
323 local_irq_restore(flags);
324}
325
326void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
327{
328 softirq_vec[nr].data = data;
329 softirq_vec[nr].action = action;
330}
331
1da177e4
LT
332/* Tasklets */
/* Head of a singly linked list of tasklets, chained through tasklet_struct->next. */
333struct tasklet_head
334{
335 struct tasklet_struct *list;
336};
337
/*
 * Per-CPU tasklet queues: tasklet_vec is drained by tasklet_action(),
 * tasklet_hi_vec by tasklet_hi_action().
 */
338/* Some compilers disobey section attribute on statics when not
339 initialized -- RR */
340static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
341static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
342
343void fastcall __tasklet_schedule(struct tasklet_struct *t)
344{
345 unsigned long flags;
346
347 local_irq_save(flags);
348 t->next = __get_cpu_var(tasklet_vec).list;
349 __get_cpu_var(tasklet_vec).list = t;
350 raise_softirq_irqoff(TASKLET_SOFTIRQ);
351 local_irq_restore(flags);
352}
353
354EXPORT_SYMBOL(__tasklet_schedule);
355
356void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
357{
358 unsigned long flags;
359
360 local_irq_save(flags);
361 t->next = __get_cpu_var(tasklet_hi_vec).list;
362 __get_cpu_var(tasklet_hi_vec).list = t;
363 raise_softirq_irqoff(HI_SOFTIRQ);
364 local_irq_restore(flags);
365}
366
367EXPORT_SYMBOL(__tasklet_hi_schedule);
368
369static void tasklet_action(struct softirq_action *a)
370{
371 struct tasklet_struct *list;
372
373 local_irq_disable();
374 list = __get_cpu_var(tasklet_vec).list;
375 __get_cpu_var(tasklet_vec).list = NULL;
376 local_irq_enable();
377
378 while (list) {
379 struct tasklet_struct *t = list;
380
381 list = list->next;
382
383 if (tasklet_trylock(t)) {
384 if (!atomic_read(&t->count)) {
385 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
386 BUG();
387 t->func(t->data);
388 tasklet_unlock(t);
389 continue;
390 }
391 tasklet_unlock(t);
392 }
393
394 local_irq_disable();
395 t->next = __get_cpu_var(tasklet_vec).list;
396 __get_cpu_var(tasklet_vec).list = t;
397 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
398 local_irq_enable();
399 }
400}
401
402static void tasklet_hi_action(struct softirq_action *a)
403{
404 struct tasklet_struct *list;
405
406 local_irq_disable();
407 list = __get_cpu_var(tasklet_hi_vec).list;
408 __get_cpu_var(tasklet_hi_vec).list = NULL;
409 local_irq_enable();
410
411 while (list) {
412 struct tasklet_struct *t = list;
413
414 list = list->next;
415
416 if (tasklet_trylock(t)) {
417 if (!atomic_read(&t->count)) {
418 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
419 BUG();
420 t->func(t->data);
421 tasklet_unlock(t);
422 continue;
423 }
424 tasklet_unlock(t);
425 }
426
427 local_irq_disable();
428 t->next = __get_cpu_var(tasklet_hi_vec).list;
429 __get_cpu_var(tasklet_hi_vec).list = t;
430 __raise_softirq_irqoff(HI_SOFTIRQ);
431 local_irq_enable();
432 }
433}
434
435
436void tasklet_init(struct tasklet_struct *t,
437 void (*func)(unsigned long), unsigned long data)
438{
439 t->next = NULL;
440 t->state = 0;
441 atomic_set(&t->count, 0);
442 t->func = func;
443 t->data = data;
444}
445
446EXPORT_SYMBOL(tasklet_init);
447
448void tasklet_kill(struct tasklet_struct *t)
449{
450 if (in_interrupt())
451 printk("Attempt to kill tasklet from interrupt\n");
452
453 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
454 do
455 yield();
456 while (test_bit(TASKLET_STATE_SCHED, &t->state));
457 }
458 tasklet_unlock_wait(t);
459 clear_bit(TASKLET_STATE_SCHED, &t->state);
460}
461
462EXPORT_SYMBOL(tasklet_kill);
463
464void __init softirq_init(void)
465{
466 open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
467 open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
468}
469
/*
 * Body of the per-CPU softirq thread. __bind_cpu carries the CPU number
 * cast to a pointer. The thread runs at nice 19, sleeps in
 * TASK_INTERRUPTIBLE while nothing is pending, and otherwise calls
 * do_softirq() until the pending mask is empty. Returns 0 once
 * kthread_should_stop() reports true.
 */
470static int ksoftirqd(void * __bind_cpu)
471{
472 set_user_nice(current, 19);
473 current->flags |= PF_NOFREEZE;
474
/* Mark ourselves sleeping before the first pending check below. */
475 set_current_state(TASK_INTERRUPTIBLE);
476
477 while (!kthread_should_stop()) {
478 preempt_disable();
479 if (!local_softirq_pending()) {
/* Nothing pending: drop the preempt count without rescheduling, then sleep via schedule(). */
480 preempt_enable_no_resched();
481 schedule();
482 preempt_disable();
483 }
484
485 __set_current_state(TASK_RUNNING);
486
487 while (local_softirq_pending()) {
488 /* Preempt disable stops cpu going offline.
489 If already offline, we'll be on wrong CPU:
490 don't process */
491 if (cpu_is_offline((long)__bind_cpu))
492 goto wait_to_die;
493 do_softirq();
/* Offer a reschedule point between batches, then re-disable preemption for the next check. */
494 preempt_enable_no_resched();
495 cond_resched();
496 preempt_disable();
497 }
498 preempt_enable();
499 set_current_state(TASK_INTERRUPTIBLE);
500 }
501 __set_current_state(TASK_RUNNING);
502 return 0;
503
/* Reached (with preemption still disabled) when the bound CPU is offline: stop processing and idle until told to stop. */
504wait_to_die:
505 preempt_enable();
506 /* Wait for kthread_stop */
507 set_current_state(TASK_INTERRUPTIBLE);
508 while (!kthread_should_stop()) {
509 schedule();
510 set_current_state(TASK_INTERRUPTIBLE);
511 }
512 __set_current_state(TASK_RUNNING);
513 return 0;
514}
515
516#ifdef CONFIG_HOTPLUG_CPU
517/*
518 * tasklet_kill_immediate is called to remove a tasklet which can already be
519 * scheduled for execution on @cpu.
520 *
521 * Unlike tasklet_kill, this function removes the tasklet
522 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
523 *
524 * When this function is called, @cpu must be in the CPU_DEAD state.
525 */
526void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
527{
528 struct tasklet_struct **i;
529
530 BUG_ON(cpu_online(cpu));
531 BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
532
533 if (!test_bit(TASKLET_STATE_SCHED, &t->state))
534 return;
535
536 /* CPU is dead, so no lock needed. */
537 for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
538 if (*i == t) {
539 *i = t->next;
540 return;
541 }
542 }
543 BUG();
544}
545
546static void takeover_tasklets(unsigned int cpu)
547{
548 struct tasklet_struct **i;
549
550 /* CPU is dead, so no lock needed. */
551 local_irq_disable();
552
553 /* Find end, append list for that CPU. */
554 for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
555 *i = per_cpu(tasklet_vec, cpu).list;
556 per_cpu(tasklet_vec, cpu).list = NULL;
557 raise_softirq_irqoff(TASKLET_SOFTIRQ);
558
559 for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
560 *i = per_cpu(tasklet_hi_vec, cpu).list;
561 per_cpu(tasklet_hi_vec, cpu).list = NULL;
562 raise_softirq_irqoff(HI_SOFTIRQ);
563
564 local_irq_enable();
565}
566#endif /* CONFIG_HOTPLUG_CPU */
567
8c78f307 568static int __cpuinit cpu_callback(struct notifier_block *nfb,
1da177e4
LT
569 unsigned long action,
570 void *hcpu)
571{
572 int hotcpu = (unsigned long)hcpu;
573 struct task_struct *p;
574
575 switch (action) {
576 case CPU_UP_PREPARE:
1da177e4
LT
577 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
578 if (IS_ERR(p)) {
579 printk("ksoftirqd for %i failed\n", hotcpu);
580 return NOTIFY_BAD;
581 }
582 kthread_bind(p, hotcpu);
583 per_cpu(ksoftirqd, hotcpu) = p;
584 break;
585 case CPU_ONLINE:
586 wake_up_process(per_cpu(ksoftirqd, hotcpu));
587 break;
588#ifdef CONFIG_HOTPLUG_CPU
589 case CPU_UP_CANCELED:
fc75cdfa
HC
590 if (!per_cpu(ksoftirqd, hotcpu))
591 break;
1da177e4 592 /* Unbind so it can run. Fall thru. */
a4c4af7c
HC
593 kthread_bind(per_cpu(ksoftirqd, hotcpu),
594 any_online_cpu(cpu_online_map));
1da177e4
LT
595 case CPU_DEAD:
596 p = per_cpu(ksoftirqd, hotcpu);
597 per_cpu(ksoftirqd, hotcpu) = NULL;
598 kthread_stop(p);
599 takeover_tasklets(hotcpu);
600 break;
601#endif /* CONFIG_HOTPLUG_CPU */
602 }
603 return NOTIFY_OK;
604}
605
/* Notifier block that routes CPU hotplug events to cpu_callback(); registered by spawn_ksoftirqd(). */
8c78f307 606static struct notifier_block __cpuinitdata cpu_nfb = {
1da177e4
LT
607 .notifier_call = cpu_callback
608};
609
610__init int spawn_ksoftirqd(void)
611{
612 void *cpu = (void *)(long)smp_processor_id();
07dccf33
AM
613 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
614
615 BUG_ON(err == NOTIFY_BAD);
1da177e4
LT
616 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
617 register_cpu_notifier(&cpu_nfb);
618 return 0;
619}
78eef01b
AM
620
621#ifdef CONFIG_SMP
622/*
623 * Call a function on all processors
624 */
/*
 * Run @func(@info) on every CPU, including the calling one.
 * @func:  function to call
 * @info:  argument handed to @func
 * @retry: passed through to smp_call_function()
 * @wait:  passed through to smp_call_function()
 *
 * The other CPUs are reached through smp_call_function(); the local call
 * is then made with interrupts disabled. The caller's interrupt state is
 * saved and restored around the local call rather than being
 * unconditionally re-enabled, so a caller that entered with interrupts
 * off does not get them switched on behind its back.
 *
 * Returns the result of smp_call_function().
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
	unsigned long flags;
	int ret;

	/* Stay on this CPU for both the cross-call and the local call. */
	preempt_disable();
	ret = smp_call_function(func, info, retry, wait);
	local_irq_save(flags);
	func(info);
	local_irq_restore(flags);
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
638#endif