Merge branch 'sched-wq' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq into...
author Ingo Molnar <mingo@elte.hu>
Tue, 8 Jun 2010 21:20:59 +0000 (23:20 +0200)
committer Ingo Molnar <mingo@elte.hu>
Tue, 8 Jun 2010 21:20:59 +0000 (23:20 +0200)
include/linux/cpu.h
include/linux/cpuset.h
include/linux/perf_event.h
include/linux/sched.h
kernel/cpu.c
kernel/cpuset.c
kernel/fork.c
kernel/sched.c
kernel/workqueue_sched.h [new file with mode: 0644]

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e287863ac053d9788756ac79c9271ba562d455b1..de6b1722cdcab11e9443a027ebf5aeda7789589d 100644
@@ -48,6 +48,31 @@ extern ssize_t arch_cpu_release(const char *, size_t);
 #endif
 struct notifier_block;
 
+/*
+ * CPU notifier priorities.
+ */
+enum {
+       /*
+        * SCHED_ACTIVE marks a cpu which is coming up active during
+        * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
+        * notifier.  CPUSET_ACTIVE adjusts cpuset according to
+        * cpu_active mask right after SCHED_ACTIVE.  During
+        * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
+        * ordered in a similar way.
+        *
+        * This ordering guarantees consistent cpu_active mask and
+        * migration behavior to all cpu notifiers.
+        */
+       CPU_PRI_SCHED_ACTIVE    = INT_MAX,
+       CPU_PRI_CPUSET_ACTIVE   = INT_MAX - 1,
+       CPU_PRI_SCHED_INACTIVE  = INT_MIN + 1,
+       CPU_PRI_CPUSET_INACTIVE = INT_MIN,
+
+       /* migration should happen before other stuff but after perf */
+       CPU_PRI_PERF            = 20,
+       CPU_PRI_MIGRATION       = 10,
+};
+
 #ifdef CONFIG_SMP
 /* Need to know about CPUs going up/down? */
 #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
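
The priority constants above are consumed through the ordinary cpu_notifier()/hotcpu_notifier() registration interfaces, exactly as the kernel/sched.c hunks below do with CPU_PRI_SCHED_ACTIVE, CPU_PRI_SCHED_INACTIVE and CPU_PRI_MIGRATION. A minimal sketch of how an unrelated subsystem might order itself against them (example_cpu_callback and its priority choice are illustrative only, not part of this commit):

static int __cpuinit example_cpu_callback(struct notifier_block *nfb,
                                          unsigned long action, void *hcpu)
{
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
        case CPU_DOWN_FAILED:
                /* CPU_PRI_SCHED_ACTIVE and CPU_PRI_CPUSET_ACTIVE have
                 * already run, so the cpu is in cpu_active_mask here */
                return NOTIFY_OK;
        default:
                return NOTIFY_DONE;
        }
}

static int __init example_cpu_init(void)
{
        /* priority 0 runs after perf (20) and migration (10) on
         * CPU_ONLINE, and before the sched/cpuset inactive notifiers
         * on CPU_DOWN_PREPARE */
        cpu_notifier(example_cpu_callback, 0);
        return 0;
}
early_initcall(example_cpu_init);
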
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 457ed765a116a4c06ecd192e0165cf5af096c818..f20eb8f16025d74534dd2b62fa22cf494404ef1c 100644
@@ -20,6 +20,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
 
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
+extern void cpuset_update_active_cpus(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -132,6 +133,11 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 
+static inline void cpuset_update_active_cpus(void)
+{
+       partition_sched_domains(1, NULL, NULL);
+}
+
 static inline void cpuset_cpus_allowed(struct task_struct *p,
                                       struct cpumask *mask)
 {
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5d0266d94985c65acbd8b13a41961964cdde4a72..469e03e96fe722c8b465a026835b04e4ace2899a 100644
@@ -1068,7 +1068,7 @@ static inline void perf_event_disable(struct perf_event *event)           { }
 #define perf_cpu_notifier(fn)                                  \
 do {                                                           \
        static struct notifier_block fn##_nb __cpuinitdata =    \
-               { .notifier_call = fn, .priority = 20 };        \
+               { .notifier_call = fn, .priority = CPU_PRI_PERF }; \
        fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,             \
                (void *)(unsigned long)smp_processor_id());     \
        fn(&fn##_nb, (unsigned long)CPU_STARTING,               \
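
Apart from switching to CPU_PRI_PERF, the macro still bootstraps its callback for the boot cpu (the CPU_UP_PREPARE and CPU_STARTING calls shown above) before registering it. A hedged sketch of a typical caller follows; example_pmu_callback is a made-up name standing in for the kind of PMU hotplug handler this macro is meant for:

static int __cpuinit example_pmu_callback(struct notifier_block *self,
                                          unsigned long action, void *hcpu)
{
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                /* allocate per-cpu PMU state for the incoming cpu */
                break;
        case CPU_STARTING:
                /* runs on the incoming cpu itself: program the hardware */
                break;
        case CPU_DYING:
                /* undo CPU_STARTING before the cpu goes away */
                break;
        }
        return NOTIFY_OK;
}

static int __init example_pmu_init(void)
{
        /* registers at CPU_PRI_PERF, i.e. ahead of migration (10) */
        perf_cpu_notifier(example_pmu_callback);
        return 0;
}
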
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f118809c953ff137b2a97e0d9e674b2836ee331a..edc3dd168d87d3d21f9a351cbc2ae0cc97879282 100644
@@ -1696,6 +1696,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_EXITING     0x00000004      /* getting shut down */
 #define PF_EXITPIDONE  0x00000008      /* pi exit done on shut down */
 #define PF_VCPU                0x00000010      /* I'm a virtual CPU */
+#define PF_WQ_WORKER   0x00000020      /* I'm a workqueue worker */
 #define PF_FORKNOEXEC  0x00000040      /* forked but didn't exec */
 #define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
 #define PF_SUPERPRIV   0x00000100      /* used super-user privileges */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 97d1b426a4ac39bd49b41b9393ed9b565f8f7f33..f6e726f184916029e2d1cfdbcd4acb2b26f14e69 100644
@@ -235,11 +235,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
                return -EINVAL;
 
        cpu_hotplug_begin();
-       set_cpu_active(cpu, false);
        err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
        if (err) {
-               set_cpu_active(cpu, true);
-
                nr_calls--;
                __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
                printk("%s: attempt to take down CPU %u failed\n",
@@ -249,7 +246,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
        err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
        if (err) {
-               set_cpu_active(cpu, true);
                /* CPU didn't die: tell everyone.  Can't complain. */
                cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
 
@@ -321,8 +317,6 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
                goto out_notify;
        BUG_ON(!cpu_online(cpu));
 
-       set_cpu_active(cpu, true);
-
        /* Now call notifier in preparation. */
        cpu_notify(CPU_ONLINE | mod, hcpu);
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 02b9611eadde3ebe638b9c24ffbbb5ec8ada5c06..05727dcaa80dd5a2f6c8546063d2467c9083b90d 100644
@@ -2113,31 +2113,17 @@ static void scan_for_empty_cpusets(struct cpuset *root)
  * but making no active use of cpusets.
  *
  * This routine ensures that top_cpuset.cpus_allowed tracks
- * cpu_online_map on each CPU hotplug (cpuhp) event.
+ * cpu_active_mask on each CPU hotplug (cpuhp) event.
  *
  * Called within get_online_cpus().  Needs to call cgroup_lock()
  * before calling generate_sched_domains().
  */
-static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
-                               unsigned long phase, void *unused_cpu)
+void __cpuexit cpuset_update_active_cpus(void)
 {
        struct sched_domain_attr *attr;
        cpumask_var_t *doms;
        int ndoms;
 
-       switch (phase) {
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
-       case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-               break;
-
-       default:
-               return NOTIFY_DONE;
-       }
-
        cgroup_lock();
        mutex_lock(&callback_mutex);
        cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
@@ -2148,8 +2134,6 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 
        /* Have scheduler rebuild the domains */
        partition_sched_domains(ndoms, doms, attr);
-
-       return NOTIFY_OK;
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -2203,7 +2187,6 @@ void __init cpuset_init_smp(void)
        cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
        top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
-       hotcpu_notifier(cpuset_track_online_cpus, 0);
        hotplug_memory_notifier(cpuset_track_online_nodes, 10);
 
        cpuset_wq = create_singlethread_workqueue("cpuset");
diff --git a/kernel/fork.c b/kernel/fork.c
index b6cce14ba0470e641bd45c68ca2067c4ee70d38c..a82a65cef741935adc51082640f63c9a5049b5ea 100644
@@ -907,7 +907,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
        unsigned long new_flags = p->flags;
 
-       new_flags &= ~PF_SUPERPRIV;
+       new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
        new_flags |= PF_FORKNOEXEC;
        new_flags |= PF_STARTING;
        p->flags = new_flags;
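
PF_WQ_WORKER is stripped here because the new flag drives the scheduler hooks added to kernel/sched.c below; a task forked from a workqueue worker must not inherit worker status. Purely for illustration, a hypothetical kthread that opts into those hooks might look like this (the real worker creation belongs to the workqueue rewrite itself, not to this merge):

static int example_worker_fn(void *data)
{
        /* ask the scheduler to invoke the wq_worker_* hooks for us */
        current->flags |= PF_WQ_WORKER;

        while (!kthread_should_stop()) {
                /* ... run pending work items ... */
                set_current_state(TASK_INTERRUPTIBLE);
                /* going to sleep: schedule() consults wq_worker_sleeping() */
                schedule();
                __set_current_state(TASK_RUNNING);
        }

        current->flags &= ~PF_WQ_WORKER;
        return 0;
}
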
diff --git a/kernel/sched.c b/kernel/sched.c
index 2aaceebd484cade22510b04fc1d561db08acd303..8f351c56567f81c538cc2885d07303a8aa1e1b34 100644
@@ -77,6 +77,7 @@
 #include <asm/irq_regs.h>
 
 #include "sched_cpupri.h"
+#include "workqueue_sched.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -2264,11 +2265,55 @@ static void update_avg(u64 *avg, u64 sample)
 }
 #endif
 
-/***
+static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
+                                bool is_sync, bool is_migrate, bool is_local,
+                                unsigned long en_flags)
+{
+       schedstat_inc(p, se.statistics.nr_wakeups);
+       if (is_sync)
+               schedstat_inc(p, se.statistics.nr_wakeups_sync);
+       if (is_migrate)
+               schedstat_inc(p, se.statistics.nr_wakeups_migrate);
+       if (is_local)
+               schedstat_inc(p, se.statistics.nr_wakeups_local);
+       else
+               schedstat_inc(p, se.statistics.nr_wakeups_remote);
+
+       activate_task(rq, p, en_flags);
+}
+
+static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
+                                       int wake_flags, bool success)
+{
+       trace_sched_wakeup(p, success);
+       check_preempt_curr(rq, p, wake_flags);
+
+       p->state = TASK_RUNNING;
+#ifdef CONFIG_SMP
+       if (p->sched_class->task_woken)
+               p->sched_class->task_woken(rq, p);
+
+       if (unlikely(rq->idle_stamp)) {
+               u64 delta = rq->clock - rq->idle_stamp;
+               u64 max = 2*sysctl_sched_migration_cost;
+
+               if (delta > max)
+                       rq->avg_idle = max;
+               else
+                       update_avg(&rq->avg_idle, delta);
+               rq->idle_stamp = 0;
+       }
+#endif
+       /* if a worker is waking up, notify workqueue */
+       if ((p->flags & PF_WQ_WORKER) && success)
+               wq_worker_waking_up(p, cpu_of(rq));
+}
+
+/**
  * try_to_wake_up - wake up a thread
- * @p: the to-be-woken-up thread
+ * @p: the thread to be awakened
  * @state: the mask of task states that can be woken
- * @sync: do a synchronous wakeup?
+ * @wake_flags: wake modifier flags (WF_*)
  *
  * Put it on the run-queue if it's not already there. The "current"
  * thread is always on the run-queue (except when the actual
@@ -2276,7 +2321,8 @@ static void update_avg(u64 *avg, u64 sample)
  * the simpler "current->state = TASK_RUNNING" to mark yourself
  * runnable without the overhead of this.
  *
- * returns failure only if the task is already active.
+ * Returns %true if @p was woken up, %false if it was already running
+ * or @state didn't match @p's state.
  */
 static int try_to_wake_up(struct task_struct *p, unsigned int state,
                          int wake_flags)
@@ -2356,38 +2402,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 
 out_activate:
 #endif /* CONFIG_SMP */
-       schedstat_inc(p, se.statistics.nr_wakeups);
-       if (wake_flags & WF_SYNC)
-               schedstat_inc(p, se.statistics.nr_wakeups_sync);
-       if (orig_cpu != cpu)
-               schedstat_inc(p, se.statistics.nr_wakeups_migrate);
-       if (cpu == this_cpu)
-               schedstat_inc(p, se.statistics.nr_wakeups_local);
-       else
-               schedstat_inc(p, se.statistics.nr_wakeups_remote);
-       activate_task(rq, p, en_flags);
+       ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu,
+                     cpu == this_cpu, en_flags);
        success = 1;
-
 out_running:
-       trace_sched_wakeup(p, success);
-       check_preempt_curr(rq, p, wake_flags);
-
-       p->state = TASK_RUNNING;
-#ifdef CONFIG_SMP
-       if (p->sched_class->task_woken)
-               p->sched_class->task_woken(rq, p);
-
-       if (unlikely(rq->idle_stamp)) {
-               u64 delta = rq->clock - rq->idle_stamp;
-               u64 max = 2*sysctl_sched_migration_cost;
-
-               if (delta > max)
-                       rq->avg_idle = max;
-               else
-                       update_avg(&rq->avg_idle, delta);
-               rq->idle_stamp = 0;
-       }
-#endif
+       ttwu_post_activation(p, rq, wake_flags, success);
 out:
        task_rq_unlock(rq, &flags);
        put_cpu();
@@ -2395,6 +2414,37 @@ out:
        return success;
 }
 
+/**
+ * try_to_wake_up_local - try to wake up a local task with rq lock held
+ * @p: the thread to be awakened
+ *
+ * Put @p on the run-queue if it's not already there.  The caller must
+ * ensure that this_rq() is locked, @p is bound to this_rq() and not
+ * the current task.  this_rq() stays locked over invocation.
+ */
+static void try_to_wake_up_local(struct task_struct *p)
+{
+       struct rq *rq = task_rq(p);
+       bool success = false;
+
+       BUG_ON(rq != this_rq());
+       BUG_ON(p == current);
+       lockdep_assert_held(&rq->lock);
+
+       if (!(p->state & TASK_NORMAL))
+               return;
+
+       if (!p->se.on_rq) {
+               if (likely(!task_running(rq, p))) {
+                       schedstat_inc(rq, ttwu_count);
+                       schedstat_inc(rq, ttwu_local);
+               }
+               ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP);
+               success = true;
+       }
+       ttwu_post_activation(p, rq, 0, success);
+}
+
 /**
  * wake_up_process - Wake up a specific process
  * @p: The process to be woken up.
@@ -3600,10 +3650,24 @@ need_resched_nonpreemptible:
        clear_tsk_need_resched(prev);
 
        if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
-               if (unlikely(signal_pending_state(prev->state, prev)))
+               if (unlikely(signal_pending_state(prev->state, prev))) {
                        prev->state = TASK_RUNNING;
-               else
+               } else {
+                       /*
+                        * If a worker is going to sleep, notify and
+                        * ask workqueue whether it wants to wake up a
+                        * task to maintain concurrency.  If so, wake
+                        * up the task.
+                        */
+                       if (prev->flags & PF_WQ_WORKER) {
+                               struct task_struct *to_wakeup;
+
+                               to_wakeup = wq_worker_sleeping(prev, cpu);
+                               if (to_wakeup)
+                                       try_to_wake_up_local(to_wakeup);
+                       }
                        deactivate_task(rq, prev, DEQUEUE_SLEEP);
+               }
                switch_count = &prev->nvcsw;
        }
 
@@ -5804,20 +5868,49 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
  */
 static struct notifier_block __cpuinitdata migration_notifier = {
        .notifier_call = migration_call,
-       .priority = 10
+       .priority = CPU_PRI_MIGRATION,
 };
 
+static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
+                                     unsigned long action, void *hcpu)
+{
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_ONLINE:
+       case CPU_DOWN_FAILED:
+               set_cpu_active((long)hcpu, true);
+               return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
+       }
+}
+
+static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
+                                       unsigned long action, void *hcpu)
+{
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_DOWN_PREPARE:
+               set_cpu_active((long)hcpu, false);
+               return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
+       }
+}
+
 static int __init migration_init(void)
 {
        void *cpu = (void *)(long)smp_processor_id();
        int err;
 
-       /* Start one for the boot CPU: */
+       /* Initialize migration for the boot CPU */
        err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
        BUG_ON(err == NOTIFY_BAD);
        migration_call(&migration_notifier, CPU_ONLINE, cpu);
        register_cpu_notifier(&migration_notifier);
 
+       /* Register cpu active notifiers */
+       cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
+       cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
+
        return 0;
 }
 early_initcall(migration_init);
@@ -7276,29 +7369,35 @@ int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
-#ifndef CONFIG_CPUSETS
 /*
- * Add online and remove offline CPUs from the scheduler domains.
- * When cpusets are enabled they take over this function.
+ * Update cpusets according to cpu_active mask.  If cpusets are
+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper
+ * around partition_sched_domains().
  */
-static int update_sched_domains(struct notifier_block *nfb,
-                               unsigned long action, void *hcpu)
+static int __cpuexit cpuset_cpu_active(struct notifier_block *nfb,
+                                      unsigned long action, void *hcpu)
 {
-       switch (action) {
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
        case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-               partition_sched_domains(1, NULL, NULL);
+               cpuset_update_active_cpus();
                return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
+       }
+}
 
+static int __cpuexit cpuset_cpu_inactive(struct notifier_block *nfb,
+                                        unsigned long action, void *hcpu)
+{
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_DOWN_PREPARE:
+               cpuset_update_active_cpus();
+               return NOTIFY_OK;
        default:
                return NOTIFY_DONE;
        }
 }
-#endif
 
 static int update_runtime(struct notifier_block *nfb,
                                unsigned long action, void *hcpu)
@@ -7344,10 +7443,8 @@ void __init sched_init_smp(void)
        mutex_unlock(&sched_domains_mutex);
        put_online_cpus();
 
-#ifndef CONFIG_CPUSETS
-       /* XXX: Theoretical race here - CPU may be hotplugged now */
-       hotcpu_notifier(update_sched_domains, 0);
-#endif
+       hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
+       hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
 
        /* RT runtime code needs to handle some hotplug events */
        hotcpu_notifier(update_runtime, 0);
diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h
new file mode 100644
index 0000000..af040ba
--- /dev/null
+++ b/kernel/workqueue_sched.h
@@ -0,0 +1,16 @@
+/*
+ * kernel/workqueue_sched.h
+ *
+ * Scheduler hooks for concurrency managed workqueue.  Only to be
+ * included from sched.c and workqueue.c.
+ */
+static inline void wq_worker_waking_up(struct task_struct *task,
+                                      unsigned int cpu)
+{
+}
+
+static inline struct task_struct *wq_worker_sleeping(struct task_struct *task,
+                                                    unsigned int cpu)
+{
+       return NULL;
+}
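
Both hooks are intentionally empty here: the header only fixes the interface that sched.c is compiled against, and workqueue.c is expected to supply the real bodies once the concurrency managed workqueue code lands. Purely as an illustration of the intended shape, a sketch under invented names (example_pool, nr_running and first_idle_worker are not actual kernel symbols):

/* everything below is illustrative, not part of this commit */
struct example_pool {
        atomic_t                nr_running;     /* workers currently running */
        struct list_head        worklist;       /* pending work items */
};
static DEFINE_PER_CPU(struct example_pool, example_pool);
/* picks an idle worker from the pool; definition omitted in this sketch */
static struct task_struct *first_idle_worker(struct example_pool *pool);

void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
{
        /* a worker becoming runnable counts toward the cpu's concurrency */
        atomic_inc(&per_cpu(example_pool, cpu).nr_running);
}

struct task_struct *wq_worker_sleeping(struct task_struct *task,
                                       unsigned int cpu)
{
        struct example_pool *pool = &per_cpu(example_pool, cpu);

        /* if the last running worker is about to sleep while work is
         * still queued, hand the scheduler an idle worker to wake so
         * the cpu keeps making progress on the worklist */
        if (atomic_dec_and_test(&pool->nr_running) &&
            !list_empty(&pool->worklist))
                return first_idle_worker(pool);

        return NULL;
}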