bbs.cooldavid.org Git - net-next-2.6.git/commitdiff
Merge branch 'sched-wq' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq into...
author Ingo Molnar <mingo@elte.hu>
Tue, 8 Jun 2010 21:20:59 +0000 (23:20 +0200)
committer Ingo Molnar <mingo@elte.hu>
Tue, 8 Jun 2010 21:20:59 +0000 (23:20 +0200)
kernel/sched.c

diff --combined kernel/sched.c
index 2aaceebd484cade22510b04fc1d561db08acd303,edd5a54b95da66813b617c83fecb06fd6410b084..8f351c56567f81c538cc2885d07303a8aa1e1b34
@@@ -77,6 -77,7 +77,7 @@@
  #include <asm/irq_regs.h>
  
  #include "sched_cpupri.h"
+ #include "workqueue_sched.h"
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/sched.h>
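
The only change in this hunk is the new include: kernel/workqueue_sched.h carries the hooks the scheduler calls back into the workqueue code. A sketch of what that header must declare, reconstructed purely from the call sites later in this diff (wq_worker_waking_up() in ttwu_post_activation() and wq_worker_sleeping() in schedule()):

/* Sketch of kernel/workqueue_sched.h, inferred from the callers below. */
void wq_worker_waking_up(struct task_struct *task, unsigned int cpu);
struct task_struct *wq_worker_sleeping(struct task_struct *task,
                                       unsigned int cpu);
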
@@@ -306,6 -307,52 +307,6 @@@ static int init_task_group_load = INIT_
   */
  struct task_group init_task_group;
  
 -/* return group to which a task belongs */
 -static inline struct task_group *task_group(struct task_struct *p)
 -{
 -      struct task_group *tg;
 -
 -#ifdef CONFIG_CGROUP_SCHED
 -      tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
 -                              struct task_group, css);
 -#else
 -      tg = &init_task_group;
 -#endif
 -      return tg;
 -}
 -
 -/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
 -static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 -{
 -      /*
 -       * Strictly speaking this rcu_read_lock() is not needed since the
 -       * task_group is tied to the cgroup, which in turn can never go away
 -       * as long as there are tasks attached to it.
 -       *
 -       * However since task_group() uses task_subsys_state() which is an
 -       * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
 -       */
 -      rcu_read_lock();
 -#ifdef CONFIG_FAIR_GROUP_SCHED
 -      p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
 -      p->se.parent = task_group(p)->se[cpu];
 -#endif
 -
 -#ifdef CONFIG_RT_GROUP_SCHED
 -      p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
 -      p->rt.parent = task_group(p)->rt_se[cpu];
 -#endif
 -      rcu_read_unlock();
 -}
 -
 -#else
 -
 -static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
 -static inline struct task_group *task_group(struct task_struct *p)
 -{
 -      return NULL;
 -}
 -
  #endif        /* CONFIG_CGROUP_SCHED */
  
  /* CFS-related fields in a runqueue */
@@@ -598,49 -645,6 +599,49 @@@ static inline int cpu_of(struct rq *rq
  #define cpu_curr(cpu)         (cpu_rq(cpu)->curr)
  #define raw_rq()              (&__raw_get_cpu_var(runqueues))
  
 +#ifdef CONFIG_CGROUP_SCHED
 +
 +/*
 + * Return the group to which this task belongs.
 + *
 + * We use task_subsys_state_check() and extend the RCU verification
 + * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
 + * holds that lock for each task it moves into the cgroup. Therefore
 + * by holding that lock, we pin the task to the current cgroup.
 + */
 +static inline struct task_group *task_group(struct task_struct *p)
 +{
 +      struct cgroup_subsys_state *css;
 +
 +      css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
 +                      lockdep_is_held(&task_rq(p)->lock));
 +      return container_of(css, struct task_group, css);
 +}
 +
 +/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
 +static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 +{
 +#ifdef CONFIG_FAIR_GROUP_SCHED
 +      p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
 +      p->se.parent = task_group(p)->se[cpu];
 +#endif
 +
 +#ifdef CONFIG_RT_GROUP_SCHED
 +      p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
 +      p->rt.parent = task_group(p)->rt_se[cpu];
 +#endif
 +}
 +
 +#else /* CONFIG_CGROUP_SCHED */
 +
 +static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
 +static inline struct task_group *task_group(struct task_struct *p)
 +{
 +      return NULL;
 +}
 +
 +#endif /* CONFIG_CGROUP_SCHED */
 +
  inline void update_rq_clock(struct rq *rq)
  {
        if (!rq->skip_clock_update)
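
The rewritten task_group() above drops the old rcu_read_lock()/unlock() pair and instead extends the RCU check with lockdep_is_held(&task_rq(p)->lock): since cpu_cgroup_attach() takes the runqueue lock for every task it moves, holding that lock pins the task's group. A hedged sketch of the caller pattern this enables (task_rq_lock()/task_rq_unlock() are assumed to be the existing sched.c helpers; the function itself is illustrative only):

/* Illustrative only: with task_rq(p)->lock held, p cannot be moved to
 * another cgroup, so task_group(p) is stable and CONFIG_PROVE_RCU is
 * satisfied by the lockdep_is_held() condition. */
static void example_task_group_user(struct task_struct *p)
{
        unsigned long flags;
        struct rq *rq = task_rq_lock(p, &flags);
        struct task_group *tg = task_group(p);  /* no rcu_read_lock() needed */

        (void)tg;       /* ... use tg->cfs_rq[cpu_of(rq)] etc. here ... */
        task_rq_unlock(rq, &flags);
}
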
@@@ -2264,11 -2268,55 +2265,55 @@@ static void update_avg(u64 *avg, u64 sa
  }
  #endif
  
- /***
+ static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
+                                bool is_sync, bool is_migrate, bool is_local,
+                                unsigned long en_flags)
+ {
+       schedstat_inc(p, se.statistics.nr_wakeups);
+       if (is_sync)
+               schedstat_inc(p, se.statistics.nr_wakeups_sync);
+       if (is_migrate)
+               schedstat_inc(p, se.statistics.nr_wakeups_migrate);
+       if (is_local)
+               schedstat_inc(p, se.statistics.nr_wakeups_local);
+       else
+               schedstat_inc(p, se.statistics.nr_wakeups_remote);
+       activate_task(rq, p, en_flags);
+ }
+ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
+                                       int wake_flags, bool success)
+ {
+       trace_sched_wakeup(p, success);
+       check_preempt_curr(rq, p, wake_flags);
+       p->state = TASK_RUNNING;
+ #ifdef CONFIG_SMP
+       if (p->sched_class->task_woken)
+               p->sched_class->task_woken(rq, p);
+       if (unlikely(rq->idle_stamp)) {
+               u64 delta = rq->clock - rq->idle_stamp;
+               u64 max = 2*sysctl_sched_migration_cost;
+               if (delta > max)
+                       rq->avg_idle = max;
+               else
+                       update_avg(&rq->avg_idle, delta);
+               rq->idle_stamp = 0;
+       }
+ #endif
+       /* if a worker is waking up, notify workqueue */
+       if ((p->flags & PF_WQ_WORKER) && success)
+               wq_worker_waking_up(p, cpu_of(rq));
+ }
+ /**
   * try_to_wake_up - wake up a thread
-  * @p: the to-be-woken-up thread
+  * @p: the thread to be awakened
   * @state: the mask of task states that can be woken
-  * @sync: do a synchronous wakeup?
+  * @wake_flags: wake modifier flags (WF_*)
   *
   * Put it on the run-queue if it's not already there. The "current"
   * thread is always on the run-queue (except when the actual
   * re-schedule is in progress), and as such you're allowed to do
   * the simpler "current->state = TASK_RUNNING" to mark yourself
   * runnable without the overhead of this.
   *
-  * returns failure only if the task is already active.
+  * Returns %true if @p was woken up, %false if it was already running
+  * or @state didn't match @p's state.
   */
  static int try_to_wake_up(struct task_struct *p, unsigned int state,
                          int wake_flags)
  
  out_activate:
  #endif /* CONFIG_SMP */
-       schedstat_inc(p, se.statistics.nr_wakeups);
-       if (wake_flags & WF_SYNC)
-               schedstat_inc(p, se.statistics.nr_wakeups_sync);
-       if (orig_cpu != cpu)
-               schedstat_inc(p, se.statistics.nr_wakeups_migrate);
-       if (cpu == this_cpu)
-               schedstat_inc(p, se.statistics.nr_wakeups_local);
-       else
-               schedstat_inc(p, se.statistics.nr_wakeups_remote);
-       activate_task(rq, p, en_flags);
+       ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu,
+                     cpu == this_cpu, en_flags);
        success = 1;
  out_running:
-       trace_sched_wakeup(p, success);
-       check_preempt_curr(rq, p, wake_flags);
-       p->state = TASK_RUNNING;
- #ifdef CONFIG_SMP
-       if (p->sched_class->task_woken)
-               p->sched_class->task_woken(rq, p);
-       if (unlikely(rq->idle_stamp)) {
-               u64 delta = rq->clock - rq->idle_stamp;
-               u64 max = 2*sysctl_sched_migration_cost;
-               if (delta > max)
-                       rq->avg_idle = max;
-               else
-                       update_avg(&rq->avg_idle, delta);
-               rq->idle_stamp = 0;
-       }
- #endif
+       ttwu_post_activation(p, rq, wake_flags, success);
  out:
        task_rq_unlock(rq, &flags);
        put_cpu();
        return success;
  }
  
+ /**
+  * try_to_wake_up_local - try to wake up a local task with rq lock held
+  * @p: the thread to be awakened
+  *
+  * Put @p on the run-queue if it's not already there.  The caller must
+  * ensure that this_rq() is locked, @p is bound to this_rq() and not
+  * the current task.  this_rq() stays locked over invocation.
+  */
+ static void try_to_wake_up_local(struct task_struct *p)
+ {
+       struct rq *rq = task_rq(p);
+       bool success = false;
+       BUG_ON(rq != this_rq());
+       BUG_ON(p == current);
+       lockdep_assert_held(&rq->lock);
+       if (!(p->state & TASK_NORMAL))
+               return;
+       if (!p->se.on_rq) {
+               if (likely(!task_running(rq, p))) {
+                       schedstat_inc(rq, ttwu_count);
+                       schedstat_inc(rq, ttwu_local);
+               }
+               ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP);
+               success = true;
+       }
+       ttwu_post_activation(p, rq, 0, success);
+ }
  /**
   * wake_up_process - Wake up a specific process
   * @p: The process to be woken up.
@@@ -3600,10 -3653,24 +3650,24 @@@ need_resched_nonpreemptible
        clear_tsk_need_resched(prev);
  
        if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
-               if (unlikely(signal_pending_state(prev->state, prev)))
+               if (unlikely(signal_pending_state(prev->state, prev))) {
                        prev->state = TASK_RUNNING;
-               else
+               } else {
+                       /*
+                        * If a worker is going to sleep, notify and
+                        * ask workqueue whether it wants to wake up a
+                        * task to maintain concurrency.  If so, wake
+                        * up the task.
+                        */
+                       if (prev->flags & PF_WQ_WORKER) {
+                               struct task_struct *to_wakeup;
+                               to_wakeup = wq_worker_sleeping(prev, cpu);
+                               if (to_wakeup)
+                                       try_to_wake_up_local(to_wakeup);
+                       }
                        deactivate_task(rq, prev, DEQUEUE_SLEEP);
+               }
                switch_count = &prev->nvcsw;
        }
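
The new branch above is the second half of the scheduler/workqueue handshake: when a PF_WQ_WORKER task blocks, wq_worker_sleeping() may hand back another worker for schedule() to wake via try_to_wake_up_local(), so the CPU keeps executing queued work. The real logic lives in kernel/workqueue.c and is not part of this diff; a purely conceptual sketch with hypothetical names:

/* Conceptual sketch only -- these identifiers are hypothetical, not the
 * actual kernel/workqueue.c data structures. */
struct wq_pool_sketch {
        atomic_t                nr_running;     /* runnable workers on this CPU */
        struct list_head        pending;        /* queued but not yet running work */
        struct task_struct      *first_idle;    /* an idle worker, if any */
};

static struct task_struct *sketch_worker_sleeping(struct wq_pool_sketch *pool)
{
        /* Last runnable worker is going to sleep while work is pending:
         * return another worker for schedule() to wake in its place. */
        if (atomic_dec_and_test(&pool->nr_running) && !list_empty(&pool->pending))
                return pool->first_idle;
        return NULL;
}
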
  
@@@ -4462,6 -4529,16 +4526,6 @@@ recheck
        }
  
        if (user) {
 -#ifdef CONFIG_RT_GROUP_SCHED
 -              /*
 -               * Do not allow realtime tasks into groups that have no runtime
 -               * assigned.
 -               */
 -              if (rt_bandwidth_enabled() && rt_policy(policy) &&
 -                              task_group(p)->rt_bandwidth.rt_runtime == 0)
 -                      return -EPERM;
 -#endif
 -
                retval = security_task_setscheduler(p, policy, param);
                if (retval)
                        return retval;
         * runqueue lock must be held.
         */
        rq = __task_rq_lock(p);
 +
 +#ifdef CONFIG_RT_GROUP_SCHED
 +      if (user) {
 +              /*
 +               * Do not allow realtime tasks into groups that have no runtime
 +               * assigned.
 +               */
 +              if (rt_bandwidth_enabled() && rt_policy(policy) &&
 +                              task_group(p)->rt_bandwidth.rt_runtime == 0) {
 +                      __task_rq_unlock(rq);
 +                      raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 +                      return -EPERM;
 +              }
 +      }
 +#endif
 +
        /* recheck policy now with rq lock held */
        if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
                policy = oldpolicy = -1;
@@@ -5804,20 -5865,49 +5868,49 @@@ migration_call(struct notifier_block *n
   */
  static struct notifier_block __cpuinitdata migration_notifier = {
        .notifier_call = migration_call,
-       .priority = 10
+       .priority = CPU_PRI_MIGRATION,
  };
  
+ static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
+                                     unsigned long action, void *hcpu)
+ {
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_ONLINE:
+       case CPU_DOWN_FAILED:
+               set_cpu_active((long)hcpu, true);
+               return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
+       }
+ }
+ static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
+                                       unsigned long action, void *hcpu)
+ {
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_DOWN_PREPARE:
+               set_cpu_active((long)hcpu, false);
+               return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
+       }
+ }
  static int __init migration_init(void)
  {
        void *cpu = (void *)(long)smp_processor_id();
        int err;
  
-       /* Start one for the boot CPU: */
+       /* Initialize migration for the boot CPU */
        err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
        BUG_ON(err == NOTIFY_BAD);
        migration_call(&migration_notifier, CPU_ONLINE, cpu);
        register_cpu_notifier(&migration_notifier);
  
+       /* Register cpu active notifiers */
+       cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
+       cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
        return 0;
  }
  early_initcall(migration_init);
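
The explicit priorities are the point of this hunk: on CPU_ONLINE the scheduler must mark the CPU active before cpusets rebuild the sched domains, and migration setup must still run before most other notifiers. The CPU_PRI_* constants are added to include/linux/cpu.h elsewhere in this series (not shown in this diff); from memory they look roughly like the following, but treat the exact values as an assumption:

/* Approximate include/linux/cpu.h addition (values assumed, not taken
 * from this diff).  Higher-priority notifiers run first. */
enum {
        CPU_PRI_SCHED_ACTIVE    = INT_MAX,      /* mark the cpu active first ... */
        CPU_PRI_CPUSET_ACTIVE   = INT_MAX - 1,  /* ... then update cpusets */
        CPU_PRI_SCHED_INACTIVE  = INT_MIN + 1,  /* clear cpu_active ... */
        CPU_PRI_CPUSET_INACTIVE = INT_MIN,      /* ... then rebuild domains */

        CPU_PRI_MIGRATION       = 10,           /* before most other notifiers */
};
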
@@@ -7276,29 -7366,35 +7369,35 @@@ int __init sched_create_sysfs_power_sav
  }
  #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
  
- #ifndef CONFIG_CPUSETS
  /*
-  * Add online and remove offline CPUs from the scheduler domains.
-  * When cpusets are enabled they take over this function.
+  * Update cpusets according to cpu_active mask.  If cpusets are
+  * disabled, cpuset_update_active_cpus() becomes a simple wrapper
+  * around partition_sched_domains().
   */
- static int update_sched_domains(struct notifier_block *nfb,
-                               unsigned long action, void *hcpu)
+ static int __cpuexit cpuset_cpu_active(struct notifier_block *nfb,
+                                      unsigned long action, void *hcpu)
  {
-       switch (action) {
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
        case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-               partition_sched_domains(1, NULL, NULL);
+               cpuset_update_active_cpus();
                return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
+       }
+ }
  
+ static int __cpuexit cpuset_cpu_inactive(struct notifier_block *nfb,
+                                        unsigned long action, void *hcpu)
+ {
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_DOWN_PREPARE:
+               cpuset_update_active_cpus();
+               return NOTIFY_OK;
        default:
                return NOTIFY_DONE;
        }
  }
- #endif
  
  static int update_runtime(struct notifier_block *nfb,
                                unsigned long action, void *hcpu)
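
With CONFIG_CPUSETS disabled, the cpuset_update_active_cpus() call made by the two notifiers above is expected to collapse to exactly what the removed update_sched_domains() did. Presumably the !CONFIG_CPUSETS stub (added to include/linux/cpuset.h by the same series, not visible in this diff) is simply:

/* Assumed !CONFIG_CPUSETS stub, matching the comment above and the
 * partition_sched_domains(1, NULL, NULL) call it replaces. */
static inline void cpuset_update_active_cpus(void)
{
        partition_sched_domains(1, NULL, NULL);
}
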
@@@ -7344,10 -7440,8 +7443,8 @@@ void __init sched_init_smp(void
        mutex_unlock(&sched_domains_mutex);
        put_online_cpus();
  
- #ifndef CONFIG_CPUSETS
-       /* XXX: Theoretical race here - CPU may be hotplugged now */
-       hotcpu_notifier(update_sched_domains, 0);
- #endif
+       hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
+       hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
  
        /* RT runtime code needs to handle some hotplug events */
        hotcpu_notifier(update_runtime, 0);