bbs.cooldavid.org Git - net-next-2.6.git/commitdiff
Merge branch 'linus' into sched/core
author Ingo Molnar <mingo@elte.hu>
Thu, 15 Apr 2010 07:35:24 +0000 (09:35 +0200)
committer Ingo Molnar <mingo@elte.hu>
Thu, 15 Apr 2010 07:36:16 +0000 (09:36 +0200)
Merge reason: merge the latest fixes, update to -rc4.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
kernel/cpu.c
kernel/cred.c
kernel/exit.c
kernel/sched.c
kernel/sched_debug.c

diff --combined kernel/cpu.c
index 8d340faac380f6c9c1643fd11b567a560cb7675a,25bba73b1be3a67fe1ce0f932f9d833c81b245f5..914aedcde849a2d577dcfae1a670a3e6c83877e0
@@@ -14,6 -14,7 +14,7 @@@
  #include <linux/kthread.h>
  #include <linux/stop_machine.h>
  #include <linux/mutex.h>
+ #include <linux/gfp.h>
  
  #ifdef CONFIG_SMP
  /* Serializes the updates to cpu_online_mask, cpu_present_mask */
@@@ -163,7 -164,6 +164,7 @@@ static inline void check_for_tasks(int 
  }
  
  struct take_cpu_down_param {
 +      struct task_struct *caller;
        unsigned long mod;
        void *hcpu;
  };
  static int __ref take_cpu_down(void *_param)
  {
        struct take_cpu_down_param *param = _param;
 +      unsigned int cpu = (unsigned long)param->hcpu;
        int err;
  
        /* Ensure this CPU doesn't handle any more interrupts. */
        raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
                                param->hcpu);
  
 +      if (task_cpu(param->caller) == cpu)
 +              move_task_off_dead_cpu(cpu, param->caller);
        /* Force idle task to run as soon as we yield: it should
           immediately notice cpu is offline and die quickly. */
        sched_idle_next();
  static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
  {
        int err, nr_calls = 0;
 -      cpumask_var_t old_allowed;
        void *hcpu = (void *)(long)cpu;
        unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
        struct take_cpu_down_param tcd_param = {
 +              .caller = current,
                .mod = mod,
                .hcpu = hcpu,
        };
        if (!cpu_online(cpu))
                return -EINVAL;
  
 -      if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
 -              return -ENOMEM;
 -
        cpu_hotplug_begin();
        set_cpu_active(cpu, false);
        err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
                goto out_release;
        }
  
 -      /* Ensure that we are not runnable on dying cpu */
 -      cpumask_copy(old_allowed, &current->cpus_allowed);
 -      set_cpus_allowed_ptr(current, cpu_active_mask);
 -
        err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
        if (err) {
                set_cpu_active(cpu, true);
                                            hcpu) == NOTIFY_BAD)
                        BUG();
  
 -              goto out_allowed;
 +              goto out_release;
        }
        BUG_ON(cpu_online(cpu));
  
  
        check_for_tasks(cpu);
  
 -out_allowed:
 -      set_cpus_allowed_ptr(current, old_allowed);
  out_release:
        cpu_hotplug_done();
        if (!err) {
                                            hcpu) == NOTIFY_BAD)
                        BUG();
        }
 -      free_cpumask_var(old_allowed);
        return err;
  }
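
The cpu.c change above replaces the old approach, where _cpu_down() temporarily rewrote its own cpus_allowed to stay off the dying CPU (and needed a temporary cpumask allocation for that), with a push model: take_cpu_down(), which already runs on the dying CPU under stop_machine, migrates the calling task away itself. A condensed sketch of the resulting callback, pieced together from the hunks above (the architecture-specific disable step and error handling are omitted):

/* Condensed from the hunks above; not the complete function. */
struct take_cpu_down_param {
        struct task_struct *caller;     /* new: the task running _cpu_down() */
        unsigned long mod;
        void *hcpu;
};

static int take_cpu_down(void *_param)
{
        struct take_cpu_down_param *param = _param;
        unsigned int cpu = (unsigned long)param->hcpu;

        /* Runs on the dying CPU with interrupts disabled (stop_machine). */
        raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
                                param->hcpu);

        /* Push the hotplug caller off this CPU here, instead of having
           _cpu_down() shrink its own cpus_allowed beforehand. */
        if (task_cpu(param->caller) == cpu)
                move_task_off_dead_cpu(cpu, param->caller);

        /* Force the idle task to run as soon as we yield. */
        sched_idle_next();
        return 0;
}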
  
diff --combined kernel/cred.c
index 19d3ccce3d4d7e5d6d20c47b07c96408ab8dd87d,e1dbe9eef800b8be745650d095334df6be4ab433..4f483be5944cc96b12e656b98f50019822b43afc
   */
  #include <linux/module.h>
  #include <linux/cred.h>
+ #include <linux/slab.h>
  #include <linux/sched.h>
  #include <linux/key.h>
  #include <linux/keyctl.h>
  #include <linux/init_task.h>
  #include <linux/security.h>
  #include <linux/cn_proc.h>
 -#include "cred-internals.h"
  
  #if 0
  #define kdebug(FMT, ...) \
@@@ -556,6 -558,8 +557,6 @@@ int commit_creds(struct cred *new
                atomic_dec(&old->user->processes);
        alter_cred_subscribers(old, -2);
  
 -      sched_switch_user(task);
 -
        /* send notifications */
        if (new->uid   != old->uid  ||
            new->euid  != old->euid ||
diff --combined kernel/exit.c
index 84dc4b294e47c57f583d18fffdf16bc0f9379e4f,7f2683a10ac40d2682a4a400091ec61bd2131739..eabca5a73a85b70d8d1d9f2ea95c05ae6c27c1e2
@@@ -55,6 -55,7 +55,6 @@@
  #include <asm/unistd.h>
  #include <asm/pgtable.h>
  #include <asm/mmu_context.h>
 -#include "cred-internals.h"
  
  static void exit_mm(struct task_struct * tsk);
  
@@@ -952,7 -953,8 +952,8 @@@ NORET_TYPE void do_exit(long code
  
        acct_update_integrals(tsk);
        /* sync mm's RSS info before statistics gathering */
-       sync_mm_rss(tsk, tsk->mm);
+       if (tsk->mm)
+               sync_mm_rss(tsk, tsk->mm);
        group_dead = atomic_dec_and_test(&tsk->signal->live);
        if (group_dead) {
                hrtimer_cancel(&tsk->signal->real_timer);
diff --combined kernel/sched.c
index 4a57e96dd6c78e8de4fd237428a0173fcfe795df,6af210a7de70d394015617863ee303059c9dd4d6..ab562ae4007c360be41bf3688f44af1c1e759cf4
@@@ -71,6 -71,7 +71,7 @@@
  #include <linux/debugfs.h>
  #include <linux/ctype.h>
  #include <linux/ftrace.h>
+ #include <linux/slab.h>
  
  #include <asm/tlb.h>
  #include <asm/irq_regs.h>
@@@ -492,11 -493,8 +493,11 @@@ struct rq 
        #define CPU_LOAD_IDX_MAX 5
        unsigned long cpu_load[CPU_LOAD_IDX_MAX];
  #ifdef CONFIG_NO_HZ
 +      u64 nohz_stamp;
        unsigned char in_nohz_recently;
  #endif
 +      unsigned int skip_clock_update;
 +
        /* capture load from *all* tasks on this cpu: */
        struct load_weight load;
        unsigned long nr_load_updates;
@@@ -594,13 -592,6 +595,13 @@@ static inlin
  void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
  {
        rq->curr->sched_class->check_preempt_curr(rq, p, flags);
 +
 +      /*
 +       * A queue event has occurred, and we're going to schedule.  In
 +       * this case, we can save a useless back to back clock update.
 +       */
 +      if (test_tsk_need_resched(p))
 +              rq->skip_clock_update = 1;
  }
  
  static inline int cpu_of(struct rq *rq)
  
  inline void update_rq_clock(struct rq *rq)
  {
 -      rq->clock = sched_clock_cpu(cpu_of(rq));
 +      if (!rq->skip_clock_update)
 +              rq->clock = sched_clock_cpu(cpu_of(rq));
  }
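
Taken together, the skip_clock_update additions in this file avoid a redundant back-to-back clock update around a queue event that is about to cause a reschedule: check_preempt_curr() sets the flag when a schedule will follow shortly, update_rq_clock() then becomes a no-op, and put_prev_task() (further down in this diff) clears the flag at the next schedule. The three cooperating pieces, condensed from the hunks in this file:

/* Condensed from the hunks in this file. */
void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
{
        rq->curr->sched_class->check_preempt_curr(rq, p, flags);

        /* A queue event occurred and a schedule will follow shortly,
           so the very next clock update would be redundant. */
        if (test_tsk_need_resched(p))
                rq->skip_clock_update = 1;
}

inline void update_rq_clock(struct rq *rq)
{
        if (!rq->skip_clock_update)
                rq->clock = sched_clock_cpu(cpu_of(rq));
}

/* ... and in put_prev_task(), later in this diff: */
static void put_prev_task(struct rq *rq, struct task_struct *prev)
{
        if (prev->se.on_rq)
                update_rq_clock(rq);
        rq->skip_clock_update = 0;      /* re-arm for the next task */
        prev->sched_class->put_prev_task(rq, prev);
}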
  
  /*
@@@ -914,12 -904,16 +915,12 @@@ static inline void finish_lock_switch(s
  #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
  
  /*
 - * Check whether the task is waking, we use this to synchronize against
 - * ttwu() so that task_cpu() reports a stable number.
 - *
 - * We need to make an exception for PF_STARTING tasks because the fork
 - * path might require task_rq_lock() to work, eg. it can call
 - * set_cpus_allowed_ptr() from the cpuset clone_ns code.
 + * Check whether the task is waking, we use this to synchronize ->cpus_allowed
 + * against ttwu().
   */
  static inline int task_is_waking(struct task_struct *p)
  {
 -      return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
 +      return unlikely(p->state == TASK_WAKING);
  }
  
  /*
@@@ -932,9 -926,11 +933,9 @@@ static inline struct rq *__task_rq_lock
        struct rq *rq;
  
        for (;;) {
 -              while (task_is_waking(p))
 -                      cpu_relax();
                rq = task_rq(p);
                raw_spin_lock(&rq->lock);
 -              if (likely(rq == task_rq(p) && !task_is_waking(p)))
 +              if (likely(rq == task_rq(p)))
                        return rq;
                raw_spin_unlock(&rq->lock);
        }
@@@ -951,10 -947,12 +952,10 @@@ static struct rq *task_rq_lock(struct t
        struct rq *rq;
  
        for (;;) {
 -              while (task_is_waking(p))
 -                      cpu_relax();
                local_irq_save(*flags);
                rq = task_rq(p);
                raw_spin_lock(&rq->lock);
 -              if (likely(rq == task_rq(p) && !task_is_waking(p)))
 +              if (likely(rq == task_rq(p)))
                        return rq;
                raw_spin_unlock_irqrestore(&rq->lock, *flags);
        }
@@@ -1231,17 -1229,6 +1232,17 @@@ void wake_up_idle_cpu(int cpu
        if (!tsk_is_polling(rq->idle))
                smp_send_reschedule(cpu);
  }
 +
 +int nohz_ratelimit(int cpu)
 +{
 +      struct rq *rq = cpu_rq(cpu);
 +      u64 diff = rq->clock - rq->nohz_stamp;
 +
 +      rq->nohz_stamp = rq->clock;
 +
 +      return diff < (NSEC_PER_SEC / HZ) >> 1;
 +}
 +
  #endif /* CONFIG_NO_HZ */
  
  static u64 sched_avg_period(void)
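
The new nohz_ratelimit() above returns true when less than half a scheduler tick has elapsed since the previous check on that CPU, presumably letting the nohz code decline to stop the tick again so soon (the caller is not part of this diff). Note that the stamp is updated unconditionally, so the window is measured from the last call, not from the last time the tick was actually stopped. For a feel of the window size, a small standalone calculation (the HZ values are just common configuration examples):

#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
        unsigned long hz_values[] = { 100, 250, 300, 1000 }; /* example configs */
        int i;

        for (i = 0; i < 4; i++) {
                /* Same expression as in nohz_ratelimit(): half a tick, in ns. */
                unsigned long long window = (NSEC_PER_SEC / hz_values[i]) >> 1;
                printf("HZ=%-4lu  rate-limit window = %llu ns\n",
                       hz_values[i], window);
        }
        return 0;
}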
@@@ -1784,6 -1771,8 +1785,6 @@@ static void double_rq_lock(struct rq *r
                        raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
                }
        }
 -      update_rq_clock(rq1);
 -      update_rq_clock(rq2);
  }
  
  /*
@@@ -1877,43 -1866,56 +1878,43 @@@ static void update_avg(u64 *avg, u64 sa
        *avg += diff >> 3;
  }
  
 -static void
 -enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
 +static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
  {
 -      if (wakeup)
 -              p->se.start_runtime = p->se.sum_exec_runtime;
 -
 +      update_rq_clock(rq);
        sched_info_queued(p);
 -      p->sched_class->enqueue_task(rq, p, wakeup, head);
 +      p->sched_class->enqueue_task(rq, p, flags);
        p->se.on_rq = 1;
  }
  
 -static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 +static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
  {
 -      if (sleep) {
 -              if (p->se.last_wakeup) {
 -                      update_avg(&p->se.avg_overlap,
 -                              p->se.sum_exec_runtime - p->se.last_wakeup);
 -                      p->se.last_wakeup = 0;
 -              } else {
 -                      update_avg(&p->se.avg_wakeup,
 -                              sysctl_sched_wakeup_granularity);
 -              }
 -      }
 -
 +      update_rq_clock(rq);
        sched_info_dequeued(p);
 -      p->sched_class->dequeue_task(rq, p, sleep);
 +      p->sched_class->dequeue_task(rq, p, flags);
        p->se.on_rq = 0;
  }
  
  /*
   * activate_task - move a task to the runqueue.
   */
 -static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
 +static void activate_task(struct rq *rq, struct task_struct *p, int flags)
  {
        if (task_contributes_to_load(p))
                rq->nr_uninterruptible--;
  
 -      enqueue_task(rq, p, wakeup, false);
 +      enqueue_task(rq, p, flags);
        inc_nr_running(rq);
  }
  
  /*
   * deactivate_task - remove a task from the runqueue.
   */
 -static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
 +static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
  {
        if (task_contributes_to_load(p))
                rq->nr_uninterruptible++;
  
 -      dequeue_task(rq, p, sleep);
 +      dequeue_task(rq, p, flags);
        dec_nr_running(rq);
  }
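
The enqueue/dequeue rework above replaces the positional booleans (wakeup, sleep, and the head argument) with a single flags word, and moves update_rq_clock() into enqueue_task()/dequeue_task() so call sites no longer update the clock by hand. The flag names visible in this diff are ENQUEUE_WAKEUP, ENQUEUE_WAKING, ENQUEUE_HEAD and DEQUEUE_SLEEP; their definitions are not part of these hunks, so the values below are illustrative assumptions:

/* Illustrative sketch only -- the real definitions live outside the
   hunks shown here, and the numeric values are assumed. */
#define ENQUEUE_WAKEUP  1       /* task is being woken up                    */
#define ENQUEUE_WAKING  2       /* the ->task_waking() hook already ran      */
#define ENQUEUE_HEAD    4       /* queue at the head (e.g. priority boost)   */

#define DEQUEUE_SLEEP   1       /* task is going to sleep, not migrating     */

/* Old call sites                     become                                 */
/* enqueue_task(rq, p, 1, false);  -> enqueue_task(rq, p, ENQUEUE_WAKEUP);   */
/* enqueue_task(rq, p, 0, true);   -> enqueue_task(rq, p, ENQUEUE_HEAD);     */
/* dequeue_task(rq, p, 1);         -> dequeue_task(rq, p, DEQUEUE_SLEEP);    */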
  
@@@ -2271,9 -2273,6 +2272,9 @@@ void task_oncpu_function_call(struct ta
  }
  
  #ifdef CONFIG_SMP
 +/*
 + * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
 + */
  static int select_fallback_rq(int cpu, struct task_struct *p)
  {
        int dest_cpu;
                return dest_cpu;
  
        /* No more Mr. Nice Guy. */
 -      if (dest_cpu >= nr_cpu_ids) {
 -              rcu_read_lock();
 -              cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
 -              rcu_read_unlock();
 -              dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
 -
 +      if (unlikely(dest_cpu >= nr_cpu_ids)) {
 +              dest_cpu = cpuset_cpus_allowed_fallback(p);
                /*
                 * Don't tell them about moving exiting tasks or
                 * kernel threads (both mm NULL), since they never
  }
  
  /*
 - * Gets called from 3 sites (exec, fork, wakeup), since it is called without
 - * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done
 - * by:
 - *
 - *  exec:           is unstable, retry loop
 - *  fork & wake-up: serialize ->cpus_allowed against TASK_WAKING
 + * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
   */
  static inline
 -int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
 +int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
  {
 -      int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
 +      int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
  
        /*
         * In order not to call set_task_cpu() on a blocking task we need
@@@ -2352,13 -2360,16 +2353,13 @@@ static int try_to_wake_up(struct task_s
  {
        int cpu, orig_cpu, this_cpu, success = 0;
        unsigned long flags;
 +      unsigned long en_flags = ENQUEUE_WAKEUP;
        struct rq *rq;
  
 -      if (!sched_feat(SYNC_WAKEUPS))
 -              wake_flags &= ~WF_SYNC;
 -
        this_cpu = get_cpu();
  
        smp_wmb();
        rq = task_rq_lock(p, &flags);
 -      update_rq_clock(rq);
        if (!(p->state & state))
                goto out;
  
         *
         * First fix up the nr_uninterruptible count:
         */
 -      if (task_contributes_to_load(p))
 -              rq->nr_uninterruptible--;
 +      if (task_contributes_to_load(p)) {
 +              if (likely(cpu_online(orig_cpu)))
 +                      rq->nr_uninterruptible--;
 +              else
 +                      this_rq()->nr_uninterruptible--;
 +      }
        p->state = TASK_WAKING;
  
 -      if (p->sched_class->task_waking)
 +      if (p->sched_class->task_waking) {
                p->sched_class->task_waking(rq, p);
 +              en_flags |= ENQUEUE_WAKING;
 +      }
  
 -      __task_rq_unlock(rq);
 -
 -      cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
 -      if (cpu != orig_cpu) {
 -              /*
 -               * Since we migrate the task without holding any rq->lock,
 -               * we need to be careful with task_rq_lock(), since that
 -               * might end up locking an invalid rq.
 -               */
 +      cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
 +      if (cpu != orig_cpu)
                set_task_cpu(p, cpu);
 -      }
 +      __task_rq_unlock(rq);
  
        rq = cpu_rq(cpu);
        raw_spin_lock(&rq->lock);
 -      update_rq_clock(rq);
  
        /*
         * We migrated the task without holding either rq->lock, however
  
  out_activate:
  #endif /* CONFIG_SMP */
 -      schedstat_inc(p, se.nr_wakeups);
 +      schedstat_inc(p, se.statistics.nr_wakeups);
        if (wake_flags & WF_SYNC)
 -              schedstat_inc(p, se.nr_wakeups_sync);
 +              schedstat_inc(p, se.statistics.nr_wakeups_sync);
        if (orig_cpu != cpu)
 -              schedstat_inc(p, se.nr_wakeups_migrate);
 +              schedstat_inc(p, se.statistics.nr_wakeups_migrate);
        if (cpu == this_cpu)
 -              schedstat_inc(p, se.nr_wakeups_local);
 +              schedstat_inc(p, se.statistics.nr_wakeups_local);
        else
 -              schedstat_inc(p, se.nr_wakeups_remote);
 -      activate_task(rq, p, 1);
 +              schedstat_inc(p, se.statistics.nr_wakeups_remote);
 +      activate_task(rq, p, en_flags);
        success = 1;
  
 -      /*
 -       * Only attribute actual wakeups done by this task.
 -       */
 -      if (!in_interrupt()) {
 -              struct sched_entity *se = &current->se;
 -              u64 sample = se->sum_exec_runtime;
 -
 -              if (se->last_wakeup)
 -                      sample -= se->last_wakeup;
 -              else
 -                      sample -= se->start_runtime;
 -              update_avg(&se->avg_wakeup, sample);
 -
 -              se->last_wakeup = se->sum_exec_runtime;
 -      }
 -
  out_running:
        trace_sched_wakeup(rq, p, success);
        check_preempt_curr(rq, p, wake_flags);
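
The try_to_wake_up() changes above keep the old runqueue lock held across CPU selection, so TASK_WAKING plus rq->lock is what stabilizes ->cpus_allowed; the previous unlock/retry dance, the SYNC_WAKEUPS feature check and the avg_overlap/avg_wakeup bookkeeping all go away. A condensed sketch of the new ordering, pieced together from the hunks above with schedstats and the error paths left out:

/* Condensed sketch of the reworked SMP wakeup path in this commit. */
rq = task_rq_lock(p, &flags);           /* old rq; clock updated lazily now */
if (!(p->state & state))
        goto out;

p->state = TASK_WAKING;
if (p->sched_class->task_waking) {
        p->sched_class->task_waking(rq, p);
        en_flags |= ENQUEUE_WAKING;     /* new flag paired with the hook    */
}

cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
if (cpu != orig_cpu)
        set_task_cpu(p, cpu);           /* still serialized by TASK_WAKING  */
__task_rq_unlock(rq);                   /* only now drop the old rq lock    */

rq = cpu_rq(cpu);
raw_spin_lock(&rq->lock);

activate_task(rq, p, en_flags);         /* ENQUEUE_WAKEUP [| ENQUEUE_WAKING] */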
@@@ -2498,9 -2527,42 +2499,9 @@@ static void __sched_fork(struct task_st
        p->se.sum_exec_runtime          = 0;
        p->se.prev_sum_exec_runtime     = 0;
        p->se.nr_migrations             = 0;
 -      p->se.last_wakeup               = 0;
 -      p->se.avg_overlap               = 0;
 -      p->se.start_runtime             = 0;
 -      p->se.avg_wakeup                = sysctl_sched_wakeup_granularity;
  
  #ifdef CONFIG_SCHEDSTATS
 -      p->se.wait_start                        = 0;
 -      p->se.wait_max                          = 0;
 -      p->se.wait_count                        = 0;
 -      p->se.wait_sum                          = 0;
 -
 -      p->se.sleep_start                       = 0;
 -      p->se.sleep_max                         = 0;
 -      p->se.sum_sleep_runtime                 = 0;
 -
 -      p->se.block_start                       = 0;
 -      p->se.block_max                         = 0;
 -      p->se.exec_max                          = 0;
 -      p->se.slice_max                         = 0;
 -
 -      p->se.nr_migrations_cold                = 0;
 -      p->se.nr_failed_migrations_affine       = 0;
 -      p->se.nr_failed_migrations_running      = 0;
 -      p->se.nr_failed_migrations_hot          = 0;
 -      p->se.nr_forced_migrations              = 0;
 -
 -      p->se.nr_wakeups                        = 0;
 -      p->se.nr_wakeups_sync                   = 0;
 -      p->se.nr_wakeups_migrate                = 0;
 -      p->se.nr_wakeups_local                  = 0;
 -      p->se.nr_wakeups_remote                 = 0;
 -      p->se.nr_wakeups_affine                 = 0;
 -      p->se.nr_wakeups_affine_attempts        = 0;
 -      p->se.nr_wakeups_passive                = 0;
 -      p->se.nr_wakeups_idle                   = 0;
 -
 +      memset(&p->se.statistics, 0, sizeof(p->se.statistics));
  #endif
  
        INIT_LIST_HEAD(&p->rt.run_list);
@@@ -2521,11 -2583,11 +2522,11 @@@ void sched_fork(struct task_struct *p, 
  
        __sched_fork(p);
        /*
 -       * We mark the process as waking here. This guarantees that
 +       * We mark the process as running here. This guarantees that
         * nobody will actually run it, and a signal or other external
         * event cannot wake it up and insert it on the runqueue either.
         */
 -      p->state = TASK_WAKING;
 +      p->state = TASK_RUNNING;
  
        /*
         * Revert to default priority/policy on fork if requested.
@@@ -2592,25 -2654,29 +2593,25 @@@ void wake_up_new_task(struct task_struc
        int cpu __maybe_unused = get_cpu();
  
  #ifdef CONFIG_SMP
 +      rq = task_rq_lock(p, &flags);
 +      p->state = TASK_WAKING;
 +
        /*
         * Fork balancing, do it here and not earlier because:
         *  - cpus_allowed can change in the fork path
         *  - any previously selected cpu might disappear through hotplug
         *
 -       * We still have TASK_WAKING but PF_STARTING is gone now, meaning
 -       * ->cpus_allowed is stable, we have preemption disabled, meaning
 -       * cpu_online_mask is stable.
 +       * We set TASK_WAKING so that select_task_rq() can drop rq->lock
 +       * without people poking at ->cpus_allowed.
         */
 -      cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
 +      cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
        set_task_cpu(p, cpu);
 -#endif
 -
 -      /*
 -       * Since the task is not on the rq and we still have TASK_WAKING set
 -       * nobody else will migrate this task.
 -       */
 -      rq = cpu_rq(cpu);
 -      raw_spin_lock_irqsave(&rq->lock, flags);
  
 -      BUG_ON(p->state != TASK_WAKING);
        p->state = TASK_RUNNING;
 -      update_rq_clock(rq);
 +      task_rq_unlock(rq, &flags);
 +#endif
 +
 +      rq = task_rq_lock(p, &flags);
        activate_task(rq, p, 0);
        trace_sched_wakeup_new(rq, p, 1);
        check_preempt_curr(rq, p, WF_FORK);
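
Correspondingly, sched_fork() now leaves the child in TASK_RUNNING, and wake_up_new_task() takes the runqueue lock first, flips the child to TASK_WAKING only for the duration of fork balancing, then activates it under a fresh task_rq_lock(). Condensed from the hunk above (SMP case):

/* Condensed from wake_up_new_task() as changed above. */
rq = task_rq_lock(p, &flags);
p->state = TASK_WAKING;                 /* fence off ->cpus_allowed changes */

cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
set_task_cpu(p, cpu);

p->state = TASK_RUNNING;
task_rq_unlock(rq, &flags);

rq = task_rq_lock(p, &flags);           /* may now be the balanced-to rq    */
activate_task(rq, p, 0);
trace_sched_wakeup_new(rq, p, 1);
check_preempt_curr(rq, p, WF_FORK);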
@@@ -3056,21 -3122,32 +3057,21 @@@ void sched_exec(void
  {
        struct task_struct *p = current;
        struct migration_req req;
 -      int dest_cpu, this_cpu;
        unsigned long flags;
        struct rq *rq;
 -
 -again:
 -      this_cpu = get_cpu();
 -      dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
 -      if (dest_cpu == this_cpu) {
 -              put_cpu();
 -              return;
 -      }
 +      int dest_cpu;
  
        rq = task_rq_lock(p, &flags);
 -      put_cpu();
 +      dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
 +      if (dest_cpu == smp_processor_id())
 +              goto unlock;
  
        /*
         * select_task_rq() can race against ->cpus_allowed
         */
 -      if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
 -          || unlikely(!cpu_active(dest_cpu))) {
 -              task_rq_unlock(rq, &flags);
 -              goto again;
 -      }
 -
 -      /* force the process onto the specified CPU */
 -      if (migrate_task(p, dest_cpu, &req)) {
 +      if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
 +          likely(cpu_active(dest_cpu)) &&
 +          migrate_task(p, dest_cpu, &req)) {
                /* Need to wait for migration thread (might exit: take ref). */
                struct task_struct *mt = rq->migration_thread;
  
  
                return;
        }
 +unlock:
        task_rq_unlock(rq, &flags);
  }
  
@@@ -3554,9 -3630,23 +3555,9 @@@ static inline void schedule_debug(struc
  
  static void put_prev_task(struct rq *rq, struct task_struct *prev)
  {
 -      if (prev->state == TASK_RUNNING) {
 -              u64 runtime = prev->se.sum_exec_runtime;
 -
 -              runtime -= prev->se.prev_sum_exec_runtime;
 -              runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
 -
 -              /*
 -               * In order to avoid avg_overlap growing stale when we are
 -               * indeed overlapping and hence not getting put to sleep, grow
 -               * the avg_overlap on preemption.
 -               *
 -               * We use the average preemption runtime because that
 -               * correlates to the amount of cache footprint a task can
 -               * build up.
 -               */
 -              update_avg(&prev->se.avg_overlap, runtime);
 -      }
 +      if (prev->se.on_rq)
 +              update_rq_clock(rq);
 +      rq->skip_clock_update = 0;
        prev->sched_class->put_prev_task(rq, prev);
  }
  
@@@ -3619,13 -3709,14 +3620,13 @@@ need_resched_nonpreemptible
                hrtick_clear(rq);
  
        raw_spin_lock_irq(&rq->lock);
 -      update_rq_clock(rq);
        clear_tsk_need_resched(prev);
  
        if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
                if (unlikely(signal_pending_state(prev->state, prev)))
                        prev->state = TASK_RUNNING;
                else
 -                      deactivate_task(rq, prev, 1);
 +                      deactivate_task(rq, prev, DEQUEUE_SLEEP);
                switch_count = &prev->nvcsw;
        }
  
@@@ -4175,6 -4266,7 +4176,6 @@@ void rt_mutex_setprio(struct task_struc
        BUG_ON(prio < 0 || prio > MAX_PRIO);
  
        rq = task_rq_lock(p, &flags);
 -      update_rq_clock(rq);
  
        oldprio = p->prio;
        prev_class = p->sched_class;
        if (running)
                p->sched_class->set_curr_task(rq);
        if (on_rq) {
 -              enqueue_task(rq, p, 0, oldprio < prio);
 +              enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
  
                check_class_changed(rq, p, prev_class, oldprio, running);
        }
@@@ -4217,6 -4309,7 +4218,6 @@@ void set_user_nice(struct task_struct *
         * the task might be in the middle of scheduling on another CPU.
         */
        rq = task_rq_lock(p, &flags);
 -      update_rq_clock(rq);
        /*
         * The RT priorities are set via sched_setscheduler(), but we still
         * allow the 'normal' nice value to be set - but as expected
        delta = p->prio - old_prio;
  
        if (on_rq) {
 -              enqueue_task(rq, p, 0, false);
 +              enqueue_task(rq, p, 0);
                /*
                 * If the task increased its priority or is running and
                 * lowered its priority, then reschedule its CPU:
@@@ -4499,6 -4592,7 +4500,6 @@@ recheck
                raw_spin_unlock_irqrestore(&p->pi_lock, flags);
                goto recheck;
        }
 -      update_rq_clock(rq);
        on_rq = p->se.on_rq;
        running = task_current(rq, p);
        if (on_rq)
@@@ -4809,7 -4903,7 +4810,7 @@@ SYSCALL_DEFINE3(sched_getaffinity, pid_
        int ret;
        cpumask_var_t mask;
  
-       if (len < nr_cpu_ids)
+       if ((len * BITS_PER_BYTE) < nr_cpu_ids)
                return -EINVAL;
        if (len & (sizeof(unsigned long)-1))
                return -EINVAL;
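
The sched_getaffinity() fix above compares like with like: len is the size of the user buffer in bytes, while nr_cpu_ids counts CPUs (bits), so the buffer only needs to hold nr_cpu_ids bits, not nr_cpu_ids bytes. A small standalone illustration of both checks (the example values are assumptions):

#include <stdio.h>

#define BITS_PER_BYTE 8

int main(void)
{
        unsigned int nr_cpu_ids = 64;   /* example: a 64-CPU kernel          */
        unsigned int len = 8;           /* an 8-byte (64-bit) user cpumask   */

        /* Old check: wrongly rejects a buffer that can hold 64 CPU bits.   */
        printf("old check: %s\n", (len < nr_cpu_ids) ? "-EINVAL" : "ok");

        /* New check: 8 bytes * 8 bits = 64 bits >= 64 CPUs, so it passes.  */
        printf("new check: %s\n",
               ((len * BITS_PER_BYTE) < nr_cpu_ids) ? "-EINVAL" : "ok");
        return 0;
}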
@@@ -5264,18 -5358,7 +5265,18 @@@ int set_cpus_allowed_ptr(struct task_st
        struct rq *rq;
        int ret = 0;
  
 +      /*
 +       * Serialize against TASK_WAKING so that ttwu() and wunt() can
 +       * drop the rq->lock and still rely on ->cpus_allowed.
 +       */
 +again:
 +      while (task_is_waking(p))
 +              cpu_relax();
        rq = task_rq_lock(p, &flags);
 +      if (task_is_waking(p)) {
 +              task_rq_unlock(rq, &flags);
 +              goto again;
 +      }
  
        if (!cpumask_intersects(new_mask, cpu_active_mask)) {
                ret = -EINVAL;
  
                get_task_struct(mt);
                task_rq_unlock(rq, &flags);
-               wake_up_process(rq->migration_thread);
+               wake_up_process(mt);
                put_task_struct(mt);
                wait_for_completion(&req.done);
                tlb_migrate_finish(p->mm);
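
In the same function, the wake_up_process() fix matters because rq->lock has just been dropped: on a racing CPU-down, rq->migration_thread can be cleared at that point, so the code wakes the pinned local pointer instead of re-reading the runqueue field. The pattern, condensed and slightly expanded from the lines above (the mt initialization itself sits outside the shown context):

/* Pin the migration thread before dropping the lock, then only use
   the pinned pointer. Condensed from set_cpus_allowed_ptr() above. */
struct task_struct *mt = rq->migration_thread;

get_task_struct(mt);                    /* take a reference                   */
task_rq_unlock(rq, &flags);             /* rq->migration_thread may now be    */
                                        /* cleared by CPU hotplug ...         */
wake_up_process(mt);                    /* ... so wake via the pinned pointer */
put_task_struct(mt);
wait_for_completion(&req.done);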
@@@ -5433,29 -5516,30 +5434,29 @@@ static int migration_thread(void *data
  }
  
  #ifdef CONFIG_HOTPLUG_CPU
 -
 -static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 -{
 -      int ret;
 -
 -      local_irq_disable();
 -      ret = __migrate_task(p, src_cpu, dest_cpu);
 -      local_irq_enable();
 -      return ret;
 -}
 -
  /*
   * Figure out where task on dead CPU should go, use force if necessary.
   */
 -static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 +void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
  {
 -      int dest_cpu;
 +      struct rq *rq = cpu_rq(dead_cpu);
 +      int needs_cpu, uninitialized_var(dest_cpu);
 +      unsigned long flags;
  
 -again:
 -      dest_cpu = select_fallback_rq(dead_cpu, p);
 +      local_irq_save(flags);
  
 -      /* It can have affinity changed while we were choosing. */
 -      if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
 -              goto again;
 +      raw_spin_lock(&rq->lock);
 +      needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
 +      if (needs_cpu)
 +              dest_cpu = select_fallback_rq(dead_cpu, p);
 +      raw_spin_unlock(&rq->lock);
 +      /*
 +       * It can only fail if we race with set_cpus_allowed(),
  +      * in which case the racer should migrate the task anyway.
 +       */
 +      if (needs_cpu)
 +              __migrate_task(p, dead_cpu, dest_cpu);
 +      local_irq_restore(flags);
  }
  
  /*
@@@ -5519,6 -5603,7 +5520,6 @@@ void sched_idle_next(void
  
        __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
  
 -      update_rq_clock(rq);
        activate_task(rq, p, 0);
  
        raw_spin_unlock_irqrestore(&rq->lock, flags);
@@@ -5573,6 -5658,7 +5574,6 @@@ static void migrate_dead_tasks(unsigne
        for ( ; ; ) {
                if (!rq->nr_running)
                        break;
 -              update_rq_clock(rq);
                next = pick_next_task(rq);
                if (!next)
                        break;
@@@ -5848,6 -5934,7 +5849,6 @@@ migration_call(struct notifier_block *n
  
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
 -              cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
                migrate_live_tasks(cpu);
                rq = cpu_rq(cpu);
                kthread_stop(rq->migration_thread);
                rq->migration_thread = NULL;
                /* Idle task back to normal (off runqueue, low prio) */
                raw_spin_lock_irq(&rq->lock);
 -              update_rq_clock(rq);
                deactivate_task(rq, rq->idle, 0);
                __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
                rq->idle->sched_class = &idle_sched_class;
                migrate_dead_tasks(cpu);
                raw_spin_unlock_irq(&rq->lock);
 -              cpuset_unlock();
                migrate_nr_uninterruptible(rq);
                BUG_ON(rq->nr_running != 0);
                calc_global_load_remove(rq);
@@@ -7803,6 -7892,7 +7804,6 @@@ static void normalize_task(struct rq *r
  {
        int on_rq;
  
 -      update_rq_clock(rq);
        on_rq = p->se.on_rq;
        if (on_rq)
                deactivate_task(rq, p, 0);
@@@ -7829,9 -7919,9 +7830,9 @@@ void normalize_rt_tasks(void
  
                p->se.exec_start                = 0;
  #ifdef CONFIG_SCHEDSTATS
 -              p->se.wait_start                = 0;
 -              p->se.sleep_start               = 0;
 -              p->se.block_start               = 0;
 +              p->se.statistics.wait_start     = 0;
 +              p->se.statistics.sleep_start    = 0;
 +              p->se.statistics.block_start    = 0;
  #endif
  
                if (!rt_task(p)) {
@@@ -8164,6 -8254,8 +8165,6 @@@ void sched_move_task(struct task_struc
  
        rq = task_rq_lock(tsk, &flags);
  
 -      update_rq_clock(rq);
 -
        running = task_current(rq, tsk);
        on_rq = tsk->se.on_rq;
  
        if (unlikely(running))
                tsk->sched_class->set_curr_task(rq);
        if (on_rq)
 -              enqueue_task(rq, tsk, 0, false);
 +              enqueue_task(rq, tsk, 0);
  
        task_rq_unlock(rq, &flags);
  }
diff --combined kernel/sched_debug.c
index 0932c5c45b34051af474c6d24c4c48e92a2d293a,9b49db1440372bd6227e7e4258a0a6d78903e230..9cf1baf6616af14959b30f00357bbaef09b4a817
@@@ -70,16 -70,16 +70,16 @@@ static void print_cfs_group_stats(struc
        PN(se->vruntime);
        PN(se->sum_exec_runtime);
  #ifdef CONFIG_SCHEDSTATS
 -      PN(se->wait_start);
 -      PN(se->sleep_start);
 -      PN(se->block_start);
 -      PN(se->sleep_max);
 -      PN(se->block_max);
 -      PN(se->exec_max);
 -      PN(se->slice_max);
 -      PN(se->wait_max);
 -      PN(se->wait_sum);
 -      P(se->wait_count);
 +      PN(se->statistics.wait_start);
 +      PN(se->statistics.sleep_start);
 +      PN(se->statistics.block_start);
 +      PN(se->statistics.sleep_max);
 +      PN(se->statistics.block_max);
 +      PN(se->statistics.exec_max);
 +      PN(se->statistics.slice_max);
 +      PN(se->statistics.wait_max);
 +      PN(se->statistics.wait_sum);
 +      P(se->statistics.wait_count);
  #endif
        P(se->load.weight);
  #undef PN
@@@ -104,7 -104,7 +104,7 @@@ print_task(struct seq_file *m, struct r
        SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
                SPLIT_NS(p->se.vruntime),
                SPLIT_NS(p->se.sum_exec_runtime),
 -              SPLIT_NS(p->se.sum_sleep_runtime));
 +              SPLIT_NS(p->se.statistics.sum_sleep_runtime));
  #else
        SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
                0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
@@@ -173,6 -173,11 +173,6 @@@ void print_cfs_rq(struct seq_file *m, i
        task_group_path(tg, path, sizeof(path));
  
        SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
 -#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
 -      {
 -              uid_t uid = cfs_rq->tg->uid;
 -              SEQ_printf(m, "\ncfs_rq[%d] for UID: %u\n", cpu, uid);
 -      }
  #else
        SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
  #endif
@@@ -402,38 -407,40 +402,38 @@@ void proc_sched_show_task(struct task_s
        PN(se.exec_start);
        PN(se.vruntime);
        PN(se.sum_exec_runtime);
 -      PN(se.avg_overlap);
 -      PN(se.avg_wakeup);
  
        nr_switches = p->nvcsw + p->nivcsw;
  
  #ifdef CONFIG_SCHEDSTATS
 -      PN(se.wait_start);
 -      PN(se.sleep_start);
 -      PN(se.block_start);
 -      PN(se.sleep_max);
 -      PN(se.block_max);
 -      PN(se.exec_max);
 -      PN(se.slice_max);
 -      PN(se.wait_max);
 -      PN(se.wait_sum);
 -      P(se.wait_count);
 -      PN(se.iowait_sum);
 -      P(se.iowait_count);
 +      PN(se.statistics.wait_start);
 +      PN(se.statistics.sleep_start);
 +      PN(se.statistics.block_start);
 +      PN(se.statistics.sleep_max);
 +      PN(se.statistics.block_max);
 +      PN(se.statistics.exec_max);
 +      PN(se.statistics.slice_max);
 +      PN(se.statistics.wait_max);
 +      PN(se.statistics.wait_sum);
 +      P(se.statistics.wait_count);
 +      PN(se.statistics.iowait_sum);
 +      P(se.statistics.iowait_count);
        P(sched_info.bkl_count);
        P(se.nr_migrations);
 -      P(se.nr_migrations_cold);
 -      P(se.nr_failed_migrations_affine);
 -      P(se.nr_failed_migrations_running);
 -      P(se.nr_failed_migrations_hot);
 -      P(se.nr_forced_migrations);
 -      P(se.nr_wakeups);
 -      P(se.nr_wakeups_sync);
 -      P(se.nr_wakeups_migrate);
 -      P(se.nr_wakeups_local);
 -      P(se.nr_wakeups_remote);
 -      P(se.nr_wakeups_affine);
 -      P(se.nr_wakeups_affine_attempts);
 -      P(se.nr_wakeups_passive);
 -      P(se.nr_wakeups_idle);
 +      P(se.statistics.nr_migrations_cold);
 +      P(se.statistics.nr_failed_migrations_affine);
 +      P(se.statistics.nr_failed_migrations_running);
 +      P(se.statistics.nr_failed_migrations_hot);
 +      P(se.statistics.nr_forced_migrations);
 +      P(se.statistics.nr_wakeups);
 +      P(se.statistics.nr_wakeups_sync);
 +      P(se.statistics.nr_wakeups_migrate);
 +      P(se.statistics.nr_wakeups_local);
 +      P(se.statistics.nr_wakeups_remote);
 +      P(se.statistics.nr_wakeups_affine);
 +      P(se.statistics.nr_wakeups_affine_attempts);
 +      P(se.statistics.nr_wakeups_passive);
 +      P(se.statistics.nr_wakeups_idle);
  
        {
                u64 avg_atom, avg_per_cpu;
  void proc_sched_set_task(struct task_struct *p)
  {
  #ifdef CONFIG_SCHEDSTATS
 -      p->se.wait_max                          = 0;
 -      p->se.wait_sum                          = 0;
 -      p->se.wait_count                        = 0;
 -      p->se.iowait_sum                        = 0;
 -      p->se.iowait_count                      = 0;
 -      p->se.sleep_max                         = 0;
 -      p->se.sum_sleep_runtime                 = 0;
 -      p->se.block_max                         = 0;
 -      p->se.exec_max                          = 0;
 -      p->se.slice_max                         = 0;
 -      p->se.nr_migrations                     = 0;
 -      p->se.nr_migrations_cold                = 0;
 -      p->se.nr_failed_migrations_affine       = 0;
 -      p->se.nr_failed_migrations_running      = 0;
 -      p->se.nr_failed_migrations_hot          = 0;
 -      p->se.nr_forced_migrations              = 0;
 -      p->se.nr_wakeups                        = 0;
 -      p->se.nr_wakeups_sync                   = 0;
 -      p->se.nr_wakeups_migrate                = 0;
 -      p->se.nr_wakeups_local                  = 0;
 -      p->se.nr_wakeups_remote                 = 0;
 -      p->se.nr_wakeups_affine                 = 0;
 -      p->se.nr_wakeups_affine_attempts        = 0;
 -      p->se.nr_wakeups_passive                = 0;
 -      p->se.nr_wakeups_idle                   = 0;
 -      p->sched_info.bkl_count                 = 0;
 +      memset(&p->se.statistics, 0, sizeof(p->se.statistics));
  #endif
-       p->se.sum_exec_runtime                  = 0;
-       p->se.prev_sum_exec_runtime             = 0;
-       p->nvcsw                                = 0;
-       p->nivcsw                               = 0;
  }