Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penber...

[deliverable/linux.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 1874c7418319d302223a9e3abed29fe581af89cc..d87c6e5d4e8c0e790791908fd20929e8c7e2c7c7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -644,7 +644,7 @@ struct rq {
  
         unsigned long cpu_power;
  
-       unsigned char idle_at_tick;
+       unsigned char idle_balance;
         /* For active balancing */
         int post_schedule;
         int active_balance;
@@ -1404,6 +1404,18 @@ void wake_up_idle_cpu(int cpu)
                 smp_send_reschedule(cpu);
  }
  
+static inline bool got_nohz_idle_kick(void)
+{
+       return idle_cpu(smp_processor_id()) && this_rq()->nohz_balance_kick;
+}
+
+#else /* CONFIG_NO_HZ */
+
+static inline bool got_nohz_idle_kick(void)
+{
+       return false;
+}
+
  #endif /* CONFIG_NO_HZ */
  
  static u64 sched_avg_period(void)
@@ -1883,7 +1895,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
  #ifdef CONFIG_SMP
         /*
          * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
-        * successfuly executed on another CPU. We must ensure that updates of
+        * successfully executed on another CPU. We must ensure that updates of
          * per-task data have been completed by this moment.
          */
         smp_wmb();
@@ -2532,11 +2544,11 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
  
         /* Look for allowed, online CPU in same node. */
         for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
-               if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+               if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
                         return dest_cpu;
  
         /* Any allowed, online CPU? */
-       dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
+       dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask);
         if (dest_cpu < nr_cpu_ids)
                 return dest_cpu;
  
@@ -2573,7 +2585,7 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
          * [ this allows ->select_task() to simply return task_cpu(p) and
          *   not worry about this generic constraint ]
          */
-       if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
+       if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) ||
                      !cpu_online(cpu)))
                 cpu = select_fallback_rq(task_cpu(p), p);
  
@@ -2717,7 +2729,7 @@ static void sched_ttwu_pending(void)
  
  void scheduler_ipi(void)
  {
-       if (llist_empty(&this_rq()->wake_list))
+       if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
                 return;
  
         /*
@@ -2735,6 +2747,14 @@ void scheduler_ipi(void)
          */
         irq_enter();
         sched_ttwu_pending();
+
+       /*
+        * Check if someone kicked us for doing the nohz idle load balance.
+        */
+       if (unlikely(got_nohz_idle_kick() && !need_resched())) {
+               this_rq()->idle_balance = 1;
+               raise_softirq_irqoff(SCHED_SOFTIRQ);
+       }
         irq_exit();
  }
  
@@ -4229,7 +4249,7 @@ void scheduler_tick(void)
         perf_event_task_tick();
  
  #ifdef CONFIG_SMP
-       rq->idle_at_tick = idle_cpu(cpu);
+       rq->idle_balance = idle_cpu(cpu);
         trigger_load_balance(rq, cpu);
  #endif
  }
@@ -4326,6 +4346,7 @@ static inline void schedule_debug(struct task_struct *prev)
          */
         if (unlikely(in_atomic_preempt_off() && !prev->exit_state))
                 __schedule_bug(prev);
+       rcu_sleep_check();
  
         profile_hit(SCHED_PROFILING, __builtin_return_address(0));
  
@@ -5138,7 +5159,20 @@ EXPORT_SYMBOL(task_nice);
   */
  int idle_cpu(int cpu)
  {
-       return cpu_curr(cpu) == cpu_rq(cpu)->idle;
+       struct rq *rq = cpu_rq(cpu);
+
+       if (rq->curr != rq->idle)
+               return 0;
+
+       if (rq->nr_running)
+               return 0;
+
+#ifdef CONFIG_SMP
+       if (!llist_empty(&rq->wake_list))
+               return 0;
+#endif
+
+       return 1;
  }
  
  /**
@@ -5988,7 +6022,7 @@ void show_state_filter(unsigned long state_filter)
         printk(KERN_INFO
                 "  task                        PC stack   pid father\n");
  #endif
-       read_lock(&tasklist_lock);
+       rcu_read_lock();
         do_each_thread(g, p) {
                 /*
                  * reset the NMI-timeout, listing all files on a slow
@@ -6004,7 +6038,7 @@ void show_state_filter(unsigned long state_filter)
  #ifdef CONFIG_SCHED_DEBUG
         sysrq_sched_debug_show();
  #endif
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
         /*
          * Only show locks if all tasks are dumped:
          */
@@ -6067,15 +6101,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
         ftrace_graph_init_idle_task(idle, cpu);
  }
  
-/*
- * In a system that switches off the HZ timer nohz_cpu_mask
- * indicates which cpus entered this state. This is used
- * in the rcu update to wait only for active cpus. For system
- * which do not switch off the HZ timer nohz_cpu_mask should
- * always be CPU_BITS_NONE.
- */
-cpumask_var_t nohz_cpu_mask;
-
  /*
   * Increase the granularity value when there are more CPUs,
   * because with more CPUs the 'effective latency' as visible
@@ -6128,10 +6153,9 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  {
         if (p->sched_class && p->sched_class->set_cpus_allowed)
                 p->sched_class->set_cpus_allowed(p, new_mask);
-       else {
-               cpumask_copy(&p->cpus_allowed, new_mask);
-               p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
-       }
+
+       cpumask_copy(&p->cpus_allowed, new_mask);
+       p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
  }
  
  /*
@@ -6229,7 +6253,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
         if (task_cpu(p) != src_cpu)
                 goto done;
         /* Affinity changed (again). */
-       if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+       if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
                 goto fail;
  
         /*
@@ -8275,7 +8299,6 @@ void __init sched_init(void)
                 rq_attach_root(rq, &def_root_domain);
  #ifdef CONFIG_NO_HZ
                 rq->nohz_balance_kick = 0;
-               init_sched_softirq_csd(&per_cpu(remote_sched_softirq_cb, i));
  #endif
  #endif
                 init_rq_hrtick(rq);
@@ -8317,8 +8340,6 @@ void __init sched_init(void)
          */
         current->sched_class = &fair_sched_class;
  
-       /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
-       zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
  #ifdef CONFIG_SMP
         zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
  #ifdef CONFIG_NO_HZ
@@ -8348,6 +8369,7 @@ void __might_sleep(const char *file, int line, int preempt_offset)
  {
         static unsigned long prev_jiffy;        /* ratelimiting */
  
+       rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
         if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
             system_state != SYSTEM_RUNNING || oops_in_progress)
                 return;