X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=kernel%2Fsched%2Fcore.c;h=dd09def88567bf9f418825c0f684b105b0667be7;hb=99e5ada9407cc19d7c4c05ce2165f20dc46fc093;hp=fc9103e9ff03d58d9bf48bbaf7a1d436e1c07429;hpb=bbbd27e694ce2c5fde9c8fcedbea618dd9153fe7;p=deliverable%2Flinux.git diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fc9103e9ff03..dd09def88567 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -549,7 +549,7 @@ void resched_cpu(int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * In the semi idle case, use the nearest busy cpu for migrating timers * from an idle cpu. This is good for power-savings. @@ -587,7 +587,7 @@ unlock: * account when the CPU goes back to idle and evaluates the timer * wheel for the next timer event. */ -void wake_up_idle_cpu(int cpu) +static void wake_up_idle_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -617,20 +617,56 @@ void wake_up_idle_cpu(int cpu) smp_send_reschedule(cpu); } +static bool wake_up_full_nohz_cpu(int cpu) +{ + if (tick_nohz_full_cpu(cpu)) { + if (cpu != smp_processor_id() || + tick_nohz_tick_stopped()) + smp_send_reschedule(cpu); + return true; + } + + return false; +} + +void wake_up_nohz_cpu(int cpu) +{ + if (!wake_up_full_nohz_cpu(cpu)) + wake_up_idle_cpu(cpu); +} + static inline bool got_nohz_idle_kick(void) { int cpu = smp_processor_id(); return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); } -#else /* CONFIG_NO_HZ */ +#else /* CONFIG_NO_HZ_COMMON */ static inline bool got_nohz_idle_kick(void) { return false; } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ + +#ifdef CONFIG_NO_HZ_FULL +bool sched_can_stop_tick(void) +{ + struct rq *rq; + + rq = this_rq(); + + /* Make sure rq->nr_running update is visible after the IPI */ + smp_rmb(); + + /* More than one running task need preemption */ + if (rq->nr_running > 1) + return false; + + return true; +} +#endif /* CONFIG_NO_HZ_FULL */ void sched_avg_update(struct rq *rq) { @@ -1288,8 +1324,8 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) { - trace_sched_wakeup(p, true); check_preempt_curr(rq, p, wake_flags); + trace_sched_wakeup(p, true); p->state = TASK_RUNNING; #ifdef CONFIG_SMP @@ -1362,7 +1398,8 @@ static void sched_ttwu_pending(void) void scheduler_ipi(void) { - if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) + if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick() + && !tick_nohz_full_cpu(smp_processor_id())) return; /* @@ -1379,6 +1416,7 @@ void scheduler_ipi(void) * somewhat pessimize the simple resched case. */ irq_enter(); + tick_nohz_full_check(); sched_ttwu_pending(); /* @@ -1752,9 +1790,8 @@ EXPORT_SYMBOL_GPL(preempt_notifier_unregister); static void fire_sched_in_preempt_notifiers(struct task_struct *curr) { struct preempt_notifier *notifier; - struct hlist_node *node; - hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) notifier->ops->sched_in(notifier, raw_smp_processor_id()); } @@ -1763,9 +1800,8 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, struct task_struct *next) { struct preempt_notifier *notifier; - struct hlist_node *node; - hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) notifier->ops->sched_out(notifier, next); } @@ -1860,6 +1896,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) kprobe_flush_task(prev); put_task_struct(prev); } + + tick_nohz_task_switch(current); } #ifdef CONFIG_SMP @@ -1979,11 +2017,10 @@ context_switch(struct rq *rq, struct task_struct *prev, } /* - * nr_running, nr_uninterruptible and nr_context_switches: + * nr_running and nr_context_switches: * * externally visible scheduler statistics: current number of runnable - * threads, current number of uninterruptible-sleeping threads, total - * number of context switches performed since bootup. + * threads, total number of context switches performed since bootup. */ unsigned long nr_running(void) { @@ -1995,23 +2032,6 @@ unsigned long nr_running(void) return sum; } -unsigned long nr_uninterruptible(void) -{ - unsigned long i, sum = 0; - - for_each_possible_cpu(i) - sum += cpu_rq(i)->nr_uninterruptible; - - /* - * Since we read the counters lockless, it might be slightly - * inaccurate. Do not allow it to go below zero though: - */ - if (unlikely((long)sum < 0)) - sum = 0; - - return sum; -} - unsigned long long nr_context_switches(void) { int i; @@ -2141,7 +2161,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) return load >> FSHIFT; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Handle NO_HZ for the global load-average. * @@ -2367,12 +2387,12 @@ static void calc_global_nohz(void) smp_wmb(); calc_load_idx++; } -#else /* !CONFIG_NO_HZ */ +#else /* !CONFIG_NO_HZ_COMMON */ static inline long calc_load_fold_idle(void) { return 0; } static inline void calc_global_nohz(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * calc_load - update the avenrun load estimates 10 ticks after the @@ -2532,7 +2552,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, sched_avg_update(this_rq); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * There is no sane way to deal with nohz on smp when using jiffies because the * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading @@ -2592,7 +2612,7 @@ void update_cpu_load_nohz(void) } raw_spin_unlock(&this_rq->lock); } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from scheduler_tick() @@ -3102,11 +3122,13 @@ EXPORT_SYMBOL(preempt_schedule); asmlinkage void __sched preempt_schedule_irq(void) { struct thread_info *ti = current_thread_info(); + enum ctx_state prev_state; /* Catch callers which need to be fixed */ BUG_ON(ti->preempt_count || !irqs_disabled()); - user_exit(); + prev_state = exception_enter(); + do { add_preempt_count(PREEMPT_ACTIVE); local_irq_enable(); @@ -3120,6 +3142,8 @@ asmlinkage void __sched preempt_schedule_irq(void) */ barrier(); } while (need_resched()); + + exception_exit(prev_state); } #endif /* CONFIG_PREEMPT */ @@ -3278,7 +3302,8 @@ void complete_all(struct completion *x) EXPORT_SYMBOL(complete_all); static inline long __sched -do_wait_for_common(struct completion *x, long timeout, int state) +do_wait_for_common(struct completion *x, + long (*action)(long), long timeout, int state) { if (!x->done) { DECLARE_WAITQUEUE(wait, current); @@ -3291,7 +3316,7 @@ do_wait_for_common(struct completion *x, long timeout, int state) } __set_current_state(state); spin_unlock_irq(&x->wait.lock); - timeout = schedule_timeout(timeout); + timeout = action(timeout); spin_lock_irq(&x->wait.lock); } while (!x->done && timeout); __remove_wait_queue(&x->wait, &wait); @@ -3302,17 +3327,30 @@ do_wait_for_common(struct completion *x, long timeout, int state) return timeout ?: 1; } -static long __sched -wait_for_common(struct completion *x, long timeout, int state) +static inline long __sched +__wait_for_common(struct completion *x, + long (*action)(long), long timeout, int state) { might_sleep(); spin_lock_irq(&x->wait.lock); - timeout = do_wait_for_common(x, timeout, state); + timeout = do_wait_for_common(x, action, timeout, state); spin_unlock_irq(&x->wait.lock); return timeout; } +static long __sched +wait_for_common(struct completion *x, long timeout, int state) +{ + return __wait_for_common(x, schedule_timeout, timeout, state); +} + +static long __sched +wait_for_common_io(struct completion *x, long timeout, int state) +{ + return __wait_for_common(x, io_schedule_timeout, timeout, state); +} + /** * wait_for_completion: - waits for completion of a task * @x: holds the state of this particular completion @@ -3348,6 +3386,39 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout) } EXPORT_SYMBOL(wait_for_completion_timeout); +/** + * wait_for_completion_io: - waits for completion of a task + * @x: holds the state of this particular completion + * + * This waits to be signaled for completion of a specific task. It is NOT + * interruptible and there is no timeout. The caller is accounted as waiting + * for IO. + */ +void __sched wait_for_completion_io(struct completion *x) +{ + wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(wait_for_completion_io); + +/** + * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout) + * @x: holds the state of this particular completion + * @timeout: timeout value in jiffies + * + * This waits for either a completion of a specific task to be signaled or for a + * specified timeout to expire. The timeout is in jiffies. It is not + * interruptible. The caller is accounted as waiting for IO. + * + * The return value is 0 if timed out, and positive (at least 1, or number of + * jiffies left till timeout) if completed. + */ +unsigned long __sched +wait_for_completion_io_timeout(struct completion *x, unsigned long timeout) +{ + return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(wait_for_completion_io_timeout); + /** * wait_for_completion_interruptible: - waits for completion of a task (w/intr) * @x: holds the state of this particular completion @@ -6834,6 +6905,10 @@ int in_sched_functions(unsigned long addr) } #ifdef CONFIG_CGROUP_SCHED +/* + * Default task group. + * Every task in system belongs to this group at bootup. + */ struct task_group root_task_group; LIST_HEAD(task_groups); #endif @@ -6970,7 +7045,7 @@ void __init sched_init(void) INIT_LIST_HEAD(&rq->cfs_tasks); rq_attach_root(rq, &def_root_domain); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON rq->nohz_flags = 0; #endif #endif @@ -7428,7 +7503,7 @@ unlock: return err; } -int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) +static int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) { u64 rt_runtime, rt_period; @@ -7440,7 +7515,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } -long sched_group_rt_runtime(struct task_group *tg) +static long sched_group_rt_runtime(struct task_group *tg) { u64 rt_runtime_us; @@ -7452,7 +7527,7 @@ long sched_group_rt_runtime(struct task_group *tg) return rt_runtime_us; } -int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) +static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) { u64 rt_runtime, rt_period; @@ -7465,7 +7540,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } -long sched_group_rt_period(struct task_group *tg) +static long sched_group_rt_period(struct task_group *tg) { u64 rt_period_us; @@ -7500,7 +7575,7 @@ static int sched_rt_global_constraints(void) return ret; } -int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) +static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) { /* Don't accept realtime tasks when there is no way for them to run */ if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)