From e05510d01ad1565e5e086a939261084d67ba2b10 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 5 May 2008 23:56:17 +0200
Subject: sched: optimize calc_delta_mine()

Joel noticed that the !lw->inv_weight condition isn't unlikely
anymore so remove the unlikely annotation.

Also, remove the two div64_u64() inv_weight calculations, which makes
them rely on the calc_delta_mine() path as well.

Signed-off-by: Peter Zijlstra
CC: Joel Schopp
Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index 34bcc5bc120e..00c1ba706a5a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1438,8 +1438,8 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 {
 	u64 tmp;
 
-	if (unlikely(!lw->inv_weight))
-		lw->inv_weight = (WMULT_CONST-lw->weight/2) / (lw->weight+1);
+	if (!lw->inv_weight)
+		lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)/(lw->weight+1);
 
 	tmp = (u64)delta_exec * weight;
 	/*
@@ -8025,7 +8025,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 		se->my_q = cfs_rq;
 
 	se->load.weight = tg->shares;
-	se->load.inv_weight = div64_u64(1ULL<<32, se->load.weight);
+	se->load.inv_weight = 0;
 	se->parent = parent;
 }
 #endif
@@ -8692,7 +8692,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares)
 		dequeue_entity(cfs_rq, se, 0);
 
 	se->load.weight = shares;
-	se->load.inv_weight = div64_u64((1ULL<<32), shares);
+	se->load.inv_weight = 0;
 
 	if (on_rq)
 		enqueue_entity(cfs_rq, se, 0);
--
cgit v1.2.2

From d478c2cfaa2476f8b6876f9eb4d8fddcfa986479 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Sat, 26 Apr 2008 11:30:34 -0700
Subject: sched: add debug checks to idle functions

Cc: Venkatesh Pallipadi
Cc: "Justin Mattock"
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index 00c1ba706a5a..ed3caf26990d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1124,6 +1124,7 @@ void sched_clock_idle_sleep_event(void)
 {
 	struct rq *rq = cpu_rq(smp_processor_id());
 
+	WARN_ON(!irqs_disabled());
 	spin_lock(&rq->lock);
 	__update_rq_clock(rq);
 	spin_unlock(&rq->lock);
@@ -1139,6 +1140,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 	struct rq *rq = cpu_rq(smp_processor_id());
 	u64 now = sched_clock();
 
+	WARN_ON(!irqs_disabled());
 	rq->idle_clock += delta_ns;
 	/*
 	 * Override the previous timestamp and ignore all
--
cgit v1.2.2
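Some context on the inv_weight trick, as a minimal sketch (the constant and the
overflow handling here are simplified; the kernel's real WMULT_CONST and split
shift differ in detail): lw->inv_weight caches an approximate fixed-point
reciprocal of lw->weight, so delta * weight / lw->weight becomes a multiply and
a shift. Setting load.inv_weight to 0, as the two later hunks do, merely marks
that cache stale; the next calc_delta_mine() call recomputes it, which is why
the two div64_u64() calls can go away entirely:

	#include <stdint.h>

	#define WMULT_CONST	(1ULL << 32)	/* assumed here for illustration */
	#define WMULT_SHIFT	32

	/* ~ delta_exec * weight / lw_weight, division-free on the fast path */
	static uint64_t calc_delta_sketch(uint64_t delta_exec, unsigned long weight,
					  unsigned long lw_weight, uint64_t *inv_cache)
	{
		if (!*inv_cache)	/* 0 means "stale": recompute ~2^32 / lw_weight */
			*inv_cache = 1 + (WMULT_CONST - lw_weight/2) / (lw_weight + 1);

		return (delta_exec * weight * *inv_cache) >> WMULT_SHIFT;
	}

The real function additionally splits the shift into two halves when
delta_exec * weight is large, to avoid overflowing the 64-bit intermediate.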
From 983ed7a66bcec9dc307d89dc7af47cdf209e56af Mon Sep 17 00:00:00 2001
From: Harvey Harrison
Date: Thu, 24 Apr 2008 18:17:55 -0700
Subject: sched: add statics, don't return void expressions

Noticed by sparse:
kernel/sched.c:760:20: warning: symbol 'sched_feat_names' was not declared. Should it be static?
kernel/sched.c:767:5: warning: symbol 'sched_feat_open' was not declared. Should it be static?
kernel/sched_fair.c:845:3: warning: returning void-valued expression
kernel/sched.c:4386:3: warning: returning void-valued expression

Signed-off-by: Harvey Harrison
Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index ed3caf26990d..d941ddc9ec1d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -757,14 +757,14 @@ const_debug unsigned int sysctl_sched_features =
 #define SCHED_FEAT(name, enabled)	\
 	#name ,
 
-__read_mostly char *sched_feat_names[] = {
+static __read_mostly char *sched_feat_names[] = {
 #include "sched_features.h"
 	NULL
 };
 
 #undef SCHED_FEAT
 
-int sched_feat_open(struct inode *inode, struct file *filp)
+static int sched_feat_open(struct inode *inode, struct file *filp)
 {
 	filp->private_data = inode->i_private;
 	return 0;
@@ -4341,8 +4341,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	struct rq *rq = this_rq();
 	cputime64_t tmp;
 
-	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0))
-		return account_guest_time(p, cputime);
+	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
+		account_guest_time(p, cputime);
+		return;
+	}
 
 	p->stime = cputime_add(p->stime, cputime);
--
cgit v1.2.2
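A quick illustration of the second pair of warnings, as a sketch with
hypothetical names: ISO C forbids returning an expression, even a void-valued
one, from a function whose return type is void. gcc quietly accepts it, so
sparse is the tool that flags it:

	void account_guest(void);	/* hypothetical helper, for illustration */

	void account(int guest)
	{
		if (guest)
			return account_guest();	/* sparse: returning void-valued expression */
		/* ... */
	}

The patch rewrites such sites as a plain call followed by a bare return, which
is what the account_system_time() hunk above does.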
From b328ca182f01c2a04b85e0ee8a410720b104fbcc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 29 Apr 2008 10:02:46 +0200
Subject: sched: fix hrtick_start_fair and CPU-Hotplug

Gautham R Shenoy reported:

> While running the usual CPU-Hotplug stress tests on linux-2.6.25,
> I noticed the following in the console logs.
>
> This is a wee bit difficult to reproduce. In the past 10 runs I hit this
> only once.
>
> ------------[ cut here ]------------
>
> WARNING: at kernel/sched.c:962 hrtick+0x2e/0x65()
>
> Just wondering if we are doing a good job at handling the cancellation
> of any per-cpu scheduler timers during CPU-Hotplug.

This looks like it is indeed not cancelled at all and gets migrated to
another cpu. Fix it via a proper hotplug notifier mechanism.

Reported-by: Gautham R Shenoy
Signed-off-by: Peter Zijlstra
Cc: stable@kernel.org
Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index d941ddc9ec1d..bee9cbe13c15 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1191,6 +1191,7 @@ static inline void resched_rq(struct rq *rq)
 enum {
 	HRTICK_SET,		/* re-programm hrtick_timer */
 	HRTICK_RESET,		/* not a new slice */
+	HRTICK_BLOCK,		/* stop hrtick operations */
 };
 
 /*
@@ -1202,6 +1203,8 @@ static inline int hrtick_enabled(struct rq *rq)
 {
 	if (!sched_feat(HRTICK))
 		return 0;
+	if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags)))
+		return 0;
 	return hrtimer_is_hres_active(&rq->hrtick_timer);
 }
 
@@ -1284,7 +1287,63 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-static inline void init_rq_hrtick(struct rq *rq)
+static void hotplug_hrtick_disable(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	rq->hrtick_flags = 0;
+	__set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
+	spin_unlock_irqrestore(&rq->lock, flags);
+
+	hrtick_clear(rq);
+}
+
+static void hotplug_hrtick_enable(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	__clear_bit(HRTICK_BLOCK, &rq->hrtick_flags);
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static int
+hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	int cpu = (int)(long)hcpu;
+
+	switch (action) {
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		hotplug_hrtick_disable(cpu);
+		return NOTIFY_OK;
+
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		hotplug_hrtick_enable(cpu);
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static void init_hrtick(void)
+{
+	hotcpu_notifier(hotplug_hrtick, 0);
+}
+
+static void init_rq_hrtick(struct rq *rq)
 {
 	rq->hrtick_flags = 0;
 	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -1321,6 +1380,10 @@ static inline void init_rq_hrtick(struct rq *rq)
 void hrtick_resched(void)
 {
 }
+
+static inline void init_hrtick(void)
+{
+}
 #endif
 
 /*
@@ -7943,6 +8006,7 @@ void __init sched_init_smp(void)
 	put_online_cpus();
 	/* XXX: Theoretical race here - CPU may be hotplugged now */
 	hotcpu_notifier(update_sched_domains, 0);
+	init_hrtick();
 
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0)
--
cgit v1.2.2
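Why a notifier is needed at all: as the changelog says, an hrtimer still
pending on a dying CPU is not cancelled but migrated to another cpu, so a
per-runqueue timer like hrtick can later fire with cpu_of(rq) !=
smp_processor_id(), which is exactly the WARNING quoted above. The distilled
shape of the fix, as a sketch with hypothetical names (the real patch keeps
the blocked state in rq->hrtick_flags under rq->lock):

	static DEFINE_PER_CPU(struct hrtimer, my_tick);	/* illustrative per-cpu timer */
	static DEFINE_PER_CPU(int, my_tick_blocked);

	static int my_tick_hotplug(struct notifier_block *nfb,
				   unsigned long action, void *hcpu)
	{
		int cpu = (int)(long)hcpu;

		switch (action) {
		case CPU_DOWN_PREPARE:
		case CPU_DEAD:
			per_cpu(my_tick_blocked, cpu) = 1;	/* stop re-arming */
			hrtimer_cancel(&per_cpu(my_tick, cpu));	/* flush a pending timer */
			return NOTIFY_OK;
		case CPU_DOWN_FAILED:
		case CPU_ONLINE:
			per_cpu(my_tick_blocked, cpu) = 0;
			return NOTIFY_OK;
		}
		return NOTIFY_DONE;
	}

Every arming path then checks the blocked flag before programming the timer,
which is what the new HRTICK_BLOCK test in hrtick_enabled() does.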
From 673a90a1e05c8127886f7659d1a457169378371f Mon Sep 17 00:00:00 2001
From: David Simner
Date: Tue, 29 Apr 2008 10:08:59 +0100
Subject: sched: fix sched_info_switch not being called according to documentation

http://bugzilla.kernel.org/show_bug.cgi?id=10545

sched_stats.h says that __sched_info_switch is "called when prev != next"
in the comment. sched.c should therefore do that.

Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index bee9cbe13c15..3ac3d7af04a1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4662,9 +4662,9 @@ need_resched_nonpreemptible:
 	prev->sched_class->put_prev_task(rq, prev);
 	next = pick_next_task(rq, prev);
 
-	sched_info_switch(prev, next);
-
 	if (likely(prev != next)) {
+		sched_info_switch(prev, next);
+
 		rq->nr_switches++;
 		rq->curr = next;
 		++*switch_count;
--
cgit v1.2.2

From 690229a0912ca2fef8b542fe4d8b73acfcdc6e24 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 23 Apr 2008 09:31:35 +0200
Subject: sched: make clock sync tunable by architecture code

make time_sync_thresh tunable to architecture code.

Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index 3ac3d7af04a1..8f433fedfcb3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -899,7 +899,7 @@ static inline u64 global_rt_runtime(void)
 	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
-static const unsigned long long time_sync_thresh = 100000;
+unsigned long long time_sync_thresh = 100000;
 
 static DEFINE_PER_CPU(unsigned long long, time_offset);
 static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
--
cgit v1.2.2
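With the const (and static) dropped, time_sync_thresh becomes an ordinary
global that built-in architecture code can override early in boot. A
hypothetical example (the extern declaration would normally live in a shared
header; the threshold is compared against cpu_clock() deltas, which are in
nanoseconds):

	extern unsigned long long time_sync_thresh;

	void __init my_arch_clock_init(void)	/* hypothetical arch hook */
	{
		/* well-synchronized clocks: resync cpu_clock() 10x less often */
		time_sync_thresh = 1000000;
	}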
From 712555ee4f873515612f89554ad1a3fda5fa887e Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 28 Apr 2008 11:33:07 +0200
Subject: sched: fix missing locking in sched_domains code

Concurrent calls to detach_destroy_domains and arch_init_sched_domains
were prevented by the old scheduler subsystem cpu hotplug mutex. When
this got converted to get_online_cpus() the locking got broken.
Unlike before, now several processes can concurrently enter the critical
sections that were protected by the old lock.

So use the already present doms_cur_mutex to protect these sections again.

Cc: Gautham R Shenoy
Cc: Paul Jackson
Signed-off-by: Heiko Carstens
Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index 8f433fedfcb3..561b3b39bdb8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -242,6 +242,12 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
 }
 #endif
 
+/*
+ * sched_domains_mutex serializes calls to arch_init_sched_domains,
+ * detach_destroy_domains and partition_sched_domains.
+ */
+static DEFINE_MUTEX(sched_domains_mutex);
+
 #ifdef CONFIG_GROUP_SCHED
 
 #include <linux/cgroup.h>
@@ -308,9 +314,6 @@ static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
  */
 static DEFINE_SPINLOCK(task_group_lock);
 
-/* doms_cur_mutex serializes access to doms_cur[] array */
-static DEFINE_MUTEX(doms_cur_mutex);
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_USER_SCHED
 # define INIT_TASK_GROUP_LOAD	(2*NICE_0_LOAD)
@@ -358,21 +361,9 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 #endif
 }
 
-static inline void lock_doms_cur(void)
-{
-	mutex_lock(&doms_cur_mutex);
-}
-
-static inline void unlock_doms_cur(void)
-{
-	mutex_unlock(&doms_cur_mutex);
-}
-
 #else
 
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline void lock_doms_cur(void) { }
-static inline void unlock_doms_cur(void) { }
 
 #endif	/* CONFIG_GROUP_SCHED */
 
@@ -7822,7 +7813,7 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 {
 	int i, j;
 
-	lock_doms_cur();
+	mutex_lock(&sched_domains_mutex);
 
 	/* always unregister in case we don't destroy any domains */
 	unregister_sched_domain_sysctl();
@@ -7871,7 +7862,7 @@ match2:
 
 	register_sched_domain_sysctl();
 
-	unlock_doms_cur();
+	mutex_unlock(&sched_domains_mutex);
 }
 
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -7880,8 +7871,10 @@ int arch_reinit_sched_domains(void)
 	int err;
 
 	get_online_cpus();
+	mutex_lock(&sched_domains_mutex);
 	detach_destroy_domains(&cpu_online_map);
 	err = arch_init_sched_domains(&cpu_online_map);
+	mutex_unlock(&sched_domains_mutex);
 	put_online_cpus();
 
 	return err;
@@ -7999,10 +7992,12 @@ void __init sched_init_smp(void)
 	BUG_ON(sched_group_nodes_bycpu == NULL);
#endif
 	get_online_cpus();
+	mutex_lock(&sched_domains_mutex);
 	arch_init_sched_domains(&cpu_online_map);
 	cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
 	if (cpus_empty(non_isolated_cpus))
 		cpu_set(smp_processor_id(), non_isolated_cpus);
+	mutex_unlock(&sched_domains_mutex);
 	put_online_cpus();
 	/* XXX: Theoretical race here - CPU may be hotplugged now */
 	hotcpu_notifier(update_sched_domains, 0);
--
cgit v1.2.2
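The resulting lock-ordering convention, restated as a sketch: hotplug
exclusion is taken first, then the domains mutex, and they are released in
reverse order. Any future caller that rebuilds domains should follow the same
nesting to avoid deadlocking against arch_reinit_sched_domains() and
sched_init_smp():

	get_online_cpus();			/* pin cpu_online_map */
	mutex_lock(&sched_domains_mutex);	/* serialize domain rebuilds */
	/* ... detach_destroy_domains() / arch_init_sched_domains() ... */
	mutex_unlock(&sched_domains_mutex);
	put_online_cpus();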
From cb4ad1ffc7c0d8ea7dc8cd8ba303d83551716d46 Mon Sep 17 00:00:00 2001
From: Miao Xie
Date: Mon, 28 Apr 2008 12:54:56 +0800
Subject: sched: fair-group: fix a Div0 error of the fair group scheduler

When I echoed 0 into the "cpu.shares" file, a Div0 error occurred.

We found it is caused by the following call chain:

	sched_group_set_shares(tg, shares)
		set_se_shares(tg->se[i], shares/nr_cpu_ids)
			__set_se_shares(se, shares)
				div64_64((1ULL<<32), shares)

When the echoed value was less than the number of processors, the result
of the expression "shares/nr_cpu_ids" was 0, and then the system called
div64() with that result, and the Div0 error occurred.

It is unnecessary that the shares value is divided by nr_cpu_ids, I
think. Because in the function __update_group_shares_cpu() and
init_tg_cfs_entry(), the shares value isn't divided by nr_cpu_ids when
setting shares of the sched entity.

This patch fixes this bug. And echoing ULONG_MAX value into cpu.shares
also causes Div0 error, so we set a macro MAX_SHARES to limit the max
value of shares.

Signed-off-by: Miao Xie
Acked-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index 561b3b39bdb8..f98f75f3c708 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -321,7 +321,13 @@ static DEFINE_SPINLOCK(task_group_lock);
 # define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
 #endif
 
+/*
+ * A weight of 0, 1 or ULONG_MAX can cause arithmetics problems.
+ * (The default weight is 1024 - so there's no practical
+ *  limitation from this.)
+ */
 #define MIN_SHARES	2
+#define MAX_SHARES	(ULONG_MAX - 1)
 
 static int init_task_group_load = INIT_TASK_GROUP_LOAD;
 #endif
@@ -1804,6 +1810,8 @@ __update_group_shares_cpu(struct task_group *tg, struct sched_domain *sd,
 
 	if (shares < MIN_SHARES)
 		shares = MIN_SHARES;
+	else if (shares > MAX_SHARES)
+		shares = MAX_SHARES;
 
 	__set_se_shares(tg->se[tcpu], shares);
 }
@@ -8785,13 +8793,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	if (!tg->se[0])
 		return -EINVAL;
 
-	/*
-	 * A weight of 0 or 1 can cause arithmetics problems.
-	 * (The default weight is 1024 - so there's no practical
-	 *  limitation from this.)
-	 */
 	if (shares < MIN_SHARES)
 		shares = MIN_SHARES;
+	else if (shares > MAX_SHARES)
+		shares = MAX_SHARES;
 
 	mutex_lock(&shares_mutex);
 	if (tg->shares == shares)
@@ -8816,7 +8821,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 		 * force a rebalance
 		 */
 		cfs_rq_set_shares(tg->cfs_rq[i], 0);
-		set_se_shares(tg->se[i], shares/nr_cpu_ids);
+		set_se_shares(tg->se[i], shares);
 	}
 
 	/*
--
cgit v1.2.2
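The failure in concrete numbers: on a 4-CPU box, "echo 1 > cpu.shares" made
sched_group_set_shares() call set_se_shares(se, 1/4), i.e. shares = 0, and
__set_se_shares() then divided 1ULL<<32 by zero. A minimal sketch of the
clamp both call sites now apply (the helper name is illustrative; the kernel
open-codes the two comparisons):

	#include <limits.h>

	#define MIN_SHARES	2UL		/* 0 and 1 break the inverse-weight math */
	#define MAX_SHARES	(ULONG_MAX - 1)	/* ULONG_MAX overflows it */

	static unsigned long clamp_shares(unsigned long shares)
	{
		if (shares < MIN_SHARES)
			shares = MIN_SHARES;
		else if (shares > MAX_SHARES)
			shares = MAX_SHARES;
		return shares;
	}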
From dfbf4a1bc319f0f9a31e39b2da1fa5c55e85af89 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 23 Apr 2008 09:24:06 +0200
Subject: sched: fix cpu clock

David Miller pointed out that nothing in cpu_clock() sets prev_cpu_time.
This caused __sync_cpu_clock() to be called all the time - against the
intention of this code.

The result was that in practice we hit a global spinlock every time
cpu_clock() is called - which - even though cpu_clock() is used for
tracing and debugging, is suboptimal.

While at it, also:

- move the irq disabling to the outermost layer, this should make
  cpu_clock() warp-free when called with irqs enabled.

- use long long instead of cycles_t - for platforms where cycles_t is
  32-bit.

Reported-by: David Miller
Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index f98f75f3c708..9457106b18af 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -910,11 +910,14 @@ static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
 static DEFINE_SPINLOCK(time_sync_lock);
 static unsigned long long prev_global_time;
 
-static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
+static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&time_sync_lock, flags);
+	/*
+	 * We want this inlined, to not get tracer function calls
+	 * in this critical section:
+	 */
+	spin_acquire(&time_sync_lock.dep_map, 0, 0, _THIS_IP_);
+	__raw_spin_lock(&time_sync_lock.raw_lock);
 
 	if (time < prev_global_time) {
 		per_cpu(time_offset, cpu) += prev_global_time - time;
@@ -923,7 +926,8 @@ static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
 		prev_global_time = time;
 	}
 
-	spin_unlock_irqrestore(&time_sync_lock, flags);
+	__raw_spin_unlock(&time_sync_lock.raw_lock);
+	spin_release(&time_sync_lock.dep_map, 1, _THIS_IP_);
 
 	return time;
 }
@@ -931,7 +935,6 @@ static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
 static unsigned long long __cpu_clock(int cpu)
 {
 	unsigned long long now;
-	unsigned long flags;
 	struct rq *rq;
 
 	/*
@@ -941,11 +944,9 @@ static unsigned long long __cpu_clock(int cpu)
 	if (unlikely(!scheduler_running))
 		return 0;
 
-	local_irq_save(flags);
 	rq = cpu_rq(cpu);
 	update_rq_clock(rq);
 	now = rq->clock;
-	local_irq_restore(flags);
 
 	return now;
 }
@@ -957,13 +958,18 @@ static unsigned long long __cpu_clock(int cpu)
 unsigned long long cpu_clock(int cpu)
 {
 	unsigned long long prev_cpu_time, time, delta_time;
+	unsigned long flags;
 
+	local_irq_save(flags);
 	prev_cpu_time = per_cpu(prev_cpu_time, cpu);
 	time = __cpu_clock(cpu) + per_cpu(time_offset, cpu);
 	delta_time = time-prev_cpu_time;
 
-	if (unlikely(delta_time > time_sync_thresh))
+	if (unlikely(delta_time > time_sync_thresh)) {
 		time = __sync_cpu_clock(time, cpu);
+		per_cpu(prev_cpu_time, cpu) = time;
+	}
+	local_irq_restore(flags);
 
 	return time;
 }
--
cgit v1.2.2
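The open-coded locking in __sync_cpu_clock() is just spin_lock()/spin_unlock()
with the pieces spelled out, so that everything stays inline and no
tracer-visible function call happens inside the critical section. Roughly, as
a sketch with lockdep enabled (exact expansions vary by config and kernel
version):

	/* spin_lock(&time_sync_lock) is approximately: */
	spin_acquire(&time_sync_lock.dep_map, 0, 0, _THIS_IP_);	/* lockdep bookkeeping */
	__raw_spin_lock(&time_sync_lock.raw_lock);		/* the lock itself */

	/* spin_unlock(&time_sync_lock) is approximately: */
	__raw_spin_unlock(&time_sync_lock.raw_lock);
	spin_release(&time_sync_lock.dep_map, 1, _THIS_IP_);

The irq disabling that spin_lock_irqsave() used to provide has moved out to
cpu_clock() itself, and the added per_cpu(prev_cpu_time) update is the actual
bug fix: without it, delta_time stayed above the threshold forever and every
cpu_clock() call took the global lock.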
From 3e51f33fcc7f55e6df25d15b55ed10c8b4da84cd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Sat, 3 May 2008 18:29:28 +0200
Subject: sched: add optional support for CONFIG_HAVE_UNSTABLE_SCHED_CLOCK

This replaces the rq->clock stuff (and possibly cpu_clock()).

- architectures that have an 'imperfect' hardware clock can set
  CONFIG_HAVE_UNSTABLE_SCHED_CLOCK

- the 'jiffie' window might be superfluous when we update tick_gtod
  before the __update_sched_clock() call in sched_clock_tick()

- cpu_clock() might be implemented as:

    sched_clock_cpu(smp_processor_id())

  if the accuracy proves good enough - how far can TSC drift in a
  single jiffie when considering the filtering and idle hooks?

[ mingo@elte.hu: various fixes and cleanups ]

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 165 +++++----------------------------------------------------
 1 file changed, 13 insertions(+), 152 deletions(-)

(limited to 'kernel/sched.c')

diff --git a/kernel/sched.c b/kernel/sched.c
index 9457106b18af..58fb8af15776 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -74,16 +74,6 @@
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
 
-/*
- * Scheduler clock - returns current time in nanosec units.
- * This is default implementation.
- * Architectures and sub-architectures can override this.
- */
-unsigned long long __attribute__((weak)) sched_clock(void)
-{
-	return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
-}
-
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -557,13 +547,7 @@ struct rq {
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 
-	u64 clock, prev_clock_raw;
-	s64 clock_max_delta;
-
-	unsigned int clock_warps, clock_overflows, clock_underflows;
-	u64 idle_clock;
-	unsigned int clock_deep_idle_events;
-	u64 tick_timestamp;
+	u64 clock;
 
 	atomic_t nr_iowait;
 
@@ -628,82 +612,6 @@ static inline int cpu_of(struct rq *rq)
 #endif
 }
 
-#ifdef CONFIG_NO_HZ
-static inline bool nohz_on(int cpu)
-{
-	return tick_get_tick_sched(cpu)->nohz_mode != NOHZ_MODE_INACTIVE;
-}
-
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
-	return nohz_on(cpu_of(rq)) ? jiffies - rq->last_tick_seen + 2 : 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
-	rq->last_tick_seen = jiffies;
-}
-#else
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
-	return 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
-}
-#endif
-
-/*
- * Update the per-runqueue clock, as finegrained as the platform can give
- * us, but without assuming monotonicity, etc.:
- */
-static void __update_rq_clock(struct rq *rq)
-{
-	u64 prev_raw = rq->prev_clock_raw;
-	u64 now = sched_clock();
-	s64 delta = now - prev_raw;
-	u64 clock = rq->clock;
-
-#ifdef CONFIG_SCHED_DEBUG
-	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
-#endif
-	/*
-	 * Protect against sched_clock() occasionally going backwards:
-	 */
-	if (unlikely(delta < 0)) {
-		clock++;
-		rq->clock_warps++;
-	} else {
-		/*
-		 * Catch too large forward jumps too:
-		 */
-		u64 max_jump = max_skipped_ticks(rq) * TICK_NSEC;
-		u64 max_time = rq->tick_timestamp + max_jump;
-
-		if (unlikely(clock + delta > max_time)) {
-			if (clock < max_time)
-				clock = max_time;
-			else
-				clock++;
-			rq->clock_overflows++;
-		} else {
-			if (unlikely(delta > rq->clock_max_delta))
-				rq->clock_max_delta = delta;
-			clock += delta;
-		}
-	}
-
-	rq->prev_clock_raw = now;
-	rq->clock = clock;
-}
-
-static void update_rq_clock(struct rq *rq)
-{
-	if (likely(smp_processor_id() == cpu_of(rq)))
-		__update_rq_clock(rq);
-}
-
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
  * See detach_destroy_domains: synchronize_sched for details.
@@ -719,6 +627,11 @@ static void update_rq_clock(struct rq *rq)
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
+static inline void update_rq_clock(struct rq *rq)
+{
+	rq->clock = sched_clock_cpu(cpu_of(rq));
+}
+
 /*
  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  */
@@ -935,7 +848,6 @@ static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu)
 static unsigned long long __cpu_clock(int cpu)
 {
 	unsigned long long now;
-	struct rq *rq;
 
 	/*
 	 * Only call sched_clock() if the scheduler has already been
@@ -944,9 +856,7 @@ static unsigned long long __cpu_clock(int cpu)
 	if (unlikely(!scheduler_running))
 		return 0;
 
-	rq = cpu_rq(cpu);
-	update_rq_clock(rq);
-	now = rq->clock;
+	now = sched_clock_cpu(cpu);
 
 	return now;
 }
@@ -1120,45 +1030,6 @@ static struct rq *this_rq_lock(void)
 	return rq;
 }
 
-/*
- * We are going deep-idle (irqs are disabled):
- */
-void sched_clock_idle_sleep_event(void)
-{
-	struct rq *rq = cpu_rq(smp_processor_id());
-
-	WARN_ON(!irqs_disabled());
-	spin_lock(&rq->lock);
-	__update_rq_clock(rq);
-	spin_unlock(&rq->lock);
-	rq->clock_deep_idle_events++;
-}
-EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
-
-/*
- * We just idled delta nanoseconds (called with irqs disabled):
- */
-void sched_clock_idle_wakeup_event(u64 delta_ns)
-{
-	struct rq *rq = cpu_rq(smp_processor_id());
-	u64 now = sched_clock();
-
-	WARN_ON(!irqs_disabled());
-	rq->idle_clock += delta_ns;
-	/*
-	 * Override the previous timestamp and ignore all
-	 * sched_clock() deltas that occured while we idled,
-	 * and use the PM-provided delta_ns to advance the
-	 * rq clock:
-	 */
-	spin_lock(&rq->lock);
-	rq->prev_clock_raw = now;
-	rq->clock += delta_ns;
-	spin_unlock(&rq->lock);
-	touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-
 static void __resched_task(struct task_struct *p, int tif_bit);
 
 static inline void resched_task(struct task_struct *p)
@@ -1283,7 +1154,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 
 	spin_lock(&rq->lock);
-	__update_rq_clock(rq);
+	update_rq_clock(rq);
 	rq->curr->sched_class->task_tick(rq, rq->curr, 1);
 	spin_unlock(&rq->lock);
 
@@ -4476,19 +4347,11 @@ void scheduler_tick(void)
 	int cpu = smp_processor_id();
 	struct rq *rq = cpu_rq(cpu);
 	struct task_struct *curr = rq->curr;
-	u64 next_tick = rq->tick_timestamp + TICK_NSEC;
+
+	sched_clock_tick();
 
 	spin_lock(&rq->lock);
-	__update_rq_clock(rq);
-	/*
-	 * Let rq->clock advance by at least TICK_NSEC:
-	 */
-	if (unlikely(rq->clock < next_tick)) {
-		rq->clock = next_tick;
-		rq->clock_underflows++;
-	}
-	rq->tick_timestamp = rq->clock;
-	update_last_tick_seen(rq);
+	update_rq_clock(rq);
 	update_cpu_load(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
 	spin_unlock(&rq->lock);
@@ -4642,7 +4505,7 @@ need_resched_nonpreemptible:
 	 * Do the rq-clock update outside the rq lock:
 	 */
 	local_irq_disable();
-	__update_rq_clock(rq);
+	update_rq_clock(rq);
 	spin_lock(&rq->lock);
 	clear_tsk_need_resched(prev);
 
@@ -8226,8 +8089,6 @@ void __init sched_init(void)
 		spin_lock_init(&rq->lock);
 		lockdep_set_class(&rq->lock, &rq->rq_lock_key);
 		rq->nr_running = 0;
-		rq->clock = 1;
-		update_last_tick_seen(rq);
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8371,6 +8232,7 @@ EXPORT_SYMBOL(__might_sleep);
 static void normalize_task(struct rq *rq, struct task_struct *p)
 {
 	int on_rq;
+
 	update_rq_clock(rq);
 	on_rq = p->se.on_rq;
 	if (on_rq)
@@ -8402,7 +8264,6 @@ void normalize_rt_tasks(void)
 		p->se.sleep_start	= 0;
 		p->se.block_start	= 0;
 #endif
-		task_rq(p)->clock	= 0;
 
 		if (!rt_task(p)) {
 			/*
--
cgit v1.2.2
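Note that the replacement infrastructure itself (sched_clock_cpu(),
sched_clock_tick() and friends) lives in kernel/sched_clock.c, which this
commit adds but which is filtered out of this kernel/sched.c-limited view.
The cpu_clock() simplification floated in the changelog would look like this,
as a sketch, assuming the accuracy works out:

	unsigned long long cpu_clock(int cpu)
	{
		return sched_clock_cpu(cpu);	/* callers commonly pass smp_processor_id() */
	}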