author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2008-05-03 12:29:28 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-05-05 17:56:18 -0400
commit	3e51f33fcc7f55e6df25d15b55ed10c8b4da84cd (patch)
tree	3752f9ea8e014ec40e95a1b197b0a3d18e1056a8 /kernel/sched.c
parent	a5574cf65b5f03ce9ade3918764fe22e5e2371e3 (diff)
sched: add optional support for CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
this replaces the rq->clock stuff (and possibly cpu_clock()).

 - architectures that have an 'imperfect' hardware clock can set
   CONFIG_HAVE_UNSTABLE_SCHED_CLOCK

 - the 'jiffie' window might be superfluous when we update tick_gtod
   before the __update_sched_clock() call in sched_clock_tick()

 - cpu_clock() might be implemented as:

     sched_clock_cpu(smp_processor_id())

   if the accuracy proves good enough - how far can TSC drift in a
   single jiffie when considering the filtering and idle hooks?

[ mingo@elte.hu: various fixes and cleanups ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	165
1 file changed, 13 insertions, 152 deletions
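The commit message leaves cpu_clock() as an open question: if the new per-CPU clock proves accurate enough, cpu_clock() could simply forward to it. A minimal sketch of that suggestion, assuming the sched_clock_cpu() interface introduced elsewhere in this series; this illustrates the idea floated above and is not code contained in this patch:

/*
 * Sketch only, not part of this patch: the commit message proposes that
 * cpu_clock() might become sched_clock_cpu(smp_processor_id()); for a
 * caller-supplied CPU that reduces to a direct call.
 */
unsigned long long cpu_clock(int cpu)
{
	return sched_clock_cpu(cpu);
}

Whether that simplification is acceptable hinges on the question raised above: how far the TSC can drift within a single jiffie once the filtering and idle hooks are taken into account.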
diff --git a/kernel/sched.c b/kernel/sched.c
index 9457106b18af..58fb8af15776 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -75,16 +75,6 @@
 #include <asm/irq_regs.h>
 
 /*
- * Scheduler clock - returns current time in nanosec units.
- * This is default implementation.
- * Architectures and sub-architectures can override this.
- */
-unsigned long long __attribute__((weak)) sched_clock(void)
-{
-	return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
-}
-
-/*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
  * and back.
@@ -557,13 +547,7 @@ struct rq {
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 
-	u64 clock, prev_clock_raw;
-	s64 clock_max_delta;
-
-	unsigned int clock_warps, clock_overflows, clock_underflows;
-	u64 idle_clock;
-	unsigned int clock_deep_idle_events;
-	u64 tick_timestamp;
+	u64 clock;
 
 	atomic_t nr_iowait;
 
@@ -628,82 +612,6 @@ static inline int cpu_of(struct rq *rq)
 #endif
 }
 
-#ifdef CONFIG_NO_HZ
-static inline bool nohz_on(int cpu)
-{
-	return tick_get_tick_sched(cpu)->nohz_mode != NOHZ_MODE_INACTIVE;
-}
-
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
-	return nohz_on(cpu_of(rq)) ? jiffies - rq->last_tick_seen + 2 : 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
-	rq->last_tick_seen = jiffies;
-}
-#else
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
-	return 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
-}
-#endif
-
-/*
- * Update the per-runqueue clock, as finegrained as the platform can give
- * us, but without assuming monotonicity, etc.:
- */
-static void __update_rq_clock(struct rq *rq)
-{
-	u64 prev_raw = rq->prev_clock_raw;
-	u64 now = sched_clock();
-	s64 delta = now - prev_raw;
-	u64 clock = rq->clock;
-
-#ifdef CONFIG_SCHED_DEBUG
-	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
-#endif
-	/*
-	 * Protect against sched_clock() occasionally going backwards:
-	 */
-	if (unlikely(delta < 0)) {
-		clock++;
-		rq->clock_warps++;
-	} else {
-		/*
-		 * Catch too large forward jumps too:
-		 */
-		u64 max_jump = max_skipped_ticks(rq) * TICK_NSEC;
-		u64 max_time = rq->tick_timestamp + max_jump;
-
-		if (unlikely(clock + delta > max_time)) {
-			if (clock < max_time)
-				clock = max_time;
-			else
-				clock++;
-			rq->clock_overflows++;
-		} else {
-			if (unlikely(delta > rq->clock_max_delta))
-				rq->clock_max_delta = delta;
-			clock += delta;
-		}
-	}
-
-	rq->prev_clock_raw = now;
-	rq->clock = clock;
-}
-
-static void update_rq_clock(struct rq *rq)
-{
-	if (likely(smp_processor_id() == cpu_of(rq)))
-		__update_rq_clock(rq);
-}
-
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
  * See detach_destroy_domains: synchronize_sched for details.
@@ -719,6 +627,11 @@ static void update_rq_clock(struct rq *rq)
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
+static inline void update_rq_clock(struct rq *rq)
+{
+	rq->clock = sched_clock_cpu(cpu_of(rq));
+}
+
 /*
  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  */
@@ -935,7 +848,6 @@ static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu)
 static unsigned long long __cpu_clock(int cpu)
 {
 	unsigned long long now;
-	struct rq *rq;
 
 	/*
 	 * Only call sched_clock() if the scheduler has already been
@@ -944,9 +856,7 @@ static unsigned long long __cpu_clock(int cpu)
 	if (unlikely(!scheduler_running))
 		return 0;
 
-	rq = cpu_rq(cpu);
-	update_rq_clock(rq);
-	now = rq->clock;
+	now = sched_clock_cpu(cpu);
 
 	return now;
 }
@@ -1120,45 +1030,6 @@ static struct rq *this_rq_lock(void)
 	return rq;
 }
 
-/*
- * We are going deep-idle (irqs are disabled):
- */
-void sched_clock_idle_sleep_event(void)
-{
-	struct rq *rq = cpu_rq(smp_processor_id());
-
-	WARN_ON(!irqs_disabled());
-	spin_lock(&rq->lock);
-	__update_rq_clock(rq);
-	spin_unlock(&rq->lock);
-	rq->clock_deep_idle_events++;
-}
-EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
-
-/*
- * We just idled delta nanoseconds (called with irqs disabled):
- */
-void sched_clock_idle_wakeup_event(u64 delta_ns)
-{
-	struct rq *rq = cpu_rq(smp_processor_id());
-	u64 now = sched_clock();
-
-	WARN_ON(!irqs_disabled());
-	rq->idle_clock += delta_ns;
-	/*
-	 * Override the previous timestamp and ignore all
-	 * sched_clock() deltas that occured while we idled,
-	 * and use the PM-provided delta_ns to advance the
-	 * rq clock:
-	 */
-	spin_lock(&rq->lock);
-	rq->prev_clock_raw = now;
-	rq->clock += delta_ns;
-	spin_unlock(&rq->lock);
-	touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-
 static void __resched_task(struct task_struct *p, int tif_bit);
 
 static inline void resched_task(struct task_struct *p)
@@ -1283,7 +1154,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 
 	spin_lock(&rq->lock);
-	__update_rq_clock(rq);
+	update_rq_clock(rq);
 	rq->curr->sched_class->task_tick(rq, rq->curr, 1);
 	spin_unlock(&rq->lock);
 
@@ -4476,19 +4347,11 @@ void scheduler_tick(void)
 	int cpu = smp_processor_id();
 	struct rq *rq = cpu_rq(cpu);
 	struct task_struct *curr = rq->curr;
-	u64 next_tick = rq->tick_timestamp + TICK_NSEC;
+
+	sched_clock_tick();
 
 	spin_lock(&rq->lock);
-	__update_rq_clock(rq);
-	/*
-	 * Let rq->clock advance by at least TICK_NSEC:
-	 */
-	if (unlikely(rq->clock < next_tick)) {
-		rq->clock = next_tick;
-		rq->clock_underflows++;
-	}
-	rq->tick_timestamp = rq->clock;
-	update_last_tick_seen(rq);
+	update_rq_clock(rq);
 	update_cpu_load(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
 	spin_unlock(&rq->lock);
@@ -4642,7 +4505,7 @@ need_resched_nonpreemptible:
 	 * Do the rq-clock update outside the rq lock:
 	 */
 	local_irq_disable();
-	__update_rq_clock(rq);
+	update_rq_clock(rq);
 	spin_lock(&rq->lock);
 	clear_tsk_need_resched(prev);
 
@@ -8226,8 +8089,6 @@ void __init sched_init(void)
 		spin_lock_init(&rq->lock);
 		lockdep_set_class(&rq->lock, &rq->rq_lock_key);
 		rq->nr_running = 0;
-		rq->clock = 1;
-		update_last_tick_seen(rq);
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8371,6 +8232,7 @@ EXPORT_SYMBOL(__might_sleep);
 static void normalize_task(struct rq *rq, struct task_struct *p)
 {
 	int on_rq;
+
 	update_rq_clock(rq);
 	on_rq = p->se.on_rq;
 	if (on_rq)
@@ -8402,7 +8264,6 @@ void normalize_rt_tasks(void)
 		p->se.sleep_start		= 0;
 		p->se.block_start		= 0;
 #endif
-		task_rq(p)->clock		= 0;
 
 		if (!rt_task(p)) {
 			/*