author     Linus Torvalds <torvalds@linux-foundation.org>  2017-02-20 15:52:55 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-02-20 15:52:55 -0500
commit     828cad8ea05d194d8a9452e0793261c2024c23a2
tree       0ad7c7e044cdcfe75d78da0b52eb2358d4686e02 /kernel/sched/clock.c
parent     60c906bab124a0627fba04c9ca5e61bba4747c0c
parent     bb3bac2ca9a3a5b7fa601781adf70167a0449d75
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"The main changes in this (fairly busy) cycle were:
- There was a class of scheduler bugs related to forgetting to update
the rq-clock timestamp which can cause weird and hard to debug
problems, so there's a new debug facility for this: which uncovered
a whole lot of bugs which convinced us that we want to keep the
debug facility.
(Peter Zijlstra, Matt Fleming)
- Various cputime related updates: eliminate cputime and use u64
nanoseconds directly, simplify and improve the arch interfaces,
implement delayed accounting more widely, etc. - (Frederic
Weisbecker)
- Move code around for better structure plus cleanups (Ingo Molnar)
- Move IO schedule accounting deeper into the scheduler plus related
changes to improve the situation (Tejun Heo)
- ... plus a round of sched/rt and sched/deadline fixes, plus other
fixes, updats and cleanups"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (85 commits)
sched/core: Remove unlikely() annotation from sched_move_task()
sched/autogroup: Rename auto_group.[ch] to autogroup.[ch]
sched/topology: Split out scheduler topology code from core.c into topology.c
sched/core: Remove unnecessary #include headers
sched/rq_clock: Consolidate the ordering of the rq_clock methods
delayacct: Include <uapi/linux/taskstats.h>
sched/core: Clean up comments
sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning knob in milliseconds
sched/clock: Add dummy clear_sched_clock_stable() stub function
sched/cputime: Remove generic asm headers
sched/cputime: Remove unused nsec_to_cputime()
s390, sched/cputime: Remove unused cputime definitions
powerpc, sched/cputime: Remove unused cputime definitions
s390, sched/cputime: Make arch_cpu_idle_time() to return nsecs
ia64, sched/cputime: Remove unused cputime definitions
ia64: Convert vtime to use nsec units directly
ia64, sched/cputime: Move the nsecs based cputime headers to the last arch using it
sched/cputime: Remove jiffies based cputime
sched/cputime, vtime: Return nsecs instead of cputime_t to account
sched/cputime: Complete nsec conversion of tick based accounting
...
Diffstat (limited to 'kernel/sched/clock.c')
-rw-r--r--  kernel/sched/clock.c  158
1 file changed, 83 insertions, 75 deletions
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index e85a725e5c34..ad64efe41722 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -77,41 +77,88 @@ EXPORT_SYMBOL_GPL(sched_clock);
 
 __read_mostly int sched_clock_running;
 
+void sched_clock_init(void)
+{
+	sched_clock_running = 1;
+}
+
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-static struct static_key __sched_clock_stable = STATIC_KEY_INIT;
-static int __sched_clock_stable_early;
+/*
+ * We must start with !__sched_clock_stable because the unstable -> stable
+ * transition is accurate, while the stable -> unstable transition is not.
+ *
+ * Similarly we start with __sched_clock_stable_early, thereby assuming we
+ * will become stable, such that there's only a single 1 -> 0 transition.
+ */
+static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable);
+static int __sched_clock_stable_early = 1;
 
-int sched_clock_stable(void)
+/*
+ * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset
+ */
+static __read_mostly u64 raw_offset;
+static __read_mostly u64 gtod_offset;
+
+struct sched_clock_data {
+	u64			tick_raw;
+	u64			tick_gtod;
+	u64			clock;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
+
+static inline struct sched_clock_data *this_scd(void)
 {
-	return static_key_false(&__sched_clock_stable);
+	return this_cpu_ptr(&sched_clock_data);
 }
 
-static void __set_sched_clock_stable(void)
+static inline struct sched_clock_data *cpu_sdc(int cpu)
 {
-	if (!sched_clock_stable())
-		static_key_slow_inc(&__sched_clock_stable);
+	return &per_cpu(sched_clock_data, cpu);
+}
 
-	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
+int sched_clock_stable(void)
+{
+	return static_branch_likely(&__sched_clock_stable);
 }
 
-void set_sched_clock_stable(void)
+static void __set_sched_clock_stable(void)
 {
-	__sched_clock_stable_early = 1;
+	struct sched_clock_data *scd = this_scd();
 
-	smp_mb(); /* matches sched_clock_init() */
+	/*
+	 * Attempt to make the (initial) unstable->stable transition continuous.
+	 */
+	raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw);
 
-	if (!sched_clock_running)
-		return;
+	printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
+			scd->tick_gtod, gtod_offset,
+			scd->tick_raw,  raw_offset);
 
-	__set_sched_clock_stable();
+	static_branch_enable(&__sched_clock_stable);
+	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
 static void __clear_sched_clock_stable(struct work_struct *work)
 {
-	/* XXX worry about clock continuity */
-	if (sched_clock_stable())
-		static_key_slow_dec(&__sched_clock_stable);
+	struct sched_clock_data *scd = this_scd();
+
+	/*
+	 * Attempt to make the stable->unstable transition continuous.
+	 *
+	 * Trouble is, this is typically called from the TSC watchdog
+	 * timer, which is late per definition. This means the tick
+	 * values can already be screwy.
+	 *
+	 * Still do what we can.
+	 */
+	gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod);
+
+	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
+			scd->tick_gtod, gtod_offset,
+			scd->tick_raw,  raw_offset);
 
+	static_branch_disable(&__sched_clock_stable);
 	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
@@ -121,47 +168,15 @@ void clear_sched_clock_stable(void)
 {
 	__sched_clock_stable_early = 0;
 
-	smp_mb(); /* matches sched_clock_init() */
-
-	if (!sched_clock_running)
-		return;
+	smp_mb(); /* matches sched_clock_init_late() */
 
-	schedule_work(&sched_clock_work);
+	if (sched_clock_running == 2)
+		schedule_work(&sched_clock_work);
 }
 
-struct sched_clock_data {
-	u64			tick_raw;
-	u64			tick_gtod;
-	u64			clock;
-};
-
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
-
-static inline struct sched_clock_data *this_scd(void)
+void sched_clock_init_late(void)
 {
-	return this_cpu_ptr(&sched_clock_data);
-}
-
-static inline struct sched_clock_data *cpu_sdc(int cpu)
-{
-	return &per_cpu(sched_clock_data, cpu);
-}
-
-void sched_clock_init(void)
-{
-	u64 ktime_now = ktime_to_ns(ktime_get());
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct sched_clock_data *scd = cpu_sdc(cpu);
-
-		scd->tick_raw = 0;
-		scd->tick_gtod = ktime_now;
-		scd->clock = ktime_now;
-	}
-
-	sched_clock_running = 1;
-
+	sched_clock_running = 2;
 	/*
 	 * Ensure that it is impossible to not do a static_key update.
 	 *
@@ -173,8 +188,6 @@ void sched_clock_init(void)
 
 	if (__sched_clock_stable_early)
 		__set_sched_clock_stable();
-	else
-		__clear_sched_clock_stable(NULL);
 }
 
 /*
@@ -216,7 +229,7 @@ again:
 	 *		      scd->tick_gtod + TICK_NSEC);
 	 */
 
-	clock = scd->tick_gtod + delta;
+	clock = scd->tick_gtod + gtod_offset + delta;
 	min_clock = wrap_max(scd->tick_gtod, old_clock);
 	max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
 
@@ -302,7 +315,7 @@ u64 sched_clock_cpu(int cpu)
 	u64 clock;
 
 	if (sched_clock_stable())
-		return sched_clock();
+		return sched_clock() + raw_offset;
 
 	if (unlikely(!sched_clock_running))
 		return 0ull;
@@ -323,23 +336,22 @@ EXPORT_SYMBOL_GPL(sched_clock_cpu);
 void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd;
-	u64 now, now_gtod;
-
-	if (sched_clock_stable())
-		return;
-
-	if (unlikely(!sched_clock_running))
-		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
 
+	/*
+	 * Update these values even if sched_clock_stable(), because it can
+	 * become unstable at any point in time at which point we need some
+	 * values to fall back on.
+	 *
+	 * XXX arguably we can skip this if we expose tsc_clocksource_reliable
+	 */
 	scd = this_scd();
-	now_gtod = ktime_to_ns(ktime_get());
-	now = sched_clock();
+	scd->tick_raw = sched_clock();
+	scd->tick_gtod = ktime_get_ns();
 
-	scd->tick_raw = now;
-	scd->tick_gtod = now_gtod;
-	sched_clock_local(scd);
+	if (!sched_clock_stable() && likely(sched_clock_running))
+		sched_clock_local(scd);
 }
 
 /*
@@ -366,11 +378,6 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
-void sched_clock_init(void)
-{
-	sched_clock_running = 1;
-}
-
 u64 sched_clock_cpu(int cpu)
 {
 	if (unlikely(!sched_clock_running))
@@ -378,6 +385,7 @@ u64 sched_clock_cpu(int cpu)
 
 	return sched_clock();
 }
+
 #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 /*