author		Linus Torvalds <torvalds@linux-foundation.org>	2017-04-02 12:25:10 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-04-02 12:25:10 -0400
commit		128c434a70996a3738c7ca4aa2ee91942e4c48f0 (patch)
tree		65646fafea62ef939e49ef57a31edec4c5b049fb
parent		0a89b5eb818aee2efe8b092bb303aec56486a3d3 (diff)
parent		658b299580da2830a69eea44a8b6da1c2579a32e (diff)
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Thomas Gleixner:
 "This update provides:

   - make the scheduler clock switch to unstable mode smooth so the
     timestamps stay at microseconds granularity instead of switching
     to tick granularity

   - unbreak perf test tsc by taking the new offset into account which
     was added in order to provide better sched clock continuity

   - switching sched clock to unstable mode runs all clock related
     computations which affect the sched clock output itself from a
     work queue. In case of preemption sched clock uses half updated
     data and provides wrong timestamps. Keep the math in the protected
     context and delegate only the static key switch to workqueue
     context.

   - remove a duplicate header include"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/headers: Remove duplicate #include <linux/sched/debug.h> line
  sched/clock: Fix broken stable to unstable transfer
  sched/clock, x86/perf: Fix "perf test tsc"
  sched/clock: Fix clear_sched_clock_stable() preempt wobbly
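For context on the "perf test tsc" fix: userspace converts raw TSC readings to sched_clock() time using the time_shift/time_mult/time_offset triple exported in the mmap'ed struct perf_event_mmap_page, and this merge folds __sched_clock_offset into time_offset and time_zero so the result stays aligned with sched_clock_cpu(). Below is a minimal sketch of that conversion, following the recipe documented in include/uapi/linux/perf_event.h; the sample values in main() are made up, and in real use the fields come from a mapped page.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * TSC -> nanosecond conversion as documented in
 * include/uapi/linux/perf_event.h; time_shift, time_mult and
 * time_offset would normally be read from a mmap'ed
 * struct perf_event_mmap_page.
 */
static uint64_t perf_tsc_to_ns(uint64_t cyc, uint16_t time_shift,
			       uint32_t time_mult, uint64_t time_offset)
{
	uint64_t quot = cyc >> time_shift;
	uint64_t rem  = cyc & (((uint64_t)1 << time_shift) - 1);

	/* delta = time_offset + cyc * time_mult / 2^time_shift */
	return time_offset + quot * time_mult +
	       ((rem * time_mult) >> time_shift);
}

int main(void)
{
	/* made-up sample values: mult/shift approximating 1 ns per cycle */
	printf("%" PRIu64 "\n", perf_tsc_to_ns(1000000, 10, 1024, 0));
	return 0;
}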
-rw-r--r--	arch/x86/events/core.c		9
-rw-r--r--	arch/x86/include/asm/timer.h	2
-rw-r--r--	arch/x86/kernel/tsc.c		4
-rw-r--r--	drivers/tty/vt/keyboard.c	1
-rw-r--r--	include/linux/sched/clock.h	13
-rw-r--r--	kernel/sched/clock.c		46
6 files changed, 44 insertions, 31 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 2aa1ad194db2..580b60f5ac83 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2256,6 +2256,7 @@ void arch_perf_update_userpage(struct perf_event *event,
 				  struct perf_event_mmap_page *userpg, u64 now)
 {
 	struct cyc2ns_data *data;
+	u64 offset;
 
 	userpg->cap_user_time = 0;
 	userpg->cap_user_time_zero = 0;
@@ -2263,11 +2264,13 @@ void arch_perf_update_userpage(struct perf_event *event,
 		!!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
-	if (!sched_clock_stable())
+	if (!using_native_sched_clock() || !sched_clock_stable())
 		return;
 
 	data = cyc2ns_read_begin();
 
+	offset = data->cyc2ns_offset + __sched_clock_offset;
+
 	/*
 	 * Internal timekeeping for enabled/running/stopped times
 	 * is always in the local_clock domain.
@@ -2275,7 +2278,7 @@ void arch_perf_update_userpage(struct perf_event *event,
 	userpg->cap_user_time = 1;
 	userpg->time_mult = data->cyc2ns_mul;
 	userpg->time_shift = data->cyc2ns_shift;
-	userpg->time_offset = data->cyc2ns_offset - now;
+	userpg->time_offset = offset - now;
 
 	/*
 	 * cap_user_time_zero doesn't make sense when we're using a different
@@ -2283,7 +2286,7 @@ void arch_perf_update_userpage(struct perf_event *event,
 	 */
 	if (!event->attr.use_clockid) {
 		userpg->cap_user_time_zero = 1;
-		userpg->time_zero = data->cyc2ns_offset;
+		userpg->time_zero = offset;
 	}
 
 	cyc2ns_read_end(data);
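With cap_user_time_zero set, the same fields can be run in reverse to recover the TSC value for a given perf timestamp, again per the recipe in include/uapi/linux/perf_event.h; since time_zero now carries data->cyc2ns_offset + __sched_clock_offset (see the last hunk above), the round trip stays consistent. A sketch under those assumptions, with made-up values mirroring the forward example earlier:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Inverse direction enabled by cap_user_time_zero: recover the TSC
 * value for a given perf timestamp, per the documented recipe.
 */
static uint64_t perf_ns_to_tsc(uint64_t timestamp, uint64_t time_zero,
			       uint32_t time_mult, uint16_t time_shift)
{
	uint64_t time = timestamp - time_zero;
	uint64_t quot = time / time_mult;
	uint64_t rem  = time % time_mult;

	return (quot << time_shift) + (rem << time_shift) / time_mult;
}

int main(void)
{
	/* made-up values: inverts perf_tsc_to_ns(1000000, 10, 1024, 0) */
	printf("%" PRIu64 "\n", perf_ns_to_tsc(1000000, 0, 1024, 10));
	return 0;
}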
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index a04eabd43d06..27e9f9d769b8 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -12,6 +12,8 @@ extern int recalibrate_cpu_khz(void);
 
 extern int no_timer_check;
 
+extern bool using_native_sched_clock(void);
+
 /*
  * We use the full linear equation: f(x) = a + b*x, in order to allow
  * a continuous function in the face of dynamic freq changes.
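The comment continuing in this header describes the cyc2ns mapping as the full linear equation f(x) = a + b*x, with b held as a fixed-point mult/shift pair (cyc2ns_mul, cyc2ns_shift) and a as cyc2ns_offset. A minimal illustrative sketch of that mapping follows; the kernel itself computes the product with a mul_u64_u32_shr() helper rather than open-coded 128-bit math, and the calibration numbers below are made up.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative cyc2ns mapping: ns = offset + cycles * mul / 2^shift,
 * i.e. f(x) = a + b*x with b kept as fixed point. Field names mirror
 * struct cyc2ns_data (cyc2ns_mul, cyc2ns_shift, cyc2ns_offset).
 */
struct cyc2ns_sketch {
	uint32_t mul;
	uint32_t shift;
	uint64_t offset;
};

static uint64_t cycles_to_ns(const struct cyc2ns_sketch *d, uint64_t cycles)
{
	/* 128-bit intermediate so large cycle counts do not overflow */
	return d->offset +
	       (uint64_t)(((unsigned __int128)cycles * d->mul) >> d->shift);
}

int main(void)
{
	/* made-up calibration: ~0.4 ns/cycle for a 2.5 GHz TSC */
	struct cyc2ns_sketch d = { .mul = 419430, .shift = 20, .offset = 0 };

	printf("%" PRIu64 "\n", cycles_to_ns(&d, 2500000000ull));
	return 0;
}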
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c73a7f9e881a..714dfba6a1e7 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -328,7 +328,7 @@ unsigned long long sched_clock(void)
 	return paravirt_sched_clock();
 }
 
-static inline bool using_native_sched_clock(void)
+bool using_native_sched_clock(void)
 {
 	return pv_time_ops.sched_clock == native_sched_clock;
 }
@@ -336,7 +336,7 @@ static inline bool using_native_sched_clock(void)
 unsigned long long
 sched_clock(void) __attribute__((alias("native_sched_clock")));
 
-static inline bool using_native_sched_clock(void) { return true; }
+bool using_native_sched_clock(void) { return true; }
 #endif
 
 int check_tsc_unstable(void)
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index c5f0fc906136..8af8d9542663 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -28,7 +28,6 @@
 #include <linux/module.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
-#include <linux/sched/debug.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 #include <linux/mm.h>
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index 4a68c6791207..34fe92ce1ebd 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -54,15 +54,16 @@ static inline u64 local_clock(void)
 }
 #else
 extern void sched_clock_init_late(void);
-/*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
- */
 extern int sched_clock_stable(void);
 extern void clear_sched_clock_stable(void);
 
+/*
+ * When sched_clock_stable(), __sched_clock_offset provides the offset
+ * between local_clock() and sched_clock().
+ */
+extern u64 __sched_clock_offset;
+
+
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index a08795e21628..00a45c45beca 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -96,10 +96,10 @@ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable);
 static int __sched_clock_stable_early = 1;
 
 /*
- * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset
+ * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset
  */
-static __read_mostly u64 raw_offset;
-static __read_mostly u64 gtod_offset;
+__read_mostly u64 __sched_clock_offset;
+static __read_mostly u64 __gtod_offset;
 
 struct sched_clock_data {
 	u64 tick_raw;
@@ -131,17 +131,24 @@ static void __set_sched_clock_stable(void)
 	/*
 	 * Attempt to make the (initial) unstable->stable transition continuous.
 	 */
-	raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw);
+	__sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw);
 
 	printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
-			scd->tick_gtod, gtod_offset,
-			scd->tick_raw,  raw_offset);
+			scd->tick_gtod, __gtod_offset,
+			scd->tick_raw,  __sched_clock_offset);
 
 	static_branch_enable(&__sched_clock_stable);
 	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
-static void __clear_sched_clock_stable(struct work_struct *work)
+static void __sched_clock_work(struct work_struct *work)
+{
+	static_branch_disable(&__sched_clock_stable);
+}
+
+static DECLARE_WORK(sched_clock_work, __sched_clock_work);
+
+static void __clear_sched_clock_stable(void)
 {
 	struct sched_clock_data *scd = this_scd();
 
@@ -154,17 +161,17 @@ static void __clear_sched_clock_stable(struct work_struct *work)
 	 *
 	 * Still do what we can.
 	 */
-	gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod);
+	__gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod);
 
 	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
-			scd->tick_gtod, gtod_offset,
-			scd->tick_raw,  raw_offset);
+			scd->tick_gtod, __gtod_offset,
+			scd->tick_raw,  __sched_clock_offset);
 
-	static_branch_disable(&__sched_clock_stable);
 	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
-}
 
-static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
+	if (sched_clock_stable())
+		schedule_work(&sched_clock_work);
+}
 
 void clear_sched_clock_stable(void)
 {
@@ -173,7 +180,7 @@ void clear_sched_clock_stable(void)
 	smp_mb(); /* matches sched_clock_init_late() */
 
 	if (sched_clock_running == 2)
-		schedule_work(&sched_clock_work);
+		__clear_sched_clock_stable();
 }
 
 void sched_clock_init_late(void)
@@ -214,7 +221,7 @@ static inline u64 wrap_max(u64 x, u64 y)
  */
 static u64 sched_clock_local(struct sched_clock_data *scd)
 {
-	u64 now, clock, old_clock, min_clock, max_clock;
+	u64 now, clock, old_clock, min_clock, max_clock, gtod;
 	s64 delta;
 
 again:
@@ -231,9 +238,10 @@ again:
 	 *		      scd->tick_gtod + TICK_NSEC);
 	 */
 
-	clock = scd->tick_gtod + gtod_offset + delta;
-	min_clock = wrap_max(scd->tick_gtod, old_clock);
-	max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
+	gtod = scd->tick_gtod + __gtod_offset;
+	clock = gtod + delta;
+	min_clock = wrap_max(gtod, old_clock);
+	max_clock = wrap_max(old_clock, gtod + TICK_NSEC);
 
 	clock = wrap_max(clock, min_clock);
 	clock = wrap_min(clock, max_clock);
@@ -317,7 +325,7 @@ u64 sched_clock_cpu(int cpu)
 	u64 clock;
 
 	if (sched_clock_stable())
-		return sched_clock() + raw_offset;
+		return sched_clock() + __sched_clock_offset;
 
 	if (unlikely(!sched_clock_running))
 		return 0ull;
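To see why these clock.c changes keep timestamps continuous, note the invariant restated in the first hunk: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset. Each direction of the stable<->unstable switch solves for one offset in terms of the other at the last tick, so the value reported by sched_clock_cpu() does not jump. A self-contained toy illustration with made-up sample values:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Toy model of the offset math above. tick_raw stands for the
 * sched_clock() sample at the last tick, tick_gtod for the
 * ktime_get_ns() sample; both transitions print the same value,
 * demonstrating continuity.
 */
int main(void)
{
	uint64_t tick_raw  = 5000;	/* sched_clock() at the tick */
	uint64_t tick_gtod = 9000;	/* ktime_get_ns() at the tick */
	uint64_t gtod_offset = 0;
	uint64_t sched_clock_offset;

	/* unstable -> stable: __set_sched_clock_stable() picks the offset */
	sched_clock_offset = (tick_gtod + gtod_offset) - tick_raw;
	printf("stable:   raw %" PRIu64 " + offset %" PRIu64 " = %" PRIu64 "\n",
	       tick_raw, sched_clock_offset, tick_raw + sched_clock_offset);

	/* stable -> unstable: __clear_sched_clock_stable() solves the inverse */
	gtod_offset = (tick_raw + sched_clock_offset) - tick_gtod;
	printf("unstable: gtod %" PRIu64 " + offset %" PRIu64 " = %" PRIu64 "\n",
	       tick_gtod, gtod_offset, tick_gtod + gtod_offset);

	return 0;
}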