author     Thomas Gleixner <tglx@linutronix.de>   2018-09-17 08:45:43 -0400
committer  Thomas Gleixner <tglx@linutronix.de>   2018-10-04 17:00:27 -0400
commit     3e89bf35ebf59c12e8c1476f6681fae0ebdcb2a7
tree       dac2e3ce9c6adcb4bb627e2a7331b31aa24b3411
parent     4f72adc5068294268387a81a6bf91d9bb07ecc5c
x86/vdso: Move cycle_last handling into the caller
Dereferencing gtod->cycle_last all over the place and doing the cycles < last
comparison in the vclock read functions generates horrible code. Doing it at
the call site is much better and gains a few cycles both for TSC and pvclock.
Caveat: This adds the comparison to the hyperv vclock as well, but I have
no way to test that.
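For illustration, a minimal user-space sketch of the before/after pattern (the
struct layout, constants and helper names below are stand-ins, not the real
vDSO code; the actual change is in the diff that follows):

  /* Simplified stand-ins for the vDSO data; illustration only. */
  #include <stdint.h>
  #include <stdio.h>

  struct vgtod {
  	uint64_t cycle_last;
  	uint32_t mult;
  	uint32_t shift;
  };

  static struct vgtod gtod_copy = { .cycle_last = 1000, .mult = 3, .shift = 1 };
  static struct vgtod *gtod = &gtod_copy;

  /* Stand-in for rdtsc_ordered() / __pvclock_read_cycles(). */
  static uint64_t read_raw_cycles(void)
  {
  	return 1100;
  }

  /* Old shape: each vclock read function clamped against cycle_last itself. */
  static uint64_t vread_old(void)
  {
  	uint64_t ret = read_raw_cycles();
  	uint64_t last = gtod->cycle_last;

  	return ret >= last ? ret : last;
  }

  /*
   * New shape: the read function returns the raw count and the single
   * caller performs the cycles > last check once.
   */
  static uint64_t do_hres_ns(uint64_t base_ns)
  {
  	uint64_t last = gtod->cycle_last;
  	uint64_t cycles = read_raw_cycles();	/* was vread_tsc()/vread_pvclock() */
  	uint64_t ns = base_ns;

  	if (cycles > last)
  		ns += (cycles - last) * gtod->mult;
  	return ns >> gtod->shift;
  }

  int main(void)
  {
  	printf("old clamp: %llu, new ns: %llu\n",
  	       (unsigned long long)vread_old(),
  	       (unsigned long long)do_hres_ns(0));
  	return 0;
  }

The plain cycles > last check at the call site keeps the old clamping
semantics: if the counter appears to be behind cycle_last, the delta is simply
not added.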
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Matt Rickard <matt@softrans.com.au>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: devel@linuxdriverproject.org
Cc: virtualization@lists.linux-foundation.org
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Juergen Gross <jgross@suse.com>
Link: https://lkml.kernel.org/r/20180917130707.741440803@linutronix.de
-rw-r--r--  arch/x86/entry/vdso/vclock_gettime.c | 39
1 file changed, 7 insertions, 32 deletions
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 40105024a210..b7ccbff26a3f 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -77,9 +77,8 @@ static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
 static notrace u64 vread_pvclock(void)
 {
 	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
-	u64 ret;
-	u64 last;
 	u32 version;
+	u64 ret;
 
 	/*
 	 * Note: The kernel and hypervisor must guarantee that cpu ID
@@ -112,13 +111,7 @@ static notrace u64 vread_pvclock(void)
 		ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
 	} while (pvclock_read_retry(pvti, version));
 
-	/* refer to vread_tsc() comment for rationale */
-	last = gtod->cycle_last;
-
-	if (likely(ret >= last))
-		return ret;
-
-	return last;
+	return ret;
 }
 #endif
 #ifdef CONFIG_HYPERV_TSCPAGE
@@ -131,30 +124,10 @@ static notrace u64 vread_hvclock(void)
 }
 #endif
 
-notrace static u64 vread_tsc(void)
-{
-	u64 ret = (u64)rdtsc_ordered();
-	u64 last = gtod->cycle_last;
-
-	if (likely(ret >= last))
-		return ret;
-
-	/*
-	 * GCC likes to generate cmov here, but this branch is extremely
-	 * predictable (it's just a function of time and the likely is
-	 * very likely) and there's a data dependence, so force GCC
-	 * to generate a branch instead. I don't barrier() because
-	 * we don't actually need a barrier, and if this function
-	 * ever gets inlined it will generate worse code.
-	 */
-	asm volatile ("");
-	return last;
-}
-
 notrace static inline u64 vgetcyc(int mode)
 {
 	if (mode == VCLOCK_TSC)
-		return vread_tsc();
+		return (u64)rdtsc_ordered();
 #ifdef CONFIG_PARAVIRT_CLOCK
 	else if (mode == VCLOCK_PVCLOCK)
 		return vread_pvclock();
@@ -169,17 +142,19 @@ notrace static inline u64 vgetcyc(int mode)
 notrace static int do_hres(clockid_t clk, struct timespec *ts)
 {
 	struct vgtod_ts *base = &gtod->basetime[clk];
+	u64 cycles, last, ns;
 	unsigned int seq;
-	u64 cycles, ns;
 
 	do {
 		seq = gtod_read_begin(gtod);
 		ts->tv_sec = base->sec;
 		ns = base->nsec;
+		last = gtod->cycle_last;
 		cycles = vgetcyc(gtod->vclock_mode);
 		if (unlikely((s64)cycles < 0))
 			return vdso_fallback_gettime(clk, ts);
-		ns += (cycles - gtod->cycle_last) * gtod->mult;
+		if (cycles > last)
+			ns += (cycles - last) * gtod->mult;
 		ns >>= gtod->shift;
 	} while (unlikely(gtod_read_retry(gtod, seq)));
 