author		Marcelo Tosatti <mtosatti@redhat.com>	2012-11-27 20:28:57 -0500
committer	Marcelo Tosatti <mtosatti@redhat.com>	2012-11-27 20:29:11 -0500
commit		51c19b4f5927f5a646e93d69f73c7e89ea14e737 (patch)
tree		a48486f72dc433fd516684d4441022650429d333 /arch/x86
parent		3dc4f7cfb7441e5e0fed3a02fc81cdaabd28300a (diff)
x86: vdso: pvclock gettime support
Improve the performance of time system calls when using Linux pvclock,
by reading the time info from the fixmap-visible copy of the pvclock data.
Originally from Jeremy Fitzhardinge.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
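The fast path this speeds up is the one reached through the vDSO's __vdso_clock_gettime(); below is a minimal sketch of a caller that benefits. It uses only plain POSIX clock_gettime(), nothing KVM-specific is assumed:

/* Sketch: exercise the vDSO-backed clock_gettime() fast path.
 * In a KVM guest with a stable pvclock, each call below can be
 * served entirely in userspace via vread_pvclock(), no syscall.
 */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec start, now;
	long calls = 0;

	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		clock_gettime(CLOCK_MONOTONIC, &now);
		calls++;
	} while (now.tv_sec - start.tv_sec < 1);

	printf("%ld clock_gettime() calls in ~1s\n", calls);
	return 0;
}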
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/include/asm/vsyscall.h	20
-rw-r--r--	arch/x86/vdso/vclock_gettime.c	81
-rw-r--r--	arch/x86/vdso/vgetcpu.c	11
3 files changed, 101 insertions, 11 deletions
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index eaea1d31f753..80f80955cfd8 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -33,6 +33,26 @@ extern void map_vsyscall(void);
  */
 extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
 
+#ifdef CONFIG_X86_64
+
+#define VGETCPU_CPU_MASK 0xfff
+
+static inline unsigned int __getcpu(void)
+{
+	unsigned int p;
+
+	if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
+		/* Load per CPU data from RDTSCP */
+		native_read_tscp(&p);
+	} else {
+		/* Load per CPU data from GDT */
+		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+	}
+
+	return p;
+}
+#endif /* CONFIG_X86_64 */
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_VSYSCALL_H */
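The GDT branch above works because the kernel stores (node << 12) | cpu in the limit field of the per-CPU segment, which lsl retrieves. A small sketch of unpacking that value with the same mask and shift used by __vdso_getcpu() further down; decode_getcpu() is a hypothetical helper, not part of this patch:

/* Sketch: unpack the value __getcpu() returns.  The kernel encodes
 * the NUMA node in the upper bits and the CPU number in the low
 * 12 bits (VGETCPU_CPU_MASK == 0xfff).
 */
#include <stdio.h>

static void decode_getcpu(unsigned int p, unsigned int *cpu, unsigned int *node)
{
	*cpu  = p & 0xfff;	/* VGETCPU_CPU_MASK */
	*node = p >> 12;
}

int main(void)
{
	unsigned int cpu, node;

	decode_getcpu((1u << 12) | 3, &cpu, &node);	/* node 1, cpu 3 */
	printf("cpu=%u node=%u\n", cpu, node);
	return 0;
}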
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 4df6c373421a..205ad328aa52 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -22,6 +22,7 @@
 #include <asm/hpet.h>
 #include <asm/unistd.h>
 #include <asm/io.h>
+#include <asm/pvclock.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
@@ -62,6 +63,76 @@ static notrace cycle_t vread_hpet(void)
 	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
 }
 
+#ifdef CONFIG_PARAVIRT_CLOCK
+
+static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
+{
+	const struct pvclock_vsyscall_time_info *pvti_base;
+	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
+	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
+
+	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
+
+	pvti_base = (struct pvclock_vsyscall_time_info *)
+		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
+
+	return &pvti_base[offset];
+}
+
+static notrace cycle_t vread_pvclock(int *mode)
+{
+	const struct pvclock_vsyscall_time_info *pvti;
+	cycle_t ret;
+	u64 last;
+	u32 version;
+	u32 migrate_count;
+	u8 flags;
+	unsigned cpu, cpu1;
+
+
+	/*
+	 * When looping to get a consistent (time-info, tsc) pair, we
+	 * also need to deal with the possibility we can switch vcpus,
+	 * so make sure we always re-fetch time-info for the current vcpu.
+	 */
+	do {
+		cpu = __getcpu() & VGETCPU_CPU_MASK;
+		/* TODO: We can put vcpu id into higher bits of pvti.version.
+		 * This will save a couple of cycles by getting rid of
+		 * __getcpu() calls (Gleb).
+		 */
+
+		pvti = get_pvti(cpu);
+
+		migrate_count = pvti->migrate_count;
+
+		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
+
+		/*
+		 * Test we're still on the cpu as well as the version.
+		 * We could have been migrated just after the first
+		 * vgetcpu but before fetching the version, so we
+		 * wouldn't notice a version change.
+		 */
+		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
+	} while (unlikely(cpu != cpu1 ||
+			  (pvti->pvti.version & 1) ||
+			  pvti->pvti.version != version ||
+			  pvti->migrate_count != migrate_count));
+
+	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
+		*mode = VCLOCK_NONE;
+
+	/* refer to tsc.c read_tsc() comment for rationale */
+	last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	return last;
+}
+#endif
+
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
 	long ret;
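vread_pvclock() above is a seqlock-style read: the host keeps pvti.version odd while it updates the record, so the loop retries whenever the version was odd, changed across the read, or the task moved to another vcpu (the cpu/migrate_count checks). A minimal generic sketch of that versioned-read pattern, with illustrative type and field names and memory barriers omitted for brevity:

#include <stdio.h>

/* Sketch: seqcount-style consistent read of a record whose writer bumps
 * "version" to odd, updates the payload, then bumps it back to even.
 * Real code needs memory barriers between the reads.
 */
struct versioned_rec {
	volatile unsigned int version;
	unsigned long long data;
};

static unsigned long long read_consistent(const struct versioned_rec *r)
{
	unsigned int v;
	unsigned long long d;

	do {
		v = r->version;			/* snapshot the version first */
		d = r->data;			/* then the payload */
	} while ((v & 1) || v != r->version);	/* retry if mid-update or changed */

	return d;
}

int main(void)
{
	struct versioned_rec r = { .version = 2, .data = 42 };

	printf("data=%llu\n", read_consistent(&r));
	return 0;
}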
@@ -80,7 +151,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 }
 
 
-notrace static inline u64 vgetsns(void)
+notrace static inline u64 vgetsns(int *mode)
 {
 	long v;
 	cycles_t cycles;
@@ -88,6 +159,10 @@ notrace static inline u64 vgetsns(void)
 		cycles = vread_tsc();
 	else if (gtod->clock.vclock_mode == VCLOCK_HPET)
 		cycles = vread_hpet();
+#ifdef CONFIG_PARAVIRT_CLOCK
+	else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
+		cycles = vread_pvclock(mode);
+#endif
 	else
 		return 0;
 	v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
@@ -107,7 +182,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->wall_time_sec;
 		ns = gtod->wall_time_snsec;
-		ns += vgetsns();
+		ns += vgetsns(&mode);
 		ns >>= gtod->clock.shift;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 
@@ -127,7 +202,7 @@ notrace static int do_monotonic(struct timespec *ts)
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->monotonic_time_sec;
 		ns = gtod->monotonic_time_snsec;
-		ns += vgetsns();
+		ns += vgetsns(&mode);
 		ns >>= gtod->clock.shift;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 	timespec_add_ns(ts, ns);
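For reference, the arithmetic these hunks feed: vgetsns() returns the masked cycle delta scaled by the clocksource mult (the return statement sits outside the hunk shown), and the callers add the base shifted-nanosecond value and shift right by clock.shift. A tiny sketch of that fixed-point conversion, with made-up constants roughly matching a 3 GHz TSC:

/* Sketch of the conversion used by do_realtime()/do_monotonic():
 *   ns = (base_snsec + ((cycles - cycle_last) & mask) * mult) >> shift
 * All constants below are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cycle_last = 1000000, cycles = 1002400;	/* 2400 cycles elapsed */
	uint64_t mask = ~0ULL;
	uint32_t mult = 715827883, shift = 31;	/* ~ (1/3) ns per cycle, scaled by 2^31 */
	uint64_t base_snsec = 0;

	uint64_t ns = (base_snsec + ((cycles - cycle_last) & mask) * (uint64_t)mult) >> shift;
	printf("elapsed ~%llu ns\n", (unsigned long long)ns);	/* ~800 ns */
	return 0;
}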
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 5463ad558573..2f94b039e55b 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -17,15 +17,10 @@ __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
 	unsigned int p;
 
-	if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
-		/* Load per CPU data from RDTSCP */
-		native_read_tscp(&p);
-	} else {
-		/* Load per CPU data from GDT */
-		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
-	}
+	p = __getcpu();
+
 	if (cpu)
-		*cpu = p & 0xfff;
+		*cpu = p & VGETCPU_CPU_MASK;
 	if (node)
 		*node = p >> 12;
 	return 0;
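On x86-64, glibc's sched_getcpu()/getcpu() typically resolve to __vdso_getcpu(), so the simplification above is exercised by ordinary userspace code such as this sketch:

/* Sketch: userspace caller of the vDSO getcpu path. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	int cpu = sched_getcpu();	/* typically serviced by __vdso_getcpu() */

	if (cpu < 0)
		perror("sched_getcpu");
	else
		printf("running on CPU %d\n", cpu);
	return 0;
}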