diff options
author    Marcelo Tosatti <mtosatti@redhat.com>  2015-03-23 19:21:51 -0400
committer Marcelo Tosatti <mtosatti@redhat.com>  2015-03-23 19:22:48 -0400
commit    0a4e6be9ca17c54817cf814b4b5aa60478c6df27 (patch)
tree      461ed6cace26da5c75d1c01982fde2f3ccc5d4fa /arch/x86
parent    58d2930f4ee335ab703d768cb0318331fc1bb62c (diff)
x86: kvm: Revert "remove sched notifier for cross-cpu migrations"
The following point:
2. per-CPU pvclock time info is updated if the
underlying CPU changes.
Is not true anymore since "KVM: x86: update pvclock area conditionally,
on cpu migration".
Add task migration notification back.
Problem noticed by Andy Lutomirski.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
CC: stable@kernel.org # 3.11+
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/pvclock.h |  1
-rw-r--r--  arch/x86/kernel/pvclock.c      | 44
-rw-r--r--  arch/x86/vdso/vclock_gettime.c | 16
3 files changed, 53 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d6b078e9fa28..25b1cc07d496 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
95 | 95 | ||
96 | struct pvclock_vsyscall_time_info { | 96 | struct pvclock_vsyscall_time_info { |
97 | struct pvclock_vcpu_time_info pvti; | 97 | struct pvclock_vcpu_time_info pvti; |
98 | u32 migrate_count; | ||
98 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 99 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
99 | 100 | ||
100 | #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) | 101 | #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2f355d229a58..e5ecd20e72dd 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
141 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | 141 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); |
142 | } | 142 | } |
143 | 143 | ||
144 | static struct pvclock_vsyscall_time_info *pvclock_vdso_info; | ||
145 | |||
146 | static struct pvclock_vsyscall_time_info * | ||
147 | pvclock_get_vsyscall_user_time_info(int cpu) | ||
148 | { | ||
149 | if (!pvclock_vdso_info) { | ||
150 | BUG(); | ||
151 | return NULL; | ||
152 | } | ||
153 | |||
154 | return &pvclock_vdso_info[cpu]; | ||
155 | } | ||
156 | |||
157 | struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) | ||
158 | { | ||
159 | return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; | ||
160 | } | ||
161 | |||
144 | #ifdef CONFIG_X86_64 | 162 | #ifdef CONFIG_X86_64 |
163 | static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, | ||
164 | void *v) | ||
165 | { | ||
166 | struct task_migration_notifier *mn = v; | ||
167 | struct pvclock_vsyscall_time_info *pvti; | ||
168 | |||
169 | pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); | ||
170 | |||
171 | /* this is NULL when pvclock vsyscall is not initialized */ | ||
172 | if (unlikely(pvti == NULL)) | ||
173 | return NOTIFY_DONE; | ||
174 | |||
175 | pvti->migrate_count++; | ||
176 | |||
177 | return NOTIFY_DONE; | ||
178 | } | ||
179 | |||
180 | static struct notifier_block pvclock_migrate = { | ||
181 | .notifier_call = pvclock_task_migrate, | ||
182 | }; | ||
183 | |||
145 | /* | 184 | /* |
146 | * Initialize the generic pvclock vsyscall state. This will allocate | 185 | * Initialize the generic pvclock vsyscall state. This will allocate |
147 | * a/some page(s) for the per-vcpu pvclock information, set up a | 186 | * a/some page(s) for the per-vcpu pvclock information, set up a |
@@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
155 | 194 | ||
156 | WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); | 195 | WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); |
157 | 196 | ||
197 | pvclock_vdso_info = i; | ||
198 | |||
158 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { | 199 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { |
159 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, | 200 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, |
160 | __pa(i) + (idx*PAGE_SIZE), | 201 | __pa(i) + (idx*PAGE_SIZE), |
161 | PAGE_KERNEL_VVAR); | 202 | PAGE_KERNEL_VVAR); |
162 | } | 203 | } |
163 | 204 | ||
205 | |||
206 | register_task_migration_notifier(&pvclock_migrate); | ||
207 | |||
164 | return 0; | 208 | return 0; |
165 | } | 209 | } |
166 | #endif | 210 | #endif |
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 9793322751e0..30933760ee5f 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode)
82 | cycle_t ret; | 82 | cycle_t ret; |
83 | u64 last; | 83 | u64 last; |
84 | u32 version; | 84 | u32 version; |
85 | u32 migrate_count; | ||
85 | u8 flags; | 86 | u8 flags; |
86 | unsigned cpu, cpu1; | 87 | unsigned cpu, cpu1; |
87 | 88 | ||
88 | 89 | ||
89 | /* | 90 | /* |
90 | * Note: hypervisor must guarantee that: | 91 | * When looping to get a consistent (time-info, tsc) pair, we |
91 | * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. | 92 | * also need to deal with the possibility we can switch vcpus, |
92 | * 2. that per-CPU pvclock time info is updated if the | 93 | * so make sure we always re-fetch time-info for the current vcpu. |
93 | * underlying CPU changes. | ||
94 | * 3. that version is increased whenever underlying CPU | ||
95 | * changes. | ||
96 | * | ||
97 | */ | 94 | */ |
98 | do { | 95 | do { |
99 | cpu = __getcpu() & VGETCPU_CPU_MASK; | 96 | cpu = __getcpu() & VGETCPU_CPU_MASK; |
@@ -104,6 +101,8 @@ static notrace cycle_t vread_pvclock(int *mode)
104 | 101 | ||
105 | pvti = get_pvti(cpu); | 102 | pvti = get_pvti(cpu); |
106 | 103 | ||
104 | migrate_count = pvti->migrate_count; | ||
105 | |||
107 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); | 106 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); |
108 | 107 | ||
109 | /* | 108 | /* |
@@ -115,7 +114,8 @@ static notrace cycle_t vread_pvclock(int *mode)
115 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; | 114 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; |
116 | } while (unlikely(cpu != cpu1 || | 115 | } while (unlikely(cpu != cpu1 || |
117 | (pvti->pvti.version & 1) || | 116 | (pvti->pvti.version & 1) || |
118 | pvti->pvti.version != version)); | 117 | pvti->pvti.version != version || |
118 | pvti->migrate_count != migrate_count)); | ||
119 | 119 | ||
120 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) | 120 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) |
121 | *mode = VCLOCK_NONE; | 121 | *mode = VCLOCK_NONE; |