aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorMarcelo Tosatti <mtosatti@redhat.com>2015-03-23 19:21:51 -0400
committerMarcelo Tosatti <mtosatti@redhat.com>2015-03-23 19:22:48 -0400
commit0a4e6be9ca17c54817cf814b4b5aa60478c6df27 (patch)
tree461ed6cace26da5c75d1c01982fde2f3ccc5d4fa /arch/x86
parent58d2930f4ee335ab703d768cb0318331fc1bb62c (diff)
x86: kvm: Revert "remove sched notifier for cross-cpu migrations"
The following point: 2. per-CPU pvclock time info is updated if the underlying CPU changes. Is not true anymore since "KVM: x86: update pvclock area conditionally, on cpu migration". Add task migration notification back. Problem noticed by Andy Lutomirski. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> CC: stable@kernel.org # 3.11+
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/kernel/pvclock.c44
-rw-r--r--arch/x86/vdso/vclock_gettime.c16
3 files changed, 53 insertions, 8 deletions
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d6b078e9fa28..25b1cc07d496 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
95 95
96struct pvclock_vsyscall_time_info { 96struct pvclock_vsyscall_time_info {
97 struct pvclock_vcpu_time_info pvti; 97 struct pvclock_vcpu_time_info pvti;
98 u32 migrate_count;
98} __attribute__((__aligned__(SMP_CACHE_BYTES))); 99} __attribute__((__aligned__(SMP_CACHE_BYTES)));
99 100
100#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) 101#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2f355d229a58..e5ecd20e72dd 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
141 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); 141 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
142} 142}
143 143
144static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
145
146static struct pvclock_vsyscall_time_info *
147pvclock_get_vsyscall_user_time_info(int cpu)
148{
149 if (!pvclock_vdso_info) {
150 BUG();
151 return NULL;
152 }
153
154 return &pvclock_vdso_info[cpu];
155}
156
157struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
158{
159 return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
160}
161
144#ifdef CONFIG_X86_64 162#ifdef CONFIG_X86_64
163static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
164 void *v)
165{
166 struct task_migration_notifier *mn = v;
167 struct pvclock_vsyscall_time_info *pvti;
168
169 pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
170
171 /* this is NULL when pvclock vsyscall is not initialized */
172 if (unlikely(pvti == NULL))
173 return NOTIFY_DONE;
174
175 pvti->migrate_count++;
176
177 return NOTIFY_DONE;
178}
179
180static struct notifier_block pvclock_migrate = {
181 .notifier_call = pvclock_task_migrate,
182};
183
145/* 184/*
146 * Initialize the generic pvclock vsyscall state. This will allocate 185 * Initialize the generic pvclock vsyscall state. This will allocate
147 * a/some page(s) for the per-vcpu pvclock information, set up a 186 * a/some page(s) for the per-vcpu pvclock information, set up a
@@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
155 194
156 WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); 195 WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
157 196
197 pvclock_vdso_info = i;
198
158 for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { 199 for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
159 __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, 200 __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
160 __pa(i) + (idx*PAGE_SIZE), 201 __pa(i) + (idx*PAGE_SIZE),
161 PAGE_KERNEL_VVAR); 202 PAGE_KERNEL_VVAR);
162 } 203 }
163 204
205
206 register_task_migration_notifier(&pvclock_migrate);
207
164 return 0; 208 return 0;
165} 209}
166#endif 210#endif
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 9793322751e0..30933760ee5f 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode)
82 cycle_t ret; 82 cycle_t ret;
83 u64 last; 83 u64 last;
84 u32 version; 84 u32 version;
85 u32 migrate_count;
85 u8 flags; 86 u8 flags;
86 unsigned cpu, cpu1; 87 unsigned cpu, cpu1;
87 88
88 89
89 /* 90 /*
90 * Note: hypervisor must guarantee that: 91 * When looping to get a consistent (time-info, tsc) pair, we
91 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. 92 * also need to deal with the possibility we can switch vcpus,
92 * 2. that per-CPU pvclock time info is updated if the 93 * so make sure we always re-fetch time-info for the current vcpu.
93 * underlying CPU changes.
94 * 3. that version is increased whenever underlying CPU
95 * changes.
96 *
97 */ 94 */
98 do { 95 do {
99 cpu = __getcpu() & VGETCPU_CPU_MASK; 96 cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -104,6 +101,8 @@ static notrace cycle_t vread_pvclock(int *mode)
104 101
105 pvti = get_pvti(cpu); 102 pvti = get_pvti(cpu);
106 103
104 migrate_count = pvti->migrate_count;
105
107 version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); 106 version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
108 107
109 /* 108 /*
@@ -115,7 +114,8 @@ static notrace cycle_t vread_pvclock(int *mode)
115 cpu1 = __getcpu() & VGETCPU_CPU_MASK; 114 cpu1 = __getcpu() & VGETCPU_CPU_MASK;
116 } while (unlikely(cpu != cpu1 || 115 } while (unlikely(cpu != cpu1 ||
117 (pvti->pvti.version & 1) || 116 (pvti->pvti.version & 1) ||
118 pvti->pvti.version != version)); 117 pvti->pvti.version != version ||
118 pvti->migrate_count != migrate_count));
119 119
120 if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) 120 if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
121 *mode = VCLOCK_NONE; 121 *mode = VCLOCK_NONE;