aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author: Marcelo Tosatti <mtosatti@redhat.com> 2013-07-10 21:21:57 -0400
committer: Paolo Bonzini <pbonzini@redhat.com> 2013-07-18 06:29:30 -0400
commit: e04c5d76b0cfb66cadd900cf147526f2271884b8 (patch)
tree: a303b66c1fc1ae7786148d2f50e8b0b86290162b /arch/x86
parent: b3897a49e22fc173efa77527a447c714f753f681 (diff)
remove sched notifier for cross-cpu migrations
Linux as a guest on the KVM hypervisor, the only user of the pvclock vsyscall interface, does not require notification on task migration because:

1. The cpu ID number maps 1:1 to per-CPU pvclock time info.
2. The per-CPU pvclock time info is updated if the underlying CPU changes.
3. The version is increased whenever the underlying CPU changes.

This is sufficient to guarantee that the nanoseconds counter is calculated properly.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/pvclock.h  |  1
-rw-r--r--  arch/x86/kernel/pvclock.c       | 44
-rw-r--r--  arch/x86/vdso/vclock_gettime.c  | 16
3 files changed, 8 insertions, 53 deletions
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 109a9dd5d454..be8269b00e2a 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -93,7 +93,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
93 93
94struct pvclock_vsyscall_time_info { 94struct pvclock_vsyscall_time_info {
95 struct pvclock_vcpu_time_info pvti; 95 struct pvclock_vcpu_time_info pvti;
96 u32 migrate_count;
97} __attribute__((__aligned__(SMP_CACHE_BYTES))); 96} __attribute__((__aligned__(SMP_CACHE_BYTES)));
98 97
99#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) 98#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2cb9470ea85b..a16bae3f83b3 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -128,46 +128,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
128 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); 128 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
129} 129}
130 130
131static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
132
133static struct pvclock_vsyscall_time_info *
134pvclock_get_vsyscall_user_time_info(int cpu)
135{
136 if (!pvclock_vdso_info) {
137 BUG();
138 return NULL;
139 }
140
141 return &pvclock_vdso_info[cpu];
142}
143
144struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
145{
146 return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
147}
148
149#ifdef CONFIG_X86_64 131#ifdef CONFIG_X86_64
150static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
151 void *v)
152{
153 struct task_migration_notifier *mn = v;
154 struct pvclock_vsyscall_time_info *pvti;
155
156 pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
157
158 /* this is NULL when pvclock vsyscall is not initialized */
159 if (unlikely(pvti == NULL))
160 return NOTIFY_DONE;
161
162 pvti->migrate_count++;
163
164 return NOTIFY_DONE;
165}
166
167static struct notifier_block pvclock_migrate = {
168 .notifier_call = pvclock_task_migrate,
169};
170
171/* 132/*
172 * Initialize the generic pvclock vsyscall state. This will allocate 133 * Initialize the generic pvclock vsyscall state. This will allocate
173 * a/some page(s) for the per-vcpu pvclock information, set up a 134 * a/some page(s) for the per-vcpu pvclock information, set up a
@@ -181,17 +142,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
181 142
182 WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); 143 WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
183 144
184 pvclock_vdso_info = i;
185
186 for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { 145 for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
187 __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, 146 __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
188 __pa(i) + (idx*PAGE_SIZE), 147 __pa(i) + (idx*PAGE_SIZE),
189 PAGE_KERNEL_VVAR); 148 PAGE_KERNEL_VVAR);
190 } 149 }
191 150
192
193 register_task_migration_notifier(&pvclock_migrate);
194
195 return 0; 151 return 0;
196} 152}
197#endif 153#endif
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index c74436e687bf..72074d528400 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -85,15 +85,18 @@ static notrace cycle_t vread_pvclock(int *mode)
85 cycle_t ret; 85 cycle_t ret;
86 u64 last; 86 u64 last;
87 u32 version; 87 u32 version;
88 u32 migrate_count;
89 u8 flags; 88 u8 flags;
90 unsigned cpu, cpu1; 89 unsigned cpu, cpu1;
91 90
92 91
93 /* 92 /*
94 * When looping to get a consistent (time-info, tsc) pair, we 93 * Note: hypervisor must guarantee that:
95 * also need to deal with the possibility we can switch vcpus, 94 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
96 * so make sure we always re-fetch time-info for the current vcpu. 95 * 2. that per-CPU pvclock time info is updated if the
96 * underlying CPU changes.
97 * 3. that version is increased whenever underlying CPU
98 * changes.
99 *
97 */ 100 */
98 do { 101 do {
99 cpu = __getcpu() & VGETCPU_CPU_MASK; 102 cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -104,8 +107,6 @@ static notrace cycle_t vread_pvclock(int *mode)
104 107
105 pvti = get_pvti(cpu); 108 pvti = get_pvti(cpu);
106 109
107 migrate_count = pvti->migrate_count;
108
109 version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); 110 version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
110 111
111 /* 112 /*
@@ -117,8 +118,7 @@ static notrace cycle_t vread_pvclock(int *mode)
117 cpu1 = __getcpu() & VGETCPU_CPU_MASK; 118 cpu1 = __getcpu() & VGETCPU_CPU_MASK;
118 } while (unlikely(cpu != cpu1 || 119 } while (unlikely(cpu != cpu1 ||
119 (pvti->pvti.version & 1) || 120 (pvti->pvti.version & 1) ||
120 pvti->pvti.version != version || 121 pvti->pvti.version != version));
121 pvti->migrate_count != migrate_count));
122 122
123 if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) 123 if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
124 *mode = VCLOCK_NONE; 124 *mode = VCLOCK_NONE;