-rw-r--r--   arch/x86/include/asm/pvclock.h    1
-rw-r--r--   arch/x86/kernel/pvclock.c        44
-rw-r--r--   arch/x86/vdso/vclock_gettime.c   34
-rw-r--r--   include/linux/sched.h             8
-rw-r--r--   kernel/sched/core.c              15
5 files changed, 15 insertions, 87 deletions
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 25b1cc07d496..d6b078e9fa28 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,7 +95,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 
 struct pvclock_vsyscall_time_info {
         struct pvclock_vcpu_time_info pvti;
-        u32 migrate_count;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index e5ecd20e72dd..2f355d229a58 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -141,46 +141,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
         set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
 
-static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
-
-static struct pvclock_vsyscall_time_info *
-pvclock_get_vsyscall_user_time_info(int cpu)
-{
-        if (!pvclock_vdso_info) {
-                BUG();
-                return NULL;
-        }
-
-        return &pvclock_vdso_info[cpu];
-}
-
-struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
-{
-        return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
-}
-
 #ifdef CONFIG_X86_64
-static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
-                        void *v)
-{
-        struct task_migration_notifier *mn = v;
-        struct pvclock_vsyscall_time_info *pvti;
-
-        pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
-
-        /* this is NULL when pvclock vsyscall is not initialized */
-        if (unlikely(pvti == NULL))
-                return NOTIFY_DONE;
-
-        pvti->migrate_count++;
-
-        return NOTIFY_DONE;
-}
-
-static struct notifier_block pvclock_migrate = {
-        .notifier_call = pvclock_task_migrate,
-};
-
 /*
  * Initialize the generic pvclock vsyscall state.  This will allocate
  * a/some page(s) for the per-vcpu pvclock information, set up a
@@ -194,17 +155,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
 
         WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
 
-        pvclock_vdso_info = i;
-
         for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
                 __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
                              __pa(i) + (idx*PAGE_SIZE),
                              PAGE_KERNEL_VVAR);
         }
 
-
-        register_task_migration_notifier(&pvclock_migrate);
-
         return 0;
 }
 #endif
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 40d2473836c9..9793322751e0 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,15 +82,18 @@ static notrace cycle_t vread_pvclock(int *mode)
         cycle_t ret;
         u64 last;
         u32 version;
-        u32 migrate_count;
         u8 flags;
         unsigned cpu, cpu1;
 
 
         /*
-         * When looping to get a consistent (time-info, tsc) pair, we
-         * also need to deal with the possibility we can switch vcpus,
-         * so make sure we always re-fetch time-info for the current vcpu.
+         * Note: hypervisor must guarantee that:
+         * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
+         * 2. that per-CPU pvclock time info is updated if the
+         *    underlying CPU changes.
+         * 3. that version is increased whenever underlying CPU
+         *    changes.
+         *
          */
         do {
                 cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -99,27 +102,20 @@ static notrace cycle_t vread_pvclock(int *mode)
                  * __getcpu() calls (Gleb).
                  */
 
-                /* Make sure migrate_count will change if we leave the VCPU. */
-                do {
-                        pvti = get_pvti(cpu);
-                        migrate_count = pvti->migrate_count;
-
-                        cpu1 = cpu;
-                        cpu = __getcpu() & VGETCPU_CPU_MASK;
-                } while (unlikely(cpu != cpu1));
+                pvti = get_pvti(cpu);
 
                 version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
 
                 /*
                  * Test we're still on the cpu as well as the version.
-                 * - We must read TSC of pvti's VCPU.
-                 * - KVM doesn't follow the versioning protocol, so data could
-                 *   change before version if we left the VCPU.
+                 * We could have been migrated just after the first
+                 * vgetcpu but before fetching the version, so we
+                 * wouldn't notice a version change.
                  */
-                smp_rmb();
-        } while (unlikely((pvti->pvti.version & 1) ||
-                          pvti->pvti.version != version ||
-                          pvti->migrate_count != migrate_count));
+                cpu1 = __getcpu() & VGETCPU_CPU_MASK;
+        } while (unlikely(cpu != cpu1 ||
+                          (pvti->pvti.version & 1) ||
+                          pvti->pvti.version != version));
 
         if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
                 *mode = VCLOCK_NONE;
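
For reference, this is roughly how the vread_pvclock() retry loop reads once the hunks above are applied. It is pieced together only from the added and context lines shown here (the context between the two hunks is elided with a comment), so treat it as an illustrative excerpt rather than the full function:

        do {
                cpu = __getcpu() & VGETCPU_CPU_MASK;

                /* ... context elided: note about the atomicity of the
                 * __getcpu() calls ... */

                pvti = get_pvti(cpu);

                version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);

                /*
                 * Re-check the CPU id: a migration between the first
                 * __getcpu() and the version read would otherwise go
                 * unnoticed, so retry if the CPU changed, the version
                 * is odd (update in progress), or the version moved on.
                 */
                cpu1 = __getcpu() & VGETCPU_CPU_MASK;
        } while (unlikely(cpu != cpu1 ||
                          (pvti->pvti.version & 1) ||
                          pvti->pvti.version != version));

        if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
                *mode = VCLOCK_NONE;

The double CPU check replaces the old migrate_count/notifier mechanism: instead of being told about migrations, the reader detects them itself and relies on the hypervisor guarantees listed in the new comment.
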
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8222ae40ecb0..26a2e6122734 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -175,14 +175,6 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);
 
-/* Notifier for when a task gets migrated to a new CPU */
-struct task_migration_notifier {
-        struct task_struct *task;
-        int from_cpu;
-        int to_cpu;
-};
-extern void register_task_migration_notifier(struct notifier_block *n);
-
 extern unsigned long get_parent_ip(unsigned long addr);
 
 extern void dump_cpu_task(int cpu);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f9123a82cbb6..fe22f7510bce 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1016,13 +1016,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
         rq_clock_skip_update(rq, true);
 }
 
-static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
-
-void register_task_migration_notifier(struct notifier_block *n)
-{
-        atomic_notifier_chain_register(&task_migration_notifier, n);
-}
-
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
@@ -1053,18 +1046,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
         trace_sched_migrate_task(p, new_cpu);
 
         if (task_cpu(p) != new_cpu) {
-                struct task_migration_notifier tmn;
-
                 if (p->sched_class->migrate_task_rq)
                         p->sched_class->migrate_task_rq(p, new_cpu);
                 p->se.nr_migrations++;
                 perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
-
-                tmn.task = p;
-                tmn.from_cpu = task_cpu(p);
-                tmn.to_cpu = new_cpu;
-
-                atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
         }
 
         __set_task_cpu(p, new_cpu);