 arch/x86/include/asm/pvclock.h |  1 -
 arch/x86/kernel/pvclock.c      | 44 ----------------------------
 arch/x86/vdso/vclock_gettime.c | 34 ++++++++++--------------
 include/linux/sched.h          |  8 --------
 kernel/sched/core.c            | 15 ---------------
 5 files changed, 15 insertions(+), 87 deletions(-)
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 25b1cc07d496..d6b078e9fa28 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,7 +95,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 
 struct pvclock_vsyscall_time_info {
 	struct pvclock_vcpu_time_info pvti;
-	u32 migrate_count;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
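Aside (not part of the patch): with migrate_count gone, pvti is once again the struct's only member, and __aligned__(SMP_CACHE_BYTES) pads each per-vCPU entry to its own cache line so a page holds a whole number of entries. A minimal userspace sketch of that sizing invariant follows; the 64-byte cache line and 4 KiB page are assumptions for illustration (the kernel takes the real values from SMP_CACHE_BYTES and PAGE_SIZE), while the 32-byte layout of pvclock_vcpu_time_info is the pvclock ABI:

#include <assert.h>
#include <stdint.h>

#define SMP_CACHE_BYTES 64	/* assumed cache-line size */
#define PAGE_SIZE	4096	/* assumed page size */

/* 32-byte per-vCPU time info, as in the pvclock ABI. */
struct pvclock_vcpu_time_info {
	uint32_t version;
	uint32_t pad0;
	uint64_t tsc_timestamp;
	uint64_t system_time;
	uint32_t tsc_to_system_mul;
	int8_t   tsc_shift;
	uint8_t  flags;
	uint8_t  pad[2];
};

/* The post-patch wrapper: pvti alone, padded to a cache line. */
struct pvclock_vsyscall_time_info {
	struct pvclock_vcpu_time_info pvti;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));

int main(void)
{
	/* One entry per cache line, so vCPUs never share a line... */
	assert(sizeof(struct pvclock_vsyscall_time_info) == SMP_CACHE_BYTES);
	/* ...and a page holds a whole number of entries (64 here),
	 * which is what lets the vDSO index entries by CPU number. */
	assert(PAGE_SIZE % sizeof(struct pvclock_vsyscall_time_info) == 0);
	return 0;
}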
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index e5ecd20e72dd..2f355d229a58 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -141,46 +141,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
 	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
 
-static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
-
-static struct pvclock_vsyscall_time_info *
-pvclock_get_vsyscall_user_time_info(int cpu)
-{
-	if (!pvclock_vdso_info) {
-		BUG();
-		return NULL;
-	}
-
-	return &pvclock_vdso_info[cpu];
-}
-
-struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
-{
-	return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
-}
-
 #ifdef CONFIG_X86_64
-static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
-				void *v)
-{
-	struct task_migration_notifier *mn = v;
-	struct pvclock_vsyscall_time_info *pvti;
-
-	pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
-
-	/* this is NULL when pvclock vsyscall is not initialized */
-	if (unlikely(pvti == NULL))
-		return NOTIFY_DONE;
-
-	pvti->migrate_count++;
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block pvclock_migrate = {
-	.notifier_call = pvclock_task_migrate,
-};
-
 /*
  * Initialize the generic pvclock vsyscall state. This will allocate
  * a/some page(s) for the per-vcpu pvclock information, set up a
@@ -194,17 +155,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
 
 	WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
 
-	pvclock_vdso_info = i;
-
 	for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
 		__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
 			     __pa(i) + (idx*PAGE_SIZE),
 			     PAGE_KERNEL_VVAR);
 	}
 
-
-	register_task_migration_notifier(&pvclock_migrate);
-
 	return 0;
 }
 #endif
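Aside (not part of the patch): the fixmap loop above is the producer side of the vDSO interface, mapping the kernel's pvti array at a fixed virtual address readable from userspace. The consumer side is get_pvti() in vclock_gettime.c, which the next hunk relies on; a sketch of that lookup as it stood in this era (from memory, not a verbatim copy) turns a CPU number into a slot inside those mapped pages:

static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
	/* PVTI_SIZE entries are packed into each mapped page, so
	 * split the CPU number into a page index and a slot. */
	int idx = cpu / (PAGE_SIZE / PVTI_SIZE);
	int offset = cpu % (PAGE_SIZE / PVTI_SIZE);
	const struct pvclock_vsyscall_time_info *pvti_base =
		(const struct pvclock_vsyscall_time_info *)
		__fix_to_virt(PVCLOCK_FIXMAP_BEGIN + idx);

	return &pvti_base[offset];
}

Because the pages are mapped PAGE_KERNEL_VVAR (read-only to userspace), the vDSO can only observe these entries; all writes come from the kernel/hypervisor side.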
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 40d2473836c9..9793322751e0 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,15 +82,18 @@ static notrace cycle_t vread_pvclock(int *mode)
 	cycle_t ret;
 	u64 last;
 	u32 version;
-	u32 migrate_count;
 	u8 flags;
 	unsigned cpu, cpu1;
 
 
 	/*
-	 * When looping to get a consistent (time-info, tsc) pair, we
-	 * also need to deal with the possibility we can switch vcpus,
-	 * so make sure we always re-fetch time-info for the current vcpu.
+	 * Note: hypervisor must guarantee that:
+	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
+	 * 2. that per-CPU pvclock time info is updated if the
+	 *    underlying CPU changes.
+	 * 3. that version is increased whenever underlying CPU
+	 *    changes.
+	 *
 	 */
 	do {
 		cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -99,27 +102,20 @@ static notrace cycle_t vread_pvclock(int *mode)
 		 * __getcpu() calls (Gleb).
 		 */
 
-		/* Make sure migrate_count will change if we leave the VCPU. */
-		do {
-			pvti = get_pvti(cpu);
-			migrate_count = pvti->migrate_count;
-
-			cpu1 = cpu;
-			cpu = __getcpu() & VGETCPU_CPU_MASK;
-		} while (unlikely(cpu != cpu1));
+		pvti = get_pvti(cpu);
 
 		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
 
 		/*
 		 * Test we're still on the cpu as well as the version.
-		 * - We must read TSC of pvti's VCPU.
-		 * - KVM doesn't follow the versioning protocol, so data could
-		 *   change before version if we left the VCPU.
+		 * We could have been migrated just after the first
+		 * vgetcpu but before fetching the version, so we
+		 * wouldn't notice a version change.
 		 */
-		smp_rmb();
-	} while (unlikely((pvti->pvti.version & 1) ||
-			  pvti->pvti.version != version ||
-			  pvti->migrate_count != migrate_count));
+		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
+	} while (unlikely(cpu != cpu1 ||
+			  (pvti->pvti.version & 1) ||
+			  pvti->pvti.version != version));
 
 	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
 		*mode = VCLOCK_NONE;
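Aside (not part of the patch): stripped of the removed lines, the reader is now a single retry loop instead of two nested ones. Condensed, with the same identifiers as the hunk above:

do {
	cpu = __getcpu() & VGETCPU_CPU_MASK;
	pvti = get_pvti(cpu);
	version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
	/* Re-read the CPU: a migration right after the first __getcpu()
	 * would leave us holding another vCPU's pvti, whose version
	 * need not have changed. */
	cpu1 = __getcpu() & VGETCPU_CPU_MASK;
} while (unlikely(cpu != cpu1 ||
		  (pvti->pvti.version & 1) ||		/* update in flight */
		  pvti->pvti.version != version));	/* torn read */

This is sound only under the three guarantees spelled out in the new comment: pvti is per-CPU, and any change of underlying CPU both updates it and bumps its version, so a migration is always caught by either the cpu recheck or the version recheck.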
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8222ae40ecb0..26a2e6122734 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -175,14 +175,6 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);
 
-/* Notifier for when a task gets migrated to a new CPU */
-struct task_migration_notifier {
-	struct task_struct *task;
-	int from_cpu;
-	int to_cpu;
-};
-extern void register_task_migration_notifier(struct notifier_block *n);
-
 extern unsigned long get_parent_ip(unsigned long addr);
 
 extern void dump_cpu_task(int cpu);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f9123a82cbb6..fe22f7510bce 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1016,13 +1016,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 		rq_clock_skip_update(rq, true);
 }
 
-static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
-
-void register_task_migration_notifier(struct notifier_block *n)
-{
-	atomic_notifier_chain_register(&task_migration_notifier, n);
-}
-
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
@@ -1053,18 +1046,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	trace_sched_migrate_task(p, new_cpu);
 
 	if (task_cpu(p) != new_cpu) {
-		struct task_migration_notifier tmn;
-
 		if (p->sched_class->migrate_task_rq)
 			p->sched_class->migrate_task_rq(p, new_cpu);
 		p->se.nr_migrations++;
 		perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
-
-		tmn.task = p;
-		tmn.from_cpu = task_cpu(p);
-		tmn.to_cpu = new_cpu;
-
-		atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
 	}
 
 	__set_task_cpu(p, new_cpu);