author    Marcelo Tosatti <mtosatti@redhat.com>    2013-07-10 21:21:57 -0400
committer Paolo Bonzini <pbonzini@redhat.com>      2013-07-18 06:29:30 -0400
commit    e04c5d76b0cfb66cadd900cf147526f2271884b8
tree      a303b66c1fc1ae7786148d2f50e8b0b86290162b
parent    b3897a49e22fc173efa77527a447c714f753f681
remove sched notifier for cross-cpu migrations
Linux as a guest on the KVM hypervisor, the only user of the pvclock
vsyscall interface, does not require notification on task migration
because:

1. The CPU ID number maps 1:1 to per-CPU pvclock time info.
2. The per-CPU pvclock time info is updated if the underlying CPU changes.
3. The version is increased whenever the underlying CPU changes.

This is sufficient to guarantee that the nanoseconds counter is
calculated properly.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
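The reasoning above is what lets the vDSO read loop rely on the version
field alone. Below is a minimal sketch of that lockless read protocol,
not the kernel source: read_cpu(), read_tsc() and the pvti[] array are
illustrative stand-ins for __getcpu(), the TSC read inside
__pvclock_read_cycles(), and the fixmapped per-CPU time info, and the
memory barriers of the real code are omitted.

/* Illustrative per-CPU time info; the real layout is
 * struct pvclock_vcpu_time_info in the x86 ABI headers. */
struct pvti_sketch {
	volatile unsigned version;	/* odd while the hypervisor is updating */
	unsigned long long tsc_base;	/* TSC value at last update */
	unsigned long long system_ns;	/* guest nanoseconds at last update */
};

extern struct pvti_sketch pvti[];		/* one slot per CPU (guarantee 1) */
extern unsigned read_cpu(void);			/* current CPU, e.g. via RDTSCP */
extern unsigned long long read_tsc(void);	/* raw TSC read */

static unsigned long long read_pvclock_ns(void)
{
	unsigned cpu, cpu_after, version;
	unsigned long long ns;

	do {
		cpu = read_cpu();
		version = pvti[cpu].version;		/* snapshot the version first */
		ns = pvti[cpu].system_ns +
		     (read_tsc() - pvti[cpu].tsc_base);	/* scale/shift omitted */
		cpu_after = read_cpu();
	} while ((version & 1) ||			/* update was in flight */
		 version != pvti[cpu].version ||	/* changed underneath us */
		 cpu != cpu_after);			/* migrated mid-read: retry */

	return ns;
}

A migration that happens mid-read either changes the CPU (caught by the
cpu != cpu_after check) or forces a refresh of the slot being read
(caught by the version check, per guarantees 2 and 3), so a separate
migrate_count is redundant.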
-rw-r--r--  arch/x86/include/asm/pvclock.h  |  1 -
-rw-r--r--  arch/x86/kernel/pvclock.c       | 44 -
-rw-r--r--  arch/x86/vdso/vclock_gettime.c  | 16 +-
-rw-r--r--  include/linux/sched.h           |  8 -
-rw-r--r--  kernel/sched/core.c             | 15 -
5 files changed, 8 insertions(+), 76 deletions(-)
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 109a9dd5d454..be8269b00e2a 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -93,7 +93,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 
 struct pvclock_vsyscall_time_info {
 	struct pvclock_vcpu_time_info pvti;
-	u32 migrate_count;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2cb9470ea85b..a16bae3f83b3 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -128,46 +128,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
 	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
 
-static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
-
-static struct pvclock_vsyscall_time_info *
-pvclock_get_vsyscall_user_time_info(int cpu)
-{
-	if (!pvclock_vdso_info) {
-		BUG();
-		return NULL;
-	}
-
-	return &pvclock_vdso_info[cpu];
-}
-
-struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
-{
-	return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
-}
-
 #ifdef CONFIG_X86_64
-static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
-			void *v)
-{
-	struct task_migration_notifier *mn = v;
-	struct pvclock_vsyscall_time_info *pvti;
-
-	pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
-
-	/* this is NULL when pvclock vsyscall is not initialized */
-	if (unlikely(pvti == NULL))
-		return NOTIFY_DONE;
-
-	pvti->migrate_count++;
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block pvclock_migrate = {
-	.notifier_call = pvclock_task_migrate,
-};
-
 /*
  * Initialize the generic pvclock vsyscall state. This will allocate
  * a/some page(s) for the per-vcpu pvclock information, set up a
@@ -181,17 +142,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
 
 	WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
 
-	pvclock_vdso_info = i;
-
 	for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
 		__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
 			     __pa(i) + (idx*PAGE_SIZE),
 			     PAGE_KERNEL_VVAR);
 	}
 
-
-	register_task_migration_notifier(&pvclock_migrate);
-
 	return 0;
 }
 #endif
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index c74436e687bf..72074d528400 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -85,15 +85,18 @@ static notrace cycle_t vread_pvclock(int *mode)
 	cycle_t ret;
 	u64 last;
 	u32 version;
-	u32 migrate_count;
 	u8 flags;
 	unsigned cpu, cpu1;
 
 
 	/*
-	 * When looping to get a consistent (time-info, tsc) pair, we
-	 * also need to deal with the possibility we can switch vcpus,
-	 * so make sure we always re-fetch time-info for the current vcpu.
+	 * Note: hypervisor must guarantee that:
+	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
+	 * 2. that per-CPU pvclock time info is updated if the
+	 *    underlying CPU changes.
+	 * 3. that version is increased whenever underlying CPU
+	 *    changes.
+	 *
 	 */
 	do {
 		cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -104,8 +107,6 @@ static notrace cycle_t vread_pvclock(int *mode)
 
 		pvti = get_pvti(cpu);
 
-		migrate_count = pvti->migrate_count;
-
 		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
 
 		/*
@@ -117,8 +118,7 @@ static notrace cycle_t vread_pvclock(int *mode)
 		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
 	} while (unlikely(cpu != cpu1 ||
 			  (pvti->pvti.version & 1) ||
-			  pvti->pvti.version != version ||
-			  pvti->migrate_count != migrate_count));
+			  pvti->pvti.version != version));
 
 	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
 		*mode = VCLOCK_NONE;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50d04b92ceda..bfc809d51745 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -107,14 +107,6 @@ extern unsigned long this_cpu_load(void);
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);
 
-/* Notifier for when a task gets migrated to a new CPU */
-struct task_migration_notifier {
-	struct task_struct *task;
-	int from_cpu;
-	int to_cpu;
-};
-extern void register_task_migration_notifier(struct notifier_block *n);
-
 extern unsigned long get_parent_ip(unsigned long addr);
 
 extern void dump_cpu_task(int cpu);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0d8eb4525e76..0efd2eefb027 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -976,13 +976,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 		rq->skip_clock_update = 1;
 }
 
-static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
-
-void register_task_migration_notifier(struct notifier_block *n)
-{
-	atomic_notifier_chain_register(&task_migration_notifier, n);
-}
-
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
@@ -1013,18 +1006,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	trace_sched_migrate_task(p, new_cpu);
 
 	if (task_cpu(p) != new_cpu) {
-		struct task_migration_notifier tmn;
-
 		if (p->sched_class->migrate_task_rq)
 			p->sched_class->migrate_task_rq(p, new_cpu);
 		p->se.nr_migrations++;
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
-
-		tmn.task = p;
-		tmn.from_cpu = task_cpu(p);
-		tmn.to_cpu = new_cpu;
-
-		atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
 	}
 
 	__set_task_cpu(p, new_cpu);
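For context on guarantee 3, this is roughly how the writer side keeps
the reader loop honest: the hypervisor makes the version odd before
touching a per-CPU slot and even again afterwards, so any concurrent
reader retries. A sketch in the same illustrative pvti_sketch terms as
the sketch after the commit message (KVM's actual update path is host
code and not part of this patch):

static void hypervisor_update_pvti(struct pvti_sketch *p,
				   unsigned long long tsc_base,
				   unsigned long long system_ns)
{
	p->version++;			/* now odd: readers spin/retry */
	/* real code orders these stores with write barriers */
	p->tsc_base = tsc_base;
	p->system_ns = system_ns;
	p->version++;			/* even again: snapshot consistent */
}

Since a vCPU that lands on a different physical CPU has its time info
refreshed through this path before the guest can read it, the version
check in vread_pvclock() already observes cross-CPU moves, which is why
the task_migration_notifier machinery above can be deleted wholesale.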