diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2019-05-17 06:50:42 -0400 |
---|---|---|
committer | Heiko Carstens <heiko.carstens@de.ibm.com> | 2019-06-15 06:25:52 -0400 |
commit | 38f2c691a4b3e89d476f8e8350d1ca299974b89d (patch) | |
tree | 8b4ab76fd8281be6dfcf8d5edc48d5ee522a4710 | |
parent | 7928260539f3a13b5b23a3fa0a7c0e4f5255940b (diff) |
s390: improve wait logic of stop_machine
The stop_machine loop to advance the state machine and to wait for all
affected CPUs to check in calls cpu_relax_yield in a tight loop until
the last missing CPU acknowledges the state transition.
On a virtual system, where not all logical CPUs are backed by real CPUs
all the time, it can take a while for all CPUs to check in. With the
current definition of cpu_relax_yield a diagnose 0x44 is done which
tells the hypervisor to schedule *some* other CPU. That can be any
CPU and not necessarily one of the CPUs that need to run in order to
advance the state machine. This can lead to a pretty bad diagnose 0x44
storm until the last missing CPU finally checked in.
Replace the undirected cpu_relax_yield based on diagnose 0x44 with a
directed yield. Each CPU in the wait loop will pick up the next CPU
in the cpumask of stop_machine. The diagnose 0x9c is used to tell the
hypervisor to run this next CPU instead of the current one. If there
is only a limited number of real CPUs backing the virtual CPUs we
end up with the real CPUs passed around in a round-robin fashion.
[heiko.carstens@de.ibm.com]:
Use cpumask_next_wrap as suggested by Peter Zijlstra.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
-rw-r--r-- | arch/s390/include/asm/processor.h | 3 | ||||
-rw-r--r-- | arch/s390/kernel/processor.c | 17 | ||||
-rw-r--r-- | arch/s390/kernel/smp.c | 2 | ||||
-rw-r--r-- | include/linux/sched.h | 2 | ||||
-rw-r--r-- | kernel/stop_machine.c | 14 |
5 files changed, 25 insertions, 13 deletions
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index b0fcbc37b637..445ce9ee4404 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h | |||
@@ -36,6 +36,7 @@ | |||
36 | 36 | ||
37 | #ifndef __ASSEMBLY__ | 37 | #ifndef __ASSEMBLY__ |
38 | 38 | ||
39 | #include <linux/cpumask.h> | ||
39 | #include <linux/linkage.h> | 40 | #include <linux/linkage.h> |
40 | #include <linux/irqflags.h> | 41 | #include <linux/irqflags.h> |
41 | #include <asm/cpu.h> | 42 | #include <asm/cpu.h> |
@@ -225,7 +226,7 @@ static __no_kasan_or_inline unsigned short stap(void) | |||
225 | * Give up the time slice of the virtual PU. | 226 | * Give up the time slice of the virtual PU. |
226 | */ | 227 | */ |
227 | #define cpu_relax_yield cpu_relax_yield | 228 | #define cpu_relax_yield cpu_relax_yield |
228 | void cpu_relax_yield(void); | 229 | void cpu_relax_yield(const struct cpumask *cpumask); |
229 | 230 | ||
230 | #define cpu_relax() barrier() | 231 | #define cpu_relax() barrier() |
231 | 232 | ||
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5de13307b703..4cdaefec1b7c 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c | |||
@@ -31,6 +31,7 @@ struct cpu_info { | |||
31 | }; | 31 | }; |
32 | 32 | ||
33 | static DEFINE_PER_CPU(struct cpu_info, cpu_info); | 33 | static DEFINE_PER_CPU(struct cpu_info, cpu_info); |
34 | static DEFINE_PER_CPU(int, cpu_relax_retry); | ||
34 | 35 | ||
35 | static bool machine_has_cpu_mhz; | 36 | static bool machine_has_cpu_mhz; |
36 | 37 | ||
@@ -58,13 +59,19 @@ void s390_update_cpu_mhz(void) | |||
58 | on_each_cpu(update_cpu_mhz, NULL, 0); | 59 | on_each_cpu(update_cpu_mhz, NULL, 0); |
59 | } | 60 | } |
60 | 61 | ||
61 | void notrace cpu_relax_yield(void) | 62 | void notrace cpu_relax_yield(const struct cpumask *cpumask) |
62 | { | 63 | { |
63 | if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) { | 64 | int cpu, this_cpu; |
64 | diag_stat_inc(DIAG_STAT_X044); | 65 | |
65 | asm volatile("diag 0,0,0x44"); | 66 | this_cpu = smp_processor_id(); |
67 | if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { | ||
68 | __this_cpu_write(cpu_relax_retry, 0); | ||
69 | cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); | ||
70 | if (cpu >= nr_cpu_ids) | ||
71 | return; | ||
72 | if (arch_vcpu_is_preempted(cpu)) | ||
73 | smp_yield_cpu(cpu); | ||
66 | } | 74 | } |
67 | barrier(); | ||
68 | } | 75 | } |
69 | EXPORT_SYMBOL(cpu_relax_yield); | 76 | EXPORT_SYMBOL(cpu_relax_yield); |
70 | 77 | ||
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index f00955940694..44974654cbd0 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c | |||
@@ -414,7 +414,7 @@ void smp_yield_cpu(int cpu) | |||
414 | diag_stat_inc_norecursion(DIAG_STAT_X09C); | 414 | diag_stat_inc_norecursion(DIAG_STAT_X09C); |
415 | asm volatile("diag %0,0,0x9c" | 415 | asm volatile("diag %0,0,0x9c" |
416 | : : "d" (pcpu_devices[cpu].address)); | 416 | : : "d" (pcpu_devices[cpu].address)); |
417 | } else if (MACHINE_HAS_DIAG44) { | 417 | } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) { |
418 | diag_stat_inc_norecursion(DIAG_STAT_X044); | 418 | diag_stat_inc_norecursion(DIAG_STAT_X044); |
419 | asm volatile("diag 0,0,0x44"); | 419 | asm volatile("diag 0,0,0x44"); |
420 | } | 420 | } |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 11837410690f..1f9f3160da7e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1519,7 +1519,7 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma | |||
1519 | #endif | 1519 | #endif |
1520 | 1520 | ||
1521 | #ifndef cpu_relax_yield | 1521 | #ifndef cpu_relax_yield |
1522 | #define cpu_relax_yield() cpu_relax() | 1522 | #define cpu_relax_yield(cpumask) cpu_relax() |
1523 | #endif | 1523 | #endif |
1524 | 1524 | ||
1525 | extern int yield_to(struct task_struct *p, bool preempt); | 1525 | extern int yield_to(struct task_struct *p, bool preempt); |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2b5a6754646f..b8b0c5ff8da9 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -183,6 +183,7 @@ static int multi_cpu_stop(void *data) | |||
183 | struct multi_stop_data *msdata = data; | 183 | struct multi_stop_data *msdata = data; |
184 | enum multi_stop_state curstate = MULTI_STOP_NONE; | 184 | enum multi_stop_state curstate = MULTI_STOP_NONE; |
185 | int cpu = smp_processor_id(), err = 0; | 185 | int cpu = smp_processor_id(), err = 0; |
186 | const struct cpumask *cpumask; | ||
186 | unsigned long flags; | 187 | unsigned long flags; |
187 | bool is_active; | 188 | bool is_active; |
188 | 189 | ||
@@ -192,15 +193,18 @@ static int multi_cpu_stop(void *data) | |||
192 | */ | 193 | */ |
193 | local_save_flags(flags); | 194 | local_save_flags(flags); |
194 | 195 | ||
195 | if (!msdata->active_cpus) | 196 | if (!msdata->active_cpus) { |
196 | is_active = cpu == cpumask_first(cpu_online_mask); | 197 | cpumask = cpu_online_mask; |
197 | else | 198 | is_active = cpu == cpumask_first(cpumask); |
198 | is_active = cpumask_test_cpu(cpu, msdata->active_cpus); | 199 | } else { |
200 | cpumask = msdata->active_cpus; | ||
201 | is_active = cpumask_test_cpu(cpu, cpumask); | ||
202 | } | ||
199 | 203 | ||
200 | /* Simple state machine */ | 204 | /* Simple state machine */ |
201 | do { | 205 | do { |
202 | /* Chill out and ensure we re-read multi_stop_state. */ | 206 | /* Chill out and ensure we re-read multi_stop_state. */ |
203 | cpu_relax_yield(); | 207 | cpu_relax_yield(cpumask); |
204 | if (msdata->state != curstate) { | 208 | if (msdata->state != curstate) { |
205 | curstate = msdata->state; | 209 | curstate = msdata->state; |
206 | switch (curstate) { | 210 | switch (curstate) { |