author	Martin Schwidefsky <schwidefsky@de.ibm.com>	2019-05-17 06:50:42 -0400
committer	Heiko Carstens <heiko.carstens@de.ibm.com>	2019-06-15 06:25:52 -0400
commit	38f2c691a4b3e89d476f8e8350d1ca299974b89d (patch)
tree	8b4ab76fd8281be6dfcf8d5edc48d5ee522a4710
parent	7928260539f3a13b5b23a3fa0a7c0e4f5255940b (diff)
s390: improve wait logic of stop_machine
The stop_machine loop to advance the state machine and to wait for all
affected CPUs to check in calls cpu_relax_yield in a tight loop until the
last missing CPU has acknowledged the state transition.

On a virtual system where not all logical CPUs are backed by real CPUs all
the time, it can take a while for all CPUs to check in. With the current
definition of cpu_relax_yield, a diagnose 0x44 is done, which tells the
hypervisor to schedule *some* other CPU. That can be any CPU, not
necessarily one of the CPUs that need to run in order to advance the state
machine. This can lead to a pretty bad diagnose 0x44 storm until the last
missing CPU has finally checked in.

Replace the undirected cpu_relax_yield based on diagnose 0x44 with a
directed yield. Each CPU in the wait loop picks up the next CPU in the
cpumask of stop_machine. Diagnose 0x9c is used to tell the hypervisor to
run this next CPU instead of the current one. If only a limited number of
real CPUs is backing the virtual CPUs, the real CPUs end up being passed
around in a round-robin fashion.

[heiko.carstens@de.ibm.com]: Use cpumask_next_wrap as suggested by
Peter Zijlstra.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
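To illustrate the selection logic described above, here is a minimal
userspace C sketch of the wrap-around "next CPU in the mask" walk that
cpumask_next_wrap performs in the patched cpu_relax_yield. The
pick_next_cpu helper, the uint64_t mask standing in for struct cpumask,
and the 64-CPU limit are illustrative assumptions, not kernel API:

	#include <stdint.h>
	#include <stdio.h>

	#define NR_CPUS 64

	/*
	 * Model of cpumask_next_wrap: return the next set bit in @mask
	 * after @cpu, wrapping around to bit 0; return NR_CPUS when @cpu
	 * is the only set bit, i.e. there is nobody else to yield to
	 * (mirrors cpumask_next_wrap returning nr_cpu_ids).
	 */
	static int pick_next_cpu(uint64_t mask, int cpu)
	{
		for (int i = 1; i < NR_CPUS; i++) {
			int next = (cpu + i) % NR_CPUS;

			if (mask & (1ULL << next))
				return next;
		}
		return NR_CPUS;
	}

	int main(void)
	{
		uint64_t stop_mask = 0x16;	/* CPUs 1, 2 and 4 take part */
		int cpu = 2;

		/* Repeated calls hand the yield target around round-robin. */
		for (int i = 0; i < 4; i++) {
			cpu = pick_next_cpu(stop_mask, cpu);
			printf("yield to CPU %d\n", cpu);
		}
		return 0;
	}

Starting from CPU 2 with CPUs 1, 2 and 4 in the mask, successive calls
select 4, 1, 2, 4, ...: the yield target is passed around the waiters in a
circle, which is the round-robin behavior the commit message relies on.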
-rw-r--r--	arch/s390/include/asm/processor.h	3
-rw-r--r--	arch/s390/kernel/processor.c	17
-rw-r--r--	arch/s390/kernel/smp.c	2
-rw-r--r--	include/linux/sched.h	2
-rw-r--r--	kernel/stop_machine.c	14
5 files changed, 25 insertions, 13 deletions
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index b0fcbc37b637..445ce9ee4404 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -36,6 +36,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/cpumask.h>
 #include <linux/linkage.h>
 #include <linux/irqflags.h>
 #include <asm/cpu.h>
@@ -225,7 +226,7 @@ static __no_kasan_or_inline unsigned short stap(void)
  * Give up the time slice of the virtual PU.
  */
 #define cpu_relax_yield cpu_relax_yield
-void cpu_relax_yield(void);
+void cpu_relax_yield(const struct cpumask *cpumask);
 
 #define cpu_relax() barrier()
 
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 5de13307b703..4cdaefec1b7c 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -31,6 +31,7 @@ struct cpu_info {
 };
 
 static DEFINE_PER_CPU(struct cpu_info, cpu_info);
+static DEFINE_PER_CPU(int, cpu_relax_retry);
 
 static bool machine_has_cpu_mhz;
 
@@ -58,13 +59,19 @@ void s390_update_cpu_mhz(void)
 	on_each_cpu(update_cpu_mhz, NULL, 0);
 }
 
-void notrace cpu_relax_yield(void)
+void notrace cpu_relax_yield(const struct cpumask *cpumask)
 {
-	if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) {
-		diag_stat_inc(DIAG_STAT_X044);
-		asm volatile("diag 0,0,0x44");
+	int cpu, this_cpu;
+
+	this_cpu = smp_processor_id();
+	if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
+		__this_cpu_write(cpu_relax_retry, 0);
+		cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
+		if (cpu >= nr_cpu_ids)
+			return;
+		if (arch_vcpu_is_preempted(cpu))
+			smp_yield_cpu(cpu);
 	}
-	barrier();
 }
 EXPORT_SYMBOL(cpu_relax_yield);
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index f00955940694..44974654cbd0 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -414,7 +414,7 @@ void smp_yield_cpu(int cpu)
 		diag_stat_inc_norecursion(DIAG_STAT_X09C);
 		asm volatile("diag %0,0,0x9c"
 			     : : "d" (pcpu_devices[cpu].address));
-	} else if (MACHINE_HAS_DIAG44) {
+	} else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
 		diag_stat_inc_norecursion(DIAG_STAT_X044);
 		asm volatile("diag 0,0,0x44");
 	}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 11837410690f..1f9f3160da7e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1519,7 +1519,7 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma
 #endif
 
 #ifndef cpu_relax_yield
-#define cpu_relax_yield() cpu_relax()
+#define cpu_relax_yield(cpumask) cpu_relax()
 #endif
 
 extern int yield_to(struct task_struct *p, bool preempt);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2b5a6754646f..b8b0c5ff8da9 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -183,6 +183,7 @@ static int multi_cpu_stop(void *data)
 	struct multi_stop_data *msdata = data;
 	enum multi_stop_state curstate = MULTI_STOP_NONE;
 	int cpu = smp_processor_id(), err = 0;
+	const struct cpumask *cpumask;
 	unsigned long flags;
 	bool is_active;
 
@@ -192,15 +193,18 @@ static int multi_cpu_stop(void *data)
 	 */
 	local_save_flags(flags);
 
-	if (!msdata->active_cpus)
-		is_active = cpu == cpumask_first(cpu_online_mask);
-	else
-		is_active = cpumask_test_cpu(cpu, msdata->active_cpus);
+	if (!msdata->active_cpus) {
+		cpumask = cpu_online_mask;
+		is_active = cpu == cpumask_first(cpumask);
+	} else {
+		cpumask = msdata->active_cpus;
+		is_active = cpumask_test_cpu(cpu, cpumask);
+	}
 
 	/* Simple state machine */
 	do {
 		/* Chill out and ensure we re-read multi_stop_state. */
-		cpu_relax_yield();
+		cpu_relax_yield(cpumask);
 		if (msdata->state != curstate) {
 			curstate = msdata->state;
 			switch (curstate) {