author		Heiko Carstens <heiko.carstens@de.ibm.com>	2008-04-17 01:46:13 -0400
committer	Heiko Carstens <heiko.carstens@de.ibm.com>	2008-04-17 01:47:01 -0400
commit		c10fde0d9e2112c25052a8742e893ec5965c0007 (patch)
tree		8d0e3ad337e44e9e06caabd5f0667f27e9ddaca3 /arch/s390/kernel
parent		dbd70fb499952d0ba282f0159dafacfc31d50313 (diff)
[S390] Vertical cpu management.
If vertical cpu polarization is active then the hypervisor will dispatch
certain cpus for a longer time than other cpus for maximum performance.
For example if a guest would have three virtual cpus, each of them with
a share of 33 percent, then in case of vertical cpu polarization all of
the processing time would be combined to a single cpu which would run
all the time, while the other two cpus would get nearly no cpu time.

There are three different types of vertical cpus: high, medium and low.
Low cpus hardly get any real cpu time, while high cpus get a full real
cpu. Medium cpus get something in between.

In order to switch between the two possible modes (default is horizontal)
a 0 for horizontal polarization or a 1 for vertical polarization must be
written to the dispatching sysfs attribute:

/sys/devices/system/cpu/dispatching

The polarization of each single cpu can be figured out by the polarization
sysfs attribute of each cpu:

/sys/devices/system/cpu/cpuX/polarization
horizontal, vertical:high, vertical:medium, vertical:low or unknown.

When switching polarization the polarization attribute may contain the
value unknown until the configuration change is done and the kernel has
figured out the new polarization of each cpu.

Note that running a system with different types of vertical cpus may
result in significant performance regressions. If possible only one type
of vertical cpus should be used. All other cpus should be offlined.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
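[Editor's note] As an illustration only, not part of the patch: a minimal
userspace sketch in C that exercises the two sysfs attributes described
above. It assumes a kernel with this patch applied, root privileges, and
an online cpu0; error handling is kept to a minimum.

/* Hypothetical userspace example: switch to vertical polarization and
 * read back the polarization of cpu0. The paths come from the commit
 * message above. */
#include <stdio.h>

int main(void)
{
	FILE *f;
	char buf[32];

	/* 1 selects vertical polarization, 0 horizontal (the default). */
	f = fopen("/sys/devices/system/cpu/dispatching", "w");
	if (!f || fputs("1\n", f) == EOF) {
		perror("dispatching");
		return 1;
	}
	fclose(f);

	/* May report "unknown" until the kernel has re-read the topology. */
	f = fopen("/sys/devices/system/cpu/cpu0/polarization", "r");
	if (!f || !fgets(buf, sizeof(buf), f)) {
		perror("polarization");
		return 1;
	}
	printf("cpu0 polarization: %s", buf);
	fclose(f);
	return 0;
}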
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--	arch/s390/kernel/smp.c		83
-rw-r--r--	arch/s390/kernel/topology.c	72
2 files changed, 138 insertions(+), 17 deletions(-)
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 5448aa87fa23..d1e8e8a3fb66 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -68,7 +68,9 @@ enum s390_cpu_state {
 };
 
 DEFINE_MUTEX(smp_cpu_state_mutex);
+int smp_cpu_polarization[NR_CPUS];
 static int smp_cpu_state[NR_CPUS];
+static int cpu_management;
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
@@ -454,6 +456,7 @@ static int smp_rescan_cpus_sigp(cpumask_t avail)
 		if (cpu_known(cpu_id))
 			continue;
 		__cpu_logical_map[logical_cpu] = cpu_id;
+		smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
 		if (!cpu_stopped(logical_cpu))
 			continue;
 		cpu_set(logical_cpu, cpu_present_map);
@@ -487,6 +490,7 @@ static int smp_rescan_cpus_sclp(cpumask_t avail)
 		if (cpu_known(cpu_id))
 			continue;
 		__cpu_logical_map[logical_cpu] = cpu_id;
+		smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
 		cpu_set(logical_cpu, cpu_present_map);
 		if (cpu >= info->configured)
 			smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY;
@@ -844,6 +848,7 @@ void __init smp_prepare_boot_cpu(void)
 	S390_lowcore.percpu_offset = __per_cpu_offset[0];
 	current_set[0] = current;
 	smp_cpu_state[0] = CPU_STATE_CONFIGURED;
+	smp_cpu_polarization[0] = POLARIZATION_UNKNWN;
 	spin_lock_init(&(&__get_cpu_var(s390_idle))->lock);
 }
 
@@ -895,15 +900,19 @@ static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf,
 	case 0:
 		if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) {
 			rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]);
-			if (!rc)
+			if (!rc) {
 				smp_cpu_state[cpu] = CPU_STATE_STANDBY;
+				smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+			}
 		}
 		break;
 	case 1:
 		if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) {
 			rc = sclp_cpu_configure(__cpu_logical_map[cpu]);
-			if (!rc)
+			if (!rc) {
 				smp_cpu_state[cpu] = CPU_STATE_CONFIGURED;
+				smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+			}
 		}
 		break;
 	default:
@@ -917,6 +926,34 @@ out:
 static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static ssize_t cpu_polarization_show(struct sys_device *dev, char *buf)
+{
+	int cpu = dev->id;
+	ssize_t count;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	switch (smp_cpu_polarization[cpu]) {
+	case POLARIZATION_HRZ:
+		count = sprintf(buf, "horizontal\n");
+		break;
+	case POLARIZATION_VL:
+		count = sprintf(buf, "vertical:low\n");
+		break;
+	case POLARIZATION_VM:
+		count = sprintf(buf, "vertical:medium\n");
+		break;
+	case POLARIZATION_VH:
+		count = sprintf(buf, "vertical:high\n");
+		break;
+	default:
+		count = sprintf(buf, "unknown\n");
+		break;
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	return count;
+}
+static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL);
+
 static ssize_t show_cpu_address(struct sys_device *dev, char *buf)
 {
 	return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]);
@@ -929,6 +966,7 @@ static struct attribute *cpu_common_attrs[] = {
 	&attr_configure.attr,
 #endif
 	&attr_address.attr,
+	&attr_polarization.attr,
 	NULL,
 };
 
@@ -1073,11 +1111,48 @@ static ssize_t __ref rescan_store(struct sys_device *dev,
 out:
 	put_online_cpus();
 	mutex_unlock(&smp_cpu_state_mutex);
+	if (!cpus_empty(newcpus))
+		topology_schedule_update();
 	return rc ? rc : count;
 }
 static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store);
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static ssize_t dispatching_show(struct sys_device *dev, char *buf)
+{
+	ssize_t count;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	count = sprintf(buf, "%d\n", cpu_management);
+	mutex_unlock(&smp_cpu_state_mutex);
+	return count;
+}
+
+static ssize_t dispatching_store(struct sys_device *dev, const char *buf,
+				 size_t count)
+{
+	int val, rc;
+	char delim;
+
+	if (sscanf(buf, "%d %c", &val, &delim) != 1)
+		return -EINVAL;
+	if (val != 0 && val != 1)
+		return -EINVAL;
+	rc = 0;
+	mutex_lock(&smp_cpu_state_mutex);
+	get_online_cpus();
+	if (cpu_management == val)
+		goto out;
+	rc = topology_set_cpu_management(val);
+	if (!rc)
+		cpu_management = val;
+out:
+	put_online_cpus();
+	mutex_unlock(&smp_cpu_state_mutex);
+	return rc ? rc : count;
+}
+static SYSDEV_ATTR(dispatching, 0644, dispatching_show, dispatching_store);
+
 static int __init topology_init(void)
 {
 	int cpu;
@@ -1091,6 +1166,10 @@ static int __init topology_init(void)
 	if (rc)
 		return rc;
 #endif
+	rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+			       &attr_dispatching.attr);
+	if (rc)
+		return rc;
 	for_each_present_cpu(cpu) {
 		rc = smp_add_present_cpu(cpu);
 		if (rc)
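[Editor's note] An aside on dispatching_store above: the
sscanf(buf, "%d %c", &val, &delim) != 1 test accepts exactly one integer,
optionally followed by whitespace such as the newline that echo appends,
and rejects any trailing garbage. A small standalone sketch of the same
validation pattern, with made-up inputs:

/* Standalone sketch of the input validation used by dispatching_store. */
#include <stdio.h>

static int parse_flag(const char *buf, int *val)
{
	char delim;

	if (sscanf(buf, "%d %c", val, &delim) != 1)
		return -1;	/* no integer, or trailing garbage */
	if (*val != 0 && *val != 1)
		return -1;	/* only 0 and 1 are meaningful here */
	return 0;
}

int main(void)
{
	int v;

	printf("\"1\\n\" -> %d\n", parse_flag("1\n", &v));	/* accepted */
	printf("\"2\"   -> %d\n", parse_flag("2", &v));		/* rejected */
	printf("\"1 x\" -> %d\n", parse_flag("1 x", &v));	/* rejected */
	return 0;
}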
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 369dc1c3bd10..12b39b3d9c38 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -1,6 +1,4 @@
 /*
- * arch/s390/kernel/topology.c
- *
  * Copyright IBM Corp. 2007
  * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
  */
@@ -19,9 +17,17 @@
 #include <asm/sysinfo.h>
 
 #define CPU_BITS 64
+#define NR_MAG 6
+
+#define PTF_HORIZONTAL	(0UL)
+#define PTF_VERTICAL	(1UL)
+#define PTF_CHECK	(2UL)
 
 struct tl_cpu {
-	unsigned char reserved[6];
+	unsigned char reserved0[4];
+	unsigned char :6;
+	unsigned char pp:2;
+	unsigned char reserved1;
 	unsigned short origin;
 	unsigned long mask[CPU_BITS / BITS_PER_LONG];
 };
@@ -36,8 +42,6 @@ union tl_entry {
 	struct tl_container container;
 };
 
-#define NR_MAG 6
-
 struct tl_info {
 	unsigned char reserved0[2];
 	unsigned short length;
@@ -96,8 +100,10 @@ static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
 
 		rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin;
 		for_each_present_cpu(lcpu) {
-			if (__cpu_logical_map[lcpu] == rcpu)
+			if (__cpu_logical_map[lcpu] == rcpu) {
 				cpu_set(lcpu, core->mask);
+				smp_cpu_polarization[lcpu] = tl_cpu->pp;
+			}
 		}
 	}
 }
@@ -127,7 +133,7 @@ static void tl_to_cores(struct tl_info *info)
 
 	mutex_lock(&smp_cpu_state_mutex);
 	clear_cores();
-	tle = (union tl_entry *)&info->tle;
+	tle = info->tle;
 	end = (union tl_entry *)((unsigned long)info + info->length);
 	while (tle < end) {
 		switch (tle->nl) {
@@ -152,7 +158,17 @@ static void tl_to_cores(struct tl_info *info)
 	mutex_unlock(&smp_cpu_state_mutex);
 }
 
-static int ptf(void)
+static void topology_update_polarization_simple(void)
+{
+	int cpu;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	for_each_present_cpu(cpu)
+		smp_cpu_polarization[cpu] = POLARIZATION_HRZ;
+	mutex_unlock(&smp_cpu_state_mutex);
+}
+
+static int ptf(unsigned long fc)
 {
 	int rc;
 
@@ -161,7 +177,25 @@ static int ptf(void)
 		"	ipm	%0\n"
 		"	srl	%0,28\n"
 		: "=d" (rc)
-		: "d" (2UL) : "cc");
+		: "d" (fc) : "cc");
+	return rc;
+}
+
+int topology_set_cpu_management(int fc)
+{
+	int cpu;
+	int rc;
+
+	if (!machine_has_topology)
+		return -EOPNOTSUPP;
+	if (fc)
+		rc = ptf(PTF_VERTICAL);
+	else
+		rc = ptf(PTF_HORIZONTAL);
+	if (rc)
+		return -EBUSY;
+	for_each_present_cpu(cpu)
+		smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
 	return rc;
 }
 
167 201
@@ -171,9 +205,10 @@ void arch_update_cpu_topology(void)
171 struct sys_device *sysdev; 205 struct sys_device *sysdev;
172 int cpu; 206 int cpu;
173 207
174 if (!machine_has_topology) 208 if (!machine_has_topology) {
209 topology_update_polarization_simple();
175 return; 210 return;
176 ptf(); 211 }
177 stsi(info, 15, 1, 2); 212 stsi(info, 15, 1, 2);
178 tl_to_cores(info); 213 tl_to_cores(info);
179 for_each_online_cpu(cpu) { 214 for_each_online_cpu(cpu) {
@@ -187,10 +222,15 @@ static void topology_work_fn(struct work_struct *work)
 	arch_reinit_sched_domains();
 }
 
+void topology_schedule_update(void)
+{
+	schedule_work(&topology_work);
+}
+
 static void topology_timer_fn(unsigned long ignored)
 {
-	if (ptf())
-		schedule_work(&topology_work);
+	if (ptf(PTF_CHECK))
+		topology_schedule_update();
 	set_topology_timer();
 }
 
196 236
@@ -211,9 +251,11 @@ static int __init init_topology_update(void)
211{ 251{
212 int rc; 252 int rc;
213 253
214 if (!machine_has_topology) 254 if (!machine_has_topology) {
255 topology_update_polarization_simple();
215 return 0; 256 return 0;
216 init_timer(&topology_timer); 257 }
258 init_timer_deferrable(&topology_timer);
217 if (machine_has_topology_irq) { 259 if (machine_has_topology_irq) {
218 rc = register_external_interrupt(0x2005, topology_interrupt); 260 rc = register_external_interrupt(0x2005, topology_interrupt);
219 if (rc) 261 if (rc)
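
[Editor's note] For reference, the new pp:2 bit-field in struct tl_cpu sits
in the two low-order bits of the fifth byte of a topology cpu entry (gcc
allocates bit-fields starting at the most significant bit on big-endian
s390, so the anonymous :6 pad occupies the high bits). A hedged sketch of
the equivalent manual extraction; the sample entry bytes are made up, and
the mapping of pp values to the POLARIZATION_* constants used by
smp_cpu_polarization lives in a header outside this diffstat:

/* Sketch: extract the 2-bit polarization value (pp) from byte 4 of a
 * topology cpu entry, mirroring the layout added above
 * (reserved0[4], 6 reserved bits, pp:2, reserved1). */
#include <stdio.h>

#define TL_CPU_PP_BYTE	4	/* offset of the byte holding pp */
#define TL_CPU_PP_MASK	0x03	/* pp occupies the two low-order bits */

int main(void)
{
	/* hypothetical raw topology cpu entry, first six bytes only */
	unsigned char entry[6] = { 0x00, 0x00, 0x00, 0x00, 0x03, 0x00 };
	unsigned int pp = entry[TL_CPU_PP_BYTE] & TL_CPU_PP_MASK;

	printf("pp = %u\n", pp);	/* prints 3 for this sample entry */
	return 0;
}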