diff options
-rw-r--r-- | Documentation/cpu-hotplug.txt | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/hotplug-cpu.c | 182 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/offline_states.h | 18 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/smp.c | 19 |
4 files changed, 216 insertions, 9 deletions
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 9d620c153b04..4d4a644b505e 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt | |||
@@ -49,6 +49,12 @@ maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using | |||
49 | additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets | 49 | additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets |
50 | cpu_possible_map = cpu_present_map + additional_cpus | 50 | cpu_possible_map = cpu_present_map + additional_cpus |
51 | 51 | ||
52 | cede_offline={"off","on"} Use this option to disable/enable placing offlined | ||
53 | processors in an extended H_CEDE state on | ||
54 | supported pseries platforms. | ||
55 | If nothing is specified, | ||
56 | cede_offline is set to "on". | ||
57 | |||
52 | (*) Option valid only for following architectures | 58 | (*) Option valid only for following architectures |
53 | - ia64 | 59 | - ia64 |
54 | 60 | ||
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index ebff6d9a4e39..6ea4698d9176 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/pSeries_reconfig.h> | 30 | #include <asm/pSeries_reconfig.h> |
31 | #include "xics.h" | 31 | #include "xics.h" |
32 | #include "plpar_wrappers.h" | 32 | #include "plpar_wrappers.h" |
33 | #include "offline_states.h" | ||
33 | 34 | ||
34 | /* This version can't take the spinlock, because it never returns */ | 35 | /* This version can't take the spinlock, because it never returns */ |
35 | static struct rtas_args rtas_stop_self_args = { | 36 | static struct rtas_args rtas_stop_self_args = { |
@@ -39,6 +40,55 @@ static struct rtas_args rtas_stop_self_args = { | |||
39 | .rets = &rtas_stop_self_args.args[0], | 40 | .rets = &rtas_stop_self_args.args[0], |
40 | }; | 41 | }; |
41 | 42 | ||
43 | static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) = | ||
44 | CPU_STATE_OFFLINE; | ||
45 | static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; | ||
46 | |||
47 | static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; | ||
48 | |||
49 | static int cede_offline_enabled __read_mostly = 1; | ||
50 | |||
51 | /* | ||
52 | * Enable/disable cede_offline when available. | ||
53 | */ | ||
54 | static int __init setup_cede_offline(char *str) | ||
55 | { | ||
56 | if (!strcmp(str, "off")) | ||
57 | cede_offline_enabled = 0; | ||
58 | else if (!strcmp(str, "on")) | ||
59 | cede_offline_enabled = 1; | ||
60 | else | ||
61 | return 0; | ||
62 | return 1; | ||
63 | } | ||
64 | |||
65 | __setup("cede_offline=", setup_cede_offline); | ||
66 | |||
67 | enum cpu_state_vals get_cpu_current_state(int cpu) | ||
68 | { | ||
69 | return per_cpu(current_state, cpu); | ||
70 | } | ||
71 | |||
72 | void set_cpu_current_state(int cpu, enum cpu_state_vals state) | ||
73 | { | ||
74 | per_cpu(current_state, cpu) = state; | ||
75 | } | ||
76 | |||
77 | enum cpu_state_vals get_preferred_offline_state(int cpu) | ||
78 | { | ||
79 | return per_cpu(preferred_offline_state, cpu); | ||
80 | } | ||
81 | |||
82 | void set_preferred_offline_state(int cpu, enum cpu_state_vals state) | ||
83 | { | ||
84 | per_cpu(preferred_offline_state, cpu) = state; | ||
85 | } | ||
86 | |||
87 | void set_default_offline_state(int cpu) | ||
88 | { | ||
89 | per_cpu(preferred_offline_state, cpu) = default_offline_state; | ||
90 | } | ||
91 | |||
42 | static void rtas_stop_self(void) | 92 | static void rtas_stop_self(void) |
43 | { | 93 | { |
44 | struct rtas_args *args = &rtas_stop_self_args; | 94 | struct rtas_args *args = &rtas_stop_self_args; |
@@ -56,11 +106,61 @@ static void rtas_stop_self(void) | |||
56 | 106 | ||
57 | static void pseries_mach_cpu_die(void) | 107 | static void pseries_mach_cpu_die(void) |
58 | { | 108 | { |
109 | unsigned int cpu = smp_processor_id(); | ||
110 | unsigned int hwcpu = hard_smp_processor_id(); | ||
111 | u8 cede_latency_hint = 0; | ||
112 | |||
59 | local_irq_disable(); | 113 | local_irq_disable(); |
60 | idle_task_exit(); | 114 | idle_task_exit(); |
61 | xics_teardown_cpu(); | 115 | xics_teardown_cpu(); |
62 | unregister_slb_shadow(hard_smp_processor_id(), __pa(get_slb_shadow())); | 116 | |
63 | rtas_stop_self(); | 117 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { |
118 | set_cpu_current_state(cpu, CPU_STATE_INACTIVE); | ||
119 | cede_latency_hint = 2; | ||
120 | |||
121 | get_lppaca()->idle = 1; | ||
122 | if (!get_lppaca()->shared_proc) | ||
123 | get_lppaca()->donate_dedicated_cpu = 1; | ||
124 | |||
125 | printk(KERN_INFO | ||
126 | "cpu %u (hwid %u) ceding for offline with hint %d\n", | ||
127 | cpu, hwcpu, cede_latency_hint); | ||
128 | while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { | ||
129 | extended_cede_processor(cede_latency_hint); | ||
130 | printk(KERN_INFO "cpu %u (hwid %u) returned from cede.\n", | ||
131 | cpu, hwcpu); | ||
132 | printk(KERN_INFO | ||
133 | "Decrementer value = %x Timebase value = %llx\n", | ||
134 | get_dec(), get_tb()); | ||
135 | } | ||
136 | |||
137 | printk(KERN_INFO "cpu %u (hwid %u) got prodded to go online\n", | ||
138 | cpu, hwcpu); | ||
139 | |||
140 | if (!get_lppaca()->shared_proc) | ||
141 | get_lppaca()->donate_dedicated_cpu = 0; | ||
142 | get_lppaca()->idle = 0; | ||
143 | } | ||
144 | |||
145 | if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { | ||
146 | unregister_slb_shadow(hwcpu, __pa(get_slb_shadow())); | ||
147 | |||
148 | /* | ||
149 | * NOTE: Calling start_secondary() here for now to | ||
150 | * start new context. | ||
151 | * However, need to do it cleanly by resetting the | ||
152 | * stack pointer. | ||
153 | */ | ||
154 | start_secondary(); | ||
155 | |||
156 | } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { | ||
157 | |||
158 | set_cpu_current_state(cpu, CPU_STATE_OFFLINE); | ||
159 | unregister_slb_shadow(hard_smp_processor_id(), | ||
160 | __pa(get_slb_shadow())); | ||
161 | rtas_stop_self(); | ||
162 | } | ||
163 | |||
64 | /* Should never get here... */ | 164 | /* Should never get here... */ |
65 | BUG(); | 165 | BUG(); |
66 | for(;;); | 166 | for(;;); |
@@ -106,18 +206,43 @@ static int pseries_cpu_disable(void) | |||
106 | return 0; | 206 | return 0; |
107 | } | 207 | } |
108 | 208 | ||
209 | /* | ||
210 | * pseries_cpu_die: Wait for the cpu to die. | ||
211 | * @cpu: logical processor id of the CPU whose death we're awaiting. | ||
212 | * | ||
213 | * This function is called from the context of the thread which is performing | ||
214 | * the cpu-offline. Here we wait for long enough to allow the cpu in question | ||
215 | * to self-destruct so that the cpu-offline thread can send the CPU_DEAD | ||
216 | * notifications. | ||
217 | * | ||
218 | * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to | ||
219 | * self-destruct. | ||
220 | */ | ||
109 | static void pseries_cpu_die(unsigned int cpu) | 221 | static void pseries_cpu_die(unsigned int cpu) |
110 | { | 222 | { |
111 | int tries; | 223 | int tries; |
112 | int cpu_status; | 224 | int cpu_status = 1; |
113 | unsigned int pcpu = get_hard_smp_processor_id(cpu); | 225 | unsigned int pcpu = get_hard_smp_processor_id(cpu); |
114 | 226 | ||
115 | for (tries = 0; tries < 25; tries++) { | 227 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { |
116 | cpu_status = query_cpu_stopped(pcpu); | 228 | cpu_status = 1; |
117 | if (cpu_status == 0 || cpu_status == -1) | 229 | for (tries = 0; tries < 1000; tries++) { |
118 | break; | 230 | if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) { |
119 | cpu_relax(); | 231 | cpu_status = 0; |
232 | break; | ||
233 | } | ||
234 | cpu_relax(); | ||
235 | } | ||
236 | } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { | ||
237 | |||
238 | for (tries = 0; tries < 25; tries++) { | ||
239 | cpu_status = query_cpu_stopped(pcpu); | ||
240 | if (cpu_status == 0 || cpu_status == -1) | ||
241 | break; | ||
242 | cpu_relax(); | ||
243 | } | ||
120 | } | 244 | } |
245 | |||
121 | if (cpu_status != 0) { | 246 | if (cpu_status != 0) { |
122 | printk("Querying DEAD? cpu %i (%i) shows %i\n", | 247 | printk("Querying DEAD? cpu %i (%i) shows %i\n", |
123 | cpu, pcpu, cpu_status); | 248 | cpu, pcpu, cpu_status); |
@@ -252,10 +377,41 @@ static struct notifier_block pseries_smp_nb = { | |||
252 | .notifier_call = pseries_smp_notifier, | 377 | .notifier_call = pseries_smp_notifier, |
253 | }; | 378 | }; |
254 | 379 | ||
380 | #define MAX_CEDE_LATENCY_LEVELS 4 | ||
381 | #define CEDE_LATENCY_PARAM_LENGTH 10 | ||
382 | #define CEDE_LATENCY_PARAM_MAX_LENGTH \ | ||
383 | (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char)) | ||
384 | #define CEDE_LATENCY_TOKEN 45 | ||
385 | |||
386 | static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH]; | ||
387 | |||
388 | static int parse_cede_parameters(void) | ||
389 | { | ||
390 | int call_status; | ||
391 | |||
392 | memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH); | ||
393 | call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, | ||
394 | NULL, | ||
395 | CEDE_LATENCY_TOKEN, | ||
396 | __pa(cede_parameters), | ||
397 | CEDE_LATENCY_PARAM_MAX_LENGTH); | ||
398 | |||
399 | if (call_status != 0) | ||
400 | printk(KERN_INFO "CEDE_LATENCY: \ | ||
401 | %s %s Error calling get-system-parameter(0x%x)\n", | ||
402 | __FILE__, __func__, call_status); | ||
403 | else | ||
404 | printk(KERN_INFO "CEDE_LATENCY: \ | ||
405 | get-system-parameter successful.\n"); | ||
406 | |||
407 | return call_status; | ||
408 | } | ||
409 | |||
255 | static int __init pseries_cpu_hotplug_init(void) | 410 | static int __init pseries_cpu_hotplug_init(void) |
256 | { | 411 | { |
257 | struct device_node *np; | 412 | struct device_node *np; |
258 | const char *typep; | 413 | const char *typep; |
414 | int cpu; | ||
259 | 415 | ||
260 | for_each_node_by_name(np, "interrupt-controller") { | 416 | for_each_node_by_name(np, "interrupt-controller") { |
261 | typep = of_get_property(np, "compatible", NULL); | 417 | typep = of_get_property(np, "compatible", NULL); |
@@ -283,8 +439,16 @@ static int __init pseries_cpu_hotplug_init(void) | |||
283 | smp_ops->cpu_die = pseries_cpu_die; | 439 | smp_ops->cpu_die = pseries_cpu_die; |
284 | 440 | ||
285 | /* Processors can be added/removed only on LPAR */ | 441 | /* Processors can be added/removed only on LPAR */ |
286 | if (firmware_has_feature(FW_FEATURE_LPAR)) | 442 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
287 | pSeries_reconfig_notifier_register(&pseries_smp_nb); | 443 | pSeries_reconfig_notifier_register(&pseries_smp_nb); |
444 | cpu_maps_update_begin(); | ||
445 | if (cede_offline_enabled && parse_cede_parameters() == 0) { | ||
446 | default_offline_state = CPU_STATE_INACTIVE; | ||
447 | for_each_online_cpu(cpu) | ||
448 | set_default_offline_state(cpu); | ||
449 | } | ||
450 | cpu_maps_update_done(); | ||
451 | } | ||
288 | 452 | ||
289 | return 0; | 453 | return 0; |
290 | } | 454 | } |
diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h new file mode 100644 index 000000000000..22574e0d9d91 --- /dev/null +++ b/arch/powerpc/platforms/pseries/offline_states.h | |||
@@ -0,0 +1,18 @@ | |||
#ifndef _OFFLINE_STATES_H_
#define _OFFLINE_STATES_H_

/*
 * States tracked per CPU by the pseries hotplug code, both as the state a
 * CPU is currently recorded in and as the state it has been asked to enter.
 * CPU_MAX_OFFLINE_STATES is the number of states listed before it.
 */
enum cpu_state_vals {
	CPU_STATE_OFFLINE	= 0,
	CPU_STATE_INACTIVE	= 1,
	CPU_STATE_ONLINE	= 2,
	CPU_MAX_OFFLINE_STATES	= 3
};

/* Accessors for the per-CPU "current" state. */
enum cpu_state_vals get_cpu_current_state(int cpu);
void set_cpu_current_state(int cpu, enum cpu_state_vals state);

/* Accessors for the per-CPU "preferred" (requested) state. */
enum cpu_state_vals get_preferred_offline_state(int cpu);
void set_preferred_offline_state(int cpu, enum cpu_state_vals state);

/* Reset the preferred state of @cpu to the platform default. */
void set_default_offline_state(int cpu);

/*
 * NOTE(review): start_secondary() is defined outside this header's
 * directory; confirm this prototype matches its definition.
 */
int start_secondary(void);

#endif
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 440000cc7130..8868c012268a 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include "plpar_wrappers.h" | 48 | #include "plpar_wrappers.h" |
49 | #include "pseries.h" | 49 | #include "pseries.h" |
50 | #include "xics.h" | 50 | #include "xics.h" |
51 | #include "offline_states.h" | ||
51 | 52 | ||
52 | 53 | ||
53 | /* | 54 | /* |
@@ -84,6 +85,9 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) | |||
84 | /* Fixup atomic count: it exited inside IRQ handler. */ | 85 | /* Fixup atomic count: it exited inside IRQ handler. */ |
85 | task_thread_info(paca[lcpu].__current)->preempt_count = 0; | 86 | task_thread_info(paca[lcpu].__current)->preempt_count = 0; |
86 | 87 | ||
88 | if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE) | ||
89 | goto out; | ||
90 | |||
87 | /* | 91 | /* |
88 | * If the RTAS start-cpu token does not exist then presume the | 92 | * If the RTAS start-cpu token does not exist then presume the |
89 | * cpu is already spinning. | 93 | * cpu is already spinning. |
@@ -98,6 +102,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) | |||
98 | return 0; | 102 | return 0; |
99 | } | 103 | } |
100 | 104 | ||
105 | out: | ||
101 | return 1; | 106 | return 1; |
102 | } | 107 | } |
103 | 108 | ||
@@ -111,12 +116,16 @@ static void __devinit smp_xics_setup_cpu(int cpu) | |||
111 | vpa_init(cpu); | 116 | vpa_init(cpu); |
112 | 117 | ||
113 | cpu_clear(cpu, of_spin_map); | 118 | cpu_clear(cpu, of_spin_map); |
119 | set_cpu_current_state(cpu, CPU_STATE_ONLINE); | ||
120 | set_default_offline_state(cpu); | ||
114 | 121 | ||
115 | } | 122 | } |
116 | #endif /* CONFIG_XICS */ | 123 | #endif /* CONFIG_XICS */ |
117 | 124 | ||
118 | static void __devinit smp_pSeries_kick_cpu(int nr) | 125 | static void __devinit smp_pSeries_kick_cpu(int nr) |
119 | { | 126 | { |
127 | long rc; | ||
128 | unsigned long hcpuid; | ||
120 | BUG_ON(nr < 0 || nr >= NR_CPUS); | 129 | BUG_ON(nr < 0 || nr >= NR_CPUS); |
121 | 130 | ||
122 | if (!smp_startup_cpu(nr)) | 131 | if (!smp_startup_cpu(nr)) |
@@ -128,6 +137,16 @@ static void __devinit smp_pSeries_kick_cpu(int nr) | |||
128 | * the processor will continue on to secondary_start | 137 | * the processor will continue on to secondary_start |
129 | */ | 138 | */ |
130 | paca[nr].cpu_start = 1; | 139 | paca[nr].cpu_start = 1; |
140 | |||
141 | set_preferred_offline_state(nr, CPU_STATE_ONLINE); | ||
142 | |||
143 | if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) { | ||
144 | hcpuid = get_hard_smp_processor_id(nr); | ||
145 | rc = plpar_hcall_norets(H_PROD, hcpuid); | ||
146 | if (rc != H_SUCCESS) | ||
147 | panic("Error: Prod to wake up processor %d Ret= %ld\n", | ||
148 | nr, rc); | ||
149 | } | ||
131 | } | 150 | } |
132 | 151 | ||
133 | static int smp_pSeries_cpu_bootable(unsigned int nr) | 152 | static int smp_pSeries_cpu_bootable(unsigned int nr) |