aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Gautham R Shenoy <ego@in.ibm.com> 2009-10-29 15:22:53 -0400
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2009-11-23 22:33:04 -0500
commit: 3aa565f53c396914a9406388efaa238e9c937fc6 (patch)
tree: c5860c47cb1c6545a07aad87004fd97af25959a4
parent: 69ddb57cbea0b3dd851ea5f1edd1e609ad4da04e (diff)
powerpc/pseries: Add hooks to put the CPU into an appropriate offline state
When a CPU is offlined on POWER currently, we call rtas_stop_self() and hand the CPU back to the resource pool. This path is used for DLPAR which will cause a change in the LPAR configuration which will be visible outside.

This patch changes the default state a CPU is put into when it is offlined. On platforms which support ceding the processor to the hypervisor with a latency hint specifier value, during a cpu offline operation, instead of calling rtas_stop_self(), we cede the vCPU to the hypervisor while passing a latency hint specifier value. The Hypervisor can use this hint to provide better energy savings. Also, during the offline operation, the control of the vCPU remains with the LPAR as opposed to returning it to the resource pool.

The patch achieves this by creating an infrastructure to set the preferred_offline_state() which can be either
- CPU_STATE_OFFLINE: which is the current behaviour of calling rtas_stop_self()
- CPU_STATE_INACTIVE: which cedes the vCPU to the hypervisor with the latency hint specifier.

The codepath which wants to perform a DLPAR operation can set the preferred_offline_state() of a CPU to CPU_STATE_OFFLINE before invoking cpu_down().

The patch also provides a boot-time command line argument to disable/enable CPU_STATE_INACTIVE.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- Documentation/cpu-hotplug.txt 6
-rw-r--r-- arch/powerpc/platforms/pseries/hotplug-cpu.c 182
-rw-r--r-- arch/powerpc/platforms/pseries/offline_states.h 18
-rw-r--r-- arch/powerpc/platforms/pseries/smp.c 19
4 files changed, 216 insertions, 9 deletions
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 9d620c153b04..4d4a644b505e 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -49,6 +49,12 @@ maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using
49additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets 49additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets
50 cpu_possible_map = cpu_present_map + additional_cpus 50 cpu_possible_map = cpu_present_map + additional_cpus
51 51
52cede_offline={"off","on"} Use this option to disable/enable putting offlined
53 processors to an extended H_CEDE state on
54 supported pseries platforms.
55 If nothing is specified,
56 cede_offline is set to "on".
57
52(*) Option valid only for following architectures 58(*) Option valid only for following architectures
53- ia64 59- ia64
54 60
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index ebff6d9a4e39..6ea4698d9176 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -30,6 +30,7 @@
30#include <asm/pSeries_reconfig.h> 30#include <asm/pSeries_reconfig.h>
31#include "xics.h" 31#include "xics.h"
32#include "plpar_wrappers.h" 32#include "plpar_wrappers.h"
33#include "offline_states.h"
33 34
34/* This version can't take the spinlock, because it never returns */ 35/* This version can't take the spinlock, because it never returns */
35static struct rtas_args rtas_stop_self_args = { 36static struct rtas_args rtas_stop_self_args = {
@@ -39,6 +40,55 @@ static struct rtas_args rtas_stop_self_args = {
39 .rets = &rtas_stop_self_args.args[0], 40 .rets = &rtas_stop_self_args.args[0],
40}; 41};
41 42
43static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
44 CPU_STATE_OFFLINE;
45static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
46
47static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
48
49static int cede_offline_enabled __read_mostly = 1;
50
51/*
52 * Enable/disable cede_offline when available.
53 */
54static int __init setup_cede_offline(char *str)
55{
56 if (!strcmp(str, "off"))
57 cede_offline_enabled = 0;
58 else if (!strcmp(str, "on"))
59 cede_offline_enabled = 1;
60 else
61 return 0;
62 return 1;
63}
64
65__setup("cede_offline=", setup_cede_offline);
66
67enum cpu_state_vals get_cpu_current_state(int cpu)
68{
69 return per_cpu(current_state, cpu);
70}
71
72void set_cpu_current_state(int cpu, enum cpu_state_vals state)
73{
74 per_cpu(current_state, cpu) = state;
75}
76
77enum cpu_state_vals get_preferred_offline_state(int cpu)
78{
79 return per_cpu(preferred_offline_state, cpu);
80}
81
82void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
83{
84 per_cpu(preferred_offline_state, cpu) = state;
85}
86
87void set_default_offline_state(int cpu)
88{
89 per_cpu(preferred_offline_state, cpu) = default_offline_state;
90}
91
42static void rtas_stop_self(void) 92static void rtas_stop_self(void)
43{ 93{
44 struct rtas_args *args = &rtas_stop_self_args; 94 struct rtas_args *args = &rtas_stop_self_args;
@@ -56,11 +106,61 @@ static void rtas_stop_self(void)
56 106
57static void pseries_mach_cpu_die(void) 107static void pseries_mach_cpu_die(void)
58{ 108{
109 unsigned int cpu = smp_processor_id();
110 unsigned int hwcpu = hard_smp_processor_id();
111 u8 cede_latency_hint = 0;
112
59 local_irq_disable(); 113 local_irq_disable();
60 idle_task_exit(); 114 idle_task_exit();
61 xics_teardown_cpu(); 115 xics_teardown_cpu();
62 unregister_slb_shadow(hard_smp_processor_id(), __pa(get_slb_shadow())); 116
63 rtas_stop_self(); 117 if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
118 set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
119 cede_latency_hint = 2;
120
121 get_lppaca()->idle = 1;
122 if (!get_lppaca()->shared_proc)
123 get_lppaca()->donate_dedicated_cpu = 1;
124
125 printk(KERN_INFO
126 "cpu %u (hwid %u) ceding for offline with hint %d\n",
127 cpu, hwcpu, cede_latency_hint);
128 while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
129 extended_cede_processor(cede_latency_hint);
130 printk(KERN_INFO "cpu %u (hwid %u) returned from cede.\n",
131 cpu, hwcpu);
132 printk(KERN_INFO
133 "Decrementer value = %x Timebase value = %llx\n",
134 get_dec(), get_tb());
135 }
136
137 printk(KERN_INFO "cpu %u (hwid %u) got prodded to go online\n",
138 cpu, hwcpu);
139
140 if (!get_lppaca()->shared_proc)
141 get_lppaca()->donate_dedicated_cpu = 0;
142 get_lppaca()->idle = 0;
143 }
144
145 if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
146 unregister_slb_shadow(hwcpu, __pa(get_slb_shadow()));
147
148 /*
149 * NOTE: Calling start_secondary() here for now to
150 * start new context.
151 * However, need to do it cleanly by resetting the
152 * stack pointer.
153 */
154 start_secondary();
155
156 } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
157
158 set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
159 unregister_slb_shadow(hard_smp_processor_id(),
160 __pa(get_slb_shadow()));
161 rtas_stop_self();
162 }
163
64 /* Should never get here... */ 164 /* Should never get here... */
65 BUG(); 165 BUG();
66 for(;;); 166 for(;;);
@@ -106,18 +206,43 @@ static int pseries_cpu_disable(void)
106 return 0; 206 return 0;
107} 207}
108 208
209/*
210 * pseries_cpu_die: Wait for the cpu to die.
211 * @cpu: logical processor id of the CPU whose death we're awaiting.
212 *
213 * This function is called from the context of the thread which is performing
214 * the cpu-offline. Here we wait for long enough to allow the cpu in question
215 * to self-destroy so that the cpu-offline thread can send the CPU_DEAD
216 * notifications.
217 *
218 * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
219 * self-destruct.
220 */
109static void pseries_cpu_die(unsigned int cpu) 221static void pseries_cpu_die(unsigned int cpu)
110{ 222{
111 int tries; 223 int tries;
112 int cpu_status; 224 int cpu_status = 1;
113 unsigned int pcpu = get_hard_smp_processor_id(cpu); 225 unsigned int pcpu = get_hard_smp_processor_id(cpu);
114 226
115 for (tries = 0; tries < 25; tries++) { 227 if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
116 cpu_status = query_cpu_stopped(pcpu); 228 cpu_status = 1;
117 if (cpu_status == 0 || cpu_status == -1) 229 for (tries = 0; tries < 1000; tries++) {
118 break; 230 if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
119 cpu_relax(); 231 cpu_status = 0;
232 break;
233 }
234 cpu_relax();
235 }
236 } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
237
238 for (tries = 0; tries < 25; tries++) {
239 cpu_status = query_cpu_stopped(pcpu);
240 if (cpu_status == 0 || cpu_status == -1)
241 break;
242 cpu_relax();
243 }
120 } 244 }
245
121 if (cpu_status != 0) { 246 if (cpu_status != 0) {
122 printk("Querying DEAD? cpu %i (%i) shows %i\n", 247 printk("Querying DEAD? cpu %i (%i) shows %i\n",
123 cpu, pcpu, cpu_status); 248 cpu, pcpu, cpu_status);
@@ -252,10 +377,41 @@ static struct notifier_block pseries_smp_nb = {
252 .notifier_call = pseries_smp_notifier, 377 .notifier_call = pseries_smp_notifier,
253}; 378};
254 379
380#define MAX_CEDE_LATENCY_LEVELS 4
381#define CEDE_LATENCY_PARAM_LENGTH 10
382#define CEDE_LATENCY_PARAM_MAX_LENGTH \
383 (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
384#define CEDE_LATENCY_TOKEN 45
385
386static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];
387
388static int parse_cede_parameters(void)
389{
390 int call_status;
391
392 memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
393 call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
394 NULL,
395 CEDE_LATENCY_TOKEN,
396 __pa(cede_parameters),
397 CEDE_LATENCY_PARAM_MAX_LENGTH);
398
399 if (call_status != 0)
400 printk(KERN_INFO "CEDE_LATENCY: \
401 %s %s Error calling get-system-parameter(0x%x)\n",
402 __FILE__, __func__, call_status);
403 else
404 printk(KERN_INFO "CEDE_LATENCY: \
405 get-system-parameter successful.\n");
406
407 return call_status;
408}
409
255static int __init pseries_cpu_hotplug_init(void) 410static int __init pseries_cpu_hotplug_init(void)
256{ 411{
257 struct device_node *np; 412 struct device_node *np;
258 const char *typep; 413 const char *typep;
414 int cpu;
259 415
260 for_each_node_by_name(np, "interrupt-controller") { 416 for_each_node_by_name(np, "interrupt-controller") {
261 typep = of_get_property(np, "compatible", NULL); 417 typep = of_get_property(np, "compatible", NULL);
@@ -283,8 +439,16 @@ static int __init pseries_cpu_hotplug_init(void)
283 smp_ops->cpu_die = pseries_cpu_die; 439 smp_ops->cpu_die = pseries_cpu_die;
284 440
285 /* Processors can be added/removed only on LPAR */ 441 /* Processors can be added/removed only on LPAR */
286 if (firmware_has_feature(FW_FEATURE_LPAR)) 442 if (firmware_has_feature(FW_FEATURE_LPAR)) {
287 pSeries_reconfig_notifier_register(&pseries_smp_nb); 443 pSeries_reconfig_notifier_register(&pseries_smp_nb);
444 cpu_maps_update_begin();
445 if (cede_offline_enabled && parse_cede_parameters() == 0) {
446 default_offline_state = CPU_STATE_INACTIVE;
447 for_each_online_cpu(cpu)
448 set_default_offline_state(cpu);
449 }
450 cpu_maps_update_done();
451 }
288 452
289 return 0; 453 return 0;
290} 454}
diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h
new file mode 100644
index 000000000000..22574e0d9d91
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/offline_states.h
@@ -0,0 +1,18 @@
1#ifndef _OFFLINE_STATES_H_
2#define _OFFLINE_STATES_H_
3
4/* Cpu offline states go here */
5enum cpu_state_vals {
6 CPU_STATE_OFFLINE,
7 CPU_STATE_INACTIVE,
8 CPU_STATE_ONLINE,
9 CPU_MAX_OFFLINE_STATES
10};
11
12extern enum cpu_state_vals get_cpu_current_state(int cpu);
13extern void set_cpu_current_state(int cpu, enum cpu_state_vals state);
14extern enum cpu_state_vals get_preferred_offline_state(int cpu);
15extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state);
16extern void set_default_offline_state(int cpu);
17extern int start_secondary(void);
18#endif
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 440000cc7130..8868c012268a 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -48,6 +48,7 @@
48#include "plpar_wrappers.h" 48#include "plpar_wrappers.h"
49#include "pseries.h" 49#include "pseries.h"
50#include "xics.h" 50#include "xics.h"
51#include "offline_states.h"
51 52
52 53
53/* 54/*
@@ -84,6 +85,9 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
84 /* Fixup atomic count: it exited inside IRQ handler. */ 85 /* Fixup atomic count: it exited inside IRQ handler. */
85 task_thread_info(paca[lcpu].__current)->preempt_count = 0; 86 task_thread_info(paca[lcpu].__current)->preempt_count = 0;
86 87
88 if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
89 goto out;
90
87 /* 91 /*
88 * If the RTAS start-cpu token does not exist then presume the 92 * If the RTAS start-cpu token does not exist then presume the
89 * cpu is already spinning. 93 * cpu is already spinning.
@@ -98,6 +102,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
98 return 0; 102 return 0;
99 } 103 }
100 104
105out:
101 return 1; 106 return 1;
102} 107}
103 108
@@ -111,12 +116,16 @@ static void __devinit smp_xics_setup_cpu(int cpu)
111 vpa_init(cpu); 116 vpa_init(cpu);
112 117
113 cpu_clear(cpu, of_spin_map); 118 cpu_clear(cpu, of_spin_map);
119 set_cpu_current_state(cpu, CPU_STATE_ONLINE);
120 set_default_offline_state(cpu);
114 121
115} 122}
116#endif /* CONFIG_XICS */ 123#endif /* CONFIG_XICS */
117 124
118static void __devinit smp_pSeries_kick_cpu(int nr) 125static void __devinit smp_pSeries_kick_cpu(int nr)
119{ 126{
127 long rc;
128 unsigned long hcpuid;
120 BUG_ON(nr < 0 || nr >= NR_CPUS); 129 BUG_ON(nr < 0 || nr >= NR_CPUS);
121 130
122 if (!smp_startup_cpu(nr)) 131 if (!smp_startup_cpu(nr))
@@ -128,6 +137,16 @@ static void __devinit smp_pSeries_kick_cpu(int nr)
128 * the processor will continue on to secondary_start 137 * the processor will continue on to secondary_start
129 */ 138 */
130 paca[nr].cpu_start = 1; 139 paca[nr].cpu_start = 1;
140
141 set_preferred_offline_state(nr, CPU_STATE_ONLINE);
142
143 if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
144 hcpuid = get_hard_smp_processor_id(nr);
145 rc = plpar_hcall_norets(H_PROD, hcpuid);
146 if (rc != H_SUCCESS)
147 panic("Error: Prod to wake up processor %d Ret= %ld\n",
148 nr, rc);
149 }
131} 150}
132 151
133static int smp_pSeries_cpu_bootable(unsigned int nr) 152static int smp_pSeries_cpu_bootable(unsigned int nr)