diff options
| -rw-r--r-- | Documentation/cpu-hotplug.txt | 6 | ||||
| -rw-r--r-- | arch/powerpc/platforms/pseries/hotplug-cpu.c | 182 | ||||
| -rw-r--r-- | arch/powerpc/platforms/pseries/offline_states.h | 18 | ||||
| -rw-r--r-- | arch/powerpc/platforms/pseries/smp.c | 19 |
4 files changed, 216 insertions, 9 deletions
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 9d620c153b04..4d4a644b505e 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt | |||
| @@ -49,6 +49,12 @@ maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using | |||
| 49 | additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets | 49 | additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets |
| 50 | cpu_possible_map = cpu_present_map + additional_cpus | 50 | cpu_possible_map = cpu_present_map + additional_cpus |
| 51 | 51 | ||
| 52 | cede_offline={"off","on"} Use this option to disable/enable putting offlined | ||
| 53 | processors into an extended H_CEDE state on | ||
| 54 | supported pseries platforms. | ||
| 55 | If the option is not specified, | ||
| 56 | cede_offline defaults to "on". | ||
| 57 | |||
| 52 | (*) Option valid only for following architectures | 58 | (*) Option valid only for following architectures |
| 53 | - ia64 | 59 | - ia64 |
| 54 | 60 | ||
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index ebff6d9a4e39..6ea4698d9176 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <asm/pSeries_reconfig.h> | 30 | #include <asm/pSeries_reconfig.h> |
| 31 | #include "xics.h" | 31 | #include "xics.h" |
| 32 | #include "plpar_wrappers.h" | 32 | #include "plpar_wrappers.h" |
| 33 | #include "offline_states.h" | ||
| 33 | 34 | ||
| 34 | /* This version can't take the spinlock, because it never returns */ | 35 | /* This version can't take the spinlock, because it never returns */ |
| 35 | static struct rtas_args rtas_stop_self_args = { | 36 | static struct rtas_args rtas_stop_self_args = { |
| @@ -39,6 +40,55 @@ static struct rtas_args rtas_stop_self_args = { | |||
| 39 | .rets = &rtas_stop_self_args.args[0], | 40 | .rets = &rtas_stop_self_args.args[0], |
| 40 | }; | 41 | }; |
| 41 | 42 | ||
| 43 | static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) = | ||
| 44 | CPU_STATE_OFFLINE; /* per-CPU requested state; read by pseries_mach_cpu_die() and pseries_cpu_die() */ | ||
| 45 | static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; /* per-CPU state the CPU has actually reached */ | ||
| 46 | |||
| 47 | static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; /* copied into preferred_offline_state by set_default_offline_state() */ | ||
| 48 | |||
| 49 | static int cede_offline_enabled __read_mostly = 1; /* "cede_offline=" boot option: 1 = "on" (default), 0 = "off" */ | ||
| 50 | |||
| 51 | /* | ||
| 52 | * Handler for the "cede_offline=" boot option: "off" clears cede_offline_enabled, "on" sets it; returns 1 if @str was recognised, 0 otherwise. | ||
| 53 | */ | ||
| 54 | static int __init setup_cede_offline(char *str) | ||
| 55 | { | ||
| 56 | if (!strcmp(str, "off")) | ||
| 57 | cede_offline_enabled = 0; | ||
| 58 | else if (!strcmp(str, "on")) | ||
| 59 | cede_offline_enabled = 1; | ||
| 60 | else | ||
| 61 | return 0; /* neither "off" nor "on": leave cede_offline_enabled untouched */ | ||
| 62 | return 1; | ||
| 63 | } | ||
| 64 | |||
| 65 | __setup("cede_offline=", setup_cede_offline); | ||
| 66 | |||
| 67 | enum cpu_state_vals get_cpu_current_state(int cpu) /* Return the per-CPU current_state value of @cpu. */ | ||
| 68 | { | ||
| 69 | return per_cpu(current_state, cpu); | ||
| 70 | } | ||
| 71 | |||
| 72 | void set_cpu_current_state(int cpu, enum cpu_state_vals state) /* Store @state as the per-CPU current_state of @cpu. */ | ||
| 73 | { | ||
| 74 | per_cpu(current_state, cpu) = state; | ||
| 75 | } | ||
| 76 | |||
| 77 | enum cpu_state_vals get_preferred_offline_state(int cpu) /* Return the per-CPU preferred_offline_state value of @cpu. */ | ||
| 78 | { | ||
| 79 | return per_cpu(preferred_offline_state, cpu); | ||
| 80 | } | ||
| 81 | |||
| 82 | void set_preferred_offline_state(int cpu, enum cpu_state_vals state) /* Store @state as the per-CPU preferred_offline_state of @cpu. */ | ||
| 83 | { | ||
| 84 | per_cpu(preferred_offline_state, cpu) = state; | ||
| 85 | } | ||
| 86 | |||
| 87 | void set_default_offline_state(int cpu) /* Reset @cpu's preferred_offline_state to the file-wide default_offline_state. */ | ||
| 88 | { | ||
| 89 | per_cpu(preferred_offline_state, cpu) = default_offline_state; | ||
| 90 | } | ||
| 91 | |||
| 42 | static void rtas_stop_self(void) | 92 | static void rtas_stop_self(void) |
| 43 | { | 93 | { |
| 44 | struct rtas_args *args = &rtas_stop_self_args; | 94 | struct rtas_args *args = &rtas_stop_self_args; |
| @@ -56,11 +106,61 @@ static void rtas_stop_self(void) | |||
| 56 | 106 | ||
| 57 | static void pseries_mach_cpu_die(void) | 107 | static void pseries_mach_cpu_die(void) |
| 58 | { | 108 | { |
| 109 | unsigned int cpu = smp_processor_id(); | ||
| 110 | unsigned int hwcpu = hard_smp_processor_id(); | ||
| 111 | u8 cede_latency_hint = 0; | ||
| 112 | |||
| 59 | local_irq_disable(); | 113 | local_irq_disable(); |
| 60 | idle_task_exit(); | 114 | idle_task_exit(); |
| 61 | xics_teardown_cpu(); | 115 | xics_teardown_cpu(); |
| 62 | unregister_slb_shadow(hard_smp_processor_id(), __pa(get_slb_shadow())); | 116 | |
| 63 | rtas_stop_self(); | 117 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { |
| 118 | set_cpu_current_state(cpu, CPU_STATE_INACTIVE); | ||
| 119 | cede_latency_hint = 2; | ||
| 120 | |||
| 121 | get_lppaca()->idle = 1; | ||
| 122 | if (!get_lppaca()->shared_proc) | ||
| 123 | get_lppaca()->donate_dedicated_cpu = 1; | ||
| 124 | |||
| 125 | printk(KERN_INFO | ||
| 126 | "cpu %u (hwid %u) ceding for offline with hint %d\n", | ||
| 127 | cpu, hwcpu, cede_latency_hint); | ||
| 128 | while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { | ||
| 129 | extended_cede_processor(cede_latency_hint); | ||
| 130 | printk(KERN_INFO "cpu %u (hwid %u) returned from cede.\n", | ||
| 131 | cpu, hwcpu); | ||
| 132 | printk(KERN_INFO | ||
| 133 | "Decrementer value = %x Timebase value = %llx\n", | ||
| 134 | get_dec(), get_tb()); | ||
| 135 | } | ||
| 136 | |||
| 137 | printk(KERN_INFO "cpu %u (hwid %u) got prodded to go online\n", | ||
| 138 | cpu, hwcpu); | ||
| 139 | |||
| 140 | if (!get_lppaca()->shared_proc) | ||
| 141 | get_lppaca()->donate_dedicated_cpu = 0; | ||
| 142 | get_lppaca()->idle = 0; | ||
| 143 | } | ||
| 144 | |||
| 145 | if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { | ||
| 146 | unregister_slb_shadow(hwcpu, __pa(get_slb_shadow())); | ||
| 147 | |||
| 148 | /* | ||
| 149 | * NOTE: start_secondary() is called directly here, for now, to | ||
| 150 | * start a new context. | ||
| 151 | * This still needs to be done cleanly, by first resetting the | ||
| 152 | * stack pointer. | ||
| 153 | */ | ||
| 154 | start_secondary(); | ||
| 155 | |||
| 156 | } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { | ||
| 157 | |||
| 158 | set_cpu_current_state(cpu, CPU_STATE_OFFLINE); | ||
| 159 | unregister_slb_shadow(hard_smp_processor_id(), | ||
| 160 | __pa(get_slb_shadow())); | ||
| 161 | rtas_stop_self(); | ||
| 162 | } | ||
| 163 | |||
| 64 | /* Should never get here... */ | 164 | /* Should never get here... */ |
| 65 | BUG(); | 165 | BUG(); |
| 66 | for(;;); | 166 | for(;;); |
| @@ -106,18 +206,43 @@ static int pseries_cpu_disable(void) | |||
| 106 | return 0; | 206 | return 0; |
| 107 | } | 207 | } |
| 108 | 208 | ||
| 209 | /* | ||
| 210 | * pseries_cpu_die: Wait for the cpu to die. | ||
| 211 | * @cpu: logical processor id of the CPU whose death we're awaiting. | ||
| 212 | * | ||
| 213 | * This function is called from the context of the thread which is performing | ||
| 214 | * the cpu-offline. Here we wait long enough to allow the cpu in question | ||
| 215 | * to self-destruct so that the cpu-offline thread can send the CPU_DEAD | ||
| 216 | * notifications. | ||
| 217 | * | ||
| 218 | * In contrast, pseries_mach_cpu_die() is called by @cpu itself when it wants to | ||
| 219 | * self-destruct. | ||
| 220 | */ | ||
| 109 | static void pseries_cpu_die(unsigned int cpu) | 221 | static void pseries_cpu_die(unsigned int cpu) |
| 110 | { | 222 | { |
| 111 | int tries; | 223 | int tries; |
| 112 | int cpu_status; | 224 | int cpu_status = 1; |
| 113 | unsigned int pcpu = get_hard_smp_processor_id(cpu); | 225 | unsigned int pcpu = get_hard_smp_processor_id(cpu); |
| 114 | 226 | ||
| 115 | for (tries = 0; tries < 25; tries++) { | 227 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { |
| 116 | cpu_status = query_cpu_stopped(pcpu); | 228 | cpu_status = 1; |
| 117 | if (cpu_status == 0 || cpu_status == -1) | 229 | for (tries = 0; tries < 1000; tries++) { |
| 118 | break; | 230 | if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) { |
| 119 | cpu_relax(); | 231 | cpu_status = 0; |
| 232 | break; | ||
| 233 | } | ||
| 234 | cpu_relax(); | ||
| 235 | } | ||
| 236 | } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { | ||
| 237 | |||
| 238 | for (tries = 0; tries < 25; tries++) { | ||
| 239 | cpu_status = query_cpu_stopped(pcpu); | ||
| 240 | if (cpu_status == 0 || cpu_status == -1) | ||
| 241 | break; | ||
| 242 | cpu_relax(); | ||
| 243 | } | ||
| 120 | } | 244 | } |
| 245 | |||
| 121 | if (cpu_status != 0) { | 246 | if (cpu_status != 0) { |
| 122 | printk("Querying DEAD? cpu %i (%i) shows %i\n", | 247 | printk("Querying DEAD? cpu %i (%i) shows %i\n", |
| 123 | cpu, pcpu, cpu_status); | 248 | cpu, pcpu, cpu_status); |
| @@ -252,10 +377,41 @@ static struct notifier_block pseries_smp_nb = { | |||
| 252 | .notifier_call = pseries_smp_notifier, | 377 | .notifier_call = pseries_smp_notifier, |
| 253 | }; | 378 | }; |
| 254 | 379 | ||
| 380 | #define MAX_CEDE_LATENCY_LEVELS 4 /* presumably the number of cede latency levels expected -- TODO confirm */ | ||
| 381 | #define CEDE_LATENCY_PARAM_LENGTH 10 /* presumably bytes reserved per level -- TODO confirm */ | ||
| 382 | #define CEDE_LATENCY_PARAM_MAX_LENGTH \ | ||
| 383 | (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char)) /* 4 * 10 = 40 bytes */ | ||
| 384 | #define CEDE_LATENCY_TOKEN 45 /* parameter number passed to "ibm,get-system-parameter" */ | ||
| 385 | |||
| 386 | static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH]; /* reply buffer; its __pa() is handed to rtas_call() */ | ||
| 387 | |||
| 388 | /* | ||
| 389 | * Fetch system parameter CEDE_LATENCY_TOKEN into cede_parameters via the "ibm,get-system-parameter" RTAS call and log the outcome. | ||
| 390 | * Returns the rtas_call() status; the caller treats 0 as success. | ||
| 391 | */ | ||
| 392 | static int parse_cede_parameters(void) | ||
| 393 | { | ||
| 394 | int call_status; | ||
| 395 | |||
| 396 | memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH); | ||
| 397 | call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, | ||
| 398 | NULL, CEDE_LATENCY_TOKEN, __pa(cede_parameters), | ||
| 399 | CEDE_LATENCY_PARAM_MAX_LENGTH); | ||
| 400 | |||
| 401 | /* Format strings are plain literals: a backslash-newline inside a literal would splice the next line's indentation into the message. */ | ||
| 402 | if (call_status != 0) | ||
| 403 | printk(KERN_INFO "CEDE_LATENCY: %s %s Error calling get-system-parameter(0x%x)\n", | ||
| 404 | __FILE__, __func__, call_status); | ||
| 405 | else | ||
| 406 | printk(KERN_INFO "CEDE_LATENCY: get-system-parameter successful.\n"); | ||
| 407 | return call_status; | ||
| 408 | } | ||
| 409 | |||
| 255 | static int __init pseries_cpu_hotplug_init(void) | 410 | static int __init pseries_cpu_hotplug_init(void) |
| 256 | { | 411 | { |
| 257 | struct device_node *np; | 412 | struct device_node *np; |
| 258 | const char *typep; | 413 | const char *typep; |
| 414 | int cpu; | ||
| 259 | 415 | ||
| 260 | for_each_node_by_name(np, "interrupt-controller") { | 416 | for_each_node_by_name(np, "interrupt-controller") { |
| 261 | typep = of_get_property(np, "compatible", NULL); | 417 | typep = of_get_property(np, "compatible", NULL); |
| @@ -283,8 +439,16 @@ static int __init pseries_cpu_hotplug_init(void) | |||
| 283 | smp_ops->cpu_die = pseries_cpu_die; | 439 | smp_ops->cpu_die = pseries_cpu_die; |
| 284 | 440 | ||
| 285 | /* Processors can be added/removed only on LPAR */ | 441 | /* Processors can be added/removed only on LPAR */ |
| 286 | if (firmware_has_feature(FW_FEATURE_LPAR)) | 442 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
| 287 | pSeries_reconfig_notifier_register(&pseries_smp_nb); | 443 | pSeries_reconfig_notifier_register(&pseries_smp_nb); |
| 444 | cpu_maps_update_begin(); | ||
| 445 | if (cede_offline_enabled && parse_cede_parameters() == 0) { | ||
| 446 | default_offline_state = CPU_STATE_INACTIVE; | ||
| 447 | for_each_online_cpu(cpu) | ||
| 448 | set_default_offline_state(cpu); | ||
| 449 | } | ||
| 450 | cpu_maps_update_done(); | ||
| 451 | } | ||
| 288 | 452 | ||
| 289 | return 0; | 453 | return 0; |
| 290 | } | 454 | } |
diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h new file mode 100644 index 000000000000..22574e0d9d91 --- /dev/null +++ b/arch/powerpc/platforms/pseries/offline_states.h | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | #ifndef _OFFLINE_STATES_H_ | ||
| 2 | #define _OFFLINE_STATES_H_ | ||
| 3 | |||
| 4 | /* States a pseries CPU can be in, or be asked to move to, across cpu-hotplug offline/online. */ | ||
| 5 | enum cpu_state_vals { | ||
| 6 | CPU_STATE_OFFLINE, /* CPU is stopped through rtas_stop_self() */ | ||
| 7 | CPU_STATE_INACTIVE, /* CPU sits in extended_cede_processor() until woken with H_PROD */ | ||
| 8 | CPU_STATE_ONLINE, /* CPU is running; recorded by smp_xics_setup_cpu() */ | ||
| 9 | CPU_MAX_OFFLINE_STATES /* equals the number of enumerators above */ | ||
| 10 | }; | ||
| 11 | |||
| 12 | extern enum cpu_state_vals get_cpu_current_state(int cpu); /* state @cpu is in now */ | ||
| 13 | extern void set_cpu_current_state(int cpu, enum cpu_state_vals state); | ||
| 14 | extern enum cpu_state_vals get_preferred_offline_state(int cpu); /* state requested for @cpu */ | ||
| 15 | extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state); | ||
| 16 | extern void set_default_offline_state(int cpu); /* preferred state := platform default */ | ||
| 17 | extern int start_secondary(void); /* not defined in this patch; called by pseries_mach_cpu_die() when a ceded CPU is brought back online */ | ||
| 18 | #endif /* _OFFLINE_STATES_H_ */ | ||
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 440000cc7130..8868c012268a 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c | |||
| @@ -48,6 +48,7 @@ | |||
| 48 | #include "plpar_wrappers.h" | 48 | #include "plpar_wrappers.h" |
| 49 | #include "pseries.h" | 49 | #include "pseries.h" |
| 50 | #include "xics.h" | 50 | #include "xics.h" |
| 51 | #include "offline_states.h" | ||
| 51 | 52 | ||
| 52 | 53 | ||
| 53 | /* | 54 | /* |
| @@ -84,6 +85,9 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) | |||
| 84 | /* Fixup atomic count: it exited inside IRQ handler. */ | 85 | /* Fixup atomic count: it exited inside IRQ handler. */ |
| 85 | task_thread_info(paca[lcpu].__current)->preempt_count = 0; | 86 | task_thread_info(paca[lcpu].__current)->preempt_count = 0; |
| 86 | 87 | ||
| 88 | if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE) | ||
| 89 | goto out; | ||
| 90 | |||
| 87 | /* | 91 | /* |
| 88 | * If the RTAS start-cpu token does not exist then presume the | 92 | * If the RTAS start-cpu token does not exist then presume the |
| 89 | * cpu is already spinning. | 93 | * cpu is already spinning. |
| @@ -98,6 +102,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) | |||
| 98 | return 0; | 102 | return 0; |
| 99 | } | 103 | } |
| 100 | 104 | ||
| 105 | out: | ||
| 101 | return 1; | 106 | return 1; |
| 102 | } | 107 | } |
| 103 | 108 | ||
| @@ -111,12 +116,16 @@ static void __devinit smp_xics_setup_cpu(int cpu) | |||
| 111 | vpa_init(cpu); | 116 | vpa_init(cpu); |
| 112 | 117 | ||
| 113 | cpu_clear(cpu, of_spin_map); | 118 | cpu_clear(cpu, of_spin_map); |
| 119 | set_cpu_current_state(cpu, CPU_STATE_ONLINE); | ||
| 120 | set_default_offline_state(cpu); | ||
| 114 | 121 | ||
| 115 | } | 122 | } |
| 116 | #endif /* CONFIG_XICS */ | 123 | #endif /* CONFIG_XICS */ |
| 117 | 124 | ||
| 118 | static void __devinit smp_pSeries_kick_cpu(int nr) | 125 | static void __devinit smp_pSeries_kick_cpu(int nr) |
| 119 | { | 126 | { |
| 127 | long rc; | ||
| 128 | unsigned long hcpuid; | ||
| 120 | BUG_ON(nr < 0 || nr >= NR_CPUS); | 129 | BUG_ON(nr < 0 || nr >= NR_CPUS); |
| 121 | 130 | ||
| 122 | if (!smp_startup_cpu(nr)) | 131 | if (!smp_startup_cpu(nr)) |
| @@ -128,6 +137,16 @@ static void __devinit smp_pSeries_kick_cpu(int nr) | |||
| 128 | * the processor will continue on to secondary_start | 137 | * the processor will continue on to secondary_start |
| 129 | */ | 138 | */ |
| 130 | paca[nr].cpu_start = 1; | 139 | paca[nr].cpu_start = 1; |
| 140 | |||
| 141 | set_preferred_offline_state(nr, CPU_STATE_ONLINE); | ||
| 142 | |||
| 143 | if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) { | ||
| 144 | hcpuid = get_hard_smp_processor_id(nr); | ||
| 145 | rc = plpar_hcall_norets(H_PROD, hcpuid); | ||
| 146 | if (rc != H_SUCCESS) | ||
| 147 | panic("Error: Prod to wake up processor %d Ret= %ld\n", | ||
| 148 | nr, rc); | ||
| 149 | } | ||
| 131 | } | 150 | } |
| 132 | 151 | ||
| 133 | static int smp_pSeries_cpu_bootable(unsigned int nr) | 152 | static int smp_pSeries_cpu_bootable(unsigned int nr) |
