author		Shreyas B. Prabhu <shreyas@linux.vnet.ibm.com>	2014-12-09 13:56:52 -0500
committer	Michael Ellerman <mpe@ellerman.id.au>		2014-12-14 18:46:40 -0500
commit		7cba160ad789a3ad7e68b92bf20eaad6ed171f80 (patch)
tree		ef57d54fcf61e5acf912e03004c0913457d3832b /arch
parent		8eb8ac89a364305d05ad16be983b7890eb462cc3 (diff)
powernv/cpuidle: Redesign idle states management
Deep idle states like sleep and winkle are per-core idle states. A core enters these states only when all of its threads enter either the particular idle state or a deeper one. Tasks such as the fastsleep hardware bug workaround and the hypervisor core state save have to be done only by the last thread of the core entering a deep idle state, and similarly tasks like timebase resync and hypervisor core register restore have to be done only by the first thread waking up from these states.

The current idle state management has no way to distinguish the first/last thread of the core entering/waking from idle states, so tasks like timebase resync are done by every thread. This is not only suboptimal, but can cause functionality issues when subcores and KVM are involved.

This patch adds the necessary infrastructure to track the idle states of threads in a per-core structure, and uses this information to perform tasks like the fastsleep workaround and timebase resync only once per core.

Signed-off-by: Shreyas B. Prabhu <shreyas@linux.vnet.ibm.com>
Originally-by: Preeti U. Murthy <preeti@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: linux-pm@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
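For readers following the design: the patch keeps one status word per core, whose low eight bits track which threads are not in a deep idle state (one bit per thread, cf. PNV_CORE_IDLE_THREAD_BITS) and whose 0x100 bit acts as a lock (PNV_CORE_IDLE_LOCK_BIT). A thread entering sleep/winkle atomically clears its bit; if no thread bits remain set it is the last thread in and runs the per-core entry work (e.g. the fastsleep workaround). A waking thread atomically sets its bit back; if all thread bits were clear beforehand it is the first thread out and resyncs the timebase before releasing the lock. The following is a minimal user-space sketch of that bookkeeping using C11 atomics rather than the kernel's lwarx/stwcx. loops; the helpers core_idle_enter()/core_idle_exit() are hypothetical illustrations, not part of the patch, and the lock-bit spin is omitted for brevity.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Same layout as PNV_CORE_IDLE_THREAD_BITS / PNV_CORE_IDLE_LOCK_BIT. */
#define CORE_IDLE_THREAD_BITS	0x0FFu
#define CORE_IDLE_LOCK_BIT	0x100u

/*
 * Clear this thread's bit in the per-core word. Returns true when the
 * caller is the last thread of the core to enter a deep idle state.
 */
static bool core_idle_enter(_Atomic uint32_t *core_state, uint32_t thread_mask)
{
	uint32_t oldval, newval;

	do {
		oldval = atomic_load(core_state);
		newval = oldval & ~thread_mask;		/* clear our thread bit */
	} while (!atomic_compare_exchange_weak(core_state, &oldval, newval));

	return (newval & CORE_IDLE_THREAD_BITS) == 0;
}

/*
 * Set this thread's bit again on wakeup. Returns true when the caller is
 * the first thread of the core to wake up (no thread bit was set before).
 * The real code additionally spins while CORE_IDLE_LOCK_BIT is held.
 */
static bool core_idle_exit(_Atomic uint32_t *core_state, uint32_t thread_mask)
{
	uint32_t oldval, newval;

	do {
		oldval = atomic_load(core_state);
		newval = oldval | thread_mask;		/* set our thread bit */
	} while (!atomic_compare_exchange_weak(core_state, &oldval, newval));

	return (oldval & CORE_IDLE_THREAD_BITS) == 0;
}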
Diffstat (limited to 'arch')
-rw-r--r--	arch/powerpc/include/asm/cpuidle.h		20
-rw-r--r--	arch/powerpc/include/asm/opal.h			2
-rw-r--r--	arch/powerpc/include/asm/paca.h			8
-rw-r--r--	arch/powerpc/include/asm/processor.h		2
-rw-r--r--	arch/powerpc/kernel/asm-offsets.c		9
-rw-r--r--	arch/powerpc/kernel/exceptions-64s.S		24
-rw-r--r--	arch/powerpc/kernel/idle_power7.S		197
-rw-r--r--	arch/powerpc/platforms/powernv/opal-wrappers.S	37
-rw-r--r--	arch/powerpc/platforms/powernv/setup.c		49
-rw-r--r--	arch/powerpc/platforms/powernv/smp.c		3
10 files changed, 294 insertions, 57 deletions
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
new file mode 100644
index 000000000000..d2f99ca1e3a6
--- /dev/null
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_POWERPC_CPUIDLE_H
+#define _ASM_POWERPC_CPUIDLE_H
+
+#ifdef CONFIG_PPC_POWERNV
+/* Used in powernv idle state management */
+#define PNV_THREAD_RUNNING		0
+#define PNV_THREAD_NAP			1
+#define PNV_THREAD_SLEEP		2
+#define PNV_THREAD_WINKLE		3
+#define PNV_CORE_IDLE_LOCK_BIT		0x100
+#define PNV_CORE_IDLE_THREAD_BITS	0x0FF
+
+#ifndef __ASSEMBLY__
+extern u32 pnv_fastsleep_workaround_at_entry[];
+extern u32 pnv_fastsleep_workaround_at_exit[];
+#endif
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 6dedf9b05a86..3dea31c1080c 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -160,6 +160,7 @@ struct opal_sg_list {
 #define OPAL_PCI_ERR_INJECT			96
 #define OPAL_PCI_EEH_FREEZE_SET			97
 #define OPAL_HANDLE_HMI				98
+#define OPAL_CONFIG_CPU_IDLE_STATE		99
 #define OPAL_REGISTER_DUMP_REGION		101
 #define OPAL_UNREGISTER_DUMP_REGION		102
 #define OPAL_WRITE_TPO				103
@@ -175,6 +176,7 @@ struct opal_sg_list {
  */
 #define OPAL_PM_NAP_ENABLED		0x00010000
 #define OPAL_PM_SLEEP_ENABLED		0x00020000
+#define OPAL_PM_SLEEP_ENABLED_ER1	0x00080000

 #ifndef __ASSEMBLY__

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 24a386cbb928..a0a16847bd40 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -152,6 +152,14 @@ struct paca_struct {
 	u64 tm_scratch;			/* TM scratch area for reclaim */
 #endif

+#ifdef CONFIG_PPC_POWERNV
+	/* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
+	u32 *core_idle_state_ptr;
+	u8 thread_idle_state;		/* PNV_THREAD_RUNNING/NAP/SLEEP */
+	/* Mask to indicate thread id in core */
+	u8 thread_mask;
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* Exclusive emergency stack pointer for machine check exception. */
 	void *mc_emergency_sp;
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 29c3798cf800..f5c45b37c0d4 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -452,7 +452,7 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};

 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
 extern unsigned long power7_nap(int check_irq);
-extern void power7_sleep(void);
+extern unsigned long power7_sleep(void);
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
 extern void poweroff_now(void);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c161ef3f28a1..bbd27fe0c039 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -726,5 +726,14 @@ int main(void)
 					arch.timing_last_enter.tv32.tbl));
 #endif

+#ifdef CONFIG_PPC_POWERNV
+	DEFINE(PACA_CORE_IDLE_STATE_PTR,
+			offsetof(struct paca_struct, core_idle_state_ptr));
+	DEFINE(PACA_THREAD_IDLE_STATE,
+			offsetof(struct paca_struct, thread_idle_state));
+	DEFINE(PACA_THREAD_MASK,
+			offsetof(struct paca_struct, thread_mask));
+#endif
+
 	return 0;
 }
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index db08382e19f1..289fe718ecd4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -15,6 +15,7 @@
 #include <asm/hw_irq.h>
 #include <asm/exception-64s.h>
 #include <asm/ptrace.h>
+#include <asm/cpuidle.h>

 /*
  * We layout physical memory as follows:
@@ -109,15 +110,19 @@ BEGIN_FTR_SECTION
 	rlwinm.	r13,r13,47-31,30,31
 	beq	9f

-	/* waking up from powersave (nap) state */
-	cmpwi	cr1,r13,2
-	/* Total loss of HV state is fatal, we could try to use the
-	 * PIR to locate a PACA, then use an emergency stack etc...
-	 * OPAL v3 based powernv platforms have new idle states
-	 * which fall in this catagory.
-	 */
-	bgt	cr1,8f
+	cmpwi	cr3,r13,2
+
 	GET_PACA(r13)
+	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi	cr2,r0,PNV_THREAD_NAP
+	bgt	cr2,8f		/* Either sleep or Winkle */
+
+	/* Waking up from nap should not cause hypervisor state loss */
+	bgt	cr3,.
+
+	/* Waking up from nap */
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */

 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	li	r0,KVM_HWTHREAD_IN_KERNEL
@@ -133,7 +138,7 @@ BEGIN_FTR_SECTION

 	/* Return SRR1 from power7_nap() */
 	mfspr	r3,SPRN_SRR1
-	beq	cr1,2f
+	beq	cr3,2f
 	b	power7_wakeup_noloss
 2:	b	power7_wakeup_loss

@@ -1382,6 +1387,7 @@ machine_check_handle_early:
 	MACHINE_CHECK_HANDLER_WINDUP
 	GET_PACA(r13)
 	ld	r1,PACAR1(r13)
+	li	r3,PNV_THREAD_NAP
 	b	power7_enter_nap_mode
 4:
 #endif
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index e5aba6abbe6c..0f2c113c8ca5 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -18,6 +18,7 @@
 #include <asm/hw_irq.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/opal.h>
+#include <asm/cpuidle.h>

 #undef DEBUG

@@ -37,8 +38,7 @@

 /*
  * Pass requested state in r3:
- *	0 - nap
- *	1 - sleep
+ *	r3 - PNV_THREAD_NAP/SLEEP/WINKLE
  *
  * To check IRQ_HAPPENED in r4
  *	0 - don't check
@@ -123,12 +123,58 @@ power7_enter_nap_mode:
 	li	r4,KVM_HWTHREAD_IN_NAP
 	stb	r4,HSTATE_HWTHREAD_STATE(r13)
 #endif
-	cmpwi	cr0,r3,1
-	beq	2f
+	stb	r3,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi	cr1,r3,PNV_THREAD_SLEEP
+	bge	cr1,2f
 	IDLE_STATE_ENTER_SEQ(PPC_NAP)
 	/* No return */
-2:	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
-	/* No return */
+2:
+	/* Sleep or winkle */
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop1:
+	lwarx	r15,0,r14
+	andc	r15,r15,r7			/* Clear thread bit */
+
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+
+/*
+ * If cr0 = 0, then current thread is the last thread of the core entering
+ * sleep. Last thread needs to execute the hardware bug workaround code if
+ * required by the platform.
+ * Make the workaround call unconditionally here. The below branch call is
+ * patched out when the idle states are discovered if the platform does not
+ * require it.
+ */
+.global pnv_fastsleep_workaround_at_entry
+pnv_fastsleep_workaround_at_entry:
+	beq	fastsleep_workaround_at_entry
+
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+common_enter: /* common code for all the threads entering sleep */
+	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+
+fastsleep_workaround_at_entry:
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+	/* Fast sleep workaround */
+	li	r3,1
+	li	r4,1
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+
+	/* Clear Lock bit */
+	li	r0,0
+	lwsync
+	stw	r0,0(r14)
+	b	common_enter
+

 _GLOBAL(power7_idle)
 	/* Now check if user or arch enabled NAP mode */
@@ -141,49 +187,16 @@ _GLOBAL(power7_idle)

 _GLOBAL(power7_nap)
 	mr	r4,r3
-	li	r3,0
+	li	r3,PNV_THREAD_NAP
 	b	power7_powersave_common
 	/* No return */

 _GLOBAL(power7_sleep)
-	li	r3,1
+	li	r3,PNV_THREAD_SLEEP
 	li	r4,1
 	b	power7_powersave_common
 	/* No return */

-/*
- * Make opal call in realmode. This is a generic function to be called
- * from realmode from reset vector. It handles endianess.
- *
- * r13 - paca pointer
- * r1 - stack pointer
- * r3 - opal token
- */
-opal_call_realmode:
-	mflr	r12
-	std	r12,_LINK(r1)
-	ld	r2,PACATOC(r13)
-	/* Set opal return address */
-	LOAD_REG_ADDR(r0,return_from_opal_call)
-	mtlr	r0
-	/* Handle endian-ness */
-	li	r0,MSR_LE
-	mfmsr	r12
-	andc	r12,r12,r0
-	mtspr	SPRN_HSRR1,r12
-	mr	r0,r3			/* Move opal token to r0 */
-	LOAD_REG_ADDR(r11,opal)
-	ld	r12,8(r11)
-	ld	r2,0(r11)
-	mtspr	SPRN_HSRR0,r12
-	hrfid
-
-return_from_opal_call:
-	FIXUP_ENDIAN
-	ld	r0,_LINK(r1)
-	mtlr	r0
-	blr
-
 #define CHECK_HMI_INTERRUPT			\
 	mfspr	r0,SPRN_SRR1;			\
 BEGIN_FTR_SECTION_NESTED(66);			\
@@ -197,7 +210,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
 	ld	r2,PACATOC(r13);			\
 	ld	r1,PACAR1(r13);				\
 	std	r3,ORIG_GPR3(r1);	/* Save original r3 */	\
-	li	r3,OPAL_HANDLE_HMI;	/* Pass opal token argument*/	\
+	li	r0,OPAL_HANDLE_HMI;	/* Pass opal token argument*/	\
 	bl	opal_call_realmode;			\
 	ld	r3,ORIG_GPR3(r1);	/* Restore original r3 */	\
 20:	nop;
@@ -206,16 +219,105 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
 _GLOBAL(power7_wakeup_tb_loss)
 	ld	r2,PACATOC(r13);
 	ld	r1,PACAR1(r13)
+	/*
+	 * Before entering any idle state, the NVGPRs are saved in the stack
+	 * and they are restored before switching to the process context. Hence
+	 * until they are restored, they are free to be used.
+	 *
+	 * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
+	 * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
+	 * wakeup reason if we branch to kvm_start_guest.
+	 */

+	mfspr	r16,SPRN_SRR1
 BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop2:
+	lwarx	r15,0,r14
+	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	/*
+	 * Lock bit is set in one of the 2 cases-
+	 * a. In the sleep/winkle enter path, the last thread is executing
+	 * fastsleep workaround code.
+	 * b. In the wake up path, another thread is executing fastsleep
+	 * workaround undo code or resyncing timebase or restoring context
+	 * In either case loop until the lock bit is cleared.
+	 */
+	bne	core_idle_lock_held
+
+	cmpwi	cr2,r15,0
+	or	r15,r15,r7		/* Set thread bit */
+
+	beq	cr2,first_thread
+
+	/* Not first thread in core to wake up */
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+	b	common_exit
+
+core_idle_lock_held:
+	HMT_LOW
+core_idle_lock_loop:
+	lwz	r15,0(14)
+	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	bne	core_idle_lock_loop
+	HMT_MEDIUM
+	b	lwarx_loop2
+
+first_thread:
+	/* First thread in core to wakeup */
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+
+	/*
+	 * First thread in the core waking up from fastsleep. It needs to
+	 * call the fastsleep workaround code if the platform requires it.
+	 * Call it unconditionally here. The below branch instruction will
+	 * be patched out when the idle states are discovered if platform
+	 * does not require workaround.
+	 */
+.global pnv_fastsleep_workaround_at_exit
+pnv_fastsleep_workaround_at_exit:
+	b	fastsleep_workaround_at_exit
+
+timebase_resync:
+	/* Do timebase resync if we are waking up from sleep. Use cr3 value
+	 * set in exceptions-64s.S */
+	ble	cr3,clear_lock
 	/* Time base re-sync */
-	li	r3,OPAL_RESYNC_TIMEBASE
+	li	r0,OPAL_RESYNC_TIMEBASE
 	bl	opal_call_realmode;
-
 	/* TODO: Check r3 for failure */

+clear_lock:
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	lwsync
+	stw	r15,0(r14)
+
+common_exit:
+	li	r5,PNV_THREAD_RUNNING
+	stb	r5,PACA_THREAD_IDLE_STATE(r13)
+
+	mtspr	SPRN_SRR1,r16
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	li	r0,KVM_HWTHREAD_IN_KERNEL
+	stb	r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r0,0
+	beq	6f
+	b	kvm_start_guest
+6:
+#endif
+
 	REST_NVGPRS(r1)
 	REST_GPR(2, r1)
 	ld	r3,_CCR(r1)
@@ -228,6 +330,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	mtspr	SPRN_SRR0,r5
 	rfid

+fastsleep_workaround_at_exit:
+	li	r3,1
+	li	r4,0
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+	b	timebase_resync
+
 /*
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 2111e08d406b..78289ed7058c 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -158,6 +158,43 @@ opal_tracepoint_return:
 	blr
 #endif

+/*
+ * Make opal call in realmode. This is a generic function to be called
+ * from realmode. It handles endianness.
+ *
+ * r13 - paca pointer
+ * r1 - stack pointer
+ * r0 - opal token
+ */
+_GLOBAL(opal_call_realmode)
+	mflr	r12
+	std	r12,PPC_LR_STKOFF(r1)
+	ld	r2,PACATOC(r13)
+	/* Set opal return address */
+	LOAD_REG_ADDR(r12,return_from_opal_call)
+	mtlr	r12
+
+	mfmsr	r12
+#ifdef __LITTLE_ENDIAN__
+	/* Handle endian-ness */
+	li	r11,MSR_LE
+	andc	r12,r12,r11
+#endif
+	mtspr	SPRN_HSRR1,r12
+	LOAD_REG_ADDR(r11,opal)
+	ld	r12,8(r11)
+	ld	r2,0(r11)
+	mtspr	SPRN_HSRR0,r12
+	hrfid
+
+return_from_opal_call:
+#ifdef __LITTLE_ENDIAN__
+	FIXUP_ENDIAN
+#endif
+	ld	r12,PPC_LR_STKOFF(r1)
+	mtlr	r12
+	blr
+
 OPAL_CALL(opal_invalid_call,			OPAL_INVALID_CALL);
 OPAL_CALL(opal_console_write,			OPAL_CONSOLE_WRITE);
 OPAL_CALL(opal_console_read,			OPAL_CONSOLE_READ);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 88e579e62a73..2e9b53bb73e2 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,6 +36,9 @@
 #include <asm/opal.h>
 #include <asm/kexec.h>
 #include <asm/smp.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>

 #include "powernv.h"

@@ -290,10 +293,45 @@ static void __init pnv_setup_machdep_rtas(void)

 static u32 supported_cpuidle_states;

+static void pnv_alloc_idle_core_states(void)
+{
+	int i, j;
+	int nr_cores = cpu_nr_cores();
+	u32 *core_idle_state;
+
+	/*
+	 * core_idle_state - First 8 bits track the idle state of each thread
+	 * of the core. The 8th bit is the lock bit. Initially all thread bits
+	 * are set. They are cleared when the thread enters deep idle state
+	 * like sleep and winkle. Initially the lock bit is cleared.
+	 * The lock bit has 2 purposes
+	 * a. While the first thread is restoring core state, it prevents
+	 * other threads in the core from switching to process context.
+	 * b. While the last thread in the core is saving the core state, it
+	 * prevents a different thread from waking up.
+	 */
+	for (i = 0; i < nr_cores; i++) {
+		int first_cpu = i * threads_per_core;
+		int node = cpu_to_node(first_cpu);
+
+		core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
+		*core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+
+		for (j = 0; j < threads_per_core; j++) {
+			int cpu = first_cpu + j;
+
+			paca[cpu].core_idle_state_ptr = core_idle_state;
+			paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
+			paca[cpu].thread_mask = 1 << j;
+		}
+	}
+}
+
 u32 pnv_get_supported_cpuidle_states(void)
 {
 	return supported_cpuidle_states;
 }
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);

 static int __init pnv_init_idle_states(void)
 {
@@ -330,13 +368,20 @@ static int __init pnv_init_idle_states(void)
 		flags = be32_to_cpu(idle_state_flags[i]);
 		supported_cpuidle_states |= flags;
 	}
-
+	if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_entry,
+			PPC_INST_NOP);
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_exit,
+			PPC_INST_NOP);
+	}
+	pnv_alloc_idle_core_states();
 	return 0;
 }

 subsys_initcall(pnv_init_idle_states);

-
 static int __init pnv_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 83299ef2dc3d..c0691d0fb385 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -168,7 +168,8 @@ static void pnv_smp_cpu_kill_self(void)
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
 	while (!generic_check_cpu_restart(cpu)) {
 		ppc64_runlatch_off();
-		if (idle_states & OPAL_PM_SLEEP_ENABLED)
+		if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
+				(idle_states & OPAL_PM_SLEEP_ENABLED_ER1))
 			srr1 = power7_sleep();
 		else
 			srr1 = power7_nap(1);