aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
authorShreyas B. Prabhu <shreyas@linux.vnet.ibm.com>2014-12-09 13:56:52 -0500
committerMichael Ellerman <mpe@ellerman.id.au>2014-12-14 18:46:40 -0500
commit7cba160ad789a3ad7e68b92bf20eaad6ed171f80 (patch)
treeef57d54fcf61e5acf912e03004c0913457d3832b /arch/powerpc/kernel
parent8eb8ac89a364305d05ad16be983b7890eb462cc3 (diff)
powernv/cpuidle: Redesign idle states management
Deep idle states like sleep and winkle are per core idle states. A core enters these states only when all the threads enter either the particular idle state or a deeper one. There are tasks like fastsleep hardware bug workaround and hypervisor core state save which have to be done only by the last thread of the core entering deep idle state and similarly tasks like timebase resync, hypervisor core register restore that have to be done only by the first thread waking up from these states. The current idle state management does not have a way to distinguish the first/last thread of the core waking/entering idle states. Tasks like timebase resync are done for all the threads. This not only is suboptimal, but can cause functionality issues when subcores and KVM are involved. This patch adds the necessary infrastructure to track idle states of threads in a per-core structure. It uses this info to perform tasks like fastsleep workaround and timebase resync only once per core. Signed-off-by: Shreyas B. Prabhu <shreyas@linux.vnet.ibm.com> Originally-by: Preeti U. Murthy <preeti@linux.vnet.ibm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Rafael J. Wysocki <rjw@rjwysocki.net> Cc: linux-pm@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/asm-offsets.c9
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S24
-rw-r--r--arch/powerpc/kernel/idle_power7.S197
3 files changed, 177 insertions, 53 deletions
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c161ef3f28a1..bbd27fe0c039 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -726,5 +726,14 @@ int main(void)
726 arch.timing_last_enter.tv32.tbl)); 726 arch.timing_last_enter.tv32.tbl));
727#endif 727#endif
728 728
729#ifdef CONFIG_PPC_POWERNV
730 DEFINE(PACA_CORE_IDLE_STATE_PTR,
731 offsetof(struct paca_struct, core_idle_state_ptr));
732 DEFINE(PACA_THREAD_IDLE_STATE,
733 offsetof(struct paca_struct, thread_idle_state));
734 DEFINE(PACA_THREAD_MASK,
735 offsetof(struct paca_struct, thread_mask));
736#endif
737
729 return 0; 738 return 0;
730} 739}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index db08382e19f1..289fe718ecd4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -15,6 +15,7 @@
15#include <asm/hw_irq.h> 15#include <asm/hw_irq.h>
16#include <asm/exception-64s.h> 16#include <asm/exception-64s.h>
17#include <asm/ptrace.h> 17#include <asm/ptrace.h>
18#include <asm/cpuidle.h>
18 19
19/* 20/*
20 * We layout physical memory as follows: 21 * We layout physical memory as follows:
@@ -109,15 +110,19 @@ BEGIN_FTR_SECTION
109 rlwinm. r13,r13,47-31,30,31 110 rlwinm. r13,r13,47-31,30,31
110 beq 9f 111 beq 9f
111 112
112 /* waking up from powersave (nap) state */ 113 cmpwi cr3,r13,2
113 cmpwi cr1,r13,2 114
114 /* Total loss of HV state is fatal, we could try to use the
115 * PIR to locate a PACA, then use an emergency stack etc...
116 * OPAL v3 based powernv platforms have new idle states
117 * which fall in this catagory.
118 */
119 bgt cr1,8f
120 GET_PACA(r13) 115 GET_PACA(r13)
116 lbz r0,PACA_THREAD_IDLE_STATE(r13)
117 cmpwi cr2,r0,PNV_THREAD_NAP
118 bgt cr2,8f /* Either sleep or Winkle */
119
120 /* Waking up from nap should not cause hypervisor state loss */
121 bgt cr3,.
122
123 /* Waking up from nap */
124 li r0,PNV_THREAD_RUNNING
125 stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
121 126
122#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 127#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
123 li r0,KVM_HWTHREAD_IN_KERNEL 128 li r0,KVM_HWTHREAD_IN_KERNEL
@@ -133,7 +138,7 @@ BEGIN_FTR_SECTION
133 138
134 /* Return SRR1 from power7_nap() */ 139 /* Return SRR1 from power7_nap() */
135 mfspr r3,SPRN_SRR1 140 mfspr r3,SPRN_SRR1
136 beq cr1,2f 141 beq cr3,2f
137 b power7_wakeup_noloss 142 b power7_wakeup_noloss
1382: b power7_wakeup_loss 1432: b power7_wakeup_loss
139 144
@@ -1382,6 +1387,7 @@ machine_check_handle_early:
1382 MACHINE_CHECK_HANDLER_WINDUP 1387 MACHINE_CHECK_HANDLER_WINDUP
1383 GET_PACA(r13) 1388 GET_PACA(r13)
1384 ld r1,PACAR1(r13) 1389 ld r1,PACAR1(r13)
1390 li r3,PNV_THREAD_NAP
1385 b power7_enter_nap_mode 1391 b power7_enter_nap_mode
13864: 13924:
1387#endif 1393#endif
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index e5aba6abbe6c..0f2c113c8ca5 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -18,6 +18,7 @@
18#include <asm/hw_irq.h> 18#include <asm/hw_irq.h>
19#include <asm/kvm_book3s_asm.h> 19#include <asm/kvm_book3s_asm.h>
20#include <asm/opal.h> 20#include <asm/opal.h>
21#include <asm/cpuidle.h>
21 22
22#undef DEBUG 23#undef DEBUG
23 24
@@ -37,8 +38,7 @@
37 38
38/* 39/*
39 * Pass requested state in r3: 40 * Pass requested state in r3:
40 * 0 - nap 41 * r3 - PNV_THREAD_NAP/SLEEP/WINKLE
41 * 1 - sleep
42 * 42 *
43 * To check IRQ_HAPPENED in r4 43 * To check IRQ_HAPPENED in r4
44 * 0 - don't check 44 * 0 - don't check
@@ -123,12 +123,58 @@ power7_enter_nap_mode:
123 li r4,KVM_HWTHREAD_IN_NAP 123 li r4,KVM_HWTHREAD_IN_NAP
124 stb r4,HSTATE_HWTHREAD_STATE(r13) 124 stb r4,HSTATE_HWTHREAD_STATE(r13)
125#endif 125#endif
126 cmpwi cr0,r3,1 126 stb r3,PACA_THREAD_IDLE_STATE(r13)
127 beq 2f 127 cmpwi cr1,r3,PNV_THREAD_SLEEP
128 bge cr1,2f
128 IDLE_STATE_ENTER_SEQ(PPC_NAP) 129 IDLE_STATE_ENTER_SEQ(PPC_NAP)
129 /* No return */ 130 /* No return */
1302: IDLE_STATE_ENTER_SEQ(PPC_SLEEP) 1312:
131 /* No return */ 132 /* Sleep or winkle */
133 lbz r7,PACA_THREAD_MASK(r13)
134 ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
135lwarx_loop1:
136 lwarx r15,0,r14
137 andc r15,r15,r7 /* Clear thread bit */
138
139 andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
140
141/*
142 * If cr0 = 0, then current thread is the last thread of the core entering
143 * sleep. Last thread needs to execute the hardware bug workaround code if
144 * required by the platform.
145 * Make the workaround call unconditionally here. The below branch call is
146 * patched out when the idle states are discovered if the platform does not
147 * require it.
148 */
149.global pnv_fastsleep_workaround_at_entry
150pnv_fastsleep_workaround_at_entry:
151 beq fastsleep_workaround_at_entry
152
153 stwcx. r15,0,r14
154 bne- lwarx_loop1
155 isync
156
157common_enter: /* common code for all the threads entering sleep */
158 IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
159
160fastsleep_workaround_at_entry:
161 ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
162 stwcx. r15,0,r14
163 bne- lwarx_loop1
164 isync
165
166 /* Fast sleep workaround */
167 li r3,1
168 li r4,1
169 li r0,OPAL_CONFIG_CPU_IDLE_STATE
170 bl opal_call_realmode
171
172 /* Clear Lock bit */
173 li r0,0
174 lwsync
175 stw r0,0(r14)
176 b common_enter
177
132 178
133_GLOBAL(power7_idle) 179_GLOBAL(power7_idle)
134 /* Now check if user or arch enabled NAP mode */ 180 /* Now check if user or arch enabled NAP mode */
@@ -141,49 +187,16 @@ _GLOBAL(power7_idle)
141 187
142_GLOBAL(power7_nap) 188_GLOBAL(power7_nap)
143 mr r4,r3 189 mr r4,r3
144 li r3,0 190 li r3,PNV_THREAD_NAP
145 b power7_powersave_common 191 b power7_powersave_common
146 /* No return */ 192 /* No return */
147 193
148_GLOBAL(power7_sleep) 194_GLOBAL(power7_sleep)
149 li r3,1 195 li r3,PNV_THREAD_SLEEP
150 li r4,1 196 li r4,1
151 b power7_powersave_common 197 b power7_powersave_common
152 /* No return */ 198 /* No return */
153 199
154/*
155 * Make opal call in realmode. This is a generic function to be called
156 * from realmode from reset vector. It handles endianess.
157 *
158 * r13 - paca pointer
159 * r1 - stack pointer
160 * r3 - opal token
161 */
162opal_call_realmode:
163 mflr r12
164 std r12,_LINK(r1)
165 ld r2,PACATOC(r13)
166 /* Set opal return address */
167 LOAD_REG_ADDR(r0,return_from_opal_call)
168 mtlr r0
169 /* Handle endian-ness */
170 li r0,MSR_LE
171 mfmsr r12
172 andc r12,r12,r0
173 mtspr SPRN_HSRR1,r12
174 mr r0,r3 /* Move opal token to r0 */
175 LOAD_REG_ADDR(r11,opal)
176 ld r12,8(r11)
177 ld r2,0(r11)
178 mtspr SPRN_HSRR0,r12
179 hrfid
180
181return_from_opal_call:
182 FIXUP_ENDIAN
183 ld r0,_LINK(r1)
184 mtlr r0
185 blr
186
187#define CHECK_HMI_INTERRUPT \ 200#define CHECK_HMI_INTERRUPT \
188 mfspr r0,SPRN_SRR1; \ 201 mfspr r0,SPRN_SRR1; \
189BEGIN_FTR_SECTION_NESTED(66); \ 202BEGIN_FTR_SECTION_NESTED(66); \
@@ -197,7 +210,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
197 ld r2,PACATOC(r13); \ 210 ld r2,PACATOC(r13); \
198 ld r1,PACAR1(r13); \ 211 ld r1,PACAR1(r13); \
199 std r3,ORIG_GPR3(r1); /* Save original r3 */ \ 212 std r3,ORIG_GPR3(r1); /* Save original r3 */ \
200 li r3,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ 213 li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
201 bl opal_call_realmode; \ 214 bl opal_call_realmode; \
202 ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 215 ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
20320: nop; 21620: nop;
@@ -206,16 +219,105 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
206_GLOBAL(power7_wakeup_tb_loss) 219_GLOBAL(power7_wakeup_tb_loss)
207 ld r2,PACATOC(r13); 220 ld r2,PACATOC(r13);
208 ld r1,PACAR1(r13) 221 ld r1,PACAR1(r13)
222 /*
223 * Before entering any idle state, the NVGPRs are saved in the stack
224 * and they are restored before switching to the process context. Hence
225 * until they are restored, they are free to be used.
226 *
227 * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
228 * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
229 * wakeup reason if we branch to kvm_start_guest.
230 */
209 231
232 mfspr r16,SPRN_SRR1
210BEGIN_FTR_SECTION 233BEGIN_FTR_SECTION
211 CHECK_HMI_INTERRUPT 234 CHECK_HMI_INTERRUPT
212END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) 235END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
236
237 lbz r7,PACA_THREAD_MASK(r13)
238 ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
239lwarx_loop2:
240 lwarx r15,0,r14
241 andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
242 /*
243 * Lock bit is set in one of the 2 cases-
244 * a. In the sleep/winkle enter path, the last thread is executing
245 * fastsleep workaround code.
246 * b. In the wake up path, another thread is executing fastsleep
247 * workaround undo code or resyncing timebase or restoring context
248 * In either case loop until the lock bit is cleared.
249 */
250 bne core_idle_lock_held
251
252 cmpwi cr2,r15,0
253 or r15,r15,r7 /* Set thread bit */
254
255 beq cr2,first_thread
256
257 /* Not first thread in core to wake up */
258 stwcx. r15,0,r14
259 bne- lwarx_loop2
260 isync
261 b common_exit
262
263core_idle_lock_held:
264 HMT_LOW
265core_idle_lock_loop:
266 lwz r15,0(14)
267 andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
268 bne core_idle_lock_loop
269 HMT_MEDIUM
270 b lwarx_loop2
271
272first_thread:
273 /* First thread in core to wakeup */
274 ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
275 stwcx. r15,0,r14
276 bne- lwarx_loop2
277 isync
278
279 /*
280 * First thread in the core waking up from fastsleep. It needs to
281 * call the fastsleep workaround code if the platform requires it.
282 * Call it unconditionally here. The below branch instruction will
283 * be patched out when the idle states are discovered if platform
284 * does not require workaround.
285 */
286.global pnv_fastsleep_workaround_at_exit
287pnv_fastsleep_workaround_at_exit:
288 b fastsleep_workaround_at_exit
289
290timebase_resync:
291 /* Do timebase resync if we are waking up from sleep. Use cr3 value
292 * set in exceptions-64s.S */
293 ble cr3,clear_lock
213 /* Time base re-sync */ 294 /* Time base re-sync */
214 li r3,OPAL_RESYNC_TIMEBASE 295 li r0,OPAL_RESYNC_TIMEBASE
215 bl opal_call_realmode; 296 bl opal_call_realmode;
216
217 /* TODO: Check r3 for failure */ 297 /* TODO: Check r3 for failure */
218 298
299clear_lock:
300 andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
301 lwsync
302 stw r15,0(r14)
303
304common_exit:
305 li r5,PNV_THREAD_RUNNING
306 stb r5,PACA_THREAD_IDLE_STATE(r13)
307
308 mtspr SPRN_SRR1,r16
309#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
310 li r0,KVM_HWTHREAD_IN_KERNEL
311 stb r0,HSTATE_HWTHREAD_STATE(r13)
312 /* Order setting hwthread_state vs. testing hwthread_req */
313 sync
314 lbz r0,HSTATE_HWTHREAD_REQ(r13)
315 cmpwi r0,0
316 beq 6f
317 b kvm_start_guest
3186:
319#endif
320
219 REST_NVGPRS(r1) 321 REST_NVGPRS(r1)
220 REST_GPR(2, r1) 322 REST_GPR(2, r1)
221 ld r3,_CCR(r1) 323 ld r3,_CCR(r1)
@@ -228,6 +330,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
228 mtspr SPRN_SRR0,r5 330 mtspr SPRN_SRR0,r5
229 rfid 331 rfid
230 332
333fastsleep_workaround_at_exit:
334 li r3,1
335 li r4,0
336 li r0,OPAL_CONFIG_CPU_IDLE_STATE
337 bl opal_call_realmode
338 b timebase_resync
339
231/* 340/*
232 * R3 here contains the value that will be returned to the caller 341 * R3 here contains the value that will be returned to the caller
233 * of power7_nap. 342 * of power7_nap.