author:    Shreyas B. Prabhu <shreyas@linux.vnet.ibm.com>  2014-12-09 13:56:52 -0500
committer: Michael Ellerman <mpe@ellerman.id.au>  2014-12-14 18:46:40 -0500
commit:    7cba160ad789a3ad7e68b92bf20eaad6ed171f80
tree:      ef57d54fcf61e5acf912e03004c0913457d3832b /arch/powerpc/kernel
parent:    8eb8ac89a364305d05ad16be983b7890eb462cc3
powernv/cpuidle: Redesign idle states management
Deep idle states like sleep and winkle are per-core idle states: a core
enters them only when all of its threads have entered either that state
or a deeper one. Tasks like the fastsleep hardware bug workaround and
the hypervisor core state save must be done only by the last thread of
the core to enter a deep idle state; similarly, tasks like the timebase
resync and the hypervisor core register restore must be done only by
the first thread to wake up from these states.
The current idle state management has no way to distinguish the
first/last thread of the core entering or waking from an idle state, so
tasks like the timebase resync are done by every thread. This is not
only suboptimal but can cause functional issues when subcores and KVM
are involved.
This patch adds the infrastructure needed to track the idle states of a
core's threads in a per-core structure, and uses that information to
perform tasks like the fastsleep workaround and the timebase resync
only once per core.
Signed-off-by: Shreyas B. Prabhu <shreyas@linux.vnet.ibm.com>
Originally-by: Preeti U. Murthy <preeti@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: linux-pm@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
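
To make the scheme concrete, here is a minimal C model of the per-core
bookkeeping the patch introduces. It is an illustration, not kernel code:
the constant mirrors the PNV_CORE_IDLE_* values the patch adds to
asm/cpuidle.h (exact values assumed), GCC atomic builtins stand in for
the lwarx/stwcx. loops, and the lock-bit serialization used around the
once-per-core work is omitted for brevity.

#include <stdbool.h>
#include <stdint.h>

/* Assumed layout: the low bits of the per-core word track which
 * threads of the core are awake, one bit per SMT thread. */
#define PNV_CORE_IDLE_THREAD_BITS	0x000000ff

/* Thread enters a deep idle state. Returns true if it is the last
 * thread of the core to go idle, i.e. the one that must perform
 * once-per-core work such as the fastsleep hardware bug workaround. */
static bool enter_deep_idle(uint32_t *core_idle_state, uint32_t thread_mask)
{
	uint32_t old = __atomic_fetch_and(core_idle_state, ~thread_mask,
					  __ATOMIC_ACQ_REL);
	return (old & PNV_CORE_IDLE_THREAD_BITS) == thread_mask;
}

/* Thread wakes up. Returns true if it is the first thread of the core
 * to wake, i.e. the one that must resync the timebase and restore
 * per-core hypervisor state. */
static bool exit_deep_idle(uint32_t *core_idle_state, uint32_t thread_mask)
{
	uint32_t old = __atomic_fetch_or(core_idle_state, thread_mask,
					 __ATOMIC_ACQ_REL);
	return (old & PNV_CORE_IDLE_THREAD_BITS) == 0;
}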
Diffstat (limited to 'arch/powerpc/kernel')
 arch/powerpc/kernel/asm-offsets.c    |   9 +
 arch/powerpc/kernel/exceptions-64s.S |  24 +-
 arch/powerpc/kernel/idle_power7.S    | 197 +++++++++++++++++------
 3 files changed, 177 insertions(+), 53 deletions(-)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c161ef3f28a1..bbd27fe0c039 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -726,5 +726,14 @@ int main(void)
 					arch.timing_last_enter.tv32.tbl));
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+	DEFINE(PACA_CORE_IDLE_STATE_PTR,
+			offsetof(struct paca_struct, core_idle_state_ptr));
+	DEFINE(PACA_THREAD_IDLE_STATE,
+			offsetof(struct paca_struct, thread_idle_state));
+	DEFINE(PACA_THREAD_MASK,
+			offsetof(struct paca_struct, thread_mask));
+#endif
+
 	return 0;
 }
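
The asm-offsets.c hunk uses the kernel's standard trick for exporting C
structure offsets to assembly. Roughly (a simplified sketch; the real
macro lives in include/linux/kbuild.h), asm-offsets.c is compiled to
assembly only, and each DEFINE() emits a marker that a build-time script
rewrites into a #define in the generated asm-offsets.h included by .S
files:

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

With the three new constants, idle_power7.S can address the new PACA
fields directly, e.g. "lbz r0,PACA_THREAD_IDLE_STATE(r13)" as in the
diff below.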
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index db08382e19f1..289fe718ecd4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -15,6 +15,7 @@
 #include <asm/hw_irq.h>
 #include <asm/exception-64s.h>
 #include <asm/ptrace.h>
+#include <asm/cpuidle.h>
 
 /*
  * We layout physical memory as follows:
@@ -109,15 +110,19 @@ BEGIN_FTR_SECTION
 	rlwinm.	r13,r13,47-31,30,31
 	beq	9f
 
-	/* waking up from powersave (nap) state */
-	cmpwi	cr1,r13,2
-	/* Total loss of HV state is fatal, we could try to use the
-	 * PIR to locate a PACA, then use an emergency stack etc...
-	 * OPAL v3 based powernv platforms have new idle states
-	 * which fall in this catagory.
-	 */
-	bgt	cr1,8f
+	cmpwi	cr3,r13,2
+
 	GET_PACA(r13)
+	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi	cr2,r0,PNV_THREAD_NAP
+	bgt	cr2,8f				/* Either sleep or Winkle */
+
+	/* Waking up from nap should not cause hypervisor state loss */
+	bgt	cr3,.
+
+	/* Waking up from nap */
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	li	r0,KVM_HWTHREAD_IN_KERNEL
@@ -133,7 +138,7 @@ BEGIN_FTR_SECTION
 
 	/* Return SRR1 from power7_nap() */
 	mfspr	r3,SPRN_SRR1
-	beq	cr1,2f
+	beq	cr3,2f
 	b	power7_wakeup_noloss
 2:	b	power7_wakeup_loss
 
@@ -1382,6 +1387,7 @@ machine_check_handle_early:
 	MACHINE_CHECK_HANDLER_WINDUP
 	GET_PACA(r13)
 	ld	r1,PACAR1(r13)
+	li	r3,PNV_THREAD_NAP
 	b	power7_enter_nap_mode
 4:
 #endif
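
In rough C terms, the reworked wake-up test above behaves as follows.
This is a hedged restatement, not kernel code: the PNV_THREAD_* values
and the classify_wakeup() helper are assumptions for illustration, and
srr1_bits stands for the 2-bit wake-state field the rlwinm extracts
from SRR1.

#include <assert.h>
#include <stdint.h>

/* Values assumed to match the PNV_THREAD_* constants from the new
 * asm/cpuidle.h. */
enum pnv_thread_state {
	PNV_THREAD_RUNNING = 0,
	PNV_THREAD_NAP     = 1,
	PNV_THREAD_SLEEP   = 2,
	PNV_THREAD_WINKLE  = 3,
};

enum wake_path { NOT_POWERSAVE, NAP_WAKE, DEEP_WAKE };

static enum wake_path classify_wakeup(unsigned int srr1_bits,
				      uint8_t *thread_idle_state)
{
	if (srr1_bits == 0)
		return NOT_POWERSAVE;		/* "beq 9f" */

	if (*thread_idle_state > PNV_THREAD_NAP)
		return DEEP_WAKE;		/* "bgt cr2,8f": sleep/winkle */

	/* A thread that only napped must not see full state loss;
	 * the assembly spins forever here ("bgt cr3,."). */
	assert(srr1_bits <= 2);

	*thread_idle_state = PNV_THREAD_RUNNING;
	return NAP_WAKE;
}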
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index e5aba6abbe6c..0f2c113c8ca5 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -18,6 +18,7 @@
 #include <asm/hw_irq.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/opal.h>
+#include <asm/cpuidle.h>
 
 #undef DEBUG
 
@@ -37,8 +38,7 @@
 
 /*
  * Pass requested state in r3:
- *	0 - nap
- *	1 - sleep
+ *	r3 - PNV_THREAD_NAP/SLEEP/WINKLE
  *
  * To check IRQ_HAPPENED in r4
  *	0 - don't check
@@ -123,12 +123,58 @@ power7_enter_nap_mode:
 	li	r4,KVM_HWTHREAD_IN_NAP
 	stb	r4,HSTATE_HWTHREAD_STATE(r13)
 #endif
-	cmpwi	cr0,r3,1
-	beq	2f
+	stb	r3,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi	cr1,r3,PNV_THREAD_SLEEP
+	bge	cr1,2f
 	IDLE_STATE_ENTER_SEQ(PPC_NAP)
 	/* No return */
-2:	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
-	/* No return */
+2:
+	/* Sleep or winkle */
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop1:
+	lwarx	r15,0,r14
+	andc	r15,r15,r7			/* Clear thread bit */
+
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+
+/*
+ * If cr0 = 0, then current thread is the last thread of the core entering
+ * sleep. Last thread needs to execute the hardware bug workaround code if
+ * required by the platform.
+ * Make the workaround call unconditionally here. The below branch call is
+ * patched out when the idle states are discovered if the platform does not
+ * require it.
+ */
+.global pnv_fastsleep_workaround_at_entry
+pnv_fastsleep_workaround_at_entry:
+	beq	fastsleep_workaround_at_entry
+
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+common_enter: /* common code for all the threads entering sleep */
+	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+
+fastsleep_workaround_at_entry:
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+	/* Fast sleep workaround */
+	li	r3,1
+	li	r4,1
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+
+	/* Clear Lock bit */
+	li	r0,0
+	lwsync
+	stw	r0,0(r14)
+	b	common_enter
+
 
 _GLOBAL(power7_idle)
 	/* Now check if user or arch enabled NAP mode */
@@ -141,49 +187,16 @@ _GLOBAL(power7_idle)
 
 _GLOBAL(power7_nap)
 	mr	r4,r3
-	li	r3,0
+	li	r3,PNV_THREAD_NAP
 	b	power7_powersave_common
 	/* No return */
 
 _GLOBAL(power7_sleep)
-	li	r3,1
+	li	r3,PNV_THREAD_SLEEP
 	li	r4,1
 	b	power7_powersave_common
 	/* No return */
 
-/*
- * Make opal call in realmode. This is a generic function to be called
- * from realmode from reset vector. It handles endianess.
- *
- * r13 - paca pointer
- * r1  - stack pointer
- * r3  - opal token
- */
-opal_call_realmode:
-	mflr	r12
-	std	r12,_LINK(r1)
-	ld	r2,PACATOC(r13)
-	/* Set opal return address */
-	LOAD_REG_ADDR(r0,return_from_opal_call)
-	mtlr	r0
-	/* Handle endian-ness */
-	li	r0,MSR_LE
-	mfmsr	r12
-	andc	r12,r12,r0
-	mtspr	SPRN_HSRR1,r12
-	mr	r0,r3			/* Move opal token to r0 */
-	LOAD_REG_ADDR(r11,opal)
-	ld	r12,8(r11)
-	ld	r2,0(r11)
-	mtspr	SPRN_HSRR0,r12
-	hrfid
-
-return_from_opal_call:
-	FIXUP_ENDIAN
-	ld	r0,_LINK(r1)
-	mtlr	r0
-	blr
-
 #define CHECK_HMI_INTERRUPT			\
 	mfspr	r0,SPRN_SRR1;			\
 BEGIN_FTR_SECTION_NESTED(66);			\
@@ -197,7 +210,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
 	ld	r2,PACATOC(r13);		\
 	ld	r1,PACAR1(r13);			\
 	std	r3,ORIG_GPR3(r1);	/* Save original r3 */	\
-	li	r3,OPAL_HANDLE_HMI;	/* Pass opal token argument*/ \
+	li	r0,OPAL_HANDLE_HMI;	/* Pass opal token argument*/ \
 	bl	opal_call_realmode;	\
 	ld	r3,ORIG_GPR3(r1);	/* Restore original r3 */	\
 20:	nop;
@@ -206,16 +219,105 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
 _GLOBAL(power7_wakeup_tb_loss)
 	ld	r2,PACATOC(r13);
 	ld	r1,PACAR1(r13)
+	/*
+	 * Before entering any idle state, the NVGPRs are saved in the stack
+	 * and they are restored before switching to the process context. Hence
+	 * until they are restored, they are free to be used.
+	 *
+	 * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
+	 * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
+	 * wakeup reason if we branch to kvm_start_guest.
+	 */
 
+	mfspr	r16,SPRN_SRR1
 BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop2:
+	lwarx	r15,0,r14
+	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	/*
+	 * Lock bit is set in one of the 2 cases-
+	 * a. In the sleep/winkle enter path, the last thread is executing
+	 * fastsleep workaround code.
+	 * b. In the wake up path, another thread is executing fastsleep
+	 * workaround undo code or resyncing timebase or restoring context
+	 * In either case loop until the lock bit is cleared.
+	 */
+	bne	core_idle_lock_held
+
+	cmpwi	cr2,r15,0
+	or	r15,r15,r7		/* Set thread bit */
+
+	beq	cr2,first_thread
+
+	/* Not first thread in core to wake up */
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+	b	common_exit
+
+core_idle_lock_held:
+	HMT_LOW
+core_idle_lock_loop:
+	lwz	r15,0(14)
+	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	bne	core_idle_lock_loop
+	HMT_MEDIUM
+	b	lwarx_loop2
+
+first_thread:
+	/* First thread in core to wakeup */
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+
+	/*
+	 * First thread in the core waking up from fastsleep. It needs to
+	 * call the fastsleep workaround code if the platform requires it.
+	 * Call it unconditionally here. The below branch instruction will
+	 * be patched out when the idle states are discovered if platform
+	 * does not require workaround.
+	 */
+.global pnv_fastsleep_workaround_at_exit
+pnv_fastsleep_workaround_at_exit:
+	b	fastsleep_workaround_at_exit
+
+timebase_resync:
+	/* Do timebase resync if we are waking up from sleep. Use cr3 value
+	 * set in exceptions-64s.S */
+	ble	cr3,clear_lock
 	/* Time base re-sync */
-	li	r3,OPAL_RESYNC_TIMEBASE
+	li	r0,OPAL_RESYNC_TIMEBASE
 	bl	opal_call_realmode;
-
 	/* TODO: Check r3 for failure */
 
+clear_lock:
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	lwsync
+	stw	r15,0(r14)
+
+common_exit:
+	li	r5,PNV_THREAD_RUNNING
+	stb	r5,PACA_THREAD_IDLE_STATE(r13)
+
+	mtspr	SPRN_SRR1,r16
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	li	r0,KVM_HWTHREAD_IN_KERNEL
+	stb	r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r0,0
+	beq	6f
+	b	kvm_start_guest
+6:
+#endif
+
 	REST_NVGPRS(r1)
 	REST_GPR(2, r1)
 	ld	r3,_CCR(r1)
@@ -228,6 +330,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	mtspr	SPRN_SRR0,r5
 	rfid
 
+fastsleep_workaround_at_exit:
+	li	r3,1
+	li	r4,0
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+	b	timebase_resync
+
 /*
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
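
Taken together, the wake-up side is a small lock-protected protocol on
the per-core state word. Below is a hedged C sketch of the logic in
power7_wakeup_tb_loss above, under stated assumptions: GCC atomics
stand in for the lwarx/stwcx. loops, the opal_*() stubs are
placeholders for the OPAL_CONFIG_CPU_IDLE_STATE and
OPAL_RESYNC_TIMEBASE real-mode calls, and the bit values are assumed.

#include <stdbool.h>
#include <stdint.h>

#define PNV_CORE_IDLE_LOCK_BIT		0x00000100	/* assumed layout */
#define PNV_CORE_IDLE_THREAD_BITS	0x000000ff

/* Stubs for the real-mode OPAL calls made via opal_call_realmode. */
static void opal_config_cpu_idle_state(int state, int enter)
{
	(void)state; (void)enter;
}
static void opal_resync_timebase(void) {}

static void wakeup_from_deep_idle(uint32_t *core_state, uint32_t thread_mask,
				  bool from_sleep, bool needs_workaround)
{
	uint32_t old, new;

	/* lwarx_loop2: retry until the lock is free and our update sticks */
	for (;;) {
		old = __atomic_load_n(core_state, __ATOMIC_ACQUIRE);
		if (old & PNV_CORE_IDLE_LOCK_BIT)
			continue;		/* core_idle_lock_held */
		new = old | thread_mask;	/* set our thread bit */
		if (old == 0)			/* first thread of core to wake */
			new |= PNV_CORE_IDLE_LOCK_BIT;
		if (__atomic_compare_exchange_n(core_state, &old, new, false,
						__ATOMIC_ACQ_REL,
						__ATOMIC_ACQUIRE))
			break;
	}

	if (old == 0) {				/* first_thread */
		if (needs_workaround)		/* patched branch at exit */
			opal_config_cpu_idle_state(1, 0); /* undo workaround */
		if (from_sleep)			/* the cr3 test */
			opal_resync_timebase();
		/* clear_lock: keep the thread bits, drop the lock */
		__atomic_store_n(core_state, new & PNV_CORE_IDLE_THREAD_BITS,
				 __ATOMIC_RELEASE);
	}
	/* common_exit: the thread then marks itself PNV_THREAD_RUNNING */
}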