diff options
author | Catalin Marinas <catalin.marinas@arm.com> | 2007-11-22 12:32:01 -0500 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2008-01-26 09:41:28 -0500 |
commit | c98929c07a01c9ec2e1e5253456acc7168da8b66 (patch) | |
tree | 7d0014de51fe530b95bce7f74d9122229067f850 /arch/arm | |
parent | 9b73e76f3cf63379dcf45fcd4f112f5812418d0a (diff) |
[ARM] 4582/2: Add support for the common VFP subarchitecture
This patch allows the VFP support code to run correctly on CPUs
compatible with the common VFP subarchitecture specification (Appendix
B in the ARM ARM v7-A and v7-R edition). It implements support for VFP
subarchitecture 2 while being backwards compatible with
subarchitecture 1.
On VFP subarchitecture 1, the arithmetic exceptions are asynchronous
(or imprecise as described in the old ARM ARM) unless the FPSCR.IXE
bit is 1. The exceptional instructions can be read from FPINST and
FPINST2 registers. With VFP subarchitecture 2, the arithmetic
exceptions can also be synchronous and marked by the FPEXC.DEX bit
(the FPEXC.EX bit is cleared). CPUs implementing the synchronous
arithmetic exceptions don't have the FPINST and FPINST2 registers and
accessing them would trigger an undefined exception.
Note that FPEXC.EX bit has an additional meaning on subarchitecture 1
- if it isn't set, there is no additional information in FPINST and
FPINST2 that needs to be saved at context switch or when lazy-loading
the VFP state of a different thread.
The patch also removes the clearing of the cumulative exception flags in
FPSCR when additional exceptions were raised. It is up to the user
application to clear these bits.
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm')
-rw-r--r-- | arch/arm/vfp/vfphw.S | 38 | ||||
-rw-r--r-- | arch/arm/vfp/vfpmodule.c | 98 |
2 files changed, 74 insertions, 62 deletions
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index 0ac022f800a1..53d9f8e8fac3 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S | |||
@@ -100,10 +100,10 @@ vfp_support_entry: | |||
100 | cmp r4, #0 | 100 | cmp r4, #0 |
101 | beq no_old_VFP_process | 101 | beq no_old_VFP_process |
102 | VFPFMRX r5, FPSCR @ current status | 102 | VFPFMRX r5, FPSCR @ current status |
103 | VFPFMRX r6, FPINST @ FPINST (always there, rev0 onwards) | 103 | tst r1, #FPEXC_EX @ is there additional state to save? |
104 | tst r1, #FPEXC_FPV2 @ is there an FPINST2 to read? | 104 | VFPFMRX r6, FPINST, NE @ FPINST (only if FPEXC.EX is set) |
105 | VFPFMRX r8, FPINST2, NE @ FPINST2 if needed - avoids reading | 105 | tstne r1, #FPEXC_FP2V @ is there an FPINST2 to read? |
106 | @ nonexistant reg on rev0 | 106 | VFPFMRX r8, FPINST2, NE @ FPINST2 if needed (and present) |
107 | VFPFSTMIA r4 @ save the working registers | 107 | VFPFSTMIA r4 @ save the working registers |
108 | stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2 | 108 | stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2 |
109 | @ and point r4 at the word at the | 109 | @ and point r4 at the word at the |
@@ -117,10 +117,10 @@ no_old_VFP_process: | |||
117 | VFPFLDMIA r10 @ reload the working registers while | 117 | VFPFLDMIA r10 @ reload the working registers while |
118 | @ FPEXC is in a safe state | 118 | @ FPEXC is in a safe state |
119 | ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2 | 119 | ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2 |
120 | tst r1, #FPEXC_FPV2 @ is there an FPINST2 to write? | 120 | tst r1, #FPEXC_EX @ is there additional state to restore? |
121 | VFPFMXR FPINST2, r8, NE @ FPINST2 if needed - avoids writing | 121 | VFPFMXR FPINST, r6, NE @ restore FPINST (only if FPEXC.EX is set) |
122 | @ nonexistant reg on rev0 | 122 | tstne r1, #FPEXC_FP2V @ is there an FPINST2 to write? |
123 | VFPFMXR FPINST, r6 | 123 | VFPFMXR FPINST2, r8, NE @ FPINST2 if needed (and present) |
124 | VFPFMXR FPSCR, r5 @ restore status | 124 | VFPFMXR FPSCR, r5 @ restore status |
125 | 125 | ||
126 | check_for_exception: | 126 | check_for_exception: |
@@ -136,10 +136,14 @@ check_for_exception: | |||
136 | 136 | ||
137 | 137 | ||
138 | look_for_VFP_exceptions: | 138 | look_for_VFP_exceptions: |
139 | tst r1, #FPEXC_EX | 139 | @ Check for synchronous or asynchronous exception |
140 | tst r1, #FPEXC_EX | FPEXC_DEX | ||
140 | bne process_exception | 141 | bne process_exception |
142 | @ On some implementations of the VFP subarch 1, setting FPSCR.IXE | ||
143 | @ causes all the CDP instructions to be bounced synchronously without | ||
144 | @ setting the FPEXC.EX bit | ||
141 | VFPFMRX r5, FPSCR | 145 | VFPFMRX r5, FPSCR |
142 | tst r5, #FPSCR_IXE @ IXE doesn't set FPEXC_EX ! | 146 | tst r5, #FPSCR_IXE |
143 | bne process_exception | 147 | bne process_exception |
144 | 148 | ||
145 | @ Fall into hand on to next handler - appropriate coproc instr | 149 | @ Fall into hand on to next handler - appropriate coproc instr |
@@ -150,10 +154,6 @@ look_for_VFP_exceptions: | |||
150 | 154 | ||
151 | process_exception: | 155 | process_exception: |
152 | DBGSTR "bounce" | 156 | DBGSTR "bounce" |
153 | sub r2, r2, #4 | ||
154 | str r2, [sp, #S_PC] @ retry the instruction on exit from | ||
155 | @ the imprecise exception handling in | ||
156 | @ the support code | ||
157 | mov r2, sp @ nothing stacked - regdump is at TOS | 157 | mov r2, sp @ nothing stacked - regdump is at TOS |
158 | mov lr, r9 @ setup for a return to the user code. | 158 | mov lr, r9 @ setup for a return to the user code. |
159 | 159 | ||
@@ -161,7 +161,7 @@ process_exception: | |||
161 | @ r0 holds the trigger instruction | 161 | @ r0 holds the trigger instruction |
162 | @ r1 holds the FPEXC value | 162 | @ r1 holds the FPEXC value |
163 | @ r2 pointer to register dump | 163 | @ r2 pointer to register dump |
164 | b VFP9_bounce @ we have handled this - the support | 164 | b VFP_bounce @ we have handled this - the support |
165 | @ code will raise an exception if | 165 | @ code will raise an exception if |
166 | @ required. If not, the user code will | 166 | @ required. If not, the user code will |
167 | @ retry the faulted instruction | 167 | @ retry the faulted instruction |
@@ -175,10 +175,10 @@ vfp_save_state: | |||
175 | @ r1 - FPEXC | 175 | @ r1 - FPEXC |
176 | DBGSTR1 "save VFP state %p", r0 | 176 | DBGSTR1 "save VFP state %p", r0 |
177 | VFPFMRX r2, FPSCR @ current status | 177 | VFPFMRX r2, FPSCR @ current status |
178 | VFPFMRX r3, FPINST @ FPINST (always there, rev0 onwards) | 178 | tst r1, #FPEXC_EX @ is there additional state to save? |
179 | tst r1, #FPEXC_FPV2 @ is there an FPINST2 to read? | 179 | VFPFMRX r3, FPINST, NE @ FPINST (only if FPEXC.EX is set) |
180 | VFPFMRX r12, FPINST2, NE @ FPINST2 if needed - avoids reading | 180 | tstne r1, #FPEXC_FP2V @ is there an FPINST2 to read? |
181 | @ nonexistant reg on rev0 | 181 | VFPFMRX r12, FPINST2, NE @ FPINST2 if needed (and present) |
182 | VFPFSTMIA r0 @ save the working registers | 182 | VFPFSTMIA r0 @ save the working registers |
183 | stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2 | 183 | stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2 |
184 | mov pc, lr | 184 | mov pc, lr |
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index b4e210df92f2..32455c633f1c 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c | |||
@@ -125,13 +125,13 @@ void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs) | |||
125 | send_sig_info(SIGFPE, &info, current); | 125 | send_sig_info(SIGFPE, &info, current); |
126 | } | 126 | } |
127 | 127 | ||
128 | static void vfp_panic(char *reason) | 128 | static void vfp_panic(char *reason, u32 inst) |
129 | { | 129 | { |
130 | int i; | 130 | int i; |
131 | 131 | ||
132 | printk(KERN_ERR "VFP: Error: %s\n", reason); | 132 | printk(KERN_ERR "VFP: Error: %s\n", reason); |
133 | printk(KERN_ERR "VFP: EXC 0x%08x SCR 0x%08x INST 0x%08x\n", | 133 | printk(KERN_ERR "VFP: EXC 0x%08x SCR 0x%08x INST 0x%08x\n", |
134 | fmrx(FPEXC), fmrx(FPSCR), fmrx(FPINST)); | 134 | fmrx(FPEXC), fmrx(FPSCR), inst); |
135 | for (i = 0; i < 32; i += 2) | 135 | for (i = 0; i < 32; i += 2) |
136 | printk(KERN_ERR "VFP: s%2u: 0x%08x s%2u: 0x%08x\n", | 136 | printk(KERN_ERR "VFP: s%2u: 0x%08x s%2u: 0x%08x\n", |
137 | i, vfp_get_float(i), i+1, vfp_get_float(i+1)); | 137 | i, vfp_get_float(i), i+1, vfp_get_float(i+1)); |
@@ -147,19 +147,16 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ | |||
147 | pr_debug("VFP: raising exceptions %08x\n", exceptions); | 147 | pr_debug("VFP: raising exceptions %08x\n", exceptions); |
148 | 148 | ||
149 | if (exceptions == VFP_EXCEPTION_ERROR) { | 149 | if (exceptions == VFP_EXCEPTION_ERROR) { |
150 | vfp_panic("unhandled bounce"); | 150 | vfp_panic("unhandled bounce", inst); |
151 | vfp_raise_sigfpe(0, regs); | 151 | vfp_raise_sigfpe(0, regs); |
152 | return; | 152 | return; |
153 | } | 153 | } |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * If any of the status flags are set, update the FPSCR. | 156 | * Update the FPSCR with the additional exception flags. |
157 | * Comparison instructions always return at least one of | 157 | * Comparison instructions always return at least one of |
158 | * these flags set. | 158 | * these flags set. |
159 | */ | 159 | */ |
160 | if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) | ||
161 | fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V); | ||
162 | |||
163 | fpscr |= exceptions; | 160 | fpscr |= exceptions; |
164 | 161 | ||
165 | fmxr(FPSCR, fpscr); | 162 | fmxr(FPSCR, fpscr); |
@@ -220,35 +217,64 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs) | |||
220 | /* | 217 | /* |
221 | * Package up a bounce condition. | 218 | * Package up a bounce condition. |
222 | */ | 219 | */ |
223 | void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) | 220 | void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) |
224 | { | 221 | { |
225 | u32 fpscr, orig_fpscr, exceptions, inst; | 222 | u32 fpscr, orig_fpscr, fpsid, exceptions; |
226 | 223 | ||
227 | pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); | 224 | pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); |
228 | 225 | ||
229 | /* | 226 | /* |
230 | * Enable access to the VFP so we can handle the bounce. | 227 | * At this point, FPEXC can have the following configuration: |
228 | * | ||
229 | * EX DEX IXE | ||
230 | * 0 1 x - synchronous exception | ||
231 | * 1 x 0 - asynchronous exception | ||
232 | * 1 x 1 - synchronous on VFP subarch 1 and asynchronous on later | ||
233 | * 0 0 1 - synchronous on VFP9 (non-standard subarch 1 | ||
234 | * implementation), undefined otherwise | ||
235 | * | ||
236 | * Clear various bits and enable access to the VFP so we can | ||
237 | * handle the bounce. | ||
231 | */ | 238 | */ |
232 | fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_FPV2|FPEXC_INV|FPEXC_UFC|FPEXC_OFC|FPEXC_IOC)); | 239 | fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_DEX|FPEXC_FP2V|FPEXC_VV|FPEXC_TRAP_MASK)); |
233 | 240 | ||
241 | fpsid = fmrx(FPSID); | ||
234 | orig_fpscr = fpscr = fmrx(FPSCR); | 242 | orig_fpscr = fpscr = fmrx(FPSCR); |
235 | 243 | ||
236 | /* | 244 | /* |
237 | * If we are running with inexact exceptions enabled, we need to | 245 | * Check for the special VFP subarch 1 and FPSCR.IXE bit case |
238 | * emulate the trigger instruction. Note that as we're emulating | ||
239 | * the trigger instruction, we need to increment PC. | ||
240 | */ | 246 | */ |
241 | if (fpscr & FPSCR_IXE) { | 247 | if ((fpsid & FPSID_ARCH_MASK) == (1 << FPSID_ARCH_BIT) |
242 | regs->ARM_pc += 4; | 248 | && (fpscr & FPSCR_IXE)) { |
249 | /* | ||
250 | * Synchronous exception, emulate the trigger instruction | ||
251 | */ | ||
243 | goto emulate; | 252 | goto emulate; |
244 | } | 253 | } |
245 | 254 | ||
246 | barrier(); | 255 | if (fpexc & FPEXC_EX) { |
256 | /* | ||
257 | * Asynchronous exception. The instruction is read from FPINST | ||
258 | * and the interrupted instruction has to be restarted. | ||
259 | */ | ||
260 | trigger = fmrx(FPINST); | ||
261 | regs->ARM_pc -= 4; | ||
262 | } else if (!(fpexc & FPEXC_DEX)) { | ||
263 | /* | ||
264 | * Illegal combination of bits. It can be caused by an | ||
265 | * unallocated VFP instruction but with FPSCR.IXE set and not | ||
266 | * on VFP subarch 1. | ||
267 | */ | ||
268 | vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs); | ||
269 | return; | ||
270 | } | ||
247 | 271 | ||
248 | /* | 272 | /* |
249 | * Modify fpscr to indicate the number of iterations remaining | 273 | * Modify fpscr to indicate the number of iterations remaining. |
274 | * If FPEXC.EX is 0, FPEXC.DEX is 1 and the FPEXC.VV bit indicates | ||
275 | * whether FPEXC.VECITR or FPSCR.LEN is used. | ||
250 | */ | 276 | */ |
251 | if (fpexc & FPEXC_EX) { | 277 | if (fpexc & (FPEXC_EX | FPEXC_VV)) { |
252 | u32 len; | 278 | u32 len; |
253 | 279 | ||
254 | len = fpexc + (1 << FPEXC_LENGTH_BIT); | 280 | len = fpexc + (1 << FPEXC_LENGTH_BIT); |
@@ -262,15 +288,15 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) | |||
262 | * FPEXC bounce reason, but this appears to be unreliable. | 288 | * FPEXC bounce reason, but this appears to be unreliable. |
263 | * Emulate the bounced instruction instead. | 289 | * Emulate the bounced instruction instead. |
264 | */ | 290 | */ |
265 | inst = fmrx(FPINST); | 291 | exceptions = vfp_emulate_instruction(trigger, fpscr, regs); |
266 | exceptions = vfp_emulate_instruction(inst, fpscr, regs); | ||
267 | if (exceptions) | 292 | if (exceptions) |
268 | vfp_raise_exceptions(exceptions, inst, orig_fpscr, regs); | 293 | vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); |
269 | 294 | ||
270 | /* | 295 | /* |
271 | * If there isn't a second FP instruction, exit now. | 296 | * If there isn't a second FP instruction, exit now. Note that |
297 | * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1. | ||
272 | */ | 298 | */ |
273 | if (!(fpexc & FPEXC_FPV2)) | 299 | if (fpexc ^ (FPEXC_EX | FPEXC_FP2V)) |
274 | return; | 300 | return; |
275 | 301 | ||
276 | /* | 302 | /* |
@@ -279,10 +305,9 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) | |||
279 | */ | 305 | */ |
280 | barrier(); | 306 | barrier(); |
281 | trigger = fmrx(FPINST2); | 307 | trigger = fmrx(FPINST2); |
282 | orig_fpscr = fpscr = fmrx(FPSCR); | ||
283 | 308 | ||
284 | emulate: | 309 | emulate: |
285 | exceptions = vfp_emulate_instruction(trigger, fpscr, regs); | 310 | exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); |
286 | if (exceptions) | 311 | if (exceptions) |
287 | vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); | 312 | vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); |
288 | } | 313 | } |
@@ -306,16 +331,9 @@ static int __init vfp_init(void) | |||
306 | { | 331 | { |
307 | unsigned int vfpsid; | 332 | unsigned int vfpsid; |
308 | unsigned int cpu_arch = cpu_architecture(); | 333 | unsigned int cpu_arch = cpu_architecture(); |
309 | u32 access = 0; | ||
310 | 334 | ||
311 | if (cpu_arch >= CPU_ARCH_ARMv6) { | 335 | if (cpu_arch >= CPU_ARCH_ARMv6) |
312 | access = get_copro_access(); | 336 | vfp_enable(NULL); |
313 | |||
314 | /* | ||
315 | * Enable full access to VFP (cp10 and cp11) | ||
316 | */ | ||
317 | set_copro_access(access | CPACC_FULL(10) | CPACC_FULL(11)); | ||
318 | } | ||
319 | 337 | ||
320 | /* | 338 | /* |
321 | * First check that there is a VFP that we can use. | 339 | * First check that there is a VFP that we can use. |
@@ -329,15 +347,9 @@ static int __init vfp_init(void) | |||
329 | vfp_vector = vfp_null_entry; | 347 | vfp_vector = vfp_null_entry; |
330 | 348 | ||
331 | printk(KERN_INFO "VFP support v0.3: "); | 349 | printk(KERN_INFO "VFP support v0.3: "); |
332 | if (VFP_arch) { | 350 | if (VFP_arch) |
333 | printk("not present\n"); | 351 | printk("not present\n"); |
334 | 352 | else if (vfpsid & FPSID_NODOUBLE) { | |
335 | /* | ||
336 | * Restore the copro access register. | ||
337 | */ | ||
338 | if (cpu_arch >= CPU_ARCH_ARMv6) | ||
339 | set_copro_access(access); | ||
340 | } else if (vfpsid & FPSID_NODOUBLE) { | ||
341 | printk("no double precision support\n"); | 353 | printk("no double precision support\n"); |
342 | } else { | 354 | } else { |
343 | smp_call_function(vfp_enable, NULL, 1, 1); | 355 | smp_call_function(vfp_enable, NULL, 1, 1); |