aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCatalin Marinas <catalin.marinas@arm.com>2007-11-22 12:32:01 -0500
committerRussell King <rmk+kernel@arm.linux.org.uk>2008-01-26 09:41:28 -0500
commitc98929c07a01c9ec2e1e5253456acc7168da8b66 (patch)
tree7d0014de51fe530b95bce7f74d9122229067f850
parent9b73e76f3cf63379dcf45fcd4f112f5812418d0a (diff)
[ARM] 4582/2: Add support for the common VFP subarchitecture
This patch allows the VFP support code to run correctly on CPUs compatible with the common VFP subarchitecture specification (Appendix B in the ARM ARM v7-A and v7-R edition). It implements support for VFP subarchitecture 2 while being backwards compatible with subarchitecture 1. On VFP subarchitecture 1, the arithmetic exceptions are asynchronous (or imprecise as described in the old ARM ARM) unless the FPSCR.IXE bit is 1. The exceptional instructions can be read from FPINST and FPINST2 registers. With VFP subarchitecture 2, the arithmetic exceptions can also be synchronous and marked by the FPEXC.DEX bit (the FPEXC.EX bit is cleared). CPUs implementing the synchronous arithmetic exceptions don't have the FPINST and FPINST2 registers and accessing them would trigger and undefined exception. Note that FPEXC.EX bit has an additional meaning on subarchitecture 1 - if it isn't set, there is no additional information in FPINST and FPINST2 that needs to be saved at context switch or when lazy-loading the VFP state of a different thread. The patch also removes the clearing of the cumulative exception flags in FPSCR when additional exceptions were raised. It is up to the user application to clear these bits. Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r--arch/arm/vfp/vfphw.S38
-rw-r--r--arch/arm/vfp/vfpmodule.c98
-rw-r--r--include/asm-arm/vfp.h30
3 files changed, 89 insertions, 77 deletions
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 0ac022f800a..53d9f8e8fac 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -100,10 +100,10 @@ vfp_support_entry:
100 cmp r4, #0 100 cmp r4, #0
101 beq no_old_VFP_process 101 beq no_old_VFP_process
102 VFPFMRX r5, FPSCR @ current status 102 VFPFMRX r5, FPSCR @ current status
103 VFPFMRX r6, FPINST @ FPINST (always there, rev0 onwards) 103 tst r1, #FPEXC_EX @ is there additional state to save?
104 tst r1, #FPEXC_FPV2 @ is there an FPINST2 to read? 104 VFPFMRX r6, FPINST, NE @ FPINST (only if FPEXC.EX is set)
105 VFPFMRX r8, FPINST2, NE @ FPINST2 if needed - avoids reading 105 tstne r1, #FPEXC_FP2V @ is there an FPINST2 to read?
106 @ nonexistant reg on rev0 106 VFPFMRX r8, FPINST2, NE @ FPINST2 if needed (and present)
107 VFPFSTMIA r4 @ save the working registers 107 VFPFSTMIA r4 @ save the working registers
108 stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2 108 stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2
109 @ and point r4 at the word at the 109 @ and point r4 at the word at the
@@ -117,10 +117,10 @@ no_old_VFP_process:
117 VFPFLDMIA r10 @ reload the working registers while 117 VFPFLDMIA r10 @ reload the working registers while
118 @ FPEXC is in a safe state 118 @ FPEXC is in a safe state
119 ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2 119 ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2
120 tst r1, #FPEXC_FPV2 @ is there an FPINST2 to write? 120 tst r1, #FPEXC_EX @ is there additional state to restore?
121 VFPFMXR FPINST2, r8, NE @ FPINST2 if needed - avoids writing 121 VFPFMXR FPINST, r6, NE @ restore FPINST (only if FPEXC.EX is set)
122 @ nonexistant reg on rev0 122 tstne r1, #FPEXC_FP2V @ is there an FPINST2 to write?
123 VFPFMXR FPINST, r6 123 VFPFMXR FPINST2, r8, NE @ FPINST2 if needed (and present)
124 VFPFMXR FPSCR, r5 @ restore status 124 VFPFMXR FPSCR, r5 @ restore status
125 125
126check_for_exception: 126check_for_exception:
@@ -136,10 +136,14 @@ check_for_exception:
136 136
137 137
138look_for_VFP_exceptions: 138look_for_VFP_exceptions:
139 tst r1, #FPEXC_EX 139 @ Check for synchronous or asynchronous exception
140 tst r1, #FPEXC_EX | FPEXC_DEX
140 bne process_exception 141 bne process_exception
142 @ On some implementations of the VFP subarch 1, setting FPSCR.IXE
143 @ causes all the CDP instructions to be bounced synchronously without
144 @ setting the FPEXC.EX bit
141 VFPFMRX r5, FPSCR 145 VFPFMRX r5, FPSCR
142 tst r5, #FPSCR_IXE @ IXE doesn't set FPEXC_EX ! 146 tst r5, #FPSCR_IXE
143 bne process_exception 147 bne process_exception
144 148
145 @ Fall into hand on to next handler - appropriate coproc instr 149 @ Fall into hand on to next handler - appropriate coproc instr
@@ -150,10 +154,6 @@ look_for_VFP_exceptions:
150 154
151process_exception: 155process_exception:
152 DBGSTR "bounce" 156 DBGSTR "bounce"
153 sub r2, r2, #4
154 str r2, [sp, #S_PC] @ retry the instruction on exit from
155 @ the imprecise exception handling in
156 @ the support code
157 mov r2, sp @ nothing stacked - regdump is at TOS 157 mov r2, sp @ nothing stacked - regdump is at TOS
158 mov lr, r9 @ setup for a return to the user code. 158 mov lr, r9 @ setup for a return to the user code.
159 159
@@ -161,7 +161,7 @@ process_exception:
161 @ r0 holds the trigger instruction 161 @ r0 holds the trigger instruction
162 @ r1 holds the FPEXC value 162 @ r1 holds the FPEXC value
163 @ r2 pointer to register dump 163 @ r2 pointer to register dump
164 b VFP9_bounce @ we have handled this - the support 164 b VFP_bounce @ we have handled this - the support
165 @ code will raise an exception if 165 @ code will raise an exception if
166 @ required. If not, the user code will 166 @ required. If not, the user code will
167 @ retry the faulted instruction 167 @ retry the faulted instruction
@@ -175,10 +175,10 @@ vfp_save_state:
175 @ r1 - FPEXC 175 @ r1 - FPEXC
176 DBGSTR1 "save VFP state %p", r0 176 DBGSTR1 "save VFP state %p", r0
177 VFPFMRX r2, FPSCR @ current status 177 VFPFMRX r2, FPSCR @ current status
178 VFPFMRX r3, FPINST @ FPINST (always there, rev0 onwards) 178 tst r1, #FPEXC_EX @ is there additional state to save?
179 tst r1, #FPEXC_FPV2 @ is there an FPINST2 to read? 179 VFPFMRX r3, FPINST, NE @ FPINST (only if FPEXC.EX is set)
180 VFPFMRX r12, FPINST2, NE @ FPINST2 if needed - avoids reading 180 tstne r1, #FPEXC_FP2V @ is there an FPINST2 to read?
181 @ nonexistant reg on rev0 181 VFPFMRX r12, FPINST2, NE @ FPINST2 if needed (and present)
182 VFPFSTMIA r0 @ save the working registers 182 VFPFSTMIA r0 @ save the working registers
183 stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2 183 stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2
184 mov pc, lr 184 mov pc, lr
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index b4e210df92f..32455c633f1 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -125,13 +125,13 @@ void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs)
125 send_sig_info(SIGFPE, &info, current); 125 send_sig_info(SIGFPE, &info, current);
126} 126}
127 127
128static void vfp_panic(char *reason) 128static void vfp_panic(char *reason, u32 inst)
129{ 129{
130 int i; 130 int i;
131 131
132 printk(KERN_ERR "VFP: Error: %s\n", reason); 132 printk(KERN_ERR "VFP: Error: %s\n", reason);
133 printk(KERN_ERR "VFP: EXC 0x%08x SCR 0x%08x INST 0x%08x\n", 133 printk(KERN_ERR "VFP: EXC 0x%08x SCR 0x%08x INST 0x%08x\n",
134 fmrx(FPEXC), fmrx(FPSCR), fmrx(FPINST)); 134 fmrx(FPEXC), fmrx(FPSCR), inst);
135 for (i = 0; i < 32; i += 2) 135 for (i = 0; i < 32; i += 2)
136 printk(KERN_ERR "VFP: s%2u: 0x%08x s%2u: 0x%08x\n", 136 printk(KERN_ERR "VFP: s%2u: 0x%08x s%2u: 0x%08x\n",
137 i, vfp_get_float(i), i+1, vfp_get_float(i+1)); 137 i, vfp_get_float(i), i+1, vfp_get_float(i+1));
@@ -147,19 +147,16 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
147 pr_debug("VFP: raising exceptions %08x\n", exceptions); 147 pr_debug("VFP: raising exceptions %08x\n", exceptions);
148 148
149 if (exceptions == VFP_EXCEPTION_ERROR) { 149 if (exceptions == VFP_EXCEPTION_ERROR) {
150 vfp_panic("unhandled bounce"); 150 vfp_panic("unhandled bounce", inst);
151 vfp_raise_sigfpe(0, regs); 151 vfp_raise_sigfpe(0, regs);
152 return; 152 return;
153 } 153 }
154 154
155 /* 155 /*
156 * If any of the status flags are set, update the FPSCR. 156 * Update the FPSCR with the additional exception flags.
157 * Comparison instructions always return at least one of 157 * Comparison instructions always return at least one of
158 * these flags set. 158 * these flags set.
159 */ 159 */
160 if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
161 fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V);
162
163 fpscr |= exceptions; 160 fpscr |= exceptions;
164 161
165 fmxr(FPSCR, fpscr); 162 fmxr(FPSCR, fpscr);
@@ -220,35 +217,64 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs)
220/* 217/*
221 * Package up a bounce condition. 218 * Package up a bounce condition.
222 */ 219 */
223void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) 220void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
224{ 221{
225 u32 fpscr, orig_fpscr, exceptions, inst; 222 u32 fpscr, orig_fpscr, fpsid, exceptions;
226 223
227 pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); 224 pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc);
228 225
229 /* 226 /*
230 * Enable access to the VFP so we can handle the bounce. 227 * At this point, FPEXC can have the following configuration:
228 *
229 * EX DEX IXE
230 * 0 1 x - synchronous exception
231 * 1 x 0 - asynchronous exception
232 * 1 x 1 - sychronous on VFP subarch 1 and asynchronous on later
233 * 0 0 1 - synchronous on VFP9 (non-standard subarch 1
234 * implementation), undefined otherwise
235 *
236 * Clear various bits and enable access to the VFP so we can
237 * handle the bounce.
231 */ 238 */
232 fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_FPV2|FPEXC_INV|FPEXC_UFC|FPEXC_OFC|FPEXC_IOC)); 239 fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_DEX|FPEXC_FP2V|FPEXC_VV|FPEXC_TRAP_MASK));
233 240
241 fpsid = fmrx(FPSID);
234 orig_fpscr = fpscr = fmrx(FPSCR); 242 orig_fpscr = fpscr = fmrx(FPSCR);
235 243
236 /* 244 /*
237 * If we are running with inexact exceptions enabled, we need to 245 * Check for the special VFP subarch 1 and FPSCR.IXE bit case
238 * emulate the trigger instruction. Note that as we're emulating
239 * the trigger instruction, we need to increment PC.
240 */ 246 */
241 if (fpscr & FPSCR_IXE) { 247 if ((fpsid & FPSID_ARCH_MASK) == (1 << FPSID_ARCH_BIT)
242 regs->ARM_pc += 4; 248 && (fpscr & FPSCR_IXE)) {
249 /*
250 * Synchronous exception, emulate the trigger instruction
251 */
243 goto emulate; 252 goto emulate;
244 } 253 }
245 254
246 barrier(); 255 if (fpexc & FPEXC_EX) {
256 /*
257 * Asynchronous exception. The instruction is read from FPINST
258 * and the interrupted instruction has to be restarted.
259 */
260 trigger = fmrx(FPINST);
261 regs->ARM_pc -= 4;
262 } else if (!(fpexc & FPEXC_DEX)) {
263 /*
264 * Illegal combination of bits. It can be caused by an
265 * unallocated VFP instruction but with FPSCR.IXE set and not
266 * on VFP subarch 1.
267 */
268 vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
269 return;
270 }
247 271
248 /* 272 /*
249 * Modify fpscr to indicate the number of iterations remaining 273 * Modify fpscr to indicate the number of iterations remaining.
274 * If FPEXC.EX is 0, FPEXC.DEX is 1 and the FPEXC.VV bit indicates
275 * whether FPEXC.VECITR or FPSCR.LEN is used.
250 */ 276 */
251 if (fpexc & FPEXC_EX) { 277 if (fpexc & (FPEXC_EX | FPEXC_VV)) {
252 u32 len; 278 u32 len;
253 279
254 len = fpexc + (1 << FPEXC_LENGTH_BIT); 280 len = fpexc + (1 << FPEXC_LENGTH_BIT);
@@ -262,15 +288,15 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
262 * FPEXC bounce reason, but this appears to be unreliable. 288 * FPEXC bounce reason, but this appears to be unreliable.
263 * Emulate the bounced instruction instead. 289 * Emulate the bounced instruction instead.
264 */ 290 */
265 inst = fmrx(FPINST); 291 exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
266 exceptions = vfp_emulate_instruction(inst, fpscr, regs);
267 if (exceptions) 292 if (exceptions)
268 vfp_raise_exceptions(exceptions, inst, orig_fpscr, regs); 293 vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
269 294
270 /* 295 /*
271 * If there isn't a second FP instruction, exit now. 296 * If there isn't a second FP instruction, exit now. Note that
297 * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
272 */ 298 */
273 if (!(fpexc & FPEXC_FPV2)) 299 if (fpexc ^ (FPEXC_EX | FPEXC_FP2V))
274 return; 300 return;
275 301
276 /* 302 /*
@@ -279,10 +305,9 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
279 */ 305 */
280 barrier(); 306 barrier();
281 trigger = fmrx(FPINST2); 307 trigger = fmrx(FPINST2);
282 orig_fpscr = fpscr = fmrx(FPSCR);
283 308
284 emulate: 309 emulate:
285 exceptions = vfp_emulate_instruction(trigger, fpscr, regs); 310 exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
286 if (exceptions) 311 if (exceptions)
287 vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); 312 vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
288} 313}
@@ -306,16 +331,9 @@ static int __init vfp_init(void)
306{ 331{
307 unsigned int vfpsid; 332 unsigned int vfpsid;
308 unsigned int cpu_arch = cpu_architecture(); 333 unsigned int cpu_arch = cpu_architecture();
309 u32 access = 0;
310 334
311 if (cpu_arch >= CPU_ARCH_ARMv6) { 335 if (cpu_arch >= CPU_ARCH_ARMv6)
312 access = get_copro_access(); 336 vfp_enable(NULL);
313
314 /*
315 * Enable full access to VFP (cp10 and cp11)
316 */
317 set_copro_access(access | CPACC_FULL(10) | CPACC_FULL(11));
318 }
319 337
320 /* 338 /*
321 * First check that there is a VFP that we can use. 339 * First check that there is a VFP that we can use.
@@ -329,15 +347,9 @@ static int __init vfp_init(void)
329 vfp_vector = vfp_null_entry; 347 vfp_vector = vfp_null_entry;
330 348
331 printk(KERN_INFO "VFP support v0.3: "); 349 printk(KERN_INFO "VFP support v0.3: ");
332 if (VFP_arch) { 350 if (VFP_arch)
333 printk("not present\n"); 351 printk("not present\n");
334 352 else if (vfpsid & FPSID_NODOUBLE) {
335 /*
336 * Restore the copro access register.
337 */
338 if (cpu_arch >= CPU_ARCH_ARMv6)
339 set_copro_access(access);
340 } else if (vfpsid & FPSID_NODOUBLE) {
341 printk("no double precision support\n"); 353 printk("no double precision support\n");
342 } else { 354 } else {
343 smp_call_function(vfp_enable, NULL, 1, 1); 355 smp_call_function(vfp_enable, NULL, 1, 1);
diff --git a/include/asm-arm/vfp.h b/include/asm-arm/vfp.h
index bd6be9d7f77..9d474d47b26 100644
--- a/include/asm-arm/vfp.h
+++ b/include/asm-arm/vfp.h
@@ -8,6 +8,8 @@
8#define FPSID cr0 8#define FPSID cr0
9#define FPSCR cr1 9#define FPSCR cr1
10#define FPEXC cr8 10#define FPEXC cr8
11#define FPINST cr9
12#define FPINST2 cr10
11 13
12/* FPSID bits */ 14/* FPSID bits */
13#define FPSID_IMPLEMENTER_BIT (24) 15#define FPSID_IMPLEMENTER_BIT (24)
@@ -28,6 +30,19 @@
28/* FPEXC bits */ 30/* FPEXC bits */
29#define FPEXC_EX (1 << 31) 31#define FPEXC_EX (1 << 31)
30#define FPEXC_EN (1 << 30) 32#define FPEXC_EN (1 << 30)
33#define FPEXC_DEX (1 << 29)
34#define FPEXC_FP2V (1 << 28)
35#define FPEXC_VV (1 << 27)
36#define FPEXC_TFV (1 << 26)
37#define FPEXC_LENGTH_BIT (8)
38#define FPEXC_LENGTH_MASK (7 << FPEXC_LENGTH_BIT)
39#define FPEXC_IDF (1 << 7)
40#define FPEXC_IXF (1 << 4)
41#define FPEXC_UFF (1 << 3)
42#define FPEXC_OFF (1 << 2)
43#define FPEXC_DZF (1 << 1)
44#define FPEXC_IOF (1 << 0)
45#define FPEXC_TRAP_MASK (FPEXC_IDF|FPEXC_IXF|FPEXC_UFF|FPEXC_OFF|FPEXC_DZF|FPEXC_IOF)
31 46
32/* FPSCR bits */ 47/* FPSCR bits */
33#define FPSCR_DEFAULT_NAN (1<<25) 48#define FPSCR_DEFAULT_NAN (1<<25)
@@ -55,21 +70,6 @@
55#define FPSCR_IXC (1<<4) 70#define FPSCR_IXC (1<<4)
56#define FPSCR_IDC (1<<7) 71#define FPSCR_IDC (1<<7)
57 72
58/*
59 * VFP9-S specific.
60 */
61#define FPINST cr9
62#define FPINST2 cr10
63
64/* FPEXC bits */
65#define FPEXC_FPV2 (1<<28)
66#define FPEXC_LENGTH_BIT (8)
67#define FPEXC_LENGTH_MASK (7 << FPEXC_LENGTH_BIT)
68#define FPEXC_INV (1 << 7)
69#define FPEXC_UFC (1 << 3)
70#define FPEXC_OFC (1 << 2)
71#define FPEXC_IOC (1 << 0)
72
73/* Bit patterns for decoding the packaged operation descriptors */ 73/* Bit patterns for decoding the packaged operation descriptors */
74#define VFPOPDESC_LENGTH_BIT (9) 74#define VFPOPDESC_LENGTH_BIT (9)
75#define VFPOPDESC_LENGTH_MASK (0x07 << VFPOPDESC_LENGTH_BIT) 75#define VFPOPDESC_LENGTH_MASK (0x07 << VFPOPDESC_LENGTH_BIT)