[ARM] 4582/2: Add support for the common VFP subarchitecture

This patch allows the VFP support code to run correctly on CPUs compatible with the common VFP subarchitecture specification (Appendix B in the ARM ARM v7-A and v7-R edition). It implements support for VFP subarchitecture 2 while being backwards compatible with subarchitecture 1. On VFP subarchitecture 1, the arithmetic exceptions are asynchronous (or imprecise as described in the old ARM ARM) unless the FPSCR.IXE bit is 1. The exceptional instructions can be read from FPINST and FPINST2 registers. With VFP subarchitecture 2, the arithmetic exceptions can also be synchronous and marked by the FPEXC.DEX bit (the FPEXC.EX bit is cleared). CPUs implementing the synchronous arithmetic exceptions don't have the FPINST and FPINST2 registers and accessing them would trigger an undefined exception. Note that the FPEXC.EX bit has an additional meaning on subarchitecture 1 - if it isn't set, there is no additional information in FPINST and FPINST2 that needs to be saved at context switch or when lazy-loading the VFP state of a different thread. The patch also removes the clearing of the cumulative exception flags in FPSCR when additional exceptions were raised. It is up to the user application to clear these bits. Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
author: Catalin Marinas <catalin.marinas@arm.com> 2007-11-22 12:32:01 -0500
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2008-01-26 09:41:28 -0500
commit: c98929c07a01c9ec2e1e5253456acc7168da8b66 (patch)
tree: 7d0014de51fe530b95bce7f74d9122229067f850 /arch/arm/vfp/vfpmodule.c
parent: 9b73e76f3cf63379dcf45fcd4f112f5812418d0a (diff)
1 files changed, 55 insertions, 43 deletions
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index b4e210df92f2..32455c633f1c 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -125,13 +125,13 @@ void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs)
        send_sig_info(SIGFPE, &info, current);
 }
-static void vfp_panic(char *reason)
+static void vfp_panic(char *reason, u32 inst)
 {
        int i;
        printk(KERN_ERR "VFP: Error: %s\n", reason);
        printk(KERN_ERR "VFP: EXC 0x%08x SCR 0x%08x INST 0x%08x\n",
-                fmrx(FPEXC), fmrx(FPSCR), fmrx(FPINST));
+                fmrx(FPEXC), fmrx(FPSCR), inst);
        for (i = 0; i < 32; i += 2)
                printk(KERN_ERR "VFP: s%2u: 0x%08x s%2u: 0x%08x\n",
                       i, vfp_get_float(i), i+1, vfp_get_float(i+1));
@@ -147,19 +147,16 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
        pr_debug("VFP: raising exceptions %08x\n", exceptions);
        if (exceptions == VFP_EXCEPTION_ERROR) {
-                vfp_panic("unhandled bounce");
+                vfp_panic("unhandled bounce", inst);
                vfp_raise_sigfpe(0, regs);
                return;
        }
        /*
-         * If any of the status flags are set, update the FPSCR.
+         * Update the FPSCR with the additional exception flags.
         * Comparison instructions always return at least one of
         * these flags set.
         */
-        if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
-                fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V);
        fpscr |= exceptions;
        fmxr(FPSCR, fpscr);
@@ -220,35 +217,64 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs)
 /*
 * Package up a bounce condition.
 */
-void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
+void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 {
-        u32 fpscr, orig_fpscr, exceptions, inst;
+        u32 fpscr, orig_fpscr, fpsid, exceptions;
        pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc);
        /*
-         * Enable access to the VFP so we can handle the bounce.
+         * At this point, FPEXC can have the following configuration:
+         *
+         *  EX DEX IXE
+         *  0   1   x   - synchronous exception
+         *  1   x   0   - asynchronous exception
+         *  1   x   1   - synchronous on VFP subarch 1 and asynchronous on later
+         *  0   0   1   - synchronous on VFP9 (non-standard subarch 1
+         *                implementation), undefined otherwise
+         *
+         * Clear various bits and enable access to the VFP so we can
+         * handle the bounce.
         */
-        fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_FPV2|FPEXC_INV|FPEXC_UFC|FPEXC_OFC|FPEXC_IOC));
+        fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_DEX|FPEXC_FP2V|FPEXC_VV|FPEXC_TRAP_MASK));
+        fpsid = fmrx(FPSID);
        orig_fpscr = fpscr = fmrx(FPSCR);
        /*
-         * If we are running with inexact exceptions enabled, we need to
+         * Check for the special VFP subarch 1 and FPSCR.IXE bit case
-         * emulate the trigger instruction.  Note that as we're emulating
-         * the trigger instruction, we need to increment PC.
         */
-        if (fpscr & FPSCR_IXE) {
+        if ((fpsid & FPSID_ARCH_MASK) == (1 << FPSID_ARCH_BIT)
-                regs->ARM_pc += 4;
+            && (fpscr & FPSCR_IXE)) {
+                /*
+                 * Synchronous exception, emulate the trigger instruction
+                 */
                goto emulate;
        }
-        barrier();
+        if (fpexc & FPEXC_EX) {
+                /*
+                 * Asynchronous exception. The instruction is read from FPINST
+                 * and the interrupted instruction has to be restarted.
+                 */
+                trigger = fmrx(FPINST);
+                regs->ARM_pc -= 4;
+        } else if (!(fpexc & FPEXC_DEX)) {
+                /*
+                 * Illegal combination of bits. It can be caused by an
+                 * unallocated VFP instruction but with FPSCR.IXE set and not
+                 * on VFP subarch 1.
+                 */
+                 vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
+                 return;
+        }
        /*
-         * Modify fpscr to indicate the number of iterations remaining
+         * Modify fpscr to indicate the number of iterations remaining.
+         * If FPEXC.EX is 0, FPEXC.DEX is 1 and the FPEXC.VV bit indicates
+         * whether FPEXC.VECITR or FPSCR.LEN is used.
         */
-        if (fpexc & FPEXC_EX) {
+        if (fpexc & (FPEXC_EX | FPEXC_VV)) {
                u32 len;
                len = fpexc + (1 << FPEXC_LENGTH_BIT);
@@ -262,15 +288,15 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
         * FPEXC bounce reason, but this appears to be unreliable.
         * Emulate the bounced instruction instead.
         */
-        inst = fmrx(FPINST);
+        exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
-        exceptions = vfp_emulate_instruction(inst, fpscr, regs);
        if (exceptions)
-                vfp_raise_exceptions(exceptions, inst, orig_fpscr, regs);
+                vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
        /*
-         * If there isn't a second FP instruction, exit now.
+         * If there isn't a second FP instruction, exit now. Note that
+         * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
         */
-        if (!(fpexc & FPEXC_FPV2))
+        if (fpexc ^ (FPEXC_EX | FPEXC_FP2V))
                return;
        /*
@@ -279,10 +305,9 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
         */
        barrier();
        trigger = fmrx(FPINST2);
-        orig_fpscr = fpscr = fmrx(FPSCR);
 emulate:
-        exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
+        exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
        if (exceptions)
                vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
 }
@@ -306,16 +331,9 @@ static int __init vfp_init(void)
 {
        unsigned int vfpsid;
        unsigned int cpu_arch = cpu_architecture();
-        u32 access = 0;
-        if (cpu_arch >= CPU_ARCH_ARMv6) {
+        if (cpu_arch >= CPU_ARCH_ARMv6)
-                access = get_copro_access();
+                vfp_enable(NULL);
-                /*
-                 * Enable full access to VFP (cp10 and cp11)
-                 */
-                set_copro_access(access | CPACC_FULL(10) | CPACC_FULL(11));
-        }
        /*
         * First check that there is a VFP that we can use.
@@ -329,15 +347,9 @@ static int __init vfp_init(void)
        vfp_vector = vfp_null_entry;
        printk(KERN_INFO "VFP support v0.3: ");
-        if (VFP_arch) {
+        if (VFP_arch)
                printk("not present\n");
+        else if (vfpsid & FPSID_NODOUBLE) {
-                /*
-                 * Restore the copro access register.
-                 */
-                if (cpu_arch >= CPU_ARCH_ARMv6)
-                        set_copro_access(access);
-        } else if (vfpsid & FPSID_NODOUBLE) {
                printk("no double precision support\n");
        } else {
                smp_call_function(vfp_enable, NULL, 1, 1);
author	Catalin Marinas <catalin.marinas@arm.com>	2007-11-22 12:32:01 -0500
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2008-01-26 09:41:28 -0500
commit	c98929c07a01c9ec2e1e5253456acc7168da8b66 (patch)
tree	7d0014de51fe530b95bce7f74d9122229067f850 /arch/arm/vfp/vfpmodule.c
parent	9b73e76f3cf63379dcf45fcd4f112f5812418d0a (diff)

diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index b4e210df92f2..32455c633f1c 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c
@@ -125,13 +125,13 @@ void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs)
125	send_sig_info(SIGFPE, &info, current);	125	send_sig_info(SIGFPE, &info, current);
126	}	126	}
127		127
128	static void vfp_panic(char *reason)	128	static void vfp_panic(char *reason, u32 inst)
129	{	129	{
130	int i;	130	int i;
131		131
132	printk(KERN_ERR "VFP: Error: %s\n", reason);	132	printk(KERN_ERR "VFP: Error: %s\n", reason);
133	printk(KERN_ERR "VFP: EXC 0x%08x SCR 0x%08x INST 0x%08x\n",	133	printk(KERN_ERR "VFP: EXC 0x%08x SCR 0x%08x INST 0x%08x\n",
134	fmrx(FPEXC), fmrx(FPSCR), fmrx(FPINST));	134	fmrx(FPEXC), fmrx(FPSCR), inst);
135	for (i = 0; i < 32; i += 2)	135	for (i = 0; i < 32; i += 2)
136	printk(KERN_ERR "VFP: s%2u: 0x%08x s%2u: 0x%08x\n",	136	printk(KERN_ERR "VFP: s%2u: 0x%08x s%2u: 0x%08x\n",
137	i, vfp_get_float(i), i+1, vfp_get_float(i+1));	137	i, vfp_get_float(i), i+1, vfp_get_float(i+1));
@@ -147,19 +147,16 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
147	pr_debug("VFP: raising exceptions %08x\n", exceptions);	147	pr_debug("VFP: raising exceptions %08x\n", exceptions);
148		148
149	if (exceptions == VFP_EXCEPTION_ERROR) {	149	if (exceptions == VFP_EXCEPTION_ERROR) {
150	vfp_panic("unhandled bounce");	150	vfp_panic("unhandled bounce", inst);
151	vfp_raise_sigfpe(0, regs);	151	vfp_raise_sigfpe(0, regs);
152	return;	152	return;
153	}	153	}
154		154
155	/*	155	/*
156	* If any of the status flags are set, update the FPSCR.	156	* Update the FPSCR with the additional exception flags.
157	* Comparison instructions always return at least one of	157	* Comparison instructions always return at least one of
158	* these flags set.	158	* these flags set.
159	*/	159	*/
160	if (exceptions & (FPSCR_N\|FPSCR_Z\|FPSCR_C\|FPSCR_V))
161	fpscr &= ~(FPSCR_N\|FPSCR_Z\|FPSCR_C\|FPSCR_V);
162
163	fpscr \|= exceptions;	160	fpscr \|= exceptions;
164		161
165	fmxr(FPSCR, fpscr);	162	fmxr(FPSCR, fpscr);
@@ -220,35 +217,64 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs)
220	/*	217	/*
221	* Package up a bounce condition.	218	* Package up a bounce condition.
222	*/	219	*/
223	void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)	220	void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
224	{	221	{
225	u32 fpscr, orig_fpscr, exceptions, inst;	222	u32 fpscr, orig_fpscr, fpsid, exceptions;
226		223
227	pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc);	224	pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc);
228		225
229	/*	226	/*
230	* Enable access to the VFP so we can handle the bounce.	227	* At this point, FPEXC can have the following configuration:
		228	*
		229	* EX DEX IXE
		230	* 0 1 x - synchronous exception
		231	* 1 x 0 - asynchronous exception
		232	* 1 x 1 - synchronous on VFP subarch 1 and asynchronous on later
		233	* 0 0 1 - synchronous on VFP9 (non-standard subarch 1
		234	* implementation), undefined otherwise
		235	*
		236	* Clear various bits and enable access to the VFP so we can
		237	* handle the bounce.
231	*/	238	*/
232	fmxr(FPEXC, fpexc & ~(FPEXC_EX\|FPEXC_FPV2\|FPEXC_INV\|FPEXC_UFC\|FPEXC_OFC\|FPEXC_IOC));	239	fmxr(FPEXC, fpexc & ~(FPEXC_EX\|FPEXC_DEX\|FPEXC_FP2V\|FPEXC_VV\|FPEXC_TRAP_MASK));
233		240
		241	fpsid = fmrx(FPSID);
234	orig_fpscr = fpscr = fmrx(FPSCR);	242	orig_fpscr = fpscr = fmrx(FPSCR);
235		243
236	/*	244	/*
237	* If we are running with inexact exceptions enabled, we need to	245	* Check for the special VFP subarch 1 and FPSCR.IXE bit case
238	* emulate the trigger instruction. Note that as we're emulating
239	* the trigger instruction, we need to increment PC.
240	*/	246	*/
241	if (fpscr & FPSCR_IXE) {	247	if ((fpsid & FPSID_ARCH_MASK) == (1 << FPSID_ARCH_BIT)
242	regs->ARM_pc += 4;	248	&& (fpscr & FPSCR_IXE)) {
		249	/*
		250	* Synchronous exception, emulate the trigger instruction
		251	*/
243	goto emulate;	252	goto emulate;
244	}	253	}
245		254
246	barrier();	255	if (fpexc & FPEXC_EX) {
		256	/*
		257	* Asynchronous exception. The instruction is read from FPINST
		258	* and the interrupted instruction has to be restarted.
		259	*/
		260	trigger = fmrx(FPINST);
		261	regs->ARM_pc -= 4;
		262	} else if (!(fpexc & FPEXC_DEX)) {
		263	/*
		264	* Illegal combination of bits. It can be caused by an
		265	* unallocated VFP instruction but with FPSCR.IXE set and not
		266	* on VFP subarch 1.
		267	*/
		268	vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
		269	return;
		270	}
247		271
248	/*	272	/*
249	* Modify fpscr to indicate the number of iterations remaining	273	* Modify fpscr to indicate the number of iterations remaining.
		274	* If FPEXC.EX is 0, FPEXC.DEX is 1 and the FPEXC.VV bit indicates
		275	* whether FPEXC.VECITR or FPSCR.LEN is used.
250	*/	276	*/
251	if (fpexc & FPEXC_EX) {	277	if (fpexc & (FPEXC_EX \| FPEXC_VV)) {
252	u32 len;	278	u32 len;
253		279
254	len = fpexc + (1 << FPEXC_LENGTH_BIT);	280	len = fpexc + (1 << FPEXC_LENGTH_BIT);
@@ -262,15 +288,15 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
262	* FPEXC bounce reason, but this appears to be unreliable.	288	* FPEXC bounce reason, but this appears to be unreliable.
263	* Emulate the bounced instruction instead.	289	* Emulate the bounced instruction instead.
264	*/	290	*/
265	inst = fmrx(FPINST);	291	exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
266	exceptions = vfp_emulate_instruction(inst, fpscr, regs);
267	if (exceptions)	292	if (exceptions)
268	vfp_raise_exceptions(exceptions, inst, orig_fpscr, regs);	293	vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
269		294
270	/*	295	/*
271	* If there isn't a second FP instruction, exit now.	296	* If there isn't a second FP instruction, exit now. Note that
		297	* the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
272	*/	298	*/
273	if (!(fpexc & FPEXC_FPV2))	299	if (fpexc ^ (FPEXC_EX \| FPEXC_FP2V))
274	return;	300	return;
275		301
276	/*	302	/*
@@ -279,10 +305,9 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
279	*/	305	*/
280	barrier();	306	barrier();
281	trigger = fmrx(FPINST2);	307	trigger = fmrx(FPINST2);
282	orig_fpscr = fpscr = fmrx(FPSCR);
283		308
284	emulate:	309	emulate:
285	exceptions = vfp_emulate_instruction(trigger, fpscr, regs);	310	exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
286	if (exceptions)	311	if (exceptions)
287	vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);	312	vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
288	}	313	}
@@ -306,16 +331,9 @@ static int __init vfp_init(void)
306	{	331	{
307	unsigned int vfpsid;	332	unsigned int vfpsid;
308	unsigned int cpu_arch = cpu_architecture();	333	unsigned int cpu_arch = cpu_architecture();
309	u32 access = 0;
310		334
311	if (cpu_arch >= CPU_ARCH_ARMv6) {	335	if (cpu_arch >= CPU_ARCH_ARMv6)
312	access = get_copro_access();	336	vfp_enable(NULL);
313
314	/*
315	* Enable full access to VFP (cp10 and cp11)
316	*/
317	set_copro_access(access \| CPACC_FULL(10) \| CPACC_FULL(11));
318	}
319		337
320	/*	338	/*
321	* First check that there is a VFP that we can use.	339	* First check that there is a VFP that we can use.
@@ -329,15 +347,9 @@ static int __init vfp_init(void)
329	vfp_vector = vfp_null_entry;	347	vfp_vector = vfp_null_entry;
330		348
331	printk(KERN_INFO "VFP support v0.3: ");	349	printk(KERN_INFO "VFP support v0.3: ");
332	if (VFP_arch) {	350	if (VFP_arch)
333	printk("not present\n");	351	printk("not present\n");
334		352	else if (vfpsid & FPSID_NODOUBLE) {
335	/*
336	* Restore the copro access register.
337	*/
338	if (cpu_arch >= CPU_ARCH_ARMv6)
339	set_copro_access(access);
340	} else if (vfpsid & FPSID_NODOUBLE) {
341	printk("no double precision support\n");	353	printk("no double precision support\n");
342	} else {	354	} else {
343	smp_call_function(vfp_enable, NULL, 1, 1);	355	smp_call_function(vfp_enable, NULL, 1, 1);