author		Joseph Myers <joseph@codesourcery.com>	2013-12-10 18:07:45 -0500
committer	Scott Wood <scottwood@freescale.com>	2014-01-07 19:32:21 -0500
commit		640e922501103aaf2e0abb4cf4de5d49fa8342f7
tree		4aeb43629f6537c620ac72f39a202d6b334ec8a1
parent		228b1a473037c89d524e03a569c688a22241b4ea
powerpc: fix exception clearing in e500 SPE float emulation
The e500 SPE floating-point emulation code clears existing exceptions (__FPU_FPSCR &= ~FP_EX_MASK;) before ORing in the exceptions from the emulated operation. However, these exception bits are the "sticky", cumulative exception bits, and should only be cleared by the user program setting SPEFSCR, not implicitly by any floating-point instruction (whether executed purely by the hardware or emulated). The spurious clearing of these bits shows up as missing exceptions in glibc testing.

Fixing this, however, is not as simple as just not clearing the bits, because while the bits may be from previous floating-point operations (in which case they should not be cleared), the processor can also set the sticky bits itself before the interrupt for an exception occurs, and this can happen in cases when IEEE 754 semantics are that the sticky bit should not be set. Specifically, the "invalid" sticky bit is set in various cases with non-finite operands, where IEEE 754 semantics do not involve raising such an exception, and the "underflow" sticky bit is set in cases of exact underflow, whereas IEEE 754 semantics are that this flag is set only for inexact underflow. Thus, for correct emulation the kernel needs to know the setting of these two sticky bits before the instruction being emulated.

When a floating-point operation raises an exception, the kernel can note the state of the sticky bits immediately afterwards. Some <fenv.h> functions that affect the state of these bits, such as fesetenv and feholdexcept, need to use prctl with PR_GET_FPEXC and PR_SET_FPEXC anyway, and so it is natural to record the state of those bits during that call into the kernel and so avoid any need for a separate call into the kernel to inform it of a change to those bits. Thus, the interface I chose to use (in this patch and the glibc port) is that one of those prctl calls must be made after any userspace change to those sticky bits, other than through a floating-point operation that traps into the kernel anyway. feclearexcept and fesetexceptflag duly make those calls, which would not be required were it not for this issue.

The previous EGLIBC port, and the uClibc code copied from it, is fundamentally broken as regards any use of prctl for floating-point exceptions because it didn't use the PR_FP_EXC_SW_ENABLE bit in its prctl calls (and did various worse things, such as passing a pointer when prctl expected an integer). If you avoid anything where prctl is used, the clearing of sticky bits still means it will never give anything approximating correct exception semantics with existing kernels. I don't believe the patch makes things any worse for existing code that doesn't try to inform the kernel of changes to sticky bits - such code may get incorrect exceptions in some cases, but it would have done so anyway in other cases.

Signed-off-by: Joseph Myers <joseph@codesourcery.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
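As a rough sketch of the userspace side of this contract (illustrative only, not the actual glibc code; the helper name is invented here), a library that has just modified the SPEFSCR sticky bits directly can inform the kernel by making one of the prctl calls described above. With this patch, PR_GET_FPEXC suffices when PR_FP_EXC_SW_ENABLE is in effect, and it does not otherwise change the exception mode:

#include <sys/prctl.h>

/*
 * Hypothetical helper: after userspace changes the SPEFSCR sticky
 * exception bits directly (as feclearexcept or fesetexceptflag would),
 * issue a PR_GET_FPEXC prctl so the kernel can snapshot the new
 * SPEFSCR value into thread.spefscr_last.
 */
static int notify_kernel_spefscr_changed(void)
{
	unsigned int mode;

	return prctl(PR_GET_FPEXC, &mode);
}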
-rw-r--r--	arch/powerpc/include/asm/processor.h	6
-rw-r--r--	arch/powerpc/kernel/process.c	30
-rw-r--r--	arch/powerpc/math-emu/math_efp.c	20
3 files changed, 52 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index fc14a38c7ccf..91441d9cbaae 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -256,6 +256,8 @@ struct thread_struct {
 	unsigned long	evr[32];	/* upper 32-bits of SPE regs */
 	u64		acc;		/* Accumulator */
 	unsigned long	spefscr;	/* SPE & eFP status */
+	unsigned long	spefscr_last;	/* SPEFSCR value on last prctl
+					   call or trap return */
 	int		used_spe;	/* set if process has used spe */
 #endif /* CONFIG_SPE */
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -317,7 +319,9 @@ struct thread_struct {
 	(_ALIGN_UP(sizeof(init_thread_info), 16) + (unsigned long) &init_stack)
 
 #ifdef CONFIG_SPE
-#define SPEFSCR_INIT .spefscr = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE,
+#define SPEFSCR_INIT \
+	.spefscr = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE, \
+	.spefscr_last = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE,
 #else
 #define SPEFSCR_INIT
 #endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 3386d8ab7eb0..b08c0d03530f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1175,6 +1175,19 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
 	if (val & PR_FP_EXC_SW_ENABLE) {
 #ifdef CONFIG_SPE
 		if (cpu_has_feature(CPU_FTR_SPE)) {
+			/*
+			 * When the sticky exception bits are set
+			 * directly by userspace, it must call prctl
+			 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
+			 * in the existing prctl settings) or
+			 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
+			 * the bits being set).  <fenv.h> functions
+			 * saving and restoring the whole
+			 * floating-point environment need to do so
+			 * anyway to restore the prctl settings from
+			 * the saved environment.
+			 */
+			tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
 			tsk->thread.fpexc_mode = val &
 				(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
 			return 0;
@@ -1206,9 +1219,22 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
 
 	if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
 #ifdef CONFIG_SPE
-		if (cpu_has_feature(CPU_FTR_SPE))
+		if (cpu_has_feature(CPU_FTR_SPE)) {
+			/*
+			 * When the sticky exception bits are set
+			 * directly by userspace, it must call prctl
+			 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
+			 * in the existing prctl settings) or
+			 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
+			 * the bits being set).  <fenv.h> functions
+			 * saving and restoring the whole
+			 * floating-point environment need to do so
+			 * anyway to restore the prctl settings from
+			 * the saved environment.
+			 */
+			tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
 			val = tsk->thread.fpexc_mode;
-		else
+		} else
 			return -EINVAL;
 #else
 		return -EINVAL;
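For context only (not part of this patch), the SPE branches touched above are reached once a program has selected software-handled floating-point exceptions; a minimal sketch of that prctl call, spelling out the individual bits that the kernel groups as PR_FP_ALL_EXCEPT, might look like:

#include <sys/prctl.h>

/*
 * Illustrative sketch: request software-handled FP exceptions with
 * every exception bit enabled (the same set the kernel's
 * PR_FP_ALL_EXCEPT mask covers).  Only in this mode do
 * set_fpexc_mode() and get_fpexc_mode() above record SPEFSCR into
 * spefscr_last.
 */
static int enable_sw_fp_exceptions(void)
{
	return prctl(PR_SET_FPEXC,
		     PR_FP_EXC_SW_ENABLE | PR_FP_EXC_DIV | PR_FP_EXC_OVF |
		     PR_FP_EXC_UND | PR_FP_EXC_RES | PR_FP_EXC_INV);
}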
diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c
index a73f0884d358..59835c625dc6 100644
--- a/arch/powerpc/math-emu/math_efp.c
+++ b/arch/powerpc/math-emu/math_efp.c
@@ -630,9 +630,27 @@ update_ccr:
 		regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2));
 
 update_regs:
-	__FPU_FPSCR &= ~FP_EX_MASK;
+	/*
+	 * If the "invalid" exception sticky bit was set by the
+	 * processor for non-finite input, but was not set before the
+	 * instruction being emulated, clear it.  Likewise for the
+	 * "underflow" bit, which may have been set by the processor
+	 * for exact underflow, not just inexact underflow when the
+	 * flag should be set for IEEE 754 semantics.  Other sticky
+	 * exceptions will only be set by the processor when they are
+	 * correct according to IEEE 754 semantics, and we must not
+	 * clear sticky bits that were already set before the emulated
+	 * instruction as they represent the user-visible sticky
+	 * exception status.  "inexact" traps to kernel are not
+	 * required for IEEE semantics and are not enabled by default,
+	 * so the "inexact" sticky bit may have been set by a previous
+	 * instruction without the kernel being aware of it.
+	 */
+	__FPU_FPSCR
+	  &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last;
 	__FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK);
 	mtspr(SPRN_SPEFSCR, __FPU_FPSCR);
+	current->thread.spefscr_last = __FPU_FPSCR;
 
 	current->thread.evr[fc] = vc.wp[0];
 	regs->gpr[fc] = vc.wp[1];
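A tiny standalone demonstration of the masking expression above, using made-up bit values rather than the kernel's FP_EX_* constants: a sticky bit recorded in spefscr_last before the emulated instruction survives, while one set spuriously by the hardware during the trap is dropped.

#include <assert.h>

/* Stand-ins for FP_EX_INVALID / FP_EX_UNDERFLOW; values invented for
 * this demonstration only. */
#define EX_INVALID	0x01u
#define EX_UNDERFLOW	0x02u

int main(void)
{
	unsigned int last  = EX_INVALID;		/* already set before the trap */
	unsigned int fpscr = EX_INVALID | EX_UNDERFLOW;	/* both set when the trap is taken */

	/* Same shape as:
	 * __FPU_FPSCR &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW)
	 *		  | current->thread.spefscr_last; */
	fpscr &= ~(EX_INVALID | EX_UNDERFLOW) | last;

	assert(fpscr & EX_INVALID);		/* preserved: genuine user-visible sticky state */
	assert(!(fpscr & EX_UNDERFLOW));	/* dropped: set only by the hardware during the trap */
	return 0;
}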