aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sh/kernel/cpu/sh4
diff options
context:
space:
mode:
authorStuart Menefy <stuart.menefy@st.com>2007-11-30 04:42:27 -0500
committerPaul Mundt <lethal@linux-sh.org>2008-01-27 23:18:59 -0500
commitc8c0a1aba9fa8f816dc8fb477ff816a5b700f0ea (patch)
tree54329f0b6497be088fc573c67e5541863041fdde /arch/sh/kernel/cpu/sh4
parent453ec9c1c3808b051347edbbf637f997add7b85b (diff)
sh: Support denormalization on SH-4 FPU.
Signed-off-by: Stuart Menefy <stuart.menefy@st.com> Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh/kernel/cpu/sh4')
-rw-r--r--arch/sh/kernel/cpu/sh4/Makefile2
-rw-r--r--arch/sh/kernel/cpu/sh4/fpu.c514
-rw-r--r--arch/sh/kernel/cpu/sh4/softfloat.c892
3 files changed, 1227 insertions, 181 deletions
diff --git a/arch/sh/kernel/cpu/sh4/Makefile b/arch/sh/kernel/cpu/sh4/Makefile
index dadd6bffc128..d608557c7a3f 100644
--- a/arch/sh/kernel/cpu/sh4/Makefile
+++ b/arch/sh/kernel/cpu/sh4/Makefile
@@ -5,7 +5,7 @@
5obj-y := probe.o common.o 5obj-y := probe.o common.o
6common-y += $(addprefix ../sh3/, entry.o ex.o) 6common-y += $(addprefix ../sh3/, entry.o ex.o)
7 7
8obj-$(CONFIG_SH_FPU) += fpu.o 8obj-$(CONFIG_SH_FPU) += fpu.o softfloat.o
9obj-$(CONFIG_SH_STORE_QUEUES) += sq.o 9obj-$(CONFIG_SH_STORE_QUEUES) += sq.o
10 10
11# CPU subtype setup 11# CPU subtype setup
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c
index e624180b4467..817f9939cda6 100644
--- a/arch/sh/kernel/cpu/sh4/fpu.c
+++ b/arch/sh/kernel/cpu/sh4/fpu.c
@@ -1,7 +1,4 @@
1/* $Id: fpu.c,v 1.4 2004/01/13 05:52:11 kkojima Exp $ 1/*
2 *
3 * linux/arch/sh/kernel/fpu.c
4 *
5 * Save/restore floating point context for signal handlers. 2 * Save/restore floating point context for signal handlers.
6 * 3 *
7 * This file is subject to the terms and conditions of the GNU General Public 4 * This file is subject to the terms and conditions of the GNU General Public
@@ -9,15 +6,16 @@
9 * for more details. 6 * for more details.
10 * 7 *
11 * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka 8 * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka
9 * Copyright (C) 2006 ST Microelectronics Ltd. (denorm support)
12 * 10 *
13 * FIXME! These routines can be optimized in big endian case. 11 * FIXME! These routines have not been tested for big endian case.
14 */ 12 */
15
16#include <linux/sched.h> 13#include <linux/sched.h>
17#include <linux/signal.h> 14#include <linux/signal.h>
15#include <linux/io.h>
16#include <asm/cpu/fpu.h>
18#include <asm/processor.h> 17#include <asm/processor.h>
19#include <asm/system.h> 18#include <asm/system.h>
20#include <asm/io.h>
21 19
22/* The PR (precision) bit in the FP Status Register must be clear when 20/* The PR (precision) bit in the FP Status Register must be clear when
23 * an frchg instruction is executed, otherwise the instruction is undefined. 21 * an frchg instruction is executed, otherwise the instruction is undefined.
@@ -25,113 +23,122 @@
25 */ 23 */
26 24
27#define FPSCR_RCHG 0x00000000 25#define FPSCR_RCHG 0x00000000
26extern unsigned long long float64_div(unsigned long long a,
27 unsigned long long b);
28extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
29extern unsigned long long float64_mul(unsigned long long a,
30 unsigned long long b);
31extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
32extern unsigned long long float64_add(unsigned long long a,
33 unsigned long long b);
34extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
35extern unsigned long long float64_sub(unsigned long long a,
36 unsigned long long b);
37extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
28 38
39static unsigned int fpu_exception_flags;
29 40
30/* 41/*
31 * Save FPU registers onto task structure. 42 * Save FPU registers onto task structure.
32 * Assume called with FPU enabled (SR.FD=0). 43 * Assume called with FPU enabled (SR.FD=0).
33 */ 44 */
34void 45void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
35save_fpu(struct task_struct *tsk, struct pt_regs *regs)
36{ 46{
37 unsigned long dummy; 47 unsigned long dummy;
38 48
39 clear_tsk_thread_flag(tsk, TIF_USEDFPU); 49 clear_tsk_thread_flag(tsk, TIF_USEDFPU);
40 enable_fpu(); 50 enable_fpu();
41 asm volatile("sts.l fpul, @-%0\n\t" 51 asm volatile ("sts.l fpul, @-%0\n\t"
42 "sts.l fpscr, @-%0\n\t" 52 "sts.l fpscr, @-%0\n\t"
43 "lds %2, fpscr\n\t" 53 "lds %2, fpscr\n\t"
44 "frchg\n\t" 54 "frchg\n\t"
45 "fmov.s fr15, @-%0\n\t" 55 "fmov.s fr15, @-%0\n\t"
46 "fmov.s fr14, @-%0\n\t" 56 "fmov.s fr14, @-%0\n\t"
47 "fmov.s fr13, @-%0\n\t" 57 "fmov.s fr13, @-%0\n\t"
48 "fmov.s fr12, @-%0\n\t" 58 "fmov.s fr12, @-%0\n\t"
49 "fmov.s fr11, @-%0\n\t" 59 "fmov.s fr11, @-%0\n\t"
50 "fmov.s fr10, @-%0\n\t" 60 "fmov.s fr10, @-%0\n\t"
51 "fmov.s fr9, @-%0\n\t" 61 "fmov.s fr9, @-%0\n\t"
52 "fmov.s fr8, @-%0\n\t" 62 "fmov.s fr8, @-%0\n\t"
53 "fmov.s fr7, @-%0\n\t" 63 "fmov.s fr7, @-%0\n\t"
54 "fmov.s fr6, @-%0\n\t" 64 "fmov.s fr6, @-%0\n\t"
55 "fmov.s fr5, @-%0\n\t" 65 "fmov.s fr5, @-%0\n\t"
56 "fmov.s fr4, @-%0\n\t" 66 "fmov.s fr4, @-%0\n\t"
57 "fmov.s fr3, @-%0\n\t" 67 "fmov.s fr3, @-%0\n\t"
58 "fmov.s fr2, @-%0\n\t" 68 "fmov.s fr2, @-%0\n\t"
59 "fmov.s fr1, @-%0\n\t" 69 "fmov.s fr1, @-%0\n\t"
60 "fmov.s fr0, @-%0\n\t" 70 "fmov.s fr0, @-%0\n\t"
61 "frchg\n\t" 71 "frchg\n\t"
62 "fmov.s fr15, @-%0\n\t" 72 "fmov.s fr15, @-%0\n\t"
63 "fmov.s fr14, @-%0\n\t" 73 "fmov.s fr14, @-%0\n\t"
64 "fmov.s fr13, @-%0\n\t" 74 "fmov.s fr13, @-%0\n\t"
65 "fmov.s fr12, @-%0\n\t" 75 "fmov.s fr12, @-%0\n\t"
66 "fmov.s fr11, @-%0\n\t" 76 "fmov.s fr11, @-%0\n\t"
67 "fmov.s fr10, @-%0\n\t" 77 "fmov.s fr10, @-%0\n\t"
68 "fmov.s fr9, @-%0\n\t" 78 "fmov.s fr9, @-%0\n\t"
69 "fmov.s fr8, @-%0\n\t" 79 "fmov.s fr8, @-%0\n\t"
70 "fmov.s fr7, @-%0\n\t" 80 "fmov.s fr7, @-%0\n\t"
71 "fmov.s fr6, @-%0\n\t" 81 "fmov.s fr6, @-%0\n\t"
72 "fmov.s fr5, @-%0\n\t" 82 "fmov.s fr5, @-%0\n\t"
73 "fmov.s fr4, @-%0\n\t" 83 "fmov.s fr4, @-%0\n\t"
74 "fmov.s fr3, @-%0\n\t" 84 "fmov.s fr3, @-%0\n\t"
75 "fmov.s fr2, @-%0\n\t" 85 "fmov.s fr2, @-%0\n\t"
76 "fmov.s fr1, @-%0\n\t" 86 "fmov.s fr1, @-%0\n\t"
77 "fmov.s fr0, @-%0\n\t" 87 "fmov.s fr0, @-%0\n\t"
78 "lds %3, fpscr\n\t" 88 "lds %3, fpscr\n\t":"=r" (dummy)
79 : "=r" (dummy) 89 :"0"((char *)(&tsk->thread.fpu.hard.status)),
80 : "0" ((char *)(&tsk->thread.fpu.hard.status)), 90 "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
81 "r" (FPSCR_RCHG), 91 :"memory");
82 "r" (FPSCR_INIT)
83 : "memory");
84 92
85 disable_fpu(); 93 disable_fpu();
86 release_fpu(regs); 94 release_fpu(regs);
87} 95}
88 96
89static void 97static void restore_fpu(struct task_struct *tsk)
90restore_fpu(struct task_struct *tsk)
91{ 98{
92 unsigned long dummy; 99 unsigned long dummy;
93 100
94 enable_fpu(); 101 enable_fpu();
95 asm volatile("lds %2, fpscr\n\t" 102 asm volatile ("lds %2, fpscr\n\t"
96 "fmov.s @%0+, fr0\n\t" 103 "fmov.s @%0+, fr0\n\t"
97 "fmov.s @%0+, fr1\n\t" 104 "fmov.s @%0+, fr1\n\t"
98 "fmov.s @%0+, fr2\n\t" 105 "fmov.s @%0+, fr2\n\t"
99 "fmov.s @%0+, fr3\n\t" 106 "fmov.s @%0+, fr3\n\t"
100 "fmov.s @%0+, fr4\n\t" 107 "fmov.s @%0+, fr4\n\t"
101 "fmov.s @%0+, fr5\n\t" 108 "fmov.s @%0+, fr5\n\t"
102 "fmov.s @%0+, fr6\n\t" 109 "fmov.s @%0+, fr6\n\t"
103 "fmov.s @%0+, fr7\n\t" 110 "fmov.s @%0+, fr7\n\t"
104 "fmov.s @%0+, fr8\n\t" 111 "fmov.s @%0+, fr8\n\t"
105 "fmov.s @%0+, fr9\n\t" 112 "fmov.s @%0+, fr9\n\t"
106 "fmov.s @%0+, fr10\n\t" 113 "fmov.s @%0+, fr10\n\t"
107 "fmov.s @%0+, fr11\n\t" 114 "fmov.s @%0+, fr11\n\t"
108 "fmov.s @%0+, fr12\n\t" 115 "fmov.s @%0+, fr12\n\t"
109 "fmov.s @%0+, fr13\n\t" 116 "fmov.s @%0+, fr13\n\t"
110 "fmov.s @%0+, fr14\n\t" 117 "fmov.s @%0+, fr14\n\t"
111 "fmov.s @%0+, fr15\n\t" 118 "fmov.s @%0+, fr15\n\t"
112 "frchg\n\t" 119 "frchg\n\t"
113 "fmov.s @%0+, fr0\n\t" 120 "fmov.s @%0+, fr0\n\t"
114 "fmov.s @%0+, fr1\n\t" 121 "fmov.s @%0+, fr1\n\t"
115 "fmov.s @%0+, fr2\n\t" 122 "fmov.s @%0+, fr2\n\t"
116 "fmov.s @%0+, fr3\n\t" 123 "fmov.s @%0+, fr3\n\t"
117 "fmov.s @%0+, fr4\n\t" 124 "fmov.s @%0+, fr4\n\t"
118 "fmov.s @%0+, fr5\n\t" 125 "fmov.s @%0+, fr5\n\t"
119 "fmov.s @%0+, fr6\n\t" 126 "fmov.s @%0+, fr6\n\t"
120 "fmov.s @%0+, fr7\n\t" 127 "fmov.s @%0+, fr7\n\t"
121 "fmov.s @%0+, fr8\n\t" 128 "fmov.s @%0+, fr8\n\t"
122 "fmov.s @%0+, fr9\n\t" 129 "fmov.s @%0+, fr9\n\t"
123 "fmov.s @%0+, fr10\n\t" 130 "fmov.s @%0+, fr10\n\t"
124 "fmov.s @%0+, fr11\n\t" 131 "fmov.s @%0+, fr11\n\t"
125 "fmov.s @%0+, fr12\n\t" 132 "fmov.s @%0+, fr12\n\t"
126 "fmov.s @%0+, fr13\n\t" 133 "fmov.s @%0+, fr13\n\t"
127 "fmov.s @%0+, fr14\n\t" 134 "fmov.s @%0+, fr14\n\t"
128 "fmov.s @%0+, fr15\n\t" 135 "fmov.s @%0+, fr15\n\t"
129 "frchg\n\t" 136 "frchg\n\t"
130 "lds.l @%0+, fpscr\n\t" 137 "lds.l @%0+, fpscr\n\t"
131 "lds.l @%0+, fpul\n\t" 138 "lds.l @%0+, fpul\n\t"
132 : "=r" (dummy) 139 :"=r" (dummy)
133 : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG) 140 :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
134 : "memory"); 141 :"memory");
135 disable_fpu(); 142 disable_fpu();
136} 143}
137 144
@@ -141,61 +148,59 @@ restore_fpu(struct task_struct *tsk)
141 * double precision represents signaling NANS. 148 * double precision represents signaling NANS.
142 */ 149 */
143 150
144static void 151static void fpu_init(void)
145fpu_init(void)
146{ 152{
147 enable_fpu(); 153 enable_fpu();
148 asm volatile("lds %0, fpul\n\t" 154 asm volatile ( "lds %0, fpul\n\t"
149 "lds %1, fpscr\n\t" 155 "lds %1, fpscr\n\t"
150 "fsts fpul, fr0\n\t" 156 "fsts fpul, fr0\n\t"
151 "fsts fpul, fr1\n\t" 157 "fsts fpul, fr1\n\t"
152 "fsts fpul, fr2\n\t" 158 "fsts fpul, fr2\n\t"
153 "fsts fpul, fr3\n\t" 159 "fsts fpul, fr3\n\t"
154 "fsts fpul, fr4\n\t" 160 "fsts fpul, fr4\n\t"
155 "fsts fpul, fr5\n\t" 161 "fsts fpul, fr5\n\t"
156 "fsts fpul, fr6\n\t" 162 "fsts fpul, fr6\n\t"
157 "fsts fpul, fr7\n\t" 163 "fsts fpul, fr7\n\t"
158 "fsts fpul, fr8\n\t" 164 "fsts fpul, fr8\n\t"
159 "fsts fpul, fr9\n\t" 165 "fsts fpul, fr9\n\t"
160 "fsts fpul, fr10\n\t" 166 "fsts fpul, fr10\n\t"
161 "fsts fpul, fr11\n\t" 167 "fsts fpul, fr11\n\t"
162 "fsts fpul, fr12\n\t" 168 "fsts fpul, fr12\n\t"
163 "fsts fpul, fr13\n\t" 169 "fsts fpul, fr13\n\t"
164 "fsts fpul, fr14\n\t" 170 "fsts fpul, fr14\n\t"
165 "fsts fpul, fr15\n\t" 171 "fsts fpul, fr15\n\t"
166 "frchg\n\t" 172 "frchg\n\t"
167 "fsts fpul, fr0\n\t" 173 "fsts fpul, fr0\n\t"
168 "fsts fpul, fr1\n\t" 174 "fsts fpul, fr1\n\t"
169 "fsts fpul, fr2\n\t" 175 "fsts fpul, fr2\n\t"
170 "fsts fpul, fr3\n\t" 176 "fsts fpul, fr3\n\t"
171 "fsts fpul, fr4\n\t" 177 "fsts fpul, fr4\n\t"
172 "fsts fpul, fr5\n\t" 178 "fsts fpul, fr5\n\t"
173 "fsts fpul, fr6\n\t" 179 "fsts fpul, fr6\n\t"
174 "fsts fpul, fr7\n\t" 180 "fsts fpul, fr7\n\t"
175 "fsts fpul, fr8\n\t" 181 "fsts fpul, fr8\n\t"
176 "fsts fpul, fr9\n\t" 182 "fsts fpul, fr9\n\t"
177 "fsts fpul, fr10\n\t" 183 "fsts fpul, fr10\n\t"
178 "fsts fpul, fr11\n\t" 184 "fsts fpul, fr11\n\t"
179 "fsts fpul, fr12\n\t" 185 "fsts fpul, fr12\n\t"
180 "fsts fpul, fr13\n\t" 186 "fsts fpul, fr13\n\t"
181 "fsts fpul, fr14\n\t" 187 "fsts fpul, fr14\n\t"
182 "fsts fpul, fr15\n\t" 188 "fsts fpul, fr15\n\t"
183 "frchg\n\t" 189 "frchg\n\t"
184 "lds %2, fpscr\n\t" 190 "lds %2, fpscr\n\t"
185 : /* no output */ 191 : /* no output */
186 : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); 192 :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
187 disable_fpu(); 193 disable_fpu();
188} 194}
189 195
190/** 196/**
191 * denormal_to_double - Given denormalized float number, 197 * denormal_to_double - Given denormalized float number,
192 * store double float 198 * store double float
193 * 199 *
194 * @fpu: Pointer to sh_fpu_hard structure 200 * @fpu: Pointer to sh_fpu_hard structure
195 * @n: Index to FP register 201 * @n: Index to FP register
196 */ 202 */
197static void 203static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
198denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
199{ 204{
200 unsigned long du, dl; 205 unsigned long du, dl;
201 unsigned long x = fpu->fpul; 206 unsigned long x = fpu->fpul;
@@ -212,7 +217,7 @@ denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
212 dl = x << 29; 217 dl = x << 29;
213 218
214 fpu->fp_regs[n] = du; 219 fpu->fp_regs[n] = du;
215 fpu->fp_regs[n+1] = dl; 220 fpu->fp_regs[n + 1] = dl;
216 } 221 }
217} 222}
218 223
@@ -223,67 +228,191 @@ denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
223 * 228 *
224 * Returns 1 when it's handled (should not cause exception). 229 * Returns 1 when it's handled (should not cause exception).
225 */ 230 */
226static int 231static int ieee_fpe_handler(struct pt_regs *regs)
227ieee_fpe_handler (struct pt_regs *regs)
228{ 232{
229 unsigned short insn = *(unsigned short *) regs->pc; 233 unsigned short insn = *(unsigned short *)regs->pc;
230 unsigned short finsn; 234 unsigned short finsn;
231 unsigned long nextpc; 235 unsigned long nextpc;
232 int nib[4] = { 236 int nib[4] = {
233 (insn >> 12) & 0xf, 237 (insn >> 12) & 0xf,
234 (insn >> 8) & 0xf, 238 (insn >> 8) & 0xf,
235 (insn >> 4) & 0xf, 239 (insn >> 4) & 0xf,
236 insn & 0xf}; 240 insn & 0xf
237 241 };
238 if (nib[0] == 0xb || 242
239 (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ 243 if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
240 regs->pr = regs->pc + 4; 244 regs->pr = regs->pc + 4; /* bsr & jsr */
241 if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ 245
242 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); 246 if (nib[0] == 0xa || nib[0] == 0xb) {
243 finsn = *(unsigned short *) (regs->pc + 2); 247 /* bra & bsr */
244 } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */ 248 nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
249 finsn = *(unsigned short *)(regs->pc + 2);
250 } else if (nib[0] == 0x8 && nib[1] == 0xd) {
251 /* bt/s */
245 if (regs->sr & 1) 252 if (regs->sr & 1)
246 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); 253 nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
247 else 254 else
248 nextpc = regs->pc + 4; 255 nextpc = regs->pc + 4;
249 finsn = *(unsigned short *) (regs->pc + 2); 256 finsn = *(unsigned short *)(regs->pc + 2);
250 } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */ 257 } else if (nib[0] == 0x8 && nib[1] == 0xf) {
258 /* bf/s */
251 if (regs->sr & 1) 259 if (regs->sr & 1)
252 nextpc = regs->pc + 4; 260 nextpc = regs->pc + 4;
253 else 261 else
254 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); 262 nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
255 finsn = *(unsigned short *) (regs->pc + 2); 263 finsn = *(unsigned short *)(regs->pc + 2);
256 } else if (nib[0] == 0x4 && nib[3] == 0xb && 264 } else if (nib[0] == 0x4 && nib[3] == 0xb &&
257 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */ 265 (nib[2] == 0x0 || nib[2] == 0x2)) {
266 /* jmp & jsr */
258 nextpc = regs->regs[nib[1]]; 267 nextpc = regs->regs[nib[1]];
259 finsn = *(unsigned short *) (regs->pc + 2); 268 finsn = *(unsigned short *)(regs->pc + 2);
260 } else if (nib[0] == 0x0 && nib[3] == 0x3 && 269 } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
261 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */ 270 (nib[2] == 0x0 || nib[2] == 0x2)) {
271 /* braf & bsrf */
262 nextpc = regs->pc + 4 + regs->regs[nib[1]]; 272 nextpc = regs->pc + 4 + regs->regs[nib[1]];
263 finsn = *(unsigned short *) (regs->pc + 2); 273 finsn = *(unsigned short *)(regs->pc + 2);
264 } else if (insn == 0x000b) { /* rts */ 274 } else if (insn == 0x000b) {
275 /* rts */
265 nextpc = regs->pr; 276 nextpc = regs->pr;
266 finsn = *(unsigned short *) (regs->pc + 2); 277 finsn = *(unsigned short *)(regs->pc + 2);
267 } else { 278 } else {
268 nextpc = regs->pc + instruction_size(insn); 279 nextpc = regs->pc + instruction_size(insn);
269 finsn = insn; 280 finsn = insn;
270 } 281 }
271 282
272 if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ 283 if ((finsn & 0xf1ff) == 0xf0ad) {
284 /* fcnvsd */
273 struct task_struct *tsk = current; 285 struct task_struct *tsk = current;
274 286
275 save_fpu(tsk, regs); 287 save_fpu(tsk, regs);
276 if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) { 288 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
277 /* FPU error */ 289 /* FPU error */
278 denormal_to_double (&tsk->thread.fpu.hard, 290 denormal_to_double(&tsk->thread.fpu.hard,
279 (finsn >> 8) & 0xf); 291 (finsn >> 8) & 0xf);
280 tsk->thread.fpu.hard.fpscr &= 292 else
281 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); 293 return 0;
282 grab_fpu(regs); 294
283 restore_fpu(tsk); 295 regs->pc = nextpc;
284 set_tsk_thread_flag(tsk, TIF_USEDFPU); 296 return 1;
297 } else if ((finsn & 0xf00f) == 0xf002) {
298 /* fmul */
299 struct task_struct *tsk = current;
300 int fpscr;
301 int n, m, prec;
302 unsigned int hx, hy;
303
304 n = (finsn >> 8) & 0xf;
305 m = (finsn >> 4) & 0xf;
306 hx = tsk->thread.fpu.hard.fp_regs[n];
307 hy = tsk->thread.fpu.hard.fp_regs[m];
308 fpscr = tsk->thread.fpu.hard.fpscr;
309 prec = fpscr & FPSCR_DBL_PRECISION;
310
311 if ((fpscr & FPSCR_CAUSE_ERROR)
312 && (prec && ((hx & 0x7fffffff) < 0x00100000
313 || (hy & 0x7fffffff) < 0x00100000))) {
314 long long llx, lly;
315
316 /* FPU error because of denormal (doubles) */
317 llx = ((long long)hx << 32)
318 | tsk->thread.fpu.hard.fp_regs[n + 1];
319 lly = ((long long)hy << 32)
320 | tsk->thread.fpu.hard.fp_regs[m + 1];
321 llx = float64_mul(llx, lly);
322 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
323 tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
324 } else if ((fpscr & FPSCR_CAUSE_ERROR)
325 && (!prec && ((hx & 0x7fffffff) < 0x00800000
326 || (hy & 0x7fffffff) < 0x00800000))) {
327 /* FPU error because of denormal (floats) */
328 hx = float32_mul(hx, hy);
329 tsk->thread.fpu.hard.fp_regs[n] = hx;
330 } else
331 return 0;
332
333 regs->pc = nextpc;
334 return 1;
335 } else if ((finsn & 0xf00e) == 0xf000) {
336 /* fadd, fsub */
337 struct task_struct *tsk = current;
338 int fpscr;
339 int n, m, prec;
340 unsigned int hx, hy;
341
342 n = (finsn >> 8) & 0xf;
343 m = (finsn >> 4) & 0xf;
344 hx = tsk->thread.fpu.hard.fp_regs[n];
345 hy = tsk->thread.fpu.hard.fp_regs[m];
346 fpscr = tsk->thread.fpu.hard.fpscr;
347 prec = fpscr & FPSCR_DBL_PRECISION;
348
349 if ((fpscr & FPSCR_CAUSE_ERROR)
350 && (prec && ((hx & 0x7fffffff) < 0x00100000
351 || (hy & 0x7fffffff) < 0x00100000))) {
352 long long llx, lly;
353
354 /* FPU error because of denormal (doubles) */
355 llx = ((long long)hx << 32)
356 | tsk->thread.fpu.hard.fp_regs[n + 1];
357 lly = ((long long)hy << 32)
358 | tsk->thread.fpu.hard.fp_regs[m + 1];
359 if ((finsn & 0xf00f) == 0xf000)
360 llx = float64_add(llx, lly);
361 else
362 llx = float64_sub(llx, lly);
363 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
364 tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
365 } else if ((fpscr & FPSCR_CAUSE_ERROR)
366 && (!prec && ((hx & 0x7fffffff) < 0x00800000
367 || (hy & 0x7fffffff) < 0x00800000))) {
368 /* FPU error because of denormal (floats) */
369 if ((finsn & 0xf00f) == 0xf000)
370 hx = float32_add(hx, hy);
371 else
372 hx = float32_sub(hx, hy);
373 tsk->thread.fpu.hard.fp_regs[n] = hx;
374 } else
375 return 0;
376
377 regs->pc = nextpc;
378 return 1;
379 } else if ((finsn & 0xf003) == 0xf003) {
380 /* fdiv */
381 struct task_struct *tsk = current;
382 int fpscr;
383 int n, m, prec;
384 unsigned int hx, hy;
385
386 n = (finsn >> 8) & 0xf;
387 m = (finsn >> 4) & 0xf;
388 hx = tsk->thread.fpu.hard.fp_regs[n];
389 hy = tsk->thread.fpu.hard.fp_regs[m];
390 fpscr = tsk->thread.fpu.hard.fpscr;
391 prec = fpscr & FPSCR_DBL_PRECISION;
392
393 if ((fpscr & FPSCR_CAUSE_ERROR)
394 && (prec && ((hx & 0x7fffffff) < 0x00100000
395 || (hy & 0x7fffffff) < 0x00100000))) {
396 long long llx, lly;
397
398 /* FPU error because of denormal (doubles) */
399 llx = ((long long)hx << 32)
400 | tsk->thread.fpu.hard.fp_regs[n + 1];
401 lly = ((long long)hy << 32)
402 | tsk->thread.fpu.hard.fp_regs[m + 1];
403
404 llx = float64_div(llx, lly);
405
406 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
407 tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
408 } else if ((fpscr & FPSCR_CAUSE_ERROR)
409 && (!prec && ((hx & 0x7fffffff) < 0x00800000
410 || (hy & 0x7fffffff) < 0x00800000))) {
411 /* FPU error because of denormal (floats) */
412 hx = float32_div(hx, hy);
413 tsk->thread.fpu.hard.fp_regs[n] = hx;
285 } else 414 } else
286 force_sig(SIGFPE, tsk); 415 return 0;
287 416
288 regs->pc = nextpc; 417 regs->pc = nextpc;
289 return 1; 418 return 1;
@@ -292,16 +421,41 @@ ieee_fpe_handler (struct pt_regs *regs)
292 return 0; 421 return 0;
293} 422}
294 423
424void float_raise(unsigned int flags)
425{
426 fpu_exception_flags |= flags;
427}
428
429int float_rounding_mode(void)
430{
431 struct task_struct *tsk = current;
432 int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
433 return roundingMode;
434}
435
295BUILD_TRAP_HANDLER(fpu_error) 436BUILD_TRAP_HANDLER(fpu_error)
296{ 437{
297 struct task_struct *tsk = current; 438 struct task_struct *tsk = current;
298 TRAP_HANDLER_DECL; 439 TRAP_HANDLER_DECL;
299 440
300 if (ieee_fpe_handler(regs))
301 return;
302
303 regs->pc += 2;
304 save_fpu(tsk, regs); 441 save_fpu(tsk, regs);
442 fpu_exception_flags = 0;
443 if (ieee_fpe_handler(regs)) {
444 tsk->thread.fpu.hard.fpscr &=
445 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
446 tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
447 /* Set the FPSCR flag as well as cause bits - simply
448 * replicate the cause */
449 tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
450 grab_fpu(regs);
451 restore_fpu(tsk);
452 set_tsk_thread_flag(tsk, TIF_USEDFPU);
453 if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
454 (fpu_exception_flags >> 2)) == 0) {
455 return;
456 }
457 }
458
305 force_sig(SIGFPE, tsk); 459 force_sig(SIGFPE, tsk);
306} 460}
307 461
@@ -319,7 +473,7 @@ BUILD_TRAP_HANDLER(fpu_state_restore)
319 if (used_math()) { 473 if (used_math()) {
320 /* Using the FPU again. */ 474 /* Using the FPU again. */
321 restore_fpu(tsk); 475 restore_fpu(tsk);
322 } else { 476 } else {
323 /* First time FPU user. */ 477 /* First time FPU user. */
324 fpu_init(); 478 fpu_init();
325 set_used_math(); 479 set_used_math();
diff --git a/arch/sh/kernel/cpu/sh4/softfloat.c b/arch/sh/kernel/cpu/sh4/softfloat.c
new file mode 100644
index 000000000000..7b2d337ee412
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4/softfloat.c
@@ -0,0 +1,892 @@
1/*
2 * Floating point emulation support for subnormalised numbers on SH4
3 * architecture This file is derived from the SoftFloat IEC/IEEE
4 * Floating-point Arithmetic Package, Release 2 the original license of
5 * which is reproduced below.
6 *
7 * ========================================================================
8 *
9 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
10 * Arithmetic Package, Release 2.
11 *
12 * Written by John R. Hauser. This work was made possible in part by the
13 * International Computer Science Institute, located at Suite 600, 1947 Center
14 * Street, Berkeley, California 94704. Funding was partially provided by the
15 * National Science Foundation under grant MIP-9311980. The original version
16 * of this code was written as part of a project to build a fixed-point vector
17 * processor in collaboration with the University of California at Berkeley,
18 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
20 * arithmetic/softfloat.html'.
21 *
22 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
23 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
24 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
25 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
26 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
27 *
28 * Derivative works are acceptable, even for commercial purposes, so long as
29 * (1) they include prominent notice that the work is derivative, and (2) they
30 * include prominent notice akin to these three paragraphs for those parts of
31 * this code that are retained.
32 *
33 * ========================================================================
34 *
35 * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
36 * and Kamel Khelifi <kamel.khelifi@st.com>
37 */
38#include <linux/kernel.h>
39#include <asm/cpu/fpu.h>
40
41#define LIT64( a ) a##LL
42
43typedef char flag;
44typedef unsigned char uint8;
45typedef signed char int8;
46typedef int uint16;
47typedef int int16;
48typedef unsigned int uint32;
49typedef signed int int32;
50
51typedef unsigned long long int bits64;
52typedef signed long long int sbits64;
53
54typedef unsigned char bits8;
55typedef signed char sbits8;
56typedef unsigned short int bits16;
57typedef signed short int sbits16;
58typedef unsigned int bits32;
59typedef signed int sbits32;
60
61typedef unsigned long long int uint64;
62typedef signed long long int int64;
63
64typedef unsigned long int float32;
65typedef unsigned long long float64;
66
67extern void float_raise(unsigned int flags); /* in fpu.c */
68extern int float_rounding_mode(void); /* in fpu.c */
69
70inline bits64 extractFloat64Frac(float64 a);
71inline flag extractFloat64Sign(float64 a);
72inline int16 extractFloat64Exp(float64 a);
73inline int16 extractFloat32Exp(float32 a);
74inline flag extractFloat32Sign(float32 a);
75inline bits32 extractFloat32Frac(float32 a);
76inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
77inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
78inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
79inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
80float64 float64_sub(float64 a, float64 b);
81float32 float32_sub(float32 a, float32 b);
82float32 float32_add(float32 a, float32 b);
83float64 float64_add(float64 a, float64 b);
84float64 float64_div(float64 a, float64 b);
85float32 float32_div(float32 a, float32 b);
86float32 float32_mul(float32 a, float32 b);
87float64 float64_mul(float64 a, float64 b);
88inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
89 bits64 * z1Ptr);
90inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
91 bits64 * z1Ptr);
92inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
93
94static int8 countLeadingZeros32(bits32 a);
95static int8 countLeadingZeros64(bits64 a);
96static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
97 bits64 zSig);
98static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
99static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
100static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
101static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
102 bits32 zSig);
103static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
104static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
105static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
106static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
107 bits64 * zSigPtr);
108static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
109static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
110 bits32 * zSigPtr);
111
112inline bits64 extractFloat64Frac(float64 a)
113{
114 return a & LIT64(0x000FFFFFFFFFFFFF);
115}
116
117inline flag extractFloat64Sign(float64 a)
118{
119 return a >> 63;
120}
121
122inline int16 extractFloat64Exp(float64 a)
123{
124 return (a >> 52) & 0x7FF;
125}
126
127inline int16 extractFloat32Exp(float32 a)
128{
129 return (a >> 23) & 0xFF;
130}
131
132inline flag extractFloat32Sign(float32 a)
133{
134 return a >> 31;
135}
136
137inline bits32 extractFloat32Frac(float32 a)
138{
139 return a & 0x007FFFFF;
140}
141
142inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
143{
144 return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
145}
146
147inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
148{
149 bits64 z;
150
151 if (count == 0) {
152 z = a;
153 } else if (count < 64) {
154 z = (a >> count) | ((a << ((-count) & 63)) != 0);
155 } else {
156 z = (a != 0);
157 }
158 *zPtr = z;
159}
160
161static int8 countLeadingZeros32(bits32 a)
162{
163 static const int8 countLeadingZerosHigh[] = {
164 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
165 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
168 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
180 };
181 int8 shiftCount;
182
183 shiftCount = 0;
184 if (a < 0x10000) {
185 shiftCount += 16;
186 a <<= 16;
187 }
188 if (a < 0x1000000) {
189 shiftCount += 8;
190 a <<= 8;
191 }
192 shiftCount += countLeadingZerosHigh[a >> 24];
193 return shiftCount;
194
195}
196
197static int8 countLeadingZeros64(bits64 a)
198{
199 int8 shiftCount;
200
201 shiftCount = 0;
202 if (a < ((bits64) 1) << 32) {
203 shiftCount += 32;
204 } else {
205 a >>= 32;
206 }
207 shiftCount += countLeadingZeros32(a);
208 return shiftCount;
209
210}
211
212static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
213{
214 int8 shiftCount;
215
216 shiftCount = countLeadingZeros64(zSig) - 1;
217 return roundAndPackFloat64(zSign, zExp - shiftCount,
218 zSig << shiftCount);
219
220}
221
222static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
223{
224 int16 aExp, bExp, zExp;
225 bits64 aSig, bSig, zSig;
226 int16 expDiff;
227
228 aSig = extractFloat64Frac(a);
229 aExp = extractFloat64Exp(a);
230 bSig = extractFloat64Frac(b);
231 bExp = extractFloat64Exp(b);
232 expDiff = aExp - bExp;
233 aSig <<= 10;
234 bSig <<= 10;
235 if (0 < expDiff)
236 goto aExpBigger;
237 if (expDiff < 0)
238 goto bExpBigger;
239 if (aExp == 0) {
240 aExp = 1;
241 bExp = 1;
242 }
243 if (bSig < aSig)
244 goto aBigger;
245 if (aSig < bSig)
246 goto bBigger;
247 return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
248 bExpBigger:
249 if (bExp == 0x7FF) {
250 return packFloat64(zSign ^ 1, 0x7FF, 0);
251 }
252 if (aExp == 0) {
253 ++expDiff;
254 } else {
255 aSig |= LIT64(0x4000000000000000);
256 }
257 shift64RightJamming(aSig, -expDiff, &aSig);
258 bSig |= LIT64(0x4000000000000000);
259 bBigger:
260 zSig = bSig - aSig;
261 zExp = bExp;
262 zSign ^= 1;
263 goto normalizeRoundAndPack;
264 aExpBigger:
265 if (aExp == 0x7FF) {
266 return a;
267 }
268 if (bExp == 0) {
269 --expDiff;
270 } else {
271 bSig |= LIT64(0x4000000000000000);
272 }
273 shift64RightJamming(bSig, expDiff, &bSig);
274 aSig |= LIT64(0x4000000000000000);
275 aBigger:
276 zSig = aSig - bSig;
277 zExp = aExp;
278 normalizeRoundAndPack:
279 --zExp;
280 return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
281
282}
283static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
284{
285 int16 aExp, bExp, zExp;
286 bits64 aSig, bSig, zSig;
287 int16 expDiff;
288
289 aSig = extractFloat64Frac(a);
290 aExp = extractFloat64Exp(a);
291 bSig = extractFloat64Frac(b);
292 bExp = extractFloat64Exp(b);
293 expDiff = aExp - bExp;
294 aSig <<= 9;
295 bSig <<= 9;
296 if (0 < expDiff) {
297 if (aExp == 0x7FF) {
298 return a;
299 }
300 if (bExp == 0) {
301 --expDiff;
302 } else {
303 bSig |= LIT64(0x2000000000000000);
304 }
305 shift64RightJamming(bSig, expDiff, &bSig);
306 zExp = aExp;
307 } else if (expDiff < 0) {
308 if (bExp == 0x7FF) {
309 return packFloat64(zSign, 0x7FF, 0);
310 }
311 if (aExp == 0) {
312 ++expDiff;
313 } else {
314 aSig |= LIT64(0x2000000000000000);
315 }
316 shift64RightJamming(aSig, -expDiff, &aSig);
317 zExp = bExp;
318 } else {
319 if (aExp == 0x7FF) {
320 return a;
321 }
322 if (aExp == 0)
323 return packFloat64(zSign, 0, (aSig + bSig) >> 9);
324 zSig = LIT64(0x4000000000000000) + aSig + bSig;
325 zExp = aExp;
326 goto roundAndPack;
327 }
328 aSig |= LIT64(0x2000000000000000);
329 zSig = (aSig + bSig) << 1;
330 --zExp;
331 if ((sbits64) zSig < 0) {
332 zSig = aSig + bSig;
333 ++zExp;
334 }
335 roundAndPack:
336 return roundAndPackFloat64(zSign, zExp, zSig);
337
338}
339
340inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
341{
342 return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
343}
344
345inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
346{
347 bits32 z;
348 if (count == 0) {
349 z = a;
350 } else if (count < 32) {
351 z = (a >> count) | ((a << ((-count) & 31)) != 0);
352 } else {
353 z = (a != 0);
354 }
355 *zPtr = z;
356}
357
358static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
359{
360 flag roundNearestEven;
361 int8 roundIncrement, roundBits;
362 flag isTiny;
363
364 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
365 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
366 roundIncrement = 0x40;
367 if (!roundNearestEven) {
368 roundIncrement = 0;
369 }
370 roundBits = zSig & 0x7F;
371 if (0xFD <= (bits16) zExp) {
372 if ((0xFD < zExp)
373 || ((zExp == 0xFD)
374 && ((sbits32) (zSig + roundIncrement) < 0))
375 ) {
376 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
377 return packFloat32(zSign, 0xFF,
378 0) - (roundIncrement == 0);
379 }
380 if (zExp < 0) {
381 isTiny = (zExp < -1)
382 || (zSig + roundIncrement < 0x80000000);
383 shift32RightJamming(zSig, -zExp, &zSig);
384 zExp = 0;
385 roundBits = zSig & 0x7F;
386 if (isTiny && roundBits)
387 float_raise(FPSCR_CAUSE_UNDERFLOW);
388 }
389 }
390 if (roundBits)
391 float_raise(FPSCR_CAUSE_INEXACT);
392 zSig = (zSig + roundIncrement) >> 7;
393 zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
394 if (zSig == 0)
395 zExp = 0;
396 return packFloat32(zSign, zExp, zSig);
397
398}
399
400static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
401{
402 int8 shiftCount;
403
404 shiftCount = countLeadingZeros32(zSig) - 1;
405 return roundAndPackFloat32(zSign, zExp - shiftCount,
406 zSig << shiftCount);
407}
408
409static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
410{
411 flag roundNearestEven;
412 int16 roundIncrement, roundBits;
413 flag isTiny;
414
415 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
416 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
417 roundIncrement = 0x200;
418 if (!roundNearestEven) {
419 roundIncrement = 0;
420 }
421 roundBits = zSig & 0x3FF;
422 if (0x7FD <= (bits16) zExp) {
423 if ((0x7FD < zExp)
424 || ((zExp == 0x7FD)
425 && ((sbits64) (zSig + roundIncrement) < 0))
426 ) {
427 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
428 return packFloat64(zSign, 0x7FF,
429 0) - (roundIncrement == 0);
430 }
431 if (zExp < 0) {
432 isTiny = (zExp < -1)
433 || (zSig + roundIncrement <
434 LIT64(0x8000000000000000));
435 shift64RightJamming(zSig, -zExp, &zSig);
436 zExp = 0;
437 roundBits = zSig & 0x3FF;
438 if (isTiny && roundBits)
439 float_raise(FPSCR_CAUSE_UNDERFLOW);
440 }
441 }
442 if (roundBits)
443 float_raise(FPSCR_CAUSE_INEXACT);
444 zSig = (zSig + roundIncrement) >> 10;
445 zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
446 if (zSig == 0)
447 zExp = 0;
448 return packFloat64(zSign, zExp, zSig);
449
450}
451
452static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
453{
454 int16 aExp, bExp, zExp;
455 bits32 aSig, bSig, zSig;
456 int16 expDiff;
457
458 aSig = extractFloat32Frac(a);
459 aExp = extractFloat32Exp(a);
460 bSig = extractFloat32Frac(b);
461 bExp = extractFloat32Exp(b);
462 expDiff = aExp - bExp;
463 aSig <<= 7;
464 bSig <<= 7;
465 if (0 < expDiff)
466 goto aExpBigger;
467 if (expDiff < 0)
468 goto bExpBigger;
469 if (aExp == 0) {
470 aExp = 1;
471 bExp = 1;
472 }
473 if (bSig < aSig)
474 goto aBigger;
475 if (aSig < bSig)
476 goto bBigger;
477 return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
478 bExpBigger:
479 if (bExp == 0xFF) {
480 return packFloat32(zSign ^ 1, 0xFF, 0);
481 }
482 if (aExp == 0) {
483 ++expDiff;
484 } else {
485 aSig |= 0x40000000;
486 }
487 shift32RightJamming(aSig, -expDiff, &aSig);
488 bSig |= 0x40000000;
489 bBigger:
490 zSig = bSig - aSig;
491 zExp = bExp;
492 zSign ^= 1;
493 goto normalizeRoundAndPack;
494 aExpBigger:
495 if (aExp == 0xFF) {
496 return a;
497 }
498 if (bExp == 0) {
499 --expDiff;
500 } else {
501 bSig |= 0x40000000;
502 }
503 shift32RightJamming(bSig, expDiff, &bSig);
504 aSig |= 0x40000000;
505 aBigger:
506 zSig = aSig - bSig;
507 zExp = aExp;
508 normalizeRoundAndPack:
509 --zExp;
510 return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
511
512}
513
514static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
515{
516 int16 aExp, bExp, zExp;
517 bits32 aSig, bSig, zSig;
518 int16 expDiff;
519
520 aSig = extractFloat32Frac(a);
521 aExp = extractFloat32Exp(a);
522 bSig = extractFloat32Frac(b);
523 bExp = extractFloat32Exp(b);
524 expDiff = aExp - bExp;
525 aSig <<= 6;
526 bSig <<= 6;
527 if (0 < expDiff) {
528 if (aExp == 0xFF) {
529 return a;
530 }
531 if (bExp == 0) {
532 --expDiff;
533 } else {
534 bSig |= 0x20000000;
535 }
536 shift32RightJamming(bSig, expDiff, &bSig);
537 zExp = aExp;
538 } else if (expDiff < 0) {
539 if (bExp == 0xFF) {
540 return packFloat32(zSign, 0xFF, 0);
541 }
542 if (aExp == 0) {
543 ++expDiff;
544 } else {
545 aSig |= 0x20000000;
546 }
547 shift32RightJamming(aSig, -expDiff, &aSig);
548 zExp = bExp;
549 } else {
550 if (aExp == 0xFF) {
551 return a;
552 }
553 if (aExp == 0)
554 return packFloat32(zSign, 0, (aSig + bSig) >> 6);
555 zSig = 0x40000000 + aSig + bSig;
556 zExp = aExp;
557 goto roundAndPack;
558 }
559 aSig |= 0x20000000;
560 zSig = (aSig + bSig) << 1;
561 --zExp;
562 if ((sbits32) zSig < 0) {
563 zSig = aSig + bSig;
564 ++zExp;
565 }
566 roundAndPack:
567 return roundAndPackFloat32(zSign, zExp, zSig);
568
569}
570
571float64 float64_sub(float64 a, float64 b)
572{
573 flag aSign, bSign;
574
575 aSign = extractFloat64Sign(a);
576 bSign = extractFloat64Sign(b);
577 if (aSign == bSign) {
578 return subFloat64Sigs(a, b, aSign);
579 } else {
580 return addFloat64Sigs(a, b, aSign);
581 }
582
583}
584
585float32 float32_sub(float32 a, float32 b)
586{
587 flag aSign, bSign;
588
589 aSign = extractFloat32Sign(a);
590 bSign = extractFloat32Sign(b);
591 if (aSign == bSign) {
592 return subFloat32Sigs(a, b, aSign);
593 } else {
594 return addFloat32Sigs(a, b, aSign);
595 }
596
597}
598
599float32 float32_add(float32 a, float32 b)
600{
601 flag aSign, bSign;
602
603 aSign = extractFloat32Sign(a);
604 bSign = extractFloat32Sign(b);
605 if (aSign == bSign) {
606 return addFloat32Sigs(a, b, aSign);
607 } else {
608 return subFloat32Sigs(a, b, aSign);
609 }
610
611}
612
613float64 float64_add(float64 a, float64 b)
614{
615 flag aSign, bSign;
616
617 aSign = extractFloat64Sign(a);
618 bSign = extractFloat64Sign(b);
619 if (aSign == bSign) {
620 return addFloat64Sigs(a, b, aSign);
621 } else {
622 return subFloat64Sigs(a, b, aSign);
623 }
624}
625
626static void
627normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
628{
629 int8 shiftCount;
630
631 shiftCount = countLeadingZeros64(aSig) - 11;
632 *zSigPtr = aSig << shiftCount;
633 *zExpPtr = 1 - shiftCount;
634}
635
636inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
637 bits64 * z1Ptr)
638{
639 bits64 z1;
640
641 z1 = a1 + b1;
642 *z1Ptr = z1;
643 *z0Ptr = a0 + b0 + (z1 < a1);
644}
645
646inline void
647sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
648 bits64 * z1Ptr)
649{
650 *z1Ptr = a1 - b1;
651 *z0Ptr = a0 - b0 - (a1 < b1);
652}
653
654static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
655{
656 bits64 b0, b1;
657 bits64 rem0, rem1, term0, term1;
658 bits64 z;
659 if (b <= a0)
660 return LIT64(0xFFFFFFFFFFFFFFFF);
661 b0 = b >> 32;
662 z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32;
663 mul64To128(b, z, &term0, &term1);
664 sub128(a0, a1, term0, term1, &rem0, &rem1);
665 while (((sbits64) rem0) < 0) {
666 z -= LIT64(0x100000000);
667 b1 = b << 32;
668 add128(rem0, rem1, b0, b1, &rem0, &rem1);
669 }
670 rem0 = (rem0 << 32) | (rem1 >> 32);
671 z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;
672 return z;
673}
674
675inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
676{
677 bits32 aHigh, aLow, bHigh, bLow;
678 bits64 z0, zMiddleA, zMiddleB, z1;
679
680 aLow = a;
681 aHigh = a >> 32;
682 bLow = b;
683 bHigh = b >> 32;
684 z1 = ((bits64) aLow) * bLow;
685 zMiddleA = ((bits64) aLow) * bHigh;
686 zMiddleB = ((bits64) aHigh) * bLow;
687 z0 = ((bits64) aHigh) * bHigh;
688 zMiddleA += zMiddleB;
689 z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
690 zMiddleA <<= 32;
691 z1 += zMiddleA;
692 z0 += (z1 < zMiddleA);
693 *z1Ptr = z1;
694 *z0Ptr = z0;
695
696}
697
698static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
699 bits32 * zSigPtr)
700{
701 int8 shiftCount;
702
703 shiftCount = countLeadingZeros32(aSig) - 8;
704 *zSigPtr = aSig << shiftCount;
705 *zExpPtr = 1 - shiftCount;
706
707}
708
709float64 float64_div(float64 a, float64 b)
710{
711 flag aSign, bSign, zSign;
712 int16 aExp, bExp, zExp;
713 bits64 aSig, bSig, zSig;
714 bits64 rem0, rem1;
715 bits64 term0, term1;
716
717 aSig = extractFloat64Frac(a);
718 aExp = extractFloat64Exp(a);
719 aSign = extractFloat64Sign(a);
720 bSig = extractFloat64Frac(b);
721 bExp = extractFloat64Exp(b);
722 bSign = extractFloat64Sign(b);
723 zSign = aSign ^ bSign;
724 if (aExp == 0x7FF) {
725 if (bExp == 0x7FF) {
726 }
727 return packFloat64(zSign, 0x7FF, 0);
728 }
729 if (bExp == 0x7FF) {
730 return packFloat64(zSign, 0, 0);
731 }
732 if (bExp == 0) {
733 if (bSig == 0) {
734 if ((aExp | aSig) == 0) {
735 float_raise(FPSCR_CAUSE_INVALID);
736 }
737 return packFloat64(zSign, 0x7FF, 0);
738 }
739 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
740 }
741 if (aExp == 0) {
742 if (aSig == 0)
743 return packFloat64(zSign, 0, 0);
744 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
745 }
746 zExp = aExp - bExp + 0x3FD;
747 aSig = (aSig | LIT64(0x0010000000000000)) << 10;
748 bSig = (bSig | LIT64(0x0010000000000000)) << 11;
749 if (bSig <= (aSig + aSig)) {
750 aSig >>= 1;
751 ++zExp;
752 }
753 zSig = estimateDiv128To64(aSig, 0, bSig);
754 if ((zSig & 0x1FF) <= 2) {
755 mul64To128(bSig, zSig, &term0, &term1);
756 sub128(aSig, 0, term0, term1, &rem0, &rem1);
757 while ((sbits64) rem0 < 0) {
758 --zSig;
759 add128(rem0, rem1, 0, bSig, &rem0, &rem1);
760 }
761 zSig |= (rem1 != 0);
762 }
763 return roundAndPackFloat64(zSign, zExp, zSig);
764
765}
766
767float32 float32_div(float32 a, float32 b)
768{
769 flag aSign, bSign, zSign;
770 int16 aExp, bExp, zExp;
771 bits32 aSig, bSig, zSig;
772
773 aSig = extractFloat32Frac(a);
774 aExp = extractFloat32Exp(a);
775 aSign = extractFloat32Sign(a);
776 bSig = extractFloat32Frac(b);
777 bExp = extractFloat32Exp(b);
778 bSign = extractFloat32Sign(b);
779 zSign = aSign ^ bSign;
780 if (aExp == 0xFF) {
781 if (bExp == 0xFF) {
782 }
783 return packFloat32(zSign, 0xFF, 0);
784 }
785 if (bExp == 0xFF) {
786 return packFloat32(zSign, 0, 0);
787 }
788 if (bExp == 0) {
789 if (bSig == 0) {
790 return packFloat32(zSign, 0xFF, 0);
791 }
792 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
793 }
794 if (aExp == 0) {
795 if (aSig == 0)
796 return packFloat32(zSign, 0, 0);
797 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
798 }
799 zExp = aExp - bExp + 0x7D;
800 aSig = (aSig | 0x00800000) << 7;
801 bSig = (bSig | 0x00800000) << 8;
802 if (bSig <= (aSig + aSig)) {
803 aSig >>= 1;
804 ++zExp;
805 }
806 zSig = (((bits64) aSig) << 32) / bSig;
807 if ((zSig & 0x3F) == 0) {
808 zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
809 }
810 return roundAndPackFloat32(zSign, zExp, zSig);
811
812}
813
814float32 float32_mul(float32 a, float32 b)
815{
816 char aSign, bSign, zSign;
817 int aExp, bExp, zExp;
818 unsigned int aSig, bSig;
819 unsigned long long zSig64;
820 unsigned int zSig;
821
822 aSig = extractFloat32Frac(a);
823 aExp = extractFloat32Exp(a);
824 aSign = extractFloat32Sign(a);
825 bSig = extractFloat32Frac(b);
826 bExp = extractFloat32Exp(b);
827 bSign = extractFloat32Sign(b);
828 zSign = aSign ^ bSign;
829 if (aExp == 0) {
830 if (aSig == 0)
831 return packFloat32(zSign, 0, 0);
832 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
833 }
834 if (bExp == 0) {
835 if (bSig == 0)
836 return packFloat32(zSign, 0, 0);
837 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
838 }
839 if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
840 return roundAndPackFloat32(zSign, 0xff, 0);
841
842 zExp = aExp + bExp - 0x7F;
843 aSig = (aSig | 0x00800000) << 7;
844 bSig = (bSig | 0x00800000) << 8;
845 shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
846 zSig = zSig64;
847 if (0 <= (signed int)(zSig << 1)) {
848 zSig <<= 1;
849 --zExp;
850 }
851 return roundAndPackFloat32(zSign, zExp, zSig);
852
853}
854
855float64 float64_mul(float64 a, float64 b)
856{
857 char aSign, bSign, zSign;
858 int aExp, bExp, zExp;
859 unsigned long long int aSig, bSig, zSig0, zSig1;
860
861 aSig = extractFloat64Frac(a);
862 aExp = extractFloat64Exp(a);
863 aSign = extractFloat64Sign(a);
864 bSig = extractFloat64Frac(b);
865 bExp = extractFloat64Exp(b);
866 bSign = extractFloat64Sign(b);
867 zSign = aSign ^ bSign;
868
869 if (aExp == 0) {
870 if (aSig == 0)
871 return packFloat64(zSign, 0, 0);
872 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
873 }
874 if (bExp == 0) {
875 if (bSig == 0)
876 return packFloat64(zSign, 0, 0);
877 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
878 }
879 if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
880 return roundAndPackFloat64(zSign, 0x7ff, 0);
881
882 zExp = aExp + bExp - 0x3FF;
883 aSig = (aSig | 0x0010000000000000LL) << 10;
884 bSig = (bSig | 0x0010000000000000LL) << 11;
885 mul64To128(aSig, bSig, &zSig0, &zSig1);
886 zSig0 |= (zSig1 != 0);
887 if (0 <= (signed long long int)(zSig0 << 1)) {
888 zSig0 <<= 1;
889 --zExp;
890 }
891 return roundAndPackFloat64(zSign, zExp, zSig0);
892}