diff options
author | Stuart Menefy <stuart.menefy@st.com> | 2007-11-30 04:42:27 -0500 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2008-01-27 23:18:59 -0500 |
commit | c8c0a1aba9fa8f816dc8fb477ff816a5b700f0ea (patch) | |
tree | 54329f0b6497be088fc573c67e5541863041fdde /arch/sh | |
parent | 453ec9c1c3808b051347edbbf637f997add7b85b (diff) |
sh: Support denormalization on SH-4 FPU.
Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh')
-rw-r--r-- | arch/sh/kernel/cpu/sh4/Makefile | 2 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/fpu.c | 514 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/softfloat.c | 892 |
3 files changed, 1227 insertions, 181 deletions
diff --git a/arch/sh/kernel/cpu/sh4/Makefile b/arch/sh/kernel/cpu/sh4/Makefile
index dadd6bffc128..d608557c7a3f 100644
--- a/arch/sh/kernel/cpu/sh4/Makefile
+++ b/arch/sh/kernel/cpu/sh4/Makefile
@@ -5,7 +5,7 @@ | |||
5 | obj-y := probe.o common.o | 5 | obj-y := probe.o common.o |
6 | common-y += $(addprefix ../sh3/, entry.o ex.o) | 6 | common-y += $(addprefix ../sh3/, entry.o ex.o) |
7 | 7 | ||
8 | obj-$(CONFIG_SH_FPU) += fpu.o | 8 | obj-$(CONFIG_SH_FPU) += fpu.o softfloat.o |
9 | obj-$(CONFIG_SH_STORE_QUEUES) += sq.o | 9 | obj-$(CONFIG_SH_STORE_QUEUES) += sq.o |
10 | 10 | ||
11 | # CPU subtype setup | 11 | # CPU subtype setup |
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c
index e624180b4467..817f9939cda6 100644
--- a/arch/sh/kernel/cpu/sh4/fpu.c
+++ b/arch/sh/kernel/cpu/sh4/fpu.c
@@ -1,7 +1,4 @@ | |||
1 | /* $Id: fpu.c,v 1.4 2004/01/13 05:52:11 kkojima Exp $ | 1 | /* |
2 | * | ||
3 | * linux/arch/sh/kernel/fpu.c | ||
4 | * | ||
5 | * Save/restore floating point context for signal handlers. | 2 | * Save/restore floating point context for signal handlers. |
6 | * | 3 | * |
7 | * This file is subject to the terms and conditions of the GNU General Public | 4 | * This file is subject to the terms and conditions of the GNU General Public |
@@ -9,15 +6,16 @@ | |||
9 | * for more details. | 6 | * for more details. |
10 | * | 7 | * |
11 | * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka | 8 | * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka |
9 | * Copyright (C) 2006 ST Microelectronics Ltd. (denorm support) | ||
12 | * | 10 | * |
13 | * FIXME! These routines can be optimized in big endian case. | 11 | * FIXME! These routines have not been tested for big endian case. |
14 | */ | 12 | */ |
15 | |||
16 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
17 | #include <linux/signal.h> | 14 | #include <linux/signal.h> |
15 | #include <linux/io.h> | ||
16 | #include <asm/cpu/fpu.h> | ||
18 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
19 | #include <asm/system.h> | 18 | #include <asm/system.h> |
20 | #include <asm/io.h> | ||
21 | 19 | ||
22 | /* The PR (precision) bit in the FP Status Register must be clear when | 20 | /* The PR (precision) bit in the FP Status Register must be clear when |
23 | * an frchg instruction is executed, otherwise the instruction is undefined. | 21 | * an frchg instruction is executed, otherwise the instruction is undefined. |
@@ -25,113 +23,122 @@ | |||
25 | */ | 23 | */ |
26 | 24 | ||
27 | #define FPSCR_RCHG 0x00000000 | 25 | #define FPSCR_RCHG 0x00000000 |
26 | extern unsigned long long float64_div(unsigned long long a, | ||
27 | unsigned long long b); | ||
28 | extern unsigned long int float32_div(unsigned long int a, unsigned long int b); | ||
29 | extern unsigned long long float64_mul(unsigned long long a, | ||
30 | unsigned long long b); | ||
31 | extern unsigned long int float32_mul(unsigned long int a, unsigned long int b); | ||
32 | extern unsigned long long float64_add(unsigned long long a, | ||
33 | unsigned long long b); | ||
34 | extern unsigned long int float32_add(unsigned long int a, unsigned long int b); | ||
35 | extern unsigned long long float64_sub(unsigned long long a, | ||
36 | unsigned long long b); | ||
37 | extern unsigned long int float32_sub(unsigned long int a, unsigned long int b); | ||
28 | 38 | ||
39 | static unsigned int fpu_exception_flags; | ||
29 | 40 | ||
30 | /* | 41 | /* |
31 | * Save FPU registers onto task structure. | 42 | * Save FPU registers onto task structure. |
32 | * Assume called with FPU enabled (SR.FD=0). | 43 | * Assume called with FPU enabled (SR.FD=0). |
33 | */ | 44 | */ |
34 | void | 45 | void save_fpu(struct task_struct *tsk, struct pt_regs *regs) |
35 | save_fpu(struct task_struct *tsk, struct pt_regs *regs) | ||
36 | { | 46 | { |
37 | unsigned long dummy; | 47 | unsigned long dummy; |
38 | 48 | ||
39 | clear_tsk_thread_flag(tsk, TIF_USEDFPU); | 49 | clear_tsk_thread_flag(tsk, TIF_USEDFPU); |
40 | enable_fpu(); | 50 | enable_fpu(); |
41 | asm volatile("sts.l fpul, @-%0\n\t" | 51 | asm volatile ("sts.l fpul, @-%0\n\t" |
42 | "sts.l fpscr, @-%0\n\t" | 52 | "sts.l fpscr, @-%0\n\t" |
43 | "lds %2, fpscr\n\t" | 53 | "lds %2, fpscr\n\t" |
44 | "frchg\n\t" | 54 | "frchg\n\t" |
45 | "fmov.s fr15, @-%0\n\t" | 55 | "fmov.s fr15, @-%0\n\t" |
46 | "fmov.s fr14, @-%0\n\t" | 56 | "fmov.s fr14, @-%0\n\t" |
47 | "fmov.s fr13, @-%0\n\t" | 57 | "fmov.s fr13, @-%0\n\t" |
48 | "fmov.s fr12, @-%0\n\t" | 58 | "fmov.s fr12, @-%0\n\t" |
49 | "fmov.s fr11, @-%0\n\t" | 59 | "fmov.s fr11, @-%0\n\t" |
50 | "fmov.s fr10, @-%0\n\t" | 60 | "fmov.s fr10, @-%0\n\t" |
51 | "fmov.s fr9, @-%0\n\t" | 61 | "fmov.s fr9, @-%0\n\t" |
52 | "fmov.s fr8, @-%0\n\t" | 62 | "fmov.s fr8, @-%0\n\t" |
53 | "fmov.s fr7, @-%0\n\t" | 63 | "fmov.s fr7, @-%0\n\t" |
54 | "fmov.s fr6, @-%0\n\t" | 64 | "fmov.s fr6, @-%0\n\t" |
55 | "fmov.s fr5, @-%0\n\t" | 65 | "fmov.s fr5, @-%0\n\t" |
56 | "fmov.s fr4, @-%0\n\t" | 66 | "fmov.s fr4, @-%0\n\t" |
57 | "fmov.s fr3, @-%0\n\t" | 67 | "fmov.s fr3, @-%0\n\t" |
58 | "fmov.s fr2, @-%0\n\t" | 68 | "fmov.s fr2, @-%0\n\t" |
59 | "fmov.s fr1, @-%0\n\t" | 69 | "fmov.s fr1, @-%0\n\t" |
60 | "fmov.s fr0, @-%0\n\t" | 70 | "fmov.s fr0, @-%0\n\t" |
61 | "frchg\n\t" | 71 | "frchg\n\t" |
62 | "fmov.s fr15, @-%0\n\t" | 72 | "fmov.s fr15, @-%0\n\t" |
63 | "fmov.s fr14, @-%0\n\t" | 73 | "fmov.s fr14, @-%0\n\t" |
64 | "fmov.s fr13, @-%0\n\t" | 74 | "fmov.s fr13, @-%0\n\t" |
65 | "fmov.s fr12, @-%0\n\t" | 75 | "fmov.s fr12, @-%0\n\t" |
66 | "fmov.s fr11, @-%0\n\t" | 76 | "fmov.s fr11, @-%0\n\t" |
67 | "fmov.s fr10, @-%0\n\t" | 77 | "fmov.s fr10, @-%0\n\t" |
68 | "fmov.s fr9, @-%0\n\t" | 78 | "fmov.s fr9, @-%0\n\t" |
69 | "fmov.s fr8, @-%0\n\t" | 79 | "fmov.s fr8, @-%0\n\t" |
70 | "fmov.s fr7, @-%0\n\t" | 80 | "fmov.s fr7, @-%0\n\t" |
71 | "fmov.s fr6, @-%0\n\t" | 81 | "fmov.s fr6, @-%0\n\t" |
72 | "fmov.s fr5, @-%0\n\t" | 82 | "fmov.s fr5, @-%0\n\t" |
73 | "fmov.s fr4, @-%0\n\t" | 83 | "fmov.s fr4, @-%0\n\t" |
74 | "fmov.s fr3, @-%0\n\t" | 84 | "fmov.s fr3, @-%0\n\t" |
75 | "fmov.s fr2, @-%0\n\t" | 85 | "fmov.s fr2, @-%0\n\t" |
76 | "fmov.s fr1, @-%0\n\t" | 86 | "fmov.s fr1, @-%0\n\t" |
77 | "fmov.s fr0, @-%0\n\t" | 87 | "fmov.s fr0, @-%0\n\t" |
78 | "lds %3, fpscr\n\t" | 88 | "lds %3, fpscr\n\t":"=r" (dummy) |
79 | : "=r" (dummy) | 89 | :"0"((char *)(&tsk->thread.fpu.hard.status)), |
80 | : "0" ((char *)(&tsk->thread.fpu.hard.status)), | 90 | "r"(FPSCR_RCHG), "r"(FPSCR_INIT) |
81 | "r" (FPSCR_RCHG), | 91 | :"memory"); |
82 | "r" (FPSCR_INIT) | ||
83 | : "memory"); | ||
84 | 92 | ||
85 | disable_fpu(); | 93 | disable_fpu(); |
86 | release_fpu(regs); | 94 | release_fpu(regs); |
87 | } | 95 | } |
88 | 96 | ||
89 | static void | 97 | static void restore_fpu(struct task_struct *tsk) |
90 | restore_fpu(struct task_struct *tsk) | ||
91 | { | 98 | { |
92 | unsigned long dummy; | 99 | unsigned long dummy; |
93 | 100 | ||
94 | enable_fpu(); | 101 | enable_fpu(); |
95 | asm volatile("lds %2, fpscr\n\t" | 102 | asm volatile ("lds %2, fpscr\n\t" |
96 | "fmov.s @%0+, fr0\n\t" | 103 | "fmov.s @%0+, fr0\n\t" |
97 | "fmov.s @%0+, fr1\n\t" | 104 | "fmov.s @%0+, fr1\n\t" |
98 | "fmov.s @%0+, fr2\n\t" | 105 | "fmov.s @%0+, fr2\n\t" |
99 | "fmov.s @%0+, fr3\n\t" | 106 | "fmov.s @%0+, fr3\n\t" |
100 | "fmov.s @%0+, fr4\n\t" | 107 | "fmov.s @%0+, fr4\n\t" |
101 | "fmov.s @%0+, fr5\n\t" | 108 | "fmov.s @%0+, fr5\n\t" |
102 | "fmov.s @%0+, fr6\n\t" | 109 | "fmov.s @%0+, fr6\n\t" |
103 | "fmov.s @%0+, fr7\n\t" | 110 | "fmov.s @%0+, fr7\n\t" |
104 | "fmov.s @%0+, fr8\n\t" | 111 | "fmov.s @%0+, fr8\n\t" |
105 | "fmov.s @%0+, fr9\n\t" | 112 | "fmov.s @%0+, fr9\n\t" |
106 | "fmov.s @%0+, fr10\n\t" | 113 | "fmov.s @%0+, fr10\n\t" |
107 | "fmov.s @%0+, fr11\n\t" | 114 | "fmov.s @%0+, fr11\n\t" |
108 | "fmov.s @%0+, fr12\n\t" | 115 | "fmov.s @%0+, fr12\n\t" |
109 | "fmov.s @%0+, fr13\n\t" | 116 | "fmov.s @%0+, fr13\n\t" |
110 | "fmov.s @%0+, fr14\n\t" | 117 | "fmov.s @%0+, fr14\n\t" |
111 | "fmov.s @%0+, fr15\n\t" | 118 | "fmov.s @%0+, fr15\n\t" |
112 | "frchg\n\t" | 119 | "frchg\n\t" |
113 | "fmov.s @%0+, fr0\n\t" | 120 | "fmov.s @%0+, fr0\n\t" |
114 | "fmov.s @%0+, fr1\n\t" | 121 | "fmov.s @%0+, fr1\n\t" |
115 | "fmov.s @%0+, fr2\n\t" | 122 | "fmov.s @%0+, fr2\n\t" |
116 | "fmov.s @%0+, fr3\n\t" | 123 | "fmov.s @%0+, fr3\n\t" |
117 | "fmov.s @%0+, fr4\n\t" | 124 | "fmov.s @%0+, fr4\n\t" |
118 | "fmov.s @%0+, fr5\n\t" | 125 | "fmov.s @%0+, fr5\n\t" |
119 | "fmov.s @%0+, fr6\n\t" | 126 | "fmov.s @%0+, fr6\n\t" |
120 | "fmov.s @%0+, fr7\n\t" | 127 | "fmov.s @%0+, fr7\n\t" |
121 | "fmov.s @%0+, fr8\n\t" | 128 | "fmov.s @%0+, fr8\n\t" |
122 | "fmov.s @%0+, fr9\n\t" | 129 | "fmov.s @%0+, fr9\n\t" |
123 | "fmov.s @%0+, fr10\n\t" | 130 | "fmov.s @%0+, fr10\n\t" |
124 | "fmov.s @%0+, fr11\n\t" | 131 | "fmov.s @%0+, fr11\n\t" |
125 | "fmov.s @%0+, fr12\n\t" | 132 | "fmov.s @%0+, fr12\n\t" |
126 | "fmov.s @%0+, fr13\n\t" | 133 | "fmov.s @%0+, fr13\n\t" |
127 | "fmov.s @%0+, fr14\n\t" | 134 | "fmov.s @%0+, fr14\n\t" |
128 | "fmov.s @%0+, fr15\n\t" | 135 | "fmov.s @%0+, fr15\n\t" |
129 | "frchg\n\t" | 136 | "frchg\n\t" |
130 | "lds.l @%0+, fpscr\n\t" | 137 | "lds.l @%0+, fpscr\n\t" |
131 | "lds.l @%0+, fpul\n\t" | 138 | "lds.l @%0+, fpul\n\t" |
132 | : "=r" (dummy) | 139 | :"=r" (dummy) |
133 | : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG) | 140 | :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG) |
134 | : "memory"); | 141 | :"memory"); |
135 | disable_fpu(); | 142 | disable_fpu(); |
136 | } | 143 | } |
137 | 144 | ||
@@ -141,61 +148,59 @@ restore_fpu(struct task_struct *tsk) | |||
141 | * double precision represents signaling NANS. | 148 | * double precision represents signaling NANS. |
142 | */ | 149 | */ |
143 | 150 | ||
144 | static void | 151 | static void fpu_init(void) |
145 | fpu_init(void) | ||
146 | { | 152 | { |
147 | enable_fpu(); | 153 | enable_fpu(); |
148 | asm volatile("lds %0, fpul\n\t" | 154 | asm volatile ( "lds %0, fpul\n\t" |
149 | "lds %1, fpscr\n\t" | 155 | "lds %1, fpscr\n\t" |
150 | "fsts fpul, fr0\n\t" | 156 | "fsts fpul, fr0\n\t" |
151 | "fsts fpul, fr1\n\t" | 157 | "fsts fpul, fr1\n\t" |
152 | "fsts fpul, fr2\n\t" | 158 | "fsts fpul, fr2\n\t" |
153 | "fsts fpul, fr3\n\t" | 159 | "fsts fpul, fr3\n\t" |
154 | "fsts fpul, fr4\n\t" | 160 | "fsts fpul, fr4\n\t" |
155 | "fsts fpul, fr5\n\t" | 161 | "fsts fpul, fr5\n\t" |
156 | "fsts fpul, fr6\n\t" | 162 | "fsts fpul, fr6\n\t" |
157 | "fsts fpul, fr7\n\t" | 163 | "fsts fpul, fr7\n\t" |
158 | "fsts fpul, fr8\n\t" | 164 | "fsts fpul, fr8\n\t" |
159 | "fsts fpul, fr9\n\t" | 165 | "fsts fpul, fr9\n\t" |
160 | "fsts fpul, fr10\n\t" | 166 | "fsts fpul, fr10\n\t" |
161 | "fsts fpul, fr11\n\t" | 167 | "fsts fpul, fr11\n\t" |
162 | "fsts fpul, fr12\n\t" | 168 | "fsts fpul, fr12\n\t" |
163 | "fsts fpul, fr13\n\t" | 169 | "fsts fpul, fr13\n\t" |
164 | "fsts fpul, fr14\n\t" | 170 | "fsts fpul, fr14\n\t" |
165 | "fsts fpul, fr15\n\t" | 171 | "fsts fpul, fr15\n\t" |
166 | "frchg\n\t" | 172 | "frchg\n\t" |
167 | "fsts fpul, fr0\n\t" | 173 | "fsts fpul, fr0\n\t" |
168 | "fsts fpul, fr1\n\t" | 174 | "fsts fpul, fr1\n\t" |
169 | "fsts fpul, fr2\n\t" | 175 | "fsts fpul, fr2\n\t" |
170 | "fsts fpul, fr3\n\t" | 176 | "fsts fpul, fr3\n\t" |
171 | "fsts fpul, fr4\n\t" | 177 | "fsts fpul, fr4\n\t" |
172 | "fsts fpul, fr5\n\t" | 178 | "fsts fpul, fr5\n\t" |
173 | "fsts fpul, fr6\n\t" | 179 | "fsts fpul, fr6\n\t" |
174 | "fsts fpul, fr7\n\t" | 180 | "fsts fpul, fr7\n\t" |
175 | "fsts fpul, fr8\n\t" | 181 | "fsts fpul, fr8\n\t" |
176 | "fsts fpul, fr9\n\t" | 182 | "fsts fpul, fr9\n\t" |
177 | "fsts fpul, fr10\n\t" | 183 | "fsts fpul, fr10\n\t" |
178 | "fsts fpul, fr11\n\t" | 184 | "fsts fpul, fr11\n\t" |
179 | "fsts fpul, fr12\n\t" | 185 | "fsts fpul, fr12\n\t" |
180 | "fsts fpul, fr13\n\t" | 186 | "fsts fpul, fr13\n\t" |
181 | "fsts fpul, fr14\n\t" | 187 | "fsts fpul, fr14\n\t" |
182 | "fsts fpul, fr15\n\t" | 188 | "fsts fpul, fr15\n\t" |
183 | "frchg\n\t" | 189 | "frchg\n\t" |
184 | "lds %2, fpscr\n\t" | 190 | "lds %2, fpscr\n\t" |
185 | : /* no output */ | 191 | : /* no output */ |
186 | : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); | 192 | :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT)); |
187 | disable_fpu(); | 193 | disable_fpu(); |
188 | } | 194 | } |
189 | 195 | ||
190 | /** | 196 | /** |
191 | * denormal_to_double - Given denormalized float number, | 197 | * denormal_to_double - Given denormalized float number, |
192 | * store double float | 198 | * store double float |
193 | * | 199 | * |
194 | * @fpu: Pointer to sh_fpu_hard structure | 200 | * @fpu: Pointer to sh_fpu_hard structure |
195 | * @n: Index to FP register | 201 | * @n: Index to FP register |
196 | */ | 202 | */ |
197 | static void | 203 | static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n) |
198 | denormal_to_double (struct sh_fpu_hard_struct *fpu, int n) | ||
199 | { | 204 | { |
200 | unsigned long du, dl; | 205 | unsigned long du, dl; |
201 | unsigned long x = fpu->fpul; | 206 | unsigned long x = fpu->fpul; |
@@ -212,7 +217,7 @@ denormal_to_double (struct sh_fpu_hard_struct *fpu, int n) | |||
212 | dl = x << 29; | 217 | dl = x << 29; |
213 | 218 | ||
214 | fpu->fp_regs[n] = du; | 219 | fpu->fp_regs[n] = du; |
215 | fpu->fp_regs[n+1] = dl; | 220 | fpu->fp_regs[n + 1] = dl; |
216 | } | 221 | } |
217 | } | 222 | } |
218 | 223 | ||
@@ -223,67 +228,191 @@ denormal_to_double (struct sh_fpu_hard_struct *fpu, int n) | |||
223 | * | 228 | * |
224 | * Returns 1 when it's handled (should not cause exception). | 229 | * Returns 1 when it's handled (should not cause exception). |
225 | */ | 230 | */ |
226 | static int | 231 | static int ieee_fpe_handler(struct pt_regs *regs) |
227 | ieee_fpe_handler (struct pt_regs *regs) | ||
228 | { | 232 | { |
229 | unsigned short insn = *(unsigned short *) regs->pc; | 233 | unsigned short insn = *(unsigned short *)regs->pc; |
230 | unsigned short finsn; | 234 | unsigned short finsn; |
231 | unsigned long nextpc; | 235 | unsigned long nextpc; |
232 | int nib[4] = { | 236 | int nib[4] = { |
233 | (insn >> 12) & 0xf, | 237 | (insn >> 12) & 0xf, |
234 | (insn >> 8) & 0xf, | 238 | (insn >> 8) & 0xf, |
235 | (insn >> 4) & 0xf, | 239 | (insn >> 4) & 0xf, |
236 | insn & 0xf}; | 240 | insn & 0xf |
237 | 241 | }; | |
238 | if (nib[0] == 0xb || | 242 | |
239 | (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ | 243 | if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) |
240 | regs->pr = regs->pc + 4; | 244 | regs->pr = regs->pc + 4; /* bsr & jsr */ |
241 | if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ | 245 | |
242 | nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); | 246 | if (nib[0] == 0xa || nib[0] == 0xb) { |
243 | finsn = *(unsigned short *) (regs->pc + 2); | 247 | /* bra & bsr */ |
244 | } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */ | 248 | nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3); |
249 | finsn = *(unsigned short *)(regs->pc + 2); | ||
250 | } else if (nib[0] == 0x8 && nib[1] == 0xd) { | ||
251 | /* bt/s */ | ||
245 | if (regs->sr & 1) | 252 | if (regs->sr & 1) |
246 | nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); | 253 | nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1); |
247 | else | 254 | else |
248 | nextpc = regs->pc + 4; | 255 | nextpc = regs->pc + 4; |
249 | finsn = *(unsigned short *) (regs->pc + 2); | 256 | finsn = *(unsigned short *)(regs->pc + 2); |
250 | } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */ | 257 | } else if (nib[0] == 0x8 && nib[1] == 0xf) { |
258 | /* bf/s */ | ||
251 | if (regs->sr & 1) | 259 | if (regs->sr & 1) |
252 | nextpc = regs->pc + 4; | 260 | nextpc = regs->pc + 4; |
253 | else | 261 | else |
254 | nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); | 262 | nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1); |
255 | finsn = *(unsigned short *) (regs->pc + 2); | 263 | finsn = *(unsigned short *)(regs->pc + 2); |
256 | } else if (nib[0] == 0x4 && nib[3] == 0xb && | 264 | } else if (nib[0] == 0x4 && nib[3] == 0xb && |
257 | (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */ | 265 | (nib[2] == 0x0 || nib[2] == 0x2)) { |
266 | /* jmp & jsr */ | ||
258 | nextpc = regs->regs[nib[1]]; | 267 | nextpc = regs->regs[nib[1]]; |
259 | finsn = *(unsigned short *) (regs->pc + 2); | 268 | finsn = *(unsigned short *)(regs->pc + 2); |
260 | } else if (nib[0] == 0x0 && nib[3] == 0x3 && | 269 | } else if (nib[0] == 0x0 && nib[3] == 0x3 && |
261 | (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */ | 270 | (nib[2] == 0x0 || nib[2] == 0x2)) { |
271 | /* braf & bsrf */ | ||
262 | nextpc = regs->pc + 4 + regs->regs[nib[1]]; | 272 | nextpc = regs->pc + 4 + regs->regs[nib[1]]; |
263 | finsn = *(unsigned short *) (regs->pc + 2); | 273 | finsn = *(unsigned short *)(regs->pc + 2); |
264 | } else if (insn == 0x000b) { /* rts */ | 274 | } else if (insn == 0x000b) { |
275 | /* rts */ | ||
265 | nextpc = regs->pr; | 276 | nextpc = regs->pr; |
266 | finsn = *(unsigned short *) (regs->pc + 2); | 277 | finsn = *(unsigned short *)(regs->pc + 2); |
267 | } else { | 278 | } else { |
268 | nextpc = regs->pc + instruction_size(insn); | 279 | nextpc = regs->pc + instruction_size(insn); |
269 | finsn = insn; | 280 | finsn = insn; |
270 | } | 281 | } |
271 | 282 | ||
272 | if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ | 283 | if ((finsn & 0xf1ff) == 0xf0ad) { |
284 | /* fcnvsd */ | ||
273 | struct task_struct *tsk = current; | 285 | struct task_struct *tsk = current; |
274 | 286 | ||
275 | save_fpu(tsk, regs); | 287 | save_fpu(tsk, regs); |
276 | if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) { | 288 | if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)) |
277 | /* FPU error */ | 289 | /* FPU error */ |
278 | denormal_to_double (&tsk->thread.fpu.hard, | 290 | denormal_to_double(&tsk->thread.fpu.hard, |
279 | (finsn >> 8) & 0xf); | 291 | (finsn >> 8) & 0xf); |
280 | tsk->thread.fpu.hard.fpscr &= | 292 | else |
281 | ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); | 293 | return 0; |
282 | grab_fpu(regs); | 294 | |
283 | restore_fpu(tsk); | 295 | regs->pc = nextpc; |
284 | set_tsk_thread_flag(tsk, TIF_USEDFPU); | 296 | return 1; |
297 | } else if ((finsn & 0xf00f) == 0xf002) { | ||
298 | /* fmul */ | ||
299 | struct task_struct *tsk = current; | ||
300 | int fpscr; | ||
301 | int n, m, prec; | ||
302 | unsigned int hx, hy; | ||
303 | |||
304 | n = (finsn >> 8) & 0xf; | ||
305 | m = (finsn >> 4) & 0xf; | ||
306 | hx = tsk->thread.fpu.hard.fp_regs[n]; | ||
307 | hy = tsk->thread.fpu.hard.fp_regs[m]; | ||
308 | fpscr = tsk->thread.fpu.hard.fpscr; | ||
309 | prec = fpscr & FPSCR_DBL_PRECISION; | ||
310 | |||
311 | if ((fpscr & FPSCR_CAUSE_ERROR) | ||
312 | && (prec && ((hx & 0x7fffffff) < 0x00100000 | ||
313 | || (hy & 0x7fffffff) < 0x00100000))) { | ||
314 | long long llx, lly; | ||
315 | |||
316 | /* FPU error because of denormal (doubles) */ | ||
317 | llx = ((long long)hx << 32) | ||
318 | | tsk->thread.fpu.hard.fp_regs[n + 1]; | ||
319 | lly = ((long long)hy << 32) | ||
320 | | tsk->thread.fpu.hard.fp_regs[m + 1]; | ||
321 | llx = float64_mul(llx, lly); | ||
322 | tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; | ||
323 | tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; | ||
324 | } else if ((fpscr & FPSCR_CAUSE_ERROR) | ||
325 | && (!prec && ((hx & 0x7fffffff) < 0x00800000 | ||
326 | || (hy & 0x7fffffff) < 0x00800000))) { | ||
327 | /* FPU error because of denormal (floats) */ | ||
328 | hx = float32_mul(hx, hy); | ||
329 | tsk->thread.fpu.hard.fp_regs[n] = hx; | ||
330 | } else | ||
331 | return 0; | ||
332 | |||
333 | regs->pc = nextpc; | ||
334 | return 1; | ||
335 | } else if ((finsn & 0xf00e) == 0xf000) { | ||
336 | /* fadd, fsub */ | ||
337 | struct task_struct *tsk = current; | ||
338 | int fpscr; | ||
339 | int n, m, prec; | ||
340 | unsigned int hx, hy; | ||
341 | |||
342 | n = (finsn >> 8) & 0xf; | ||
343 | m = (finsn >> 4) & 0xf; | ||
344 | hx = tsk->thread.fpu.hard.fp_regs[n]; | ||
345 | hy = tsk->thread.fpu.hard.fp_regs[m]; | ||
346 | fpscr = tsk->thread.fpu.hard.fpscr; | ||
347 | prec = fpscr & FPSCR_DBL_PRECISION; | ||
348 | |||
349 | if ((fpscr & FPSCR_CAUSE_ERROR) | ||
350 | && (prec && ((hx & 0x7fffffff) < 0x00100000 | ||
351 | || (hy & 0x7fffffff) < 0x00100000))) { | ||
352 | long long llx, lly; | ||
353 | |||
354 | /* FPU error because of denormal (doubles) */ | ||
355 | llx = ((long long)hx << 32) | ||
356 | | tsk->thread.fpu.hard.fp_regs[n + 1]; | ||
357 | lly = ((long long)hy << 32) | ||
358 | | tsk->thread.fpu.hard.fp_regs[m + 1]; | ||
359 | if ((finsn & 0xf00f) == 0xf000) | ||
360 | llx = float64_add(llx, lly); | ||
361 | else | ||
362 | llx = float64_sub(llx, lly); | ||
363 | tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; | ||
364 | tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; | ||
365 | } else if ((fpscr & FPSCR_CAUSE_ERROR) | ||
366 | && (!prec && ((hx & 0x7fffffff) < 0x00800000 | ||
367 | || (hy & 0x7fffffff) < 0x00800000))) { | ||
368 | /* FPU error because of denormal (floats) */ | ||
369 | if ((finsn & 0xf00f) == 0xf000) | ||
370 | hx = float32_add(hx, hy); | ||
371 | else | ||
372 | hx = float32_sub(hx, hy); | ||
373 | tsk->thread.fpu.hard.fp_regs[n] = hx; | ||
374 | } else | ||
375 | return 0; | ||
376 | |||
377 | regs->pc = nextpc; | ||
378 | return 1; | ||
379 | } else if ((finsn & 0xf003) == 0xf003) { | ||
380 | /* fdiv */ | ||
381 | struct task_struct *tsk = current; | ||
382 | int fpscr; | ||
383 | int n, m, prec; | ||
384 | unsigned int hx, hy; | ||
385 | |||
386 | n = (finsn >> 8) & 0xf; | ||
387 | m = (finsn >> 4) & 0xf; | ||
388 | hx = tsk->thread.fpu.hard.fp_regs[n]; | ||
389 | hy = tsk->thread.fpu.hard.fp_regs[m]; | ||
390 | fpscr = tsk->thread.fpu.hard.fpscr; | ||
391 | prec = fpscr & FPSCR_DBL_PRECISION; | ||
392 | |||
393 | if ((fpscr & FPSCR_CAUSE_ERROR) | ||
394 | && (prec && ((hx & 0x7fffffff) < 0x00100000 | ||
395 | || (hy & 0x7fffffff) < 0x00100000))) { | ||
396 | long long llx, lly; | ||
397 | |||
398 | /* FPU error because of denormal (doubles) */ | ||
399 | llx = ((long long)hx << 32) | ||
400 | | tsk->thread.fpu.hard.fp_regs[n + 1]; | ||
401 | lly = ((long long)hy << 32) | ||
402 | | tsk->thread.fpu.hard.fp_regs[m + 1]; | ||
403 | |||
404 | llx = float64_div(llx, lly); | ||
405 | |||
406 | tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; | ||
407 | tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; | ||
408 | } else if ((fpscr & FPSCR_CAUSE_ERROR) | ||
409 | && (!prec && ((hx & 0x7fffffff) < 0x00800000 | ||
410 | || (hy & 0x7fffffff) < 0x00800000))) { | ||
411 | /* FPU error because of denormal (floats) */ | ||
412 | hx = float32_div(hx, hy); | ||
413 | tsk->thread.fpu.hard.fp_regs[n] = hx; | ||
285 | } else | 414 | } else |
286 | force_sig(SIGFPE, tsk); | 415 | return 0; |
287 | 416 | ||
288 | regs->pc = nextpc; | 417 | regs->pc = nextpc; |
289 | return 1; | 418 | return 1; |
@@ -292,16 +421,41 @@ ieee_fpe_handler (struct pt_regs *regs) | |||
292 | return 0; | 421 | return 0; |
293 | } | 422 | } |
294 | 423 | ||
424 | void float_raise(unsigned int flags) | ||
425 | { | ||
426 | fpu_exception_flags |= flags; | ||
427 | } | ||
428 | |||
429 | int float_rounding_mode(void) | ||
430 | { | ||
431 | struct task_struct *tsk = current; | ||
432 | int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr); | ||
433 | return roundingMode; | ||
434 | } | ||
435 | |||
295 | BUILD_TRAP_HANDLER(fpu_error) | 436 | BUILD_TRAP_HANDLER(fpu_error) |
296 | { | 437 | { |
297 | struct task_struct *tsk = current; | 438 | struct task_struct *tsk = current; |
298 | TRAP_HANDLER_DECL; | 439 | TRAP_HANDLER_DECL; |
299 | 440 | ||
300 | if (ieee_fpe_handler(regs)) | ||
301 | return; | ||
302 | |||
303 | regs->pc += 2; | ||
304 | save_fpu(tsk, regs); | 441 | save_fpu(tsk, regs); |
442 | fpu_exception_flags = 0; | ||
443 | if (ieee_fpe_handler(regs)) { | ||
444 | tsk->thread.fpu.hard.fpscr &= | ||
445 | ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); | ||
446 | tsk->thread.fpu.hard.fpscr |= fpu_exception_flags; | ||
447 | /* Set the FPSCR flag as well as cause bits - simply | ||
448 | * replicate the cause */ | ||
449 | tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10); | ||
450 | grab_fpu(regs); | ||
451 | restore_fpu(tsk); | ||
452 | set_tsk_thread_flag(tsk, TIF_USEDFPU); | ||
453 | if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) & | ||
454 | (fpu_exception_flags >> 2)) == 0) { | ||
455 | return; | ||
456 | } | ||
457 | } | ||
458 | |||
305 | force_sig(SIGFPE, tsk); | 459 | force_sig(SIGFPE, tsk); |
306 | } | 460 | } |
307 | 461 | ||
@@ -319,7 +473,7 @@ BUILD_TRAP_HANDLER(fpu_state_restore) | |||
319 | if (used_math()) { | 473 | if (used_math()) { |
320 | /* Using the FPU again. */ | 474 | /* Using the FPU again. */ |
321 | restore_fpu(tsk); | 475 | restore_fpu(tsk); |
322 | } else { | 476 | } else { |
323 | /* First time FPU user. */ | 477 | /* First time FPU user. */ |
324 | fpu_init(); | 478 | fpu_init(); |
325 | set_used_math(); | 479 | set_used_math(); |
diff --git a/arch/sh/kernel/cpu/sh4/softfloat.c b/arch/sh/kernel/cpu/sh4/softfloat.c
new file mode 100644
index 000000000000..7b2d337ee412
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4/softfloat.c
@@ -0,0 +1,892 @@ | |||
1 | /* | ||
2 | * Floating point emulation support for subnormalised numbers on the SH4 | ||
3 | * architecture. This file is derived from the SoftFloat IEC/IEEE | ||
4 | * Floating-point Arithmetic Package, Release 2, the original license of | ||
5 | * which is reproduced below. | ||
6 | * | ||
7 | * ======================================================================== | ||
8 | * | ||
9 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point | ||
10 | * Arithmetic Package, Release 2. | ||
11 | * | ||
12 | * Written by John R. Hauser. This work was made possible in part by the | ||
13 | * International Computer Science Institute, located at Suite 600, 1947 Center | ||
14 | * Street, Berkeley, California 94704. Funding was partially provided by the | ||
15 | * National Science Foundation under grant MIP-9311980. The original version | ||
16 | * of this code was written as part of a project to build a fixed-point vector | ||
17 | * processor in collaboration with the University of California at Berkeley, | ||
18 | * overseen by Profs. Nelson Morgan and John Wawrzynek. More information | ||
19 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | ||
20 | * arithmetic/softfloat.html'. | ||
21 | * | ||
22 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort | ||
23 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | ||
24 | * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | ||
25 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | ||
26 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | ||
27 | * | ||
28 | * Derivative works are acceptable, even for commercial purposes, so long as | ||
29 | * (1) they include prominent notice that the work is derivative, and (2) they | ||
30 | * include prominent notice akin to these three paragraphs for those parts of | ||
31 | * this code that are retained. | ||
32 | * | ||
33 | * ======================================================================== | ||
34 | * | ||
35 | * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com> | ||
36 | * and Kamel Khelifi <kamel.khelifi@st.com> | ||
37 | */ | ||
38 | #include <linux/kernel.h> | ||
39 | #include <asm/cpu/fpu.h> | ||
40 | |||
/* Append the long-long suffix to an integer constant. */
#define LIT64(a) a##LL

/* Boolean-like flag type. */
typedef char flag;

/*
 * General-purpose integer types.  Note that both uint16 and int16 are
 * plain int here.
 */
typedef unsigned char uint8;
typedef signed char int8;
typedef int uint16;
typedef int int16;
typedef unsigned int uint32;
typedef signed int int32;
typedef unsigned long long int uint64;
typedef signed long long int int64;

/* Bit-container types used for significands and packed fields. */
typedef unsigned char bits8;
typedef signed char sbits8;
typedef unsigned short int bits16;
typedef signed short int sbits16;
typedef unsigned int bits32;
typedef signed int sbits32;
typedef unsigned long long int bits64;
typedef signed long long int sbits64;

/* Raw bit images of single- and double-precision values. */
typedef unsigned long int float32;
typedef unsigned long long float64;
66 | |||
67 | extern void float_raise(unsigned int flags); /* in fpu.c */ | ||
68 | extern int float_rounding_mode(void); /* in fpu.c */ | ||
69 | |||
70 | inline bits64 extractFloat64Frac(float64 a); | ||
71 | inline flag extractFloat64Sign(float64 a); | ||
72 | inline int16 extractFloat64Exp(float64 a); | ||
73 | inline int16 extractFloat32Exp(float32 a); | ||
74 | inline flag extractFloat32Sign(float32 a); | ||
75 | inline bits32 extractFloat32Frac(float32 a); | ||
76 | inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig); | ||
77 | inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr); | ||
78 | inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig); | ||
79 | inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr); | ||
80 | float64 float64_sub(float64 a, float64 b); | ||
81 | float32 float32_sub(float32 a, float32 b); | ||
82 | float32 float32_add(float32 a, float32 b); | ||
83 | float64 float64_add(float64 a, float64 b); | ||
84 | float64 float64_div(float64 a, float64 b); | ||
85 | float32 float32_div(float32 a, float32 b); | ||
86 | float32 float32_mul(float32 a, float32 b); | ||
87 | float64 float64_mul(float64 a, float64 b); | ||
88 | inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | ||
89 | bits64 * z1Ptr); | ||
90 | inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | ||
91 | bits64 * z1Ptr); | ||
92 | inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr); | ||
93 | |||
94 | static int8 countLeadingZeros32(bits32 a); | ||
95 | static int8 countLeadingZeros64(bits64 a); | ||
96 | static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, | ||
97 | bits64 zSig); | ||
98 | static float64 subFloat64Sigs(float64 a, float64 b, flag zSign); | ||
99 | static float64 addFloat64Sigs(float64 a, float64 b, flag zSign); | ||
100 | static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig); | ||
101 | static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, | ||
102 | bits32 zSig); | ||
103 | static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig); | ||
104 | static float32 subFloat32Sigs(float32 a, float32 b, flag zSign); | ||
105 | static float32 addFloat32Sigs(float32 a, float32 b, flag zSign); | ||
106 | static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, | ||
107 | bits64 * zSigPtr); | ||
108 | static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b); | ||
109 | static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr, | ||
110 | bits32 * zSigPtr); | ||
111 | |||
112 | inline bits64 extractFloat64Frac(float64 a) | ||
113 | { | ||
114 | return a & LIT64(0x000FFFFFFFFFFFFF); | ||
115 | } | ||
116 | |||
117 | inline flag extractFloat64Sign(float64 a) | ||
118 | { | ||
119 | return a >> 63; | ||
120 | } | ||
121 | |||
122 | inline int16 extractFloat64Exp(float64 a) | ||
123 | { | ||
124 | return (a >> 52) & 0x7FF; | ||
125 | } | ||
126 | |||
127 | inline int16 extractFloat32Exp(float32 a) | ||
128 | { | ||
129 | return (a >> 23) & 0xFF; | ||
130 | } | ||
131 | |||
132 | inline flag extractFloat32Sign(float32 a) | ||
133 | { | ||
134 | return a >> 31; | ||
135 | } | ||
136 | |||
137 | inline bits32 extractFloat32Frac(float32 a) | ||
138 | { | ||
139 | return a & 0x007FFFFF; | ||
140 | } | ||
141 | |||
142 | inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig) | ||
143 | { | ||
144 | return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig; | ||
145 | } | ||
146 | |||
147 | inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr) | ||
148 | { | ||
149 | bits64 z; | ||
150 | |||
151 | if (count == 0) { | ||
152 | z = a; | ||
153 | } else if (count < 64) { | ||
154 | z = (a >> count) | ((a << ((-count) & 63)) != 0); | ||
155 | } else { | ||
156 | z = (a != 0); | ||
157 | } | ||
158 | *zPtr = z; | ||
159 | } | ||
160 | |||
161 | static int8 countLeadingZeros32(bits32 a) | ||
162 | { | ||
163 | static const int8 countLeadingZerosHigh[] = { | ||
164 | 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, | ||
165 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | ||
166 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | ||
167 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | ||
168 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
169 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
170 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
171 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
172 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
173 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
174 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
175 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
176 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
177 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
178 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
179 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | ||
180 | }; | ||
181 | int8 shiftCount; | ||
182 | |||
183 | shiftCount = 0; | ||
184 | if (a < 0x10000) { | ||
185 | shiftCount += 16; | ||
186 | a <<= 16; | ||
187 | } | ||
188 | if (a < 0x1000000) { | ||
189 | shiftCount += 8; | ||
190 | a <<= 8; | ||
191 | } | ||
192 | shiftCount += countLeadingZerosHigh[a >> 24]; | ||
193 | return shiftCount; | ||
194 | |||
195 | } | ||
196 | |||
197 | static int8 countLeadingZeros64(bits64 a) | ||
198 | { | ||
199 | int8 shiftCount; | ||
200 | |||
201 | shiftCount = 0; | ||
202 | if (a < ((bits64) 1) << 32) { | ||
203 | shiftCount += 32; | ||
204 | } else { | ||
205 | a >>= 32; | ||
206 | } | ||
207 | shiftCount += countLeadingZeros32(a); | ||
208 | return shiftCount; | ||
209 | |||
210 | } | ||
211 | |||
212 | static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig) | ||
213 | { | ||
214 | int8 shiftCount; | ||
215 | |||
216 | shiftCount = countLeadingZeros64(zSig) - 1; | ||
217 | return roundAndPackFloat64(zSign, zExp - shiftCount, | ||
218 | zSig << shiftCount); | ||
219 | |||
220 | } | ||
221 | |||
222 | static float64 subFloat64Sigs(float64 a, float64 b, flag zSign) | ||
223 | { | ||
224 | int16 aExp, bExp, zExp; | ||
225 | bits64 aSig, bSig, zSig; | ||
226 | int16 expDiff; | ||
227 | |||
228 | aSig = extractFloat64Frac(a); | ||
229 | aExp = extractFloat64Exp(a); | ||
230 | bSig = extractFloat64Frac(b); | ||
231 | bExp = extractFloat64Exp(b); | ||
232 | expDiff = aExp - bExp; | ||
233 | aSig <<= 10; | ||
234 | bSig <<= 10; | ||
235 | if (0 < expDiff) | ||
236 | goto aExpBigger; | ||
237 | if (expDiff < 0) | ||
238 | goto bExpBigger; | ||
239 | if (aExp == 0) { | ||
240 | aExp = 1; | ||
241 | bExp = 1; | ||
242 | } | ||
243 | if (bSig < aSig) | ||
244 | goto aBigger; | ||
245 | if (aSig < bSig) | ||
246 | goto bBigger; | ||
247 | return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0); | ||
248 | bExpBigger: | ||
249 | if (bExp == 0x7FF) { | ||
250 | return packFloat64(zSign ^ 1, 0x7FF, 0); | ||
251 | } | ||
252 | if (aExp == 0) { | ||
253 | ++expDiff; | ||
254 | } else { | ||
255 | aSig |= LIT64(0x4000000000000000); | ||
256 | } | ||
257 | shift64RightJamming(aSig, -expDiff, &aSig); | ||
258 | bSig |= LIT64(0x4000000000000000); | ||
259 | bBigger: | ||
260 | zSig = bSig - aSig; | ||
261 | zExp = bExp; | ||
262 | zSign ^= 1; | ||
263 | goto normalizeRoundAndPack; | ||
264 | aExpBigger: | ||
265 | if (aExp == 0x7FF) { | ||
266 | return a; | ||
267 | } | ||
268 | if (bExp == 0) { | ||
269 | --expDiff; | ||
270 | } else { | ||
271 | bSig |= LIT64(0x4000000000000000); | ||
272 | } | ||
273 | shift64RightJamming(bSig, expDiff, &bSig); | ||
274 | aSig |= LIT64(0x4000000000000000); | ||
275 | aBigger: | ||
276 | zSig = aSig - bSig; | ||
277 | zExp = aExp; | ||
278 | normalizeRoundAndPack: | ||
279 | --zExp; | ||
280 | return normalizeRoundAndPackFloat64(zSign, zExp, zSig); | ||
281 | |||
282 | } | ||
283 | static float64 addFloat64Sigs(float64 a, float64 b, flag zSign) | ||
284 | { | ||
285 | int16 aExp, bExp, zExp; | ||
286 | bits64 aSig, bSig, zSig; | ||
287 | int16 expDiff; | ||
288 | |||
289 | aSig = extractFloat64Frac(a); | ||
290 | aExp = extractFloat64Exp(a); | ||
291 | bSig = extractFloat64Frac(b); | ||
292 | bExp = extractFloat64Exp(b); | ||
293 | expDiff = aExp - bExp; | ||
294 | aSig <<= 9; | ||
295 | bSig <<= 9; | ||
296 | if (0 < expDiff) { | ||
297 | if (aExp == 0x7FF) { | ||
298 | return a; | ||
299 | } | ||
300 | if (bExp == 0) { | ||
301 | --expDiff; | ||
302 | } else { | ||
303 | bSig |= LIT64(0x2000000000000000); | ||
304 | } | ||
305 | shift64RightJamming(bSig, expDiff, &bSig); | ||
306 | zExp = aExp; | ||
307 | } else if (expDiff < 0) { | ||
308 | if (bExp == 0x7FF) { | ||
309 | return packFloat64(zSign, 0x7FF, 0); | ||
310 | } | ||
311 | if (aExp == 0) { | ||
312 | ++expDiff; | ||
313 | } else { | ||
314 | aSig |= LIT64(0x2000000000000000); | ||
315 | } | ||
316 | shift64RightJamming(aSig, -expDiff, &aSig); | ||
317 | zExp = bExp; | ||
318 | } else { | ||
319 | if (aExp == 0x7FF) { | ||
320 | return a; | ||
321 | } | ||
322 | if (aExp == 0) | ||
323 | return packFloat64(zSign, 0, (aSig + bSig) >> 9); | ||
324 | zSig = LIT64(0x4000000000000000) + aSig + bSig; | ||
325 | zExp = aExp; | ||
326 | goto roundAndPack; | ||
327 | } | ||
328 | aSig |= LIT64(0x2000000000000000); | ||
329 | zSig = (aSig + bSig) << 1; | ||
330 | --zExp; | ||
331 | if ((sbits64) zSig < 0) { | ||
332 | zSig = aSig + bSig; | ||
333 | ++zExp; | ||
334 | } | ||
335 | roundAndPack: | ||
336 | return roundAndPackFloat64(zSign, zExp, zSig); | ||
337 | |||
338 | } | ||
339 | |||
340 | inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig) | ||
341 | { | ||
342 | return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig; | ||
343 | } | ||
344 | |||
345 | inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr) | ||
346 | { | ||
347 | bits32 z; | ||
348 | if (count == 0) { | ||
349 | z = a; | ||
350 | } else if (count < 32) { | ||
351 | z = (a >> count) | ((a << ((-count) & 31)) != 0); | ||
352 | } else { | ||
353 | z = (a != 0); | ||
354 | } | ||
355 | *zPtr = z; | ||
356 | } | ||
357 | |||
358 | static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig) | ||
359 | { | ||
360 | flag roundNearestEven; | ||
361 | int8 roundIncrement, roundBits; | ||
362 | flag isTiny; | ||
363 | |||
364 | /* SH4 has only 2 rounding modes - round to nearest and round to zero */ | ||
365 | roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST); | ||
366 | roundIncrement = 0x40; | ||
367 | if (!roundNearestEven) { | ||
368 | roundIncrement = 0; | ||
369 | } | ||
370 | roundBits = zSig & 0x7F; | ||
371 | if (0xFD <= (bits16) zExp) { | ||
372 | if ((0xFD < zExp) | ||
373 | || ((zExp == 0xFD) | ||
374 | && ((sbits32) (zSig + roundIncrement) < 0)) | ||
375 | ) { | ||
376 | float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT); | ||
377 | return packFloat32(zSign, 0xFF, | ||
378 | 0) - (roundIncrement == 0); | ||
379 | } | ||
380 | if (zExp < 0) { | ||
381 | isTiny = (zExp < -1) | ||
382 | || (zSig + roundIncrement < 0x80000000); | ||
383 | shift32RightJamming(zSig, -zExp, &zSig); | ||
384 | zExp = 0; | ||
385 | roundBits = zSig & 0x7F; | ||
386 | if (isTiny && roundBits) | ||
387 | float_raise(FPSCR_CAUSE_UNDERFLOW); | ||
388 | } | ||
389 | } | ||
390 | if (roundBits) | ||
391 | float_raise(FPSCR_CAUSE_INEXACT); | ||
392 | zSig = (zSig + roundIncrement) >> 7; | ||
393 | zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven); | ||
394 | if (zSig == 0) | ||
395 | zExp = 0; | ||
396 | return packFloat32(zSign, zExp, zSig); | ||
397 | |||
398 | } | ||
399 | |||
400 | static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig) | ||
401 | { | ||
402 | int8 shiftCount; | ||
403 | |||
404 | shiftCount = countLeadingZeros32(zSig) - 1; | ||
405 | return roundAndPackFloat32(zSign, zExp - shiftCount, | ||
406 | zSig << shiftCount); | ||
407 | } | ||
408 | |||
409 | static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig) | ||
410 | { | ||
411 | flag roundNearestEven; | ||
412 | int16 roundIncrement, roundBits; | ||
413 | flag isTiny; | ||
414 | |||
415 | /* SH4 has only 2 rounding modes - round to nearest and round to zero */ | ||
416 | roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST); | ||
417 | roundIncrement = 0x200; | ||
418 | if (!roundNearestEven) { | ||
419 | roundIncrement = 0; | ||
420 | } | ||
421 | roundBits = zSig & 0x3FF; | ||
422 | if (0x7FD <= (bits16) zExp) { | ||
423 | if ((0x7FD < zExp) | ||
424 | || ((zExp == 0x7FD) | ||
425 | && ((sbits64) (zSig + roundIncrement) < 0)) | ||
426 | ) { | ||
427 | float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT); | ||
428 | return packFloat64(zSign, 0x7FF, | ||
429 | 0) - (roundIncrement == 0); | ||
430 | } | ||
431 | if (zExp < 0) { | ||
432 | isTiny = (zExp < -1) | ||
433 | || (zSig + roundIncrement < | ||
434 | LIT64(0x8000000000000000)); | ||
435 | shift64RightJamming(zSig, -zExp, &zSig); | ||
436 | zExp = 0; | ||
437 | roundBits = zSig & 0x3FF; | ||
438 | if (isTiny && roundBits) | ||
439 | float_raise(FPSCR_CAUSE_UNDERFLOW); | ||
440 | } | ||
441 | } | ||
442 | if (roundBits) | ||
443 | float_raise(FPSCR_CAUSE_INEXACT); | ||
444 | zSig = (zSig + roundIncrement) >> 10; | ||
445 | zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven); | ||
446 | if (zSig == 0) | ||
447 | zExp = 0; | ||
448 | return packFloat64(zSign, zExp, zSig); | ||
449 | |||
450 | } | ||
451 | |||
452 | static float32 subFloat32Sigs(float32 a, float32 b, flag zSign) | ||
453 | { | ||
454 | int16 aExp, bExp, zExp; | ||
455 | bits32 aSig, bSig, zSig; | ||
456 | int16 expDiff; | ||
457 | |||
458 | aSig = extractFloat32Frac(a); | ||
459 | aExp = extractFloat32Exp(a); | ||
460 | bSig = extractFloat32Frac(b); | ||
461 | bExp = extractFloat32Exp(b); | ||
462 | expDiff = aExp - bExp; | ||
463 | aSig <<= 7; | ||
464 | bSig <<= 7; | ||
465 | if (0 < expDiff) | ||
466 | goto aExpBigger; | ||
467 | if (expDiff < 0) | ||
468 | goto bExpBigger; | ||
469 | if (aExp == 0) { | ||
470 | aExp = 1; | ||
471 | bExp = 1; | ||
472 | } | ||
473 | if (bSig < aSig) | ||
474 | goto aBigger; | ||
475 | if (aSig < bSig) | ||
476 | goto bBigger; | ||
477 | return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0); | ||
478 | bExpBigger: | ||
479 | if (bExp == 0xFF) { | ||
480 | return packFloat32(zSign ^ 1, 0xFF, 0); | ||
481 | } | ||
482 | if (aExp == 0) { | ||
483 | ++expDiff; | ||
484 | } else { | ||
485 | aSig |= 0x40000000; | ||
486 | } | ||
487 | shift32RightJamming(aSig, -expDiff, &aSig); | ||
488 | bSig |= 0x40000000; | ||
489 | bBigger: | ||
490 | zSig = bSig - aSig; | ||
491 | zExp = bExp; | ||
492 | zSign ^= 1; | ||
493 | goto normalizeRoundAndPack; | ||
494 | aExpBigger: | ||
495 | if (aExp == 0xFF) { | ||
496 | return a; | ||
497 | } | ||
498 | if (bExp == 0) { | ||
499 | --expDiff; | ||
500 | } else { | ||
501 | bSig |= 0x40000000; | ||
502 | } | ||
503 | shift32RightJamming(bSig, expDiff, &bSig); | ||
504 | aSig |= 0x40000000; | ||
505 | aBigger: | ||
506 | zSig = aSig - bSig; | ||
507 | zExp = aExp; | ||
508 | normalizeRoundAndPack: | ||
509 | --zExp; | ||
510 | return normalizeRoundAndPackFloat32(zSign, zExp, zSig); | ||
511 | |||
512 | } | ||
513 | |||
514 | static float32 addFloat32Sigs(float32 a, float32 b, flag zSign) | ||
515 | { | ||
516 | int16 aExp, bExp, zExp; | ||
517 | bits32 aSig, bSig, zSig; | ||
518 | int16 expDiff; | ||
519 | |||
520 | aSig = extractFloat32Frac(a); | ||
521 | aExp = extractFloat32Exp(a); | ||
522 | bSig = extractFloat32Frac(b); | ||
523 | bExp = extractFloat32Exp(b); | ||
524 | expDiff = aExp - bExp; | ||
525 | aSig <<= 6; | ||
526 | bSig <<= 6; | ||
527 | if (0 < expDiff) { | ||
528 | if (aExp == 0xFF) { | ||
529 | return a; | ||
530 | } | ||
531 | if (bExp == 0) { | ||
532 | --expDiff; | ||
533 | } else { | ||
534 | bSig |= 0x20000000; | ||
535 | } | ||
536 | shift32RightJamming(bSig, expDiff, &bSig); | ||
537 | zExp = aExp; | ||
538 | } else if (expDiff < 0) { | ||
539 | if (bExp == 0xFF) { | ||
540 | return packFloat32(zSign, 0xFF, 0); | ||
541 | } | ||
542 | if (aExp == 0) { | ||
543 | ++expDiff; | ||
544 | } else { | ||
545 | aSig |= 0x20000000; | ||
546 | } | ||
547 | shift32RightJamming(aSig, -expDiff, &aSig); | ||
548 | zExp = bExp; | ||
549 | } else { | ||
550 | if (aExp == 0xFF) { | ||
551 | return a; | ||
552 | } | ||
553 | if (aExp == 0) | ||
554 | return packFloat32(zSign, 0, (aSig + bSig) >> 6); | ||
555 | zSig = 0x40000000 + aSig + bSig; | ||
556 | zExp = aExp; | ||
557 | goto roundAndPack; | ||
558 | } | ||
559 | aSig |= 0x20000000; | ||
560 | zSig = (aSig + bSig) << 1; | ||
561 | --zExp; | ||
562 | if ((sbits32) zSig < 0) { | ||
563 | zSig = aSig + bSig; | ||
564 | ++zExp; | ||
565 | } | ||
566 | roundAndPack: | ||
567 | return roundAndPackFloat32(zSign, zExp, zSig); | ||
568 | |||
569 | } | ||
570 | |||
571 | float64 float64_sub(float64 a, float64 b) | ||
572 | { | ||
573 | flag aSign, bSign; | ||
574 | |||
575 | aSign = extractFloat64Sign(a); | ||
576 | bSign = extractFloat64Sign(b); | ||
577 | if (aSign == bSign) { | ||
578 | return subFloat64Sigs(a, b, aSign); | ||
579 | } else { | ||
580 | return addFloat64Sigs(a, b, aSign); | ||
581 | } | ||
582 | |||
583 | } | ||
584 | |||
585 | float32 float32_sub(float32 a, float32 b) | ||
586 | { | ||
587 | flag aSign, bSign; | ||
588 | |||
589 | aSign = extractFloat32Sign(a); | ||
590 | bSign = extractFloat32Sign(b); | ||
591 | if (aSign == bSign) { | ||
592 | return subFloat32Sigs(a, b, aSign); | ||
593 | } else { | ||
594 | return addFloat32Sigs(a, b, aSign); | ||
595 | } | ||
596 | |||
597 | } | ||
598 | |||
599 | float32 float32_add(float32 a, float32 b) | ||
600 | { | ||
601 | flag aSign, bSign; | ||
602 | |||
603 | aSign = extractFloat32Sign(a); | ||
604 | bSign = extractFloat32Sign(b); | ||
605 | if (aSign == bSign) { | ||
606 | return addFloat32Sigs(a, b, aSign); | ||
607 | } else { | ||
608 | return subFloat32Sigs(a, b, aSign); | ||
609 | } | ||
610 | |||
611 | } | ||
612 | |||
613 | float64 float64_add(float64 a, float64 b) | ||
614 | { | ||
615 | flag aSign, bSign; | ||
616 | |||
617 | aSign = extractFloat64Sign(a); | ||
618 | bSign = extractFloat64Sign(b); | ||
619 | if (aSign == bSign) { | ||
620 | return addFloat64Sigs(a, b, aSign); | ||
621 | } else { | ||
622 | return subFloat64Sigs(a, b, aSign); | ||
623 | } | ||
624 | } | ||
625 | |||
626 | static void | ||
627 | normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr) | ||
628 | { | ||
629 | int8 shiftCount; | ||
630 | |||
631 | shiftCount = countLeadingZeros64(aSig) - 11; | ||
632 | *zSigPtr = aSig << shiftCount; | ||
633 | *zExpPtr = 1 - shiftCount; | ||
634 | } | ||
635 | |||
636 | inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | ||
637 | bits64 * z1Ptr) | ||
638 | { | ||
639 | bits64 z1; | ||
640 | |||
641 | z1 = a1 + b1; | ||
642 | *z1Ptr = z1; | ||
643 | *z0Ptr = a0 + b0 + (z1 < a1); | ||
644 | } | ||
645 | |||
646 | inline void | ||
647 | sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, | ||
648 | bits64 * z1Ptr) | ||
649 | { | ||
650 | *z1Ptr = a1 - b1; | ||
651 | *z0Ptr = a0 - b0 - (a1 < b1); | ||
652 | } | ||
653 | |||
654 | static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b) | ||
655 | { | ||
656 | bits64 b0, b1; | ||
657 | bits64 rem0, rem1, term0, term1; | ||
658 | bits64 z; | ||
659 | if (b <= a0) | ||
660 | return LIT64(0xFFFFFFFFFFFFFFFF); | ||
661 | b0 = b >> 32; | ||
662 | z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32; | ||
663 | mul64To128(b, z, &term0, &term1); | ||
664 | sub128(a0, a1, term0, term1, &rem0, &rem1); | ||
665 | while (((sbits64) rem0) < 0) { | ||
666 | z -= LIT64(0x100000000); | ||
667 | b1 = b << 32; | ||
668 | add128(rem0, rem1, b0, b1, &rem0, &rem1); | ||
669 | } | ||
670 | rem0 = (rem0 << 32) | (rem1 >> 32); | ||
671 | z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0; | ||
672 | return z; | ||
673 | } | ||
674 | |||
675 | inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr) | ||
676 | { | ||
677 | bits32 aHigh, aLow, bHigh, bLow; | ||
678 | bits64 z0, zMiddleA, zMiddleB, z1; | ||
679 | |||
680 | aLow = a; | ||
681 | aHigh = a >> 32; | ||
682 | bLow = b; | ||
683 | bHigh = b >> 32; | ||
684 | z1 = ((bits64) aLow) * bLow; | ||
685 | zMiddleA = ((bits64) aLow) * bHigh; | ||
686 | zMiddleB = ((bits64) aHigh) * bLow; | ||
687 | z0 = ((bits64) aHigh) * bHigh; | ||
688 | zMiddleA += zMiddleB; | ||
689 | z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32); | ||
690 | zMiddleA <<= 32; | ||
691 | z1 += zMiddleA; | ||
692 | z0 += (z1 < zMiddleA); | ||
693 | *z1Ptr = z1; | ||
694 | *z0Ptr = z0; | ||
695 | |||
696 | } | ||
697 | |||
698 | static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr, | ||
699 | bits32 * zSigPtr) | ||
700 | { | ||
701 | int8 shiftCount; | ||
702 | |||
703 | shiftCount = countLeadingZeros32(aSig) - 8; | ||
704 | *zSigPtr = aSig << shiftCount; | ||
705 | *zExpPtr = 1 - shiftCount; | ||
706 | |||
707 | } | ||
708 | |||
709 | float64 float64_div(float64 a, float64 b) | ||
710 | { | ||
711 | flag aSign, bSign, zSign; | ||
712 | int16 aExp, bExp, zExp; | ||
713 | bits64 aSig, bSig, zSig; | ||
714 | bits64 rem0, rem1; | ||
715 | bits64 term0, term1; | ||
716 | |||
717 | aSig = extractFloat64Frac(a); | ||
718 | aExp = extractFloat64Exp(a); | ||
719 | aSign = extractFloat64Sign(a); | ||
720 | bSig = extractFloat64Frac(b); | ||
721 | bExp = extractFloat64Exp(b); | ||
722 | bSign = extractFloat64Sign(b); | ||
723 | zSign = aSign ^ bSign; | ||
724 | if (aExp == 0x7FF) { | ||
725 | if (bExp == 0x7FF) { | ||
726 | } | ||
727 | return packFloat64(zSign, 0x7FF, 0); | ||
728 | } | ||
729 | if (bExp == 0x7FF) { | ||
730 | return packFloat64(zSign, 0, 0); | ||
731 | } | ||
732 | if (bExp == 0) { | ||
733 | if (bSig == 0) { | ||
734 | if ((aExp | aSig) == 0) { | ||
735 | float_raise(FPSCR_CAUSE_INVALID); | ||
736 | } | ||
737 | return packFloat64(zSign, 0x7FF, 0); | ||
738 | } | ||
739 | normalizeFloat64Subnormal(bSig, &bExp, &bSig); | ||
740 | } | ||
741 | if (aExp == 0) { | ||
742 | if (aSig == 0) | ||
743 | return packFloat64(zSign, 0, 0); | ||
744 | normalizeFloat64Subnormal(aSig, &aExp, &aSig); | ||
745 | } | ||
746 | zExp = aExp - bExp + 0x3FD; | ||
747 | aSig = (aSig | LIT64(0x0010000000000000)) << 10; | ||
748 | bSig = (bSig | LIT64(0x0010000000000000)) << 11; | ||
749 | if (bSig <= (aSig + aSig)) { | ||
750 | aSig >>= 1; | ||
751 | ++zExp; | ||
752 | } | ||
753 | zSig = estimateDiv128To64(aSig, 0, bSig); | ||
754 | if ((zSig & 0x1FF) <= 2) { | ||
755 | mul64To128(bSig, zSig, &term0, &term1); | ||
756 | sub128(aSig, 0, term0, term1, &rem0, &rem1); | ||
757 | while ((sbits64) rem0 < 0) { | ||
758 | --zSig; | ||
759 | add128(rem0, rem1, 0, bSig, &rem0, &rem1); | ||
760 | } | ||
761 | zSig |= (rem1 != 0); | ||
762 | } | ||
763 | return roundAndPackFloat64(zSign, zExp, zSig); | ||
764 | |||
765 | } | ||
766 | |||
767 | float32 float32_div(float32 a, float32 b) | ||
768 | { | ||
769 | flag aSign, bSign, zSign; | ||
770 | int16 aExp, bExp, zExp; | ||
771 | bits32 aSig, bSig, zSig; | ||
772 | |||
773 | aSig = extractFloat32Frac(a); | ||
774 | aExp = extractFloat32Exp(a); | ||
775 | aSign = extractFloat32Sign(a); | ||
776 | bSig = extractFloat32Frac(b); | ||
777 | bExp = extractFloat32Exp(b); | ||
778 | bSign = extractFloat32Sign(b); | ||
779 | zSign = aSign ^ bSign; | ||
780 | if (aExp == 0xFF) { | ||
781 | if (bExp == 0xFF) { | ||
782 | } | ||
783 | return packFloat32(zSign, 0xFF, 0); | ||
784 | } | ||
785 | if (bExp == 0xFF) { | ||
786 | return packFloat32(zSign, 0, 0); | ||
787 | } | ||
788 | if (bExp == 0) { | ||
789 | if (bSig == 0) { | ||
790 | return packFloat32(zSign, 0xFF, 0); | ||
791 | } | ||
792 | normalizeFloat32Subnormal(bSig, &bExp, &bSig); | ||
793 | } | ||
794 | if (aExp == 0) { | ||
795 | if (aSig == 0) | ||
796 | return packFloat32(zSign, 0, 0); | ||
797 | normalizeFloat32Subnormal(aSig, &aExp, &aSig); | ||
798 | } | ||
799 | zExp = aExp - bExp + 0x7D; | ||
800 | aSig = (aSig | 0x00800000) << 7; | ||
801 | bSig = (bSig | 0x00800000) << 8; | ||
802 | if (bSig <= (aSig + aSig)) { | ||
803 | aSig >>= 1; | ||
804 | ++zExp; | ||
805 | } | ||
806 | zSig = (((bits64) aSig) << 32) / bSig; | ||
807 | if ((zSig & 0x3F) == 0) { | ||
808 | zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32); | ||
809 | } | ||
810 | return roundAndPackFloat32(zSign, zExp, zSig); | ||
811 | |||
812 | } | ||
813 | |||
814 | float32 float32_mul(float32 a, float32 b) | ||
815 | { | ||
816 | char aSign, bSign, zSign; | ||
817 | int aExp, bExp, zExp; | ||
818 | unsigned int aSig, bSig; | ||
819 | unsigned long long zSig64; | ||
820 | unsigned int zSig; | ||
821 | |||
822 | aSig = extractFloat32Frac(a); | ||
823 | aExp = extractFloat32Exp(a); | ||
824 | aSign = extractFloat32Sign(a); | ||
825 | bSig = extractFloat32Frac(b); | ||
826 | bExp = extractFloat32Exp(b); | ||
827 | bSign = extractFloat32Sign(b); | ||
828 | zSign = aSign ^ bSign; | ||
829 | if (aExp == 0) { | ||
830 | if (aSig == 0) | ||
831 | return packFloat32(zSign, 0, 0); | ||
832 | normalizeFloat32Subnormal(aSig, &aExp, &aSig); | ||
833 | } | ||
834 | if (bExp == 0) { | ||
835 | if (bSig == 0) | ||
836 | return packFloat32(zSign, 0, 0); | ||
837 | normalizeFloat32Subnormal(bSig, &bExp, &bSig); | ||
838 | } | ||
839 | if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0)) | ||
840 | return roundAndPackFloat32(zSign, 0xff, 0); | ||
841 | |||
842 | zExp = aExp + bExp - 0x7F; | ||
843 | aSig = (aSig | 0x00800000) << 7; | ||
844 | bSig = (bSig | 0x00800000) << 8; | ||
845 | shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64); | ||
846 | zSig = zSig64; | ||
847 | if (0 <= (signed int)(zSig << 1)) { | ||
848 | zSig <<= 1; | ||
849 | --zExp; | ||
850 | } | ||
851 | return roundAndPackFloat32(zSign, zExp, zSig); | ||
852 | |||
853 | } | ||
854 | |||
855 | float64 float64_mul(float64 a, float64 b) | ||
856 | { | ||
857 | char aSign, bSign, zSign; | ||
858 | int aExp, bExp, zExp; | ||
859 | unsigned long long int aSig, bSig, zSig0, zSig1; | ||
860 | |||
861 | aSig = extractFloat64Frac(a); | ||
862 | aExp = extractFloat64Exp(a); | ||
863 | aSign = extractFloat64Sign(a); | ||
864 | bSig = extractFloat64Frac(b); | ||
865 | bExp = extractFloat64Exp(b); | ||
866 | bSign = extractFloat64Sign(b); | ||
867 | zSign = aSign ^ bSign; | ||
868 | |||
869 | if (aExp == 0) { | ||
870 | if (aSig == 0) | ||
871 | return packFloat64(zSign, 0, 0); | ||
872 | normalizeFloat64Subnormal(aSig, &aExp, &aSig); | ||
873 | } | ||
874 | if (bExp == 0) { | ||
875 | if (bSig == 0) | ||
876 | return packFloat64(zSign, 0, 0); | ||
877 | normalizeFloat64Subnormal(bSig, &bExp, &bSig); | ||
878 | } | ||
879 | if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0)) | ||
880 | return roundAndPackFloat64(zSign, 0x7ff, 0); | ||
881 | |||
882 | zExp = aExp + bExp - 0x3FF; | ||
883 | aSig = (aSig | 0x0010000000000000LL) << 10; | ||
884 | bSig = (bSig | 0x0010000000000000LL) << 11; | ||
885 | mul64To128(aSig, bSig, &zSig0, &zSig1); | ||
886 | zSig0 |= (zSig1 != 0); | ||
887 | if (0 <= (signed long long int)(zSig0 << 1)) { | ||
888 | zSig0 <<= 1; | ||
889 | --zExp; | ||
890 | } | ||
891 | return roundAndPackFloat64(zSign, zExp, zSig0); | ||
892 | } | ||