diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/m68k/fpsp040/round.S |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/m68k/fpsp040/round.S')
-rw-r--r-- | arch/m68k/fpsp040/round.S | 649 |
1 files changed, 649 insertions, 0 deletions
diff --git a/arch/m68k/fpsp040/round.S b/arch/m68k/fpsp040/round.S new file mode 100644 index 000000000000..00f98068783f --- /dev/null +++ b/arch/m68k/fpsp040/round.S | |||
@@ -0,0 +1,649 @@ | |||
1 | | | ||
2 | | round.sa 3.4 7/29/91 | ||
3 | | | ||
4 | | handle rounding and normalization tasks | ||
5 | | | ||
6 | | | ||
7 | | | ||
8 | | Copyright (C) Motorola, Inc. 1990 | ||
9 | | All Rights Reserved | ||
10 | | | ||
11 | | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA | ||
12 | | The copyright notice above does not evidence any | ||
13 | | actual or intended publication of such source code. | ||
14 | |||
15 | |ROUND idnt 2,1 | Motorola 040 Floating Point Software Package | ||
16 | |||
17 | |section 8 | ||
18 | |||
19 | #include "fpsp.h" | ||
20 | |||
21 | | | ||
22 | | round --- round result according to precision/mode | ||
23 | | | ||
24 | | a0 points to the input operand in the internal extended format | ||
25 | | d1(high word) contains rounding precision: | ||
26 | | ext = $0000xxxx | ||
27 | | sgl = $0001xxxx | ||
28 | | dbl = $0002xxxx | ||
29 | | d1(low word) contains rounding mode: | ||
30 | | RN = $xxxx0000 | ||
31 | | RZ = $xxxx0001 | ||
32 | | RM = $xxxx0010 | ||
33 | | RP = $xxxx0011 | ||
34 | | d0{31:29} contains the g,r,s bits (extended) | ||
35 | | | ||
36 | | On return the value pointed to by a0 is correctly rounded, | ||
37 | | a0 is preserved and the g-r-s bits in d0 are cleared. | ||
38 | | The result is not typed - the tag field is invalid. The | ||
39 | | result is still in the internal extended format. | ||
40 | | | ||
41 | | The INEX bit of USER_FPSR will be set if the rounded result was | ||
42 | | inexact (i.e. if any of the g-r-s bits were set). | ||
43 | | | ||
44 | |||
45 | .global round | ||
46 | round: | ||
47 | | If g=r=s=0 then result is exact and round is done, else set | ||
48 | | the inex flag in status reg and continue. | ||
49 | | | ||
50 | bsrs ext_grs |this subroutine looks at the | ||
51 | | :rounding precision and sets | ||
52 | | ;the appropriate g-r-s bits. | ||
53 | tstl %d0 |if grs are zero, go force | ||
54 | bne rnd_cont |lower bits to zero for size | ||
55 | |||
56 | swap %d1 |set up d1.w for round prec. | ||
57 | bra truncate | ||
58 | |||
59 | rnd_cont: | ||
60 | | | ||
61 | | Use rounding mode as an index into a jump table for these modes. | ||
62 | | | ||
63 | orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex | ||
64 | lea mode_tab,%a1 | ||
65 | movel (%a1,%d1.w*4),%a1 | ||
66 | jmp (%a1) | ||
67 | | | ||
68 | | Jump table indexed by rounding mode in d1.w. All following assumes | ||
69 | | grs != 0. | ||
70 | | | ||
71 | mode_tab: | ||
72 | .long rnd_near | ||
73 | .long rnd_zero | ||
74 | .long rnd_mnus | ||
75 | .long rnd_plus | ||
76 | | | ||
77 | | ROUND PLUS INFINITY | ||
78 | | | ||
79 | | If sign of fp number = 0 (positive), then add 1 to l. | ||
80 | | | ||
81 | rnd_plus: | ||
82 | swap %d1 |set up d1 for round prec. | ||
83 | tstb LOCAL_SGN(%a0) |check for sign | ||
84 | bmi truncate |if positive then truncate | ||
85 | movel #0xffffffff,%d0 |force g,r,s to be all f's | ||
86 | lea add_to_l,%a1 | ||
87 | movel (%a1,%d1.w*4),%a1 | ||
88 | jmp (%a1) | ||
89 | | | ||
90 | | ROUND MINUS INFINITY | ||
91 | | | ||
92 | | If sign of fp number = 1 (negative), then add 1 to l. | ||
93 | | | ||
94 | rnd_mnus: | ||
95 | swap %d1 |set up d1 for round prec. | ||
96 | tstb LOCAL_SGN(%a0) |check for sign | ||
97 | bpl truncate |if negative then truncate | ||
98 | movel #0xffffffff,%d0 |force g,r,s to be all f's | ||
99 | lea add_to_l,%a1 | ||
100 | movel (%a1,%d1.w*4),%a1 | ||
101 | jmp (%a1) | ||
102 | | | ||
103 | | ROUND ZERO | ||
104 | | | ||
105 | | Always truncate. | ||
106 | rnd_zero: | ||
107 | swap %d1 |set up d1 for round prec. | ||
108 | bra truncate | ||
109 | | | ||
110 | | | ||
111 | | ROUND NEAREST | ||
112 | | | ||
113 | | If (g=1), then add 1 to l and if (r=s=0), then clear l | ||
114 | | Note that this will round to even in case of a tie. | ||
115 | | | ||
116 | rnd_near: | ||
117 | swap %d1 |set up d1 for round prec. | ||
118 | asll #1,%d0 |shift g-bit to c-bit | ||
119 | bcc truncate |if (g=1) then | ||
120 | lea add_to_l,%a1 | ||
121 | movel (%a1,%d1.w*4),%a1 | ||
122 | jmp (%a1) | ||
123 | |||
124 | | | ||
125 | | ext_grs --- extract guard, round and sticky bits | ||
126 | | | ||
127 | | Input: d1 = PREC:ROUND | ||
128 | | Output: d0{31:29}= guard, round, sticky | ||
129 | | | ||
130 | | The ext_grs extract the guard/round/sticky bits according to the | ||
131 | | selected rounding precision. It is called by the round subroutine | ||
132 | | only. All registers except d0 are kept intact. d0 becomes an | ||
133 | | updated guard,round,sticky in d0{31:29} | ||
134 | | | ||
135 | | Notes: the ext_grs uses the round PREC, and therefore has to swap d1 | ||
136 | | prior to usage, and needs to restore d1 to original. | ||
137 | | | ||
138 | ext_grs: | ||
139 | swap %d1 |have d1.w point to round precision | ||
140 | cmpiw #0,%d1 | ||
141 | bnes sgl_or_dbl | ||
142 | bras end_ext_grs | ||
143 | |||
144 | sgl_or_dbl: | ||
145 | moveml %d2/%d3,-(%a7) |make some temp registers | ||
146 | cmpiw #1,%d1 | ||
147 | bnes grs_dbl | ||
148 | grs_sgl: | ||
149 | bfextu LOCAL_HI(%a0){#24:#2},%d3 |sgl prec. g-r are 2 bits right | ||
150 | movel #30,%d2 |of the sgl prec. limits | ||
151 | lsll %d2,%d3 |shift g-r bits to MSB of d3 | ||
152 | movel LOCAL_HI(%a0),%d2 |get word 2 for s-bit test | ||
153 | andil #0x0000003f,%d2 |s bit is the or of all other | ||
154 | bnes st_stky |bits to the right of g-r | ||
155 | tstl LOCAL_LO(%a0) |test lower mantissa | ||
156 | bnes st_stky |if any are set, set sticky | ||
157 | tstl %d0 |test original g,r,s | ||
158 | bnes st_stky |if any are set, set sticky | ||
159 | bras end_sd |if words 3 and 4 are clr, exit | ||
160 | grs_dbl: | ||
161 | bfextu LOCAL_LO(%a0){#21:#2},%d3 |dbl-prec. g-r are 2 bits right | ||
162 | movel #30,%d2 |of the dbl prec. limits | ||
163 | lsll %d2,%d3 |shift g-r bits to the MSB of d3 | ||
164 | movel LOCAL_LO(%a0),%d2 |get lower mantissa for s-bit test | ||
165 | andil #0x000001ff,%d2 |s bit is the or-ing of all | ||
166 | bnes st_stky |other bits to the right of g-r | ||
167 | tstl %d0 |test word original g,r,s | ||
168 | bnes st_stky |if any are set, set sticky | ||
169 | bras end_sd |if clear, exit | ||
170 | st_stky: | ||
171 | bset #rnd_stky_bit,%d3 | ||
172 | end_sd: | ||
173 | movel %d3,%d0 |return grs to d0 | ||
174 | moveml (%a7)+,%d2/%d3 |restore scratch registers | ||
175 | end_ext_grs: | ||
176 | swap %d1 |restore d1 to original | ||
177 | rts | ||
178 | |||
179 | |******************* Local Equates | ||
180 | .set ad_1_sgl,0x00000100 | constant to add 1 to l-bit in sgl prec | ||
181 | .set ad_1_dbl,0x00000800 | constant to add 1 to l-bit in dbl prec | ||
182 | |||
183 | |||
184 | |Jump table for adding 1 to the l-bit indexed by rnd prec | ||
185 | |||
186 | add_to_l: | ||
187 | .long add_ext | ||
188 | .long add_sgl | ||
189 | .long add_dbl | ||
190 | .long add_dbl | ||
191 | | | ||
192 | | ADD SINGLE | ||
193 | | | ||
194 | add_sgl: | ||
195 | addl #ad_1_sgl,LOCAL_HI(%a0) | ||
196 | bccs scc_clr |no mantissa overflow | ||
197 | roxrw LOCAL_HI(%a0) |shift v-bit back in | ||
198 | roxrw LOCAL_HI+2(%a0) |shift v-bit back in | ||
199 | addw #0x1,LOCAL_EX(%a0) |and incr exponent | ||
200 | scc_clr: | ||
201 | tstl %d0 |test for rs = 0 | ||
202 | bnes sgl_done | ||
203 | andiw #0xfe00,LOCAL_HI+2(%a0) |clear the l-bit | ||
204 | sgl_done: | ||
205 | andil #0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit | ||
206 | clrl LOCAL_LO(%a0) |clear d2 | ||
207 | rts | ||
208 | |||
209 | | | ||
210 | | ADD EXTENDED | ||
211 | | | ||
212 | add_ext: | ||
213 | addql #1,LOCAL_LO(%a0) |add 1 to l-bit | ||
214 | bccs xcc_clr |test for carry out | ||
215 | addql #1,LOCAL_HI(%a0) |propagate carry | ||
216 | bccs xcc_clr | ||
217 | roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit | ||
218 | roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit | ||
219 | roxrw LOCAL_LO(%a0) | ||
220 | roxrw LOCAL_LO+2(%a0) | ||
221 | addw #0x1,LOCAL_EX(%a0) |and inc exp | ||
222 | xcc_clr: | ||
223 | tstl %d0 |test rs = 0 | ||
224 | bnes add_ext_done | ||
225 | andib #0xfe,LOCAL_LO+3(%a0) |clear the l bit | ||
226 | add_ext_done: | ||
227 | rts | ||
228 | | | ||
229 | | ADD DOUBLE | ||
230 | | | ||
231 | add_dbl: | ||
232 | addl #ad_1_dbl,LOCAL_LO(%a0) | ||
233 | bccs dcc_clr | ||
234 | addql #1,LOCAL_HI(%a0) |propagate carry | ||
235 | bccs dcc_clr | ||
236 | roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit | ||
237 | roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit | ||
238 | roxrw LOCAL_LO(%a0) | ||
239 | roxrw LOCAL_LO+2(%a0) | ||
240 | addw #0x1,LOCAL_EX(%a0) |incr exponent | ||
241 | dcc_clr: | ||
242 | tstl %d0 |test for rs = 0 | ||
243 | bnes dbl_done | ||
244 | andiw #0xf000,LOCAL_LO+2(%a0) |clear the l-bit | ||
245 | |||
246 | dbl_done: | ||
247 | andil #0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit | ||
248 | rts | ||
249 | |||
250 | error: | ||
251 | rts | ||
252 | | | ||
253 | | Truncate all other bits | ||
254 | | | ||
255 | trunct: | ||
256 | .long end_rnd | ||
257 | .long sgl_done | ||
258 | .long dbl_done | ||
259 | .long dbl_done | ||
260 | |||
261 | truncate: | ||
262 | lea trunct,%a1 | ||
263 | movel (%a1,%d1.w*4),%a1 | ||
264 | jmp (%a1) | ||
265 | |||
266 | end_rnd: | ||
267 | rts | ||
268 | |||
269 | | | ||
270 | | NORMALIZE | ||
271 | | | ||
272 | | These routines (nrm_zero & nrm_set) normalize the unnorm. This | ||
273 | | is done by shifting the mantissa left while decrementing the | ||
274 | | exponent. | ||
275 | | | ||
276 | | NRM_SET shifts and decrements until there is a 1 set in the integer | ||
277 | | bit of the mantissa (msb in d1). | ||
278 | | | ||
279 | | NRM_ZERO shifts and decrements until there is a 1 set in the integer | ||
280 | | bit of the mantissa (msb in d1) unless this would mean the exponent | ||
281 | | would go less than 0. In that case the number becomes a denorm - the | ||
282 | | exponent (d0) is set to 0 and the mantissa (d1 & d2) is not | ||
283 | | normalized. | ||
284 | | | ||
285 | | Note that both routines have been optimized (for the worst case) and | ||
286 | | therefore do not have the easy to follow decrement/shift loop. | ||
287 | | | ||
288 | | NRM_ZERO | ||
289 | | | ||
290 | | Distance to first 1 bit in mantissa = X | ||
291 | | Distance to 0 from exponent = Y | ||
292 | | If X < Y | ||
293 | | Then | ||
294 | | nrm_set | ||
295 | | Else | ||
296 | | shift mantissa by Y | ||
297 | | set exponent = 0 | ||
298 | | | ||
299 | |input: | ||
300 | | FP_SCR1 = exponent, ms mantissa part, ls mantissa part | ||
301 | |output: | ||
302 | | L_SCR1{4} = fpte15 or ete15 bit | ||
303 | | | ||
304 | .global nrm_zero | ||
305 | nrm_zero: | ||
306 | movew LOCAL_EX(%a0),%d0 | ||
307 | cmpw #64,%d0 |see if exp > 64 | ||
308 | bmis d0_less | ||
309 | bsr nrm_set |exp > 64 so exp won't exceed 0 | ||
310 | rts | ||
311 | d0_less: | ||
312 | moveml %d2/%d3/%d5/%d6,-(%a7) | ||
313 | movel LOCAL_HI(%a0),%d1 | ||
314 | movel LOCAL_LO(%a0),%d2 | ||
315 | |||
316 | bfffo %d1{#0:#32},%d3 |get the distance to the first 1 | ||
317 | | ;in ms mant | ||
318 | beqs ms_clr |branch if no bits were set | ||
319 | cmpw %d3,%d0 |of X>Y | ||
320 | bmis greater |then exp will go past 0 (neg) if | ||
321 | | ;it is just shifted | ||
322 | bsr nrm_set |else exp won't go past 0 | ||
323 | moveml (%a7)+,%d2/%d3/%d5/%d6 | ||
324 | rts | ||
325 | greater: | ||
326 | movel %d2,%d6 |save ls mant in d6 | ||
327 | lsll %d0,%d2 |shift ls mant by count | ||
328 | lsll %d0,%d1 |shift ms mant by count | ||
329 | movel #32,%d5 | ||
330 | subl %d0,%d5 |make op a denorm by shifting bits | ||
331 | lsrl %d5,%d6 |by the number in the exp, then | ||
332 | | ;set exp = 0. | ||
333 | orl %d6,%d1 |shift the ls mant bits into the ms mant | ||
334 | movel #0,%d0 |same as if decremented exp to 0 | ||
335 | | ;while shifting | ||
336 | movew %d0,LOCAL_EX(%a0) | ||
337 | movel %d1,LOCAL_HI(%a0) | ||
338 | movel %d2,LOCAL_LO(%a0) | ||
339 | moveml (%a7)+,%d2/%d3/%d5/%d6 | ||
340 | rts | ||
341 | ms_clr: | ||
342 | bfffo %d2{#0:#32},%d3 |check if any bits set in ls mant | ||
343 | beqs all_clr |branch if none set | ||
344 | addw #32,%d3 | ||
345 | cmpw %d3,%d0 |if X>Y | ||
346 | bmis greater |then branch | ||
347 | bsr nrm_set |else exp won't go past 0 | ||
348 | moveml (%a7)+,%d2/%d3/%d5/%d6 | ||
349 | rts | ||
350 | all_clr: | ||
351 | movew #0,LOCAL_EX(%a0) |no mantissa bits set. Set exp = 0. | ||
352 | moveml (%a7)+,%d2/%d3/%d5/%d6 | ||
353 | rts | ||
354 | | | ||
355 | | NRM_SET | ||
356 | | | ||
357 | .global nrm_set | ||
358 | nrm_set: | ||
359 | movel %d7,-(%a7) | ||
360 | bfffo LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant to d7) | ||
361 | beqs lower |branch if ms mant is all 0's | ||
362 | |||
363 | movel %d6,-(%a7) | ||
364 | |||
365 | subw %d7,LOCAL_EX(%a0) |sub exponent by count | ||
366 | movel LOCAL_HI(%a0),%d0 |d0 has ms mant | ||
367 | movel LOCAL_LO(%a0),%d1 |d1 has ls mant | ||
368 | |||
369 | lsll %d7,%d0 |shift first 1 to j bit position | ||
370 | movel %d1,%d6 |copy ls mant into d6 | ||
371 | lsll %d7,%d6 |shift ls mant by count | ||
372 | movel %d6,LOCAL_LO(%a0) |store ls mant into memory | ||
373 | moveql #32,%d6 | ||
374 | subl %d7,%d6 |continue shift | ||
375 | lsrl %d6,%d1 |shift off all bits but those that will | ||
376 | | ;be shifted into ms mant | ||
377 | orl %d1,%d0 |shift the ls mant bits into the ms mant | ||
378 | movel %d0,LOCAL_HI(%a0) |store ms mant into memory | ||
379 | moveml (%a7)+,%d7/%d6 |restore registers | ||
380 | rts | ||
381 | |||
382 | | | ||
383 | | We get here if ms mant was = 0, and we assume ls mant has bits | ||
384 | | set (otherwise this would have been tagged a zero not a denorm). | ||
385 | | | ||
386 | lower: | ||
387 | movew LOCAL_EX(%a0),%d0 |d0 has exponent | ||
388 | movel LOCAL_LO(%a0),%d1 |d1 has ls mant | ||
389 | subw #32,%d0 |account for ms mant being all zeros | ||
390 | bfffo %d1{#0:#32},%d7 |find first 1 in ls mant to d7) | ||
391 | subw %d7,%d0 |subtract shift count from exp | ||
392 | lsll %d7,%d1 |shift first 1 to integer bit in ms mant | ||
393 | movew %d0,LOCAL_EX(%a0) |store ms mant | ||
394 | movel %d1,LOCAL_HI(%a0) |store exp | ||
395 | clrl LOCAL_LO(%a0) |clear ls mant | ||
396 | movel (%a7)+,%d7 | ||
397 | rts | ||
398 | | | ||
399 | | denorm --- denormalize an intermediate result | ||
400 | | | ||
401 | | Used by underflow. | ||
402 | | | ||
403 | | Input: | ||
404 | | a0 points to the operand to be denormalized | ||
405 | | (in the internal extended format) | ||
406 | | | ||
407 | | d0: rounding precision | ||
408 | | Output: | ||
409 | | a0 points to the denormalized result | ||
410 | | (in the internal extended format) | ||
411 | | | ||
412 | | d0 is guard,round,sticky | ||
413 | | | ||
414 | | d0 comes into this routine with the rounding precision. It | ||
415 | | is then loaded with the denormalized exponent threshold for the | ||
416 | | rounding precision. | ||
417 | | | ||
418 | |||
419 | .global denorm | ||
420 | denorm: | ||
421 | btstb #6,LOCAL_EX(%a0) |check for exponents between $7fff-$4000 | ||
422 | beqs no_sgn_ext | ||
423 | bsetb #7,LOCAL_EX(%a0) |sign extend if it is so | ||
424 | no_sgn_ext: | ||
425 | |||
426 | cmpib #0,%d0 |if 0 then extended precision | ||
427 | bnes not_ext |else branch | ||
428 | |||
429 | clrl %d1 |load d1 with ext threshold | ||
430 | clrl %d0 |clear the sticky flag | ||
431 | bsr dnrm_lp |denormalize the number | ||
432 | tstb %d1 |check for inex | ||
433 | beq no_inex |if clr, no inex | ||
434 | bras dnrm_inex |if set, set inex | ||
435 | |||
436 | not_ext: | ||
437 | cmpil #1,%d0 |if 1 then single precision | ||
438 | beqs load_sgl |else must be 2, double prec | ||
439 | |||
440 | load_dbl: | ||
441 | movew #dbl_thresh,%d1 |put copy of threshold in d1 | ||
442 | movel %d1,%d0 |copy d1 into d0 | ||
443 | subw LOCAL_EX(%a0),%d0 |diff = threshold - exp | ||
444 | cmpw #67,%d0 |if diff > 67 (mant + grs bits) | ||
445 | bpls chk_stky |then branch (all bits would be | ||
446 | | ; shifted off in denorm routine) | ||
447 | clrl %d0 |else clear the sticky flag | ||
448 | bsr dnrm_lp |denormalize the number | ||
449 | tstb %d1 |check flag | ||
450 | beqs no_inex |if clr, no inex | ||
451 | bras dnrm_inex |if set, set inex | ||
452 | |||
453 | load_sgl: | ||
454 | movew #sgl_thresh,%d1 |put copy of threshold in d1 | ||
455 | movel %d1,%d0 |copy d1 into d0 | ||
456 | subw LOCAL_EX(%a0),%d0 |diff = threshold - exp | ||
457 | cmpw #67,%d0 |if diff > 67 (mant + grs bits) | ||
458 | bpls chk_stky |then branch (all bits would be | ||
459 | | ; shifted off in denorm routine) | ||
460 | clrl %d0 |else clear the sticky flag | ||
461 | bsr dnrm_lp |denormalize the number | ||
462 | tstb %d1 |check flag | ||
463 | beqs no_inex |if clr, no inex | ||
464 | bras dnrm_inex |if set, set inex | ||
465 | |||
466 | chk_stky: | ||
467 | tstl LOCAL_HI(%a0) |check for any bits set | ||
468 | bnes set_stky | ||
469 | tstl LOCAL_LO(%a0) |check for any bits set | ||
470 | bnes set_stky | ||
471 | bras clr_mant | ||
472 | set_stky: | ||
473 | orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex | ||
474 | movel #0x20000000,%d0 |set sticky bit in return value | ||
475 | clr_mant: | ||
476 | movew %d1,LOCAL_EX(%a0) |load exp with threshold | ||
477 | movel #0,LOCAL_HI(%a0) |set d1 = 0 (ms mantissa) | ||
478 | movel #0,LOCAL_LO(%a0) |set d2 = 0 (ms mantissa) | ||
479 | rts | ||
480 | dnrm_inex: | ||
481 | orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex | ||
482 | no_inex: | ||
483 | rts | ||
484 | |||
485 | | | ||
486 | | dnrm_lp --- normalize exponent/mantissa to specified threshold | ||
487 | | | ||
488 | | Input: | ||
489 | | a0 points to the operand to be denormalized | ||
490 | | d0{31:29} initial guard,round,sticky | ||
491 | | d1{15:0} denormalization threshold | ||
492 | | Output: | ||
493 | | a0 points to the denormalized operand | ||
494 | | d0{31:29} final guard,round,sticky | ||
495 | | d1.b inexact flag: all ones means inexact result | ||
496 | | | ||
497 | | The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2 | ||
498 | | so that bfext can be used to extract the new low part of the mantissa. | ||
499 | | Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there | ||
500 | | is no LOCAL_GRS scratch word following it on the fsave frame. | ||
501 | | | ||
502 | .global dnrm_lp | ||
503 | dnrm_lp: | ||
504 | movel %d2,-(%sp) |save d2 for temp use | ||
505 | btstb #E3,E_BYTE(%a6) |test for type E3 exception | ||
506 | beqs not_E3 |not type E3 exception | ||
507 | bfextu WBTEMP_GRS(%a6){#6:#3},%d2 |extract guard,round, sticky bit | ||
508 | movel #29,%d0 | ||
509 | lsll %d0,%d2 |shift g,r,s to their positions | ||
510 | movel %d2,%d0 | ||
511 | not_E3: | ||
512 | movel (%sp)+,%d2 |restore d2 | ||
513 | movel LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6) | ||
514 | movel %d0,FP_SCR2+LOCAL_GRS(%a6) | ||
515 | movel %d1,%d0 |copy the denorm threshold | ||
516 | subw LOCAL_EX(%a0),%d1 |d1 = threshold - uns exponent | ||
517 | bles no_lp |d1 <= 0 | ||
518 | cmpw #32,%d1 | ||
519 | blts case_1 |0 = d1 < 32 | ||
520 | cmpw #64,%d1 | ||
521 | blts case_2 |32 <= d1 < 64 | ||
522 | bra case_3 |d1 >= 64 | ||
523 | | | ||
524 | | No normalization necessary | ||
525 | | | ||
526 | no_lp: | ||
527 | clrb %d1 |set no inex2 reported | ||
528 | movel FP_SCR2+LOCAL_GRS(%a6),%d0 |restore original g,r,s | ||
529 | rts | ||
530 | | | ||
531 | | case (0<d1<32) | ||
532 | | | ||
533 | case_1: | ||
534 | movel %d2,-(%sp) | ||
535 | movew %d0,LOCAL_EX(%a0) |exponent = denorm threshold | ||
536 | movel #32,%d0 | ||
537 | subw %d1,%d0 |d0 = 32 - d1 | ||
538 | bfextu LOCAL_EX(%a0){%d0:#32},%d2 | ||
539 | bfextu %d2{%d1:%d0},%d2 |d2 = new LOCAL_HI | ||
540 | bfextu LOCAL_HI(%a0){%d0:#32},%d1 |d1 = new LOCAL_LO | ||
541 | bfextu FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0 |d0 = new G,R,S | ||
542 | movel %d2,LOCAL_HI(%a0) |store new LOCAL_HI | ||
543 | movel %d1,LOCAL_LO(%a0) |store new LOCAL_LO | ||
544 | clrb %d1 | ||
545 | bftst %d0{#2:#30} | ||
546 | beqs c1nstky | ||
547 | bsetl #rnd_stky_bit,%d0 | ||
548 | st %d1 | ||
549 | c1nstky: | ||
550 | movel FP_SCR2+LOCAL_GRS(%a6),%d2 |restore original g,r,s | ||
551 | andil #0xe0000000,%d2 |clear all but G,R,S | ||
552 | tstl %d2 |test if original G,R,S are clear | ||
553 | beqs grs_clear | ||
554 | orl #0x20000000,%d0 |set sticky bit in d0 | ||
555 | grs_clear: | ||
556 | andil #0xe0000000,%d0 |clear all but G,R,S | ||
557 | movel (%sp)+,%d2 | ||
558 | rts | ||
559 | | | ||
560 | | case (32<=d1<64) | ||
561 | | | ||
562 | case_2: | ||
563 | movel %d2,-(%sp) | ||
564 | movew %d0,LOCAL_EX(%a0) |unsigned exponent = threshold | ||
565 | subw #32,%d1 |d1 now between 0 and 32 | ||
566 | movel #32,%d0 | ||
567 | subw %d1,%d0 |d0 = 32 - d1 | ||
568 | bfextu LOCAL_EX(%a0){%d0:#32},%d2 | ||
569 | bfextu %d2{%d1:%d0},%d2 |d2 = new LOCAL_LO | ||
570 | bfextu LOCAL_HI(%a0){%d0:#32},%d1 |d1 = new G,R,S | ||
571 | bftst %d1{#2:#30} | ||
572 | bnes c2_sstky |bra if sticky bit to be set | ||
573 | bftst FP_SCR2+LOCAL_LO(%a6){%d0:#32} | ||
574 | bnes c2_sstky |bra if sticky bit to be set | ||
575 | movel %d1,%d0 | ||
576 | clrb %d1 | ||
577 | bras end_c2 | ||
578 | c2_sstky: | ||
579 | movel %d1,%d0 | ||
580 | bsetl #rnd_stky_bit,%d0 | ||
581 | st %d1 | ||
582 | end_c2: | ||
583 | clrl LOCAL_HI(%a0) |store LOCAL_HI = 0 | ||
584 | movel %d2,LOCAL_LO(%a0) |store LOCAL_LO | ||
585 | movel FP_SCR2+LOCAL_GRS(%a6),%d2 |restore original g,r,s | ||
586 | andil #0xe0000000,%d2 |clear all but G,R,S | ||
587 | tstl %d2 |test if original G,R,S are clear | ||
588 | beqs clear_grs | ||
589 | orl #0x20000000,%d0 |set sticky bit in d0 | ||
590 | clear_grs: | ||
591 | andil #0xe0000000,%d0 |get rid of all but G,R,S | ||
592 | movel (%sp)+,%d2 | ||
593 | rts | ||
594 | | | ||
595 | | d1 >= 64 Force the exponent to be the denorm threshold with the | ||
596 | | correct sign. | ||
597 | | | ||
598 | case_3: | ||
599 | movew %d0,LOCAL_EX(%a0) | ||
600 | tstw LOCAL_SGN(%a0) | ||
601 | bges c3con | ||
602 | c3neg: | ||
603 | orl #0x80000000,LOCAL_EX(%a0) | ||
604 | c3con: | ||
605 | cmpw #64,%d1 | ||
606 | beqs sixty_four | ||
607 | cmpw #65,%d1 | ||
608 | beqs sixty_five | ||
609 | | | ||
610 | | Shift value is out of range. Set d1 for inex2 flag and | ||
611 | | return a zero with the given threshold. | ||
612 | | | ||
613 | clrl LOCAL_HI(%a0) | ||
614 | clrl LOCAL_LO(%a0) | ||
615 | movel #0x20000000,%d0 | ||
616 | st %d1 | ||
617 | rts | ||
618 | |||
619 | sixty_four: | ||
620 | movel LOCAL_HI(%a0),%d0 | ||
621 | bfextu %d0{#2:#30},%d1 | ||
622 | andil #0xc0000000,%d0 | ||
623 | bras c3com | ||
624 | |||
625 | sixty_five: | ||
626 | movel LOCAL_HI(%a0),%d0 | ||
627 | bfextu %d0{#1:#31},%d1 | ||
628 | andil #0x80000000,%d0 | ||
629 | lsrl #1,%d0 |shift high bit into R bit | ||
630 | |||
631 | c3com: | ||
632 | tstl %d1 | ||
633 | bnes c3ssticky | ||
634 | tstl LOCAL_LO(%a0) | ||
635 | bnes c3ssticky | ||
636 | tstb FP_SCR2+LOCAL_GRS(%a6) | ||
637 | bnes c3ssticky | ||
638 | clrb %d1 | ||
639 | bras c3end | ||
640 | |||
641 | c3ssticky: | ||
642 | bsetl #rnd_stky_bit,%d0 | ||
643 | st %d1 | ||
644 | c3end: | ||
645 | clrl LOCAL_HI(%a0) | ||
646 | clrl LOCAL_LO(%a0) | ||
647 | rts | ||
648 | |||
649 | |end | ||