Linux-2.6.12-rc2v2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/m68k/fpsp040/round.S
1 files changed, 649 insertions, 0 deletions
diff --git a/arch/m68k/fpsp040/round.S b/arch/m68k/fpsp040/round.S
new file mode 100644
index 000000000000..00f98068783f
--- /dev/null
+++ b/arch/m68k/fpsp040/round.S
@@ -0,0 +1,649 @@
+|
+|       round.sa 3.4 7/29/91
+|
+|       handle rounding and normalization tasks
+|
+|
+|
+|               Copyright (C) Motorola, Inc. 1990
+|                       All Rights Reserved
+|
+|       THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|       The copyright notice above does not evidence any
+|       actual or intended publication of such source code.
+|ROUND  idnt    2,1 | Motorola 040 Floating Point Software Package
+        |section        8
+#include "fpsp.h"
+|
+|       round --- round result according to precision/mode
+|
+|       a0 points to the input operand in the internal extended format
+|       d1(high word) contains rounding precision:
+|               ext = $0000xxxx
+|               sgl = $0001xxxx
+|               dbl = $0002xxxx
+|       d1(low word) contains rounding mode:
+|               RN  = $xxxx0000
+|               RZ  = $xxxx0001
+|               RM  = $xxxx0010
+|               RP  = $xxxx0011
+|       d0{31:29} contains the g,r,s bits (extended)
+|
+|       On return the value pointed to by a0 is correctly rounded,
+|       a0 is preserved and the g-r-s bits in d0 are cleared.
+|       The result is not typed - the tag field is invalid.  The
+|       result is still in the internal extended format.
+|
+|       The INEX bit of USER_FPSR will be set if the rounded result was
+|       inexact (i.e. if any of the g-r-s bits were set).
+|
+        .global round
+round:
+| If g=r=s=0 then result is exact and round is done, else set
+| the inex flag in status reg and continue.
+|
+        bsrs    ext_grs                 |this subroutine looks at the
+|                                       :rounding precision and sets
+|                                       ;the appropriate g-r-s bits.
+        tstl    %d0                     |if grs are zero, go force
+        bne     rnd_cont                |lower bits to zero for size
+        swap    %d1                     |set up d1.w for round prec.
+        bra     truncate
+rnd_cont:
+|
+| Use rounding mode as an index into a jump table for these modes.
+|
+        orl     #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
+        lea     mode_tab,%a1
+        movel   (%a1,%d1.w*4),%a1
+        jmp     (%a1)
+|
+| Jump table indexed by rounding mode in d1.w.  All following assumes
+| grs != 0.
+|
+mode_tab:
+        .long   rnd_near
+        .long   rnd_zero
+        .long   rnd_mnus
+        .long   rnd_plus
+|
+|       ROUND PLUS INFINITY
+|
+|       If sign of fp number = 0 (positive), then add 1 to l.
+|
+rnd_plus:
+        swap    %d1                     |set up d1 for round prec.
+        tstb    LOCAL_SGN(%a0)          |check for sign
+        bmi     truncate                |if positive then truncate
+        movel   #0xffffffff,%d0         |force g,r,s to be all f's
+        lea     add_to_l,%a1
+        movel   (%a1,%d1.w*4),%a1
+        jmp     (%a1)
+|
+|       ROUND MINUS INFINITY
+|
+|       If sign of fp number = 1 (negative), then add 1 to l.
+|
+rnd_mnus:
+        swap    %d1                     |set up d1 for round prec.
+        tstb    LOCAL_SGN(%a0)          |check for sign
+        bpl     truncate                |if negative then truncate
+        movel   #0xffffffff,%d0         |force g,r,s to be all f's
+        lea     add_to_l,%a1
+        movel   (%a1,%d1.w*4),%a1
+        jmp     (%a1)
+|
+|       ROUND ZERO
+|
+|       Always truncate.
+rnd_zero:
+        swap    %d1                     |set up d1 for round prec.
+        bra     truncate
+|
+|
+|       ROUND NEAREST
+|
+|       If (g=1), then add 1 to l and if (r=s=0), then clear l
+|       Note that this will round to even in case of a tie.
+|
+rnd_near:
+        swap    %d1                     |set up d1 for round prec.
+        asll    #1,%d0                  |shift g-bit to c-bit
+        bcc     truncate                |if (g=1) then
+        lea     add_to_l,%a1
+        movel   (%a1,%d1.w*4),%a1
+        jmp     (%a1)
+|
+|       ext_grs --- extract guard, round and sticky bits
+|
+| Input:        d1 =            PREC:ROUND
+| Output:       d0{31:29}=      guard, round, sticky
+|
+| The ext_grs extract the guard/round/sticky bits according to the
+| selected rounding precision. It is called by the round subroutine
+| only.  All registers except d0 are kept intact. d0 becomes an
+| updated guard,round,sticky in d0{31:29}
+|
+| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
+|        prior to usage, and needs to restore d1 to original.
+|
+ext_grs:
+        swap    %d1                     |have d1.w point to round precision
+        cmpiw   #0,%d1
+        bnes    sgl_or_dbl
+        bras    end_ext_grs
+sgl_or_dbl:
+        moveml  %d2/%d3,-(%a7)          |make some temp registers
+        cmpiw   #1,%d1
+        bnes    grs_dbl
+grs_sgl:
+        bfextu  LOCAL_HI(%a0){#24:#2},%d3       |sgl prec. g-r are 2 bits right
+        movel   #30,%d2                 |of the sgl prec. limits
+        lsll    %d2,%d3                 |shift g-r bits to MSB of d3
+        movel   LOCAL_HI(%a0),%d2               |get word 2 for s-bit test
+        andil   #0x0000003f,%d2         |s bit is the or of all other
+        bnes    st_stky                 |bits to the right of g-r
+        tstl    LOCAL_LO(%a0)           |test lower mantissa
+        bnes    st_stky                 |if any are set, set sticky
+        tstl    %d0                     |test original g,r,s
+        bnes    st_stky                 |if any are set, set sticky
+        bras    end_sd                  |if words 3 and 4 are clr, exit
+grs_dbl:
+        bfextu  LOCAL_LO(%a0){#21:#2},%d3       |dbl-prec. g-r are 2 bits right
+        movel   #30,%d2                 |of the dbl prec. limits
+        lsll    %d2,%d3                 |shift g-r bits to the MSB of d3
+        movel   LOCAL_LO(%a0),%d2               |get lower mantissa  for s-bit test
+        andil   #0x000001ff,%d2         |s bit is the or-ing of all
+        bnes    st_stky                 |other bits to the right of g-r
+        tstl    %d0                     |test word original g,r,s
+        bnes    st_stky                 |if any are set, set sticky
+        bras    end_sd                  |if clear, exit
+st_stky:
+        bset    #rnd_stky_bit,%d3
+end_sd:
+        movel   %d3,%d0                 |return grs to d0
+        moveml  (%a7)+,%d2/%d3          |restore scratch registers
+end_ext_grs:
+        swap    %d1                     |restore d1 to original
+        rts
+|*******************  Local Equates
+        .set    ad_1_sgl,0x00000100     |  constant to add 1 to l-bit in sgl prec
+        .set    ad_1_dbl,0x00000800     |  constant to add 1 to l-bit in dbl prec
+|Jump table for adding 1 to the l-bit indexed by rnd prec
+add_to_l:
+        .long   add_ext
+        .long   add_sgl
+        .long   add_dbl
+        .long   add_dbl
+|
+|       ADD SINGLE
+|
+add_sgl:
+        addl    #ad_1_sgl,LOCAL_HI(%a0)
+        bccs    scc_clr                 |no mantissa overflow
+        roxrw  LOCAL_HI(%a0)            |shift v-bit back in
+        roxrw  LOCAL_HI+2(%a0)          |shift v-bit back in
+        addw    #0x1,LOCAL_EX(%a0)      |and incr exponent
+scc_clr:
+        tstl    %d0                     |test for rs = 0
+        bnes    sgl_done
+        andiw  #0xfe00,LOCAL_HI+2(%a0)  |clear the l-bit
+sgl_done:
+        andil   #0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit
+        clrl    LOCAL_LO(%a0)           |clear d2
+        rts
+|
+|       ADD EXTENDED
+|
+add_ext:
+        addql  #1,LOCAL_LO(%a0)         |add 1 to l-bit
+        bccs    xcc_clr                 |test for carry out
+        addql  #1,LOCAL_HI(%a0)         |propagate carry
+        bccs    xcc_clr
+        roxrw  LOCAL_HI(%a0)            |mant is 0 so restore v-bit
+        roxrw  LOCAL_HI+2(%a0)          |mant is 0 so restore v-bit
+        roxrw   LOCAL_LO(%a0)
+        roxrw   LOCAL_LO+2(%a0)
+        addw    #0x1,LOCAL_EX(%a0)      |and inc exp
+xcc_clr:
+        tstl    %d0                     |test rs = 0
+        bnes    add_ext_done
+        andib   #0xfe,LOCAL_LO+3(%a0)   |clear the l bit
+add_ext_done:
+        rts
+|
+|       ADD DOUBLE
+|
+add_dbl:
+        addl    #ad_1_dbl,LOCAL_LO(%a0)
+        bccs    dcc_clr
+        addql   #1,LOCAL_HI(%a0)                |propagate carry
+        bccs    dcc_clr
+        roxrw   LOCAL_HI(%a0)           |mant is 0 so restore v-bit
+        roxrw   LOCAL_HI+2(%a0)         |mant is 0 so restore v-bit
+        roxrw   LOCAL_LO(%a0)
+        roxrw   LOCAL_LO+2(%a0)
+        addw    #0x1,LOCAL_EX(%a0)      |incr exponent
+dcc_clr:
+        tstl    %d0                     |test for rs = 0
+        bnes    dbl_done
+        andiw   #0xf000,LOCAL_LO+2(%a0) |clear the l-bit
+dbl_done:
+        andil   #0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit
+        rts
+error:
+        rts
+|
+| Truncate all other bits
+|
+trunct:
+        .long   end_rnd
+        .long   sgl_done
+        .long   dbl_done
+        .long   dbl_done
+truncate:
+        lea     trunct,%a1
+        movel   (%a1,%d1.w*4),%a1
+        jmp     (%a1)
+end_rnd:
+        rts
+|
+|       NORMALIZE
+|
+| These routines (nrm_zero & nrm_set) normalize the unnorm.  This
+| is done by shifting the mantissa left while decrementing the
+| exponent.
+|
+| NRM_SET shifts and decrements until there is a 1 set in the integer
+| bit of the mantissa (msb in d1).
+|
+| NRM_ZERO shifts and decrements until there is a 1 set in the integer
+| bit of the mantissa (msb in d1) unless this would mean the exponent
+| would go less than 0.  In that case the number becomes a denorm - the
+| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
+| normalized.
+|
+| Note that both routines have been optimized (for the worst case) and
+| therefore do not have the easy to follow decrement/shift loop.
+|
+|       NRM_ZERO
+|
+|       Distance to first 1 bit in mantissa = X
+|       Distance to 0 from exponent = Y
+|       If X < Y
+|       Then
+|         nrm_set
+|       Else
+|         shift mantissa by Y
+|         set exponent = 0
+|
+|input:
+|       FP_SCR1 = exponent, ms mantissa part, ls mantissa part
+|output:
+|       L_SCR1{4} = fpte15 or ete15 bit
+|
+        .global nrm_zero
+nrm_zero:
+        movew   LOCAL_EX(%a0),%d0
+        cmpw   #64,%d0          |see if exp > 64
+        bmis    d0_less
+        bsr     nrm_set         |exp > 64 so exp won't exceed 0
+        rts
+d0_less:
+        moveml  %d2/%d3/%d5/%d6,-(%a7)
+        movel   LOCAL_HI(%a0),%d1
+        movel   LOCAL_LO(%a0),%d2
+        bfffo   %d1{#0:#32},%d3 |get the distance to the first 1
+|                               ;in ms mant
+        beqs    ms_clr          |branch if no bits were set
+        cmpw    %d3,%d0         |of X>Y
+        bmis    greater         |then exp will go past 0 (neg) if
+|                               ;it is just shifted
+        bsr     nrm_set         |else exp won't go past 0
+        moveml  (%a7)+,%d2/%d3/%d5/%d6
+        rts
+greater:
+        movel   %d2,%d6         |save ls mant in d6
+        lsll    %d0,%d2         |shift ls mant by count
+        lsll    %d0,%d1         |shift ms mant by count
+        movel   #32,%d5
+        subl    %d0,%d5         |make op a denorm by shifting bits
+        lsrl    %d5,%d6         |by the number in the exp, then
+|                               ;set exp = 0.
+        orl     %d6,%d1         |shift the ls mant bits into the ms mant
+        movel   #0,%d0          |same as if decremented exp to 0
+|                               ;while shifting
+        movew   %d0,LOCAL_EX(%a0)
+        movel   %d1,LOCAL_HI(%a0)
+        movel   %d2,LOCAL_LO(%a0)
+        moveml  (%a7)+,%d2/%d3/%d5/%d6
+        rts
+ms_clr:
+        bfffo   %d2{#0:#32},%d3 |check if any bits set in ls mant
+        beqs    all_clr         |branch if none set
+        addw    #32,%d3
+        cmpw    %d3,%d0         |if X>Y
+        bmis    greater         |then branch
+        bsr     nrm_set         |else exp won't go past 0
+        moveml  (%a7)+,%d2/%d3/%d5/%d6
+        rts
+all_clr:
+        movew   #0,LOCAL_EX(%a0)        |no mantissa bits set. Set exp = 0.
+        moveml  (%a7)+,%d2/%d3/%d5/%d6
+        rts
+|
+|       NRM_SET
+|
+        .global nrm_set
+nrm_set:
+        movel   %d7,-(%a7)
+        bfffo   LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant to d7)
+        beqs    lower           |branch if ms mant is all 0's
+        movel   %d6,-(%a7)
+        subw    %d7,LOCAL_EX(%a0)       |sub exponent by count
+        movel   LOCAL_HI(%a0),%d0       |d0 has ms mant
+        movel   LOCAL_LO(%a0),%d1 |d1 has ls mant
+        lsll    %d7,%d0         |shift first 1 to j bit position
+        movel   %d1,%d6         |copy ls mant into d6
+        lsll    %d7,%d6         |shift ls mant by count
+        movel   %d6,LOCAL_LO(%a0)       |store ls mant into memory
+        moveql  #32,%d6
+        subl    %d7,%d6         |continue shift
+        lsrl    %d6,%d1         |shift off all bits but those that will
+|                               ;be shifted into ms mant
+        orl     %d1,%d0         |shift the ls mant bits into the ms mant
+        movel   %d0,LOCAL_HI(%a0)       |store ms mant into memory
+        moveml  (%a7)+,%d7/%d6  |restore registers
+        rts
+|
+| We get here if ms mant was = 0, and we assume ls mant has bits
+| set (otherwise this would have been tagged a zero not a denorm).
+|
+lower:
+        movew   LOCAL_EX(%a0),%d0       |d0 has exponent
+        movel   LOCAL_LO(%a0),%d1       |d1 has ls mant
+        subw    #32,%d0         |account for ms mant being all zeros
+        bfffo   %d1{#0:#32},%d7 |find first 1 in ls mant to d7)
+        subw    %d7,%d0         |subtract shift count from exp
+        lsll    %d7,%d1         |shift first 1 to integer bit in ms mant
+        movew   %d0,LOCAL_EX(%a0)       |store ms mant
+        movel   %d1,LOCAL_HI(%a0)       |store exp
+        clrl    LOCAL_LO(%a0)   |clear ls mant
+        movel   (%a7)+,%d7
+        rts
+|
+|       denorm --- denormalize an intermediate result
+|
+|       Used by underflow.
+|
+| Input:
+|       a0       points to the operand to be denormalized
+|                (in the internal extended format)
+|
+|       d0:      rounding precision
+| Output:
+|       a0       points to the denormalized result
+|                (in the internal extended format)
+|
+|       d0      is guard,round,sticky
+|
+| d0 comes into this routine with the rounding precision. It
+| is then loaded with the denormalized exponent threshold for the
+| rounding precision.
+|
+        .global denorm
+denorm:
+        btstb   #6,LOCAL_EX(%a0)        |check for exponents between $7fff-$4000
+        beqs    no_sgn_ext
+        bsetb   #7,LOCAL_EX(%a0)        |sign extend if it is so
+no_sgn_ext:
+        cmpib   #0,%d0          |if 0 then extended precision
+        bnes    not_ext         |else branch
+        clrl    %d1             |load d1 with ext threshold
+        clrl    %d0             |clear the sticky flag
+        bsr     dnrm_lp         |denormalize the number
+        tstb    %d1             |check for inex
+        beq     no_inex         |if clr, no inex
+        bras    dnrm_inex       |if set, set inex
+not_ext:
+        cmpil   #1,%d0          |if 1 then single precision
+        beqs    load_sgl        |else must be 2, double prec
+load_dbl:
+        movew   #dbl_thresh,%d1 |put copy of threshold in d1
+        movel   %d1,%d0         |copy d1 into d0
+        subw    LOCAL_EX(%a0),%d0       |diff = threshold - exp
+        cmpw    #67,%d0         |if diff > 67 (mant + grs bits)
+        bpls    chk_stky        |then branch (all bits would be
+|                               ; shifted off in denorm routine)
+        clrl    %d0             |else clear the sticky flag
+        bsr     dnrm_lp         |denormalize the number
+        tstb    %d1             |check flag
+        beqs    no_inex         |if clr, no inex
+        bras    dnrm_inex       |if set, set inex
+load_sgl:
+        movew   #sgl_thresh,%d1 |put copy of threshold in d1
+        movel   %d1,%d0         |copy d1 into d0
+        subw    LOCAL_EX(%a0),%d0       |diff = threshold - exp
+        cmpw    #67,%d0         |if diff > 67 (mant + grs bits)
+        bpls    chk_stky        |then branch (all bits would be
+|                               ; shifted off in denorm routine)
+        clrl    %d0             |else clear the sticky flag
+        bsr     dnrm_lp         |denormalize the number
+        tstb    %d1             |check flag
+        beqs    no_inex         |if clr, no inex
+        bras    dnrm_inex       |if set, set inex
+chk_stky:
+        tstl    LOCAL_HI(%a0)   |check for any bits set
+        bnes    set_stky
+        tstl    LOCAL_LO(%a0)   |check for any bits set
+        bnes    set_stky
+        bras    clr_mant
+set_stky:
+        orl     #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
+        movel   #0x20000000,%d0 |set sticky bit in return value
+clr_mant:
+        movew   %d1,LOCAL_EX(%a0)               |load exp with threshold
+        movel   #0,LOCAL_HI(%a0)        |set d1 = 0 (ms mantissa)
+        movel   #0,LOCAL_LO(%a0)                |set d2 = 0 (ms mantissa)
+        rts
+dnrm_inex:
+        orl     #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
+no_inex:
+        rts
+|
+|       dnrm_lp --- normalize exponent/mantissa to specified threshold
+|
+| Input:
+|       a0              points to the operand to be denormalized
+|       d0{31:29}       initial guard,round,sticky
+|       d1{15:0}        denormalization threshold
+| Output:
+|       a0              points to the denormalized operand
+|       d0{31:29}       final guard,round,sticky
+|       d1.b            inexact flag:  all ones means inexact result
+|
+| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
+| so that bfext can be used to extract the new low part of the mantissa.
+| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
+| is no LOCAL_GRS scratch word following it on the fsave frame.
+|
+        .global dnrm_lp
+dnrm_lp:
+        movel   %d2,-(%sp)              |save d2 for temp use
+        btstb   #E3,E_BYTE(%a6)         |test for type E3 exception
+        beqs    not_E3                  |not type E3 exception
+        bfextu  WBTEMP_GRS(%a6){#6:#3},%d2      |extract guard,round, sticky  bit
+        movel   #29,%d0
+        lsll    %d0,%d2                 |shift g,r,s to their positions
+        movel   %d2,%d0
+not_E3:
+        movel   (%sp)+,%d2              |restore d2
+        movel   LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
+        movel   %d0,FP_SCR2+LOCAL_GRS(%a6)
+        movel   %d1,%d0                 |copy the denorm threshold
+        subw    LOCAL_EX(%a0),%d1               |d1 = threshold - uns exponent
+        bles    no_lp                   |d1 <= 0
+        cmpw    #32,%d1
+        blts    case_1                  |0 = d1 < 32
+        cmpw    #64,%d1
+        blts    case_2                  |32 <= d1 < 64
+        bra     case_3                  |d1 >= 64
+|
+| No normalization necessary
+|
+no_lp:
+        clrb    %d1                     |set no inex2 reported
+        movel   FP_SCR2+LOCAL_GRS(%a6),%d0      |restore original g,r,s
+        rts
+|
+| case (0<d1<32)
+|
+case_1:
+        movel   %d2,-(%sp)
+        movew   %d0,LOCAL_EX(%a0)               |exponent = denorm threshold
+        movel   #32,%d0
+        subw    %d1,%d0                 |d0 = 32 - d1
+        bfextu  LOCAL_EX(%a0){%d0:#32},%d2
+        bfextu  %d2{%d1:%d0},%d2                |d2 = new LOCAL_HI
+        bfextu  LOCAL_HI(%a0){%d0:#32},%d1      |d1 = new LOCAL_LO
+        bfextu  FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0      |d0 = new G,R,S
+        movel   %d2,LOCAL_HI(%a0)               |store new LOCAL_HI
+        movel   %d1,LOCAL_LO(%a0)               |store new LOCAL_LO
+        clrb    %d1
+        bftst   %d0{#2:#30}
+        beqs    c1nstky
+        bsetl   #rnd_stky_bit,%d0
+        st      %d1
+c1nstky:
+        movel   FP_SCR2+LOCAL_GRS(%a6),%d2      |restore original g,r,s
+        andil   #0xe0000000,%d2         |clear all but G,R,S
+        tstl    %d2                     |test if original G,R,S are clear
+        beqs    grs_clear
+        orl     #0x20000000,%d0         |set sticky bit in d0
+grs_clear:
+        andil   #0xe0000000,%d0         |clear all but G,R,S
+        movel   (%sp)+,%d2
+        rts
+|
+| case (32<=d1<64)
+|
+case_2:
+        movel   %d2,-(%sp)
+        movew   %d0,LOCAL_EX(%a0)               |unsigned exponent = threshold
+        subw    #32,%d1                 |d1 now between 0 and 32
+        movel   #32,%d0
+        subw    %d1,%d0                 |d0 = 32 - d1
+        bfextu  LOCAL_EX(%a0){%d0:#32},%d2
+        bfextu  %d2{%d1:%d0},%d2                |d2 = new LOCAL_LO
+        bfextu  LOCAL_HI(%a0){%d0:#32},%d1      |d1 = new G,R,S
+        bftst   %d1{#2:#30}
+        bnes    c2_sstky                |bra if sticky bit to be set
+        bftst   FP_SCR2+LOCAL_LO(%a6){%d0:#32}
+        bnes    c2_sstky                |bra if sticky bit to be set
+        movel   %d1,%d0
+        clrb    %d1
+        bras    end_c2
+c2_sstky:
+        movel   %d1,%d0
+        bsetl   #rnd_stky_bit,%d0
+        st      %d1
+end_c2:
+        clrl    LOCAL_HI(%a0)           |store LOCAL_HI = 0
+        movel   %d2,LOCAL_LO(%a0)               |store LOCAL_LO
+        movel   FP_SCR2+LOCAL_GRS(%a6),%d2      |restore original g,r,s
+        andil   #0xe0000000,%d2         |clear all but G,R,S
+        tstl    %d2                     |test if original G,R,S are clear
+        beqs    clear_grs
+        orl     #0x20000000,%d0         |set sticky bit in d0
+clear_grs:
+        andil   #0xe0000000,%d0         |get rid of all but G,R,S
+        movel   (%sp)+,%d2
+        rts
+|
+| d1 >= 64 Force the exponent to be the denorm threshold with the
+| correct sign.
+|
+case_3:
+        movew   %d0,LOCAL_EX(%a0)
+        tstw    LOCAL_SGN(%a0)
+        bges    c3con
+c3neg:
+        orl     #0x80000000,LOCAL_EX(%a0)
+c3con:
+        cmpw    #64,%d1
+        beqs    sixty_four
+        cmpw    #65,%d1
+        beqs    sixty_five
+|
+| Shift value is out of range.  Set d1 for inex2 flag and
+| return a zero with the given threshold.
+|
+        clrl    LOCAL_HI(%a0)
+        clrl    LOCAL_LO(%a0)
+        movel   #0x20000000,%d0
+        st      %d1
+        rts
+sixty_four:
+        movel   LOCAL_HI(%a0),%d0
+        bfextu  %d0{#2:#30},%d1
+        andil   #0xc0000000,%d0
+        bras    c3com
+sixty_five:
+        movel   LOCAL_HI(%a0),%d0
+        bfextu  %d0{#1:#31},%d1
+        andil   #0x80000000,%d0
+        lsrl    #1,%d0                  |shift high bit into R bit
+c3com:
+        tstl    %d1
+        bnes    c3ssticky
+        tstl    LOCAL_LO(%a0)
+        bnes    c3ssticky
+        tstb    FP_SCR2+LOCAL_GRS(%a6)
+        bnes    c3ssticky
+        clrb    %d1
+        bras    c3end
+c3ssticky:
+        bsetl   #rnd_stky_bit,%d0
+        st      %d1
+c3end:
+        clrl    LOCAL_HI(%a0)
+        clrl    LOCAL_LO(%a0)
+        rts
+        |end
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/m68k/fpsp040/round.S

diff --git a/arch/m68k/fpsp040/round.S b/arch/m68k/fpsp040/round.S new file mode 100644 index 000000000000..00f98068783f --- /dev/null +++ b/arch/m68k/fpsp040/round.S
@@ -0,0 +1,649 @@
	1	\|
	2	\| round.sa 3.4 7/29/91
	3	\|
	4	\| handle rounding and normalization tasks
	5	\|
	6	\|
	7	\|
	8	\| Copyright (C) Motorola, Inc. 1990
	9	\| All Rights Reserved
	10	\|
	11	\| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
	12	\| The copyright notice above does not evidence any
	13	\| actual or intended publication of such source code.
	14
	15	\|ROUND idnt 2,1 \| Motorola 040 Floating Point Software Package
	16
	17	\|section 8
	18
	19	#include "fpsp.h"
	20
	21	\|
	22	\| round --- round result according to precision/mode
	23	\|
	24	\| a0 points to the input operand in the internal extended format
	25	\| d1(high word) contains rounding precision:
	26	\| ext = $0000xxxx
	27	\| sgl = $0001xxxx
	28	\| dbl = $0002xxxx
	29	\| d1(low word) contains rounding mode:
	30	\| RN = $xxxx0000
	31	\| RZ = $xxxx0001
	32	\| RM = $xxxx0010
	33	\| RP = $xxxx0011
	34	\| d0{31:29} contains the g,r,s bits (extended)
	35	\|
	36	\| On return the value pointed to by a0 is correctly rounded,
	37	\| a0 is preserved and the g-r-s bits in d0 are cleared.
	38	\| The result is not typed - the tag field is invalid. The
	39	\| result is still in the internal extended format.
	40	\|
	41	\| The INEX bit of USER_FPSR will be set if the rounded result was
	42	\| inexact (i.e. if any of the g-r-s bits were set).
	43	\|
	44
	45	.global round
	46	round:
	47	\| If g=r=s=0 then result is exact and round is done, else set
	48	\| the inex flag in status reg and continue.
	49	\|
	50	bsrs ext_grs \|this subroutine looks at the
	51	\| :rounding precision and sets
	52	\| ;the appropriate g-r-s bits.
	53	tstl %d0 \|if grs are zero, go force
	54	bne rnd_cont \|lower bits to zero for size
	55
	56	swap %d1 \|set up d1.w for round prec.
	57	bra truncate
	58
	59	rnd_cont:
	60	\|
	61	\| Use rounding mode as an index into a jump table for these modes.
	62	\|
	63	orl #inx2a_mask,USER_FPSR(%a6) \|set inex2/ainex
	64	lea mode_tab,%a1
	65	movel (%a1,%d1.w*4),%a1
	66	jmp (%a1)
	67	\|
	68	\| Jump table indexed by rounding mode in d1.w. All following assumes
	69	\| grs != 0.
	70	\|
	71	mode_tab:
	72	.long rnd_near
	73	.long rnd_zero
	74	.long rnd_mnus
	75	.long rnd_plus
	76	\|
	77	\| ROUND PLUS INFINITY
	78	\|
	79	\| If sign of fp number = 0 (positive), then add 1 to l.
	80	\|
	81	rnd_plus:
	82	swap %d1 \|set up d1 for round prec.
	83	tstb LOCAL_SGN(%a0) \|check for sign
	84	bmi truncate \|if positive then truncate
	85	movel #0xffffffff,%d0 \|force g,r,s to be all f's
	86	lea add_to_l,%a1
	87	movel (%a1,%d1.w*4),%a1
	88	jmp (%a1)
	89	\|
	90	\| ROUND MINUS INFINITY
	91	\|
	92	\| If sign of fp number = 1 (negative), then add 1 to l.
	93	\|
	94	rnd_mnus:
	95	swap %d1 \|set up d1 for round prec.
	96	tstb LOCAL_SGN(%a0) \|check for sign
	97	bpl truncate \|if negative then truncate
	98	movel #0xffffffff,%d0 \|force g,r,s to be all f's
	99	lea add_to_l,%a1
	100	movel (%a1,%d1.w*4),%a1
	101	jmp (%a1)
	102	\|
	103	\| ROUND ZERO
	104	\|
	105	\| Always truncate.
	106	rnd_zero:
	107	swap %d1 \|set up d1 for round prec.
	108	bra truncate
	109	\|
	110	\|
	111	\| ROUND NEAREST
	112	\|
	113	\| If (g=1), then add 1 to l and if (r=s=0), then clear l
	114	\| Note that this will round to even in case of a tie.
	115	\|
	116	rnd_near:
	117	swap %d1 \|set up d1 for round prec.
	118	asll #1,%d0 \|shift g-bit to c-bit
	119	bcc truncate \|if (g=1) then
	120	lea add_to_l,%a1
	121	movel (%a1,%d1.w*4),%a1
	122	jmp (%a1)
	123
	124	\|
	125	\| ext_grs --- extract guard, round and sticky bits
	126	\|
	127	\| Input: d1 = PREC:ROUND
	128	\| Output: d0{31:29}= guard, round, sticky
	129	\|
	130	\| The ext_grs extract the guard/round/sticky bits according to the
	131	\| selected rounding precision. It is called by the round subroutine
	132	\| only. All registers except d0 are kept intact. d0 becomes an
	133	\| updated guard,round,sticky in d0{31:29}
	134	\|
	135	\| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
	136	\| prior to usage, and needs to restore d1 to original.
	137	\|
	138	ext_grs:
	139	swap %d1 \|have d1.w point to round precision
	140	cmpiw #0,%d1
	141	bnes sgl_or_dbl
	142	bras end_ext_grs
	143
	144	sgl_or_dbl:
	145	moveml %d2/%d3,-(%a7) \|make some temp registers
	146	cmpiw #1,%d1
	147	bnes grs_dbl
	148	grs_sgl:
	149	bfextu LOCAL_HI(%a0){#24:#2},%d3 \|sgl prec. g-r are 2 bits right
	150	movel #30,%d2 \|of the sgl prec. limits
	151	lsll %d2,%d3 \|shift g-r bits to MSB of d3
	152	movel LOCAL_HI(%a0),%d2 \|get word 2 for s-bit test
	153	andil #0x0000003f,%d2 \|s bit is the or of all other
	154	bnes st_stky \|bits to the right of g-r
	155	tstl LOCAL_LO(%a0) \|test lower mantissa
	156	bnes st_stky \|if any are set, set sticky
	157	tstl %d0 \|test original g,r,s
	158	bnes st_stky \|if any are set, set sticky
	159	bras end_sd \|if words 3 and 4 are clr, exit
	160	grs_dbl:
	161	bfextu LOCAL_LO(%a0){#21:#2},%d3 \|dbl-prec. g-r are 2 bits right
	162	movel #30,%d2 \|of the dbl prec. limits
	163	lsll %d2,%d3 \|shift g-r bits to the MSB of d3
	164	movel LOCAL_LO(%a0),%d2 \|get lower mantissa for s-bit test
	165	andil #0x000001ff,%d2 \|s bit is the or-ing of all
	166	bnes st_stky \|other bits to the right of g-r
	167	tstl %d0 \|test word original g,r,s
	168	bnes st_stky \|if any are set, set sticky
	169	bras end_sd \|if clear, exit
	170	st_stky:
	171	bset #rnd_stky_bit,%d3
	172	end_sd:
	173	movel %d3,%d0 \|return grs to d0
	174	moveml (%a7)+,%d2/%d3 \|restore scratch registers
	175	end_ext_grs:
	176	swap %d1 \|restore d1 to original
	177	rts
	178
	179	\|******************* Local Equates
	180	.set ad_1_sgl,0x00000100 \| constant to add 1 to l-bit in sgl prec
	181	.set ad_1_dbl,0x00000800 \| constant to add 1 to l-bit in dbl prec
	182
	183
	184	\|Jump table for adding 1 to the l-bit indexed by rnd prec
	185
	186	add_to_l:
	187	.long add_ext
	188	.long add_sgl
	189	.long add_dbl
	190	.long add_dbl
	191	\|
	192	\| ADD SINGLE
	193	\|
	194	add_sgl:
	195	addl #ad_1_sgl,LOCAL_HI(%a0)
	196	bccs scc_clr \|no mantissa overflow
	197	roxrw LOCAL_HI(%a0) \|shift v-bit back in
	198	roxrw LOCAL_HI+2(%a0) \|shift v-bit back in
	199	addw #0x1,LOCAL_EX(%a0) \|and incr exponent
	200	scc_clr:
	201	tstl %d0 \|test for rs = 0
	202	bnes sgl_done
	203	andiw #0xfe00,LOCAL_HI+2(%a0) \|clear the l-bit
	204	sgl_done:
	205	andil #0xffffff00,LOCAL_HI(%a0) \|truncate bits beyond sgl limit
	206	clrl LOCAL_LO(%a0) \|clear d2
	207	rts
	208
	209	\|
	210	\| ADD EXTENDED
	211	\|
	212	add_ext:
	213	addql #1,LOCAL_LO(%a0) \|add 1 to l-bit
	214	bccs xcc_clr \|test for carry out
	215	addql #1,LOCAL_HI(%a0) \|propagate carry
	216	bccs xcc_clr
	217	roxrw LOCAL_HI(%a0) \|mant is 0 so restore v-bit
	218	roxrw LOCAL_HI+2(%a0) \|mant is 0 so restore v-bit
	219	roxrw LOCAL_LO(%a0)
	220	roxrw LOCAL_LO+2(%a0)
	221	addw #0x1,LOCAL_EX(%a0) \|and inc exp
	222	xcc_clr:
	223	tstl %d0 \|test rs = 0
	224	bnes add_ext_done
	225	andib #0xfe,LOCAL_LO+3(%a0) \|clear the l bit
	226	add_ext_done:
	227	rts
	228	\|
	229	\| ADD DOUBLE
	230	\|
	231	add_dbl:
	232	addl #ad_1_dbl,LOCAL_LO(%a0)
	233	bccs dcc_clr
	234	addql #1,LOCAL_HI(%a0) \|propagate carry
	235	bccs dcc_clr
	236	roxrw LOCAL_HI(%a0) \|mant is 0 so restore v-bit
	237	roxrw LOCAL_HI+2(%a0) \|mant is 0 so restore v-bit
	238	roxrw LOCAL_LO(%a0)
	239	roxrw LOCAL_LO+2(%a0)
	240	addw #0x1,LOCAL_EX(%a0) \|incr exponent
	241	dcc_clr:
	242	tstl %d0 \|test for rs = 0
	243	bnes dbl_done
	244	andiw #0xf000,LOCAL_LO+2(%a0) \|clear the l-bit
	245
	246	dbl_done:
	247	andil #0xfffff800,LOCAL_LO(%a0) \|truncate bits beyond dbl limit
	248	rts
	249
	250	error:
	251	rts
	252	\|
	253	\| Truncate all other bits
	254	\|
	255	trunct:
	256	.long end_rnd
	257	.long sgl_done
	258	.long dbl_done
	259	.long dbl_done
	260
	261	truncate:
	262	lea trunct,%a1
	263	movel (%a1,%d1.w*4),%a1
	264	jmp (%a1)
	265
	266	end_rnd:
	267	rts
	268
	269	\|
	270	\| NORMALIZE
	271	\|
	272	\| These routines (nrm_zero & nrm_set) normalize the unnorm. This
	273	\| is done by shifting the mantissa left while decrementing the
	274	\| exponent.
	275	\|
	276	\| NRM_SET shifts and decrements until there is a 1 set in the integer
	277	\| bit of the mantissa (msb in d1).
	278	\|
	279	\| NRM_ZERO shifts and decrements until there is a 1 set in the integer
	280	\| bit of the mantissa (msb in d1) unless this would mean the exponent
	281	\| would go less than 0. In that case the number becomes a denorm - the
	282	\| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
	283	\| normalized.
	284	\|
	285	\| Note that both routines have been optimized (for the worst case) and
	286	\| therefore do not have the easy to follow decrement/shift loop.
	287	\|
	288	\| NRM_ZERO
	289	\|
	290	\| Distance to first 1 bit in mantissa = X
	291	\| Distance to 0 from exponent = Y
	292	\| If X < Y
	293	\| Then
	294	\| nrm_set
	295	\| Else
	296	\| shift mantissa by Y
	297	\| set exponent = 0
	298	\|
	299	\|input:
	300	\| FP_SCR1 = exponent, ms mantissa part, ls mantissa part
	301	\|output:
	302	\| L_SCR1{4} = fpte15 or ete15 bit
	303	\|
	304	.global nrm_zero
	305	nrm_zero:
	306	movew LOCAL_EX(%a0),%d0
	307	cmpw #64,%d0 \|see if exp > 64
	308	bmis d0_less
	309	bsr nrm_set \|exp > 64 so exp won't exceed 0
	310	rts
	311	d0_less:
	312	moveml %d2/%d3/%d5/%d6,-(%a7)
	313	movel LOCAL_HI(%a0),%d1
	314	movel LOCAL_LO(%a0),%d2
	315
	316	bfffo %d1{#0:#32},%d3 \|get the distance to the first 1
	317	\| ;in ms mant
	318	beqs ms_clr \|branch if no bits were set
	319	cmpw %d3,%d0 \|of X>Y
	320	bmis greater \|then exp will go past 0 (neg) if
	321	\| ;it is just shifted
	322	bsr nrm_set \|else exp won't go past 0
	323	moveml (%a7)+,%d2/%d3/%d5/%d6
	324	rts
	325	greater:
	326	movel %d2,%d6 \|save ls mant in d6
	327	lsll %d0,%d2 \|shift ls mant by count
	328	lsll %d0,%d1 \|shift ms mant by count
	329	movel #32,%d5
	330	subl %d0,%d5 \|make op a denorm by shifting bits
	331	lsrl %d5,%d6 \|by the number in the exp, then
	332	\| ;set exp = 0.
	333	orl %d6,%d1 \|shift the ls mant bits into the ms mant
	334	movel #0,%d0 \|same as if decremented exp to 0
	335	\| ;while shifting
	336	movew %d0,LOCAL_EX(%a0)
	337	movel %d1,LOCAL_HI(%a0)
	338	movel %d2,LOCAL_LO(%a0)
	339	moveml (%a7)+,%d2/%d3/%d5/%d6
	340	rts
	341	ms_clr:
	342	bfffo %d2{#0:#32},%d3 \|check if any bits set in ls mant
	343	beqs all_clr \|branch if none set
	344	addw #32,%d3
	345	cmpw %d3,%d0 \|if X>Y
	346	bmis greater \|then branch
	347	bsr nrm_set \|else exp won't go past 0
	348	moveml (%a7)+,%d2/%d3/%d5/%d6
	349	rts
	350	all_clr:
	351	movew #0,LOCAL_EX(%a0) \|no mantissa bits set. Set exp = 0.
	352	moveml (%a7)+,%d2/%d3/%d5/%d6
	353	rts
	354	\|
	355	\| NRM_SET
	356	\|
	357	.global nrm_set
	358	nrm_set:
	359	movel %d7,-(%a7)
	360	bfffo LOCAL_HI(%a0){#0:#32},%d7 \|find first 1 in ms mant to d7)
	361	beqs lower \|branch if ms mant is all 0's
	362
	363	movel %d6,-(%a7)
	364
	365	subw %d7,LOCAL_EX(%a0) \|sub exponent by count
	366	movel LOCAL_HI(%a0),%d0 \|d0 has ms mant
	367	movel LOCAL_LO(%a0),%d1 \|d1 has ls mant
	368
	369	lsll %d7,%d0 \|shift first 1 to j bit position
	370	movel %d1,%d6 \|copy ls mant into d6
	371	lsll %d7,%d6 \|shift ls mant by count
	372	movel %d6,LOCAL_LO(%a0) \|store ls mant into memory
	373	moveql #32,%d6
	374	subl %d7,%d6 \|continue shift
	375	lsrl %d6,%d1 \|shift off all bits but those that will
	376	\| ;be shifted into ms mant
	377	orl %d1,%d0 \|shift the ls mant bits into the ms mant
	378	movel %d0,LOCAL_HI(%a0) \|store ms mant into memory
	379	moveml (%a7)+,%d7/%d6 \|restore registers
	380	rts
	381
	382	\|
	383	\| We get here if ms mant was = 0, and we assume ls mant has bits
	384	\| set (otherwise this would have been tagged a zero not a denorm).
	385	\|
	386	lower:
	387	movew LOCAL_EX(%a0),%d0 \|d0 has exponent
	388	movel LOCAL_LO(%a0),%d1 \|d1 has ls mant
	389	subw #32,%d0 \|account for ms mant being all zeros
	390	bfffo %d1{#0:#32},%d7 \|find first 1 in ls mant to d7)
	391	subw %d7,%d0 \|subtract shift count from exp
	392	lsll %d7,%d1 \|shift first 1 to integer bit in ms mant
	393	movew %d0,LOCAL_EX(%a0) \|store ms mant
	394	movel %d1,LOCAL_HI(%a0) \|store exp
	395	clrl LOCAL_LO(%a0) \|clear ls mant
	396	movel (%a7)+,%d7
	397	rts
	398	\|
	399	\| denorm --- denormalize an intermediate result
	400	\|
	401	\| Used by underflow.
	402	\|
	403	\| Input:
	404	\| a0 points to the operand to be denormalized
	405	\| (in the internal extended format)
	406	\|
	407	\| d0: rounding precision
	408	\| Output:
	409	\| a0 points to the denormalized result
	410	\| (in the internal extended format)
	411	\|
	412	\| d0 is guard,round,sticky
	413	\|
	414	\| d0 comes into this routine with the rounding precision. It
	415	\| is then loaded with the denormalized exponent threshold for the
	416	\| rounding precision.
	417	\|
	418
	419	.global denorm
	420	denorm:
	421	btstb #6,LOCAL_EX(%a0) \|check for exponents between $7fff-$4000
	422	beqs no_sgn_ext
	423	bsetb #7,LOCAL_EX(%a0) \|sign extend if it is so
	424	no_sgn_ext:
	425
	426	cmpib #0,%d0 \|if 0 then extended precision
	427	bnes not_ext \|else branch
	428
	429	clrl %d1 \|load d1 with ext threshold
	430	clrl %d0 \|clear the sticky flag
	431	bsr dnrm_lp \|denormalize the number
	432	tstb %d1 \|check for inex
	433	beq no_inex \|if clr, no inex
	434	bras dnrm_inex \|if set, set inex
	435
	436	not_ext:
	437	cmpil #1,%d0 \|if 1 then single precision
	438	beqs load_sgl \|else must be 2, double prec
	439
	440	load_dbl:
	441	movew #dbl_thresh,%d1 \|put copy of threshold in d1
	442	movel %d1,%d0 \|copy d1 into d0
	443	subw LOCAL_EX(%a0),%d0 \|diff = threshold - exp
	444	cmpw #67,%d0 \|if diff > 67 (mant + grs bits)
	445	bpls chk_stky \|then branch (all bits would be
	446	\| ; shifted off in denorm routine)
	447	clrl %d0 \|else clear the sticky flag
	448	bsr dnrm_lp \|denormalize the number
	449	tstb %d1 \|check flag
	450	beqs no_inex \|if clr, no inex
	451	bras dnrm_inex \|if set, set inex
	452
	453	load_sgl:
	454	movew #sgl_thresh,%d1 \|put copy of threshold in d1
	455	movel %d1,%d0 \|copy d1 into d0
	456	subw LOCAL_EX(%a0),%d0 \|diff = threshold - exp
	457	cmpw #67,%d0 \|if diff > 67 (mant + grs bits)
	458	bpls chk_stky \|then branch (all bits would be
	459	\| ; shifted off in denorm routine)
	460	clrl %d0 \|else clear the sticky flag
	461	bsr dnrm_lp \|denormalize the number
	462	tstb %d1 \|check flag
	463	beqs no_inex \|if clr, no inex
	464	bras dnrm_inex \|if set, set inex
	465
	466	chk_stky:
	467	tstl LOCAL_HI(%a0) \|check for any bits set
	468	bnes set_stky
	469	tstl LOCAL_LO(%a0) \|check for any bits set
	470	bnes set_stky
	471	bras clr_mant
	472	set_stky:
	473	orl #inx2a_mask,USER_FPSR(%a6) \|set inex2/ainex
	474	movel #0x20000000,%d0 \|set sticky bit in return value
	475	clr_mant:
	476	movew %d1,LOCAL_EX(%a0) \|load exp with threshold
	477	movel #0,LOCAL_HI(%a0) \|set d1 = 0 (ms mantissa)
	478	movel #0,LOCAL_LO(%a0) \|set d2 = 0 (ms mantissa)
	479	rts
	480	dnrm_inex:
	481	orl #inx2a_mask,USER_FPSR(%a6) \|set inex2/ainex
	482	no_inex:
	483	rts
	484
	485	\|
	486	\| dnrm_lp --- normalize exponent/mantissa to specified threshold
	487	\|
	488	\| Input:
	489	\| a0 points to the operand to be denormalized
	490	\| d0{31:29} initial guard,round,sticky
	491	\| d1{15:0} denormalization threshold
	492	\| Output:
	493	\| a0 points to the denormalized operand
	494	\| d0{31:29} final guard,round,sticky
	495	\| d1.b inexact flag: all ones means inexact result
	496	\|
	497	\| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
	498	\| so that bfext can be used to extract the new low part of the mantissa.
	499	\| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
	500	\| is no LOCAL_GRS scratch word following it on the fsave frame.
	501	\|
	502	.global dnrm_lp
	503	dnrm_lp:
	504	movel %d2,-(%sp) \|save d2 for temp use
	505	btstb #E3,E_BYTE(%a6) \|test for type E3 exception
	506	beqs not_E3 \|not type E3 exception
	507	bfextu WBTEMP_GRS(%a6){#6:#3},%d2 \|extract guard,round, sticky bit
	508	movel #29,%d0
	509	lsll %d0,%d2 \|shift g,r,s to their positions
	510	movel %d2,%d0
	511	not_E3:
	512	movel (%sp)+,%d2 \|restore d2
	513	movel LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
	514	movel %d0,FP_SCR2+LOCAL_GRS(%a6)
	515	movel %d1,%d0 \|copy the denorm threshold
	516	subw LOCAL_EX(%a0),%d1 \|d1 = threshold - uns exponent
	517	bles no_lp \|d1 <= 0
	518	cmpw #32,%d1
	519	blts case_1 \|0 = d1 < 32
	520	cmpw #64,%d1
	521	blts case_2 \|32 <= d1 < 64
	522	bra case_3 \|d1 >= 64
	523	\|
	524	\| No normalization necessary
	525	\|
	526	no_lp:
	527	clrb %d1 \|set no inex2 reported
	528	movel FP_SCR2+LOCAL_GRS(%a6),%d0 \|restore original g,r,s
	529	rts
	530	\|
	531	\| case (0<d1<32)
	532	\|
	533	case_1:
	534	movel %d2,-(%sp)
	535	movew %d0,LOCAL_EX(%a0) \|exponent = denorm threshold
	536	movel #32,%d0
	537	subw %d1,%d0 \|d0 = 32 - d1
	538	bfextu LOCAL_EX(%a0){%d0:#32},%d2
	539	bfextu %d2{%d1:%d0},%d2 \|d2 = new LOCAL_HI
	540	bfextu LOCAL_HI(%a0){%d0:#32},%d1 \|d1 = new LOCAL_LO
	541	bfextu FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0 \|d0 = new G,R,S
	542	movel %d2,LOCAL_HI(%a0) \|store new LOCAL_HI
	543	movel %d1,LOCAL_LO(%a0) \|store new LOCAL_LO
	544	clrb %d1
	545	bftst %d0{#2:#30}
	546	beqs c1nstky
	547	bsetl #rnd_stky_bit,%d0
	548	st %d1
	549	c1nstky:
	550	movel FP_SCR2+LOCAL_GRS(%a6),%d2 \|restore original g,r,s
	551	andil #0xe0000000,%d2 \|clear all but G,R,S
	552	tstl %d2 \|test if original G,R,S are clear
	553	beqs grs_clear
	554	orl #0x20000000,%d0 \|set sticky bit in d0
	555	grs_clear:
	556	andil #0xe0000000,%d0 \|clear all but G,R,S
	557	movel (%sp)+,%d2
	558	rts
	559	\|
	560	\| case (32<=d1<64)
	561	\|
	562	case_2:
	563	movel %d2,-(%sp)
	564	movew %d0,LOCAL_EX(%a0) \|unsigned exponent = threshold
	565	subw #32,%d1 \|d1 now between 0 and 32
	566	movel #32,%d0
	567	subw %d1,%d0 \|d0 = 32 - d1
	568	bfextu LOCAL_EX(%a0){%d0:#32},%d2
	569	bfextu %d2{%d1:%d0},%d2 \|d2 = new LOCAL_LO
	570	bfextu LOCAL_HI(%a0){%d0:#32},%d1 \|d1 = new G,R,S
	571	bftst %d1{#2:#30}
	572	bnes c2_sstky \|bra if sticky bit to be set
	573	bftst FP_SCR2+LOCAL_LO(%a6){%d0:#32}
	574	bnes c2_sstky \|bra if sticky bit to be set
	575	movel %d1,%d0
	576	clrb %d1
	577	bras end_c2
	578	c2_sstky:
	579	movel %d1,%d0
	580	bsetl #rnd_stky_bit,%d0
	581	st %d1
	582	end_c2:
	583	clrl LOCAL_HI(%a0) \|store LOCAL_HI = 0
	584	movel %d2,LOCAL_LO(%a0) \|store LOCAL_LO
	585	movel FP_SCR2+LOCAL_GRS(%a6),%d2 \|restore original g,r,s
	586	andil #0xe0000000,%d2 \|clear all but G,R,S
	587	tstl %d2 \|test if original G,R,S are clear
	588	beqs clear_grs
	589	orl #0x20000000,%d0 \|set sticky bit in d0
	590	clear_grs:
	591	andil #0xe0000000,%d0 \|get rid of all but G,R,S
	592	movel (%sp)+,%d2
	593	rts
	594	\|
	595	\| d1 >= 64 Force the exponent to be the denorm threshold with the
	596	\| correct sign.
	597	\|
	598	case_3:
	599	movew %d0,LOCAL_EX(%a0)
	600	tstw LOCAL_SGN(%a0)
	601	bges c3con
	602	c3neg:
	603	orl #0x80000000,LOCAL_EX(%a0)
	604	c3con:
	605	cmpw #64,%d1
	606	beqs sixty_four
	607	cmpw #65,%d1
	608	beqs sixty_five
	609	\|
	610	\| Shift value is out of range. Set d1 for inex2 flag and
	611	\| return a zero with the given threshold.
	612	\|
	613	clrl LOCAL_HI(%a0)
	614	clrl LOCAL_LO(%a0)
	615	movel #0x20000000,%d0
	616	st %d1
	617	rts
	618
	619	sixty_four:
	620	movel LOCAL_HI(%a0),%d0
	621	bfextu %d0{#2:#30},%d1
	622	andil #0xc0000000,%d0
	623	bras c3com
	624
	625	sixty_five:
	626	movel LOCAL_HI(%a0),%d0
	627	bfextu %d0{#1:#31},%d1
	628	andil #0x80000000,%d0
	629	lsrl #1,%d0 \|shift high bit into R bit
	630
	631	c3com:
	632	tstl %d1
	633	bnes c3ssticky
	634	tstl LOCAL_LO(%a0)
	635	bnes c3ssticky
	636	tstb FP_SCR2+LOCAL_GRS(%a6)
	637	bnes c3ssticky
	638	clrb %d1
	639	bras c3end
	640
	641	c3ssticky:
	642	bsetl #rnd_stky_bit,%d0
	643	st %d1
	644	c3end:
	645	clrl LOCAL_HI(%a0)
	646	clrl LOCAL_LO(%a0)
	647	rts
	648
	649	\|end