i386: move math-emu

Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Thomas Gleixner <tglx@linutronix.de> 2007-10-11 05:16:31 -0400
committer: Thomas Gleixner <tglx@linutronix.de> 2007-10-11 05:16:31 -0400
commit: da957e111bb0c189a4a3bf8a00caaecb59ed94ca (patch)
tree: 6916075fdd3e28869dcd3dfa2cf160a74d1cb02e /arch/x86/math-emu/reg_round.S
parent: 2ec1df4130c60d1eb49dc0fa0ed15858fede6b05 (diff)
1 files changed, 708 insertions, 0 deletions
diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S
new file mode 100644
index 000000000000..d1d4e48b4f67
--- /dev/null
+++ b/arch/x86/math-emu/reg_round.S
@@ -0,0 +1,708 @@
+        .file "reg_round.S"
+/*---------------------------------------------------------------------------+
+ |  reg_round.S                                                              |
+ |                                                                           |
+ | Rounding/truncation/etc for FPU basic arithmetic functions.               |
+ |                                                                           |
+ | Copyright (C) 1993,1995,1997                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@suburbia.net               |
+ |                                                                           |
+ | This code has four possible entry points.                                 |
+ | The following must be entered by a jmp instruction:                       |
+ |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
+ |                                                                           |
+ | The FPU_round entry point is intended to be used by C code.               |
+ | From C, call as:                                                          |
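+ |  int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,              |
+ |                unsigned int control_w, u_char sign)                       |
+ |  (The code reads the control word from PARAM4 and the sign from PARAM5;   |
+ |   the third argument is not read by this code.)                           |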
+ |                                                                           |
+ |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
+ |    one was raised, or -1 on internal error.                               |
+ |                                                                           |
+ | For correct "up" and "down" rounding, the argument must have the correct  |
+ | sign.                                                                     |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------+
+ | Four entry points.                                                        |
+ |                                                                           |
+ | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
+ |  %eax:%ebx  64 bit significand                                            |
+ |  %edx       32 bit extension of the significand                           |
+ |  %edi       pointer to an FPU_REG for the result to be stored             |
+ |  stack      calling function must have set up a C stack frame and         |
+ |             pushed %esi, %edi, and %ebx                                   |
+ |                                                                           |
+ | Needed just for the fpu_reg_round_sqrt entry point:                       |
+ |  %cx  A control word in the same format as the FPU control word.          |
+ | Otherwise, PARAM4 must give such a value.                                 |
+ |                                                                           |
+ |                                                                           |
+ | The significand and its extension are assumed to be exact in the          |
+ | following sense:                                                          |
+ |   If the significand by itself is the exact result then the significand   |
+ |   extension (%edx) must contain 0, otherwise the significand extension    |
+ |   must be non-zero.                                                       |
+ |   If the significand extension is non-zero then the significand is        |
+ |   smaller than the magnitude of the correct exact result by an amount     |
+ |   greater than zero and less than one ls bit of the significand.          |
+ |   The significand extension is only required to have three possible       |
+ |   non-zero values:                                                        |
+ |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
+ |                                 bit smaller than the magnitude of the     |
+ |                                 true exact result.                        |
+ |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
+ |                                 smaller than the magnitude of the true    |
+ |                                 exact result.                             |
+ |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
+ |                                 bit smaller than the magnitude of the     |
+ |                                 true exact result.                        |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------+
+ |  The code in this module has become quite complex, but it should handle   |
+ |  all of the FPU flags which are set at this stage of the basic arithmetic |
+ |  computations.                                                            |
+ |  There are a few rare cases where the results are not set identically to  |
+ |  a real FPU. These require a bit more thought because at this stage the   |
+ |  results of the code here appear to be more consistent...                 |
+ |  This may be changed in a future version.                                 |
+ +---------------------------------------------------------------------------*/
+#include "fpu_emu.h"
+#include "exception.h"
+#include "control_w.h"
+/* Flags for FPU_bits_lost: records which way the rounding step moved the
+ * significand.  The byte is cleared at Denorm_done, so zero means that no
+ * bits have been lost. */
+#define LOST_DOWN       $1      /* discarded bits were dropped (truncation) */
+#define LOST_UP         $2      /* significand was incremented */
+/* Flags for FPU_denormal.  The byte is cleared on entry when the exponent
+ * is above EXP_UNDER, so zero means "not a de-normal". */
+#define DENORMAL        $1      /* significand was shifted right in L_Make_denorm */
+#define UNMASKED_UNDERFLOW $2   /* underflow mask bit was clear; no shift was done */
+#ifndef NON_REENTRANT_FPU
+/*      Make the code re-entrant by putting
+        local storage on the stack.
+        Both bytes live in the dword pushed at fpu_reg_round, and are
+        addressed relative to %esp, so they must only be referenced while
+        %esp is at the level it had straight after that push: */
+#define FPU_bits_lost   (%esp)
+#define FPU_denormal    1(%esp)
+#else
+/*      Not re-entrant, so we can gain speed by putting
+        local storage in a static area: */
+.data
+        .align 4,0
+FPU_bits_lost:
+        .byte   0
+FPU_denormal:
+        .byte   0
+#endif /* NON_REENTRANT_FPU */
+.text
+.globl fpu_reg_round
+.globl fpu_Arith_exit
+/* Entry point when called from C.
+ * Sets up a frame, saves %esi, %edi and %ebx, loads the registers which the
+ * fpu_reg_round entry point expects, and then falls through into it. */
+ENTRY(FPU_round)
+        pushl   %ebp
+        movl    %esp,%ebp
+        pushl   %esi
+        pushl   %edi
+        pushl   %ebx
+        movl    PARAM1,%edi             /* %edi -> FPU_REG; the result is stored back through it */
+        movl    SIGH(%edi),%eax         /* %eax:%ebx = 64 bit significand */
+        movl    SIGL(%edi),%ebx
+        movl    PARAM2,%edx             /* %edx = extension of the significand */
+fpu_reg_round:                  /* Normal entry point */
+        movl    PARAM4,%ecx             /* %ecx = control word; its PC and RC fields are tested below */
+#ifndef NON_REENTRANT_FPU
+        pushl   %ebx            /* adjust the stack pointer: makes room for FPU_bits_lost/FPU_denormal */
+#endif /* NON_REENTRANT_FPU */ 
+#ifdef PARANOID
+/* Cannot use this here yet */
+/*      orl     %eax,%eax */
+/*      jns     L_entry_bugged */
+#endif /* PARANOID */
+        cmpw    EXP_UNDER,EXP(%edi)             /* signed compare of the 16 bit exponent */
+        jle     L_Make_denorm                   /* The number is a de-normal */
+        movb    $0,FPU_denormal                 /* 0 -> not a de-normal */
+Denorm_done:
+        movb    $0,FPU_bits_lost                /* No bits yet lost in rounding */
+        movl    %ecx,%esi               /* keep the whole control word; %esi is re-read for the RC test */
+        andl    CW_PC,%ecx              /* isolate the precision control field */
+        cmpl    PR_64_BITS,%ecx
+        je      LRound_To_64
+        cmpl    PR_53_BITS,%ecx
+        je      LRound_To_53
+        cmpl    PR_24_BITS,%ecx
+        je      LRound_To_24
+#ifdef PECULIAR_486
+/* With the precision control bits set to 01 "(reserved)", a real 80486
+   behaves as if the precision control bits were set to 11 "64 bits" */
+        cmpl    PR_RESERVED_BITS,%ecx
+        je      LRound_To_64
+#ifdef PARANOID
+        jmp     L_bugged_denorm_486
+#endif /* PARANOID */ 
+#else
+#ifdef PARANOID
+        jmp     L_bugged_denorm /* There is no bug, just a bad control word */
+#endif /* PARANOID */ 
+#endif /* PECULIAR_486 */
+/* Round etc to 24 bit precision.
+ * The bits which are kept are the upper 24 bits of %eax.  The low 8 bits
+ * of %eax, all of %ebx, and the extension in %edx are the bits which get
+ * discarded.
+ * When PARANOID is not defined, a precision control value which was not
+ * matched by the tests above falls through to here. */
+LRound_To_24:
+        movl    %esi,%ecx               /* control word saved at Denorm_done */
+        andl    CW_RC,%ecx              /* isolate the rounding control field */
+        cmpl    RC_RND,%ecx
+        je      LRound_nearest_24
+        cmpl    RC_CHOP,%ecx
+        je      LCheck_truncate_24
+        cmpl    RC_UP,%ecx              /* Towards +infinity */
+        je      LUp_24
+        cmpl    RC_DOWN,%ecx            /* Towards -infinity */
+        je      LDown_24
+#ifdef PARANOID
+        jmp     L_bugged_round24
+#endif /* PARANOID */ 
+LUp_24:
+        cmpb    SIGN_POS,PARAM5         /* PARAM5 is the sign of the result */
+        jne     LCheck_truncate_24      /* If negative then  up==truncate */
+        jmp     LCheck_24_round_up
+LDown_24:
+        cmpb    SIGN_POS,PARAM5
+        je      LCheck_truncate_24      /* If positive then  down==truncate */
+LCheck_24_round_up:
+        movl    %eax,%ecx
+        andl    $0x000000ff,%ecx        /* %ecx = OR of every bit to be discarded */
+        orl     %ebx,%ecx
+        orl     %edx,%ecx
+        jnz     LDo_24_round_up         /* anything non-zero forces an increment */
+        jmp     L_Re_normalise          /* exact: nothing to do */
+LRound_nearest_24:
+        /* Do rounding of the 24th bit if needed (nearest or even) */
+        movl    %eax,%ecx
+        andl    $0x000000ff,%ecx
+        cmpl    $0x00000080,%ecx        /* 0x80 in these 8 bits is half of the 24th bit */
+        jc      LCheck_truncate_24      /* less than half, no increment needed */
+        jne     LGreater_Half_24        /* greater than half, increment needed */
+        /* Possibly half, we need to check the ls bits */
+        orl     %ebx,%ebx
+        jnz     LGreater_Half_24        /* greater than half, increment needed */
+        orl     %edx,%edx
+        jnz     LGreater_Half_24        /* greater than half, increment needed */
+        /* Exactly half, increment only if 24th bit is 1 (round to even) */
+        testl   $0x00000100,%eax
+        jz      LDo_truncate_24         /* already even; bits are still lost, so not L_Re_normalise */
+LGreater_Half_24:                       /* Rounding: increment at the 24th bit */
+LDo_24_round_up:
+        andl    $0xffffff00,%eax        /* Truncate to 24 bits */
+        xorl    %ebx,%ebx
+        movb    LOST_UP,FPU_bits_lost
+        addl    $0x00000100,%eax        /* the carry from this add is tested at LCheck_Round_Overflow */
+        jmp     LCheck_Round_Overflow
+LCheck_truncate_24:
+        movl    %eax,%ecx
+        andl    $0x000000ff,%ecx
+        orl     %ebx,%ecx
+        orl     %edx,%ecx
+        jz      L_Re_normalise          /* No truncation needed */
+LDo_truncate_24:
+        andl    $0xffffff00,%eax        /* Truncate to 24 bits */
+        xorl    %ebx,%ebx
+        movb    LOST_DOWN,FPU_bits_lost
+        jmp     L_Re_normalise
+/* Round etc to 53 bit precision.
+ * The bits which are kept are all of %eax and the upper 21 bits of %ebx.
+ * The low 11 bits of %ebx and the extension in %edx are the bits which
+ * get discarded. */
+LRound_To_53:
+        movl    %esi,%ecx               /* control word saved at Denorm_done */
+        andl    CW_RC,%ecx              /* isolate the rounding control field */
+        cmpl    RC_RND,%ecx
+        je      LRound_nearest_53
+        cmpl    RC_CHOP,%ecx
+        je      LCheck_truncate_53
+        cmpl    RC_UP,%ecx              /* Towards +infinity */
+        je      LUp_53
+        cmpl    RC_DOWN,%ecx            /* Towards -infinity */
+        je      LDown_53
+#ifdef PARANOID
+        jmp     L_bugged_round53
+#endif /* PARANOID */ 
+LUp_53:
+        cmpb    SIGN_POS,PARAM5         /* PARAM5 is the sign of the result */
+        jne     LCheck_truncate_53      /* If negative then  up==truncate */
+        jmp     LCheck_53_round_up
+LDown_53:
+        cmpb    SIGN_POS,PARAM5
+        je      LCheck_truncate_53      /* If positive then  down==truncate */
+LCheck_53_round_up:
+        movl    %ebx,%ecx
+        andl    $0x000007ff,%ecx        /* %ecx = OR of every bit to be discarded */
+        orl     %edx,%ecx
+        jnz     LDo_53_round_up         /* anything non-zero forces an increment */
+        jmp     L_Re_normalise          /* exact: nothing to do */
+LRound_nearest_53:
+        /* Do rounding of the 53rd bit if needed (nearest or even) */
+        movl    %ebx,%ecx
+        andl    $0x000007ff,%ecx
+        cmpl    $0x00000400,%ecx        /* 0x400 in these 11 bits is half of the 53rd bit */
+        jc      LCheck_truncate_53      /* less than half, no increment needed */
+        jnz     LGreater_Half_53        /* greater than half, increment needed */
+        /* Possibly half, we need to check the ls bits */
+        orl     %edx,%edx
+        jnz     LGreater_Half_53        /* greater than half, increment needed */
+        /* Exactly half, increment only if 53rd bit is 1 (round to even) */
+        testl   $0x00000800,%ebx
+        jz      LTruncate_53            /* already even; bits are still lost, so not L_Re_normalise */
+LGreater_Half_53:                       /* Rounding: increment at the 53rd bit */
+LDo_53_round_up:
+        movb    LOST_UP,FPU_bits_lost
+        andl    $0xfffff800,%ebx        /* Truncate to 53 bits */
+        addl    $0x00000800,%ebx
+        adcl    $0,%eax                 /* the carry out of %eax is tested at LCheck_Round_Overflow */
+        jmp     LCheck_Round_Overflow
+LCheck_truncate_53:
+        movl    %ebx,%ecx
+        andl    $0x000007ff,%ecx
+        orl     %edx,%ecx
+        jz      L_Re_normalise          /* No truncation needed */
+LTruncate_53:
+        movb    LOST_DOWN,FPU_bits_lost
+        andl    $0xfffff800,%ebx        /* Truncate to 53 bits */
+        jmp     L_Re_normalise
+/* Round etc to 64 bit precision.
+ * All of %eax:%ebx is kept; only the extension in %edx decides whether
+ * anything is lost and which way the significand moves. */
+LRound_To_64:
+        movl    %esi,%ecx               /* control word saved at Denorm_done */
+        andl    CW_RC,%ecx              /* isolate the rounding control field */
+        cmpl    RC_RND,%ecx
+        je      LRound_nearest_64
+        cmpl    RC_CHOP,%ecx
+        je      LCheck_truncate_64
+        cmpl    RC_UP,%ecx              /* Towards +infinity */
+        je      LUp_64
+        cmpl    RC_DOWN,%ecx            /* Towards -infinity */
+        je      LDown_64
+#ifdef PARANOID
+        jmp     L_bugged_round64
+#endif /* PARANOID */ 
+LUp_64:
+        cmpb    SIGN_POS,PARAM5         /* PARAM5 is the sign of the result */
+        jne     LCheck_truncate_64      /* If negative then  up==truncate */
+        orl     %edx,%edx
+        jnz     LDo_64_round_up         /* non-zero extension forces an increment */
+        jmp     L_Re_normalise          /* exact: nothing to do */
+LDown_64:
+        cmpb    SIGN_POS,PARAM5
+        je      LCheck_truncate_64      /* If positive then  down==truncate */
+        orl     %edx,%edx
+        jnz     LDo_64_round_up         /* non-zero extension forces an increment */
+        jmp     L_Re_normalise          /* exact: nothing to do */
+LRound_nearest_64:
+        cmpl    $0x80000000,%edx        /* 0x80000000 in the extension is exactly half an ls bit */
+        jc      LCheck_truncate_64      /* less than half */
+        jne     LDo_64_round_up         /* greater than half */
+        /* Now test for round-to-even */
+        testb   $1,%bl
+        jz      LCheck_truncate_64      /* ls bit already even: truncate */
+LDo_64_round_up:
+        movb    LOST_UP,FPU_bits_lost
+        addl    $1,%ebx
+        adcl    $0,%eax
+/* Common tail for the three round-up paths; entered with the carry flag
+ * left by the add which incremented the significand. */
+LCheck_Round_Overflow:
+        jnc     L_Re_normalise
+        /* Overflow, adjust the result (significand to 1.0) */
+        rcrl    $1,%eax                 /* rotates the carry back in as the ms bit */
+        rcrl    $1,%ebx
+        incw    EXP(%edi)               /* compensate for the one bit right shift */
+        jmp     L_Re_normalise
+LCheck_truncate_64:
+        orl     %edx,%edx
+        jz      L_Re_normalise          /* No truncation needed */
+LTruncate_64:
+        movb    LOST_DOWN,FPU_bits_lost /* then falls through into L_Re_normalise */
+L_Re_normalise:
+        testb   $0xff,FPU_denormal      /* non-zero if L_Make_denorm set DENORMAL or UNMASKED_UNDERFLOW */
+        jnz     Normalise_result
+L_Normalised:
+        movl    TAG_Valid,%edx          /* %edx carries the tag which is returned in %eax */
+L_deNormalised:                         /* entered with the tag already in %edx */
+        cmpb    LOST_UP,FPU_bits_lost
+        je      L_precision_lost_up
+        cmpb    LOST_DOWN,FPU_bits_lost
+        je      L_precision_lost_down
+L_no_precision_loss:                    /* the two precision handlers also return to here */
+        /* store the result */
+L_Store_significand:
+        movl    %eax,SIGH(%edi)
+        movl    %ebx,SIGL(%edi)
+        cmpw    EXP_OVER,EXP(%edi)      /* signed compare of the 16 bit exponent */
+        jge     L_overflow
+        movl    %edx,%eax               /* return value = tag */
+        /* Convert the exponent to 80x87 form. */
+        addw    EXTENDED_Ebias,EXP(%edi)
+        andw    $0x7fff,EXP(%edi)       /* clear bit 15, which is used for the sign below */
+fpu_reg_round_signed_special_exit:
+        cmpb    SIGN_POS,PARAM5
+        je      fpu_reg_round_special_exit
+        orw     $0x8000,EXP(%edi)       /* Negative sign for the result. */
+fpu_reg_round_special_exit:
+#ifndef NON_REENTRANT_FPU
+        popl    %ebx            /* adjust the stack pointer: drops the scratch dword; %ebx is reloaded below */
+#endif /* NON_REENTRANT_FPU */ 
+fpu_Arith_exit:                 /* undoes the frame described in the file header; %eax is the return value */
+        popl    %ebx
+        popl    %edi
+        popl    %esi
+        leave
+        ret
+/*
+ * Set the FPU status flags to represent precision loss due to
+ * round-up.
+ * Reached from L_deNormalised when FPU_bits_lost holds LOST_UP.
+ * %edx (the tag) and %eax (ms half of the significand) are saved around
+ * the call and restored before rejoining the store path.
+ */
+L_precision_lost_up:
+        push    %edx
+        push    %eax
+        call    set_precision_flag_up   /* defined outside this file */
+        popl    %eax
+        popl    %edx
+        jmp     L_no_precision_loss
+/*
+ * Set the FPU status flags to represent precision loss due to
+ * truncation.
+ * Reached from L_deNormalised when FPU_bits_lost holds LOST_DOWN.
+ * %edx (the tag) and %eax (ms half of the significand) are saved around
+ * the call and restored before rejoining the store path.
+ */
+L_precision_lost_down:
+        push    %edx
+        push    %eax
+        call    set_precision_flag_down /* defined outside this file */
+        popl    %eax
+        popl    %edx
+        jmp     L_no_precision_loss
+/*
+ * The number is a denormal (which might get rounded up to a normal)
+ * Shift the number right the required number of bits, which will
+ * have to be undone later...
+ * On entry %ecx holds the control word.  Every path which shifts leaves
+ * the exponent at EXP_UNDER+1, keeps a non-zero marker in %edx whenever
+ * a non-zero bit has been shifted out, and restores %ecx before going
+ * back to Denorm_done.
+ */
+L_Make_denorm:
+        /* The action to be taken depends upon whether the underflow
+           exception is masked */
+        testb   CW_Underflow,%cl                /* Underflow mask. */
+        jz      Unmasked_underflow              /* Do not make a denormal. */
+        movb    DENORMAL,FPU_denormal   /* written before the push below moves %esp */
+        pushl   %ecx            /* Save */
+        movw    EXP_UNDER+1,%cx
+        subw    EXP(%edi),%cx   /* %cx = shift count = (EXP_UNDER+1) - exponent */
+        cmpw    $64,%cx /* a shift of 64 or more bits is handled separately */
+        jnc     Denorm_shift_more_than_63
+        cmpw    $32,%cx /* shrd only works for 0..31 bits */
+        jnc     Denorm_shift_more_than_32
+/*
+ * We got here without jumps by assuming that the most common requirement
+ *   is for a small de-normalising shift.
+ * Shift by [1..31] bits
+ */
+        addw    %cx,EXP(%edi)   /* exponent becomes EXP_UNDER+1 */
+        orl     %edx,%edx       /* extension */
+        setne   %ch             /* Save whether %edx is non-zero */
+        xorl    %edx,%edx
+        shrd    %cl,%ebx,%edx   /* bits leaving %ebx become the new extension */
+        shrd    %cl,%eax,%ebx
+        shr     %cl,%eax
+        orb     %ch,%dl         /* fold the old extension in as a sticky bit */
+        popl    %ecx
+        jmp     Denorm_done
+/* Shift by [32..63] bits */
+Denorm_shift_more_than_32:
+        addw    %cx,EXP(%edi)   /* exponent becomes EXP_UNDER+1 */
+        subb    $32,%cl         /* the other 32 bits are done by the register moves below */
+        orl     %edx,%edx
+        setne   %ch             /* %ch = 1 if the old extension was non-zero */
+        orb     %ch,%bl
+        xorl    %edx,%edx
+        shrd    %cl,%ebx,%edx
+        shrd    %cl,%eax,%ebx
+        shr     %cl,%eax
+        orl     %edx,%edx               /* test these 32 bits */
+        setne   %cl             /* %cl = 1 if anything was shifted out of %ebx */
+        orb     %ch,%bl
+        orb     %cl,%bl
+        movl    %ebx,%edx       /* move everything down by one 32 bit word */
+        movl    %eax,%ebx
+        xorl    %eax,%eax
+        popl    %ecx
+        jmp     Denorm_done
+/* Shift by [64..) bits */
+Denorm_shift_more_than_63:
+        cmpw    $64,%cx
+        jne     Denorm_shift_more_than_64
+/* Exactly 64 bit shift */
+        addw    %cx,EXP(%edi)   /* exponent becomes EXP_UNDER+1 */
+        xorl    %ecx,%ecx
+        orl     %edx,%edx
+        setne   %cl             /* old extension non-zero? */
+        orl     %ebx,%ebx
+        setne   %ch             /* ls word non-zero? */
+        orb     %ch,%cl
+        orb     %cl,%al         /* sticky bit into what becomes the extension */
+        movl    %eax,%edx       /* the ms word becomes the extension */
+        xorl    %eax,%eax
+        xorl    %ebx,%ebx
+        popl    %ecx
+        jmp     Denorm_done
+Denorm_shift_more_than_64:
+        movw    EXP_UNDER+1,EXP(%edi)
+/* This is easy, %eax must be non-zero, so.. */
+        movl    $1,%edx         /* non-zero but less than half an ls bit */
+        xorl    %eax,%eax
+        xorl    %ebx,%ebx
+        popl    %ecx
+        jmp     Denorm_done
+Unmasked_underflow:             /* reached before %ecx was pushed, so there is nothing to pop */
+        movb    UNMASKED_UNDERFLOW,FPU_denormal
+        jmp     Denorm_done
+/* Undo the de-normalisation.
+ * Reached from L_Re_normalise when FPU_denormal is non-zero, with the
+ * rounded significand in %eax:%ebx. */
+Normalise_result:
+        cmpb    UNMASKED_UNDERFLOW,FPU_denormal
+        je      Signal_underflow
+/* The number must be a denormal if we got here. */
+#ifdef PARANOID
+        /* But check it... just in case. */
+        cmpw    EXP_UNDER+1,EXP(%edi)
+        jne     L_norm_bugged
+#endif /* PARANOID */
+#ifdef PECULIAR_486
+        /*
+         * This implements a special feature of 80486 behaviour.
+         * Underflow will be signalled even if the number is
+         * not a denormal after rounding.
+         * This difference occurs only for masked underflow, and not
+         * in the unmasked case.
+         * Actual 80486 behaviour differs from this in some circumstances.
+         */
+        orl     %eax,%eax               /* ms bits */
+        js      LPseudoDenormal         /* Will be masked underflow */
+#else
+        orl     %eax,%eax               /* ms bits */
+        js      L_Normalised            /* No longer a denormal */
+#endif /* PECULIAR_486 */ 
+        jnz     LDenormal_adj_exponent  /* still tests the flags set by the orl above */
+        orl     %ebx,%ebx
+        jz      L_underflow_to_zero     /* The contents are zero */
+LDenormal_adj_exponent:
+        decw    EXP(%edi)               /* from EXP_UNDER+1 to EXP_UNDER */
+LPseudoDenormal:
+        testb   $0xff,FPU_bits_lost     /* bits lost == underflow */
+        movl    TAG_Special,%edx        /* movl leaves the flags from testb intact */
+        jz      L_deNormalised
+        /* There must be a masked underflow */
+        push    %eax                    /* preserve the ms half of the significand */
+        pushl   EX_Underflow            /* argument for EXCEPTION */
+        call    EXCEPTION
+        popl    %eax                    /* discard the argument */
+        popl    %eax                    /* restore the significand */
+        movl    TAG_Special,%edx        /* set again after the call */
+        jmp     L_deNormalised
+/*
+ * The operations resulted in a number too small to represent.
+ * Masked response.
+ * %eax and %ebx are both zero here (tested above), so zero is what
+ * L_Store_significand writes back.
+ */
+L_underflow_to_zero:
+        push    %eax
+        call    set_precision_flag_down
+        popl    %eax
+        push    %eax                    /* preserve %eax across the call */
+        pushl   EX_Underflow            /* argument for EXCEPTION */
+        call    EXCEPTION
+        popl    %eax                    /* discard the argument */
+        popl    %eax                    /* restore %eax */
+/* Reduce the exponent to EXP_UNDER */
+        movw    EXP_UNDER,EXP(%edi)
+        movl    TAG_Zero,%edx
+        jmp     L_Store_significand     /* skips the precision flag tests at L_deNormalised */
+/* The operations resulted in a number too large to represent.
+ * Reached from L_Store_significand after the significand has been stored. */
+L_overflow:
+        addw    EXTENDED_Ebias,EXP(%edi)        /* Set for unmasked response. */
+        push    %edi                    /* argument: the FPU_REG pointer */
+        call    arith_overflow          /* %eax is not modified again before the ret */
+        pop     %edi
+        jmp     fpu_reg_round_signed_special_exit
+Signal_underflow:
+        /* The number may have been changed to a non-denormal */
+        /* by the rounding operations. */
+        cmpw    EXP_UNDER,EXP(%edi)
+        jle     Do_unmasked_underflow
+        jmp     L_Normalised
+Do_unmasked_underflow:
+        /* Increase the exponent by the magic number */
+        addw    $(3*(1<<13)),EXP(%edi)  /* 3*(1<<13) = 24576 */
+        push    %eax                    /* preserve the ms half of the significand */
+        pushl   EX_Underflow            /* argument for EXCEPTION */
+        call    EXCEPTION
+        popl    %eax                    /* discard the argument */
+        popl    %eax                    /* restore the significand */
+        jmp     L_Normalised
+#ifdef PARANOID
+#ifdef PECULIAR_486
+L_bugged_denorm_486:            /* precision control field matched none of the tested values */
+        pushl   EX_INTERNAL|0x236
+        call    EXCEPTION
+        popl    %ebx                    /* discard the argument */
+        jmp     L_exception_exit
+#else
+L_bugged_denorm:                /* precision control field matched none of the tested values */
+        pushl   EX_INTERNAL|0x230
+        call    EXCEPTION
+        popl    %ebx                    /* discard the argument */
+        jmp     L_exception_exit
+#endif /* PECULIAR_486 */ 
+L_bugged_round24:               /* rounding control field matched none of the four cases (24 bit) */
+        pushl   EX_INTERNAL|0x231
+        call    EXCEPTION
+        popl    %ebx                    /* discard the argument */
+        jmp     L_exception_exit
+L_bugged_round53:               /* rounding control field matched none of the four cases (53 bit) */
+        pushl   EX_INTERNAL|0x232
+        call    EXCEPTION
+        popl    %ebx                    /* discard the argument */
+        jmp     L_exception_exit
+L_bugged_round64:               /* rounding control field matched none of the four cases (64 bit) */
+        pushl   EX_INTERNAL|0x233
+        call    EXCEPTION
+        popl    %ebx                    /* discard the argument */
+        jmp     L_exception_exit
+L_norm_bugged:                  /* exponent was not EXP_UNDER+1 at Normalise_result */
+        pushl   EX_INTERNAL|0x234
+        call    EXCEPTION
+        popl    %ebx                    /* discard the argument */
+        jmp     L_exception_exit
+L_entry_bugged:                 /* only referenced from the commented-out check at fpu_reg_round */
+        pushl   EX_INTERNAL|0x235
+        call    EXCEPTION
+        popl    %ebx                    /* discard the argument; falls through */
+L_exception_exit:
+        mov     $-1,%eax                /* return value for an internal error */
+        jmp     fpu_reg_round_special_exit
+#endif /* PARANOID */
author	Thomas Gleixner <tglx@linutronix.de>	2007-10-11 05:16:31 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2007-10-11 05:16:31 -0400
commit	da957e111bb0c189a4a3bf8a00caaecb59ed94ca (patch)
tree	6916075fdd3e28869dcd3dfa2cf160a74d1cb02e /arch/x86/math-emu/reg_round.S
parent	2ec1df4130c60d1eb49dc0fa0ed15858fede6b05 (diff)

diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S new file mode 100644 index 000000000000..d1d4e48b4f67 --- /dev/null +++ b/arch/x86/math-emu/reg_round.S
@@ -0,0 +1,708 @@
	1	.file "reg_round.S"
	2	/*---------------------------------------------------------------------------+
	3	\| reg_round.S \|
	4	\| \|
	5	\| Rounding/truncation/etc for FPU basic arithmetic functions. \|
	6	\| \|
	7	\| Copyright (C) 1993,1995,1997 \|
	8	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|
	9	\| Australia. E-mail billm@suburbia.net \|
	10	\| \|
	11	\| This code has four possible entry points. \|
	12	\| The following must be entered by a jmp instruction: \|
	13	\| fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. \|
	14	\| \|
	15	\| The FPU_round entry point is intended to be used by C code. \|
	16	\| From C, call as: \|
	17	\| int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) \|
	18	\| \|
	19	\| Return value is the tag of the answer, or-ed with FPU_Exception if \|
	20	\| one was raised, or -1 on internal error. \|
	21	\| \|
	22	\| For correct "up" and "down" rounding, the argument must have the correct \|
	23	\| sign. \|
	24	\| \|
	25	+---------------------------------------------------------------------------*/
	26
	27	/*---------------------------------------------------------------------------+
	28	\| Four entry points. \|
	29	\| \|
	30	\| Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: \|
	31	\| %eax:%ebx 64 bit significand \|
	32	\| %edx 32 bit extension of the significand \|
	33	\| %edi pointer to an FPU_REG for the result to be stored \|
	34	\| stack calling function must have set up a C stack frame and \|
	35	\| pushed %esi, %edi, and %ebx \|
	36	\| \|
	37	\| Needed just for the fpu_reg_round_sqrt entry point: \|
	38	\| %cx A control word in the same format as the FPU control word. \|
	39	\| Otherwise, PARAM4 must give such a value. \|
	40	\| \|
	41	\| \|
	42	\| The significand and its extension are assumed to be exact in the \|
	43	\| following sense: \|
	44	\| If the significand by itself is the exact result then the significand \|
	45	\| extension (%edx) must contain 0, otherwise the significand extension \|
	46	\| must be non-zero. \|
	47	\| If the significand extension is non-zero then the significand is \|
	48	\| smaller than the magnitude of the correct exact result by an amount \|
	49	\| greater than zero and less than one ls bit of the significand. \|
	50	\| The significand extension is only required to have three possible \|
	51	\| non-zero values: \|
	52	\| less than 0x80000000 <=> the significand is less than 1/2 an ls \|
	53	\| bit smaller than the magnitude of the \|
	54	\| true exact result. \|
	55	\| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit \|
	56	\| smaller than the magnitude of the true \|
	57	\| exact result. \|
	58	\| greater than 0x80000000 <=> the significand is more than 1/2 an ls \|
	59	\| bit smaller than the magnitude of the \|
	60	\| true exact result. \|
	61	\| \|
	62	+---------------------------------------------------------------------------*/
	63
	64	/*---------------------------------------------------------------------------+
	65	\| The code in this module has become quite complex, but it should handle \|
	66	\| all of the FPU flags which are set at this stage of the basic arithmetic \|
	67	\| computations. \|
	68	\| There are a few rare cases where the results are not set identically to \|
	69	\| a real FPU. These require a bit more thought because at this stage the \|
	70	\| results of the code here appear to be more consistent... \|
	71	\| This may be changed in a future version. \|
	72	+---------------------------------------------------------------------------*/
	73
	74
	75	#include "fpu_emu.h"
	76	#include "exception.h"
	77	#include "control_w.h"
	78
	79	/* Flags for FPU_bits_lost */
	80	#define LOST_DOWN $1
	81	#define LOST_UP $2
	82
	83	/* Flags for FPU_denormal */
	84	#define DENORMAL $1
	85	#define UNMASKED_UNDERFLOW $2
	86
	87
	88	#ifndef NON_REENTRANT_FPU
	89	/* Make the code re-entrant by putting
	90	local storage on the stack: */
	91	#define FPU_bits_lost (%esp)
	92	#define FPU_denormal 1(%esp)
	93
	94	#else
	95	/* Not re-entrant, so we can gain speed by putting
	96	local storage in a static area: */
	97	.data
	98	.align 4,0
	99	FPU_bits_lost:
	100	.byte 0
	101	FPU_denormal:
	102	.byte 0
	103	#endif /* NON_REENTRANT_FPU */
	104
	105
	106	.text
	107	.globl fpu_reg_round
	108	.globl fpu_Arith_exit
	109
	110	/* Entry point when called from C */
	111	ENTRY(FPU_round)
	112	pushl %ebp
	113	movl %esp,%ebp
	114	pushl %esi
	115	pushl %edi
	116	pushl %ebx
	117
	118	movl PARAM1,%edi
	119	movl SIGH(%edi),%eax
	120	movl SIGL(%edi),%ebx
	121	movl PARAM2,%edx
	122
	123	fpu_reg_round: /* Normal entry point */
	124	movl PARAM4,%ecx
	125
	126	#ifndef NON_REENTRANT_FPU
	127	pushl %ebx /* adjust the stack pointer */
	128	#endif /* NON_REENTRANT_FPU */
	129
	130	#ifdef PARANOID
	131	/* Cannot use this here yet */
	132	/* orl %eax,%eax */
	133	/* jns L_entry_bugged */
	134	#endif /* PARANOID */
	135
	136	cmpw EXP_UNDER,EXP(%edi)
	137	jle L_Make_denorm /* The number is a de-normal */
	138
	139	movb $0,FPU_denormal /* 0 -> not a de-normal */
	140
	141	Denorm_done:
	142	movb $0,FPU_bits_lost /* No bits yet lost in rounding */
	143
	144	movl %ecx,%esi
	145	andl CW_PC,%ecx
	146	cmpl PR_64_BITS,%ecx
	147	je LRound_To_64
	148
	149	cmpl PR_53_BITS,%ecx
	150	je LRound_To_53
	151
	152	cmpl PR_24_BITS,%ecx
	153	je LRound_To_24
	154
	155	#ifdef PECULIAR_486
	156	/* With the precision control bits set to 01 "(reserved)", a real 80486
	157	behaves as if the precision control bits were set to 11 "64 bits" */
	158	cmpl PR_RESERVED_BITS,%ecx
	159	je LRound_To_64
	160	#ifdef PARANOID
	161	jmp L_bugged_denorm_486
	162	#endif /* PARANOID */
	163	#else
	164	#ifdef PARANOID
	165	jmp L_bugged_denorm /* There is no bug, just a bad control word */
	166	#endif /* PARANOID */
	167	#endif /* PECULIAR_486 */
	168
	169
	170	/* Round etc to 24 bit precision */
	171	LRound_To_24:
	172	movl %esi,%ecx
	173	andl CW_RC,%ecx
	174	cmpl RC_RND,%ecx
	175	je LRound_nearest_24
	176
	177	cmpl RC_CHOP,%ecx
	178	je LCheck_truncate_24
	179
	180	cmpl RC_UP,%ecx /* Towards +infinity */
	181	je LUp_24
	182
	183	cmpl RC_DOWN,%ecx /* Towards -infinity */
	184	je LDown_24
	185
	186	#ifdef PARANOID
	187	jmp L_bugged_round24
	188	#endif /* PARANOID */
	189
	190	LUp_24:
	191	cmpb SIGN_POS,PARAM5
	192	jne LCheck_truncate_24 /* If negative then up==truncate */
	193
	194	jmp LCheck_24_round_up
	195
	196	LDown_24:
	197	cmpb SIGN_POS,PARAM5
	198	je LCheck_truncate_24 /* If positive then down==truncate */
	199
	200	LCheck_24_round_up:
	201	movl %eax,%ecx
	202	andl $0x000000ff,%ecx
	203	orl %ebx,%ecx
	204	orl %edx,%ecx
	205	jnz LDo_24_round_up
	206	jmp L_Re_normalise
	207
	208	LRound_nearest_24:
	209	/* Do rounding of the 24th bit if needed (nearest or even) */
	210	movl %eax,%ecx
	211	andl $0x000000ff,%ecx
	212	cmpl $0x00000080,%ecx
	213	jc LCheck_truncate_24 /* less than half, no increment needed */
	214
	215	jne LGreater_Half_24 /* greater than half, increment needed */
	216
	217	/* Possibly half, we need to check the ls bits */
	218	orl %ebx,%ebx
	219	jnz LGreater_Half_24 /* greater than half, increment needed */
	220
	221	orl %edx,%edx
	222	jnz LGreater_Half_24 /* greater than half, increment needed */
	223
	224	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	225	testl $0x00000100,%eax
	226	jz LDo_truncate_24
	227
	228	LGreater_Half_24: /* Rounding: increment at the 24th bit */
	229	LDo_24_round_up:
	230	andl $0xffffff00,%eax /* Truncate to 24 bits */
	231	xorl %ebx,%ebx
	232	movb LOST_UP,FPU_bits_lost
	233	addl $0x00000100,%eax
	234	jmp LCheck_Round_Overflow
	235
	236	LCheck_truncate_24:
	237	movl %eax,%ecx
	238	andl $0x000000ff,%ecx
	239	orl %ebx,%ecx
	240	orl %edx,%ecx
	241	jz L_Re_normalise /* No truncation needed */
	242
	243	LDo_truncate_24:
	244	andl $0xffffff00,%eax /* Truncate to 24 bits */
	245	xorl %ebx,%ebx
	246	movb LOST_DOWN,FPU_bits_lost
	247	jmp L_Re_normalise
	248
	249
	250	/* Round etc to 53 bit precision: keep %eax and the top 21 bits of %ebx (mask 0xfffff800) */
	251	LRound_To_53:
	252	movl %esi,%ecx
	253	andl CW_RC,%ecx /* isolate the rounding-control field of the value in %esi */
	254	cmpl RC_RND,%ecx
	255	je LRound_nearest_53
	256
	257	cmpl RC_CHOP,%ecx
	258	je LCheck_truncate_53
	259
	260	cmpl RC_UP,%ecx /* Towards +infinity */
	261	je LUp_53
	262
	263	cmpl RC_DOWN,%ecx /* Towards -infinity */
	264	je LDown_53
	265
	266	#ifdef PARANOID
	267	jmp L_bugged_round53
	268	#endif /* PARANOID */
	269
	270	LUp_53: /* Without PARANOID an unmatched rounding mode falls through to here */
	271	cmpb SIGN_POS,PARAM5
	272	jne LCheck_truncate_53 /* If negative then up==truncate */
	273
	274	jmp LCheck_53_round_up
	275
	276	LDown_53:
	277	cmpb SIGN_POS,PARAM5
	278	je LCheck_truncate_53 /* If positive then down==truncate */
	279
	280	LCheck_53_round_up: /* Increment the magnitude if any bit below the 53rd is set */
	281	movl %ebx,%ecx
	282	andl $0x000007ff,%ecx /* low 11 bits of %ebx lie below the 53rd bit */
	283	orl %edx,%ecx /* ...as does all of %edx */
	284	jnz LDo_53_round_up
	285	jmp L_Re_normalise /* No bits below the 53rd: nothing to round */
	286
	287	LRound_nearest_53:
	288	/* Do rounding of the 53rd bit if needed (nearest or even) */
	289	movl %ebx,%ecx
	290	andl $0x000007ff,%ecx
	291	cmpl $0x00000400,%ecx /* 0x400 == exactly half, if %edx is zero */
	292	jc LCheck_truncate_53 /* less than half, no increment needed */
	293
	294	jnz LGreater_Half_53 /* greater than half, increment needed */
	295
	296	/* Possibly half, we need to check the ls bits */
	297	orl %edx,%edx
	298	jnz LGreater_Half_53 /* greater than half, increment needed */
	299
	300	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	301	testl $0x00000800,%ebx
	302	jz LTruncate_53 /* 53rd bit is 0: already even, so just drop the half */
	303
	304	LGreater_Half_53: /* Rounding: increment at the 53rd bit */
	305	LDo_53_round_up:
	306	movb LOST_UP,FPU_bits_lost
	307	andl $0xfffff800,%ebx /* Truncate to 53 bits */
	308	addl $0x00000800,%ebx
	309	adcl $0,%eax /* propagate the carry; carry out of %eax is tested at LCheck_Round_Overflow */
	310	jmp LCheck_Round_Overflow
	311
	312	LCheck_truncate_53: /* Truncate, but only record lost bits if some were actually set */
	313	movl %ebx,%ecx
	314	andl $0x000007ff,%ecx
	315	orl %edx,%ecx
	316	jz L_Re_normalise /* No truncation needed */
	317
	318	LTruncate_53:
	319	movb LOST_DOWN,FPU_bits_lost /* remember that the discarded bits were non-zero */
	320	andl $0xfffff800,%ebx /* Truncate to 53 bits */
	321	jmp L_Re_normalise
	322
	323
	324	/* Round etc to 64 bit precision: %eax:%ebx is kept whole, only %edx is rounded away */
	325	LRound_To_64:
	326	movl %esi,%ecx
	327	andl CW_RC,%ecx /* isolate the rounding-control field of the value in %esi */
	328	cmpl RC_RND,%ecx
	329	je LRound_nearest_64
	330
	331	cmpl RC_CHOP,%ecx
	332	je LCheck_truncate_64
	333
	334	cmpl RC_UP,%ecx /* Towards +infinity */
	335	je LUp_64
	336
	337	cmpl RC_DOWN,%ecx /* Towards -infinity */
	338	je LDown_64
	339
	340	#ifdef PARANOID
	341	jmp L_bugged_round64
	342	#endif /* PARANOID */
	343
	344	LUp_64: /* Without PARANOID an unmatched rounding mode falls through to here */
	345	cmpb SIGN_POS,PARAM5
	346	jne LCheck_truncate_64 /* If negative then up==truncate */
	347
	348	orl %edx,%edx /* any bits below the 64th? */
	349	jnz LDo_64_round_up
	350	jmp L_Re_normalise /* No extension bits: nothing to round */
	351
	352	LDown_64:
	353	cmpb SIGN_POS,PARAM5
	354	je LCheck_truncate_64 /* If positive then down==truncate */
	355
	356	orl %edx,%edx /* any bits below the 64th? */
	357	jnz LDo_64_round_up
	358	jmp L_Re_normalise /* No extension bits: nothing to round */
	359
	360	LRound_nearest_64: /* Round to nearest or even, using %edx as the fraction */
	361	cmpl $0x80000000,%edx /* 0x80000000 == exactly half */
	362	jc LCheck_truncate_64 /* less than half, no increment needed */
	363
	364	jne LDo_64_round_up /* greater than half, increment needed */
	365
	366	/* Now test for round-to-even */
	367	testb $1,%bl
	368	jz LCheck_truncate_64 /* ls bit is 0: already even, so truncate */
	369
	370	LDo_64_round_up:
	371	movb LOST_UP,FPU_bits_lost
	372	addl $1,%ebx
	373	adcl $0,%eax /* propagate the carry into the ms word */
	374
	375	LCheck_Round_Overflow: /* Entered with CF = carry out of the ms word after an increment */
	376	jnc L_Re_normalise
	377
	378	/* Overflow, adjust the result (significand to 1.0) */
	379	rcrl $1,%eax /* rotate the carry back in as the new ms bit */
	380	rcrl $1,%ebx
	381	incw EXP(%edi) /* compensate for the one-bit right shift */
	382	jmp L_Re_normalise
	383
	384	LCheck_truncate_64: /* Truncate, but only record lost bits if %edx was non-zero */
	385	orl %edx,%edx
	386	jz L_Re_normalise /* No truncation needed */
	387
	388	LTruncate_64:
	389	movb LOST_DOWN,FPU_bits_lost /* then falls through to L_Re_normalise */
	390
	391	L_Re_normalise: /* Common exit from rounding: undo any de-normalising shift, then store */
	392	testb $0xff,FPU_denormal
	393	jnz Normalise_result /* non-zero: the denormal/underflow path set this flag */
	394
	395	L_Normalised:
	396	movl TAG_Valid,%edx /* tag for an ordinary result; ends up in %eax below */
	397
	398	L_deNormalised: /* Entered with the result tag already in %edx */
	399	cmpb LOST_UP,FPU_bits_lost
	400	je L_precision_lost_up
	401
	402	cmpb LOST_DOWN,FPU_bits_lost
	403	je L_precision_lost_down
	404
	405	L_no_precision_loss: /* The precision-flag handlers also jump back to here */
	406	/* store the result */
	407
	408	L_Store_significand:
	409	movl %eax,SIGH(%edi) /* ms 32 bits of the significand */
	410	movl %ebx,SIGL(%edi) /* ls 32 bits of the significand */
	411
	412	cmpw EXP_OVER,EXP(%edi)
	413	jge L_overflow /* signed compare: exponent >= EXP_OVER */
	414
	415	movl %edx,%eax /* tag goes to %eax, which is not modified again before ret */
	416
	417	/* Convert the exponent to 80x87 form. */
	418	addw EXTENDED_Ebias,EXP(%edi)
	419	andw $0x7fff,EXP(%edi) /* clear bit 15, which carries the sign (set below if negative) */
	420
	421	fpu_reg_round_signed_special_exit:
	422
	423	cmpb SIGN_POS,PARAM5
	424	je fpu_reg_round_special_exit
	425
	426	orw $0x8000,EXP(%edi) /* Negative sign for the result. */
	427
	428	fpu_reg_round_special_exit:
	429
	430	#ifndef NON_REENTRANT_FPU
	431	popl %ebx /* adjust the stack pointer */ /* value discarded: %ebx is reloaded by the next pop */
	432	#endif /* NON_REENTRANT_FPU */
	433
	434	fpu_Arith_exit: /* Restore registers (presumably saved by the entry code, not visible here) and return */
	435	popl %ebx
	436	popl %edi
	437	popl %esi
	438	leave
	439	ret
	440
	441
	442	/*
	443	* Set the FPU status flags to represent precision loss due to
	444	* round-up. Reached from L_deNormalised when FPU_bits_lost == LOST_UP.
	445	*/
	446	L_precision_lost_up:
	447	push %edx /* keep the result tag across the call */
	448	push %eax /* keep the ms significand word across the call */
	449	call set_precision_flag_up
	450	popl %eax
	451	popl %edx
	452	jmp L_no_precision_loss /* continue with storing the result */
	453
	454	/*
	455	* Set the FPU status flags to represent precision loss due to
	456	* truncation. Reached from L_deNormalised when FPU_bits_lost == LOST_DOWN.
	457	*/
	458	L_precision_lost_down:
	459	push %edx /* keep the result tag across the call */
	460	push %eax /* keep the ms significand word across the call */
	461	call set_precision_flag_down
	462	popl %eax
	463	popl %edx
	464	jmp L_no_precision_loss /* continue with storing the result */
	465
	466
	467	/*
	468	* The number is a denormal (which might get rounded up to a normal)
	469	* Shift the number right the required number of bits, which will
	470	* have to be undone later...
	471	*/
	472	L_Make_denorm:
	473	/* The action to be taken depends upon whether the underflow
	474	exception is masked */
	475	testb CW_Underflow,%cl /* Underflow mask. */
	476	jz Unmasked_underflow /* Do not make a denormal. */
	477
	478	movb DENORMAL,FPU_denormal /* tells L_Re_normalise to go via Normalise_result */
	479
	480	pushl %ecx /* Save */
	481	movw EXP_UNDER+1,%cx
	482	subw EXP(%edi),%cx /* %cx = (EXP_UNDER+1) - exponent = right-shift count */
	483
	484	cmpw $64,%cx /* shifts of 64 or more bits are handled separately */
	485	jnc Denorm_shift_more_than_63
	486
	487	cmpw $32,%cx /* shrd only works for 0..31 bits */
	488	jnc Denorm_shift_more_than_32
	489
	490	/*
	491	* We got here without jumps by assuming that the most common requirement
	492	* is for a small de-normalising shift.
	493	* Shift by [1..31] bits
	494	*/
	495	addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */
	496	orl %edx,%edx /* extension */
	497	setne %ch /* Save whether %edx is non-zero */
	498	xorl %edx,%edx
	499	shrd %cl,%ebx,%edx /* bits shifted out of %ebx become the new extension */
	500	shrd %cl,%eax,%ebx
	501	shr %cl,%eax
	502	orb %ch,%dl /* fold the old extension into the ls bit as a sticky bit */
	503	popl %ecx /* restore the value saved above */
	504	jmp Denorm_done
	505
	506	/* Shift by [32..63] bits */
	507	Denorm_shift_more_than_32:
	508	addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */
	509	subb $32,%cl /* shift by (count-32) bits, then move whole words below */
	510	orl %edx,%edx
	511	setne %ch /* sticky: old extension was non-zero */
	512	orb %ch,%bl
	513	xorl %edx,%edx
	514	shrd %cl,%ebx,%edx
	515	shrd %cl,%eax,%ebx
	516	shr %cl,%eax
	517	orl %edx,%edx /* test these 32 bits */
	518	setne %cl /* sticky: bits shifted out of %ebx were non-zero */
	519	orb %ch,%bl
	520	orb %cl,%bl
	521	movl %ebx,%edx /* the remaining 32 bits of shift: move each word down one place */
	522	movl %eax,%ebx
	523	xorl %eax,%eax
	524	popl %ecx /* restore the value saved above */
	525	jmp Denorm_done
	526
	527	/* Shift by [64..) bits */
	528	Denorm_shift_more_than_63:
	529	cmpw $64,%cx
	530	jne Denorm_shift_more_than_64
	531
	532	/* Exactly 64 bit shift */
	533	addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */
	534	xorl %ecx,%ecx
	535	orl %edx,%edx
	536	setne %cl /* sticky: old extension was non-zero */
	537	orl %ebx,%ebx
	538	setne %ch /* sticky: ls word was non-zero */
	539	orb %ch,%cl
	540	orb %cl,%al /* fold both sticky bits into the ls bit of the ms word */
	541	movl %eax,%edx /* the ms word becomes the extension */
	542	xorl %eax,%eax
	543	xorl %ebx,%ebx
	544	popl %ecx /* restore the value saved above */
	545	jmp Denorm_done
	546
	547	Denorm_shift_more_than_64:
	548	movw EXP_UNDER+1,EXP(%edi)
	549	/* This is easy, %eax must be non-zero, so.. */
	550	movl $1,%edx /* everything is shifted out; leave only a sticky bit in the extension */
	551	xorl %eax,%eax
	552	xorl %ebx,%ebx
	553	popl %ecx /* restore the value saved above */
	554	jmp Denorm_done
	555
	556
	557	Unmasked_underflow: /* Reached before %ecx is pushed, so there is nothing to pop here */
	558	movb UNMASKED_UNDERFLOW,FPU_denormal /* checked later in Normalise_result */
	559	jmp Denorm_done
	560
	561
	562	/* Undo the de-normalisation. Reached from L_Re_normalise when FPU_denormal is non-zero. */
	563	Normalise_result:
	564	cmpb UNMASKED_UNDERFLOW,FPU_denormal
	565	je Signal_underflow /* unmasked case: no denormal was made */
	566
	567	/* The number must be a denormal if we got here. */
	568	#ifdef PARANOID
	569	/* But check it... just in case. */
	570	cmpw EXP_UNDER+1,EXP(%edi)
	571	jne L_norm_bugged
	572	#endif /* PARANOID */
	573
	574	#ifdef PECULIAR_486
	575	/*
	576	* This implements a special feature of 80486 behaviour.
	577	* Underflow will be signalled even if the number is
	578	* not a denormal after rounding.
	579	* This difference occurs only for masked underflow, and not
	580	* in the unmasked case.
	581	* Actual 80486 behaviour differs from this in some circumstances.
	582	*/
	583	orl %eax,%eax /* ms bits */
	584	js LPseudoDenormal /* Will be masked underflow */
	585	#else
	586	orl %eax,%eax /* ms bits */
	587	js L_Normalised /* No longer a denormal */
	588	#endif /* PECULIAR_486 */
	589
	590	jnz LDenormal_adj_exponent /* flags still from the orl: ms word is non-zero */
	591
	592	orl %ebx,%ebx
	593	jz L_underflow_to_zero /* The contents are zero */
	594
	595	LDenormal_adj_exponent:
	596	decw EXP(%edi)
	597
	598	LPseudoDenormal:
	599	testb $0xff,FPU_bits_lost /* bits lost == underflow */
	600	movl TAG_Special,%edx /* movl does not alter the flags set by the testb */
	601	jz L_deNormalised /* nothing lost: no underflow to signal */
	602
	603	/* There must be a masked underflow */
	604	push %eax /* save the ms significand word across the call */
	605	pushl EX_Underflow /* argument for EXCEPTION */
	606	call EXCEPTION
	607	popl %eax /* discard the argument */
	608	popl %eax /* restore the saved ms word */
	609	movl TAG_Special,%edx /* set the tag again after the call */
	610	jmp L_deNormalised
	611
	612
	613	/*
	614	* The operations resulted in a number too small to represent.
	615	* Masked response. Normalise_result jumps here when %eax and %ebx are both zero.
	616	*/
	617	L_underflow_to_zero:
	618	push %eax /* save %eax across the call */
	619	call set_precision_flag_down
	620	popl %eax
	621
	622	push %eax /* save %eax across the call */
	623	pushl EX_Underflow /* argument for EXCEPTION */
	624	call EXCEPTION
	625	popl %eax /* discard the argument */
	626	popl %eax /* restore the saved %eax */
	627
	628	/* Reduce the exponent to EXP_UNDER */
	629	movw EXP_UNDER,EXP(%edi)
	630	movl TAG_Zero,%edx /* tag for the stored result */
	631	jmp L_Store_significand /* skips the FPU_bits_lost checks at L_deNormalised */
	632
	633
	634	/* The operations resulted in a number too large to represent. */
	635	L_overflow: /* Reached from L_Store_significand when the exponent is >= EXP_OVER */
	636	addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
	637	push %edi /* argument for arith_overflow: the register being written */
	638	call arith_overflow
	639	pop %edi /* remove the argument, restoring %edi */
	640	jmp fpu_reg_round_signed_special_exit /* %eax is left as arith_overflow set it (presumably its return value) */
	641
	642
	643	Signal_underflow: /* Reached from Normalise_result when FPU_denormal == UNMASKED_UNDERFLOW */
	644	/* The number may have been changed to a non-denormal */
	645	/* by the rounding operations. */
	646	cmpw EXP_UNDER,EXP(%edi)
	647	jle Do_unmasked_underflow /* signed compare: exponent <= EXP_UNDER */
	648
	649	jmp L_Normalised /* rounding lifted the exponent above EXP_UNDER: no underflow */
	650
	651	Do_unmasked_underflow:
	652	/* Increase the exponent by the magic number */
	653	addw $(3*(1<<13)),EXP(%edi)
	654	push %eax /* save the ms significand word across the call */
	655	pushl EX_Underflow /* argument for EXCEPTION */
	656	call EXCEPTION
	657	popl %eax /* discard the argument */
	658	popl %eax /* restore the saved ms word */
	659	jmp L_Normalised
	660
	661
	662	#ifdef PARANOID
	663	#ifdef PECULIAR_486
	664	L_bugged_denorm_486: /* Internal-error handlers: each reports EX_INTERNAL or'ed with its own code */
	665	pushl EX_INTERNAL|0x236
	666	call EXCEPTION
	667	popl %ebx /* discard the argument */
	668	jmp L_exception_exit
	669	#else
	670	L_bugged_denorm: /* Internal-error handlers: each reports EX_INTERNAL or'ed with its own code */
	671	pushl EX_INTERNAL|0x230
	672	call EXCEPTION
	673	popl %ebx /* discard the argument */
	674	jmp L_exception_exit
	675	#endif /* PECULIAR_486 */
	676
	677	L_bugged_round24: /* target of the PARANOID jmp after the rounding-mode compares; presumably the 24 bit ones */
	678	pushl EX_INTERNAL|0x231
	679	call EXCEPTION
	680	popl %ebx /* discard the argument */
	681	jmp L_exception_exit
	682
	683	L_bugged_round53: /* no rounding mode matched in LRound_To_53 */
	684	pushl EX_INTERNAL|0x232
	685	call EXCEPTION
	686	popl %ebx /* discard the argument */
	687	jmp L_exception_exit
	688
	689	L_bugged_round64: /* no rounding mode matched in LRound_To_64 */
	690	pushl EX_INTERNAL|0x233
	691	call EXCEPTION
	692	popl %ebx /* discard the argument */
	693	jmp L_exception_exit
	694
	695	L_norm_bugged: /* Normalise_result found an exponent other than EXP_UNDER+1 */
	696	pushl EX_INTERNAL|0x234
	697	call EXCEPTION
	698	popl %ebx /* discard the argument */
	699	jmp L_exception_exit
	700
	701	L_entry_bugged:
	702	pushl EX_INTERNAL|0x235
	703	call EXCEPTION
	704	popl %ebx /* discard the argument, then fall through */
	705	L_exception_exit:
	706	mov $-1,%eax /* %eax = -1 on every internal-error path */
	707	jmp fpu_reg_round_special_exit /* leave via the common exit; nothing is stored to the destination on this path */
	708	#endif /* PARANOID */