1 files changed, 135 insertions, 0 deletions
diff --git a/arch/i386/math-emu/polynom_Xsig.S b/arch/i386/math-emu/polynom_Xsig.S
new file mode 100644
index 000000000000..17315c89ff3d
--- /dev/null
+++ b/arch/i386/math-emu/polynom_Xsig.S
@@ -0,0 +1,135 @@
+/*---------------------------------------------------------------------------+
+ |  polynomial_Xsig.S                                                        |
+ |                                                                           |
+ | Fixed point arithmetic polynomial evaluation.                             |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994,1995                                         |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 |
+ |                        unsigned long long terms[], int n)                 |
+ |                                                                           |
+ | Computes:                                                                 |
+ | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  |
+ | and adds the result to the 12 byte Xsig.                                  |
+ | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
+ | precision.                                                                |
+ |                                                                           |
+ | This function must be used carefully: most overflow of intermediate       |
+ | results is controlled, but overflow of the result is not.                 |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+        .file   "polynomial_Xsig.S"
+#include "fpu_emu.h"
+#define TERM_SIZE       $8
+#define SUM_MS          -20(%ebp)       /* sum ms long */
+#define SUM_MIDDLE      -24(%ebp)       /* sum middle long */
+#define SUM_LS          -28(%ebp)       /* sum ls long */
+#define ACCUM_MS        -4(%ebp)        /* accum ms long */
+#define ACCUM_MIDDLE    -8(%ebp)        /* accum middle long */
+#define ACCUM_LS        -12(%ebp)       /* accum ls long */
+#define OVERFLOWED      -16(%ebp)       /* addition overflow flag */
+.text
+ENTRY(polynomial_Xsig)
+        pushl   %ebp
+        movl    %esp,%ebp
+        subl    $32,%esp
+        pushl   %esi
+        pushl   %edi
+        pushl   %ebx
+        movl    PARAM2,%esi             /* x */
+        movl    PARAM3,%edi             /* terms */
+        movl    TERM_SIZE,%eax
+        mull    PARAM4                  /* n */
+        addl    %eax,%edi
+        movl    4(%edi),%edx            /* terms[n] */
+        movl    %edx,SUM_MS
+        movl    (%edi),%edx             /* terms[n] */
+        movl    %edx,SUM_MIDDLE
+        xor     %eax,%eax
+        movl    %eax,SUM_LS
+        movb    %al,OVERFLOWED
+        subl    TERM_SIZE,%edi
+        decl    PARAM4
+        js      L_accum_done
+L_accum_loop:
+        xor     %eax,%eax
+        movl    %eax,ACCUM_MS
+        movl    %eax,ACCUM_MIDDLE
+        movl    SUM_MIDDLE,%eax
+        mull    (%esi)                  /* x ls long */
+        movl    %edx,ACCUM_LS
+        movl    SUM_MIDDLE,%eax
+        mull    4(%esi)                 /* x ms long */
+        addl    %eax,ACCUM_LS
+        adcl    %edx,ACCUM_MIDDLE
+        adcl    $0,ACCUM_MS
+        movl    SUM_MS,%eax
+        mull    (%esi)                  /* x ls long */
+        addl    %eax,ACCUM_LS
+        adcl    %edx,ACCUM_MIDDLE
+        adcl    $0,ACCUM_MS
+        movl    SUM_MS,%eax
+        mull    4(%esi)                 /* x ms long */
+        addl    %eax,ACCUM_MIDDLE
+        adcl    %edx,ACCUM_MS
+        testb   $0xff,OVERFLOWED
+        jz      L_no_overflow
+        movl    (%esi),%eax
+        addl    %eax,ACCUM_MIDDLE
+        movl    4(%esi),%eax
+        adcl    %eax,ACCUM_MS           /* This could overflow too */
+L_no_overflow:
+/*
+ * Now put the sum of next term and the accumulator
+ * into the sum register
+ */
+        movl    ACCUM_LS,%eax
+        addl    (%edi),%eax             /* term ls long */
+        movl    %eax,SUM_LS
+        movl    ACCUM_MIDDLE,%eax
+        adcl    (%edi),%eax             /* term ls long */
+        movl    %eax,SUM_MIDDLE
+        movl    ACCUM_MS,%eax
+        adcl    4(%edi),%eax            /* term ms long */
+        movl    %eax,SUM_MS
+        sbbb    %al,%al
+        movb    %al,OVERFLOWED          /* Used in the next iteration */
+        subl    TERM_SIZE,%edi
+        decl    PARAM4
+        jns     L_accum_loop
+L_accum_done:
+        movl    PARAM1,%edi             /* accum */
+        movl    SUM_LS,%eax
+        addl    %eax,(%edi)
+        movl    SUM_MIDDLE,%eax
+        adcl    %eax,4(%edi)
+        movl    SUM_MS,%eax
+        adcl    %eax,8(%edi)
+        popl    %ebx
+        popl    %edi
+        popl    %esi
+        leave
+        ret

diff --git a/arch/i386/math-emu/polynom_Xsig.S b/arch/i386/math-emu/polynom_Xsig.S new file mode 100644 index 000000000000..17315c89ff3d --- /dev/null +++ b/arch/i386/math-emu/polynom_Xsig.S
@@ -0,0 +1,135 @@
	1	/*---------------------------------------------------------------------------+
	2	\| polynomial_Xsig.S \|
	3	\| \|
	4	\| Fixed point arithmetic polynomial evaluation. \|
	5	\| \|
	6	\| Copyright (C) 1992,1993,1994,1995 \|
	7	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|
	8	\| Australia. E-mail billm@jacobi.maths.monash.edu.au \|
	9	\| \|
	10	\| Call from C as: \|
	11	\| void polynomial_Xsig(Xsig *accum, unsigned long long x, \|
	12	\| unsigned long long terms[], int n) \|
	13	\| \|
	14	\| Computes: \|
	15	\| terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]x)x)x)x) ... )*x \|
	16	\| and adds the result to the 12 byte Xsig. \|
	17	\| The terms[] are each 8 bytes, but all computation is performed to 12 byte \|
	18	\| precision. \|
	19	\| \|
	20	\| This function must be used carefully: most overflow of intermediate \|
	21	\| results is controlled, but overflow of the result is not. \|
	22	\| \|
	23	+---------------------------------------------------------------------------*/
	24	.file "polynomial_Xsig.S"
	25
	26	#include "fpu_emu.h"
	27
	28
	29	#define TERM_SIZE $8
	30	#define SUM_MS -20(%ebp) /* sum ms long */
	31	#define SUM_MIDDLE -24(%ebp) /* sum middle long */
	32	#define SUM_LS -28(%ebp) /* sum ls long */
	33	#define ACCUM_MS -4(%ebp) /* accum ms long */
	34	#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
	35	#define ACCUM_LS -12(%ebp) /* accum ls long */
	36	#define OVERFLOWED -16(%ebp) /* addition overflow flag */
	37
	38	.text
	39	ENTRY(polynomial_Xsig)
	40	pushl %ebp
	41	movl %esp,%ebp
	42	subl $32,%esp
	43	pushl %esi
	44	pushl %edi
	45	pushl %ebx
	46
	47	movl PARAM2,%esi /* x */
	48	movl PARAM3,%edi /* terms */
	49
	50	movl TERM_SIZE,%eax
	51	mull PARAM4 /* n */
	52	addl %eax,%edi
	53
	54	movl 4(%edi),%edx /* terms[n] */
	55	movl %edx,SUM_MS
	56	movl (%edi),%edx /* terms[n] */
	57	movl %edx,SUM_MIDDLE
	58	xor %eax,%eax
	59	movl %eax,SUM_LS
	60	movb %al,OVERFLOWED
	61
	62	subl TERM_SIZE,%edi
	63	decl PARAM4
	64	js L_accum_done
	65
	66	L_accum_loop:
	67	xor %eax,%eax
	68	movl %eax,ACCUM_MS
	69	movl %eax,ACCUM_MIDDLE
	70
	71	movl SUM_MIDDLE,%eax
	72	mull (%esi) /* x ls long */
	73	movl %edx,ACCUM_LS
	74
	75	movl SUM_MIDDLE,%eax
	76	mull 4(%esi) /* x ms long */
	77	addl %eax,ACCUM_LS
	78	adcl %edx,ACCUM_MIDDLE
	79	adcl $0,ACCUM_MS
	80
	81	movl SUM_MS,%eax
	82	mull (%esi) /* x ls long */
	83	addl %eax,ACCUM_LS
	84	adcl %edx,ACCUM_MIDDLE
	85	adcl $0,ACCUM_MS
	86
	87	movl SUM_MS,%eax
	88	mull 4(%esi) /* x ms long */
	89	addl %eax,ACCUM_MIDDLE
	90	adcl %edx,ACCUM_MS
	91
	92	testb $0xff,OVERFLOWED
	93	jz L_no_overflow
	94
	95	movl (%esi),%eax
	96	addl %eax,ACCUM_MIDDLE
	97	movl 4(%esi),%eax
	98	adcl %eax,ACCUM_MS /* This could overflow too */
	99
	100	L_no_overflow:
	101
	102	/*
	103	* Now put the sum of next term and the accumulator
	104	* into the sum register
	105	*/
	106	movl ACCUM_LS,%eax
	107	addl (%edi),%eax /* term ls long */
	108	movl %eax,SUM_LS
	109	movl ACCUM_MIDDLE,%eax
	110	adcl (%edi),%eax /* term ls long */
	111	movl %eax,SUM_MIDDLE
	112	movl ACCUM_MS,%eax
	113	adcl 4(%edi),%eax /* term ms long */
	114	movl %eax,SUM_MS
	115	sbbb %al,%al
	116	movb %al,OVERFLOWED /* Used in the next iteration */
	117
	118	subl TERM_SIZE,%edi
	119	decl PARAM4
	120	jns L_accum_loop
	121
	122	L_accum_done:
	123	movl PARAM1,%edi /* accum */
	124	movl SUM_LS,%eax
	125	addl %eax,(%edi)
	126	movl SUM_MIDDLE,%eax
	127	adcl %eax,4(%edi)
	128	movl SUM_MS,%eax
	129	adcl %eax,8(%edi)
	130
	131	popl %ebx
	132	popl %edi
	133	popl %esi
	134	leave
	135	ret