diff options
Diffstat (limited to 'arch/x86/math-emu/polynom_Xsig.S')
-rw-r--r-- | arch/x86/math-emu/polynom_Xsig.S | 135 |
1 files changed, 135 insertions, 0 deletions
diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S new file mode 100644 index 000000000000..17315c89ff3d --- /dev/null +++ b/arch/x86/math-emu/polynom_Xsig.S | |||
@@ -0,0 +1,135 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | polynomial_Xsig.S | | ||
3 | | | | ||
4 | | Fixed point arithmetic polynomial evaluation. | | ||
5 | | | | ||
6 | | Copyright (C) 1992,1993,1994,1995 | | ||
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | ||
8 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | ||
9 | | | | ||
10 | | Call from C as: | | ||
11 | | void polynomial_Xsig(Xsig *accum, unsigned long long x, | | ||
12 | | unsigned long long terms[], int n) | | ||
13 | | | | ||
14 | | Computes: | | ||
15 | | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | | ||
16 | | and adds the result to the 12 byte Xsig. | | ||
17 | | The terms[] are each 8 bytes, but all computation is performed to 12 byte | | ||
18 | | precision. | | ||
19 | | | | ||
20 | | This function must be used carefully: most overflow of intermediate | | ||
21 | | results is controlled, but overflow of the result is not. | | ||
22 | | | | ||
23 | +---------------------------------------------------------------------------*/ | ||
24 | .file "polynomial_Xsig.S" | ||
25 | |||
26 | #include "fpu_emu.h" | ||
27 | |||
28 | |||
29 | #define TERM_SIZE $8 | ||
30 | #define SUM_MS -20(%ebp) /* sum ms long */ | ||
31 | #define SUM_MIDDLE -24(%ebp) /* sum middle long */ | ||
32 | #define SUM_LS -28(%ebp) /* sum ls long */ | ||
33 | #define ACCUM_MS -4(%ebp) /* accum ms long */ | ||
34 | #define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ | ||
35 | #define ACCUM_LS -12(%ebp) /* accum ls long */ | ||
36 | #define OVERFLOWED -16(%ebp) /* addition overflow flag */ | ||
37 | |||
38 | .text | ||
39 | ENTRY(polynomial_Xsig) | ||
40 | pushl %ebp | ||
41 | movl %esp,%ebp | ||
42 | subl $32,%esp | ||
43 | pushl %esi | ||
44 | pushl %edi | ||
45 | pushl %ebx | ||
46 | |||
47 | movl PARAM2,%esi /* x */ | ||
48 | movl PARAM3,%edi /* terms */ | ||
49 | |||
50 | movl TERM_SIZE,%eax | ||
51 | mull PARAM4 /* n */ | ||
52 | addl %eax,%edi | ||
53 | |||
54 | movl 4(%edi),%edx /* terms[n] */ | ||
55 | movl %edx,SUM_MS | ||
56 | movl (%edi),%edx /* terms[n] */ | ||
57 | movl %edx,SUM_MIDDLE | ||
58 | xor %eax,%eax | ||
59 | movl %eax,SUM_LS | ||
60 | movb %al,OVERFLOWED | ||
61 | |||
62 | subl TERM_SIZE,%edi | ||
63 | decl PARAM4 | ||
64 | js L_accum_done | ||
65 | |||
66 | L_accum_loop: | ||
67 | xor %eax,%eax | ||
68 | movl %eax,ACCUM_MS | ||
69 | movl %eax,ACCUM_MIDDLE | ||
70 | |||
71 | movl SUM_MIDDLE,%eax | ||
72 | mull (%esi) /* x ls long */ | ||
73 | movl %edx,ACCUM_LS | ||
74 | |||
75 | movl SUM_MIDDLE,%eax | ||
76 | mull 4(%esi) /* x ms long */ | ||
77 | addl %eax,ACCUM_LS | ||
78 | adcl %edx,ACCUM_MIDDLE | ||
79 | adcl $0,ACCUM_MS | ||
80 | |||
81 | movl SUM_MS,%eax | ||
82 | mull (%esi) /* x ls long */ | ||
83 | addl %eax,ACCUM_LS | ||
84 | adcl %edx,ACCUM_MIDDLE | ||
85 | adcl $0,ACCUM_MS | ||
86 | |||
87 | movl SUM_MS,%eax | ||
88 | mull 4(%esi) /* x ms long */ | ||
89 | addl %eax,ACCUM_MIDDLE | ||
90 | adcl %edx,ACCUM_MS | ||
91 | |||
92 | testb $0xff,OVERFLOWED | ||
93 | jz L_no_overflow | ||
94 | |||
95 | movl (%esi),%eax | ||
96 | addl %eax,ACCUM_MIDDLE | ||
97 | movl 4(%esi),%eax | ||
98 | adcl %eax,ACCUM_MS /* This could overflow too */ | ||
99 | |||
100 | L_no_overflow: | ||
101 | |||
102 | /* | ||
103 | * Now put the sum of next term and the accumulator | ||
104 | * into the sum register | ||
105 | */ | ||
106 | movl ACCUM_LS,%eax | ||
107 | addl (%edi),%eax /* term ls long */ | ||
108 | movl %eax,SUM_LS | ||
109 | movl ACCUM_MIDDLE,%eax | ||
110 | adcl (%edi),%eax /* term ls long */ | ||
111 | movl %eax,SUM_MIDDLE | ||
112 | movl ACCUM_MS,%eax | ||
113 | adcl 4(%edi),%eax /* term ms long */ | ||
114 | movl %eax,SUM_MS | ||
115 | sbbb %al,%al | ||
116 | movb %al,OVERFLOWED /* Used in the next iteration */ | ||
117 | |||
118 | subl TERM_SIZE,%edi | ||
119 | decl PARAM4 | ||
120 | jns L_accum_loop | ||
121 | |||
122 | L_accum_done: | ||
123 | movl PARAM1,%edi /* accum */ | ||
124 | movl SUM_LS,%eax | ||
125 | addl %eax,(%edi) | ||
126 | movl SUM_MIDDLE,%eax | ||
127 | adcl %eax,4(%edi) | ||
128 | movl SUM_MS,%eax | ||
129 | adcl %eax,8(%edi) | ||
130 | |||
131 | popl %ebx | ||
132 | popl %edi | ||
133 | popl %esi | ||
134 | leave | ||
135 | ret | ||