aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/math-emu/poly.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/math-emu/poly.h')
-rw-r--r--arch/x86/math-emu/poly.h69
1 files changed, 31 insertions, 38 deletions
diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h
index 4db79811492..168eb44c93c 100644
--- a/arch/x86/math-emu/poly.h
+++ b/arch/x86/math-emu/poly.h
@@ -21,9 +21,9 @@
21 allows. 9-byte would probably be sufficient. 21 allows. 9-byte would probably be sufficient.
22 */ 22 */
23typedef struct { 23typedef struct {
24 unsigned long lsw; 24 unsigned long lsw;
25 unsigned long midw; 25 unsigned long midw;
26 unsigned long msw; 26 unsigned long msw;
27} Xsig; 27} Xsig;
28 28
29asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, 29asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b,
@@ -49,7 +49,6 @@ asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
49/* Macro to access the 8 ms bytes of an Xsig as a long long */ 49/* Macro to access the 8 ms bytes of an Xsig as a long long */
50#define XSIG_LL(x) (*(unsigned long long *)&x.midw) 50#define XSIG_LL(x) (*(unsigned long long *)&x.midw)
51 51
52
53/* 52/*
54 Need to run gcc with optimizations on to get these to 53 Need to run gcc with optimizations on to get these to
55 actually be in-line. 54 actually be in-line.
@@ -63,59 +62,53 @@ asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
63static inline unsigned long mul_32_32(const unsigned long arg1, 62static inline unsigned long mul_32_32(const unsigned long arg1,
64 const unsigned long arg2) 63 const unsigned long arg2)
65{ 64{
66 int retval; 65 int retval;
67 asm volatile ("mull %2; movl %%edx,%%eax" \ 66 asm volatile ("mull %2; movl %%edx,%%eax":"=a" (retval)
68 :"=a" (retval) \ 67 :"0"(arg1), "g"(arg2)
69 :"0" (arg1), "g" (arg2) \ 68 :"dx");
70 :"dx"); 69 return retval;
71 return retval;
72} 70}
73 71
74
75/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ 72/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
76static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2) 73static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
77{ 74{
78 asm volatile ("movl %1,%%edi; movl %2,%%esi;\n" 75 asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
79 "movl (%%esi),%%eax; addl %%eax,(%%edi);\n" 76 "movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
80 "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n" 77 "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n"
81 "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n" 78 "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n":"=g"
82 :"=g" (*dest):"g" (dest), "g" (x2) 79 (*dest):"g"(dest), "g"(x2)
83 :"ax","si","di"); 80 :"ax", "si", "di");
84} 81}
85 82
86
87/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ 83/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */
88/* Note: the constraints in the asm statement didn't always work properly 84/* Note: the constraints in the asm statement didn't always work properly
89 with gcc 2.5.8. Changing from using edi to using ecx got around the 85 with gcc 2.5.8. Changing from using edi to using ecx got around the
90 problem, but keep fingers crossed! */ 86 problem, but keep fingers crossed! */
91static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp) 87static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp)
92{ 88{
93 asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n" 89 asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
94 "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n" 90 "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
95 "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n" 91 "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n"
96 "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n" 92 "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n"
97 "jnc 0f;\n" 93 "jnc 0f;\n"
98 "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n" 94 "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n"
99 "movl %4,%%ecx; incl (%%ecx)\n" 95 "movl %4,%%ecx; incl (%%ecx)\n"
100 "movl $1,%%eax; jmp 1f;\n" 96 "movl $1,%%eax; jmp 1f;\n"
101 "0: xorl %%eax,%%eax;\n" 97 "0: xorl %%eax,%%eax;\n" "1:\n":"=g" (*exp), "=g"(*dest)
102 "1:\n" 98 :"g"(dest), "g"(x2), "g"(exp)
103 :"=g" (*exp), "=g" (*dest) 99 :"cx", "si", "ax");
104 :"g" (dest), "g" (x2), "g" (exp)
105 :"cx","si","ax");
106} 100}
107 101
108
109/* Negate (subtract from 1.0) the 12 byte Xsig */ 102/* Negate (subtract from 1.0) the 12 byte Xsig */
110/* This is faster in a loop on my 386 than using the "neg" instruction. */ 103/* This is faster in a loop on my 386 than using the "neg" instruction. */
111static inline void negate_Xsig(Xsig *x) 104static inline void negate_Xsig(Xsig *x)
112{ 105{
113 asm volatile("movl %1,%%esi;\n" 106 asm volatile ("movl %1,%%esi;\n"
114 "xorl %%ecx,%%ecx;\n" 107 "xorl %%ecx,%%ecx;\n"
115 "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n" 108 "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n"
116 "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n" 109 "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n"
117 "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n" 110 "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n":"=g"
118 :"=g" (*x):"g" (x):"si","ax","cx"); 111 (*x):"g"(x):"si", "ax", "cx");
119} 112}
120 113
121#endif /* _POLY_H */ 114#endif /* _POLY_H */