aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/math-emu/poly.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/math-emu/poly.h')
-rw-r--r--arch/x86/math-emu/poly.h79
1 files changed, 36 insertions, 43 deletions
diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h
index 4db798114923..f317de7d8864 100644
--- a/arch/x86/math-emu/poly.h
+++ b/arch/x86/math-emu/poly.h
@@ -21,9 +21,9 @@
21 allows. 9-byte would probably be sufficient. 21 allows. 9-byte would probably be sufficient.
22 */ 22 */
/*
 * Extended significand held as three consecutive unsigned long words,
 * least significant word first.  The member order is load-bearing: the
 * inline assembler in this header addresses the words at byte offsets
 * 0, 4 and 8, and XSIG_LL() treats midw/msw as one adjacent pair.
 */
typedef struct {
	unsigned long lsw;	/* least significant word (offset 0) */
	unsigned long midw;	/* middle word */
	unsigned long msw;	/* most significant word */
} Xsig;
28 28
29asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, 29asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b,
@@ -33,12 +33,12 @@ asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x,
33 33
34asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult); 34asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult);
35asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult); 35asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult);
36asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); 36asmlinkage void mul_Xsig_Xsig(Xsig * dest, const Xsig * mult);
37 37
38asmlinkage void shr_Xsig(Xsig *, const int n); 38asmlinkage void shr_Xsig(Xsig *, const int n);
39asmlinkage int round_Xsig(Xsig *); 39asmlinkage int round_Xsig(Xsig *);
40asmlinkage int norm_Xsig(Xsig *); 40asmlinkage int norm_Xsig(Xsig *);
41asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); 41asmlinkage void div_Xsig(Xsig * x1, const Xsig * x2, const Xsig * dest);
42 42
/*
 * Macro to access the most significant 32 bits of a long long.
 * Expands to an lvalue: the second unsigned long starting at the address
 * of x.  This is the upper half of a 64-bit object only where unsigned
 * long is 32 bits wide and the low word is stored first.
 */
#define LL_MSW(x)	(*((unsigned long *)&(x) + 1))
@@ -49,7 +49,6 @@ asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
/*
 * Macro to access the 8 ms bytes of an Xsig as a long long.
 * Expands to an lvalue covering the storage that starts at x.midw, so it
 * relies on msw following midw directly in memory.
 * The argument is parenthesised so that non-trivial lvalues such as
 * XSIG_LL(*ptr) bind correctly: without the parentheses the expansion
 * would read "&*ptr.midw", which applies ".midw" to the pointer itself.
 */
#define XSIG_LL(x)	(*(unsigned long long *)&(x).midw)
51 51
52
53/* 52/*
54 Need to run gcc with optimizations on to get these to 53 Need to run gcc with optimizations on to get these to
55 actually be in-line. 54 actually be in-line.
@@ -63,59 +62,53 @@ asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
/*
 * Multiply two unsigned 32-bit values and return the most significant
 * 32 bits of the 64-bit product.
 *
 * Only the low 32 bits of each argument take part in the multiplication.
 *
 * This is written in C rather than with "mull" inline assembler: the old
 * asm handed arg2 over with a "g" constraint, which also permits an
 * immediate operand that "mull" cannot encode, and it staged the unsigned
 * result through a signed int.  A widening multiply lets the compiler
 * pick a valid operand form itself and keeps the result unsigned.
 */
static inline unsigned long mul_32_32(const unsigned long arg1,
				      const unsigned long arg2)
{
	const unsigned long long product =
		(unsigned long long)(arg1 & 0xffffffffUL) *
		(arg2 & 0xffffffffUL);

	return (unsigned long)(product >> 32);
}
73 71
74
75/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ 72/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
76static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2) 73static inline void add_Xsig_Xsig(Xsig * dest, const Xsig * x2)
77{ 74{
78 asm volatile ("movl %1,%%edi; movl %2,%%esi;\n" 75 asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
79 "movl (%%esi),%%eax; addl %%eax,(%%edi);\n" 76 "movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
80 "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n" 77 "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n"
81 "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n" 78 "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n":"=g"
82 :"=g" (*dest):"g" (dest), "g" (x2) 79 (*dest):"g"(dest), "g"(x2)
83 :"ax","si","di"); 80 :"ax", "si", "di");
84} 81}
85 82
86
87/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ 83/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */
88/* Note: the constraints in the asm statement didn't always work properly 84/* Note: the constraints in the asm statement didn't always work properly
89 with gcc 2.5.8. Changing from using edi to using ecx got around the 85 with gcc 2.5.8. Changing from using edi to using ecx got around the
90 problem, but keep fingers crossed! */ 86 problem, but keep fingers crossed! */
91static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp) 87static inline void add_two_Xsig(Xsig * dest, const Xsig * x2, long int *exp)
92{ 88{
93 asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n" 89 asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
94 "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n" 90 "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
95 "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n" 91 "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n"
96 "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n" 92 "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n"
97 "jnc 0f;\n" 93 "jnc 0f;\n"
98 "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n" 94 "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n"
99 "movl %4,%%ecx; incl (%%ecx)\n" 95 "movl %4,%%ecx; incl (%%ecx)\n"
100 "movl $1,%%eax; jmp 1f;\n" 96 "movl $1,%%eax; jmp 1f;\n"
101 "0: xorl %%eax,%%eax;\n" 97 "0: xorl %%eax,%%eax;\n" "1:\n":"=g" (*exp), "=g"(*dest)
102 "1:\n" 98 :"g"(dest), "g"(x2), "g"(exp)
103 :"=g" (*exp), "=g" (*dest) 99 :"cx", "si", "ax");
104 :"g" (dest), "g" (x2), "g" (exp)
105 :"cx","si","ax");
106} 100}
107 101
108
109/* Negate (subtract from 1.0) the 12 byte Xsig */ 102/* Negate (subtract from 1.0) the 12 byte Xsig */
110/* This is faster in a loop on my 386 than using the "neg" instruction. */ 103/* This is faster in a loop on my 386 than using the "neg" instruction. */
111static inline void negate_Xsig(Xsig *x) 104static inline void negate_Xsig(Xsig * x)
112{ 105{
113 asm volatile("movl %1,%%esi;\n" 106 asm volatile ("movl %1,%%esi;\n"
114 "xorl %%ecx,%%ecx;\n" 107 "xorl %%ecx,%%ecx;\n"
115 "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n" 108 "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n"
116 "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n" 109 "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n"
117 "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n" 110 "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n":"=g"
118 :"=g" (*x):"g" (x):"si","ax","cx"); 111 (*x):"g"(x):"si", "ax", "cx");
119} 112}
120 113
121#endif /* _POLY_H */ 114#endif /* _POLY_H */