diff options
Diffstat (limited to 'arch/x86/math-emu/poly.h')
-rw-r--r-- | arch/x86/math-emu/poly.h | 79 |
1 files changed, 36 insertions, 43 deletions
diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h index 4db798114923..f317de7d8864 100644 --- a/arch/x86/math-emu/poly.h +++ b/arch/x86/math-emu/poly.h | |||
@@ -21,9 +21,9 @@ | |||
21 | allows. 9-byte would probably be sufficient. | 21 | allows. 9-byte would probably be sufficient. |
22 | */ | 22 | */ |
23 | typedef struct { | 23 | typedef struct { |
24 | unsigned long lsw; | 24 | unsigned long lsw; |
25 | unsigned long midw; | 25 | unsigned long midw; |
26 | unsigned long msw; | 26 | unsigned long msw; |
27 | } Xsig; | 27 | } Xsig; |
28 | 28 | ||
29 | asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, | 29 | asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, |
@@ -33,12 +33,12 @@ asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x, | |||
33 | 33 | ||
34 | asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult); | 34 | asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult); |
35 | asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult); | 35 | asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult); |
36 | asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); | 36 | asmlinkage void mul_Xsig_Xsig(Xsig * dest, const Xsig * mult); |
37 | 37 | ||
38 | asmlinkage void shr_Xsig(Xsig *, const int n); | 38 | asmlinkage void shr_Xsig(Xsig *, const int n); |
39 | asmlinkage int round_Xsig(Xsig *); | 39 | asmlinkage int round_Xsig(Xsig *); |
40 | asmlinkage int norm_Xsig(Xsig *); | 40 | asmlinkage int norm_Xsig(Xsig *); |
41 | asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); | 41 | asmlinkage void div_Xsig(Xsig * x1, const Xsig * x2, const Xsig * dest); |
42 | 42 | ||
43 | /* Macro to extract the most significant 32 bits from a long long */ | 43 | /* Macro to extract the most significant 32 bits from a long long */ |
44 | #define LL_MSW(x) (((unsigned long *)&x)[1]) | 44 | #define LL_MSW(x) (((unsigned long *)&x)[1]) |
@@ -49,7 +49,6 @@ asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); | |||
49 | /* Macro to access the 8 ms bytes of an Xsig as a long long */ | 49 | /* Macro to access the 8 ms bytes of an Xsig as a long long */ |
50 | #define XSIG_LL(x) (*(unsigned long long *)&x.midw) | 50 | #define XSIG_LL(x) (*(unsigned long long *)&x.midw) |
51 | 51 | ||
52 | |||
53 | /* | 52 | /* |
54 | Need to run gcc with optimizations on to get these to | 53 | Need to run gcc with optimizations on to get these to |
55 | actually be in-line. | 54 | actually be in-line. |
@@ -63,59 +62,53 @@ asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); | |||
63 | static inline unsigned long mul_32_32(const unsigned long arg1, | 62 | static inline unsigned long mul_32_32(const unsigned long arg1, |
64 | const unsigned long arg2) | 63 | const unsigned long arg2) |
65 | { | 64 | { |
66 | int retval; | 65 | int retval; |
67 | asm volatile ("mull %2; movl %%edx,%%eax" \ | 66 | asm volatile ("mull %2; movl %%edx,%%eax":"=a" (retval) |
68 | :"=a" (retval) \ | 67 | :"0"(arg1), "g"(arg2) |
69 | :"0" (arg1), "g" (arg2) \ | 68 | :"dx"); |
70 | :"dx"); | 69 | return retval; |
71 | return retval; | ||
72 | } | 70 | } |
73 | 71 | ||
74 | |||
75 | /* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ | 72 | /* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ |
76 | static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2) | 73 | static inline void add_Xsig_Xsig(Xsig * dest, const Xsig * x2) |
77 | { | 74 | { |
78 | asm volatile ("movl %1,%%edi; movl %2,%%esi;\n" | 75 | asm volatile ("movl %1,%%edi; movl %2,%%esi;\n" |
79 | "movl (%%esi),%%eax; addl %%eax,(%%edi);\n" | 76 | "movl (%%esi),%%eax; addl %%eax,(%%edi);\n" |
80 | "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n" | 77 | "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n" |
81 | "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n" | 78 | "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n":"=g" |
82 | :"=g" (*dest):"g" (dest), "g" (x2) | 79 | (*dest):"g"(dest), "g"(x2) |
83 | :"ax","si","di"); | 80 | :"ax", "si", "di"); |
84 | } | 81 | } |
85 | 82 | ||
86 | |||
87 | /* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ | 83 | /* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ |
88 | /* Note: the constraints in the asm statement didn't always work properly | 84 | /* Note: the constraints in the asm statement didn't always work properly |
89 | with gcc 2.5.8. Changing from using edi to using ecx got around the | 85 | with gcc 2.5.8. Changing from using edi to using ecx got around the |
90 | problem, but keep fingers crossed! */ | 86 | problem, but keep fingers crossed! */ |
91 | static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp) | 87 | static inline void add_two_Xsig(Xsig * dest, const Xsig * x2, long int *exp) |
92 | { | 88 | { |
93 | asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n" | 89 | asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n" |
94 | "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n" | 90 | "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n" |
95 | "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n" | 91 | "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n" |
96 | "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n" | 92 | "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n" |
97 | "jnc 0f;\n" | 93 | "jnc 0f;\n" |
98 | "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n" | 94 | "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n" |
99 | "movl %4,%%ecx; incl (%%ecx)\n" | 95 | "movl %4,%%ecx; incl (%%ecx)\n" |
100 | "movl $1,%%eax; jmp 1f;\n" | 96 | "movl $1,%%eax; jmp 1f;\n" |
101 | "0: xorl %%eax,%%eax;\n" | 97 | "0: xorl %%eax,%%eax;\n" "1:\n":"=g" (*exp), "=g"(*dest) |
102 | "1:\n" | 98 | :"g"(dest), "g"(x2), "g"(exp) |
103 | :"=g" (*exp), "=g" (*dest) | 99 | :"cx", "si", "ax"); |
104 | :"g" (dest), "g" (x2), "g" (exp) | ||
105 | :"cx","si","ax"); | ||
106 | } | 100 | } |
107 | 101 | ||
108 | |||
109 | /* Negate (subtract from 1.0) the 12 byte Xsig */ | 102 | /* Negate (subtract from 1.0) the 12 byte Xsig */ |
110 | /* This is faster in a loop on my 386 than using the "neg" instruction. */ | 103 | /* This is faster in a loop on my 386 than using the "neg" instruction. */ |
111 | static inline void negate_Xsig(Xsig *x) | 104 | static inline void negate_Xsig(Xsig * x) |
112 | { | 105 | { |
113 | asm volatile("movl %1,%%esi;\n" | 106 | asm volatile ("movl %1,%%esi;\n" |
114 | "xorl %%ecx,%%ecx;\n" | 107 | "xorl %%ecx,%%ecx;\n" |
115 | "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n" | 108 | "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n" |
116 | "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n" | 109 | "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n" |
117 | "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n" | 110 | "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n":"=g" |
118 | :"=g" (*x):"g" (x):"si","ax","cx"); | 111 | (*x):"g"(x):"si", "ax", "cx"); |
119 | } | 112 | } |
120 | 113 | ||
121 | #endif /* _POLY_H */ | 114 | #endif /* _POLY_H */ |