Diffstat (limited to 'arch/x86/include/asm/xor_64.h')
 arch/x86/include/asm/xor_64.h | 63
 1 file changed, 10 insertions(+), 53 deletions(-)
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index b9b2323e90fe..87ac522c4af5 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -34,41 +34,7 @@
  * no advantages to be gotten from x86-64 here anyways.
  */
 
-typedef struct {
-	unsigned long a, b;
-} __attribute__((aligned(16))) xmm_store_t;
-
-/* Doesn't use gcc to save the XMM registers, because there is no easy way to
-   tell it to do a clts before the register saving. */
-#define XMMS_SAVE \
-do { \
-	preempt_disable(); \
-	asm volatile( \
-		"movq %%cr0,%0		;\n\t" \
-		"clts			;\n\t" \
-		"movups %%xmm0,(%1)	;\n\t" \
-		"movups %%xmm1,0x10(%1)	;\n\t" \
-		"movups %%xmm2,0x20(%1)	;\n\t" \
-		"movups %%xmm3,0x30(%1)	;\n\t" \
-		: "=&r" (cr0) \
-		: "r" (xmm_save) \
-		: "memory"); \
-} while (0)
-
-#define XMMS_RESTORE \
-do { \
-	asm volatile( \
-		"sfence			;\n\t" \
-		"movups (%1),%%xmm0	;\n\t" \
-		"movups 0x10(%1),%%xmm1	;\n\t" \
-		"movups 0x20(%1),%%xmm2	;\n\t" \
-		"movups 0x30(%1),%%xmm3	;\n\t" \
-		"movq %0,%%cr0		;\n\t" \
-		: \
-		: "r" (cr0), "r" (xmm_save) \
-		: "memory"); \
-	preempt_enable(); \
-} while (0)
+#include <asm/i387.h>
 
 #define OFFS(x)		"16*("#x")"
 #define PF_OFFS(x)	"256+16*("#x")"
@@ -91,10 +57,8 @@ static void
 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 {
 	unsigned int lines = bytes >> 8;
-	unsigned long cr0;
-	xmm_store_t xmm_save[4];
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 	: [inc] "r" (256UL)
 	: "memory");
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
-
-	XMMS_SAVE;
 
+	kernel_fpu_begin();
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i) \
@@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
 	: [inc] "r" (256UL)
 	: "memory");
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	: [inc] "r" (256UL)
 	: "memory" );
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	: [inc] "r" (256UL)
 	: "memory");
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static struct xor_block_template xor_block_sse = {
@@ -349,7 +306,7 @@ static struct xor_block_template xor_block_sse = {
 
 
 /* Also try the AVX routines */
-#include "xor_avx.h"
+#include <asm/xor_avx.h>
 
 #undef XOR_TRY_TEMPLATES
 #define XOR_TRY_TEMPLATES \
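For context: the removed XMMS_SAVE/XMMS_RESTORE macros hand-rolled the FPU context handling around each SSE xor routine — preempt_disable(), saving %cr0, executing clts, and spilling %xmm0-%xmm3 into a local xmm_store_t buffer. After this change the routines simply bracket their SSE code with kernel_fpu_begin()/kernel_fpu_end() from <asm/i387.h>, which manage preemption and the task's FPU state centrally. A minimal sketch of the resulting pattern, for illustration only (xor_sse_example() is a made-up name and the actual inline-asm xor loop is elided):

	#include <asm/i387.h>

	static void xor_sse_example(unsigned long bytes, unsigned long *p1,
				    unsigned long *p2)
	{
		unsigned int lines = bytes >> 8;	/* each iteration covers a 256-byte block */

		kernel_fpu_begin();	/* disables preemption; XMM registers may be used from here */

		/* ... SSE inline-asm xor loop over 'lines' blocks, as in xor_sse_2() ... */

		kernel_fpu_end();	/* gives the FPU back and re-enables preemption */
	}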