aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/xor_32.h56
-rw-r--r--arch/x86/include/asm/xor_64.h61
-rw-r--r--arch/x86/include/asm/xor_avx.h54
3 files changed, 29 insertions, 142 deletions
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 454570891bdc..aabd5850bdb9 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = {
534 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) 534 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
535 */ 535 */
536 536
537#define XMMS_SAVE \
538do { \
539 preempt_disable(); \
540 cr0 = read_cr0(); \
541 clts(); \
542 asm volatile( \
543 "movups %%xmm0,(%0) ;\n\t" \
544 "movups %%xmm1,0x10(%0) ;\n\t" \
545 "movups %%xmm2,0x20(%0) ;\n\t" \
546 "movups %%xmm3,0x30(%0) ;\n\t" \
547 : \
548 : "r" (xmm_save) \
549 : "memory"); \
550} while (0)
551
552#define XMMS_RESTORE \
553do { \
554 asm volatile( \
555 "sfence ;\n\t" \
556 "movups (%0),%%xmm0 ;\n\t" \
557 "movups 0x10(%0),%%xmm1 ;\n\t" \
558 "movups 0x20(%0),%%xmm2 ;\n\t" \
559 "movups 0x30(%0),%%xmm3 ;\n\t" \
560 : \
561 : "r" (xmm_save) \
562 : "memory"); \
563 write_cr0(cr0); \
564 preempt_enable(); \
565} while (0)
566
567#define ALIGN16 __attribute__((aligned(16)))
568
569#define OFFS(x) "16*("#x")" 537#define OFFS(x) "16*("#x")"
570#define PF_OFFS(x) "256+16*("#x")" 538#define PF_OFFS(x) "256+16*("#x")"
571#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" 539#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
@@ -587,10 +555,8 @@ static void
587xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 555xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
588{ 556{
589 unsigned long lines = bytes >> 8; 557 unsigned long lines = bytes >> 8;
590 char xmm_save[16*4] ALIGN16;
591 int cr0;
592 558
593 XMMS_SAVE; 559 kernel_fpu_begin();
594 560
595 asm volatile( 561 asm volatile(
596#undef BLOCK 562#undef BLOCK
@@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
633 : 599 :
634 : "memory"); 600 : "memory");
635 601
636 XMMS_RESTORE; 602 kernel_fpu_end();
637} 603}
638 604
639static void 605static void
@@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
641 unsigned long *p3) 607 unsigned long *p3)
642{ 608{
643 unsigned long lines = bytes >> 8; 609 unsigned long lines = bytes >> 8;
644 char xmm_save[16*4] ALIGN16;
645 int cr0;
646 610
647 XMMS_SAVE; 611 kernel_fpu_begin();
648 612
649 asm volatile( 613 asm volatile(
650#undef BLOCK 614#undef BLOCK
@@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
694 : 658 :
695 : "memory" ); 659 : "memory" );
696 660
697 XMMS_RESTORE; 661 kernel_fpu_end();
698} 662}
699 663
700static void 664static void
@@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
702 unsigned long *p3, unsigned long *p4) 666 unsigned long *p3, unsigned long *p4)
703{ 667{
704 unsigned long lines = bytes >> 8; 668 unsigned long lines = bytes >> 8;
705 char xmm_save[16*4] ALIGN16;
706 int cr0;
707 669
708 XMMS_SAVE; 670 kernel_fpu_begin();
709 671
710 asm volatile( 672 asm volatile(
711#undef BLOCK 673#undef BLOCK
@@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
762 : 724 :
763 : "memory" ); 725 : "memory" );
764 726
765 XMMS_RESTORE; 727 kernel_fpu_end();
766} 728}
767 729
768static void 730static void
@@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
770 unsigned long *p3, unsigned long *p4, unsigned long *p5) 732 unsigned long *p3, unsigned long *p4, unsigned long *p5)
771{ 733{
772 unsigned long lines = bytes >> 8; 734 unsigned long lines = bytes >> 8;
773 char xmm_save[16*4] ALIGN16;
774 int cr0;
775 735
776 XMMS_SAVE; 736 kernel_fpu_begin();
777 737
778 /* Make sure GCC forgets anything it knows about p4 or p5, 738 /* Make sure GCC forgets anything it knows about p4 or p5,
779 such that it won't pass to the asm volatile below a 739 such that it won't pass to the asm volatile below a
@@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
850 like assuming they have some legal value. */ 810 like assuming they have some legal value. */
851 asm("" : "=r" (p4), "=r" (p5)); 811 asm("" : "=r" (p4), "=r" (p5));
852 812
853 XMMS_RESTORE; 813 kernel_fpu_end();
854} 814}
855 815
856static struct xor_block_template xor_block_pIII_sse = { 816static struct xor_block_template xor_block_pIII_sse = {
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index b9b2323e90fe..5fc06d0b7eb5 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -34,41 +34,7 @@
34 * no advantages to be gotten from x86-64 here anyways. 34 * no advantages to be gotten from x86-64 here anyways.
35 */ 35 */
36 36
37typedef struct { 37#include <asm/i387.h>
38 unsigned long a, b;
39} __attribute__((aligned(16))) xmm_store_t;
40
41/* Doesn't use gcc to save the XMM registers, because there is no easy way to
42 tell it to do a clts before the register saving. */
43#define XMMS_SAVE \
44do { \
45 preempt_disable(); \
46 asm volatile( \
47 "movq %%cr0,%0 ;\n\t" \
48 "clts ;\n\t" \
49 "movups %%xmm0,(%1) ;\n\t" \
50 "movups %%xmm1,0x10(%1) ;\n\t" \
51 "movups %%xmm2,0x20(%1) ;\n\t" \
52 "movups %%xmm3,0x30(%1) ;\n\t" \
53 : "=&r" (cr0) \
54 : "r" (xmm_save) \
55 : "memory"); \
56} while (0)
57
58#define XMMS_RESTORE \
59do { \
60 asm volatile( \
61 "sfence ;\n\t" \
62 "movups (%1),%%xmm0 ;\n\t" \
63 "movups 0x10(%1),%%xmm1 ;\n\t" \
64 "movups 0x20(%1),%%xmm2 ;\n\t" \
65 "movups 0x30(%1),%%xmm3 ;\n\t" \
66 "movq %0,%%cr0 ;\n\t" \
67 : \
68 : "r" (cr0), "r" (xmm_save) \
69 : "memory"); \
70 preempt_enable(); \
71} while (0)
72 38
73#define OFFS(x) "16*("#x")" 39#define OFFS(x) "16*("#x")"
74#define PF_OFFS(x) "256+16*("#x")" 40#define PF_OFFS(x) "256+16*("#x")"
@@ -91,10 +57,8 @@ static void
91xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 57xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
92{ 58{
93 unsigned int lines = bytes >> 8; 59 unsigned int lines = bytes >> 8;
94 unsigned long cr0;
95 xmm_store_t xmm_save[4];
96 60
97 XMMS_SAVE; 61 kernel_fpu_begin();
98 62
99 asm volatile( 63 asm volatile(
100#undef BLOCK 64#undef BLOCK
@@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
135 : [inc] "r" (256UL) 99 : [inc] "r" (256UL)
136 : "memory"); 100 : "memory");
137 101
138 XMMS_RESTORE; 102 kernel_fpu_end();
139} 103}
140 104
141static void 105static void
@@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
143 unsigned long *p3) 107 unsigned long *p3)
144{ 108{
145 unsigned int lines = bytes >> 8; 109 unsigned int lines = bytes >> 8;
146 xmm_store_t xmm_save[4];
147 unsigned long cr0;
148
149 XMMS_SAVE;
150 110
111 kernel_fpu_begin();
151 asm volatile( 112 asm volatile(
152#undef BLOCK 113#undef BLOCK
153#define BLOCK(i) \ 114#define BLOCK(i) \
@@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
194 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) 155 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
195 : [inc] "r" (256UL) 156 : [inc] "r" (256UL)
196 : "memory"); 157 : "memory");
197 XMMS_RESTORE; 158 kernel_fpu_end();
198} 159}
199 160
200static void 161static void
@@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
202 unsigned long *p3, unsigned long *p4) 163 unsigned long *p3, unsigned long *p4)
203{ 164{
204 unsigned int lines = bytes >> 8; 165 unsigned int lines = bytes >> 8;
205 xmm_store_t xmm_save[4];
206 unsigned long cr0;
207 166
208 XMMS_SAVE; 167 kernel_fpu_begin();
209 168
210 asm volatile( 169 asm volatile(
211#undef BLOCK 170#undef BLOCK
@@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
261 : [inc] "r" (256UL) 220 : [inc] "r" (256UL)
262 : "memory" ); 221 : "memory" );
263 222
264 XMMS_RESTORE; 223 kernel_fpu_end();
265} 224}
266 225
267static void 226static void
@@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
269 unsigned long *p3, unsigned long *p4, unsigned long *p5) 228 unsigned long *p3, unsigned long *p4, unsigned long *p5)
270{ 229{
271 unsigned int lines = bytes >> 8; 230 unsigned int lines = bytes >> 8;
272 xmm_store_t xmm_save[4];
273 unsigned long cr0;
274 231
275 XMMS_SAVE; 232 kernel_fpu_begin();
276 233
277 asm volatile( 234 asm volatile(
278#undef BLOCK 235#undef BLOCK
@@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
336 : [inc] "r" (256UL) 293 : [inc] "r" (256UL)
337 : "memory"); 294 : "memory");
338 295
339 XMMS_RESTORE; 296 kernel_fpu_end();
340} 297}
341 298
342static struct xor_block_template xor_block_sse = { 299static struct xor_block_template xor_block_sse = {
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 2510d35f480e..7ea79c5fa1f2 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -20,32 +20,6 @@
20#include <linux/compiler.h> 20#include <linux/compiler.h>
21#include <asm/i387.h> 21#include <asm/i387.h>
22 22
23#define ALIGN32 __aligned(32)
24
25#define YMM_SAVED_REGS 4
26
27#define YMMS_SAVE \
28do { \
29 preempt_disable(); \
30 cr0 = read_cr0(); \
31 clts(); \
32 asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
33 asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
34 asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
35 asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
36} while (0);
37
38#define YMMS_RESTORE \
39do { \
40 asm volatile("sfence" : : : "memory"); \
41 asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
42 asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
43 asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
44 asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
45 write_cr0(cr0); \
46 preempt_enable(); \
47} while (0);
48
49#define BLOCK4(i) \ 23#define BLOCK4(i) \
50 BLOCK(32 * i, 0) \ 24 BLOCK(32 * i, 0) \
51 BLOCK(32 * (i + 1), 1) \ 25 BLOCK(32 * (i + 1), 1) \
@@ -60,10 +34,9 @@ do { \
60 34
61static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1) 35static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
62{ 36{
63 unsigned long cr0, lines = bytes >> 9; 37 unsigned long lines = bytes >> 9;
64 char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
65 38
66 YMMS_SAVE 39 kernel_fpu_begin();
67 40
68 while (lines--) { 41 while (lines--) {
69#undef BLOCK 42#undef BLOCK
@@ -82,16 +55,15 @@ do { \
82 p1 = (unsigned long *)((uintptr_t)p1 + 512); 55 p1 = (unsigned long *)((uintptr_t)p1 + 512);
83 } 56 }
84 57
85 YMMS_RESTORE 58 kernel_fpu_end();
86} 59}
87 60
88static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1, 61static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
89 unsigned long *p2) 62 unsigned long *p2)
90{ 63{
91 unsigned long cr0, lines = bytes >> 9; 64 unsigned long lines = bytes >> 9;
92 char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
93 65
94 YMMS_SAVE 66 kernel_fpu_begin();
95 67
96 while (lines--) { 68 while (lines--) {
97#undef BLOCK 69#undef BLOCK
@@ -113,16 +85,15 @@ do { \
113 p2 = (unsigned long *)((uintptr_t)p2 + 512); 85 p2 = (unsigned long *)((uintptr_t)p2 + 512);
114 } 86 }
115 87
116 YMMS_RESTORE 88 kernel_fpu_end();
117} 89}
118 90
119static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1, 91static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
120 unsigned long *p2, unsigned long *p3) 92 unsigned long *p2, unsigned long *p3)
121{ 93{
122 unsigned long cr0, lines = bytes >> 9; 94 unsigned long lines = bytes >> 9;
123 char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
124 95
125 YMMS_SAVE 96 kernel_fpu_begin();
126 97
127 while (lines--) { 98 while (lines--) {
128#undef BLOCK 99#undef BLOCK
@@ -147,16 +118,15 @@ do { \
147 p3 = (unsigned long *)((uintptr_t)p3 + 512); 118 p3 = (unsigned long *)((uintptr_t)p3 + 512);
148 } 119 }
149 120
150 YMMS_RESTORE 121 kernel_fpu_end();
151} 122}
152 123
153static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1, 124static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
154 unsigned long *p2, unsigned long *p3, unsigned long *p4) 125 unsigned long *p2, unsigned long *p3, unsigned long *p4)
155{ 126{
156 unsigned long cr0, lines = bytes >> 9; 127 unsigned long lines = bytes >> 9;
157 char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
158 128
159 YMMS_SAVE 129 kernel_fpu_begin();
160 130
161 while (lines--) { 131 while (lines--) {
162#undef BLOCK 132#undef BLOCK
@@ -184,7 +154,7 @@ do { \
184 p4 = (unsigned long *)((uintptr_t)p4 + 512); 154 p4 = (unsigned long *)((uintptr_t)p4 + 512);
185 } 155 }
186 156
187 YMMS_RESTORE 157 kernel_fpu_end();
188} 158}
189 159
190static struct xor_block_template xor_block_avx = { 160static struct xor_block_template xor_block_avx = {