Diffstat (limited to 'drivers')

-rw-r--r--  drivers/md/raid6mmx.c  |  16
-rw-r--r--  drivers/md/raid6sse1.c |  17
-rw-r--r--  drivers/md/raid6sse2.c |  22
-rw-r--r--  drivers/md/raid6x86.h  | 218

4 files changed, 32 insertions, 241 deletions
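
This change replaces the raid6 code's private FPU save/restore helpers (raid6_before_mmx()/raid6_after_mmx(), raid6_before_sse2()/raid6_after_sse2(), raid6_before_sse16()/raid6_after_sse16() and their save-area types) with the kernel's standard kernel_fpu_begin()/kernel_fpu_end() API from <asm/i387.h>. A minimal sketch of the resulting pattern, with raid6_example_section() as a hypothetical stand-in for the real syndrome loops:

	#include <asm/i387.h>	/* kernel_fpu_begin()/kernel_fpu_end() */

	static void raid6_example_section(void)
	{
		/* Take over the FPU: preemption is disabled and the current
		   FPU state is preserved, so the MMX/SSE registers may be
		   clobbered freely until kernel_fpu_end(). */
		kernel_fpu_begin();

		/* ... MMX/SSE asm such as the gen_syndrome loops below ... */

		kernel_fpu_end();
	}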
diff --git a/drivers/md/raid6mmx.c b/drivers/md/raid6mmx.c
index 359157aaf9e0..6181a5a3365a 100644
--- a/drivers/md/raid6mmx.c
+++ b/drivers/md/raid6mmx.c
@@ -30,14 +30,8 @@ const struct raid6_mmx_constants {
 
 static int raid6_have_mmx(void)
 {
-#ifdef __KERNEL__
 	/* Not really "boot_cpu" but "all_cpus" */
 	return boot_cpu_has(X86_FEATURE_MMX);
-#else
-	/* User space test code */
-	u32 features = cpuid_features();
-	return ( (features & (1<<23)) == (1<<23) );
-#endif
 }
 
 /*
@@ -48,13 +42,12 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	u8 **dptr = (u8 **)ptrs;
 	u8 *p, *q;
 	int d, z, z0;
-	raid6_mmx_save_t sa;
 
 	z0 = disks - 3;		/* Highest data disk */
 	p = dptr[z0+1];		/* XOR parity */
 	q = dptr[z0+2];		/* RS syndrome */
 
-	raid6_before_mmx(&sa);
+	kernel_fpu_begin();
 
 	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
 	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
@@ -78,7 +71,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
 		asm volatile("pxor %mm4,%mm4");
 	}
 
-	raid6_after_mmx(&sa);
+	kernel_fpu_end();
 }
 
 const struct raid6_calls raid6_mmxx1 = {
@@ -96,13 +89,12 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	u8 **dptr = (u8 **)ptrs;
 	u8 *p, *q;
 	int d, z, z0;
-	raid6_mmx_save_t sa;
 
 	z0 = disks - 3;		/* Highest data disk */
 	p = dptr[z0+1];		/* XOR parity */
 	q = dptr[z0+2];		/* RS syndrome */
 
-	raid6_before_mmx(&sa);
+	kernel_fpu_begin();
 
 	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
 	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
@@ -137,7 +129,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
 		asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
 	}
 
-	raid6_after_mmx(&sa);
+	kernel_fpu_end();
 }
 
 const struct raid6_calls raid6_mmxx2 = {
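
The const struct raid6_calls tables above are unchanged except as diff context; each one wires a gen_syndrome implementation to its CPU-detection routine. A hedged sketch of the shape of one entry, assuming the struct raid6_calls layout from raid6.h (gen_syndrome, valid, name, prefer):

	const struct raid6_calls raid6_mmxx1 = {
		raid6_mmx1_gen_syndrome,	/* syndrome generator */
		raid6_have_mmx,			/* is this CPU usable? */
		"mmxx1",			/* name shown when benchmarking */
		0				/* no performance preference */
	};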
diff --git a/drivers/md/raid6sse1.c b/drivers/md/raid6sse1.c
index f7e7859f71aa..f0a1ba8f40ba 100644
--- a/drivers/md/raid6sse1.c
+++ b/drivers/md/raid6sse1.c
@@ -33,16 +33,10 @@ extern const struct raid6_mmx_constants {
 
 static int raid6_have_sse1_or_mmxext(void)
 {
-#ifdef __KERNEL__
 	/* Not really boot_cpu but "all_cpus" */
 	return boot_cpu_has(X86_FEATURE_MMX) &&
 		(boot_cpu_has(X86_FEATURE_XMM) ||
 		 boot_cpu_has(X86_FEATURE_MMXEXT));
-#else
-	/* User space test code - this incorrectly breaks on some Athlons */
-	u32 features = cpuid_features();
-	return ( (features & (5<<23)) == (5<<23) );
-#endif
 }
 
 /*
@@ -53,14 +47,12 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	u8 **dptr = (u8 **)ptrs;
 	u8 *p, *q;
 	int d, z, z0;
-	raid6_mmx_save_t sa;
 
 	z0 = disks - 3;		/* Highest data disk */
 	p = dptr[z0+1];		/* XOR parity */
 	q = dptr[z0+2];		/* RS syndrome */
 
-	/* This is really MMX code, not SSE */
-	raid6_before_mmx(&sa);
+	kernel_fpu_begin();
 
 	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
 	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
@@ -94,8 +86,8 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
 		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
 	}
 
-	raid6_after_mmx(&sa);
 	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
 }
 
 const struct raid6_calls raid6_sse1x1 = {
@@ -113,13 +105,12 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	u8 **dptr = (u8 **)ptrs;
 	u8 *p, *q;
 	int d, z, z0;
-	raid6_mmx_save_t sa;
 
 	z0 = disks - 3;		/* Highest data disk */
 	p = dptr[z0+1];		/* XOR parity */
 	q = dptr[z0+2];		/* RS syndrome */
 
-	raid6_before_mmx(&sa);
+	kernel_fpu_begin();
 
 	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
 	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
@@ -157,8 +148,8 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
 		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
 	}
 
-	raid6_after_mmx(&sa);
 	asm volatile("sfence" : :: "memory");
+	kernel_fpu_end();
 }
 
 const struct raid6_calls raid6_sse1x2 = {
diff --git a/drivers/md/raid6sse2.c b/drivers/md/raid6sse2.c
index b3aa7fe0877e..0f019762a7c3 100644
--- a/drivers/md/raid6sse2.c
+++ b/drivers/md/raid6sse2.c
@@ -30,17 +30,11 @@ static const struct raid6_sse_constants {
 
 static int raid6_have_sse2(void)
 {
-#ifdef __KERNEL__
 	/* Not really boot_cpu but "all_cpus" */
 	return boot_cpu_has(X86_FEATURE_MMX) &&
 		boot_cpu_has(X86_FEATURE_FXSR) &&
 		boot_cpu_has(X86_FEATURE_XMM) &&
 		boot_cpu_has(X86_FEATURE_XMM2);
-#else
-	/* User space test code */
-	u32 features = cpuid_features();
-	return ( (features & (15<<23)) == (15<<23) );
-#endif
 }
 
 /*
@@ -51,13 +45,12 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	u8 **dptr = (u8 **)ptrs;
 	u8 *p, *q;
 	int d, z, z0;
-	raid6_sse_save_t sa;
 
 	z0 = disks - 3;		/* Highest data disk */
 	p = dptr[z0+1];		/* XOR parity */
 	q = dptr[z0+2];		/* RS syndrome */
 
-	raid6_before_sse2(&sa);
+	kernel_fpu_begin();
 
 	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
 	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
@@ -93,8 +86,8 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
 		asm volatile("pxor %xmm4,%xmm4");
 	}
 
-	raid6_after_sse2(&sa);
 	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
 }
 
 const struct raid6_calls raid6_sse2x1 = {
@@ -112,13 +105,12 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	u8 **dptr = (u8 **)ptrs;
 	u8 *p, *q;
 	int d, z, z0;
-	raid6_sse_save_t sa;
 
 	z0 = disks - 3;		/* Highest data disk */
 	p = dptr[z0+1];		/* XOR parity */
 	q = dptr[z0+2];		/* RS syndrome */
 
-	raid6_before_sse2(&sa);
+	kernel_fpu_begin();
 
 	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
 	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
@@ -156,8 +148,8 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
 		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
 	}
 
-	raid6_after_sse2(&sa);
 	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
 }
 
 const struct raid6_calls raid6_sse2x2 = {
@@ -179,13 +171,12 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	u8 **dptr = (u8 **)ptrs;
 	u8 *p, *q;
 	int d, z, z0;
-	raid6_sse16_save_t sa;
 
 	z0 = disks - 3;		/* Highest data disk */
 	p = dptr[z0+1];		/* XOR parity */
 	q = dptr[z0+2];		/* RS syndrome */
 
-	raid6_before_sse16(&sa);
+	kernel_fpu_begin();
 
 	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
 	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
@@ -256,8 +247,9 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
 		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
 		asm volatile("pxor %xmm14,%xmm14");
 	}
+
 	asm volatile("sfence" : : : "memory");
-	raid6_after_sse16(&sa);
+	kernel_fpu_end();
 }
 
 const struct raid6_calls raid6_sse2x4 = {
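
Note the ordering in the SSE paths: the loops write p[] and q[] with non-temporal stores (movntq/movntdq), so each function issues sfence before kernel_fpu_end(), draining the write-combining buffers while the FPU is still owned. A sketch of the shape, loops elided and the function name hypothetical:

	static void raid6_example_nt_section(void)
	{
		kernel_fpu_begin();

		/* ... movntq/movntdq non-temporal stores to p[] and q[] ... */

		/* Make the NT stores globally visible before releasing
		   the FPU. */
		asm volatile("sfence" : : : "memory");

		kernel_fpu_end();
	}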
diff --git a/drivers/md/raid6x86.h b/drivers/md/raid6x86.h
index 4cf20534fe44..9111950414ff 100644
--- a/drivers/md/raid6x86.h
+++ b/drivers/md/raid6x86.h
@@ -21,224 +21,40 @@
 
 #if defined(__i386__) || defined(__x86_64__)
 
-#ifdef __x86_64__
-
-typedef struct {
-	unsigned int fsave[27];
-	unsigned long cr0;
-} raid6_mmx_save_t __attribute__((aligned(16)));
-
-/* N.B.: For SSE we only save %xmm0-%xmm7 even for x86-64, since
-   the code doesn't know about the additional x86-64 registers */
-typedef struct {
-	unsigned int sarea[8*4+2];
-	unsigned long cr0;
-} raid6_sse_save_t __attribute__((aligned(16)));
-
-/* This is for x86-64-specific code which uses all 16 XMM registers */
-typedef struct {
-	unsigned int sarea[16*4+2];
-	unsigned long cr0;
-} raid6_sse16_save_t __attribute__((aligned(16)));
-
-/* On x86-64 the stack *SHOULD* be 16-byte aligned, but currently this
-   is buggy in the kernel and it's only 8-byte aligned in places, so
-   we need to do this anyway. Sigh. */
-#define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15))
-
-#else /* __i386__ */
-
-typedef struct {
-	unsigned int fsave[27];
-	unsigned long cr0;
-} raid6_mmx_save_t;
-
-/* On i386, the stack is only 8-byte aligned, but SSE requires 16-byte
-   alignment.  The +3 is so we have the slack space to manually align
-   a properly-sized area correctly. */
-typedef struct {
-	unsigned int sarea[8*4+3];
-	unsigned long cr0;
-} raid6_sse_save_t;
-
-/* Find the 16-byte aligned save area */
-#define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15))
-
-#endif
-
 #ifdef __KERNEL__ /* Real code */
 
-/* Note: %cr0 is 32 bits on i386 and 64 bits on x86-64 */
-
-static inline unsigned long raid6_get_fpu(void)
-{
-	unsigned long cr0;
-
-	preempt_disable();
-	asm volatile("mov %%cr0,%0 ; clts" : "=r" (cr0));
-	return cr0;
-}
-
-static inline void raid6_put_fpu(unsigned long cr0)
-{
-	asm volatile("mov %0,%%cr0" : : "r" (cr0));
-	preempt_enable();
-}
+#include <asm/i387.h>
 
 #else /* Dummy code for user space testing */
 
-static inline unsigned long raid6_get_fpu(void)
-{
-	return 0xf00ba6;
-}
-
-static inline void raid6_put_fpu(unsigned long cr0)
-{
-	(void)cr0;
-}
-
-#endif
-
-static inline void raid6_before_mmx(raid6_mmx_save_t *s)
-{
-	s->cr0 = raid6_get_fpu();
-	asm volatile("fsave %0 ; fwait" : "=m" (s->fsave[0]));
-}
-
-static inline void raid6_after_mmx(raid6_mmx_save_t *s)
-{
-	asm volatile("frstor %0" : : "m" (s->fsave[0]));
-	raid6_put_fpu(s->cr0);
-}
-
-static inline void raid6_before_sse(raid6_sse_save_t *s)
-{
-	unsigned int *rsa = SAREA(s);
-
-	s->cr0 = raid6_get_fpu();
-
-	asm volatile("movaps %%xmm0,%0" : "=m" (rsa[0]));
-	asm volatile("movaps %%xmm1,%0" : "=m" (rsa[4]));
-	asm volatile("movaps %%xmm2,%0" : "=m" (rsa[8]));
-	asm volatile("movaps %%xmm3,%0" : "=m" (rsa[12]));
-	asm volatile("movaps %%xmm4,%0" : "=m" (rsa[16]));
-	asm volatile("movaps %%xmm5,%0" : "=m" (rsa[20]));
-	asm volatile("movaps %%xmm6,%0" : "=m" (rsa[24]));
-	asm volatile("movaps %%xmm7,%0" : "=m" (rsa[28]));
-}
-
-static inline void raid6_after_sse(raid6_sse_save_t *s)
-{
-	unsigned int *rsa = SAREA(s);
-
-	asm volatile("movaps %0,%%xmm0" : : "m" (rsa[0]));
-	asm volatile("movaps %0,%%xmm1" : : "m" (rsa[4]));
-	asm volatile("movaps %0,%%xmm2" : : "m" (rsa[8]));
-	asm volatile("movaps %0,%%xmm3" : : "m" (rsa[12]));
-	asm volatile("movaps %0,%%xmm4" : : "m" (rsa[16]));
-	asm volatile("movaps %0,%%xmm5" : : "m" (rsa[20]));
-	asm volatile("movaps %0,%%xmm6" : : "m" (rsa[24]));
-	asm volatile("movaps %0,%%xmm7" : : "m" (rsa[28]));
-
-	raid6_put_fpu(s->cr0);
-}
-
-static inline void raid6_before_sse2(raid6_sse_save_t *s)
+static inline void kernel_fpu_begin(void)
 {
-	unsigned int *rsa = SAREA(s);
-
-	s->cr0 = raid6_get_fpu();
-
-	asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0]));
-	asm volatile("movdqa %%xmm1,%0" : "=m" (rsa[4]));
-	asm volatile("movdqa %%xmm2,%0" : "=m" (rsa[8]));
-	asm volatile("movdqa %%xmm3,%0" : "=m" (rsa[12]));
-	asm volatile("movdqa %%xmm4,%0" : "=m" (rsa[16]));
-	asm volatile("movdqa %%xmm5,%0" : "=m" (rsa[20]));
-	asm volatile("movdqa %%xmm6,%0" : "=m" (rsa[24]));
-	asm volatile("movdqa %%xmm7,%0" : "=m" (rsa[28]));
 }
 
-static inline void raid6_after_sse2(raid6_sse_save_t *s)
+static inline void kernel_fpu_end(void)
 {
-	unsigned int *rsa = SAREA(s);
-
-	asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0]));
-	asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4]));
-	asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8]));
-	asm volatile("movdqa %0,%%xmm3" : : "m" (rsa[12]));
-	asm volatile("movdqa %0,%%xmm4" : : "m" (rsa[16]));
-	asm volatile("movdqa %0,%%xmm5" : : "m" (rsa[20]));
-	asm volatile("movdqa %0,%%xmm6" : : "m" (rsa[24]));
-	asm volatile("movdqa %0,%%xmm7" : : "m" (rsa[28]));
-
-	raid6_put_fpu(s->cr0);
 }
 
-#ifdef __x86_64__
-
-static inline void raid6_before_sse16(raid6_sse16_save_t *s)
-{
-	unsigned int *rsa = SAREA(s);
-
-	s->cr0 = raid6_get_fpu();
+#define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions
+					   * (fast save and restore) */
+#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
 
-	asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0]));
-	asm volatile("movdqa %%xmm1,%0" : "=m" (rsa[4]));
-	asm volatile("movdqa %%xmm2,%0" : "=m" (rsa[8]));
-	asm volatile("movdqa %%xmm3,%0" : "=m" (rsa[12]));
-	asm volatile("movdqa %%xmm4,%0" : "=m" (rsa[16]));
-	asm volatile("movdqa %%xmm5,%0" : "=m" (rsa[20]));
-	asm volatile("movdqa %%xmm6,%0" : "=m" (rsa[24]));
-	asm volatile("movdqa %%xmm7,%0" : "=m" (rsa[28]));
-	asm volatile("movdqa %%xmm8,%0" : "=m" (rsa[32]));
-	asm volatile("movdqa %%xmm9,%0" : "=m" (rsa[36]));
-	asm volatile("movdqa %%xmm10,%0" : "=m" (rsa[40]));
-	asm volatile("movdqa %%xmm11,%0" : "=m" (rsa[44]));
-	asm volatile("movdqa %%xmm12,%0" : "=m" (rsa[48]));
-	asm volatile("movdqa %%xmm13,%0" : "=m" (rsa[52]));
-	asm volatile("movdqa %%xmm14,%0" : "=m" (rsa[56]));
-	asm volatile("movdqa %%xmm15,%0" : "=m" (rsa[60]));
-}
-
-static inline void raid6_after_sse16(raid6_sse16_save_t *s)
+/* Should work well enough on modern CPUs for testing */
+static inline int boot_cpu_has(int flag)
 {
-	unsigned int *rsa = SAREA(s);
+	u32 eax = (flag >> 5) ? 0x80000001 : 1;
+	u32 edx;
 
-	asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0]));
-	asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4]));
-	asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8]));
-	asm volatile("movdqa %0,%%xmm3" : : "m" (rsa[12]));
-	asm volatile("movdqa %0,%%xmm4" : : "m" (rsa[16]));
-	asm volatile("movdqa %0,%%xmm5" : : "m" (rsa[20]));
-	asm volatile("movdqa %0,%%xmm6" : : "m" (rsa[24]));
-	asm volatile("movdqa %0,%%xmm7" : : "m" (rsa[28]));
-	asm volatile("movdqa %0,%%xmm8" : : "m" (rsa[32]));
-	asm volatile("movdqa %0,%%xmm9" : : "m" (rsa[36]));
-	asm volatile("movdqa %0,%%xmm10" : : "m" (rsa[40]));
-	asm volatile("movdqa %0,%%xmm11" : : "m" (rsa[44]));
-	asm volatile("movdqa %0,%%xmm12" : : "m" (rsa[48]));
-	asm volatile("movdqa %0,%%xmm13" : : "m" (rsa[52]));
-	asm volatile("movdqa %0,%%xmm14" : : "m" (rsa[56]));
-	asm volatile("movdqa %0,%%xmm15" : : "m" (rsa[60]));
+	asm volatile("cpuid"
+		     : "+a" (eax), "=d" (edx)
+		     : : "ecx", "ebx");
 
-	raid6_put_fpu(s->cr0);
+	return (edx >> (flag & 31)) & 1;
 }
 
-#endif /* __x86_64__ */
-
-/* User space test hack */
-#ifndef __KERNEL__
-static inline int cpuid_features(void)
-{
-	u32 eax = 1;
-	u32 ebx, ecx, edx;
-
-	asm volatile("cpuid" :
-		     "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));
-
-	return edx;
-}
 #endif /* ndef __KERNEL__ */
 
 #endif
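
With __KERNEL__ unset, the header now supplies no-op kernel_fpu_begin()/kernel_fpu_end() stubs and a cpuid-backed boot_cpu_has(), so the detection routines above compile unchanged in the user-space test harness. The flag encoding is (cpuid_word*32 + edx_bit): flag >> 5 selects CPUID leaf 1 versus 0x80000001, and flag & 31 selects the EDX bit. A small hypothetical harness, assuming u32 is typedef'd as in the raid6 test build:

	typedef unsigned int u32;

	#include <stdio.h>
	#include "raid6x86.h"	/* the header above, __KERNEL__ not defined */

	int main(void)
	{
		/* X86_FEATURE_MMX = 0*32+23: leaf 1, EDX bit 23 */
		printf("MMX:    %d\n", boot_cpu_has(X86_FEATURE_MMX));
		printf("SSE2:   %d\n", boot_cpu_has(X86_FEATURE_XMM2));
		printf("MMXEXT: %d\n", boot_cpu_has(X86_FEATURE_MMXEXT));
		return 0;
	}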