diff options
Diffstat (limited to 'drivers/md/raid6x86.h')
-rw-r--r-- | drivers/md/raid6x86.h | 218 |
1 files changed, 17 insertions, 201 deletions
diff --git a/drivers/md/raid6x86.h b/drivers/md/raid6x86.h index 4cf20534fe44..9111950414ff 100644 --- a/drivers/md/raid6x86.h +++ b/drivers/md/raid6x86.h | |||
@@ -21,224 +21,40 @@ | |||
21 | 21 | ||
22 | #if defined(__i386__) || defined(__x86_64__) | 22 | #if defined(__i386__) || defined(__x86_64__) |
23 | 23 | ||
24 | #ifdef __x86_64__ | ||
25 | |||
26 | typedef struct { | ||
27 | unsigned int fsave[27]; | ||
28 | unsigned long cr0; | ||
29 | } raid6_mmx_save_t __attribute__((aligned(16))); | ||
30 | |||
31 | /* N.B.: For SSE we only save %xmm0-%xmm7 even for x86-64, since | ||
32 | the code doesn't know about the additional x86-64 registers */ | ||
33 | typedef struct { | ||
34 | unsigned int sarea[8*4+2]; | ||
35 | unsigned long cr0; | ||
36 | } raid6_sse_save_t __attribute__((aligned(16))); | ||
37 | |||
38 | /* This is for x86-64-specific code which uses all 16 XMM registers */ | ||
39 | typedef struct { | ||
40 | unsigned int sarea[16*4+2]; | ||
41 | unsigned long cr0; | ||
42 | } raid6_sse16_save_t __attribute__((aligned(16))); | ||
43 | |||
44 | /* On x86-64 the stack *SHOULD* be 16-byte aligned, but currently this | ||
45 | is buggy in the kernel and it's only 8-byte aligned in places, so | ||
46 | we need to do this anyway. Sigh. */ | ||
47 | #define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15)) | ||
48 | |||
49 | #else /* __i386__ */ | ||
50 | |||
51 | typedef struct { | ||
52 | unsigned int fsave[27]; | ||
53 | unsigned long cr0; | ||
54 | } raid6_mmx_save_t; | ||
55 | |||
56 | /* On i386, the stack is only 8-byte aligned, but SSE requires 16-byte | ||
57 | alignment. The +3 is so we have the slack space to manually align | ||
58 | a properly-sized area correctly. */ | ||
59 | typedef struct { | ||
60 | unsigned int sarea[8*4+3]; | ||
61 | unsigned long cr0; | ||
62 | } raid6_sse_save_t; | ||
63 | |||
64 | /* Find the 16-byte aligned save area */ | ||
65 | #define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15)) | ||
66 | |||
67 | #endif | ||
68 | |||
69 | #ifdef __KERNEL__ /* Real code */ | 24 | #ifdef __KERNEL__ /* Real code */ |
70 | 25 | ||
71 | /* Note: %cr0 is 32 bits on i386 and 64 bits on x86-64 */ | 26 | #include <asm/i387.h> |
72 | |||
73 | static inline unsigned long raid6_get_fpu(void) | ||
74 | { | ||
75 | unsigned long cr0; | ||
76 | |||
77 | preempt_disable(); | ||
78 | asm volatile("mov %%cr0,%0 ; clts" : "=r" (cr0)); | ||
79 | return cr0; | ||
80 | } | ||
81 | |||
82 | static inline void raid6_put_fpu(unsigned long cr0) | ||
83 | { | ||
84 | asm volatile("mov %0,%%cr0" : : "r" (cr0)); | ||
85 | preempt_enable(); | ||
86 | } | ||
87 | 27 | ||
88 | #else /* Dummy code for user space testing */ | 28 | #else /* Dummy code for user space testing */ |
89 | 29 | ||
90 | static inline unsigned long raid6_get_fpu(void) | 30 | static inline void kernel_fpu_begin(void) |
91 | { | ||
92 | return 0xf00ba6; | ||
93 | } | ||
94 | |||
95 | static inline void raid6_put_fpu(unsigned long cr0) | ||
96 | { | ||
97 | (void)cr0; | ||
98 | } | ||
99 | |||
100 | #endif | ||
101 | |||
102 | static inline void raid6_before_mmx(raid6_mmx_save_t *s) | ||
103 | { | ||
104 | s->cr0 = raid6_get_fpu(); | ||
105 | asm volatile("fsave %0 ; fwait" : "=m" (s->fsave[0])); | ||
106 | } | ||
107 | |||
108 | static inline void raid6_after_mmx(raid6_mmx_save_t *s) | ||
109 | { | ||
110 | asm volatile("frstor %0" : : "m" (s->fsave[0])); | ||
111 | raid6_put_fpu(s->cr0); | ||
112 | } | ||
113 | |||
114 | static inline void raid6_before_sse(raid6_sse_save_t *s) | ||
115 | { | ||
116 | unsigned int *rsa = SAREA(s); | ||
117 | |||
118 | s->cr0 = raid6_get_fpu(); | ||
119 | |||
120 | asm volatile("movaps %%xmm0,%0" : "=m" (rsa[0])); | ||
121 | asm volatile("movaps %%xmm1,%0" : "=m" (rsa[4])); | ||
122 | asm volatile("movaps %%xmm2,%0" : "=m" (rsa[8])); | ||
123 | asm volatile("movaps %%xmm3,%0" : "=m" (rsa[12])); | ||
124 | asm volatile("movaps %%xmm4,%0" : "=m" (rsa[16])); | ||
125 | asm volatile("movaps %%xmm5,%0" : "=m" (rsa[20])); | ||
126 | asm volatile("movaps %%xmm6,%0" : "=m" (rsa[24])); | ||
127 | asm volatile("movaps %%xmm7,%0" : "=m" (rsa[28])); | ||
128 | } | ||
129 | |||
130 | static inline void raid6_after_sse(raid6_sse_save_t *s) | ||
131 | { | ||
132 | unsigned int *rsa = SAREA(s); | ||
133 | |||
134 | asm volatile("movaps %0,%%xmm0" : : "m" (rsa[0])); | ||
135 | asm volatile("movaps %0,%%xmm1" : : "m" (rsa[4])); | ||
136 | asm volatile("movaps %0,%%xmm2" : : "m" (rsa[8])); | ||
137 | asm volatile("movaps %0,%%xmm3" : : "m" (rsa[12])); | ||
138 | asm volatile("movaps %0,%%xmm4" : : "m" (rsa[16])); | ||
139 | asm volatile("movaps %0,%%xmm5" : : "m" (rsa[20])); | ||
140 | asm volatile("movaps %0,%%xmm6" : : "m" (rsa[24])); | ||
141 | asm volatile("movaps %0,%%xmm7" : : "m" (rsa[28])); | ||
142 | |||
143 | raid6_put_fpu(s->cr0); | ||
144 | } | ||
145 | |||
146 | static inline void raid6_before_sse2(raid6_sse_save_t *s) | ||
147 | { | 31 | { |
148 | unsigned int *rsa = SAREA(s); | ||
149 | |||
150 | s->cr0 = raid6_get_fpu(); | ||
151 | |||
152 | asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0])); | ||
153 | asm volatile("movdqa %%xmm1,%0" : "=m" (rsa[4])); | ||
154 | asm volatile("movdqa %%xmm2,%0" : "=m" (rsa[8])); | ||
155 | asm volatile("movdqa %%xmm3,%0" : "=m" (rsa[12])); | ||
156 | asm volatile("movdqa %%xmm4,%0" : "=m" (rsa[16])); | ||
157 | asm volatile("movdqa %%xmm5,%0" : "=m" (rsa[20])); | ||
158 | asm volatile("movdqa %%xmm6,%0" : "=m" (rsa[24])); | ||
159 | asm volatile("movdqa %%xmm7,%0" : "=m" (rsa[28])); | ||
160 | } | 32 | } |
161 | 33 | ||
162 | static inline void raid6_after_sse2(raid6_sse_save_t *s) | 34 | static inline void kernel_fpu_end(void) |
163 | { | 35 | { |
164 | unsigned int *rsa = SAREA(s); | ||
165 | |||
166 | asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0])); | ||
167 | asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4])); | ||
168 | asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8])); | ||
169 | asm volatile("movdqa %0,%%xmm3" : : "m" (rsa[12])); | ||
170 | asm volatile("movdqa %0,%%xmm4" : : "m" (rsa[16])); | ||
171 | asm volatile("movdqa %0,%%xmm5" : : "m" (rsa[20])); | ||
172 | asm volatile("movdqa %0,%%xmm6" : : "m" (rsa[24])); | ||
173 | asm volatile("movdqa %0,%%xmm7" : : "m" (rsa[28])); | ||
174 | |||
175 | raid6_put_fpu(s->cr0); | ||
176 | } | 36 | } |
177 | 37 | ||
178 | #ifdef __x86_64__ | 38 | #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ |
179 | 39 | #define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions | |
180 | static inline void raid6_before_sse16(raid6_sse16_save_t *s) | 40 | * (fast save and restore) */ |
181 | { | 41 | #define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ |
182 | unsigned int *rsa = SAREA(s); | 42 | #define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ |
183 | 43 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ | |
184 | s->cr0 = raid6_get_fpu(); | ||
185 | 44 | ||
186 | asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0])); | 45 | /* Should work well enough on modern CPUs for testing */ |
187 | asm volatile("movdqa %%xmm1,%0" : "=m" (rsa[4])); | 46 | static inline int boot_cpu_has(int flag) |
188 | asm volatile("movdqa %%xmm2,%0" : "=m" (rsa[8])); | ||
189 | asm volatile("movdqa %%xmm3,%0" : "=m" (rsa[12])); | ||
190 | asm volatile("movdqa %%xmm4,%0" : "=m" (rsa[16])); | ||
191 | asm volatile("movdqa %%xmm5,%0" : "=m" (rsa[20])); | ||
192 | asm volatile("movdqa %%xmm6,%0" : "=m" (rsa[24])); | ||
193 | asm volatile("movdqa %%xmm7,%0" : "=m" (rsa[28])); | ||
194 | asm volatile("movdqa %%xmm8,%0" : "=m" (rsa[32])); | ||
195 | asm volatile("movdqa %%xmm9,%0" : "=m" (rsa[36])); | ||
196 | asm volatile("movdqa %%xmm10,%0" : "=m" (rsa[40])); | ||
197 | asm volatile("movdqa %%xmm11,%0" : "=m" (rsa[44])); | ||
198 | asm volatile("movdqa %%xmm12,%0" : "=m" (rsa[48])); | ||
199 | asm volatile("movdqa %%xmm13,%0" : "=m" (rsa[52])); | ||
200 | asm volatile("movdqa %%xmm14,%0" : "=m" (rsa[56])); | ||
201 | asm volatile("movdqa %%xmm15,%0" : "=m" (rsa[60])); | ||
202 | } | ||
203 | |||
204 | static inline void raid6_after_sse16(raid6_sse16_save_t *s) | ||
205 | { | 47 | { |
206 | unsigned int *rsa = SAREA(s); | 48 | u32 eax = (flag >> 5) ? 0x80000001 : 1; |
49 | u32 edx; | ||
207 | 50 | ||
208 | asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0])); | 51 | asm volatile("cpuid" |
209 | asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4])); | 52 | : "+a" (eax), "=d" (edx) |
210 | asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8])); | 53 | : : "ecx", "ebx"); |
211 | asm volatile("movdqa %0,%%xmm3" : : "m" (rsa[12])); | ||
212 | asm volatile("movdqa %0,%%xmm4" : : "m" (rsa[16])); | ||
213 | asm volatile("movdqa %0,%%xmm5" : : "m" (rsa[20])); | ||
214 | asm volatile("movdqa %0,%%xmm6" : : "m" (rsa[24])); | ||
215 | asm volatile("movdqa %0,%%xmm7" : : "m" (rsa[28])); | ||
216 | asm volatile("movdqa %0,%%xmm8" : : "m" (rsa[32])); | ||
217 | asm volatile("movdqa %0,%%xmm9" : : "m" (rsa[36])); | ||
218 | asm volatile("movdqa %0,%%xmm10" : : "m" (rsa[40])); | ||
219 | asm volatile("movdqa %0,%%xmm11" : : "m" (rsa[44])); | ||
220 | asm volatile("movdqa %0,%%xmm12" : : "m" (rsa[48])); | ||
221 | asm volatile("movdqa %0,%%xmm13" : : "m" (rsa[52])); | ||
222 | asm volatile("movdqa %0,%%xmm14" : : "m" (rsa[56])); | ||
223 | asm volatile("movdqa %0,%%xmm15" : : "m" (rsa[60])); | ||
224 | 54 | ||
225 | raid6_put_fpu(s->cr0); | 55 | return (edx >> (flag & 31)) & 1; |
226 | } | 56 | } |
227 | 57 | ||
228 | #endif /* __x86_64__ */ | ||
229 | |||
230 | /* User space test hack */ | ||
231 | #ifndef __KERNEL__ | ||
232 | static inline int cpuid_features(void) | ||
233 | { | ||
234 | u32 eax = 1; | ||
235 | u32 ebx, ecx, edx; | ||
236 | |||
237 | asm volatile("cpuid" : | ||
238 | "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)); | ||
239 | |||
240 | return edx; | ||
241 | } | ||
242 | #endif /* ndef __KERNEL__ */ | 58 | #endif /* ndef __KERNEL__ */ |
243 | 59 | ||
244 | #endif | 60 | #endif |