diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-07 12:45:43 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-07 12:45:43 -0400 |
| commit | c23112e0395a89c8a52cd955442240de7fba46aa (patch) | |
| tree | b9a8876cfaf5d86fc7a04be1560b2503d2a71969 /lib/raid6 | |
| parent | 4dfddf503670d8def0fddb497e628130fc4522a8 (diff) | |
| parent | bb086a89a406b5d877ee616f1490fcc81f8e1b2b (diff) | |
Merge tag 'md/4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD updates from Shaohua Li:
"This update includes:
- new AVX512 instruction based raid6 gen/recovery algorithm
- a couple of md-cluster related bug fixes
- fix a potential deadlock
- set nonrotational bit for raid array with SSD
- set correct max_hw_sectors for raid5/6, which hopefully can improve
performance a little bit
- other minor fixes"
* tag 'md/4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
md: set rotational bit
raid6/test/test.c: bug fix: Specify aligned(alignment) attributes to the char arrays
raid5: handle register_shrinker failure
raid5: fix to detect failure of register_shrinker
md: fix a potential deadlock
md/bitmap: fix wrong cleanup
raid5: allow arbitrary max_hw_sectors
lib/raid6: Add AVX512 optimized xor_syndrome functions
lib/raid6/test/Makefile: Add avx512 gen_syndrome and recovery functions
lib/raid6: Add AVX512 optimized recovery functions
lib/raid6: Add AVX512 optimized gen_syndrome functions
md-cluster: make resync lock also could be interrupted
md-cluster: introduce dlm_lock_sync_interruptible to fix tasks hang
md-cluster: convert the completion to wait queue
md-cluster: protect md_find_rdev_nr_rcu with rcu lock
md-cluster: clean related infos of cluster
md: changes for MD_STILL_CLOSED flag
md-cluster: remove some unnecessary dlm_unlock_sync
md-cluster: use FORCEUNLOCK in lockres_free
md-cluster: call md_kick_rdev_from_array once ack failed
Diffstat (limited to 'lib/raid6')
| -rw-r--r-- | lib/raid6/Makefile | 2 | ||||
| -rw-r--r-- | lib/raid6/algos.c | 12 | ||||
| -rw-r--r-- | lib/raid6/avx512.c | 569 | ||||
| -rw-r--r-- | lib/raid6/recov_avx512.c | 388 | ||||
| -rw-r--r-- | lib/raid6/test/Makefile | 5 | ||||
| -rw-r--r-- | lib/raid6/test/test.c | 7 | ||||
| -rw-r--r-- | lib/raid6/x86.h | 10 |
7 files changed, 988 insertions, 5 deletions
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 29f503ebfd60..3057011f5599 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile | |||
| @@ -3,7 +3,7 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o | |||
| 3 | raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ | 3 | raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ |
| 4 | int8.o int16.o int32.o | 4 | int8.o int16.o int32.o |
| 5 | 5 | ||
| 6 | raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o | 6 | raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o |
| 7 | raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o | 7 | raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o |
| 8 | raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o | 8 | raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o |
| 9 | raid6_pq-$(CONFIG_TILEGX) += tilegx8.o | 9 | raid6_pq-$(CONFIG_TILEGX) += tilegx8.o |
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 592ff49df47d..7857049fd7d3 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c | |||
| @@ -49,6 +49,10 @@ const struct raid6_calls * const raid6_algos[] = { | |||
| 49 | &raid6_avx2x1, | 49 | &raid6_avx2x1, |
| 50 | &raid6_avx2x2, | 50 | &raid6_avx2x2, |
| 51 | #endif | 51 | #endif |
| 52 | #ifdef CONFIG_AS_AVX512 | ||
| 53 | &raid6_avx512x1, | ||
| 54 | &raid6_avx512x2, | ||
| 55 | #endif | ||
| 52 | #endif | 56 | #endif |
| 53 | #if defined(__x86_64__) && !defined(__arch_um__) | 57 | #if defined(__x86_64__) && !defined(__arch_um__) |
| 54 | &raid6_sse2x1, | 58 | &raid6_sse2x1, |
| @@ -59,6 +63,11 @@ const struct raid6_calls * const raid6_algos[] = { | |||
| 59 | &raid6_avx2x2, | 63 | &raid6_avx2x2, |
| 60 | &raid6_avx2x4, | 64 | &raid6_avx2x4, |
| 61 | #endif | 65 | #endif |
| 66 | #ifdef CONFIG_AS_AVX512 | ||
| 67 | &raid6_avx512x1, | ||
| 68 | &raid6_avx512x2, | ||
| 69 | &raid6_avx512x4, | ||
| 70 | #endif | ||
| 62 | #endif | 71 | #endif |
| 63 | #ifdef CONFIG_ALTIVEC | 72 | #ifdef CONFIG_ALTIVEC |
| 64 | &raid6_altivec1, | 73 | &raid6_altivec1, |
| @@ -92,6 +101,9 @@ void (*raid6_datap_recov)(int, size_t, int, void **); | |||
| 92 | EXPORT_SYMBOL_GPL(raid6_datap_recov); | 101 | EXPORT_SYMBOL_GPL(raid6_datap_recov); |
| 93 | 102 | ||
| 94 | const struct raid6_recov_calls *const raid6_recov_algos[] = { | 103 | const struct raid6_recov_calls *const raid6_recov_algos[] = { |
| 104 | #ifdef CONFIG_AS_AVX512 | ||
| 105 | &raid6_recov_avx512, | ||
| 106 | #endif | ||
| 95 | #ifdef CONFIG_AS_AVX2 | 107 | #ifdef CONFIG_AS_AVX2 |
| 96 | &raid6_recov_avx2, | 108 | &raid6_recov_avx2, |
| 97 | #endif | 109 | #endif |
diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c new file mode 100644 index 000000000000..f524a7972006 --- /dev/null +++ b/lib/raid6/avx512.c | |||
| @@ -0,0 +1,569 @@ | |||
| 1 | /* -*- linux-c -*- -------------------------------------------------------- | ||
| 2 | * | ||
| 3 | * Copyright (C) 2016 Intel Corporation | ||
| 4 | * | ||
| 5 | * Author: Gayatri Kammela <gayatri.kammela@intel.com> | ||
| 6 | * Author: Megha Dey <megha.dey@linux.intel.com> | ||
| 7 | * | ||
| 8 | * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved | ||
| 9 | * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or modify | ||
| 12 | * it under the terms of the GNU General Public License as published by | ||
| 13 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
| 14 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
| 15 | * (at your option) any later version; incorporated herein by reference. | ||
| 16 | * | ||
| 17 | * ----------------------------------------------------------------------- | ||
| 18 | */ | ||
| 19 | |||
| 20 | /* | ||
| 21 | * AVX512 implementation of RAID-6 syndrome functions | ||
| 22 | * | ||
| 23 | */ | ||
| 24 | |||
| 25 | #ifdef CONFIG_AS_AVX512 | ||
| 26 | |||
| 27 | #include <linux/raid/pq.h> | ||
| 28 | #include "x86.h" | ||
| 29 | |||
| 30 | static const struct raid6_avx512_constants { | ||
| 31 | u64 x1d[8]; | ||
| 32 | } raid6_avx512_constants __aligned(512) = { | ||
| 33 | { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, | ||
| 34 | 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, | ||
| 35 | 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, | ||
| 36 | 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,}, | ||
| 37 | }; | ||
| 38 | |||
| 39 | static int raid6_have_avx512(void) | ||
| 40 | { | ||
| 41 | return boot_cpu_has(X86_FEATURE_AVX2) && | ||
| 42 | boot_cpu_has(X86_FEATURE_AVX) && | ||
| 43 | boot_cpu_has(X86_FEATURE_AVX512F) && | ||
| 44 | boot_cpu_has(X86_FEATURE_AVX512BW) && | ||
| 45 | boot_cpu_has(X86_FEATURE_AVX512VL) && | ||
| 46 | boot_cpu_has(X86_FEATURE_AVX512DQ); | ||
| 47 | } | ||
| 48 | |||
| 49 | static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
| 50 | { | ||
| 51 | u8 **dptr = (u8 **)ptrs; | ||
| 52 | u8 *p, *q; | ||
| 53 | int d, z, z0; | ||
| 54 | |||
| 55 | z0 = disks - 3; /* Highest data disk */ | ||
| 56 | p = dptr[z0+1]; /* XOR parity */ | ||
| 57 | q = dptr[z0+2]; /* RS syndrome */ | ||
| 58 | |||
| 59 | kernel_fpu_begin(); | ||
| 60 | |||
| 61 | asm volatile("vmovdqa64 %0,%%zmm0\n\t" | ||
| 62 | "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */ | ||
| 63 | : | ||
| 64 | : "m" (raid6_avx512_constants.x1d[0])); | ||
| 65 | |||
| 66 | for (d = 0; d < bytes; d += 64) { | ||
| 67 | asm volatile("prefetchnta %0\n\t" | ||
| 68 | "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */ | ||
| 69 | "prefetchnta %1\n\t" | ||
| 70 | "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */ | ||
| 71 | "vmovdqa64 %1,%%zmm6" | ||
| 72 | : | ||
| 73 | : "m" (dptr[z0][d]), "m" (dptr[z0-1][d])); | ||
| 74 | for (z = z0-2; z >= 0; z--) { | ||
| 75 | asm volatile("prefetchnta %0\n\t" | ||
| 76 | "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
| 77 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
| 78 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
| 79 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
| 80 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 81 | "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t" | ||
| 82 | "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t" | ||
| 83 | "vmovdqa64 %0,%%zmm6" | ||
| 84 | : | ||
| 85 | : "m" (dptr[z][d])); | ||
| 86 | } | ||
| 87 | asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
| 88 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
| 89 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
| 90 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
| 91 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 92 | "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t" | ||
| 93 | "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t" | ||
| 94 | "vmovntdq %%zmm2,%0\n\t" | ||
| 95 | "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" | ||
| 96 | "vmovntdq %%zmm4,%1\n\t" | ||
| 97 | "vpxorq %%zmm4,%%zmm4,%%zmm4" | ||
| 98 | : | ||
| 99 | : "m" (p[d]), "m" (q[d])); | ||
| 100 | } | ||
| 101 | |||
| 102 | asm volatile("sfence" : : : "memory"); | ||
| 103 | kernel_fpu_end(); | ||
| 104 | } | ||
| 105 | |||
| 106 | static void raid6_avx5121_xor_syndrome(int disks, int start, int stop, | ||
| 107 | size_t bytes, void **ptrs) | ||
| 108 | { | ||
| 109 | u8 **dptr = (u8 **)ptrs; | ||
| 110 | u8 *p, *q; | ||
| 111 | int d, z, z0; | ||
| 112 | |||
| 113 | z0 = stop; /* P/Q right side optimization */ | ||
| 114 | p = dptr[disks-2]; /* XOR parity */ | ||
| 115 | q = dptr[disks-1]; /* RS syndrome */ | ||
| 116 | |||
| 117 | kernel_fpu_begin(); | ||
| 118 | |||
| 119 | asm volatile("vmovdqa64 %0,%%zmm0" | ||
| 120 | : : "m" (raid6_avx512_constants.x1d[0])); | ||
| 121 | |||
| 122 | for (d = 0 ; d < bytes ; d += 64) { | ||
| 123 | asm volatile("vmovdqa64 %0,%%zmm4\n\t" | ||
| 124 | "vmovdqa64 %1,%%zmm2\n\t" | ||
| 125 | "vpxorq %%zmm4,%%zmm2,%%zmm2" | ||
| 126 | : | ||
| 127 | : "m" (dptr[z0][d]), "m" (p[d])); | ||
| 128 | /* P/Q data pages */ | ||
| 129 | for (z = z0-1 ; z >= start ; z--) { | ||
| 130 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
| 131 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
| 132 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
| 133 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
| 134 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
| 135 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 136 | "vmovdqa64 %0,%%zmm5\n\t" | ||
| 137 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
| 138 | "vpxorq %%zmm5,%%zmm4,%%zmm4" | ||
| 139 | : | ||
| 140 | : "m" (dptr[z][d])); | ||
| 141 | } | ||
| 142 | /* P/Q left side optimization */ | ||
| 143 | for (z = start-1 ; z >= 0 ; z--) { | ||
| 144 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
| 145 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
| 146 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
| 147 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
| 148 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
| 149 | "vpxorq %%zmm5,%%zmm4,%%zmm4" | ||
| 150 | : | ||
| 151 | : ); | ||
| 152 | } | ||
| 153 | asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t" | ||
| 154 | /* Don't use movntdq for r/w memory area < cache line */ | ||
| 155 | "vmovdqa64 %%zmm4,%0\n\t" | ||
| 156 | "vmovdqa64 %%zmm2,%1" | ||
| 157 | : | ||
| 158 | : "m" (q[d]), "m" (p[d])); | ||
| 159 | } | ||
| 160 | |||
| 161 | asm volatile("sfence" : : : "memory"); | ||
| 162 | kernel_fpu_end(); | ||
| 163 | } | ||
| 164 | |||
| 165 | const struct raid6_calls raid6_avx512x1 = { | ||
| 166 | raid6_avx5121_gen_syndrome, | ||
| 167 | raid6_avx5121_xor_syndrome, | ||
| 168 | raid6_have_avx512, | ||
| 169 | "avx512x1", | ||
| 170 | 1 /* Has cache hints */ | ||
| 171 | }; | ||
| 172 | |||
| 173 | /* | ||
| 174 | * Unrolled-by-2 AVX512 implementation | ||
| 175 | */ | ||
| 176 | static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
| 177 | { | ||
| 178 | u8 **dptr = (u8 **)ptrs; | ||
| 179 | u8 *p, *q; | ||
| 180 | int d, z, z0; | ||
| 181 | |||
| 182 | z0 = disks - 3; /* Highest data disk */ | ||
| 183 | p = dptr[z0+1]; /* XOR parity */ | ||
| 184 | q = dptr[z0+2]; /* RS syndrome */ | ||
| 185 | |||
| 186 | kernel_fpu_begin(); | ||
| 187 | |||
| 188 | asm volatile("vmovdqa64 %0,%%zmm0\n\t" | ||
| 189 | "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */ | ||
| 190 | : | ||
| 191 | : "m" (raid6_avx512_constants.x1d[0])); | ||
| 192 | |||
| 193 | /* We uniformly assume a single prefetch covers at least 64 bytes */ | ||
| 194 | for (d = 0; d < bytes; d += 128) { | ||
| 195 | asm volatile("prefetchnta %0\n\t" | ||
| 196 | "prefetchnta %1\n\t" | ||
| 197 | "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */ | ||
| 198 | "vmovdqa64 %1,%%zmm3\n\t" /* P[1] */ | ||
| 199 | "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */ | ||
| 200 | "vmovdqa64 %%zmm3,%%zmm6" /* Q[1] */ | ||
| 201 | : | ||
| 202 | : "m" (dptr[z0][d]), "m" (dptr[z0][d+64])); | ||
| 203 | for (z = z0-1; z >= 0; z--) { | ||
| 204 | asm volatile("prefetchnta %0\n\t" | ||
| 205 | "prefetchnta %1\n\t" | ||
| 206 | "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
| 207 | "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t" | ||
| 208 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
| 209 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
| 210 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
| 211 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
| 212 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
| 213 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
| 214 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 215 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
| 216 | "vmovdqa64 %0,%%zmm5\n\t" | ||
| 217 | "vmovdqa64 %1,%%zmm7\n\t" | ||
| 218 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
| 219 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
| 220 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 221 | "vpxorq %%zmm7,%%zmm6,%%zmm6" | ||
| 222 | : | ||
| 223 | : "m" (dptr[z][d]), "m" (dptr[z][d+64])); | ||
| 224 | } | ||
| 225 | asm volatile("vmovntdq %%zmm2,%0\n\t" | ||
| 226 | "vmovntdq %%zmm3,%1\n\t" | ||
| 227 | "vmovntdq %%zmm4,%2\n\t" | ||
| 228 | "vmovntdq %%zmm6,%3" | ||
| 229 | : | ||
| 230 | : "m" (p[d]), "m" (p[d+64]), "m" (q[d]), | ||
| 231 | "m" (q[d+64])); | ||
| 232 | } | ||
| 233 | |||
| 234 | asm volatile("sfence" : : : "memory"); | ||
| 235 | kernel_fpu_end(); | ||
| 236 | } | ||
| 237 | |||
| 238 | static void raid6_avx5122_xor_syndrome(int disks, int start, int stop, | ||
| 239 | size_t bytes, void **ptrs) | ||
| 240 | { | ||
| 241 | u8 **dptr = (u8 **)ptrs; | ||
| 242 | u8 *p, *q; | ||
| 243 | int d, z, z0; | ||
| 244 | |||
| 245 | z0 = stop; /* P/Q right side optimization */ | ||
| 246 | p = dptr[disks-2]; /* XOR parity */ | ||
| 247 | q = dptr[disks-1]; /* RS syndrome */ | ||
| 248 | |||
| 249 | kernel_fpu_begin(); | ||
| 250 | |||
| 251 | asm volatile("vmovdqa64 %0,%%zmm0" | ||
| 252 | : : "m" (raid6_avx512_constants.x1d[0])); | ||
| 253 | |||
| 254 | for (d = 0 ; d < bytes ; d += 128) { | ||
| 255 | asm volatile("vmovdqa64 %0,%%zmm4\n\t" | ||
| 256 | "vmovdqa64 %1,%%zmm6\n\t" | ||
| 257 | "vmovdqa64 %2,%%zmm2\n\t" | ||
| 258 | "vmovdqa64 %3,%%zmm3\n\t" | ||
| 259 | "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t" | ||
| 260 | "vpxorq %%zmm6,%%zmm3,%%zmm3" | ||
| 261 | : | ||
| 262 | : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]), | ||
| 263 | "m" (p[d]), "m" (p[d+64])); | ||
| 264 | /* P/Q data pages */ | ||
| 265 | for (z = z0-1 ; z >= start ; z--) { | ||
| 266 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
| 267 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
| 268 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
| 269 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
| 270 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
| 271 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
| 272 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
| 273 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
| 274 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
| 275 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
| 276 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 277 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
| 278 | "vmovdqa64 %0,%%zmm5\n\t" | ||
| 279 | "vmovdqa64 %1,%%zmm7\n\t" | ||
| 280 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
| 281 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
| 282 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 283 | "vpxorq %%zmm7,%%zmm6,%%zmm6" | ||
| 284 | : | ||
| 285 | : "m" (dptr[z][d]), "m" (dptr[z][d+64])); | ||
| 286 | } | ||
| 287 | /* P/Q left side optimization */ | ||
| 288 | for (z = start-1 ; z >= 0 ; z--) { | ||
| 289 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
| 290 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
| 291 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
| 292 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
| 293 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
| 294 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
| 295 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
| 296 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
| 297 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
| 298 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
| 299 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 300 | "vpxorq %%zmm7,%%zmm6,%%zmm6" | ||
| 301 | : | ||
| 302 | : ); | ||
| 303 | } | ||
| 304 | asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t" | ||
| 305 | "vpxorq %1,%%zmm6,%%zmm6\n\t" | ||
| 306 | /* Don't use movntdq for r/w | ||
| 307 | * memory area < cache line | ||
| 308 | */ | ||
| 309 | "vmovdqa64 %%zmm4,%0\n\t" | ||
| 310 | "vmovdqa64 %%zmm6,%1\n\t" | ||
| 311 | "vmovdqa64 %%zmm2,%2\n\t" | ||
| 312 | "vmovdqa64 %%zmm3,%3" | ||
| 313 | : | ||
| 314 | : "m" (q[d]), "m" (q[d+64]), "m" (p[d]), | ||
| 315 | "m" (p[d+64])); | ||
| 316 | } | ||
| 317 | |||
| 318 | asm volatile("sfence" : : : "memory"); | ||
| 319 | kernel_fpu_end(); | ||
| 320 | } | ||
| 321 | |||
| 322 | const struct raid6_calls raid6_avx512x2 = { | ||
| 323 | raid6_avx5122_gen_syndrome, | ||
| 324 | raid6_avx5122_xor_syndrome, | ||
| 325 | raid6_have_avx512, | ||
| 326 | "avx512x2", | ||
| 327 | 1 /* Has cache hints */ | ||
| 328 | }; | ||
| 329 | |||
| 330 | #ifdef CONFIG_X86_64 | ||
| 331 | |||
| 332 | /* | ||
| 333 | * Unrolled-by-4 AVX512 implementation | ||
| 334 | */ | ||
| 335 | static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
| 336 | { | ||
| 337 | u8 **dptr = (u8 **)ptrs; | ||
| 338 | u8 *p, *q; | ||
| 339 | int d, z, z0; | ||
| 340 | |||
| 341 | z0 = disks - 3; /* Highest data disk */ | ||
| 342 | p = dptr[z0+1]; /* XOR parity */ | ||
| 343 | q = dptr[z0+2]; /* RS syndrome */ | ||
| 344 | |||
| 345 | kernel_fpu_begin(); | ||
| 346 | |||
| 347 | asm volatile("vmovdqa64 %0,%%zmm0\n\t" | ||
| 348 | "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t" /* Zero temp */ | ||
| 349 | "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" /* P[0] */ | ||
| 350 | "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" /* P[1] */ | ||
| 351 | "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" /* Q[0] */ | ||
| 352 | "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" /* Q[1] */ | ||
| 353 | "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */ | ||
| 354 | "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */ | ||
| 355 | "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */ | ||
| 356 | "vpxorq %%zmm14,%%zmm14,%%zmm14" /* Q[3] */ | ||
| 357 | : | ||
| 358 | : "m" (raid6_avx512_constants.x1d[0])); | ||
| 359 | |||
| 360 | for (d = 0; d < bytes; d += 256) { | ||
| 361 | for (z = z0; z >= 0; z--) { | ||
| 362 | asm volatile("prefetchnta %0\n\t" | ||
| 363 | "prefetchnta %1\n\t" | ||
| 364 | "prefetchnta %2\n\t" | ||
| 365 | "prefetchnta %3\n\t" | ||
| 366 | "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
| 367 | "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t" | ||
| 368 | "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t" | ||
| 369 | "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t" | ||
| 370 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
| 371 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
| 372 | "vpmovm2b %%k3,%%zmm13\n\t" | ||
| 373 | "vpmovm2b %%k4,%%zmm15\n\t" | ||
| 374 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
| 375 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
| 376 | "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" | ||
| 377 | "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t" | ||
| 378 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
| 379 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
| 380 | "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" | ||
| 381 | "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" | ||
| 382 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 383 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
| 384 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
| 385 | "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t" | ||
| 386 | "vmovdqa64 %0,%%zmm5\n\t" | ||
| 387 | "vmovdqa64 %1,%%zmm7\n\t" | ||
| 388 | "vmovdqa64 %2,%%zmm13\n\t" | ||
| 389 | "vmovdqa64 %3,%%zmm15\n\t" | ||
| 390 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
| 391 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
| 392 | "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t" | ||
| 393 | "vpxorq %%zmm15,%%zmm11,%%zmm11\n" | ||
| 394 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 395 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
| 396 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
| 397 | "vpxorq %%zmm15,%%zmm14,%%zmm14" | ||
| 398 | : | ||
| 399 | : "m" (dptr[z][d]), "m" (dptr[z][d+64]), | ||
| 400 | "m" (dptr[z][d+128]), "m" (dptr[z][d+192])); | ||
| 401 | } | ||
| 402 | asm volatile("vmovntdq %%zmm2,%0\n\t" | ||
| 403 | "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" | ||
| 404 | "vmovntdq %%zmm3,%1\n\t" | ||
| 405 | "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" | ||
| 406 | "vmovntdq %%zmm10,%2\n\t" | ||
| 407 | "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" | ||
| 408 | "vmovntdq %%zmm11,%3\n\t" | ||
| 409 | "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" | ||
| 410 | "vmovntdq %%zmm4,%4\n\t" | ||
| 411 | "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" | ||
| 412 | "vmovntdq %%zmm6,%5\n\t" | ||
| 413 | "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" | ||
| 414 | "vmovntdq %%zmm12,%6\n\t" | ||
| 415 | "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" | ||
| 416 | "vmovntdq %%zmm14,%7\n\t" | ||
| 417 | "vpxorq %%zmm14,%%zmm14,%%zmm14" | ||
| 418 | : | ||
| 419 | : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), | ||
| 420 | "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]), | ||
| 421 | "m" (q[d+128]), "m" (q[d+192])); | ||
| 422 | } | ||
| 423 | |||
| 424 | asm volatile("sfence" : : : "memory"); | ||
| 425 | kernel_fpu_end(); | ||
| 426 | } | ||
| 427 | |||
| 428 | static void raid6_avx5124_xor_syndrome(int disks, int start, int stop, | ||
| 429 | size_t bytes, void **ptrs) | ||
| 430 | { | ||
| 431 | u8 **dptr = (u8 **)ptrs; | ||
| 432 | u8 *p, *q; | ||
| 433 | int d, z, z0; | ||
| 434 | |||
| 435 | z0 = stop; /* P/Q right side optimization */ | ||
| 436 | p = dptr[disks-2]; /* XOR parity */ | ||
| 437 | q = dptr[disks-1]; /* RS syndrome */ | ||
| 438 | |||
| 439 | kernel_fpu_begin(); | ||
| 440 | |||
| 441 | asm volatile("vmovdqa64 %0,%%zmm0" | ||
| 442 | :: "m" (raid6_avx512_constants.x1d[0])); | ||
| 443 | |||
| 444 | for (d = 0 ; d < bytes ; d += 256) { | ||
| 445 | asm volatile("vmovdqa64 %0,%%zmm4\n\t" | ||
| 446 | "vmovdqa64 %1,%%zmm6\n\t" | ||
| 447 | "vmovdqa64 %2,%%zmm12\n\t" | ||
| 448 | "vmovdqa64 %3,%%zmm14\n\t" | ||
| 449 | "vmovdqa64 %4,%%zmm2\n\t" | ||
| 450 | "vmovdqa64 %5,%%zmm3\n\t" | ||
| 451 | "vmovdqa64 %6,%%zmm10\n\t" | ||
| 452 | "vmovdqa64 %7,%%zmm11\n\t" | ||
| 453 | "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t" | ||
| 454 | "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t" | ||
| 455 | "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t" | ||
| 456 | "vpxorq %%zmm14,%%zmm11,%%zmm11" | ||
| 457 | : | ||
| 458 | : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]), | ||
| 459 | "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]), | ||
| 460 | "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), | ||
| 461 | "m" (p[d+192])); | ||
| 462 | /* P/Q data pages */ | ||
| 463 | for (z = z0-1 ; z >= start ; z--) { | ||
| 464 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
| 465 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
| 466 | "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t" | ||
| 467 | "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t" | ||
| 468 | "prefetchnta %0\n\t" | ||
| 469 | "prefetchnta %2\n\t" | ||
| 470 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
| 471 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
| 472 | "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t" | ||
| 473 | "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t" | ||
| 474 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
| 475 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
| 476 | "vpmovm2b %%k3,%%zmm13\n\t" | ||
| 477 | "vpmovm2b %%k4,%%zmm15\n\t" | ||
| 478 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
| 479 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
| 480 | "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" | ||
| 481 | "vpaddb %%Zmm14,%%zmm14,%%zmm14\n\t" | ||
| 482 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
| 483 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
| 484 | "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" | ||
| 485 | "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" | ||
| 486 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 487 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
| 488 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
| 489 | "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t" | ||
| 490 | "vmovdqa64 %0,%%zmm5\n\t" | ||
| 491 | "vmovdqa64 %1,%%zmm7\n\t" | ||
| 492 | "vmovdqa64 %2,%%zmm13\n\t" | ||
| 493 | "vmovdqa64 %3,%%zmm15\n\t" | ||
| 494 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
| 495 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
| 496 | "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t" | ||
| 497 | "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t" | ||
| 498 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 499 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
| 500 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
| 501 | "vpxorq %%zmm15,%%zmm14,%%zmm14" | ||
| 502 | : | ||
| 503 | : "m" (dptr[z][d]), "m" (dptr[z][d+64]), | ||
| 504 | "m" (dptr[z][d+128]), | ||
| 505 | "m" (dptr[z][d+192])); | ||
| 506 | } | ||
| 507 | asm volatile("prefetchnta %0\n\t" | ||
| 508 | "prefetchnta %1\n\t" | ||
| 509 | : | ||
| 510 | : "m" (q[d]), "m" (q[d+128])); | ||
| 511 | /* P/Q left side optimization */ | ||
| 512 | for (z = start-1 ; z >= 0 ; z--) { | ||
| 513 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
| 514 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
| 515 | "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t" | ||
| 516 | "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t" | ||
| 517 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
| 518 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
| 519 | "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t" | ||
| 520 | "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t" | ||
| 521 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
| 522 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
| 523 | "vpmovm2b %%k3,%%zmm13\n\t" | ||
| 524 | "vpmovm2b %%k4,%%zmm15\n\t" | ||
| 525 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
| 526 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
| 527 | "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" | ||
| 528 | "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t" | ||
| 529 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
| 530 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
| 531 | "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" | ||
| 532 | "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" | ||
| 533 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
| 534 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
| 535 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
| 536 | "vpxorq %%zmm15,%%zmm14,%%zmm14" | ||
| 537 | : | ||
| 538 | : ); | ||
| 539 | } | ||
| 540 | asm volatile("vmovntdq %%zmm2,%0\n\t" | ||
| 541 | "vmovntdq %%zmm3,%1\n\t" | ||
| 542 | "vmovntdq %%zmm10,%2\n\t" | ||
| 543 | "vmovntdq %%zmm11,%3\n\t" | ||
| 544 | "vpxorq %4,%%zmm4,%%zmm4\n\t" | ||
| 545 | "vpxorq %5,%%zmm6,%%zmm6\n\t" | ||
| 546 | "vpxorq %6,%%zmm12,%%zmm12\n\t" | ||
| 547 | "vpxorq %7,%%zmm14,%%zmm14\n\t" | ||
| 548 | "vmovntdq %%zmm4,%4\n\t" | ||
| 549 | "vmovntdq %%zmm6,%5\n\t" | ||
| 550 | "vmovntdq %%zmm12,%6\n\t" | ||
| 551 | "vmovntdq %%zmm14,%7" | ||
| 552 | : | ||
| 553 | : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), | ||
| 554 | "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]), | ||
| 555 | "m" (q[d+128]), "m" (q[d+192])); | ||
| 556 | } | ||
| 557 | asm volatile("sfence" : : : "memory"); | ||
| 558 | kernel_fpu_end(); | ||
| 559 | } | ||
| 560 | const struct raid6_calls raid6_avx512x4 = { | ||
| 561 | raid6_avx5124_gen_syndrome, | ||
| 562 | raid6_avx5124_xor_syndrome, | ||
| 563 | raid6_have_avx512, | ||
| 564 | "avx512x4", | ||
| 565 | 1 /* Has cache hints */ | ||
| 566 | }; | ||
| 567 | #endif | ||
| 568 | |||
| 569 | #endif /* CONFIG_AS_AVX512 */ | ||
diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c new file mode 100644 index 000000000000..625aafa33b61 --- /dev/null +++ b/lib/raid6/recov_avx512.c | |||
| @@ -0,0 +1,388 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2016 Intel Corporation | ||
| 3 | * | ||
| 4 | * Author: Gayatri Kammela <gayatri.kammela@intel.com> | ||
| 5 | * Author: Megha Dey <megha.dey@linux.intel.com> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * as published by the Free Software Foundation; version 2 | ||
| 10 | * of the License. | ||
| 11 | * | ||
| 12 | */ | ||
| 13 | |||
| 14 | #ifdef CONFIG_AS_AVX512 | ||
| 15 | |||
| 16 | #include <linux/raid/pq.h> | ||
| 17 | #include "x86.h" | ||
| 18 | |||
| 19 | static int raid6_has_avx512(void) | ||
| 20 | { | ||
| 21 | return boot_cpu_has(X86_FEATURE_AVX2) && | ||
| 22 | boot_cpu_has(X86_FEATURE_AVX) && | ||
| 23 | boot_cpu_has(X86_FEATURE_AVX512F) && | ||
| 24 | boot_cpu_has(X86_FEATURE_AVX512BW) && | ||
| 25 | boot_cpu_has(X86_FEATURE_AVX512VL) && | ||
| 26 | boot_cpu_has(X86_FEATURE_AVX512DQ); | ||
| 27 | } | ||
| 28 | |||
| 29 | static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila, | ||
| 30 | int failb, void **ptrs) | ||
| 31 | { | ||
| 32 | u8 *p, *q, *dp, *dq; | ||
| 33 | const u8 *pbmul; /* P multiplier table for B data */ | ||
| 34 | const u8 *qmul; /* Q multiplier table (for both) */ | ||
| 35 | const u8 x0f = 0x0f; | ||
| 36 | |||
| 37 | p = (u8 *)ptrs[disks-2]; | ||
| 38 | q = (u8 *)ptrs[disks-1]; | ||
| 39 | |||
| 40 | /* | ||
| 41 | * Compute syndrome with zero for the missing data pages | ||
| 42 | * Use the dead data pages as temporary storage for | ||
| 43 | * delta p and delta q | ||
| 44 | */ | ||
| 45 | |||
| 46 | dp = (u8 *)ptrs[faila]; | ||
| 47 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
| 48 | ptrs[disks-2] = dp; | ||
| 49 | dq = (u8 *)ptrs[failb]; | ||
| 50 | ptrs[failb] = (void *)raid6_empty_zero_page; | ||
| 51 | ptrs[disks-1] = dq; | ||
| 52 | |||
| 53 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
| 54 | |||
| 55 | /* Restore pointer table */ | ||
| 56 | ptrs[faila] = dp; | ||
| 57 | ptrs[failb] = dq; | ||
| 58 | ptrs[disks-2] = p; | ||
| 59 | ptrs[disks-1] = q; | ||
| 60 | |||
| 61 | /* Now, pick the proper data tables */ | ||
| 62 | pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; | ||
| 63 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ | ||
| 64 | raid6_gfexp[failb]]]; | ||
| 65 | |||
| 66 | kernel_fpu_begin(); | ||
| 67 | |||
| 68 | /* zmm7 = x0f[64] */ | ||
| 69 | asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); | ||
| 70 | |||
| 71 | while (bytes) { | ||
| 72 | #ifdef CONFIG_X86_64 | ||
| 73 | asm volatile("vmovdqa64 %0, %%zmm1\n\t" | ||
| 74 | "vmovdqa64 %1, %%zmm9\n\t" | ||
| 75 | "vmovdqa64 %2, %%zmm0\n\t" | ||
| 76 | "vmovdqa64 %3, %%zmm8\n\t" | ||
| 77 | "vpxorq %4, %%zmm1, %%zmm1\n\t" | ||
| 78 | "vpxorq %5, %%zmm9, %%zmm9\n\t" | ||
| 79 | "vpxorq %6, %%zmm0, %%zmm0\n\t" | ||
| 80 | "vpxorq %7, %%zmm8, %%zmm8" | ||
| 81 | : | ||
| 82 | : "m" (q[0]), "m" (q[64]), "m" (p[0]), | ||
| 83 | "m" (p[64]), "m" (dq[0]), "m" (dq[64]), | ||
| 84 | "m" (dp[0]), "m" (dp[64])); | ||
| 85 | |||
| 86 | /* | ||
| 87 | * 1 = dq[0] ^ q[0] | ||
| 88 | * 9 = dq[64] ^ q[64] | ||
| 89 | * 0 = dp[0] ^ p[0] | ||
| 90 | * 8 = dp[64] ^ p[64] | ||
| 91 | */ | ||
| 92 | |||
| 93 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
| 94 | "vbroadcasti64x2 %1, %%zmm5" | ||
| 95 | : | ||
| 96 | : "m" (qmul[0]), "m" (qmul[16])); | ||
| 97 | |||
| 98 | asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" | ||
| 99 | "vpsraw $4, %%zmm9, %%zmm12\n\t" | ||
| 100 | "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" | ||
| 101 | "vpandq %%zmm7, %%zmm9, %%zmm9\n\t" | ||
| 102 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
| 103 | "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" | ||
| 104 | "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t" | ||
| 105 | "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" | ||
| 106 | "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t" | ||
| 107 | "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" | ||
| 108 | "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t" | ||
| 109 | "vpxorq %%zmm4, %%zmm5, %%zmm5" | ||
| 110 | : | ||
| 111 | : ); | ||
| 112 | |||
| 113 | /* | ||
| 114 | * 5 = qx[0] | ||
| 115 | * 15 = qx[64] | ||
| 116 | */ | ||
| 117 | |||
| 118 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
| 119 | "vbroadcasti64x2 %1, %%zmm1\n\t" | ||
| 120 | "vpsraw $4, %%zmm0, %%zmm2\n\t" | ||
| 121 | "vpsraw $4, %%zmm8, %%zmm6\n\t" | ||
| 122 | "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" | ||
| 123 | "vpandq %%zmm7, %%zmm8, %%zmm14\n\t" | ||
| 124 | "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" | ||
| 125 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
| 126 | "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t" | ||
| 127 | "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" | ||
| 128 | "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t" | ||
| 129 | "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" | ||
| 130 | "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t" | ||
| 131 | "vpxorq %%zmm12, %%zmm13, %%zmm13" | ||
| 132 | : | ||
| 133 | : "m" (pbmul[0]), "m" (pbmul[16])); | ||
| 134 | |||
| 135 | /* | ||
| 136 | * 1 = pbmul[px[0]] | ||
| 137 | * 13 = pbmul[px[64]] | ||
| 138 | */ | ||
| 139 | asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" | ||
| 140 | "vpxorq %%zmm15, %%zmm13, %%zmm13" | ||
| 141 | : | ||
| 142 | : ); | ||
| 143 | |||
| 144 | /* | ||
| 145 | * 1 = db[0] = DQ[0] | ||
| 146 | * 13 = db[64] = DQ[64] | ||
| 147 | */ | ||
| 148 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
| 149 | "vmovdqa64 %%zmm13,%1\n\t" | ||
| 150 | "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" | ||
| 151 | "vpxorq %%zmm13, %%zmm8, %%zmm8" | ||
| 152 | : | ||
| 153 | : "m" (dq[0]), "m" (dq[64])); | ||
| 154 | |||
| 155 | asm volatile("vmovdqa64 %%zmm0, %0\n\t" | ||
| 156 | "vmovdqa64 %%zmm8, %1" | ||
| 157 | : | ||
| 158 | : "m" (dp[0]), "m" (dp[64])); | ||
| 159 | |||
| 160 | bytes -= 128; | ||
| 161 | p += 128; | ||
| 162 | q += 128; | ||
| 163 | dp += 128; | ||
| 164 | dq += 128; | ||
| 165 | #else | ||
| 166 | asm volatile("vmovdqa64 %0, %%zmm1\n\t" | ||
| 167 | "vmovdqa64 %1, %%zmm0\n\t" | ||
| 168 | "vpxorq %2, %%zmm1, %%zmm1\n\t" | ||
| 169 | "vpxorq %3, %%zmm0, %%zmm0" | ||
| 170 | : | ||
| 171 | : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp)); | ||
| 172 | |||
| 173 | /* 1 = dq ^ q; 0 = dp ^ p */ | ||
| 174 | |||
| 175 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
| 176 | "vbroadcasti64x2 %1, %%zmm5" | ||
| 177 | : | ||
| 178 | : "m" (qmul[0]), "m" (qmul[16])); | ||
| 179 | |||
| 180 | /* | ||
| 181 | * 1 = dq ^ q | ||
| 182 | * 3 = (dq ^ q) >> 4 | ||
| 183 | */ | ||
| 184 | asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" | ||
| 185 | "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" | ||
| 186 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
| 187 | "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" | ||
| 188 | "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" | ||
| 189 | "vpxorq %%zmm4, %%zmm5, %%zmm5" | ||
| 190 | : | ||
| 191 | : ); | ||
| 192 | |||
| 193 | /* 5 = qx */ | ||
| 194 | |||
| 195 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
| 196 | "vbroadcasti64x2 %1, %%zmm1" | ||
| 197 | : | ||
| 198 | : "m" (pbmul[0]), "m" (pbmul[16])); | ||
| 199 | |||
| 200 | asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t" | ||
| 201 | "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" | ||
| 202 | "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" | ||
| 203 | "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" | ||
| 204 | "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" | ||
| 205 | "vpxorq %%zmm4, %%zmm1, %%zmm1" | ||
| 206 | : | ||
| 207 | : ); | ||
| 208 | |||
| 209 | /* 1 = pbmul[px] */ | ||
| 210 | asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" | ||
| 211 | /* 1 = db = DQ */ | ||
| 212 | "vmovdqa64 %%zmm1, %0\n\t" | ||
| 213 | : | ||
| 214 | : "m" (dq[0])); | ||
| 215 | |||
| 216 | asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" | ||
| 217 | "vmovdqa64 %%zmm0, %0" | ||
| 218 | : | ||
| 219 | : "m" (dp[0])); | ||
| 220 | |||
| 221 | bytes -= 64; | ||
| 222 | p += 64; | ||
| 223 | q += 64; | ||
| 224 | dp += 64; | ||
| 225 | dq += 64; | ||
| 226 | #endif | ||
| 227 | } | ||
| 228 | |||
| 229 | kernel_fpu_end(); | ||
| 230 | } | ||
| 231 | |||
| 232 | static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila, | ||
| 233 | void **ptrs) /* ptrs[disks-2] is P, ptrs[disks-1] is Q, ptrs[faila] is the failed page */ | ||
| 234 | { | ||
| 235 | u8 *p, *q, *dq; /* P page, Q page, failed data page (reused as delta-q scratch) */ | ||
| 236 | const u8 *qmul; /* Q multiplier table */ | ||
| 237 | const u8 x0f = 0x0f; /* low-nibble mask, broadcast into zmm7 below */ | ||
| 238 | |||
| 239 | p = (u8 *)ptrs[disks-2]; | ||
| 240 | q = (u8 *)ptrs[disks-1]; | ||
| 241 | |||
| 242 | /* | ||
| 243 | * Compute syndrome with zero for the missing data page: ptrs[faila] is temporarily pointed at the zero page. | ||
| 244 | * Use the dead data page as temporary storage for delta q: ptrs[disks-1] is temporarily pointed at dq so the Q output goes there. | ||
| 245 | */ | ||
| 246 | |||
| 247 | dq = (u8 *)ptrs[faila]; | ||
| 248 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
| 249 | ptrs[disks-1] = dq; | ||
| 250 | |||
| 251 | raid6_call.gen_syndrome(disks, bytes, ptrs); /* presumably also rewrites P via ptrs[disks-2] - confirm against gen_syndrome */ | ||
| 252 | |||
| 253 | /* Restore pointer table so the caller sees ptrs[] unchanged */ | ||
| 254 | ptrs[faila] = dq; | ||
| 255 | ptrs[disks-1] = q; | ||
| 256 | |||
| 257 | /* Now, pick the proper data tables: the multiply table indexed by raid6_gfinv[raid6_gfexp[faila]] */ | ||
| 258 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; | ||
| 259 | |||
| 260 | kernel_fpu_begin(); | ||
| 261 | |||
| 262 | asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); /* zmm7 = 0x0f in every byte */ | ||
| 263 | |||
| 264 | while (bytes) { /* assumes bytes is a multiple of the per-pass stride (128 on x86-64, 64 otherwise); size_t would wrap rather than reach 0 */ | ||
| 265 | #ifdef CONFIG_X86_64 | ||
| 266 | asm volatile("vmovdqa64 %0, %%zmm3\n\t" | ||
| 267 | "vmovdqa64 %1, %%zmm8\n\t" | ||
| 268 | "vpxorq %2, %%zmm3, %%zmm3\n\t" | ||
| 269 | "vpxorq %3, %%zmm8, %%zmm8" | ||
| 270 | : | ||
| 271 | : "m" (dq[0]), "m" (dq[64]), "m" (q[0]), | ||
| 272 | "m" (q[64])); | ||
| 273 | |||
| 274 | /* | ||
| 275 | * 3 = q[0] ^ dq[0] | ||
| 276 | * 8 = q[64] ^ dq[64] | ||
| 277 | */ | ||
| 278 | asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" | ||
| 279 | "vmovapd %%zmm0, %%zmm13\n\t" | ||
| 280 | "vbroadcasti64x2 %1, %%zmm1\n\t" | ||
| 281 | "vmovapd %%zmm1, %%zmm14" | ||
| 282 | : | ||
| 283 | : "m" (qmul[0]), "m" (qmul[16])); /* 0 and 13 = table bytes 0..15 (low-nibble lookup), 1 and 14 = table bytes 16..31 (high-nibble lookup) */ | ||
| 284 | |||
| 285 | asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" | ||
| 286 | "vpsraw $4, %%zmm8, %%zmm12\n\t" | ||
| 287 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
| 288 | "vpandq %%zmm7, %%zmm8, %%zmm8\n\t" | ||
| 289 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
| 290 | "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" | ||
| 291 | "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" | ||
| 292 | "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t" | ||
| 293 | "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" | ||
| 294 | "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t" | ||
| 295 | "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t" | ||
| 296 | "vpxorq %%zmm13, %%zmm14, %%zmm14" | ||
| 297 | : | ||
| 298 | : ); | ||
| 299 | |||
| 300 | /* | ||
| 301 | * 1 = qmul[q[0] ^ dq[0]] | ||
| 302 | * 14 = qmul[q[64] ^ dq[64]] | ||
| 303 | */ | ||
| 304 | asm volatile("vmovdqa64 %0, %%zmm2\n\t" | ||
| 305 | "vmovdqa64 %1, %%zmm12\n\t" | ||
| 306 | "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t" | ||
| 307 | "vpxorq %%zmm14, %%zmm12, %%zmm12" | ||
| 308 | : | ||
| 309 | : "m" (p[0]), "m" (p[64])); | ||
| 310 | |||
| 311 | /* | ||
| 312 | * 2 = p[0] ^ qmul[q[0] ^ dq[0]] | ||
| 313 | * 12 = p[64] ^ qmul[q[64] ^ dq[64]] | ||
| 314 | */ | ||
| 315 | |||
| 316 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
| 317 | "vmovdqa64 %%zmm14, %1\n\t" | ||
| 318 | "vmovdqa64 %%zmm2, %2\n\t" | ||
| 319 | "vmovdqa64 %%zmm12,%3" | ||
| 320 | : /* NOTE(review): these stores write to operands listed as inputs (m) rather than outputs (=m) - confirm this is intentional */ | ||
| 321 | : "m" (dq[0]), "m" (dq[64]), "m" (p[0]), | ||
| 322 | "m" (p[64])); /* dq <- registers 1 and 14, p <- registers 2 and 12 */ | ||
| 323 | |||
| 324 | bytes -= 128; | ||
| 325 | p += 128; | ||
| 326 | q += 128; | ||
| 327 | dq += 128; | ||
| 328 | #else | ||
| 329 | asm volatile("vmovdqa64 %0, %%zmm3\n\t" | ||
| 330 | "vpxorq %1, %%zmm3, %%zmm3" | ||
| 331 | : | ||
| 332 | : "m" (dq[0]), "m" (q[0])); | ||
| 333 | |||
| 334 | /* 3 = q ^ dq */ | ||
| 335 | |||
| 336 | asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" | ||
| 337 | "vbroadcasti64x2 %1, %%zmm1" | ||
| 338 | : | ||
| 339 | : "m" (qmul[0]), "m" (qmul[16])); | ||
| 340 | |||
| 341 | asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" | ||
| 342 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
| 343 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
| 344 | "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" | ||
| 345 | "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" | ||
| 346 | "vpxorq %%zmm0, %%zmm1, %%zmm1" | ||
| 347 | : | ||
| 348 | : ); | ||
| 349 | |||
| 350 | /* 1 = qmul[q ^ dq] */ | ||
| 351 | |||
| 352 | asm volatile("vmovdqa64 %0, %%zmm2\n\t" | ||
| 353 | "vpxorq %%zmm1, %%zmm2, %%zmm2" | ||
| 354 | : | ||
| 355 | : "m" (p[0])); | ||
| 356 | |||
| 357 | /* 2 = p ^ qmul[q ^ dq] */ | ||
| 358 | |||
| 359 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
| 360 | "vmovdqa64 %%zmm2, %1" | ||
| 361 | : | ||
| 362 | : "m" (dq[0]), "m" (p[0])); /* dq <- register 1, p <- register 2 */ | ||
| 363 | |||
| 364 | bytes -= 64; | ||
| 365 | p += 64; | ||
| 366 | q += 64; | ||
| 367 | dq += 64; | ||
| 368 | #endif | ||
| 369 | } | ||
| 370 | |||
| 371 | kernel_fpu_end(); | ||
| 372 | } | ||
| 373 | |||
| 374 | const struct raid6_recov_calls raid6_recov_avx512 = { /* descriptor exporting the AVX512 recovery routines defined in this file */ | ||
| 375 | .data2 = raid6_2data_recov_avx512, /* handles two failed data pages (faila, failb) */ | ||
| 376 | .datap = raid6_datap_recov_avx512, /* handles one failed data page; also rewrites the P page */ | ||
| 377 | .valid = raid6_has_avx512, /* presumably a CPU-feature check; defined outside this view */ | ||
| 378 | #ifdef CONFIG_X86_64 | ||
| 379 | .name = "avx512x2", /* x86-64 loops consume 128 bytes (two 64-byte vectors per buffer) per pass */ | ||
| 380 | #else | ||
| 381 | .name = "avx512x1", /* non-x86-64 loops consume 64 bytes (one vector per buffer) per pass */ | ||
| 382 | #endif | ||
| 383 | .priority = 3, /* NOTE(review): ranking semantics are set by the selection code, not visible here */ | ||
| 384 | }; | ||
| 385 | |||
| 386 | #else | ||
| 387 | #warning "your version of binutils lacks AVX512 support" | ||
| 388 | #endif | ||
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 29090f3db677..2c7b60edea04 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile | |||
| @@ -32,10 +32,13 @@ ifeq ($(ARCH),arm64) | |||
| 32 | endif | 32 | endif |
| 33 | 33 | ||
| 34 | ifeq ($(IS_X86),yes) | 34 | ifeq ($(IS_X86),yes) |
| 35 | OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o | 35 | OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o |
| 36 | CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ | 36 | CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ |
| 37 | gcc -c -x assembler - >&/dev/null && \ | 37 | gcc -c -x assembler - >&/dev/null && \ |
| 38 | rm ./-.o && echo -DCONFIG_AS_AVX2=1) | 38 | rm ./-.o && echo -DCONFIG_AS_AVX2=1) |
| 39 | CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \ | ||
| 40 | gcc -c -x assembler - >&/dev/null && \ | ||
| 41 | rm ./-.o && echo -DCONFIG_AS_AVX512=1) | ||
| 39 | else ifeq ($(HAS_NEON),yes) | 42 | else ifeq ($(HAS_NEON),yes) |
| 40 | OBJS += neon.o neon1.o neon2.o neon4.o neon8.o | 43 | OBJS += neon.o neon1.o neon2.o neon4.o neon8.o |
| 41 | CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 | 44 | CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 |
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 3bebbabdb510..b07f4d8e6b03 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c | |||
| @@ -21,12 +21,13 @@ | |||
| 21 | 21 | ||
| 22 | #define NDISKS 16 /* Including P and Q */ | 22 | #define NDISKS 16 /* Including P and Q */ |
| 23 | 23 | ||
| 24 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | 24 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); |
| 25 | struct raid6_calls raid6_call; | 25 | struct raid6_calls raid6_call; |
| 26 | 26 | ||
| 27 | char *dataptrs[NDISKS]; | 27 | char *dataptrs[NDISKS]; |
| 28 | char data[NDISKS][PAGE_SIZE]; | 28 | char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); |
| 29 | char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; | 29 | char recovi[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); |
| 30 | char recovj[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); | ||
| 30 | 31 | ||
| 31 | static void makedata(int start, int stop) | 32 | static void makedata(int start, int stop) |
| 32 | { | 33 | { |
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index 8fe9d9662abb..834d268a4b05 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h | |||
| @@ -46,6 +46,16 @@ static inline void kernel_fpu_end(void) | |||
| 46 | #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ | 46 | #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ |
| 47 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ | 47 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ |
| 48 | #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ | 48 | #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ |
| 49 | #define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */ | ||
| 50 | #define X86_FEATURE_AVX512DQ (9*32+17) /* AVX-512 DQ (Double/Quad granular) | ||
| 51 | * Instructions | ||
| 52 | */ | ||
| 53 | #define X86_FEATURE_AVX512BW (9*32+30) /* AVX-512 BW (Byte/Word granular) | ||
| 54 | * Instructions | ||
| 55 | */ | ||
| 56 | #define X86_FEATURE_AVX512VL (9*32+31) /* AVX-512 VL (128/256 Vector Length) | ||
| 57 | * Extensions | ||
| 58 | */ | ||
| 49 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ | 59 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ |
| 50 | 60 | ||
| 51 | /* Should work well enough on modern CPUs for testing */ | 61 | /* Should work well enough on modern CPUs for testing */ |
