diff options
author | NeilBrown <neilb@suse.de> | 2010-08-09 20:02:33 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2010-08-09 20:02:33 -0400 |
commit | fd8aa2c1811bf60ccb2d5de0579c6f62aec1772d (patch) | |
tree | 311567d03758afc3a93b4273fe172836e89bb01d /lib | |
parent | 6e17b0276452912cb13445e5ea552b599984675f (diff) | |
parent | 2144381da478cc4aa3a29ee29b0c5e6ddaaced14 (diff) |
Merge git://git.infradead.org/users/dwmw2/libraid-2.6 into for-linus
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 3 | ||||
-rw-r--r-- | lib/Kconfig.debug | 63 | ||||
-rw-r--r-- | lib/Makefile | 1 | ||||
-rw-r--r-- | lib/atomic64_test.c | 2 | ||||
-rw-r--r-- | lib/devres.c | 2 | ||||
-rw-r--r-- | lib/ioremap.c | 10 | ||||
-rw-r--r-- | lib/raid6/Makefile | 78 | ||||
-rw-r--r-- | lib/raid6/mktables.c | 132 | ||||
-rw-r--r-- | lib/raid6/raid6algos.c | 154 | ||||
-rw-r--r-- | lib/raid6/raid6altivec.uc | 130 | ||||
-rw-r--r-- | lib/raid6/raid6int.uc | 117 | ||||
-rw-r--r-- | lib/raid6/raid6mmx.c | 142 | ||||
-rw-r--r-- | lib/raid6/raid6recov.c | 132 | ||||
-rw-r--r-- | lib/raid6/raid6sse1.c | 162 | ||||
-rw-r--r-- | lib/raid6/raid6sse2.c | 262 | ||||
-rw-r--r-- | lib/raid6/raid6test/Makefile | 75 | ||||
-rw-r--r-- | lib/raid6/raid6test/test.c | 124 | ||||
-rw-r--r-- | lib/raid6/raid6x86.h | 61 | ||||
-rw-r--r-- | lib/raid6/unroll.awk | 20 | ||||
-rw-r--r-- | lib/random32.c | 2 | ||||
-rw-r--r-- | lib/swiotlb.c | 137 | ||||
-rw-r--r-- | lib/vsprintf.c | 9 |
22 files changed, 1734 insertions, 84 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 5b916bc0fba..fa9bf2c0619 100644 --- a/lib/Kconfig +++ b/lib/Kconfig | |||
@@ -7,6 +7,9 @@ config BINARY_PRINTF | |||
7 | 7 | ||
8 | menu "Library routines" | 8 | menu "Library routines" |
9 | 9 | ||
10 | config RAID6_PQ | ||
11 | tristate | ||
12 | |||
10 | config BITREVERSE | 13 | config BITREVERSE |
11 | tristate | 14 | tristate |
12 | 15 | ||
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e722e9d6222..79e0dff1cdc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -76,7 +76,6 @@ config UNUSED_SYMBOLS | |||
76 | 76 | ||
77 | config DEBUG_FS | 77 | config DEBUG_FS |
78 | bool "Debug Filesystem" | 78 | bool "Debug Filesystem" |
79 | depends on SYSFS | ||
80 | help | 79 | help |
81 | debugfs is a virtual file system that kernel developers use to put | 80 | debugfs is a virtual file system that kernel developers use to put |
82 | debugging files into. Enable this option to be able to read and | 81 | debugging files into. Enable this option to be able to read and |
@@ -152,28 +151,33 @@ config DEBUG_SHIRQ | |||
152 | Drivers ought to be able to handle interrupts coming in at those | 151 | Drivers ought to be able to handle interrupts coming in at those |
153 | points; some don't and need to be caught. | 152 | points; some don't and need to be caught. |
154 | 153 | ||
155 | config DETECT_SOFTLOCKUP | 154 | config LOCKUP_DETECTOR |
156 | bool "Detect Soft Lockups" | 155 | bool "Detect Hard and Soft Lockups" |
157 | depends on DEBUG_KERNEL && !S390 | 156 | depends on DEBUG_KERNEL && !S390 |
158 | default y | ||
159 | help | 157 | help |
160 | Say Y here to enable the kernel to detect "soft lockups", | 158 | Say Y here to enable the kernel to act as a watchdog to detect |
161 | which are bugs that cause the kernel to loop in kernel | 159 | hard and soft lockups. |
160 | |||
161 | Softlockups are bugs that cause the kernel to loop in kernel | ||
162 | mode for more than 60 seconds, without giving other tasks a | 162 | mode for more than 60 seconds, without giving other tasks a |
163 | chance to run. | 163 | chance to run. The current stack trace is displayed upon |
164 | detection and the system will stay locked up. | ||
164 | 165 | ||
165 | When a soft-lockup is detected, the kernel will print the | 166 | Hardlockups are bugs that cause the CPU to loop in kernel mode |
166 | current stack trace (which you should report), but the | 167 | for more than 60 seconds, without letting other interrupts have a |
167 | system will stay locked up. This feature has negligible | 168 | chance to run. The current stack trace is displayed upon detection |
168 | overhead. | 169 | and the system will stay locked up. |
169 | 170 | ||
170 | (Note that "hard lockups" are separate type of bugs that | 171 | The overhead should be minimal. A periodic hrtimer runs to |
171 | can be detected via the NMI-watchdog, on platforms that | 172 | generate interrupts and kick the watchdog task every 10-12 seconds. |
172 | support it.) | 173 | An NMI is generated every 60 seconds or so to check for hardlockups. |
174 | |||
175 | config HARDLOCKUP_DETECTOR | ||
176 | def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI | ||
173 | 177 | ||
174 | config BOOTPARAM_SOFTLOCKUP_PANIC | 178 | config BOOTPARAM_SOFTLOCKUP_PANIC |
175 | bool "Panic (Reboot) On Soft Lockups" | 179 | bool "Panic (Reboot) On Soft Lockups" |
176 | depends on DETECT_SOFTLOCKUP | 180 | depends on LOCKUP_DETECTOR |
177 | help | 181 | help |
178 | Say Y here to enable the kernel to panic on "soft lockups", | 182 | Say Y here to enable the kernel to panic on "soft lockups", |
179 | which are bugs that cause the kernel to loop in kernel | 183 | which are bugs that cause the kernel to loop in kernel |
@@ -190,7 +194,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC | |||
190 | 194 | ||
191 | config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE | 195 | config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE |
192 | int | 196 | int |
193 | depends on DETECT_SOFTLOCKUP | 197 | depends on LOCKUP_DETECTOR |
194 | range 0 1 | 198 | range 0 1 |
195 | default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC | 199 | default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC |
196 | default 1 if BOOTPARAM_SOFTLOCKUP_PANIC | 200 | default 1 if BOOTPARAM_SOFTLOCKUP_PANIC |
@@ -307,6 +311,12 @@ config DEBUG_OBJECTS_WORK | |||
307 | work queue routines to track the life time of work objects and | 311 | work queue routines to track the life time of work objects and |
308 | validate the work operations. | 312 | validate the work operations. |
309 | 313 | ||
314 | config DEBUG_OBJECTS_RCU_HEAD | ||
315 | bool "Debug RCU callbacks objects" | ||
316 | depends on DEBUG_OBJECTS && PREEMPT | ||
317 | help | ||
318 | Enable this to turn on debugging of RCU list heads (call_rcu() usage). | ||
319 | |||
310 | config DEBUG_OBJECTS_ENABLE_DEFAULT | 320 | config DEBUG_OBJECTS_ENABLE_DEFAULT |
311 | int "debug_objects bootup default value (0-1)" | 321 | int "debug_objects bootup default value (0-1)" |
312 | range 0 1 | 322 | range 0 1 |
@@ -528,7 +538,7 @@ config LOCKDEP | |||
528 | bool | 538 | bool |
529 | depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT | 539 | depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT |
530 | select STACKTRACE | 540 | select STACKTRACE |
531 | select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 | 541 | select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE |
532 | select KALLSYMS | 542 | select KALLSYMS |
533 | select KALLSYMS_ALL | 543 | select KALLSYMS_ALL |
534 | 544 | ||
@@ -628,6 +638,19 @@ config DEBUG_INFO | |||
628 | 638 | ||
629 | If unsure, say N. | 639 | If unsure, say N. |
630 | 640 | ||
641 | config DEBUG_INFO_REDUCED | ||
642 | bool "Reduce debugging information" | ||
643 | depends on DEBUG_INFO | ||
644 | help | ||
645 | If you say Y here gcc is instructed to generate less debugging | ||
646 | information for structure types. This means that tools that | ||
647 | need full debugging information (like kgdb or systemtap) won't | ||
648 | be happy. But if you merely need debugging information to | ||
649 | resolve line numbers there is no loss. Advantage is that | ||
650 | build directory object sizes shrink dramatically over a full | ||
651 | DEBUG_INFO build and compile times are reduced too. | ||
652 | Only works with newer gcc versions. | ||
653 | |||
631 | config DEBUG_VM | 654 | config DEBUG_VM |
632 | bool "Debug VM" | 655 | bool "Debug VM" |
633 | depends on DEBUG_KERNEL | 656 | depends on DEBUG_KERNEL |
@@ -937,7 +960,7 @@ config FAIL_MAKE_REQUEST | |||
937 | Provide fault-injection capability for disk IO. | 960 | Provide fault-injection capability for disk IO. |
938 | 961 | ||
939 | config FAIL_IO_TIMEOUT | 962 | config FAIL_IO_TIMEOUT |
940 | bool "Faul-injection capability for faking disk interrupts" | 963 | bool "Fault-injection capability for faking disk interrupts" |
941 | depends on FAULT_INJECTION && BLOCK | 964 | depends on FAULT_INJECTION && BLOCK |
942 | help | 965 | help |
943 | Provide fault-injection capability on end IO handling. This | 966 | Provide fault-injection capability on end IO handling. This |
@@ -958,13 +981,13 @@ config FAULT_INJECTION_STACKTRACE_FILTER | |||
958 | depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT | 981 | depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT |
959 | depends on !X86_64 | 982 | depends on !X86_64 |
960 | select STACKTRACE | 983 | select STACKTRACE |
961 | select FRAME_POINTER if !PPC && !S390 | 984 | select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE |
962 | help | 985 | help |
963 | Provide stacktrace filter for fault-injection capabilities | 986 | Provide stacktrace filter for fault-injection capabilities |
964 | 987 | ||
965 | config LATENCYTOP | 988 | config LATENCYTOP |
966 | bool "Latency measuring infrastructure" | 989 | bool "Latency measuring infrastructure" |
967 | select FRAME_POINTER if !MIPS && !PPC && !S390 | 990 | select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE |
968 | select KALLSYMS | 991 | select KALLSYMS |
969 | select KALLSYMS_ALL | 992 | select KALLSYMS_ALL |
970 | select STACKTRACE | 993 | select STACKTRACE |
diff --git a/lib/Makefile b/lib/Makefile index 0bfabba1bb3..e6a3763b821 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
@@ -69,6 +69,7 @@ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ | |||
69 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ | 69 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ |
70 | obj-$(CONFIG_LZO_COMPRESS) += lzo/ | 70 | obj-$(CONFIG_LZO_COMPRESS) += lzo/ |
71 | obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ | 71 | obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ |
72 | obj-$(CONFIG_RAID6_PQ) += raid6/ | ||
72 | 73 | ||
73 | lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o | 74 | lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o |
74 | lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o | 75 | lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o |
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 250ed11d3ed..44524cc8c32 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c | |||
@@ -114,7 +114,7 @@ static __init int test_atomic64(void) | |||
114 | BUG_ON(v.counter != r); | 114 | BUG_ON(v.counter != r); |
115 | 115 | ||
116 | #if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \ | 116 | #if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \ |
117 | defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) | 117 | defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM) |
118 | INIT(onestwos); | 118 | INIT(onestwos); |
119 | BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); | 119 | BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); |
120 | r -= one; | 120 | r -= one; |
diff --git a/lib/devres.c b/lib/devres.c index 49368608f98..6efddf53b90 100644 --- a/lib/devres.c +++ b/lib/devres.c | |||
@@ -328,7 +328,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all); | |||
328 | * @pdev: PCI device to map IO resources for | 328 | * @pdev: PCI device to map IO resources for |
329 | * @mask: Mask of BARs to unmap and release | 329 | * @mask: Mask of BARs to unmap and release |
330 | * | 330 | * |
331 | * Unamp and release regions specified by @mask. | 331 | * Unmap and release regions specified by @mask. |
332 | */ | 332 | */ |
333 | void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) | 333 | void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) |
334 | { | 334 | { |
diff --git a/lib/ioremap.c b/lib/ioremap.c index 14c6078f17a..5730ecd3eb6 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c | |||
@@ -13,10 +13,10 @@ | |||
13 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
14 | 14 | ||
15 | static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, | 15 | static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, |
16 | unsigned long end, unsigned long phys_addr, pgprot_t prot) | 16 | unsigned long end, phys_addr_t phys_addr, pgprot_t prot) |
17 | { | 17 | { |
18 | pte_t *pte; | 18 | pte_t *pte; |
19 | unsigned long pfn; | 19 | u64 pfn; |
20 | 20 | ||
21 | pfn = phys_addr >> PAGE_SHIFT; | 21 | pfn = phys_addr >> PAGE_SHIFT; |
22 | pte = pte_alloc_kernel(pmd, addr); | 22 | pte = pte_alloc_kernel(pmd, addr); |
@@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, | |||
31 | } | 31 | } |
32 | 32 | ||
33 | static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, | 33 | static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, |
34 | unsigned long end, unsigned long phys_addr, pgprot_t prot) | 34 | unsigned long end, phys_addr_t phys_addr, pgprot_t prot) |
35 | { | 35 | { |
36 | pmd_t *pmd; | 36 | pmd_t *pmd; |
37 | unsigned long next; | 37 | unsigned long next; |
@@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, | |||
49 | } | 49 | } |
50 | 50 | ||
51 | static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, | 51 | static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, |
52 | unsigned long end, unsigned long phys_addr, pgprot_t prot) | 52 | unsigned long end, phys_addr_t phys_addr, pgprot_t prot) |
53 | { | 53 | { |
54 | pud_t *pud; | 54 | pud_t *pud; |
55 | unsigned long next; | 55 | unsigned long next; |
@@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, | |||
67 | } | 67 | } |
68 | 68 | ||
69 | int ioremap_page_range(unsigned long addr, | 69 | int ioremap_page_range(unsigned long addr, |
70 | unsigned long end, unsigned long phys_addr, pgprot_t prot) | 70 | unsigned long end, phys_addr_t phys_addr, pgprot_t prot) |
71 | { | 71 | { |
72 | pgd_t *pgd; | 72 | pgd_t *pgd; |
73 | unsigned long start; | 73 | unsigned long start; |
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile new file mode 100644 index 00000000000..19bf32da644 --- /dev/null +++ b/lib/raid6/Makefile | |||
@@ -0,0 +1,78 @@ | |||
1 | obj-$(CONFIG_RAID6_PQ) += raid6_pq.o | ||
2 | |||
3 | raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ | ||
4 | raid6int1.o raid6int2.o raid6int4.o \ | ||
5 | raid6int8.o raid6int16.o raid6int32.o \ | ||
6 | raid6altivec1.o raid6altivec2.o raid6altivec4.o \ | ||
7 | raid6altivec8.o \ | ||
8 | raid6mmx.o raid6sse1.o raid6sse2.o | ||
9 | hostprogs-y += mktables | ||
10 | |||
11 | quiet_cmd_unroll = UNROLL $@ | ||
12 | cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ | ||
13 | < $< > $@ || ( rm -f $@ && exit 1 ) | ||
14 | |||
15 | ifeq ($(CONFIG_ALTIVEC),y) | ||
16 | altivec_flags := -maltivec -mabi=altivec | ||
17 | endif | ||
18 | |||
19 | targets += raid6int1.c | ||
20 | $(obj)/raid6int1.c: UNROLL := 1 | ||
21 | $(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
22 | $(call if_changed,unroll) | ||
23 | |||
24 | targets += raid6int2.c | ||
25 | $(obj)/raid6int2.c: UNROLL := 2 | ||
26 | $(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
27 | $(call if_changed,unroll) | ||
28 | |||
29 | targets += raid6int4.c | ||
30 | $(obj)/raid6int4.c: UNROLL := 4 | ||
31 | $(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
32 | $(call if_changed,unroll) | ||
33 | |||
34 | targets += raid6int8.c | ||
35 | $(obj)/raid6int8.c: UNROLL := 8 | ||
36 | $(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
37 | $(call if_changed,unroll) | ||
38 | |||
39 | targets += raid6int16.c | ||
40 | $(obj)/raid6int16.c: UNROLL := 16 | ||
41 | $(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
42 | $(call if_changed,unroll) | ||
43 | |||
44 | targets += raid6int32.c | ||
45 | $(obj)/raid6int32.c: UNROLL := 32 | ||
46 | $(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE | ||
47 | $(call if_changed,unroll) | ||
48 | |||
49 | CFLAGS_raid6altivec1.o += $(altivec_flags) | ||
50 | targets += raid6altivec1.c | ||
51 | $(obj)/raid6altivec1.c: UNROLL := 1 | ||
52 | $(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
53 | $(call if_changed,unroll) | ||
54 | |||
55 | CFLAGS_raid6altivec2.o += $(altivec_flags) | ||
56 | targets += raid6altivec2.c | ||
57 | $(obj)/raid6altivec2.c: UNROLL := 2 | ||
58 | $(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
59 | $(call if_changed,unroll) | ||
60 | |||
61 | CFLAGS_raid6altivec4.o += $(altivec_flags) | ||
62 | targets += raid6altivec4.c | ||
63 | $(obj)/raid6altivec4.c: UNROLL := 4 | ||
64 | $(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
65 | $(call if_changed,unroll) | ||
66 | |||
67 | CFLAGS_raid6altivec8.o += $(altivec_flags) | ||
68 | targets += raid6altivec8.c | ||
69 | $(obj)/raid6altivec8.c: UNROLL := 8 | ||
70 | $(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE | ||
71 | $(call if_changed,unroll) | ||
72 | |||
73 | quiet_cmd_mktable = TABLE $@ | ||
74 | cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) | ||
75 | |||
76 | targets += raid6tables.c | ||
77 | $(obj)/raid6tables.c: $(obj)/mktables FORCE | ||
78 | $(call if_changed,mktable) | ||
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c new file mode 100644 index 00000000000..3b1500843bb --- /dev/null +++ b/lib/raid6/mktables.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This file is part of the Linux kernel, and is made available under | ||
6 | * the terms of the GNU General Public License version 2 or (at your | ||
7 | * option) any later version; incorporated herein by reference. | ||
8 | * | ||
9 | * ----------------------------------------------------------------------- */ | ||
10 | |||
11 | /* | ||
12 | * mktables.c | ||
13 | * | ||
14 | * Make RAID-6 tables. This is a host user space program to be run at | ||
15 | * compile time. | ||
16 | */ | ||
17 | |||
18 | #include <stdio.h> | ||
19 | #include <string.h> | ||
20 | #include <inttypes.h> | ||
21 | #include <stdlib.h> | ||
22 | #include <time.h> | ||
23 | |||
/*
 * Multiply two bytes as elements of GF(2^8).
 *
 * Classic shift-and-add: for every set bit of @b the current multiple of
 * @a is XORed into the product, and @a is doubled for the next bit.  When
 * doubling shifts out the top bit, the value is reduced by XORing in 0x1d.
 */
static uint8_t gfmul(uint8_t a, uint8_t b)
{
	uint8_t product;

	for (product = 0; b != 0; b >>= 1) {
		uint8_t overflow = a & 0x80;

		if (b & 1)
			product ^= a;

		/* a *= 2, folding the bit that fell off back in */
		a <<= 1;
		if (overflow)
			a ^= 0x1d;
	}

	return product;
}
37 | |||
/*
 * Raise @a to the power @b in GF(2^8) by square-and-multiply.
 *
 * The exponent is first reduced modulo 255 and made non-negative, so
 * negative powers are accepted.
 */
static uint8_t gfpow(uint8_t a, int b)
{
	uint8_t result = 1;
	int exponent = b % 255;

	if (exponent < 0)
		exponent += 255;

	for (; exponent != 0; exponent >>= 1) {
		if (exponent & 1)
			result = gfmul(result, a);
		a = gfmul(a, a);
	}

	return result;
}
55 | |||
/* Emit the EXPORT_SYMBOL() trailer for @sym, visible to kernel builds only. */
static void emit_export(const char *sym)
{
	printf("#ifdef __KERNEL__\n");
	printf("EXPORT_SYMBOL(%s);\n", sym);
	printf("#endif\n");
}

/* Emit one 256-entry u8 table called @name, eight values per line. */
static void emit_table(const char *name, const uint8_t tbl[256])
{
	int i, j;

	printf("\nconst u8 __attribute__((aligned(256)))\n"
	       "%s[256] =\n" "{\n", name);
	for (i = 0; i < 256; i += 8) {
		printf("\t");
		for (j = 0; j < 8; j++)
			printf("0x%02x,%c", tbl[i + j],
			       (j == 7) ? '\n' : ' ');
	}
	printf("};\n");
	emit_export(name);
}

/*
 * Write the RAID-6 lookup tables to stdout as C source:
 * raid6_gfmul (full multiplication table), raid6_gfexp (powers of 2),
 * raid6_gfinv (multiplicative inverses) and raid6_gfexi (inv(2^x + 1)).
 *
 * Returns 0 on success and 1 if the output could not be written, so the
 * build rule that redirects stdout can discard a truncated file.
 */
int main(int argc, char *argv[])
{
	int i, j, k;
	uint8_t v;
	uint8_t exptbl[256], invtbl[256], exitbl[256];

	(void)argc;
	(void)argv;

	printf("#include <linux/raid/pq.h>\n");

	/* Compute multiplication table */
	printf("\nconst u8 __attribute__((aligned(256)))\n"
	       "raid6_gfmul[256][256] =\n"
	       "{\n");
	for (i = 0; i < 256; i++) {
		printf("\t{\n");
		for (j = 0; j < 256; j += 8) {
			printf("\t\t");
			for (k = 0; k < 8; k++)
				printf("0x%02x,%c", gfmul(i, j + k),
				       (k == 7) ? '\n' : ' ');
		}
		printf("\t},\n");
	}
	printf("};\n");
	emit_export("raid6_gfmul");

	/* Compute power-of-2 table (exponent) */
	v = 1;
	for (i = 0; i < 256; i++) {
		exptbl[i] = v;
		v = gfmul(v, 2);
		if (v == 1)
			v = 0;	/* For entry 255, not a real entry */
	}
	emit_table("raid6_gfexp", exptbl);

	/* Compute inverse table x^-1 == x^254 */
	for (i = 0; i < 256; i++)
		invtbl[i] = gfpow(i, 254);
	emit_table("raid6_gfinv", invtbl);

	/* Compute inv(2^x + 1) (exponent-xor-inverse) table */
	for (i = 0; i < 256; i++)
		exitbl[i] = invtbl[exptbl[i] ^ 1];
	emit_table("raid6_gfexi", exitbl);

	/* A short write (e.g. disk full) must not look like success. */
	if (fflush(stdout) != 0 || ferror(stdout)) {
		fprintf(stderr, "mktables: error writing output\n");
		return 1;
	}

	return 0;
}
diff --git a/lib/raid6/raid6algos.c b/lib/raid6/raid6algos.c new file mode 100644 index 00000000000..1f8784bfd44 --- /dev/null +++ b/lib/raid6/raid6algos.c | |||
@@ -0,0 +1,154 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6algos.c | ||
15 | * | ||
16 | * Algorithm list and algorithm selection for RAID-6 | ||
17 | */ | ||
18 | |||
19 | #include <linux/raid/pq.h> | ||
20 | #include <linux/gfp.h> | ||
21 | #ifndef __KERNEL__ | ||
22 | #include <sys/mman.h> | ||
23 | #include <stdio.h> | ||
24 | #else | ||
25 | #if !RAID6_USE_EMPTY_ZERO_PAGE | ||
26 | /* In .bss so it's zeroed */ | ||
27 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | ||
28 | EXPORT_SYMBOL(raid6_empty_zero_page); | ||
29 | #endif | ||
30 | #endif | ||
31 | |||
32 | struct raid6_calls raid6_call; | ||
33 | EXPORT_SYMBOL_GPL(raid6_call); | ||
34 | |||
35 | const struct raid6_calls * const raid6_algos[] = { | ||
36 | &raid6_intx1, | ||
37 | &raid6_intx2, | ||
38 | &raid6_intx4, | ||
39 | &raid6_intx8, | ||
40 | #if defined(__ia64__) | ||
41 | &raid6_intx16, | ||
42 | &raid6_intx32, | ||
43 | #endif | ||
44 | #if defined(__i386__) && !defined(__arch_um__) | ||
45 | &raid6_mmxx1, | ||
46 | &raid6_mmxx2, | ||
47 | &raid6_sse1x1, | ||
48 | &raid6_sse1x2, | ||
49 | &raid6_sse2x1, | ||
50 | &raid6_sse2x2, | ||
51 | #endif | ||
52 | #if defined(__x86_64__) && !defined(__arch_um__) | ||
53 | &raid6_sse2x1, | ||
54 | &raid6_sse2x2, | ||
55 | &raid6_sse2x4, | ||
56 | #endif | ||
57 | #ifdef CONFIG_ALTIVEC | ||
58 | &raid6_altivec1, | ||
59 | &raid6_altivec2, | ||
60 | &raid6_altivec4, | ||
61 | &raid6_altivec8, | ||
62 | #endif | ||
63 | NULL | ||
64 | }; | ||
65 | |||
66 | #ifdef __KERNEL__ | ||
67 | #define RAID6_TIME_JIFFIES_LG2 4 | ||
68 | #else | ||
69 | /* Need more time to be stable in userspace */ | ||
70 | #define RAID6_TIME_JIFFIES_LG2 9 | ||
71 | #define time_before(x, y) ((x) < (y)) | ||
72 | #endif | ||
73 | |||
74 | /* Try to pick the best algorithm */ | ||
75 | /* This code uses the gfmul table as convenient data set to abuse */ | ||
76 | |||
77 | int __init raid6_select_algo(void) | ||
78 | { | ||
79 | const struct raid6_calls * const * algo; | ||
80 | const struct raid6_calls * best; | ||
81 | char *syndromes; | ||
82 | void *dptrs[(65536/PAGE_SIZE)+2]; | ||
83 | int i, disks; | ||
84 | unsigned long perf, bestperf; | ||
85 | int bestprefer; | ||
86 | unsigned long j0, j1; | ||
87 | |||
88 | disks = (65536/PAGE_SIZE)+2; | ||
89 | for ( i = 0 ; i < disks-2 ; i++ ) { | ||
90 | dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; | ||
91 | } | ||
92 | |||
93 | /* Normal code - use a 2-page allocation to avoid D$ conflict */ | ||
94 | syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); | ||
95 | |||
96 | if ( !syndromes ) { | ||
97 | printk("raid6: Yikes! No memory available.\n"); | ||
98 | return -ENOMEM; | ||
99 | } | ||
100 | |||
101 | dptrs[disks-2] = syndromes; | ||
102 | dptrs[disks-1] = syndromes + PAGE_SIZE; | ||
103 | |||
104 | bestperf = 0; bestprefer = 0; best = NULL; | ||
105 | |||
106 | for ( algo = raid6_algos ; *algo ; algo++ ) { | ||
107 | if ( !(*algo)->valid || (*algo)->valid() ) { | ||
108 | perf = 0; | ||
109 | |||
110 | preempt_disable(); | ||
111 | j0 = jiffies; | ||
112 | while ( (j1 = jiffies) == j0 ) | ||
113 | cpu_relax(); | ||
114 | while (time_before(jiffies, | ||
115 | j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { | ||
116 | (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); | ||
117 | perf++; | ||
118 | } | ||
119 | preempt_enable(); | ||
120 | |||
121 | if ( (*algo)->prefer > bestprefer || | ||
122 | ((*algo)->prefer == bestprefer && | ||
123 | perf > bestperf) ) { | ||
124 | best = *algo; | ||
125 | bestprefer = best->prefer; | ||
126 | bestperf = perf; | ||
127 | } | ||
128 | printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, | ||
129 | (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); | ||
130 | } | ||
131 | } | ||
132 | |||
133 | if (best) { | ||
134 | printk("raid6: using algorithm %s (%ld MB/s)\n", | ||
135 | best->name, | ||
136 | (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); | ||
137 | raid6_call = *best; | ||
138 | } else | ||
139 | printk("raid6: Yikes! No algorithm found!\n"); | ||
140 | |||
141 | free_pages((unsigned long)syndromes, 1); | ||
142 | |||
143 | return best ? 0 : -EINVAL; | ||
144 | } | ||
145 | |||
/*
 * Module exit hook.  Deliberately empty: raid6_select_algo() frees its
 * scratch pages before returning, so there is nothing left to undo here.
 */
static void raid6_exit(void)
{
}
150 | |||
151 | subsys_initcall(raid6_select_algo); | ||
152 | module_exit(raid6_exit); | ||
153 | MODULE_LICENSE("GPL"); | ||
154 | MODULE_DESCRIPTION("RAID6 Q-syndrome calculations"); | ||
diff --git a/lib/raid6/raid6altivec.uc b/lib/raid6/raid6altivec.uc new file mode 100644 index 00000000000..2654d5c854b --- /dev/null +++ b/lib/raid6/raid6altivec.uc | |||
@@ -0,0 +1,130 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6altivec$#.c | ||
15 | * | ||
16 | * $#-way unrolled Altivec RAID-6 instruction set | ||
17 | * | ||
18 | * This file is postprocessed using unroll.awk | ||
19 | * | ||
20 | * <benh> hpa: in process, | ||
21 | * you can just "steal" the vec unit with enable_kernel_altivec() (but | ||
22 | * bracket this with preempt_disable/enable or in a lock) | ||
23 | */ | ||
24 | |||
25 | #include <linux/raid/pq.h> | ||
26 | |||
27 | #ifdef CONFIG_ALTIVEC | ||
28 | |||
29 | #include <altivec.h> | ||
30 | #ifdef __KERNEL__ | ||
31 | # include <asm/system.h> | ||
32 | # include <asm/cputable.h> | ||
33 | #endif | ||
34 | |||
35 | /* | ||
36 | * This is the C data type to use. We use a vector of | ||
37 | * signed char so vec_cmpgt() will generate the right | ||
38 | * instruction. | ||
39 | */ | ||
40 | |||
41 | typedef vector signed char unative_t; | ||
42 | |||
43 | #define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) | ||
44 | #define NSIZE sizeof(unative_t) | ||
45 | |||
46 | /* | ||
47 | * The SHLBYTE() operation shifts each byte left by 1, *not* | ||
48 | * rolling over into the next byte | ||
49 | */ | ||
50 | static inline __attribute_const__ unative_t SHLBYTE(unative_t v) | ||
51 | { | ||
52 | return vec_add(v,v); | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * The MASK() operation returns 0xFF in any byte for which the high | ||
57 | * bit is 1, 0x00 for any byte for which the high bit is 0. | ||
58 | */ | ||
59 | static inline __attribute_const__ unative_t MASK(unative_t v) | ||
60 | { | ||
61 | unative_t zv = NBYTES(0); | ||
62 | |||
63 | /* vec_cmpgt returns a vector bool char; thus the need for the cast */ | ||
64 | return (unative_t)vec_cmpgt(zv, v); | ||
65 | } | ||
66 | |||
67 | |||
68 | /* This is noinline to make damned sure that gcc doesn't move any of the | ||
69 | Altivec code around the enable/disable code */ | ||
70 | static void noinline | ||
71 | raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs) | ||
72 | { | ||
73 | u8 **dptr = (u8 **)ptrs; | ||
74 | u8 *p, *q; | ||
75 | int d, z, z0; | ||
76 | |||
77 | unative_t wd$$, wq$$, wp$$, w1$$, w2$$; | ||
78 | unative_t x1d = NBYTES(0x1d); | ||
79 | |||
80 | z0 = disks - 3; /* Highest data disk */ | ||
81 | p = dptr[z0+1]; /* XOR parity */ | ||
82 | q = dptr[z0+2]; /* RS syndrome */ | ||
83 | |||
84 | for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { | ||
85 | wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; | ||
86 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
87 | wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; | ||
88 | wp$$ = vec_xor(wp$$, wd$$); | ||
89 | w2$$ = MASK(wq$$); | ||
90 | w1$$ = SHLBYTE(wq$$); | ||
91 | w2$$ = vec_and(w2$$, x1d); | ||
92 | w1$$ = vec_xor(w1$$, w2$$); | ||
93 | wq$$ = vec_xor(w1$$, wd$$); | ||
94 | } | ||
95 | *(unative_t *)&p[d+NSIZE*$$] = wp$$; | ||
96 | *(unative_t *)&q[d+NSIZE*$$] = wq$$; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
101 | { | ||
102 | preempt_disable(); | ||
103 | enable_kernel_altivec(); | ||
104 | |||
105 | raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs); | ||
106 | |||
107 | preempt_enable(); | ||
108 | } | ||
109 | |||
110 | int raid6_have_altivec(void); | ||
111 | #if $# == 1 | ||
112 | int raid6_have_altivec(void) | ||
113 | { | ||
114 | /* This assumes either all CPUs have Altivec or none does */ | ||
115 | # ifdef __KERNEL__ | ||
116 | return cpu_has_feature(CPU_FTR_ALTIVEC); | ||
117 | # else | ||
118 | return 1; | ||
119 | # endif | ||
120 | } | ||
121 | #endif | ||
122 | |||
123 | const struct raid6_calls raid6_altivec$# = { | ||
124 | raid6_altivec$#_gen_syndrome, | ||
125 | raid6_have_altivec, | ||
126 | "altivecx$#", | ||
127 | 0 | ||
128 | }; | ||
129 | |||
130 | #endif /* CONFIG_ALTIVEC */ | ||
diff --git a/lib/raid6/raid6int.uc b/lib/raid6/raid6int.uc new file mode 100644 index 00000000000..d1e276a14fa --- /dev/null +++ b/lib/raid6/raid6int.uc | |||
@@ -0,0 +1,117 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6int$#.c | ||
15 | * | ||
16 | * $#-way unrolled portable integer math RAID-6 instruction set | ||
17 | * | ||
18 | * This file is postprocessed using unroll.awk | ||
19 | */ | ||
20 | |||
21 | #include <linux/raid/pq.h> | ||
22 | |||
23 | /* | ||
24 | * This is the C data type to use | ||
25 | */ | ||
26 | |||
27 | /* Change this from BITS_PER_LONG if there is something better... */ | ||
/*
 * Word-size configuration for the integer implementation.  unative_t is
 * the machine word that holds NSIZE independent byte lanes.
 */
28 | #if BITS_PER_LONG == 64 | ||
29 | # define NBYTES(x) ((x) * 0x0101010101010101UL) /* copy byte x into all 8 lanes */ | ||
30 | # define NSIZE 8 /* bytes per unative_t */ | ||
31 | # define NSHIFT 3 /* log2(NSIZE) */ | ||
32 | # define NSTRING "64" /* pasted into the algorithm name string */ | ||
33 | typedef u64 unative_t; | ||
34 | #else | ||
35 | # define NBYTES(x) ((x) * 0x01010101U) /* copy byte x into all 4 lanes */ | ||
36 | # define NSIZE 4 /* bytes per unative_t */ | ||
37 | # define NSHIFT 2 /* log2(NSIZE) */ | ||
38 | # define NSTRING "32" /* pasted into the algorithm name string */ | ||
39 | typedef u32 unative_t; | ||
40 | #endif | ||
41 | |||
42 | |||
43 | |||
44 | /* | ||
45 | * IA-64 wants insane amounts of unrolling. On other architectures that | ||
46 | * is just a waste of space. | ||
47 | */ | ||
48 | #if ($# <= 8) || defined(__ia64__) | ||
49 | |||
50 | |||
51 | /* | ||
52 | * These sub-operations are separate inlines since they can sometimes be | ||
53 | * specially optimized using architecture-specific hacks. | ||
54 | */ | ||
55 | |||
56 | /* | ||
57 | * The SHLBYTE() operation shifts each byte left by 1, *not* | ||
58 | * rolling over into the next byte | ||
59 | */ | ||
60 | static inline __attribute_const__ unative_t SHLBYTE(unative_t v) | ||
61 | { | ||
62 | unative_t vv; | ||
63 | |||
64 | vv = (v << 1) & NBYTES(0xfe); | ||
65 | return vv; | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * The MASK() operation returns 0xFF in any byte for which the high | ||
70 | * bit is 1, 0x00 for any byte for which the high bit is 0. | ||
71 | */ | ||
72 | static inline __attribute_const__ unative_t MASK(unative_t v) | ||
73 | { | ||
74 | unative_t vv; | ||
75 | |||
76 | vv = v & NBYTES(0x80); | ||
77 | vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ | ||
78 | return vv; | ||
79 | } | ||
80 | |||
81 | |||
/*
 * Portable integer P/Q generator.
 *
 * ptrs[0 .. disks-3] are read as data blocks; ptrs[disks-2] receives the
 * XOR parity (P) and ptrs[disks-1] receives the syndrome (Q).  Data is
 * consumed one unative_t per lane, walking from the highest data disk
 * down to disk 0.
 *
 * NOTE(review): this is a template postprocessed by unroll.awk; lines that
 * carry the double-dollar lane marker are presumably emitted once per
 * lane, so each of them must remain a complete statement on its own line.
 * NOTE(review): the outer loop has no tail handling, so bytes is presumably
 * a multiple of NSIZE times the unroll factor - confirm against callers.
 */
82 | static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
83 | { | ||
84 | u8 **dptr = (u8 **)ptrs; | ||
85 | u8 *p, *q; | ||
86 | int d, z, z0; | ||
87 | |||
88 | unative_t wd$$, wq$$, wp$$, w1$$, w2$$; | ||
89 | |||
90 | z0 = disks - 3; /* Highest data disk */ | ||
91 | p = dptr[z0+1]; /* XOR parity */ | ||
92 | q = dptr[z0+2]; /* RS syndrome */ | ||
93 | |||
94 | for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { | ||
/* Seed both accumulators with the word from the highest data disk */
95 | wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; | ||
96 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
97 | wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; | ||
/* P accumulates a plain XOR of the data words */
98 | wp$$ ^= wd$$; | ||
/*
 * Q step: shift every byte of Q left by one, XOR 0x1d into the bytes
 * whose top bit was set before the shift, then XOR in the data word.
 */
99 | w2$$ = MASK(wq$$); | ||
100 | w1$$ = SHLBYTE(wq$$); | ||
101 | w2$$ &= NBYTES(0x1d); | ||
102 | w1$$ ^= w2$$; | ||
103 | wq$$ = w1$$ ^ wd$$; | ||
104 | } | ||
105 | *(unative_t *)&p[d+NSIZE*$$] = wp$$; | ||
106 | *(unative_t *)&q[d+NSIZE*$$] = wq$$; | ||
107 | } | ||
108 | } | ||
109 | |||
/*
 * Method-table entry for this unroll factor of the integer code.  The
 * availability hook is NULL because the portable code is always usable,
 * and the name is built from the word size (NSTRING) and unroll factor.
 * NOTE(review): the trailing 0 is presumably the flag that the SSE tables
 * in this library set to 1 with the remark "Has cache hints" - confirm
 * against struct raid6_calls in <linux/raid/pq.h>.
 */
110 | const struct raid6_calls raid6_intx$# = { | ||
111 | raid6_int$#_gen_syndrome, | ||
112 | NULL, /* always valid */ | ||
113 | "int" NSTRING "x$#", | ||
114 | 0 | ||
115 | }; | ||
116 | |||
117 | #endif | ||
diff --git a/lib/raid6/raid6mmx.c b/lib/raid6/raid6mmx.c new file mode 100644 index 00000000000..e7f6c13132b --- /dev/null +++ b/lib/raid6/raid6mmx.c | |||
@@ -0,0 +1,142 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6mmx.c | ||
15 | * | ||
16 | * MMX implementation of RAID-6 syndrome functions | ||
17 | */ | ||
18 | |||
19 | #if defined(__i386__) && !defined(__arch_um__) | ||
20 | |||
21 | #include <linux/raid/pq.h> | ||
22 | #include "raid6x86.h" | ||
23 | |||
24 | /* Shared with raid6sse1.c */ | ||
25 | const struct raid6_mmx_constants { | ||
26 | u64 x1d; | ||
27 | } raid6_mmx_constants = { | ||
28 | 0x1d1d1d1d1d1d1d1dULL, | ||
29 | }; | ||
30 | |||
/*
 * Availability hook for the MMX method tables: returns the result of
 * boot_cpu_has(X86_FEATURE_MMX), i.e. nonzero when MMX can be used.
 */
31 | static int raid6_have_mmx(void) | ||
32 | { | ||
33 | /* Not really "boot_cpu" but "all_cpus" */ | ||
34 | return boot_cpu_has(X86_FEATURE_MMX); | ||
35 | } | ||
36 | |||
37 | /* | ||
38 | * Plain MMX implementation | ||
39 | */ | ||
/*
 * Generate P and Q for one stripe, 8 bytes per outer iteration.
 *
 * ptrs[0 .. disks-3] are read as data blocks; ptrs[disks-2] receives the
 * XOR parity (P) and ptrs[disks-1] receives the syndrome (Q).
 *
 * Register roles: mm0 = 0x1d in every byte, mm2 = running P,
 * mm4 = running Q, mm5 = zero / mask temporary, mm6 = data just loaded.
 * All MMX use sits between kernel_fpu_begin() and kernel_fpu_end().
 *
 * NOTE(review): the outer loop has no tail handling, so bytes is presumably
 * a multiple of 8 - confirm against callers.
 */
40 | static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
41 | { | ||
42 | u8 **dptr = (u8 **)ptrs; | ||
43 | u8 *p, *q; | ||
44 | int d, z, z0; | ||
45 | |||
46 | z0 = disks - 3; /* Highest data disk */ | ||
47 | p = dptr[z0+1]; /* XOR parity */ | ||
48 | q = dptr[z0+2]; /* RS syndrome */ | ||
49 | |||
50 | kernel_fpu_begin(); | ||
51 | |||
52 | asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | ||
53 | asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | ||
54 | |||
55 | for ( d = 0 ; d < bytes ; d += 8 ) { | ||
56 | asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
57 | asm volatile("movq %mm2,%mm4"); /* Q[0] */ | ||
58 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
59 | asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); | ||
/* mm5 (zero) > Q as signed bytes: 0xff where a Q byte has its top bit set */
60 | asm volatile("pcmpgtb %mm4,%mm5"); | ||
/* Q + Q: shift every Q byte left by one */
61 | asm volatile("paddb %mm4,%mm4"); | ||
/* keep 0x1d only in the bytes selected by the mask */
62 | asm volatile("pand %mm0,%mm5"); | ||
/* fold that correction term into the shifted Q */
63 | asm volatile("pxor %mm5,%mm4"); | ||
/* re-zero the temporary for the next compare */
64 | asm volatile("pxor %mm5,%mm5"); | ||
/* P ^= data */
65 | asm volatile("pxor %mm6,%mm2"); | ||
/* Q ^= data */
66 | asm volatile("pxor %mm6,%mm4"); | ||
67 | } | ||
/*
 * Store the results.  The accumulators are cleared after each store;
 * both are loaded afresh at the top of the next outer iteration.
 */
68 | asm volatile("movq %%mm2,%0" : "=m" (p[d])); | ||
69 | asm volatile("pxor %mm2,%mm2"); | ||
70 | asm volatile("movq %%mm4,%0" : "=m" (q[d])); | ||
71 | asm volatile("pxor %mm4,%mm4"); | ||
72 | } | ||
73 | |||
74 | kernel_fpu_end(); | ||
75 | } | ||
76 | |||
/*
 * Method-table entry for the plain MMX code: generator, availability
 * check, and name string.
 * NOTE(review): the trailing 0 is presumably the flag that the SSE tables
 * in this library set to 1 with the remark "Has cache hints" - confirm
 * against struct raid6_calls in <linux/raid/pq.h>.
 */
77 | const struct raid6_calls raid6_mmxx1 = { | ||
78 | raid6_mmx1_gen_syndrome, | ||
79 | raid6_have_mmx, | ||
80 | "mmxx1", | ||
81 | 0 | ||
82 | }; | ||
83 | |||
84 | /* | ||
85 | * Unrolled-by-2 MMX implementation | ||
86 | */ | ||
/*
 * Generate P and Q for one stripe, 16 bytes (two 8-byte lanes) per outer
 * iteration.
 *
 * ptrs[0 .. disks-3] are read as data blocks; ptrs[disks-2] receives the
 * XOR parity (P) and ptrs[disks-1] receives the syndrome (Q).
 *
 * Register roles: mm0 = 0x1d in every byte; lane 0 uses mm2 (P), mm4 (Q)
 * and mm5 (temp); lane 1 uses mm3 (P), mm6 (Q) and mm7 (temp).  The
 * temporaries double as the data registers once the Q correction is done.
 *
 * NOTE(review): the outer loop has no tail handling, so bytes is presumably
 * a multiple of 16 - confirm against callers.
 */
87 | static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
88 | { | ||
89 | u8 **dptr = (u8 **)ptrs; | ||
90 | u8 *p, *q; | ||
91 | int d, z, z0; | ||
92 | |||
93 | z0 = disks - 3; /* Highest data disk */ | ||
94 | p = dptr[z0+1]; /* XOR parity */ | ||
95 | q = dptr[z0+2]; /* RS syndrome */ | ||
96 | |||
97 | kernel_fpu_begin(); | ||
98 | |||
99 | asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | ||
100 | asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | ||
101 | asm volatile("pxor %mm7,%mm7"); /* Zero temp */ | ||
102 | |||
103 | for ( d = 0 ; d < bytes ; d += 16 ) { | ||
104 | asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
105 | asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); | ||
106 | asm volatile("movq %mm2,%mm4"); /* Q[0] */ | ||
107 | asm volatile("movq %mm3,%mm6"); /* Q[1] */ | ||
108 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
/* masks: 0xff where a Q byte has its top bit set */
109 | asm volatile("pcmpgtb %mm4,%mm5"); | ||
110 | asm volatile("pcmpgtb %mm6,%mm7"); | ||
/* shift every Q byte left by one */
111 | asm volatile("paddb %mm4,%mm4"); | ||
112 | asm volatile("paddb %mm6,%mm6"); | ||
/* reduce the masks to 0x1d and fold them into Q */
113 | asm volatile("pand %mm0,%mm5"); | ||
114 | asm volatile("pand %mm0,%mm7"); | ||
115 | asm volatile("pxor %mm5,%mm4"); | ||
116 | asm volatile("pxor %mm7,%mm6"); | ||
/* the temporaries are free now: reuse them to hold this disk's data */
117 | asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); | ||
118 | asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); | ||
/* P ^= data, Q ^= data */
119 | asm volatile("pxor %mm5,%mm2"); | ||
120 | asm volatile("pxor %mm7,%mm3"); | ||
121 | asm volatile("pxor %mm5,%mm4"); | ||
122 | asm volatile("pxor %mm7,%mm6"); | ||
/* back to zero so the next compare starts from a clean temporary */
123 | asm volatile("pxor %mm5,%mm5"); | ||
124 | asm volatile("pxor %mm7,%mm7"); | ||
125 | } | ||
126 | asm volatile("movq %%mm2,%0" : "=m" (p[d])); | ||
127 | asm volatile("movq %%mm3,%0" : "=m" (p[d+8])); | ||
128 | asm volatile("movq %%mm4,%0" : "=m" (q[d])); | ||
129 | asm volatile("movq %%mm6,%0" : "=m" (q[d+8])); | ||
130 | } | ||
131 | |||
132 | kernel_fpu_end(); | ||
133 | } | ||
134 | |||
/*
 * Method-table entry for the 2-way unrolled MMX code: generator,
 * availability check, and name string.
 * NOTE(review): the trailing 0 is presumably the flag that the SSE tables
 * in this library set to 1 with the remark "Has cache hints" - confirm
 * against struct raid6_calls in <linux/raid/pq.h>.
 */
135 | const struct raid6_calls raid6_mmxx2 = { | ||
136 | raid6_mmx2_gen_syndrome, | ||
137 | raid6_have_mmx, | ||
138 | "mmxx2", | ||
139 | 0 | ||
140 | }; | ||
141 | |||
142 | #endif | ||
diff --git a/lib/raid6/raid6recov.c b/lib/raid6/raid6recov.c new file mode 100644 index 00000000000..2609f00e0d6 --- /dev/null +++ b/lib/raid6/raid6recov.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6recov.c | ||
15 | * | ||
16 | * RAID-6 data recovery in dual failure mode. In single failure mode, | ||
17 | * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct | ||
18 | * the syndrome.) | ||
19 | */ | ||
20 | |||
21 | #include <linux/raid/pq.h> | ||
22 | |||
23 | /* Recover two failed data blocks. */ | ||
24 | void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, | ||
25 | void **ptrs) | ||
26 | { | ||
27 | u8 *p, *q, *dp, *dq; | ||
28 | u8 px, qx, db; | ||
29 | const u8 *pbmul; /* P multiplier table for B data */ | ||
30 | const u8 *qmul; /* Q multiplier table (for both) */ | ||
31 | |||
32 | p = (u8 *)ptrs[disks-2]; | ||
33 | q = (u8 *)ptrs[disks-1]; | ||
34 | |||
35 | /* Compute syndrome with zero for the missing data pages | ||
36 | Use the dead data pages as temporary storage for | ||
37 | delta p and delta q */ | ||
38 | dp = (u8 *)ptrs[faila]; | ||
39 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
40 | ptrs[disks-2] = dp; | ||
41 | dq = (u8 *)ptrs[failb]; | ||
42 | ptrs[failb] = (void *)raid6_empty_zero_page; | ||
43 | ptrs[disks-1] = dq; | ||
44 | |||
45 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
46 | |||
47 | /* Restore pointer table */ | ||
48 | ptrs[faila] = dp; | ||
49 | ptrs[failb] = dq; | ||
50 | ptrs[disks-2] = p; | ||
51 | ptrs[disks-1] = q; | ||
52 | |||
53 | /* Now, pick the proper data tables */ | ||
54 | pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; | ||
55 | qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; | ||
56 | |||
57 | /* Now do it... */ | ||
58 | while ( bytes-- ) { | ||
59 | px = *p ^ *dp; | ||
60 | qx = qmul[*q ^ *dq]; | ||
61 | *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */ | ||
62 | *dp++ = db ^ px; /* Reconstructed A */ | ||
63 | p++; q++; | ||
64 | } | ||
65 | } | ||
66 | EXPORT_SYMBOL_GPL(raid6_2data_recov); | ||
67 | |||
68 | /* Recover failure of one data block plus the P block */ | ||
69 | void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) | ||
70 | { | ||
71 | u8 *p, *q, *dq; | ||
72 | const u8 *qmul; /* Q multiplier table */ | ||
73 | |||
74 | p = (u8 *)ptrs[disks-2]; | ||
75 | q = (u8 *)ptrs[disks-1]; | ||
76 | |||
77 | /* Compute syndrome with zero for the missing data page | ||
78 | Use the dead data page as temporary storage for delta q */ | ||
79 | dq = (u8 *)ptrs[faila]; | ||
80 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
81 | ptrs[disks-1] = dq; | ||
82 | |||
83 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
84 | |||
85 | /* Restore pointer table */ | ||
86 | ptrs[faila] = dq; | ||
87 | ptrs[disks-1] = q; | ||
88 | |||
89 | /* Now, pick the proper data tables */ | ||
90 | qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; | ||
91 | |||
92 | /* Now do it... */ | ||
93 | while ( bytes-- ) { | ||
94 | *p++ ^= *dq = qmul[*q ^ *dq]; | ||
95 | q++; dq++; | ||
96 | } | ||
97 | } | ||
98 | EXPORT_SYMBOL_GPL(raid6_datap_recov); | ||
99 | |||
100 | #ifndef __KERNEL__ | ||
101 | /* Testing only */ | ||
102 | |||
103 | /* Recover two failed blocks. */ | ||
104 | void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) | ||
105 | { | ||
106 | if ( faila > failb ) { | ||
107 | int tmp = faila; | ||
108 | faila = failb; | ||
109 | failb = tmp; | ||
110 | } | ||
111 | |||
112 | if ( failb == disks-1 ) { | ||
113 | if ( faila == disks-2 ) { | ||
114 | /* P+Q failure. Just rebuild the syndrome. */ | ||
115 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
116 | } else { | ||
117 | /* data+Q failure. Reconstruct data from P, | ||
118 | then rebuild syndrome. */ | ||
119 | /* NOT IMPLEMENTED - equivalent to RAID-5 */ | ||
120 | } | ||
121 | } else { | ||
122 | if ( failb == disks-2 ) { | ||
123 | /* data+P failure. */ | ||
124 | raid6_datap_recov(disks, bytes, faila, ptrs); | ||
125 | } else { | ||
126 | /* data+data failure. */ | ||
127 | raid6_2data_recov(disks, bytes, faila, failb, ptrs); | ||
128 | } | ||
129 | } | ||
130 | } | ||
131 | |||
132 | #endif | ||
diff --git a/lib/raid6/raid6sse1.c b/lib/raid6/raid6sse1.c new file mode 100644 index 00000000000..b274dd5eab8 --- /dev/null +++ b/lib/raid6/raid6sse1.c | |||
@@ -0,0 +1,162 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6sse1.c | ||
15 | * | ||
16 | * SSE-1/MMXEXT implementation of RAID-6 syndrome functions | ||
17 | * | ||
18 | * This is really an MMX implementation, but it requires SSE-1 or | ||
19 | * AMD MMXEXT for prefetch support and a few other features. The | ||
20 | * support for nontemporal memory accesses is enough to make this | ||
21 | * worthwhile as a separate implementation. | ||
22 | */ | ||
23 | |||
24 | #if defined(__i386__) && !defined(__arch_um__) | ||
25 | |||
26 | #include <linux/raid/pq.h> | ||
27 | #include "raid6x86.h" | ||
28 | |||
29 | /* Defined in raid6mmx.c */ | ||
/*
 * Declaration only; the struct layout is repeated here and has to stay in
 * step with the definition in raid6mmx.c.
 */
30 | extern const struct raid6_mmx_constants { | ||
31 | u64 x1d; /* 0x1d in every byte, per the initializer in raid6mmx.c */ | ||
32 | } raid6_mmx_constants; | ||
33 | |||
34 | static int raid6_have_sse1_or_mmxext(void) | ||
35 | { | ||
36 | /* Not really boot_cpu but "all_cpus" */ | ||
37 | return boot_cpu_has(X86_FEATURE_MMX) && | ||
38 | (boot_cpu_has(X86_FEATURE_XMM) || | ||
39 | boot_cpu_has(X86_FEATURE_MMXEXT)); | ||
40 | } | ||
41 | |||
42 | /* | ||
43 | * Plain SSE1 implementation | ||
44 | */ | ||
/*
 * Generate P and Q for one stripe, 8 bytes per outer iteration, using MMX
 * registers plus prefetchnta loads hints and movntq non-temporal stores.
 *
 * ptrs[0 .. disks-3] are read as data blocks; ptrs[disks-2] receives the
 * XOR parity (P) and ptrs[disks-1] receives the syndrome (Q).
 *
 * Register roles: mm0 = 0x1d in every byte, mm2 = running P,
 * mm4 = running Q, mm5 = zero / mask temporary, mm6 = data word.
 * The data load for the next disk is issued at the end of each inner
 * iteration, so one more update step follows the inner loop to consume
 * the last word loaded.
 *
 * NOTE(review): dptr[z0-1] is read unconditionally, so callers presumably
 * always pass at least two data disks - confirm.
 * NOTE(review): the outer loop has no tail handling, so bytes is presumably
 * a multiple of 8 - confirm against callers.
 */
45 | static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
46 | { | ||
47 | u8 **dptr = (u8 **)ptrs; | ||
48 | u8 *p, *q; | ||
49 | int d, z, z0; | ||
50 | |||
51 | z0 = disks - 3; /* Highest data disk */ | ||
52 | p = dptr[z0+1]; /* XOR parity */ | ||
53 | q = dptr[z0+2]; /* RS syndrome */ | ||
54 | |||
55 | kernel_fpu_begin(); | ||
56 | |||
57 | asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | ||
58 | asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | ||
59 | |||
60 | for ( d = 0 ; d < bytes ; d += 8 ) { | ||
61 | asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | ||
62 | asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
63 | asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); | ||
64 | asm volatile("movq %mm2,%mm4"); /* Q[0] */ | ||
/* data word of the second-highest disk, consumed by the first update */
65 | asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); | ||
66 | for ( z = z0-2 ; z >= 0 ; z-- ) { | ||
67 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | ||
/* mask: 0xff where a Q byte has its top bit set */
68 | asm volatile("pcmpgtb %mm4,%mm5"); | ||
/* shift every Q byte left by one */
69 | asm volatile("paddb %mm4,%mm4"); | ||
/* reduce the mask to 0x1d and fold it into Q */
70 | asm volatile("pand %mm0,%mm5"); | ||
71 | asm volatile("pxor %mm5,%mm4"); | ||
72 | asm volatile("pxor %mm5,%mm5"); | ||
/* P ^= data, Q ^= data (data loaded on the previous pass) */
73 | asm volatile("pxor %mm6,%mm2"); | ||
74 | asm volatile("pxor %mm6,%mm4"); | ||
/* fetch this disk's word for the next pass */
75 | asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); | ||
76 | } | ||
/* final update step for the word still pending in mm6 */
77 | asm volatile("pcmpgtb %mm4,%mm5"); | ||
78 | asm volatile("paddb %mm4,%mm4"); | ||
79 | asm volatile("pand %mm0,%mm5"); | ||
80 | asm volatile("pxor %mm5,%mm4"); | ||
81 | asm volatile("pxor %mm5,%mm5"); | ||
82 | asm volatile("pxor %mm6,%mm2"); | ||
83 | asm volatile("pxor %mm6,%mm4"); | ||
84 | |||
85 | asm volatile("movntq %%mm2,%0" : "=m" (p[d])); | ||
86 | asm volatile("movntq %%mm4,%0" : "=m" (q[d])); | ||
87 | } | ||
88 | |||
/* fence the non-temporal stores before leaving the FPU section */
89 | asm volatile("sfence" : : : "memory"); | ||
90 | kernel_fpu_end(); | ||
91 | } | ||
92 | |||
/*
 * Method-table entry for the plain SSE-1/MMXEXT code: generator,
 * availability check, name string, and the cache-hint flag.
 */
93 | const struct raid6_calls raid6_sse1x1 = { | ||
94 | raid6_sse11_gen_syndrome, | ||
95 | raid6_have_sse1_or_mmxext, | ||
96 | "sse1x1", | ||
97 | 1 /* Has cache hints */ | ||
98 | }; | ||
99 | |||
100 | /* | ||
101 | * Unrolled-by-2 SSE1 implementation | ||
102 | */ | ||
/*
 * Generate P and Q for one stripe, 16 bytes (two 8-byte lanes) per outer
 * iteration, using MMX registers plus prefetchnta hints and movntq
 * non-temporal stores.
 *
 * ptrs[0 .. disks-3] are read as data blocks; ptrs[disks-2] receives the
 * XOR parity (P) and ptrs[disks-1] receives the syndrome (Q).
 *
 * Register roles: mm0 = 0x1d in every byte; lane 0 uses mm2 (P), mm4 (Q)
 * and mm5 (temp); lane 1 uses mm3 (P), mm6 (Q) and mm7 (temp).  The
 * temporaries double as the data registers once the Q correction is done.
 *
 * NOTE(review): the outer loop has no tail handling, so bytes is presumably
 * a multiple of 16 - confirm against callers.
 */
103 | static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
104 | { | ||
105 | u8 **dptr = (u8 **)ptrs; | ||
106 | u8 *p, *q; | ||
107 | int d, z, z0; | ||
108 | |||
109 | z0 = disks - 3; /* Highest data disk */ | ||
110 | p = dptr[z0+1]; /* XOR parity */ | ||
111 | q = dptr[z0+2]; /* RS syndrome */ | ||
112 | |||
113 | kernel_fpu_begin(); | ||
114 | |||
115 | asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); | ||
116 | asm volatile("pxor %mm5,%mm5"); /* Zero temp */ | ||
117 | asm volatile("pxor %mm7,%mm7"); /* Zero temp */ | ||
118 | |||
119 | /* We uniformly assume a single prefetch covers at least 16 bytes */ | ||
120 | for ( d = 0 ; d < bytes ; d += 16 ) { | ||
121 | asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | ||
122 | asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
123 | asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ | ||
124 | asm volatile("movq %mm2,%mm4"); /* Q[0] */ | ||
125 | asm volatile("movq %mm3,%mm6"); /* Q[1] */ | ||
126 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
127 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | ||
/* masks: 0xff where a Q byte has its top bit set */
128 | asm volatile("pcmpgtb %mm4,%mm5"); | ||
129 | asm volatile("pcmpgtb %mm6,%mm7"); | ||
/* shift every Q byte left by one */
130 | asm volatile("paddb %mm4,%mm4"); | ||
131 | asm volatile("paddb %mm6,%mm6"); | ||
/* reduce the masks to 0x1d and fold them into Q */
132 | asm volatile("pand %mm0,%mm5"); | ||
133 | asm volatile("pand %mm0,%mm7"); | ||
134 | asm volatile("pxor %mm5,%mm4"); | ||
135 | asm volatile("pxor %mm7,%mm6"); | ||
/* the temporaries are free now: reuse them to hold this disk's data */
136 | asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); | ||
137 | asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); | ||
/* P ^= data, Q ^= data */
138 | asm volatile("pxor %mm5,%mm2"); | ||
139 | asm volatile("pxor %mm7,%mm3"); | ||
140 | asm volatile("pxor %mm5,%mm4"); | ||
141 | asm volatile("pxor %mm7,%mm6"); | ||
/* back to zero so the next compare starts from a clean temporary */
142 | asm volatile("pxor %mm5,%mm5"); | ||
143 | asm volatile("pxor %mm7,%mm7"); | ||
144 | } | ||
145 | asm volatile("movntq %%mm2,%0" : "=m" (p[d])); | ||
146 | asm volatile("movntq %%mm3,%0" : "=m" (p[d+8])); | ||
147 | asm volatile("movntq %%mm4,%0" : "=m" (q[d])); | ||
148 | asm volatile("movntq %%mm6,%0" : "=m" (q[d+8])); | ||
149 | } | ||
150 | |||
/* fence the non-temporal stores before leaving the FPU section */
151 | asm volatile("sfence" : :: "memory"); | ||
152 | kernel_fpu_end(); | ||
153 | } | ||
154 | |||
/*
 * Method-table entry for the 2-way unrolled SSE-1/MMXEXT code: generator,
 * availability check, name string, and the cache-hint flag.
 */
155 | const struct raid6_calls raid6_sse1x2 = { | ||
156 | raid6_sse12_gen_syndrome, | ||
157 | raid6_have_sse1_or_mmxext, | ||
158 | "sse1x2", | ||
159 | 1 /* Has cache hints */ | ||
160 | }; | ||
161 | |||
162 | #endif | ||
diff --git a/lib/raid6/raid6sse2.c b/lib/raid6/raid6sse2.c new file mode 100644 index 00000000000..6ed6c6c0389 --- /dev/null +++ b/lib/raid6/raid6sse2.c | |||
@@ -0,0 +1,262 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6sse2.c | ||
15 | * | ||
16 | * SSE-2 implementation of RAID-6 syndrome functions | ||
17 | * | ||
18 | */ | ||
19 | |||
20 | #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | ||
21 | |||
22 | #include <linux/raid/pq.h> | ||
23 | #include "raid6x86.h" | ||
24 | |||
/*
 * Constant pool for the SSE2 code paths.  x1d holds 0x1d in each of its
 * sixteen bytes; the generators load it into %xmm0 with movdqa, which is
 * why the object is declared with 16-byte alignment.
 */
25 | static const struct raid6_sse_constants { | ||
26 | u64 x1d[2]; | ||
27 | } raid6_sse_constants __attribute__((aligned(16))) = { | ||
28 | { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL }, | ||
29 | }; | ||
30 | |||
31 | static int raid6_have_sse2(void) | ||
32 | { | ||
33 | /* Not really boot_cpu but "all_cpus" */ | ||
34 | return boot_cpu_has(X86_FEATURE_MMX) && | ||
35 | boot_cpu_has(X86_FEATURE_FXSR) && | ||
36 | boot_cpu_has(X86_FEATURE_XMM) && | ||
37 | boot_cpu_has(X86_FEATURE_XMM2); | ||
38 | } | ||
39 | |||
40 | /* | ||
41 | * Plain SSE2 implementation | ||
42 | */ | ||
/*
 * Generate P and Q for one stripe, 16 bytes per outer iteration, using
 * SSE2 registers, prefetchnta hints and movntdq non-temporal stores.
 *
 * ptrs[0 .. disks-3] are read as data blocks; ptrs[disks-2] receives the
 * XOR parity (P) and ptrs[disks-1] receives the syndrome (Q).
 *
 * Register roles: xmm0 = 0x1d in every byte, xmm2 = running P,
 * xmm4 = running Q, xmm5 = zero / mask temporary, xmm6 = data word.
 * The data load for the next disk is issued at the end of each inner
 * iteration, so one more update step follows the inner loop to consume
 * the last word loaded.
 *
 * NOTE(review): dptr[z0-1] is read unconditionally, so callers presumably
 * always pass at least two data disks - confirm.
 * NOTE(review): movdqa and movntdq are aligned accesses, so every block is
 * presumably 16-byte aligned and bytes a multiple of 16 - confirm.
 */
43 | static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
44 | { | ||
45 | u8 **dptr = (u8 **)ptrs; | ||
46 | u8 *p, *q; | ||
47 | int d, z, z0; | ||
48 | |||
49 | z0 = disks - 3; /* Highest data disk */ | ||
50 | p = dptr[z0+1]; /* XOR parity */ | ||
51 | q = dptr[z0+2]; /* RS syndrome */ | ||
52 | |||
53 | kernel_fpu_begin(); | ||
54 | |||
55 | asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); | ||
56 | asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | ||
57 | |||
58 | for ( d = 0 ; d < bytes ; d += 16 ) { | ||
59 | asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | ||
60 | asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
61 | asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); | ||
62 | asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ | ||
/* data word of the second-highest disk, consumed by the first update */
63 | asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d])); | ||
64 | for ( z = z0-2 ; z >= 0 ; z-- ) { | ||
65 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | ||
/* mask: 0xff where a Q byte has its top bit set */
66 | asm volatile("pcmpgtb %xmm4,%xmm5"); | ||
/* shift every Q byte left by one */
67 | asm volatile("paddb %xmm4,%xmm4"); | ||
/* reduce the mask to 0x1d and fold it into Q */
68 | asm volatile("pand %xmm0,%xmm5"); | ||
69 | asm volatile("pxor %xmm5,%xmm4"); | ||
70 | asm volatile("pxor %xmm5,%xmm5"); | ||
/* P ^= data, Q ^= data (data loaded on the previous pass) */
71 | asm volatile("pxor %xmm6,%xmm2"); | ||
72 | asm volatile("pxor %xmm6,%xmm4"); | ||
/* fetch this disk's word for the next pass */
73 | asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d])); | ||
74 | } | ||
/* final update step for the word still pending in xmm6 */
75 | asm volatile("pcmpgtb %xmm4,%xmm5"); | ||
76 | asm volatile("paddb %xmm4,%xmm4"); | ||
77 | asm volatile("pand %xmm0,%xmm5"); | ||
78 | asm volatile("pxor %xmm5,%xmm4"); | ||
79 | asm volatile("pxor %xmm5,%xmm5"); | ||
80 | asm volatile("pxor %xmm6,%xmm2"); | ||
81 | asm volatile("pxor %xmm6,%xmm4"); | ||
82 | |||
/*
 * Store the results.  The accumulators are cleared after each store;
 * both are loaded afresh at the top of the next outer iteration.
 */
83 | asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); | ||
84 | asm volatile("pxor %xmm2,%xmm2"); | ||
85 | asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); | ||
86 | asm volatile("pxor %xmm4,%xmm4"); | ||
87 | } | ||
88 | |||
/* fence the non-temporal stores before leaving the FPU section */
89 | asm volatile("sfence" : : : "memory"); | ||
90 | kernel_fpu_end(); | ||
91 | } | ||
92 | |||
/*
 * Method-table entry for the plain SSE2 code: generator, availability
 * check, name string, and the cache-hint flag.
 */
93 | const struct raid6_calls raid6_sse2x1 = { | ||
94 | raid6_sse21_gen_syndrome, | ||
95 | raid6_have_sse2, | ||
96 | "sse2x1", | ||
97 | 1 /* Has cache hints */ | ||
98 | }; | ||
99 | |||
100 | /* | ||
101 | * Unrolled-by-2 SSE2 implementation | ||
102 | */ | ||
/*
 * Generate P and Q for one stripe, 32 bytes (two 16-byte lanes) per outer
 * iteration, using SSE2 registers, prefetchnta hints and movntdq
 * non-temporal stores.
 *
 * ptrs[0 .. disks-3] are read as data blocks; ptrs[disks-2] receives the
 * XOR parity (P) and ptrs[disks-1] receives the syndrome (Q).
 *
 * Register roles: xmm0 = 0x1d in every byte; lane 0 uses xmm2 (P),
 * xmm4 (Q) and xmm5 (temp); lane 1 uses xmm3 (P), xmm6 (Q) and xmm7
 * (temp).  The temporaries double as the data registers once the Q
 * correction is done.
 *
 * NOTE(review): movdqa and movntdq are aligned accesses, so every block is
 * presumably 16-byte aligned and bytes a multiple of 32 - confirm.
 */
103 | static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
104 | { | ||
105 | u8 **dptr = (u8 **)ptrs; | ||
106 | u8 *p, *q; | ||
107 | int d, z, z0; | ||
108 | |||
109 | z0 = disks - 3; /* Highest data disk */ | ||
110 | p = dptr[z0+1]; /* XOR parity */ | ||
111 | q = dptr[z0+2]; /* RS syndrome */ | ||
112 | |||
113 | kernel_fpu_begin(); | ||
114 | |||
115 | asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); | ||
116 | asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | ||
117 | asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ | ||
118 | |||
119 | /* We uniformly assume a single prefetch covers at least 32 bytes */ | ||
120 | for ( d = 0 ; d < bytes ; d += 32 ) { | ||
121 | asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); | ||
122 | asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ | ||
123 | asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */ | ||
124 | asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ | ||
125 | asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */ | ||
126 | for ( z = z0-1 ; z >= 0 ; z-- ) { | ||
127 | asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); | ||
/* masks: 0xff where a Q byte has its top bit set */
128 | asm volatile("pcmpgtb %xmm4,%xmm5"); | ||
129 | asm volatile("pcmpgtb %xmm6,%xmm7"); | ||
/* shift every Q byte left by one */
130 | asm volatile("paddb %xmm4,%xmm4"); | ||
131 | asm volatile("paddb %xmm6,%xmm6"); | ||
/* reduce the masks to 0x1d and fold them into Q */
132 | asm volatile("pand %xmm0,%xmm5"); | ||
133 | asm volatile("pand %xmm0,%xmm7"); | ||
134 | asm volatile("pxor %xmm5,%xmm4"); | ||
135 | asm volatile("pxor %xmm7,%xmm6"); | ||
/* the temporaries are free now: reuse them to hold this disk's data */
136 | asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d])); | ||
137 | asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16])); | ||
/* P ^= data, Q ^= data */
138 | asm volatile("pxor %xmm5,%xmm2"); | ||
139 | asm volatile("pxor %xmm7,%xmm3"); | ||
140 | asm volatile("pxor %xmm5,%xmm4"); | ||
141 | asm volatile("pxor %xmm7,%xmm6"); | ||
/* back to zero so the next compare starts from a clean temporary */
142 | asm volatile("pxor %xmm5,%xmm5"); | ||
143 | asm volatile("pxor %xmm7,%xmm7"); | ||
144 | } | ||
145 | asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); | ||
146 | asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); | ||
147 | asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); | ||
148 | asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); | ||
149 | } | ||
150 | |||
/* fence the non-temporal stores before leaving the FPU section */
151 | asm volatile("sfence" : : : "memory"); | ||
152 | kernel_fpu_end(); | ||
153 | } | ||
154 | |||
/*
 * Method-table entry for the 2-way unrolled SSE2 code: generator,
 * availability check, name string, and the cache-hint flag.
 */
155 | const struct raid6_calls raid6_sse2x2 = { | ||
156 | raid6_sse22_gen_syndrome, | ||
157 | raid6_have_sse2, | ||
158 | "sse2x2", | ||
159 | 1 /* Has cache hints */ | ||
160 | }; | ||
161 | |||
162 | #endif | ||
163 | |||
164 | #if defined(__x86_64__) && !defined(__arch_um__) | ||
165 | |||
166 | /* | ||
167 | * Unrolled-by-4 SSE2 implementation | ||
168 | */ | ||
/*
 * Generate P and Q for one stripe, 64 bytes (four 16-byte lanes) per outer
 * iteration.  Compiled for x86-64 only, since it uses xmm10-xmm15.
 *
 * ptrs[0 .. disks-3] are read as data blocks; ptrs[disks-2] receives the
 * XOR parity (P) and ptrs[disks-1] receives the syndrome (Q).
 *
 * Register roles: xmm0 = 0x1d in every byte;
 *   lane 0: xmm2 = P, xmm4 = Q,  xmm5 = temp
 *   lane 1: xmm3 = P, xmm6 = Q,  xmm7 = temp
 *   lane 2: xmm10 = P, xmm12 = Q, xmm13 = temp
 *   lane 3: xmm11 = P, xmm14 = Q, xmm15 = temp
 *
 * Unlike the narrower variants, the accumulators start at zero and the
 * inner loop begins at the highest data disk itself; the first pass
 * therefore doubles a zero Q and simply XORs the first data in.  The
 * accumulators are re-zeroed after each store for the next chunk.
 *
 * NOTE(review): movdqa and movntdq are aligned accesses, so every block is
 * presumably 16-byte aligned and bytes a multiple of 64 - confirm.
 */
169 | static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
170 | { | ||
171 | u8 **dptr = (u8 **)ptrs; | ||
172 | u8 *p, *q; | ||
173 | int d, z, z0; | ||
174 | |||
175 | z0 = disks - 3; /* Highest data disk */ | ||
176 | p = dptr[z0+1]; /* XOR parity */ | ||
177 | q = dptr[z0+2]; /* RS syndrome */ | ||
178 | |||
179 | kernel_fpu_begin(); | ||
180 | |||
181 | asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); | ||
182 | asm volatile("pxor %xmm2,%xmm2"); /* P[0] */ | ||
183 | asm volatile("pxor %xmm3,%xmm3"); /* P[1] */ | ||
184 | asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */ | ||
185 | asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ | ||
186 | asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */ | ||
187 | asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ | ||
188 | asm volatile("pxor %xmm10,%xmm10"); /* P[2] */ | ||
189 | asm volatile("pxor %xmm11,%xmm11"); /* P[3] */ | ||
190 | asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */ | ||
191 | asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */ | ||
192 | asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */ | ||
193 | asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */ | ||
194 | |||
195 | for ( d = 0 ; d < bytes ; d += 64 ) { | ||
196 | for ( z = z0 ; z >= 0 ; z-- ) { | ||
197 | /* The second prefetch seems to improve performance... */ | ||
198 | asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); | ||
199 | asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); | ||
/* masks: 0xff where a Q byte has its top bit set */
200 | asm volatile("pcmpgtb %xmm4,%xmm5"); | ||
201 | asm volatile("pcmpgtb %xmm6,%xmm7"); | ||
202 | asm volatile("pcmpgtb %xmm12,%xmm13"); | ||
203 | asm volatile("pcmpgtb %xmm14,%xmm15"); | ||
/* shift every Q byte left by one */
204 | asm volatile("paddb %xmm4,%xmm4"); | ||
205 | asm volatile("paddb %xmm6,%xmm6"); | ||
206 | asm volatile("paddb %xmm12,%xmm12"); | ||
207 | asm volatile("paddb %xmm14,%xmm14"); | ||
/* reduce the masks to 0x1d ... */
208 | asm volatile("pand %xmm0,%xmm5"); | ||
209 | asm volatile("pand %xmm0,%xmm7"); | ||
210 | asm volatile("pand %xmm0,%xmm13"); | ||
211 | asm volatile("pand %xmm0,%xmm15"); | ||
/* ... and fold them into Q */
212 | asm volatile("pxor %xmm5,%xmm4"); | ||
213 | asm volatile("pxor %xmm7,%xmm6"); | ||
214 | asm volatile("pxor %xmm13,%xmm12"); | ||
215 | asm volatile("pxor %xmm15,%xmm14"); | ||
/* the temporaries are free now: reuse them to hold this disk's data */
216 | asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); | ||
217 | asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); | ||
218 | asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); | ||
219 | asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); | ||
/* P ^= data */
220 | asm volatile("pxor %xmm5,%xmm2"); | ||
221 | asm volatile("pxor %xmm7,%xmm3"); | ||
222 | asm volatile("pxor %xmm13,%xmm10"); | ||
223 | asm volatile("pxor %xmm15,%xmm11"); | ||
/* Q ^= data */
224 | asm volatile("pxor %xmm5,%xmm4"); | ||
225 | asm volatile("pxor %xmm7,%xmm6"); | ||
226 | asm volatile("pxor %xmm13,%xmm12"); | ||
227 | asm volatile("pxor %xmm15,%xmm14"); | ||
/* back to zero so the next compare starts from a clean temporary */
228 | asm volatile("pxor %xmm5,%xmm5"); | ||
229 | asm volatile("pxor %xmm7,%xmm7"); | ||
230 | asm volatile("pxor %xmm13,%xmm13"); | ||
231 | asm volatile("pxor %xmm15,%xmm15"); | ||
232 | } | ||
/* store each accumulator, then zero it for the next 64-byte chunk */
233 | asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); | ||
234 | asm volatile("pxor %xmm2,%xmm2"); | ||
235 | asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); | ||
236 | asm volatile("pxor %xmm3,%xmm3"); | ||
237 | asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); | ||
238 | asm volatile("pxor %xmm10,%xmm10"); | ||
239 | asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); | ||
240 | asm volatile("pxor %xmm11,%xmm11"); | ||
241 | asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); | ||
242 | asm volatile("pxor %xmm4,%xmm4"); | ||
243 | asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); | ||
244 | asm volatile("pxor %xmm6,%xmm6"); | ||
245 | asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); | ||
246 | asm volatile("pxor %xmm12,%xmm12"); | ||
247 | asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); | ||
248 | asm volatile("pxor %xmm14,%xmm14"); | ||
249 | } | ||
250 | |||
/* fence the non-temporal stores before leaving the FPU section */
251 | asm volatile("sfence" : : : "memory"); | ||
252 | kernel_fpu_end(); | ||
253 | } | ||
254 | |||
/*
 * Method-table entry for the 4-way unrolled SSE2 code (x86-64 only):
 * generator, availability check, name string, and the cache-hint flag.
 */
255 | const struct raid6_calls raid6_sse2x4 = { | ||
256 | raid6_sse24_gen_syndrome, | ||
257 | raid6_have_sse2, | ||
258 | "sse2x4", | ||
259 | 1 /* Has cache hints */ | ||
260 | }; | ||
261 | |||
262 | #endif | ||
diff --git a/lib/raid6/raid6test/Makefile b/lib/raid6/raid6test/Makefile new file mode 100644 index 00000000000..2874cbef529 --- /dev/null +++ b/lib/raid6/raid6test/Makefile | |||
@@ -0,0 +1,75 @@ | |||
1 | # | ||
2 | # This is a simple Makefile to test some of the RAID-6 code | ||
3 | # from userspace. | ||
4 | # | ||
5 | |||
6 | CC = gcc | ||
7 | OPTFLAGS = -O2 # Adjust as desired | ||
8 | CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS) | ||
9 | LD = ld | ||
10 | AWK = awk | ||
11 | AR = ar | ||
12 | RANLIB = ranlib | ||
13 | |||
14 | .c.o: | ||
15 | $(CC) $(CFLAGS) -c -o $@ $< | ||
16 | |||
17 | %.c: ../%.c | ||
18 | cp -f $< $@ | ||
19 | |||
20 | %.uc: ../%.uc | ||
21 | cp -f $< $@ | ||
22 | |||
23 | all: raid6.a raid6test | ||
24 | |||
25 | raid6.a: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \ | ||
26 | raid6int32.o \ | ||
27 | raid6mmx.o raid6sse1.o raid6sse2.o \ | ||
28 | raid6altivec1.o raid6altivec2.o raid6altivec4.o raid6altivec8.o \ | ||
29 | raid6recov.o raid6algos.o \ | ||
30 | raid6tables.o | ||
31 | rm -f $@ | ||
32 | $(AR) cq $@ $^ | ||
33 | $(RANLIB) $@ | ||
34 | |||
35 | raid6test: test.c raid6.a | ||
36 | $(CC) $(CFLAGS) -o raid6test $^ | ||
37 | |||
38 | raid6altivec1.c: raid6altivec.uc ../unroll.awk | ||
39 | $(AWK) ../unroll.awk -vN=1 < raid6altivec.uc > $@ | ||
40 | |||
41 | raid6altivec2.c: raid6altivec.uc ../unroll.awk | ||
42 | $(AWK) ../unroll.awk -vN=2 < raid6altivec.uc > $@ | ||
43 | |||
44 | raid6altivec4.c: raid6altivec.uc ../unroll.awk | ||
45 | $(AWK) ../unroll.awk -vN=4 < raid6altivec.uc > $@ | ||
46 | |||
47 | raid6altivec8.c: raid6altivec.uc ../unroll.awk | ||
48 | $(AWK) ../unroll.awk -vN=8 < raid6altivec.uc > $@ | ||
49 | |||
50 | raid6int1.c: raid6int.uc ../unroll.awk | ||
51 | $(AWK) ../unroll.awk -vN=1 < raid6int.uc > $@ | ||
52 | |||
53 | raid6int2.c: raid6int.uc ../unroll.awk | ||
54 | $(AWK) ../unroll.awk -vN=2 < raid6int.uc > $@ | ||
55 | |||
56 | raid6int4.c: raid6int.uc ../unroll.awk | ||
57 | $(AWK) ../unroll.awk -vN=4 < raid6int.uc > $@ | ||
58 | |||
59 | raid6int8.c: raid6int.uc ../unroll.awk | ||
60 | $(AWK) ../unroll.awk -vN=8 < raid6int.uc > $@ | ||
61 | |||
62 | raid6int16.c: raid6int.uc ../unroll.awk | ||
63 | $(AWK) ../unroll.awk -vN=16 < raid6int.uc > $@ | ||
64 | |||
65 | raid6int32.c: raid6int.uc ../unroll.awk | ||
66 | $(AWK) ../unroll.awk -vN=32 < raid6int.uc > $@ | ||
67 | |||
68 | raid6tables.c: mktables | ||
69 | ./mktables > raid6tables.c | ||
70 | |||
# Remove everything the build creates in this directory.  Both .uc files are
# copies made by the "%.uc: ../%.uc" rule, so both must be deleted here;
# otherwise a stale raid6altivec.uc survives "make clean".
71 | clean: | ||
72 | rm -f *.o *.a mktables mktables.c raid6int.uc raid6altivec.uc raid6*.c raid6test | ||
73 | |||
74 | spotless: clean | ||
75 | rm -f *~ | ||
diff --git a/lib/raid6/raid6test/test.c b/lib/raid6/raid6test/test.c new file mode 100644 index 00000000000..7a930318b17 --- /dev/null +++ b/lib/raid6/raid6test/test.c | |||
@@ -0,0 +1,124 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This file is part of the Linux kernel, and is made available under | ||
6 | * the terms of the GNU General Public License version 2 or (at your | ||
7 | * option) any later version; incorporated herein by reference. | ||
8 | * | ||
9 | * ----------------------------------------------------------------------- */ | ||
10 | |||
11 | /* | ||
12 | * raid6test.c | ||
13 | * | ||
14 | * Test RAID-6 recovery with various algorithms | ||
15 | */ | ||
16 | |||
17 | #include <stdlib.h> | ||
18 | #include <stdio.h> | ||
19 | #include <string.h> | ||
20 | #include <linux/raid/pq.h> | ||
21 | |||
/*
 * Test fixture state.
 *
 * NDISKS                - total number of simulated disks; the last two
 *                         slots are P (NDISKS-2) and Q (NDISKS-1).
 * raid6_empty_zero_page - zero-filled page defined here for user space.
 *                         NOTE(review): presumably referenced by the
 *                         library code under test; confirm in pq.h.
 * raid6_call            - the algorithm currently under test; main()
 *                         copies each candidate descriptor into it.
 * dataptrs              - pointer table handed to the library calls.
 * data                  - backing pages holding the reference contents.
 * recovi, recovj        - scratch pages that receive recovered data.
 */
22 | #define NDISKS 16 /* Including P and Q */ | ||
23 | |||
24 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | ||
25 | struct raid6_calls raid6_call; | ||
26 | |||
27 | char *dataptrs[NDISKS]; | ||
28 | char data[NDISKS][PAGE_SIZE]; | ||
29 | char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; | ||
30 | |||
/*
 * Fill every page in data[] (the P and Q slots included) with rand()
 * output and point each dataptrs[] entry at its own page.  rand() is never
 * seeded in this file, so the contents are the same on every run.
 */
31 | static void makedata(void) | ||
32 | { | ||
33 | int i, j; | ||
34 | |||
35 | for (i = 0; i < NDISKS; i++) { | ||
36 | for (j = 0; j < PAGE_SIZE; j++) | ||
37 | data[i][j] = rand(); | ||
38 | |||
39 | dataptrs[i] = data[i]; | ||
40 | } | ||
41 | } | ||
42 | |||
/*
 * Map a disk index to a one-letter label for the report line:
 * 'P' for index NDISKS-2, 'Q' for index NDISKS-1, 'D' for anything else.
 */
43 | static char disk_type(int d) | ||
44 | { | ||
45 | switch (d) { | ||
46 | case NDISKS-2: | ||
47 | return 'P'; | ||
48 | case NDISKS-1: | ||
49 | return 'Q'; | ||
50 | default: | ||
51 | return 'D'; | ||
52 | } | ||
53 | } | ||
54 | |||
/*
 * Simulate the loss of disks i and j and check that raid6_dual_recov()
 * rebuilds them.
 *
 * The two dataptrs[] slots are redirected to scratch pages pre-filled with
 * junk (0xf0 / 0xba), recovery is run, and each scratch page is compared
 * with the untouched reference page in data[].  One result line is printed
 * per pair, except for the data+Q pair, which is forced to "no error" and
 * not reported (recovery is still invoked for it).  dataptrs[] is restored
 * before returning.
 *
 * The data+Q test assumes i < j, which is how main() calls this function.
 *
 * Returns 0 if both pages match (or the pair is skipped), 1 otherwise.
 */
55 | static int test_disks(int i, int j) | ||
56 | { | ||
57 | int erra, errb; | ||
58 | |||
59 | memset(recovi, 0xf0, PAGE_SIZE); | ||
60 | memset(recovj, 0xba, PAGE_SIZE); | ||
61 | |||
62 | dataptrs[i] = recovi; | ||
63 | dataptrs[j] = recovj; | ||
64 | |||
65 | raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs); | ||
66 | |||
67 | erra = memcmp(data[i], recovi, PAGE_SIZE); | ||
68 | errb = memcmp(data[j], recovj, PAGE_SIZE); | ||
69 | |||
70 | if (i < NDISKS-2 && j == NDISKS-1) { | ||
71 | /* We don't implement the DQ failure scenario, since it's | ||
72 | equivalent to a RAID-5 failure (XOR, then recompute Q) */ | ||
73 | erra = errb = 0; | ||
74 | } else { | ||
75 | printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", | ||
76 | raid6_call.name, | ||
77 | i, disk_type(i), | ||
78 | j, disk_type(j), | ||
79 | (!erra && !errb) ? "OK" : | ||
80 | !erra ? "ERRB" : | ||
81 | !errb ? "ERRA" : "ERRAB"); | ||
82 | } | ||
83 | |||
84 | dataptrs[i] = data[i]; | ||
85 | dataptrs[j] = data[j]; | ||
86 | |||
87 | return erra || errb; | ||
88 | } | ||
89 | |||
/*
 * Test driver.
 *
 * For every entry in raid6_algos whose valid() hook is absent or returns
 * nonzero: install it as raid6_call, overwrite the P and Q pages with 0xee,
 * regenerate the syndromes with that algorithm, then run test_disks() on
 * every pair i < j of the NDISKS disks.  Afterwards raid6_select_algo() is
 * called once.
 *
 * Returns 0 if every pair recovered correctly and 1 otherwise.  The raw
 * failure count is deliberately not used as the exit status: only the low
 * 8 bits of the status reach the parent process, so a count that happened
 * to be a multiple of 256 would be reported as success.
 */
90 | int main(int argc, char *argv[]) | ||
91 | { | ||
92 | const struct raid6_calls *const *algo; | ||
93 | int i, j; | ||
94 | int err = 0; | ||
95 | |||
96 | makedata(); | ||
97 | |||
98 | for (algo = raid6_algos; *algo; algo++) { | ||
99 | if (!(*algo)->valid || (*algo)->valid()) { | ||
100 | raid6_call = **algo; | ||
101 | |||
102 | /* Nuke syndromes */ | ||
103 | memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); | ||
104 | |||
105 | /* Generate assumed good syndrome */ | ||
106 | raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, | ||
107 | (void **)&dataptrs); | ||
108 | |||
109 | for (i = 0; i < NDISKS-1; i++) | ||
110 | for (j = i+1; j < NDISKS; j++) | ||
111 | err += test_disks(i, j); | ||
112 | } | ||
113 | printf("\n"); | ||
114 | } | ||
115 | |||
116 | printf("\n"); | ||
117 | /* Pick the best algorithm test */ | ||
118 | raid6_select_algo(); | ||
119 | |||
120 | if (err) | ||
121 | printf("\n*** ERRORS FOUND ***\n"); | ||
122 | |||
123 | return err ? 1 : 0; | ||
124 | } | ||
diff --git a/lib/raid6/raid6x86.h b/lib/raid6/raid6x86.h new file mode 100644 index 00000000000..4c22c156855 --- /dev/null +++ b/lib/raid6/raid6x86.h | |||
@@ -0,0 +1,61 @@ | |||
1 | /* ----------------------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
8 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * raid6x86.h | ||
15 | * | ||
16 | * Definitions common to x86 and x86-64 RAID-6 code only | ||
17 | */ | ||
18 | |||
19 | #ifndef LINUX_RAID_RAID6X86_H | ||
20 | #define LINUX_RAID_RAID6X86_H | ||
21 | |||
22 | #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | ||
23 | |||
24 | #ifdef __KERNEL__ /* Real code */ | ||
25 | |||
26 | #include <asm/i387.h> | ||
27 | |||
28 | #else /* Dummy code for user space testing */ | ||
29 | |||
/*
 * User-space stand-ins for the kernel FPU-context helpers.  When
 * __KERNEL__ is defined the real versions come from <asm/i387.h>; in the
 * test build these are empty so the SIMD code compiles unchanged.
 */
30 | static inline void kernel_fpu_begin(void) | ||
31 | { | ||
32 | } | ||
33 | |||
34 | static inline void kernel_fpu_end(void) | ||
35 | { | ||
36 | } | ||
37 | |||
38 | #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ | ||
39 | #define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions | ||
40 | * (fast save and restore) */ | ||
41 | #define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ | ||
42 | #define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ | ||
43 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ | ||
44 | |||
/*
 * User-space replacement for the kernel's boot_cpu_has().
 *
 * Executes CPUID and returns bit (flag & 31) of the resulting EDX.  Flags
 * whose word index (flag >> 5) is nonzero query leaf 0x80000001; all other
 * flags query leaf 1.  Only EDX is inspected, so the result is meaningful
 * only for feature bits reported in EDX of those two leaves, such as the
 * X86_FEATURE_* values defined above.
 *
 * NOTE(review): u32 is not declared in this header; presumably it comes
 * from the including file - confirm.
 */
45 | /* Should work well enough on modern CPUs for testing */ | ||
46 | static inline int boot_cpu_has(int flag) | ||
47 | { | ||
48 | u32 eax = (flag >> 5) ? 0x80000001 : 1; | ||
49 | u32 edx; | ||
50 | |||
51 | asm volatile("cpuid" | ||
52 | : "+a" (eax), "=d" (edx) | ||
53 | : : "ecx", "ebx"); | ||
54 | |||
55 | return (edx >> (flag & 31)) & 1; | ||
56 | } | ||
57 | |||
58 | #endif /* ndef __KERNEL__ */ | ||
59 | |||
60 | #endif | ||
61 | #endif | ||
diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk new file mode 100644 index 00000000000..c6aa03631df --- /dev/null +++ b/lib/raid6/unroll.awk | |||
@@ -0,0 +1,20 @@ | |||
1 | |||
2 | # This filter requires one command line option of form -vN=n | ||
3 | # where n must be a decimal number. | ||
4 | # | ||
5 | # Repeat each input line containing $$ n times, replacing $$ with 0...n-1. | ||
6 | # Replace each $# with n, and each $* with a single $. | ||
7 | |||
# BEGIN: adding 0 coerces the -vN= command-line value to a number.
#
# Main rule (runs for every input line): a line containing $$ is printed
# n times with $$ replaced by 0..n-1; any other line is printed once.
# In both cases $# and $* are substituted as well.
8 | BEGIN { | ||
9 | n = N + 0 | ||
10 | } | ||
11 | { | ||
12 | if (/\$\$/) { rep = n } else { rep = 1 } | ||
13 | for (i = 0; i < rep; ++i) { | ||
14 | tmp = $0 | ||
15 | gsub(/\$\$/, i, tmp) | ||
16 | gsub(/\$\#/, n, tmp) | ||
17 | gsub(/\$\*/, "$", tmp) | ||
18 | print tmp | ||
19 | } | ||
20 | } | ||
diff --git a/lib/random32.c b/lib/random32.c index 870dc3fc0f0..fc3545a3277 100644 --- a/lib/random32.c +++ b/lib/random32.c | |||
@@ -127,7 +127,7 @@ core_initcall(random32_init); | |||
127 | 127 | ||
128 | /* | 128 | /* |
129 | * Generate better values after random number generator | 129 | * Generate better values after random number generator |
130 | * is fully initalized. | 130 | * is fully initialized. |
131 | */ | 131 | */ |
132 | static int __init random32_reseed(void) | 132 | static int __init random32_reseed(void) |
133 | { | 133 | { |
diff --git a/lib/swiotlb.c b/lib/swiotlb.c index a009055140e..34e3082632d 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c | |||
@@ -50,19 +50,11 @@ | |||
50 | */ | 50 | */ |
51 | #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) | 51 | #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) |
52 | 52 | ||
53 | /* | ||
54 | * Enumeration for sync targets | ||
55 | */ | ||
56 | enum dma_sync_target { | ||
57 | SYNC_FOR_CPU = 0, | ||
58 | SYNC_FOR_DEVICE = 1, | ||
59 | }; | ||
60 | |||
61 | int swiotlb_force; | 53 | int swiotlb_force; |
62 | 54 | ||
63 | /* | 55 | /* |
64 | * Used to do a quick range check in unmap_single and | 56 | * Used to do a quick range check in swiotlb_tbl_unmap_single and |
65 | * sync_single_*, to see if the memory was in fact allocated by this | 57 | * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this |
66 | * API. | 58 | * API. |
67 | */ | 59 | */ |
68 | static char *io_tlb_start, *io_tlb_end; | 60 | static char *io_tlb_start, *io_tlb_end; |
@@ -140,28 +132,14 @@ void swiotlb_print_info(void) | |||
140 | (unsigned long long)pend); | 132 | (unsigned long long)pend); |
141 | } | 133 | } |
142 | 134 | ||
143 | /* | 135 | void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) |
144 | * Statically reserve bounce buffer space and initialize bounce buffer data | ||
145 | * structures for the software IO TLB used to implement the DMA API. | ||
146 | */ | ||
147 | void __init | ||
148 | swiotlb_init_with_default_size(size_t default_size, int verbose) | ||
149 | { | 136 | { |
150 | unsigned long i, bytes; | 137 | unsigned long i, bytes; |
151 | 138 | ||
152 | if (!io_tlb_nslabs) { | 139 | bytes = nslabs << IO_TLB_SHIFT; |
153 | io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); | ||
154 | io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); | ||
155 | } | ||
156 | |||
157 | bytes = io_tlb_nslabs << IO_TLB_SHIFT; | ||
158 | 140 | ||
159 | /* | 141 | io_tlb_nslabs = nslabs; |
160 | * Get IO TLB memory from the low pages | 142 | io_tlb_start = tlb; |
161 | */ | ||
162 | io_tlb_start = alloc_bootmem_low_pages(bytes); | ||
163 | if (!io_tlb_start) | ||
164 | panic("Cannot allocate SWIOTLB buffer"); | ||
165 | io_tlb_end = io_tlb_start + bytes; | 143 | io_tlb_end = io_tlb_start + bytes; |
166 | 144 | ||
167 | /* | 145 | /* |
@@ -185,6 +163,32 @@ swiotlb_init_with_default_size(size_t default_size, int verbose) | |||
185 | swiotlb_print_info(); | 163 | swiotlb_print_info(); |
186 | } | 164 | } |
187 | 165 | ||
166 | /* | ||
167 | * Statically reserve bounce buffer space and initialize bounce buffer data | ||
168 | * structures for the software IO TLB used to implement the DMA API. | ||
169 | */ | ||
170 | void __init | ||
171 | swiotlb_init_with_default_size(size_t default_size, int verbose) | ||
172 | { | ||
173 | unsigned long bytes; | ||
174 | |||
175 | if (!io_tlb_nslabs) { | ||
176 | io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); | ||
177 | io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); | ||
178 | } | ||
179 | |||
180 | bytes = io_tlb_nslabs << IO_TLB_SHIFT; | ||
181 | |||
182 | /* | ||
183 | * Get IO TLB memory from the low pages | ||
184 | */ | ||
185 | io_tlb_start = alloc_bootmem_low_pages(bytes); | ||
186 | if (!io_tlb_start) | ||
187 | panic("Cannot allocate SWIOTLB buffer"); | ||
188 | |||
189 | swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose); | ||
190 | } | ||
191 | |||
188 | void __init | 192 | void __init |
189 | swiotlb_init(int verbose) | 193 | swiotlb_init(int verbose) |
190 | { | 194 | { |
@@ -323,8 +327,8 @@ static int is_swiotlb_buffer(phys_addr_t paddr) | |||
323 | /* | 327 | /* |
324 | * Bounce: copy the swiotlb buffer back to the original dma location | 328 | * Bounce: copy the swiotlb buffer back to the original dma location |
325 | */ | 329 | */ |
326 | static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, | 330 | void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, |
327 | enum dma_data_direction dir) | 331 | enum dma_data_direction dir) |
328 | { | 332 | { |
329 | unsigned long pfn = PFN_DOWN(phys); | 333 | unsigned long pfn = PFN_DOWN(phys); |
330 | 334 | ||
@@ -360,26 +364,25 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, | |||
360 | memcpy(phys_to_virt(phys), dma_addr, size); | 364 | memcpy(phys_to_virt(phys), dma_addr, size); |
361 | } | 365 | } |
362 | } | 366 | } |
367 | EXPORT_SYMBOL_GPL(swiotlb_bounce); | ||
363 | 368 | ||
364 | /* | 369 | void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, |
365 | * Allocates bounce buffer and returns its kernel virtual address. | 370 | phys_addr_t phys, size_t size, |
366 | */ | 371 | enum dma_data_direction dir) |
367 | static void * | ||
368 | map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir) | ||
369 | { | 372 | { |
370 | unsigned long flags; | 373 | unsigned long flags; |
371 | char *dma_addr; | 374 | char *dma_addr; |
372 | unsigned int nslots, stride, index, wrap; | 375 | unsigned int nslots, stride, index, wrap; |
373 | int i; | 376 | int i; |
374 | unsigned long start_dma_addr; | ||
375 | unsigned long mask; | 377 | unsigned long mask; |
376 | unsigned long offset_slots; | 378 | unsigned long offset_slots; |
377 | unsigned long max_slots; | 379 | unsigned long max_slots; |
378 | 380 | ||
379 | mask = dma_get_seg_boundary(hwdev); | 381 | mask = dma_get_seg_boundary(hwdev); |
380 | start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask; | ||
381 | 382 | ||
382 | offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; | 383 | tbl_dma_addr &= mask; |
384 | |||
385 | offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; | ||
383 | 386 | ||
384 | /* | 387 | /* |
385 | * Carefully handle integer overflow which can occur when mask == ~0UL. | 388 | * Carefully handle integer overflow which can occur when mask == ~0UL. |
@@ -466,12 +469,27 @@ found: | |||
466 | 469 | ||
467 | return dma_addr; | 470 | return dma_addr; |
468 | } | 471 | } |
472 | EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); | ||
473 | |||
474 | /* | ||
475 | * Allocates bounce buffer and returns its kernel virtual address. | ||
476 | */ | ||
477 | |||
478 | static void * | ||
479 | map_single(struct device *hwdev, phys_addr_t phys, size_t size, | ||
480 | enum dma_data_direction dir) | ||
481 | { | ||
482 | dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start); | ||
483 | |||
484 | return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); | ||
485 | } | ||
469 | 486 | ||
470 | /* | 487 | /* |
471 | * dma_addr is the kernel virtual address of the bounce buffer to unmap. | 488 | * dma_addr is the kernel virtual address of the bounce buffer to unmap. |
472 | */ | 489 | */ |
473 | static void | 490 | void |
474 | do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) | 491 | swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, |
492 | enum dma_data_direction dir) | ||
475 | { | 493 | { |
476 | unsigned long flags; | 494 | unsigned long flags; |
477 | int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; | 495 | int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; |
@@ -509,10 +527,12 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) | |||
509 | } | 527 | } |
510 | spin_unlock_irqrestore(&io_tlb_lock, flags); | 528 | spin_unlock_irqrestore(&io_tlb_lock, flags); |
511 | } | 529 | } |
530 | EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); | ||
512 | 531 | ||
513 | static void | 532 | void |
514 | sync_single(struct device *hwdev, char *dma_addr, size_t size, | 533 | swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size, |
515 | int dir, int target) | 534 | enum dma_data_direction dir, |
535 | enum dma_sync_target target) | ||
516 | { | 536 | { |
517 | int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; | 537 | int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; |
518 | phys_addr_t phys = io_tlb_orig_addr[index]; | 538 | phys_addr_t phys = io_tlb_orig_addr[index]; |
@@ -536,6 +556,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size, | |||
536 | BUG(); | 556 | BUG(); |
537 | } | 557 | } |
538 | } | 558 | } |
559 | EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); | ||
539 | 560 | ||
540 | void * | 561 | void * |
541 | swiotlb_alloc_coherent(struct device *hwdev, size_t size, | 562 | swiotlb_alloc_coherent(struct device *hwdev, size_t size, |
@@ -559,8 +580,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, | |||
559 | } | 580 | } |
560 | if (!ret) { | 581 | if (!ret) { |
561 | /* | 582 | /* |
562 | * We are either out of memory or the device can't DMA | 583 | * We are either out of memory or the device can't DMA to |
563 | * to GFP_DMA memory; fall back on map_single(), which | 584 | * GFP_DMA memory; fall back on map_single(), which |
564 | * will grab memory from the lowest available address range. | 585 | * will grab memory from the lowest available address range. |
565 | */ | 586 | */ |
566 | ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); | 587 | ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); |
@@ -578,7 +599,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, | |||
578 | (unsigned long long)dev_addr); | 599 | (unsigned long long)dev_addr); |
579 | 600 | ||
580 | /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ | 601 | /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ |
581 | do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); | 602 | swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); |
582 | return NULL; | 603 | return NULL; |
583 | } | 604 | } |
584 | *dma_handle = dev_addr; | 605 | *dma_handle = dev_addr; |
@@ -596,13 +617,14 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, | |||
596 | if (!is_swiotlb_buffer(paddr)) | 617 | if (!is_swiotlb_buffer(paddr)) |
597 | free_pages((unsigned long)vaddr, get_order(size)); | 618 | free_pages((unsigned long)vaddr, get_order(size)); |
598 | else | 619 | else |
599 | /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ | 620 | /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ |
600 | do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); | 621 | swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); |
601 | } | 622 | } |
602 | EXPORT_SYMBOL(swiotlb_free_coherent); | 623 | EXPORT_SYMBOL(swiotlb_free_coherent); |
603 | 624 | ||
604 | static void | 625 | static void |
605 | swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) | 626 | swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, |
627 | int do_panic) | ||
606 | { | 628 | { |
607 | /* | 629 | /* |
608 | * Ran out of IOMMU space for this operation. This is very bad. | 630 | * Ran out of IOMMU space for this operation. This is very bad. |
@@ -680,14 +702,14 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); | |||
680 | * whatever the device wrote there. | 702 | * whatever the device wrote there. |
681 | */ | 703 | */ |
682 | static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, | 704 | static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, |
683 | size_t size, int dir) | 705 | size_t size, enum dma_data_direction dir) |
684 | { | 706 | { |
685 | phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); | 707 | phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); |
686 | 708 | ||
687 | BUG_ON(dir == DMA_NONE); | 709 | BUG_ON(dir == DMA_NONE); |
688 | 710 | ||
689 | if (is_swiotlb_buffer(paddr)) { | 711 | if (is_swiotlb_buffer(paddr)) { |
690 | do_unmap_single(hwdev, phys_to_virt(paddr), size, dir); | 712 | swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); |
691 | return; | 713 | return; |
692 | } | 714 | } |
693 | 715 | ||
@@ -723,14 +745,16 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page); | |||
723 | */ | 745 | */ |
724 | static void | 746 | static void |
725 | swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, | 747 | swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, |
726 | size_t size, int dir, int target) | 748 | size_t size, enum dma_data_direction dir, |
749 | enum dma_sync_target target) | ||
727 | { | 750 | { |
728 | phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); | 751 | phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); |
729 | 752 | ||
730 | BUG_ON(dir == DMA_NONE); | 753 | BUG_ON(dir == DMA_NONE); |
731 | 754 | ||
732 | if (is_swiotlb_buffer(paddr)) { | 755 | if (is_swiotlb_buffer(paddr)) { |
733 | sync_single(hwdev, phys_to_virt(paddr), size, dir, target); | 756 | swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, |
757 | target); | ||
734 | return; | 758 | return; |
735 | } | 759 | } |
736 | 760 | ||
@@ -809,7 +833,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs); | |||
809 | 833 | ||
810 | int | 834 | int |
811 | swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, | 835 | swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, |
812 | int dir) | 836 | enum dma_data_direction dir) |
813 | { | 837 | { |
814 | return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); | 838 | return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); |
815 | } | 839 | } |
@@ -836,7 +860,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); | |||
836 | 860 | ||
837 | void | 861 | void |
838 | swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, | 862 | swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, |
839 | int dir) | 863 | enum dma_data_direction dir) |
840 | { | 864 | { |
841 | return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); | 865 | return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); |
842 | } | 866 | } |
@@ -851,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg); | |||
851 | */ | 875 | */ |
852 | static void | 876 | static void |
853 | swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, | 877 | swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, |
854 | int nelems, int dir, int target) | 878 | int nelems, enum dma_data_direction dir, |
879 | enum dma_sync_target target) | ||
855 | { | 880 | { |
856 | struct scatterlist *sg; | 881 | struct scatterlist *sg; |
857 | int i; | 882 | int i; |
diff --git a/lib/vsprintf.c b/lib/vsprintf.c index b8a2f549ab0..4ee19d0d391 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c | |||
@@ -980,6 +980,11 @@ char *uuid_string(char *buf, char *end, const u8 *addr, | |||
980 | * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15] | 980 | * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15] |
981 | * little endian output byte order is: | 981 | * little endian output byte order is: |
982 | * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15] | 982 | * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15] |
983 | * - 'V' For a struct va_format which contains a format string * and va_list *, | ||
984 | * call vsnprintf(->format, *->va_list). | ||
985 | * Implements a "recursive vsnprintf". | ||
986 | * Do not use this feature without some mechanism to verify the | ||
987 | * correctness of the format string and va_list arguments. | ||
983 | * | 988 | * |
984 | * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 | 989 | * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 |
985 | * function pointers are really function descriptors, which contain a | 990 | * function pointers are really function descriptors, which contain a |
@@ -1025,6 +1030,10 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, | |||
1025 | break; | 1030 | break; |
1026 | case 'U': | 1031 | case 'U': |
1027 | return uuid_string(buf, end, ptr, spec, fmt); | 1032 | return uuid_string(buf, end, ptr, spec, fmt); |
1033 | case 'V': | ||
1034 | return buf + vsnprintf(buf, end - buf, | ||
1035 | ((struct va_format *)ptr)->fmt, | ||
1036 | *(((struct va_format *)ptr)->va)); | ||
1028 | } | 1037 | } |
1029 | spec.flags |= SMALL; | 1038 | spec.flags |= SMALL; |
1030 | if (spec.field_width == -1) { | 1039 | if (spec.field_width == -1) { |