Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig              9
-rw-r--r--  lib/Kconfig.debug        1
-rw-r--r--  lib/Kconfig.kasan        8
-rw-r--r--  lib/Makefile             4
-rw-r--r--  lib/bitmap.c            30
-rw-r--r--  lib/cpumask.c           37
-rw-r--r--  lib/devres.c            28
-rw-r--r--  lib/dma-debug.c          2
-rw-r--r--  lib/find_bit.c         193
-rw-r--r--  lib/find_last_bit.c     49
-rw-r--r--  lib/find_next_bit.c    285
-rw-r--r--  lib/iommu-common.c     270
-rw-r--r--  lib/raid6/algos.c       41
-rw-r--r--  lib/raid6/altivec.uc     1
-rw-r--r--  lib/raid6/avx2.c         3
-rw-r--r--  lib/raid6/int.uc        41
-rw-r--r--  lib/raid6/mmx.c          2
-rw-r--r--  lib/raid6/neon.c         1
-rw-r--r--  lib/raid6/sse1.c         2
-rw-r--r--  lib/raid6/sse2.c       227
-rw-r--r--  lib/raid6/test/test.c   51
-rw-r--r--  lib/raid6/tilegx.uc      1
-rw-r--r--  lib/rhashtable.c        11
-rw-r--r--  lib/string_helpers.c    68
-rw-r--r--  lib/test-hexdump.c       2
-rw-r--r--  lib/vsprintf.c         244
26 files changed, 1038 insertions, 573 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 87da53bb1fef..601965a948e8 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -18,9 +18,8 @@ config HAVE_ARCH_BITREVERSE
18 default n 18 default n
19 depends on BITREVERSE 19 depends on BITREVERSE
20 help 20 help
21 This option provides an config for the architecture which have instruction 21 This option enables the use of hardware bit-reversal instructions on
22 can do bitreverse operation, we use the hardware instruction if the architecture 22 architectures which support such operations.
23 have this capability.
24 23
25config RATIONAL 24config RATIONAL
26 bool 25 bool
@@ -397,10 +396,6 @@ config CPUMASK_OFFSTACK
397 them on the stack. This is a bit more expensive, but avoids 396 them on the stack. This is a bit more expensive, but avoids
398 stack overflow. 397 stack overflow.
399 398
400config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
401 bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
402 depends on BROKEN
403
404config CPU_RMAP 399config CPU_RMAP
405 bool 400 bool
406 depends on SMP 401 depends on SMP
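For context, HAVE_ARCH_BITREVERSE is consumed by include/linux/bitrev.h (outside this diffstat): when an architecture selects it, bitrev32()/bitrev16()/bitrev8() are expected to resolve to arch-supplied helpers instead of the generic byte-table lookup. A sketch of what such an arch helper presumably looks like, using ARM's rbit instruction; the exact file and symbol names are assumptions, not part of this diff:

static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x)
{
	/* single-instruction bit reversal, selected via HAVE_ARCH_BITREVERSE */
	__asm__ ("rbit %0, %1" : "=r" (x) : "r" (x));
	return x;
}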
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 17670573dda8..ba2b0c87e65b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1281,6 +1281,7 @@ config RCU_TORTURE_TEST_SLOW_INIT_DELAY
1281 int "How much to slow down RCU grace-period initialization" 1281 int "How much to slow down RCU grace-period initialization"
1282 range 0 5 1282 range 0 5
1283 default 3 1283 default 3
1284 depends on RCU_TORTURE_TEST_SLOW_INIT
1284 help 1285 help
1285 This option specifies the number of jiffies to wait between 1286 This option specifies the number of jiffies to wait between
1286 each rcu_node structure initialization. 1287 each rcu_node structure initialization.
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 4fecaedc80a2..777eda7d1ab4 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -10,8 +10,11 @@ config KASAN
10 help 10 help
11 Enables kernel address sanitizer - runtime memory debugger, 11 Enables kernel address sanitizer - runtime memory debugger,
12 designed to find out-of-bounds accesses and use-after-free bugs. 12 designed to find out-of-bounds accesses and use-after-free bugs.
13 This is strictly debugging feature. It consumes about 1/8 13 This is strictly a debugging feature and it requires a gcc version
14 of available memory and brings about ~x3 performance slowdown. 14 of 4.9.2 or later. Detection of out of bounds accesses to stack or
15 global variables requires gcc 5.0 or later.
16 This feature consumes about 1/8 of available memory and brings about
17 ~x3 performance slowdown.
15 For better error detection enable CONFIG_STACKTRACE, 18 For better error detection enable CONFIG_STACKTRACE,
16 and add slub_debug=U to boot cmdline. 19 and add slub_debug=U to boot cmdline.
17 20
@@ -40,6 +43,7 @@ config KASAN_INLINE
40 memory accesses. This is faster than outline (in some workloads 43 memory accesses. This is faster than outline (in some workloads
41 it gives about x2 boost over outline instrumentation), but 44 it gives about x2 boost over outline instrumentation), but
42 make kernel's .text size much bigger. 45 make kernel's .text size much bigger.
46 This requires a gcc version of 5.0 or later.
43 47
44endchoice 48endchoice
45 49
diff --git a/lib/Makefile b/lib/Makefile
index 58f74d2dd396..6c37933336a0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -25,7 +25,7 @@ obj-y += lockref.o
25obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ 25obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
26 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ 26 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
27 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ 27 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
28 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ 28 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
29 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o 29 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
30obj-y += string_helpers.o 30obj-y += string_helpers.o
31obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o 31obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
@@ -106,7 +106,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
106obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o 106obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
107 107
108obj-$(CONFIG_SWIOTLB) += swiotlb.o 108obj-$(CONFIG_SWIOTLB) += swiotlb.o
109obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o 109obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
110obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o 110obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
111obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o 111obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
112obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o 112obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index d456f4c15a9f..64c0926f5dd8 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -42,36 +42,6 @@
42 * for the best explanations of this ordering. 42 * for the best explanations of this ordering.
43 */ 43 */
44 44
45int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
46{
47 unsigned int k, lim = bits/BITS_PER_LONG;
48 for (k = 0; k < lim; ++k)
49 if (bitmap[k])
50 return 0;
51
52 if (bits % BITS_PER_LONG)
53 if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
54 return 0;
55
56 return 1;
57}
58EXPORT_SYMBOL(__bitmap_empty);
59
60int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
61{
62 unsigned int k, lim = bits/BITS_PER_LONG;
63 for (k = 0; k < lim; ++k)
64 if (~bitmap[k])
65 return 0;
66
67 if (bits % BITS_PER_LONG)
68 if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
69 return 0;
70
71 return 1;
72}
73EXPORT_SYMBOL(__bitmap_full);
74
75int __bitmap_equal(const unsigned long *bitmap1, 45int __bitmap_equal(const unsigned long *bitmap1,
76 const unsigned long *bitmap2, unsigned int bits) 46 const unsigned long *bitmap2, unsigned int bits)
77{ 47{
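__bitmap_empty() and __bitmap_full() can be dropped here because their wrappers in include/linux/bitmap.h (not shown in this diffstat) are presumably rebuilt on top of the find_bit helpers; a sketch of the assumed inline replacements:

static inline int bitmap_empty(const unsigned long *src, unsigned int nbits)
{
	if (small_const_nbits(nbits))
		return ! (*src & BITMAP_LAST_WORD_MASK(nbits));

	return find_first_bit(src, nbits) == nbits;
}

static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
{
	if (small_const_nbits(nbits))
		return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));

	return find_first_zero_bit(src, nbits) == nbits;
}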
diff --git a/lib/cpumask.c b/lib/cpumask.c
index b6513a9f2892..830dd5dec40f 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -5,27 +5,6 @@
5#include <linux/export.h> 5#include <linux/export.h>
6#include <linux/bootmem.h> 6#include <linux/bootmem.h>
7 7
8int __first_cpu(const cpumask_t *srcp)
9{
10 return min_t(int, NR_CPUS, find_first_bit(srcp->bits, NR_CPUS));
11}
12EXPORT_SYMBOL(__first_cpu);
13
14int __next_cpu(int n, const cpumask_t *srcp)
15{
16 return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1));
17}
18EXPORT_SYMBOL(__next_cpu);
19
20#if NR_CPUS > 64
21int __next_cpu_nr(int n, const cpumask_t *srcp)
22{
23 return min_t(int, nr_cpu_ids,
24 find_next_bit(srcp->bits, nr_cpu_ids, n+1));
25}
26EXPORT_SYMBOL(__next_cpu_nr);
27#endif
28
29/** 8/**
30 * cpumask_next_and - get the next cpu in *src1p & *src2p 9 * cpumask_next_and - get the next cpu in *src1p & *src2p
31 * @n: the cpu prior to the place to search (ie. return will be > @n) 10 * @n: the cpu prior to the place to search (ie. return will be > @n)
@@ -37,10 +16,11 @@ EXPORT_SYMBOL(__next_cpu_nr);
37int cpumask_next_and(int n, const struct cpumask *src1p, 16int cpumask_next_and(int n, const struct cpumask *src1p,
38 const struct cpumask *src2p) 17 const struct cpumask *src2p)
39{ 18{
40 while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) 19 struct cpumask tmp;
41 if (cpumask_test_cpu(n, src2p)) 20
42 break; 21 if (cpumask_and(&tmp, src1p, src2p))
43 return n; 22 return cpumask_next(n, &tmp);
23 return nr_cpu_ids;
44} 24}
45EXPORT_SYMBOL(cpumask_next_and); 25EXPORT_SYMBOL(cpumask_next_and);
46 26
@@ -89,13 +69,6 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
89 dump_stack(); 69 dump_stack();
90 } 70 }
91#endif 71#endif
92 /* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */
93 if (*mask) {
94 unsigned char *ptr = (unsigned char *)cpumask_bits(*mask);
95 unsigned int tail;
96 tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long);
97 memset(ptr + cpumask_size() - tail, 0, tail);
98 }
99 72
100 return *mask != NULL; 73 return *mask != NULL;
101} 74}
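A minimal caller's-eye sketch of cpumask_next_and(), whose external contract is unchanged by the rewrite above; the function and mask names below are hypothetical:

static void visit_online_cpus_in(const struct cpumask *mask)
{
	int cpu = -1;

	/* walk every CPU that is set in both @mask and cpu_online_mask */
	while ((cpu = cpumask_next_and(cpu, mask, cpu_online_mask)) < nr_cpu_ids)
		pr_info("cpu %d present in both masks\n", cpu);
}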
diff --git a/lib/devres.c b/lib/devres.c
index 0f1dd2e9d2c1..fbe2aac522e6 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -72,6 +72,34 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset,
72EXPORT_SYMBOL(devm_ioremap_nocache); 72EXPORT_SYMBOL(devm_ioremap_nocache);
73 73
74/** 74/**
75 * devm_ioremap_wc - Managed ioremap_wc()
76 * @dev: Generic device to remap IO address for
77 * @offset: BUS offset to map
78 * @size: Size of map
79 *
80 * Managed ioremap_wc(). Map is automatically unmapped on driver detach.
81 */
82void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset,
83 resource_size_t size)
84{
85 void __iomem **ptr, *addr;
86
87 ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
88 if (!ptr)
89 return NULL;
90
91 addr = ioremap_wc(offset, size);
92 if (addr) {
93 *ptr = addr;
94 devres_add(dev, ptr);
95 } else
96 devres_free(ptr);
97
98 return addr;
99}
100EXPORT_SYMBOL(devm_ioremap_wc);
101
102/**
75 * devm_iounmap - Managed iounmap() 103 * devm_iounmap - Managed iounmap()
76 * @dev: Generic device to unmap for 104 * @dev: Generic device to unmap for
77 * @addr: Address to unmap 105 * @addr: Address to unmap
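A minimal sketch of how a driver would presumably use the new devm_ioremap_wc() helper; the probe function and resource layout are hypothetical and not taken from this diff:

static int demo_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *fb;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;

	/* write-combined mapping, unmapped automatically on driver detach */
	fb = devm_ioremap_wc(&pdev->dev, res->start, resource_size(res));
	if (!fb)
		return -ENOMEM;

	return 0;
}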
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 9722bd2dbc9b..ae4b65e17e64 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -361,7 +361,7 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
361 unsigned int range = 0; 361 unsigned int range = 0;
362 362
363 while (range <= max_range) { 363 while (range <= max_range) {
364 entry = __hash_bucket_find(*bucket, &index, containing_match); 364 entry = __hash_bucket_find(*bucket, ref, containing_match);
365 365
366 if (entry) 366 if (entry)
367 return entry; 367 return entry;
diff --git a/lib/find_bit.c b/lib/find_bit.c
new file mode 100644
index 000000000000..18072ea9c20e
--- /dev/null
+++ b/lib/find_bit.c
@@ -0,0 +1,193 @@
1/* bit search implementation
2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * Copyright (C) 2008 IBM Corporation
7 * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
8 * (Inspired by David Howell's find_next_bit implementation)
9 *
10 * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
11 * size and improve performance, 2015.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 */
18
19#include <linux/bitops.h>
20#include <linux/bitmap.h>
21#include <linux/export.h>
22#include <linux/kernel.h>
23
24#if !defined(find_next_bit) || !defined(find_next_zero_bit)
25
26/*
27 * This is a common helper function for find_next_bit and
28 * find_next_zero_bit. The difference is the "invert" argument, which
29 * is XORed with each fetched word before searching it for one bits.
30 */
31static unsigned long _find_next_bit(const unsigned long *addr,
32 unsigned long nbits, unsigned long start, unsigned long invert)
33{
34 unsigned long tmp;
35
36 if (!nbits || start >= nbits)
37 return nbits;
38
39 tmp = addr[start / BITS_PER_LONG] ^ invert;
40
41 /* Handle 1st word. */
42 tmp &= BITMAP_FIRST_WORD_MASK(start);
43 start = round_down(start, BITS_PER_LONG);
44
45 while (!tmp) {
46 start += BITS_PER_LONG;
47 if (start >= nbits)
48 return nbits;
49
50 tmp = addr[start / BITS_PER_LONG] ^ invert;
51 }
52
53 return min(start + __ffs(tmp), nbits);
54}
55#endif
56
57#ifndef find_next_bit
58/*
59 * Find the next set bit in a memory region.
60 */
61unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
62 unsigned long offset)
63{
64 return _find_next_bit(addr, size, offset, 0UL);
65}
66EXPORT_SYMBOL(find_next_bit);
67#endif
68
69#ifndef find_next_zero_bit
70unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
71 unsigned long offset)
72{
73 return _find_next_bit(addr, size, offset, ~0UL);
74}
75EXPORT_SYMBOL(find_next_zero_bit);
76#endif
77
78#ifndef find_first_bit
79/*
80 * Find the first set bit in a memory region.
81 */
82unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
83{
84 unsigned long idx;
85
86 for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
87 if (addr[idx])
88 return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
89 }
90
91 return size;
92}
93EXPORT_SYMBOL(find_first_bit);
94#endif
95
96#ifndef find_first_zero_bit
97/*
98 * Find the first cleared bit in a memory region.
99 */
100unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
101{
102 unsigned long idx;
103
104 for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
105 if (addr[idx] != ~0UL)
106 return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
107 }
108
109 return size;
110}
111EXPORT_SYMBOL(find_first_zero_bit);
112#endif
113
114#ifndef find_last_bit
115unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
116{
117 if (size) {
118 unsigned long val = BITMAP_LAST_WORD_MASK(size);
119 unsigned long idx = (size-1) / BITS_PER_LONG;
120
121 do {
122 val &= addr[idx];
123 if (val)
124 return idx * BITS_PER_LONG + __fls(val);
125
126 val = ~0ul;
127 } while (idx--);
128 }
129 return size;
130}
131EXPORT_SYMBOL(find_last_bit);
132#endif
133
134#ifdef __BIG_ENDIAN
135
136/* include/linux/byteorder does not support "unsigned long" type */
137static inline unsigned long ext2_swab(const unsigned long y)
138{
139#if BITS_PER_LONG == 64
140 return (unsigned long) __swab64((u64) y);
141#elif BITS_PER_LONG == 32
142 return (unsigned long) __swab32((u32) y);
143#else
144#error BITS_PER_LONG not defined
145#endif
146}
147
148#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
149static unsigned long _find_next_bit_le(const unsigned long *addr,
150 unsigned long nbits, unsigned long start, unsigned long invert)
151{
152 unsigned long tmp;
153
154 if (!nbits || start >= nbits)
155 return nbits;
156
157 tmp = addr[start / BITS_PER_LONG] ^ invert;
158
159 /* Handle 1st word. */
160 tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start));
161 start = round_down(start, BITS_PER_LONG);
162
163 while (!tmp) {
164 start += BITS_PER_LONG;
165 if (start >= nbits)
166 return nbits;
167
168 tmp = addr[start / BITS_PER_LONG] ^ invert;
169 }
170
171 return min(start + __ffs(ext2_swab(tmp)), nbits);
172}
173#endif
174
175#ifndef find_next_zero_bit_le
176unsigned long find_next_zero_bit_le(const void *addr, unsigned
177 long size, unsigned long offset)
178{
179 return _find_next_bit_le(addr, size, offset, ~0UL);
180}
181EXPORT_SYMBOL(find_next_zero_bit_le);
182#endif
183
184#ifndef find_next_bit_le
185unsigned long find_next_bit_le(const void *addr, unsigned
186 long size, unsigned long offset)
187{
188 return _find_next_bit_le(addr, size, offset, 0UL);
189}
190EXPORT_SYMBOL(find_next_bit_le);
191#endif
192
193#endif /* __BIG_ENDIAN */
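A minimal usage sketch of the rewritten helpers as seen from a caller; the bitmap contents are illustrative only:

static void dump_set_bits(void)
{
	DECLARE_BITMAP(map, 64);
	unsigned long bit;

	bitmap_zero(map, 64);
	set_bit(3, map);
	set_bit(42, map);

	/* prints 3, then 42; find_*_bit return 64 when nothing is left */
	for (bit = find_first_bit(map, 64);
	     bit < 64;
	     bit = find_next_bit(map, 64, bit + 1))
		pr_info("bit %lu is set\n", bit);
}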
diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
deleted file mode 100644
index 91ca09fbf6f9..000000000000
--- a/lib/find_last_bit.c
+++ /dev/null
@@ -1,49 +0,0 @@
1/* find_last_bit.c: fallback find next bit implementation
2 *
3 * Copyright (C) 2008 IBM Corporation
4 * Written by Rusty Russell <rusty@rustcorp.com.au>
5 * (Inspired by David Howell's find_next_bit implementation)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/bitops.h>
14#include <linux/export.h>
15#include <asm/types.h>
16#include <asm/byteorder.h>
17
18#ifndef find_last_bit
19
20unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
21{
22 unsigned long words;
23 unsigned long tmp;
24
25 /* Start at final word. */
26 words = size / BITS_PER_LONG;
27
28 /* Partial final word? */
29 if (size & (BITS_PER_LONG-1)) {
30 tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
31 - (size & (BITS_PER_LONG-1)))));
32 if (tmp)
33 goto found;
34 }
35
36 while (words) {
37 tmp = addr[--words];
38 if (tmp) {
39found:
40 return words * BITS_PER_LONG + __fls(tmp);
41 }
42 }
43
44 /* Not found */
45 return size;
46}
47EXPORT_SYMBOL(find_last_bit);
48
49#endif
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
deleted file mode 100644
index 0cbfc0b4398f..000000000000
--- a/lib/find_next_bit.c
+++ /dev/null
@@ -1,285 +0,0 @@
1/* find_next_bit.c: fallback find next bit implementation
2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/bitops.h>
13#include <linux/export.h>
14#include <asm/types.h>
15#include <asm/byteorder.h>
16
17#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
18
19#ifndef find_next_bit
20/*
21 * Find the next set bit in a memory region.
22 */
23unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
24 unsigned long offset)
25{
26 const unsigned long *p = addr + BITOP_WORD(offset);
27 unsigned long result = offset & ~(BITS_PER_LONG-1);
28 unsigned long tmp;
29
30 if (offset >= size)
31 return size;
32 size -= result;
33 offset %= BITS_PER_LONG;
34 if (offset) {
35 tmp = *(p++);
36 tmp &= (~0UL << offset);
37 if (size < BITS_PER_LONG)
38 goto found_first;
39 if (tmp)
40 goto found_middle;
41 size -= BITS_PER_LONG;
42 result += BITS_PER_LONG;
43 }
44 while (size & ~(BITS_PER_LONG-1)) {
45 if ((tmp = *(p++)))
46 goto found_middle;
47 result += BITS_PER_LONG;
48 size -= BITS_PER_LONG;
49 }
50 if (!size)
51 return result;
52 tmp = *p;
53
54found_first:
55 tmp &= (~0UL >> (BITS_PER_LONG - size));
56 if (tmp == 0UL) /* Are any bits set? */
57 return result + size; /* Nope. */
58found_middle:
59 return result + __ffs(tmp);
60}
61EXPORT_SYMBOL(find_next_bit);
62#endif
63
64#ifndef find_next_zero_bit
65/*
66 * This implementation of find_{first,next}_zero_bit was stolen from
67 * Linus' asm-alpha/bitops.h.
68 */
69unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
70 unsigned long offset)
71{
72 const unsigned long *p = addr + BITOP_WORD(offset);
73 unsigned long result = offset & ~(BITS_PER_LONG-1);
74 unsigned long tmp;
75
76 if (offset >= size)
77 return size;
78 size -= result;
79 offset %= BITS_PER_LONG;
80 if (offset) {
81 tmp = *(p++);
82 tmp |= ~0UL >> (BITS_PER_LONG - offset);
83 if (size < BITS_PER_LONG)
84 goto found_first;
85 if (~tmp)
86 goto found_middle;
87 size -= BITS_PER_LONG;
88 result += BITS_PER_LONG;
89 }
90 while (size & ~(BITS_PER_LONG-1)) {
91 if (~(tmp = *(p++)))
92 goto found_middle;
93 result += BITS_PER_LONG;
94 size -= BITS_PER_LONG;
95 }
96 if (!size)
97 return result;
98 tmp = *p;
99
100found_first:
101 tmp |= ~0UL << size;
102 if (tmp == ~0UL) /* Are any bits zero? */
103 return result + size; /* Nope. */
104found_middle:
105 return result + ffz(tmp);
106}
107EXPORT_SYMBOL(find_next_zero_bit);
108#endif
109
110#ifndef find_first_bit
111/*
112 * Find the first set bit in a memory region.
113 */
114unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
115{
116 const unsigned long *p = addr;
117 unsigned long result = 0;
118 unsigned long tmp;
119
120 while (size & ~(BITS_PER_LONG-1)) {
121 if ((tmp = *(p++)))
122 goto found;
123 result += BITS_PER_LONG;
124 size -= BITS_PER_LONG;
125 }
126 if (!size)
127 return result;
128
129 tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
130 if (tmp == 0UL) /* Are any bits set? */
131 return result + size; /* Nope. */
132found:
133 return result + __ffs(tmp);
134}
135EXPORT_SYMBOL(find_first_bit);
136#endif
137
138#ifndef find_first_zero_bit
139/*
140 * Find the first cleared bit in a memory region.
141 */
142unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
143{
144 const unsigned long *p = addr;
145 unsigned long result = 0;
146 unsigned long tmp;
147
148 while (size & ~(BITS_PER_LONG-1)) {
149 if (~(tmp = *(p++)))
150 goto found;
151 result += BITS_PER_LONG;
152 size -= BITS_PER_LONG;
153 }
154 if (!size)
155 return result;
156
157 tmp = (*p) | (~0UL << size);
158 if (tmp == ~0UL) /* Are any bits zero? */
159 return result + size; /* Nope. */
160found:
161 return result + ffz(tmp);
162}
163EXPORT_SYMBOL(find_first_zero_bit);
164#endif
165
166#ifdef __BIG_ENDIAN
167
168/* include/linux/byteorder does not support "unsigned long" type */
169static inline unsigned long ext2_swabp(const unsigned long * x)
170{
171#if BITS_PER_LONG == 64
172 return (unsigned long) __swab64p((u64 *) x);
173#elif BITS_PER_LONG == 32
174 return (unsigned long) __swab32p((u32 *) x);
175#else
176#error BITS_PER_LONG not defined
177#endif
178}
179
180/* include/linux/byteorder doesn't support "unsigned long" type */
181static inline unsigned long ext2_swab(const unsigned long y)
182{
183#if BITS_PER_LONG == 64
184 return (unsigned long) __swab64((u64) y);
185#elif BITS_PER_LONG == 32
186 return (unsigned long) __swab32((u32) y);
187#else
188#error BITS_PER_LONG not defined
189#endif
190}
191
192#ifndef find_next_zero_bit_le
193unsigned long find_next_zero_bit_le(const void *addr, unsigned
194 long size, unsigned long offset)
195{
196 const unsigned long *p = addr;
197 unsigned long result = offset & ~(BITS_PER_LONG - 1);
198 unsigned long tmp;
199
200 if (offset >= size)
201 return size;
202 p += BITOP_WORD(offset);
203 size -= result;
204 offset &= (BITS_PER_LONG - 1UL);
205 if (offset) {
206 tmp = ext2_swabp(p++);
207 tmp |= (~0UL >> (BITS_PER_LONG - offset));
208 if (size < BITS_PER_LONG)
209 goto found_first;
210 if (~tmp)
211 goto found_middle;
212 size -= BITS_PER_LONG;
213 result += BITS_PER_LONG;
214 }
215
216 while (size & ~(BITS_PER_LONG - 1)) {
217 if (~(tmp = *(p++)))
218 goto found_middle_swap;
219 result += BITS_PER_LONG;
220 size -= BITS_PER_LONG;
221 }
222 if (!size)
223 return result;
224 tmp = ext2_swabp(p);
225found_first:
226 tmp |= ~0UL << size;
227 if (tmp == ~0UL) /* Are any bits zero? */
228 return result + size; /* Nope. Skip ffz */
229found_middle:
230 return result + ffz(tmp);
231
232found_middle_swap:
233 return result + ffz(ext2_swab(tmp));
234}
235EXPORT_SYMBOL(find_next_zero_bit_le);
236#endif
237
238#ifndef find_next_bit_le
239unsigned long find_next_bit_le(const void *addr, unsigned
240 long size, unsigned long offset)
241{
242 const unsigned long *p = addr;
243 unsigned long result = offset & ~(BITS_PER_LONG - 1);
244 unsigned long tmp;
245
246 if (offset >= size)
247 return size;
248 p += BITOP_WORD(offset);
249 size -= result;
250 offset &= (BITS_PER_LONG - 1UL);
251 if (offset) {
252 tmp = ext2_swabp(p++);
253 tmp &= (~0UL << offset);
254 if (size < BITS_PER_LONG)
255 goto found_first;
256 if (tmp)
257 goto found_middle;
258 size -= BITS_PER_LONG;
259 result += BITS_PER_LONG;
260 }
261
262 while (size & ~(BITS_PER_LONG - 1)) {
263 tmp = *(p++);
264 if (tmp)
265 goto found_middle_swap;
266 result += BITS_PER_LONG;
267 size -= BITS_PER_LONG;
268 }
269 if (!size)
270 return result;
271 tmp = ext2_swabp(p);
272found_first:
273 tmp &= (~0UL >> (BITS_PER_LONG - size));
274 if (tmp == 0UL) /* Are any bits set? */
275 return result + size; /* Nope. */
276found_middle:
277 return result + __ffs(tmp);
278
279found_middle_swap:
280 return result + __ffs(ext2_swab(tmp));
281}
282EXPORT_SYMBOL(find_next_bit_le);
283#endif
284
285#endif /* __BIG_ENDIAN */
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
new file mode 100644
index 000000000000..df30632f0bef
--- /dev/null
+++ b/lib/iommu-common.c
@@ -0,0 +1,270 @@
1/*
2 * IOMMU mmap management and range allocation functions.
3 * Based almost entirely upon the powerpc iommu allocator.
4 */
5
6#include <linux/export.h>
7#include <linux/bitmap.h>
8#include <linux/bug.h>
9#include <linux/iommu-helper.h>
10#include <linux/iommu-common.h>
11#include <linux/dma-mapping.h>
12#include <linux/hash.h>
13
14#ifndef DMA_ERROR_CODE
15#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
16#endif
17
18static unsigned long iommu_large_alloc = 15;
19
20static DEFINE_PER_CPU(unsigned int, iommu_hash_common);
21
22static inline bool need_flush(struct iommu_map_table *iommu)
23{
24 return (iommu->lazy_flush != NULL &&
25 (iommu->flags & IOMMU_NEED_FLUSH) != 0);
26}
27
28static inline void set_flush(struct iommu_map_table *iommu)
29{
30 iommu->flags |= IOMMU_NEED_FLUSH;
31}
32
33static inline void clear_flush(struct iommu_map_table *iommu)
34{
35 iommu->flags &= ~IOMMU_NEED_FLUSH;
36}
37
38static void setup_iommu_pool_hash(void)
39{
40 unsigned int i;
41 static bool do_once;
42
43 if (do_once)
44 return;
45 do_once = true;
46 for_each_possible_cpu(i)
47 per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS);
48}
49
50/*
51 * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
52 * is the number of table entries. If `large_pool' is set to true,
53 * the top 1/4 of the table will be set aside for pool allocations
54 * of more than iommu_large_alloc pages.
55 */
56void iommu_tbl_pool_init(struct iommu_map_table *iommu,
57 unsigned long num_entries,
58 u32 table_shift,
59 void (*lazy_flush)(struct iommu_map_table *),
60 bool large_pool, u32 npools,
61 bool skip_span_boundary_check)
62{
63 unsigned int start, i;
64 struct iommu_pool *p = &(iommu->large_pool);
65
66 setup_iommu_pool_hash();
67 if (npools == 0)
68 iommu->nr_pools = IOMMU_NR_POOLS;
69 else
70 iommu->nr_pools = npools;
71 BUG_ON(npools > IOMMU_NR_POOLS);
72
73 iommu->table_shift = table_shift;
74 iommu->lazy_flush = lazy_flush;
75 start = 0;
76 if (skip_span_boundary_check)
77 iommu->flags |= IOMMU_NO_SPAN_BOUND;
78 if (large_pool)
79 iommu->flags |= IOMMU_HAS_LARGE_POOL;
80
81 if (!large_pool)
82 iommu->poolsize = num_entries/iommu->nr_pools;
83 else
84 iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
85 for (i = 0; i < iommu->nr_pools; i++) {
86 spin_lock_init(&(iommu->pools[i].lock));
87 iommu->pools[i].start = start;
88 iommu->pools[i].hint = start;
89 start += iommu->poolsize; /* start for next pool */
90 iommu->pools[i].end = start - 1;
91 }
92 if (!large_pool)
93 return;
94 /* initialize large_pool */
95 spin_lock_init(&(p->lock));
96 p->start = start;
97 p->hint = p->start;
98 p->end = num_entries;
99}
100EXPORT_SYMBOL(iommu_tbl_pool_init);
101
102unsigned long iommu_tbl_range_alloc(struct device *dev,
103 struct iommu_map_table *iommu,
104 unsigned long npages,
105 unsigned long *handle,
106 unsigned long mask,
107 unsigned int align_order)
108{
109 unsigned int pool_hash = __this_cpu_read(iommu_hash_common);
110 unsigned long n, end, start, limit, boundary_size;
111 struct iommu_pool *pool;
112 int pass = 0;
113 unsigned int pool_nr;
114 unsigned int npools = iommu->nr_pools;
115 unsigned long flags;
116 bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
117 bool largealloc = (large_pool && npages > iommu_large_alloc);
118 unsigned long shift;
119 unsigned long align_mask = 0;
120
121 if (align_order > 0)
122 align_mask = 0xffffffffffffffffl >> (64 - align_order);
123
124 /* Sanity check */
125 if (unlikely(npages == 0)) {
126 WARN_ON_ONCE(1);
127 return DMA_ERROR_CODE;
128 }
129
130 if (largealloc) {
131 pool = &(iommu->large_pool);
132 pool_nr = 0; /* to keep compiler happy */
133 } else {
134 /* pick out pool_nr */
135 pool_nr = pool_hash & (npools - 1);
136 pool = &(iommu->pools[pool_nr]);
137 }
138 spin_lock_irqsave(&pool->lock, flags);
139
140 again:
141 if (pass == 0 && handle && *handle &&
142 (*handle >= pool->start) && (*handle < pool->end))
143 start = *handle;
144 else
145 start = pool->hint;
146
147 limit = pool->end;
148
149 /* The case below can happen if we have a small segment appended
150 * to a large, or when the previous alloc was at the very end of
151 * the available space. If so, go back to the beginning. If a
152 * flush is needed, it will get done based on the return value
153 * from iommu_area_alloc() below.
154 */
155 if (start >= limit)
156 start = pool->start;
157 shift = iommu->table_map_base >> iommu->table_shift;
158 if (limit + shift > mask) {
159 limit = mask - shift + 1;
160 /* If we're constrained on address range, first try
161 * at the masked hint to avoid O(n) search complexity,
162 * but on second pass, start at 0 in pool 0.
163 */
164 if ((start & mask) >= limit || pass > 0) {
165 spin_unlock(&(pool->lock));
166 pool = &(iommu->pools[0]);
167 spin_lock(&(pool->lock));
168 start = pool->start;
169 } else {
170 start &= mask;
171 }
172 }
173
174 if (dev)
175 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
176 1 << iommu->table_shift);
177 else
178 boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
179
180 boundary_size = boundary_size >> iommu->table_shift;
181 /*
182 * if the skip_span_boundary_check had been set during init, we set
183 * things up so that iommu_is_span_boundary() merely checks if the
184 * (index + npages) < num_tsb_entries
185 */
186 if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
187 shift = 0;
188 boundary_size = iommu->poolsize * iommu->nr_pools;
189 }
190 n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
191 boundary_size, align_mask);
192 if (n == -1) {
193 if (likely(pass == 0)) {
194 /* First failure, rescan from the beginning. */
195 pool->hint = pool->start;
196 set_flush(iommu);
197 pass++;
198 goto again;
199 } else if (!largealloc && pass <= iommu->nr_pools) {
200 spin_unlock(&(pool->lock));
201 pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
202 pool = &(iommu->pools[pool_nr]);
203 spin_lock(&(pool->lock));
204 pool->hint = pool->start;
205 set_flush(iommu);
206 pass++;
207 goto again;
208 } else {
209 /* give up */
210 n = DMA_ERROR_CODE;
211 goto bail;
212 }
213 }
214 if (n < pool->hint || need_flush(iommu)) {
215 clear_flush(iommu);
216 iommu->lazy_flush(iommu);
217 }
218
219 end = n + npages;
220 pool->hint = end;
221
222 /* Update handle for SG allocations */
223 if (handle)
224 *handle = end;
225bail:
226 spin_unlock_irqrestore(&(pool->lock), flags);
227
228 return n;
229}
230EXPORT_SYMBOL(iommu_tbl_range_alloc);
231
232static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
233 unsigned long entry)
234{
235 struct iommu_pool *p;
236 unsigned long largepool_start = tbl->large_pool.start;
237 bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
238
239 /* The large pool is the last pool at the top of the table */
240 if (large_pool && entry >= largepool_start) {
241 p = &tbl->large_pool;
242 } else {
243 unsigned int pool_nr = entry / tbl->poolsize;
244
245 BUG_ON(pool_nr >= tbl->nr_pools);
246 p = &tbl->pools[pool_nr];
247 }
248 return p;
249}
250
251/* Caller supplies the index of the entry into the iommu map table
252 * itself when the mapping from dma_addr to the entry is not the
253 * default addr->entry mapping below.
254 */
255void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
256 unsigned long npages, unsigned long entry)
257{
258 struct iommu_pool *pool;
259 unsigned long flags;
260 unsigned long shift = iommu->table_shift;
261
262 if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */
263 entry = (dma_addr - iommu->table_map_base) >> shift;
264 pool = get_pool(iommu, entry);
265
266 spin_lock_irqsave(&(pool->lock), flags);
267 bitmap_clear(iommu->map, entry, npages);
268 spin_unlock_irqrestore(&(pool->lock), flags);
269}
270EXPORT_SYMBOL(iommu_tbl_range_free);
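A sketch of how an arch IOMMU driver might drive this allocator, pieced together from the API above; the wrapper names, the page shift, and the unrestricted address mask are assumptions for illustration, and tbl->map plus tbl->table_map_base are assumed to be set up by the caller:

#define DEMO_IOMMU_PAGE_SHIFT	13	/* assumed 8K IOMMU pages */

static void demo_iommu_init(struct iommu_map_table *tbl, unsigned long entries)
{
	/* default pool count, no large pool, span-boundary checks enabled */
	iommu_tbl_pool_init(tbl, entries, DEMO_IOMMU_PAGE_SHIFT, NULL,
			    false, 0, false);
}

static u64 demo_map(struct device *dev, struct iommu_map_table *tbl,
		    unsigned long npages)
{
	unsigned long entry;

	entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL, ~0UL, 0);
	if (entry == DMA_ERROR_CODE)
		return DMA_ERROR_CODE;

	return tbl->table_map_base + (entry << tbl->table_shift);
}

static void demo_unmap(struct iommu_map_table *tbl, u64 dma_addr,
		       unsigned long npages)
{
	/* DMA_ERROR_CODE selects the default dma_addr -> entry mapping */
	iommu_tbl_range_free(tbl, dma_addr, npages, DMA_ERROR_CODE);
}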
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index dbef2314901e..975c6e0434bd 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
131static inline const struct raid6_calls *raid6_choose_gen( 131static inline const struct raid6_calls *raid6_choose_gen(
132 void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) 132 void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
133{ 133{
134 unsigned long perf, bestperf, j0, j1; 134 unsigned long perf, bestgenperf, bestxorperf, j0, j1;
135 int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */
135 const struct raid6_calls *const *algo; 136 const struct raid6_calls *const *algo;
136 const struct raid6_calls *best; 137 const struct raid6_calls *best;
137 138
138 for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { 139 for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
139 if (!best || (*algo)->prefer >= best->prefer) { 140 if (!best || (*algo)->prefer >= best->prefer) {
140 if ((*algo)->valid && !(*algo)->valid()) 141 if ((*algo)->valid && !(*algo)->valid())
141 continue; 142 continue;
@@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen(
153 } 154 }
154 preempt_enable(); 155 preempt_enable();
155 156
156 if (perf > bestperf) { 157 if (perf > bestgenperf) {
157 bestperf = perf; 158 bestgenperf = perf;
158 best = *algo; 159 best = *algo;
159 } 160 }
160 pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name, 161 pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
161 (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); 162 (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
163
164 if (!(*algo)->xor_syndrome)
165 continue;
166
167 perf = 0;
168
169 preempt_disable();
170 j0 = jiffies;
171 while ((j1 = jiffies) == j0)
172 cpu_relax();
173 while (time_before(jiffies,
174 j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
175 (*algo)->xor_syndrome(disks, start, stop,
176 PAGE_SIZE, *dptrs);
177 perf++;
178 }
179 preempt_enable();
180
181 if (best == *algo)
182 bestxorperf = perf;
183
184 pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
185 (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
162 } 186 }
163 } 187 }
164 188
165 if (best) { 189 if (best) {
166 pr_info("raid6: using algorithm %s (%ld MB/s)\n", 190 pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
167 best->name, 191 best->name,
168 (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); 192 (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
193 if (best->xor_syndrome)
194 pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
195 (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
169 raid6_call = *best; 196 raid6_call = *best;
170 } else 197 } else
171 pr_err("raid6: Yikes! No algorithm found!\n"); 198 pr_err("raid6: Yikes! No algorithm found!\n");
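The benchmarking changes above, and the "NULL, /* XOR not yet implemented */" lines added throughout the per-arch files below, follow from a new xor_syndrome member inserted into struct raid6_calls right after gen_syndrome. The struct itself lives in include/linux/raid/pq.h, outside this diffstat; its presumed shape after the change:

struct raid6_calls {
	void (*gen_syndrome)(int, size_t, void **);
	void (*xor_syndrome)(int, int, int, size_t, void **); /* new: P/Q rmw */
	int  (*valid)(void);		/* returns 1 if usable, NULL = always */
	const char *name;		/* name of this routine set */
	int prefer;			/* has special performance attribute */
};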
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 7cc12b532e95..bec27fce7501 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -119,6 +119,7 @@ int raid6_have_altivec(void)
119 119
120const struct raid6_calls raid6_altivec$# = { 120const struct raid6_calls raid6_altivec$# = {
121 raid6_altivec$#_gen_syndrome, 121 raid6_altivec$#_gen_syndrome,
122 NULL, /* XOR not yet implemented */
122 raid6_have_altivec, 123 raid6_have_altivec,
123 "altivecx$#", 124 "altivecx$#",
124 0 125 0
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
index bc3b1dd436eb..76734004358d 100644
--- a/lib/raid6/avx2.c
+++ b/lib/raid6/avx2.c
@@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
89 89
90const struct raid6_calls raid6_avx2x1 = { 90const struct raid6_calls raid6_avx2x1 = {
91 raid6_avx21_gen_syndrome, 91 raid6_avx21_gen_syndrome,
92 NULL, /* XOR not yet implemented */
92 raid6_have_avx2, 93 raid6_have_avx2,
93 "avx2x1", 94 "avx2x1",
94 1 /* Has cache hints */ 95 1 /* Has cache hints */
@@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
150 151
151const struct raid6_calls raid6_avx2x2 = { 152const struct raid6_calls raid6_avx2x2 = {
152 raid6_avx22_gen_syndrome, 153 raid6_avx22_gen_syndrome,
154 NULL, /* XOR not yet implemented */
153 raid6_have_avx2, 155 raid6_have_avx2,
154 "avx2x2", 156 "avx2x2",
155 1 /* Has cache hints */ 157 1 /* Has cache hints */
@@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
242 244
243const struct raid6_calls raid6_avx2x4 = { 245const struct raid6_calls raid6_avx2x4 = {
244 raid6_avx24_gen_syndrome, 246 raid6_avx24_gen_syndrome,
247 NULL, /* XOR not yet implemented */
245 raid6_have_avx2, 248 raid6_have_avx2,
246 "avx2x4", 249 "avx2x4",
247 1 /* Has cache hints */ 250 1 /* Has cache hints */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
index 5b50f8dfc5d2..558aeac9342a 100644
--- a/lib/raid6/int.uc
+++ b/lib/raid6/int.uc
@@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
107 } 107 }
108} 108}
109 109
110static void raid6_int$#_xor_syndrome(int disks, int start, int stop,
111 size_t bytes, void **ptrs)
112{
113 u8 **dptr = (u8 **)ptrs;
114 u8 *p, *q;
115 int d, z, z0;
116
117 unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
118
119 z0 = stop; /* P/Q right side optimization */
120 p = dptr[disks-2]; /* XOR parity */
121 q = dptr[disks-1]; /* RS syndrome */
122
123 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
124 /* P/Q data pages */
125 wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
126 for ( z = z0-1 ; z >= start ; z-- ) {
127 wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
128 wp$$ ^= wd$$;
129 w2$$ = MASK(wq$$);
130 w1$$ = SHLBYTE(wq$$);
131 w2$$ &= NBYTES(0x1d);
132 w1$$ ^= w2$$;
133 wq$$ = w1$$ ^ wd$$;
134 }
135 /* P/Q left side optimization */
136 for ( z = start-1 ; z >= 0 ; z-- ) {
137 w2$$ = MASK(wq$$);
138 w1$$ = SHLBYTE(wq$$);
139 w2$$ &= NBYTES(0x1d);
140 wq$$ = w1$$ ^ w2$$;
141 }
142 *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
143 *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
144 }
145
146}
147
110const struct raid6_calls raid6_intx$# = { 148const struct raid6_calls raid6_intx$# = {
111 raid6_int$#_gen_syndrome, 149 raid6_int$#_gen_syndrome,
112 NULL, /* always valid */ 150 raid6_int$#_xor_syndrome,
151 NULL, /* always valid */
113 "int" NSTRING "x$#", 152 "int" NSTRING "x$#",
114 0 153 0
115}; 154};
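The MASK()/SHLBYTE()/NBYTES(0x1d) sequence in the inner loops above is a word-at-a-time multiply-by-2 in GF(2^8) using the RAID-6 generator polynomial 0x11d. A scalar per-byte equivalent, shown only to make the SIMD idiom readable:

static inline u8 gf256_mul2(u8 v)
{
	u8 vv = v << 1;		/* SHLBYTE: shift every byte left by one */

	if (v & 0x80)		/* MASK: 0xff in bytes whose top bit was set */
		vv ^= 0x1d;	/* reduce modulo x^8 + x^4 + x^3 + x^2 + 1 */
	return vv;
}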
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
index 590c71c9e200..b3b0e1fcd3af 100644
--- a/lib/raid6/mmx.c
+++ b/lib/raid6/mmx.c
@@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
76 76
77const struct raid6_calls raid6_mmxx1 = { 77const struct raid6_calls raid6_mmxx1 = {
78 raid6_mmx1_gen_syndrome, 78 raid6_mmx1_gen_syndrome,
79 NULL, /* XOR not yet implemented */
79 raid6_have_mmx, 80 raid6_have_mmx,
80 "mmxx1", 81 "mmxx1",
81 0 82 0
@@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
134 135
135const struct raid6_calls raid6_mmxx2 = { 136const struct raid6_calls raid6_mmxx2 = {
136 raid6_mmx2_gen_syndrome, 137 raid6_mmx2_gen_syndrome,
138 NULL, /* XOR not yet implemented */
137 raid6_have_mmx, 139 raid6_have_mmx,
138 "mmxx2", 140 "mmxx2",
139 0 141 0
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c
index 36ad4705df1a..d9ad6ee284f4 100644
--- a/lib/raid6/neon.c
+++ b/lib/raid6/neon.c
@@ -42,6 +42,7 @@
42 } \ 42 } \
43 struct raid6_calls const raid6_neonx ## _n = { \ 43 struct raid6_calls const raid6_neonx ## _n = { \
44 raid6_neon ## _n ## _gen_syndrome, \ 44 raid6_neon ## _n ## _gen_syndrome, \
45 NULL, /* XOR not yet implemented */ \
45 raid6_have_neon, \ 46 raid6_have_neon, \
46 "neonx" #_n, \ 47 "neonx" #_n, \
47 0 \ 48 0 \
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
index f76297139445..9025b8ca9aa3 100644
--- a/lib/raid6/sse1.c
+++ b/lib/raid6/sse1.c
@@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
92 92
93const struct raid6_calls raid6_sse1x1 = { 93const struct raid6_calls raid6_sse1x1 = {
94 raid6_sse11_gen_syndrome, 94 raid6_sse11_gen_syndrome,
95 NULL, /* XOR not yet implemented */
95 raid6_have_sse1_or_mmxext, 96 raid6_have_sse1_or_mmxext,
96 "sse1x1", 97 "sse1x1",
97 1 /* Has cache hints */ 98 1 /* Has cache hints */
@@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
154 155
155const struct raid6_calls raid6_sse1x2 = { 156const struct raid6_calls raid6_sse1x2 = {
156 raid6_sse12_gen_syndrome, 157 raid6_sse12_gen_syndrome,
158 NULL, /* XOR not yet implemented */
157 raid6_have_sse1_or_mmxext, 159 raid6_have_sse1_or_mmxext,
158 "sse1x2", 160 "sse1x2",
159 1 /* Has cache hints */ 161 1 /* Has cache hints */
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index 85b82c85f28e..1d2276b007ee 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
88 kernel_fpu_end(); 88 kernel_fpu_end();
89} 89}
90 90
91
92static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
93 size_t bytes, void **ptrs)
94 {
95 u8 **dptr = (u8 **)ptrs;
96 u8 *p, *q;
97 int d, z, z0;
98
99 z0 = stop; /* P/Q right side optimization */
100 p = dptr[disks-2]; /* XOR parity */
101 q = dptr[disks-1]; /* RS syndrome */
102
103 kernel_fpu_begin();
104
105 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
106
107 for ( d = 0 ; d < bytes ; d += 16 ) {
108 asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
109 asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
110 asm volatile("pxor %xmm4,%xmm2");
111 /* P/Q data pages */
112 for ( z = z0-1 ; z >= start ; z-- ) {
113 asm volatile("pxor %xmm5,%xmm5");
114 asm volatile("pcmpgtb %xmm4,%xmm5");
115 asm volatile("paddb %xmm4,%xmm4");
116 asm volatile("pand %xmm0,%xmm5");
117 asm volatile("pxor %xmm5,%xmm4");
118 asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
119 asm volatile("pxor %xmm5,%xmm2");
120 asm volatile("pxor %xmm5,%xmm4");
121 }
122 /* P/Q left side optimization */
123 for ( z = start-1 ; z >= 0 ; z-- ) {
124 asm volatile("pxor %xmm5,%xmm5");
125 asm volatile("pcmpgtb %xmm4,%xmm5");
126 asm volatile("paddb %xmm4,%xmm4");
127 asm volatile("pand %xmm0,%xmm5");
128 asm volatile("pxor %xmm5,%xmm4");
129 }
130 asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
131 /* Don't use movntdq for r/w memory area < cache line */
132 asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
133 asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
134 }
135
136 asm volatile("sfence" : : : "memory");
137 kernel_fpu_end();
138}
139
91const struct raid6_calls raid6_sse2x1 = { 140const struct raid6_calls raid6_sse2x1 = {
92 raid6_sse21_gen_syndrome, 141 raid6_sse21_gen_syndrome,
142 raid6_sse21_xor_syndrome,
93 raid6_have_sse2, 143 raid6_have_sse2,
94 "sse2x1", 144 "sse2x1",
95 1 /* Has cache hints */ 145 1 /* Has cache hints */
@@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
150 kernel_fpu_end(); 200 kernel_fpu_end();
151} 201}
152 202
203 static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
204 size_t bytes, void **ptrs)
205 {
206 u8 **dptr = (u8 **)ptrs;
207 u8 *p, *q;
208 int d, z, z0;
209
210 z0 = stop; /* P/Q right side optimization */
211 p = dptr[disks-2]; /* XOR parity */
212 q = dptr[disks-1]; /* RS syndrome */
213
214 kernel_fpu_begin();
215
216 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
217
218 for ( d = 0 ; d < bytes ; d += 32 ) {
219 asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
220 asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
221 asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
222 asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
223 asm volatile("pxor %xmm4,%xmm2");
224 asm volatile("pxor %xmm6,%xmm3");
225 /* P/Q data pages */
226 for ( z = z0-1 ; z >= start ; z-- ) {
227 asm volatile("pxor %xmm5,%xmm5");
228 asm volatile("pxor %xmm7,%xmm7");
229 asm volatile("pcmpgtb %xmm4,%xmm5");
230 asm volatile("pcmpgtb %xmm6,%xmm7");
231 asm volatile("paddb %xmm4,%xmm4");
232 asm volatile("paddb %xmm6,%xmm6");
233 asm volatile("pand %xmm0,%xmm5");
234 asm volatile("pand %xmm0,%xmm7");
235 asm volatile("pxor %xmm5,%xmm4");
236 asm volatile("pxor %xmm7,%xmm6");
237 asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
238 asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
239 asm volatile("pxor %xmm5,%xmm2");
240 asm volatile("pxor %xmm7,%xmm3");
241 asm volatile("pxor %xmm5,%xmm4");
242 asm volatile("pxor %xmm7,%xmm6");
243 }
244 /* P/Q left side optimization */
245 for ( z = start-1 ; z >= 0 ; z-- ) {
246 asm volatile("pxor %xmm5,%xmm5");
247 asm volatile("pxor %xmm7,%xmm7");
248 asm volatile("pcmpgtb %xmm4,%xmm5");
249 asm volatile("pcmpgtb %xmm6,%xmm7");
250 asm volatile("paddb %xmm4,%xmm4");
251 asm volatile("paddb %xmm6,%xmm6");
252 asm volatile("pand %xmm0,%xmm5");
253 asm volatile("pand %xmm0,%xmm7");
254 asm volatile("pxor %xmm5,%xmm4");
255 asm volatile("pxor %xmm7,%xmm6");
256 }
257 asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
258 asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
259 /* Don't use movntdq for r/w memory area < cache line */
260 asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
261 asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
262 asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
263 asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
264 }
265
266 asm volatile("sfence" : : : "memory");
267 kernel_fpu_end();
268 }
269
153const struct raid6_calls raid6_sse2x2 = { 270const struct raid6_calls raid6_sse2x2 = {
154 raid6_sse22_gen_syndrome, 271 raid6_sse22_gen_syndrome,
272 raid6_sse22_xor_syndrome,
155 raid6_have_sse2, 273 raid6_have_sse2,
156 "sse2x2", 274 "sse2x2",
157 1 /* Has cache hints */ 275 1 /* Has cache hints */
@@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
248 kernel_fpu_end(); 366 kernel_fpu_end();
249} 367}
250 368
369 static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
370 size_t bytes, void **ptrs)
371 {
372 u8 **dptr = (u8 **)ptrs;
373 u8 *p, *q;
374 int d, z, z0;
375
376 z0 = stop; /* P/Q right side optimization */
377 p = dptr[disks-2]; /* XOR parity */
378 q = dptr[disks-1]; /* RS syndrome */
379
380 kernel_fpu_begin();
381
382 asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
383
384 for ( d = 0 ; d < bytes ; d += 64 ) {
385 asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
386 asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
387 asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
388 asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
389 asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
390 asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
391 asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
392 asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
393 asm volatile("pxor %xmm4,%xmm2");
394 asm volatile("pxor %xmm6,%xmm3");
395 asm volatile("pxor %xmm12,%xmm10");
396 asm volatile("pxor %xmm14,%xmm11");
397 /* P/Q data pages */
398 for ( z = z0-1 ; z >= start ; z-- ) {
399 asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
400 asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
401 asm volatile("pxor %xmm5,%xmm5");
402 asm volatile("pxor %xmm7,%xmm7");
403 asm volatile("pxor %xmm13,%xmm13");
404 asm volatile("pxor %xmm15,%xmm15");
405 asm volatile("pcmpgtb %xmm4,%xmm5");
406 asm volatile("pcmpgtb %xmm6,%xmm7");
407 asm volatile("pcmpgtb %xmm12,%xmm13");
408 asm volatile("pcmpgtb %xmm14,%xmm15");
409 asm volatile("paddb %xmm4,%xmm4");
410 asm volatile("paddb %xmm6,%xmm6");
411 asm volatile("paddb %xmm12,%xmm12");
412 asm volatile("paddb %xmm14,%xmm14");
413 asm volatile("pand %xmm0,%xmm5");
414 asm volatile("pand %xmm0,%xmm7");
415 asm volatile("pand %xmm0,%xmm13");
416 asm volatile("pand %xmm0,%xmm15");
417 asm volatile("pxor %xmm5,%xmm4");
418 asm volatile("pxor %xmm7,%xmm6");
419 asm volatile("pxor %xmm13,%xmm12");
420 asm volatile("pxor %xmm15,%xmm14");
421 asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
422 asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
423 asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
424 asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
425 asm volatile("pxor %xmm5,%xmm2");
426 asm volatile("pxor %xmm7,%xmm3");
427 asm volatile("pxor %xmm13,%xmm10");
428 asm volatile("pxor %xmm15,%xmm11");
429 asm volatile("pxor %xmm5,%xmm4");
430 asm volatile("pxor %xmm7,%xmm6");
431 asm volatile("pxor %xmm13,%xmm12");
432 asm volatile("pxor %xmm15,%xmm14");
433 }
434 asm volatile("prefetchnta %0" :: "m" (q[d]));
435 asm volatile("prefetchnta %0" :: "m" (q[d+32]));
436 /* P/Q left side optimization */
437 for ( z = start-1 ; z >= 0 ; z-- ) {
438 asm volatile("pxor %xmm5,%xmm5");
439 asm volatile("pxor %xmm7,%xmm7");
440 asm volatile("pxor %xmm13,%xmm13");
441 asm volatile("pxor %xmm15,%xmm15");
442 asm volatile("pcmpgtb %xmm4,%xmm5");
443 asm volatile("pcmpgtb %xmm6,%xmm7");
444 asm volatile("pcmpgtb %xmm12,%xmm13");
445 asm volatile("pcmpgtb %xmm14,%xmm15");
446 asm volatile("paddb %xmm4,%xmm4");
447 asm volatile("paddb %xmm6,%xmm6");
448 asm volatile("paddb %xmm12,%xmm12");
449 asm volatile("paddb %xmm14,%xmm14");
450 asm volatile("pand %xmm0,%xmm5");
451 asm volatile("pand %xmm0,%xmm7");
452 asm volatile("pand %xmm0,%xmm13");
453 asm volatile("pand %xmm0,%xmm15");
454 asm volatile("pxor %xmm5,%xmm4");
455 asm volatile("pxor %xmm7,%xmm6");
456 asm volatile("pxor %xmm13,%xmm12");
457 asm volatile("pxor %xmm15,%xmm14");
458 }
459 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
460 asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
461 asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
462 asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
463 asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
464 asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
465 asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
466 asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
467 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
468 asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
469 asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
470 asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
471 }
472 asm volatile("sfence" : : : "memory");
473 kernel_fpu_end();
474 }
475
476
251const struct raid6_calls raid6_sse2x4 = { 477const struct raid6_calls raid6_sse2x4 = {
252 raid6_sse24_gen_syndrome, 478 raid6_sse24_gen_syndrome,
479 raid6_sse24_xor_syndrome,
253 raid6_have_sse2, 480 raid6_have_sse2,
254 "sse2x4", 481 "sse2x4",
255 1 /* Has cache hints */ 482 1 /* Has cache hints */
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 5a485b7a7d3c..3bebbabdb510 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -28,11 +28,11 @@ char *dataptrs[NDISKS];
28char data[NDISKS][PAGE_SIZE]; 28char data[NDISKS][PAGE_SIZE];
29char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; 29char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
30 30
31static void makedata(void) 31static void makedata(int start, int stop)
32{ 32{
33 int i, j; 33 int i, j;
34 34
35 for (i = 0; i < NDISKS; i++) { 35 for (i = start; i <= stop; i++) {
36 for (j = 0; j < PAGE_SIZE; j++) 36 for (j = 0; j < PAGE_SIZE; j++)
37 data[i][j] = rand(); 37 data[i][j] = rand();
38 38
@@ -91,34 +91,55 @@ int main(int argc, char *argv[])
91{ 91{
92 const struct raid6_calls *const *algo; 92 const struct raid6_calls *const *algo;
93 const struct raid6_recov_calls *const *ra; 93 const struct raid6_recov_calls *const *ra;
94 int i, j; 94 int i, j, p1, p2;
95 int err = 0; 95 int err = 0;
96 96
97 makedata(); 97 makedata(0, NDISKS-1);
98 98
99 for (ra = raid6_recov_algos; *ra; ra++) { 99 for (ra = raid6_recov_algos; *ra; ra++) {
100 if ((*ra)->valid && !(*ra)->valid()) 100 if ((*ra)->valid && !(*ra)->valid())
101 continue; 101 continue;
102
102 raid6_2data_recov = (*ra)->data2; 103 raid6_2data_recov = (*ra)->data2;
103 raid6_datap_recov = (*ra)->datap; 104 raid6_datap_recov = (*ra)->datap;
104 105
105 printf("using recovery %s\n", (*ra)->name); 106 printf("using recovery %s\n", (*ra)->name);
106 107
107 for (algo = raid6_algos; *algo; algo++) { 108 for (algo = raid6_algos; *algo; algo++) {
108 if (!(*algo)->valid || (*algo)->valid()) { 109 if ((*algo)->valid && !(*algo)->valid())
109 raid6_call = **algo; 110 continue;
111
112 raid6_call = **algo;
113
114 /* Nuke syndromes */
115 memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
116
117 /* Generate assumed good syndrome */
118 raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
119 (void **)&dataptrs);
120
121 for (i = 0; i < NDISKS-1; i++)
122 for (j = i+1; j < NDISKS; j++)
123 err += test_disks(i, j);
124
125 if (!raid6_call.xor_syndrome)
126 continue;
127
128 for (p1 = 0; p1 < NDISKS-2; p1++)
129 for (p2 = p1; p2 < NDISKS-2; p2++) {
110 130
111 /* Nuke syndromes */ 131 /* Simulate rmw run */
112 memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); 132 raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
133 (void **)&dataptrs);
134 makedata(p1, p2);
135 raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
136 (void **)&dataptrs);
113 137
114 /* Generate assumed good syndrome */ 138 for (i = 0; i < NDISKS-1; i++)
115 raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, 139 for (j = i+1; j < NDISKS; j++)
116 (void **)&dataptrs); 140 err += test_disks(i, j);
141 }
117 142
118 for (i = 0; i < NDISKS-1; i++)
119 for (j = i+1; j < NDISKS; j++)
120 err += test_disks(i, j);
121 }
122 } 143 }
123 printf("\n"); 144 printf("\n");
124 } 145 }
diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc
index e7c29459cbcd..2dd291a11264 100644
--- a/lib/raid6/tilegx.uc
+++ b/lib/raid6/tilegx.uc
@@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
80 80
81const struct raid6_calls raid6_tilegx$# = { 81const struct raid6_calls raid6_tilegx$# = {
82 raid6_tilegx$#_gen_syndrome, 82 raid6_tilegx$#_gen_syndrome,
83 NULL, /* XOR not yet implemented */
83 NULL, 84 NULL,
84 "tilegx$#", 85 "tilegx$#",
85 0 86 0
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 4898442b837f..b28df4019ade 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -405,13 +405,18 @@ int rhashtable_insert_rehash(struct rhashtable *ht)
405 405
406 if (rht_grow_above_75(ht, tbl)) 406 if (rht_grow_above_75(ht, tbl))
407 size *= 2; 407 size *= 2;
408 /* More than two rehashes (not resizes) detected. */ 408 /* Do not schedule more than one rehash */
409 else if (WARN_ON(old_tbl != tbl && old_tbl->size == size)) 409 else if (old_tbl != tbl)
410 return -EBUSY; 410 return -EBUSY;
411 411
412 new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC); 412 new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
413 if (new_tbl == NULL) 413 if (new_tbl == NULL) {
414 /* Schedule async resize/rehash to try allocation
415 * non-atomic context.
416 */
417 schedule_work(&ht->run_work);
414 return -ENOMEM; 418 return -ENOMEM;
419 }
415 420
416 err = rhashtable_rehash_attach(ht, tbl, new_tbl); 421 err = rhashtable_rehash_attach(ht, tbl, new_tbl);
417 if (err) { 422 if (err) {
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 1826c7407258..c98ae818eb4e 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -4,6 +4,7 @@
4 * Copyright 31 August 2008 James Bottomley 4 * Copyright 31 August 2008 James Bottomley
5 * Copyright (C) 2013, Intel Corporation 5 * Copyright (C) 2013, Intel Corporation
6 */ 6 */
7#include <linux/bug.h>
7#include <linux/kernel.h> 8#include <linux/kernel.h>
8#include <linux/math64.h> 9#include <linux/math64.h>
9#include <linux/export.h> 10#include <linux/export.h>
@@ -14,7 +15,8 @@
14 15
15/** 16/**
16 * string_get_size - get the size in the specified units 17 * string_get_size - get the size in the specified units
17 * @size: The size to be converted 18 * @size: The size to be converted in blocks
19 * @blk_size: Size of the block (use 1 for size in bytes)
18 * @units: units to use (powers of 1000 or 1024) 20 * @units: units to use (powers of 1000 or 1024)
19 * @buf: buffer to format to 21 * @buf: buffer to format to
20 * @len: length of buffer 22 * @len: length of buffer
@@ -24,14 +26,14 @@
24 * at least 9 bytes and will always be zero terminated. 26 * at least 9 bytes and will always be zero terminated.
25 * 27 *
26 */ 28 */
27void string_get_size(u64 size, const enum string_size_units units, 29void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
28 char *buf, int len) 30 char *buf, int len)
29{ 31{
30 static const char *const units_10[] = { 32 static const char *const units_10[] = {
31 "B", "kB", "MB", "GB", "TB", "PB", "EB" 33 "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
32 }; 34 };
33 static const char *const units_2[] = { 35 static const char *const units_2[] = {
34 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB" 36 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
35 }; 37 };
36 static const char *const *const units_str[] = { 38 static const char *const *const units_str[] = {
37 [STRING_UNITS_10] = units_10, 39 [STRING_UNITS_10] = units_10,
@@ -42,31 +44,57 @@ void string_get_size(u64 size, const enum string_size_units units,
42 [STRING_UNITS_2] = 1024, 44 [STRING_UNITS_2] = 1024,
43 }; 45 };
44 int i, j; 46 int i, j;
45 u32 remainder = 0, sf_cap; 47 u32 remainder = 0, sf_cap, exp;
46 char tmp[8]; 48 char tmp[8];
49 const char *unit;
47 50
48 tmp[0] = '\0'; 51 tmp[0] = '\0';
49 i = 0; 52 i = 0;
50 if (size >= divisor[units]) { 53 if (!size)
51 while (size >= divisor[units]) { 54 goto out;
52 remainder = do_div(size, divisor[units]);
53 i++;
54 }
55 55
56 sf_cap = size; 56 while (blk_size >= divisor[units]) {
57 for (j = 0; sf_cap*10 < 1000; j++) 57 remainder = do_div(blk_size, divisor[units]);
58 sf_cap *= 10; 58 i++;
59 }
59 60
60 if (j) { 61 exp = divisor[units] / (u32)blk_size;
61 remainder *= 1000; 62 if (size >= exp) {
62 remainder /= divisor[units]; 63 remainder = do_div(size, divisor[units]);
63 snprintf(tmp, sizeof(tmp), ".%03u", remainder); 64 remainder *= blk_size;
64 tmp[j+1] = '\0'; 65 i++;
65 } 66 } else {
67 remainder *= size;
68 }
69
70 size *= blk_size;
71 size += remainder / divisor[units];
72 remainder %= divisor[units];
73
74 while (size >= divisor[units]) {
75 remainder = do_div(size, divisor[units]);
76 i++;
66 } 77 }
67 78
79 sf_cap = size;
80 for (j = 0; sf_cap*10 < 1000; j++)
81 sf_cap *= 10;
82
83 if (j) {
84 remainder *= 1000;
85 remainder /= divisor[units];
86 snprintf(tmp, sizeof(tmp), ".%03u", remainder);
87 tmp[j+1] = '\0';
88 }
89
90 out:
91 if (i >= ARRAY_SIZE(units_2))
92 unit = "UNK";
93 else
94 unit = units_str[units][i];
95
68 snprintf(buf, len, "%u%s %s", (u32)size, 96 snprintf(buf, len, "%u%s %s", (u32)size,
69 tmp, units_str[units][i]); 97 tmp, unit);
70} 98}
71EXPORT_SYMBOL(string_get_size); 99EXPORT_SYMBOL(string_get_size);
72 100
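
Editor's note: string_get_size() now takes the size as a block count plus a block size, so callers no longer multiply the two themselves (which can overflow 64 bits for large devices). The user-space sketch below shows the core idea with made-up names; it keeps only the integer part and the binary units, assumes blk_size >= 1, and is not the kernel implementation.

#include <stdio.h>
#include <stdint.h>

/* size is a count of blocks of blk_size bytes each.  Both factors are
 * reduced below the divisor before they are multiplied, so the product
 * cannot overflow 64 bits. */
static void size_to_string(uint64_t size, uint64_t blk_size,
			   char *buf, size_t len)
{
	static const char *const units[] = {
		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
	};
	const uint64_t divisor = 1024;
	unsigned int i = 0;

	if (!size) {
		snprintf(buf, len, "0 B");
		return;
	}

	/* Reduce the block size first: 4096-byte blocks become 4 KiB blocks. */
	while (blk_size >= divisor) {
		blk_size /= divisor;
		i++;
	}

	/* Reduce the block count once if the product could still overflow. */
	if (size >= divisor / blk_size) {
		size /= divisor;
		i++;
	}

	/* Now size <= 2^64/divisor and blk_size < divisor, so this is safe. */
	size *= blk_size;

	while (size >= divisor) {
		size /= divisor;
		i++;
	}

	snprintf(buf, len, "%u %s", (unsigned int)size,
		 i < sizeof(units) / sizeof(units[0]) ? units[i] : "UNK");
}

int main(void)
{
	char buf[16];

	size_to_string(2097152, 512, buf, sizeof(buf));	/* 2^21 blocks x 512 B */
	printf("%s\n", buf);				/* prints "1 GiB" */
	return 0;
}

The in-kernel version additionally carries the discarded remainders forward so it can print up to three fractional digits, which is what the extra remainder/sf_cap arithmetic in the hunk above is for.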
diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c
index 9846ff7428b3..c227cc43ec0a 100644
--- a/lib/test-hexdump.c
+++ b/lib/test-hexdump.c
@@ -48,7 +48,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize,
48 char test[32 * 3 + 2 + 32 + 1]; 48 char test[32 * 3 + 2 + 32 + 1];
49 char real[32 * 3 + 2 + 32 + 1]; 49 char real[32 * 3 + 2 + 32 + 1];
50 char *p; 50 char *p;
51 const char **result; 51 const char * const *result;
52 size_t l = len; 52 size_t l = len;
53 int gs = groupsize, rs = rowsize; 53 int gs = groupsize, rs = rowsize;
54 unsigned int i; 54 unsigned int i;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3a1e0843f9a2..da39c608a28c 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -33,6 +33,7 @@
33 33
34#include <asm/page.h> /* for PAGE_SIZE */ 34#include <asm/page.h> /* for PAGE_SIZE */
35#include <asm/sections.h> /* for dereference_function_descriptor() */ 35#include <asm/sections.h> /* for dereference_function_descriptor() */
36#include <asm/byteorder.h> /* cpu_to_le16 */
36 37
37#include <linux/string_helpers.h> 38#include <linux/string_helpers.h>
38#include "kstrtox.h" 39#include "kstrtox.h"
@@ -122,142 +123,145 @@ int skip_atoi(const char **s)
122 return i; 123 return i;
123} 124}
124 125
125/* Decimal conversion is by far the most typical, and is used 126/*
126 * for /proc and /sys data. This directly impacts e.g. top performance 127 * Decimal conversion is by far the most typical, and is used for
127 * with many processes running. We optimize it for speed 128 * /proc and /sys data. This directly impacts e.g. top performance
128 * using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html> 129 * with many processes running. We optimize it for speed by emitting
129 * (with permission from the author, Douglas W. Jones). 130 * two characters at a time, using a 200 byte lookup table. This
131 * roughly halves the number of multiplications compared to computing
132 * the digits one at a time. Implementation strongly inspired by the
133 * previous version, which in turn used ideas described at
134 * <http://www.cs.uiowa.edu/~jones/bcd/divide.html> (with permission
135 * from the author, Douglas W. Jones).
136 *
137 * It turns out there is precisely one 26 bit fixed-point
138 * approximation a of 64/100 for which x/100 == (x * (u64)a) >> 32
139 * holds for all x in [0, 10^8-1], namely a = 0x28f5c29. The actual
140 * range happens to be somewhat larger (x <= 1073741898), but that's
141 * irrelevant for our purpose.
142 *
143 * For dividing a number in the range [10^4, 10^6-1] by 100, we still
144 * need a 32x32->64 bit multiply, so we simply use the same constant.
145 *
146 * For dividing a number in the range [100, 10^4-1] by 100, there are
147 * several options. The simplest is (x * 0x147b) >> 19, which is valid
148 * for all x <= 43698.
130 */ 149 */
131 150
132#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 151static const u16 decpair[100] = {
133/* Formats correctly any integer in [0, 999999999] */ 152#define _(x) (__force u16) cpu_to_le16(((x % 10) | ((x / 10) << 8)) + 0x3030)
153 _( 0), _( 1), _( 2), _( 3), _( 4), _( 5), _( 6), _( 7), _( 8), _( 9),
154 _(10), _(11), _(12), _(13), _(14), _(15), _(16), _(17), _(18), _(19),
155 _(20), _(21), _(22), _(23), _(24), _(25), _(26), _(27), _(28), _(29),
156 _(30), _(31), _(32), _(33), _(34), _(35), _(36), _(37), _(38), _(39),
157 _(40), _(41), _(42), _(43), _(44), _(45), _(46), _(47), _(48), _(49),
158 _(50), _(51), _(52), _(53), _(54), _(55), _(56), _(57), _(58), _(59),
159 _(60), _(61), _(62), _(63), _(64), _(65), _(66), _(67), _(68), _(69),
160 _(70), _(71), _(72), _(73), _(74), _(75), _(76), _(77), _(78), _(79),
161 _(80), _(81), _(82), _(83), _(84), _(85), _(86), _(87), _(88), _(89),
162 _(90), _(91), _(92), _(93), _(94), _(95), _(96), _(97), _(98), _(99),
163#undef _
164};
165
166/*
167 * This will print a single '0' even if r == 0, since we would
168 * immediately jump to out_r where two 0s would be written but only
169 * one of them accounted for in buf. This is needed by ip4_string
170 * below. All other callers pass a non-zero value of r.
171*/
134static noinline_for_stack 172static noinline_for_stack
135char *put_dec_full9(char *buf, unsigned q) 173char *put_dec_trunc8(char *buf, unsigned r)
136{ 174{
137 unsigned r; 175 unsigned q;
138 176
139 /* 177 /* 1 <= r < 10^8 */
140 * Possible ways to approx. divide by 10 178 if (r < 100)
141 * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit) 179 goto out_r;
142 * (x * 0xcccd) >> 19 x < 81920 (x < 262149 when 64-bit mul) 180
143 * (x * 0x6667) >> 18 x < 43699 181 /* 100 <= r < 10^8 */
144 * (x * 0x3334) >> 17 x < 16389 182 q = (r * (u64)0x28f5c29) >> 32;
145 * (x * 0x199a) >> 16 x < 16389 183 *((u16 *)buf) = decpair[r - 100*q];
146 * (x * 0x0ccd) >> 15 x < 16389 184 buf += 2;
147 * (x * 0x0667) >> 14 x < 2739 185
148 * (x * 0x0334) >> 13 x < 1029 186 /* 1 <= q < 10^6 */
149 * (x * 0x019a) >> 12 x < 1029 187 if (q < 100)
150 * (x * 0x00cd) >> 11 x < 1029 shorter code than * 0x67 (on i386) 188 goto out_q;
151 * (x * 0x0067) >> 10 x < 179 189
152 * (x * 0x0034) >> 9 x < 69 same 190 /* 100 <= q < 10^6 */
153 * (x * 0x001a) >> 8 x < 69 same 191 r = (q * (u64)0x28f5c29) >> 32;
154 * (x * 0x000d) >> 7 x < 69 same, shortest code (on i386) 192 *((u16 *)buf) = decpair[q - 100*r];
155 * (x * 0x0007) >> 6 x < 19 193 buf += 2;
156 * See <http://www.cs.uiowa.edu/~jones/bcd/divide.html> 194
157 */ 195 /* 1 <= r < 10^4 */
158 r = (q * (uint64_t)0x1999999a) >> 32; 196 if (r < 100)
159 *buf++ = (q - 10 * r) + '0'; /* 1 */ 197 goto out_r;
160 q = (r * (uint64_t)0x1999999a) >> 32; 198
161 *buf++ = (r - 10 * q) + '0'; /* 2 */ 199 /* 100 <= r < 10^4 */
162 r = (q * (uint64_t)0x1999999a) >> 32; 200 q = (r * 0x147b) >> 19;
163 *buf++ = (q - 10 * r) + '0'; /* 3 */ 201 *((u16 *)buf) = decpair[r - 100*q];
164 q = (r * (uint64_t)0x1999999a) >> 32; 202 buf += 2;
165 *buf++ = (r - 10 * q) + '0'; /* 4 */ 203out_q:
166 r = (q * (uint64_t)0x1999999a) >> 32; 204 /* 1 <= q < 100 */
167 *buf++ = (q - 10 * r) + '0'; /* 5 */ 205 r = q;
168 /* Now value is under 10000, can avoid 64-bit multiply */ 206out_r:
169 q = (r * 0x199a) >> 16; 207 /* 1 <= r < 100 */
170 *buf++ = (r - 10 * q) + '0'; /* 6 */ 208 *((u16 *)buf) = decpair[r];
171 r = (q * 0xcd) >> 11; 209 buf += r < 10 ? 1 : 2;
172 *buf++ = (q - 10 * r) + '0'; /* 7 */
173 q = (r * 0xcd) >> 11;
174 *buf++ = (r - 10 * q) + '0'; /* 8 */
175 *buf++ = q + '0'; /* 9 */
176 return buf; 210 return buf;
177} 211}
178#endif
179 212
180/* Similar to above but do not pad with zeros. 213#if BITS_PER_LONG == 64 && BITS_PER_LONG_LONG == 64
181 * Code can be easily arranged to print 9 digits too, but our callers
182 * always call put_dec_full9() instead when the number has 9 decimal digits.
183 */
184static noinline_for_stack 214static noinline_for_stack
185char *put_dec_trunc8(char *buf, unsigned r) 215char *put_dec_full8(char *buf, unsigned r)
186{ 216{
187 unsigned q; 217 unsigned q;
188 218
189 /* Copy of previous function's body with added early returns */ 219 /* 0 <= r < 10^8 */
190 while (r >= 10000) { 220 q = (r * (u64)0x28f5c29) >> 32;
191 q = r + '0'; 221 *((u16 *)buf) = decpair[r - 100*q];
192 r = (r * (uint64_t)0x1999999a) >> 32; 222 buf += 2;
193 *buf++ = q - 10*r;
194 }
195 223
196 q = (r * 0x199a) >> 16; /* r <= 9999 */ 224 /* 0 <= q < 10^6 */
197 *buf++ = (r - 10 * q) + '0'; 225 r = (q * (u64)0x28f5c29) >> 32;
198 if (q == 0) 226 *((u16 *)buf) = decpair[q - 100*r];
199 return buf; 227 buf += 2;
200 r = (q * 0xcd) >> 11; /* q <= 999 */
201 *buf++ = (q - 10 * r) + '0';
202 if (r == 0)
203 return buf;
204 q = (r * 0xcd) >> 11; /* r <= 99 */
205 *buf++ = (r - 10 * q) + '0';
206 if (q == 0)
207 return buf;
208 *buf++ = q + '0'; /* q <= 9 */
209 return buf;
210}
211 228
212/* There are two algorithms to print larger numbers. 229 /* 0 <= r < 10^4 */
213 * One is generic: divide by 1000000000 and repeatedly print 230 q = (r * 0x147b) >> 19;
214 * groups of (up to) 9 digits. It's conceptually simple, 231 *((u16 *)buf) = decpair[r - 100*q];
215 * but requires a (unsigned long long) / 1000000000 division. 232 buf += 2;
216 *
217 * Second algorithm splits 64-bit unsigned long long into 16-bit chunks,
218 * manipulates them cleverly and generates groups of 4 decimal digits.
219 * It so happens that it does NOT require long long division.
220 *
221 * If long is > 32 bits, division of 64-bit values is relatively easy,
222 * and we will use the first algorithm.
223 * If long long is > 64 bits (strange architecture with VERY large long long),
224 * second algorithm can't be used, and we again use the first one.
225 *
226 * Else (if long is 32 bits and long long is 64 bits) we use second one.
227 */
228 233
229#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 234 /* 0 <= q < 100 */
230 235 *((u16 *)buf) = decpair[q];
231/* First algorithm: generic */ 236 buf += 2;
237 return buf;
238}
232 239
233static 240static noinline_for_stack
234char *put_dec(char *buf, unsigned long long n) 241char *put_dec(char *buf, unsigned long long n)
235{ 242{
236 if (n >= 100*1000*1000) { 243 if (n >= 100*1000*1000)
237 while (n >= 1000*1000*1000) 244 buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
238 buf = put_dec_full9(buf, do_div(n, 1000*1000*1000)); 245 /* 1 <= n <= 1.6e11 */
239 if (n >= 100*1000*1000) 246 if (n >= 100*1000*1000)
240 return put_dec_full9(buf, n); 247 buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
241 } 248 /* 1 <= n < 1e8 */
242 return put_dec_trunc8(buf, n); 249 return put_dec_trunc8(buf, n);
243} 250}
244 251
245#else 252#elif BITS_PER_LONG == 32 && BITS_PER_LONG_LONG == 64
246 253
247/* Second algorithm: valid only for 64-bit long longs */ 254static void
248 255put_dec_full4(char *buf, unsigned r)
249/* See comment in put_dec_full9 for choice of constants */
250static noinline_for_stack
251void put_dec_full4(char *buf, unsigned q)
252{ 256{
253 unsigned r; 257 unsigned q;
254 r = (q * 0xccd) >> 15; 258
255 buf[0] = (q - 10 * r) + '0'; 259 /* 0 <= r < 10^4 */
256 q = (r * 0xcd) >> 11; 260 q = (r * 0x147b) >> 19;
257 buf[1] = (r - 10 * q) + '0'; 261 *((u16 *)buf) = decpair[r - 100*q];
258 r = (q * 0xcd) >> 11; 262 buf += 2;
259 buf[2] = (q - 10 * r) + '0'; 263 /* 0 <= q < 100 */
260 buf[3] = r + '0'; 264 *((u16 *)buf) = decpair[q];
261} 265}
262 266
263/* 267/*
@@ -265,9 +269,9 @@ void put_dec_full4(char *buf, unsigned q)
265 * The approximation x/10000 == (x * 0x346DC5D7) >> 43 269 * The approximation x/10000 == (x * 0x346DC5D7) >> 43
266 * holds for all x < 1,128,869,999. The largest value this 270 * holds for all x < 1,128,869,999. The largest value this
267 * helper will ever be asked to convert is 1,125,520,955. 271 * helper will ever be asked to convert is 1,125,520,955.
268 * (d1 in the put_dec code, assuming n is all-ones). 272 * (second call in the put_dec code, assuming n is all-ones).
269 */ 273 */
270static 274static noinline_for_stack
271unsigned put_dec_helper4(char *buf, unsigned x) 275unsigned put_dec_helper4(char *buf, unsigned x)
272{ 276{
273 uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43; 277 uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43;
@@ -294,6 +298,8 @@ char *put_dec(char *buf, unsigned long long n)
294 d2 = (h ) & 0xffff; 298 d2 = (h ) & 0xffff;
295 d3 = (h >> 16); /* implicit "& 0xffff" */ 299 d3 = (h >> 16); /* implicit "& 0xffff" */
296 300
301 /* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0
302 = 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */
297 q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); 303 q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff);
298 q = put_dec_helper4(buf, q); 304 q = put_dec_helper4(buf, q);
299 305
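
Editor's note: the comment added above documents why the 32-bit put_dec() can work on 16-bit chunks: modulo 10^4, 2^16, 2^32 and 2^48 reduce to 5536, 7296 and 656, so the low four decimal digits of n depend only on the weighted sum of the chunks. A small user-space check of that identity (and of the 0x346DC5D7 constant used by put_dec_helper4() in the previous hunk) might read as follows; it is an illustration, not kernel code.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* 2^16, 2^32 and 2^48 modulo 10^4 */
	printf("%llu %llu %llu\n",
	       (1ULL << 16) % 10000, (1ULL << 32) % 10000,
	       (1ULL << 48) % 10000);		/* prints 5536 7296 656 */

	/* The low four decimal digits of n come from the weighted chunk sum. */
	for (uint64_t n = 0xfff0000000000000ULL; n < 0xfff0000000100000ULL; n++) {
		uint32_t d1 = (n >> 16) & 0xffff;
		uint32_t d2 = (n >> 32) & 0xffff;
		uint32_t d3 = n >> 48;
		uint32_t q = 656 * d3 + 7296 * d2 + 5536 * d1 + (n & 0xffff);

		if (q % 10000 != n % 10000) {
			printf("chunk identity fails at %llu\n",
			       (unsigned long long)n);
			return 1;
		}
	}

	/* x/10000 == (x * 0x346DC5D7) >> 43 for all x < 1,128,869,999 */
	for (uint32_t x = 0; x < 1128869999; x++) {
		if ((x * (uint64_t)0x346DC5D7) >> 43 != x / 10000) {
			printf("0x346DC5D7 fails at %u\n", x);
			return 1;
		}
	}

	printf("all checks passed\n");
	return 0;
}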
@@ -323,7 +329,8 @@ char *put_dec(char *buf, unsigned long long n)
323 */ 329 */
324int num_to_str(char *buf, int size, unsigned long long num) 330int num_to_str(char *buf, int size, unsigned long long num)
325{ 331{
326 char tmp[sizeof(num) * 3]; 332 /* put_dec requires 2-byte alignment of the buffer. */
333 char tmp[sizeof(num) * 3] __aligned(2);
327 int idx, len; 334 int idx, len;
328 335
329 /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */ 336 /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */
@@ -384,7 +391,8 @@ static noinline_for_stack
384char *number(char *buf, char *end, unsigned long long num, 391char *number(char *buf, char *end, unsigned long long num,
385 struct printf_spec spec) 392 struct printf_spec spec)
386{ 393{
387 char tmp[3 * sizeof(num)]; 394 /* put_dec requires 2-byte alignment of the buffer. */
395 char tmp[3 * sizeof(num)] __aligned(2);
388 char sign; 396 char sign;
389 char locase; 397 char locase;
390 int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); 398 int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
@@ -944,7 +952,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt)
944 break; 952 break;
945 } 953 }
946 for (i = 0; i < 4; i++) { 954 for (i = 0; i < 4; i++) {
947 char temp[3]; /* hold each IP quad in reverse order */ 955 char temp[4] __aligned(2); /* hold each IP quad in reverse order */
948 int digits = put_dec_trunc8(temp, addr[index]) - temp; 956 int digits = put_dec_trunc8(temp, addr[index]) - temp;
949 if (leading_zeros) { 957 if (leading_zeros) {
950 if (digits < 3) 958 if (digits < 3)