Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig                    9
-rw-r--r--  lib/Kconfig.debug             60
-rw-r--r--  lib/Makefile                   6
-rw-r--r--  lib/bitmap.c                  30
-rw-r--r--  lib/cpumask.c                 37
-rw-r--r--  lib/devres.c                  28
-rw-r--r--  lib/div64.c                    2
-rw-r--r--  lib/dma-debug.c                2
-rw-r--r--  lib/find_bit.c               193
-rw-r--r--  lib/find_last_bit.c           36
-rw-r--r--  lib/find_next_bit.c          285
-rw-r--r--  lib/iommu-common.c           270
-rw-r--r--  lib/ioremap.c                 53
-rw-r--r--  lib/iov_iter.c               851
-rw-r--r--  lib/kobject.c                  7
-rw-r--r--  lib/lcm.c                     11
-rw-r--r--  lib/lockref.c                  2
-rw-r--r--  lib/lru_cache.c                9
-rw-r--r--  lib/lz4/lz4_decompress.c      21
-rw-r--r--  lib/nlattr.c                   2
-rw-r--r--  lib/raid6/algos.c             41
-rw-r--r--  lib/raid6/altivec.uc           1
-rw-r--r--  lib/raid6/avx2.c               3
-rw-r--r--  lib/raid6/int.uc              41
-rw-r--r--  lib/raid6/mmx.c                2
-rw-r--r--  lib/raid6/neon.c               1
-rw-r--r--  lib/raid6/sse1.c               2
-rw-r--r--  lib/raid6/sse2.c             227
-rw-r--r--  lib/raid6/test/test.c         51
-rw-r--r--  lib/raid6/tilegx.uc            1
-rw-r--r--  lib/rhashtable.c            1034
-rw-r--r--  lib/seq_buf.c                  4
-rw-r--r--  lib/sha1.c                     1
-rw-r--r--  lib/string.c                   2
-rw-r--r--  lib/string_helpers.c         261
-rw-r--r--  lib/test-hexdump.c            10
-rw-r--r--  lib/test-string_helpers.c     40
-rw-r--r--  lib/test_rhashtable.c         67
-rw-r--r--  lib/vsprintf.c               352
39 files changed, 2596 insertions, 1459 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 87da53bb1fef..601965a948e8 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -18,9 +18,8 @@ config HAVE_ARCH_BITREVERSE
 	default n
 	depends on BITREVERSE
 	help
-	  This option provides an config for the architecture which have instruction
-	  can do bitreverse operation, we use the hardware instruction if the architecture
-	  have this capability.
+	  This option enables the use of hardware bit-reversal instructions on
+	  architectures which support such operations.
 
 config RATIONAL
 	bool
@@ -397,10 +396,6 @@ config CPUMASK_OFFSTACK
 	  them on the stack. This is a bit more expensive, but avoids
 	  stack overflow.
 
-config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
-	bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
-	depends on BROKEN
-
 config CPU_RMAP
 	bool
 	depends on SMP
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c5cefb3c009c..17670573dda8 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK
 	  data corruption or a sporadic crash at a later stage once the region
 	  is examined. The runtime overhead introduced is minimal.
 
+config DEBUG_TIMEKEEPING
+	bool "Enable extra timekeeping sanity checking"
+	help
+	  This option will enable additional timekeeping sanity checks
+	  which may be helpful when diagnosing issues where timekeeping
+	  problems are suspected.
+
+	  This may include checks in the timekeeping hotpaths, so this
+	  option may have a (very small) performance impact to some
+	  workloads.
+
+	  If unsure, say N.
+
 config TIMER_STATS
 	bool "Collect kernel timers statistics"
 	depends on DEBUG_KERNEL && PROC_FS
@@ -1180,16 +1193,7 @@ config DEBUG_CREDENTIALS
 menu "RCU Debugging"
 
 config PROVE_RCU
-	bool "RCU debugging: prove RCU correctness"
-	depends on PROVE_LOCKING
-	default n
-	help
-	 This feature enables lockdep extensions that check for correct
-	 use of RCU APIs. This is currently under development. Say Y
-	 if you want to debug RCU usage or help work on the PROVE_RCU
-	 feature.
-
-	 Say N if you are unsure.
+	def_bool PROVE_LOCKING
 
 config PROVE_RCU_REPEATEDLY
 	bool "RCU debugging: don't disable PROVE_RCU on first splat"
@@ -1257,6 +1261,30 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
+config RCU_TORTURE_TEST_SLOW_INIT
+	bool "Slow down RCU grace-period initialization to expose races"
+	depends on RCU_TORTURE_TEST
+	help
+	  This option makes grace-period initialization block for a
+	  few jiffies between initializing each pair of consecutive
+	  rcu_node structures. This helps to expose races involving
+	  grace-period initialization, in other words, it makes your
+	  kernel less stable. It can also greatly increase grace-period
+	  latency, especially on systems with large numbers of CPUs.
+	  This is useful when torture-testing RCU, but in almost no
+	  other circumstance.
+
+	  Say Y here if you want your system to crash and hang more often.
+	  Say N if you want a sane system.
+
+config RCU_TORTURE_TEST_SLOW_INIT_DELAY
+	int "How much to slow down RCU grace-period initialization"
+	range 0 5
+	default 3
+	help
+	  This option specifies the number of jiffies to wait between
+	  each rcu_node structure initialization.
+
 config RCU_CPU_STALL_TIMEOUT
 	int "RCU CPU stall timeout in seconds"
 	depends on RCU_STALL_COMMON
@@ -1732,6 +1760,18 @@ config TEST_UDELAY
 
 	  If unsure, say N.
 
+config MEMTEST
+	bool "Memtest"
+	depends on HAVE_MEMBLOCK
+	---help---
+	  This option adds a kernel parameter 'memtest', which allows memtest
+	  to be set.
+	        memtest=0, mean disabled; -- default
+	        memtest=1, mean do 1 test pattern;
+	        ...
+	        memtest=17, mean do 17 test patterns.
+	  If you are unsure how to answer this question, answer N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
diff --git a/lib/Makefile b/lib/Makefile
index 87eb3bffc283..6c37933336a0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -24,8 +24,8 @@ obj-y += lockref.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
-	 gcd.o lcm.o list_sort.o uuid.o flex_array.o clz_ctz.o \
-	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
+	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
+	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
@@ -106,7 +106,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
-obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
+obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
 obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index d456f4c15a9f..64c0926f5dd8 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -42,36 +42,6 @@
  * for the best explanations of this ordering.
  */
 
-int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
-{
-	unsigned int k, lim = bits/BITS_PER_LONG;
-	for (k = 0; k < lim; ++k)
-		if (bitmap[k])
-			return 0;
-
-	if (bits % BITS_PER_LONG)
-		if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
-			return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL(__bitmap_empty);
-
-int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
-{
-	unsigned int k, lim = bits/BITS_PER_LONG;
-	for (k = 0; k < lim; ++k)
-		if (~bitmap[k])
-			return 0;
-
-	if (bits % BITS_PER_LONG)
-		if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
-			return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL(__bitmap_full);
-
 int __bitmap_equal(const unsigned long *bitmap1,
 		   const unsigned long *bitmap2, unsigned int bits)
 {
diff --git a/lib/cpumask.c b/lib/cpumask.c
index b6513a9f2892..830dd5dec40f 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -5,27 +5,6 @@
 #include <linux/export.h>
 #include <linux/bootmem.h>
 
-int __first_cpu(const cpumask_t *srcp)
-{
-	return min_t(int, NR_CPUS, find_first_bit(srcp->bits, NR_CPUS));
-}
-EXPORT_SYMBOL(__first_cpu);
-
-int __next_cpu(int n, const cpumask_t *srcp)
-{
-	return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1));
-}
-EXPORT_SYMBOL(__next_cpu);
-
-#if NR_CPUS > 64
-int __next_cpu_nr(int n, const cpumask_t *srcp)
-{
-	return min_t(int, nr_cpu_ids,
-		     find_next_bit(srcp->bits, nr_cpu_ids, n+1));
-}
-EXPORT_SYMBOL(__next_cpu_nr);
-#endif
-
 /**
  * cpumask_next_and - get the next cpu in *src1p & *src2p
  * @n: the cpu prior to the place to search (ie. return will be > @n)
@@ -37,10 +16,11 @@ EXPORT_SYMBOL(__next_cpu_nr);
 int cpumask_next_and(int n, const struct cpumask *src1p,
 		     const struct cpumask *src2p)
 {
-	while ((n = cpumask_next(n, src1p)) < nr_cpu_ids)
-		if (cpumask_test_cpu(n, src2p))
-			break;
-	return n;
+	struct cpumask tmp;
+
+	if (cpumask_and(&tmp, src1p, src2p))
+		return cpumask_next(n, &tmp);
+	return nr_cpu_ids;
 }
 EXPORT_SYMBOL(cpumask_next_and);
 
@@ -89,13 +69,6 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 		dump_stack();
 	}
 #endif
-	/* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */
-	if (*mask) {
-		unsigned char *ptr = (unsigned char *)cpumask_bits(*mask);
-		unsigned int tail;
-		tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long);
-		memset(ptr + cpumask_size() - tail, 0, tail);
-	}
 
 	return *mask != NULL;
 }
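For reference, the rewritten cpumask_next_and() above keeps the documented convention of returning nr_cpu_ids once the intersection is exhausted. A minimal caller sketch (illustration only, not part of the patch; the 'allowed' mask is hypothetical):

#include <linux/cpumask.h>
#include <linux/printk.h>

/* Illustration: visit each CPU that is both online and in 'allowed',
 * stopping when cpumask_next_and() returns the nr_cpu_ids sentinel. */
static void walk_online_and_allowed(const struct cpumask *allowed)
{
	int cpu;

	for (cpu = cpumask_next_and(-1, cpu_online_mask, allowed);
	     cpu < nr_cpu_ids;
	     cpu = cpumask_next_and(cpu, cpu_online_mask, allowed))
		pr_info("cpu %d is online and allowed\n", cpu);
}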
diff --git a/lib/devres.c b/lib/devres.c
index 0f1dd2e9d2c1..fbe2aac522e6 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -72,6 +72,34 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset,
 EXPORT_SYMBOL(devm_ioremap_nocache);
 
 /**
+ * devm_ioremap_wc - Managed ioremap_wc()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ *
+ * Managed ioremap_wc(). Map is automatically unmapped on driver detach.
+ */
+void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset,
+			      resource_size_t size)
+{
+	void __iomem **ptr, *addr;
+
+	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = ioremap_wc(offset, size);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_ioremap_wc);
+
+/**
  * devm_iounmap - Managed iounmap()
  * @dev: Generic device to unmap for
  * @addr: Address to unmap
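Like the other devm_ioremap*() helpers in this file, the new devm_ioremap_wc() ties the mapping's lifetime to the device, so no explicit iounmap() is needed. A minimal usage sketch (illustration only; the probe function and resource index are hypothetical):

#include <linux/io.h>
#include <linux/platform_device.h>

/* Hypothetical probe path: map a write-combined MMIO region that is
 * torn down automatically when the driver detaches. */
static int example_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *regs;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;

	regs = devm_ioremap_wc(&pdev->dev, res->start, resource_size(res));
	if (!regs)
		return -ENOMEM;

	/* ... program the device through 'regs' ... */
	return 0;
}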
diff --git a/lib/div64.c b/lib/div64.c
index 4382ad77777e..19ea7ed4b948 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -127,7 +127,7 @@ EXPORT_SYMBOL(div64_u64_rem);
  * by the book 'Hacker's Delight'. The original source and full proof
  * can be found here and is available for use without restriction.
  *
- * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt'
+ * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt'
  */
 #ifndef div64_u64
 u64 div64_u64(u64 dividend, u64 divisor)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 9722bd2dbc9b..ae4b65e17e64 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -361,7 +361,7 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
 	unsigned int range = 0;
 
 	while (range <= max_range) {
-		entry = __hash_bucket_find(*bucket, &index, containing_match);
+		entry = __hash_bucket_find(*bucket, ref, containing_match);
 
 		if (entry)
 			return entry;
diff --git a/lib/find_bit.c b/lib/find_bit.c
new file mode 100644
index 000000000000..18072ea9c20e
--- /dev/null
+++ b/lib/find_bit.c
@@ -0,0 +1,193 @@
1/* bit search implementation
2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * Copyright (C) 2008 IBM Corporation
7 * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
8 * (Inspired by David Howell's find_next_bit implementation)
9 *
10 * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
11 * size and improve performance, 2015.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 */
18
19#include <linux/bitops.h>
20#include <linux/bitmap.h>
21#include <linux/export.h>
22#include <linux/kernel.h>
23
24#if !defined(find_next_bit) || !defined(find_next_zero_bit)
25
26/*
27 * This is a common helper function for find_next_bit and
28 * find_next_zero_bit. The difference is the "invert" argument, which
29 * is XORed with each fetched word before searching it for one bits.
30 */
31static unsigned long _find_next_bit(const unsigned long *addr,
32 unsigned long nbits, unsigned long start, unsigned long invert)
33{
34 unsigned long tmp;
35
36 if (!nbits || start >= nbits)
37 return nbits;
38
39 tmp = addr[start / BITS_PER_LONG] ^ invert;
40
41 /* Handle 1st word. */
42 tmp &= BITMAP_FIRST_WORD_MASK(start);
43 start = round_down(start, BITS_PER_LONG);
44
45 while (!tmp) {
46 start += BITS_PER_LONG;
47 if (start >= nbits)
48 return nbits;
49
50 tmp = addr[start / BITS_PER_LONG] ^ invert;
51 }
52
53 return min(start + __ffs(tmp), nbits);
54}
55#endif
56
57#ifndef find_next_bit
58/*
59 * Find the next set bit in a memory region.
60 */
61unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
62 unsigned long offset)
63{
64 return _find_next_bit(addr, size, offset, 0UL);
65}
66EXPORT_SYMBOL(find_next_bit);
67#endif
68
69#ifndef find_next_zero_bit
70unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
71 unsigned long offset)
72{
73 return _find_next_bit(addr, size, offset, ~0UL);
74}
75EXPORT_SYMBOL(find_next_zero_bit);
76#endif
77
78#ifndef find_first_bit
79/*
80 * Find the first set bit in a memory region.
81 */
82unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
83{
84 unsigned long idx;
85
86 for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
87 if (addr[idx])
88 return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
89 }
90
91 return size;
92}
93EXPORT_SYMBOL(find_first_bit);
94#endif
95
96#ifndef find_first_zero_bit
97/*
98 * Find the first cleared bit in a memory region.
99 */
100unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
101{
102 unsigned long idx;
103
104 for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
105 if (addr[idx] != ~0UL)
106 return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
107 }
108
109 return size;
110}
111EXPORT_SYMBOL(find_first_zero_bit);
112#endif
113
114#ifndef find_last_bit
115unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
116{
117 if (size) {
118 unsigned long val = BITMAP_LAST_WORD_MASK(size);
119 unsigned long idx = (size-1) / BITS_PER_LONG;
120
121 do {
122 val &= addr[idx];
123 if (val)
124 return idx * BITS_PER_LONG + __fls(val);
125
126 val = ~0ul;
127 } while (idx--);
128 }
129 return size;
130}
131EXPORT_SYMBOL(find_last_bit);
132#endif
133
134#ifdef __BIG_ENDIAN
135
136/* include/linux/byteorder does not support "unsigned long" type */
137static inline unsigned long ext2_swab(const unsigned long y)
138{
139#if BITS_PER_LONG == 64
140 return (unsigned long) __swab64((u64) y);
141#elif BITS_PER_LONG == 32
142 return (unsigned long) __swab32((u32) y);
143#else
144#error BITS_PER_LONG not defined
145#endif
146}
147
148#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
149static unsigned long _find_next_bit_le(const unsigned long *addr,
150 unsigned long nbits, unsigned long start, unsigned long invert)
151{
152 unsigned long tmp;
153
154 if (!nbits || start >= nbits)
155 return nbits;
156
157 tmp = addr[start / BITS_PER_LONG] ^ invert;
158
159 /* Handle 1st word. */
160 tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start));
161 start = round_down(start, BITS_PER_LONG);
162
163 while (!tmp) {
164 start += BITS_PER_LONG;
165 if (start >= nbits)
166 return nbits;
167
168 tmp = addr[start / BITS_PER_LONG] ^ invert;
169 }
170
171 return min(start + __ffs(ext2_swab(tmp)), nbits);
172}
173#endif
174
175#ifndef find_next_zero_bit_le
176unsigned long find_next_zero_bit_le(const void *addr, unsigned
177 long size, unsigned long offset)
178{
179 return _find_next_bit_le(addr, size, offset, ~0UL);
180}
181EXPORT_SYMBOL(find_next_zero_bit_le);
182#endif
183
184#ifndef find_next_bit_le
185unsigned long find_next_bit_le(const void *addr, unsigned
186 long size, unsigned long offset)
187{
188 return _find_next_bit_le(addr, size, offset, 0UL);
189}
190EXPORT_SYMBOL(find_next_bit_le);
191#endif
192
193#endif /* __BIG_ENDIAN */
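The consolidated helpers above keep the long-standing convention that a failed search returns the bitmap size, so callers can walk set bits as sketched below (illustration only, not part of the patch; this is essentially what the for_each_set_bit() macro expands to):

#include <linux/bitops.h>
#include <linux/printk.h>

/* Illustration: report every set bit in 'map', stopping once
 * find_next_bit() returns 'size' (nothing further found). */
static void walk_set_bits(const unsigned long *map, unsigned long size)
{
	unsigned long bit;

	for (bit = find_first_bit(map, size);
	     bit < size;
	     bit = find_next_bit(map, size, bit + 1))
		pr_info("bit %lu is set\n", bit);
}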
diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
index 91ca09fbf6f9..3e3be40c6a6e 100644
--- a/lib/find_last_bit.c
+++ b/lib/find_last_bit.c
@@ -4,6 +4,9 @@
  * Written by Rusty Russell <rusty@rustcorp.com.au>
  * (Inspired by David Howell's find_next_bit implementation)
  *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
@@ -11,37 +14,26 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/export.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
 
 #ifndef find_last_bit
 
 unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
 {
-	unsigned long words;
-	unsigned long tmp;
-
-	/* Start at final word. */
-	words = size / BITS_PER_LONG;
+	if (size) {
+		unsigned long val = BITMAP_LAST_WORD_MASK(size);
+		unsigned long idx = (size-1) / BITS_PER_LONG;
 
-	/* Partial final word? */
-	if (size & (BITS_PER_LONG-1)) {
-		tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
-			 - (size & (BITS_PER_LONG-1)))));
-		if (tmp)
-			goto found;
-	}
+		do {
+			val &= addr[idx];
+			if (val)
+				return idx * BITS_PER_LONG + __fls(val);
 
-	while (words) {
-		tmp = addr[--words];
-		if (tmp) {
-found:
-			return words * BITS_PER_LONG + __fls(tmp);
-		}
+			val = ~0ul;
+		} while (idx--);
 	}
-
-	/* Not found */
 	return size;
 }
 EXPORT_SYMBOL(find_last_bit);
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
deleted file mode 100644
index 0cbfc0b4398f..000000000000
--- a/lib/find_next_bit.c
+++ /dev/null
@@ -1,285 +0,0 @@
1/* find_next_bit.c: fallback find next bit implementation
2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/bitops.h>
13#include <linux/export.h>
14#include <asm/types.h>
15#include <asm/byteorder.h>
16
17#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
18
19#ifndef find_next_bit
20/*
21 * Find the next set bit in a memory region.
22 */
23unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
24 unsigned long offset)
25{
26 const unsigned long *p = addr + BITOP_WORD(offset);
27 unsigned long result = offset & ~(BITS_PER_LONG-1);
28 unsigned long tmp;
29
30 if (offset >= size)
31 return size;
32 size -= result;
33 offset %= BITS_PER_LONG;
34 if (offset) {
35 tmp = *(p++);
36 tmp &= (~0UL << offset);
37 if (size < BITS_PER_LONG)
38 goto found_first;
39 if (tmp)
40 goto found_middle;
41 size -= BITS_PER_LONG;
42 result += BITS_PER_LONG;
43 }
44 while (size & ~(BITS_PER_LONG-1)) {
45 if ((tmp = *(p++)))
46 goto found_middle;
47 result += BITS_PER_LONG;
48 size -= BITS_PER_LONG;
49 }
50 if (!size)
51 return result;
52 tmp = *p;
53
54found_first:
55 tmp &= (~0UL >> (BITS_PER_LONG - size));
56 if (tmp == 0UL) /* Are any bits set? */
57 return result + size; /* Nope. */
58found_middle:
59 return result + __ffs(tmp);
60}
61EXPORT_SYMBOL(find_next_bit);
62#endif
63
64#ifndef find_next_zero_bit
65/*
66 * This implementation of find_{first,next}_zero_bit was stolen from
67 * Linus' asm-alpha/bitops.h.
68 */
69unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
70 unsigned long offset)
71{
72 const unsigned long *p = addr + BITOP_WORD(offset);
73 unsigned long result = offset & ~(BITS_PER_LONG-1);
74 unsigned long tmp;
75
76 if (offset >= size)
77 return size;
78 size -= result;
79 offset %= BITS_PER_LONG;
80 if (offset) {
81 tmp = *(p++);
82 tmp |= ~0UL >> (BITS_PER_LONG - offset);
83 if (size < BITS_PER_LONG)
84 goto found_first;
85 if (~tmp)
86 goto found_middle;
87 size -= BITS_PER_LONG;
88 result += BITS_PER_LONG;
89 }
90 while (size & ~(BITS_PER_LONG-1)) {
91 if (~(tmp = *(p++)))
92 goto found_middle;
93 result += BITS_PER_LONG;
94 size -= BITS_PER_LONG;
95 }
96 if (!size)
97 return result;
98 tmp = *p;
99
100found_first:
101 tmp |= ~0UL << size;
102 if (tmp == ~0UL) /* Are any bits zero? */
103 return result + size; /* Nope. */
104found_middle:
105 return result + ffz(tmp);
106}
107EXPORT_SYMBOL(find_next_zero_bit);
108#endif
109
110#ifndef find_first_bit
111/*
112 * Find the first set bit in a memory region.
113 */
114unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
115{
116 const unsigned long *p = addr;
117 unsigned long result = 0;
118 unsigned long tmp;
119
120 while (size & ~(BITS_PER_LONG-1)) {
121 if ((tmp = *(p++)))
122 goto found;
123 result += BITS_PER_LONG;
124 size -= BITS_PER_LONG;
125 }
126 if (!size)
127 return result;
128
129 tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
130 if (tmp == 0UL) /* Are any bits set? */
131 return result + size; /* Nope. */
132found:
133 return result + __ffs(tmp);
134}
135EXPORT_SYMBOL(find_first_bit);
136#endif
137
138#ifndef find_first_zero_bit
139/*
140 * Find the first cleared bit in a memory region.
141 */
142unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
143{
144 const unsigned long *p = addr;
145 unsigned long result = 0;
146 unsigned long tmp;
147
148 while (size & ~(BITS_PER_LONG-1)) {
149 if (~(tmp = *(p++)))
150 goto found;
151 result += BITS_PER_LONG;
152 size -= BITS_PER_LONG;
153 }
154 if (!size)
155 return result;
156
157 tmp = (*p) | (~0UL << size);
158 if (tmp == ~0UL) /* Are any bits zero? */
159 return result + size; /* Nope. */
160found:
161 return result + ffz(tmp);
162}
163EXPORT_SYMBOL(find_first_zero_bit);
164#endif
165
166#ifdef __BIG_ENDIAN
167
168/* include/linux/byteorder does not support "unsigned long" type */
169static inline unsigned long ext2_swabp(const unsigned long * x)
170{
171#if BITS_PER_LONG == 64
172 return (unsigned long) __swab64p((u64 *) x);
173#elif BITS_PER_LONG == 32
174 return (unsigned long) __swab32p((u32 *) x);
175#else
176#error BITS_PER_LONG not defined
177#endif
178}
179
180/* include/linux/byteorder doesn't support "unsigned long" type */
181static inline unsigned long ext2_swab(const unsigned long y)
182{
183#if BITS_PER_LONG == 64
184 return (unsigned long) __swab64((u64) y);
185#elif BITS_PER_LONG == 32
186 return (unsigned long) __swab32((u32) y);
187#else
188#error BITS_PER_LONG not defined
189#endif
190}
191
192#ifndef find_next_zero_bit_le
193unsigned long find_next_zero_bit_le(const void *addr, unsigned
194 long size, unsigned long offset)
195{
196 const unsigned long *p = addr;
197 unsigned long result = offset & ~(BITS_PER_LONG - 1);
198 unsigned long tmp;
199
200 if (offset >= size)
201 return size;
202 p += BITOP_WORD(offset);
203 size -= result;
204 offset &= (BITS_PER_LONG - 1UL);
205 if (offset) {
206 tmp = ext2_swabp(p++);
207 tmp |= (~0UL >> (BITS_PER_LONG - offset));
208 if (size < BITS_PER_LONG)
209 goto found_first;
210 if (~tmp)
211 goto found_middle;
212 size -= BITS_PER_LONG;
213 result += BITS_PER_LONG;
214 }
215
216 while (size & ~(BITS_PER_LONG - 1)) {
217 if (~(tmp = *(p++)))
218 goto found_middle_swap;
219 result += BITS_PER_LONG;
220 size -= BITS_PER_LONG;
221 }
222 if (!size)
223 return result;
224 tmp = ext2_swabp(p);
225found_first:
226 tmp |= ~0UL << size;
227 if (tmp == ~0UL) /* Are any bits zero? */
228 return result + size; /* Nope. Skip ffz */
229found_middle:
230 return result + ffz(tmp);
231
232found_middle_swap:
233 return result + ffz(ext2_swab(tmp));
234}
235EXPORT_SYMBOL(find_next_zero_bit_le);
236#endif
237
238#ifndef find_next_bit_le
239unsigned long find_next_bit_le(const void *addr, unsigned
240 long size, unsigned long offset)
241{
242 const unsigned long *p = addr;
243 unsigned long result = offset & ~(BITS_PER_LONG - 1);
244 unsigned long tmp;
245
246 if (offset >= size)
247 return size;
248 p += BITOP_WORD(offset);
249 size -= result;
250 offset &= (BITS_PER_LONG - 1UL);
251 if (offset) {
252 tmp = ext2_swabp(p++);
253 tmp &= (~0UL << offset);
254 if (size < BITS_PER_LONG)
255 goto found_first;
256 if (tmp)
257 goto found_middle;
258 size -= BITS_PER_LONG;
259 result += BITS_PER_LONG;
260 }
261
262 while (size & ~(BITS_PER_LONG - 1)) {
263 tmp = *(p++);
264 if (tmp)
265 goto found_middle_swap;
266 result += BITS_PER_LONG;
267 size -= BITS_PER_LONG;
268 }
269 if (!size)
270 return result;
271 tmp = ext2_swabp(p);
272found_first:
273 tmp &= (~0UL >> (BITS_PER_LONG - size));
274 if (tmp == 0UL) /* Are any bits set? */
275 return result + size; /* Nope. */
276found_middle:
277 return result + __ffs(tmp);
278
279found_middle_swap:
280 return result + __ffs(ext2_swab(tmp));
281}
282EXPORT_SYMBOL(find_next_bit_le);
283#endif
284
285#endif /* __BIG_ENDIAN */
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
new file mode 100644
index 000000000000..df30632f0bef
--- /dev/null
+++ b/lib/iommu-common.c
@@ -0,0 +1,270 @@
1/*
2 * IOMMU mmap management and range allocation functions.
3 * Based almost entirely upon the powerpc iommu allocator.
4 */
5
6#include <linux/export.h>
7#include <linux/bitmap.h>
8#include <linux/bug.h>
9#include <linux/iommu-helper.h>
10#include <linux/iommu-common.h>
11#include <linux/dma-mapping.h>
12#include <linux/hash.h>
13
14#ifndef DMA_ERROR_CODE
15#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
16#endif
17
18static unsigned long iommu_large_alloc = 15;
19
20static DEFINE_PER_CPU(unsigned int, iommu_hash_common);
21
22static inline bool need_flush(struct iommu_map_table *iommu)
23{
24 return (iommu->lazy_flush != NULL &&
25 (iommu->flags & IOMMU_NEED_FLUSH) != 0);
26}
27
28static inline void set_flush(struct iommu_map_table *iommu)
29{
30 iommu->flags |= IOMMU_NEED_FLUSH;
31}
32
33static inline void clear_flush(struct iommu_map_table *iommu)
34{
35 iommu->flags &= ~IOMMU_NEED_FLUSH;
36}
37
38static void setup_iommu_pool_hash(void)
39{
40 unsigned int i;
41 static bool do_once;
42
43 if (do_once)
44 return;
45 do_once = true;
46 for_each_possible_cpu(i)
47 per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS);
48}
49
50/*
51 * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
52 * is the number of table entries. If `large_pool' is set to true,
53 * the top 1/4 of the table will be set aside for pool allocations
54 * of more than iommu_large_alloc pages.
55 */
56void iommu_tbl_pool_init(struct iommu_map_table *iommu,
57 unsigned long num_entries,
58 u32 table_shift,
59 void (*lazy_flush)(struct iommu_map_table *),
60 bool large_pool, u32 npools,
61 bool skip_span_boundary_check)
62{
63 unsigned int start, i;
64 struct iommu_pool *p = &(iommu->large_pool);
65
66 setup_iommu_pool_hash();
67 if (npools == 0)
68 iommu->nr_pools = IOMMU_NR_POOLS;
69 else
70 iommu->nr_pools = npools;
71 BUG_ON(npools > IOMMU_NR_POOLS);
72
73 iommu->table_shift = table_shift;
74 iommu->lazy_flush = lazy_flush;
75 start = 0;
76 if (skip_span_boundary_check)
77 iommu->flags |= IOMMU_NO_SPAN_BOUND;
78 if (large_pool)
79 iommu->flags |= IOMMU_HAS_LARGE_POOL;
80
81 if (!large_pool)
82 iommu->poolsize = num_entries/iommu->nr_pools;
83 else
84 iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
85 for (i = 0; i < iommu->nr_pools; i++) {
86 spin_lock_init(&(iommu->pools[i].lock));
87 iommu->pools[i].start = start;
88 iommu->pools[i].hint = start;
89 start += iommu->poolsize; /* start for next pool */
90 iommu->pools[i].end = start - 1;
91 }
92 if (!large_pool)
93 return;
94 /* initialize large_pool */
95 spin_lock_init(&(p->lock));
96 p->start = start;
97 p->hint = p->start;
98 p->end = num_entries;
99}
100EXPORT_SYMBOL(iommu_tbl_pool_init);
101
102unsigned long iommu_tbl_range_alloc(struct device *dev,
103 struct iommu_map_table *iommu,
104 unsigned long npages,
105 unsigned long *handle,
106 unsigned long mask,
107 unsigned int align_order)
108{
109 unsigned int pool_hash = __this_cpu_read(iommu_hash_common);
110 unsigned long n, end, start, limit, boundary_size;
111 struct iommu_pool *pool;
112 int pass = 0;
113 unsigned int pool_nr;
114 unsigned int npools = iommu->nr_pools;
115 unsigned long flags;
116 bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
117 bool largealloc = (large_pool && npages > iommu_large_alloc);
118 unsigned long shift;
119 unsigned long align_mask = 0;
120
121 if (align_order > 0)
122 align_mask = 0xffffffffffffffffl >> (64 - align_order);
123
124 /* Sanity check */
125 if (unlikely(npages == 0)) {
126 WARN_ON_ONCE(1);
127 return DMA_ERROR_CODE;
128 }
129
130 if (largealloc) {
131 pool = &(iommu->large_pool);
132 pool_nr = 0; /* to keep compiler happy */
133 } else {
134 /* pick out pool_nr */
135 pool_nr = pool_hash & (npools - 1);
136 pool = &(iommu->pools[pool_nr]);
137 }
138 spin_lock_irqsave(&pool->lock, flags);
139
140 again:
141 if (pass == 0 && handle && *handle &&
142 (*handle >= pool->start) && (*handle < pool->end))
143 start = *handle;
144 else
145 start = pool->hint;
146
147 limit = pool->end;
148
149 /* The case below can happen if we have a small segment appended
150 * to a large, or when the previous alloc was at the very end of
151 * the available space. If so, go back to the beginning. If a
152 * flush is needed, it will get done based on the return value
153 * from iommu_area_alloc() below.
154 */
155 if (start >= limit)
156 start = pool->start;
157 shift = iommu->table_map_base >> iommu->table_shift;
158 if (limit + shift > mask) {
159 limit = mask - shift + 1;
160 /* If we're constrained on address range, first try
161 * at the masked hint to avoid O(n) search complexity,
162 * but on second pass, start at 0 in pool 0.
163 */
164 if ((start & mask) >= limit || pass > 0) {
165 spin_unlock(&(pool->lock));
166 pool = &(iommu->pools[0]);
167 spin_lock(&(pool->lock));
168 start = pool->start;
169 } else {
170 start &= mask;
171 }
172 }
173
174 if (dev)
175 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
176 1 << iommu->table_shift);
177 else
178 boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
179
180 boundary_size = boundary_size >> iommu->table_shift;
181 /*
182 * if the skip_span_boundary_check had been set during init, we set
183 * things up so that iommu_is_span_boundary() merely checks if the
184 * (index + npages) < num_tsb_entries
185 */
186 if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
187 shift = 0;
188 boundary_size = iommu->poolsize * iommu->nr_pools;
189 }
190 n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
191 boundary_size, align_mask);
192 if (n == -1) {
193 if (likely(pass == 0)) {
194 /* First failure, rescan from the beginning. */
195 pool->hint = pool->start;
196 set_flush(iommu);
197 pass++;
198 goto again;
199 } else if (!largealloc && pass <= iommu->nr_pools) {
200 spin_unlock(&(pool->lock));
201 pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
202 pool = &(iommu->pools[pool_nr]);
203 spin_lock(&(pool->lock));
204 pool->hint = pool->start;
205 set_flush(iommu);
206 pass++;
207 goto again;
208 } else {
209 /* give up */
210 n = DMA_ERROR_CODE;
211 goto bail;
212 }
213 }
214 if (n < pool->hint || need_flush(iommu)) {
215 clear_flush(iommu);
216 iommu->lazy_flush(iommu);
217 }
218
219 end = n + npages;
220 pool->hint = end;
221
222 /* Update handle for SG allocations */
223 if (handle)
224 *handle = end;
225bail:
226 spin_unlock_irqrestore(&(pool->lock), flags);
227
228 return n;
229}
230EXPORT_SYMBOL(iommu_tbl_range_alloc);
231
232static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
233 unsigned long entry)
234{
235 struct iommu_pool *p;
236 unsigned long largepool_start = tbl->large_pool.start;
237 bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
238
239 /* The large pool is the last pool at the top of the table */
240 if (large_pool && entry >= largepool_start) {
241 p = &tbl->large_pool;
242 } else {
243 unsigned int pool_nr = entry / tbl->poolsize;
244
245 BUG_ON(pool_nr >= tbl->nr_pools);
246 p = &tbl->pools[pool_nr];
247 }
248 return p;
249}
250
251/* Caller supplies the index of the entry into the iommu map table
252 * itself when the mapping from dma_addr to the entry is not the
253 * default addr->entry mapping below.
254 */
255void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
256 unsigned long npages, unsigned long entry)
257{
258 struct iommu_pool *pool;
259 unsigned long flags;
260 unsigned long shift = iommu->table_shift;
261
262 if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */
263 entry = (dma_addr - iommu->table_map_base) >> shift;
264 pool = get_pool(iommu, entry);
265
266 spin_lock_irqsave(&(pool->lock), flags);
267 bitmap_clear(iommu->map, entry, npages);
268 spin_unlock_irqrestore(&(pool->lock), flags);
269}
270EXPORT_SYMBOL(iommu_tbl_range_free);
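A rough sketch of how a driver might set up and use the new allocator, based only on the iommu_map_table fields referenced in this file (the table size, page shift and DVMA base below are invented values, and error handling is trimmed):

#include <linux/bitops.h>
#include <linux/dma-mapping.h>
#include <linux/iommu-common.h>

#define EX_NUM_ENTRIES	(1UL << 16)	/* hypothetical table size */
#define EX_TABLE_SHIFT	13		/* hypothetical 8K IOMMU page */

static struct iommu_map_table ex_tbl;
static unsigned long ex_map[BITS_TO_LONGS(EX_NUM_ENTRIES)];

static void ex_init(void)
{
	ex_tbl.table_map_base = 0x80000000UL;	/* hypothetical DVMA base */
	ex_tbl.map = ex_map;
	/* default pool count, no large pool, no lazy-flush callback */
	iommu_tbl_pool_init(&ex_tbl, EX_NUM_ENTRIES, EX_TABLE_SHIFT,
			    NULL, false, 0, false);
}

static u64 ex_map_one_page(struct device *dev)
{
	unsigned long entry;

	entry = iommu_tbl_range_alloc(dev, &ex_tbl, 1, NULL, ~0UL, 0);
	if (entry == DMA_ERROR_CODE)	/* allocation failed */
		return DMA_ERROR_CODE;
	/* translate the table index to a DVMA address; the reverse of the
	 * default addr->entry mapping used by iommu_tbl_range_free() */
	return ex_tbl.table_map_base + (entry << EX_TABLE_SHIFT);
}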
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 0c9216c48762..86c8911b0e3a 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -13,6 +13,43 @@
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+static int __read_mostly ioremap_pud_capable;
+static int __read_mostly ioremap_pmd_capable;
+static int __read_mostly ioremap_huge_disabled;
+
+static int __init set_nohugeiomap(char *str)
+{
+	ioremap_huge_disabled = 1;
+	return 0;
+}
+early_param("nohugeiomap", set_nohugeiomap);
+
+void __init ioremap_huge_init(void)
+{
+	if (!ioremap_huge_disabled) {
+		if (arch_ioremap_pud_supported())
+			ioremap_pud_capable = 1;
+		if (arch_ioremap_pmd_supported())
+			ioremap_pmd_capable = 1;
+	}
+}
+
+static inline int ioremap_pud_enabled(void)
+{
+	return ioremap_pud_capable;
+}
+
+static inline int ioremap_pmd_enabled(void)
+{
+	return ioremap_pmd_capable;
+}
+
+#else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
+static inline int ioremap_pud_enabled(void) { return 0; }
+static inline int ioremap_pmd_enabled(void) { return 0; }
+#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
+
 static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
 		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
@@ -43,6 +80,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 		return -ENOMEM;
 	do {
 		next = pmd_addr_end(addr, end);
+
+		if (ioremap_pmd_enabled() &&
+		    ((next - addr) == PMD_SIZE) &&
+		    IS_ALIGNED(phys_addr + addr, PMD_SIZE)) {
+			if (pmd_set_huge(pmd, phys_addr + addr, prot))
+				continue;
+		}
+
 		if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot))
 			return -ENOMEM;
 	} while (pmd++, addr = next, addr != end);
@@ -61,6 +106,14 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
+
+		if (ioremap_pud_enabled() &&
+		    ((next - addr) == PUD_SIZE) &&
+		    IS_ALIGNED(phys_addr + addr, PUD_SIZE)) {
+			if (pud_set_huge(pud, phys_addr + addr, prot))
+				continue;
+		}
+
 		if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot))
 			return -ENOMEM;
 	} while (pud++, addr = next, addr != end);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
new file mode 100644
index 000000000000..75232ad0a5e7
--- /dev/null
+++ b/lib/iov_iter.c
@@ -0,0 +1,851 @@
1#include <linux/export.h>
2#include <linux/uio.h>
3#include <linux/pagemap.h>
4#include <linux/slab.h>
5#include <linux/vmalloc.h>
6#include <net/checksum.h>
7
8#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
9 size_t left; \
10 size_t wanted = n; \
11 __p = i->iov; \
12 __v.iov_len = min(n, __p->iov_len - skip); \
13 if (likely(__v.iov_len)) { \
14 __v.iov_base = __p->iov_base + skip; \
15 left = (STEP); \
16 __v.iov_len -= left; \
17 skip += __v.iov_len; \
18 n -= __v.iov_len; \
19 } else { \
20 left = 0; \
21 } \
22 while (unlikely(!left && n)) { \
23 __p++; \
24 __v.iov_len = min(n, __p->iov_len); \
25 if (unlikely(!__v.iov_len)) \
26 continue; \
27 __v.iov_base = __p->iov_base; \
28 left = (STEP); \
29 __v.iov_len -= left; \
30 skip = __v.iov_len; \
31 n -= __v.iov_len; \
32 } \
33 n = wanted - n; \
34}
35
36#define iterate_kvec(i, n, __v, __p, skip, STEP) { \
37 size_t wanted = n; \
38 __p = i->kvec; \
39 __v.iov_len = min(n, __p->iov_len - skip); \
40 if (likely(__v.iov_len)) { \
41 __v.iov_base = __p->iov_base + skip; \
42 (void)(STEP); \
43 skip += __v.iov_len; \
44 n -= __v.iov_len; \
45 } \
46 while (unlikely(n)) { \
47 __p++; \
48 __v.iov_len = min(n, __p->iov_len); \
49 if (unlikely(!__v.iov_len)) \
50 continue; \
51 __v.iov_base = __p->iov_base; \
52 (void)(STEP); \
53 skip = __v.iov_len; \
54 n -= __v.iov_len; \
55 } \
56 n = wanted; \
57}
58
59#define iterate_bvec(i, n, __v, __p, skip, STEP) { \
60 size_t wanted = n; \
61 __p = i->bvec; \
62 __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \
63 if (likely(__v.bv_len)) { \
64 __v.bv_page = __p->bv_page; \
65 __v.bv_offset = __p->bv_offset + skip; \
66 (void)(STEP); \
67 skip += __v.bv_len; \
68 n -= __v.bv_len; \
69 } \
70 while (unlikely(n)) { \
71 __p++; \
72 __v.bv_len = min_t(size_t, n, __p->bv_len); \
73 if (unlikely(!__v.bv_len)) \
74 continue; \
75 __v.bv_page = __p->bv_page; \
76 __v.bv_offset = __p->bv_offset; \
77 (void)(STEP); \
78 skip = __v.bv_len; \
79 n -= __v.bv_len; \
80 } \
81 n = wanted; \
82}
83
84#define iterate_all_kinds(i, n, v, I, B, K) { \
85 size_t skip = i->iov_offset; \
86 if (unlikely(i->type & ITER_BVEC)) { \
87 const struct bio_vec *bvec; \
88 struct bio_vec v; \
89 iterate_bvec(i, n, v, bvec, skip, (B)) \
90 } else if (unlikely(i->type & ITER_KVEC)) { \
91 const struct kvec *kvec; \
92 struct kvec v; \
93 iterate_kvec(i, n, v, kvec, skip, (K)) \
94 } else { \
95 const struct iovec *iov; \
96 struct iovec v; \
97 iterate_iovec(i, n, v, iov, skip, (I)) \
98 } \
99}
100
101#define iterate_and_advance(i, n, v, I, B, K) { \
102 size_t skip = i->iov_offset; \
103 if (unlikely(i->type & ITER_BVEC)) { \
104 const struct bio_vec *bvec; \
105 struct bio_vec v; \
106 iterate_bvec(i, n, v, bvec, skip, (B)) \
107 if (skip == bvec->bv_len) { \
108 bvec++; \
109 skip = 0; \
110 } \
111 i->nr_segs -= bvec - i->bvec; \
112 i->bvec = bvec; \
113 } else if (unlikely(i->type & ITER_KVEC)) { \
114 const struct kvec *kvec; \
115 struct kvec v; \
116 iterate_kvec(i, n, v, kvec, skip, (K)) \
117 if (skip == kvec->iov_len) { \
118 kvec++; \
119 skip = 0; \
120 } \
121 i->nr_segs -= kvec - i->kvec; \
122 i->kvec = kvec; \
123 } else { \
124 const struct iovec *iov; \
125 struct iovec v; \
126 iterate_iovec(i, n, v, iov, skip, (I)) \
127 if (skip == iov->iov_len) { \
128 iov++; \
129 skip = 0; \
130 } \
131 i->nr_segs -= iov - i->iov; \
132 i->iov = iov; \
133 } \
134 i->count -= n; \
135 i->iov_offset = skip; \
136}
137
138static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
139 struct iov_iter *i)
140{
141 size_t skip, copy, left, wanted;
142 const struct iovec *iov;
143 char __user *buf;
144 void *kaddr, *from;
145
146 if (unlikely(bytes > i->count))
147 bytes = i->count;
148
149 if (unlikely(!bytes))
150 return 0;
151
152 wanted = bytes;
153 iov = i->iov;
154 skip = i->iov_offset;
155 buf = iov->iov_base + skip;
156 copy = min(bytes, iov->iov_len - skip);
157
158 if (!fault_in_pages_writeable(buf, copy)) {
159 kaddr = kmap_atomic(page);
160 from = kaddr + offset;
161
162 /* first chunk, usually the only one */
163 left = __copy_to_user_inatomic(buf, from, copy);
164 copy -= left;
165 skip += copy;
166 from += copy;
167 bytes -= copy;
168
169 while (unlikely(!left && bytes)) {
170 iov++;
171 buf = iov->iov_base;
172 copy = min(bytes, iov->iov_len);
173 left = __copy_to_user_inatomic(buf, from, copy);
174 copy -= left;
175 skip = copy;
176 from += copy;
177 bytes -= copy;
178 }
179 if (likely(!bytes)) {
180 kunmap_atomic(kaddr);
181 goto done;
182 }
183 offset = from - kaddr;
184 buf += copy;
185 kunmap_atomic(kaddr);
186 copy = min(bytes, iov->iov_len - skip);
187 }
188 /* Too bad - revert to non-atomic kmap */
189 kaddr = kmap(page);
190 from = kaddr + offset;
191 left = __copy_to_user(buf, from, copy);
192 copy -= left;
193 skip += copy;
194 from += copy;
195 bytes -= copy;
196 while (unlikely(!left && bytes)) {
197 iov++;
198 buf = iov->iov_base;
199 copy = min(bytes, iov->iov_len);
200 left = __copy_to_user(buf, from, copy);
201 copy -= left;
202 skip = copy;
203 from += copy;
204 bytes -= copy;
205 }
206 kunmap(page);
207done:
208 if (skip == iov->iov_len) {
209 iov++;
210 skip = 0;
211 }
212 i->count -= wanted - bytes;
213 i->nr_segs -= iov - i->iov;
214 i->iov = iov;
215 i->iov_offset = skip;
216 return wanted - bytes;
217}
218
219static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
220 struct iov_iter *i)
221{
222 size_t skip, copy, left, wanted;
223 const struct iovec *iov;
224 char __user *buf;
225 void *kaddr, *to;
226
227 if (unlikely(bytes > i->count))
228 bytes = i->count;
229
230 if (unlikely(!bytes))
231 return 0;
232
233 wanted = bytes;
234 iov = i->iov;
235 skip = i->iov_offset;
236 buf = iov->iov_base + skip;
237 copy = min(bytes, iov->iov_len - skip);
238
239 if (!fault_in_pages_readable(buf, copy)) {
240 kaddr = kmap_atomic(page);
241 to = kaddr + offset;
242
243 /* first chunk, usually the only one */
244 left = __copy_from_user_inatomic(to, buf, copy);
245 copy -= left;
246 skip += copy;
247 to += copy;
248 bytes -= copy;
249
250 while (unlikely(!left && bytes)) {
251 iov++;
252 buf = iov->iov_base;
253 copy = min(bytes, iov->iov_len);
254 left = __copy_from_user_inatomic(to, buf, copy);
255 copy -= left;
256 skip = copy;
257 to += copy;
258 bytes -= copy;
259 }
260 if (likely(!bytes)) {
261 kunmap_atomic(kaddr);
262 goto done;
263 }
264 offset = to - kaddr;
265 buf += copy;
266 kunmap_atomic(kaddr);
267 copy = min(bytes, iov->iov_len - skip);
268 }
269 /* Too bad - revert to non-atomic kmap */
270 kaddr = kmap(page);
271 to = kaddr + offset;
272 left = __copy_from_user(to, buf, copy);
273 copy -= left;
274 skip += copy;
275 to += copy;
276 bytes -= copy;
277 while (unlikely(!left && bytes)) {
278 iov++;
279 buf = iov->iov_base;
280 copy = min(bytes, iov->iov_len);
281 left = __copy_from_user(to, buf, copy);
282 copy -= left;
283 skip = copy;
284 to += copy;
285 bytes -= copy;
286 }
287 kunmap(page);
288done:
289 if (skip == iov->iov_len) {
290 iov++;
291 skip = 0;
292 }
293 i->count -= wanted - bytes;
294 i->nr_segs -= iov - i->iov;
295 i->iov = iov;
296 i->iov_offset = skip;
297 return wanted - bytes;
298}
299
300/*
301 * Fault in the first iovec of the given iov_iter, to a maximum length
302 * of bytes. Returns 0 on success, or non-zero if the memory could not be
303 * accessed (ie. because it is an invalid address).
304 *
305 * writev-intensive code may want this to prefault several iovecs -- that
306 * would be possible (callers must not rely on the fact that _only_ the
307 * first iovec will be faulted with the current implementation).
308 */
309int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
310{
311 if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
312 char __user *buf = i->iov->iov_base + i->iov_offset;
313 bytes = min(bytes, i->iov->iov_len - i->iov_offset);
314 return fault_in_pages_readable(buf, bytes);
315 }
316 return 0;
317}
318EXPORT_SYMBOL(iov_iter_fault_in_readable);
319
320/*
321 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
322 * bytes. For each iovec, fault in each page that constitutes the iovec.
323 *
324 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
325 * because it is an invalid address).
326 */
327int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
328{
329 size_t skip = i->iov_offset;
330 const struct iovec *iov;
331 int err;
332 struct iovec v;
333
334 if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
335 iterate_iovec(i, bytes, v, iov, skip, ({
336 err = fault_in_multipages_readable(v.iov_base,
337 v.iov_len);
338 if (unlikely(err))
339 return err;
340 0;}))
341 }
342 return 0;
343}
344EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable);
345
346void iov_iter_init(struct iov_iter *i, int direction,
347 const struct iovec *iov, unsigned long nr_segs,
348 size_t count)
349{
350 /* It will get better. Eventually... */
351 if (segment_eq(get_fs(), KERNEL_DS)) {
352 direction |= ITER_KVEC;
353 i->type = direction;
354 i->kvec = (struct kvec *)iov;
355 } else {
356 i->type = direction;
357 i->iov = iov;
358 }
359 i->nr_segs = nr_segs;
360 i->iov_offset = 0;
361 i->count = count;
362}
363EXPORT_SYMBOL(iov_iter_init);
364
365static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
366{
367 char *from = kmap_atomic(page);
368 memcpy(to, from + offset, len);
369 kunmap_atomic(from);
370}
371
372static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
373{
374 char *to = kmap_atomic(page);
375 memcpy(to + offset, from, len);
376 kunmap_atomic(to);
377}
378
379static void memzero_page(struct page *page, size_t offset, size_t len)
380{
381 char *addr = kmap_atomic(page);
382 memset(addr + offset, 0, len);
383 kunmap_atomic(addr);
384}
385
386size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
387{
388 char *from = addr;
389 if (unlikely(bytes > i->count))
390 bytes = i->count;
391
392 if (unlikely(!bytes))
393 return 0;
394
395 iterate_and_advance(i, bytes, v,
396 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
397 v.iov_len),
398 memcpy_to_page(v.bv_page, v.bv_offset,
399 (from += v.bv_len) - v.bv_len, v.bv_len),
400 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
401 )
402
403 return bytes;
404}
405EXPORT_SYMBOL(copy_to_iter);
406
407size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
408{
409 char *to = addr;
410 if (unlikely(bytes > i->count))
411 bytes = i->count;
412
413 if (unlikely(!bytes))
414 return 0;
415
416 iterate_and_advance(i, bytes, v,
417 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
418 v.iov_len),
419 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
420 v.bv_offset, v.bv_len),
421 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
422 )
423
424 return bytes;
425}
426EXPORT_SYMBOL(copy_from_iter);
427
428size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
429{
430 char *to = addr;
431 if (unlikely(bytes > i->count))
432 bytes = i->count;
433
434 if (unlikely(!bytes))
435 return 0;
436
437 iterate_and_advance(i, bytes, v,
438 __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
439 v.iov_base, v.iov_len),
440 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
441 v.bv_offset, v.bv_len),
442 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
443 )
444
445 return bytes;
446}
447EXPORT_SYMBOL(copy_from_iter_nocache);
448
449size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
450 struct iov_iter *i)
451{
452 if (i->type & (ITER_BVEC|ITER_KVEC)) {
453 void *kaddr = kmap_atomic(page);
454 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
455 kunmap_atomic(kaddr);
456 return wanted;
457 } else
458 return copy_page_to_iter_iovec(page, offset, bytes, i);
459}
460EXPORT_SYMBOL(copy_page_to_iter);
461
462size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
463 struct iov_iter *i)
464{
465 if (i->type & (ITER_BVEC|ITER_KVEC)) {
466 void *kaddr = kmap_atomic(page);
467 size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
468 kunmap_atomic(kaddr);
469 return wanted;
470 } else
471 return copy_page_from_iter_iovec(page, offset, bytes, i);
472}
473EXPORT_SYMBOL(copy_page_from_iter);
474
475size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
476{
477 if (unlikely(bytes > i->count))
478 bytes = i->count;
479
480 if (unlikely(!bytes))
481 return 0;
482
483 iterate_and_advance(i, bytes, v,
484 __clear_user(v.iov_base, v.iov_len),
485 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
486 memset(v.iov_base, 0, v.iov_len)
487 )
488
489 return bytes;
490}
491EXPORT_SYMBOL(iov_iter_zero);
492
493size_t iov_iter_copy_from_user_atomic(struct page *page,
494 struct iov_iter *i, unsigned long offset, size_t bytes)
495{
496 char *kaddr = kmap_atomic(page), *p = kaddr + offset;
497 iterate_all_kinds(i, bytes, v,
498 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
499 v.iov_base, v.iov_len),
500 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
501 v.bv_offset, v.bv_len),
502 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
503 )
504 kunmap_atomic(kaddr);
505 return bytes;
506}
507EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
508
509void iov_iter_advance(struct iov_iter *i, size_t size)
510{
511 iterate_and_advance(i, size, v, 0, 0, 0)
512}
513EXPORT_SYMBOL(iov_iter_advance);
514
515/*
516 * Return the count of just the current iov_iter segment.
517 */
518size_t iov_iter_single_seg_count(const struct iov_iter *i)
519{
520 if (i->nr_segs == 1)
521 return i->count;
522 else if (i->type & ITER_BVEC)
523 return min(i->count, i->bvec->bv_len - i->iov_offset);
524 else
525 return min(i->count, i->iov->iov_len - i->iov_offset);
526}
527EXPORT_SYMBOL(iov_iter_single_seg_count);
528
529void iov_iter_kvec(struct iov_iter *i, int direction,
530 const struct kvec *kvec, unsigned long nr_segs,
531 size_t count)
532{
533 BUG_ON(!(direction & ITER_KVEC));
534 i->type = direction;
535 i->kvec = kvec;
536 i->nr_segs = nr_segs;
537 i->iov_offset = 0;
538 i->count = count;
539}
540EXPORT_SYMBOL(iov_iter_kvec);
541
542void iov_iter_bvec(struct iov_iter *i, int direction,
543 const struct bio_vec *bvec, unsigned long nr_segs,
544 size_t count)
545{
546 BUG_ON(!(direction & ITER_BVEC));
547 i->type = direction;
548 i->bvec = bvec;
549 i->nr_segs = nr_segs;
550 i->iov_offset = 0;
551 i->count = count;
552}
553EXPORT_SYMBOL(iov_iter_bvec);
554
555unsigned long iov_iter_alignment(const struct iov_iter *i)
556{
557 unsigned long res = 0;
558 size_t size = i->count;
559
560 if (!size)
561 return 0;
562
563 iterate_all_kinds(i, size, v,
564 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
565 res |= v.bv_offset | v.bv_len,
566 res |= (unsigned long)v.iov_base | v.iov_len
567 )
568 return res;
569}
570EXPORT_SYMBOL(iov_iter_alignment);
571
572ssize_t iov_iter_get_pages(struct iov_iter *i,
573 struct page **pages, size_t maxsize, unsigned maxpages,
574 size_t *start)
575{
576 if (maxsize > i->count)
577 maxsize = i->count;
578
579 if (!maxsize)
580 return 0;
581
582 iterate_all_kinds(i, maxsize, v, ({
583 unsigned long addr = (unsigned long)v.iov_base;
584 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
585 int n;
586 int res;
587
588 if (len > maxpages * PAGE_SIZE)
589 len = maxpages * PAGE_SIZE;
590 addr &= ~(PAGE_SIZE - 1);
591 n = DIV_ROUND_UP(len, PAGE_SIZE);
592 res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
593 if (unlikely(res < 0))
594 return res;
595 return (res == n ? len : res * PAGE_SIZE) - *start;
596 0;}),({
597 /* can't be more than PAGE_SIZE */
598 *start = v.bv_offset;
599 get_page(*pages = v.bv_page);
600 return v.bv_len;
601 }),({
602 return -EFAULT;
603 })
604 )
605 return 0;
606}
607EXPORT_SYMBOL(iov_iter_get_pages);
608
609static struct page **get_pages_array(size_t n)
610{
611 struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
612 if (!p)
613 p = vmalloc(n * sizeof(struct page *));
614 return p;
615}
616
617ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
618 struct page ***pages, size_t maxsize,
619 size_t *start)
620{
621 struct page **p;
622
623 if (maxsize > i->count)
624 maxsize = i->count;
625
626 if (!maxsize)
627 return 0;
628
629 iterate_all_kinds(i, maxsize, v, ({
630 unsigned long addr = (unsigned long)v.iov_base;
631 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
632 int n;
633 int res;
634
635 addr &= ~(PAGE_SIZE - 1);
636 n = DIV_ROUND_UP(len, PAGE_SIZE);
637 p = get_pages_array(n);
638 if (!p)
639 return -ENOMEM;
640 res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
641 if (unlikely(res < 0)) {
642 kvfree(p);
643 return res;
644 }
645 *pages = p;
646 return (res == n ? len : res * PAGE_SIZE) - *start;
647 0;}),({
648 /* can't be more than PAGE_SIZE */
649 *start = v.bv_offset;
650 *pages = p = get_pages_array(1);
651 if (!p)
652 return -ENOMEM;
653 get_page(*p = v.bv_page);
654 return v.bv_len;
655 }),({
656 return -EFAULT;
657 })
658 )
659 return 0;
660}
661EXPORT_SYMBOL(iov_iter_get_pages_alloc);
662
663size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
664 struct iov_iter *i)
665{
666 char *to = addr;
667 __wsum sum, next;
668 size_t off = 0;
669 if (unlikely(bytes > i->count))
670 bytes = i->count;
671
672 if (unlikely(!bytes))
673 return 0;
674
675 sum = *csum;
676 iterate_and_advance(i, bytes, v, ({
677 int err = 0;
678 next = csum_and_copy_from_user(v.iov_base,
679 (to += v.iov_len) - v.iov_len,
680 v.iov_len, 0, &err);
681 if (!err) {
682 sum = csum_block_add(sum, next, off);
683 off += v.iov_len;
684 }
685 err ? v.iov_len : 0;
686 }), ({
687 char *p = kmap_atomic(v.bv_page);
688 next = csum_partial_copy_nocheck(p + v.bv_offset,
689 (to += v.bv_len) - v.bv_len,
690 v.bv_len, 0);
691 kunmap_atomic(p);
692 sum = csum_block_add(sum, next, off);
693 off += v.bv_len;
694 }),({
695 next = csum_partial_copy_nocheck(v.iov_base,
696 (to += v.iov_len) - v.iov_len,
697 v.iov_len, 0);
698 sum = csum_block_add(sum, next, off);
699 off += v.iov_len;
700 })
701 )
702 *csum = sum;
703 return bytes;
704}
705EXPORT_SYMBOL(csum_and_copy_from_iter);
706
707size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum,
708 struct iov_iter *i)
709{
710 char *from = addr;
711 __wsum sum, next;
712 size_t off = 0;
713 if (unlikely(bytes > i->count))
714 bytes = i->count;
715
716 if (unlikely(!bytes))
717 return 0;
718
719 sum = *csum;
720 iterate_and_advance(i, bytes, v, ({
721 int err = 0;
722 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
723 v.iov_base,
724 v.iov_len, 0, &err);
725 if (!err) {
726 sum = csum_block_add(sum, next, off);
727 off += v.iov_len;
728 }
729 err ? v.iov_len : 0;
730 }), ({
731 char *p = kmap_atomic(v.bv_page);
732 next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
733 p + v.bv_offset,
734 v.bv_len, 0);
735 kunmap_atomic(p);
736 sum = csum_block_add(sum, next, off);
737 off += v.bv_len;
738 }),({
739 next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
740 v.iov_base,
741 v.iov_len, 0);
742 sum = csum_block_add(sum, next, off);
743 off += v.iov_len;
744 })
745 )
746 *csum = sum;
747 return bytes;
748}
749EXPORT_SYMBOL(csum_and_copy_to_iter);
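[Editor's note] Both checksum-and-copy helpers above fold the bytes they move into a running sum owned by the caller, which is how a protocol's sendmsg/recvmsg path would use them. A hypothetical wrapper, purely as a sketch:

#include <linux/uio.h>

/* Copy 'len' bytes of 'data' to the iterator and accumulate their
 * checksum into *sum, which the caller has already initialised. */
static int demo_copy_and_csum(void *data, size_t len,
			      __wsum *sum, struct iov_iter *to)
{
	if (csum_and_copy_to_iter(data, len, sum, to) != len)
		return -EFAULT;	/* short copy: the user buffer faulted */
	return 0;
}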
750
751int iov_iter_npages(const struct iov_iter *i, int maxpages)
752{
753 size_t size = i->count;
754 int npages = 0;
755
756 if (!size)
757 return 0;
758
759 iterate_all_kinds(i, size, v, ({
760 unsigned long p = (unsigned long)v.iov_base;
761 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
762 - p / PAGE_SIZE;
763 if (npages >= maxpages)
764 return maxpages;
765 0;}),({
766 npages++;
767 if (npages >= maxpages)
768 return maxpages;
769 }),({
770 unsigned long p = (unsigned long)v.iov_base;
771 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
772 - p / PAGE_SIZE;
773 if (npages >= maxpages)
774 return maxpages;
775 })
776 )
777 return npages;
778}
779EXPORT_SYMBOL(iov_iter_npages);
780
781const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
782{
783 *new = *old;
784 if (new->type & ITER_BVEC)
785 return new->bvec = kmemdup(new->bvec,
786 new->nr_segs * sizeof(struct bio_vec),
787 flags);
788 else
789 /* iovec and kvec have identical layout */
790 return new->iov = kmemdup(new->iov,
791 new->nr_segs * sizeof(struct iovec),
792 flags);
793}
794EXPORT_SYMBOL(dup_iter);
795
796int import_iovec(int type, const struct iovec __user * uvector,
797 unsigned nr_segs, unsigned fast_segs,
798 struct iovec **iov, struct iov_iter *i)
799{
800 ssize_t n;
801 struct iovec *p;
802 n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
803 *iov, &p);
804 if (n < 0) {
805 if (p != *iov)
806 kfree(p);
807 *iov = NULL;
808 return n;
809 }
810 iov_iter_init(i, type, p, nr_segs, n);
811 *iov = p == *iov ? NULL : p;
812 return 0;
813}
814EXPORT_SYMBOL(import_iovec);
815
816#ifdef CONFIG_COMPAT
817#include <linux/compat.h>
818
819int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
820 unsigned nr_segs, unsigned fast_segs,
821 struct iovec **iov, struct iov_iter *i)
822{
823 ssize_t n;
824 struct iovec *p;
825 n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
826 *iov, &p);
827 if (n < 0) {
828 if (p != *iov)
829 kfree(p);
830 *iov = NULL;
831 return n;
832 }
833 iov_iter_init(i, type, p, nr_segs, n);
834 *iov = p == *iov ? NULL : p;
835 return 0;
836}
837#endif
838
839int import_single_range(int rw, void __user *buf, size_t len,
840 struct iovec *iov, struct iov_iter *i)
841{
842 if (len > MAX_RW_COUNT)
843 len = MAX_RW_COUNT;
844 if (unlikely(!access_ok(!rw, buf, len)))
845 return -EFAULT;
846
847 iov->iov_base = buf;
848 iov->iov_len = len;
849 iov_iter_init(i, rw, iov, 1, len);
850 return 0;
851}
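[Editor's note] import_single_range() and import_iovec() are the intended entry points for syscall code: they validate the user memory once, clamp the size, and hand back a ready-to-use iterator. A minimal, hypothetical write-style caller:

#include <linux/uio.h>

/* Sketch only: pull the user data described by ubuf/len into kbuf,
 * which is assumed to be large enough. */
static ssize_t demo_write_path(void __user *ubuf, size_t len, void *kbuf)
{
	struct iovec iov;
	struct iov_iter iter;
	int ret;

	ret = import_single_range(WRITE, ubuf, len, &iov, &iter);
	if (ret)
		return ret;		/* -EFAULT on an inaccessible range */

	/* note that the count may have been clamped to MAX_RW_COUNT */
	return copy_from_iter(kbuf, iov_iter_count(&iter), &iter);
}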
diff --git a/lib/kobject.c b/lib/kobject.c
index 03d4ab349fa7..3b841b97fccd 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -576,8 +576,13 @@ void kobject_del(struct kobject *kobj)
576 */ 576 */
577struct kobject *kobject_get(struct kobject *kobj) 577struct kobject *kobject_get(struct kobject *kobj)
578{ 578{
579 if (kobj) 579 if (kobj) {
580 if (!kobj->state_initialized)
581 WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
582 "initialized, yet kobject_get() is being "
583 "called.\n", kobject_name(kobj), kobj);
580 kref_get(&kobj->kref); 584 kref_get(&kobj->kref);
585 }
581 return kobj; 586 return kobj;
582} 587}
583 588
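[Editor's note] The new check turns a silent refcount bump on an uninitialised kobject into a loud warning. A contrived example of the misuse it now reports (names are illustrative):

#include <linux/kobject.h>

static struct kobject demo_kobj;	/* never passed to kobject_init() */

static void demo_bad_get(void)
{
	/* state_initialized is still 0, so this now emits the
	 * "is not initialized, yet kobject_get() is being called"
	 * warning before taking the reference. */
	kobject_get(&demo_kobj);
}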
diff --git a/lib/lcm.c b/lib/lcm.c
index e97dbd51e756..03d7fcb420b5 100644
--- a/lib/lcm.c
+++ b/lib/lcm.c
@@ -12,3 +12,14 @@ unsigned long lcm(unsigned long a, unsigned long b)
12 return 0; 12 return 0;
13} 13}
14EXPORT_SYMBOL_GPL(lcm); 14EXPORT_SYMBOL_GPL(lcm);
15
16unsigned long lcm_not_zero(unsigned long a, unsigned long b)
17{
18 unsigned long l = lcm(a, b);
19
20 if (l)
21 return l;
22
23 return (b ? : a);
24}
25EXPORT_SYMBOL_GPL(lcm_not_zero);
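[Editor's note] lcm() returns 0 whenever either argument is 0, which is unusable for things like stacking I/O limits where 0 means "no limit"; lcm_not_zero() falls back to whichever argument is non-zero instead. For example:

	/* lcm(0, 4)          == 0   -- would wipe out the existing limit    */
	/* lcm_not_zero(0, 4) == 4   -- keeps the usable value               */
	/* lcm_not_zero(4, 6) == 12  -- identical to lcm() when both are set */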
diff --git a/lib/lockref.c b/lib/lockref.c
index ecb9a665ec19..494994bf17c8 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -18,7 +18,7 @@
18#define CMPXCHG_LOOP(CODE, SUCCESS) do { \ 18#define CMPXCHG_LOOP(CODE, SUCCESS) do { \
19 struct lockref old; \ 19 struct lockref old; \
20 BUILD_BUG_ON(sizeof(old) != 8); \ 20 BUILD_BUG_ON(sizeof(old) != 8); \
21 old.lock_count = ACCESS_ONCE(lockref->lock_count); \ 21 old.lock_count = READ_ONCE(lockref->lock_count); \
22 while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ 22 while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \
23 struct lockref new = old, prev = old; \ 23 struct lockref new = old, prev = old; \
24 CODE \ 24 CODE \
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 852c81e3ba9a..028f5d996eef 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -247,10 +247,11 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
247 * progress) and "changed", when this in fact lead to an successful 247 * progress) and "changed", when this in fact lead to an successful
248 * update of the cache. 248 * update of the cache.
249 */ 249 */
250 return seq_printf(seq, "\t%s: used:%u/%u " 250 seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
251 "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", 251 lc->name, lc->used, lc->nr_elements,
252 lc->name, lc->used, lc->nr_elements, 252 lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
253 lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); 253
254 return 0;
254} 255}
255 256
256static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) 257static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 7a85967060a5..26cc6029b280 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -47,6 +47,11 @@
47 47
48#include "lz4defs.h" 48#include "lz4defs.h"
49 49
50static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
51#if LZ4_ARCH64
52static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
53#endif
54
50static int lz4_uncompress(const char *source, char *dest, int osize) 55static int lz4_uncompress(const char *source, char *dest, int osize)
51{ 56{
52 const BYTE *ip = (const BYTE *) source; 57 const BYTE *ip = (const BYTE *) source;
@@ -56,10 +61,6 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
56 BYTE *cpy; 61 BYTE *cpy;
57 unsigned token; 62 unsigned token;
58 size_t length; 63 size_t length;
59 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
60#if LZ4_ARCH64
61 size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
62#endif
63 64
64 while (1) { 65 while (1) {
65 66
@@ -116,7 +117,7 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
116 /* copy repeated sequence */ 117 /* copy repeated sequence */
117 if (unlikely((op - ref) < STEPSIZE)) { 118 if (unlikely((op - ref) < STEPSIZE)) {
118#if LZ4_ARCH64 119#if LZ4_ARCH64
119 size_t dec64 = dec64table[op - ref]; 120 int dec64 = dec64table[op - ref];
120#else 121#else
121 const int dec64 = 0; 122 const int dec64 = 0;
122#endif 123#endif
@@ -139,6 +140,9 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
139 /* Error: request to write beyond destination buffer */ 140 /* Error: request to write beyond destination buffer */
140 if (cpy > oend) 141 if (cpy > oend)
141 goto _output_error; 142 goto _output_error;
143 if ((ref + COPYLENGTH) > oend ||
144 (op + COPYLENGTH) > oend)
145 goto _output_error;
142 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); 146 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
143 while (op < cpy) 147 while (op < cpy)
144 *op++ = *ref++; 148 *op++ = *ref++;
@@ -174,11 +178,6 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
174 BYTE * const oend = op + maxoutputsize; 178 BYTE * const oend = op + maxoutputsize;
175 BYTE *cpy; 179 BYTE *cpy;
176 180
177 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
178#if LZ4_ARCH64
179 size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
180#endif
181
182 /* Main Loop */ 181 /* Main Loop */
183 while (ip < iend) { 182 while (ip < iend) {
184 183
@@ -246,7 +245,7 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
246 /* copy repeated sequence */ 245 /* copy repeated sequence */
247 if (unlikely((op - ref) < STEPSIZE)) { 246 if (unlikely((op - ref) < STEPSIZE)) {
248#if LZ4_ARCH64 247#if LZ4_ARCH64
249 size_t dec64 = dec64table[op - ref]; 248 int dec64 = dec64table[op - ref];
250#else 249#else
251 const int dec64 = 0; 250 const int dec64 = 0;
252#endif 251#endif
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 76a1b59523ab..f5907d23272d 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -279,6 +279,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count)
279 int minlen = min_t(int, count, nla_len(src)); 279 int minlen = min_t(int, count, nla_len(src));
280 280
281 memcpy(dest, nla_data(src), minlen); 281 memcpy(dest, nla_data(src), minlen);
282 if (count > minlen)
283 memset(dest + minlen, 0, count - minlen);
282 284
283 return minlen; 285 return minlen;
284} 286}
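[Editor's note] The added memset matters whenever the destination field is larger than the attribute payload: previously the trailing bytes kept whatever the destination already contained. A hypothetical caller showing the effect:

#include <net/netlink.h>

/* Read a possibly-shorter attribute into a fixed u64 field.  With the
 * padding above, the unfilled upper bytes of 'val' are now zero instead
 * of stale memory. */
static u64 demo_attr_to_u64(const struct nlattr *attr)
{
	u64 val;

	nla_memcpy(&val, attr, sizeof(val));
	return val;
}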
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index dbef2314901e..975c6e0434bd 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
131static inline const struct raid6_calls *raid6_choose_gen( 131static inline const struct raid6_calls *raid6_choose_gen(
132 void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) 132 void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
133{ 133{
134 unsigned long perf, bestperf, j0, j1; 134 unsigned long perf, bestgenperf, bestxorperf, j0, j1;
135 int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */
135 const struct raid6_calls *const *algo; 136 const struct raid6_calls *const *algo;
136 const struct raid6_calls *best; 137 const struct raid6_calls *best;
137 138
138 for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { 139 for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
139 if (!best || (*algo)->prefer >= best->prefer) { 140 if (!best || (*algo)->prefer >= best->prefer) {
140 if ((*algo)->valid && !(*algo)->valid()) 141 if ((*algo)->valid && !(*algo)->valid())
141 continue; 142 continue;
@@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen(
153 } 154 }
154 preempt_enable(); 155 preempt_enable();
155 156
156 if (perf > bestperf) { 157 if (perf > bestgenperf) {
157 bestperf = perf; 158 bestgenperf = perf;
158 best = *algo; 159 best = *algo;
159 } 160 }
160 pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name, 161 pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
161 (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); 162 (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
163
164 if (!(*algo)->xor_syndrome)
165 continue;
166
167 perf = 0;
168
169 preempt_disable();
170 j0 = jiffies;
171 while ((j1 = jiffies) == j0)
172 cpu_relax();
173 while (time_before(jiffies,
174 j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
175 (*algo)->xor_syndrome(disks, start, stop,
176 PAGE_SIZE, *dptrs);
177 perf++;
178 }
179 preempt_enable();
180
181 if (best == *algo)
182 bestxorperf = perf;
183
184 pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
185 (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
162 } 186 }
163 } 187 }
164 188
165 if (best) { 189 if (best) {
166 pr_info("raid6: using algorithm %s (%ld MB/s)\n", 190 pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
167 best->name, 191 best->name,
168 (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); 192 (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
193 if (best->xor_syndrome)
194 pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
195 (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
169 raid6_call = *best; 196 raid6_call = *best;
170 } else 197 } else
171 pr_err("raid6: Yikes! No algorithm found!\n"); 198 pr_err("raid6: Yikes! No algorithm found!\n");
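[Editor's note] The benchmark loop above dereferences a second function pointer in every ops table, so struct raid6_calls is assumed to have gained an xor_syndrome member right after gen_syndrome. The layout below is inferred from the initialisers in this series, not quoted from include/linux/raid/pq.h:

struct raid6_calls {
	void (*gen_syndrome)(int disks, size_t bytes, void **ptrs);
	void (*xor_syndrome)(int disks, int start, int stop,
			     size_t bytes, void **ptrs); /* new; NULL if unimplemented */
	int  (*valid)(void);	/* returns 1 if usable; NULL means always valid */
	const char *name;
	int prefer;		/* e.g. has cache hints */
};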
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 7cc12b532e95..bec27fce7501 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -119,6 +119,7 @@ int raid6_have_altivec(void)
119 119
120const struct raid6_calls raid6_altivec$# = { 120const struct raid6_calls raid6_altivec$# = {
121 raid6_altivec$#_gen_syndrome, 121 raid6_altivec$#_gen_syndrome,
122 NULL, /* XOR not yet implemented */
122 raid6_have_altivec, 123 raid6_have_altivec,
123 "altivecx$#", 124 "altivecx$#",
124 0 125 0
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
index bc3b1dd436eb..76734004358d 100644
--- a/lib/raid6/avx2.c
+++ b/lib/raid6/avx2.c
@@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
89 89
90const struct raid6_calls raid6_avx2x1 = { 90const struct raid6_calls raid6_avx2x1 = {
91 raid6_avx21_gen_syndrome, 91 raid6_avx21_gen_syndrome,
92 NULL, /* XOR not yet implemented */
92 raid6_have_avx2, 93 raid6_have_avx2,
93 "avx2x1", 94 "avx2x1",
94 1 /* Has cache hints */ 95 1 /* Has cache hints */
@@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
150 151
151const struct raid6_calls raid6_avx2x2 = { 152const struct raid6_calls raid6_avx2x2 = {
152 raid6_avx22_gen_syndrome, 153 raid6_avx22_gen_syndrome,
154 NULL, /* XOR not yet implemented */
153 raid6_have_avx2, 155 raid6_have_avx2,
154 "avx2x2", 156 "avx2x2",
155 1 /* Has cache hints */ 157 1 /* Has cache hints */
@@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
242 244
243const struct raid6_calls raid6_avx2x4 = { 245const struct raid6_calls raid6_avx2x4 = {
244 raid6_avx24_gen_syndrome, 246 raid6_avx24_gen_syndrome,
247 NULL, /* XOR not yet implemented */
245 raid6_have_avx2, 248 raid6_have_avx2,
246 "avx2x4", 249 "avx2x4",
247 1 /* Has cache hints */ 250 1 /* Has cache hints */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
index 5b50f8dfc5d2..558aeac9342a 100644
--- a/lib/raid6/int.uc
+++ b/lib/raid6/int.uc
@@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
107 } 107 }
108} 108}
109 109
110static void raid6_int$#_xor_syndrome(int disks, int start, int stop,
111 size_t bytes, void **ptrs)
112{
113 u8 **dptr = (u8 **)ptrs;
114 u8 *p, *q;
115 int d, z, z0;
116
117 unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
118
119 z0 = stop; /* P/Q right side optimization */
120 p = dptr[disks-2]; /* XOR parity */
121 q = dptr[disks-1]; /* RS syndrome */
122
123 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
124 /* P/Q data pages */
125 wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
126 for ( z = z0-1 ; z >= start ; z-- ) {
127 wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
128 wp$$ ^= wd$$;
129 w2$$ = MASK(wq$$);
130 w1$$ = SHLBYTE(wq$$);
131 w2$$ &= NBYTES(0x1d);
132 w1$$ ^= w2$$;
133 wq$$ = w1$$ ^ wd$$;
134 }
135 /* P/Q left side optimization */
136 for ( z = start-1 ; z >= 0 ; z-- ) {
137 w2$$ = MASK(wq$$);
138 w1$$ = SHLBYTE(wq$$);
139 w2$$ &= NBYTES(0x1d);
140 wq$$ = w1$$ ^ w2$$;
141 }
142 *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
143 *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
144 }
145
146}
147
110const struct raid6_calls raid6_intx$# = { 148const struct raid6_calls raid6_intx$# = {
111 raid6_int$#_gen_syndrome, 149 raid6_int$#_gen_syndrome,
112 NULL, /* always valid */ 150 raid6_int$#_xor_syndrome,
151 NULL, /* always valid */
113 "int" NSTRING "x$#", 152 "int" NSTRING "x$#",
114 0 153 0
115}; 154};
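[Editor's note] Each inner-loop step above is the standard RAID-6 Q recurrence applied a whole machine word at a time: multiply the running syndrome by x in GF(2^8) (the 0x11d polynomial, hence the 0x1d mask) and XOR in the data word, while P simply accumulates XORs. A scalar, byte-at-a-time sketch of the same step, for illustration only:

#include <linux/types.h>

/* One byte of the SHLBYTE/MASK/NBYTES(0x1d) sequence in the loop above. */
static inline u8 gf256_mul2(u8 q)
{
	return (q << 1) ^ ((q & 0x80) ? 0x1d : 0);
}

/* For each data block z and every byte position:
 *	p ^= d;			-- XOR parity
 *	q  = gf256_mul2(q) ^ d;	-- Reed-Solomon syndrome
 */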
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
index 590c71c9e200..b3b0e1fcd3af 100644
--- a/lib/raid6/mmx.c
+++ b/lib/raid6/mmx.c
@@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
76 76
77const struct raid6_calls raid6_mmxx1 = { 77const struct raid6_calls raid6_mmxx1 = {
78 raid6_mmx1_gen_syndrome, 78 raid6_mmx1_gen_syndrome,
79 NULL, /* XOR not yet implemented */
79 raid6_have_mmx, 80 raid6_have_mmx,
80 "mmxx1", 81 "mmxx1",
81 0 82 0
@@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
134 135
135const struct raid6_calls raid6_mmxx2 = { 136const struct raid6_calls raid6_mmxx2 = {
136 raid6_mmx2_gen_syndrome, 137 raid6_mmx2_gen_syndrome,
138 NULL, /* XOR not yet implemented */
137 raid6_have_mmx, 139 raid6_have_mmx,
138 "mmxx2", 140 "mmxx2",
139 0 141 0
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c
index 36ad4705df1a..d9ad6ee284f4 100644
--- a/lib/raid6/neon.c
+++ b/lib/raid6/neon.c
@@ -42,6 +42,7 @@
42 } \ 42 } \
43 struct raid6_calls const raid6_neonx ## _n = { \ 43 struct raid6_calls const raid6_neonx ## _n = { \
44 raid6_neon ## _n ## _gen_syndrome, \ 44 raid6_neon ## _n ## _gen_syndrome, \
45 NULL, /* XOR not yet implemented */ \
45 raid6_have_neon, \ 46 raid6_have_neon, \
46 "neonx" #_n, \ 47 "neonx" #_n, \
47 0 \ 48 0 \
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
index f76297139445..9025b8ca9aa3 100644
--- a/lib/raid6/sse1.c
+++ b/lib/raid6/sse1.c
@@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
92 92
93const struct raid6_calls raid6_sse1x1 = { 93const struct raid6_calls raid6_sse1x1 = {
94 raid6_sse11_gen_syndrome, 94 raid6_sse11_gen_syndrome,
95 NULL, /* XOR not yet implemented */
95 raid6_have_sse1_or_mmxext, 96 raid6_have_sse1_or_mmxext,
96 "sse1x1", 97 "sse1x1",
97 1 /* Has cache hints */ 98 1 /* Has cache hints */
@@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
154 155
155const struct raid6_calls raid6_sse1x2 = { 156const struct raid6_calls raid6_sse1x2 = {
156 raid6_sse12_gen_syndrome, 157 raid6_sse12_gen_syndrome,
158 NULL, /* XOR not yet implemented */
157 raid6_have_sse1_or_mmxext, 159 raid6_have_sse1_or_mmxext,
158 "sse1x2", 160 "sse1x2",
159 1 /* Has cache hints */ 161 1 /* Has cache hints */
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index 85b82c85f28e..1d2276b007ee 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
88 kernel_fpu_end(); 88 kernel_fpu_end();
89} 89}
90 90
91
92static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
93 size_t bytes, void **ptrs)
94 {
95 u8 **dptr = (u8 **)ptrs;
96 u8 *p, *q;
97 int d, z, z0;
98
99 z0 = stop; /* P/Q right side optimization */
100 p = dptr[disks-2]; /* XOR parity */
101 q = dptr[disks-1]; /* RS syndrome */
102
103 kernel_fpu_begin();
104
105 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
106
107 for ( d = 0 ; d < bytes ; d += 16 ) {
108 asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
109 asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
110 asm volatile("pxor %xmm4,%xmm2");
111 /* P/Q data pages */
112 for ( z = z0-1 ; z >= start ; z-- ) {
113 asm volatile("pxor %xmm5,%xmm5");
114 asm volatile("pcmpgtb %xmm4,%xmm5");
115 asm volatile("paddb %xmm4,%xmm4");
116 asm volatile("pand %xmm0,%xmm5");
117 asm volatile("pxor %xmm5,%xmm4");
118 asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
119 asm volatile("pxor %xmm5,%xmm2");
120 asm volatile("pxor %xmm5,%xmm4");
121 }
122 /* P/Q left side optimization */
123 for ( z = start-1 ; z >= 0 ; z-- ) {
124 asm volatile("pxor %xmm5,%xmm5");
125 asm volatile("pcmpgtb %xmm4,%xmm5");
126 asm volatile("paddb %xmm4,%xmm4");
127 asm volatile("pand %xmm0,%xmm5");
128 asm volatile("pxor %xmm5,%xmm4");
129 }
130 asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
131 /* Don't use movntdq for r/w memory area < cache line */
132 asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
133 asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
134 }
135
136 asm volatile("sfence" : : : "memory");
137 kernel_fpu_end();
138}
139
91const struct raid6_calls raid6_sse2x1 = { 140const struct raid6_calls raid6_sse2x1 = {
92 raid6_sse21_gen_syndrome, 141 raid6_sse21_gen_syndrome,
142 raid6_sse21_xor_syndrome,
93 raid6_have_sse2, 143 raid6_have_sse2,
94 "sse2x1", 144 "sse2x1",
95 1 /* Has cache hints */ 145 1 /* Has cache hints */
@@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
150 kernel_fpu_end(); 200 kernel_fpu_end();
151} 201}
152 202
203 static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
204 size_t bytes, void **ptrs)
205 {
206 u8 **dptr = (u8 **)ptrs;
207 u8 *p, *q;
208 int d, z, z0;
209
210 z0 = stop; /* P/Q right side optimization */
211 p = dptr[disks-2]; /* XOR parity */
212 q = dptr[disks-1]; /* RS syndrome */
213
214 kernel_fpu_begin();
215
216 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
217
218 for ( d = 0 ; d < bytes ; d += 32 ) {
219 asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
220 asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
221 asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
222 asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
223 asm volatile("pxor %xmm4,%xmm2");
224 asm volatile("pxor %xmm6,%xmm3");
225 /* P/Q data pages */
226 for ( z = z0-1 ; z >= start ; z-- ) {
227 asm volatile("pxor %xmm5,%xmm5");
228 asm volatile("pxor %xmm7,%xmm7");
229 asm volatile("pcmpgtb %xmm4,%xmm5");
230 asm volatile("pcmpgtb %xmm6,%xmm7");
231 asm volatile("paddb %xmm4,%xmm4");
232 asm volatile("paddb %xmm6,%xmm6");
233 asm volatile("pand %xmm0,%xmm5");
234 asm volatile("pand %xmm0,%xmm7");
235 asm volatile("pxor %xmm5,%xmm4");
236 asm volatile("pxor %xmm7,%xmm6");
237 asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
238 asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
239 asm volatile("pxor %xmm5,%xmm2");
240 asm volatile("pxor %xmm7,%xmm3");
241 asm volatile("pxor %xmm5,%xmm4");
242 asm volatile("pxor %xmm7,%xmm6");
243 }
244 /* P/Q left side optimization */
245 for ( z = start-1 ; z >= 0 ; z-- ) {
246 asm volatile("pxor %xmm5,%xmm5");
247 asm volatile("pxor %xmm7,%xmm7");
248 asm volatile("pcmpgtb %xmm4,%xmm5");
249 asm volatile("pcmpgtb %xmm6,%xmm7");
250 asm volatile("paddb %xmm4,%xmm4");
251 asm volatile("paddb %xmm6,%xmm6");
252 asm volatile("pand %xmm0,%xmm5");
253 asm volatile("pand %xmm0,%xmm7");
254 asm volatile("pxor %xmm5,%xmm4");
255 asm volatile("pxor %xmm7,%xmm6");
256 }
257 asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
258 asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
259 /* Don't use movntdq for r/w memory area < cache line */
260 asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
261 asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
262 asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
263 asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
264 }
265
266 asm volatile("sfence" : : : "memory");
267 kernel_fpu_end();
268 }
269
153const struct raid6_calls raid6_sse2x2 = { 270const struct raid6_calls raid6_sse2x2 = {
154 raid6_sse22_gen_syndrome, 271 raid6_sse22_gen_syndrome,
272 raid6_sse22_xor_syndrome,
155 raid6_have_sse2, 273 raid6_have_sse2,
156 "sse2x2", 274 "sse2x2",
157 1 /* Has cache hints */ 275 1 /* Has cache hints */
@@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
248 kernel_fpu_end(); 366 kernel_fpu_end();
249} 367}
250 368
369 static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
370 size_t bytes, void **ptrs)
371 {
372 u8 **dptr = (u8 **)ptrs;
373 u8 *p, *q;
374 int d, z, z0;
375
376 z0 = stop; /* P/Q right side optimization */
377 p = dptr[disks-2]; /* XOR parity */
378 q = dptr[disks-1]; /* RS syndrome */
379
380 kernel_fpu_begin();
381
382 asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
383
384 for ( d = 0 ; d < bytes ; d += 64 ) {
385 asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
386 asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
387 asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
388 asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
389 asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
390 asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
391 asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
392 asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
393 asm volatile("pxor %xmm4,%xmm2");
394 asm volatile("pxor %xmm6,%xmm3");
395 asm volatile("pxor %xmm12,%xmm10");
396 asm volatile("pxor %xmm14,%xmm11");
397 /* P/Q data pages */
398 for ( z = z0-1 ; z >= start ; z-- ) {
399 asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
400 asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
401 asm volatile("pxor %xmm5,%xmm5");
402 asm volatile("pxor %xmm7,%xmm7");
403 asm volatile("pxor %xmm13,%xmm13");
404 asm volatile("pxor %xmm15,%xmm15");
405 asm volatile("pcmpgtb %xmm4,%xmm5");
406 asm volatile("pcmpgtb %xmm6,%xmm7");
407 asm volatile("pcmpgtb %xmm12,%xmm13");
408 asm volatile("pcmpgtb %xmm14,%xmm15");
409 asm volatile("paddb %xmm4,%xmm4");
410 asm volatile("paddb %xmm6,%xmm6");
411 asm volatile("paddb %xmm12,%xmm12");
412 asm volatile("paddb %xmm14,%xmm14");
413 asm volatile("pand %xmm0,%xmm5");
414 asm volatile("pand %xmm0,%xmm7");
415 asm volatile("pand %xmm0,%xmm13");
416 asm volatile("pand %xmm0,%xmm15");
417 asm volatile("pxor %xmm5,%xmm4");
418 asm volatile("pxor %xmm7,%xmm6");
419 asm volatile("pxor %xmm13,%xmm12");
420 asm volatile("pxor %xmm15,%xmm14");
421 asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
422 asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
423 asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
424 asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
425 asm volatile("pxor %xmm5,%xmm2");
426 asm volatile("pxor %xmm7,%xmm3");
427 asm volatile("pxor %xmm13,%xmm10");
428 asm volatile("pxor %xmm15,%xmm11");
429 asm volatile("pxor %xmm5,%xmm4");
430 asm volatile("pxor %xmm7,%xmm6");
431 asm volatile("pxor %xmm13,%xmm12");
432 asm volatile("pxor %xmm15,%xmm14");
433 }
434 asm volatile("prefetchnta %0" :: "m" (q[d]));
435 asm volatile("prefetchnta %0" :: "m" (q[d+32]));
436 /* P/Q left side optimization */
437 for ( z = start-1 ; z >= 0 ; z-- ) {
438 asm volatile("pxor %xmm5,%xmm5");
439 asm volatile("pxor %xmm7,%xmm7");
440 asm volatile("pxor %xmm13,%xmm13");
441 asm volatile("pxor %xmm15,%xmm15");
442 asm volatile("pcmpgtb %xmm4,%xmm5");
443 asm volatile("pcmpgtb %xmm6,%xmm7");
444 asm volatile("pcmpgtb %xmm12,%xmm13");
445 asm volatile("pcmpgtb %xmm14,%xmm15");
446 asm volatile("paddb %xmm4,%xmm4");
447 asm volatile("paddb %xmm6,%xmm6");
448 asm volatile("paddb %xmm12,%xmm12");
449 asm volatile("paddb %xmm14,%xmm14");
450 asm volatile("pand %xmm0,%xmm5");
451 asm volatile("pand %xmm0,%xmm7");
452 asm volatile("pand %xmm0,%xmm13");
453 asm volatile("pand %xmm0,%xmm15");
454 asm volatile("pxor %xmm5,%xmm4");
455 asm volatile("pxor %xmm7,%xmm6");
456 asm volatile("pxor %xmm13,%xmm12");
457 asm volatile("pxor %xmm15,%xmm14");
458 }
459 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
460 asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
461 asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
462 asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
463 asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
464 asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
465 asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
466 asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
467 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
468 asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
469 asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
470 asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
471 }
472 asm volatile("sfence" : : : "memory");
473 kernel_fpu_end();
474 }
475
476
251const struct raid6_calls raid6_sse2x4 = { 477const struct raid6_calls raid6_sse2x4 = {
252 raid6_sse24_gen_syndrome, 478 raid6_sse24_gen_syndrome,
479 raid6_sse24_xor_syndrome,
253 raid6_have_sse2, 480 raid6_have_sse2,
254 "sse2x4", 481 "sse2x4",
255 1 /* Has cache hints */ 482 1 /* Has cache hints */
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 5a485b7a7d3c..3bebbabdb510 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -28,11 +28,11 @@ char *dataptrs[NDISKS];
28char data[NDISKS][PAGE_SIZE]; 28char data[NDISKS][PAGE_SIZE];
29char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; 29char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
30 30
31static void makedata(void) 31static void makedata(int start, int stop)
32{ 32{
33 int i, j; 33 int i, j;
34 34
35 for (i = 0; i < NDISKS; i++) { 35 for (i = start; i <= stop; i++) {
36 for (j = 0; j < PAGE_SIZE; j++) 36 for (j = 0; j < PAGE_SIZE; j++)
37 data[i][j] = rand(); 37 data[i][j] = rand();
38 38
@@ -91,34 +91,55 @@ int main(int argc, char *argv[])
91{ 91{
92 const struct raid6_calls *const *algo; 92 const struct raid6_calls *const *algo;
93 const struct raid6_recov_calls *const *ra; 93 const struct raid6_recov_calls *const *ra;
94 int i, j; 94 int i, j, p1, p2;
95 int err = 0; 95 int err = 0;
96 96
97 makedata(); 97 makedata(0, NDISKS-1);
98 98
99 for (ra = raid6_recov_algos; *ra; ra++) { 99 for (ra = raid6_recov_algos; *ra; ra++) {
100 if ((*ra)->valid && !(*ra)->valid()) 100 if ((*ra)->valid && !(*ra)->valid())
101 continue; 101 continue;
102
102 raid6_2data_recov = (*ra)->data2; 103 raid6_2data_recov = (*ra)->data2;
103 raid6_datap_recov = (*ra)->datap; 104 raid6_datap_recov = (*ra)->datap;
104 105
105 printf("using recovery %s\n", (*ra)->name); 106 printf("using recovery %s\n", (*ra)->name);
106 107
107 for (algo = raid6_algos; *algo; algo++) { 108 for (algo = raid6_algos; *algo; algo++) {
108 if (!(*algo)->valid || (*algo)->valid()) { 109 if ((*algo)->valid && !(*algo)->valid())
109 raid6_call = **algo; 110 continue;
111
112 raid6_call = **algo;
113
114 /* Nuke syndromes */
115 memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
116
117 /* Generate assumed good syndrome */
118 raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
119 (void **)&dataptrs);
120
121 for (i = 0; i < NDISKS-1; i++)
122 for (j = i+1; j < NDISKS; j++)
123 err += test_disks(i, j);
124
125 if (!raid6_call.xor_syndrome)
126 continue;
127
128 for (p1 = 0; p1 < NDISKS-2; p1++)
129 for (p2 = p1; p2 < NDISKS-2; p2++) {
110 130
111 /* Nuke syndromes */ 131 /* Simulate rmw run */
112 memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); 132 raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
133 (void **)&dataptrs);
134 makedata(p1, p2);
135 raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
136 (void **)&dataptrs);
113 137
114 /* Generate assumed good syndrome */ 138 for (i = 0; i < NDISKS-1; i++)
115 raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, 139 for (j = i+1; j < NDISKS; j++)
116 (void **)&dataptrs); 140 err += test_disks(i, j);
141 }
117 142
118 for (i = 0; i < NDISKS-1; i++)
119 for (j = i+1; j < NDISKS; j++)
120 err += test_disks(i, j);
121 }
122 } 143 }
123 printf("\n"); 144 printf("\n");
124 } 145 }
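[Editor's note] The new loop exercises xor_syndrome() the way a read-modify-write stripe update would: once to subtract the old contents of the touched blocks out of P/Q, and once more, after the data has been rewritten, to add the new contents back. Sketched as a hypothetical helper:

#include <linux/raid/pq.h>

/* Partial-stripe update of data blocks first..last (inclusive); 'ptrs'
 * is the usual disks-long array ending with the P and Q pages. */
static void demo_rmw_update(int disks, int first, int last,
			    size_t bytes, void **ptrs)
{
	/* phase 1: XOR the old block contents out of P and Q */
	raid6_call.xor_syndrome(disks, first, last, bytes, ptrs);

	/* ... the caller rewrites blocks first..last here ... */

	/* phase 2: XOR the new contents back in; P and Q again match a
	 * full gen_syndrome() over the updated stripe */
	raid6_call.xor_syndrome(disks, first, last, bytes, ptrs);
}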
diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc
index e7c29459cbcd..2dd291a11264 100644
--- a/lib/raid6/tilegx.uc
+++ b/lib/raid6/tilegx.uc
@@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
80 80
81const struct raid6_calls raid6_tilegx$# = { 81const struct raid6_calls raid6_tilegx$# = {
82 raid6_tilegx$#_gen_syndrome, 82 raid6_tilegx$#_gen_syndrome,
83 NULL, /* XOR not yet implemented */
83 NULL, 84 NULL,
84 "tilegx$#", 85 "tilegx$#",
85 0 86 0
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 9cc4c4a90d00..4898442b837f 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1,13 +1,13 @@
1/* 1/*
2 * Resizable, Scalable, Concurrent Hash Table 2 * Resizable, Scalable, Concurrent Hash Table
3 * 3 *
4 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
4 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> 5 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
5 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> 6 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * Based on the following paper:
8 * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
9 *
10 * Code partially derived from nft_hash 8 * Code partially derived from nft_hash
9 * Rewritten with rehash code from br_multicast plus single list
10 * pointer as suggested by Josh Triplett
11 * 11 *
12 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as 13 * it under the terms of the GNU General Public License version 2 as
@@ -17,6 +17,7 @@
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/log2.h> 19#include <linux/log2.h>
20#include <linux/sched.h>
20#include <linux/slab.h> 21#include <linux/slab.h>
21#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
22#include <linux/mm.h> 23#include <linux/mm.h>
@@ -26,121 +27,18 @@
26#include <linux/err.h> 27#include <linux/err.h>
27 28
28#define HASH_DEFAULT_SIZE 64UL 29#define HASH_DEFAULT_SIZE 64UL
29#define HASH_MIN_SIZE 4UL 30#define HASH_MIN_SIZE 4U
30#define BUCKET_LOCKS_PER_CPU 128UL 31#define BUCKET_LOCKS_PER_CPU 128UL
31 32
32/* Base bits plus 1 bit for nulls marker */ 33static u32 head_hashfn(struct rhashtable *ht,
33#define HASH_RESERVED_SPACE (RHT_BASE_BITS + 1)
34
35enum {
36 RHT_LOCK_NORMAL,
37 RHT_LOCK_NESTED,
38};
39
40/* The bucket lock is selected based on the hash and protects mutations
41 * on a group of hash buckets.
42 *
43 * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
44 * a single lock always covers both buckets which may both contains
45 * entries which link to the same bucket of the old table during resizing.
46 * This allows to simplify the locking as locking the bucket in both
47 * tables during resize always guarantee protection.
48 *
49 * IMPORTANT: When holding the bucket lock of both the old and new table
50 * during expansions and shrinking, the old bucket lock must always be
51 * acquired first.
52 */
53static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
54{
55 return &tbl->locks[hash & tbl->locks_mask];
56}
57
58static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
59{
60 return (void *) he - ht->p.head_offset;
61}
62
63static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash)
64{
65 return hash & (tbl->size - 1);
66}
67
68static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr)
69{
70 u32 hash;
71
72 if (unlikely(!ht->p.key_len))
73 hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd);
74 else
75 hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len,
76 ht->p.hash_rnd);
77
78 return hash >> HASH_RESERVED_SPACE;
79}
80
81static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
82{
83 return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE;
84}
85
86static u32 head_hashfn(const struct rhashtable *ht,
87 const struct bucket_table *tbl, 34 const struct bucket_table *tbl,
88 const struct rhash_head *he) 35 const struct rhash_head *he)
89{ 36{
90 return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he))); 37 return rht_head_hashfn(ht, tbl, he, ht->p);
91} 38}
92 39
93#ifdef CONFIG_PROVE_LOCKING 40#ifdef CONFIG_PROVE_LOCKING
94static void debug_dump_buckets(const struct rhashtable *ht,
95 const struct bucket_table *tbl)
96{
97 struct rhash_head *he;
98 unsigned int i, hash;
99
100 for (i = 0; i < tbl->size; i++) {
101 pr_warn(" [Bucket %d] ", i);
102 rht_for_each_rcu(he, tbl, i) {
103 hash = head_hashfn(ht, tbl, he);
104 pr_cont("[hash = %#x, lock = %p] ",
105 hash, bucket_lock(tbl, hash));
106 }
107 pr_cont("\n");
108 }
109
110}
111
112static void debug_dump_table(struct rhashtable *ht,
113 const struct bucket_table *tbl,
114 unsigned int hash)
115{
116 struct bucket_table *old_tbl, *future_tbl;
117
118 pr_emerg("BUG: lock for hash %#x in table %p not held\n",
119 hash, tbl);
120
121 rcu_read_lock();
122 future_tbl = rht_dereference_rcu(ht->future_tbl, ht);
123 old_tbl = rht_dereference_rcu(ht->tbl, ht);
124 if (future_tbl != old_tbl) {
125 pr_warn("Future table %p (size: %zd)\n",
126 future_tbl, future_tbl->size);
127 debug_dump_buckets(ht, future_tbl);
128 }
129
130 pr_warn("Table %p (size: %zd)\n", old_tbl, old_tbl->size);
131 debug_dump_buckets(ht, old_tbl);
132
133 rcu_read_unlock();
134}
135
136#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) 41#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
137#define ASSERT_BUCKET_LOCK(HT, TBL, HASH) \
138 do { \
139 if (unlikely(!lockdep_rht_bucket_is_held(TBL, HASH))) { \
140 debug_dump_table(HT, TBL, HASH); \
141 BUG(); \
142 } \
143 } while (0)
144 42
145int lockdep_rht_mutex_is_held(struct rhashtable *ht) 43int lockdep_rht_mutex_is_held(struct rhashtable *ht)
146{ 44{
@@ -150,30 +48,18 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
150 48
151int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) 49int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
152{ 50{
153 spinlock_t *lock = bucket_lock(tbl, hash); 51 spinlock_t *lock = rht_bucket_lock(tbl, hash);
154 52
155 return (debug_locks) ? lockdep_is_held(lock) : 1; 53 return (debug_locks) ? lockdep_is_held(lock) : 1;
156} 54}
157EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); 55EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
158#else 56#else
159#define ASSERT_RHT_MUTEX(HT) 57#define ASSERT_RHT_MUTEX(HT)
160#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)
161#endif 58#endif
162 59
163 60
164static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n) 61static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
165{ 62 gfp_t gfp)
166 struct rhash_head __rcu **pprev;
167
168 for (pprev = &tbl->buckets[n];
169 !rht_is_a_nulls(rht_dereference_bucket(*pprev, tbl, n));
170 pprev = &rht_dereference_bucket(*pprev, tbl, n)->next)
171 ;
172
173 return pprev;
174}
175
176static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
177{ 63{
178 unsigned int i, size; 64 unsigned int i, size;
179#if defined(CONFIG_PROVE_LOCKING) 65#if defined(CONFIG_PROVE_LOCKING)
@@ -190,12 +76,13 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
190 76
191 if (sizeof(spinlock_t) != 0) { 77 if (sizeof(spinlock_t) != 0) {
192#ifdef CONFIG_NUMA 78#ifdef CONFIG_NUMA
193 if (size * sizeof(spinlock_t) > PAGE_SIZE) 79 if (size * sizeof(spinlock_t) > PAGE_SIZE &&
80 gfp == GFP_KERNEL)
194 tbl->locks = vmalloc(size * sizeof(spinlock_t)); 81 tbl->locks = vmalloc(size * sizeof(spinlock_t));
195 else 82 else
196#endif 83#endif
197 tbl->locks = kmalloc_array(size, sizeof(spinlock_t), 84 tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
198 GFP_KERNEL); 85 gfp);
199 if (!tbl->locks) 86 if (!tbl->locks)
200 return -ENOMEM; 87 return -ENOMEM;
201 for (i = 0; i < size; i++) 88 for (i = 0; i < size; i++)
@@ -214,155 +101,181 @@ static void bucket_table_free(const struct bucket_table *tbl)
214 kvfree(tbl); 101 kvfree(tbl);
215} 102}
216 103
104static void bucket_table_free_rcu(struct rcu_head *head)
105{
106 bucket_table_free(container_of(head, struct bucket_table, rcu));
107}
108
217static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, 109static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
218 size_t nbuckets) 110 size_t nbuckets,
111 gfp_t gfp)
219{ 112{
220 struct bucket_table *tbl; 113 struct bucket_table *tbl = NULL;
221 size_t size; 114 size_t size;
222 int i; 115 int i;
223 116
224 size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); 117 size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
225 tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); 118 if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
226 if (tbl == NULL) 119 gfp != GFP_KERNEL)
120 tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
121 if (tbl == NULL && gfp == GFP_KERNEL)
227 tbl = vzalloc(size); 122 tbl = vzalloc(size);
228
229 if (tbl == NULL) 123 if (tbl == NULL)
230 return NULL; 124 return NULL;
231 125
232 tbl->size = nbuckets; 126 tbl->size = nbuckets;
233 127
234 if (alloc_bucket_locks(ht, tbl) < 0) { 128 if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
235 bucket_table_free(tbl); 129 bucket_table_free(tbl);
236 return NULL; 130 return NULL;
237 } 131 }
238 132
133 INIT_LIST_HEAD(&tbl->walkers);
134
135 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
136
239 for (i = 0; i < nbuckets; i++) 137 for (i = 0; i < nbuckets; i++)
240 INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); 138 INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
241 139
242 return tbl; 140 return tbl;
243} 141}
244 142
245/** 143static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
246 * rht_grow_above_75 - returns true if nelems > 0.75 * table-size 144 struct bucket_table *tbl)
247 * @ht: hash table
248 * @new_size: new table size
249 */
250bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
251{ 145{
252 /* Expand table when exceeding 75% load */ 146 struct bucket_table *new_tbl;
253 return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
254 (ht->p.max_shift && atomic_read(&ht->shift) < ht->p.max_shift);
255}
256EXPORT_SYMBOL_GPL(rht_grow_above_75);
257 147
258/** 148 do {
259 * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size 149 new_tbl = tbl;
260 * @ht: hash table 150 tbl = rht_dereference_rcu(tbl->future_tbl, ht);
261 * @new_size: new table size 151 } while (tbl);
262 */
263bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
264{
265 /* Shrink table beneath 30% load */
266 return atomic_read(&ht->nelems) < (new_size * 3 / 10) &&
267 (atomic_read(&ht->shift) > ht->p.min_shift);
268}
269EXPORT_SYMBOL_GPL(rht_shrink_below_30);
270 152
271static void lock_buckets(struct bucket_table *new_tbl, 153 return new_tbl;
272 struct bucket_table *old_tbl, unsigned int hash)
273 __acquires(old_bucket_lock)
274{
275 spin_lock_bh(bucket_lock(old_tbl, hash));
276 if (new_tbl != old_tbl)
277 spin_lock_bh_nested(bucket_lock(new_tbl, hash),
278 RHT_LOCK_NESTED);
279} 154}
280 155
281static void unlock_buckets(struct bucket_table *new_tbl, 156static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
282 struct bucket_table *old_tbl, unsigned int hash)
283 __releases(old_bucket_lock)
284{ 157{
285 if (new_tbl != old_tbl) 158 struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
286 spin_unlock_bh(bucket_lock(new_tbl, hash)); 159 struct bucket_table *new_tbl = rhashtable_last_table(ht,
287 spin_unlock_bh(bucket_lock(old_tbl, hash)); 160 rht_dereference_rcu(old_tbl->future_tbl, ht));
161 struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
162 int err = -ENOENT;
163 struct rhash_head *head, *next, *entry;
164 spinlock_t *new_bucket_lock;
165 unsigned int new_hash;
166
167 rht_for_each(entry, old_tbl, old_hash) {
168 err = 0;
169 next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
170
171 if (rht_is_a_nulls(next))
172 break;
173
174 pprev = &entry->next;
175 }
176
177 if (err)
178 goto out;
179
180 new_hash = head_hashfn(ht, new_tbl, entry);
181
182 new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);
183
184 spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
185 head = rht_dereference_bucket(new_tbl->buckets[new_hash],
186 new_tbl, new_hash);
187
188 if (rht_is_a_nulls(head))
189 INIT_RHT_NULLS_HEAD(entry->next, ht, new_hash);
190 else
191 RCU_INIT_POINTER(entry->next, head);
192
193 rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
194 spin_unlock(new_bucket_lock);
195
196 rcu_assign_pointer(*pprev, next);
197
198out:
199 return err;
288} 200}
289 201
290/** 202static void rhashtable_rehash_chain(struct rhashtable *ht,
291 * Unlink entries on bucket which hash to different bucket. 203 unsigned int old_hash)
292 *
293 * Returns true if no more work needs to be performed on the bucket.
294 */
295static bool hashtable_chain_unzip(struct rhashtable *ht,
296 const struct bucket_table *new_tbl,
297 struct bucket_table *old_tbl,
298 size_t old_hash)
299{ 204{
300 struct rhash_head *he, *p, *next; 205 struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
301 unsigned int new_hash, new_hash2; 206 spinlock_t *old_bucket_lock;
302 207
303 ASSERT_BUCKET_LOCK(ht, old_tbl, old_hash); 208 old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
304 209
305 /* Old bucket empty, no work needed. */ 210 spin_lock_bh(old_bucket_lock);
306 p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl, 211 while (!rhashtable_rehash_one(ht, old_hash))
307 old_hash); 212 ;
308 if (rht_is_a_nulls(p)) 213 old_tbl->rehash++;
309 return false; 214 spin_unlock_bh(old_bucket_lock);
310 215}
311 new_hash = head_hashfn(ht, new_tbl, p);
312 ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
313 216
314 /* Advance the old bucket pointer one or more times until it 217static int rhashtable_rehash_attach(struct rhashtable *ht,
315 * reaches a node that doesn't hash to the same bucket as the 218 struct bucket_table *old_tbl,
316 * previous node p. Call the previous node p; 219 struct bucket_table *new_tbl)
317 */ 220{
318 rht_for_each_continue(he, p->next, old_tbl, old_hash) { 221 /* Protect future_tbl using the first bucket lock. */
319 new_hash2 = head_hashfn(ht, new_tbl, he); 222 spin_lock_bh(old_tbl->locks);
320 ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash2);
321 223
322 if (new_hash != new_hash2) 224 /* Did somebody beat us to it? */
323 break; 225 if (rcu_access_pointer(old_tbl->future_tbl)) {
324 p = he; 226 spin_unlock_bh(old_tbl->locks);
227 return -EEXIST;
325 } 228 }
326 rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
327 229
328 /* Find the subsequent node which does hash to the same 230 /* Make insertions go into the new, empty table right away. Deletions
329 * bucket as node P, or NULL if no such node exists. 231 * and lookups will be attempted in both tables until we synchronize.
330 */ 232 */
331 INIT_RHT_NULLS_HEAD(next, ht, old_hash); 233 rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
332 if (!rht_is_a_nulls(he)) {
333 rht_for_each_continue(he, he->next, old_tbl, old_hash) {
334 if (head_hashfn(ht, new_tbl, he) == new_hash) {
335 next = he;
336 break;
337 }
338 }
339 }
340 234
341 /* Set p's next pointer to that subsequent node pointer, 235 /* Ensure the new table is visible to readers. */
342 * bypassing the nodes which do not hash to p's bucket 236 smp_wmb();
343 */
344 rcu_assign_pointer(p->next, next);
345 237
346 p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl, 238 spin_unlock_bh(old_tbl->locks);
347 old_hash);
348 239
349 return !rht_is_a_nulls(p); 240 return 0;
350} 241}
351 242
352static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl, 243static int rhashtable_rehash_table(struct rhashtable *ht)
353 unsigned int new_hash, struct rhash_head *entry)
354{ 244{
355 ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash); 245 struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
246 struct bucket_table *new_tbl;
247 struct rhashtable_walker *walker;
248 unsigned int old_hash;
249
250 new_tbl = rht_dereference(old_tbl->future_tbl, ht);
251 if (!new_tbl)
252 return 0;
253
254 for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
255 rhashtable_rehash_chain(ht, old_hash);
256
257 /* Publish the new table pointer. */
258 rcu_assign_pointer(ht->tbl, new_tbl);
356 259
357 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry); 260 spin_lock(&ht->lock);
261 list_for_each_entry(walker, &old_tbl->walkers, list)
262 walker->tbl = NULL;
263 spin_unlock(&ht->lock);
264
265 /* Wait for readers. All new readers will see the new
266 * table, and thus no references to the old table will
267 * remain.
268 */
269 call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
270
271 return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
358} 272}
359 273
360/** 274/**
361 * rhashtable_expand - Expand hash table while allowing concurrent lookups 275 * rhashtable_expand - Expand hash table while allowing concurrent lookups
362 * @ht: the hash table to expand 276 * @ht: the hash table to expand
363 * 277 *
364 * A secondary bucket array is allocated and the hash entries are migrated 278 * A secondary bucket array is allocated and the hash entries are migrated.
365 * while keeping them on both lists until the end of the RCU grace period.
366 * 279 *
367 * This function may only be called in a context where it is safe to call 280 * This function may only be called in a context where it is safe to call
368 * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 281 * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
@@ -373,87 +286,32 @@ static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl,
373 * It is valid to have concurrent insertions and deletions protected by per 286 * It is valid to have concurrent insertions and deletions protected by per
374 * bucket locks or concurrent RCU protected lookups and traversals. 287 * bucket locks or concurrent RCU protected lookups and traversals.
375 */ 288 */
376int rhashtable_expand(struct rhashtable *ht) 289static int rhashtable_expand(struct rhashtable *ht)
377{ 290{
378 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); 291 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
379 struct rhash_head *he; 292 int err;
380 unsigned int new_hash, old_hash;
381 bool complete = false;
382 293
383 ASSERT_RHT_MUTEX(ht); 294 ASSERT_RHT_MUTEX(ht);
384 295
385 new_tbl = bucket_table_alloc(ht, old_tbl->size * 2); 296 old_tbl = rhashtable_last_table(ht, old_tbl);
297
298 new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
386 if (new_tbl == NULL) 299 if (new_tbl == NULL)
387 return -ENOMEM; 300 return -ENOMEM;
388 301
389 atomic_inc(&ht->shift); 302 err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
303 if (err)
304 bucket_table_free(new_tbl);
390 305
391 /* Make insertions go into the new, empty table right away. Deletions 306 return err;
392 * and lookups will be attempted in both tables until we synchronize.
393 * The synchronize_rcu() guarantees for the new table to be picked up
394 * so no new additions go into the old table while we relink.
395 */
396 rcu_assign_pointer(ht->future_tbl, new_tbl);
397 synchronize_rcu();
398
399 /* For each new bucket, search the corresponding old bucket for the
400 * first entry that hashes to the new bucket, and link the end of
401 * newly formed bucket chain (containing entries added to future
402 * table) to that entry. Since all the entries which will end up in
403 * the new bucket appear in the same old bucket, this constructs an
404 * entirely valid new hash table, but with multiple buckets
405 * "zipped" together into a single imprecise chain.
406 */
407 for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
408 old_hash = rht_bucket_index(old_tbl, new_hash);
409 lock_buckets(new_tbl, old_tbl, new_hash);
410 rht_for_each(he, old_tbl, old_hash) {
411 if (head_hashfn(ht, new_tbl, he) == new_hash) {
412 link_old_to_new(ht, new_tbl, new_hash, he);
413 break;
414 }
415 }
416 unlock_buckets(new_tbl, old_tbl, new_hash);
417 }
418
419 /* Unzip interleaved hash chains */
420 while (!complete && !ht->being_destroyed) {
421 /* Wait for readers. All new readers will see the new
422 * table, and thus no references to the old table will
423 * remain.
424 */
425 synchronize_rcu();
426
427 /* For each bucket in the old table (each of which
428 * contains items from multiple buckets of the new
429 * table): ...
430 */
431 complete = true;
432 for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
433 lock_buckets(new_tbl, old_tbl, old_hash);
434
435 if (hashtable_chain_unzip(ht, new_tbl, old_tbl,
436 old_hash))
437 complete = false;
438
439 unlock_buckets(new_tbl, old_tbl, old_hash);
440 }
441 }
442
443 rcu_assign_pointer(ht->tbl, new_tbl);
444 synchronize_rcu();
445
446 bucket_table_free(old_tbl);
447 return 0;
448} 307}
449EXPORT_SYMBOL_GPL(rhashtable_expand);
450 308
451/** 309/**
452 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups 310 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
453 * @ht: the hash table to shrink 311 * @ht: the hash table to shrink
454 * 312 *
455 * This function may only be called in a context where it is safe to call 313 * This function shrinks the hash table to fit, i.e., the smallest
456 * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 314 * size would not cause it to expand right away automatically.
457 * 315 *
458 * The caller must ensure that no concurrent resizing occurs by holding 316 * The caller must ensure that no concurrent resizing occurs by holding
459 * ht->mutex. 317 * ht->mutex.
@@ -464,403 +322,146 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
464 * It is valid to have concurrent insertions and deletions protected by per 322 * It is valid to have concurrent insertions and deletions protected by per
465 * bucket locks or concurrent RCU protected lookups and traversals. 323 * bucket locks or concurrent RCU protected lookups and traversals.
466 */ 324 */
467int rhashtable_shrink(struct rhashtable *ht) 325static int rhashtable_shrink(struct rhashtable *ht)
468{ 326{
469 struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht); 327 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
470 unsigned int new_hash; 328 unsigned int size;
329 int err;
471 330
472 ASSERT_RHT_MUTEX(ht); 331 ASSERT_RHT_MUTEX(ht);
473 332
474 new_tbl = bucket_table_alloc(ht, tbl->size / 2); 333 size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
475 if (new_tbl == NULL) 334 if (size < ht->p.min_size)
476 return -ENOMEM; 335 size = ht->p.min_size;
477
478 rcu_assign_pointer(ht->future_tbl, new_tbl);
479 synchronize_rcu();
480
481 /* Link the first entry in the old bucket to the end of the
482 * bucket in the new table. As entries are concurrently being
483 * added to the new table, lock down the new bucket. As we
484 * always divide the size in half when shrinking, each bucket
485 * in the new table maps to exactly two buckets in the old
486 * table.
487 */
488 for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
489 lock_buckets(new_tbl, tbl, new_hash);
490
491 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
492 tbl->buckets[new_hash]);
493 ASSERT_BUCKET_LOCK(ht, tbl, new_hash + new_tbl->size);
494 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
495 tbl->buckets[new_hash + new_tbl->size]);
496 336
497 unlock_buckets(new_tbl, tbl, new_hash); 337 if (old_tbl->size <= size)
498 } 338 return 0;
499 339
500 /* Publish the new, valid hash table */ 340 if (rht_dereference(old_tbl->future_tbl, ht))
501 rcu_assign_pointer(ht->tbl, new_tbl); 341 return -EEXIST;
502 atomic_dec(&ht->shift);
503 342
504 /* Wait for readers. No new readers will have references to the 343 new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
505 * old hash table. 344 if (new_tbl == NULL)
506 */ 345 return -ENOMEM;
507 synchronize_rcu();
508 346
509 bucket_table_free(tbl); 347 err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
348 if (err)
349 bucket_table_free(new_tbl);
510 350
511 return 0; 351 return err;
512} 352}
513EXPORT_SYMBOL_GPL(rhashtable_shrink);
514 353
515static void rht_deferred_worker(struct work_struct *work) 354static void rht_deferred_worker(struct work_struct *work)
516{ 355{
517 struct rhashtable *ht; 356 struct rhashtable *ht;
518 struct bucket_table *tbl; 357 struct bucket_table *tbl;
519 struct rhashtable_walker *walker; 358 int err = 0;
520 359
521 ht = container_of(work, struct rhashtable, run_work); 360 ht = container_of(work, struct rhashtable, run_work);
522 mutex_lock(&ht->mutex); 361 mutex_lock(&ht->mutex);
523 if (ht->being_destroyed)
524 goto unlock;
525 362
526 tbl = rht_dereference(ht->tbl, ht); 363 tbl = rht_dereference(ht->tbl, ht);
364 tbl = rhashtable_last_table(ht, tbl);
527 365
528 list_for_each_entry(walker, &ht->walkers, list) 366 if (rht_grow_above_75(ht, tbl))
529 walker->resize = true;
530
531 if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
532 rhashtable_expand(ht); 367 rhashtable_expand(ht);
533 else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) 368 else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
534 rhashtable_shrink(ht); 369 rhashtable_shrink(ht);
535 370
536unlock: 371 err = rhashtable_rehash_table(ht);
372
537 mutex_unlock(&ht->mutex); 373 mutex_unlock(&ht->mutex);
538}
539 374
540static void rhashtable_wakeup_worker(struct rhashtable *ht) 375 if (err)
541{
542 struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
543 struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
544 size_t size = tbl->size;
545
546 /* Only adjust the table if no resizing is currently in progress. */
547 if (tbl == new_tbl &&
548 ((ht->p.grow_decision && ht->p.grow_decision(ht, size)) ||
549 (ht->p.shrink_decision && ht->p.shrink_decision(ht, size))))
550 schedule_work(&ht->run_work); 376 schedule_work(&ht->run_work);
551} 377}
552 378
553static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, 379static bool rhashtable_check_elasticity(struct rhashtable *ht,
554 struct bucket_table *tbl, u32 hash) 380 struct bucket_table *tbl,
381 unsigned int hash)
555{ 382{
383 unsigned int elasticity = ht->elasticity;
556 struct rhash_head *head; 384 struct rhash_head *head;
557 385
558 hash = rht_bucket_index(tbl, hash); 386 rht_for_each(head, tbl, hash)
559 head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); 387 if (!--elasticity)
560 388 return true;
561 ASSERT_BUCKET_LOCK(ht, tbl, hash);
562
563 if (rht_is_a_nulls(head))
564 INIT_RHT_NULLS_HEAD(obj->next, ht, hash);
565 else
566 RCU_INIT_POINTER(obj->next, head);
567
568 rcu_assign_pointer(tbl->buckets[hash], obj);
569 389
570 atomic_inc(&ht->nelems); 390 return false;
571
572 rhashtable_wakeup_worker(ht);
573} 391}
574 392
575/** 393int rhashtable_insert_rehash(struct rhashtable *ht)
576 * rhashtable_insert - insert object into hash table
577 * @ht: hash table
578 * @obj: pointer to hash head inside object
579 *
580 * Will take a per bucket spinlock to protect against mutual mutations
581 * on the same bucket. Multiple insertions may occur in parallel unless
582 * they map to the same bucket lock.
583 *
584 * It is safe to call this function from atomic context.
585 *
586 * Will trigger an automatic deferred table resizing if the size grows
587 * beyond the watermark indicated by grow_decision() which can be passed
588 * to rhashtable_init().
589 */
590void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
591{ 394{
592 struct bucket_table *tbl, *old_tbl; 395 struct bucket_table *old_tbl;
593 unsigned hash; 396 struct bucket_table *new_tbl;
594 397 struct bucket_table *tbl;
595 rcu_read_lock(); 398 unsigned int size;
596 399 int err;
597 tbl = rht_dereference_rcu(ht->future_tbl, ht);
598 old_tbl = rht_dereference_rcu(ht->tbl, ht);
599 hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
600
601 lock_buckets(tbl, old_tbl, hash);
602 __rhashtable_insert(ht, obj, tbl, hash);
603 unlock_buckets(tbl, old_tbl, hash);
604
605 rcu_read_unlock();
606}
607EXPORT_SYMBOL_GPL(rhashtable_insert);
608
609/**
610 * rhashtable_remove - remove object from hash table
611 * @ht: hash table
612 * @obj: pointer to hash head inside object
613 *
614 * Since the hash chain is single linked, the removal operation needs to
615 * walk the bucket chain upon removal. The removal operation is thus
616 * considerable slow if the hash table is not correctly sized.
617 *
618 * Will automatically shrink the table via rhashtable_expand() if the
619 * shrink_decision function specified at rhashtable_init() returns true.
620 *
621 * The caller must ensure that no concurrent table mutations occur. It is
622 * however valid to have concurrent lookups if they are RCU protected.
623 */
624bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
625{
626 struct bucket_table *tbl, *new_tbl, *old_tbl;
627 struct rhash_head __rcu **pprev;
628 struct rhash_head *he, *he2;
629 unsigned int hash, new_hash;
630 bool ret = false;
631 400
632 rcu_read_lock();
633 old_tbl = rht_dereference_rcu(ht->tbl, ht); 401 old_tbl = rht_dereference_rcu(ht->tbl, ht);
634 tbl = new_tbl = rht_dereference_rcu(ht->future_tbl, ht); 402 tbl = rhashtable_last_table(ht, old_tbl);
635 new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
636
637 lock_buckets(new_tbl, old_tbl, new_hash);
638restart:
639 hash = rht_bucket_index(tbl, new_hash);
640 pprev = &tbl->buckets[hash];
641 rht_for_each(he, tbl, hash) {
642 if (he != obj) {
643 pprev = &he->next;
644 continue;
645 }
646
647 ASSERT_BUCKET_LOCK(ht, tbl, hash);
648
649 if (old_tbl->size > new_tbl->size && tbl == old_tbl &&
650 !rht_is_a_nulls(obj->next) &&
651 head_hashfn(ht, tbl, obj->next) != hash) {
652 rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
653 } else if (unlikely(old_tbl->size < new_tbl->size && tbl == new_tbl)) {
654 rht_for_each_continue(he2, obj->next, tbl, hash) {
655 if (head_hashfn(ht, tbl, he2) == hash) {
656 rcu_assign_pointer(*pprev, he2);
657 goto found;
658 }
659 }
660
661 rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
662 } else {
663 rcu_assign_pointer(*pprev, obj->next);
664 }
665
666found:
667 ret = true;
668 break;
669 }
670 403
671 /* The entry may be linked in either 'tbl', 'future_tbl', or both. 404 size = tbl->size;
672 * 'future_tbl' only exists for a short period of time during
673 * resizing. Thus traversing both is fine and the added cost is
674 * very rare.
675 */
676 if (tbl != old_tbl) {
677 tbl = old_tbl;
678 goto restart;
679 }
680 405
681 unlock_buckets(new_tbl, old_tbl, new_hash); 406 if (rht_grow_above_75(ht, tbl))
682 407 size *= 2;
683 if (ret) { 408 /* More than two rehashes (not resizes) detected. */
684 atomic_dec(&ht->nelems); 409 else if (WARN_ON(old_tbl != tbl && old_tbl->size == size))
685 rhashtable_wakeup_worker(ht); 410 return -EBUSY;
686 }
687
688 rcu_read_unlock();
689 411
690 return ret; 412 new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
691} 413 if (new_tbl == NULL)
692EXPORT_SYMBOL_GPL(rhashtable_remove); 414 return -ENOMEM;
693
694struct rhashtable_compare_arg {
695 struct rhashtable *ht;
696 const void *key;
697};
698
699static bool rhashtable_compare(void *ptr, void *arg)
700{
701 struct rhashtable_compare_arg *x = arg;
702 struct rhashtable *ht = x->ht;
703
704 return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len);
705}
706
707/**
708 * rhashtable_lookup - lookup key in hash table
709 * @ht: hash table
710 * @key: pointer to key
711 *
712 * Computes the hash value for the key and traverses the bucket chain looking
713 * for a entry with an identical key. The first matching entry is returned.
714 *
715 * This lookup function may only be used for fixed key hash table (key_len
716 * parameter set). It will BUG() if used inappropriately.
717 *
718 * Lookups may occur in parallel with hashtable mutations and resizing.
719 */
720void *rhashtable_lookup(struct rhashtable *ht, const void *key)
721{
722 struct rhashtable_compare_arg arg = {
723 .ht = ht,
724 .key = key,
725 };
726
727 BUG_ON(!ht->p.key_len);
728
729 return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg);
730}
731EXPORT_SYMBOL_GPL(rhashtable_lookup);
732
733/**
734 * rhashtable_lookup_compare - search hash table with compare function
735 * @ht: hash table
736 * @key: the pointer to the key
737 * @compare: compare function, must return true on match
738 * @arg: argument passed on to compare function
739 *
740 * Traverses the bucket chain behind the provided hash value and calls the
741 * specified compare function for each entry.
742 *
743 * Lookups may occur in parallel with hashtable mutations and resizing.
744 *
745 * Returns the first entry on which the compare function returned true.
746 */
747void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
748 bool (*compare)(void *, void *), void *arg)
749{
750 const struct bucket_table *tbl, *old_tbl;
751 struct rhash_head *he;
752 u32 hash;
753
754 rcu_read_lock();
755
756 old_tbl = rht_dereference_rcu(ht->tbl, ht);
757 tbl = rht_dereference_rcu(ht->future_tbl, ht);
758 hash = key_hashfn(ht, key, ht->p.key_len);
759restart:
760 rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) {
761 if (!compare(rht_obj(ht, he), arg))
762 continue;
763 rcu_read_unlock();
764 return rht_obj(ht, he);
765 }
766 415
767 if (unlikely(tbl != old_tbl)) { 416 err = rhashtable_rehash_attach(ht, tbl, new_tbl);
768 tbl = old_tbl; 417 if (err) {
769 goto restart; 418 bucket_table_free(new_tbl);
770 } 419 if (err == -EEXIST)
771 rcu_read_unlock(); 420 err = 0;
421 } else
422 schedule_work(&ht->run_work);
772 423
773 return NULL; 424 return err;
774} 425}
775EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); 426EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
776 427
777/** 428int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
778 * rhashtable_lookup_insert - lookup and insert object into hash table 429 struct rhash_head *obj,
779 * @ht: hash table 430 struct bucket_table *tbl)
780 * @obj: pointer to hash head inside object
781 *
782 * Locks down the bucket chain in both the old and new table if a resize
783 * is in progress to ensure that writers can't remove from the old table
784 * and can't insert to the new table during the atomic operation of search
785 * and insertion. Searches for duplicates in both the old and new table if
786 * a resize is in progress.
787 *
788 * This lookup function may only be used for fixed key hash table (key_len
789 * parameter set). It will BUG() if used inappropriately.
790 *
791 * It is safe to call this function from atomic context.
792 *
793 * Will trigger an automatic deferred table resizing if the size grows
794 * beyond the watermark indicated by grow_decision() which can be passed
795 * to rhashtable_init().
796 */
797bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
798{ 431{
799 struct rhashtable_compare_arg arg = { 432 struct rhash_head *head;
800 .ht = ht, 433 unsigned int hash;
801 .key = rht_obj(ht, obj) + ht->p.key_offset, 434 int err;
802 };
803 435
804 BUG_ON(!ht->p.key_len); 436 tbl = rhashtable_last_table(ht, tbl);
437 hash = head_hashfn(ht, tbl, obj);
438 spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
805 439
806 return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare, 440 err = -EEXIST;
807 &arg); 441 if (key && rhashtable_lookup_fast(ht, key, ht->p))
808} 442 goto exit;
809EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
810 443
811/** 444 err = -EAGAIN;
812 * rhashtable_lookup_compare_insert - search and insert object to hash table 445 if (rhashtable_check_elasticity(ht, tbl, hash) ||
813 * with compare function 446 rht_grow_above_100(ht, tbl))
814 * @ht: hash table 447 goto exit;
815 * @obj: pointer to hash head inside object
816 * @compare: compare function, must return true on match
817 * @arg: argument passed on to compare function
818 *
819 * Locks down the bucket chain in both the old and new table if a resize
820 * is in progress to ensure that writers can't remove from the old table
821 * and can't insert to the new table during the atomic operation of search
822 * and insertion. Searches for duplicates in both the old and new table if
823 * a resize is in progress.
824 *
825 * Lookups may occur in parallel with hashtable mutations and resizing.
826 *
827 * Will trigger an automatic deferred table resizing if the size grows
828 * beyond the watermark indicated by grow_decision() which can be passed
829 * to rhashtable_init().
830 */
831bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
832 struct rhash_head *obj,
833 bool (*compare)(void *, void *),
834 void *arg)
835{
836 struct bucket_table *new_tbl, *old_tbl;
837 u32 new_hash;
838 bool success = true;
839 448
840 BUG_ON(!ht->p.key_len); 449 err = 0;
841 450
842 rcu_read_lock(); 451 head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
843 old_tbl = rht_dereference_rcu(ht->tbl, ht);
844 new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
845 new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
846 452
847 lock_buckets(new_tbl, old_tbl, new_hash); 453 RCU_INIT_POINTER(obj->next, head);
848 454
849 if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset, 455 rcu_assign_pointer(tbl->buckets[hash], obj);
850 compare, arg)) {
851 success = false;
852 goto exit;
853 }
854 456
855 __rhashtable_insert(ht, obj, new_tbl, new_hash); 457 atomic_inc(&ht->nelems);
856 458
857exit: 459exit:
858 unlock_buckets(new_tbl, old_tbl, new_hash); 460 spin_unlock(rht_bucket_lock(tbl, hash));
859 rcu_read_unlock();
860 461
861 return success; 462 return err;
862} 463}
863EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert); 464EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
864 465
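The slow path above only runs when the inlined fast path cannot complete the insertion. A rough sketch of how a caller is expected to use the new fast-path API, mirroring the pattern the updated test_rhashtable.c uses further down (struct my_obj and the parameter choices are illustrative, not mandated by the patch):

#include <linux/rhashtable.h>
#include <linux/slab.h>

struct my_obj {
	u32 key;
	struct rhash_head node;		/* linkage owned by the hash table */
};

static const struct rhashtable_params my_params = {
	.head_offset = offsetof(struct my_obj, node),
	.key_offset  = offsetof(struct my_obj, key),
	.key_len     = sizeof(u32),
	.automatic_shrinking = true,	/* opt in to rht_shrink_below_30() */
};

static int my_insert(struct rhashtable *ht, struct my_obj *obj)
{
	/* Returns 0 on success or a negative errno; callers must check. */
	return rhashtable_insert_fast(ht, &obj->node, my_params);
}

static struct my_obj *my_find(struct rhashtable *ht, u32 key)
{
	/* Returns NULL if absent; hold rcu_read_lock() while using the result. */
	return rhashtable_lookup_fast(ht, &key, my_params);
}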
865/** 466/**
866 * rhashtable_walk_init - Initialise an iterator 467 * rhashtable_walk_init - Initialise an iterator
@@ -895,7 +496,8 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
895 return -ENOMEM; 496 return -ENOMEM;
896 497
897 mutex_lock(&ht->mutex); 498 mutex_lock(&ht->mutex);
898 list_add(&iter->walker->list, &ht->walkers); 499 iter->walker->tbl = rht_dereference(ht->tbl, ht);
500 list_add(&iter->walker->list, &iter->walker->tbl->walkers);
899 mutex_unlock(&ht->mutex); 501 mutex_unlock(&ht->mutex);
900 502
901 return 0; 503 return 0;
@@ -911,7 +513,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
911void rhashtable_walk_exit(struct rhashtable_iter *iter) 513void rhashtable_walk_exit(struct rhashtable_iter *iter)
912{ 514{
913 mutex_lock(&iter->ht->mutex); 515 mutex_lock(&iter->ht->mutex);
914 list_del(&iter->walker->list); 516 if (iter->walker->tbl)
517 list_del(&iter->walker->list);
915 mutex_unlock(&iter->ht->mutex); 518 mutex_unlock(&iter->ht->mutex);
916 kfree(iter->walker); 519 kfree(iter->walker);
917} 520}
@@ -932,13 +535,21 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
932 * by calling rhashtable_walk_next. 535 * by calling rhashtable_walk_next.
933 */ 536 */
934int rhashtable_walk_start(struct rhashtable_iter *iter) 537int rhashtable_walk_start(struct rhashtable_iter *iter)
538 __acquires(RCU)
935{ 539{
540 struct rhashtable *ht = iter->ht;
541
542 mutex_lock(&ht->mutex);
543
544 if (iter->walker->tbl)
545 list_del(&iter->walker->list);
546
936 rcu_read_lock(); 547 rcu_read_lock();
937 548
938 if (iter->walker->resize) { 549 mutex_unlock(&ht->mutex);
939 iter->slot = 0; 550
940 iter->skip = 0; 551 if (!iter->walker->tbl) {
941 iter->walker->resize = false; 552 iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
942 return -EAGAIN; 553 return -EAGAIN;
943 } 554 }
944 555
@@ -960,13 +571,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
960 */ 571 */
961void *rhashtable_walk_next(struct rhashtable_iter *iter) 572void *rhashtable_walk_next(struct rhashtable_iter *iter)
962{ 573{
963 const struct bucket_table *tbl; 574 struct bucket_table *tbl = iter->walker->tbl;
964 struct rhashtable *ht = iter->ht; 575 struct rhashtable *ht = iter->ht;
965 struct rhash_head *p = iter->p; 576 struct rhash_head *p = iter->p;
966 void *obj = NULL; 577 void *obj = NULL;
967 578
968 tbl = rht_dereference_rcu(ht->tbl, ht);
969
970 if (p) { 579 if (p) {
971 p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); 580 p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
972 goto next; 581 goto next;
@@ -992,17 +601,20 @@ next:
992 iter->skip = 0; 601 iter->skip = 0;
993 } 602 }
994 603
995 iter->p = NULL; 604 /* Ensure we see any new tables. */
605 smp_rmb();
996 606
997out: 607 iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
998 if (iter->walker->resize) { 608 if (iter->walker->tbl) {
999 iter->p = NULL;
1000 iter->slot = 0; 609 iter->slot = 0;
1001 iter->skip = 0; 610 iter->skip = 0;
1002 iter->walker->resize = false;
1003 return ERR_PTR(-EAGAIN); 611 return ERR_PTR(-EAGAIN);
1004 } 612 }
1005 613
614 iter->p = NULL;
615
616out:
617
1006 return obj; 618 return obj;
1007} 619}
1008EXPORT_SYMBOL_GPL(rhashtable_walk_next); 620EXPORT_SYMBOL_GPL(rhashtable_walk_next);
@@ -1014,16 +626,39 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
1014 * Finish a hash table walk. 626 * Finish a hash table walk.
1015 */ 627 */
1016void rhashtable_walk_stop(struct rhashtable_iter *iter) 628void rhashtable_walk_stop(struct rhashtable_iter *iter)
629 __releases(RCU)
1017{ 630{
1018 rcu_read_unlock(); 631 struct rhashtable *ht;
632 struct bucket_table *tbl = iter->walker->tbl;
633
634 if (!tbl)
635 goto out;
636
637 ht = iter->ht;
638
639 spin_lock(&ht->lock);
640 if (tbl->rehash < tbl->size)
641 list_add(&iter->walker->list, &tbl->walkers);
642 else
643 iter->walker->tbl = NULL;
644 spin_unlock(&ht->lock);
645
1019 iter->p = NULL; 646 iter->p = NULL;
647
648out:
649 rcu_read_unlock();
1020} 650}
1021EXPORT_SYMBOL_GPL(rhashtable_walk_stop); 651EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
1022 652
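With walkers now attached to the specific bucket table they started on, a resize is reported to the iterator as -EAGAIN rather than silently missing entries. An illustrative consumption loop under these semantics (function and object names are placeholders):

#include <linux/rhashtable.h>
#include <linux/err.h>

static void my_walk(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	void *obj;
	int err;

	err = rhashtable_walk_init(ht, &iter);
	if (err)
		return;

	err = rhashtable_walk_start(&iter);
	if (err && err != -EAGAIN)	/* -EAGAIN just restarts the walk */
		goto stop;

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* resize hit mid-walk, start over */
			break;
		}
		/* use obj; entries may be seen more than once across a resize */
	}

stop:
	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);
}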
1023static size_t rounded_hashtable_size(struct rhashtable_params *params) 653static size_t rounded_hashtable_size(const struct rhashtable_params *params)
1024{ 654{
1025 return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), 655 return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
1026 1UL << params->min_shift); 656 (unsigned long)params->min_size);
657}
658
659static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
660{
661 return jhash2(key, length, seed);
1027} 662}
1028 663
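Spelled out, rounded_hashtable_size() sizes the initial table for roughly 75% utilisation of the hint: nelem_hint * 4 / 3, rounded up to a power of two and never below min_size. As a worked illustration (numbers not taken from the patch): a hint of 100 gives 133, which becomes a 256-bucket table; a hint of 12 gives 16, which is then raised further if min_size is larger.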
1029/** 664/**
@@ -1056,7 +691,7 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
1056 * struct rhash_head node; 691 * struct rhash_head node;
1057 * }; 692 * };
1058 * 693 *
1059 * u32 my_hash_fn(const void *data, u32 seed) 694 * u32 my_hash_fn(const void *data, u32 len, u32 seed)
1060 * { 695 * {
1061 * struct test_obj *obj = data; 696 * struct test_obj *obj = data;
1062 * 697 *
@@ -1069,72 +704,129 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
1069 * .obj_hashfn = my_hash_fn, 704 * .obj_hashfn = my_hash_fn,
1070 * }; 705 * };
1071 */ 706 */
1072int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) 707int rhashtable_init(struct rhashtable *ht,
708 const struct rhashtable_params *params)
1073{ 709{
1074 struct bucket_table *tbl; 710 struct bucket_table *tbl;
1075 size_t size; 711 size_t size;
1076 712
1077 size = HASH_DEFAULT_SIZE; 713 size = HASH_DEFAULT_SIZE;
1078 714
1079 if ((params->key_len && !params->hashfn) || 715 if ((!params->key_len && !params->obj_hashfn) ||
1080 (!params->key_len && !params->obj_hashfn)) 716 (params->obj_hashfn && !params->obj_cmpfn))
1081 return -EINVAL; 717 return -EINVAL;
1082 718
1083 if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) 719 if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
1084 return -EINVAL; 720 return -EINVAL;
1085 721
1086 params->min_shift = max_t(size_t, params->min_shift,
1087 ilog2(HASH_MIN_SIZE));
1088
1089 if (params->nelem_hint) 722 if (params->nelem_hint)
1090 size = rounded_hashtable_size(params); 723 size = rounded_hashtable_size(params);
1091 724
1092 memset(ht, 0, sizeof(*ht)); 725 memset(ht, 0, sizeof(*ht));
1093 mutex_init(&ht->mutex); 726 mutex_init(&ht->mutex);
727 spin_lock_init(&ht->lock);
1094 memcpy(&ht->p, params, sizeof(*params)); 728 memcpy(&ht->p, params, sizeof(*params));
1095 INIT_LIST_HEAD(&ht->walkers); 729
730 if (params->min_size)
731 ht->p.min_size = roundup_pow_of_two(params->min_size);
732
733 if (params->max_size)
734 ht->p.max_size = rounddown_pow_of_two(params->max_size);
735
736 ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
737
738 /* The maximum (not average) chain length grows with the
739 * size of the hash table, at a rate of (log N)/(log log N).
740 * The value of 16 is selected so that even if the hash
741 * table grew to 2^32 you would not expect the maximum
742 * chain length to exceed it unless we are under attack
743 * (or extremely unlucky).
744 *
745 * As this limit is only to detect attacks, we don't need
746 * to set it to a lower value as you'd need the chain
747 * length to vastly exceed 16 to have any real effect
748 * on the system.
749 */
750 if (!params->insecure_elasticity)
751 ht->elasticity = 16;
1096 752
1097 if (params->locks_mul) 753 if (params->locks_mul)
1098 ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); 754 ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
1099 else 755 else
1100 ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; 756 ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
1101 757
1102 tbl = bucket_table_alloc(ht, size); 758 ht->key_len = ht->p.key_len;
759 if (!params->hashfn) {
760 ht->p.hashfn = jhash;
761
762 if (!(ht->key_len & (sizeof(u32) - 1))) {
763 ht->key_len /= sizeof(u32);
764 ht->p.hashfn = rhashtable_jhash2;
765 }
766 }
767
768 tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
1103 if (tbl == NULL) 769 if (tbl == NULL)
1104 return -ENOMEM; 770 return -ENOMEM;
1105 771
1106 atomic_set(&ht->nelems, 0); 772 atomic_set(&ht->nelems, 0);
1107 atomic_set(&ht->shift, ilog2(tbl->size));
1108 RCU_INIT_POINTER(ht->tbl, tbl);
1109 RCU_INIT_POINTER(ht->future_tbl, tbl);
1110 773
1111 if (!ht->p.hash_rnd) 774 RCU_INIT_POINTER(ht->tbl, tbl);
1112 get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
1113 775
1114 if (ht->p.grow_decision || ht->p.shrink_decision) 776 INIT_WORK(&ht->run_work, rht_deferred_worker);
1115 INIT_WORK(&ht->run_work, rht_deferred_worker);
1116 777
1117 return 0; 778 return 0;
1118} 779}
1119EXPORT_SYMBOL_GPL(rhashtable_init); 780EXPORT_SYMBOL_GPL(rhashtable_init);
1120 781
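Two details of the new rhashtable_init() deserve a worked illustration. The elasticity bound of 16: for N keys hashed into N buckets the expected longest chain grows like ln N / ln ln N, which for N = 2^32 is roughly 22.2 / 3.1, i.e. about 7, so a chain longer than 16 is far more likely to be a crafted collision attack than bad luck. And the default hash selection: when no hashfn is supplied, jhash2() is preferred whenever the key length is a whole number of u32 words. A small userspace sketch of that selection (illustrative only, not kernel code):

#include <stdio.h>

/*
 * Mirrors the default hash selection in rhashtable_init(): jhash2() hashes
 * u32 words, so it is only picked when key_len is a multiple of 4 bytes,
 * and key_len is then rescaled to a word count.
 */
static const char *pick_hashfn(unsigned int key_len, unsigned int *hashed_len)
{
	if ((key_len & 3) == 0) {		/* multiple of sizeof(u32) */
		*hashed_len = key_len / 4;
		return "jhash2 (u32 words)";
	}
	*hashed_len = key_len;
	return "jhash (bytes)";
}

int main(void)
{
	unsigned int len;
	const char *fn;

	fn = pick_hashfn(16, &len);
	printf("key_len 16 -> %s, hashed length %u\n", fn, len);	/* jhash2, 4 */

	fn = pick_hashfn(6, &len);
	printf("key_len  6 -> %s, hashed length %u\n", fn, len);	/* jhash, 6 */
	return 0;
}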
1121/** 782/**
1122 * rhashtable_destroy - destroy hash table 783 * rhashtable_free_and_destroy - free elements and destroy hash table
1123 * @ht: the hash table to destroy 784 * @ht: the hash table to destroy
785 * @free_fn: callback to release resources of element
786 * @arg: pointer passed to free_fn
787 *
788 * Stops an eventual async resize. If defined, invokes free_fn for each
 789 * element to release resources. Please note that RCU protected
 790 * readers may still be accessing the elements, so resources must be
 791 * released in a compatible manner. Then frees the bucket array.
1124 * 792 *
1125 * Frees the bucket array. This function is not rcu safe, therefore the caller 793 * This function will eventually sleep to wait for an async resize
 1126 * has to make sure that no resizing may happen by unpublishing the hashtable 794 * to complete. The caller must ensure that no further write operations
 1127 * and waiting for the quiescent cycle before releasing the bucket array. 795 * occur in parallel.
1128 */ 796 */
1129void rhashtable_destroy(struct rhashtable *ht) 797void rhashtable_free_and_destroy(struct rhashtable *ht,
798 void (*free_fn)(void *ptr, void *arg),
799 void *arg)
1130{ 800{
1131 ht->being_destroyed = true; 801 const struct bucket_table *tbl;
802 unsigned int i;
1132 803
1133 if (ht->p.grow_decision || ht->p.shrink_decision) 804 cancel_work_sync(&ht->run_work);
1134 cancel_work_sync(&ht->run_work);
1135 805
1136 mutex_lock(&ht->mutex); 806 mutex_lock(&ht->mutex);
1137 bucket_table_free(rht_dereference(ht->tbl, ht)); 807 tbl = rht_dereference(ht->tbl, ht);
808 if (free_fn) {
809 for (i = 0; i < tbl->size; i++) {
810 struct rhash_head *pos, *next;
811
812 for (pos = rht_dereference(tbl->buckets[i], ht),
813 next = !rht_is_a_nulls(pos) ?
814 rht_dereference(pos->next, ht) : NULL;
815 !rht_is_a_nulls(pos);
816 pos = next,
817 next = !rht_is_a_nulls(pos) ?
818 rht_dereference(pos->next, ht) : NULL)
819 free_fn(rht_obj(ht, pos), arg);
820 }
821 }
822
823 bucket_table_free(tbl);
1138 mutex_unlock(&ht->mutex); 824 mutex_unlock(&ht->mutex);
1139} 825}
826EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
827
828void rhashtable_destroy(struct rhashtable *ht)
829{
830 return rhashtable_free_and_destroy(ht, NULL, NULL);
831}
1140EXPORT_SYMBOL_GPL(rhashtable_destroy); 832EXPORT_SYMBOL_GPL(rhashtable_destroy);
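A minimal sketch of tearing down a table with the new helper, assuming the elements were kmalloc()ed and embed their rhash_head (the callback and struct names are illustrative):

#include <linux/rhashtable.h>
#include <linux/slab.h>

/* Called once per element; ptr is the enclosing object (what rht_obj() yields). */
static void my_free_cb(void *ptr, void *arg)
{
	kfree(ptr);
}

static void my_table_fini(struct rhashtable *ht)
{
	/* Cancels a pending async resize, frees every element, then the buckets. */
	rhashtable_free_and_destroy(ht, my_free_cb, NULL);
}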
diff --git a/lib/seq_buf.c b/lib/seq_buf.c
index 88c0854bd752..5c94e1012a91 100644
--- a/lib/seq_buf.c
+++ b/lib/seq_buf.c
@@ -61,7 +61,7 @@ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args)
61 61
62 if (s->len < s->size) { 62 if (s->len < s->size) {
63 len = vsnprintf(s->buffer + s->len, s->size - s->len, fmt, args); 63 len = vsnprintf(s->buffer + s->len, s->size - s->len, fmt, args);
64 if (seq_buf_can_fit(s, len)) { 64 if (s->len + len < s->size) {
65 s->len += len; 65 s->len += len;
66 return 0; 66 return 0;
67 } 67 }
@@ -118,7 +118,7 @@ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary)
118 118
119 if (s->len < s->size) { 119 if (s->len < s->size) {
120 ret = bstr_printf(s->buffer + s->len, len, fmt, binary); 120 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
121 if (seq_buf_can_fit(s, ret)) { 121 if (s->len + ret < s->size) {
122 s->len += ret; 122 s->len += ret;
123 return 0; 123 return 0;
124 } 124 }
diff --git a/lib/sha1.c b/lib/sha1.c
index 1df191e04a24..5a56dfd7b99d 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -198,3 +198,4 @@ void sha_init(__u32 *buf)
198 buf[3] = 0x10325476; 198 buf[3] = 0x10325476;
199 buf[4] = 0xc3d2e1f0; 199 buf[4] = 0xc3d2e1f0;
200} 200}
201EXPORT_SYMBOL(sha_init);
diff --git a/lib/string.c b/lib/string.c
index ce81aaec3839..a5792019193c 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -607,7 +607,7 @@ EXPORT_SYMBOL(memset);
607void memzero_explicit(void *s, size_t count) 607void memzero_explicit(void *s, size_t count)
608{ 608{
609 memset(s, 0, count); 609 memset(s, 0, count);
610 OPTIMIZER_HIDE_VAR(s); 610 barrier();
611} 611}
612EXPORT_SYMBOL(memzero_explicit); 612EXPORT_SYMBOL(memzero_explicit);
613 613
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 8f8c4417f228..c98ae818eb4e 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -4,6 +4,7 @@
4 * Copyright 31 August 2008 James Bottomley 4 * Copyright 31 August 2008 James Bottomley
5 * Copyright (C) 2013, Intel Corporation 5 * Copyright (C) 2013, Intel Corporation
6 */ 6 */
7#include <linux/bug.h>
7#include <linux/kernel.h> 8#include <linux/kernel.h>
8#include <linux/math64.h> 9#include <linux/math64.h>
9#include <linux/export.h> 10#include <linux/export.h>
@@ -14,7 +15,8 @@
14 15
15/** 16/**
16 * string_get_size - get the size in the specified units 17 * string_get_size - get the size in the specified units
 17 * @size: The size to be converted 18 * @size: The size to be converted, in blocks
 19 * @blk_size: Size of the block (use 1 for size in bytes)
18 * @units: units to use (powers of 1000 or 1024) 20 * @units: units to use (powers of 1000 or 1024)
19 * @buf: buffer to format to 21 * @buf: buffer to format to
20 * @len: length of buffer 22 * @len: length of buffer
@@ -24,14 +26,14 @@
24 * at least 9 bytes and will always be zero terminated. 26 * at least 9 bytes and will always be zero terminated.
25 * 27 *
26 */ 28 */
27void string_get_size(u64 size, const enum string_size_units units, 29void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
28 char *buf, int len) 30 char *buf, int len)
29{ 31{
30 static const char *const units_10[] = { 32 static const char *const units_10[] = {
31 "B", "kB", "MB", "GB", "TB", "PB", "EB" 33 "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
32 }; 34 };
33 static const char *const units_2[] = { 35 static const char *const units_2[] = {
34 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB" 36 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
35 }; 37 };
36 static const char *const *const units_str[] = { 38 static const char *const *const units_str[] = {
37 [STRING_UNITS_10] = units_10, 39 [STRING_UNITS_10] = units_10,
@@ -42,31 +44,57 @@ void string_get_size(u64 size, const enum string_size_units units,
42 [STRING_UNITS_2] = 1024, 44 [STRING_UNITS_2] = 1024,
43 }; 45 };
44 int i, j; 46 int i, j;
45 u32 remainder = 0, sf_cap; 47 u32 remainder = 0, sf_cap, exp;
46 char tmp[8]; 48 char tmp[8];
49 const char *unit;
47 50
48 tmp[0] = '\0'; 51 tmp[0] = '\0';
49 i = 0; 52 i = 0;
50 if (size >= divisor[units]) { 53 if (!size)
51 while (size >= divisor[units]) { 54 goto out;
52 remainder = do_div(size, divisor[units]);
53 i++;
54 }
55 55
56 sf_cap = size; 56 while (blk_size >= divisor[units]) {
57 for (j = 0; sf_cap*10 < 1000; j++) 57 remainder = do_div(blk_size, divisor[units]);
58 sf_cap *= 10; 58 i++;
59 }
59 60
60 if (j) { 61 exp = divisor[units] / (u32)blk_size;
61 remainder *= 1000; 62 if (size >= exp) {
62 remainder /= divisor[units]; 63 remainder = do_div(size, divisor[units]);
63 snprintf(tmp, sizeof(tmp), ".%03u", remainder); 64 remainder *= blk_size;
64 tmp[j+1] = '\0'; 65 i++;
65 } 66 } else {
67 remainder *= size;
68 }
69
70 size *= blk_size;
71 size += remainder / divisor[units];
72 remainder %= divisor[units];
73
74 while (size >= divisor[units]) {
75 remainder = do_div(size, divisor[units]);
76 i++;
66 } 77 }
67 78
79 sf_cap = size;
80 for (j = 0; sf_cap*10 < 1000; j++)
81 sf_cap *= 10;
82
83 if (j) {
84 remainder *= 1000;
85 remainder /= divisor[units];
86 snprintf(tmp, sizeof(tmp), ".%03u", remainder);
87 tmp[j+1] = '\0';
88 }
89
90 out:
91 if (i >= ARRAY_SIZE(units_2))
92 unit = "UNK";
93 else
94 unit = units_str[units][i];
95
68 snprintf(buf, len, "%u%s %s", (u32)size, 96 snprintf(buf, len, "%u%s %s", (u32)size,
69 tmp, units_str[units][i]); 97 tmp, unit);
70} 98}
71EXPORT_SYMBOL(string_get_size); 99EXPORT_SYMBOL(string_get_size);
72 100
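With the extra blk_size argument the caller hands over a size expressed in blocks plus the block size, and the helper does the scaling without overflowing 64 bits. Two illustrative calls, traced against the code above (so treat the exact rounding as a sketch rather than a guarantee):

#include <linux/kernel.h>
#include <linux/string_helpers.h>

static void show_sizes(void)
{
	char buf[16];

	/* 8192 blocks of 512 bytes = 4 MiB; should print "4.00 MiB". */
	string_get_size(8192, 512, STRING_UNITS_2, buf, sizeof(buf));
	pr_info("%s\n", buf);

	/* Plain byte counts keep working with a block size of 1: "2.00 MB". */
	string_get_size(2000000, 1, STRING_UNITS_10, buf, sizeof(buf));
	pr_info("%s\n", buf);
}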
@@ -239,29 +267,21 @@ int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
239} 267}
240EXPORT_SYMBOL(string_unescape); 268EXPORT_SYMBOL(string_unescape);
241 269
242static int escape_passthrough(unsigned char c, char **dst, size_t *osz) 270static bool escape_passthrough(unsigned char c, char **dst, char *end)
243{ 271{
244 char *out = *dst; 272 char *out = *dst;
245 273
246 if (*osz < 1) 274 if (out < end)
247 return -ENOMEM; 275 *out = c;
248 276 *dst = out + 1;
249 *out++ = c; 277 return true;
250
251 *dst = out;
252 *osz -= 1;
253
254 return 1;
255} 278}
256 279
257static int escape_space(unsigned char c, char **dst, size_t *osz) 280static bool escape_space(unsigned char c, char **dst, char *end)
258{ 281{
259 char *out = *dst; 282 char *out = *dst;
260 unsigned char to; 283 unsigned char to;
261 284
262 if (*osz < 2)
263 return -ENOMEM;
264
265 switch (c) { 285 switch (c) {
266 case '\n': 286 case '\n':
267 to = 'n'; 287 to = 'n';
@@ -279,26 +299,25 @@ static int escape_space(unsigned char c, char **dst, size_t *osz)
279 to = 'f'; 299 to = 'f';
280 break; 300 break;
281 default: 301 default:
282 return 0; 302 return false;
283 } 303 }
284 304
285 *out++ = '\\'; 305 if (out < end)
286 *out++ = to; 306 *out = '\\';
307 ++out;
308 if (out < end)
309 *out = to;
310 ++out;
287 311
288 *dst = out; 312 *dst = out;
289 *osz -= 2; 313 return true;
290
291 return 1;
292} 314}
293 315
294static int escape_special(unsigned char c, char **dst, size_t *osz) 316static bool escape_special(unsigned char c, char **dst, char *end)
295{ 317{
296 char *out = *dst; 318 char *out = *dst;
297 unsigned char to; 319 unsigned char to;
298 320
299 if (*osz < 2)
300 return -ENOMEM;
301
302 switch (c) { 321 switch (c) {
303 case '\\': 322 case '\\':
304 to = '\\'; 323 to = '\\';
@@ -310,71 +329,78 @@ static int escape_special(unsigned char c, char **dst, size_t *osz)
310 to = 'e'; 329 to = 'e';
311 break; 330 break;
312 default: 331 default:
313 return 0; 332 return false;
314 } 333 }
315 334
316 *out++ = '\\'; 335 if (out < end)
317 *out++ = to; 336 *out = '\\';
337 ++out;
338 if (out < end)
339 *out = to;
340 ++out;
318 341
319 *dst = out; 342 *dst = out;
320 *osz -= 2; 343 return true;
321
322 return 1;
323} 344}
324 345
325static int escape_null(unsigned char c, char **dst, size_t *osz) 346static bool escape_null(unsigned char c, char **dst, char *end)
326{ 347{
327 char *out = *dst; 348 char *out = *dst;
328 349
329 if (*osz < 2)
330 return -ENOMEM;
331
332 if (c) 350 if (c)
333 return 0; 351 return false;
334 352
335 *out++ = '\\'; 353 if (out < end)
336 *out++ = '0'; 354 *out = '\\';
355 ++out;
356 if (out < end)
357 *out = '0';
358 ++out;
337 359
338 *dst = out; 360 *dst = out;
339 *osz -= 2; 361 return true;
340
341 return 1;
342} 362}
343 363
344static int escape_octal(unsigned char c, char **dst, size_t *osz) 364static bool escape_octal(unsigned char c, char **dst, char *end)
345{ 365{
346 char *out = *dst; 366 char *out = *dst;
347 367
348 if (*osz < 4) 368 if (out < end)
349 return -ENOMEM; 369 *out = '\\';
350 370 ++out;
351 *out++ = '\\'; 371 if (out < end)
352 *out++ = ((c >> 6) & 0x07) + '0'; 372 *out = ((c >> 6) & 0x07) + '0';
353 *out++ = ((c >> 3) & 0x07) + '0'; 373 ++out;
354 *out++ = ((c >> 0) & 0x07) + '0'; 374 if (out < end)
375 *out = ((c >> 3) & 0x07) + '0';
376 ++out;
377 if (out < end)
378 *out = ((c >> 0) & 0x07) + '0';
379 ++out;
355 380
356 *dst = out; 381 *dst = out;
357 *osz -= 4; 382 return true;
358
359 return 1;
360} 383}
361 384
362static int escape_hex(unsigned char c, char **dst, size_t *osz) 385static bool escape_hex(unsigned char c, char **dst, char *end)
363{ 386{
364 char *out = *dst; 387 char *out = *dst;
365 388
366 if (*osz < 4) 389 if (out < end)
367 return -ENOMEM; 390 *out = '\\';
368 391 ++out;
369 *out++ = '\\'; 392 if (out < end)
370 *out++ = 'x'; 393 *out = 'x';
371 *out++ = hex_asc_hi(c); 394 ++out;
372 *out++ = hex_asc_lo(c); 395 if (out < end)
396 *out = hex_asc_hi(c);
397 ++out;
398 if (out < end)
399 *out = hex_asc_lo(c);
400 ++out;
373 401
374 *dst = out; 402 *dst = out;
375 *osz -= 4; 403 return true;
376
377 return 1;
378} 404}
379 405
380/** 406/**
@@ -426,19 +452,17 @@ static int escape_hex(unsigned char c, char **dst, size_t *osz)
426 * it if needs. 452 * it if needs.
427 * 453 *
428 * Return: 454 * Return:
429 * The amount of the characters processed to the destination buffer, or 455 * The total size of the escaped output that would be generated for
430 * %-ENOMEM if the size of buffer is not enough to put an escaped character is 456 * the given input and flags. To check whether the output was
431 * returned. 457 * truncated, compare the return value to osz. There is room left in
432 * 458 * dst for a '\0' terminator if and only if ret < osz.
433 * Even in the case of error @dst pointer will be updated to point to the byte
434 * after the last processed character.
435 */ 459 */
436int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, 460int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
437 unsigned int flags, const char *esc) 461 unsigned int flags, const char *esc)
438{ 462{
439 char *out = *dst, *p = out; 463 char *p = dst;
464 char *end = p + osz;
440 bool is_dict = esc && *esc; 465 bool is_dict = esc && *esc;
441 int ret = 0;
442 466
443 while (isz--) { 467 while (isz--) {
444 unsigned char c = *src++; 468 unsigned char c = *src++;
@@ -458,55 +482,26 @@ int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
458 (is_dict && !strchr(esc, c))) { 482 (is_dict && !strchr(esc, c))) {
459 /* do nothing */ 483 /* do nothing */
460 } else { 484 } else {
461 if (flags & ESCAPE_SPACE) { 485 if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
462 ret = escape_space(c, &p, &osz); 486 continue;
463 if (ret < 0) 487
464 break; 488 if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
465 if (ret > 0) 489 continue;
466 continue; 490
467 } 491 if (flags & ESCAPE_NULL && escape_null(c, &p, end))
468 492 continue;
469 if (flags & ESCAPE_SPECIAL) {
470 ret = escape_special(c, &p, &osz);
471 if (ret < 0)
472 break;
473 if (ret > 0)
474 continue;
475 }
476
477 if (flags & ESCAPE_NULL) {
478 ret = escape_null(c, &p, &osz);
479 if (ret < 0)
480 break;
481 if (ret > 0)
482 continue;
483 }
484 493
485 /* ESCAPE_OCTAL and ESCAPE_HEX always go last */ 494 /* ESCAPE_OCTAL and ESCAPE_HEX always go last */
486 if (flags & ESCAPE_OCTAL) { 495 if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
487 ret = escape_octal(c, &p, &osz);
488 if (ret < 0)
489 break;
490 continue; 496 continue;
491 } 497
492 if (flags & ESCAPE_HEX) { 498 if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
493 ret = escape_hex(c, &p, &osz);
494 if (ret < 0)
495 break;
496 continue; 499 continue;
497 }
498 } 500 }
499 501
500 ret = escape_passthrough(c, &p, &osz); 502 escape_passthrough(c, &p, end);
501 if (ret < 0)
502 break;
503 } 503 }
504 504
505 *dst = p; 505 return p - dst;
506
507 if (ret < 0)
508 return ret;
509
510 return p - out;
511} 506}
512EXPORT_SYMBOL(string_escape_mem); 507EXPORT_SYMBOL(string_escape_mem);
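Under the new contract the function cannot fail; it always reports the full escaped length, even when dst is too small, which enables the usual measure-then-allocate pattern. A sketch of such a caller (kernel context assumed; the NULL/0 probe in the first pass is the same trick the updated self-test below uses):

#include <linux/slab.h>
#include <linux/string_helpers.h>

static char *my_escape(const char *src, size_t len, gfp_t gfp)
{
	size_t need;
	char *out;

	/* Pass 1: osz == 0 only measures the escaped length, nothing is written. */
	need = string_escape_mem(src, len, NULL, 0, ESCAPE_ANY_NP, NULL);

	out = kmalloc(need + 1, gfp);		/* +1 for the terminator */
	if (!out)
		return NULL;

	/* Pass 2: the buffer is exactly big enough, so no truncation occurs;
	 * terminate by hand since the helper does not.
	 */
	out[string_escape_mem(src, len, out, need, ESCAPE_ANY_NP, NULL)] = '\0';
	return out;
}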
diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c
index daf29a390a89..c227cc43ec0a 100644
--- a/lib/test-hexdump.c
+++ b/lib/test-hexdump.c
@@ -18,26 +18,26 @@ static const unsigned char data_b[] = {
18 18
19static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C..."; 19static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C...";
20 20
21static const char *test_data_1_le[] __initconst = { 21static const char * const test_data_1_le[] __initconst = {
22 "be", "32", "db", "7b", "0a", "18", "93", "b2", 22 "be", "32", "db", "7b", "0a", "18", "93", "b2",
23 "70", "ba", "c4", "24", "7d", "83", "34", "9b", 23 "70", "ba", "c4", "24", "7d", "83", "34", "9b",
24 "a6", "9c", "31", "ad", "9c", "0f", "ac", "e9", 24 "a6", "9c", "31", "ad", "9c", "0f", "ac", "e9",
25 "4c", "d1", "19", "99", "43", "b1", "af", "0c", 25 "4c", "d1", "19", "99", "43", "b1", "af", "0c",
26}; 26};
27 27
28static const char *test_data_2_le[] __initconst = { 28static const char *test_data_2_le[] __initdata = {
29 "32be", "7bdb", "180a", "b293", 29 "32be", "7bdb", "180a", "b293",
30 "ba70", "24c4", "837d", "9b34", 30 "ba70", "24c4", "837d", "9b34",
31 "9ca6", "ad31", "0f9c", "e9ac", 31 "9ca6", "ad31", "0f9c", "e9ac",
32 "d14c", "9919", "b143", "0caf", 32 "d14c", "9919", "b143", "0caf",
33}; 33};
34 34
35static const char *test_data_4_le[] __initconst = { 35static const char *test_data_4_le[] __initdata = {
36 "7bdb32be", "b293180a", "24c4ba70", "9b34837d", 36 "7bdb32be", "b293180a", "24c4ba70", "9b34837d",
37 "ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143", 37 "ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143",
38}; 38};
39 39
40static const char *test_data_8_le[] __initconst = { 40static const char *test_data_8_le[] __initdata = {
41 "b293180a7bdb32be", "9b34837d24c4ba70", 41 "b293180a7bdb32be", "9b34837d24c4ba70",
42 "e9ac0f9cad319ca6", "0cafb1439919d14c", 42 "e9ac0f9cad319ca6", "0cafb1439919d14c",
43}; 43};
@@ -48,7 +48,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize,
48 char test[32 * 3 + 2 + 32 + 1]; 48 char test[32 * 3 + 2 + 32 + 1];
49 char real[32 * 3 + 2 + 32 + 1]; 49 char real[32 * 3 + 2 + 32 + 1];
50 char *p; 50 char *p;
51 const char **result; 51 const char * const *result;
52 size_t l = len; 52 size_t l = len;
53 int gs = groupsize, rs = rowsize; 53 int gs = groupsize, rs = rowsize;
54 unsigned int i; 54 unsigned int i;
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index ab0d30e1e18f..8e376efd88a4 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -260,16 +260,28 @@ static __init const char *test_string_find_match(const struct test_string_2 *s2,
260 return NULL; 260 return NULL;
261} 261}
262 262
263static __init void
264test_string_escape_overflow(const char *in, int p, unsigned int flags, const char *esc,
265 int q_test, const char *name)
266{
267 int q_real;
268
269 q_real = string_escape_mem(in, p, NULL, 0, flags, esc);
270 if (q_real != q_test)
271 pr_warn("Test '%s' failed: flags = %u, osz = 0, expected %d, got %d\n",
272 name, flags, q_test, q_real);
273}
274
263static __init void test_string_escape(const char *name, 275static __init void test_string_escape(const char *name,
264 const struct test_string_2 *s2, 276 const struct test_string_2 *s2,
265 unsigned int flags, const char *esc) 277 unsigned int flags, const char *esc)
266{ 278{
267 int q_real = 512; 279 size_t out_size = 512;
268 char *out_test = kmalloc(q_real, GFP_KERNEL); 280 char *out_test = kmalloc(out_size, GFP_KERNEL);
269 char *out_real = kmalloc(q_real, GFP_KERNEL); 281 char *out_real = kmalloc(out_size, GFP_KERNEL);
270 char *in = kmalloc(256, GFP_KERNEL); 282 char *in = kmalloc(256, GFP_KERNEL);
271 char *buf = out_real;
272 int p = 0, q_test = 0; 283 int p = 0, q_test = 0;
284 int q_real;
273 285
274 if (!out_test || !out_real || !in) 286 if (!out_test || !out_real || !in)
275 goto out; 287 goto out;
@@ -301,29 +313,19 @@ static __init void test_string_escape(const char *name,
301 q_test += len; 313 q_test += len;
302 } 314 }
303 315
304 q_real = string_escape_mem(in, p, &buf, q_real, flags, esc); 316 q_real = string_escape_mem(in, p, out_real, out_size, flags, esc);
305 317
306 test_string_check_buf(name, flags, in, p, out_real, q_real, out_test, 318 test_string_check_buf(name, flags, in, p, out_real, q_real, out_test,
307 q_test); 319 q_test);
320
321 test_string_escape_overflow(in, p, flags, esc, q_test, name);
322
308out: 323out:
309 kfree(in); 324 kfree(in);
310 kfree(out_real); 325 kfree(out_real);
311 kfree(out_test); 326 kfree(out_test);
312} 327}
313 328
314static __init void test_string_escape_nomem(void)
315{
316 char *in = "\eb \\C\007\"\x90\r]";
317 char out[64], *buf = out;
318 int rc = -ENOMEM, ret;
319
320 ret = string_escape_str_any_np(in, &buf, strlen(in), NULL);
321 if (ret == rc)
322 return;
323
324 pr_err("Test 'escape nomem' failed: got %d instead of %d\n", ret, rc);
325}
326
327static int __init test_string_helpers_init(void) 329static int __init test_string_helpers_init(void)
328{ 330{
329 unsigned int i; 331 unsigned int i;
@@ -342,8 +344,6 @@ static int __init test_string_helpers_init(void)
342 for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) 344 for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++)
343 test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1); 345 test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1);
344 346
345 test_string_escape_nomem();
346
347 return -EINVAL; 347 return -EINVAL;
348} 348}
349module_init(test_string_helpers_init); 349module_init(test_string_helpers_init);
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 1dfeba73fc74..b2957540d3c7 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,6 +38,15 @@ struct test_obj {
38 struct rhash_head node; 38 struct rhash_head node;
39}; 39};
40 40
41static const struct rhashtable_params test_rht_params = {
42 .nelem_hint = TEST_HT_SIZE,
43 .head_offset = offsetof(struct test_obj, node),
44 .key_offset = offsetof(struct test_obj, value),
45 .key_len = sizeof(int),
46 .hashfn = jhash,
47 .nulls_base = (3U << RHT_BASE_SHIFT),
48};
49
41static int __init test_rht_lookup(struct rhashtable *ht) 50static int __init test_rht_lookup(struct rhashtable *ht)
42{ 51{
43 unsigned int i; 52 unsigned int i;
@@ -47,7 +56,7 @@ static int __init test_rht_lookup(struct rhashtable *ht)
47 bool expected = !(i % 2); 56 bool expected = !(i % 2);
48 u32 key = i; 57 u32 key = i;
49 58
50 obj = rhashtable_lookup(ht, &key); 59 obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
51 60
52 if (expected && !obj) { 61 if (expected && !obj) {
53 pr_warn("Test failed: Could not find key %u\n", key); 62 pr_warn("Test failed: Could not find key %u\n", key);
@@ -80,7 +89,7 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet)
80 rcu_cnt = cnt = 0; 89 rcu_cnt = cnt = 0;
81 90
82 if (!quiet) 91 if (!quiet)
83 pr_info(" [%#4x/%zu]", i, tbl->size); 92 pr_info(" [%#4x/%u]", i, tbl->size);
84 93
85 rht_for_each_entry_rcu(obj, pos, tbl, i, node) { 94 rht_for_each_entry_rcu(obj, pos, tbl, i, node) {
86 cnt++; 95 cnt++;
@@ -133,7 +142,11 @@ static int __init test_rhashtable(struct rhashtable *ht)
133 obj->ptr = TEST_PTR; 142 obj->ptr = TEST_PTR;
134 obj->value = i * 2; 143 obj->value = i * 2;
135 144
136 rhashtable_insert(ht, &obj->node); 145 err = rhashtable_insert_fast(ht, &obj->node, test_rht_params);
146 if (err) {
147 kfree(obj);
148 goto error;
149 }
137 } 150 }
138 151
139 rcu_read_lock(); 152 rcu_read_lock();
@@ -141,30 +154,6 @@ static int __init test_rhashtable(struct rhashtable *ht)
141 test_rht_lookup(ht); 154 test_rht_lookup(ht);
142 rcu_read_unlock(); 155 rcu_read_unlock();
143 156
144 for (i = 0; i < TEST_NEXPANDS; i++) {
145 pr_info(" Table expansion iteration %u...\n", i);
146 mutex_lock(&ht->mutex);
147 rhashtable_expand(ht);
148 mutex_unlock(&ht->mutex);
149
150 rcu_read_lock();
151 pr_info(" Verifying lookups...\n");
152 test_rht_lookup(ht);
153 rcu_read_unlock();
154 }
155
156 for (i = 0; i < TEST_NEXPANDS; i++) {
157 pr_info(" Table shrinkage iteration %u...\n", i);
158 mutex_lock(&ht->mutex);
159 rhashtable_shrink(ht);
160 mutex_unlock(&ht->mutex);
161
162 rcu_read_lock();
163 pr_info(" Verifying lookups...\n");
164 test_rht_lookup(ht);
165 rcu_read_unlock();
166 }
167
168 rcu_read_lock(); 157 rcu_read_lock();
169 test_bucket_stats(ht, true); 158 test_bucket_stats(ht, true);
170 rcu_read_unlock(); 159 rcu_read_unlock();
@@ -173,10 +162,10 @@ static int __init test_rhashtable(struct rhashtable *ht)
173 for (i = 0; i < TEST_ENTRIES; i++) { 162 for (i = 0; i < TEST_ENTRIES; i++) {
174 u32 key = i * 2; 163 u32 key = i * 2;
175 164
176 obj = rhashtable_lookup(ht, &key); 165 obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
177 BUG_ON(!obj); 166 BUG_ON(!obj);
178 167
179 rhashtable_remove(ht, &obj->node); 168 rhashtable_remove_fast(ht, &obj->node, test_rht_params);
180 kfree(obj); 169 kfree(obj);
181 } 170 }
182 171
@@ -191,24 +180,15 @@ error:
191 return err; 180 return err;
192} 181}
193 182
183static struct rhashtable ht;
184
194static int __init test_rht_init(void) 185static int __init test_rht_init(void)
195{ 186{
196 struct rhashtable ht;
197 struct rhashtable_params params = {
198 .nelem_hint = TEST_HT_SIZE,
199 .head_offset = offsetof(struct test_obj, node),
200 .key_offset = offsetof(struct test_obj, value),
201 .key_len = sizeof(int),
202 .hashfn = jhash,
203 .nulls_base = (3U << RHT_BASE_SHIFT),
204 .grow_decision = rht_grow_above_75,
205 .shrink_decision = rht_shrink_below_30,
206 };
207 int err; 187 int err;
208 188
209 pr_info("Running resizable hashtable tests...\n"); 189 pr_info("Running resizable hashtable tests...\n");
210 190
211 err = rhashtable_init(&ht, &params); 191 err = rhashtable_init(&ht, &test_rht_params);
212 if (err < 0) { 192 if (err < 0) {
213 pr_warn("Test failed: Unable to initialize hashtable: %d\n", 193 pr_warn("Test failed: Unable to initialize hashtable: %d\n",
214 err); 194 err);
@@ -222,6 +202,11 @@ static int __init test_rht_init(void)
222 return err; 202 return err;
223} 203}
224 204
205static void __exit test_rht_exit(void)
206{
207}
208
225module_init(test_rht_init); 209module_init(test_rht_init);
210module_exit(test_rht_exit);
226 211
227MODULE_LICENSE("GPL v2"); 212MODULE_LICENSE("GPL v2");
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index b235c96167d3..da39c608a28c 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <stdarg.h> 19#include <stdarg.h>
20#include <linux/clk-provider.h>
20#include <linux/module.h> /* for KSYM_SYMBOL_LEN */ 21#include <linux/module.h> /* for KSYM_SYMBOL_LEN */
21#include <linux/types.h> 22#include <linux/types.h>
22#include <linux/string.h> 23#include <linux/string.h>
@@ -32,6 +33,7 @@
32 33
33#include <asm/page.h> /* for PAGE_SIZE */ 34#include <asm/page.h> /* for PAGE_SIZE */
34#include <asm/sections.h> /* for dereference_function_descriptor() */ 35#include <asm/sections.h> /* for dereference_function_descriptor() */
36#include <asm/byteorder.h> /* cpu_to_le16 */
35 37
36#include <linux/string_helpers.h> 38#include <linux/string_helpers.h>
37#include "kstrtox.h" 39#include "kstrtox.h"
@@ -121,142 +123,145 @@ int skip_atoi(const char **s)
121 return i; 123 return i;
122} 124}
123 125
124/* Decimal conversion is by far the most typical, and is used 126/*
125 * for /proc and /sys data. This directly impacts e.g. top performance 127 * Decimal conversion is by far the most typical, and is used for
126 * with many processes running. We optimize it for speed 128 * /proc and /sys data. This directly impacts e.g. top performance
127 * using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html> 129 * with many processes running. We optimize it for speed by emitting
128 * (with permission from the author, Douglas W. Jones). 130 * two characters at a time, using a 200 byte lookup table. This
131 * roughly halves the number of multiplications compared to computing
132 * the digits one at a time. Implementation strongly inspired by the
133 * previous version, which in turn used ideas described at
134 * <http://www.cs.uiowa.edu/~jones/bcd/divide.html> (with permission
135 * from the author, Douglas W. Jones).
136 *
137 * It turns out there is precisely one 26 bit fixed-point
138 * approximation a of 64/100 for which x/100 == (x * (u64)a) >> 32
139 * holds for all x in [0, 10^8-1], namely a = 0x28f5c29. The actual
140 * range happens to be somewhat larger (x <= 1073741898), but that's
141 * irrelevant for our purpose.
142 *
143 * For dividing a number in the range [10^4, 10^6-1] by 100, we still
144 * need a 32x32->64 bit multiply, so we simply use the same constant.
145 *
146 * For dividing a number in the range [100, 10^4-1] by 100, there are
147 * several options. The simplest is (x * 0x147b) >> 19, which is valid
148 * for all x <= 43698.
129 */ 149 */
130 150
131#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 151static const u16 decpair[100] = {
132/* Formats correctly any integer in [0, 999999999] */ 152#define _(x) (__force u16) cpu_to_le16(((x % 10) | ((x / 10) << 8)) + 0x3030)
153 _( 0), _( 1), _( 2), _( 3), _( 4), _( 5), _( 6), _( 7), _( 8), _( 9),
154 _(10), _(11), _(12), _(13), _(14), _(15), _(16), _(17), _(18), _(19),
155 _(20), _(21), _(22), _(23), _(24), _(25), _(26), _(27), _(28), _(29),
156 _(30), _(31), _(32), _(33), _(34), _(35), _(36), _(37), _(38), _(39),
157 _(40), _(41), _(42), _(43), _(44), _(45), _(46), _(47), _(48), _(49),
158 _(50), _(51), _(52), _(53), _(54), _(55), _(56), _(57), _(58), _(59),
159 _(60), _(61), _(62), _(63), _(64), _(65), _(66), _(67), _(68), _(69),
160 _(70), _(71), _(72), _(73), _(74), _(75), _(76), _(77), _(78), _(79),
161 _(80), _(81), _(82), _(83), _(84), _(85), _(86), _(87), _(88), _(89),
162 _(90), _(91), _(92), _(93), _(94), _(95), _(96), _(97), _(98), _(99),
163#undef _
164};
165
166/*
167 * This will print a single '0' even if r == 0, since we would
168 * immediately jump to out_r where two 0s would be written but only
169 * one of them accounted for in buf. This is needed by ip4_string
170 * below. All other callers pass a non-zero value of r.
171*/
133static noinline_for_stack 172static noinline_for_stack
134char *put_dec_full9(char *buf, unsigned q) 173char *put_dec_trunc8(char *buf, unsigned r)
135{ 174{
136 unsigned r; 175 unsigned q;
137 176
138 /* 177 /* 1 <= r < 10^8 */
139 * Possible ways to approx. divide by 10 178 if (r < 100)
140 * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit) 179 goto out_r;
141 * (x * 0xcccd) >> 19 x < 81920 (x < 262149 when 64-bit mul) 180
142 * (x * 0x6667) >> 18 x < 43699 181 /* 100 <= r < 10^8 */
143 * (x * 0x3334) >> 17 x < 16389 182 q = (r * (u64)0x28f5c29) >> 32;
144 * (x * 0x199a) >> 16 x < 16389 183 *((u16 *)buf) = decpair[r - 100*q];
145 * (x * 0x0ccd) >> 15 x < 16389 184 buf += 2;
146 * (x * 0x0667) >> 14 x < 2739 185
147 * (x * 0x0334) >> 13 x < 1029 186 /* 1 <= q < 10^6 */
148 * (x * 0x019a) >> 12 x < 1029 187 if (q < 100)
149 * (x * 0x00cd) >> 11 x < 1029 shorter code than * 0x67 (on i386) 188 goto out_q;
150 * (x * 0x0067) >> 10 x < 179 189
151 * (x * 0x0034) >> 9 x < 69 same 190 /* 100 <= q < 10^6 */
152 * (x * 0x001a) >> 8 x < 69 same 191 r = (q * (u64)0x28f5c29) >> 32;
153 * (x * 0x000d) >> 7 x < 69 same, shortest code (on i386) 192 *((u16 *)buf) = decpair[q - 100*r];
154 * (x * 0x0007) >> 6 x < 19 193 buf += 2;
155 * See <http://www.cs.uiowa.edu/~jones/bcd/divide.html> 194
156 */ 195 /* 1 <= r < 10^4 */
157 r = (q * (uint64_t)0x1999999a) >> 32; 196 if (r < 100)
158 *buf++ = (q - 10 * r) + '0'; /* 1 */ 197 goto out_r;
159 q = (r * (uint64_t)0x1999999a) >> 32; 198
160 *buf++ = (r - 10 * q) + '0'; /* 2 */ 199 /* 100 <= r < 10^4 */
161 r = (q * (uint64_t)0x1999999a) >> 32; 200 q = (r * 0x147b) >> 19;
162 *buf++ = (q - 10 * r) + '0'; /* 3 */ 201 *((u16 *)buf) = decpair[r - 100*q];
163 q = (r * (uint64_t)0x1999999a) >> 32; 202 buf += 2;
164 *buf++ = (r - 10 * q) + '0'; /* 4 */ 203out_q:
165 r = (q * (uint64_t)0x1999999a) >> 32; 204 /* 1 <= q < 100 */
166 *buf++ = (q - 10 * r) + '0'; /* 5 */ 205 r = q;
167 /* Now value is under 10000, can avoid 64-bit multiply */ 206out_r:
168 q = (r * 0x199a) >> 16; 207 /* 1 <= r < 100 */
169 *buf++ = (r - 10 * q) + '0'; /* 6 */ 208 *((u16 *)buf) = decpair[r];
170 r = (q * 0xcd) >> 11; 209 buf += r < 10 ? 1 : 2;
171 *buf++ = (q - 10 * r) + '0'; /* 7 */
172 q = (r * 0xcd) >> 11;
173 *buf++ = (r - 10 * q) + '0'; /* 8 */
174 *buf++ = q + '0'; /* 9 */
175 return buf; 210 return buf;
176} 211}
177#endif
178 212
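The two facts the new code leans on, that (r * 0x28f5c29) >> 32 equals r / 100 for every r below 10^8 and that decpair[] hands back both ASCII digits of a two-digit value at once, are easy to verify outside the kernel. A throwaway userspace check might look like this (illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t r;

	/* Exhaustively verify the 26-bit fixed-point reciprocal of 100. */
	for (r = 0; r < 100000000; r++) {
		uint32_t q = ((uint64_t)r * 0x28f5c29) >> 32;

		if (q != r / 100) {
			printf("mismatch at %u\n", r);
			return 1;
		}
	}
	printf("x/100 == (x * 0x28f5c29) >> 32 for all x < 10^8\n");

	/*
	 * decpair[] stores the ones digit in the low byte and the tens digit
	 * in the high byte, e.g. decpair[37] == 0x3337 before cpu_to_le16().
	 * That looks reversed, but put_dec() emits least significant digits
	 * first and num_to_str() reverses the buffer at the end.
	 */
	printf("pair(37) = 0x%04x\n",
	       (unsigned int)(((37 % 10) | ((37 / 10) << 8)) + 0x3030));
	return 0;
}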
179/* Similar to above but do not pad with zeros. 213#if BITS_PER_LONG == 64 && BITS_PER_LONG_LONG == 64
180 * Code can be easily arranged to print 9 digits too, but our callers
181 * always call put_dec_full9() instead when the number has 9 decimal digits.
182 */
183static noinline_for_stack 214static noinline_for_stack
184char *put_dec_trunc8(char *buf, unsigned r) 215char *put_dec_full8(char *buf, unsigned r)
185{ 216{
186 unsigned q; 217 unsigned q;
187 218
188 /* Copy of previous function's body with added early returns */ 219 /* 0 <= r < 10^8 */
189 while (r >= 10000) { 220 q = (r * (u64)0x28f5c29) >> 32;
190 q = r + '0'; 221 *((u16 *)buf) = decpair[r - 100*q];
191 r = (r * (uint64_t)0x1999999a) >> 32; 222 buf += 2;
192 *buf++ = q - 10*r;
193 }
194
195 q = (r * 0x199a) >> 16; /* r <= 9999 */
196 *buf++ = (r - 10 * q) + '0';
197 if (q == 0)
198 return buf;
199 r = (q * 0xcd) >> 11; /* q <= 999 */
200 *buf++ = (q - 10 * r) + '0';
201 if (r == 0)
202 return buf;
203 q = (r * 0xcd) >> 11; /* r <= 99 */
204 *buf++ = (r - 10 * q) + '0';
205 if (q == 0)
206 return buf;
207 *buf++ = q + '0'; /* q <= 9 */
208 return buf;
209}
210 223
211/* There are two algorithms to print larger numbers. 224 /* 0 <= q < 10^6 */
212 * One is generic: divide by 1000000000 and repeatedly print 225 r = (q * (u64)0x28f5c29) >> 32;
213 * groups of (up to) 9 digits. It's conceptually simple, 226 *((u16 *)buf) = decpair[q - 100*r];
214 * but requires a (unsigned long long) / 1000000000 division. 227 buf += 2;
215 *
216 * Second algorithm splits 64-bit unsigned long long into 16-bit chunks,
217 * manipulates them cleverly and generates groups of 4 decimal digits.
218 * It so happens that it does NOT require long long division.
219 *
220 * If long is > 32 bits, division of 64-bit values is relatively easy,
221 * and we will use the first algorithm.
222 * If long long is > 64 bits (strange architecture with VERY large long long),
223 * second algorithm can't be used, and we again use the first one.
224 *
225 * Else (if long is 32 bits and long long is 64 bits) we use second one.
226 */
227 228
228#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 229 /* 0 <= r < 10^4 */
230 q = (r * 0x147b) >> 19;
231 *((u16 *)buf) = decpair[r - 100*q];
232 buf += 2;
229 233
230/* First algorithm: generic */ 234 /* 0 <= q < 100 */
235 *((u16 *)buf) = decpair[q];
236 buf += 2;
237 return buf;
238}
231 239
232static 240static noinline_for_stack
233char *put_dec(char *buf, unsigned long long n) 241char *put_dec(char *buf, unsigned long long n)
234{ 242{
235 if (n >= 100*1000*1000) { 243 if (n >= 100*1000*1000)
236 while (n >= 1000*1000*1000) 244 buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
237 buf = put_dec_full9(buf, do_div(n, 1000*1000*1000)); 245 /* 1 <= n <= 1.6e11 */
238 if (n >= 100*1000*1000) 246 if (n >= 100*1000*1000)
239 return put_dec_full9(buf, n); 247 buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
240 } 248 /* 1 <= n < 1e8 */
241 return put_dec_trunc8(buf, n); 249 return put_dec_trunc8(buf, n);
242} 250}
243 251
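Worked through on a concrete value: for n = 1234567890123 the first do_div(n, 10^8) emits the digits of the remainder 67890123 as a zero-padded group of eight (least significant digit first) and leaves n = 12345; that is below 10^8, so the second division is skipped and put_dec_trunc8() emits 12345 without padding, after which num_to_str() reverses the whole buffer into "1234567890123". The largest 64-bit value, about 1.8e19 with 20 digits, needs exactly two full eight-digit groups plus a four-digit remainder.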
244#else 252#elif BITS_PER_LONG == 32 && BITS_PER_LONG_LONG == 64
245
246/* Second algorithm: valid only for 64-bit long longs */
247 253
248/* See comment in put_dec_full9 for choice of constants */ 254static void
249static noinline_for_stack 255put_dec_full4(char *buf, unsigned r)
250void put_dec_full4(char *buf, unsigned q)
251{ 256{
252 unsigned r; 257 unsigned q;
253 r = (q * 0xccd) >> 15; 258
254 buf[0] = (q - 10 * r) + '0'; 259 /* 0 <= r < 10^4 */
255 q = (r * 0xcd) >> 11; 260 q = (r * 0x147b) >> 19;
256 buf[1] = (r - 10 * q) + '0'; 261 *((u16 *)buf) = decpair[r - 100*q];
257 r = (q * 0xcd) >> 11; 262 buf += 2;
258 buf[2] = (q - 10 * r) + '0'; 263 /* 0 <= q < 100 */
259 buf[3] = r + '0'; 264 *((u16 *)buf) = decpair[q];
260} 265}
261 266
262/* 267/*
@@ -264,9 +269,9 @@ void put_dec_full4(char *buf, unsigned q)
264 * The approximation x/10000 == (x * 0x346DC5D7) >> 43 269 * The approximation x/10000 == (x * 0x346DC5D7) >> 43
265 * holds for all x < 1,128,869,999. The largest value this 270 * holds for all x < 1,128,869,999. The largest value this
266 * helper will ever be asked to convert is 1,125,520,955. 271 * helper will ever be asked to convert is 1,125,520,955.
267 * (d1 in the put_dec code, assuming n is all-ones). 272 * (second call in the put_dec code, assuming n is all-ones).
268 */ 273 */
269static 274static noinline_for_stack
270unsigned put_dec_helper4(char *buf, unsigned x) 275unsigned put_dec_helper4(char *buf, unsigned x)
271{ 276{
272 uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43; 277 uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43;
@@ -293,6 +298,8 @@ char *put_dec(char *buf, unsigned long long n)
293 d2 = (h ) & 0xffff; 298 d2 = (h ) & 0xffff;
294 d3 = (h >> 16); /* implicit "& 0xffff" */ 299 d3 = (h >> 16); /* implicit "& 0xffff" */
295 300
301 /* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0
302 = 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */
296 q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); 303 q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff);
297 q = put_dec_helper4(buf, q); 304 q = put_dec_helper4(buf, q);
298 305
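
The coefficients in the line above are just the powers of 2^16 reduced modulo 10^4: 65536 ≡ 5536, 4294967296 ≡ 7296 and 281474976710656 ≡ 656 (mod 10000), so q carries exactly the contribution of all four 16-bit chunks to the lowest four decimal digits; everything above that is pushed into the next group by put_dec_helper4(). A quick self-contained check of the identity:

#include <assert.h>
#include <stdint.h>

static void check_low_group(uint64_t n)
{
	unsigned d0 = n & 0xffff;
	unsigned d1 = (n >> 16) & 0xffff;
	unsigned d2 = (n >> 32) & 0xffff;
	unsigned d3 = (n >> 48) & 0xffff;
	unsigned q  = 656 * d3 + 7296 * d2 + 5536 * d1 + d0;

	/* q is congruent to n modulo 10^4, and small enough (< 2^30)
	 * for put_dec_helper4() to digest */
	assert(q % 10000 == n % 10000);
}
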
@@ -322,7 +329,8 @@ char *put_dec(char *buf, unsigned long long n)
322 */ 329 */
323int num_to_str(char *buf, int size, unsigned long long num) 330int num_to_str(char *buf, int size, unsigned long long num)
324{ 331{
325 char tmp[sizeof(num) * 3]; 332 /* put_dec requires 2-byte alignment of the buffer. */
333 char tmp[sizeof(num) * 3] __aligned(2);
326 int idx, len; 334 int idx, len;
327 335
328 /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */ 336 /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */
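
The __aligned(2) is not cosmetic: the new put_dec()/put_dec_full8() store two bytes at a time through a u16 *, so every scratch buffer they touch must be 2-byte aligned; number() gets the same annotation below, and temp[] in ip4_string() grows from 3 to 4 bytes for the same reason. The size itself is unchanged, sizeof(num) * 3 = 24 bytes leaving slack beyond the at most 20 digits of a u64. Outside the kernel the same declaration would presumably be spelled with the compiler attribute directly:

/* 2-byte-aligned scratch buffer for the reversed decimal digits */
char tmp[sizeof(unsigned long long) * 3] __attribute__((aligned(2)));
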
@@ -340,11 +348,11 @@ int num_to_str(char *buf, int size, unsigned long long num)
340 return len; 348 return len;
341} 349}
342 350
343#define ZEROPAD 1 /* pad with zero */ 351#define SIGN 1 /* unsigned/signed, must be 1 */
344#define SIGN 2 /* unsigned/signed long */ 352#define LEFT 2 /* left justified */
345#define PLUS 4 /* show plus */ 353#define PLUS 4 /* show plus */
346#define SPACE 8 /* space if plus */ 354#define SPACE 8 /* space if plus */
347#define LEFT 16 /* left justified */ 355#define ZEROPAD 16 /* pad with zero, must be 16 == '0' - ' ' */
348#define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */ 356#define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */
349#define SPECIAL 64 /* prefix hex with "0x", octal with "0" */ 357#define SPECIAL 64 /* prefix hex with "0x", octal with "0" */
350 358
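
The flag values are now load-bearing rather than arbitrary: ZEROPAD moves to 16 because '0' - ' ' == 16, which lets number() below pick the padding character with an addition instead of a conditional, and SIGN moves to 1 for an analogous trick in the qualifier-parsing hunk further down (see the sketch after that hunk). A small stand-alone illustration of the ZEROPAD identity:

#include <assert.h>

#define ZEROPAD	16	/* must stay 16 == '0' - ' ' */

static void zeropad_trick(unsigned int flags)
{
	char pad = ' ' + (flags & ZEROPAD);	/* ' ' or '0', no branch */

	assert(pad == ((flags & ZEROPAD) ? '0' : ' '));
}
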
@@ -383,10 +391,8 @@ static noinline_for_stack
383char *number(char *buf, char *end, unsigned long long num, 391char *number(char *buf, char *end, unsigned long long num,
384 struct printf_spec spec) 392 struct printf_spec spec)
385{ 393{
386 /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ 394 /* put_dec requires 2-byte alignment of the buffer. */
387 static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ 395 char tmp[3 * sizeof(num)] __aligned(2);
388
389 char tmp[66];
390 char sign; 396 char sign;
391 char locase; 397 char locase;
392 int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); 398 int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
@@ -422,12 +428,7 @@ char *number(char *buf, char *end, unsigned long long num,
422 /* generate full string in tmp[], in reverse order */ 428 /* generate full string in tmp[], in reverse order */
423 i = 0; 429 i = 0;
424 if (num < spec.base) 430 if (num < spec.base)
425 tmp[i++] = digits[num] | locase; 431 tmp[i++] = hex_asc_upper[num] | locase;
426 /* Generic code, for any base:
427 else do {
428 tmp[i++] = (digits[do_div(num,base)] | locase);
429 } while (num != 0);
430 */
431 else if (spec.base != 10) { /* 8 or 16 */ 432 else if (spec.base != 10) { /* 8 or 16 */
432 int mask = spec.base - 1; 433 int mask = spec.base - 1;
433 int shift = 3; 434 int shift = 3;
@@ -435,7 +436,7 @@ char *number(char *buf, char *end, unsigned long long num,
435 if (spec.base == 16) 436 if (spec.base == 16)
436 shift = 4; 437 shift = 4;
437 do { 438 do {
438 tmp[i++] = (digits[((unsigned char)num) & mask] | locase); 439 tmp[i++] = (hex_asc_upper[((unsigned char)num) & mask] | locase);
439 num >>= shift; 440 num >>= shift;
440 } while (num); 441 } while (num);
441 } else { /* base 10 */ 442 } else { /* base 10 */
@@ -447,7 +448,7 @@ char *number(char *buf, char *end, unsigned long long num,
447 spec.precision = i; 448 spec.precision = i;
448 /* leading space padding */ 449 /* leading space padding */
449 spec.field_width -= spec.precision; 450 spec.field_width -= spec.precision;
450 if (!(spec.flags & (ZEROPAD+LEFT))) { 451 if (!(spec.flags & (ZEROPAD | LEFT))) {
451 while (--spec.field_width >= 0) { 452 while (--spec.field_width >= 0) {
452 if (buf < end) 453 if (buf < end)
453 *buf = ' '; 454 *buf = ' ';
@@ -475,7 +476,8 @@ char *number(char *buf, char *end, unsigned long long num,
475 } 476 }
476 /* zero or space padding */ 477 /* zero or space padding */
477 if (!(spec.flags & LEFT)) { 478 if (!(spec.flags & LEFT)) {
478 char c = (spec.flags & ZEROPAD) ? '0' : ' '; 479 char c = ' ' + (spec.flags & ZEROPAD);
480 BUILD_BUG_ON(' ' + ZEROPAD != '0');
479 while (--spec.field_width >= 0) { 481 while (--spec.field_width >= 0) {
480 if (buf < end) 482 if (buf < end)
481 *buf = c; 483 *buf = c;
@@ -783,11 +785,19 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec,
783 if (spec.field_width > 0) 785 if (spec.field_width > 0)
784 len = min_t(int, spec.field_width, 64); 786 len = min_t(int, spec.field_width, 64);
785 787
786 for (i = 0; i < len && buf < end - 1; i++) { 788 for (i = 0; i < len; ++i) {
787 buf = hex_byte_pack(buf, addr[i]); 789 if (buf < end)
790 *buf = hex_asc_hi(addr[i]);
791 ++buf;
792 if (buf < end)
793 *buf = hex_asc_lo(addr[i]);
794 ++buf;
788 795
789 if (buf < end && separator && i != len - 1) 796 if (separator && i != len - 1) {
790 *buf++ = separator; 797 if (buf < end)
798 *buf = separator;
799 ++buf;
800 }
791 } 801 }
792 802
793 return buf; 803 return buf;
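
The rewritten loop no longer bails out at end - 1: it keeps advancing buf past end while only writing when there is room, so a truncated %ph conversion still reports the full length it would have produced, matching the contract of the rest of vsnprintf(). The pattern in isolation, with a local hex table standing in for the kernel's hex_asc_hi()/hex_asc_lo() helpers:

static char *emit_hex_byte(char *buf, char *end, unsigned char b)
{
	static const char hex[] = "0123456789abcdef";

	if (buf < end)
		*buf = hex[b >> 4];
	++buf;
	if (buf < end)
		*buf = hex[b & 0x0f];
	++buf;
	return buf;		/* may legitimately point past 'end' */
}
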
@@ -942,7 +952,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt)
942 break; 952 break;
943 } 953 }
944 for (i = 0; i < 4; i++) { 954 for (i = 0; i < 4; i++) {
945 char temp[3]; /* hold each IP quad in reverse order */ 955 char temp[4] __aligned(2); /* hold each IP quad in reverse order */
946 int digits = put_dec_trunc8(temp, addr[index]) - temp; 956 int digits = put_dec_trunc8(temp, addr[index]) - temp;
947 if (leading_zeros) { 957 if (leading_zeros) {
948 if (digits < 3) 958 if (digits < 3)
@@ -1233,8 +1243,12 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec,
1233 1243
1234 len = spec.field_width < 0 ? 1 : spec.field_width; 1244 len = spec.field_width < 0 ? 1 : spec.field_width;
1235 1245
1236 /* Ignore the error. We print as many characters as we can */ 1246 /*
1237 string_escape_mem(addr, len, &buf, end - buf, flags, NULL); 1247 * string_escape_mem() writes as many characters as it can to
1248 * the given buffer, and returns the total size of the output
1249 * had the buffer been big enough.
1250 */
1251 buf += string_escape_mem(addr, len, buf, buf < end ? end - buf : 0, flags, NULL);
1238 1252
1239 return buf; 1253 return buf;
1240} 1254}
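
Same idea as the hex_string() change: string_escape_mem() writes at most the space it is given but returns the size the escaped output would need, so escaped_string() can simply advance buf by the return value (clamping the available size to 0 once buf has run past end) and truncation is reported instead of silently swallowed. A sketch of that calling convention, with a hypothetical stand-in helper that escapes nothing:

#include <stddef.h>
#include <string.h>

/* hypothetical stand-in for string_escape_mem(): copies what fits,
 * returns the length the full output would have had */
static size_t escape_mem_stub(const char *src, size_t len, char *dst, size_t size)
{
	memcpy(dst, src, len < size ? len : size);
	return len;
}

static char *emit_escaped(char *buf, char *end, const char *src, size_t len)
{
	buf += escape_mem_stub(src, len, buf, buf < end ? end - buf : 0);
	return buf;		/* may point past 'end', like number() does */
}
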
@@ -1322,6 +1336,30 @@ char *address_val(char *buf, char *end, const void *addr,
1322 return number(buf, end, num, spec); 1336 return number(buf, end, num, spec);
1323} 1337}
1324 1338
1339static noinline_for_stack
1340char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec,
1341 const char *fmt)
1342{
1343 if (!IS_ENABLED(CONFIG_HAVE_CLK) || !clk)
1344 return string(buf, end, NULL, spec);
1345
1346 switch (fmt[1]) {
1347 case 'r':
1348 return number(buf, end, clk_get_rate(clk), spec);
1349
1350 case 'n':
1351 default:
1352#ifdef CONFIG_COMMON_CLK
1353 return string(buf, end, __clk_get_name(clk), spec);
1354#else
1355 spec.base = 16;
1356 spec.field_width = sizeof(unsigned long) * 2 + 2;
1357 spec.flags |= SPECIAL | SMALL | ZEROPAD;
1358 return number(buf, end, (unsigned long)clk, spec);
1359#endif
1360 }
1361}
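
A usage sketch for the new conversions from a driver's point of view; the device, the "mclk" lookup name and the helper function are made up for the example. With the Common Clock Framework the clock's name is printed, on the legacy framework its address, and %pCr goes through clk_get_rate():

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/printk.h>

static void report_clock(struct device *dev)
{
	struct clk *clk = devm_clk_get(dev, "mclk");	/* hypothetical clock */

	if (IS_ERR(clk))
		return;

	pr_info("mclk = %pC, current rate = %pCr\n", clk, clk);
}
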
1362
1325int kptr_restrict __read_mostly; 1363int kptr_restrict __read_mostly;
1326 1364
1327/* 1365/*
@@ -1404,6 +1442,11 @@ int kptr_restrict __read_mostly;
1404 * (default assumed to be phys_addr_t, passed by reference) 1442 * (default assumed to be phys_addr_t, passed by reference)
1405 * - 'd[234]' For a dentry name (optionally 2-4 last components) 1443 * - 'd[234]' For a dentry name (optionally 2-4 last components)
1406 * - 'D[234]' Same as 'd' but for a struct file 1444 * - 'D[234]' Same as 'd' but for a struct file
1445 * - 'C' For a clock, it prints the name (Common Clock Framework) or address
1446 * (legacy clock framework) of the clock
1447 * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address
1448 * (legacy clock framework) of the clock
1449 * - 'Cr' For a clock, it prints the current rate of the clock
1407 * 1450 *
1408 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 1451 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
1409 * function pointers are really function descriptors, which contain a 1452 * function pointers are really function descriptors, which contain a
@@ -1548,6 +1591,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
1548 return address_val(buf, end, ptr, spec, fmt); 1591 return address_val(buf, end, ptr, spec, fmt);
1549 case 'd': 1592 case 'd':
1550 return dentry_name(buf, end, ptr, spec, fmt); 1593 return dentry_name(buf, end, ptr, spec, fmt);
1594 case 'C':
1595 return clock(buf, end, ptr, spec, fmt);
1551 case 'D': 1596 case 'D':
1552 return dentry_name(buf, end, 1597 return dentry_name(buf, end,
1553 ((const struct file *)ptr)->f_path.dentry, 1598 ((const struct file *)ptr)->f_path.dentry,
@@ -1738,29 +1783,21 @@ qualifier:
1738 if (spec->qualifier == 'L') 1783 if (spec->qualifier == 'L')
1739 spec->type = FORMAT_TYPE_LONG_LONG; 1784 spec->type = FORMAT_TYPE_LONG_LONG;
1740 else if (spec->qualifier == 'l') { 1785 else if (spec->qualifier == 'l') {
1741 if (spec->flags & SIGN) 1786 BUILD_BUG_ON(FORMAT_TYPE_ULONG + SIGN != FORMAT_TYPE_LONG);
1742 spec->type = FORMAT_TYPE_LONG; 1787 spec->type = FORMAT_TYPE_ULONG + (spec->flags & SIGN);
1743 else
1744 spec->type = FORMAT_TYPE_ULONG;
1745 } else if (_tolower(spec->qualifier) == 'z') { 1788 } else if (_tolower(spec->qualifier) == 'z') {
1746 spec->type = FORMAT_TYPE_SIZE_T; 1789 spec->type = FORMAT_TYPE_SIZE_T;
1747 } else if (spec->qualifier == 't') { 1790 } else if (spec->qualifier == 't') {
1748 spec->type = FORMAT_TYPE_PTRDIFF; 1791 spec->type = FORMAT_TYPE_PTRDIFF;
1749 } else if (spec->qualifier == 'H') { 1792 } else if (spec->qualifier == 'H') {
1750 if (spec->flags & SIGN) 1793 BUILD_BUG_ON(FORMAT_TYPE_UBYTE + SIGN != FORMAT_TYPE_BYTE);
1751 spec->type = FORMAT_TYPE_BYTE; 1794 spec->type = FORMAT_TYPE_UBYTE + (spec->flags & SIGN);
1752 else
1753 spec->type = FORMAT_TYPE_UBYTE;
1754 } else if (spec->qualifier == 'h') { 1795 } else if (spec->qualifier == 'h') {
1755 if (spec->flags & SIGN) 1796 BUILD_BUG_ON(FORMAT_TYPE_USHORT + SIGN != FORMAT_TYPE_SHORT);
1756 spec->type = FORMAT_TYPE_SHORT; 1797 spec->type = FORMAT_TYPE_USHORT + (spec->flags & SIGN);
1757 else
1758 spec->type = FORMAT_TYPE_USHORT;
1759 } else { 1798 } else {
1760 if (spec->flags & SIGN) 1799 BUILD_BUG_ON(FORMAT_TYPE_UINT + SIGN != FORMAT_TYPE_INT);
1761 spec->type = FORMAT_TYPE_INT; 1800 spec->type = FORMAT_TYPE_UINT + (spec->flags & SIGN);
1762 else
1763 spec->type = FORMAT_TYPE_UINT;
1764 } 1801 }
1765 1802
1766 return ++fmt - start; 1803 return ++fmt - start;
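
The four if/else pairs collapse to an addition because SIGN is now 1 and the FORMAT_TYPE_* enum (declared near the top of vsprintf.c) keeps each signed type immediately after its unsigned counterpart; the BUILD_BUG_ON()s turn that ordering assumption into a build failure if the enum is ever rearranged. A freestanding equivalent of the guard, with stand-in names:

/* poor man's BUILD_BUG_ON(): a negative array size breaks the build */
#define COMPILE_TIME_CHECK(cond) \
	extern char compile_time_check_failed[(cond) ? 1 : -1]

enum format_type_demo {		/* stand-in ordering, same idea as vsprintf.c */
	DEMO_TYPE_ULONG,
	DEMO_TYPE_LONG,		/* == DEMO_TYPE_ULONG + 1 */
};

COMPILE_TIME_CHECK(DEMO_TYPE_ULONG + 1 /* SIGN */ == DEMO_TYPE_LONG);
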
@@ -1800,6 +1837,11 @@ qualifier:
1800 * %*pE[achnops] print an escaped buffer 1837 * %*pE[achnops] print an escaped buffer
1801 * %*ph[CDN] a variable-length hex string with a separator (supports up to 64 1838 * %*ph[CDN] a variable-length hex string with a separator (supports up to 64
1802 * bytes of the input) 1839 * bytes of the input)
1840 * %pC output the name (Common Clock Framework) or address (legacy clock
1841 * framework) of a clock
1842 * %pCn output the name (Common Clock Framework) or address (legacy clock
1843 * framework) of a clock
1844 * %pCr output the current rate of a clock
1803 * %n is ignored 1845 * %n is ignored
1804 * 1846 *
1805 * ** Please update Documentation/printk-formats.txt when making changes ** 1847 * ** Please update Documentation/printk-formats.txt when making changes **