author     Mark Brown <broonie@kernel.org>  2016-11-04 14:16:38 -0400
committer  Mark Brown <broonie@kernel.org>  2016-11-04 14:16:38 -0400
commit     cc9b94029e9ef51787af908e9856b1eed314bc00 (patch)
tree       9675310b89d0f6fb1f7bd9423f0638c4ee5226fd /lib
parent     13bed58ce8748d430a26e353a09b89f9d613a71f (diff)
parent     1b5b42216469b05ef4b5916cb40b127dfab1da88 (diff)
Merge branch 'topic/error' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator into regulator-fixed
Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig | 6
-rw-r--r--  lib/Kconfig.debug | 81
-rw-r--r--  lib/Kconfig.kasan | 4
-rw-r--r--  lib/Kconfig.ubsan | 11
-rw-r--r--  lib/Makefile | 14
-rw-r--r--  lib/atomic64.c | 32
-rw-r--r--  lib/atomic64_test.c | 38
-rw-r--r--  lib/bitmap.c | 52
-rw-r--r--  lib/chacha20.c | 79
-rw-r--r--  lib/cpu-notifier-error-inject.c | 46
-rw-r--r--  lib/crc32.c | 16
-rw-r--r--  lib/digsig.c | 16
-rw-r--r--  lib/dma-debug.c | 90
-rw-r--r--  lib/dma-noop.c | 9
-rw-r--r--  lib/dynamic_debug.c | 7
-rw-r--r--  lib/earlycpio.c | 5
-rw-r--r--  lib/hweight.c | 4
-rw-r--r--  lib/iommu-helper.c | 3
-rw-r--r--  lib/iov_iter.c | 489
-rw-r--r--  lib/irq_poll.c | 28
-rw-r--r--  lib/kstrtox.c | 6
-rw-r--r--  lib/mpi/mpicoder.c | 247
-rw-r--r--  lib/nmi_backtrace.c | 42
-rw-r--r--  lib/percpu-refcount.c | 169
-rw-r--r--  lib/radix-tree.c | 120
-rw-r--r--  lib/raid6/.gitignore | 1
-rw-r--r--  lib/raid6/Makefile | 8
-rw-r--r--  lib/raid6/algos.c | 18
-rw-r--r--  lib/raid6/avx512.c | 569
-rw-r--r--  lib/raid6/recov_avx512.c | 388
-rw-r--r--  lib/raid6/recov_s390xc.c | 116
-rw-r--r--  lib/raid6/s390vx.uc | 168
-rw-r--r--  lib/raid6/test/Makefile | 5
-rw-r--r--  lib/raid6/test/test.c | 7
-rw-r--r--  lib/raid6/x86.h | 10
-rw-r--r--  lib/random32.c | 7
-rw-r--r--  lib/ratelimit.c | 10
-rw-r--r--  lib/rbtree.c | 26
-rw-r--r--  lib/rhashtable.c | 320
-rw-r--r--  lib/sbitmap.c | 347
-rw-r--r--  lib/stackdepot.c | 1
-rw-r--r--  lib/strncpy_from_user.c | 10
-rw-r--r--  lib/strnlen_user.c | 7
-rw-r--r--  lib/swiotlb.c | 13
-rw-r--r--  lib/syscall.c | 15
-rw-r--r--  lib/test_bpf.c | 1
-rw-r--r--  lib/test_hash.c | 30
-rw-r--r--  lib/test_rhashtable.c | 2
-rw-r--r--  lib/test_uuid.c | 133
-rw-r--r--  lib/ubsan.c | 2
-rw-r--r--  lib/ucs2_string.c | 2
-rw-r--r--  lib/usercopy.c | 9
-rw-r--r--  lib/uuid.c | 4
-rw-r--r--  lib/win_minmax.c | 98
54 files changed, 3320 insertions, 621 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index d79909dc01ec..260a80e313b9 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -457,9 +457,6 @@ config NLATTR
457config GENERIC_ATOMIC64 457config GENERIC_ATOMIC64
458 bool 458 bool
459 459
460config ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
461 def_bool y if GENERIC_ATOMIC64
462
463config LRU_CACHE 460config LRU_CACHE
464 tristate 461 tristate
465 462
@@ -550,4 +547,7 @@ config STACKDEPOT
550 bool 547 bool
551 select STACKTRACE 548 select STACKTRACE
552 549
550config SBITMAP
551 bool
552
553endmenu 553endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 77d7d034bac3..33bc56cf60d7 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -244,6 +244,7 @@ config PAGE_OWNER
244 depends on DEBUG_KERNEL && STACKTRACE_SUPPORT 244 depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
245 select DEBUG_FS 245 select DEBUG_FS
246 select STACKTRACE 246 select STACKTRACE
247 select STACKDEPOT
247 select PAGE_EXTENSION 248 select PAGE_EXTENSION
248 help 249 help
249 This keeps track of what call chain is the owner of a page, may 250 This keeps track of what call chain is the owner of a page, may
@@ -304,7 +305,7 @@ config DEBUG_SECTION_MISMATCH
304 a larger kernel). 305 a larger kernel).
305 - Run the section mismatch analysis for each module/built-in.o file. 306 - Run the section mismatch analysis for each module/built-in.o file.
306 When we run the section mismatch analysis on vmlinux.o, we 307 When we run the section mismatch analysis on vmlinux.o, we
307 lose valueble information about where the mismatch was 308 lose valuable information about where the mismatch was
308 introduced. 309 introduced.
309 Running the analysis for each module/built-in.o file 310 Running the analysis for each module/built-in.o file
310 tells where the mismatch happens much closer to the 311 tells where the mismatch happens much closer to the
@@ -708,6 +709,8 @@ config KCOV
708 bool "Code coverage for fuzzing" 709 bool "Code coverage for fuzzing"
709 depends on ARCH_HAS_KCOV 710 depends on ARCH_HAS_KCOV
710 select DEBUG_FS 711 select DEBUG_FS
712 select GCC_PLUGINS if !COMPILE_TEST
713 select GCC_PLUGIN_SANCOV if !COMPILE_TEST
711 help 714 help
712 KCOV exposes kernel code coverage information in a form suitable 715 KCOV exposes kernel code coverage information in a form suitable
713 for coverage-guided fuzzing (randomized testing). 716 for coverage-guided fuzzing (randomized testing).
@@ -718,6 +721,17 @@ config KCOV
718 721
719 For more details, see Documentation/kcov.txt. 722 For more details, see Documentation/kcov.txt.
720 723
724config KCOV_INSTRUMENT_ALL
725 bool "Instrument all code by default"
726 depends on KCOV
727 default y if KCOV
728 help
729 If you are doing generic system call fuzzing (like e.g. syzkaller),
730 then you will want to instrument the whole kernel and you should
731 say y here. If you are doing more targeted fuzzing (like e.g.
732 filesystem fuzzing with AFL) then you will want to enable coverage
733 for more specific subsets of files, and should say n here.
734
721config DEBUG_SHIRQ 735config DEBUG_SHIRQ
722 bool "Debug shared IRQ handlers" 736 bool "Debug shared IRQ handlers"
723 depends on DEBUG_KERNEL 737 depends on DEBUG_KERNEL
@@ -807,7 +821,7 @@ config DETECT_HUNG_TASK
807 help 821 help
808 Say Y here to enable the kernel to detect "hung tasks", 822 Say Y here to enable the kernel to detect "hung tasks",
809 which are bugs that cause the task to be stuck in 823 which are bugs that cause the task to be stuck in
810 uninterruptible "D" state indefinitiley. 824 uninterruptible "D" state indefinitely.
811 825
812 When a hung task is detected, the kernel will print the 826 When a hung task is detected, the kernel will print the
813 current stack trace (which you should report), but the 827 current stack trace (which you should report), but the
@@ -1307,22 +1321,6 @@ config RCU_PERF_TEST
1307 Say M if you want the RCU performance tests to build as a module. 1321 Say M if you want the RCU performance tests to build as a module.
1308 Say N if you are unsure. 1322 Say N if you are unsure.
1309 1323
1310config RCU_PERF_TEST_RUNNABLE
1311 bool "performance tests for RCU runnable by default"
1312 depends on RCU_PERF_TEST = y
1313 default n
1314 help
1315 This option provides a way to build the RCU performance tests
1316 directly into the kernel without them starting up at boot time.
1317 You can use /sys/module to manually override this setting.
1318 This /proc file is available only when the RCU performance
1319 tests have been built into the kernel.
1320
1321 Say Y here if you want the RCU performance tests to start during
1322 boot (you probably don't).
1323 Say N here if you want the RCU performance tests to start only
1324 after being manually enabled via /sys/module.
1325
1326config RCU_TORTURE_TEST 1324config RCU_TORTURE_TEST
1327 tristate "torture tests for RCU" 1325 tristate "torture tests for RCU"
1328 depends on DEBUG_KERNEL 1326 depends on DEBUG_KERNEL
@@ -1340,23 +1338,6 @@ config RCU_TORTURE_TEST
1340 Say M if you want the RCU torture tests to build as a module. 1338 Say M if you want the RCU torture tests to build as a module.
1341 Say N if you are unsure. 1339 Say N if you are unsure.
1342 1340
1343config RCU_TORTURE_TEST_RUNNABLE
1344 bool "torture tests for RCU runnable by default"
1345 depends on RCU_TORTURE_TEST = y
1346 default n
1347 help
1348 This option provides a way to build the RCU torture tests
1349 directly into the kernel without them starting up at boot
1350 time. You can use /proc/sys/kernel/rcutorture_runnable
1351 to manually override this setting. This /proc file is
1352 available only when the RCU torture tests have been built
1353 into the kernel.
1354
1355 Say Y here if you want the RCU torture tests to start during
1356 boot (you probably don't).
1357 Say N here if you want the RCU torture tests to start only
1358 after being manually enabled via /proc.
1359
1360config RCU_TORTURE_TEST_SLOW_PREINIT 1341config RCU_TORTURE_TEST_SLOW_PREINIT
1361 bool "Slow down RCU grace-period pre-initialization to expose races" 1342 bool "Slow down RCU grace-period pre-initialization to expose races"
1362 depends on RCU_TORTURE_TEST 1343 depends on RCU_TORTURE_TEST
@@ -1705,24 +1686,6 @@ config LATENCYTOP
1705 Enable this option if you want to use the LatencyTOP tool 1686 Enable this option if you want to use the LatencyTOP tool
1706 to find out which userspace is blocking on what kernel operations. 1687 to find out which userspace is blocking on what kernel operations.
1707 1688
1708config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
1709 bool
1710
1711config DEBUG_STRICT_USER_COPY_CHECKS
1712 bool "Strict user copy size checks"
1713 depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
1714 depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
1715 help
1716 Enabling this option turns a certain set of sanity checks for user
1717 copy operations into compile time failures.
1718
1719 The copy_from_user() etc checks are there to help test if there
1720 are sufficient security checks on the length argument of
1721 the copy operation, by having gcc prove that the argument is
1722 within bounds.
1723
1724 If unsure, say N.
1725
1726source kernel/trace/Kconfig 1689source kernel/trace/Kconfig
1727 1690
1728menu "Runtime Testing" 1691menu "Runtime Testing"
@@ -1841,6 +1804,9 @@ config TEST_BITMAP
1841 1804
1842 If unsure, say N. 1805 If unsure, say N.
1843 1806
1807config TEST_UUID
1808 tristate "Test functions located in the uuid module at runtime"
1809
1844config TEST_RHASHTABLE 1810config TEST_RHASHTABLE
1845 tristate "Perform selftest on resizable hash table" 1811 tristate "Perform selftest on resizable hash table"
1846 default n 1812 default n
@@ -1891,15 +1857,6 @@ config PROVIDE_OHCI1394_DMA_INIT
1891 1857
1892 See Documentation/debugging-via-ohci1394.txt for more information. 1858 See Documentation/debugging-via-ohci1394.txt for more information.
1893 1859
1894config BUILD_DOCSRC
1895 bool "Build targets in Documentation/ tree"
1896 depends on HEADERS_CHECK
1897 help
1898 This option attempts to build objects from the source files in the
1899 kernel Documentation/ tree.
1900
1901 Say N if you are unsure.
1902
1903config DMA_API_DEBUG 1860config DMA_API_DEBUG
1904 bool "Enable debugging of DMA-API usage" 1861 bool "Enable debugging of DMA-API usage"
1905 depends on HAVE_DMA_API_DEBUG 1862 depends on HAVE_DMA_API_DEBUG
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 67d8c6838ba9..bd38aab05929 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -5,9 +5,9 @@ if HAVE_ARCH_KASAN
5 5
6config KASAN 6config KASAN
7 bool "KASan: runtime memory debugger" 7 bool "KASan: runtime memory debugger"
8 depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB) 8 depends on SLUB || (SLAB && !DEBUG_SLAB)
9 select CONSTRUCTORS 9 select CONSTRUCTORS
10 select STACKDEPOT if SLAB 10 select STACKDEPOT
11 help 11 help
12 Enables kernel address sanitizer - runtime memory debugger, 12 Enables kernel address sanitizer - runtime memory debugger,
13 designed to find out-of-bounds accesses and use-after-free bugs. 13 designed to find out-of-bounds accesses and use-after-free bugs.
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 39494af9a84a..bc6e651df68c 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,6 +1,9 @@
1config ARCH_HAS_UBSAN_SANITIZE_ALL 1config ARCH_HAS_UBSAN_SANITIZE_ALL
2 bool 2 bool
3 3
4config ARCH_WANTS_UBSAN_NO_NULL
5 def_bool n
6
4config UBSAN 7config UBSAN
5 bool "Undefined behaviour sanity checker" 8 bool "Undefined behaviour sanity checker"
6 help 9 help
@@ -34,3 +37,11 @@ config UBSAN_ALIGNMENT
34 This option enables detection of unaligned memory accesses. 37 This option enables detection of unaligned memory accesses.
35 Enabling this option on architectures that support unaligned 38 Enabling this option on architectures that support unaligned
36 accesses may produce a lot of false positives. 39 accesses may produce a lot of false positives.
40
41config UBSAN_NULL
42 bool "Enable checking of null pointers"
43 depends on UBSAN
44 default y if !ARCH_WANTS_UBSAN_NO_NULL
45 help
46 This option enables detection of memory accesses via a
47 null pointer.
diff --git a/lib/Makefile b/lib/Makefile
index 499fb354d627..50144a3aeebd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -15,19 +15,15 @@ KCOV_INSTRUMENT_rbtree.o := n
15KCOV_INSTRUMENT_list_debug.o := n 15KCOV_INSTRUMENT_list_debug.o := n
16KCOV_INSTRUMENT_debugobjects.o := n 16KCOV_INSTRUMENT_debugobjects.o := n
17KCOV_INSTRUMENT_dynamic_debug.o := n 17KCOV_INSTRUMENT_dynamic_debug.o := n
18# Kernel does not boot if we instrument this file as it uses custom calling
19# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
20KCOV_INSTRUMENT_hweight.o := n
21 18
22lib-y := ctype.o string.o vsprintf.o cmdline.o \ 19lib-y := ctype.o string.o vsprintf.o cmdline.o \
23 rbtree.o radix-tree.o dump_stack.o timerqueue.o\ 20 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
24 idr.o int_sqrt.o extable.o \ 21 idr.o int_sqrt.o extable.o \
25 sha1.o md5.o irq_regs.o argv_split.o \ 22 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
26 flex_proportions.o ratelimit.o show_mem.o \ 23 flex_proportions.o ratelimit.o show_mem.o \
27 is_single_threaded.o plist.o decompress.o kobject_uevent.o \ 24 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
28 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o 25 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
29 26
30obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
31lib-$(CONFIG_MMU) += ioremap.o 27lib-$(CONFIG_MMU) += ioremap.o
32lib-$(CONFIG_SMP) += cpumask.o 28lib-$(CONFIG_SMP) += cpumask.o
33lib-$(CONFIG_HAS_DMA) += dma-noop.o 29lib-$(CONFIG_HAS_DMA) += dma-noop.o
@@ -58,6 +54,7 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
58obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o 54obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
59obj-$(CONFIG_TEST_PRINTF) += test_printf.o 55obj-$(CONFIG_TEST_PRINTF) += test_printf.o
60obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o 56obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
57obj-$(CONFIG_TEST_UUID) += test_uuid.o
61 58
62ifeq ($(CONFIG_DEBUG_KOBJECT),y) 59ifeq ($(CONFIG_DEBUG_KOBJECT),y)
63CFLAGS_kobject.o += -DDEBUG 60CFLAGS_kobject.o += -DDEBUG
@@ -73,8 +70,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
73obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o 70obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
74obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o 71obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
75 72
76GCOV_PROFILE_hweight.o := n
77CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
78obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o 73obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
79 74
80obj-$(CONFIG_BTREE) += btree.o 75obj-$(CONFIG_BTREE) += btree.o
@@ -185,6 +180,7 @@ obj-$(CONFIG_IRQ_POLL) += irq_poll.o
185 180
186obj-$(CONFIG_STACKDEPOT) += stackdepot.o 181obj-$(CONFIG_STACKDEPOT) += stackdepot.o
187KASAN_SANITIZE_stackdepot.o := n 182KASAN_SANITIZE_stackdepot.o := n
183KCOV_INSTRUMENT_stackdepot.o := n
188 184
189libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ 185libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
190 fdt_empty_tree.o 186 fdt_empty_tree.o
@@ -232,3 +228,5 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
232obj-$(CONFIG_UBSAN) += ubsan.o 228obj-$(CONFIG_UBSAN) += ubsan.o
233 229
234UBSAN_SANITIZE_ubsan.o := n 230UBSAN_SANITIZE_ubsan.o := n
231
232obj-$(CONFIG_SBITMAP) += sbitmap.o
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 2886ebac6567..53c2d5edc826 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -96,17 +96,41 @@ long long atomic64_##op##_return(long long a, atomic64_t *v) \
96} \ 96} \
97EXPORT_SYMBOL(atomic64_##op##_return); 97EXPORT_SYMBOL(atomic64_##op##_return);
98 98
99#define ATOMIC64_FETCH_OP(op, c_op) \
100long long atomic64_fetch_##op(long long a, atomic64_t *v) \
101{ \
102 unsigned long flags; \
103 raw_spinlock_t *lock = lock_addr(v); \
104 long long val; \
105 \
106 raw_spin_lock_irqsave(lock, flags); \
107 val = v->counter; \
108 v->counter c_op a; \
109 raw_spin_unlock_irqrestore(lock, flags); \
110 return val; \
111} \
112EXPORT_SYMBOL(atomic64_fetch_##op);
113
99#define ATOMIC64_OPS(op, c_op) \ 114#define ATOMIC64_OPS(op, c_op) \
100 ATOMIC64_OP(op, c_op) \ 115 ATOMIC64_OP(op, c_op) \
101 ATOMIC64_OP_RETURN(op, c_op) 116 ATOMIC64_OP_RETURN(op, c_op) \
117 ATOMIC64_FETCH_OP(op, c_op)
102 118
103ATOMIC64_OPS(add, +=) 119ATOMIC64_OPS(add, +=)
104ATOMIC64_OPS(sub, -=) 120ATOMIC64_OPS(sub, -=)
105ATOMIC64_OP(and, &=)
106ATOMIC64_OP(or, |=)
107ATOMIC64_OP(xor, ^=)
108 121
109#undef ATOMIC64_OPS 122#undef ATOMIC64_OPS
123#define ATOMIC64_OPS(op, c_op) \
124 ATOMIC64_OP(op, c_op) \
125 ATOMIC64_OP_RETURN(op, c_op) \
126 ATOMIC64_FETCH_OP(op, c_op)
127
128ATOMIC64_OPS(and, &=)
129ATOMIC64_OPS(or, |=)
130ATOMIC64_OPS(xor, ^=)
131
132#undef ATOMIC64_OPS
133#undef ATOMIC64_FETCH_OP
110#undef ATOMIC64_OP_RETURN 134#undef ATOMIC64_OP_RETURN
111#undef ATOMIC64_OP 135#undef ATOMIC64_OP
112 136
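The ATOMIC64_FETCH_OP() macro added above generates atomic64_fetch_add(), atomic64_fetch_sub(), atomic64_fetch_and(), atomic64_fetch_or() and atomic64_fetch_xor(), which apply the operation under the hashed spinlock and return the value the counter held before the operation (the *_return variants return the new value instead). A minimal standalone C11 sketch of that fetch contract, using <stdatomic.h> purely as an illustration -- it is not the kernel API:

#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
	atomic_llong v = ATOMIC_VAR_INIT(100);
	long long old, newval;

	/* fetch_add returns the value held before the addition,
	 * which is the contract atomic64_fetch_add() now provides. */
	old = atomic_fetch_add(&v, 7);
	printf("old=%lld now=%lld\n", old, (long long)atomic_load(&v));	/* old=100 now=107 */

	/* the *_return variants instead report the post-operation value */
	newval = atomic_fetch_sub(&v, 7) - 7;	/* emulates atomic64_sub_return() */
	printf("sub_return-style result=%lld\n", newval);	/* 100 */
	return 0;
}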
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 123481814320..46042901130f 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -53,11 +53,25 @@ do { \
53 BUG_ON(atomic##bit##_read(&v) != r); \ 53 BUG_ON(atomic##bit##_read(&v) != r); \
54} while (0) 54} while (0)
55 55
56#define TEST_FETCH(bit, op, c_op, val) \
57do { \
58 atomic##bit##_set(&v, v0); \
59 r = v0; \
60 r c_op val; \
61 BUG_ON(atomic##bit##_##op(val, &v) != v0); \
62 BUG_ON(atomic##bit##_read(&v) != r); \
63} while (0)
64
56#define RETURN_FAMILY_TEST(bit, op, c_op, val) \ 65#define RETURN_FAMILY_TEST(bit, op, c_op, val) \
57do { \ 66do { \
58 FAMILY_TEST(TEST_RETURN, bit, op, c_op, val); \ 67 FAMILY_TEST(TEST_RETURN, bit, op, c_op, val); \
59} while (0) 68} while (0)
60 69
70#define FETCH_FAMILY_TEST(bit, op, c_op, val) \
71do { \
72 FAMILY_TEST(TEST_FETCH, bit, op, c_op, val); \
73} while (0)
74
61#define TEST_ARGS(bit, op, init, ret, expect, args...) \ 75#define TEST_ARGS(bit, op, init, ret, expect, args...) \
62do { \ 76do { \
63 atomic##bit##_set(&v, init); \ 77 atomic##bit##_set(&v, init); \
@@ -114,6 +128,16 @@ static __init void test_atomic(void)
114 RETURN_FAMILY_TEST(, sub_return, -=, onestwos); 128 RETURN_FAMILY_TEST(, sub_return, -=, onestwos);
115 RETURN_FAMILY_TEST(, sub_return, -=, -one); 129 RETURN_FAMILY_TEST(, sub_return, -=, -one);
116 130
131 FETCH_FAMILY_TEST(, fetch_add, +=, onestwos);
132 FETCH_FAMILY_TEST(, fetch_add, +=, -one);
133 FETCH_FAMILY_TEST(, fetch_sub, -=, onestwos);
134 FETCH_FAMILY_TEST(, fetch_sub, -=, -one);
135
136 FETCH_FAMILY_TEST(, fetch_or, |=, v1);
137 FETCH_FAMILY_TEST(, fetch_and, &=, v1);
138 FETCH_FAMILY_TEST(, fetch_andnot, &= ~, v1);
139 FETCH_FAMILY_TEST(, fetch_xor, ^=, v1);
140
117 INC_RETURN_FAMILY_TEST(, v0); 141 INC_RETURN_FAMILY_TEST(, v0);
118 DEC_RETURN_FAMILY_TEST(, v0); 142 DEC_RETURN_FAMILY_TEST(, v0);
119 143
@@ -154,6 +178,16 @@ static __init void test_atomic64(void)
154 RETURN_FAMILY_TEST(64, sub_return, -=, onestwos); 178 RETURN_FAMILY_TEST(64, sub_return, -=, onestwos);
155 RETURN_FAMILY_TEST(64, sub_return, -=, -one); 179 RETURN_FAMILY_TEST(64, sub_return, -=, -one);
156 180
181 FETCH_FAMILY_TEST(64, fetch_add, +=, onestwos);
182 FETCH_FAMILY_TEST(64, fetch_add, +=, -one);
183 FETCH_FAMILY_TEST(64, fetch_sub, -=, onestwos);
184 FETCH_FAMILY_TEST(64, fetch_sub, -=, -one);
185
186 FETCH_FAMILY_TEST(64, fetch_or, |=, v1);
187 FETCH_FAMILY_TEST(64, fetch_and, &=, v1);
188 FETCH_FAMILY_TEST(64, fetch_andnot, &= ~, v1);
189 FETCH_FAMILY_TEST(64, fetch_xor, ^=, v1);
190
157 INIT(v0); 191 INIT(v0);
158 atomic64_inc(&v); 192 atomic64_inc(&v);
159 r += one; 193 r += one;
@@ -179,7 +213,6 @@ static __init void test_atomic64(void)
179 r += one; 213 r += one;
180 BUG_ON(v.counter != r); 214 BUG_ON(v.counter != r);
181 215
182#ifdef CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
183 INIT(onestwos); 216 INIT(onestwos);
184 BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); 217 BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
185 r -= one; 218 r -= one;
@@ -192,9 +225,6 @@ static __init void test_atomic64(void)
192 INIT(-one); 225 INIT(-one);
193 BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); 226 BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
194 BUG_ON(v.counter != r); 227 BUG_ON(v.counter != r);
195#else
196#warning Please implement atomic64_dec_if_positive for your architecture and select the above Kconfig symbol
197#endif
198 228
199 INIT(onestwos); 229 INIT(onestwos);
200 BUG_ON(!atomic64_inc_not_zero(&v)); 230 BUG_ON(!atomic64_inc_not_zero(&v));
diff --git a/lib/bitmap.c b/lib/bitmap.c
index c66da508cbf7..0b66f0e5eb6b 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -14,9 +14,9 @@
14#include <linux/bug.h> 14#include <linux/bug.h>
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/uaccess.h>
17 18
18#include <asm/page.h> 19#include <asm/page.h>
19#include <asm/uaccess.h>
20 20
21/* 21/*
22 * bitmaps provide an array of bits, implemented using an an 22 * bitmaps provide an array of bits, implemented using an an
@@ -496,6 +496,11 @@ EXPORT_SYMBOL(bitmap_print_to_pagebuf);
496 * ranges. Consecutively set bits are shown as two hyphen-separated 496 * ranges. Consecutively set bits are shown as two hyphen-separated
497 * decimal numbers, the smallest and largest bit numbers set in 497 * decimal numbers, the smallest and largest bit numbers set in
498 * the range. 498 * the range.
499 * Optionally each range can be postfixed to denote that only parts of it
500 * should be set. The range will divided to groups of specific size.
501 * From each group will be used only defined amount of bits.
502 * Syntax: range:used_size/group_size
503 * Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769
499 * 504 *
500 * Returns 0 on success, -errno on invalid input strings. 505 * Returns 0 on success, -errno on invalid input strings.
501 * Error values: 506 * Error values:
@@ -507,16 +512,20 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
507 int is_user, unsigned long *maskp, 512 int is_user, unsigned long *maskp,
508 int nmaskbits) 513 int nmaskbits)
509{ 514{
510 unsigned a, b; 515 unsigned int a, b, old_a, old_b;
516 unsigned int group_size, used_size;
511 int c, old_c, totaldigits, ndigits; 517 int c, old_c, totaldigits, ndigits;
512 const char __user __force *ubuf = (const char __user __force *)buf; 518 const char __user __force *ubuf = (const char __user __force *)buf;
513 int at_start, in_range; 519 int at_start, in_range, in_partial_range;
514 520
515 totaldigits = c = 0; 521 totaldigits = c = 0;
522 old_a = old_b = 0;
523 group_size = used_size = 0;
516 bitmap_zero(maskp, nmaskbits); 524 bitmap_zero(maskp, nmaskbits);
517 do { 525 do {
518 at_start = 1; 526 at_start = 1;
519 in_range = 0; 527 in_range = 0;
528 in_partial_range = 0;
520 a = b = 0; 529 a = b = 0;
521 ndigits = totaldigits; 530 ndigits = totaldigits;
522 531
@@ -547,6 +556,24 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
547 if ((totaldigits != ndigits) && isspace(old_c)) 556 if ((totaldigits != ndigits) && isspace(old_c))
548 return -EINVAL; 557 return -EINVAL;
549 558
559 if (c == '/') {
560 used_size = a;
561 at_start = 1;
562 in_range = 0;
563 a = b = 0;
564 continue;
565 }
566
567 if (c == ':') {
568 old_a = a;
569 old_b = b;
570 at_start = 1;
571 in_range = 0;
572 in_partial_range = 1;
573 a = b = 0;
574 continue;
575 }
576
550 if (c == '-') { 577 if (c == '-') {
551 if (at_start || in_range) 578 if (at_start || in_range)
552 return -EINVAL; 579 return -EINVAL;
@@ -567,15 +594,30 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
567 } 594 }
568 if (ndigits == totaldigits) 595 if (ndigits == totaldigits)
569 continue; 596 continue;
597 if (in_partial_range) {
598 group_size = a;
599 a = old_a;
600 b = old_b;
601 old_a = old_b = 0;
602 }
570 /* if no digit is after '-', it's wrong*/ 603 /* if no digit is after '-', it's wrong*/
571 if (at_start && in_range) 604 if (at_start && in_range)
572 return -EINVAL; 605 return -EINVAL;
573 if (!(a <= b)) 606 if (!(a <= b) || !(used_size <= group_size))
574 return -EINVAL; 607 return -EINVAL;
575 if (b >= nmaskbits) 608 if (b >= nmaskbits)
576 return -ERANGE; 609 return -ERANGE;
577 while (a <= b) { 610 while (a <= b) {
578 set_bit(a, maskp); 611 if (in_partial_range) {
612 static int pos_in_group = 1;
613
614 if (pos_in_group <= used_size)
615 set_bit(a, maskp);
616
617 if (a == b || ++pos_in_group > group_size)
618 pos_in_group = 1;
619 } else
620 set_bit(a, maskp);
579 a++; 621 a++;
580 } 622 }
581 } while (buflen && c == ','); 623 } while (buflen && c == ',');
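The new group syntax documented above, range:used_size/group_size, keeps only the first used_size bits of every group_size-wide group inside the range, with grouping counted from the start of the range. A small standalone sketch of that selection rule, assuming the semantics stated in the comment (this is not the kernel parser):

#include <stdio.h>

/* Reproduces the comment's example: "0-1023:2/256" sets
 * 0,1,256,257,512,513,768,769. */
int main(void)
{
	unsigned int start = 0, end = 1023, used_size = 2, group_size = 256;
	unsigned int bit;

	for (bit = start; bit <= end; bit++)
		if ((bit - start) % group_size < used_size)
			printf("%u\n", bit);
	return 0;
}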
diff --git a/lib/chacha20.c b/lib/chacha20.c
new file mode 100644
index 000000000000..250ceed9ec9a
--- /dev/null
+++ b/lib/chacha20.c
@@ -0,0 +1,79 @@
1/*
2 * ChaCha20 256-bit cipher algorithm, RFC7539
3 *
4 * Copyright (C) 2015 Martin Willi
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/export.h>
14#include <linux/bitops.h>
15#include <linux/cryptohash.h>
16#include <asm/unaligned.h>
17#include <crypto/chacha20.h>
18
19static inline u32 rotl32(u32 v, u8 n)
20{
21 return (v << n) | (v >> (sizeof(v) * 8 - n));
22}
23
24extern void chacha20_block(u32 *state, void *stream)
25{
26 u32 x[16], *out = stream;
27 int i;
28
29 for (i = 0; i < ARRAY_SIZE(x); i++)
30 x[i] = state[i];
31
32 for (i = 0; i < 20; i += 2) {
33 x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 16);
34 x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 16);
35 x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 16);
36 x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 16);
37
38 x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12);
39 x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12);
40 x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12);
41 x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12);
42
43 x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 8);
44 x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 8);
45 x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 8);
46 x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 8);
47
48 x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7);
49 x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7);
50 x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7);
51 x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7);
52
53 x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 16);
54 x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 16);
55 x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 16);
56 x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 16);
57
58 x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12);
59 x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12);
60 x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12);
61 x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12);
62
63 x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 8);
64 x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 8);
65 x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 8);
66 x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 8);
67
68 x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7);
69 x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7);
70 x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7);
71 x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7);
72 }
73
74 for (i = 0; i < ARRAY_SIZE(x); i++)
75 out[i] = cpu_to_le32(x[i] + state[i]);
76
77 state[12]++;
78}
79EXPORT_SYMBOL(chacha20_block);
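chacha20_block() assumes the RFC 7539 state layout -- four "expand 32-byte k" constant words, eight key words, the 32-bit block counter in word 12 and three nonce words -- and produces one 64-byte keystream block per call, advancing the counter itself via state[12]++. A hedged caller sketch under that assumption; in-tree users normally go through the crypto/chacha20.h helpers rather than filling the state by hand, and the function name and parameters below are local to this sketch:

static void chacha20_example_keystream(const u8 *key, const u8 *nonce,
				       u32 (*stream)[16], int nblocks)
{
	u32 state[16];
	int i;

	state[0] = 0x61707865;		/* "expa" */
	state[1] = 0x3320646e;		/* "nd 3" */
	state[2] = 0x79622d32;		/* "2-by" */
	state[3] = 0x6b206574;		/* "te k" */
	for (i = 0; i < 8; i++)		/* 256-bit key, little endian */
		state[4 + i] = get_unaligned_le32(key + 4 * i);
	state[12] = 0;			/* block counter */
	for (i = 0; i < 3; i++)		/* 96-bit nonce */
		state[13 + i] = get_unaligned_le32(nonce + 4 * i);

	for (i = 0; i < nblocks; i++)
		chacha20_block(state, stream[i]);	/* 64 bytes per call */
}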
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
index 707ca24f7b18..0e2c9a1e958a 100644
--- a/lib/cpu-notifier-error-inject.c
+++ b/lib/cpu-notifier-error-inject.c
@@ -8,16 +8,47 @@ static int priority;
8module_param(priority, int, 0); 8module_param(priority, int, 0);
9MODULE_PARM_DESC(priority, "specify cpu notifier priority"); 9MODULE_PARM_DESC(priority, "specify cpu notifier priority");
10 10
11#define UP_PREPARE 0
12#define UP_PREPARE_FROZEN 0
13#define DOWN_PREPARE 0
14#define DOWN_PREPARE_FROZEN 0
15
11static struct notifier_err_inject cpu_notifier_err_inject = { 16static struct notifier_err_inject cpu_notifier_err_inject = {
12 .actions = { 17 .actions = {
13 { NOTIFIER_ERR_INJECT_ACTION(CPU_UP_PREPARE) }, 18 { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE) },
14 { NOTIFIER_ERR_INJECT_ACTION(CPU_UP_PREPARE_FROZEN) }, 19 { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE_FROZEN) },
15 { NOTIFIER_ERR_INJECT_ACTION(CPU_DOWN_PREPARE) }, 20 { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE) },
16 { NOTIFIER_ERR_INJECT_ACTION(CPU_DOWN_PREPARE_FROZEN) }, 21 { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE_FROZEN) },
17 {} 22 {}
18 } 23 }
19}; 24};
20 25
26static int notf_err_handle(struct notifier_err_inject_action *action)
27{
28 int ret;
29
30 ret = action->error;
31 if (ret)
32 pr_info("Injecting error (%d) to %s\n", ret, action->name);
33 return ret;
34}
35
36static int notf_err_inj_up_prepare(unsigned int cpu)
37{
38 if (!cpuhp_tasks_frozen)
39 return notf_err_handle(&cpu_notifier_err_inject.actions[0]);
40 else
41 return notf_err_handle(&cpu_notifier_err_inject.actions[1]);
42}
43
44static int notf_err_inj_dead(unsigned int cpu)
45{
46 if (!cpuhp_tasks_frozen)
47 return notf_err_handle(&cpu_notifier_err_inject.actions[2]);
48 else
49 return notf_err_handle(&cpu_notifier_err_inject.actions[3]);
50}
51
21static struct dentry *dir; 52static struct dentry *dir;
22 53
23static int err_inject_init(void) 54static int err_inject_init(void)
@@ -29,7 +60,10 @@ static int err_inject_init(void)
29 if (IS_ERR(dir)) 60 if (IS_ERR(dir))
30 return PTR_ERR(dir); 61 return PTR_ERR(dir);
31 62
32 err = register_hotcpu_notifier(&cpu_notifier_err_inject.nb); 63 err = cpuhp_setup_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE,
64 "cpu-err-notif:prepare",
65 notf_err_inj_up_prepare,
66 notf_err_inj_dead);
33 if (err) 67 if (err)
34 debugfs_remove_recursive(dir); 68 debugfs_remove_recursive(dir);
35 69
@@ -38,7 +72,7 @@ static int err_inject_init(void)
38 72
39static void err_inject_exit(void) 73static void err_inject_exit(void)
40{ 74{
41 unregister_hotcpu_notifier(&cpu_notifier_err_inject.nb); 75 cpuhp_remove_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE);
42 debugfs_remove_recursive(dir); 76 debugfs_remove_recursive(dir);
43} 77}
44 78
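The conversion above replaces register_hotcpu_notifier() with the CPU hotplug state machine: cpuhp_setup_state_nocalls() registers a callback pair for a state without invoking it for CPUs that are already up, and a negative return from the bring-up callback vetoes the operation, which is how the injected errors take effect. A hedged sketch of the same registration pattern using the dynamic online state (the error injector itself uses the fixed CPUHP_NOTF_ERR_INJ_PREPARE slot); the my_* names are illustrative:

static int my_online(unsigned int cpu)
{
	pr_info("cpu%u is coming online\n", cpu);
	return 0;			/* a negative errno rolls the operation back */
}

static int my_offline(unsigned int cpu)
{
	pr_info("cpu%u is going offline\n", cpu);
	return 0;
}

static int my_hp_state;

static int __init my_init(void)
{
	int ret;

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "lib/example:online",
					my_online, my_offline);
	if (ret < 0)
		return ret;
	my_hp_state = ret;		/* dynamic states return the allocated slot */
	return 0;
}

static void __exit my_exit(void)
{
	cpuhp_remove_state_nocalls(my_hp_state);
}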
diff --git a/lib/crc32.c b/lib/crc32.c
index 9a907d489d95..7fbd1a112b9d 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -979,7 +979,6 @@ static int __init crc32c_test(void)
979 int i; 979 int i;
980 int errors = 0; 980 int errors = 0;
981 int bytes = 0; 981 int bytes = 0;
982 struct timespec start, stop;
983 u64 nsec; 982 u64 nsec;
984 unsigned long flags; 983 unsigned long flags;
985 984
@@ -999,20 +998,17 @@ static int __init crc32c_test(void)
999 local_irq_save(flags); 998 local_irq_save(flags);
1000 local_irq_disable(); 999 local_irq_disable();
1001 1000
1002 getnstimeofday(&start); 1001 nsec = ktime_get_ns();
1003 for (i = 0; i < 100; i++) { 1002 for (i = 0; i < 100; i++) {
1004 if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf + 1003 if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf +
1005 test[i].start, test[i].length)) 1004 test[i].start, test[i].length))
1006 errors++; 1005 errors++;
1007 } 1006 }
1008 getnstimeofday(&stop); 1007 nsec = ktime_get_ns() - nsec;
1009 1008
1010 local_irq_restore(flags); 1009 local_irq_restore(flags);
1011 local_irq_enable(); 1010 local_irq_enable();
1012 1011
1013 nsec = stop.tv_nsec - start.tv_nsec +
1014 1000000000 * (stop.tv_sec - start.tv_sec);
1015
1016 pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS); 1012 pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS);
1017 1013
1018 if (errors) 1014 if (errors)
@@ -1065,7 +1061,6 @@ static int __init crc32_test(void)
1065 int i; 1061 int i;
1066 int errors = 0; 1062 int errors = 0;
1067 int bytes = 0; 1063 int bytes = 0;
1068 struct timespec start, stop;
1069 u64 nsec; 1064 u64 nsec;
1070 unsigned long flags; 1065 unsigned long flags;
1071 1066
@@ -1088,7 +1083,7 @@ static int __init crc32_test(void)
1088 local_irq_save(flags); 1083 local_irq_save(flags);
1089 local_irq_disable(); 1084 local_irq_disable();
1090 1085
1091 getnstimeofday(&start); 1086 nsec = ktime_get_ns();
1092 for (i = 0; i < 100; i++) { 1087 for (i = 0; i < 100; i++) {
1093 if (test[i].crc_le != crc32_le(test[i].crc, test_buf + 1088 if (test[i].crc_le != crc32_le(test[i].crc, test_buf +
1094 test[i].start, test[i].length)) 1089 test[i].start, test[i].length))
@@ -1098,14 +1093,11 @@ static int __init crc32_test(void)
1098 test[i].start, test[i].length)) 1093 test[i].start, test[i].length))
1099 errors++; 1094 errors++;
1100 } 1095 }
1101 getnstimeofday(&stop); 1096 nsec = ktime_get_ns() - nsec;
1102 1097
1103 local_irq_restore(flags); 1098 local_irq_restore(flags);
1104 local_irq_enable(); 1099 local_irq_enable();
1105 1100
1106 nsec = stop.tv_nsec - start.tv_nsec +
1107 1000000000 * (stop.tv_sec - start.tv_sec);
1108
1109 pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n", 1101 pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n",
1110 CRC_LE_BITS, CRC_BE_BITS); 1102 CRC_LE_BITS, CRC_BE_BITS);
1111 1103
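The self-tests above now time the CRC loops with ktime_get_ns() instead of two getnstimeofday() calls and manual tv_sec/tv_nsec arithmetic. The resulting pattern, as a brief sketch (the timed function is a stand-in):

static void example_time_loop(void)
{
	u64 nsec;

	nsec = ktime_get_ns();			/* monotonic nanoseconds */
	run_crc_loop();				/* stand-in for the test body */
	nsec = ktime_get_ns() - nsec;		/* elapsed time */
	pr_info("loop took %llu ns\n", nsec);
}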
diff --git a/lib/digsig.c b/lib/digsig.c
index 07be6c1ef4e2..55b8b2f41a9e 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -104,21 +104,25 @@ static int digsig_verify_rsa(struct key *key,
104 datap = pkh->mpi; 104 datap = pkh->mpi;
105 endp = ukp->data + ukp->datalen; 105 endp = ukp->data + ukp->datalen;
106 106
107 err = -ENOMEM;
108
109 for (i = 0; i < pkh->nmpi; i++) { 107 for (i = 0; i < pkh->nmpi; i++) {
110 unsigned int remaining = endp - datap; 108 unsigned int remaining = endp - datap;
111 pkey[i] = mpi_read_from_buffer(datap, &remaining); 109 pkey[i] = mpi_read_from_buffer(datap, &remaining);
112 if (!pkey[i]) 110 if (IS_ERR(pkey[i])) {
111 err = PTR_ERR(pkey[i]);
113 goto err; 112 goto err;
113 }
114 datap += remaining; 114 datap += remaining;
115 } 115 }
116 116
117 mblen = mpi_get_nbits(pkey[0]); 117 mblen = mpi_get_nbits(pkey[0]);
118 mlen = DIV_ROUND_UP(mblen, 8); 118 mlen = DIV_ROUND_UP(mblen, 8);
119 119
120 if (mlen == 0) 120 if (mlen == 0) {
121 err = -EINVAL;
121 goto err; 122 goto err;
123 }
124
125 err = -ENOMEM;
122 126
123 out1 = kzalloc(mlen, GFP_KERNEL); 127 out1 = kzalloc(mlen, GFP_KERNEL);
124 if (!out1) 128 if (!out1)
@@ -126,8 +130,10 @@ static int digsig_verify_rsa(struct key *key,
126 130
127 nret = siglen; 131 nret = siglen;
128 in = mpi_read_from_buffer(sig, &nret); 132 in = mpi_read_from_buffer(sig, &nret);
129 if (!in) 133 if (IS_ERR(in)) {
134 err = PTR_ERR(in);
130 goto err; 135 goto err;
136 }
131 137
132 res = mpi_alloc(mpi_get_nlimbs(in) * 2); 138 res = mpi_alloc(mpi_get_nlimbs(in) * 2);
133 if (!res) 139 if (!res)
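mpi_read_from_buffer() now reports failures as an ERR_PTR() value rather than NULL, so the callers above switch from NULL tests to IS_ERR()/PTR_ERR() and keep the parser's specific errno instead of folding every failure into -ENOMEM. A hedged sketch of that error-pointer convention (mpi_example_read is illustrative, not a digsig function):

static int mpi_example_read(const void *buf, unsigned int *nbytes)
{
	MPI m = mpi_read_from_buffer(buf, nbytes);

	if (IS_ERR(m))
		return PTR_ERR(m);	/* e.g. -EINVAL for a truncated buffer */

	/* ... use the MPI ... */
	mpi_free(m);
	return 0;
}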
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 51a76af25c66..8971370bfb16 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -22,6 +22,7 @@
22#include <linux/stacktrace.h> 22#include <linux/stacktrace.h>
23#include <linux/dma-debug.h> 23#include <linux/dma-debug.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/vmalloc.h>
25#include <linux/debugfs.h> 26#include <linux/debugfs.h>
26#include <linux/uaccess.h> 27#include <linux/uaccess.h>
27#include <linux/export.h> 28#include <linux/export.h>
@@ -43,6 +44,7 @@ enum {
43 dma_debug_page, 44 dma_debug_page,
44 dma_debug_sg, 45 dma_debug_sg,
45 dma_debug_coherent, 46 dma_debug_coherent,
47 dma_debug_resource,
46}; 48};
47 49
48enum map_err_types { 50enum map_err_types {
@@ -150,8 +152,9 @@ static const char *const maperr2str[] = {
150 [MAP_ERR_CHECKED] = "dma map error checked", 152 [MAP_ERR_CHECKED] = "dma map error checked",
151}; 153};
152 154
153static const char *type2name[4] = { "single", "page", 155static const char *type2name[5] = { "single", "page",
154 "scather-gather", "coherent" }; 156 "scather-gather", "coherent",
157 "resource" };
155 158
156static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", 159static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
157 "DMA_FROM_DEVICE", "DMA_NONE" }; 160 "DMA_FROM_DEVICE", "DMA_NONE" };
@@ -253,6 +256,7 @@ static int hash_fn(struct dma_debug_entry *entry)
253 */ 256 */
254static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, 257static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
255 unsigned long *flags) 258 unsigned long *flags)
259 __acquires(&dma_entry_hash[idx].lock)
256{ 260{
257 int idx = hash_fn(entry); 261 int idx = hash_fn(entry);
258 unsigned long __flags; 262 unsigned long __flags;
@@ -267,6 +271,7 @@ static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
267 */ 271 */
268static void put_hash_bucket(struct hash_bucket *bucket, 272static void put_hash_bucket(struct hash_bucket *bucket,
269 unsigned long *flags) 273 unsigned long *flags)
274 __releases(&bucket->lock)
270{ 275{
271 unsigned long __flags = *flags; 276 unsigned long __flags = *flags;
272 277
@@ -397,6 +402,9 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
397 402
398static unsigned long long phys_addr(struct dma_debug_entry *entry) 403static unsigned long long phys_addr(struct dma_debug_entry *entry)
399{ 404{
405 if (entry->type == dma_debug_resource)
406 return __pfn_to_phys(entry->pfn) + entry->offset;
407
400 return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; 408 return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
401} 409}
402 410
@@ -1162,11 +1170,32 @@ static void check_unmap(struct dma_debug_entry *ref)
1162 put_hash_bucket(bucket, &flags); 1170 put_hash_bucket(bucket, &flags);
1163} 1171}
1164 1172
1165static void check_for_stack(struct device *dev, void *addr) 1173static void check_for_stack(struct device *dev,
1174 struct page *page, size_t offset)
1166{ 1175{
1167 if (object_is_on_stack(addr)) 1176 void *addr;
1168 err_printk(dev, NULL, "DMA-API: device driver maps memory from " 1177 struct vm_struct *stack_vm_area = task_stack_vm_area(current);
1169 "stack [addr=%p]\n", addr); 1178
1179 if (!stack_vm_area) {
1180 /* Stack is direct-mapped. */
1181 if (PageHighMem(page))
1182 return;
1183 addr = page_address(page) + offset;
1184 if (object_is_on_stack(addr))
1185 err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
1186 } else {
1187 /* Stack is vmalloced. */
1188 int i;
1189
1190 for (i = 0; i < stack_vm_area->nr_pages; i++) {
1191 if (page != stack_vm_area->pages[i])
1192 continue;
1193
1194 addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
1195 err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
1196 break;
1197 }
1198 }
1170} 1199}
1171 1200
1172static inline bool overlap(void *addr, unsigned long len, void *start, void *end) 1201static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
@@ -1289,10 +1318,11 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
1289 if (map_single) 1318 if (map_single)
1290 entry->type = dma_debug_single; 1319 entry->type = dma_debug_single;
1291 1320
1321 check_for_stack(dev, page, offset);
1322
1292 if (!PageHighMem(page)) { 1323 if (!PageHighMem(page)) {
1293 void *addr = page_address(page) + offset; 1324 void *addr = page_address(page) + offset;
1294 1325
1295 check_for_stack(dev, addr);
1296 check_for_illegal_area(dev, addr, size); 1326 check_for_illegal_area(dev, addr, size);
1297 } 1327 }
1298 1328
@@ -1384,8 +1414,9 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
1384 entry->sg_call_ents = nents; 1414 entry->sg_call_ents = nents;
1385 entry->sg_mapped_ents = mapped_ents; 1415 entry->sg_mapped_ents = mapped_ents;
1386 1416
1417 check_for_stack(dev, sg_page(s), s->offset);
1418
1387 if (!PageHighMem(sg_page(s))) { 1419 if (!PageHighMem(sg_page(s))) {
1388 check_for_stack(dev, sg_virt(s));
1389 check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); 1420 check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
1390 } 1421 }
1391 1422
@@ -1493,6 +1524,49 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
1493} 1524}
1494EXPORT_SYMBOL(debug_dma_free_coherent); 1525EXPORT_SYMBOL(debug_dma_free_coherent);
1495 1526
1527void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
1528 int direction, dma_addr_t dma_addr)
1529{
1530 struct dma_debug_entry *entry;
1531
1532 if (unlikely(dma_debug_disabled()))
1533 return;
1534
1535 entry = dma_entry_alloc();
1536 if (!entry)
1537 return;
1538
1539 entry->type = dma_debug_resource;
1540 entry->dev = dev;
1541 entry->pfn = PHYS_PFN(addr);
1542 entry->offset = offset_in_page(addr);
1543 entry->size = size;
1544 entry->dev_addr = dma_addr;
1545 entry->direction = direction;
1546 entry->map_err_type = MAP_ERR_NOT_CHECKED;
1547
1548 add_dma_entry(entry);
1549}
1550EXPORT_SYMBOL(debug_dma_map_resource);
1551
1552void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
1553 size_t size, int direction)
1554{
1555 struct dma_debug_entry ref = {
1556 .type = dma_debug_resource,
1557 .dev = dev,
1558 .dev_addr = dma_addr,
1559 .size = size,
1560 .direction = direction,
1561 };
1562
1563 if (unlikely(dma_debug_disabled()))
1564 return;
1565
1566 check_unmap(&ref);
1567}
1568EXPORT_SYMBOL(debug_dma_unmap_resource);
1569
1496void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, 1570void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
1497 size_t size, int direction) 1571 size_t size, int direction)
1498{ 1572{
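debug_dma_map_resource() and debug_dma_unmap_resource() add bookkeeping for the dma_map_resource() API, which maps a physical (typically MMIO) address rather than a struct page; the new dma_debug_resource entry type lets phys_addr() be reconstructed from pfn and offset without assuming a backing page exists. A hedged driver-side sketch of the call pair these hooks check (the function and its arguments are illustrative):

static int example_map_mmio(struct device *dev, struct resource *res)
{
	dma_addr_t dma;

	dma = dma_map_resource(dev, res->start, resource_size(res),
			       DMA_BIDIRECTIONAL, 0);
	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... program a peer device's DMA engine with "dma" ... */

	dma_unmap_resource(dev, dma, resource_size(res),
			   DMA_BIDIRECTIONAL, 0);
	return 0;
}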
diff --git a/lib/dma-noop.c b/lib/dma-noop.c
index 72145646857e..3d766e78fbe2 100644
--- a/lib/dma-noop.c
+++ b/lib/dma-noop.c
@@ -10,7 +10,7 @@
10 10
11static void *dma_noop_alloc(struct device *dev, size_t size, 11static void *dma_noop_alloc(struct device *dev, size_t size,
12 dma_addr_t *dma_handle, gfp_t gfp, 12 dma_addr_t *dma_handle, gfp_t gfp,
13 struct dma_attrs *attrs) 13 unsigned long attrs)
14{ 14{
15 void *ret; 15 void *ret;
16 16
@@ -22,7 +22,7 @@ static void *dma_noop_alloc(struct device *dev, size_t size,
22 22
23static void dma_noop_free(struct device *dev, size_t size, 23static void dma_noop_free(struct device *dev, size_t size,
24 void *cpu_addr, dma_addr_t dma_addr, 24 void *cpu_addr, dma_addr_t dma_addr,
25 struct dma_attrs *attrs) 25 unsigned long attrs)
26{ 26{
27 free_pages((unsigned long)cpu_addr, get_order(size)); 27 free_pages((unsigned long)cpu_addr, get_order(size));
28} 28}
@@ -30,13 +30,14 @@ static void dma_noop_free(struct device *dev, size_t size,
30static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page, 30static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page,
31 unsigned long offset, size_t size, 31 unsigned long offset, size_t size,
32 enum dma_data_direction dir, 32 enum dma_data_direction dir,
33 struct dma_attrs *attrs) 33 unsigned long attrs)
34{ 34{
35 return page_to_phys(page) + offset; 35 return page_to_phys(page) + offset;
36} 36}
37 37
38static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents, 38static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
39 enum dma_data_direction dir, struct dma_attrs *attrs) 39 enum dma_data_direction dir,
40 unsigned long attrs)
40{ 41{
41 int i; 42 int i;
42 struct scatterlist *sg; 43 struct scatterlist *sg;
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index fe42b6ec3f0c..da796e2dc4f5 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -188,6 +188,13 @@ static int ddebug_change(const struct ddebug_query *query,
188 newflags = (dp->flags & mask) | flags; 188 newflags = (dp->flags & mask) | flags;
189 if (newflags == dp->flags) 189 if (newflags == dp->flags)
190 continue; 190 continue;
191#ifdef HAVE_JUMP_LABEL
192 if (dp->flags & _DPRINTK_FLAGS_PRINT) {
193 if (!(flags & _DPRINTK_FLAGS_PRINT))
194 static_branch_disable(&dp->key.dd_key_true);
195 } else if (flags & _DPRINTK_FLAGS_PRINT)
196 static_branch_enable(&dp->key.dd_key_true);
197#endif
191 dp->flags = newflags; 198 dp->flags = newflags;
192 vpr_info("changed %s:%d [%s]%s =%s\n", 199 vpr_info("changed %s:%d [%s]%s =%s\n",
193 trim_prefix(dp->filename), dp->lineno, 200 trim_prefix(dp->filename), dp->lineno,
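With HAVE_JUMP_LABEL, each dynamic-debug call site is gated by a static key, so ddebug_change() now flips the branch with static_branch_enable()/static_branch_disable() whenever the _DPRINTK_FLAGS_PRINT bit changes, leaving disabled sites patched out as no-ops. The underlying static-key pattern, sketched with illustrative names:

static DEFINE_STATIC_KEY_FALSE(example_debug_key);

static void example_hot_path(void)
{
	/* Costs only a patched-out jump until the key is enabled at runtime. */
	if (static_branch_unlikely(&example_debug_key))
		pr_info("debugging enabled\n");
}

static void example_set_debug(bool on)
{
	if (on)
		static_branch_enable(&example_debug_key);
	else
		static_branch_disable(&example_debug_key);
}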
diff --git a/lib/earlycpio.c b/lib/earlycpio.c
index 3eb3e4722b8e..db283ba4d2c1 100644
--- a/lib/earlycpio.c
+++ b/lib/earlycpio.c
@@ -125,7 +125,10 @@ struct cpio_data find_cpio_data(const char *path, void *data,
125 if ((ch[C_MODE] & 0170000) == 0100000 && 125 if ((ch[C_MODE] & 0170000) == 0100000 &&
126 ch[C_NAMESIZE] >= mypathsize && 126 ch[C_NAMESIZE] >= mypathsize &&
127 !memcmp(p, path, mypathsize)) { 127 !memcmp(p, path, mypathsize)) {
128 *nextoff = (long)nptr - (long)data; 128
129 if (nextoff)
130 *nextoff = (long)nptr - (long)data;
131
129 if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) { 132 if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) {
130 pr_warn( 133 pr_warn(
131 "File %s exceeding MAX_CPIO_FILE_NAME [%d]\n", 134 "File %s exceeding MAX_CPIO_FILE_NAME [%d]\n",
diff --git a/lib/hweight.c b/lib/hweight.c
index 9a5c1f221558..43273a7d83cf 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,6 +9,7 @@
9 * The Hamming Weight of a number is the total number of bits set in it. 9 * The Hamming Weight of a number is the total number of bits set in it.
10 */ 10 */
11 11
12#ifndef __HAVE_ARCH_SW_HWEIGHT
12unsigned int __sw_hweight32(unsigned int w) 13unsigned int __sw_hweight32(unsigned int w)
13{ 14{
14#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER 15#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w)
25#endif 26#endif
26} 27}
27EXPORT_SYMBOL(__sw_hweight32); 28EXPORT_SYMBOL(__sw_hweight32);
29#endif
28 30
29unsigned int __sw_hweight16(unsigned int w) 31unsigned int __sw_hweight16(unsigned int w)
30{ 32{
@@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w)
43} 45}
44EXPORT_SYMBOL(__sw_hweight8); 46EXPORT_SYMBOL(__sw_hweight8);
45 47
48#ifndef __HAVE_ARCH_SW_HWEIGHT
46unsigned long __sw_hweight64(__u64 w) 49unsigned long __sw_hweight64(__u64 w)
47{ 50{
48#if BITS_PER_LONG == 32 51#if BITS_PER_LONG == 32
@@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w)
65#endif 68#endif
66} 69}
67EXPORT_SYMBOL(__sw_hweight64); 70EXPORT_SYMBOL(__sw_hweight64);
71#endif
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index c27e269210c4..a816f3a80625 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -29,8 +29,7 @@ again:
29 index = bitmap_find_next_zero_area(map, size, start, nr, align_mask); 29 index = bitmap_find_next_zero_area(map, size, start, nr, align_mask);
30 if (index < size) { 30 if (index < size) {
31 if (iommu_is_span_boundary(index, nr, shift, boundary_size)) { 31 if (iommu_is_span_boundary(index, nr, shift, boundary_size)) {
32 /* we could do more effectively */ 32 start = ALIGN(shift + index, boundary_size) - shift;
33 start = index + 1;
34 goto again; 33 goto again;
35 } 34 }
36 bitmap_set(map, index, nr); 35 bitmap_set(map, index, nr);
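Rather than retrying from index + 1 when a candidate range crosses an IOMMU segment boundary, the allocator above now jumps straight to the next boundary: ALIGN(shift + index, boundary_size) - shift rounds the shifted index up to the next multiple of boundary_size. A small worked sketch of that arithmetic, with made-up values and a local macro standing in for the kernel's ALIGN():

/* Same rounding as ALIGN() for power-of-two boundary sizes. */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

/* shift = 0, index = 250, boundary_size = 256:
 * ALIGN_UP(250, 256) - 0 = 256, so the search resumes at the boundary
 * instead of stepping through 251, 252, ... one slot at a time. */
static unsigned long next_search_start(unsigned long shift, unsigned long index,
				       unsigned long boundary_size)
{
	return ALIGN_UP(shift + index, boundary_size) - shift;
}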
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 0cd522753ff5..f0c7f1481bae 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -3,8 +3,11 @@
3#include <linux/pagemap.h> 3#include <linux/pagemap.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/vmalloc.h> 5#include <linux/vmalloc.h>
6#include <linux/splice.h>
6#include <net/checksum.h> 7#include <net/checksum.h>
7 8
9#define PIPE_PARANOIA /* for now */
10
8#define iterate_iovec(i, n, __v, __p, skip, STEP) { \ 11#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
9 size_t left; \ 12 size_t left; \
10 size_t wanted = n; \ 13 size_t wanted = n; \
@@ -56,37 +59,24 @@
56 n = wanted; \ 59 n = wanted; \
57} 60}
58 61
59#define iterate_bvec(i, n, __v, __p, skip, STEP) { \ 62#define iterate_bvec(i, n, __v, __bi, skip, STEP) { \
60 size_t wanted = n; \ 63 struct bvec_iter __start; \
61 __p = i->bvec; \ 64 __start.bi_size = n; \
62 __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \ 65 __start.bi_bvec_done = skip; \
63 if (likely(__v.bv_len)) { \ 66 __start.bi_idx = 0; \
64 __v.bv_page = __p->bv_page; \ 67 for_each_bvec(__v, i->bvec, __bi, __start) { \
65 __v.bv_offset = __p->bv_offset + skip; \ 68 if (!__v.bv_len) \
66 (void)(STEP); \
67 skip += __v.bv_len; \
68 n -= __v.bv_len; \
69 } \
70 while (unlikely(n)) { \
71 __p++; \
72 __v.bv_len = min_t(size_t, n, __p->bv_len); \
73 if (unlikely(!__v.bv_len)) \
74 continue; \ 69 continue; \
75 __v.bv_page = __p->bv_page; \
76 __v.bv_offset = __p->bv_offset; \
77 (void)(STEP); \ 70 (void)(STEP); \
78 skip = __v.bv_len; \
79 n -= __v.bv_len; \
80 } \ 71 } \
81 n = wanted; \
82} 72}
83 73
84#define iterate_all_kinds(i, n, v, I, B, K) { \ 74#define iterate_all_kinds(i, n, v, I, B, K) { \
85 size_t skip = i->iov_offset; \ 75 size_t skip = i->iov_offset; \
86 if (unlikely(i->type & ITER_BVEC)) { \ 76 if (unlikely(i->type & ITER_BVEC)) { \
87 const struct bio_vec *bvec; \
88 struct bio_vec v; \ 77 struct bio_vec v; \
89 iterate_bvec(i, n, v, bvec, skip, (B)) \ 78 struct bvec_iter __bi; \
79 iterate_bvec(i, n, v, __bi, skip, (B)) \
90 } else if (unlikely(i->type & ITER_KVEC)) { \ 80 } else if (unlikely(i->type & ITER_KVEC)) { \
91 const struct kvec *kvec; \ 81 const struct kvec *kvec; \
92 struct kvec v; \ 82 struct kvec v; \
@@ -104,15 +94,13 @@
104 if (i->count) { \ 94 if (i->count) { \
105 size_t skip = i->iov_offset; \ 95 size_t skip = i->iov_offset; \
106 if (unlikely(i->type & ITER_BVEC)) { \ 96 if (unlikely(i->type & ITER_BVEC)) { \
107 const struct bio_vec *bvec; \ 97 const struct bio_vec *bvec = i->bvec; \
108 struct bio_vec v; \ 98 struct bio_vec v; \
109 iterate_bvec(i, n, v, bvec, skip, (B)) \ 99 struct bvec_iter __bi; \
110 if (skip == bvec->bv_len) { \ 100 iterate_bvec(i, n, v, __bi, skip, (B)) \
111 bvec++; \ 101 i->bvec = __bvec_iter_bvec(i->bvec, __bi); \
112 skip = 0; \ 102 i->nr_segs -= i->bvec - bvec; \
113 } \ 103 skip = __bi.bi_bvec_done; \
114 i->nr_segs -= bvec - i->bvec; \
115 i->bvec = bvec; \
116 } else if (unlikely(i->type & ITER_KVEC)) { \ 104 } else if (unlikely(i->type & ITER_KVEC)) { \
117 const struct kvec *kvec; \ 105 const struct kvec *kvec; \
118 struct kvec v; \ 106 struct kvec v; \
@@ -159,7 +147,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
159 buf = iov->iov_base + skip; 147 buf = iov->iov_base + skip;
160 copy = min(bytes, iov->iov_len - skip); 148 copy = min(bytes, iov->iov_len - skip);
161 149
162 if (!fault_in_pages_writeable(buf, copy)) { 150 if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
163 kaddr = kmap_atomic(page); 151 kaddr = kmap_atomic(page);
164 from = kaddr + offset; 152 from = kaddr + offset;
165 153
@@ -190,6 +178,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
190 copy = min(bytes, iov->iov_len - skip); 178 copy = min(bytes, iov->iov_len - skip);
191 } 179 }
192 /* Too bad - revert to non-atomic kmap */ 180 /* Too bad - revert to non-atomic kmap */
181
193 kaddr = kmap(page); 182 kaddr = kmap(page);
194 from = kaddr + offset; 183 from = kaddr + offset;
195 left = __copy_to_user(buf, from, copy); 184 left = __copy_to_user(buf, from, copy);
@@ -208,6 +197,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
208 bytes -= copy; 197 bytes -= copy;
209 } 198 }
210 kunmap(page); 199 kunmap(page);
200
211done: 201done:
212 if (skip == iov->iov_len) { 202 if (skip == iov->iov_len) {
213 iov++; 203 iov++;
@@ -240,7 +230,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
240 buf = iov->iov_base + skip; 230 buf = iov->iov_base + skip;
241 copy = min(bytes, iov->iov_len - skip); 231 copy = min(bytes, iov->iov_len - skip);
242 232
243 if (!fault_in_pages_readable(buf, copy)) { 233 if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
244 kaddr = kmap_atomic(page); 234 kaddr = kmap_atomic(page);
245 to = kaddr + offset; 235 to = kaddr + offset;
246 236
@@ -271,6 +261,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
271 copy = min(bytes, iov->iov_len - skip); 261 copy = min(bytes, iov->iov_len - skip);
272 } 262 }
273 /* Too bad - revert to non-atomic kmap */ 263 /* Too bad - revert to non-atomic kmap */
264
274 kaddr = kmap(page); 265 kaddr = kmap(page);
275 to = kaddr + offset; 266 to = kaddr + offset;
276 left = __copy_from_user(to, buf, copy); 267 left = __copy_from_user(to, buf, copy);
@@ -289,6 +280,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
289 bytes -= copy; 280 bytes -= copy;
290 } 281 }
291 kunmap(page); 282 kunmap(page);
283
292done: 284done:
293 if (skip == iov->iov_len) { 285 if (skip == iov->iov_len) {
294 iov++; 286 iov++;
@@ -301,25 +293,92 @@ done:
301 return wanted - bytes; 293 return wanted - bytes;
302} 294}
303 295
304/* 296#ifdef PIPE_PARANOIA
305 * Fault in the first iovec of the given iov_iter, to a maximum length 297static bool sanity(const struct iov_iter *i)
306 * of bytes. Returns 0 on success, or non-zero if the memory could not be
307 * accessed (ie. because it is an invalid address).
308 *
309 * writev-intensive code may want this to prefault several iovecs -- that
310 * would be possible (callers must not rely on the fact that _only_ the
311 * first iovec will be faulted with the current implementation).
312 */
313int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
314{ 298{
315 if (!(i->type & (ITER_BVEC|ITER_KVEC))) { 299 struct pipe_inode_info *pipe = i->pipe;
316 char __user *buf = i->iov->iov_base + i->iov_offset; 300 int idx = i->idx;
317 bytes = min(bytes, i->iov->iov_len - i->iov_offset); 301 int next = pipe->curbuf + pipe->nrbufs;
318 return fault_in_pages_readable(buf, bytes); 302 if (i->iov_offset) {
303 struct pipe_buffer *p;
304 if (unlikely(!pipe->nrbufs))
305 goto Bad; // pipe must be non-empty
306 if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
307 goto Bad; // must be at the last buffer...
308
309 p = &pipe->bufs[idx];
310 if (unlikely(p->offset + p->len != i->iov_offset))
311 goto Bad; // ... at the end of segment
312 } else {
313 if (idx != (next & (pipe->buffers - 1)))
314 goto Bad; // must be right after the last buffer
319 } 315 }
320 return 0; 316 return true;
317Bad:
318 printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
319 printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
320 pipe->curbuf, pipe->nrbufs, pipe->buffers);
321 for (idx = 0; idx < pipe->buffers; idx++)
322 printk(KERN_ERR "[%p %p %d %d]\n",
323 pipe->bufs[idx].ops,
324 pipe->bufs[idx].page,
325 pipe->bufs[idx].offset,
326 pipe->bufs[idx].len);
327 WARN_ON(1);
328 return false;
329}
330#else
331#define sanity(i) true
332#endif
333
334static inline int next_idx(int idx, struct pipe_inode_info *pipe)
335{
336 return (idx + 1) & (pipe->buffers - 1);
337}
338
339static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
340 struct iov_iter *i)
341{
342 struct pipe_inode_info *pipe = i->pipe;
343 struct pipe_buffer *buf;
344 size_t off;
345 int idx;
346
347 if (unlikely(bytes > i->count))
348 bytes = i->count;
349
350 if (unlikely(!bytes))
351 return 0;
352
353 if (!sanity(i))
354 return 0;
355
356 off = i->iov_offset;
357 idx = i->idx;
358 buf = &pipe->bufs[idx];
359 if (off) {
360 if (offset == off && buf->page == page) {
361 /* merge with the last one */
362 buf->len += bytes;
363 i->iov_offset += bytes;
364 goto out;
365 }
366 idx = next_idx(idx, pipe);
367 buf = &pipe->bufs[idx];
368 }
369 if (idx == pipe->curbuf && pipe->nrbufs)
370 return 0;
371 pipe->nrbufs++;
372 buf->ops = &page_cache_pipe_buf_ops;
373 get_page(buf->page = page);
374 buf->offset = offset;
375 buf->len = bytes;
376 i->iov_offset = offset + bytes;
377 i->idx = idx;
378out:
379 i->count -= bytes;
380 return bytes;
321} 381}
322EXPORT_SYMBOL(iov_iter_fault_in_readable);
323 382
324/* 383/*
325 * Fault in one or more iovecs of the given iov_iter, to a maximum length of 384 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
@@ -328,7 +387,7 @@ EXPORT_SYMBOL(iov_iter_fault_in_readable);
328 * Return 0 on success, or non-zero if the memory could not be accessed (i.e. 387 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
329 * because it is an invalid address). 388 * because it is an invalid address).
330 */ 389 */
331int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes) 390int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
332{ 391{
333 size_t skip = i->iov_offset; 392 size_t skip = i->iov_offset;
334 const struct iovec *iov; 393 const struct iovec *iov;
@@ -337,15 +396,14 @@ int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
337 396
338 if (!(i->type & (ITER_BVEC|ITER_KVEC))) { 397 if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
339 iterate_iovec(i, bytes, v, iov, skip, ({ 398 iterate_iovec(i, bytes, v, iov, skip, ({
340 err = fault_in_multipages_readable(v.iov_base, 399 err = fault_in_pages_readable(v.iov_base, v.iov_len);
341 v.iov_len);
342 if (unlikely(err)) 400 if (unlikely(err))
343 return err; 401 return err;
344 0;})) 402 0;}))
345 } 403 }
346 return 0; 404 return 0;
347} 405}
348EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable); 406EXPORT_SYMBOL(iov_iter_fault_in_readable);
349 407
350void iov_iter_init(struct iov_iter *i, int direction, 408void iov_iter_init(struct iov_iter *i, int direction,
351 const struct iovec *iov, unsigned long nr_segs, 409 const struct iovec *iov, unsigned long nr_segs,
@@ -387,9 +445,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
387 kunmap_atomic(addr); 445 kunmap_atomic(addr);
388} 446}
389 447
448static inline bool allocated(struct pipe_buffer *buf)
449{
450 return buf->ops == &default_pipe_buf_ops;
451}
452
453static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
454{
455 size_t off = i->iov_offset;
456 int idx = i->idx;
457 if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
458 idx = next_idx(idx, i->pipe);
459 off = 0;
460 }
461 *idxp = idx;
462 *offp = off;
463}
464
465static size_t push_pipe(struct iov_iter *i, size_t size,
466 int *idxp, size_t *offp)
467{
468 struct pipe_inode_info *pipe = i->pipe;
469 size_t off;
470 int idx;
471 ssize_t left;
472
473 if (unlikely(size > i->count))
474 size = i->count;
475 if (unlikely(!size))
476 return 0;
477
478 left = size;
479 data_start(i, &idx, &off);
480 *idxp = idx;
481 *offp = off;
482 if (off) {
483 left -= PAGE_SIZE - off;
484 if (left <= 0) {
485 pipe->bufs[idx].len += size;
486 return size;
487 }
488 pipe->bufs[idx].len = PAGE_SIZE;
489 idx = next_idx(idx, pipe);
490 }
491 while (idx != pipe->curbuf || !pipe->nrbufs) {
492 struct page *page = alloc_page(GFP_USER);
493 if (!page)
494 break;
495 pipe->nrbufs++;
496 pipe->bufs[idx].ops = &default_pipe_buf_ops;
497 pipe->bufs[idx].page = page;
498 pipe->bufs[idx].offset = 0;
499 if (left <= PAGE_SIZE) {
500 pipe->bufs[idx].len = left;
501 return size;
502 }
503 pipe->bufs[idx].len = PAGE_SIZE;
504 left -= PAGE_SIZE;
505 idx = next_idx(idx, pipe);
506 }
507 return size - left;
508}
509
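push_pipe() above only reserves room: it tops up the partially filled tail buffer, then allocates one GFP_USER page per free slot until the requested size is covered, the ring is full, or an allocation fails, and reports how much room it actually secured. The stand-alone snippet below (illustration only, not kernel code) just redoes that accounting to show how many fresh pages a request needs.

#include <stdio.h>

#define PAGE_SZ 4096L

/* How many new pages does a request need once the tail page's leftover room
 * is used up?  Mirrors push_pipe()'s accounting; ignores the ring-full and
 * allocation-failure truncation. */
static unsigned int pages_needed(long size, long tail_off)
{
        long left = size;
        unsigned int pages = 0;

        if (tail_off) {
                left -= PAGE_SZ - tail_off;     /* room left in the tail page */
                if (left <= 0)
                        return 0;               /* fits without a new page */
        }
        while (left > 0) {
                pages++;
                left -= PAGE_SZ;
        }
        return pages;
}

int main(void)
{
        /* 10000 bytes with 3000 already used in the tail page: 1096 bytes
         * fit there, so three more pages are needed (4096 + 4096 + 712). */
        printf("%u\n", pages_needed(10000, 3000));
        return 0;
}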
510static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
511 struct iov_iter *i)
512{
513 struct pipe_inode_info *pipe = i->pipe;
514 size_t n, off;
515 int idx;
516
517 if (!sanity(i))
518 return 0;
519
520 bytes = n = push_pipe(i, bytes, &idx, &off);
521 if (unlikely(!n))
522 return 0;
523 for ( ; n; idx = next_idx(idx, pipe), off = 0) {
524 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
525 memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
526 i->idx = idx;
527 i->iov_offset = off + chunk;
528 n -= chunk;
529 addr += chunk;
530 }
531 i->count -= bytes;
532 return bytes;
533}
534
390size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) 535size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
391{ 536{
392 const char *from = addr; 537 const char *from = addr;
538 if (unlikely(i->type & ITER_PIPE))
539 return copy_pipe_to_iter(addr, bytes, i);
393 iterate_and_advance(i, bytes, v, 540 iterate_and_advance(i, bytes, v,
394 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, 541 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
395 v.iov_len), 542 v.iov_len),
@@ -405,6 +552,10 @@ EXPORT_SYMBOL(copy_to_iter);
405size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) 552size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
406{ 553{
407 char *to = addr; 554 char *to = addr;
555 if (unlikely(i->type & ITER_PIPE)) {
556 WARN_ON(1);
557 return 0;
558 }
408 iterate_and_advance(i, bytes, v, 559 iterate_and_advance(i, bytes, v,
409 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, 560 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
410 v.iov_len), 561 v.iov_len),
@@ -420,6 +571,10 @@ EXPORT_SYMBOL(copy_from_iter);
420size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) 571size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
421{ 572{
422 char *to = addr; 573 char *to = addr;
574 if (unlikely(i->type & ITER_PIPE)) {
575 WARN_ON(1);
576 return 0;
577 }
423 iterate_and_advance(i, bytes, v, 578 iterate_and_advance(i, bytes, v,
424 __copy_from_user_nocache((to += v.iov_len) - v.iov_len, 579 __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
425 v.iov_base, v.iov_len), 580 v.iov_base, v.iov_len),
@@ -440,14 +595,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
440 size_t wanted = copy_to_iter(kaddr + offset, bytes, i); 595 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
441 kunmap_atomic(kaddr); 596 kunmap_atomic(kaddr);
442 return wanted; 597 return wanted;
443 } else 598 } else if (likely(!(i->type & ITER_PIPE)))
444 return copy_page_to_iter_iovec(page, offset, bytes, i); 599 return copy_page_to_iter_iovec(page, offset, bytes, i);
600 else
601 return copy_page_to_iter_pipe(page, offset, bytes, i);
445} 602}
446EXPORT_SYMBOL(copy_page_to_iter); 603EXPORT_SYMBOL(copy_page_to_iter);
447 604
448size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, 605size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
449 struct iov_iter *i) 606 struct iov_iter *i)
450{ 607{
608 if (unlikely(i->type & ITER_PIPE)) {
609 WARN_ON(1);
610 return 0;
611 }
451 if (i->type & (ITER_BVEC|ITER_KVEC)) { 612 if (i->type & (ITER_BVEC|ITER_KVEC)) {
452 void *kaddr = kmap_atomic(page); 613 void *kaddr = kmap_atomic(page);
453 size_t wanted = copy_from_iter(kaddr + offset, bytes, i); 614 size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
@@ -458,8 +619,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
458} 619}
459EXPORT_SYMBOL(copy_page_from_iter); 620EXPORT_SYMBOL(copy_page_from_iter);
460 621
622static size_t pipe_zero(size_t bytes, struct iov_iter *i)
623{
624 struct pipe_inode_info *pipe = i->pipe;
625 size_t n, off;
626 int idx;
627
628 if (!sanity(i))
629 return 0;
630
631 bytes = n = push_pipe(i, bytes, &idx, &off);
632 if (unlikely(!n))
633 return 0;
634
635 for ( ; n; idx = next_idx(idx, pipe), off = 0) {
636 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
637 memzero_page(pipe->bufs[idx].page, off, chunk);
638 i->idx = idx;
639 i->iov_offset = off + chunk;
640 n -= chunk;
641 }
642 i->count -= bytes;
643 return bytes;
644}
645
461size_t iov_iter_zero(size_t bytes, struct iov_iter *i) 646size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
462{ 647{
648 if (unlikely(i->type & ITER_PIPE))
649 return pipe_zero(bytes, i);
463 iterate_and_advance(i, bytes, v, 650 iterate_and_advance(i, bytes, v,
464 __clear_user(v.iov_base, v.iov_len), 651 __clear_user(v.iov_base, v.iov_len),
465 memzero_page(v.bv_page, v.bv_offset, v.bv_len), 652 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
@@ -474,6 +661,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
474 struct iov_iter *i, unsigned long offset, size_t bytes) 661 struct iov_iter *i, unsigned long offset, size_t bytes)
475{ 662{
476 char *kaddr = kmap_atomic(page), *p = kaddr + offset; 663 char *kaddr = kmap_atomic(page), *p = kaddr + offset;
664 if (unlikely(i->type & ITER_PIPE)) {
665 kunmap_atomic(kaddr);
666 WARN_ON(1);
667 return 0;
668 }
477 iterate_all_kinds(i, bytes, v, 669 iterate_all_kinds(i, bytes, v,
478 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, 670 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
479 v.iov_base, v.iov_len), 671 v.iov_base, v.iov_len),
@@ -486,8 +678,49 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
486} 678}
487EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); 679EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
488 680
681static void pipe_advance(struct iov_iter *i, size_t size)
682{
683 struct pipe_inode_info *pipe = i->pipe;
684 struct pipe_buffer *buf;
685 int idx = i->idx;
686 size_t off = i->iov_offset;
687
688 if (unlikely(i->count < size))
689 size = i->count;
690
691 if (size) {
692 if (off) /* make it relative to the beginning of buffer */
693 size += off - pipe->bufs[idx].offset;
694 while (1) {
695 buf = &pipe->bufs[idx];
696 if (size <= buf->len)
697 break;
698 size -= buf->len;
699 idx = next_idx(idx, pipe);
700 }
701 buf->len = size;
702 i->idx = idx;
703 off = i->iov_offset = buf->offset + size;
704 }
705 if (off)
706 idx = next_idx(idx, pipe);
707 if (pipe->nrbufs) {
708 int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
709 /* [curbuf,unused) is in use. Free [idx,unused) */
710 while (idx != unused) {
711 pipe_buf_release(pipe, &pipe->bufs[idx]);
712 idx = next_idx(idx, pipe);
713 pipe->nrbufs--;
714 }
715 }
716}
717
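pipe_advance() above consumes `size` bytes of what the iterator has produced so far: it shortens the buffer in which the new end of data falls, then walks from the following slot up to the old producer position and releases every buffer that is no longer needed. The snippet below is a user-space illustration of that release walk with made-up values, not kernel code.

#include <stdio.h>

#define RING 8u                         /* pipe->buffers, a power of two */

/* The release walk at the end of pipe_advance(): free everything between the
 * slot after the new end of data and the old producer position. */
int main(void)
{
        unsigned int curbuf = 2, nrbufs = 5;    /* slots 2..6 currently used */
        unsigned int last = 4;                  /* data now ends in slot 4 */
        unsigned int unused = (curbuf + nrbufs) & (RING - 1);
        unsigned int idx = (last + 1) & (RING - 1);

        while (idx != unused) {
                printf("release slot %u\n", idx);   /* pipe_buf_release() */
                idx = (idx + 1) & (RING - 1);
                nrbufs--;
        }
        printf("nrbufs=%u\n", nrbufs);              /* 3 */
        return 0;
}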
489void iov_iter_advance(struct iov_iter *i, size_t size) 718void iov_iter_advance(struct iov_iter *i, size_t size)
490{ 719{
720 if (unlikely(i->type & ITER_PIPE)) {
721 pipe_advance(i, size);
722 return;
723 }
491 iterate_and_advance(i, size, v, 0, 0, 0) 724 iterate_and_advance(i, size, v, 0, 0, 0)
492} 725}
493EXPORT_SYMBOL(iov_iter_advance); 726EXPORT_SYMBOL(iov_iter_advance);
@@ -497,6 +730,8 @@ EXPORT_SYMBOL(iov_iter_advance);
497 */ 730 */
498size_t iov_iter_single_seg_count(const struct iov_iter *i) 731size_t iov_iter_single_seg_count(const struct iov_iter *i)
499{ 732{
733 if (unlikely(i->type & ITER_PIPE))
734 return i->count; // it is a silly place, anyway
500 if (i->nr_segs == 1) 735 if (i->nr_segs == 1)
501 return i->count; 736 return i->count;
502 else if (i->type & ITER_BVEC) 737 else if (i->type & ITER_BVEC)
@@ -532,6 +767,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction,
532} 767}
533EXPORT_SYMBOL(iov_iter_bvec); 768EXPORT_SYMBOL(iov_iter_bvec);
534 769
770void iov_iter_pipe(struct iov_iter *i, int direction,
771 struct pipe_inode_info *pipe,
772 size_t count)
773{
774 BUG_ON(direction != ITER_PIPE);
775 i->type = direction;
776 i->pipe = pipe;
777 i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
778 i->iov_offset = 0;
779 i->count = count;
780}
781EXPORT_SYMBOL(iov_iter_pipe);
782
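iov_iter_pipe() above points a new ITER_PIPE iterator at the first free slot of the pipe ring, with a zero offset and a byte budget of `count`. The sketch below shows roughly how a producer would pair it with copy_page_to_iter(); it is a hedged illustration rather than code from this patch, and pipe_fill_example() is a made-up name.

#include <linux/pipe_fs_i.h>
#include <linux/uio.h>

/* Hedged sketch: feed one page of data into a pipe through the new iterator
 * type.  copy_page_to_iter() appends (or extends) a pipe_buffer and takes a
 * page reference instead of copying the bytes. */
static size_t pipe_fill_example(struct pipe_inode_info *pipe,
                                struct page *page, size_t len)
{
        struct iov_iter to;

        iov_iter_pipe(&to, ITER_PIPE, pipe, len);
        return copy_page_to_iter(page, 0, len, &to);
}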
535unsigned long iov_iter_alignment(const struct iov_iter *i) 783unsigned long iov_iter_alignment(const struct iov_iter *i)
536{ 784{
537 unsigned long res = 0; 785 unsigned long res = 0;
@@ -540,6 +788,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
540 if (!size) 788 if (!size)
541 return 0; 789 return 0;
542 790
791 if (unlikely(i->type & ITER_PIPE)) {
792 if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
793 return size | i->iov_offset;
794 return size;
795 }
543 iterate_all_kinds(i, size, v, 796 iterate_all_kinds(i, size, v,
544 (res |= (unsigned long)v.iov_base | v.iov_len, 0), 797 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
545 res |= v.bv_offset | v.bv_len, 798 res |= v.bv_offset | v.bv_len,
@@ -556,6 +809,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
556 if (!size) 809 if (!size)
557 return 0; 810 return 0;
558 811
812 if (unlikely(i->type & ITER_PIPE)) {
813 WARN_ON(1);
814 return ~0U;
815 }
816
559 iterate_all_kinds(i, size, v, 817 iterate_all_kinds(i, size, v,
560 (res |= (!res ? 0 : (unsigned long)v.iov_base) | 818 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
561 (size != v.iov_len ? size : 0), 0), 819 (size != v.iov_len ? size : 0), 0),
@@ -568,6 +826,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
568} 826}
569EXPORT_SYMBOL(iov_iter_gap_alignment); 827EXPORT_SYMBOL(iov_iter_gap_alignment);
570 828
829static inline size_t __pipe_get_pages(struct iov_iter *i,
830 size_t maxsize,
831 struct page **pages,
832 int idx,
833 size_t *start)
834{
835 struct pipe_inode_info *pipe = i->pipe;
836 ssize_t n = push_pipe(i, maxsize, &idx, start);
837 if (!n)
838 return -EFAULT;
839
840 maxsize = n;
841 n += *start;
842 while (n > 0) {
843 get_page(*pages++ = pipe->bufs[idx].page);
844 idx = next_idx(idx, pipe);
845 n -= PAGE_SIZE;
846 }
847
848 return maxsize;
849}
850
851static ssize_t pipe_get_pages(struct iov_iter *i,
852 struct page **pages, size_t maxsize, unsigned maxpages,
853 size_t *start)
854{
855 unsigned npages;
856 size_t capacity;
857 int idx;
858
859 if (!sanity(i))
860 return -EFAULT;
861
862 data_start(i, &idx, start);
863 /* some of this one + all after this one */
864 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
865 capacity = min(npages,maxpages) * PAGE_SIZE - *start;
866
867 return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
868}
869
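The "some of this one + all after this one" expression above counts how many ring slots are available starting at idx before the walk would run into curbuf; the same expression reappears in pipe_get_pages_alloc() and iov_iter_npages() below. A quick stand-alone check of the mask arithmetic (illustration only, not kernel code):

#include <stdio.h>

#define RING 16u                        /* pipe->buffers, a power of two */

/* Usable slots starting at idx, current slot included, before reaching
 * curbuf. */
static unsigned int slots_from(unsigned int curbuf, unsigned int idx)
{
        return ((curbuf - idx - 1) & (RING - 1)) + 1;
}

int main(void)
{
        printf("%u\n", slots_from(3, 5));       /* slots 5..15,0..2 -> 14 */
        printf("%u\n", slots_from(5, 5));       /* nothing in use  -> 16 */
        return 0;
}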
571ssize_t iov_iter_get_pages(struct iov_iter *i, 870ssize_t iov_iter_get_pages(struct iov_iter *i,
572 struct page **pages, size_t maxsize, unsigned maxpages, 871 struct page **pages, size_t maxsize, unsigned maxpages,
573 size_t *start) 872 size_t *start)
@@ -578,6 +877,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
578 if (!maxsize) 877 if (!maxsize)
579 return 0; 878 return 0;
580 879
880 if (unlikely(i->type & ITER_PIPE))
881 return pipe_get_pages(i, pages, maxsize, maxpages, start);
581 iterate_all_kinds(i, maxsize, v, ({ 882 iterate_all_kinds(i, maxsize, v, ({
582 unsigned long addr = (unsigned long)v.iov_base; 883 unsigned long addr = (unsigned long)v.iov_base;
583 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); 884 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -613,6 +914,37 @@ static struct page **get_pages_array(size_t n)
613 return p; 914 return p;
614} 915}
615 916
917static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
918 struct page ***pages, size_t maxsize,
919 size_t *start)
920{
921 struct page **p;
922 size_t n;
923 int idx;
924 int npages;
925
926 if (!sanity(i))
927 return -EFAULT;
928
929 data_start(i, &idx, start);
930 /* some of this one + all after this one */
931 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
932 n = npages * PAGE_SIZE - *start;
933 if (maxsize > n)
934 maxsize = n;
935 else
936 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
937 p = get_pages_array(npages);
938 if (!p)
939 return -ENOMEM;
940 n = __pipe_get_pages(i, maxsize, p, idx, start);
941 if (n > 0)
942 *pages = p;
943 else
944 kvfree(p);
945 return n;
946}
947
616ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, 948ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
617 struct page ***pages, size_t maxsize, 949 struct page ***pages, size_t maxsize,
618 size_t *start) 950 size_t *start)
@@ -625,6 +957,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
625 if (!maxsize) 957 if (!maxsize)
626 return 0; 958 return 0;
627 959
960 if (unlikely(i->type & ITER_PIPE))
961 return pipe_get_pages_alloc(i, pages, maxsize, start);
628 iterate_all_kinds(i, maxsize, v, ({ 962 iterate_all_kinds(i, maxsize, v, ({
629 unsigned long addr = (unsigned long)v.iov_base; 963 unsigned long addr = (unsigned long)v.iov_base;
630 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); 964 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -666,6 +1000,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
666 __wsum sum, next; 1000 __wsum sum, next;
667 size_t off = 0; 1001 size_t off = 0;
668 sum = *csum; 1002 sum = *csum;
1003 if (unlikely(i->type & ITER_PIPE)) {
1004 WARN_ON(1);
1005 return 0;
1006 }
669 iterate_and_advance(i, bytes, v, ({ 1007 iterate_and_advance(i, bytes, v, ({
670 int err = 0; 1008 int err = 0;
671 next = csum_and_copy_from_user(v.iov_base, 1009 next = csum_and_copy_from_user(v.iov_base,
@@ -704,6 +1042,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
704 __wsum sum, next; 1042 __wsum sum, next;
705 size_t off = 0; 1043 size_t off = 0;
706 sum = *csum; 1044 sum = *csum;
1045 if (unlikely(i->type & ITER_PIPE)) {
1046 WARN_ON(1); /* for now */
1047 return 0;
1048 }
707 iterate_and_advance(i, bytes, v, ({ 1049 iterate_and_advance(i, bytes, v, ({
708 int err = 0; 1050 int err = 0;
709 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, 1051 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
@@ -743,7 +1085,20 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
743 if (!size) 1085 if (!size)
744 return 0; 1086 return 0;
745 1087
746 iterate_all_kinds(i, size, v, ({ 1088 if (unlikely(i->type & ITER_PIPE)) {
1089 struct pipe_inode_info *pipe = i->pipe;
1090 size_t off;
1091 int idx;
1092
1093 if (!sanity(i))
1094 return 0;
1095
1096 data_start(i, &idx, &off);
1097 /* some of this one + all after this one */
1098 npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1099 if (npages >= maxpages)
1100 return maxpages;
1101 } else iterate_all_kinds(i, size, v, ({
747 unsigned long p = (unsigned long)v.iov_base; 1102 unsigned long p = (unsigned long)v.iov_base;
748 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) 1103 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
749 - p / PAGE_SIZE; 1104 - p / PAGE_SIZE;
@@ -768,6 +1123,10 @@ EXPORT_SYMBOL(iov_iter_npages);
768const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) 1123const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
769{ 1124{
770 *new = *old; 1125 *new = *old;
1126 if (unlikely(new->type & ITER_PIPE)) {
1127 WARN_ON(1);
1128 return NULL;
1129 }
771 if (new->type & ITER_BVEC) 1130 if (new->type & ITER_BVEC)
772 return new->bvec = kmemdup(new->bvec, 1131 return new->bvec = kmemdup(new->bvec,
773 new->nr_segs * sizeof(struct bio_vec), 1132 new->nr_segs * sizeof(struct bio_vec),
@@ -780,6 +1139,28 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
780} 1139}
781EXPORT_SYMBOL(dup_iter); 1140EXPORT_SYMBOL(dup_iter);
782 1141
1142/**
1143 * import_iovec() - Copy an array of &struct iovec from userspace
1144 * into the kernel, check that it is valid, and initialize a new
1145 * &struct iov_iter iterator to access it.
1146 *
1147 * @type: One of %READ or %WRITE.
1148 * @uvector: Pointer to the userspace array.
1149 * @nr_segs: Number of elements in userspace array.
1150 * @fast_segs: Number of elements in @iov.
1151 * @iov: (input and output parameter) Pointer to pointer to (usually small
1152 * on-stack) kernel array.
1153 * @i: Pointer to iterator that will be initialized on success.
1154 *
1155 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1156 * then this function places %NULL in *@iov on return. Otherwise, a new
1157 * array will be allocated and the result placed in *@iov. This means that
1158 * the caller may call kfree() on *@iov regardless of whether the small
1159 * on-stack array was used or not (and regardless of whether this function
1160 * returns an error or not).
1161 *
1162 * Return: 0 on success or negative error code on error.
1163 */
783int import_iovec(int type, const struct iovec __user * uvector, 1164int import_iovec(int type, const struct iovec __user * uvector,
784 unsigned nr_segs, unsigned fast_segs, 1165 unsigned nr_segs, unsigned fast_segs,
785 struct iovec **iov, struct iov_iter *i) 1166 struct iovec **iov, struct iov_iter *i)
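The kernel-doc added above spells out import_iovec()'s calling convention: the caller passes a small on-stack array, and on return *iov either points at a freshly allocated copy or is set to NULL, so kfree(*iov) is always safe. Below is a hedged sketch of that pattern; it is not taken from this patch, and do_import_example() and its arguments are made up.

#include <linux/slab.h>
#include <linux/uio.h>

static int do_import_example(int type, const struct iovec __user *uvec,
                             unsigned long nr_segs, struct iov_iter *iter)
{
        struct iovec iovstack[UIO_FASTIOV];
        struct iovec *iov = iovstack;
        int ret;

        ret = import_iovec(type, uvec, nr_segs, ARRAY_SIZE(iovstack),
                           &iov, iter);
        if (ret < 0)
                return ret;

        /* ... hand *iter to the actual I/O path ... */

        kfree(iov);     /* NULL (a no-op) if the on-stack array was used */
        return 0;
}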
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 836f7db4e548..1d6565e81030 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -74,7 +74,7 @@ void irq_poll_complete(struct irq_poll *iop)
74} 74}
75EXPORT_SYMBOL(irq_poll_complete); 75EXPORT_SYMBOL(irq_poll_complete);
76 76
77static void irq_poll_softirq(struct softirq_action *h) 77static void __latent_entropy irq_poll_softirq(struct softirq_action *h)
78{ 78{
79 struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); 79 struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
80 int rearm = 0, budget = irq_poll_budget; 80 int rearm = 0, budget = irq_poll_budget;
@@ -184,30 +184,21 @@ void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn)
184} 184}
185EXPORT_SYMBOL(irq_poll_init); 185EXPORT_SYMBOL(irq_poll_init);
186 186
187static int irq_poll_cpu_notify(struct notifier_block *self, 187static int irq_poll_cpu_dead(unsigned int cpu)
188 unsigned long action, void *hcpu)
189{ 188{
190 /* 189 /*
191 * If a CPU goes away, splice its entries to the current CPU 190 * If a CPU goes away, splice its entries to the current CPU
192 * and trigger a run of the softirq 191 * and trigger a run of the softirq
193 */ 192 */
194 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 193 local_irq_disable();
195 int cpu = (unsigned long) hcpu; 194 list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
196 195 this_cpu_ptr(&blk_cpu_iopoll));
197 local_irq_disable(); 196 __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
198 list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), 197 local_irq_enable();
199 this_cpu_ptr(&blk_cpu_iopoll));
200 __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
201 local_irq_enable();
202 }
203 198
204 return NOTIFY_OK; 199 return 0;
205} 200}
206 201
207static struct notifier_block irq_poll_cpu_notifier = {
208 .notifier_call = irq_poll_cpu_notify,
209};
210
211static __init int irq_poll_setup(void) 202static __init int irq_poll_setup(void)
212{ 203{
213 int i; 204 int i;
@@ -216,7 +207,8 @@ static __init int irq_poll_setup(void)
216 INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); 207 INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
217 208
218 open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq); 209 open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
219 register_hotcpu_notifier(&irq_poll_cpu_notifier); 210 cpuhp_setup_state_nocalls(CPUHP_IRQ_POLL_DEAD, "irq_poll:dead", NULL,
211 irq_poll_cpu_dead);
220 return 0; 212 return 0;
221} 213}
222subsys_initcall(irq_poll_setup); 214subsys_initcall(irq_poll_setup);
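The irq_poll hunk above drops the old CPU-notifier boilerplate in favour of the cpuhp state machine: a single "dead" callback is registered once, and the core invokes it on a surviving CPU for every CPU that goes offline. The sketch below shows the general shape of that registration; it is not from this patch, and CPUHP_EXAMPLE_DEAD stands in for the constant a real user would add to enum cpuhp_state (here the patch adds CPUHP_IRQ_POLL_DEAD).

#include <linux/cpuhotplug.h>

static int example_cpu_dead(unsigned int cpu)
{
        /* pull per-cpu work off the dead CPU and requeue it locally */
        return 0;
}

static int __init example_setup(void)
{
        /* _nocalls: register without invoking the callback for CPUs that are
         * already online; there is no startup handler (NULL). */
        return cpuhp_setup_state_nocalls(CPUHP_EXAMPLE_DEAD,
                                         "lib/example:dead",
                                         NULL, example_cpu_dead);
}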
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index d8a5cf66c316..b8e2080c1a47 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -48,11 +48,9 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
48{ 48{
49 unsigned long long res; 49 unsigned long long res;
50 unsigned int rv; 50 unsigned int rv;
51 int overflow;
52 51
53 res = 0; 52 res = 0;
54 rv = 0; 53 rv = 0;
55 overflow = 0;
56 while (*s) { 54 while (*s) {
57 unsigned int val; 55 unsigned int val;
58 56
@@ -71,15 +69,13 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
71 */ 69 */
72 if (unlikely(res & (~0ull << 60))) { 70 if (unlikely(res & (~0ull << 60))) {
73 if (res > div_u64(ULLONG_MAX - val, base)) 71 if (res > div_u64(ULLONG_MAX - val, base))
74 overflow = 1; 72 rv |= KSTRTOX_OVERFLOW;
75 } 73 }
76 res = res * base + val; 74 res = res * base + val;
77 rv++; 75 rv++;
78 s++; 76 s++;
79 } 77 }
80 *p = res; 78 *p = res;
81 if (overflow)
82 rv |= KSTRTOX_OVERFLOW;
83 return rv; 79 return rv;
84} 80}
85 81
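The kstrtox change above folds the overflow indication straight into the KSTRTOX_OVERFLOW bit of the return value instead of carrying a separate local. The user-space model below reproduces the interesting part, the cheap overflow test that only kicks in once the accumulator has bits set in its top nibble; it handles base 10 only and is an illustration, not the kernel function.

#include <stdio.h>

#define OVERFLOW_FLAG (1u << 31)        /* stand-in for KSTRTOX_OVERFLOW */

static unsigned int parse(const char *s, unsigned long long *out)
{
        unsigned long long res = 0;
        unsigned int rv = 0;

        for (; *s >= '0' && *s <= '9'; s++, rv++) {
                unsigned int val = *s - '0';

                /* Only bother checking once res is large enough that
                 * res * 10 + val could actually wrap. */
                if (res & (~0ull << 60)) {
                        if (res > (~0ull - val) / 10)
                                rv |= OVERFLOW_FLAG;
                }
                res = res * 10 + val;
        }
        *out = res;
        return rv;
}

int main(void)
{
        unsigned long long v;
        unsigned int rv = parse("99999999999999999999", &v);  /* > 2^64 - 1 */

        printf("digits=%u overflow=%d\n", rv & ~OVERFLOW_FLAG,
               !!(rv & OVERFLOW_FLAG));
        return 0;
}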
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 747606f9e4a3..5a0f75a3bf01 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -21,6 +21,7 @@
21#include <linux/bitops.h> 21#include <linux/bitops.h>
22#include <linux/count_zeros.h> 22#include <linux/count_zeros.h>
23#include <linux/byteorder/generic.h> 23#include <linux/byteorder/generic.h>
24#include <linux/scatterlist.h>
24#include <linux/string.h> 25#include <linux/string.h>
25#include "mpi-internal.h" 26#include "mpi-internal.h"
26 27
@@ -50,9 +51,7 @@ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes)
50 return NULL; 51 return NULL;
51 } 52 }
52 if (nbytes > 0) 53 if (nbytes > 0)
53 nbits -= count_leading_zeros(buffer[0]); 54 nbits -= count_leading_zeros(buffer[0]) - (BITS_PER_LONG - 8);
54 else
55 nbits = 0;
56 55
57 nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); 56 nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
58 val = mpi_alloc(nlimbs); 57 val = mpi_alloc(nlimbs);
@@ -82,50 +81,30 @@ EXPORT_SYMBOL_GPL(mpi_read_raw_data);
82MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) 81MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
83{ 82{
84 const uint8_t *buffer = xbuffer; 83 const uint8_t *buffer = xbuffer;
85 int i, j; 84 unsigned int nbits, nbytes;
86 unsigned nbits, nbytes, nlimbs, nread = 0; 85 MPI val;
87 mpi_limb_t a;
88 MPI val = NULL;
89 86
90 if (*ret_nread < 2) 87 if (*ret_nread < 2)
91 goto leave; 88 return ERR_PTR(-EINVAL);
92 nbits = buffer[0] << 8 | buffer[1]; 89 nbits = buffer[0] << 8 | buffer[1];
93 90
94 if (nbits > MAX_EXTERN_MPI_BITS) { 91 if (nbits > MAX_EXTERN_MPI_BITS) {
95 pr_info("MPI: mpi too large (%u bits)\n", nbits); 92 pr_info("MPI: mpi too large (%u bits)\n", nbits);
96 goto leave; 93 return ERR_PTR(-EINVAL);
97 } 94 }
98 buffer += 2;
99 nread = 2;
100 95
101 nbytes = DIV_ROUND_UP(nbits, 8); 96 nbytes = DIV_ROUND_UP(nbits, 8);
102 nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); 97 if (nbytes + 2 > *ret_nread) {
103 val = mpi_alloc(nlimbs); 98 pr_info("MPI: mpi larger than buffer nbytes=%u ret_nread=%u\n",
104 if (!val) 99 nbytes, *ret_nread);
105 return NULL; 100 return ERR_PTR(-EINVAL);
106 i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
107 i %= BYTES_PER_MPI_LIMB;
108 val->nbits = nbits;
109 j = val->nlimbs = nlimbs;
110 val->sign = 0;
111 for (; j > 0; j--) {
112 a = 0;
113 for (; i < BYTES_PER_MPI_LIMB; i++) {
114 if (++nread > *ret_nread) {
115 printk
116 ("MPI: mpi larger than buffer nread=%d ret_nread=%d\n",
117 nread, *ret_nread);
118 goto leave;
119 }
120 a <<= 8;
121 a |= *buffer++;
122 }
123 i = 0;
124 val->d[j - 1] = a;
125 } 101 }
126 102
127leave: 103 val = mpi_read_raw_data(buffer + 2, nbytes);
128 *ret_nread = nread; 104 if (!val)
105 return ERR_PTR(-ENOMEM);
106
107 *ret_nread = nbytes + 2;
129 return val; 108 return val;
130} 109}
131EXPORT_SYMBOL_GPL(mpi_read_from_buffer); 110EXPORT_SYMBOL_GPL(mpi_read_from_buffer);
@@ -250,82 +229,6 @@ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign)
250} 229}
251EXPORT_SYMBOL_GPL(mpi_get_buffer); 230EXPORT_SYMBOL_GPL(mpi_get_buffer);
252 231
253/****************
254 * Use BUFFER to update MPI.
255 */
256int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign)
257{
258 const uint8_t *buffer = xbuffer, *p;
259 mpi_limb_t alimb;
260 int nlimbs;
261 int i;
262
263 nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
264 if (RESIZE_IF_NEEDED(a, nlimbs) < 0)
265 return -ENOMEM;
266 a->sign = sign;
267
268 for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) {
269#if BYTES_PER_MPI_LIMB == 4
270 alimb = (mpi_limb_t) *p--;
271 alimb |= (mpi_limb_t) *p-- << 8;
272 alimb |= (mpi_limb_t) *p-- << 16;
273 alimb |= (mpi_limb_t) *p-- << 24;
274#elif BYTES_PER_MPI_LIMB == 8
275 alimb = (mpi_limb_t) *p--;
276 alimb |= (mpi_limb_t) *p-- << 8;
277 alimb |= (mpi_limb_t) *p-- << 16;
278 alimb |= (mpi_limb_t) *p-- << 24;
279 alimb |= (mpi_limb_t) *p-- << 32;
280 alimb |= (mpi_limb_t) *p-- << 40;
281 alimb |= (mpi_limb_t) *p-- << 48;
282 alimb |= (mpi_limb_t) *p-- << 56;
283#else
284#error please implement for this limb size.
285#endif
286 a->d[i++] = alimb;
287 }
288 if (p >= buffer) {
289#if BYTES_PER_MPI_LIMB == 4
290 alimb = *p--;
291 if (p >= buffer)
292 alimb |= (mpi_limb_t) *p-- << 8;
293 if (p >= buffer)
294 alimb |= (mpi_limb_t) *p-- << 16;
295 if (p >= buffer)
296 alimb |= (mpi_limb_t) *p-- << 24;
297#elif BYTES_PER_MPI_LIMB == 8
298 alimb = (mpi_limb_t) *p--;
299 if (p >= buffer)
300 alimb |= (mpi_limb_t) *p-- << 8;
301 if (p >= buffer)
302 alimb |= (mpi_limb_t) *p-- << 16;
303 if (p >= buffer)
304 alimb |= (mpi_limb_t) *p-- << 24;
305 if (p >= buffer)
306 alimb |= (mpi_limb_t) *p-- << 32;
307 if (p >= buffer)
308 alimb |= (mpi_limb_t) *p-- << 40;
309 if (p >= buffer)
310 alimb |= (mpi_limb_t) *p-- << 48;
311 if (p >= buffer)
312 alimb |= (mpi_limb_t) *p-- << 56;
313#else
314#error please implement for this limb size.
315#endif
316 a->d[i++] = alimb;
317 }
318 a->nlimbs = i;
319
320 if (i != nlimbs) {
321 pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i,
322 nlimbs);
323 BUG();
324 }
325 return 0;
326}
327EXPORT_SYMBOL_GPL(mpi_set_buffer);
328
329/** 232/**
330 * mpi_write_to_sgl() - Funnction exports MPI to an sgl (msb first) 233 * mpi_write_to_sgl() - Funnction exports MPI to an sgl (msb first)
331 * 234 *
@@ -335,16 +238,13 @@ EXPORT_SYMBOL_GPL(mpi_set_buffer);
335 * @a: a multi precision integer 238 * @a: a multi precision integer
336 * @sgl: scatterlist to write to. Needs to be at least 239 * @sgl: scatterlist to write to. Needs to be at least
337 * mpi_get_size(a) long. 240 * mpi_get_size(a) long.
338 * @nbytes: in/out param - it has the be set to the maximum number of 241 * @nbytes: the number of bytes to write. Leading bytes will be
339 * bytes that can be written to sgl. This has to be at least 242 * filled with zero.
340 * the size of the integer a. On return it receives the actual
341 * length of the data written on success or the data that would
342 * be written if buffer was too small.
343 * @sign: if not NULL, it will be set to the sign of a. 243 * @sign: if not NULL, it will be set to the sign of a.
344 * 244 *
345 * Return: 0 on success or error code in case of error 245 * Return: 0 on success or error code in case of error
346 */ 246 */
347int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, 247int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes,
348 int *sign) 248 int *sign)
349{ 249{
350 u8 *p, *p2; 250 u8 *p, *p2;
@@ -356,55 +256,60 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
356#error please implement for this limb size. 256#error please implement for this limb size.
357#endif 257#endif
358 unsigned int n = mpi_get_size(a); 258 unsigned int n = mpi_get_size(a);
359 int i, x, y = 0, lzeros, buf_len; 259 struct sg_mapping_iter miter;
360 260 int i, x, buf_len;
361 if (!nbytes) 261 int nents;
362 return -EINVAL;
363 262
364 if (sign) 263 if (sign)
365 *sign = a->sign; 264 *sign = a->sign;
366 265
367 lzeros = count_lzeros(a); 266 if (nbytes < n)
368
369 if (*nbytes < n - lzeros) {
370 *nbytes = n - lzeros;
371 return -EOVERFLOW; 267 return -EOVERFLOW;
372 }
373 268
374 *nbytes = n - lzeros; 269 nents = sg_nents_for_len(sgl, nbytes);
375 buf_len = sgl->length; 270 if (nents < 0)
376 p2 = sg_virt(sgl); 271 return -EINVAL;
377 272
378 for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB, 273 sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_TO_SG);
379 lzeros %= BYTES_PER_MPI_LIMB; 274 sg_miter_next(&miter);
380 i >= 0; i--) { 275 buf_len = miter.length;
276 p2 = miter.addr;
277
278 while (nbytes > n) {
279 i = min_t(unsigned, nbytes - n, buf_len);
280 memset(p2, 0, i);
281 p2 += i;
282 nbytes -= i;
283
284 buf_len -= i;
285 if (!buf_len) {
286 sg_miter_next(&miter);
287 buf_len = miter.length;
288 p2 = miter.addr;
289 }
290 }
291
292 for (i = a->nlimbs - 1; i >= 0; i--) {
381#if BYTES_PER_MPI_LIMB == 4 293#if BYTES_PER_MPI_LIMB == 4
382 alimb = cpu_to_be32(a->d[i]); 294 alimb = a->d[i] ? cpu_to_be32(a->d[i]) : 0;
383#elif BYTES_PER_MPI_LIMB == 8 295#elif BYTES_PER_MPI_LIMB == 8
384 alimb = cpu_to_be64(a->d[i]); 296 alimb = a->d[i] ? cpu_to_be64(a->d[i]) : 0;
385#else 297#else
386#error please implement for this limb size. 298#error please implement for this limb size.
387#endif 299#endif
388 if (lzeros) { 300 p = (u8 *)&alimb;
389 y = lzeros;
390 lzeros = 0;
391 }
392
393 p = (u8 *)&alimb + y;
394 301
395 for (x = 0; x < sizeof(alimb) - y; x++) { 302 for (x = 0; x < sizeof(alimb); x++) {
396 if (!buf_len) {
397 sgl = sg_next(sgl);
398 if (!sgl)
399 return -EINVAL;
400 buf_len = sgl->length;
401 p2 = sg_virt(sgl);
402 }
403 *p2++ = *p++; 303 *p2++ = *p++;
404 buf_len--; 304 if (!--buf_len) {
305 sg_miter_next(&miter);
306 buf_len = miter.length;
307 p2 = miter.addr;
308 }
405 } 309 }
406 y = 0;
407 } 310 }
311
312 sg_miter_stop(&miter);
408 return 0; 313 return 0;
409} 314}
410EXPORT_SYMBOL_GPL(mpi_write_to_sgl); 315EXPORT_SYMBOL_GPL(mpi_write_to_sgl);
@@ -424,19 +329,23 @@ EXPORT_SYMBOL_GPL(mpi_write_to_sgl);
424 */ 329 */
425MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) 330MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
426{ 331{
427 struct scatterlist *sg; 332 struct sg_mapping_iter miter;
428 int x, i, j, z, lzeros, ents;
429 unsigned int nbits, nlimbs; 333 unsigned int nbits, nlimbs;
334 int x, j, z, lzeros, ents;
335 unsigned int len;
336 const u8 *buff;
430 mpi_limb_t a; 337 mpi_limb_t a;
431 MPI val = NULL; 338 MPI val = NULL;
432 339
433 lzeros = 0; 340 ents = sg_nents_for_len(sgl, nbytes);
434 ents = sg_nents(sgl); 341 if (ents < 0)
342 return NULL;
435 343
436 for_each_sg(sgl, sg, ents, i) { 344 sg_miter_start(&miter, sgl, ents, SG_MITER_ATOMIC | SG_MITER_FROM_SG);
437 const u8 *buff = sg_virt(sg);
438 int len = sg->length;
439 345
346 lzeros = 0;
347 len = 0;
348 while (nbytes > 0) {
440 while (len && !*buff) { 349 while (len && !*buff) {
441 lzeros++; 350 lzeros++;
442 len--; 351 len--;
@@ -446,12 +355,17 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
446 if (len && *buff) 355 if (len && *buff)
447 break; 356 break;
448 357
449 ents--; 358 sg_miter_next(&miter);
359 buff = miter.addr;
360 len = miter.length;
361
450 nbytes -= lzeros; 362 nbytes -= lzeros;
451 lzeros = 0; 363 lzeros = 0;
452 } 364 }
453 365
454 sgl = sg; 366 miter.consumed = lzeros;
367 sg_miter_stop(&miter);
368
455 nbytes -= lzeros; 369 nbytes -= lzeros;
456 nbits = nbytes * 8; 370 nbits = nbytes * 8;
457 if (nbits > MAX_EXTERN_MPI_BITS) { 371 if (nbits > MAX_EXTERN_MPI_BITS) {
@@ -460,8 +374,7 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
460 } 374 }
461 375
462 if (nbytes > 0) 376 if (nbytes > 0)
463 nbits -= count_leading_zeros(*(u8 *)(sg_virt(sgl) + lzeros)) - 377 nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8);
464 (BITS_PER_LONG - 8);
465 378
466 nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); 379 nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
467 val = mpi_alloc(nlimbs); 380 val = mpi_alloc(nlimbs);
@@ -480,21 +393,21 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
480 z = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; 393 z = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
481 z %= BYTES_PER_MPI_LIMB; 394 z %= BYTES_PER_MPI_LIMB;
482 395
483 for_each_sg(sgl, sg, ents, i) { 396 while (sg_miter_next(&miter)) {
484 const u8 *buffer = sg_virt(sg) + lzeros; 397 buff = miter.addr;
485 int len = sg->length - lzeros; 398 len = miter.length;
486 399
487 for (x = 0; x < len; x++) { 400 for (x = 0; x < len; x++) {
488 a <<= 8; 401 a <<= 8;
489 a |= *buffer++; 402 a |= *buff++;
490 if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) { 403 if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) {
491 val->d[j--] = a; 404 val->d[j--] = a;
492 a = 0; 405 a = 0;
493 } 406 }
494 } 407 }
495 z += x; 408 z += x;
496 lzeros = 0;
497 } 409 }
410
498 return val; 411 return val;
499} 412}
500EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl); 413EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl);
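Both mpicoder rewrites above switch from walking sg_virt() by hand to the sg_mapping_iter helpers, which validate the list length with sg_nents_for_len() and map/unmap each segment for you (including highmem pages). Below is a hedged sketch of that read-side pattern; consume_byte() is a made-up callback and the code is not taken from this patch.

#include <linux/scatterlist.h>

static void walk_sgl(struct scatterlist *sgl, unsigned int nbytes,
                     void (*consume_byte)(u8 b))
{
        struct sg_mapping_iter miter;
        int nents = sg_nents_for_len(sgl, nbytes);

        if (nents < 0)
                return;                 /* list is shorter than nbytes */

        /* SG_MITER_ATOMIC maps with kmap_atomic, so the callback must not
         * sleep while a segment is mapped. */
        sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_FROM_SG);
        while (nbytes && sg_miter_next(&miter)) {
                const u8 *p = miter.addr;
                size_t n = min_t(size_t, miter.length, nbytes);
                size_t i;

                for (i = 0; i < n; i++)
                        consume_byte(p[i]);
                nbytes -= n;
        }
        sg_miter_stop(&miter);          /* drop the last mapping */
}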
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 26caf51cc238..75554754eadf 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -16,21 +16,23 @@
16#include <linux/delay.h> 16#include <linux/delay.h>
17#include <linux/kprobes.h> 17#include <linux/kprobes.h>
18#include <linux/nmi.h> 18#include <linux/nmi.h>
19#include <linux/cpu.h>
19 20
20#ifdef arch_trigger_all_cpu_backtrace 21#ifdef arch_trigger_cpumask_backtrace
21/* For reliability, we're prepared to waste bits here. */ 22/* For reliability, we're prepared to waste bits here. */
22static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; 23static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
23 24
24/* "in progress" flag of arch_trigger_all_cpu_backtrace */ 25/* "in progress" flag of arch_trigger_cpumask_backtrace */
25static unsigned long backtrace_flag; 26static unsigned long backtrace_flag;
26 27
27/* 28/*
28 * When raise() is called it will be is passed a pointer to the 29 * When raise() is called it will be passed a pointer to the
29 * backtrace_mask. Architectures that call nmi_cpu_backtrace() 30 * backtrace_mask. Architectures that call nmi_cpu_backtrace()
30 * directly from their raise() functions may rely on the mask 31 * directly from their raise() functions may rely on the mask
31 * they are passed being updated as a side effect of this call. 32 * they are passed being updated as a side effect of this call.
32 */ 33 */
33void nmi_trigger_all_cpu_backtrace(bool include_self, 34void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
35 bool exclude_self,
34 void (*raise)(cpumask_t *mask)) 36 void (*raise)(cpumask_t *mask))
35{ 37{
36 int i, this_cpu = get_cpu(); 38 int i, this_cpu = get_cpu();
@@ -44,13 +46,22 @@ void nmi_trigger_all_cpu_backtrace(bool include_self,
44 return; 46 return;
45 } 47 }
46 48
47 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); 49 cpumask_copy(to_cpumask(backtrace_mask), mask);
48 if (!include_self) 50 if (exclude_self)
49 cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); 51 cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
50 52
53 /*
54 * Don't try to send an NMI to this cpu; it may work on some
55 * architectures, but on others it may not, and we'll get
56 * information at least as useful just by doing a dump_stack() here.
57 * Note that nmi_cpu_backtrace(NULL) will clear the cpu bit.
58 */
59 if (cpumask_test_cpu(this_cpu, to_cpumask(backtrace_mask)))
60 nmi_cpu_backtrace(NULL);
61
51 if (!cpumask_empty(to_cpumask(backtrace_mask))) { 62 if (!cpumask_empty(to_cpumask(backtrace_mask))) {
52 pr_info("Sending NMI to %s CPUs:\n", 63 pr_info("Sending NMI from CPU %d to CPUs %*pbl:\n",
53 (include_self ? "all" : "other")); 64 this_cpu, nr_cpumask_bits, to_cpumask(backtrace_mask));
54 raise(to_cpumask(backtrace_mask)); 65 raise(to_cpumask(backtrace_mask));
55 } 66 }
56 67
@@ -77,11 +88,16 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
77 int cpu = smp_processor_id(); 88 int cpu = smp_processor_id();
78 89
79 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 90 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
80 pr_warn("NMI backtrace for cpu %d\n", cpu); 91 if (regs && cpu_in_idle(instruction_pointer(regs))) {
81 if (regs) 92 pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n",
82 show_regs(regs); 93 cpu, instruction_pointer(regs));
83 else 94 } else {
84 dump_stack(); 95 pr_warn("NMI backtrace for cpu %d\n", cpu);
96 if (regs)
97 show_regs(regs);
98 else
99 dump_stack();
100 }
85 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 101 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
86 return true; 102 return true;
87 } 103 }
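The nmi_backtrace change above generalizes the trigger to an arbitrary cpumask and has the calling CPU dump its own stack directly instead of sending itself an NMI. Below is a hedged sketch of how an architecture wrapper sits on top of the new helper; example_raise_nmi() is a made-up stand-in for the arch's NMI/IPI send routine, and the code is not taken from this patch.

#include <linux/nmi.h>

static void example_raise_nmi(cpumask_t *mask)
{
        /* arch-specific: deliver an NMI to every CPU left in @mask */
}

void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
        nmi_trigger_cpumask_backtrace(mask, exclude_self, example_raise_nmi);
}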
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 27fe74948882..9ac959ef4cae 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -33,6 +33,7 @@
33 33
34#define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1)) 34#define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1))
35 35
36static DEFINE_SPINLOCK(percpu_ref_switch_lock);
36static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq); 37static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
37 38
38static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) 39static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
@@ -82,6 +83,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
82 atomic_long_set(&ref->count, start_count); 83 atomic_long_set(&ref->count, start_count);
83 84
84 ref->release = release; 85 ref->release = release;
86 ref->confirm_switch = NULL;
85 return 0; 87 return 0;
86} 88}
87EXPORT_SYMBOL_GPL(percpu_ref_init); 89EXPORT_SYMBOL_GPL(percpu_ref_init);
@@ -101,6 +103,8 @@ void percpu_ref_exit(struct percpu_ref *ref)
101 unsigned long __percpu *percpu_count = percpu_count_ptr(ref); 103 unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
102 104
103 if (percpu_count) { 105 if (percpu_count) {
106 /* non-NULL confirm_switch indicates switching in progress */
107 WARN_ON_ONCE(ref->confirm_switch);
104 free_percpu(percpu_count); 108 free_percpu(percpu_count);
105 ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD; 109 ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
106 } 110 }
@@ -161,66 +165,23 @@ static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
161static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, 165static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
162 percpu_ref_func_t *confirm_switch) 166 percpu_ref_func_t *confirm_switch)
163{ 167{
164 if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) { 168 if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
165 /* switching from percpu to atomic */ 169 if (confirm_switch)
166 ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; 170 confirm_switch(ref);
167 171 return;
168 /*
169 * Non-NULL ->confirm_switch is used to indicate that
170 * switching is in progress. Use noop one if unspecified.
171 */
172 WARN_ON_ONCE(ref->confirm_switch);
173 ref->confirm_switch =
174 confirm_switch ?: percpu_ref_noop_confirm_switch;
175
176 percpu_ref_get(ref); /* put after confirmation */
177 call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
178 } else if (confirm_switch) {
179 /*
180 * Somebody already set ATOMIC. Switching may still be in
181 * progress. @confirm_switch must be invoked after the
182 * switching is complete and a full sched RCU grace period
183 * has passed. Wait synchronously for the previous
184 * switching and schedule @confirm_switch invocation.
185 */
186 wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
187 ref->confirm_switch = confirm_switch;
188
189 percpu_ref_get(ref); /* put after confirmation */
190 call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
191 } 172 }
192}
193 173
194/** 174 /* switching from percpu to atomic */
195 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode 175 ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
196 * @ref: percpu_ref to switch to atomic mode 176
197 * @confirm_switch: optional confirmation callback 177 /*
198 * 178 * Non-NULL ->confirm_switch is used to indicate that switching is
199 * There's no reason to use this function for the usual reference counting. 179 * in progress. Use noop one if unspecified.
200 * Use percpu_ref_kill[_and_confirm](). 180 */
201 * 181 ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;
202 * Schedule switching of @ref to atomic mode. All its percpu counts will 182
203 * be collected to the main atomic counter. On completion, when all CPUs 183 percpu_ref_get(ref); /* put after confirmation */
204 * are guaranteed to be in atomic mode, @confirm_switch, which may not 184 call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
205 * block, is invoked. This function may be invoked concurrently with all
206 * the get/put operations and can safely be mixed with kill and reinit
207 * operations. Note that @ref will stay in atomic mode across kill/reinit
208 * cycles until percpu_ref_switch_to_percpu() is called.
209 *
210 * This function normally doesn't block and can be called from any context
211 * but it may block if @confirm_kill is specified and @ref is already in
212 * the process of switching to atomic mode. In such cases, @confirm_switch
213 * will be invoked after the switching is complete.
214 *
215 * Due to the way percpu_ref is implemented, @confirm_switch will be called
216 * after at least one full sched RCU grace period has passed but this is an
217 * implementation detail and must not be depended upon.
218 */
219void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
220 percpu_ref_func_t *confirm_switch)
221{
222 ref->force_atomic = true;
223 __percpu_ref_switch_to_atomic(ref, confirm_switch);
224} 185}
225 186
226static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) 187static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
@@ -233,8 +194,6 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
233 if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) 194 if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
234 return; 195 return;
235 196
236 wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
237
238 atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); 197 atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);
239 198
240 /* 199 /*
@@ -250,6 +209,58 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
250 ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); 209 ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
251} 210}
252 211
212static void __percpu_ref_switch_mode(struct percpu_ref *ref,
213 percpu_ref_func_t *confirm_switch)
214{
215 lockdep_assert_held(&percpu_ref_switch_lock);
216
217 /*
218 * If the previous ATOMIC switching hasn't finished yet, wait for
219 * its completion. If the caller ensures that ATOMIC switching
220 * isn't in progress, this function can be called from any context.
221 */
222 wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch,
223 percpu_ref_switch_lock);
224
225 if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
226 __percpu_ref_switch_to_atomic(ref, confirm_switch);
227 else
228 __percpu_ref_switch_to_percpu(ref);
229}
230
231/**
232 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
233 * @ref: percpu_ref to switch to atomic mode
234 * @confirm_switch: optional confirmation callback
235 *
236 * There's no reason to use this function for the usual reference counting.
237 * Use percpu_ref_kill[_and_confirm]().
238 *
239 * Schedule switching of @ref to atomic mode. All its percpu counts will
240 * be collected to the main atomic counter. On completion, when all CPUs
241 * are guaranteed to be in atomic mode, @confirm_switch, which may not
242 * block, is invoked. This function may be invoked concurrently with all
243 * the get/put operations and can safely be mixed with kill and reinit
244 * operations. Note that @ref will stay in atomic mode across kill/reinit
245 * cycles until percpu_ref_switch_to_percpu() is called.
246 *
247 * This function may block if @ref is in the process of switching to atomic
248 * mode. If the caller ensures that @ref is not in the process of
249 * switching to atomic mode, this function can be called from any context.
250 */
251void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
252 percpu_ref_func_t *confirm_switch)
253{
254 unsigned long flags;
255
256 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
257
258 ref->force_atomic = true;
259 __percpu_ref_switch_mode(ref, confirm_switch);
260
261 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
262}
263
253/** 264/**
254 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode 265 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
255 * @ref: percpu_ref to switch to percpu mode 266 * @ref: percpu_ref to switch to percpu mode
@@ -264,17 +275,20 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
264 * dying or dead, the actual switching takes place on the following 275 * dying or dead, the actual switching takes place on the following
265 * percpu_ref_reinit(). 276 * percpu_ref_reinit().
266 * 277 *
267 * This function normally doesn't block and can be called from any context 278 * This function may block if @ref is in the process of switching to atomic
268 * but it may block if @ref is in the process of switching to atomic mode 279 * mode. If the caller ensures that @ref is not in the process of
269 * by percpu_ref_switch_atomic(). 280 * switching to atomic mode, this function can be called from any context.
270 */ 281 */
271void percpu_ref_switch_to_percpu(struct percpu_ref *ref) 282void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
272{ 283{
284 unsigned long flags;
285
286 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
287
273 ref->force_atomic = false; 288 ref->force_atomic = false;
289 __percpu_ref_switch_mode(ref, NULL);
274 290
275 /* a dying or dead ref can't be switched to percpu mode w/o reinit */ 291 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
276 if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD))
277 __percpu_ref_switch_to_percpu(ref);
278} 292}
279 293
280/** 294/**
@@ -290,21 +304,23 @@ void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
290 * 304 *
291 * This function normally doesn't block and can be called from any context 305 * This function normally doesn't block and can be called from any context
292 * but it may block if @confirm_kill is specified and @ref is in the 306 * but it may block if @confirm_kill is specified and @ref is in the
293 * process of switching to atomic mode by percpu_ref_switch_atomic(). 307 * process of switching to atomic mode by percpu_ref_switch_to_atomic().
294 *
295 * Due to the way percpu_ref is implemented, @confirm_switch will be called
296 * after at least one full sched RCU grace period has passed but this is an
297 * implementation detail and must not be depended upon.
298 */ 308 */
299void percpu_ref_kill_and_confirm(struct percpu_ref *ref, 309void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
300 percpu_ref_func_t *confirm_kill) 310 percpu_ref_func_t *confirm_kill)
301{ 311{
312 unsigned long flags;
313
314 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
315
302 WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, 316 WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
303 "%s called more than once on %pf!", __func__, ref->release); 317 "%s called more than once on %pf!", __func__, ref->release);
304 318
305 ref->percpu_count_ptr |= __PERCPU_REF_DEAD; 319 ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
306 __percpu_ref_switch_to_atomic(ref, confirm_kill); 320 __percpu_ref_switch_mode(ref, confirm_kill);
307 percpu_ref_put(ref); 321 percpu_ref_put(ref);
322
323 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
308} 324}
309EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); 325EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
310 326
@@ -321,11 +337,16 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
321 */ 337 */
322void percpu_ref_reinit(struct percpu_ref *ref) 338void percpu_ref_reinit(struct percpu_ref *ref)
323{ 339{
340 unsigned long flags;
341
342 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
343
324 WARN_ON_ONCE(!percpu_ref_is_zero(ref)); 344 WARN_ON_ONCE(!percpu_ref_is_zero(ref));
325 345
326 ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD; 346 ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
327 percpu_ref_get(ref); 347 percpu_ref_get(ref);
328 if (!ref->force_atomic) 348 __percpu_ref_switch_mode(ref, NULL);
329 __percpu_ref_switch_to_percpu(ref); 349
350 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
330} 351}
331EXPORT_SYMBOL_GPL(percpu_ref_reinit); 352EXPORT_SYMBOL_GPL(percpu_ref_reinit);
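The percpu-refcount hunks above serialize every mode change behind the new percpu_ref_switch_lock and funnel the choice through __percpu_ref_switch_mode(), which first waits out any ATOMIC switch still in flight and then picks the mode from two bits of state. The tiny stand-alone model below restates that decision rule (illustration only, not kernel code).

#include <stdbool.h>
#include <stdio.h>

/* A ref runs in percpu mode only while it is neither killed (DEAD) nor
 * pinned to atomic by the caller (force_atomic). */
static const char *target_mode(bool force_atomic, bool dead)
{
        return (force_atomic || dead) ? "atomic" : "percpu";
}

int main(void)
{
        printf("%s\n", target_mode(false, false));  /* percpu */
        printf("%s\n", target_mode(true, false));   /* atomic */
        printf("%s\n", target_mode(false, true));   /* atomic until reinit */
        return 0;
}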
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8b7d8459bb9d..8e6d552c40dd 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -38,6 +38,9 @@
38#include <linux/preempt.h> /* in_interrupt() */ 38#include <linux/preempt.h> /* in_interrupt() */
39 39
40 40
41/* Number of nodes in fully populated tree of given height */
42static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
43
41/* 44/*
42 * Radix tree node cache. 45 * Radix tree node cache.
43 */ 46 */
@@ -102,10 +105,10 @@ static unsigned int radix_tree_descend(struct radix_tree_node *parent,
102 105
103#ifdef CONFIG_RADIX_TREE_MULTIORDER 106#ifdef CONFIG_RADIX_TREE_MULTIORDER
104 if (radix_tree_is_internal_node(entry)) { 107 if (radix_tree_is_internal_node(entry)) {
105 unsigned long siboff = get_slot_offset(parent, entry); 108 if (is_sibling_entry(parent, entry)) {
106 if (siboff < RADIX_TREE_MAP_SIZE) { 109 void **sibentry = (void **) entry_to_node(entry);
107 offset = siboff; 110 offset = get_slot_offset(parent, sibentry);
108 entry = rcu_dereference_raw(parent->slots[offset]); 111 entry = rcu_dereference_raw(*sibentry);
109 } 112 }
110 } 113 }
111#endif 114#endif
@@ -274,10 +277,11 @@ radix_tree_node_alloc(struct radix_tree_root *root)
274 277
275 /* 278 /*
276 * Even if the caller has preloaded, try to allocate from the 279 * Even if the caller has preloaded, try to allocate from the
277 * cache first for the new node to get accounted. 280 * cache first for the new node to get accounted to the memory
281 * cgroup.
278 */ 282 */
279 ret = kmem_cache_alloc(radix_tree_node_cachep, 283 ret = kmem_cache_alloc(radix_tree_node_cachep,
280 gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN); 284 gfp_mask | __GFP_NOWARN);
281 if (ret) 285 if (ret)
282 goto out; 286 goto out;
283 287
@@ -300,8 +304,7 @@ radix_tree_node_alloc(struct radix_tree_root *root)
300 kmemleak_update_trace(ret); 304 kmemleak_update_trace(ret);
301 goto out; 305 goto out;
302 } 306 }
303 ret = kmem_cache_alloc(radix_tree_node_cachep, 307 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
304 gfp_mask | __GFP_ACCOUNT);
305out: 308out:
306 BUG_ON(radix_tree_is_internal_node(ret)); 309 BUG_ON(radix_tree_is_internal_node(ret));
307 return ret; 310 return ret;
@@ -342,22 +345,28 @@ radix_tree_node_free(struct radix_tree_node *node)
342 * To make use of this facility, the radix tree must be initialised without 345 * To make use of this facility, the radix tree must be initialised without
343 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 346 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
344 */ 347 */
345static int __radix_tree_preload(gfp_t gfp_mask) 348static int __radix_tree_preload(gfp_t gfp_mask, int nr)
346{ 349{
347 struct radix_tree_preload *rtp; 350 struct radix_tree_preload *rtp;
348 struct radix_tree_node *node; 351 struct radix_tree_node *node;
349 int ret = -ENOMEM; 352 int ret = -ENOMEM;
350 353
354 /*
355 * Nodes preloaded by one cgroup can be used by another cgroup, so
356 * they should never be accounted to any particular memory cgroup.
357 */
358 gfp_mask &= ~__GFP_ACCOUNT;
359
351 preempt_disable(); 360 preempt_disable();
352 rtp = this_cpu_ptr(&radix_tree_preloads); 361 rtp = this_cpu_ptr(&radix_tree_preloads);
353 while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) { 362 while (rtp->nr < nr) {
354 preempt_enable(); 363 preempt_enable();
355 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 364 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
356 if (node == NULL) 365 if (node == NULL)
357 goto out; 366 goto out;
358 preempt_disable(); 367 preempt_disable();
359 rtp = this_cpu_ptr(&radix_tree_preloads); 368 rtp = this_cpu_ptr(&radix_tree_preloads);
360 if (rtp->nr < RADIX_TREE_PRELOAD_SIZE) { 369 if (rtp->nr < nr) {
361 node->private_data = rtp->nodes; 370 node->private_data = rtp->nodes;
362 rtp->nodes = node; 371 rtp->nodes = node;
363 rtp->nr++; 372 rtp->nr++;
@@ -383,7 +392,7 @@ int radix_tree_preload(gfp_t gfp_mask)
383{ 392{
384 /* Warn on non-sensical use... */ 393 /* Warn on non-sensical use... */
385 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); 394 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
386 return __radix_tree_preload(gfp_mask); 395 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
387} 396}
388EXPORT_SYMBOL(radix_tree_preload); 397EXPORT_SYMBOL(radix_tree_preload);
389 398
@@ -395,7 +404,7 @@ EXPORT_SYMBOL(radix_tree_preload);
395int radix_tree_maybe_preload(gfp_t gfp_mask) 404int radix_tree_maybe_preload(gfp_t gfp_mask)
396{ 405{
397 if (gfpflags_allow_blocking(gfp_mask)) 406 if (gfpflags_allow_blocking(gfp_mask))
398 return __radix_tree_preload(gfp_mask); 407 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
399 /* Preloading doesn't help anything with this gfp mask, skip it */ 408 /* Preloading doesn't help anything with this gfp mask, skip it */
400 preempt_disable(); 409 preempt_disable();
401 return 0; 410 return 0;
@@ -403,6 +412,51 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
403EXPORT_SYMBOL(radix_tree_maybe_preload); 412EXPORT_SYMBOL(radix_tree_maybe_preload);
404 413
405/* 414/*
415 * The same as function above, but preload number of nodes required to insert
416 * (1 << order) continuous naturally-aligned elements.
417 */
418int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
419{
420 unsigned long nr_subtrees;
421 int nr_nodes, subtree_height;
422
423 /* Preloading doesn't help anything with this gfp mask, skip it */
424 if (!gfpflags_allow_blocking(gfp_mask)) {
425 preempt_disable();
426 return 0;
427 }
428
429 /*
430 * Calculate number and height of fully populated subtrees it takes to
431 * store (1 << order) elements.
432 */
433 nr_subtrees = 1 << order;
434 for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE;
435 subtree_height++)
436 nr_subtrees >>= RADIX_TREE_MAP_SHIFT;
437
438 /*
439 * The worst case is zero height tree with a single item at index 0 and
440 * then inserting items starting at ULONG_MAX - (1 << order).
441 *
442 * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to
443 * 0-index item.
444 */
445 nr_nodes = RADIX_TREE_MAX_PATH;
446
447 /* Plus branch to fully populated subtrees. */
448 nr_nodes += RADIX_TREE_MAX_PATH - subtree_height;
449
450 /* Root node is shared. */
451 nr_nodes--;
452
453 /* Plus nodes required to build subtrees. */
454 nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height];
455
456 return __radix_tree_preload(gfp_mask, nr_nodes);
457}
458
459/*
406 * The maximum index which can be stored in a radix tree 460 * The maximum index which can be stored in a radix tree
407 */ 461 */
408static inline unsigned long shift_maxindex(unsigned int shift) 462static inline unsigned long shift_maxindex(unsigned int shift)
@@ -1529,15 +1583,10 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1529} 1583}
1530EXPORT_SYMBOL(radix_tree_delete); 1584EXPORT_SYMBOL(radix_tree_delete);
1531 1585
1532struct radix_tree_node *radix_tree_replace_clear_tags( 1586void radix_tree_clear_tags(struct radix_tree_root *root,
1533 struct radix_tree_root *root, 1587 struct radix_tree_node *node,
1534 unsigned long index, void *entry) 1588 void **slot)
1535{ 1589{
1536 struct radix_tree_node *node;
1537 void **slot;
1538
1539 __radix_tree_lookup(root, index, &node, &slot);
1540
1541 if (node) { 1590 if (node) {
1542 unsigned int tag, offset = get_slot_offset(node, slot); 1591 unsigned int tag, offset = get_slot_offset(node, slot);
1543 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1592 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
@@ -1546,9 +1595,6 @@ struct radix_tree_node *radix_tree_replace_clear_tags(
1546 /* Clear root node tags */ 1595 /* Clear root node tags */
1547 root->gfp_mask &= __GFP_BITS_MASK; 1596 root->gfp_mask &= __GFP_BITS_MASK;
1548 } 1597 }
1549
1550 radix_tree_replace_slot(slot, entry);
1551 return node;
1552} 1598}
1553 1599
1554/** 1600/**
@@ -1571,6 +1617,31 @@ radix_tree_node_ctor(void *arg)
1571 INIT_LIST_HEAD(&node->private_list); 1617 INIT_LIST_HEAD(&node->private_list);
1572} 1618}
1573 1619
1620static __init unsigned long __maxindex(unsigned int height)
1621{
1622 unsigned int width = height * RADIX_TREE_MAP_SHIFT;
1623 int shift = RADIX_TREE_INDEX_BITS - width;
1624
1625 if (shift < 0)
1626 return ~0UL;
1627 if (shift >= BITS_PER_LONG)
1628 return 0UL;
1629 return ~0UL >> shift;
1630}
1631
1632static __init void radix_tree_init_maxnodes(void)
1633{
1634 unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1];
1635 unsigned int i, j;
1636
1637 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
1638 height_to_maxindex[i] = __maxindex(i);
1639 for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) {
1640 for (j = i; j > 0; j--)
1641 height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1;
1642 }
1643}
1644
1574static int radix_tree_callback(struct notifier_block *nfb, 1645static int radix_tree_callback(struct notifier_block *nfb,
1575 unsigned long action, void *hcpu) 1646 unsigned long action, void *hcpu)
1576{ 1647{
@@ -1597,5 +1668,6 @@ void __init radix_tree_init(void)
1597 sizeof(struct radix_tree_node), 0, 1668 sizeof(struct radix_tree_node), 0,
1598 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1669 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
1599 radix_tree_node_ctor); 1670 radix_tree_node_ctor);
1671 radix_tree_init_maxnodes();
1600 hotcpu_notifier(radix_tree_callback, 0); 1672 hotcpu_notifier(radix_tree_callback, 0);
1601} 1673}
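The worst-case estimate in radix_tree_maybe_preload_order() above is easier to check with concrete numbers. The following standalone sketch (plain userspace C, not part of the patch) redoes the same arithmetic; RADIX_TREE_MAP_SHIFT == 6 and a 64-bit index space are assumptions matching a typical configuration, and maxnodes() stands in for the height_to_maxnodes[] table that radix_tree_init_maxnodes() precomputes.

#include <stdio.h>

#define MAP_SHIFT	6
#define MAP_SIZE	(1UL << MAP_SHIFT)	/* 64 slots per node */
#define INDEX_BITS	64
#define MAX_PATH	((INDEX_BITS + MAP_SHIFT - 1) / MAP_SHIFT)	/* 11 */

/* Number of nodes in a fully populated subtree of the given height. */
static unsigned long maxnodes(unsigned int height)
{
	unsigned long nodes = 0, slots = 1;
	unsigned int h;

	for (h = 1; h <= height; h++) {
		nodes += slots;		/* one more level of interior nodes */
		slots *= MAP_SIZE;
	}
	return nodes;
}

/* Same steps as radix_tree_maybe_preload_order(). */
static int preload_estimate(int order)
{
	unsigned long nr_subtrees = 1UL << order;
	int subtree_height = 0, nr_nodes;

	while (nr_subtrees > MAP_SIZE) {
		nr_subtrees >>= MAP_SHIFT;
		subtree_height++;
	}

	nr_nodes  = MAX_PATH;			/* branch down to the 0-index item */
	nr_nodes += MAX_PATH - subtree_height;	/* branch down to the subtrees */
	nr_nodes--;				/* the root node is shared */
	nr_nodes += nr_subtrees * maxnodes(subtree_height);
	return nr_nodes;
}

int main(void)
{
	int order;

	for (order = 0; order <= 9; order++)
		printf("order %d -> preload %d nodes\n",
		       order, preload_estimate(order));
	return 0;
}

With these constants, orders 0 through 6 all come out at 21 nodes (two root-to-leaf paths sharing the root); higher orders add one fully populated subtree per 64 aligned slots.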
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index 0a7e494b2bcd..f01b1cb04f91 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -3,3 +3,4 @@ altivec*.c
3int*.c 3int*.c
4tables.c 4tables.c
5neon?.c 5neon?.c
6s390vx?.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3b10a48fa040..3057011f5599 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -3,10 +3,11 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ 3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
4 int8.o int16.o int32.o 4 int8.o int16.o int32.o
5 5
6raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o 6raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
7raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o 7raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
8raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o 8raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
9raid6_pq-$(CONFIG_TILEGX) += tilegx8.o 9raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
10raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
10 11
11hostprogs-y += mktables 12hostprogs-y += mktables
12 13
@@ -116,6 +117,11 @@ $(obj)/tilegx8.c: UNROLL := 8
116$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE 117$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE
117 $(call if_changed,unroll) 118 $(call if_changed,unroll)
118 119
120targets += s390vx8.c
121$(obj)/s390vx8.c: UNROLL := 8
122$(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE
123 $(call if_changed,unroll)
124
119quiet_cmd_mktable = TABLE $@ 125quiet_cmd_mktable = TABLE $@
120 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) 126 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
121 127
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 975c6e0434bd..7857049fd7d3 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -49,6 +49,10 @@ const struct raid6_calls * const raid6_algos[] = {
49 &raid6_avx2x1, 49 &raid6_avx2x1,
50 &raid6_avx2x2, 50 &raid6_avx2x2,
51#endif 51#endif
52#ifdef CONFIG_AS_AVX512
53 &raid6_avx512x1,
54 &raid6_avx512x2,
55#endif
52#endif 56#endif
53#if defined(__x86_64__) && !defined(__arch_um__) 57#if defined(__x86_64__) && !defined(__arch_um__)
54 &raid6_sse2x1, 58 &raid6_sse2x1,
@@ -59,6 +63,11 @@ const struct raid6_calls * const raid6_algos[] = {
59 &raid6_avx2x2, 63 &raid6_avx2x2,
60 &raid6_avx2x4, 64 &raid6_avx2x4,
61#endif 65#endif
66#ifdef CONFIG_AS_AVX512
67 &raid6_avx512x1,
68 &raid6_avx512x2,
69 &raid6_avx512x4,
70#endif
62#endif 71#endif
63#ifdef CONFIG_ALTIVEC 72#ifdef CONFIG_ALTIVEC
64 &raid6_altivec1, 73 &raid6_altivec1,
@@ -69,6 +78,9 @@ const struct raid6_calls * const raid6_algos[] = {
69#if defined(CONFIG_TILEGX) 78#if defined(CONFIG_TILEGX)
70 &raid6_tilegx8, 79 &raid6_tilegx8,
71#endif 80#endif
81#if defined(CONFIG_S390)
82 &raid6_s390vx8,
83#endif
72 &raid6_intx1, 84 &raid6_intx1,
73 &raid6_intx2, 85 &raid6_intx2,
74 &raid6_intx4, 86 &raid6_intx4,
@@ -89,12 +101,18 @@ void (*raid6_datap_recov)(int, size_t, int, void **);
89EXPORT_SYMBOL_GPL(raid6_datap_recov); 101EXPORT_SYMBOL_GPL(raid6_datap_recov);
90 102
91const struct raid6_recov_calls *const raid6_recov_algos[] = { 103const struct raid6_recov_calls *const raid6_recov_algos[] = {
104#ifdef CONFIG_AS_AVX512
105 &raid6_recov_avx512,
106#endif
92#ifdef CONFIG_AS_AVX2 107#ifdef CONFIG_AS_AVX2
93 &raid6_recov_avx2, 108 &raid6_recov_avx2,
94#endif 109#endif
95#ifdef CONFIG_AS_SSSE3 110#ifdef CONFIG_AS_SSSE3
96 &raid6_recov_ssse3, 111 &raid6_recov_ssse3,
97#endif 112#endif
113#ifdef CONFIG_S390
114 &raid6_recov_s390xc,
115#endif
98 &raid6_recov_intx1, 116 &raid6_recov_intx1,
99 NULL 117 NULL
100}; 118};
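The raid6_recov_algos[] table above is NULL-terminated, and each entry carries a priority plus an optional ->valid() hook; the selection code in algos.c (not shown in this hunk) picks the highest-priority usable entry at init time. A minimal standalone sketch of that scan, with made-up, simplified types:

#include <stddef.h>
#include <stdio.h>

struct recov_calls {
	const char *name;
	int (*valid)(void);	/* NULL means "always usable" */
	int priority;
};

/* Pick the highest-priority entry whose ->valid() hook reports support. */
static const struct recov_calls *pick_recov(const struct recov_calls *const tbl[])
{
	const struct recov_calls *best = NULL;
	int i;

	for (i = 0; tbl[i]; i++) {
		if (tbl[i]->valid && !tbl[i]->valid())
			continue;	/* CPU lacks the required features */
		if (!best || tbl[i]->priority > best->priority)
			best = tbl[i];
	}
	return best;
}

static int never(void) { return 0; }	/* pretend the CPU lacks AVX-512 */

static const struct recov_calls intx1  = { "intx1",  NULL,  0 };
static const struct recov_calls avx512 = { "avx512", never, 3 };
static const struct recov_calls *const table[] = { &avx512, &intx1, NULL };

int main(void)
{
	printf("using %s\n", pick_recov(table)->name);	/* prints "intx1" */
	return 0;
}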
diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c
new file mode 100644
index 000000000000..f524a7972006
--- /dev/null
+++ b/lib/raid6/avx512.c
@@ -0,0 +1,569 @@
1/* -*- linux-c -*- --------------------------------------------------------
2 *
3 * Copyright (C) 2016 Intel Corporation
4 *
5 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
6 * Author: Megha Dey <megha.dey@linux.intel.com>
7 *
8 * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
9 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
14 * Boston MA 02111-1307, USA; either version 2 of the License, or
15 * (at your option) any later version; incorporated herein by reference.
16 *
17 * -----------------------------------------------------------------------
18 */
19
20/*
21 * AVX512 implementation of RAID-6 syndrome functions
22 *
23 */
24
25#ifdef CONFIG_AS_AVX512
26
27#include <linux/raid/pq.h>
28#include "x86.h"
29
30static const struct raid6_avx512_constants {
31 u64 x1d[8];
32} raid6_avx512_constants __aligned(512) = {
33 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
34 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
35 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
36 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
37};
38
39static int raid6_have_avx512(void)
40{
41 return boot_cpu_has(X86_FEATURE_AVX2) &&
42 boot_cpu_has(X86_FEATURE_AVX) &&
43 boot_cpu_has(X86_FEATURE_AVX512F) &&
44 boot_cpu_has(X86_FEATURE_AVX512BW) &&
45 boot_cpu_has(X86_FEATURE_AVX512VL) &&
46 boot_cpu_has(X86_FEATURE_AVX512DQ);
47}
48
49static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
50{
51 u8 **dptr = (u8 **)ptrs;
52 u8 *p, *q;
53 int d, z, z0;
54
55 z0 = disks - 3; /* Highest data disk */
56 p = dptr[z0+1]; /* XOR parity */
57 q = dptr[z0+2]; /* RS syndrome */
58
59 kernel_fpu_begin();
60
61 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
62 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
63 :
64 : "m" (raid6_avx512_constants.x1d[0]));
65
66 for (d = 0; d < bytes; d += 64) {
67 asm volatile("prefetchnta %0\n\t"
68 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
69 "prefetchnta %1\n\t"
70 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
71 "vmovdqa64 %1,%%zmm6"
72 :
73 : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
74 for (z = z0-2; z >= 0; z--) {
75 asm volatile("prefetchnta %0\n\t"
76 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
77 "vpmovm2b %%k1,%%zmm5\n\t"
78 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
79 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
80 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
81 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
82 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
83 "vmovdqa64 %0,%%zmm6"
84 :
85 : "m" (dptr[z][d]));
86 }
87 asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
88 "vpmovm2b %%k1,%%zmm5\n\t"
89 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
90 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
91 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
92 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
93 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
94 "vmovntdq %%zmm2,%0\n\t"
95 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
96 "vmovntdq %%zmm4,%1\n\t"
97 "vpxorq %%zmm4,%%zmm4,%%zmm4"
98 :
99 : "m" (p[d]), "m" (q[d]));
100 }
101
102 asm volatile("sfence" : : : "memory");
103 kernel_fpu_end();
104}
105
106static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
107 size_t bytes, void **ptrs)
108{
109 u8 **dptr = (u8 **)ptrs;
110 u8 *p, *q;
111 int d, z, z0;
112
113 z0 = stop; /* P/Q right side optimization */
114 p = dptr[disks-2]; /* XOR parity */
115 q = dptr[disks-1]; /* RS syndrome */
116
117 kernel_fpu_begin();
118
119 asm volatile("vmovdqa64 %0,%%zmm0"
120 : : "m" (raid6_avx512_constants.x1d[0]));
121
122 for (d = 0 ; d < bytes ; d += 64) {
123 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
124 "vmovdqa64 %1,%%zmm2\n\t"
125 "vpxorq %%zmm4,%%zmm2,%%zmm2"
126 :
127 : "m" (dptr[z0][d]), "m" (p[d]));
128 /* P/Q data pages */
129 for (z = z0-1 ; z >= start ; z--) {
130 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
131 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
132 "vpmovm2b %%k1,%%zmm5\n\t"
133 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
134 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
135 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
136 "vmovdqa64 %0,%%zmm5\n\t"
137 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
138 "vpxorq %%zmm5,%%zmm4,%%zmm4"
139 :
140 : "m" (dptr[z][d]));
141 }
142 /* P/Q left side optimization */
143 for (z = start-1 ; z >= 0 ; z--) {
144 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
145 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
146 "vpmovm2b %%k1,%%zmm5\n\t"
147 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
148 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
149 "vpxorq %%zmm5,%%zmm4,%%zmm4"
150 :
151 : );
152 }
153 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
154 /* Don't use movntdq for r/w memory area < cache line */
155 "vmovdqa64 %%zmm4,%0\n\t"
156 "vmovdqa64 %%zmm2,%1"
157 :
158 : "m" (q[d]), "m" (p[d]));
159 }
160
161 asm volatile("sfence" : : : "memory");
162 kernel_fpu_end();
163}
164
165const struct raid6_calls raid6_avx512x1 = {
166 raid6_avx5121_gen_syndrome,
167 raid6_avx5121_xor_syndrome,
168 raid6_have_avx512,
169 "avx512x1",
170 1 /* Has cache hints */
171};
172
173/*
174 * Unrolled-by-2 AVX512 implementation
175 */
176static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
177{
178 u8 **dptr = (u8 **)ptrs;
179 u8 *p, *q;
180 int d, z, z0;
181
182 z0 = disks - 3; /* Highest data disk */
183 p = dptr[z0+1]; /* XOR parity */
184 q = dptr[z0+2]; /* RS syndrome */
185
186 kernel_fpu_begin();
187
188 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
189 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
190 :
191 : "m" (raid6_avx512_constants.x1d[0]));
192
193 /* We uniformly assume a single prefetch covers at least 64 bytes */
194 for (d = 0; d < bytes; d += 128) {
195 asm volatile("prefetchnta %0\n\t"
196 "prefetchnta %1\n\t"
197 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
198 "vmovdqa64 %1,%%zmm3\n\t" /* P[1] */
199 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
200 "vmovdqa64 %%zmm3,%%zmm6" /* Q[1] */
201 :
202 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
203 for (z = z0-1; z >= 0; z--) {
204 asm volatile("prefetchnta %0\n\t"
205 "prefetchnta %1\n\t"
206 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
207 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
208 "vpmovm2b %%k1,%%zmm5\n\t"
209 "vpmovm2b %%k2,%%zmm7\n\t"
210 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
211 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
212 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
213 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
214 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
215 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
216 "vmovdqa64 %0,%%zmm5\n\t"
217 "vmovdqa64 %1,%%zmm7\n\t"
218 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
219 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
220 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
221 "vpxorq %%zmm7,%%zmm6,%%zmm6"
222 :
223 : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
224 }
225 asm volatile("vmovntdq %%zmm2,%0\n\t"
226 "vmovntdq %%zmm3,%1\n\t"
227 "vmovntdq %%zmm4,%2\n\t"
228 "vmovntdq %%zmm6,%3"
229 :
230 : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
231 "m" (q[d+64]));
232 }
233
234 asm volatile("sfence" : : : "memory");
235 kernel_fpu_end();
236}
237
238static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
239 size_t bytes, void **ptrs)
240{
241 u8 **dptr = (u8 **)ptrs;
242 u8 *p, *q;
243 int d, z, z0;
244
245 z0 = stop; /* P/Q right side optimization */
246 p = dptr[disks-2]; /* XOR parity */
247 q = dptr[disks-1]; /* RS syndrome */
248
249 kernel_fpu_begin();
250
251 asm volatile("vmovdqa64 %0,%%zmm0"
252 : : "m" (raid6_avx512_constants.x1d[0]));
253
254 for (d = 0 ; d < bytes ; d += 128) {
255 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
256 "vmovdqa64 %1,%%zmm6\n\t"
257 "vmovdqa64 %2,%%zmm2\n\t"
258 "vmovdqa64 %3,%%zmm3\n\t"
259 "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
260 "vpxorq %%zmm6,%%zmm3,%%zmm3"
261 :
262 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
263 "m" (p[d]), "m" (p[d+64]));
264 /* P/Q data pages */
265 for (z = z0-1 ; z >= start ; z--) {
266 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
267 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
268 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
269 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
270 "vpmovm2b %%k1,%%zmm5\n\t"
271 "vpmovm2b %%k2,%%zmm7\n\t"
272 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
273 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
274 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
275 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
276 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
277 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
278 "vmovdqa64 %0,%%zmm5\n\t"
279 "vmovdqa64 %1,%%zmm7\n\t"
280 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
281 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
282 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
283 "vpxorq %%zmm7,%%zmm6,%%zmm6"
284 :
285 : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
286 }
287 /* P/Q left side optimization */
288 for (z = start-1 ; z >= 0 ; z--) {
289 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
290 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
291 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
292 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
293 "vpmovm2b %%k1,%%zmm5\n\t"
294 "vpmovm2b %%k2,%%zmm7\n\t"
295 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
296 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
297 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
298 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
299 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
300 "vpxorq %%zmm7,%%zmm6,%%zmm6"
301 :
302 : );
303 }
304 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
305 "vpxorq %1,%%zmm6,%%zmm6\n\t"
306 /* Don't use movntdq for r/w
307 * memory area < cache line
308 */
309 "vmovdqa64 %%zmm4,%0\n\t"
310 "vmovdqa64 %%zmm6,%1\n\t"
311 "vmovdqa64 %%zmm2,%2\n\t"
312 "vmovdqa64 %%zmm3,%3"
313 :
314 : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
315 "m" (p[d+64]));
316 }
317
318 asm volatile("sfence" : : : "memory");
319 kernel_fpu_end();
320}
321
322const struct raid6_calls raid6_avx512x2 = {
323 raid6_avx5122_gen_syndrome,
324 raid6_avx5122_xor_syndrome,
325 raid6_have_avx512,
326 "avx512x2",
327 1 /* Has cache hints */
328};
329
330#ifdef CONFIG_X86_64
331
332/*
333 * Unrolled-by-4 AVX512 implementation
334 */
335static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
336{
337 u8 **dptr = (u8 **)ptrs;
338 u8 *p, *q;
339 int d, z, z0;
340
341 z0 = disks - 3; /* Highest data disk */
342 p = dptr[z0+1]; /* XOR parity */
343 q = dptr[z0+2]; /* RS syndrome */
344
345 kernel_fpu_begin();
346
347 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
348 "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t" /* Zero temp */
349 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" /* P[0] */
350 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" /* P[1] */
351 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" /* Q[0] */
352 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" /* Q[1] */
353 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */
354 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */
355 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */
356 "vpxorq %%zmm14,%%zmm14,%%zmm14" /* Q[3] */
357 :
358 : "m" (raid6_avx512_constants.x1d[0]));
359
360 for (d = 0; d < bytes; d += 256) {
361 for (z = z0; z >= 0; z--) {
362 asm volatile("prefetchnta %0\n\t"
363 "prefetchnta %1\n\t"
364 "prefetchnta %2\n\t"
365 "prefetchnta %3\n\t"
366 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
367 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
368 "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
369 "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
370 "vpmovm2b %%k1,%%zmm5\n\t"
371 "vpmovm2b %%k2,%%zmm7\n\t"
372 "vpmovm2b %%k3,%%zmm13\n\t"
373 "vpmovm2b %%k4,%%zmm15\n\t"
374 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
375 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
376 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
377 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
378 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
379 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
380 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
381 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
382 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
383 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
384 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
385 "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
386 "vmovdqa64 %0,%%zmm5\n\t"
387 "vmovdqa64 %1,%%zmm7\n\t"
388 "vmovdqa64 %2,%%zmm13\n\t"
389 "vmovdqa64 %3,%%zmm15\n\t"
390 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
391 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
392 "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
393 "vpxorq %%zmm15,%%zmm11,%%zmm11\n"
394 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
395 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
396 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
397 "vpxorq %%zmm15,%%zmm14,%%zmm14"
398 :
399 : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
400 "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
401 }
402 asm volatile("vmovntdq %%zmm2,%0\n\t"
403 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
404 "vmovntdq %%zmm3,%1\n\t"
405 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
406 "vmovntdq %%zmm10,%2\n\t"
407 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
408 "vmovntdq %%zmm11,%3\n\t"
409 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
410 "vmovntdq %%zmm4,%4\n\t"
411 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
412 "vmovntdq %%zmm6,%5\n\t"
413 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
414 "vmovntdq %%zmm12,%6\n\t"
415 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
416 "vmovntdq %%zmm14,%7\n\t"
417 "vpxorq %%zmm14,%%zmm14,%%zmm14"
418 :
419 : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
420 "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
421 "m" (q[d+128]), "m" (q[d+192]));
422 }
423
424 asm volatile("sfence" : : : "memory");
425 kernel_fpu_end();
426}
427
428static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
429 size_t bytes, void **ptrs)
430{
431 u8 **dptr = (u8 **)ptrs;
432 u8 *p, *q;
433 int d, z, z0;
434
435 z0 = stop; /* P/Q right side optimization */
436 p = dptr[disks-2]; /* XOR parity */
437 q = dptr[disks-1]; /* RS syndrome */
438
439 kernel_fpu_begin();
440
441 asm volatile("vmovdqa64 %0,%%zmm0"
442 :: "m" (raid6_avx512_constants.x1d[0]));
443
444 for (d = 0 ; d < bytes ; d += 256) {
445 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
446 "vmovdqa64 %1,%%zmm6\n\t"
447 "vmovdqa64 %2,%%zmm12\n\t"
448 "vmovdqa64 %3,%%zmm14\n\t"
449 "vmovdqa64 %4,%%zmm2\n\t"
450 "vmovdqa64 %5,%%zmm3\n\t"
451 "vmovdqa64 %6,%%zmm10\n\t"
452 "vmovdqa64 %7,%%zmm11\n\t"
453 "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
454 "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
455 "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
456 "vpxorq %%zmm14,%%zmm11,%%zmm11"
457 :
458 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
459 "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
460 "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
461 "m" (p[d+192]));
462 /* P/Q data pages */
463 for (z = z0-1 ; z >= start ; z--) {
464 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
465 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
466 "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
467 "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
468 "prefetchnta %0\n\t"
469 "prefetchnta %2\n\t"
470 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
471 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
472 "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
473 "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
474 "vpmovm2b %%k1,%%zmm5\n\t"
475 "vpmovm2b %%k2,%%zmm7\n\t"
476 "vpmovm2b %%k3,%%zmm13\n\t"
477 "vpmovm2b %%k4,%%zmm15\n\t"
478 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
479 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
480 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
481 "vpaddb %%Zmm14,%%zmm14,%%zmm14\n\t"
482 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
483 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
484 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
485 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
486 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
487 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
488 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
489 "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
490 "vmovdqa64 %0,%%zmm5\n\t"
491 "vmovdqa64 %1,%%zmm7\n\t"
492 "vmovdqa64 %2,%%zmm13\n\t"
493 "vmovdqa64 %3,%%zmm15\n\t"
494 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
495 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
496 "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
497 "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
498 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
499 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
500 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
501 "vpxorq %%zmm15,%%zmm14,%%zmm14"
502 :
503 : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
504 "m" (dptr[z][d+128]),
505 "m" (dptr[z][d+192]));
506 }
507 asm volatile("prefetchnta %0\n\t"
508 "prefetchnta %1\n\t"
509 :
510 : "m" (q[d]), "m" (q[d+128]));
511 /* P/Q left side optimization */
512 for (z = start-1 ; z >= 0 ; z--) {
513 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
514 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
515 "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
516 "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
517 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
518 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
519 "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
520 "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
521 "vpmovm2b %%k1,%%zmm5\n\t"
522 "vpmovm2b %%k2,%%zmm7\n\t"
523 "vpmovm2b %%k3,%%zmm13\n\t"
524 "vpmovm2b %%k4,%%zmm15\n\t"
525 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
526 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
527 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
528 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
529 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
530 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
531 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
532 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
533 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
534 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
535 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
536 "vpxorq %%zmm15,%%zmm14,%%zmm14"
537 :
538 : );
539 }
540 asm volatile("vmovntdq %%zmm2,%0\n\t"
541 "vmovntdq %%zmm3,%1\n\t"
542 "vmovntdq %%zmm10,%2\n\t"
543 "vmovntdq %%zmm11,%3\n\t"
544 "vpxorq %4,%%zmm4,%%zmm4\n\t"
545 "vpxorq %5,%%zmm6,%%zmm6\n\t"
546 "vpxorq %6,%%zmm12,%%zmm12\n\t"
547 "vpxorq %7,%%zmm14,%%zmm14\n\t"
548 "vmovntdq %%zmm4,%4\n\t"
549 "vmovntdq %%zmm6,%5\n\t"
550 "vmovntdq %%zmm12,%6\n\t"
551 "vmovntdq %%zmm14,%7"
552 :
553 : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
554 "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
555 "m" (q[d+128]), "m" (q[d+192]));
556 }
557 asm volatile("sfence" : : : "memory");
558 kernel_fpu_end();
559}
560const struct raid6_calls raid6_avx512x4 = {
561 raid6_avx5124_gen_syndrome,
562 raid6_avx5124_xor_syndrome,
563 raid6_have_avx512,
564 "avx512x4",
565 1 /* Has cache hints */
566};
567#endif
568
569#endif /* CONFIG_AS_AVX512 */
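The vpcmpgtb/vpmovm2b/vpaddb/vpandq/vpxorq sequence used throughout this file is a 64-byte-wide multiplication by 2 in GF(2^8) with the RAID-6 polynomial; the 0x1d bytes in raid6_avx512_constants are its reduction constant. A scalar sketch of that step for a single byte (illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

static uint8_t gf_mul2(uint8_t q)
{
	/* 0xff when the top bit of q is set, 0x00 otherwise; per byte this
	 * is what vpcmpgtb against zero followed by vpmovm2b produces */
	uint8_t mask = (q & 0x80) ? 0xff : 0x00;

	/* vpaddb q,q doubles every byte, i.e. shifts it left by one */
	q <<= 1;

	/* vpandq keeps 0x1d only where the byte overflowed,
	 * vpxorq folds the reduction back in */
	return q ^ (mask & 0x1d);
}

int main(void)
{
	printf("0x80 * 2 = 0x%02x\n", gf_mul2(0x80));	/* 0x1d */
	printf("0x53 * 2 = 0x%02x\n", gf_mul2(0x53));	/* 0xa6 */
	return 0;
}

In the gen_syndrome loops each data block is XORed into P, while Q is first doubled with this step and then XORed with the data block, which evaluates the syndrome polynomial by Horner's rule.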
diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c
new file mode 100644
index 000000000000..625aafa33b61
--- /dev/null
+++ b/lib/raid6/recov_avx512.c
@@ -0,0 +1,388 @@
1/*
2 * Copyright (C) 2016 Intel Corporation
3 *
4 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
5 * Author: Megha Dey <megha.dey@linux.intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 *
12 */
13
14#ifdef CONFIG_AS_AVX512
15
16#include <linux/raid/pq.h>
17#include "x86.h"
18
19static int raid6_has_avx512(void)
20{
21 return boot_cpu_has(X86_FEATURE_AVX2) &&
22 boot_cpu_has(X86_FEATURE_AVX) &&
23 boot_cpu_has(X86_FEATURE_AVX512F) &&
24 boot_cpu_has(X86_FEATURE_AVX512BW) &&
25 boot_cpu_has(X86_FEATURE_AVX512VL) &&
26 boot_cpu_has(X86_FEATURE_AVX512DQ);
27}
28
29static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
30 int failb, void **ptrs)
31{
32 u8 *p, *q, *dp, *dq;
33 const u8 *pbmul; /* P multiplier table for B data */
34 const u8 *qmul; /* Q multiplier table (for both) */
35 const u8 x0f = 0x0f;
36
37 p = (u8 *)ptrs[disks-2];
38 q = (u8 *)ptrs[disks-1];
39
40 /*
41 * Compute syndrome with zero for the missing data pages
42 * Use the dead data pages as temporary storage for
43 * delta p and delta q
44 */
45
46 dp = (u8 *)ptrs[faila];
47 ptrs[faila] = (void *)raid6_empty_zero_page;
48 ptrs[disks-2] = dp;
49 dq = (u8 *)ptrs[failb];
50 ptrs[failb] = (void *)raid6_empty_zero_page;
51 ptrs[disks-1] = dq;
52
53 raid6_call.gen_syndrome(disks, bytes, ptrs);
54
55 /* Restore pointer table */
56 ptrs[faila] = dp;
57 ptrs[failb] = dq;
58 ptrs[disks-2] = p;
59 ptrs[disks-1] = q;
60
61 /* Now, pick the proper data tables */
62 pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
63 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
64 raid6_gfexp[failb]]];
65
66 kernel_fpu_begin();
67
68 /* zmm0 = x0f[16] */
69 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
70
71 while (bytes) {
72#ifdef CONFIG_X86_64
73 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
74 "vmovdqa64 %1, %%zmm9\n\t"
75 "vmovdqa64 %2, %%zmm0\n\t"
76 "vmovdqa64 %3, %%zmm8\n\t"
77 "vpxorq %4, %%zmm1, %%zmm1\n\t"
78 "vpxorq %5, %%zmm9, %%zmm9\n\t"
79 "vpxorq %6, %%zmm0, %%zmm0\n\t"
80 "vpxorq %7, %%zmm8, %%zmm8"
81 :
82 : "m" (q[0]), "m" (q[64]), "m" (p[0]),
83 "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
84 "m" (dp[0]), "m" (dp[64]));
85
86 /*
87 * 1 = dq[0] ^ q[0]
88 * 9 = dq[64] ^ q[64]
89 * 0 = dp[0] ^ p[0]
90 * 8 = dp[64] ^ p[64]
91 */
92
93 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
94 "vbroadcasti64x2 %1, %%zmm5"
95 :
96 : "m" (qmul[0]), "m" (qmul[16]));
97
98 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
99 "vpsraw $4, %%zmm9, %%zmm12\n\t"
100 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
101 "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
102 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
103 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
104 "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
105 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
106 "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
107 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
108 "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
109 "vpxorq %%zmm4, %%zmm5, %%zmm5"
110 :
111 : );
112
113 /*
114 * 5 = qx[0]
115 * 15 = qx[64]
116 */
117
118 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
119 "vbroadcasti64x2 %1, %%zmm1\n\t"
120 "vpsraw $4, %%zmm0, %%zmm2\n\t"
121 "vpsraw $4, %%zmm8, %%zmm6\n\t"
122 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
123 "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
124 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
125 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
126 "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
127 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
128 "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
129 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
130 "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
131 "vpxorq %%zmm12, %%zmm13, %%zmm13"
132 :
133 : "m" (pbmul[0]), "m" (pbmul[16]));
134
135 /*
136 * 1 = pbmul[px[0]]
137 * 13 = pbmul[px[64]]
138 */
139 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
140 "vpxorq %%zmm15, %%zmm13, %%zmm13"
141 :
142 : );
143
144 /*
145 * 1 = db = DQ
146 * 13 = db[64] = DQ[64]
147 */
148 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
149 "vmovdqa64 %%zmm13,%1\n\t"
150 "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
151 "vpxorq %%zmm13, %%zmm8, %%zmm8"
152 :
153 : "m" (dq[0]), "m" (dq[64]));
154
155 asm volatile("vmovdqa64 %%zmm0, %0\n\t"
156 "vmovdqa64 %%zmm8, %1"
157 :
158 : "m" (dp[0]), "m" (dp[64]));
159
160 bytes -= 128;
161 p += 128;
162 q += 128;
163 dp += 128;
164 dq += 128;
165#else
166 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
167 "vmovdqa64 %1, %%zmm0\n\t"
168 "vpxorq %2, %%zmm1, %%zmm1\n\t"
169 "vpxorq %3, %%zmm0, %%zmm0"
170 :
171 : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));
172
173 /* 1 = dq ^ q; 0 = dp ^ p */
174
175 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
176 "vbroadcasti64x2 %1, %%zmm5"
177 :
178 : "m" (qmul[0]), "m" (qmul[16]));
179
180 /*
181 * 1 = dq ^ q
182 * 3 = (dq ^ q) >> 4
183 */
184 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
185 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
186 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
187 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
188 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
189 "vpxorq %%zmm4, %%zmm5, %%zmm5"
190 :
191 : );
192
193 /* 5 = qx */
194
195 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
196 "vbroadcasti64x2 %1, %%zmm1"
197 :
198 : "m" (pbmul[0]), "m" (pbmul[16]));
199
200 asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
201 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
202 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
203 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
204 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
205 "vpxorq %%zmm4, %%zmm1, %%zmm1"
206 :
207 : );
208
209 /* 1 = pbmul[px] */
210 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
211 /* 1 = db = DQ */
212 "vmovdqa64 %%zmm1, %0\n\t"
213 :
214 : "m" (dq[0]));
215
216 asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
217 "vmovdqa64 %%zmm0, %0"
218 :
219 : "m" (dp[0]));
220
221 bytes -= 64;
222 p += 64;
223 q += 64;
224 dp += 64;
225 dq += 64;
226#endif
227 }
228
229 kernel_fpu_end();
230}
231
232static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
233 void **ptrs)
234{
235 u8 *p, *q, *dq;
236 const u8 *qmul; /* Q multiplier table */
237 const u8 x0f = 0x0f;
238
239 p = (u8 *)ptrs[disks-2];
240 q = (u8 *)ptrs[disks-1];
241
242 /*
243 * Compute syndrome with zero for the missing data page
244 * Use the dead data page as temporary storage for delta q
245 */
246
247 dq = (u8 *)ptrs[faila];
248 ptrs[faila] = (void *)raid6_empty_zero_page;
249 ptrs[disks-1] = dq;
250
251 raid6_call.gen_syndrome(disks, bytes, ptrs);
252
253 /* Restore pointer table */
254 ptrs[faila] = dq;
255 ptrs[disks-1] = q;
256
257 /* Now, pick the proper data tables */
258 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
259
260 kernel_fpu_begin();
261
262 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
263
264 while (bytes) {
265#ifdef CONFIG_X86_64
266 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
267 "vmovdqa64 %1, %%zmm8\n\t"
268 "vpxorq %2, %%zmm3, %%zmm3\n\t"
269 "vpxorq %3, %%zmm8, %%zmm8"
270 :
271 : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
272 "m" (q[64]));
273
274 /*
275 * 3 = q[0] ^ dq[0]
276 * 8 = q[64] ^ dq[64]
277 */
278 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
279 "vmovapd %%zmm0, %%zmm13\n\t"
280 "vbroadcasti64x2 %1, %%zmm1\n\t"
281 "vmovapd %%zmm1, %%zmm14"
282 :
283 : "m" (qmul[0]), "m" (qmul[16]));
284
285 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
286 "vpsraw $4, %%zmm8, %%zmm12\n\t"
287 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
288 "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
289 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
290 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
291 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
292 "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
293 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
294 "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
295 "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
296 "vpxorq %%zmm13, %%zmm14, %%zmm14"
297 :
298 : );
299
300 /*
301 * 1 = qmul[q[0] ^ dq[0]]
302 * 14 = qmul[q[64] ^ dq[64]]
303 */
304 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
305 "vmovdqa64 %1, %%zmm12\n\t"
306 "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
307 "vpxorq %%zmm14, %%zmm12, %%zmm12"
308 :
309 : "m" (p[0]), "m" (p[64]));
310
311 /*
312 * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
313 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
314 */
315
316 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
317 "vmovdqa64 %%zmm14, %1\n\t"
318 "vmovdqa64 %%zmm2, %2\n\t"
319 "vmovdqa64 %%zmm12,%3"
320 :
321 : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
322 "m" (p[64]));
323
324 bytes -= 128;
325 p += 128;
326 q += 128;
327 dq += 128;
328#else
329 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
330 "vpxorq %1, %%zmm3, %%zmm3"
331 :
332 : "m" (dq[0]), "m" (q[0]));
333
334 /* 3 = q ^ dq */
335
336 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
337 "vbroadcasti64x2 %1, %%zmm1"
338 :
339 : "m" (qmul[0]), "m" (qmul[16]));
340
341 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
342 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
343 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
344 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
345 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
346 "vpxorq %%zmm0, %%zmm1, %%zmm1"
347 :
348 : );
349
350 /* 1 = qmul[q ^ dq] */
351
352 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
353 "vpxorq %%zmm1, %%zmm2, %%zmm2"
354 :
355 : "m" (p[0]));
356
357 /* 2 = p ^ qmul[q ^ dq] */
358
359 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
360 "vmovdqa64 %%zmm2, %1"
361 :
362 : "m" (dq[0]), "m" (p[0]));
363
364 bytes -= 64;
365 p += 64;
366 q += 64;
367 dq += 64;
368#endif
369 }
370
371 kernel_fpu_end();
372}
373
374const struct raid6_recov_calls raid6_recov_avx512 = {
375 .data2 = raid6_2data_recov_avx512,
376 .datap = raid6_datap_recov_avx512,
377 .valid = raid6_has_avx512,
378#ifdef CONFIG_X86_64
379 .name = "avx512x2",
380#else
381 .name = "avx512x1",
382#endif
383 .priority = 3,
384};
385
386#else
387#warning "your version of binutils lacks AVX512 support"
388#endif
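The vpsraw/vpandq/vpshufb blocks above perform a full GF(2^8) multiplication by a constant as two 16-entry table lookups, one per nibble, XORed together; a 32-byte layout for raid6_vgfmul[c] (low-nibble products first, then high-nibble products) is assumed here. A standalone scalar sketch, with a plain multiply used only to build the table:

#include <stdint.h>
#include <stdio.h>

/* Plain GF(2^8) multiply (shift-and-add), used only to build the table. */
static uint8_t gf_mul(uint8_t a, uint8_t b)
{
	uint8_t r = 0;

	while (b) {
		if (b & 1)
			r ^= a;
		a = (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1d : 0));
		b >>= 1;
	}
	return r;
}

/* Build the split table for multiplication by c: products of c with the 16
 * low-nibble values, then with the 16 high-nibble values. */
static void build_tbl(uint8_t c, uint8_t tbl[32])
{
	int i;

	for (i = 0; i < 16; i++) {
		tbl[i]      = gf_mul(c, i);
		tbl[i + 16] = gf_mul(c, i << 4);
	}
}

/* The scalar equivalent of the two vpshufb lookups plus vpxorq. */
static uint8_t gf_mul_tbl(const uint8_t tbl[32], uint8_t x)
{
	return tbl[x & 0x0f] ^ tbl[16 + (x >> 4)];
}

int main(void)
{
	uint8_t tbl[32], c = 0xc3, x = 0x5a;

	build_tbl(c, tbl);
	printf("0x%02x (split tables) vs 0x%02x (direct)\n",
	       gf_mul_tbl(tbl, x), gf_mul(c, x));
	return 0;
}

raid6_2data_recov_avx512() applies this with the pbmul and qmul tables and XORs the results to rebuild both missing blocks, which is the same computation the byte-wise loop in recov_s390xc.c below performs with full 256-entry raid6_gfmul[] tables.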
diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c
new file mode 100644
index 000000000000..b042dac826cc
--- /dev/null
+++ b/lib/raid6/recov_s390xc.c
@@ -0,0 +1,116 @@
1/*
2 * RAID-6 data recovery in dual failure mode based on the XC instruction.
3 *
4 * Copyright IBM Corp. 2016
5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
6 */
7
8#include <linux/export.h>
9#include <linux/raid/pq.h>
10
11static inline void xor_block(u8 *p1, u8 *p2)
12{
13 typedef struct { u8 _[256]; } addrtype;
14
15 asm volatile(
16 " xc 0(256,%[p1]),0(%[p2])\n"
17 : "+m" (*(addrtype *) p1) : "m" (*(addrtype *) p2),
18 [p1] "a" (p1), [p2] "a" (p2) : "cc");
19}
20
21/* Recover two failed data blocks. */
22static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila,
23 int failb, void **ptrs)
24{
25 u8 *p, *q, *dp, *dq;
26 const u8 *pbmul; /* P multiplier table for B data */
27 const u8 *qmul; /* Q multiplier table (for both) */
28 int i;
29
30 p = (u8 *)ptrs[disks-2];
31 q = (u8 *)ptrs[disks-1];
32
33 /* Compute syndrome with zero for the missing data pages
34 Use the dead data pages as temporary storage for
35 delta p and delta q */
36 dp = (u8 *)ptrs[faila];
37 ptrs[faila] = (void *)raid6_empty_zero_page;
38 ptrs[disks-2] = dp;
39 dq = (u8 *)ptrs[failb];
40 ptrs[failb] = (void *)raid6_empty_zero_page;
41 ptrs[disks-1] = dq;
42
43 raid6_call.gen_syndrome(disks, bytes, ptrs);
44
45 /* Restore pointer table */
46 ptrs[faila] = dp;
47 ptrs[failb] = dq;
48 ptrs[disks-2] = p;
49 ptrs[disks-1] = q;
50
51 /* Now, pick the proper data tables */
52 pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
53 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
54
55 /* Now do it... */
56 while (bytes) {
57 xor_block(dp, p);
58 xor_block(dq, q);
59 for (i = 0; i < 256; i++)
60 dq[i] = pbmul[dp[i]] ^ qmul[dq[i]];
61 xor_block(dp, dq);
62 p += 256;
63 q += 256;
64 dp += 256;
65 dq += 256;
66 bytes -= 256;
67 }
68}
69
70/* Recover failure of one data block plus the P block */
71static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila,
72 void **ptrs)
73{
74 u8 *p, *q, *dq;
75 const u8 *qmul; /* Q multiplier table */
76 int i;
77
78 p = (u8 *)ptrs[disks-2];
79 q = (u8 *)ptrs[disks-1];
80
81 /* Compute syndrome with zero for the missing data page
82 Use the dead data page as temporary storage for delta q */
83 dq = (u8 *)ptrs[faila];
84 ptrs[faila] = (void *)raid6_empty_zero_page;
85 ptrs[disks-1] = dq;
86
87 raid6_call.gen_syndrome(disks, bytes, ptrs);
88
89 /* Restore pointer table */
90 ptrs[faila] = dq;
91 ptrs[disks-1] = q;
92
93 /* Now, pick the proper data tables */
94 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
95
96 /* Now do it... */
97 while (bytes) {
98 xor_block(dq, q);
99 for (i = 0; i < 256; i++)
100 dq[i] = qmul[dq[i]];
101 xor_block(p, dq);
102 p += 256;
103 q += 256;
104 dq += 256;
105 bytes -= 256;
106 }
107}
108
109
110const struct raid6_recov_calls raid6_recov_s390xc = {
111 .data2 = raid6_2data_recov_s390xc,
112 .datap = raid6_datap_recov_s390xc,
113 .valid = NULL,
114 .name = "s390xc",
115 .priority = 1,
116};
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
new file mode 100644
index 000000000000..7b45191a655f
--- /dev/null
+++ b/lib/raid6/s390vx.uc
@@ -0,0 +1,168 @@
1/*
2 * raid6_vx$#.c
3 *
4 * $#-way unrolled RAID6 gen/xor functions for s390
5 * based on the vector facility
6 *
7 * Copyright IBM Corp. 2016
8 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
9 *
10 * This file is postprocessed using unroll.awk.
11 */
12
13#include <linux/raid/pq.h>
14#include <asm/fpu/api.h>
15
16asm(".include \"asm/vx-insn.h\"\n");
17
18#define NSIZE 16
19
20static inline void LOAD_CONST(void)
21{
22 asm volatile("VREPIB %v24,7");
23 asm volatile("VREPIB %v25,0x1d");
24}
25
26/*
27 * The SHLBYTE() operation shifts each of the 16 bytes in
28 * vector register y left by 1 bit and stores the result in
29 * vector register x.
30 */
31static inline void SHLBYTE(int x, int y)
32{
33 asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y));
34}
35
36/*
37 * For each of the 16 bytes in the vector register y the MASK()
38 * operation returns 0xFF if the high bit of the byte is 1,
39 * or 0x00 if the high bit is 0. The result is stored in vector
40 * register x.
41 */
42static inline void MASK(int x, int y)
43{
44 asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y));
45}
46
47static inline void AND(int x, int y, int z)
48{
49 asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
50}
51
52static inline void XOR(int x, int y, int z)
53{
54 asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
55}
56
57static inline void LOAD_DATA(int x, int n, u8 *ptr)
58{
59 typedef struct { u8 _[16*n]; } addrtype;
60 register addrtype *__ptr asm("1") = (addrtype *) ptr;
61
62 asm volatile ("VLM %2,%3,0,%r1"
63 : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1));
64}
65
66static inline void STORE_DATA(int x, int n, u8 *ptr)
67{
68 typedef struct { u8 _[16*n]; } addrtype;
69 register addrtype *__ptr asm("1") = (addrtype *) ptr;
70
71 asm volatile ("VSTM %2,%3,0,1"
72 : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1));
73}
74
75static inline void COPY_VEC(int x, int y)
76{
77 asm volatile ("VLR %0,%1" : : "i" (x), "i" (y));
78}
79
80static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
81{
82 struct kernel_fpu vxstate;
83 u8 **dptr, *p, *q;
84 int d, z, z0;
85
86 kernel_fpu_begin(&vxstate, KERNEL_VXR);
87 LOAD_CONST();
88
89 dptr = (u8 **) ptrs;
90 z0 = disks - 3; /* Highest data disk */
91 p = dptr[z0 + 1]; /* XOR parity */
92 q = dptr[z0 + 2]; /* RS syndrome */
93
94 for (d = 0; d < bytes; d += $#*NSIZE) {
95 LOAD_DATA(0,$#,&dptr[z0][d]);
96 COPY_VEC(8+$$,0+$$);
97 for (z = z0 - 1; z >= 0; z--) {
98 MASK(16+$$,8+$$);
99 AND(16+$$,16+$$,25);
100 SHLBYTE(8+$$,8+$$);
101 XOR(8+$$,8+$$,16+$$);
102 LOAD_DATA(16,$#,&dptr[z][d]);
103 XOR(0+$$,0+$$,16+$$);
104 XOR(8+$$,8+$$,16+$$);
105 }
106 STORE_DATA(0,$#,&p[d]);
107 STORE_DATA(8,$#,&q[d]);
108 }
109 kernel_fpu_end(&vxstate, KERNEL_VXR);
110}
111
112static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
113 size_t bytes, void **ptrs)
114{
115 struct kernel_fpu vxstate;
116 u8 **dptr, *p, *q;
117 int d, z, z0;
118
119 dptr = (u8 **) ptrs;
120 z0 = stop; /* P/Q right side optimization */
121 p = dptr[disks - 2]; /* XOR parity */
122 q = dptr[disks - 1]; /* RS syndrome */
123
124 kernel_fpu_begin(&vxstate, KERNEL_VXR);
125 LOAD_CONST();
126
127 for (d = 0; d < bytes; d += $#*NSIZE) {
128 /* P/Q data pages */
129 LOAD_DATA(0,$#,&dptr[z0][d]);
130 COPY_VEC(8+$$,0+$$);
131 for (z = z0 - 1; z >= start; z--) {
132 MASK(16+$$,8+$$);
133 AND(16+$$,16+$$,25);
134 SHLBYTE(8+$$,8+$$);
135 XOR(8+$$,8+$$,16+$$);
136 LOAD_DATA(16,$#,&dptr[z][d]);
137 XOR(0+$$,0+$$,16+$$);
138 XOR(8+$$,8+$$,16+$$);
139 }
140 /* P/Q left side optimization */
141 for (z = start - 1; z >= 0; z--) {
142 MASK(16+$$,8+$$);
143 AND(16+$$,16+$$,25);
144 SHLBYTE(8+$$,8+$$);
145 XOR(8+$$,8+$$,16+$$);
146 }
147 LOAD_DATA(16,$#,&p[d]);
148 XOR(16+$$,16+$$,0+$$);
149 STORE_DATA(16,$#,&p[d]);
150 LOAD_DATA(16,$#,&q[d]);
151 XOR(16+$$,16+$$,8+$$);
152 STORE_DATA(16,$#,&q[d]);
153 }
154 kernel_fpu_end(&vxstate, KERNEL_VXR);
155}
156
157static int raid6_s390vx$#_valid(void)
158{
159 return MACHINE_HAS_VX;
160}
161
162const struct raid6_calls raid6_s390vx$# = {
163 raid6_s390vx$#_gen_syndrome,
164 raid6_s390vx$#_xor_syndrome,
165 raid6_s390vx$#_valid,
166 "vx128x$#",
167 1
168};
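s390vx.uc is not compiled directly: the lib/raid6/Makefile hunk above feeds it through unroll.awk to generate s390vx8.c. The assumed behaviour of the markers, shown here for an unroll factor of 2 (comments only, since the template line is not valid C before expansion): $# is replaced by the factor, and every line containing $$ is emitted once per index 0..$#-1 with $$ replaced by that index.

/* Template lines in s390vx.uc:
 *	for (d = 0; d < bytes; d += $#*NSIZE) {
 *		COPY_VEC(8+$$,0+$$);
 *
 * Generated lines after "awk -f unroll.awk -vN=2 s390vx.uc":
 *	for (d = 0; d < bytes; d += 2*NSIZE) {
 *		COPY_VEC(8+0,0+0);
 *		COPY_VEC(8+1,0+1);
 */

The same mechanism produces the int$#.c, neon$#.c and tilegx8.c variants listed earlier in that Makefile.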
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 29090f3db677..2c7b60edea04 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -32,10 +32,13 @@ ifeq ($(ARCH),arm64)
32endif 32endif
33 33
34ifeq ($(IS_X86),yes) 34ifeq ($(IS_X86),yes)
35 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o 35 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
36 CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ 36 CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \
37 gcc -c -x assembler - >&/dev/null && \ 37 gcc -c -x assembler - >&/dev/null && \
38 rm ./-.o && echo -DCONFIG_AS_AVX2=1) 38 rm ./-.o && echo -DCONFIG_AS_AVX2=1)
39 CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \
40 gcc -c -x assembler - >&/dev/null && \
41 rm ./-.o && echo -DCONFIG_AS_AVX512=1)
39else ifeq ($(HAS_NEON),yes) 42else ifeq ($(HAS_NEON),yes)
40 OBJS += neon.o neon1.o neon2.o neon4.o neon8.o 43 OBJS += neon.o neon1.o neon2.o neon4.o neon8.o
41 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 44 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 3bebbabdb510..b07f4d8e6b03 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -21,12 +21,13 @@
21 21
22#define NDISKS 16 /* Including P and Q */ 22#define NDISKS 16 /* Including P and Q */
23 23
24const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); 24const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
25struct raid6_calls raid6_call; 25struct raid6_calls raid6_call;
26 26
27char *dataptrs[NDISKS]; 27char *dataptrs[NDISKS];
28char data[NDISKS][PAGE_SIZE]; 28char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
29char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; 29char recovi[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
30char recovj[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
30 31
31static void makedata(int start, int stop) 32static void makedata(int start, int stop)
32{ 33{
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index 8fe9d9662abb..834d268a4b05 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -46,6 +46,16 @@ static inline void kernel_fpu_end(void)
46#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ 46#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
47#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ 47#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
48#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ 48#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
49#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */
50#define X86_FEATURE_AVX512DQ (9*32+17) /* AVX-512 DQ (Double/Quad granular)
51 * Instructions
52 */
53#define X86_FEATURE_AVX512BW (9*32+30) /* AVX-512 BW (Byte/Word granular)
54 * Instructions
55 */
56#define X86_FEATURE_AVX512VL (9*32+31) /* AVX-512 VL (128/256 Vector Length)
57 * Extensions
58 */
49#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ 59#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
50 60
51/* Should work well enough on modern CPUs for testing */ 61/* Should work well enough on modern CPUs for testing */
diff --git a/lib/random32.c b/lib/random32.c
index 510d1ce7d4d2..fa594b1140e6 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -47,7 +47,7 @@ static inline void prandom_state_selftest(void)
47} 47}
48#endif 48#endif
49 49
50static DEFINE_PER_CPU(struct rnd_state, net_rand_state); 50static DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy;
51 51
52/** 52/**
53 * prandom_u32_state - seeded pseudo-random number generator. 53 * prandom_u32_state - seeded pseudo-random number generator.
@@ -81,7 +81,7 @@ u32 prandom_u32(void)
81 u32 res; 81 u32 res;
82 82
83 res = prandom_u32_state(state); 83 res = prandom_u32_state(state);
84 put_cpu_var(state); 84 put_cpu_var(net_rand_state);
85 85
86 return res; 86 return res;
87} 87}
@@ -128,7 +128,7 @@ void prandom_bytes(void *buf, size_t bytes)
128 struct rnd_state *state = &get_cpu_var(net_rand_state); 128 struct rnd_state *state = &get_cpu_var(net_rand_state);
129 129
130 prandom_bytes_state(state, buf, bytes); 130 prandom_bytes_state(state, buf, bytes);
131 put_cpu_var(state); 131 put_cpu_var(net_rand_state);
132} 132}
133EXPORT_SYMBOL(prandom_bytes); 133EXPORT_SYMBOL(prandom_bytes);
134 134
@@ -233,7 +233,6 @@ static void __prandom_timer(unsigned long dontcare)
233 233
234static void __init __prandom_start_seed_timer(void) 234static void __init __prandom_start_seed_timer(void)
235{ 235{
236 set_timer_slack(&seed_timer, HZ);
237 seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC); 236 seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC);
238 add_timer(&seed_timer); 237 add_timer(&seed_timer);
239} 238}
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 2c5de86460c5..08f8043cac61 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -46,12 +46,14 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
46 rs->begin = jiffies; 46 rs->begin = jiffies;
47 47
48 if (time_is_before_jiffies(rs->begin + rs->interval)) { 48 if (time_is_before_jiffies(rs->begin + rs->interval)) {
49 if (rs->missed) 49 if (rs->missed) {
50 printk(KERN_WARNING "%s: %d callbacks suppressed\n", 50 if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
51 func, rs->missed); 51 pr_warn("%s: %d callbacks suppressed\n", func, rs->missed);
52 rs->missed = 0;
53 }
54 }
52 rs->begin = jiffies; 55 rs->begin = jiffies;
53 rs->printed = 0; 56 rs->printed = 0;
54 rs->missed = 0;
55 } 57 }
56 if (rs->burst && rs->burst > rs->printed) { 58 if (rs->burst && rs->burst > rs->printed) {
57 rs->printed++; 59 rs->printed++;
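For reference, the interval/burst/missed accounting that ___ratelimit() implements can be sketched in plain userspace C as below. Locking, jiffies and the new RATELIMIT_MSG_ON_RELEASE flag (whose only effect in the hunk above is to suppress the "callbacks suppressed" message) are left out, and the names are made up for the example.

#include <stdbool.h>
#include <stdio.h>

struct ratelimit {
	long begin, interval;	/* current window start and its length */
	int burst, printed, missed;
};

static bool ratelimit_ok(struct ratelimit *rs, long now, const char *func)
{
	if (!rs->interval)
		return true;			/* rate limiting disabled */

	if (now - rs->begin > rs->interval) {	/* window expired: reopen it */
		if (rs->missed) {
			printf("%s: %d callbacks suppressed\n", func, rs->missed);
			rs->missed = 0;
		}
		rs->begin = now;
		rs->printed = 0;
	}
	if (rs->printed < rs->burst) {		/* still within the burst */
		rs->printed++;
		return true;
	}
	rs->missed++;				/* counted, reported next window */
	return false;
}

int main(void)
{
	struct ratelimit rs = { .interval = 10, .burst = 2 };
	long now;

	for (now = 1; now <= 25; now++)
		if (ratelimit_ok(&rs, now, __func__))
			printf("tick %ld allowed\n", now);
	return 0;
}

The change above also moves the rs->missed reset next to the message, so a caller that sets RATELIMIT_MSG_ON_RELEASE keeps the count until it is reported elsewhere.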
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 1356454e36de..eb8a19fee110 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -539,17 +539,39 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
539{ 539{
540 struct rb_node *parent = rb_parent(victim); 540 struct rb_node *parent = rb_parent(victim);
541 541
542 /* Copy the pointers/colour from the victim to the replacement */
543 *new = *victim;
544
542 /* Set the surrounding nodes to point to the replacement */ 545 /* Set the surrounding nodes to point to the replacement */
543 __rb_change_child(victim, new, parent, root);
544 if (victim->rb_left) 546 if (victim->rb_left)
545 rb_set_parent(victim->rb_left, new); 547 rb_set_parent(victim->rb_left, new);
546 if (victim->rb_right) 548 if (victim->rb_right)
547 rb_set_parent(victim->rb_right, new); 549 rb_set_parent(victim->rb_right, new);
550 __rb_change_child(victim, new, parent, root);
551}
552EXPORT_SYMBOL(rb_replace_node);
553
554void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
555 struct rb_root *root)
556{
557 struct rb_node *parent = rb_parent(victim);
548 558
549 /* Copy the pointers/colour from the victim to the replacement */ 559 /* Copy the pointers/colour from the victim to the replacement */
550 *new = *victim; 560 *new = *victim;
561
562 /* Set the surrounding nodes to point to the replacement */
563 if (victim->rb_left)
564 rb_set_parent(victim->rb_left, new);
565 if (victim->rb_right)
566 rb_set_parent(victim->rb_right, new);
567
568 /* Set the parent's pointer to the new node last after an RCU barrier
569 * so that the pointers onwards are seen to be set correctly when doing
570 * an RCU walk over the tree.
571 */
572 __rb_change_child_rcu(victim, new, parent, root);
551} 573}
552EXPORT_SYMBOL(rb_replace_node); 574EXPORT_SYMBOL(rb_replace_node_rcu);
553 575
554static struct rb_node *rb_left_deepest_node(const struct rb_node *node) 576static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
555{ 577{
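The ordering that the comment in rb_replace_node_rcu() describes is the standard RCU publish rule: finish initialising the replacement, then make it reachable with a single rcu_assign_pointer() store. Reduced to a made-up singly linked structure (kernel-style sketch, illustrative only):

#include <linux/rcupdate.h>

struct item {
	struct item *next;
	int key;
};

static void replace_item_rcu(struct item **slot, struct item *victim,
			     struct item *new)
{
	/* Finish writing every field of the replacement first... */
	*new = *victim;

	/* ...then publish it with one pointer store that carries a release
	 * barrier, so an RCU reader following *slot can never observe a
	 * half-initialised node.  __rb_change_child_rcu() plays the same
	 * role for the parent's child pointer in the rbtree version above. */
	rcu_assign_pointer(*slot, new);
}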
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 5d845ffd7982..32d0ad058380 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -30,7 +30,7 @@
30 30
31#define HASH_DEFAULT_SIZE 64UL 31#define HASH_DEFAULT_SIZE 64UL
32#define HASH_MIN_SIZE 4U 32#define HASH_MIN_SIZE 4U
33#define BUCKET_LOCKS_PER_CPU 128UL 33#define BUCKET_LOCKS_PER_CPU 32UL
34 34
35static u32 head_hashfn(struct rhashtable *ht, 35static u32 head_hashfn(struct rhashtable *ht,
36 const struct bucket_table *tbl, 36 const struct bucket_table *tbl,
@@ -70,21 +70,25 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
70 unsigned int nr_pcpus = num_possible_cpus(); 70 unsigned int nr_pcpus = num_possible_cpus();
71#endif 71#endif
72 72
73 nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); 73 nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
74 size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); 74 size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
75 75
76 /* Never allocate more than 0.5 locks per bucket */ 76 /* Never allocate more than 0.5 locks per bucket */
77 size = min_t(unsigned int, size, tbl->size >> 1); 77 size = min_t(unsigned int, size, tbl->size >> 1);
78 78
79 if (sizeof(spinlock_t) != 0) { 79 if (sizeof(spinlock_t) != 0) {
80 tbl->locks = NULL;
80#ifdef CONFIG_NUMA 81#ifdef CONFIG_NUMA
81 if (size * sizeof(spinlock_t) > PAGE_SIZE && 82 if (size * sizeof(spinlock_t) > PAGE_SIZE &&
82 gfp == GFP_KERNEL) 83 gfp == GFP_KERNEL)
83 tbl->locks = vmalloc(size * sizeof(spinlock_t)); 84 tbl->locks = vmalloc(size * sizeof(spinlock_t));
84 else
85#endif 85#endif
86 tbl->locks = kmalloc_array(size, sizeof(spinlock_t), 86 if (gfp != GFP_KERNEL)
87 gfp); 87 gfp |= __GFP_NOWARN | __GFP_NORETRY;
88
89 if (!tbl->locks)
90 tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
91 gfp);
88 if (!tbl->locks) 92 if (!tbl->locks)
89 return -ENOMEM; 93 return -ENOMEM;
90 for (i = 0; i < size; i++) 94 for (i = 0; i < size; i++)
@@ -321,12 +325,14 @@ static int rhashtable_expand(struct rhashtable *ht)
321static int rhashtable_shrink(struct rhashtable *ht) 325static int rhashtable_shrink(struct rhashtable *ht)
322{ 326{
323 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); 327 struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
324 unsigned int size; 328 unsigned int nelems = atomic_read(&ht->nelems);
329 unsigned int size = 0;
325 int err; 330 int err;
326 331
327 ASSERT_RHT_MUTEX(ht); 332 ASSERT_RHT_MUTEX(ht);
328 333
329 size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2); 334 if (nelems)
335 size = roundup_pow_of_two(nelems * 3 / 2);
330 if (size < ht->p.min_size) 336 if (size < ht->p.min_size)
331 size = ht->p.min_size; 337 size = ht->p.min_size;
332 338
@@ -372,22 +378,8 @@ static void rht_deferred_worker(struct work_struct *work)
372 schedule_work(&ht->run_work); 378 schedule_work(&ht->run_work);
373} 379}
374 380
375static bool rhashtable_check_elasticity(struct rhashtable *ht, 381static int rhashtable_insert_rehash(struct rhashtable *ht,
376 struct bucket_table *tbl, 382 struct bucket_table *tbl)
377 unsigned int hash)
378{
379 unsigned int elasticity = ht->elasticity;
380 struct rhash_head *head;
381
382 rht_for_each(head, tbl, hash)
383 if (!--elasticity)
384 return true;
385
386 return false;
387}
388
389int rhashtable_insert_rehash(struct rhashtable *ht,
390 struct bucket_table *tbl)
391{ 383{
392 struct bucket_table *old_tbl; 384 struct bucket_table *old_tbl;
393 struct bucket_table *new_tbl; 385 struct bucket_table *new_tbl;
@@ -433,61 +425,172 @@ fail:
433 425
434 return err; 426 return err;
435} 427}
436EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
437 428
438struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, 429static void *rhashtable_lookup_one(struct rhashtable *ht,
439 const void *key, 430 struct bucket_table *tbl, unsigned int hash,
440 struct rhash_head *obj, 431 const void *key, struct rhash_head *obj)
441 struct bucket_table *tbl)
442{ 432{
433 struct rhashtable_compare_arg arg = {
434 .ht = ht,
435 .key = key,
436 };
437 struct rhash_head __rcu **pprev;
443 struct rhash_head *head; 438 struct rhash_head *head;
444 unsigned int hash; 439 int elasticity;
445 int err;
446 440
447 tbl = rhashtable_last_table(ht, tbl); 441 elasticity = ht->elasticity;
448 hash = head_hashfn(ht, tbl, obj); 442 pprev = &tbl->buckets[hash];
449 spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); 443 rht_for_each(head, tbl, hash) {
444 struct rhlist_head *list;
445 struct rhlist_head *plist;
450 446
451 err = -EEXIST; 447 elasticity--;
452 if (key && rhashtable_lookup_fast(ht, key, ht->p)) 448 if (!key ||
453 goto exit; 449 (ht->p.obj_cmpfn ?
450 ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
451 rhashtable_compare(&arg, rht_obj(ht, head))))
452 continue;
454 453
455 err = -E2BIG; 454 if (!ht->rhlist)
456 if (unlikely(rht_grow_above_max(ht, tbl))) 455 return rht_obj(ht, head);
457 goto exit; 456
457 list = container_of(obj, struct rhlist_head, rhead);
458 plist = container_of(head, struct rhlist_head, rhead);
459
460 RCU_INIT_POINTER(list->next, plist);
461 head = rht_dereference_bucket(head->next, tbl, hash);
462 RCU_INIT_POINTER(list->rhead.next, head);
463 rcu_assign_pointer(*pprev, obj);
458 464
459 err = -EAGAIN; 465 return NULL;
460 if (rhashtable_check_elasticity(ht, tbl, hash) || 466 }
461 rht_grow_above_100(ht, tbl))
462 goto exit;
463 467
464 err = 0; 468 if (elasticity <= 0)
469 return ERR_PTR(-EAGAIN);
470
471 return ERR_PTR(-ENOENT);
472}
473
474static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
475 struct bucket_table *tbl,
476 unsigned int hash,
477 struct rhash_head *obj,
478 void *data)
479{
480 struct bucket_table *new_tbl;
481 struct rhash_head *head;
482
483 if (!IS_ERR_OR_NULL(data))
484 return ERR_PTR(-EEXIST);
485
486 if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
487 return ERR_CAST(data);
488
489 new_tbl = rcu_dereference(tbl->future_tbl);
490 if (new_tbl)
491 return new_tbl;
492
493 if (PTR_ERR(data) != -ENOENT)
494 return ERR_CAST(data);
495
496 if (unlikely(rht_grow_above_max(ht, tbl)))
497 return ERR_PTR(-E2BIG);
498
499 if (unlikely(rht_grow_above_100(ht, tbl)))
500 return ERR_PTR(-EAGAIN);
465 501
466 head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); 502 head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
467 503
468 RCU_INIT_POINTER(obj->next, head); 504 RCU_INIT_POINTER(obj->next, head);
505 if (ht->rhlist) {
506 struct rhlist_head *list;
507
508 list = container_of(obj, struct rhlist_head, rhead);
509 RCU_INIT_POINTER(list->next, NULL);
510 }
469 511
470 rcu_assign_pointer(tbl->buckets[hash], obj); 512 rcu_assign_pointer(tbl->buckets[hash], obj);
471 513
472 atomic_inc(&ht->nelems); 514 atomic_inc(&ht->nelems);
515 if (rht_grow_above_75(ht, tbl))
516 schedule_work(&ht->run_work);
473 517
474exit: 518 return NULL;
475 spin_unlock(rht_bucket_lock(tbl, hash)); 519}
476 520
477 if (err == 0) 521static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
478 return NULL; 522 struct rhash_head *obj)
479 else if (err == -EAGAIN) 523{
480 return tbl; 524 struct bucket_table *new_tbl;
481 else 525 struct bucket_table *tbl;
482 return ERR_PTR(err); 526 unsigned int hash;
527 spinlock_t *lock;
528 void *data;
529
530 tbl = rcu_dereference(ht->tbl);
531
532 /* All insertions must grab the oldest table containing
533 * the hashed bucket that is yet to be rehashed.
534 */
535 for (;;) {
536 hash = rht_head_hashfn(ht, tbl, obj, ht->p);
537 lock = rht_bucket_lock(tbl, hash);
538 spin_lock_bh(lock);
539
540 if (tbl->rehash <= hash)
541 break;
542
543 spin_unlock_bh(lock);
544 tbl = rcu_dereference(tbl->future_tbl);
545 }
546
547 data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
548 new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
549 if (PTR_ERR(new_tbl) != -EEXIST)
550 data = ERR_CAST(new_tbl);
551
552 while (!IS_ERR_OR_NULL(new_tbl)) {
553 tbl = new_tbl;
554 hash = rht_head_hashfn(ht, tbl, obj, ht->p);
555 spin_lock_nested(rht_bucket_lock(tbl, hash),
556 SINGLE_DEPTH_NESTING);
557
558 data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
559 new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
560 if (PTR_ERR(new_tbl) != -EEXIST)
561 data = ERR_CAST(new_tbl);
562
563 spin_unlock(rht_bucket_lock(tbl, hash));
564 }
565
566 spin_unlock_bh(lock);
567
568 if (PTR_ERR(data) == -EAGAIN)
569 data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
570 -EAGAIN);
571
572 return data;
573}
574
575void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
576 struct rhash_head *obj)
577{
578 void *data;
579
580 do {
581 rcu_read_lock();
582 data = rhashtable_try_insert(ht, key, obj);
583 rcu_read_unlock();
584 } while (PTR_ERR(data) == -EAGAIN);
585
586 return data;
483} 587}
484EXPORT_SYMBOL_GPL(rhashtable_insert_slow); 588EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
485 589
486/** 590/**
487 * rhashtable_walk_init - Initialise an iterator 591 * rhashtable_walk_enter - Initialise an iterator
488 * @ht: Table to walk over 592 * @ht: Table to walk over
489 * @iter: Hash table Iterator 593 * @iter: Hash table Iterator
490 * @gfp: GFP flags for allocations
491 * 594 *
492 * This function prepares a hash table walk. 595 * This function prepares a hash table walk.
493 * 596 *
@@ -502,30 +605,22 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
502 * This function may sleep so you must not call it from interrupt 605 * This function may sleep so you must not call it from interrupt
503 * context or with spin locks held. 606 * context or with spin locks held.
504 * 607 *
505 * You must call rhashtable_walk_exit if this function returns 608 * You must call rhashtable_walk_exit after this function returns.
506 * successfully.
507 */ 609 */
508int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, 610void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
509 gfp_t gfp)
510{ 611{
511 iter->ht = ht; 612 iter->ht = ht;
512 iter->p = NULL; 613 iter->p = NULL;
513 iter->slot = 0; 614 iter->slot = 0;
514 iter->skip = 0; 615 iter->skip = 0;
515 616
516 iter->walker = kmalloc(sizeof(*iter->walker), gfp);
517 if (!iter->walker)
518 return -ENOMEM;
519
520 spin_lock(&ht->lock); 617 spin_lock(&ht->lock);
521 iter->walker->tbl = 618 iter->walker.tbl =
522 rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); 619 rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
523 list_add(&iter->walker->list, &iter->walker->tbl->walkers); 620 list_add(&iter->walker.list, &iter->walker.tbl->walkers);
524 spin_unlock(&ht->lock); 621 spin_unlock(&ht->lock);
525
526 return 0;
527} 622}
528EXPORT_SYMBOL_GPL(rhashtable_walk_init); 623EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
529 624
530/** 625/**
531 * rhashtable_walk_exit - Free an iterator 626 * rhashtable_walk_exit - Free an iterator
@@ -536,10 +631,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
536void rhashtable_walk_exit(struct rhashtable_iter *iter) 631void rhashtable_walk_exit(struct rhashtable_iter *iter)
537{ 632{
538 spin_lock(&iter->ht->lock); 633 spin_lock(&iter->ht->lock);
539 if (iter->walker->tbl) 634 if (iter->walker.tbl)
540 list_del(&iter->walker->list); 635 list_del(&iter->walker.list);
541 spin_unlock(&iter->ht->lock); 636 spin_unlock(&iter->ht->lock);
542 kfree(iter->walker);
543} 637}
544EXPORT_SYMBOL_GPL(rhashtable_walk_exit); 638EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
545 639
@@ -565,12 +659,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter)
565 rcu_read_lock(); 659 rcu_read_lock();
566 660
567 spin_lock(&ht->lock); 661 spin_lock(&ht->lock);
568 if (iter->walker->tbl) 662 if (iter->walker.tbl)
569 list_del(&iter->walker->list); 663 list_del(&iter->walker.list);
570 spin_unlock(&ht->lock); 664 spin_unlock(&ht->lock);
571 665
572 if (!iter->walker->tbl) { 666 if (!iter->walker.tbl) {
573 iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); 667 iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
574 return -EAGAIN; 668 return -EAGAIN;
575 } 669 }
576 670
@@ -592,12 +686,17 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
592 */ 686 */
593void *rhashtable_walk_next(struct rhashtable_iter *iter) 687void *rhashtable_walk_next(struct rhashtable_iter *iter)
594{ 688{
595 struct bucket_table *tbl = iter->walker->tbl; 689 struct bucket_table *tbl = iter->walker.tbl;
690 struct rhlist_head *list = iter->list;
596 struct rhashtable *ht = iter->ht; 691 struct rhashtable *ht = iter->ht;
597 struct rhash_head *p = iter->p; 692 struct rhash_head *p = iter->p;
693 bool rhlist = ht->rhlist;
598 694
599 if (p) { 695 if (p) {
600 p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); 696 if (!rhlist || !(list = rcu_dereference(list->next))) {
697 p = rcu_dereference(p->next);
698 list = container_of(p, struct rhlist_head, rhead);
699 }
601 goto next; 700 goto next;
602 } 701 }
603 702
@@ -605,6 +704,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
605 int skip = iter->skip; 704 int skip = iter->skip;
606 705
607 rht_for_each_rcu(p, tbl, iter->slot) { 706 rht_for_each_rcu(p, tbl, iter->slot) {
707 if (rhlist) {
708 list = container_of(p, struct rhlist_head,
709 rhead);
710 do {
711 if (!skip)
712 goto next;
713 skip--;
714 list = rcu_dereference(list->next);
715 } while (list);
716
717 continue;
718 }
608 if (!skip) 719 if (!skip)
609 break; 720 break;
610 skip--; 721 skip--;
@@ -614,7 +725,8 @@ next:
614 if (!rht_is_a_nulls(p)) { 725 if (!rht_is_a_nulls(p)) {
615 iter->skip++; 726 iter->skip++;
616 iter->p = p; 727 iter->p = p;
617 return rht_obj(ht, p); 728 iter->list = list;
729 return rht_obj(ht, rhlist ? &list->rhead : p);
618 } 730 }
619 731
620 iter->skip = 0; 732 iter->skip = 0;
@@ -625,8 +737,8 @@ next:
625 /* Ensure we see any new tables. */ 737 /* Ensure we see any new tables. */
626 smp_rmb(); 738 smp_rmb();
627 739
628 iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht); 740 iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
629 if (iter->walker->tbl) { 741 if (iter->walker.tbl) {
630 iter->slot = 0; 742 iter->slot = 0;
631 iter->skip = 0; 743 iter->skip = 0;
632 return ERR_PTR(-EAGAIN); 744 return ERR_PTR(-EAGAIN);
@@ -646,7 +758,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
646 __releases(RCU) 758 __releases(RCU)
647{ 759{
648 struct rhashtable *ht; 760 struct rhashtable *ht;
649 struct bucket_table *tbl = iter->walker->tbl; 761 struct bucket_table *tbl = iter->walker.tbl;
650 762
651 if (!tbl) 763 if (!tbl)
652 goto out; 764 goto out;
@@ -655,9 +767,9 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
655 767
656 spin_lock(&ht->lock); 768 spin_lock(&ht->lock);
657 if (tbl->rehash < tbl->size) 769 if (tbl->rehash < tbl->size)
658 list_add(&iter->walker->list, &tbl->walkers); 770 list_add(&iter->walker.list, &tbl->walkers);
659 else 771 else
660 iter->walker->tbl = NULL; 772 iter->walker.tbl = NULL;
661 spin_unlock(&ht->lock); 773 spin_unlock(&ht->lock);
662 774
663 iter->p = NULL; 775 iter->p = NULL;
@@ -803,6 +915,48 @@ int rhashtable_init(struct rhashtable *ht,
803EXPORT_SYMBOL_GPL(rhashtable_init); 915EXPORT_SYMBOL_GPL(rhashtable_init);
804 916
805/** 917/**
918 * rhltable_init - initialize a new hash list table
919 * @hlt: hash list table to be initialized
920 * @params: configuration parameters
921 *
922 * Initializes a new hash list table.
923 *
924 * See documentation for rhashtable_init.
925 */
926int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
927{
928 int err;
929
930 /* No rhlist NULLs marking for now. */
931 if (params->nulls_base)
932 return -EINVAL;
933
934 err = rhashtable_init(&hlt->ht, params);
935 hlt->ht.rhlist = true;
936 return err;
937}
938EXPORT_SYMBOL_GPL(rhltable_init);
939
940static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
941 void (*free_fn)(void *ptr, void *arg),
942 void *arg)
943{
944 struct rhlist_head *list;
945
946 if (!ht->rhlist) {
947 free_fn(rht_obj(ht, obj), arg);
948 return;
949 }
950
951 list = container_of(obj, struct rhlist_head, rhead);
952 do {
953 obj = &list->rhead;
954 list = rht_dereference(list->next, ht);
955 free_fn(rht_obj(ht, obj), arg);
956 } while (list);
957}
958
959/**
806 * rhashtable_free_and_destroy - free elements and destroy hash table 960 * rhashtable_free_and_destroy - free elements and destroy hash table
807 * @ht: the hash table to destroy 961 * @ht: the hash table to destroy
808 * @free_fn: callback to release resources of element 962 * @free_fn: callback to release resources of element
@@ -839,7 +993,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
839 pos = next, 993 pos = next,
840 next = !rht_is_a_nulls(pos) ? 994 next = !rht_is_a_nulls(pos) ?
841 rht_dereference(pos->next, ht) : NULL) 995 rht_dereference(pos->next, ht) : NULL)
842 free_fn(rht_obj(ht, pos), arg); 996 rhashtable_free_one(ht, pos, free_fn, arg);
843 } 997 }
844 } 998 }
845 999
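The rhashtable.c changes above replace the allocating rhashtable_walk_init() with rhashtable_walk_enter(): the walker is now embedded in the iterator, so entering a walk no longer allocates and cannot fail, and rhashtable_walk_exit() no longer has anything to kfree(). A minimal sketch of a walker written against the reworked API follows; struct walk_demo_obj and walk_demo() are hypothetical names for illustration only, not part of this patch.

#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/rhashtable.h>

/* Hypothetical object layout, for illustration only. */
struct walk_demo_obj {
	int			key;
	struct rhash_head	node;
};

static void walk_demo(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	struct walk_demo_obj *obj;

	rhashtable_walk_enter(ht, &iter);	/* embeds the walker; cannot fail */
	rhashtable_walk_start(&iter);		/* -EAGAIN here only means the table moved */

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)	/* resize hit mid-walk: keep going */
				continue;
			break;
		}
		pr_info("walked key %d\n", obj->key);
	}

	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);		/* nothing left to free any more */
}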
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
new file mode 100644
index 000000000000..2cecf05c82fd
--- /dev/null
+++ b/lib/sbitmap.c
@@ -0,0 +1,347 @@
1/*
2 * Copyright (C) 2016 Facebook
3 * Copyright (C) 2013-2014 Jens Axboe
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18#include <linux/random.h>
19#include <linux/sbitmap.h>
20
21int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
22 gfp_t flags, int node)
23{
24 unsigned int bits_per_word;
25 unsigned int i;
26
27 if (shift < 0) {
28 shift = ilog2(BITS_PER_LONG);
29 /*
30 * If the bitmap is small, shrink the number of bits per word so
31 * we spread over a few cachelines, at least. If the depth is less
32 * than 4 bits, just forget about it; it's not going to work
33 * optimally anyway.
34 */
35 if (depth >= 4) {
36 while ((4U << shift) > depth)
37 shift--;
38 }
39 }
40 bits_per_word = 1U << shift;
41 if (bits_per_word > BITS_PER_LONG)
42 return -EINVAL;
43
44 sb->shift = shift;
45 sb->depth = depth;
46 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
47
48 if (depth == 0) {
49 sb->map = NULL;
50 return 0;
51 }
52
53 sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node);
54 if (!sb->map)
55 return -ENOMEM;
56
57 for (i = 0; i < sb->map_nr; i++) {
58 sb->map[i].depth = min(depth, bits_per_word);
59 depth -= sb->map[i].depth;
60 }
61 return 0;
62}
63EXPORT_SYMBOL_GPL(sbitmap_init_node);
64
65void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
66{
67 unsigned int bits_per_word = 1U << sb->shift;
68 unsigned int i;
69
70 sb->depth = depth;
71 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
72
73 for (i = 0; i < sb->map_nr; i++) {
74 sb->map[i].depth = min(depth, bits_per_word);
75 depth -= sb->map[i].depth;
76 }
77}
78EXPORT_SYMBOL_GPL(sbitmap_resize);
79
80static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint,
81 bool wrap)
82{
83 unsigned int orig_hint = hint;
84 int nr;
85
86 while (1) {
87 nr = find_next_zero_bit(&word->word, word->depth, hint);
88 if (unlikely(nr >= word->depth)) {
89 /*
90 * We started with an offset, and we didn't reset the
91 * offset to 0 in a failure case, so start from 0 to
92 * exhaust the map.
93 */
94 if (orig_hint && hint && wrap) {
95 hint = orig_hint = 0;
96 continue;
97 }
98 return -1;
99 }
100
101 if (!test_and_set_bit(nr, &word->word))
102 break;
103
104 hint = nr + 1;
105 if (hint >= word->depth - 1)
106 hint = 0;
107 }
108
109 return nr;
110}
111
112int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
113{
114 unsigned int i, index;
115 int nr = -1;
116
117 index = SB_NR_TO_INDEX(sb, alloc_hint);
118
119 for (i = 0; i < sb->map_nr; i++) {
120 nr = __sbitmap_get_word(&sb->map[index],
121 SB_NR_TO_BIT(sb, alloc_hint),
122 !round_robin);
123 if (nr != -1) {
124 nr += index << sb->shift;
125 break;
126 }
127
128 /* Jump to next index. */
129 index++;
130 alloc_hint = index << sb->shift;
131
132 if (index >= sb->map_nr) {
133 index = 0;
134 alloc_hint = 0;
135 }
136 }
137
138 return nr;
139}
140EXPORT_SYMBOL_GPL(sbitmap_get);
141
142bool sbitmap_any_bit_set(const struct sbitmap *sb)
143{
144 unsigned int i;
145
146 for (i = 0; i < sb->map_nr; i++) {
147 if (sb->map[i].word)
148 return true;
149 }
150 return false;
151}
152EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);
153
154bool sbitmap_any_bit_clear(const struct sbitmap *sb)
155{
156 unsigned int i;
157
158 for (i = 0; i < sb->map_nr; i++) {
159 const struct sbitmap_word *word = &sb->map[i];
160 unsigned long ret;
161
162 ret = find_first_zero_bit(&word->word, word->depth);
163 if (ret < word->depth)
164 return true;
165 }
166 return false;
167}
168EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
169
170unsigned int sbitmap_weight(const struct sbitmap *sb)
171{
172 unsigned int i, weight = 0;
173
174 for (i = 0; i < sb->map_nr; i++) {
175 const struct sbitmap_word *word = &sb->map[i];
176
177 weight += bitmap_weight(&word->word, word->depth);
178 }
179 return weight;
180}
181EXPORT_SYMBOL_GPL(sbitmap_weight);
182
183static unsigned int sbq_calc_wake_batch(unsigned int depth)
184{
185 unsigned int wake_batch;
186
187 /*
188 * For each batch, we wake up one queue. We need to make sure that our
189 * batch size is small enough that the full depth of the bitmap is
190 * enough to wake up all of the queues.
191 */
192 wake_batch = SBQ_WAKE_BATCH;
193 if (wake_batch > depth / SBQ_WAIT_QUEUES)
194 wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);
195
196 return wake_batch;
197}
198
199int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
200 int shift, bool round_robin, gfp_t flags, int node)
201{
202 int ret;
203 int i;
204
205 ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
206 if (ret)
207 return ret;
208
209 sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
210 if (!sbq->alloc_hint) {
211 sbitmap_free(&sbq->sb);
212 return -ENOMEM;
213 }
214
215 if (depth && !round_robin) {
216 for_each_possible_cpu(i)
217 *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
218 }
219
220 sbq->wake_batch = sbq_calc_wake_batch(depth);
221 atomic_set(&sbq->wake_index, 0);
222
223 sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
224 if (!sbq->ws) {
225 free_percpu(sbq->alloc_hint);
226 sbitmap_free(&sbq->sb);
227 return -ENOMEM;
228 }
229
230 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
231 init_waitqueue_head(&sbq->ws[i].wait);
232 atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
233 }
234
235 sbq->round_robin = round_robin;
236 return 0;
237}
238EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
239
240void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
241{
242 sbq->wake_batch = sbq_calc_wake_batch(depth);
243 sbitmap_resize(&sbq->sb, depth);
244}
245EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
246
247int __sbitmap_queue_get(struct sbitmap_queue *sbq)
248{
249 unsigned int hint, depth;
250 int nr;
251
252 hint = this_cpu_read(*sbq->alloc_hint);
253 depth = READ_ONCE(sbq->sb.depth);
254 if (unlikely(hint >= depth)) {
255 hint = depth ? prandom_u32() % depth : 0;
256 this_cpu_write(*sbq->alloc_hint, hint);
257 }
258 nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);
259
260 if (nr == -1) {
261 /* If the map is full, a hint won't do us much good. */
262 this_cpu_write(*sbq->alloc_hint, 0);
263 } else if (nr == hint || unlikely(sbq->round_robin)) {
264 /* Only update the hint if we used it. */
265 hint = nr + 1;
266 if (hint >= depth - 1)
267 hint = 0;
268 this_cpu_write(*sbq->alloc_hint, hint);
269 }
270
271 return nr;
272}
273EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
274
275static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
276{
277 int i, wake_index;
278
279 wake_index = atomic_read(&sbq->wake_index);
280 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
281 struct sbq_wait_state *ws = &sbq->ws[wake_index];
282
283 if (waitqueue_active(&ws->wait)) {
284 int o = atomic_read(&sbq->wake_index);
285
286 if (wake_index != o)
287 atomic_cmpxchg(&sbq->wake_index, o, wake_index);
288 return ws;
289 }
290
291 wake_index = sbq_index_inc(wake_index);
292 }
293
294 return NULL;
295}
296
297static void sbq_wake_up(struct sbitmap_queue *sbq)
298{
299 struct sbq_wait_state *ws;
300 int wait_cnt;
301
302 /* Ensure that the wait list checks occur after clear_bit(). */
303 smp_mb();
304
305 ws = sbq_wake_ptr(sbq);
306 if (!ws)
307 return;
308
309 wait_cnt = atomic_dec_return(&ws->wait_cnt);
310 if (unlikely(wait_cnt < 0))
311 wait_cnt = atomic_inc_return(&ws->wait_cnt);
312 if (wait_cnt == 0) {
313 atomic_add(sbq->wake_batch, &ws->wait_cnt);
314 sbq_index_atomic_inc(&sbq->wake_index);
315 wake_up(&ws->wait);
316 }
317}
318
319void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
320 unsigned int cpu)
321{
322 sbitmap_clear_bit(&sbq->sb, nr);
323 sbq_wake_up(sbq);
324 if (likely(!sbq->round_robin && nr < sbq->sb.depth))
325 *per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
326}
327EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
328
329void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
330{
331 int i, wake_index;
332
333 /*
334 * Make sure all changes prior to this are visible from other CPUs.
335 */
336 smp_mb();
337 wake_index = atomic_read(&sbq->wake_index);
338 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
339 struct sbq_wait_state *ws = &sbq->ws[wake_index];
340
341 if (waitqueue_active(&ws->wait))
342 wake_up(&ws->wait);
343
344 wake_index = sbq_index_inc(wake_index);
345 }
346}
347EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
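The new sbitmap_queue combines a scalable bitmap with per-CPU allocation hints and wake-batched wait queues. A minimal sketch of the intended allocate/free cycle follows; tag_pool_demo(), the depth of 128, and the sbitmap_queue_free() teardown helper (assumed to come from the accompanying <linux/sbitmap.h> header) are illustrative assumptions, not part of this file.

#include <linux/gfp.h>
#include <linux/sbitmap.h>
#include <linux/smp.h>

static int tag_pool_demo(void)
{
	struct sbitmap_queue sbq;
	unsigned int cpu;
	int tag, ret;

	/* 128 tags, default per-word shift, no strict round-robin. */
	ret = sbitmap_queue_init_node(&sbq, 128, -1, false,
				      GFP_KERNEL, NUMA_NO_NODE);
	if (ret)
		return ret;

	cpu = get_cpu();			/* hints are per CPU: remember where we allocated */
	tag = __sbitmap_queue_get(&sbq);	/* returns -1 when the map is full */
	put_cpu();

	if (tag >= 0)
		sbitmap_queue_clear(&sbq, tag, cpu);	/* frees the bit and wakes a waiter */

	sbitmap_queue_free(&sbq);		/* assumed helper from <linux/sbitmap.h> */
	return 0;
}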
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 53ad6c0831ae..60f77f1d470a 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -242,6 +242,7 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
242 */ 242 */
243 alloc_flags &= ~GFP_ZONEMASK; 243 alloc_flags &= ~GFP_ZONEMASK;
244 alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); 244 alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
245 alloc_flags |= __GFP_NOWARN;
245 page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER); 246 page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
246 if (page) 247 if (page)
247 prealloc = page_address(page); 248 prealloc = page_address(page);
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 33f655ef48cd..7e35fc450c5b 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -1,6 +1,7 @@
1#include <linux/compiler.h> 1#include <linux/compiler.h>
2#include <linux/export.h> 2#include <linux/export.h>
3#include <linux/kasan-checks.h> 3#include <linux/kasan-checks.h>
4#include <linux/thread_info.h>
4#include <linux/uaccess.h> 5#include <linux/uaccess.h>
5#include <linux/kernel.h> 6#include <linux/kernel.h>
6#include <linux/errno.h> 7#include <linux/errno.h>
@@ -40,8 +41,8 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
40 unsigned long c, data; 41 unsigned long c, data;
41 42
42 /* Fall back to byte-at-a-time if we get a page fault */ 43 /* Fall back to byte-at-a-time if we get a page fault */
43 if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res)))) 44 unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
44 break; 45
45 *(unsigned long *)(dst+res) = c; 46 *(unsigned long *)(dst+res) = c;
46 if (has_zero(c, &data, &constants)) { 47 if (has_zero(c, &data, &constants)) {
47 data = prep_zero_mask(c, data, &constants); 48 data = prep_zero_mask(c, data, &constants);
@@ -56,8 +57,7 @@ byte_at_a_time:
56 while (max) { 57 while (max) {
57 char c; 58 char c;
58 59
59 if (unlikely(unsafe_get_user(c,src+res))) 60 unsafe_get_user(c,src+res, efault);
60 return -EFAULT;
61 dst[res] = c; 61 dst[res] = c;
62 if (!c) 62 if (!c)
63 return res; 63 return res;
@@ -76,6 +76,7 @@ byte_at_a_time:
76 * Nope: we hit the address space limit, and we still had more 76 * Nope: we hit the address space limit, and we still had more
77 * characters the caller would have wanted. That's an EFAULT. 77 * characters the caller would have wanted. That's an EFAULT.
78 */ 78 */
79efault:
79 return -EFAULT; 80 return -EFAULT;
80} 81}
81 82
@@ -111,6 +112,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
111 long retval; 112 long retval;
112 113
113 kasan_check_write(dst, count); 114 kasan_check_write(dst, count);
115 check_object_size(dst, count, false);
114 user_access_begin(); 116 user_access_begin();
115 retval = do_strncpy_from_user(dst, src, count, max); 117 retval = do_strncpy_from_user(dst, src, count, max);
116 user_access_end(); 118 user_access_end();
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 2625943625d7..8e105ed4df12 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -45,8 +45,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
45 src -= align; 45 src -= align;
46 max += align; 46 max += align;
47 47
48 if (unlikely(unsafe_get_user(c,(unsigned long __user *)src))) 48 unsafe_get_user(c, (unsigned long __user *)src, efault);
49 return 0;
50 c |= aligned_byte_mask(align); 49 c |= aligned_byte_mask(align);
51 50
52 for (;;) { 51 for (;;) {
@@ -61,8 +60,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
61 if (unlikely(max <= sizeof(unsigned long))) 60 if (unlikely(max <= sizeof(unsigned long)))
62 break; 61 break;
63 max -= sizeof(unsigned long); 62 max -= sizeof(unsigned long);
64 if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res)))) 63 unsafe_get_user(c, (unsigned long __user *)(src+res), efault);
65 return 0;
66 } 64 }
67 res -= align; 65 res -= align;
68 66
@@ -77,6 +75,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
77 * Nope: we hit the address space limit, and we still had more 75 * Nope: we hit the address space limit, and we still had more
78 * characters the caller would have wanted. That's 0. 76 * characters the caller would have wanted. That's 0.
79 */ 77 */
78efault:
80 return 0; 79 return 0;
81} 80}
82 81
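Both conversions above switch from the two-argument unsafe_get_user(), which returned an error code, to the three-argument form that jumps to a local label on a fault. A sketch of the resulting pattern in isolation is below; read_user_long() is a hypothetical helper, not part of the patch, and note that the fault label must still close the user-access window it jumped out of.

#include <linux/errno.h>
#include <linux/uaccess.h>

static long read_user_long(const long __user *uptr, long *out)
{
	long v;

	user_access_begin();			/* open the user-access window (SMAP/PAN) */
	unsafe_get_user(v, uptr, efault);	/* on fault, jump straight to 'efault' */
	user_access_end();

	*out = v;
	return 0;

efault:
	user_access_end();			/* label path must close the window too */
	return -EFAULT;
}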
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 76f29ecba8f4..22e13a0e19d7 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -738,7 +738,7 @@ swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
738dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, 738dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
739 unsigned long offset, size_t size, 739 unsigned long offset, size_t size,
740 enum dma_data_direction dir, 740 enum dma_data_direction dir,
741 struct dma_attrs *attrs) 741 unsigned long attrs)
742{ 742{
743 phys_addr_t map, phys = page_to_phys(page) + offset; 743 phys_addr_t map, phys = page_to_phys(page) + offset;
744 dma_addr_t dev_addr = phys_to_dma(dev, phys); 744 dma_addr_t dev_addr = phys_to_dma(dev, phys);
@@ -807,7 +807,7 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
807 807
808void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, 808void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
809 size_t size, enum dma_data_direction dir, 809 size_t size, enum dma_data_direction dir,
810 struct dma_attrs *attrs) 810 unsigned long attrs)
811{ 811{
812 unmap_single(hwdev, dev_addr, size, dir); 812 unmap_single(hwdev, dev_addr, size, dir);
813} 813}
@@ -877,7 +877,7 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_device);
877 */ 877 */
878int 878int
879swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, 879swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
880 enum dma_data_direction dir, struct dma_attrs *attrs) 880 enum dma_data_direction dir, unsigned long attrs)
881{ 881{
882 struct scatterlist *sg; 882 struct scatterlist *sg;
883 int i; 883 int i;
@@ -914,7 +914,7 @@ int
914swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, 914swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
915 enum dma_data_direction dir) 915 enum dma_data_direction dir)
916{ 916{
917 return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); 917 return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, 0);
918} 918}
919EXPORT_SYMBOL(swiotlb_map_sg); 919EXPORT_SYMBOL(swiotlb_map_sg);
920 920
@@ -924,7 +924,8 @@ EXPORT_SYMBOL(swiotlb_map_sg);
924 */ 924 */
925void 925void
926swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, 926swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
927 int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) 927 int nelems, enum dma_data_direction dir,
928 unsigned long attrs)
928{ 929{
929 struct scatterlist *sg; 930 struct scatterlist *sg;
930 int i; 931 int i;
@@ -941,7 +942,7 @@ void
941swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, 942swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
942 enum dma_data_direction dir) 943 enum dma_data_direction dir)
943{ 944{
944 return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); 945 return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, 0);
945} 946}
946EXPORT_SYMBOL(swiotlb_unmap_sg); 947EXPORT_SYMBOL(swiotlb_unmap_sg);
947 948
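The swiotlb hunks are part of the tree-wide change that replaces struct dma_attrs * with a plain unsigned long bitmask of DMA_ATTR_* flags, which is why the NULL arguments above become 0. A sketch of a caller under the new convention follows; demo_map() and the choice of DMA_ATTR_SKIP_CPU_SYNC are illustrative assumptions.

#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>

static dma_addr_t demo_map(struct device *dev, struct page *page, size_t len)
{
	/* Attributes are now plain bit flags OR'ed into a single word. */
	unsigned long attrs = DMA_ATTR_SKIP_CPU_SYNC;

	return swiotlb_map_page(dev, page, 0, len, DMA_TO_DEVICE, attrs);
}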
diff --git a/lib/syscall.c b/lib/syscall.c
index e30e03932480..63239e097b13 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -7,9 +7,19 @@ static int collect_syscall(struct task_struct *target, long *callno,
7 unsigned long args[6], unsigned int maxargs, 7 unsigned long args[6], unsigned int maxargs,
8 unsigned long *sp, unsigned long *pc) 8 unsigned long *sp, unsigned long *pc)
9{ 9{
10 struct pt_regs *regs = task_pt_regs(target); 10 struct pt_regs *regs;
11 if (unlikely(!regs)) 11
12 if (!try_get_task_stack(target)) {
13 /* Task has no stack, so the task isn't in a syscall. */
14 *callno = -1;
15 return 0;
16 }
17
18 regs = task_pt_regs(target);
19 if (unlikely(!regs)) {
20 put_task_stack(target);
12 return -EAGAIN; 21 return -EAGAIN;
22 }
13 23
14 *sp = user_stack_pointer(regs); 24 *sp = user_stack_pointer(regs);
15 *pc = instruction_pointer(regs); 25 *pc = instruction_pointer(regs);
@@ -18,6 +28,7 @@ static int collect_syscall(struct task_struct *target, long *callno,
18 if (*callno != -1L && maxargs > 0) 28 if (*callno != -1L && maxargs > 0)
19 syscall_get_arguments(target, regs, 0, maxargs, args); 29 syscall_get_arguments(target, regs, 0, maxargs, args);
20 30
31 put_task_stack(target);
21 return 0; 32 return 0;
22} 33}
23 34
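collect_syscall() now takes a reference on the target's stack before touching its pt_regs, since the stack of an exiting task may be freed before the task_struct itself goes away. A sketch of the same guard pattern in isolation is below; demo_read_pc() is a hypothetical helper, not part of the patch.

#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/sched.h>

static int demo_read_pc(struct task_struct *task, unsigned long *pc)
{
	struct pt_regs *regs;

	if (!try_get_task_stack(task))
		return -ESRCH;			/* stack already gone: task has exited */

	regs = task_pt_regs(task);		/* only safe while we hold the stack */
	if (regs)
		*pc = instruction_pointer(regs);

	put_task_stack(task);
	return regs ? 0 : -EAGAIN;
}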
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 93f45011a59d..94346b4d8984 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5485,6 +5485,7 @@ static struct sk_buff *populate_skb(char *buf, int size)
5485 skb->hash = SKB_HASH; 5485 skb->hash = SKB_HASH;
5486 skb->queue_mapping = SKB_QUEUE_MAP; 5486 skb->queue_mapping = SKB_QUEUE_MAP;
5487 skb->vlan_tci = SKB_VLAN_TCI; 5487 skb->vlan_tci = SKB_VLAN_TCI;
5488 skb->vlan_proto = htons(ETH_P_IP);
5488 skb->dev = &dev; 5489 skb->dev = &dev;
5489 skb->dev->ifindex = SKB_DEV_IFINDEX; 5490 skb->dev->ifindex = SKB_DEV_IFINDEX;
5490 skb->dev->type = SKB_DEV_TYPE; 5491 skb->dev->type = SKB_DEV_TYPE;
diff --git a/lib/test_hash.c b/lib/test_hash.c
index c9549c8b4909..cac20c5fb304 100644
--- a/lib/test_hash.c
+++ b/lib/test_hash.c
@@ -143,7 +143,7 @@ static int __init
143test_hash_init(void) 143test_hash_init(void)
144{ 144{
145 char buf[SIZE+1]; 145 char buf[SIZE+1];
146 u32 string_or = 0, hash_or[2][33] = { 0 }; 146 u32 string_or = 0, hash_or[2][33] = { { 0, } };
147 unsigned tests = 0; 147 unsigned tests = 0;
148 unsigned long long h64 = 0; 148 unsigned long long h64 = 0;
149 int i, j; 149 int i, j;
@@ -155,8 +155,8 @@ test_hash_init(void)
155 buf[j] = '\0'; 155 buf[j] = '\0';
156 156
157 for (i = 0; i <= j; i++) { 157 for (i = 0; i <= j; i++) {
158 u64 hashlen = hashlen_string(buf+i); 158 u64 hashlen = hashlen_string(buf+i, buf+i);
159 u32 h0 = full_name_hash(buf+i, j-i); 159 u32 h0 = full_name_hash(buf+i, buf+i, j-i);
160 160
161 /* Check that hashlen_string gets the length right */ 161 /* Check that hashlen_string gets the length right */
162 if (hashlen_len(hashlen) != j-i) { 162 if (hashlen_len(hashlen) != j-i) {
@@ -219,21 +219,27 @@ test_hash_init(void)
219 } 219 }
220 220
221 /* Issue notices about skipped tests. */ 221 /* Issue notices about skipped tests. */
222#ifndef HAVE_ARCH__HASH_32 222#ifdef HAVE_ARCH__HASH_32
223 pr_info("__hash_32() has no arch implementation to test."); 223#if HAVE_ARCH__HASH_32 != 1
224#elif HAVE_ARCH__HASH_32 != 1
225 pr_info("__hash_32() is arch-specific; not compared to generic."); 224 pr_info("__hash_32() is arch-specific; not compared to generic.");
226#endif 225#endif
227#ifndef HAVE_ARCH_HASH_32 226#else
228 pr_info("hash_32() has no arch implementation to test."); 227 pr_info("__hash_32() has no arch implementation to test.");
229#elif HAVE_ARCH_HASH_32 != 1 228#endif
229#ifdef HAVE_ARCH_HASH_32
230#if HAVE_ARCH_HASH_32 != 1
230 pr_info("hash_32() is arch-specific; not compared to generic."); 231 pr_info("hash_32() is arch-specific; not compared to generic.");
231#endif 232#endif
232#ifndef HAVE_ARCH_HASH_64 233#else
233 pr_info("hash_64() has no arch implementation to test."); 234 pr_info("hash_32() has no arch implementation to test.");
234#elif HAVE_ARCH_HASH_64 != 1 235#endif
236#ifdef HAVE_ARCH_HASH_64
237#if HAVE_ARCH_HASH_64 != 1
235 pr_info("hash_64() is arch-specific; not compared to generic."); 238 pr_info("hash_64() is arch-specific; not compared to generic.");
236#endif 239#endif
240#else
241 pr_info("hash_64() has no arch implementation to test.");
242#endif
237 243
238 pr_notice("%u tests passed.", tests); 244 pr_notice("%u tests passed.", tests);
239 245
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 297fdb5e74bd..64e899b63337 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,7 +38,7 @@ MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)");
38 38
39static int max_size = 0; 39static int max_size = 0;
40module_param(max_size, int, 0); 40module_param(max_size, int, 0);
41MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)"); 41MODULE_PARM_DESC(max_size, "Maximum table size (default: calculated)");
42 42
43static bool shrinking = false; 43static bool shrinking = false;
44module_param(shrinking, bool, 0); 44module_param(shrinking, bool, 0);
diff --git a/lib/test_uuid.c b/lib/test_uuid.c
new file mode 100644
index 000000000000..547d3127a3cf
--- /dev/null
+++ b/lib/test_uuid.c
@@ -0,0 +1,133 @@
1/*
2 * Test cases for lib/uuid.c module.
3 */
4#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
5
6#include <linux/init.h>
7#include <linux/kernel.h>
8#include <linux/module.h>
9#include <linux/string.h>
10#include <linux/uuid.h>
11
12struct test_uuid_data {
13 const char *uuid;
14 uuid_le le;
15 uuid_be be;
16};
17
18static const struct test_uuid_data test_uuid_test_data[] = {
19 {
20 .uuid = "c33f4995-3701-450e-9fbf-206a2e98e576",
21 .le = UUID_LE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
22 .be = UUID_BE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
23 },
24 {
25 .uuid = "64b4371c-77c1-48f9-8221-29f054fc023b",
26 .le = UUID_LE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
27 .be = UUID_BE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
28 },
29 {
30 .uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84",
31 .le = UUID_LE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
32 .be = UUID_BE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
33 },
34};
35
36static const char * const test_uuid_wrong_data[] = {
37 "c33f4995-3701-450e-9fbf206a2e98e576 ", /* no hyphen(s) */
38 "64b4371c-77c1-48f9-8221-29f054XX023b", /* invalid character(s) */
39 "0cb4ddff-a545-4401-9d06-688af53e", /* not enough data */
40};
41
42static unsigned total_tests __initdata;
43static unsigned failed_tests __initdata;
44
45static void __init test_uuid_failed(const char *prefix, bool wrong, bool be,
46 const char *data, const char *actual)
47{
48 pr_err("%s test #%u %s %s data: '%s'\n",
49 prefix,
50 total_tests,
51 wrong ? "passed on wrong" : "failed on",
52 be ? "BE" : "LE",
53 data);
54 if (actual && *actual)
55 pr_err("%s test #%u actual data: '%s'\n",
56 prefix,
57 total_tests,
58 actual);
59 failed_tests++;
60}
61
62static void __init test_uuid_test(const struct test_uuid_data *data)
63{
64 uuid_le le;
65 uuid_be be;
66 char buf[48];
67
68 /* LE */
69 total_tests++;
70 if (uuid_le_to_bin(data->uuid, &le))
71 test_uuid_failed("conversion", false, false, data->uuid, NULL);
72
73 total_tests++;
74 if (uuid_le_cmp(data->le, le)) {
75 sprintf(buf, "%pUl", &le);
76 test_uuid_failed("cmp", false, false, data->uuid, buf);
77 }
78
79 /* BE */
80 total_tests++;
81 if (uuid_be_to_bin(data->uuid, &be))
82 test_uuid_failed("conversion", false, true, data->uuid, NULL);
83
84 total_tests++;
85 if (uuid_be_cmp(data->be, be)) {
86 sprintf(buf, "%pUb", &be);
87 test_uuid_failed("cmp", false, true, data->uuid, buf);
88 }
89}
90
91static void __init test_uuid_wrong(const char *data)
92{
93 uuid_le le;
94 uuid_be be;
95
96 /* LE */
97 total_tests++;
98 if (!uuid_le_to_bin(data, &le))
99 test_uuid_failed("negative", true, false, data, NULL);
100
101 /* BE */
102 total_tests++;
103 if (!uuid_be_to_bin(data, &be))
104 test_uuid_failed("negative", true, true, data, NULL);
105}
106
107static int __init test_uuid_init(void)
108{
109 unsigned int i;
110
111 for (i = 0; i < ARRAY_SIZE(test_uuid_test_data); i++)
112 test_uuid_test(&test_uuid_test_data[i]);
113
114 for (i = 0; i < ARRAY_SIZE(test_uuid_wrong_data); i++)
115 test_uuid_wrong(test_uuid_wrong_data[i]);
116
117 if (failed_tests == 0)
118 pr_info("all %u tests passed\n", total_tests);
119 else
120 pr_err("failed %u out of %u tests\n", failed_tests, total_tests);
121
122 return failed_tests ? -EINVAL : 0;
123}
124module_init(test_uuid_init);
125
126static void __exit test_uuid_exit(void)
127{
128 /* do nothing */
129}
130module_exit(test_uuid_exit);
131
132MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
133MODULE_LICENSE("Dual BSD/GPL");
diff --git a/lib/ubsan.c b/lib/ubsan.c
index 8799ae5e2e42..fb0409df1bcf 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -308,7 +308,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data,
308 return; 308 return;
309 309
310 ubsan_prologue(&data->location, &flags); 310 ubsan_prologue(&data->location, &flags);
311 pr_err("%s address %pk with insufficient space\n", 311 pr_err("%s address %p with insufficient space\n",
312 type_check_kinds[data->type_check_kind], 312 type_check_kinds[data->type_check_kind],
313 (void *) ptr); 313 (void *) ptr);
314 pr_err("for an object of type %s\n", data->type->type_name); 314 pr_err("for an object of type %s\n", data->type->type_name);
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c
index f0b323abb4c6..ae8d2491133c 100644
--- a/lib/ucs2_string.c
+++ b/lib/ucs2_string.c
@@ -56,7 +56,7 @@ ucs2_utf8size(const ucs2_char_t *src)
56 unsigned long i; 56 unsigned long i;
57 unsigned long j = 0; 57 unsigned long j = 0;
58 58
59 for (i = 0; i < ucs2_strlen(src); i++) { 59 for (i = 0; src[i]; i++) {
60 u16 c = src[i]; 60 u16 c = src[i];
61 61
62 if (c >= 0x800) 62 if (c >= 0x800)
diff --git a/lib/usercopy.c b/lib/usercopy.c
deleted file mode 100644
index 4f5b1ddbcd25..000000000000
--- a/lib/usercopy.c
+++ /dev/null
@@ -1,9 +0,0 @@
1#include <linux/export.h>
2#include <linux/bug.h>
3#include <linux/uaccess.h>
4
5void copy_from_user_overflow(void)
6{
7 WARN(1, "Buffer overflow detected!\n");
8}
9EXPORT_SYMBOL(copy_from_user_overflow);
diff --git a/lib/uuid.c b/lib/uuid.c
index e116ae5fa00f..37687af77ff8 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -106,8 +106,8 @@ static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
106 return -EINVAL; 106 return -EINVAL;
107 107
108 for (i = 0; i < 16; i++) { 108 for (i = 0; i < 16; i++) {
109 int hi = hex_to_bin(uuid[si[i]] + 0); 109 int hi = hex_to_bin(uuid[si[i] + 0]);
110 int lo = hex_to_bin(uuid[si[i]] + 1); 110 int lo = hex_to_bin(uuid[si[i] + 1]);
111 111
112 b[ei[i]] = (hi << 4) | lo; 112 b[ei[i]] = (hi << 4) | lo;
113 } 113 }
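The uuid.c fix moves the +0/+1 inside the subscript: si[i] is a position within the string, so the offset must select the second hex digit of the byte rather than increment the character value itself. The corrected indexing, shown in isolation (demo_parse_byte() is an illustrative helper, not part of the patch):

#include <linux/kernel.h>	/* hex_to_bin() */

static u8 demo_parse_byte(const char *uuid, unsigned int pos)
{
	int hi = hex_to_bin(uuid[pos + 0]);	/* first hex digit of the byte */
	int lo = hex_to_bin(uuid[pos + 1]);	/* second hex digit of the byte */

	return (hi << 4) | lo;
}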
diff --git a/lib/win_minmax.c b/lib/win_minmax.c
new file mode 100644
index 000000000000..c8420d404926
--- /dev/null
+++ b/lib/win_minmax.c
@@ -0,0 +1,98 @@
1/**
2 * lib/win_minmax.c: windowed min/max tracker
3 *
4 * Kathleen Nichols' algorithm for tracking the minimum (or maximum)
5 * value of a data stream over some fixed time interval. (E.g.,
6 * the minimum RTT over the past five minutes.) It uses constant
7 * space and constant time per update yet almost always delivers
8 * the same minimum as an implementation that has to keep all the
9 * data in the window.
10 *
11 * The algorithm keeps track of the best, 2nd best & 3rd best min
12 * values, maintaining an invariant that the measurement time of
13 * the n'th best >= n-1'th best. It also makes sure that the three
14 * values are widely separated in the time window since that bounds
15 * the worst-case error when the data is monotonically increasing
16 * over the window.
17 *
18 * Upon getting a new min, we can forget everything earlier because
19 * it has no value - the new min is <= everything else in the window
20 * by definition and it's the most recent. So we restart fresh on
21 * every new min and overwrite the 2nd & 3rd choices. The same property
22 * holds for 2nd & 3rd best.
23 */
24#include <linux/module.h>
25#include <linux/win_minmax.h>
26
27/* As time advances, update the 1st, 2nd, and 3rd choices. */
28static u32 minmax_subwin_update(struct minmax *m, u32 win,
29 const struct minmax_sample *val)
30{
31 u32 dt = val->t - m->s[0].t;
32
33 if (unlikely(dt > win)) {
34 /*
35 * Passed the entire window without a new val, so make the 2nd
36 * choice the new val & the 3rd choice the new 2nd choice.
37 * We may have to iterate this since our 2nd choice
38 * may also be outside the window (we checked on entry
39 * that the third choice was in the window).
40 */
41 m->s[0] = m->s[1];
42 m->s[1] = m->s[2];
43 m->s[2] = *val;
44 if (unlikely(val->t - m->s[0].t > win)) {
45 m->s[0] = m->s[1];
46 m->s[1] = m->s[2];
47 m->s[2] = *val;
48 }
49 } else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) {
50 /*
51 * We've passed a quarter of the window without a new val
52 * so take a 2nd choice from the 2nd quarter of the window.
53 */
54 m->s[2] = m->s[1] = *val;
55 } else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) {
56 /*
57 * We've passed half the window without finding a new val
58 * so take a 3rd choice from the last half of the window.
59 */
60 m->s[2] = *val;
61 }
62 return m->s[0].v;
63}
64
65/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */
66u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas)
67{
68 struct minmax_sample val = { .t = t, .v = meas };
69
70 if (unlikely(val.v >= m->s[0].v) || /* found new max? */
71 unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */
72 return minmax_reset(m, t, meas); /* forget earlier samples */
73
74 if (unlikely(val.v >= m->s[1].v))
75 m->s[2] = m->s[1] = val;
76 else if (unlikely(val.v >= m->s[2].v))
77 m->s[2] = val;
78
79 return minmax_subwin_update(m, win, &val);
80}
81EXPORT_SYMBOL(minmax_running_max);
82
83/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */
84u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas)
85{
86 struct minmax_sample val = { .t = t, .v = meas };
87
88 if (unlikely(val.v <= m->s[0].v) || /* found new min? */
89 unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */
90 return minmax_reset(m, t, meas); /* forget earlier samples */
91
92 if (unlikely(val.v <= m->s[1].v))
93 m->s[2] = m->s[1] = val;
94 else if (unlikely(val.v <= m->s[2].v))
95 m->s[2] = val;
96
97 return minmax_subwin_update(m, win, &val);
98}
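For completeness, a sketch of how the windowed tracker might be driven, using the minmax_running_min() added above, the minmax_reset() it relies on, and a minmax_get() accessor assumed to come from the accompanying <linux/win_minmax.h> header; demo_min_rtt() and the jiffies-based ten-second window are illustrative assumptions only.

#include <linux/jiffies.h>
#include <linux/win_minmax.h>

static u32 demo_min_rtt(struct minmax *m, u32 rtt_us)
{
	u32 now = jiffies;		/* the time base is up to the caller */
	u32 win = 10 * HZ;		/* ten-second window, in the same units */

	minmax_running_min(m, win, now, rtt_us);
	return minmax_get(m);		/* assumed accessor: current windowed minimum */
}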