author    Jonathan Corbet <corbet@lwn.net>  2016-12-27 14:53:44 -0500
committer Jonathan Corbet <corbet@lwn.net>  2016-12-27 14:53:44 -0500
commit    54ab6db0909061ab7ee07233d3cab86d29f86e6c (patch)
tree      a7650ab5c0fa3a6a3841de8e8693041b3e009054 /lib
parent    217e2bfab22e740227df09f22165e834cddd8a3b (diff)
parent    7ce7d89f48834cefece7804d38fc5d85382edf77 (diff)
Merge tag 'v4.10-rc1' into docs-next
Linux 4.10-rc1
Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig.debug                |   71
-rw-r--r--  lib/Kconfig.ubsan                |    3
-rw-r--r--  lib/Makefile                     |    1
-rw-r--r--  lib/cpu-notifier-error-inject.c  |   84
-rw-r--r--  lib/debugobjects.c               |   10
-rw-r--r--  lib/extable.c                    |    2
-rw-r--r--  lib/idr.c                        |   11
-rw-r--r--  lib/iov_iter.c                   |  154
-rw-r--r--  lib/kobject_uevent.c             |    6
-rw-r--r--  lib/kstrtox.c                    |    2
-rw-r--r--  lib/list_debug.c                 |   99
-rw-r--r--  lib/locking-selftest.c           |   66
-rw-r--r--  lib/lockref.c                    |    2
-rw-r--r--  lib/mpi/mpi-pow.c                |    7
-rw-r--r--  lib/nlattr.c                     |    2
-rw-r--r--  lib/parser.c                     |   47
-rw-r--r--  lib/percpu_counter.c             |   25
-rw-r--r--  lib/radix-tree.c                 | 1195
-rw-r--r--  lib/raid6/avx2.c                 |  232
-rw-r--r--  lib/rbtree.c                     |   23
-rw-r--r--  lib/stackdepot.c                 |    2
-rw-r--r--  lib/swiotlb.c                    |   81
-rw-r--r--  lib/test_kasan.c                 |   29
-rw-r--r--  lib/timerqueue.c                 |    4
24 files changed, 1397 insertions(+), 761 deletions(-)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1a9fa51b693b..b06848a104e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -15,6 +15,21 @@ config PRINTK_TIME
 	  The behavior is also controlled by the kernel command line
 	  parameter printk.time=1. See Documentation/admin-guide/kernel-parameters.rst
 
+config CONSOLE_LOGLEVEL_DEFAULT
+	int "Default console loglevel (1-15)"
+	range 1 15
+	default "7"
+	help
+	  Default loglevel to determine what will be printed on the console.
+
+	  Setting a default here is equivalent to passing in loglevel=<x> in
+	  the kernel bootargs. loglevel=<x> continues to override whatever
+	  value is specified here as well.
+
+	  Note: This does not affect the log level of un-prefixed printk()
+	  usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT
+	  option.
+
 config MESSAGE_LOGLEVEL_DEFAULT
 	int "Default message log level (1-7)"
 	range 1 7
@@ -26,6 +41,10 @@ config MESSAGE_LOGLEVEL_DEFAULT
 	  that are auditing their logs closely may want to set it to a lower
 	  priority.
 
+	  Note: This does not affect what message level gets printed on the console
+	  by default. To change that, use loglevel=<x> in the kernel bootargs,
+	  or pick a different CONSOLE_LOGLEVEL_DEFAULT configuration value.
+
 config BOOT_PRINTK_DELAY
 	bool "Delay each boot printk message by N milliseconds"
 	depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY
@@ -175,8 +194,8 @@ config GDB_SCRIPTS
 	  build directory. If you load vmlinux into gdb, the helper
 	  scripts will be automatically imported by gdb as well, and
 	  additional functions are available to analyze a Linux kernel
-	  instance. See Documentation/gdb-kernel-debugging.txt for further
-	  details.
+	  instance. See Documentation/dev-tools/gdb-kernel-debugging.rst
+	  for further details.
 
 config ENABLE_WARN_DEPRECATED
 	bool "Enable __deprecated logic"
@@ -523,7 +542,7 @@ config DEBUG_KMEMLEAK
 	  difference being that the orphan objects are not freed but
 	  only shown in /sys/kernel/debug/kmemleak. Enabling this
 	  feature will introduce an overhead to memory
-	  allocations. See Documentation/kmemleak.txt for more
+	  allocations. See Documentation/dev-tools/kmemleak.rst for more
 	  details.
 
 	  Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
@@ -720,7 +739,7 @@ config KCOV
 	  different machines and across reboots. If you need stable PC values,
 	  disable RANDOMIZE_BASE.
 
-	  For more details, see Documentation/kcov.txt.
+	  For more details, see Documentation/dev-tools/kcov.rst.
 
 config KCOV_INSTRUMENT_ALL
 	bool "Instrument all code by default"
@@ -1085,6 +1104,9 @@ config PROVE_LOCKING
 
 	  For more details, see Documentation/locking/lockdep-design.txt.
 
+config PROVE_LOCKING_SMALL
+	bool
+
 config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -1215,7 +1237,7 @@ config DEBUG_BUGVERBOSE
 
 config DEBUG_LIST
 	bool "Debug linked list manipulation"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION
 	help
 	  Enable this to turn on extended checks in the linked-list
 	  walking routines.
@@ -1431,7 +1453,8 @@ config RCU_TRACE
 	select TRACE_CLOCK
 	help
 	  This option provides tracing in RCU which presents stats
-	  in debugfs for debugging RCU implementation.
+	  in debugfs for debugging RCU implementation. It also enables
+	  additional tracepoints for ftrace-style event tracing.
 
 	  Say Y here if you want to enable RCU tracing
 	  Say N if you are unsure.
@@ -1515,30 +1538,6 @@ config NOTIFIER_ERROR_INJECTION
 
 	  Say N if unsure.
 
-config CPU_NOTIFIER_ERROR_INJECT
-	tristate "CPU notifier error injection module"
-	depends on HOTPLUG_CPU && NOTIFIER_ERROR_INJECTION
-	help
-	  This option provides a kernel module that can be used to test
-	  the error handling of the cpu notifiers by injecting artificial
-	  errors to CPU notifier chain callbacks.  It is controlled through
-	  debugfs interface under /sys/kernel/debug/notifier-error-inject/cpu
-
-	  If the notifier call chain should be failed with some events
-	  notified, write the error code to "actions/<notifier event>/error".
-
-	  Example: Inject CPU offline error (-1 == -EPERM)
-
-	  # cd /sys/kernel/debug/notifier-error-inject/cpu
-	  # echo -1 > actions/CPU_DOWN_PREPARE/error
-	  # echo 0 > /sys/devices/system/cpu/cpu1/online
-	  bash: echo: write error: Operation not permitted
-
-	  To compile this code as a module, choose M here: the module will
-	  be called cpu-notifier-error-inject.
-
-	  If unsure, say N.
-
 config PM_NOTIFIER_ERROR_INJECT
 	tristate "PM notifier error injection module"
 	depends on PM && NOTIFIER_ERROR_INJECTION
@@ -1961,6 +1960,16 @@ config TEST_STATIC_KEYS
 
 	  If unsure, say N.
 
+config BUG_ON_DATA_CORRUPTION
+	bool "Trigger a BUG when data corruption is detected"
+	select DEBUG_LIST
+	help
+	  Select this option if the kernel should BUG when it encounters
+	  data corruption in kernel memory structures when they get checked
+	  for validity.
+
+	  If unsure, say N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
@@ -1972,7 +1981,7 @@ config ARCH_HAS_DEVMEM_IS_ALLOWED
 
 config STRICT_DEVMEM
 	bool "Filter access to /dev/mem"
-	depends on MMU
+	depends on MMU && DEVMEM
 	depends on ARCH_HAS_DEVMEM_IS_ALLOWED
 	default y if TILE || PPC
 	---help---
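
Note: the new CONSOLE_LOGLEVEL_DEFAULT option above bakes what loglevel=<x> would do into the kernel image. A console message is emitted when its level is numerically lower than the console loglevel (lower = more severe). A minimal sketch of that gate, with simplified names rather than the real printk internals:

	/* Sketch only: msg_level runs 0 (KERN_EMERG) .. 7 (KERN_DEBUG). */
	static bool console_shows(int msg_level, int console_loglevel)
	{
		return msg_level < console_loglevel;
	}

With the default of 7, KERN_INFO (6) still reaches the console while KERN_DEBUG (7) does not.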
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index bc6e651df68c..a669c193b878 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -10,7 +10,8 @@ config UBSAN
 	  This option enables undefined behaviour sanity checker
 	  Compile-time instrumentation is used to detect various undefined
 	  behaviours in runtime. Various types of checks may be enabled
-	  via boot parameter ubsan_handle (see: Documentation/ubsan.txt).
+	  via boot parameter ubsan_handle
+	  (see: Documentation/dev-tools/ubsan.rst).
 
 config UBSAN_SANITIZE_ALL
 	bool "Enable instrumentation for the entire kernel"
diff --git a/lib/Makefile b/lib/Makefile
index 50144a3aeebd..bc4073a8cd08 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -128,7 +128,6 @@ obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
-obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
 obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o
 obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
deleted file mode 100644
index 0e2c9a1e958a..000000000000
--- a/lib/cpu-notifier-error-inject.c
+++ /dev/null
@@ -1,84 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-
-#include "notifier-error-inject.h"
-
-static int priority;
-module_param(priority, int, 0);
-MODULE_PARM_DESC(priority, "specify cpu notifier priority");
-
-#define UP_PREPARE 0
-#define UP_PREPARE_FROZEN 0
-#define DOWN_PREPARE 0
-#define DOWN_PREPARE_FROZEN 0
-
-static struct notifier_err_inject cpu_notifier_err_inject = {
-	.actions = {
-		{ NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE) },
-		{ NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE_FROZEN) },
-		{ NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE) },
-		{ NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE_FROZEN) },
-		{}
-	}
-};
-
-static int notf_err_handle(struct notifier_err_inject_action *action)
-{
-	int ret;
-
-	ret = action->error;
-	if (ret)
-		pr_info("Injecting error (%d) to %s\n", ret, action->name);
-	return ret;
-}
-
-static int notf_err_inj_up_prepare(unsigned int cpu)
-{
-	if (!cpuhp_tasks_frozen)
-		return notf_err_handle(&cpu_notifier_err_inject.actions[0]);
-	else
-		return notf_err_handle(&cpu_notifier_err_inject.actions[1]);
-}
-
-static int notf_err_inj_dead(unsigned int cpu)
-{
-	if (!cpuhp_tasks_frozen)
-		return notf_err_handle(&cpu_notifier_err_inject.actions[2]);
-	else
-		return notf_err_handle(&cpu_notifier_err_inject.actions[3]);
-}
-
-static struct dentry *dir;
-
-static int err_inject_init(void)
-{
-	int err;
-
-	dir = notifier_err_inject_init("cpu", notifier_err_inject_dir,
-					&cpu_notifier_err_inject, priority);
-	if (IS_ERR(dir))
-		return PTR_ERR(dir);
-
-	err = cpuhp_setup_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE,
-					"cpu-err-notif:prepare",
-					notf_err_inj_up_prepare,
-					notf_err_inj_dead);
-	if (err)
-		debugfs_remove_recursive(dir);
-
-	return err;
-}
-
-static void err_inject_exit(void)
-{
-	cpuhp_remove_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE);
-	debugfs_remove_recursive(dir);
-}
-
-module_init(err_inject_init);
-module_exit(err_inject_exit);
-
-MODULE_DESCRIPTION("CPU notifier error injection module");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index a8e12601eb37..04c1ef717fe0 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -199,7 +199,7 @@ static void free_object(struct debug_obj *obj)
 	 * initialized:
 	 */
 	if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache)
-		sched = keventd_up();
+		sched = 1;
 	hlist_add_head(&obj->node, &obj_pool);
 	obj_pool_free++;
 	obj_pool_used--;
@@ -362,6 +362,7 @@ void debug_object_init(void *addr, struct debug_obj_descr *descr)
 
 	__debug_object_init(addr, descr, 0);
 }
+EXPORT_SYMBOL_GPL(debug_object_init);
 
 /**
  * debug_object_init_on_stack - debug checks when an object on stack is
@@ -376,6 +377,7 @@ void debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr)
 
 	__debug_object_init(addr, descr, 1);
 }
+EXPORT_SYMBOL_GPL(debug_object_init_on_stack);
 
 /**
  * debug_object_activate - debug checks when an object is activated
@@ -449,6 +451,7 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr)
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(debug_object_activate);
 
 /**
  * debug_object_deactivate - debug checks when an object is deactivated
@@ -496,6 +499,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
 
 	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
+EXPORT_SYMBOL_GPL(debug_object_deactivate);
 
 /**
  * debug_object_destroy - debug checks when an object is destroyed
@@ -542,6 +546,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
 out_unlock:
 	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
+EXPORT_SYMBOL_GPL(debug_object_destroy);
 
 /**
  * debug_object_free - debug checks when an object is freed
@@ -582,6 +587,7 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr)
 out_unlock:
 	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
+EXPORT_SYMBOL_GPL(debug_object_free);
 
 /**
  * debug_object_assert_init - debug checks when object should be init-ed
@@ -626,6 +632,7 @@ void debug_object_assert_init(void *addr, struct debug_obj_descr *descr)
 
 	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
+EXPORT_SYMBOL_GPL(debug_object_assert_init);
 
 /**
  * debug_object_active_state - debug checks object usage state machine
@@ -673,6 +680,7 @@ debug_object_active_state(void *addr, struct debug_obj_descr *descr,
 
 	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
+EXPORT_SYMBOL_GPL(debug_object_active_state);
 
 #ifdef CONFIG_DEBUG_OBJECTS_FREE
 static void __debug_check_no_obj_freed(const void *address, unsigned long size)
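
The debugobjects hunks do two things: free_object() no longer consults keventd_up(), which is always true by this point after this cycle's workqueue changes, and every entry point gains an EXPORT_SYMBOL_GPL so the API is usable from modules. A hypothetical modular caller (descriptor and function names invented for illustration):

	static struct debug_obj_descr my_obj_descr = {
		.name = "my_obj",		/* appears in debugobjects reports */
	};

	static void my_obj_setup(void *obj)
	{
		/* Both helpers are callable from modules after this patch. */
		debug_object_init(obj, &my_obj_descr);
		debug_object_activate(obj, &my_obj_descr);
	}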
diff --git a/lib/extable.c b/lib/extable.c
index 0be02ad561e9..62968daa66a9 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sort.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifndef ARCH_HAS_RELATIVE_EXTABLE
 #define ex_to_insn(x)	((x)->insn)
diff --git a/lib/idr.c b/lib/idr.c
index 6098336df267..52d2979a05e8 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -927,6 +927,9 @@ EXPORT_SYMBOL(ida_pre_get);
  * and go back to the ida_pre_get() call. If the ida is full, it will
  * return %-ENOSPC.
  *
+ * Note that callers must ensure that concurrent access to @ida is not possible.
+ * See ida_simple_get() for a variant which takes care of locking.
+ *
  * @p_id returns a value in the range @starting_id ... %0x7fffffff.
  */
 int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
@@ -1073,6 +1076,9 @@ EXPORT_SYMBOL(ida_destroy);
  * Allocates an id in the range start <= id < end, or returns -ENOSPC.
  * On memory allocation failure, returns -ENOMEM.
  *
+ * Compared to ida_get_new_above() this function does its own locking, and
+ * should be used unless there are special requirements.
+ *
  * Use ida_simple_remove() to get rid of an id.
  */
 int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
@@ -1119,6 +1125,11 @@ EXPORT_SYMBOL(ida_simple_get);
  * ida_simple_remove - remove an allocated id.
  * @ida: the (initialized) ida.
  * @id: the id returned by ida_simple_get.
+ *
+ * Use to release an id allocated with ida_simple_get().
+ *
+ * Compared to ida_remove() this function does its own locking, and should be
+ * used unless there are special requirements.
  */
 void ida_simple_remove(struct ida *ida, unsigned int id)
 {
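
The added kernel-doc distinguishes the raw ida_get_new_above()/ida_remove() pair, which leaves locking to the caller, from the ida_simple_*() helpers, which lock internally. A minimal usage sketch of the simple API (all names here are invented for illustration):

	#include <linux/idr.h>

	static DEFINE_IDA(my_ida);		/* hypothetical IDA instance */

	static int alloc_instance_id(void)
	{
		/* Locked internally; returns an id in [0, 100) or -errno. */
		return ida_simple_get(&my_ida, 0, 100, GFP_KERNEL);
	}

	static void free_instance_id(int id)
	{
		ida_simple_remove(&my_ida, id);	/* also locked internally */
	}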
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f0c7f1481bae..25f572303801 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1,4 +1,5 @@
 #include <linux/export.h>
+#include <linux/bvec.h>
 #include <linux/uio.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>
@@ -72,19 +73,21 @@
 }
 
 #define iterate_all_kinds(i, n, v, I, B, K) {			\
-	size_t skip = i->iov_offset;				\
-	if (unlikely(i->type & ITER_BVEC)) {			\
-		struct bio_vec v;				\
-		struct bvec_iter __bi;				\
-		iterate_bvec(i, n, v, __bi, skip, (B))		\
-	} else if (unlikely(i->type & ITER_KVEC)) {		\
-		const struct kvec *kvec;			\
-		struct kvec v;					\
-		iterate_kvec(i, n, v, kvec, skip, (K))		\
-	} else {						\
-		const struct iovec *iov;			\
-		struct iovec v;					\
-		iterate_iovec(i, n, v, iov, skip, (I))		\
+	if (likely(n)) {					\
+		size_t skip = i->iov_offset;			\
+		if (unlikely(i->type & ITER_BVEC)) {		\
+			struct bio_vec v;			\
+			struct bvec_iter __bi;			\
+			iterate_bvec(i, n, v, __bi, skip, (B))	\
+		} else if (unlikely(i->type & ITER_KVEC)) {	\
+			const struct kvec *kvec;		\
+			struct kvec v;				\
+			iterate_kvec(i, n, v, kvec, skip, (K))	\
+		} else {					\
+			const struct iovec *iov;		\
+			struct iovec v;				\
+			iterate_iovec(i, n, v, iov, skip, (I))	\
+		}						\
 	}							\
 }
 
@@ -568,6 +571,31 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(copy_from_iter);
 
+bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
+{
+	char *to = addr;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return false;
+	}
+	if (unlikely(i->count < bytes))
+		return false;
+
+	iterate_all_kinds(i, bytes, v, ({
+		if (__copy_from_user((to += v.iov_len) - v.iov_len,
+				      v.iov_base, v.iov_len))
+			return false;
+		0;}),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len),
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+	)
+
+	iov_iter_advance(i, bytes);
+	return true;
+}
+EXPORT_SYMBOL(copy_from_iter_full);
+
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
@@ -587,6 +615,30 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(copy_from_iter_nocache);
 
+bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
+{
+	char *to = addr;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return false;
+	}
+	if (unlikely(i->count < bytes))
+		return false;
+	iterate_all_kinds(i, bytes, v, ({
+		if (__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
+					     v.iov_base, v.iov_len))
+			return false;
+		0;}),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len),
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+	)
+
+	iov_iter_advance(i, bytes);
+	return true;
+}
+EXPORT_SYMBOL(copy_from_iter_full_nocache);
+
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
@@ -683,10 +735,11 @@ static void pipe_advance(struct iov_iter *i, size_t size)
 	struct pipe_inode_info *pipe = i->pipe;
 	struct pipe_buffer *buf;
 	int idx = i->idx;
-	size_t off = i->iov_offset;
+	size_t off = i->iov_offset, orig_sz;
 
 	if (unlikely(i->count < size))
 		size = i->count;
+	orig_sz = size;
 
 	if (size) {
 		if (off) /* make it relative to the beginning of buffer */
@@ -713,6 +766,7 @@ static void pipe_advance(struct iov_iter *i, size_t size)
 			pipe->nrbufs--;
 		}
 	}
+	i->count -= orig_sz;
 }
 
 void iov_iter_advance(struct iov_iter *i, size_t size)
@@ -785,11 +839,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 	unsigned long res = 0;
 	size_t size = i->count;
 
-	if (!size)
-		return 0;
-
 	if (unlikely(i->type & ITER_PIPE)) {
-		if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
+		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
 			return size | i->iov_offset;
 		return size;
 	}
@@ -804,10 +855,8 @@ EXPORT_SYMBOL(iov_iter_alignment);
 
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 {
 	unsigned long res = 0;
 	size_t size = i->count;
-	if (!size)
-		return 0;
 
 	if (unlikely(i->type & ITER_PIPE)) {
 		WARN_ON(1);
@@ -822,7 +871,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
 			(size != v.iov_len ? size : 0))
 		);
-	return res;
+	return res;
 }
 EXPORT_SYMBOL(iov_iter_gap_alignment);
 
@@ -856,6 +905,9 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
 	size_t capacity;
 	int idx;
 
+	if (!maxsize)
+		return 0;
+
 	if (!sanity(i))
 		return -EFAULT;
 
@@ -874,9 +926,6 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 	if (maxsize > i->count)
 		maxsize = i->count;
 
-	if (!maxsize)
-		return 0;
-
 	if (unlikely(i->type & ITER_PIPE))
 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
 	iterate_all_kinds(i, maxsize, v, ({
@@ -923,6 +972,9 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
 	int idx;
 	int npages;
 
+	if (!maxsize)
+		return 0;
+
 	if (!sanity(i))
 		return -EFAULT;
 
@@ -954,9 +1006,6 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 	if (maxsize > i->count)
 		maxsize = i->count;
 
-	if (!maxsize)
-		return 0;
-
 	if (unlikely(i->type & ITER_PIPE))
 		return pipe_get_pages_alloc(i, pages, maxsize, start);
 	iterate_all_kinds(i, maxsize, v, ({
@@ -1006,7 +1055,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 	}
 	iterate_and_advance(i, bytes, v, ({
 		int err = 0;
-		next = csum_and_copy_from_user(v.iov_base,
+		next = csum_and_copy_from_user(v.iov_base,
 					       (to += v.iov_len) - v.iov_len,
 					       v.iov_len, 0, &err);
 		if (!err) {
@@ -1035,6 +1084,51 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 }
 EXPORT_SYMBOL(csum_and_copy_from_iter);
 
+bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
+			       struct iov_iter *i)
+{
+	char *to = addr;
+	__wsum sum, next;
+	size_t off = 0;
+	sum = *csum;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return false;
+	}
+	if (unlikely(i->count < bytes))
+		return false;
+	iterate_all_kinds(i, bytes, v, ({
+		int err = 0;
+		next = csum_and_copy_from_user(v.iov_base,
+					       (to += v.iov_len) - v.iov_len,
+					       v.iov_len, 0, &err);
+		if (err)
+			return false;
+		sum = csum_block_add(sum, next, off);
+		off += v.iov_len;
+		0;
+	}), ({
+		char *p = kmap_atomic(v.bv_page);
+		next = csum_partial_copy_nocheck(p + v.bv_offset,
+						 (to += v.bv_len) - v.bv_len,
+						 v.bv_len, 0);
+		kunmap_atomic(p);
+		sum = csum_block_add(sum, next, off);
+		off += v.bv_len;
+	}),({
+		next = csum_partial_copy_nocheck(v.iov_base,
+						 (to += v.iov_len) - v.iov_len,
+						 v.iov_len, 0);
+		sum = csum_block_add(sum, next, off);
+		off += v.iov_len;
+	})
+	)
+	*csum = sum;
+	iov_iter_advance(i, bytes);
+	return true;
+}
+EXPORT_SYMBOL(csum_and_copy_from_iter_full);
+
 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
 			     struct iov_iter *i)
 {
@@ -1049,7 +1143,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
 	iterate_and_advance(i, bytes, v, ({
 		int err = 0;
-		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
-					     v.iov_base,
+		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
+					     v.iov_base,
 					     v.iov_len, 0, &err);
 		if (!err) {
 			sum = csum_block_add(sum, next, off);
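
The new *_full variants change the calling convention: instead of returning a byte count that every caller must compare against the requested size, they return a bool and leave the iterator unadvanced unless the whole copy succeeded. A sketch of the difference for a hypothetical caller:

	/* Old style: a short copy has to be detected by hand. */
	if (copy_from_iter(buf, len, iter) != len)
		return -EFAULT;

	/* New style: on false, the iterator has not been advanced. */
	if (!copy_from_iter_full(buf, len, iter))
		return -EFAULT;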
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index f6c2c1e7779c..9a2b811966eb 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -56,7 +56,7 @@ static const char *kobject_actions[] = {
  * kobject_action_type - translate action string to numeric type
  *
  * @buf: buffer containing the action string, newline is ignored
- * @len: length of buffer
+ * @count: length of buffer
  * @type: pointer to the location to store the action type
  *
  * Returns 0 if the action string was recognized.
@@ -154,8 +154,8 @@ static void cleanup_uevent_env(struct subprocess_info *info)
 /**
  * kobject_uevent_env - send an uevent with environmental data
  *
- * @action: action that is happening
  * @kobj: struct kobject that the action is happening to
+ * @action: action that is happening
  * @envp_ext: pointer to environmental data
  *
  * Returns 0 if kobject_uevent_env() is completed with success or the
@@ -363,8 +363,8 @@ EXPORT_SYMBOL_GPL(kobject_uevent_env);
 /**
  * kobject_uevent - notify userspace by sending an uevent
  *
- * @action: action that is happening
  * @kobj: struct kobject that the action is happening to
+ * @action: action that is happening
  *
  * Returns 0 if kobject_uevent() is completed with success or the
  * corresponding error when it fails.
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index b8e2080c1a47..bf85e05ce858 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -17,7 +17,7 @@
 #include <linux/math64.h>
 #include <linux/export.h>
 #include <linux/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "kstrtox.h"
 
 const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 3859bf63561c..7f7bfa55eb6d 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -2,8 +2,7 @@
  * Copyright 2006, Red Hat, Inc., Dave Jones
  * Released under the General Public License (GPL).
  *
- * This file contains the linked list implementations for
- * DEBUG_LIST.
+ * This file contains the linked list validation for DEBUG_LIST.
  */
 
 #include <linux/export.h>
@@ -13,88 +12,48 @@
 #include <linux/rculist.h>
 
 /*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
+ * Check that the data structures for the list manipulations are reasonably
+ * valid. Failures here indicate memory corruption (and possibly an exploit
+ * attempt).
  */
 
-void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next)
+bool __list_add_valid(struct list_head *new, struct list_head *prev,
+		      struct list_head *next)
 {
-	WARN(next->prev != prev,
-		"list_add corruption. next->prev should be "
-		"prev (%p), but was %p. (next=%p).\n",
+	CHECK_DATA_CORRUPTION(next->prev != prev,
+		"list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
 		prev, next->prev, next);
-	WARN(prev->next != next,
-		"list_add corruption. prev->next should be "
-		"next (%p), but was %p. (prev=%p).\n",
+	CHECK_DATA_CORRUPTION(prev->next != next,
+		"list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
 		next, prev->next, prev);
-	WARN(new == prev || new == next,
+	CHECK_DATA_CORRUPTION(new == prev || new == next,
 		"list_add double add: new=%p, prev=%p, next=%p.\n",
 		new, prev, next);
-	next->prev = new;
-	new->next = next;
-	new->prev = prev;
-	WRITE_ONCE(prev->next, new);
+
+	return true;
 }
-EXPORT_SYMBOL(__list_add);
+EXPORT_SYMBOL(__list_add_valid);
 
-void __list_del_entry(struct list_head *entry)
+bool __list_del_entry_valid(struct list_head *entry)
 {
 	struct list_head *prev, *next;
 
 	prev = entry->prev;
 	next = entry->next;
 
-	if (WARN(next == LIST_POISON1,
+	CHECK_DATA_CORRUPTION(next == LIST_POISON1,
 		"list_del corruption, %p->next is LIST_POISON1 (%p)\n",
-		entry, LIST_POISON1) ||
-	    WARN(prev == LIST_POISON2,
+		entry, LIST_POISON1);
+	CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
 		"list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
-		entry, LIST_POISON2) ||
-	    WARN(prev->next != entry,
-		"list_del corruption. prev->next should be %p, "
-		"but was %p\n", entry, prev->next) ||
-	    WARN(next->prev != entry,
-		"list_del corruption. next->prev should be %p, "
-		"but was %p\n", entry, next->prev))
-		return;
-
-	__list_del(prev, next);
-}
-EXPORT_SYMBOL(__list_del_entry);
+		entry, LIST_POISON2);
+	CHECK_DATA_CORRUPTION(prev->next != entry,
+		"list_del corruption. prev->next should be %p, but was %p\n",
+		entry, prev->next);
+	CHECK_DATA_CORRUPTION(next->prev != entry,
+		"list_del corruption. next->prev should be %p, but was %p\n",
+		entry, next->prev);
+	return true;
 
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty on entry does not return true after this, the entry is
- * in an undefined state.
- */
-void list_del(struct list_head *entry)
-{
-	__list_del_entry(entry);
-	entry->next = LIST_POISON1;
-	entry->prev = LIST_POISON2;
-}
-EXPORT_SYMBOL(list_del);
-
-/*
- * RCU variants.
- */
-void __list_add_rcu(struct list_head *new,
-		    struct list_head *prev, struct list_head *next)
-{
-	WARN(next->prev != prev,
-		"list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
-		prev, next->prev, next);
-	WARN(prev->next != next,
-		"list_add_rcu corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
-		next, prev->next, prev);
-	new->next = next;
-	new->prev = prev;
-	rcu_assign_pointer(list_next_rcu(prev), new);
-	next->prev = new;
 }
-EXPORT_SYMBOL(__list_add_rcu);
+EXPORT_SYMBOL(__list_del_entry_valid);
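
All of the list checks now report through CHECK_DATA_CORRUPTION(), which is what ties this file to the new BUG_ON_DATA_CORRUPTION option in lib/Kconfig.debug. Roughly, and this is a from-memory sketch of the include/linux/bug.h macro of this era rather than its literal text, it behaves like:

	#define CHECK_DATA_CORRUPTION(condition, fmt, ...)			 \
		do {								 \
			if (unlikely(condition)) {				 \
				if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
					pr_err(fmt, ##__VA_ARGS__);		 \
					BUG();	/* hard stop on corruption */	 \
				} else						 \
					WARN(1, fmt, ##__VA_ARGS__);		 \
			}							 \
		} while (0)

so the same validation code either WARNs (the default) or BUGs (hardened configurations).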
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 872a15a2a637..f3a217ea0388 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -980,23 +980,23 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
 #ifndef CONFIG_PROVE_LOCKING
 	if (expected == FAILURE && debug_locks) {
 		expected_testcase_failures++;
-		printk("failed|");
+		pr_cont("failed|");
 	}
 	else
 #endif
 	if (debug_locks != expected) {
 		unexpected_testcase_failures++;
-		printk("FAILED|");
+		pr_cont("FAILED|");
 
 		dump_stack();
 	} else {
 		testcase_successes++;
-		printk(" ok  |");
+		pr_cont(" ok  |");
 	}
 	testcase_total++;
 
 	if (debug_locks_verbose)
-		printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
+		pr_cont(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
 			lockclass_mask, debug_locks, expected);
 	/*
 	 * Some tests (e.g. double-unlock) might corrupt the preemption
@@ -1021,26 +1021,26 @@ static inline void print_testname(const char *testname)
 #define DO_TESTCASE_1(desc, name, nr)				\
 	print_testname(desc"/"#nr);				\
 	dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK);		\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_1B(desc, name, nr)				\
 	print_testname(desc"/"#nr);				\
 	dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK);		\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_3(desc, name, nr)				\
 	print_testname(desc"/"#nr);				\
 	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN);	\
 	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
 	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_3RW(desc, name, nr)				\
 	print_testname(desc"/"#nr);				\
 	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\
 	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
 	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_6(desc, name)				\
 	print_testname(desc);					\
@@ -1050,7 +1050,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_6_SUCCESS(desc, name)			\
 	print_testname(desc);					\
@@ -1060,7 +1060,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM);		\
-	printk("\n");
+	pr_cont("\n");
 
 /*
  * 'read' variant: rlocks must not trigger.
@@ -1073,7 +1073,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
-	printk("\n");
+	pr_cont("\n");
 
 #define DO_TESTCASE_2I(desc, name, nr)				\
 	DO_TESTCASE_1("hard-"desc, name##_hard, nr);		\
@@ -1726,25 +1726,25 @@ static void ww_tests(void)
 	dotest(ww_test_fail_acquire, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_normal, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_unneeded_slow, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("ww contexts mixing");
 	dotest(ww_test_two_contexts, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_diff_class, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("finishing ww context");
 	dotest(ww_test_context_done_twice, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_context_unlock_twice, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_context_fini_early, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_context_lock_after_done, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("locking mismatches");
 	dotest(ww_test_object_unlock_twice, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_object_lock_unbalanced, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_object_lock_stale_context, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("EDEADLK handling");
 	dotest(ww_test_edeadlk_normal, SUCCESS, LOCKTYPE_WW);
@@ -1757,11 +1757,11 @@ static void ww_tests(void)
 	dotest(ww_test_edeadlk_acquire_more_edeadlk_slow, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_edeadlk_acquire_wrong, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_edeadlk_acquire_wrong_slow, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("spinlock nest unlocked");
 	dotest(ww_test_spin_nest_unlocked, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	printk("  -----------------------------------------------------\n");
 	printk("                                 |block | try  |context|\n");
@@ -1771,25 +1771,25 @@ static void ww_tests(void)
 	dotest(ww_test_context_block, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_context_try, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_context_context, SUCCESS, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("try");
 	dotest(ww_test_try_block, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_try_try, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_try_context, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("block");
 	dotest(ww_test_block_block, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_block_try, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_block_context, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("spinlock");
 	dotest(ww_test_spin_block, FAILURE, LOCKTYPE_WW);
 	dotest(ww_test_spin_try, SUCCESS, LOCKTYPE_WW);
 	dotest(ww_test_spin_context, FAILURE, LOCKTYPE_WW);
-	printk("\n");
+	pr_cont("\n");
 }
 
 void locking_selftest(void)
@@ -1829,32 +1829,32 @@ void locking_selftest(void)
 
 	printk("  --------------------------------------------------------------------------\n");
 	print_testname("recursive read-lock");
-	printk("             |");
+	pr_cont("             |");
 	dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
-	printk("             |");
+	pr_cont("             |");
 	dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("recursive read-lock #2");
-	printk("             |");
+	pr_cont("             |");
 	dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
-	printk("             |");
+	pr_cont("             |");
 	dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("mixed read-write-lock");
-	printk("             |");
+	pr_cont("             |");
 	dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK);
-	printk("             |");
+	pr_cont("             |");
 	dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM);
-	printk("\n");
+	pr_cont("\n");
 
 	print_testname("mixed write-read-lock");
-	printk("             |");
+	pr_cont("             |");
 	dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK);
-	printk("             |");
+	pr_cont("             |");
 	dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
-	printk("\n");
+	pr_cont("\n");
 
 	printk("  --------------------------------------------------------------------------\n");
 
diff --git a/lib/lockref.c b/lib/lockref.c
index 5a92189ad711..c4bfcb8836cd 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -20,7 +20,7 @@
 		if (likely(old.lock_count == prev.lock_count)) {	\
 			SUCCESS;					\
 		}							\
-		cpu_relax_lowlatency();					\
+		cpu_relax();						\
 	}								\
 } while (0)
 
diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c
index 5464c8744ea9..e24388a863a7 100644
--- a/lib/mpi/mpi-pow.c
+++ b/lib/mpi/mpi-pow.c
@@ -64,8 +64,13 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
 	if (!esize) {
 		/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
 		 * depending on if MOD equals 1.  */
-		rp[0] = 1;
 		res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
+		if (res->nlimbs) {
+			if (mpi_resize(res, 1) < 0)
+				goto enomem;
+			rp = res->d;
+			rp[0] = 1;
+		}
 		res->sign = 0;
 		goto leave;
 	}
diff --git a/lib/nlattr.c b/lib/nlattr.c
index fce1e9afc6d9..b42b8577fc23 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -14,7 +14,7 @@
 #include <linux/types.h>
 #include <net/netlink.h>
 
-static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = {
+static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
 	[NLA_U8]	= sizeof(u8),
 	[NLA_U16]	= sizeof(u16),
 	[NLA_U32]	= sizeof(u32),
diff --git a/lib/parser.c b/lib/parser.c
index b6d11631231b..3278958b472a 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -152,6 +152,36 @@ static int match_number(substring_t *s, int *result, int base)
 }
 
 /**
+ * match_u64int: scan a number in the given base from a substring_t
+ * @s: substring to be scanned
+ * @result: resulting u64 on success
+ * @base: base to use when converting string
+ *
+ * Description: Given a &substring_t and a base, attempts to parse the substring
+ * as a number in that base. On success, sets @result to the integer represented
+ * by the string and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ */
+static int match_u64int(substring_t *s, u64 *result, int base)
+{
+	char *buf;
+	int ret;
+	u64 val;
+	size_t len = s->to - s->from;
+
+	buf = kmalloc(len + 1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	memcpy(buf, s->from, len);
+	buf[len] = '\0';
+
+	ret = kstrtoull(buf, base, &val);
+	if (!ret)
+		*result = val;
+	kfree(buf);
+	return ret;
+}
+
+/**
  * match_int: - scan a decimal representation of an integer from a substring_t
  * @s: substring_t to be scanned
  * @result: resulting integer on success
@@ -167,6 +197,23 @@ int match_int(substring_t *s, int *result)
 EXPORT_SYMBOL(match_int);
 
 /**
+ * match_u64: - scan a decimal representation of a u64 from
+ *                  a substring_t
+ * @s: substring_t to be scanned
+ * @result: resulting unsigned long long on success
+ *
+ * Description: Attempts to parse the &substring_t @s as a long decimal
+ * integer. On success, sets @result to the integer represented by the
+ * string and returns 0.
+ * Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ */
+int match_u64(substring_t *s, u64 *result)
+{
+	return match_u64int(s, result, 0);
+}
+EXPORT_SYMBOL(match_u64);
+
+/**
  * match_octal: - scan an octal representation of an integer from a substring_t
  * @s: substring_t to be scanned
  * @result: resulting integer on success
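
match_u64() slots into the usual match_token() parsing flow. A hypothetical option parser using it (the token table and names are invented for illustration):

	#include <linux/parser.h>

	enum { Opt_size, Opt_err };

	static const match_table_t tokens = {
		{ Opt_size, "size=%s" },
		{ Opt_err, NULL }
	};

	static int parse_size_opt(char *p, u64 *size)
	{
		substring_t args[MAX_OPT_ARGS];

		if (match_token(p, tokens, args) == Opt_size)
			return match_u64(&args[0], size);	/* 0 on success */
		return -EINVAL;
	}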
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 72d36113ccaa..c8cebb137076 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -158,25 +158,21 @@ EXPORT_SYMBOL(percpu_counter_destroy);
 int percpu_counter_batch __read_mostly = 32;
 EXPORT_SYMBOL(percpu_counter_batch);
 
-static void compute_batch_value(void)
+static int compute_batch_value(unsigned int cpu)
 {
 	int nr = num_online_cpus();
 
 	percpu_counter_batch = max(32, nr*2);
+	return 0;
 }
 
-static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
-					unsigned long action, void *hcpu)
+static int percpu_counter_cpu_dead(unsigned int cpu)
 {
 #ifdef CONFIG_HOTPLUG_CPU
-	unsigned int cpu;
 	struct percpu_counter *fbc;
 
-	compute_batch_value();
-	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
-		return NOTIFY_OK;
+	compute_batch_value(cpu);
 
-	cpu = (unsigned long)hcpu;
 	spin_lock_irq(&percpu_counters_lock);
 	list_for_each_entry(fbc, &percpu_counters, list) {
 		s32 *pcount;
@@ -190,7 +186,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
 	}
 	spin_unlock_irq(&percpu_counters_lock);
 #endif
-	return NOTIFY_OK;
+	return 0;
 }
 
 /*
@@ -222,8 +218,15 @@ EXPORT_SYMBOL(__percpu_counter_compare);
 
 static int __init percpu_counter_startup(void)
 {
-	compute_batch_value();
-	hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
+	int ret;
+
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "lib/percpu_cnt:online",
+				compute_batch_value, NULL);
+	WARN_ON(ret < 0);
+	ret = cpuhp_setup_state_nocalls(CPUHP_PERCPU_CNT_DEAD,
+					"lib/percpu_cnt:dead", NULL,
+					percpu_counter_cpu_dead);
+	WARN_ON(ret < 0);
 	return 0;
 }
 module_init(percpu_counter_startup);
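
This is the standard notifier-to-hotplug-state conversion of this cycle: one multiplexed callback switching on CPU_DEAD/CPU_DEAD_FROZEN actions becomes two registered states with plain int (*)(unsigned int cpu) callbacks. Reduced to its skeleton (CPUHP_MY_DEAD and the callback names are placeholders, not real identifiers):

	/* Dynamic online state: startup callback runs as each CPU comes up. */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys:online",
				my_online_cb, NULL);

	/* Fixed dead state: teardown callback runs after a CPU goes away.
	 * The _nocalls variant skips invoking callbacks for CPUs that are
	 * already online at registration time. */
	ret = cpuhp_setup_state_nocalls(CPUHP_MY_DEAD, "subsys:dead",
					NULL, my_dead_cb);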
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8e6d552c40dd..6f382e07de77 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -22,6 +22,7 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -30,7 +31,6 @@
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/kmemleak.h>
-#include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/string.h>
 #include <linux/bitops.h>
@@ -69,6 +69,11 @@ struct radix_tree_preload {
69}; 69};
70static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 70static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
71 71
72static inline struct radix_tree_node *entry_to_node(void *ptr)
73{
74 return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
75}
76
72static inline void *node_to_entry(void *ptr) 77static inline void *node_to_entry(void *ptr)
73{ 78{
74 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); 79 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
@@ -191,13 +196,12 @@ static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
191 * Returns next bit offset, or size if nothing found. 196 * Returns next bit offset, or size if nothing found.
192 */ 197 */
193static __always_inline unsigned long 198static __always_inline unsigned long
194radix_tree_find_next_bit(const unsigned long *addr, 199radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
195 unsigned long size, unsigned long offset) 200 unsigned long offset)
196{ 201{
197 if (!__builtin_constant_p(size)) 202 const unsigned long *addr = node->tags[tag];
198 return find_next_bit(addr, size, offset);
199 203
200 if (offset < size) { 204 if (offset < RADIX_TREE_MAP_SIZE) {
201 unsigned long tmp; 205 unsigned long tmp;
202 206
203 addr += offset / BITS_PER_LONG; 207 addr += offset / BITS_PER_LONG;
@@ -205,14 +209,32 @@ radix_tree_find_next_bit(const unsigned long *addr,
205 if (tmp) 209 if (tmp)
206 return __ffs(tmp) + offset; 210 return __ffs(tmp) + offset;
207 offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); 211 offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
208 while (offset < size) { 212 while (offset < RADIX_TREE_MAP_SIZE) {
209 tmp = *++addr; 213 tmp = *++addr;
210 if (tmp) 214 if (tmp)
211 return __ffs(tmp) + offset; 215 return __ffs(tmp) + offset;
212 offset += BITS_PER_LONG; 216 offset += BITS_PER_LONG;
213 } 217 }
214 } 218 }
215 return size; 219 return RADIX_TREE_MAP_SIZE;
220}
221
222static unsigned int iter_offset(const struct radix_tree_iter *iter)
223{
224 return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK;
225}
226
227/*
228 * The maximum index which can be stored in a radix tree
229 */
230static inline unsigned long shift_maxindex(unsigned int shift)
231{
232 return (RADIX_TREE_MAP_SIZE << shift) - 1;
233}
234
235static inline unsigned long node_maxindex(struct radix_tree_node *node)
236{
237 return shift_maxindex(node->shift);
216} 238}
217 239
218#ifndef __KERNEL__ 240#ifndef __KERNEL__
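
shift_maxindex() and node_maxindex() move up here (their old definitions are deleted further down) so the reworked iterator code can use them, and the new iter_offset() yields the slot index of iter->index within the node the iterator currently sits on. With the common RADIX_TREE_MAP_SHIFT of 6 (RADIX_TREE_MAP_SIZE == 64) the arithmetic works out as follows; the concrete numbers assume that configuration:

	/*
	 * shift_maxindex(shift) == (64 << shift) - 1 with MAP_SHIFT == 6:
	 *   leaf node,   shift 0  -> covers indices 0..63
	 *   one level,   shift 6  -> covers indices 0..4095
	 *   two levels,  shift 12 -> covers indices 0..262143
	 *
	 * iter_offset() extracts bits [shift, shift+5] of iter->index, e.g.
	 * index 0x12345 at shift 6: (0x12345 >> 6) & 63 == 13, i.e. slot 13.
	 */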
@@ -220,10 +242,11 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
220{ 242{
221 unsigned long i; 243 unsigned long i;
222 244
223 pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d parent %p\n", 245 pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n",
224 node, node->offset, 246 node, node->offset, index, index | node_maxindex(node),
247 node->parent,
225 node->tags[0][0], node->tags[1][0], node->tags[2][0], 248 node->tags[0][0], node->tags[1][0], node->tags[2][0],
226 node->shift, node->count, node->parent); 249 node->shift, node->count, node->exceptional);
227 250
228 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { 251 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
229 unsigned long first = index | (i << node->shift); 252 unsigned long first = index | (i << node->shift);
@@ -231,14 +254,16 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
231 void *entry = node->slots[i]; 254 void *entry = node->slots[i];
232 if (!entry) 255 if (!entry)
233 continue; 256 continue;
234 if (is_sibling_entry(node, entry)) { 257 if (entry == RADIX_TREE_RETRY) {
235 pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n", 258 pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n",
236 entry, i, 259 i, first, last, node);
237 *(void **)entry_to_node(entry),
238 first, last);
239 } else if (!radix_tree_is_internal_node(entry)) { 260 } else if (!radix_tree_is_internal_node(entry)) {
240 pr_debug("radix entry %p offset %ld indices %ld-%ld\n", 261 pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
241 entry, i, first, last); 262 entry, i, first, last, node);
263 } else if (is_sibling_entry(node, entry)) {
264 pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
265 entry, i, first, last, node,
266 *(void **)entry_to_node(entry));
242 } else { 267 } else {
243 dump_node(entry_to_node(entry), first); 268 dump_node(entry_to_node(entry), first);
244 } 269 }
@@ -262,7 +287,10 @@ static void radix_tree_dump(struct radix_tree_root *root)
262 * that the caller has pinned this thread of control to the current CPU. 287 * that the caller has pinned this thread of control to the current CPU.
263 */ 288 */
264static struct radix_tree_node * 289static struct radix_tree_node *
265radix_tree_node_alloc(struct radix_tree_root *root) 290radix_tree_node_alloc(struct radix_tree_root *root,
291 struct radix_tree_node *parent,
292 unsigned int shift, unsigned int offset,
293 unsigned int count, unsigned int exceptional)
266{ 294{
267 struct radix_tree_node *ret = NULL; 295 struct radix_tree_node *ret = NULL;
268 gfp_t gfp_mask = root_gfp_mask(root); 296 gfp_t gfp_mask = root_gfp_mask(root);
@@ -307,6 +335,13 @@ radix_tree_node_alloc(struct radix_tree_root *root)
307 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 335 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
308out: 336out:
309 BUG_ON(radix_tree_is_internal_node(ret)); 337 BUG_ON(radix_tree_is_internal_node(ret));
338 if (ret) {
339 ret->parent = parent;
340 ret->shift = shift;
341 ret->offset = offset;
342 ret->count = count;
343 ret->exceptional = exceptional;
344 }
310 return ret; 345 return ret;
311} 346}
312 347
@@ -314,18 +349,15 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
314{ 349{
315 struct radix_tree_node *node = 350 struct radix_tree_node *node =
316 container_of(head, struct radix_tree_node, rcu_head); 351 container_of(head, struct radix_tree_node, rcu_head);
317 int i;
318 352
319 /* 353 /*
320 * must only free zeroed nodes into the slab. radix_tree_shrink 354 * Must only free zeroed nodes into the slab. We can be left with
321 * can leave us with a non-NULL entry in the first slot, so clear 355 * non-NULL entries by radix_tree_free_nodes, so clear the entries
322 * that here to make sure. 356 * and tags here.
323 */ 357 */
324 for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) 358 memset(node->slots, 0, sizeof(node->slots));
325 tag_clear(node, i, 0); 359 memset(node->tags, 0, sizeof(node->tags));
326 360 INIT_LIST_HEAD(&node->private_list);
327 node->slots[0] = NULL;
328 node->count = 0;
329 361
330 kmem_cache_free(radix_tree_node_cachep, node); 362 kmem_cache_free(radix_tree_node_cachep, node);
331} 363}
@@ -345,7 +377,7 @@ radix_tree_node_free(struct radix_tree_node *node)
345 * To make use of this facility, the radix tree must be initialised without 377 * To make use of this facility, the radix tree must be initialised without
346 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 378 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
347 */ 379 */
348static int __radix_tree_preload(gfp_t gfp_mask, int nr) 380static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
349{ 381{
350 struct radix_tree_preload *rtp; 382 struct radix_tree_preload *rtp;
351 struct radix_tree_node *node; 383 struct radix_tree_node *node;
@@ -411,6 +443,28 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
411} 443}
412EXPORT_SYMBOL(radix_tree_maybe_preload); 444EXPORT_SYMBOL(radix_tree_maybe_preload);
413 445
446#ifdef CONFIG_RADIX_TREE_MULTIORDER
447/*
448 * Preload with enough objects to ensure that we can split a single entry
449 * of order @old_order into many entries of size @new_order
450 */
451int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
452 gfp_t gfp_mask)
453{
454 unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
455 unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
456 (new_order / RADIX_TREE_MAP_SHIFT);
457 unsigned nr = 0;
458
459 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
460 BUG_ON(new_order >= old_order);
461
462 while (layers--)
463 nr = nr * RADIX_TREE_MAP_SIZE + 1;
464 return __radix_tree_preload(gfp_mask, top * nr);
465}
466#endif
467
414/* 468/*
415 * The same as function above, but preload number of nodes required to insert 469 * The same as function above, but preload number of nodes required to insert
416 * (1 << order) continuous naturally-aligned elements. 470 * (1 << order) continuous naturally-aligned elements.
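
The node count computed by radix_tree_split_preload() above is a geometric series: every level between the old and the new order needs RADIX_TREE_MAP_SIZE children per node of the level above, plus the node itself, while `top` covers an old entry that spans several sibling slots instead of one whole slot. Two worked cases, assuming RADIX_TREE_MAP_SHIFT == 6:

	/*
	 * old_order = 12, new_order = 0:
	 *   top    = 1 << (12 % 6)           = 1  (entry owns one whole slot)
	 *   layers = 12/6 - 0/6              = 2
	 *   nr     = 0*64 + 1, then 1*64 + 1 = 65
	 *   preload 1 * 65 nodes: one shift-6 node plus its 64 leaf children
	 *
	 * old_order = 9, new_order = 0:
	 *   top    = 1 << (9 % 6)            = 8  (entry spans 8 sibling slots)
	 *   layers = 9/6 - 0/6               = 1
	 *   nr     = 0*64 + 1                = 1
	 *   preload 8 * 1 nodes: one leaf node per former sibling slot
	 */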
@@ -456,19 +510,6 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
456 return __radix_tree_preload(gfp_mask, nr_nodes); 510 return __radix_tree_preload(gfp_mask, nr_nodes);
457} 511}
458 512
459/*
460 * The maximum index which can be stored in a radix tree
461 */
462static inline unsigned long shift_maxindex(unsigned int shift)
463{
464 return (RADIX_TREE_MAP_SIZE << shift) - 1;
465}
466
467static inline unsigned long node_maxindex(struct radix_tree_node *node)
468{
469 return shift_maxindex(node->shift);
470}
471
472static unsigned radix_tree_load_root(struct radix_tree_root *root, 513static unsigned radix_tree_load_root(struct radix_tree_root *root,
473 struct radix_tree_node **nodep, unsigned long *maxindex) 514 struct radix_tree_node **nodep, unsigned long *maxindex)
474{ 515{
@@ -506,8 +547,8 @@ static int radix_tree_extend(struct radix_tree_root *root,
506 goto out; 547 goto out;
507 548
508 do { 549 do {
509 struct radix_tree_node *node = radix_tree_node_alloc(root); 550 struct radix_tree_node *node = radix_tree_node_alloc(root,
510 551 NULL, shift, 0, 1, 0);
511 if (!node) 552 if (!node)
512 return -ENOMEM; 553 return -ENOMEM;
513 554
@@ -518,12 +559,12 @@ static int radix_tree_extend(struct radix_tree_root *root,
518 } 559 }
519 560
520 BUG_ON(shift > BITS_PER_LONG); 561 BUG_ON(shift > BITS_PER_LONG);
521 node->shift = shift; 562 if (radix_tree_is_internal_node(slot)) {
522 node->offset = 0;
523 node->count = 1;
524 node->parent = NULL;
525 if (radix_tree_is_internal_node(slot))
526 entry_to_node(slot)->parent = node; 563 entry_to_node(slot)->parent = node;
564 } else if (radix_tree_exceptional_entry(slot)) {
565 /* Moving an exceptional root->rnode to a node */
566 node->exceptional = 1;
567 }
527 node->slots[0] = slot; 568 node->slots[0] = slot;
528 slot = node_to_entry(node); 569 slot = node_to_entry(node);
529 rcu_assign_pointer(root->rnode, slot); 570 rcu_assign_pointer(root->rnode, slot);
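
radix_tree_extend() now also copes with an exceptional entry stored directly in root->rnode: when that entry is pushed down into slot 0 of the newly allocated node, the node's exceptional count has to start at 1, which is what the new branch does. Exceptional entries are distinguished from node pointers and ordinary data by a tag in the low bits of the slot value; a sketch of the encoding as recalled from linux/radix-tree.h of this era (treat the exact constants as an assumption):

	/*
	 * Low two bits of a slot value:
	 *   ..01  internal    - pointer to a child radix_tree_node
	 *   ..10  exceptional - caller-owned value (shmem swap and DAX
	 *                       entries are the in-tree users)
	 *   ..00  ordinary data pointer
	 */
	static inline bool entry_is_exceptional(void *entry)
	{
		return (unsigned long)entry & 2; /* RADIX_TREE_EXCEPTIONAL_ENTRY */
	}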
@@ -534,6 +575,104 @@ out:
534} 575}
535 576
536/** 577/**
578 * radix_tree_shrink - shrink radix tree to minimum height
 579 * @root: radix tree root
580 */
581static inline void radix_tree_shrink(struct radix_tree_root *root,
582 radix_tree_update_node_t update_node,
583 void *private)
584{
585 for (;;) {
586 struct radix_tree_node *node = root->rnode;
587 struct radix_tree_node *child;
588
589 if (!radix_tree_is_internal_node(node))
590 break;
591 node = entry_to_node(node);
592
593 /*
 594 * If the candidate node has more than one child, or its child
595 * is not at the leftmost slot, or the child is a multiorder
596 * entry, we cannot shrink.
597 */
598 if (node->count != 1)
599 break;
600 child = node->slots[0];
601 if (!child)
602 break;
603 if (!radix_tree_is_internal_node(child) && node->shift)
604 break;
605
606 if (radix_tree_is_internal_node(child))
607 entry_to_node(child)->parent = NULL;
608
609 /*
610 * We don't need rcu_assign_pointer(), since we are simply
611 * moving the node from one part of the tree to another: if it
612 * was safe to dereference the old pointer to it
613 * (node->slots[0]), it will be safe to dereference the new
614 * one (root->rnode) as far as dependent read barriers go.
615 */
616 root->rnode = child;
617
618 /*
619 * We have a dilemma here. The node's slot[0] must not be
620 * NULLed in case there are concurrent lookups expecting to
621 * find the item. However if this was a bottom-level node,
622 * then it may be subject to the slot pointer being visible
 623 * to callers dereferencing it. If the item corresponding to
624 * slot[0] is subsequently deleted, these callers would expect
625 * their slot to become empty sooner or later.
626 *
627 * For example, lockless pagecache will look up a slot, deref
628 * the page pointer, and if the page has 0 refcount it means it
629 * was concurrently deleted from pagecache so try the deref
630 * again. Fortunately there is already a requirement for logic
631 * to retry the entire slot lookup -- the indirect pointer
632 * problem (replacing direct root node with an indirect pointer
633 * also results in a stale slot). So tag the slot as indirect
634 * to force callers to retry.
635 */
636 node->count = 0;
637 if (!radix_tree_is_internal_node(child)) {
638 node->slots[0] = RADIX_TREE_RETRY;
639 if (update_node)
640 update_node(node, private);
641 }
642
643 radix_tree_node_free(node);
644 }
645}
646
647static void delete_node(struct radix_tree_root *root,
648 struct radix_tree_node *node,
649 radix_tree_update_node_t update_node, void *private)
650{
651 do {
652 struct radix_tree_node *parent;
653
654 if (node->count) {
655 if (node == entry_to_node(root->rnode))
656 radix_tree_shrink(root, update_node, private);
657 return;
658 }
659
660 parent = node->parent;
661 if (parent) {
662 parent->slots[node->offset] = NULL;
663 parent->count--;
664 } else {
665 root_tag_clear_all(root);
666 root->rnode = NULL;
667 }
668
669 radix_tree_node_free(node);
670
671 node = parent;
672 } while (node);
673}
674
675/**
537 * __radix_tree_create - create a slot in a radix tree 676 * __radix_tree_create - create a slot in a radix tree
538 * @root: radix tree root 677 * @root: radix tree root
539 * @index: index key 678 * @index: index key
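
The RADIX_TREE_RETRY poisoning described in the comment above is what the public deref helpers exist to detect. A lockless reader is expected to look roughly like this sketch (find_get_entry() in mm/filemap.c has this shape); root, index and what you then do with the entry are the caller's business:

	void **slot, *entry;

	rcu_read_lock();
	repeat:
	slot = radix_tree_lookup_slot(root, index);
	if (!slot)
		goto out;
	entry = radix_tree_deref_slot(slot);
	if (!entry)
		goto out;
	if (radix_tree_deref_retry(entry))
		/* the node holding this slot was shrunk away: redo the walk */
		goto repeat;
	/* ... use entry under RCU, revalidating refcounts as needed ... */
	out:
	rcu_read_unlock();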
@@ -563,26 +702,24 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
563 shift = radix_tree_load_root(root, &child, &maxindex); 702 shift = radix_tree_load_root(root, &child, &maxindex);
564 703
565 /* Make sure the tree is high enough. */ 704 /* Make sure the tree is high enough. */
705 if (order > 0 && max == ((1UL << order) - 1))
706 max++;
566 if (max > maxindex) { 707 if (max > maxindex) {
567 int error = radix_tree_extend(root, max, shift); 708 int error = radix_tree_extend(root, max, shift);
568 if (error < 0) 709 if (error < 0)
569 return error; 710 return error;
570 shift = error; 711 shift = error;
571 child = root->rnode; 712 child = root->rnode;
572 if (order == shift)
573 shift += RADIX_TREE_MAP_SHIFT;
574 } 713 }
575 714
576 while (shift > order) { 715 while (shift > order) {
577 shift -= RADIX_TREE_MAP_SHIFT; 716 shift -= RADIX_TREE_MAP_SHIFT;
578 if (child == NULL) { 717 if (child == NULL) {
579 /* Have to add a child node. */ 718 /* Have to add a child node. */
580 child = radix_tree_node_alloc(root); 719 child = radix_tree_node_alloc(root, node, shift,
720 offset, 0, 0);
581 if (!child) 721 if (!child)
582 return -ENOMEM; 722 return -ENOMEM;
583 child->shift = shift;
584 child->offset = offset;
585 child->parent = node;
586 rcu_assign_pointer(*slot, node_to_entry(child)); 723 rcu_assign_pointer(*slot, node_to_entry(child));
587 if (node) 724 if (node)
588 node->count++; 725 node->count++;
@@ -595,31 +732,125 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
595 slot = &node->slots[offset]; 732 slot = &node->slots[offset];
596 } 733 }
597 734
735 if (nodep)
736 *nodep = node;
737 if (slotp)
738 *slotp = slot;
739 return 0;
740}
741
598#ifdef CONFIG_RADIX_TREE_MULTIORDER 742#ifdef CONFIG_RADIX_TREE_MULTIORDER
599 /* Insert pointers to the canonical entry */ 743/*
600 if (order > shift) { 744 * Free any nodes below this node. The tree is presumed to not need
601 unsigned i, n = 1 << (order - shift); 745 * shrinking, and any user data in the tree is presumed to not need a
746 * destructor called on it. If we need to add a destructor, we can
747 * add that functionality later. Note that we may not clear tags or
748 * slots from the tree as an RCU walker may still have a pointer into
749 * this subtree. We could replace the entries with RADIX_TREE_RETRY,
750 * but we'll still have to clear those in rcu_free.
751 */
752static void radix_tree_free_nodes(struct radix_tree_node *node)
753{
754 unsigned offset = 0;
755 struct radix_tree_node *child = entry_to_node(node);
756
757 for (;;) {
758 void *entry = child->slots[offset];
759 if (radix_tree_is_internal_node(entry) &&
760 !is_sibling_entry(child, entry)) {
761 child = entry_to_node(entry);
762 offset = 0;
763 continue;
764 }
765 offset++;
766 while (offset == RADIX_TREE_MAP_SIZE) {
767 struct radix_tree_node *old = child;
768 offset = child->offset + 1;
769 child = child->parent;
770 radix_tree_node_free(old);
771 if (old == entry_to_node(node))
772 return;
773 }
774 }
775}
776
777static inline int insert_entries(struct radix_tree_node *node, void **slot,
778 void *item, unsigned order, bool replace)
779{
780 struct radix_tree_node *child;
781 unsigned i, n, tag, offset, tags = 0;
782
783 if (node) {
784 if (order > node->shift)
785 n = 1 << (order - node->shift);
786 else
787 n = 1;
788 offset = get_slot_offset(node, slot);
789 } else {
790 n = 1;
791 offset = 0;
792 }
793
794 if (n > 1) {
602 offset = offset & ~(n - 1); 795 offset = offset & ~(n - 1);
603 slot = &node->slots[offset]; 796 slot = &node->slots[offset];
604 child = node_to_entry(slot); 797 }
605 for (i = 0; i < n; i++) { 798 child = node_to_entry(slot);
606 if (slot[i]) 799
800 for (i = 0; i < n; i++) {
801 if (slot[i]) {
802 if (replace) {
803 node->count--;
804 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
805 if (tag_get(node, tag, offset + i))
806 tags |= 1 << tag;
807 } else
607 return -EEXIST; 808 return -EEXIST;
608 } 809 }
810 }
609 811
610 for (i = 1; i < n; i++) { 812 for (i = 0; i < n; i++) {
813 struct radix_tree_node *old = slot[i];
814 if (i) {
611 rcu_assign_pointer(slot[i], child); 815 rcu_assign_pointer(slot[i], child);
612 node->count++; 816 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
817 if (tags & (1 << tag))
818 tag_clear(node, tag, offset + i);
819 } else {
820 rcu_assign_pointer(slot[i], item);
821 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
822 if (tags & (1 << tag))
823 tag_set(node, tag, offset);
613 } 824 }
825 if (radix_tree_is_internal_node(old) &&
826 !is_sibling_entry(node, old) &&
827 (old != RADIX_TREE_RETRY))
828 radix_tree_free_nodes(old);
829 if (radix_tree_exceptional_entry(old))
830 node->exceptional--;
614 } 831 }
615#endif 832 if (node) {
616 833 node->count += n;
617 if (nodep) 834 if (radix_tree_exceptional_entry(item))
618 *nodep = node; 835 node->exceptional += n;
619 if (slotp) 836 }
620 *slotp = slot; 837 return n;
621 return 0; 838}
839#else
840static inline int insert_entries(struct radix_tree_node *node, void **slot,
841 void *item, unsigned order, bool replace)
842{
843 if (*slot)
844 return -EEXIST;
845 rcu_assign_pointer(*slot, item);
846 if (node) {
847 node->count++;
848 if (radix_tree_exceptional_entry(item))
849 node->exceptional++;
850 }
851 return 1;
622} 852}
853#endif
623 854
624/** 855/**
625 * __radix_tree_insert - insert into a radix tree 856 * __radix_tree_insert - insert into a radix tree
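
insert_entries() is where the multiorder slot layout gets established: an entry of order `order` stored in a node of shift `shift` claims n = 1 << (order - shift) consecutive slots, the first holding the item and the others holding sibling pointers back at the canonical slot. For example, with RADIX_TREE_MAP_SHIFT == 6:

	/*
	 * Order-8 entry in a shift-6 node: n = 1 << (8 - 6) = 4 slots,
	 * with offset rounded down to a multiple of 4:
	 *
	 *   slots[off+0] = item                        <- canonical entry
	 *   slots[off+1] = node_to_entry(&slots[off])  <- sibling
	 *   slots[off+2] = node_to_entry(&slots[off])  <- sibling
	 *   slots[off+3] = node_to_entry(&slots[off])  <- sibling
	 *
	 * A lookup descending into any of the four slots recognises the
	 * sibling encoding and lands back on slots[off], so every index in
	 * the order-8 range resolves to the one canonical item.
	 */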
@@ -642,13 +873,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
642 error = __radix_tree_create(root, index, order, &node, &slot); 873 error = __radix_tree_create(root, index, order, &node, &slot);
643 if (error) 874 if (error)
644 return error; 875 return error;
645 if (*slot != NULL) 876
646 return -EEXIST; 877 error = insert_entries(node, slot, item, order, false);
647 rcu_assign_pointer(*slot, item); 878 if (error < 0)
879 return error;
648 880
649 if (node) { 881 if (node) {
650 unsigned offset = get_slot_offset(node, slot); 882 unsigned offset = get_slot_offset(node, slot);
651 node->count++;
652 BUG_ON(tag_get(node, 0, offset)); 883 BUG_ON(tag_get(node, 0, offset));
653 BUG_ON(tag_get(node, 1, offset)); 884 BUG_ON(tag_get(node, 1, offset));
654 BUG_ON(tag_get(node, 2, offset)); 885 BUG_ON(tag_get(node, 2, offset));
@@ -746,6 +977,287 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
746} 977}
747EXPORT_SYMBOL(radix_tree_lookup); 978EXPORT_SYMBOL(radix_tree_lookup);
748 979
980static inline int slot_count(struct radix_tree_node *node,
981 void **slot)
982{
983 int n = 1;
984#ifdef CONFIG_RADIX_TREE_MULTIORDER
985 void *ptr = node_to_entry(slot);
986 unsigned offset = get_slot_offset(node, slot);
987 int i;
988
989 for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
990 if (node->slots[offset + i] != ptr)
991 break;
992 n++;
993 }
994#endif
995 return n;
996}
997
998static void replace_slot(struct radix_tree_root *root,
999 struct radix_tree_node *node,
1000 void **slot, void *item,
1001 bool warn_typeswitch)
1002{
1003 void *old = rcu_dereference_raw(*slot);
1004 int count, exceptional;
1005
1006 WARN_ON_ONCE(radix_tree_is_internal_node(item));
1007
1008 count = !!item - !!old;
1009 exceptional = !!radix_tree_exceptional_entry(item) -
1010 !!radix_tree_exceptional_entry(old);
1011
1012 WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
1013
1014 if (node) {
1015 node->count += count;
1016 if (exceptional) {
1017 exceptional *= slot_count(node, slot);
1018 node->exceptional += exceptional;
1019 }
1020 }
1021
1022 rcu_assign_pointer(*slot, item);
1023}
1024
1025static inline void delete_sibling_entries(struct radix_tree_node *node,
1026 void **slot)
1027{
1028#ifdef CONFIG_RADIX_TREE_MULTIORDER
1029 bool exceptional = radix_tree_exceptional_entry(*slot);
1030 void *ptr = node_to_entry(slot);
1031 unsigned offset = get_slot_offset(node, slot);
1032 int i;
1033
1034 for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
1035 if (node->slots[offset + i] != ptr)
1036 break;
1037 node->slots[offset + i] = NULL;
1038 node->count--;
1039 if (exceptional)
1040 node->exceptional--;
1041 }
1042#endif
1043}
1044
1045/**
1046 * __radix_tree_replace - replace item in a slot
1047 * @root: radix tree root
1048 * @node: pointer to tree node
1049 * @slot: pointer to slot in @node
1050 * @item: new item to store in the slot.
1051 * @update_node: callback for changing leaf nodes
1052 * @private: private data to pass to @update_node
1053 *
1054 * For use with __radix_tree_lookup(). Caller must hold tree write locked
1055 * across slot lookup and replacement.
1056 */
1057void __radix_tree_replace(struct radix_tree_root *root,
1058 struct radix_tree_node *node,
1059 void **slot, void *item,
1060 radix_tree_update_node_t update_node, void *private)
1061{
1062 if (!item)
1063 delete_sibling_entries(node, slot);
1064 /*
1065 * This function supports replacing exceptional entries and
1066 * deleting entries, but that needs accounting against the
1067 * node unless the slot is root->rnode.
1068 */
1069 replace_slot(root, node, slot, item,
1070 !node && slot != (void **)&root->rnode);
1071
1072 if (!node)
1073 return;
1074
1075 if (update_node)
1076 update_node(node, private);
1077
1078 delete_node(root, node, update_node, private);
1079}
1080
1081/**
1082 * radix_tree_replace_slot - replace item in a slot
1083 * @root: radix tree root
1084 * @slot: pointer to slot
1085 * @item: new item to store in the slot.
1086 *
1087 * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
1088 * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked
1089 * across slot lookup and replacement.
1090 *
1091 * NOTE: This cannot be used to switch between non-entries (empty slots),
1092 * regular entries, and exceptional entries, as that requires accounting
1093 * inside the radix tree node. When switching from one type of entry or
1094 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
1095 * radix_tree_iter_replace().
1096 */
1097void radix_tree_replace_slot(struct radix_tree_root *root,
1098 void **slot, void *item)
1099{
1100 replace_slot(root, NULL, slot, item, true);
1101}
1102
1103/**
1104 * radix_tree_iter_replace - replace item in a slot
1105 * @root: radix tree root
1106 * @slot: pointer to slot
1107 * @item: new item to store in the slot.
1108 *
1109 * For use with radix_tree_split() and radix_tree_for_each_slot().
1110 * Caller must hold tree write locked across split and replacement.
1111 */
1112void radix_tree_iter_replace(struct radix_tree_root *root,
1113 const struct radix_tree_iter *iter, void **slot, void *item)
1114{
1115 __radix_tree_replace(root, iter->node, slot, item, NULL, NULL);
1116}
1117
1118#ifdef CONFIG_RADIX_TREE_MULTIORDER
1119/**
1120 * radix_tree_join - replace multiple entries with one multiorder entry
1121 * @root: radix tree root
1122 * @index: an index inside the new entry
1123 * @order: order of the new entry
1124 * @item: new entry
1125 *
1126 * Call this function to replace several entries with one larger entry.
1127 * The existing entries are presumed to not need freeing as a result of
1128 * this call.
1129 *
1130 * The replacement entry will have all the tags set on it that were set
1131 * on any of the entries it is replacing.
1132 */
1133int radix_tree_join(struct radix_tree_root *root, unsigned long index,
1134 unsigned order, void *item)
1135{
1136 struct radix_tree_node *node;
1137 void **slot;
1138 int error;
1139
1140 BUG_ON(radix_tree_is_internal_node(item));
1141
1142 error = __radix_tree_create(root, index, order, &node, &slot);
1143 if (!error)
1144 error = insert_entries(node, slot, item, order, true);
1145 if (error > 0)
1146 error = 0;
1147
1148 return error;
1149}
1150
1151/**
1152 * radix_tree_split - Split an entry into smaller entries
1153 * @root: radix tree root
1154 * @index: An index within the large entry
1155 * @order: Order of new entries
1156 *
1157 * Call this function as the first step in replacing a multiorder entry
1158 * with several entries of lower order. After this function returns,
1159 * loop over the relevant portion of the tree using radix_tree_for_each_slot()
1160 * and call radix_tree_iter_replace() to set up each new entry.
1161 *
1162 * The tags from this entry are replicated to all the new entries.
1163 *
1164 * The radix tree should be locked against modification during the entire
1165 * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which
1166 * should prompt RCU walkers to restart the lookup from the root.
1167 */
1168int radix_tree_split(struct radix_tree_root *root, unsigned long index,
1169 unsigned order)
1170{
1171 struct radix_tree_node *parent, *node, *child;
1172 void **slot;
1173 unsigned int offset, end;
1174 unsigned n, tag, tags = 0;
1175
1176 if (!__radix_tree_lookup(root, index, &parent, &slot))
1177 return -ENOENT;
1178 if (!parent)
1179 return -ENOENT;
1180
1181 offset = get_slot_offset(parent, slot);
1182
1183 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1184 if (tag_get(parent, tag, offset))
1185 tags |= 1 << tag;
1186
1187 for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
1188 if (!is_sibling_entry(parent, parent->slots[end]))
1189 break;
1190 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1191 if (tags & (1 << tag))
1192 tag_set(parent, tag, end);
1193 /* rcu_assign_pointer ensures tags are set before RETRY */
1194 rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
1195 }
1196 rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
1197 parent->exceptional -= (end - offset);
1198
1199 if (order == parent->shift)
1200 return 0;
1201 if (order > parent->shift) {
1202 while (offset < end)
1203 offset += insert_entries(parent, &parent->slots[offset],
1204 RADIX_TREE_RETRY, order, true);
1205 return 0;
1206 }
1207
1208 node = parent;
1209
1210 for (;;) {
1211 if (node->shift > order) {
1212 child = radix_tree_node_alloc(root, node,
1213 node->shift - RADIX_TREE_MAP_SHIFT,
1214 offset, 0, 0);
1215 if (!child)
1216 goto nomem;
1217 if (node != parent) {
1218 node->count++;
1219 node->slots[offset] = node_to_entry(child);
1220 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1221 if (tags & (1 << tag))
1222 tag_set(node, tag, offset);
1223 }
1224
1225 node = child;
1226 offset = 0;
1227 continue;
1228 }
1229
1230 n = insert_entries(node, &node->slots[offset],
1231 RADIX_TREE_RETRY, order, false);
1232 BUG_ON(n > RADIX_TREE_MAP_SIZE);
1233
1234 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1235 if (tags & (1 << tag))
1236 tag_set(node, tag, offset);
1237 offset += n;
1238
1239 while (offset == RADIX_TREE_MAP_SIZE) {
1240 if (node == parent)
1241 break;
1242 offset = node->offset;
1243 child = node;
1244 node = node->parent;
1245 rcu_assign_pointer(node->slots[offset],
1246 node_to_entry(child));
1247 offset++;
1248 }
1249 if ((node == parent) && (offset == end))
1250 return 0;
1251 }
1252
1253 nomem:
1254 /* Shouldn't happen; did user forget to preload? */
1255 /* TODO: free all the allocated nodes */
1256 WARN_ON(1);
1257 return -ENOMEM;
1258}
1259#endif
1260
749/** 1261/**
750 * radix_tree_tag_set - set a tag on a radix tree node 1262 * radix_tree_tag_set - set a tag on a radix tree node
751 * @root: radix tree root 1263 * @root: radix tree root
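
Taken together, the kerneldoc above prescribes a three-step protocol: preload, split, then walk the affected range replacing the RADIX_TREE_RETRY placeholders. A hedged sketch of splitting one order-9 entry into order-0 entries; tree_lock and make_item() are assumptions, and the error return of radix_tree_split() is elided for brevity:

	unsigned long start = index & ~((1UL << 9) - 1);
	struct radix_tree_iter iter;
	void **slot;

	if (radix_tree_split_preload(9, 0, GFP_KERNEL))
		return -ENOMEM;
	spin_lock(&tree_lock);
	radix_tree_split(root, index, 0);
	radix_tree_for_each_slot(slot, root, &iter, start) {
		if (iter.index >= start + (1UL << 9))
			break;
		radix_tree_iter_replace(root, &iter, slot,
					make_item(iter.index));
	}
	spin_unlock(&tree_lock);
	radix_tree_preload_end();	/* preload left preemption disabled */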
@@ -807,6 +1319,34 @@ static void node_tag_clear(struct radix_tree_root *root,
807 root_tag_clear(root, tag); 1319 root_tag_clear(root, tag);
808} 1320}
809 1321
1322static void node_tag_set(struct radix_tree_root *root,
1323 struct radix_tree_node *node,
1324 unsigned int tag, unsigned int offset)
1325{
1326 while (node) {
1327 if (tag_get(node, tag, offset))
1328 return;
1329 tag_set(node, tag, offset);
1330 offset = node->offset;
1331 node = node->parent;
1332 }
1333
1334 if (!root_tag_get(root, tag))
1335 root_tag_set(root, tag);
1336}
1337
1338/**
1339 * radix_tree_iter_tag_set - set a tag on the current iterator entry
1340 * @root: radix tree root
1341 * @iter: iterator state
1342 * @tag: tag to set
1343 */
1344void radix_tree_iter_tag_set(struct radix_tree_root *root,
1345 const struct radix_tree_iter *iter, unsigned int tag)
1346{
1347 node_tag_set(root, iter->node, tag, iter_offset(iter));
1348}
1349
810/** 1350/**
811 * radix_tree_tag_clear - clear a tag on a radix tree node 1351 * radix_tree_tag_clear - clear a tag on a radix tree node
812 * @root: radix tree root 1352 * @root: radix tree root
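
node_tag_set() propagates a tag up from one slot to the root, and radix_tree_iter_tag_set() exposes that for the current iterator position. This is the building block that lets callers open-code radix_tree_range_tag_if_tagged(), whose removal appears further down in this diff; the writeback caller ends up roughly in this shape (the pagecache tag names are real, the rest is a sketch using the caller's mapping, start and end):

	struct radix_tree_iter iter;
	void **slot;

	radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
				   start, PAGECACHE_TAG_DIRTY) {
		if (iter.index > end)
			break;
		radix_tree_iter_tag_set(&mapping->page_tree, &iter,
					PAGECACHE_TAG_TOWRITE);
	}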
@@ -902,6 +1442,121 @@ static inline void __set_iter_shift(struct radix_tree_iter *iter,
902#endif 1442#endif
903} 1443}
904 1444
1445/* Construct iter->tags bit-mask from node->tags[tag] array */
1446static void set_iter_tags(struct radix_tree_iter *iter,
1447 struct radix_tree_node *node, unsigned offset,
1448 unsigned tag)
1449{
1450 unsigned tag_long = offset / BITS_PER_LONG;
1451 unsigned tag_bit = offset % BITS_PER_LONG;
1452
1453 iter->tags = node->tags[tag][tag_long] >> tag_bit;
1454
1455 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
1456 if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
1457 /* Pick tags from next element */
1458 if (tag_bit)
1459 iter->tags |= node->tags[tag][tag_long + 1] <<
1460 (BITS_PER_LONG - tag_bit);
1461 /* Clip chunk size, here only BITS_PER_LONG tags */
1462 iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
1463 }
1464}
1465
1466#ifdef CONFIG_RADIX_TREE_MULTIORDER
1467static void **skip_siblings(struct radix_tree_node **nodep,
1468 void **slot, struct radix_tree_iter *iter)
1469{
1470 void *sib = node_to_entry(slot - 1);
1471
1472 while (iter->index < iter->next_index) {
1473 *nodep = rcu_dereference_raw(*slot);
1474 if (*nodep && *nodep != sib)
1475 return slot;
1476 slot++;
1477 iter->index = __radix_tree_iter_add(iter, 1);
1478 iter->tags >>= 1;
1479 }
1480
1481 *nodep = NULL;
1482 return NULL;
1483}
1484
1485void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
1486 unsigned flags)
1487{
1488 unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
1489 struct radix_tree_node *node = rcu_dereference_raw(*slot);
1490
1491 slot = skip_siblings(&node, slot, iter);
1492
1493 while (radix_tree_is_internal_node(node)) {
1494 unsigned offset;
1495 unsigned long next_index;
1496
1497 if (node == RADIX_TREE_RETRY)
1498 return slot;
1499 node = entry_to_node(node);
1500 iter->node = node;
1501 iter->shift = node->shift;
1502
1503 if (flags & RADIX_TREE_ITER_TAGGED) {
1504 offset = radix_tree_find_next_bit(node, tag, 0);
1505 if (offset == RADIX_TREE_MAP_SIZE)
1506 return NULL;
1507 slot = &node->slots[offset];
1508 iter->index = __radix_tree_iter_add(iter, offset);
1509 set_iter_tags(iter, node, offset, tag);
1510 node = rcu_dereference_raw(*slot);
1511 } else {
1512 offset = 0;
1513 slot = &node->slots[0];
1514 for (;;) {
1515 node = rcu_dereference_raw(*slot);
1516 if (node)
1517 break;
1518 slot++;
1519 offset++;
1520 if (offset == RADIX_TREE_MAP_SIZE)
1521 return NULL;
1522 }
1523 iter->index = __radix_tree_iter_add(iter, offset);
1524 }
1525 if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0))
1526 goto none;
1527 next_index = (iter->index | shift_maxindex(iter->shift)) + 1;
1528 if (next_index < iter->next_index)
1529 iter->next_index = next_index;
1530 }
1531
1532 return slot;
1533 none:
1534 iter->next_index = 0;
1535 return NULL;
1536}
1537EXPORT_SYMBOL(__radix_tree_next_slot);
1538#else
1539static void **skip_siblings(struct radix_tree_node **nodep,
1540 void **slot, struct radix_tree_iter *iter)
1541{
1542 return slot;
1543}
1544#endif
1545
1546void **radix_tree_iter_resume(void **slot, struct radix_tree_iter *iter)
1547{
1548 struct radix_tree_node *node;
1549
1550 slot++;
1551 iter->index = __radix_tree_iter_add(iter, 1);
1552 node = rcu_dereference_raw(*slot);
1553 skip_siblings(&node, slot, iter);
1554 iter->next_index = iter->index;
1555 iter->tags = 0;
1556 return NULL;
1557}
1558EXPORT_SYMBOL(radix_tree_iter_resume);
1559
905/** 1560/**
906 * radix_tree_next_chunk - find next chunk of slots for iteration 1561 * radix_tree_next_chunk - find next chunk of slots for iteration
907 * 1562 *
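
radix_tree_iter_resume() gives iteration a safe restart point: it parks the iterator just past the current entry (skipping any trailing sibling slots) and zeroes the cached tag bits, so the caller may drop the tree lock, sleep, and then let the for_each loop refetch the next chunk. A sketch of the intended usage; tree_lock is an assumption:

	struct radix_tree_iter iter;
	void **slot;

	radix_tree_for_each_slot(slot, root, &iter, 0) {
		/* ... process *slot under the lock ... */
		if (need_resched()) {
			slot = radix_tree_iter_resume(slot, &iter);
			cond_resched_lock(&tree_lock);
		}
	}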
@@ -927,7 +1582,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
927 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. 1582 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
928 * 1583 *
 929 * This condition is also used by radix_tree_next_slot() to stop 1584 * This condition is also used by radix_tree_next_slot() to stop
930 * contiguous iterating, and forbid swithing to the next chunk. 1585 * contiguous iterating, and forbid switching to the next chunk.
931 */ 1586 */
932 index = iter->next_index; 1587 index = iter->next_index;
933 if (!index && iter->index) 1588 if (!index && iter->index)
@@ -945,6 +1600,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
945 iter->index = index; 1600 iter->index = index;
946 iter->next_index = maxindex + 1; 1601 iter->next_index = maxindex + 1;
947 iter->tags = 1; 1602 iter->tags = 1;
1603 iter->node = NULL;
948 __set_iter_shift(iter, 0); 1604 __set_iter_shift(iter, 0);
949 return (void **)&root->rnode; 1605 return (void **)&root->rnode;
950 } 1606 }
@@ -960,9 +1616,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
960 return NULL; 1616 return NULL;
961 1617
962 if (flags & RADIX_TREE_ITER_TAGGED) 1618 if (flags & RADIX_TREE_ITER_TAGGED)
963 offset = radix_tree_find_next_bit( 1619 offset = radix_tree_find_next_bit(node, tag,
964 node->tags[tag],
965 RADIX_TREE_MAP_SIZE,
966 offset + 1); 1620 offset + 1);
967 else 1621 else
968 while (++offset < RADIX_TREE_MAP_SIZE) { 1622 while (++offset < RADIX_TREE_MAP_SIZE) {
@@ -982,154 +1636,26 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
982 child = rcu_dereference_raw(node->slots[offset]); 1636 child = rcu_dereference_raw(node->slots[offset]);
983 } 1637 }
984 1638
985 if ((child == NULL) || (child == RADIX_TREE_RETRY)) 1639 if (!child)
986 goto restart; 1640 goto restart;
1641 if (child == RADIX_TREE_RETRY)
1642 break;
987 } while (radix_tree_is_internal_node(child)); 1643 } while (radix_tree_is_internal_node(child));
988 1644
989 /* Update the iterator state */ 1645 /* Update the iterator state */
990 iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); 1646 iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
991 iter->next_index = (index | node_maxindex(node)) + 1; 1647 iter->next_index = (index | node_maxindex(node)) + 1;
1648 iter->node = node;
992 __set_iter_shift(iter, node->shift); 1649 __set_iter_shift(iter, node->shift);
993 1650
994 /* Construct iter->tags bit-mask from node->tags[tag] array */ 1651 if (flags & RADIX_TREE_ITER_TAGGED)
995 if (flags & RADIX_TREE_ITER_TAGGED) { 1652 set_iter_tags(iter, node, offset, tag);
996 unsigned tag_long, tag_bit;
997
998 tag_long = offset / BITS_PER_LONG;
999 tag_bit = offset % BITS_PER_LONG;
1000 iter->tags = node->tags[tag][tag_long] >> tag_bit;
1001 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
1002 if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
1003 /* Pick tags from next element */
1004 if (tag_bit)
1005 iter->tags |= node->tags[tag][tag_long + 1] <<
1006 (BITS_PER_LONG - tag_bit);
1007 /* Clip chunk size, here only BITS_PER_LONG tags */
1008 iter->next_index = index + BITS_PER_LONG;
1009 }
1010 }
1011 1653
1012 return node->slots + offset; 1654 return node->slots + offset;
1013} 1655}
1014EXPORT_SYMBOL(radix_tree_next_chunk); 1656EXPORT_SYMBOL(radix_tree_next_chunk);
1015 1657
1016/** 1658/**
1017 * radix_tree_range_tag_if_tagged - for each item in given range set given
1018 * tag if item has another tag set
1019 * @root: radix tree root
1020 * @first_indexp: pointer to a starting index of a range to scan
1021 * @last_index: last index of a range to scan
1022 * @nr_to_tag: maximum number items to tag
1023 * @iftag: tag index to test
1024 * @settag: tag index to set if tested tag is set
1025 *
1026 * This function scans range of radix tree from first_index to last_index
1027 * (inclusive). For each item in the range if iftag is set, the function sets
1028 * also settag. The function stops either after tagging nr_to_tag items or
1029 * after reaching last_index.
1030 *
1031 * The tags must be set from the leaf level only and propagated back up the
1032 * path to the root. We must do this so that we resolve the full path before
1033 * setting any tags on intermediate nodes. If we set tags as we descend, then
1034 * we can get to the leaf node and find that the index that has the iftag
1035 * set is outside the range we are scanning. This results in dangling tags and
1036 * can lead to problems with later tag operations (e.g. livelocks on lookups).
1037 *
1038 * The function returns the number of leaves where the tag was set and sets
1039 * *first_indexp to the first unscanned index.
1040 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
1041 * be prepared to handle that.
1042 */
1043unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
1044 unsigned long *first_indexp, unsigned long last_index,
1045 unsigned long nr_to_tag,
1046 unsigned int iftag, unsigned int settag)
1047{
1048 struct radix_tree_node *parent, *node, *child;
1049 unsigned long maxindex;
1050 unsigned long tagged = 0;
1051 unsigned long index = *first_indexp;
1052
1053 radix_tree_load_root(root, &child, &maxindex);
1054 last_index = min(last_index, maxindex);
1055 if (index > last_index)
1056 return 0;
1057 if (!nr_to_tag)
1058 return 0;
1059 if (!root_tag_get(root, iftag)) {
1060 *first_indexp = last_index + 1;
1061 return 0;
1062 }
1063 if (!radix_tree_is_internal_node(child)) {
1064 *first_indexp = last_index + 1;
1065 root_tag_set(root, settag);
1066 return 1;
1067 }
1068
1069 node = entry_to_node(child);
1070
1071 for (;;) {
1072 unsigned offset = radix_tree_descend(node, &child, index);
1073 if (!child)
1074 goto next;
1075 if (!tag_get(node, iftag, offset))
1076 goto next;
1077 /* Sibling slots never have tags set on them */
1078 if (radix_tree_is_internal_node(child)) {
1079 node = entry_to_node(child);
1080 continue;
1081 }
1082
1083 /* tag the leaf */
1084 tagged++;
1085 tag_set(node, settag, offset);
1086
1087 /* walk back up the path tagging interior nodes */
1088 parent = node;
1089 for (;;) {
1090 offset = parent->offset;
1091 parent = parent->parent;
1092 if (!parent)
1093 break;
1094 /* stop if we find a node with the tag already set */
1095 if (tag_get(parent, settag, offset))
1096 break;
1097 tag_set(parent, settag, offset);
1098 }
1099 next:
1100 /* Go to next entry in node */
1101 index = ((index >> node->shift) + 1) << node->shift;
1102 /* Overflow can happen when last_index is ~0UL... */
1103 if (index > last_index || !index)
1104 break;
1105 offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
1106 while (offset == 0) {
1107 /*
1108 * We've fully scanned this node. Go up. Because
1109 * last_index is guaranteed to be in the tree, what
1110 * we do below cannot wander astray.
1111 */
1112 node = node->parent;
1113 offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
1114 }
1115 if (is_sibling_entry(node, node->slots[offset]))
1116 goto next;
1117 if (tagged >= nr_to_tag)
1118 break;
1119 }
1120 /*
1121 * We need not tag the root tag if there is no tag which is set with
1122 * settag within the range from *first_indexp to last_index.
1123 */
1124 if (tagged > 0)
1125 root_tag_set(root, settag);
1126 *first_indexp = index;
1127
1128 return tagged;
1129}
1130EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
1131
1132/**
1133 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 1659 * radix_tree_gang_lookup - perform multiple lookup on a radix tree
1134 * @root: radix tree root 1660 * @root: radix tree root
1135 * @results: where the results of the lookup are placed 1661 * @results: where the results of the lookup are placed
@@ -1294,174 +1820,6 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1294} 1820}
1295EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1821EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
1296 1822
1297#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
1298#include <linux/sched.h> /* for cond_resched() */
1299
1300struct locate_info {
1301 unsigned long found_index;
1302 bool stop;
1303};
1304
1305/*
1306 * This linear search is at present only useful to shmem_unuse_inode().
1307 */
1308static unsigned long __locate(struct radix_tree_node *slot, void *item,
1309 unsigned long index, struct locate_info *info)
1310{
1311 unsigned long i;
1312
1313 do {
1314 unsigned int shift = slot->shift;
1315
1316 for (i = (index >> shift) & RADIX_TREE_MAP_MASK;
1317 i < RADIX_TREE_MAP_SIZE;
1318 i++, index += (1UL << shift)) {
1319 struct radix_tree_node *node =
1320 rcu_dereference_raw(slot->slots[i]);
1321 if (node == RADIX_TREE_RETRY)
1322 goto out;
1323 if (!radix_tree_is_internal_node(node)) {
1324 if (node == item) {
1325 info->found_index = index;
1326 info->stop = true;
1327 goto out;
1328 }
1329 continue;
1330 }
1331 node = entry_to_node(node);
1332 if (is_sibling_entry(slot, node))
1333 continue;
1334 slot = node;
1335 break;
1336 }
1337 } while (i < RADIX_TREE_MAP_SIZE);
1338
1339out:
1340 if ((index == 0) && (i == RADIX_TREE_MAP_SIZE))
1341 info->stop = true;
1342 return index;
1343}
1344
1345/**
1346 * radix_tree_locate_item - search through radix tree for item
1347 * @root: radix tree root
1348 * @item: item to be found
1349 *
1350 * Returns index where item was found, or -1 if not found.
1351 * Caller must hold no lock (since this time-consuming function needs
1352 * to be preemptible), and must check afterwards if item is still there.
1353 */
1354unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1355{
1356 struct radix_tree_node *node;
1357 unsigned long max_index;
1358 unsigned long cur_index = 0;
1359 struct locate_info info = {
1360 .found_index = -1,
1361 .stop = false,
1362 };
1363
1364 do {
1365 rcu_read_lock();
1366 node = rcu_dereference_raw(root->rnode);
1367 if (!radix_tree_is_internal_node(node)) {
1368 rcu_read_unlock();
1369 if (node == item)
1370 info.found_index = 0;
1371 break;
1372 }
1373
1374 node = entry_to_node(node);
1375
1376 max_index = node_maxindex(node);
1377 if (cur_index > max_index) {
1378 rcu_read_unlock();
1379 break;
1380 }
1381
1382 cur_index = __locate(node, item, cur_index, &info);
1383 rcu_read_unlock();
1384 cond_resched();
1385 } while (!info.stop && cur_index <= max_index);
1386
1387 return info.found_index;
1388}
1389#else
1390unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1391{
1392 return -1;
1393}
1394#endif /* CONFIG_SHMEM && CONFIG_SWAP */
1395
1396/**
1397 * radix_tree_shrink - shrink radix tree to minimum height
1398 * @root radix tree root
1399 */
1400static inline bool radix_tree_shrink(struct radix_tree_root *root)
1401{
1402 bool shrunk = false;
1403
1404 for (;;) {
1405 struct radix_tree_node *node = root->rnode;
1406 struct radix_tree_node *child;
1407
1408 if (!radix_tree_is_internal_node(node))
1409 break;
1410 node = entry_to_node(node);
1411
1412 /*
1413 * If the candidate node has more than one child, or its child
1414 * is not at the leftmost slot, or the child is a multiorder
1415 * entry, we cannot shrink.
1416 */
1417 if (node->count != 1)
1418 break;
1419 child = node->slots[0];
1420 if (!child)
1421 break;
1422 if (!radix_tree_is_internal_node(child) && node->shift)
1423 break;
1424
1425 if (radix_tree_is_internal_node(child))
1426 entry_to_node(child)->parent = NULL;
1427
1428 /*
1429 * We don't need rcu_assign_pointer(), since we are simply
1430 * moving the node from one part of the tree to another: if it
1431 * was safe to dereference the old pointer to it
1432 * (node->slots[0]), it will be safe to dereference the new
1433 * one (root->rnode) as far as dependent read barriers go.
1434 */
1435 root->rnode = child;
1436
1437 /*
1438 * We have a dilemma here. The node's slot[0] must not be
1439 * NULLed in case there are concurrent lookups expecting to
1440 * find the item. However if this was a bottom-level node,
1441 * then it may be subject to the slot pointer being visible
1442 * to callers dereferencing it. If the item corresponding to
1443 * slot[0] is subsequently deleted, these callers would expect
1444 * their slot to become empty sooner or later.
1445 *
1446 * For example, lockless pagecache will look up a slot, deref
1447 * the page pointer, and if the page has 0 refcount it means it
1448 * was concurrently deleted from pagecache so try the deref
1449 * again. Fortunately there is already a requirement for logic
1450 * to retry the entire slot lookup -- the indirect pointer
1451 * problem (replacing direct root node with an indirect pointer
1452 * also results in a stale slot). So tag the slot as indirect
1453 * to force callers to retry.
1454 */
1455 if (!radix_tree_is_internal_node(child))
1456 node->slots[0] = RADIX_TREE_RETRY;
1457
1458 radix_tree_node_free(node);
1459 shrunk = true;
1460 }
1461
1462 return shrunk;
1463}
1464
1465/** 1823/**
1466 * __radix_tree_delete_node - try to free node after clearing a slot 1824 * __radix_tree_delete_node - try to free node after clearing a slot
1467 * @root: radix tree root 1825 * @root: radix tree root
@@ -1470,53 +1828,11 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
1470 * After clearing the slot at @index in @node from radix tree 1828 * After clearing the slot at @index in @node from radix tree
1471 * rooted at @root, call this function to attempt freeing the 1829 * rooted at @root, call this function to attempt freeing the
1472 * node and shrinking the tree. 1830 * node and shrinking the tree.
1473 *
1474 * Returns %true if @node was freed, %false otherwise.
1475 */ 1831 */
1476bool __radix_tree_delete_node(struct radix_tree_root *root, 1832void __radix_tree_delete_node(struct radix_tree_root *root,
1477 struct radix_tree_node *node) 1833 struct radix_tree_node *node)
1478{ 1834{
1479 bool deleted = false; 1835 delete_node(root, node, NULL, NULL);
1480
1481 do {
1482 struct radix_tree_node *parent;
1483
1484 if (node->count) {
1485 if (node == entry_to_node(root->rnode))
1486 deleted |= radix_tree_shrink(root);
1487 return deleted;
1488 }
1489
1490 parent = node->parent;
1491 if (parent) {
1492 parent->slots[node->offset] = NULL;
1493 parent->count--;
1494 } else {
1495 root_tag_clear_all(root);
1496 root->rnode = NULL;
1497 }
1498
1499 radix_tree_node_free(node);
1500 deleted = true;
1501
1502 node = parent;
1503 } while (node);
1504
1505 return deleted;
1506}
1507
1508static inline void delete_sibling_entries(struct radix_tree_node *node,
1509 void *ptr, unsigned offset)
1510{
1511#ifdef CONFIG_RADIX_TREE_MULTIORDER
1512 int i;
1513 for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
1514 if (node->slots[offset + i] != ptr)
1515 break;
1516 node->slots[offset + i] = NULL;
1517 node->count--;
1518 }
1519#endif
1520} 1836}
1521 1837
1522/** 1838/**
@@ -1558,11 +1874,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
1558 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1874 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1559 node_tag_clear(root, node, tag, offset); 1875 node_tag_clear(root, node, tag, offset);
1560 1876
1561 delete_sibling_entries(node, node_to_entry(slot), offset); 1877 __radix_tree_replace(root, node, slot, NULL, NULL, NULL);
1562 node->slots[offset] = NULL;
1563 node->count--;
1564
1565 __radix_tree_delete_node(root, node);
1566 1878
1567 return entry; 1879 return entry;
1568} 1880}
@@ -1642,32 +1954,31 @@ static __init void radix_tree_init_maxnodes(void)
1642 } 1954 }
1643} 1955}
1644 1956
1645static int radix_tree_callback(struct notifier_block *nfb, 1957static int radix_tree_cpu_dead(unsigned int cpu)
1646 unsigned long action, void *hcpu)
1647{ 1958{
1648 int cpu = (long)hcpu;
1649 struct radix_tree_preload *rtp; 1959 struct radix_tree_preload *rtp;
1650 struct radix_tree_node *node; 1960 struct radix_tree_node *node;
1651 1961
1652 /* Free per-cpu pool of preloaded nodes */ 1962 /* Free per-cpu pool of preloaded nodes */
1653 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1963 rtp = &per_cpu(radix_tree_preloads, cpu);
1654 rtp = &per_cpu(radix_tree_preloads, cpu); 1964 while (rtp->nr) {
1655 while (rtp->nr) { 1965 node = rtp->nodes;
1656 node = rtp->nodes; 1966 rtp->nodes = node->private_data;
1657 rtp->nodes = node->private_data; 1967 kmem_cache_free(radix_tree_node_cachep, node);
1658 kmem_cache_free(radix_tree_node_cachep, node); 1968 rtp->nr--;
1659 rtp->nr--;
1660 }
1661 } 1969 }
1662 return NOTIFY_OK; 1970 return 0;
1663} 1971}
1664 1972
1665void __init radix_tree_init(void) 1973void __init radix_tree_init(void)
1666{ 1974{
1975 int ret;
1667 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1976 radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
1668 sizeof(struct radix_tree_node), 0, 1977 sizeof(struct radix_tree_node), 0,
1669 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1978 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
1670 radix_tree_node_ctor); 1979 radix_tree_node_ctor);
1671 radix_tree_init_maxnodes(); 1980 radix_tree_init_maxnodes();
1672 hotcpu_notifier(radix_tree_callback, 0); 1981 ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
1982 NULL, radix_tree_cpu_dead);
1983 WARN_ON(ret < 0);
1673} 1984}
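
The dead-CPU callback only needs to drain the preload pool because nothing can be refilling it concurrently: the pool is populated exclusively by radix_tree_preload() on a live CPU, with preemption disabled. For reference, the insertion pattern the pool exists to serve; tree_lock is an assumption:

	if (radix_tree_preload(GFP_KERNEL))
		return -ENOMEM;
	spin_lock(&tree_lock);
	err = radix_tree_insert(root, index, item);
	spin_unlock(&tree_lock);
	radix_tree_preload_end();	/* re-enables preemption */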
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
index 76734004358d..20bca3d44f67 100644
--- a/lib/raid6/avx2.c
+++ b/lib/raid6/avx2.c
@@ -87,9 +87,57 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
87 kernel_fpu_end(); 87 kernel_fpu_end();
88} 88}
89 89
90static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
91 size_t bytes, void **ptrs)
92{
93 u8 **dptr = (u8 **)ptrs;
94 u8 *p, *q;
95 int d, z, z0;
96
97 z0 = stop; /* P/Q right side optimization */
98 p = dptr[disks-2]; /* XOR parity */
99 q = dptr[disks-1]; /* RS syndrome */
100
101 kernel_fpu_begin();
102
103 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
104
105 for (d = 0 ; d < bytes ; d += 32) {
106 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
107 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
108 asm volatile("vpxor %ymm4,%ymm2,%ymm2");
109 /* P/Q data pages */
110 for (z = z0-1 ; z >= start ; z--) {
111 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
112 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
113 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
114 asm volatile("vpand %ymm0,%ymm5,%ymm5");
115 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
116 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
117 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
118 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
119 }
120 /* P/Q left side optimization */
121 for (z = start-1 ; z >= 0 ; z--) {
122 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
123 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
124 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
125 asm volatile("vpand %ymm0,%ymm5,%ymm5");
126 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
127 }
128 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
129 /* Don't use movntdq for r/w memory area < cache line */
130 asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
131 asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
132 }
133
134 asm volatile("sfence" : : : "memory");
135 kernel_fpu_end();
136}
137
90const struct raid6_calls raid6_avx2x1 = { 138const struct raid6_calls raid6_avx2x1 = {
91 raid6_avx21_gen_syndrome, 139 raid6_avx21_gen_syndrome,
92 NULL, /* XOR not yet implemented */ 140 raid6_avx21_xor_syndrome,
93 raid6_have_avx2, 141 raid6_have_avx2,
94 "avx2x1", 142 "avx2x1",
95 1 /* Has cache hints */ 143 1 /* Has cache hints */
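
The four-instruction core of these loops (vpcmpgtb, vpaddb, vpand, vpxor against the x1d constant) is a byte-wise multiply-by-2 in GF(2^8) with the RAID-6 generator polynomial 0x11d. A scalar equivalent of one byte lane, for reference:

	#include <linux/types.h>

	static inline u8 gf256_mul2(u8 v)
	{
		u8 mask = (v & 0x80) ? 0xff : 0;     /* vpcmpgtb: sign -> 0xff */
		return (u8)(v << 1) ^ (mask & 0x1d); /* vpaddb, vpand, vpxor  */
	}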
@@ -149,9 +197,77 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
149 kernel_fpu_end(); 197 kernel_fpu_end();
150} 198}
151 199
200static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
201 size_t bytes, void **ptrs)
202{
203 u8 **dptr = (u8 **)ptrs;
204 u8 *p, *q;
205 int d, z, z0;
206
207 z0 = stop; /* P/Q right side optimization */
208 p = dptr[disks-2]; /* XOR parity */
209 q = dptr[disks-1]; /* RS syndrome */
210
211 kernel_fpu_begin();
212
213 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
214
215 for (d = 0 ; d < bytes ; d += 64) {
216 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
217 asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
218 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
219 asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
220 asm volatile("vpxor %ymm4,%ymm2,%ymm2");
221 asm volatile("vpxor %ymm6,%ymm3,%ymm3");
222 /* P/Q data pages */
223 for (z = z0-1 ; z >= start ; z--) {
224 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
225 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
226 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
227 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
228 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
229 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
230 asm volatile("vpand %ymm0,%ymm5,%ymm5");
231 asm volatile("vpand %ymm0,%ymm7,%ymm7");
232 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
233 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
234 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
235 asm volatile("vmovdqa %0,%%ymm7"
236 :: "m" (dptr[z][d+32]));
237 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
238 asm volatile("vpxor %ymm7,%ymm3,%ymm3");
239 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
240 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
241 }
242 /* P/Q left side optimization */
243 for (z = start-1 ; z >= 0 ; z--) {
244 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
245 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
246 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
247 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
248 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
249 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
250 asm volatile("vpand %ymm0,%ymm5,%ymm5");
251 asm volatile("vpand %ymm0,%ymm7,%ymm7");
252 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
253 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
254 }
255 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
256 asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
257 /* Don't use movntdq for r/w memory area < cache line */
258 asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
259 asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
260 asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
261 asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
262 }
263
264 asm volatile("sfence" : : : "memory");
265 kernel_fpu_end();
266}
267
152const struct raid6_calls raid6_avx2x2 = { 268const struct raid6_calls raid6_avx2x2 = {
153 raid6_avx22_gen_syndrome, 269 raid6_avx22_gen_syndrome,
154 NULL, /* XOR not yet implemented */ 270 raid6_avx22_xor_syndrome,
155 raid6_have_avx2, 271 raid6_have_avx2,
156 "avx2x2", 272 "avx2x2",
157 1 /* Has cache hints */ 273 1 /* Has cache hints */
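
The x1 and x2 variants above and the x4 variant below all implement the same read-modify-write recurrence, just across 1, 2 or 4 ymm register pairs per iteration: seed the running Q with the data of the highest changed disk, multiply by 2 and fold in each lower changed disk, keep multiplying through the untouched low disks, and finally XOR the results into the stored P and Q. A scalar sketch of one byte column, reusing gf256_mul2() from the note above (believed to mirror the generic int.uc implementation):

	static void xor_syndrome_ref(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
	{
		u8 **dptr = (u8 **)ptrs;
		u8 *p = dptr[disks - 2];	/* XOR parity */
		u8 *q = dptr[disks - 1];	/* RS syndrome */
		size_t d;
		int z;

		for (d = 0; d < bytes; d++) {
			u8 wq = dptr[stop][d];	/* highest changed disk */
			u8 wp = p[d] ^ wq;	/* fold old parity in */

			for (z = stop - 1; z >= start; z--) {
				wq = gf256_mul2(wq) ^ dptr[z][d];
				wp ^= dptr[z][d];
			}
			/* left of the range only the multiplier advances */
			for (z = start - 1; z >= 0; z--)
				wq = gf256_mul2(wq);

			p[d] = wp;
			q[d] ^= wq;	/* old Q still holds unchanged disks */
		}
	}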
@@ -242,9 +358,119 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
242 kernel_fpu_end(); 358 kernel_fpu_end();
243} 359}
244 360
361static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
362 size_t bytes, void **ptrs)
363{
364 u8 **dptr = (u8 **)ptrs;
365 u8 *p, *q;
366 int d, z, z0;
367
368 z0 = stop; /* P/Q right side optimization */
369 p = dptr[disks-2]; /* XOR parity */
370 q = dptr[disks-1]; /* RS syndrome */
371
372 kernel_fpu_begin();
373
374 asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));
375
376 for (d = 0 ; d < bytes ; d += 128) {
377 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
378 asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
379 asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
380 asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
381 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
382 asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
383 asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
384 asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
385 asm volatile("vpxor %ymm4,%ymm2,%ymm2");
386 asm volatile("vpxor %ymm6,%ymm3,%ymm3");
387 asm volatile("vpxor %ymm12,%ymm10,%ymm10");
388 asm volatile("vpxor %ymm14,%ymm11,%ymm11");
389 /* P/Q data pages */
390 for (z = z0-1 ; z >= start ; z--) {
391 asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
392 asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
393 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
394 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
395 asm volatile("vpxor %ymm13,%ymm13,%ymm13");
396 asm volatile("vpxor %ymm15,%ymm15,%ymm15");
397 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
398 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
399 asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
400 asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
401 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
402 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
403 asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
404 asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
405 asm volatile("vpand %ymm0,%ymm5,%ymm5");
406 asm volatile("vpand %ymm0,%ymm7,%ymm7");
407 asm volatile("vpand %ymm0,%ymm13,%ymm13");
408 asm volatile("vpand %ymm0,%ymm15,%ymm15");
409 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
410 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
411 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
412 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
413 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
414 asm volatile("vmovdqa %0,%%ymm7"
415 :: "m" (dptr[z][d+32]));
416 asm volatile("vmovdqa %0,%%ymm13"
417 :: "m" (dptr[z][d+64]));
418 asm volatile("vmovdqa %0,%%ymm15"
419 :: "m" (dptr[z][d+96]));
420 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
421 asm volatile("vpxor %ymm7,%ymm3,%ymm3");
422 asm volatile("vpxor %ymm13,%ymm10,%ymm10");
423 asm volatile("vpxor %ymm15,%ymm11,%ymm11");
424 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
425 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
426 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
427 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
428 }
429 asm volatile("prefetchnta %0" :: "m" (q[d]));
430 asm volatile("prefetchnta %0" :: "m" (q[d+64]));
431 /* P/Q left side optimization */
432 for (z = start-1 ; z >= 0 ; z--) {
433 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
434 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
435 asm volatile("vpxor %ymm13,%ymm13,%ymm13");
436 asm volatile("vpxor %ymm15,%ymm15,%ymm15");
437 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
438 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
439 asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
440 asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
441 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
442 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
443 asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
444 asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
445 asm volatile("vpand %ymm0,%ymm5,%ymm5");
446 asm volatile("vpand %ymm0,%ymm7,%ymm7");
447 asm volatile("vpand %ymm0,%ymm13,%ymm13");
448 asm volatile("vpand %ymm0,%ymm15,%ymm15");
449 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
450 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
451 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
452 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
453 }
454 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
455 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
456 asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
457 asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
458 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
459 asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
460 asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
461 asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
462 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
463 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
464 asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
465 asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
466 }
467 asm volatile("sfence" : : : "memory");
468 kernel_fpu_end();
469}
470
245const struct raid6_calls raid6_avx2x4 = { 471const struct raid6_calls raid6_avx2x4 = {
246 raid6_avx24_gen_syndrome, 472 raid6_avx24_gen_syndrome,
247 NULL, /* XOR not yet implemented */ 473 raid6_avx24_xor_syndrome,
248 raid6_have_avx2, 474 raid6_have_avx2,
249 "avx2x4", 475 "avx2x4",
250 1 /* Has cache hints */ 476 1 /* Has cache hints */
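All three new xor_syndrome variants lean on the same four-instruction idiom — vpcmpgtb, vpaddb, vpand, vpxor — to multiply 32 GF(2^8) elements by 2 in one register. A minimal scalar sketch of that step (illustrative only, not part of the patch; gf2_mul2 is a hypothetical helper):

	#include <stdint.h>

	/*
	 * Multiply one GF(2^8) element by 2 under the RAID-6 generator
	 * polynomial 0x11d.  The AVX2 loops do this 32 bytes at a time:
	 *   vpcmpgtb against zero   -> 0xff mask for bytes with the top bit set
	 *   vpaddb v,v              -> per-byte shift left by one
	 *   vpand with the x1d constant -> 0x1d where a reduction is needed
	 *   vpxor                   -> fold the reduction back in
	 */
	static inline uint8_t gf2_mul2(uint8_t v)
	{
		uint8_t reduce = (v & 0x80) ? 0x1d : 0x00;

		return (uint8_t)(v << 1) ^ reduce;
	}

Note also why each function has two inner loops: xor_syndrome() refreshes P and Q for the disk range [start, stop] only, so the "right side" pass folds real data pages into both parities, while the "left side" pass just keeps multiplying Q by 2 for the remaining positions without touching P.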
diff --git a/lib/rbtree.c b/lib/rbtree.c
index eb8a19fee110..1f8b112a7c35 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -296,11 +296,26 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
296 * 296 *
297 * (p) (p) 297 * (p) (p)
298 * / \ / \ 298 * / \ / \
299 * N S --> N Sl 299 * N S --> N sl
300 * / \ \ 300 * / \ \
301 * sl Sr s 301 * sl Sr S
302 * \ 302 * \
303 * Sr 303 * Sr
304 *
 305 * Note: p might be red, and then both
 306 * p and sl are red after rotation (which
 307 * breaks property 4). This is fixed in
 308 * Case 4 (in __rb_rotate_set_parents(),
 309 * which sets sl to the color of p
 310 * and sets p to RB_BLACK)
311 *
312 * (p) (sl)
313 * / \ / \
314 * N sl --> P S
315 * \ / \
316 * S N Sr
317 * \
318 * Sr
304 */ 319 */
305 tmp1 = tmp2->rb_right; 320 tmp1 = tmp2->rb_right;
306 WRITE_ONCE(sibling->rb_left, tmp1); 321 WRITE_ONCE(sibling->rb_left, tmp1);
@@ -365,7 +380,7 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
365 } 380 }
366 break; 381 break;
367 } 382 }
368 /* Case 3 - right rotate at sibling */ 383 /* Case 3 - left rotate at sibling */
369 tmp1 = tmp2->rb_left; 384 tmp1 = tmp2->rb_left;
370 WRITE_ONCE(sibling->rb_right, tmp1); 385 WRITE_ONCE(sibling->rb_right, tmp1);
371 WRITE_ONCE(tmp2->rb_left, sibling); 386 WRITE_ONCE(tmp2->rb_left, sibling);
@@ -377,7 +392,7 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
377 tmp1 = sibling; 392 tmp1 = sibling;
378 sibling = tmp2; 393 sibling = tmp2;
379 } 394 }
380 /* Case 4 - left rotate at parent + color flips */ 395 /* Case 4 - right rotate at parent + color flips */
381 tmp2 = sibling->rb_right; 396 tmp2 = sibling->rb_right;
382 WRITE_ONCE(parent->rb_left, tmp2); 397 WRITE_ONCE(parent->rb_left, tmp2);
383 WRITE_ONCE(sibling->rb_right, parent); 398 WRITE_ONCE(sibling->rb_right, parent);
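For context on where ____rb_erase_color() sits: it is the rebalancing path of rb_erase(), entered when removing a black node would violate the red-black invariants (property 4 in the corrected comment is "no red node has a red child"). A minimal usage sketch — mytype and my_insert are hypothetical names around the real lib/rbtree.c API:

	#include <linux/rbtree.h>

	struct mytype {
		struct rb_node node;
		int key;
	};

	static void my_insert(struct rb_root *root, struct mytype *new)
	{
		struct rb_node **link = &root->rb_node, *parent = NULL;

		while (*link) {
			parent = *link;
			link = new->key < rb_entry(parent, struct mytype, node)->key
				? &parent->rb_left : &parent->rb_right;
		}
		rb_link_node(&new->node, parent, link);
		rb_insert_color(&new->node, root);	/* rebalance after insert */
	}

	/* rb_erase(&victim->node, root) is what triggers the erase
	 * rebalancing cases annotated above. */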
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 4d830e299989..f87d138e9672 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -192,6 +192,7 @@ void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace)
192 trace->entries = stack->entries; 192 trace->entries = stack->entries;
193 trace->skip = 0; 193 trace->skip = 0;
194} 194}
195EXPORT_SYMBOL_GPL(depot_fetch_stack);
195 196
196/** 197/**
197 * depot_save_stack - save stack in a stack depot. 198 * depot_save_stack - save stack in a stack depot.
@@ -283,3 +284,4 @@ exit:
283fast_exit: 284fast_exit:
284 return retval; 285 return retval;
285} 286}
287EXPORT_SYMBOL_GPL(depot_save_stack);
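Exporting both entry points lets modules round-trip traces through the depot. A sketch under the 4.10-era struct stack_trace API (record_current_stack and print_recorded_stack are hypothetical names; error handling elided):

	#include <linux/kernel.h>
	#include <linux/stackdepot.h>
	#include <linux/stacktrace.h>

	static depot_stack_handle_t record_current_stack(void)
	{
		unsigned long entries[16];
		struct stack_trace trace = {
			.entries	= entries,
			.max_entries	= ARRAY_SIZE(entries),
			.skip		= 2,	/* drop this helper's own frames */
		};

		save_stack_trace(&trace);
		/* Returns 0 if the depot could not store the trace. */
		return depot_save_stack(&trace, GFP_NOWAIT);
	}

	static void print_recorded_stack(depot_stack_handle_t handle)
	{
		struct stack_trace trace;

		depot_fetch_stack(handle, &trace);
		print_stack_trace(&trace, 0);
	}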
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 22e13a0e19d7..cb1b54ee8527 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -425,7 +425,8 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
425phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, 425phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
426 dma_addr_t tbl_dma_addr, 426 dma_addr_t tbl_dma_addr,
427 phys_addr_t orig_addr, size_t size, 427 phys_addr_t orig_addr, size_t size,
428 enum dma_data_direction dir) 428 enum dma_data_direction dir,
429 unsigned long attrs)
429{ 430{
430 unsigned long flags; 431 unsigned long flags;
431 phys_addr_t tlb_addr; 432 phys_addr_t tlb_addr;
@@ -526,7 +527,8 @@ found:
526 */ 527 */
527 for (i = 0; i < nslots; i++) 528 for (i = 0; i < nslots; i++)
528 io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); 529 io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
529 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) 530 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
531 (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
530 swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); 532 swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
531 533
532 return tlb_addr; 534 return tlb_addr;
@@ -539,18 +541,20 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
539 541
540static phys_addr_t 542static phys_addr_t
541map_single(struct device *hwdev, phys_addr_t phys, size_t size, 543map_single(struct device *hwdev, phys_addr_t phys, size_t size,
542 enum dma_data_direction dir) 544 enum dma_data_direction dir, unsigned long attrs)
543{ 545{
544 dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); 546 dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
545 547
546 return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); 548 return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
549 dir, attrs);
547} 550}
548 551
549/* 552/*
550 * dma_addr is the kernel virtual address of the bounce buffer to unmap. 553 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
551 */ 554 */
552void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, 555void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
553 size_t size, enum dma_data_direction dir) 556 size_t size, enum dma_data_direction dir,
557 unsigned long attrs)
554{ 558{
555 unsigned long flags; 559 unsigned long flags;
556 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; 560 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -561,6 +565,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
561 * First, sync the memory before unmapping the entry 565 * First, sync the memory before unmapping the entry
562 */ 566 */
563 if (orig_addr != INVALID_PHYS_ADDR && 567 if (orig_addr != INVALID_PHYS_ADDR &&
568 !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
564 ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) 569 ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
565 swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); 570 swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
566 571
@@ -654,7 +659,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
654 * GFP_DMA memory; fall back on map_single(), which 659 * GFP_DMA memory; fall back on map_single(), which
655 * will grab memory from the lowest available address range. 660 * will grab memory from the lowest available address range.
656 */ 661 */
657 phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE); 662 phys_addr_t paddr = map_single(hwdev, 0, size,
663 DMA_FROM_DEVICE, 0);
658 if (paddr == SWIOTLB_MAP_ERROR) 664 if (paddr == SWIOTLB_MAP_ERROR)
659 goto err_warn; 665 goto err_warn;
660 666
@@ -667,9 +673,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
667 (unsigned long long)dma_mask, 673 (unsigned long long)dma_mask,
668 (unsigned long long)dev_addr); 674 (unsigned long long)dev_addr);
669 675
670 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 676 /*
677 * DMA_TO_DEVICE to avoid memcpy in unmap_single.
 678 * DMA_ATTR_SKIP_CPU_SYNC is optional.
679 */
671 swiotlb_tbl_unmap_single(hwdev, paddr, 680 swiotlb_tbl_unmap_single(hwdev, paddr,
672 size, DMA_TO_DEVICE); 681 size, DMA_TO_DEVICE,
682 DMA_ATTR_SKIP_CPU_SYNC);
673 goto err_warn; 683 goto err_warn;
674 } 684 }
675 } 685 }
@@ -698,8 +708,12 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
698 if (!is_swiotlb_buffer(paddr)) 708 if (!is_swiotlb_buffer(paddr))
699 free_pages((unsigned long)vaddr, get_order(size)); 709 free_pages((unsigned long)vaddr, get_order(size));
700 else 710 else
701 /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ 711 /*
702 swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE); 712 * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
713 * DMA_ATTR_SKIP_CPU_SYNC is optional.
714 */
715 swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE,
716 DMA_ATTR_SKIP_CPU_SYNC);
703} 717}
704EXPORT_SYMBOL(swiotlb_free_coherent); 718EXPORT_SYMBOL(swiotlb_free_coherent);
705 719
@@ -714,8 +728,8 @@ swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
714 * When the mapping is small enough return a static buffer to limit 728 * When the mapping is small enough return a static buffer to limit
715 * the damage, or panic when the transfer is too big. 729 * the damage, or panic when the transfer is too big.
716 */ 730 */
717 printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at " 731 dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
718 "device %s\n", size, dev ? dev_name(dev) : "?"); 732 size);
719 733
720 if (size <= io_tlb_overflow || !do_panic) 734 if (size <= io_tlb_overflow || !do_panic)
721 return; 735 return;
@@ -755,7 +769,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
755 trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); 769 trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
756 770
757 /* Oh well, have to allocate and map a bounce buffer. */ 771 /* Oh well, have to allocate and map a bounce buffer. */
758 map = map_single(dev, phys, size, dir); 772 map = map_single(dev, phys, size, dir, attrs);
759 if (map == SWIOTLB_MAP_ERROR) { 773 if (map == SWIOTLB_MAP_ERROR) {
760 swiotlb_full(dev, size, dir, 1); 774 swiotlb_full(dev, size, dir, 1);
761 return phys_to_dma(dev, io_tlb_overflow_buffer); 775 return phys_to_dma(dev, io_tlb_overflow_buffer);
@@ -764,12 +778,13 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
764 dev_addr = phys_to_dma(dev, map); 778 dev_addr = phys_to_dma(dev, map);
765 779
766 /* Ensure that the address returned is DMA'ble */ 780 /* Ensure that the address returned is DMA'ble */
767 if (!dma_capable(dev, dev_addr, size)) { 781 if (dma_capable(dev, dev_addr, size))
768 swiotlb_tbl_unmap_single(dev, map, size, dir); 782 return dev_addr;
769 return phys_to_dma(dev, io_tlb_overflow_buffer);
770 }
771 783
772 return dev_addr; 784 attrs |= DMA_ATTR_SKIP_CPU_SYNC;
785 swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
786
787 return phys_to_dma(dev, io_tlb_overflow_buffer);
773} 788}
774EXPORT_SYMBOL_GPL(swiotlb_map_page); 789EXPORT_SYMBOL_GPL(swiotlb_map_page);
775 790
@@ -782,14 +797,15 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
782 * whatever the device wrote there. 797 * whatever the device wrote there.
783 */ 798 */
784static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, 799static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
785 size_t size, enum dma_data_direction dir) 800 size_t size, enum dma_data_direction dir,
801 unsigned long attrs)
786{ 802{
787 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); 803 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
788 804
789 BUG_ON(dir == DMA_NONE); 805 BUG_ON(dir == DMA_NONE);
790 806
791 if (is_swiotlb_buffer(paddr)) { 807 if (is_swiotlb_buffer(paddr)) {
792 swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); 808 swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
793 return; 809 return;
794 } 810 }
795 811
@@ -809,7 +825,7 @@ void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
809 size_t size, enum dma_data_direction dir, 825 size_t size, enum dma_data_direction dir,
810 unsigned long attrs) 826 unsigned long attrs)
811{ 827{
812 unmap_single(hwdev, dev_addr, size, dir); 828 unmap_single(hwdev, dev_addr, size, dir, attrs);
813} 829}
814EXPORT_SYMBOL_GPL(swiotlb_unmap_page); 830EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
815 831
@@ -891,11 +907,12 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
891 if (swiotlb_force || 907 if (swiotlb_force ||
892 !dma_capable(hwdev, dev_addr, sg->length)) { 908 !dma_capable(hwdev, dev_addr, sg->length)) {
893 phys_addr_t map = map_single(hwdev, sg_phys(sg), 909 phys_addr_t map = map_single(hwdev, sg_phys(sg),
894 sg->length, dir); 910 sg->length, dir, attrs);
895 if (map == SWIOTLB_MAP_ERROR) { 911 if (map == SWIOTLB_MAP_ERROR) {
896 /* Don't panic here, we expect map_sg users 912 /* Don't panic here, we expect map_sg users
897 to do proper error handling. */ 913 to do proper error handling. */
898 swiotlb_full(hwdev, sg->length, dir, 0); 914 swiotlb_full(hwdev, sg->length, dir, 0);
915 attrs |= DMA_ATTR_SKIP_CPU_SYNC;
899 swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, 916 swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
900 attrs); 917 attrs);
901 sg_dma_len(sgl) = 0; 918 sg_dma_len(sgl) = 0;
@@ -910,14 +927,6 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
910} 927}
911EXPORT_SYMBOL(swiotlb_map_sg_attrs); 928EXPORT_SYMBOL(swiotlb_map_sg_attrs);
912 929
913int
914swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
915 enum dma_data_direction dir)
916{
917 return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, 0);
918}
919EXPORT_SYMBOL(swiotlb_map_sg);
920
921/* 930/*
922 * Unmap a set of streaming mode DMA translations. Again, cpu read rules 931 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
923 * concerning calls here are the same as for swiotlb_unmap_page() above. 932 * concerning calls here are the same as for swiotlb_unmap_page() above.
@@ -933,19 +942,11 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
933 BUG_ON(dir == DMA_NONE); 942 BUG_ON(dir == DMA_NONE);
934 943
935 for_each_sg(sgl, sg, nelems, i) 944 for_each_sg(sgl, sg, nelems, i)
936 unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir); 945 unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
937 946 attrs);
938} 947}
939EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); 948EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
940 949
941void
942swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
943 enum dma_data_direction dir)
944{
945 return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, 0);
946}
947EXPORT_SYMBOL(swiotlb_unmap_sg);
948
949/* 950/*
950 * Make physical memory consistent for a set of streaming mode DMA translations 951 * Make physical memory consistent for a set of streaming mode DMA translations
951 * after a transfer. 952 * after a transfer.
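The point of threading attrs this far down is that DMA_ATTR_SKIP_CPU_SYNC now suppresses the swiotlb_bounce() memcpy on both map and unmap. A sketch of the intended driver-side pattern (hypothetical rx_* names; the dma_*_attrs and sync calls are the real 4.10 API) — a driver recycling receive buffers skips the full-buffer copy-back and syncs only the bytes the device actually wrote:

	#include <linux/dma-mapping.h>

	/* Map a recycled RX buffer without the implicit bounce copies. */
	static dma_addr_t rx_map(struct device *dev, void *rx_page, size_t len)
	{
		return dma_map_single_attrs(dev, rx_page, len, DMA_FROM_DEVICE,
					    DMA_ATTR_SKIP_CPU_SYNC);
	}

	/* Before the CPU reads a completed packet, sync just that region... */
	static void rx_complete(struct device *dev, dma_addr_t addr,
				size_t offset, size_t pktlen)
	{
		dma_sync_single_for_cpu(dev, addr + offset, pktlen,
					DMA_FROM_DEVICE);
	}

	/* ...and tear down without copying the whole buffer back. */
	static void rx_unmap(struct device *dev, dma_addr_t addr, size_t len)
	{
		dma_unmap_single_attrs(dev, addr, len, DMA_FROM_DEVICE,
				       DMA_ATTR_SKIP_CPU_SYNC);
	}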
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 5e51872b3fc1..fbdf87920093 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -20,6 +20,11 @@
20#include <linux/uaccess.h> 20#include <linux/uaccess.h>
21#include <linux/module.h> 21#include <linux/module.h>
22 22
23/*
24 * Note: test functions are marked noinline so that their names appear in
25 * reports.
26 */
27
23static noinline void __init kmalloc_oob_right(void) 28static noinline void __init kmalloc_oob_right(void)
24{ 29{
25 char *ptr; 30 char *ptr;
@@ -411,6 +416,29 @@ static noinline void __init copy_user_test(void)
411 kfree(kmem); 416 kfree(kmem);
412} 417}
413 418
419static noinline void __init use_after_scope_test(void)
420{
421 volatile char *volatile p;
422
423 pr_info("use-after-scope on int\n");
424 {
425 int local = 0;
426
427 p = (char *)&local;
428 }
429 p[0] = 1;
430 p[3] = 1;
431
432 pr_info("use-after-scope on array\n");
433 {
434 char local[1024] = {0};
435
436 p = local;
437 }
438 p[0] = 1;
439 p[1023] = 1;
440}
441
414static int __init kmalloc_tests_init(void) 442static int __init kmalloc_tests_init(void)
415{ 443{
416 kmalloc_oob_right(); 444 kmalloc_oob_right();
@@ -436,6 +464,7 @@ static int __init kmalloc_tests_init(void)
436 kasan_global_oob(); 464 kasan_global_oob();
437 ksize_unpoisons_memory(); 465 ksize_unpoisons_memory();
438 copy_user_test(); 466 copy_user_test();
467 use_after_scope_test();
439 return -EAGAIN; 468 return -EAGAIN;
440} 469}
441 470
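use_after_scope_test() only fires when the compiler cooperates: with GCC 7's -fsanitize-address-use-after-scope, each local's stack slot is poisoned at scope exit through calls the compiler inserts. A conceptual expansion of the first block above (a sketch of the instrumentation, not literal kernel code; the __asan_*_stack_memory hooks are the KASAN entry points this feature relies on):

	{
		int local = 0;

		/* emitted by the compiler at scope entry */
		__asan_unpoison_stack_memory(&local, sizeof(local));
		p = (char *)&local;
		/* emitted by the compiler at scope exit */
		__asan_poison_stack_memory(&local, sizeof(local));
	}
	p[0] = 1;	/* poisoned slot -> KASAN use-after-scope report */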
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index 782ae8ca2c06..adc6ee0a5126 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -48,7 +48,7 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
48 while (*p) { 48 while (*p) {
49 parent = *p; 49 parent = *p;
50 ptr = rb_entry(parent, struct timerqueue_node, node); 50 ptr = rb_entry(parent, struct timerqueue_node, node);
51 if (node->expires.tv64 < ptr->expires.tv64) 51 if (node->expires < ptr->expires)
52 p = &(*p)->rb_left; 52 p = &(*p)->rb_left;
53 else 53 else
54 p = &(*p)->rb_right; 54 p = &(*p)->rb_right;
@@ -56,7 +56,7 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
56 rb_link_node(&node->node, parent, p); 56 rb_link_node(&node->node, parent, p);
57 rb_insert_color(&node->node, &head->head); 57 rb_insert_color(&node->node, &head->head);
58 58
59 if (!head->next || node->expires.tv64 < head->next->expires.tv64) { 59 if (!head->next || node->expires < head->next->expires) {
60 head->next = node; 60 head->next = node;
61 return true; 61 return true;
62 } 62 }
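The timerqueue hunks are fallout from ktime_t losing its union wrapper in 4.10: a ktime_t is now a plain s64, so expiry values compare directly and the .tv64 accessor disappears. A minimal sketch of the queue API after the change (example() is a hypothetical caller; the timerqueue_* and ktime_set calls are real):

	#include <linux/kernel.h>
	#include <linux/ktime.h>
	#include <linux/timerqueue.h>

	static void example(void)
	{
		struct timerqueue_head head;
		struct timerqueue_node a, b;

		timerqueue_init_head(&head);
		timerqueue_init(&a);
		timerqueue_init(&b);

		a.expires = ktime_set(1, 0);		/* 1 s */
		b.expires = ktime_set(0, 500000);	/* 0.5 ms */

		timerqueue_add(&head, &a);
		timerqueue_add(&head, &b);

		/* b sorts first: a plain s64 comparison, no .tv64 needed */
		WARN_ON(timerqueue_getnext(&head) != &b);
	}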