Diffstat (limited to 'lib')
-rw-r--r--   lib/Kconfig.debug                  34
-rw-r--r--   lib/Kconfig.ubsan                   3
-rw-r--r--   lib/Makefile                        1
-rw-r--r--   lib/cpu-notifier-error-inject.c    84
-rw-r--r--   lib/extable.c                       2
-rw-r--r--   lib/iov_iter.c                    149
-rw-r--r--   lib/kstrtox.c                       2
-rw-r--r--   lib/radix-tree.c                  891
-rw-r--r--   lib/raid6/avx2.c                  232
-rw-r--r--   lib/timerqueue.c                    4
10 files changed, 920 insertions, 482 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e6327d102184..b06848a104e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -26,7 +26,7 @@ config CONSOLE_LOGLEVEL_DEFAULT
 	  the kernel bootargs. loglevel=<x> continues to override whatever
 	  value is specified here as well.
 
-	  Note: This does not affect the log level of un-prefixed prink()
+	  Note: This does not affect the log level of un-prefixed printk()
 	  usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT
 	  option.
 
@@ -194,8 +194,8 @@ config GDB_SCRIPTS
 	  build directory. If you load vmlinux into gdb, the helper
 	  scripts will be automatically imported by gdb as well, and
 	  additional functions are available to analyze a Linux kernel
-	  instance. See Documentation/gdb-kernel-debugging.txt for further
-	  details.
+	  instance. See Documentation/dev-tools/gdb-kernel-debugging.rst
+	  for further details.
 
 config ENABLE_WARN_DEPRECATED
 	bool "Enable __deprecated logic"
@@ -542,7 +542,7 @@ config DEBUG_KMEMLEAK
 	  difference being that the orphan objects are not freed but
 	  only shown in /sys/kernel/debug/kmemleak. Enabling this
 	  feature will introduce an overhead to memory
-	  allocations. See Documentation/kmemleak.txt for more
+	  allocations. See Documentation/dev-tools/kmemleak.rst for more
 	  details.
 
 	  Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
@@ -739,7 +739,7 @@ config KCOV
 	  different machines and across reboots. If you need stable PC values,
 	  disable RANDOMIZE_BASE.
 
-	  For more details, see Documentation/kcov.txt.
+	  For more details, see Documentation/dev-tools/kcov.rst.
 
 config KCOV_INSTRUMENT_ALL
 	bool "Instrument all code by default"
@@ -1538,30 +1538,6 @@ config NOTIFIER_ERROR_INJECTION
 
 	  Say N if unsure.
 
-config CPU_NOTIFIER_ERROR_INJECT
-	tristate "CPU notifier error injection module"
-	depends on HOTPLUG_CPU && NOTIFIER_ERROR_INJECTION
-	help
-	  This option provides a kernel module that can be used to test
-	  the error handling of the cpu notifiers by injecting artificial
-	  errors to CPU notifier chain callbacks. It is controlled through
-	  debugfs interface under /sys/kernel/debug/notifier-error-inject/cpu
-
-	  If the notifier call chain should be failed with some events
-	  notified, write the error code to "actions/<notifier event>/error".
-
-	  Example: Inject CPU offline error (-1 == -EPERM)
-
-	  # cd /sys/kernel/debug/notifier-error-inject/cpu
-	  # echo -1 > actions/CPU_DOWN_PREPARE/error
-	  # echo 0 > /sys/devices/system/cpu/cpu1/online
-	  bash: echo: write error: Operation not permitted
-
-	  To compile this code as a module, choose M here: the module will
-	  be called cpu-notifier-error-inject.
-
-	  If unsure, say N.
-
 config PM_NOTIFIER_ERROR_INJECT
 	tristate "PM notifier error injection module"
 	depends on PM && NOTIFIER_ERROR_INJECTION
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index bc6e651df68c..a669c193b878 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -10,7 +10,8 @@ config UBSAN
 	  This option enables undefined behaviour sanity checker
 	  Compile-time instrumentation is used to detect various undefined
 	  behaviours in runtime. Various types of checks may be enabled
-	  via boot parameter ubsan_handle (see: Documentation/ubsan.txt).
+	  via boot parameter ubsan_handle
+	  (see: Documentation/dev-tools/ubsan.rst).
 
 config UBSAN_SANITIZE_ALL
 	bool "Enable instrumentation for the entire kernel"
diff --git a/lib/Makefile b/lib/Makefile
index 50144a3aeebd..bc4073a8cd08 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -128,7 +128,6 @@ obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
-obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
 obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o
 obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
deleted file mode 100644
index 0e2c9a1e958a..000000000000
--- a/lib/cpu-notifier-error-inject.c
+++ /dev/null
@@ -1,84 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-
-#include "notifier-error-inject.h"
-
-static int priority;
-module_param(priority, int, 0);
-MODULE_PARM_DESC(priority, "specify cpu notifier priority");
-
-#define UP_PREPARE		0
-#define UP_PREPARE_FROZEN	0
-#define DOWN_PREPARE		0
-#define DOWN_PREPARE_FROZEN	0
-
-static struct notifier_err_inject cpu_notifier_err_inject = {
-	.actions = {
-		{ NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE) },
-		{ NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE_FROZEN) },
-		{ NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE) },
-		{ NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE_FROZEN) },
-		{}
-	}
-};
-
-static int notf_err_handle(struct notifier_err_inject_action *action)
-{
-	int ret;
-
-	ret = action->error;
-	if (ret)
-		pr_info("Injecting error (%d) to %s\n", ret, action->name);
-	return ret;
-}
-
-static int notf_err_inj_up_prepare(unsigned int cpu)
-{
-	if (!cpuhp_tasks_frozen)
-		return notf_err_handle(&cpu_notifier_err_inject.actions[0]);
-	else
-		return notf_err_handle(&cpu_notifier_err_inject.actions[1]);
-}
-
-static int notf_err_inj_dead(unsigned int cpu)
-{
-	if (!cpuhp_tasks_frozen)
-		return notf_err_handle(&cpu_notifier_err_inject.actions[2]);
-	else
-		return notf_err_handle(&cpu_notifier_err_inject.actions[3]);
-}
-
-static struct dentry *dir;
-
-static int err_inject_init(void)
-{
-	int err;
-
-	dir = notifier_err_inject_init("cpu", notifier_err_inject_dir,
-				       &cpu_notifier_err_inject, priority);
-	if (IS_ERR(dir))
-		return PTR_ERR(dir);
-
-	err = cpuhp_setup_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE,
-					"cpu-err-notif:prepare",
-					notf_err_inj_up_prepare,
-					notf_err_inj_dead);
-	if (err)
-		debugfs_remove_recursive(dir);
-
-	return err;
-}
-
-static void err_inject_exit(void)
-{
-	cpuhp_remove_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE);
-	debugfs_remove_recursive(dir);
-}
-
-module_init(err_inject_init);
-module_exit(err_inject_exit);
-
-MODULE_DESCRIPTION("CPU notifier error injection module");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
diff --git a/lib/extable.c b/lib/extable.c
index 0be02ad561e9..62968daa66a9 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sort.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifndef ARCH_HAS_RELATIVE_EXTABLE
 #define ex_to_insn(x)	((x)->insn)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 691a52b634fe..25f572303801 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -73,19 +73,21 @@
 }
 
 #define iterate_all_kinds(i, n, v, I, B, K) {			\
-	size_t skip = i->iov_offset;				\
-	if (unlikely(i->type & ITER_BVEC)) {			\
-		struct bio_vec v;				\
-		struct bvec_iter __bi;				\
-		iterate_bvec(i, n, v, __bi, skip, (B))		\
-	} else if (unlikely(i->type & ITER_KVEC)) {		\
-		const struct kvec *kvec;			\
-		struct kvec v;					\
-		iterate_kvec(i, n, v, kvec, skip, (K))		\
-	} else {						\
-		const struct iovec *iov;			\
-		struct iovec v;					\
-		iterate_iovec(i, n, v, iov, skip, (I))		\
+	if (likely(n)) {					\
+		size_t skip = i->iov_offset;			\
+		if (unlikely(i->type & ITER_BVEC)) {		\
+			struct bio_vec v;			\
+			struct bvec_iter __bi;			\
+			iterate_bvec(i, n, v, __bi, skip, (B))	\
+		} else if (unlikely(i->type & ITER_KVEC)) {	\
+			const struct kvec *kvec;		\
+			struct kvec v;				\
+			iterate_kvec(i, n, v, kvec, skip, (K))	\
+		} else {					\
+			const struct iovec *iov;		\
+			struct iovec v;				\
+			iterate_iovec(i, n, v, iov, skip, (I))	\
+		}						\
 	}							\
 }
 
@@ -569,6 +571,31 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(copy_from_iter);
 
+bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
+{
+	char *to = addr;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return false;
+	}
+	if (unlikely(i->count < bytes))
+		return false;
+
+	iterate_all_kinds(i, bytes, v, ({
+		if (__copy_from_user((to += v.iov_len) - v.iov_len,
+				     v.iov_base, v.iov_len))
+			return false;
+		0;}),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len),
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+	)
+
+	iov_iter_advance(i, bytes);
+	return true;
+}
+EXPORT_SYMBOL(copy_from_iter_full);
+
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
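
The *_full() primitives added above either copy all of 'bytes' and advance the iterator, or report failure and leave the iterator position unadvanced (the destination buffer may still be partially written), so callers test a boolean instead of comparing a returned length. A minimal sketch of the intended calling pattern follows; 'req', its type and the surrounding msghdr are hypothetical and only copy_from_iter_full() itself comes from this patch:

	struct hypothetical_req req;

	/* all-or-nothing: on failure the iterator has not been advanced */
	if (!copy_from_iter_full(&req, sizeof(req), &msg->msg_iter))
		return -EFAULT;
	/* on success the iterator has moved past sizeof(req) bytes */
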
@@ -588,6 +615,30 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(copy_from_iter_nocache);
 
+bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
+{
+	char *to = addr;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return false;
+	}
+	if (unlikely(i->count < bytes))
+		return false;
+	iterate_all_kinds(i, bytes, v, ({
+		if (__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
+					     v.iov_base, v.iov_len))
+			return false;
+		0;}),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len),
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+	)
+
+	iov_iter_advance(i, bytes);
+	return true;
+}
+EXPORT_SYMBOL(copy_from_iter_full_nocache);
+
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
@@ -788,11 +839,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 	unsigned long res = 0;
 	size_t size = i->count;
 
-	if (!size)
-		return 0;
-
 	if (unlikely(i->type & ITER_PIPE)) {
-		if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
+		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
 			return size | i->iov_offset;
 		return size;
 	}
@@ -807,10 +855,8 @@ EXPORT_SYMBOL(iov_iter_alignment);
 
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 {
 	unsigned long res = 0;
 	size_t size = i->count;
-	if (!size)
-		return 0;
 
 	if (unlikely(i->type & ITER_PIPE)) {
 		WARN_ON(1);
@@ -825,7 +871,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
825 (res |= (!res ? 0 : (unsigned long)v.iov_base) | 871 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
826 (size != v.iov_len ? size : 0)) 872 (size != v.iov_len ? size : 0))
827 ); 873 );
828 return res; 874 return res;
829} 875}
830EXPORT_SYMBOL(iov_iter_gap_alignment); 876EXPORT_SYMBOL(iov_iter_gap_alignment);
831 877
@@ -859,6 +905,9 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
 	size_t capacity;
 	int idx;
 
+	if (!maxsize)
+		return 0;
+
 	if (!sanity(i))
 		return -EFAULT;
 
@@ -877,9 +926,6 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 	if (maxsize > i->count)
 		maxsize = i->count;
 
-	if (!maxsize)
-		return 0;
-
 	if (unlikely(i->type & ITER_PIPE))
 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
 	iterate_all_kinds(i, maxsize, v, ({
@@ -926,6 +972,9 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
 	int idx;
 	int npages;
 
+	if (!maxsize)
+		return 0;
+
 	if (!sanity(i))
 		return -EFAULT;
 
@@ -957,9 +1006,6 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 	if (maxsize > i->count)
 		maxsize = i->count;
 
-	if (!maxsize)
-		return 0;
-
 	if (unlikely(i->type & ITER_PIPE))
 		return pipe_get_pages_alloc(i, pages, maxsize, start);
 	iterate_all_kinds(i, maxsize, v, ({
@@ -1009,7 +1055,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1009 } 1055 }
1010 iterate_and_advance(i, bytes, v, ({ 1056 iterate_and_advance(i, bytes, v, ({
1011 int err = 0; 1057 int err = 0;
1012 next = csum_and_copy_from_user(v.iov_base, 1058 next = csum_and_copy_from_user(v.iov_base,
1013 (to += v.iov_len) - v.iov_len, 1059 (to += v.iov_len) - v.iov_len,
1014 v.iov_len, 0, &err); 1060 v.iov_len, 0, &err);
1015 if (!err) { 1061 if (!err) {
@@ -1038,6 +1084,51 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 }
 EXPORT_SYMBOL(csum_and_copy_from_iter);
 
+bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
+				  struct iov_iter *i)
+{
+	char *to = addr;
+	__wsum sum, next;
+	size_t off = 0;
+	sum = *csum;
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return false;
+	}
+	if (unlikely(i->count < bytes))
+		return false;
+	iterate_all_kinds(i, bytes, v, ({
+		int err = 0;
+		next = csum_and_copy_from_user(v.iov_base,
+					       (to += v.iov_len) - v.iov_len,
+					       v.iov_len, 0, &err);
+		if (err)
+			return false;
+		sum = csum_block_add(sum, next, off);
+		off += v.iov_len;
+		0;
+	}), ({
+		char *p = kmap_atomic(v.bv_page);
+		next = csum_partial_copy_nocheck(p + v.bv_offset,
+						 (to += v.bv_len) - v.bv_len,
+						 v.bv_len, 0);
+		kunmap_atomic(p);
+		sum = csum_block_add(sum, next, off);
+		off += v.bv_len;
+	}),({
+		next = csum_partial_copy_nocheck(v.iov_base,
+						 (to += v.iov_len) - v.iov_len,
+						 v.iov_len, 0);
+		sum = csum_block_add(sum, next, off);
+		off += v.iov_len;
+	})
+	)
+	*csum = sum;
+	iov_iter_advance(i, bytes);
+	return true;
+}
+EXPORT_SYMBOL(csum_and_copy_from_iter_full);
+
 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
 			     struct iov_iter *i)
 {
@@ -1052,7 +1143,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
1052 iterate_and_advance(i, bytes, v, ({ 1143 iterate_and_advance(i, bytes, v, ({
1053 int err = 0; 1144 int err = 0;
1054 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, 1145 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1055 v.iov_base, 1146 v.iov_base,
1056 v.iov_len, 0, &err); 1147 v.iov_len, 0, &err);
1057 if (!err) { 1148 if (!err) {
1058 sum = csum_block_add(sum, next, off); 1149 sum = csum_block_add(sum, next, off);
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index b8e2080c1a47..bf85e05ce858 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -17,7 +17,7 @@
 #include <linux/math64.h>
 #include <linux/export.h>
 #include <linux/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "kstrtox.h"
 
 const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 2e8c6f7aa56e..6f382e07de77 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -22,6 +22,7 @@
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */ 23 */
24 24
25#include <linux/cpu.h>
25#include <linux/errno.h> 26#include <linux/errno.h>
26#include <linux/init.h> 27#include <linux/init.h>
27#include <linux/kernel.h> 28#include <linux/kernel.h>
@@ -30,7 +31,6 @@
30#include <linux/percpu.h> 31#include <linux/percpu.h>
31#include <linux/slab.h> 32#include <linux/slab.h>
32#include <linux/kmemleak.h> 33#include <linux/kmemleak.h>
33#include <linux/notifier.h>
34#include <linux/cpu.h> 34#include <linux/cpu.h>
35#include <linux/string.h> 35#include <linux/string.h>
36#include <linux/bitops.h> 36#include <linux/bitops.h>
@@ -69,6 +69,11 @@ struct radix_tree_preload {
69}; 69};
70static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 70static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
71 71
72static inline struct radix_tree_node *entry_to_node(void *ptr)
73{
74 return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
75}
76
72static inline void *node_to_entry(void *ptr) 77static inline void *node_to_entry(void *ptr)
73{ 78{
74 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); 79 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
@@ -191,13 +196,12 @@ static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
191 * Returns next bit offset, or size if nothing found. 196 * Returns next bit offset, or size if nothing found.
192 */ 197 */
193static __always_inline unsigned long 198static __always_inline unsigned long
194radix_tree_find_next_bit(const unsigned long *addr, 199radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
195 unsigned long size, unsigned long offset) 200 unsigned long offset)
196{ 201{
197 if (!__builtin_constant_p(size)) 202 const unsigned long *addr = node->tags[tag];
198 return find_next_bit(addr, size, offset);
199 203
200 if (offset < size) { 204 if (offset < RADIX_TREE_MAP_SIZE) {
201 unsigned long tmp; 205 unsigned long tmp;
202 206
203 addr += offset / BITS_PER_LONG; 207 addr += offset / BITS_PER_LONG;
@@ -205,14 +209,32 @@ radix_tree_find_next_bit(const unsigned long *addr,
205 if (tmp) 209 if (tmp)
206 return __ffs(tmp) + offset; 210 return __ffs(tmp) + offset;
207 offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); 211 offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
208 while (offset < size) { 212 while (offset < RADIX_TREE_MAP_SIZE) {
209 tmp = *++addr; 213 tmp = *++addr;
210 if (tmp) 214 if (tmp)
211 return __ffs(tmp) + offset; 215 return __ffs(tmp) + offset;
212 offset += BITS_PER_LONG; 216 offset += BITS_PER_LONG;
213 } 217 }
214 } 218 }
215 return size; 219 return RADIX_TREE_MAP_SIZE;
220}
221
222static unsigned int iter_offset(const struct radix_tree_iter *iter)
223{
224 return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK;
225}
226
227/*
228 * The maximum index which can be stored in a radix tree
229 */
230static inline unsigned long shift_maxindex(unsigned int shift)
231{
232 return (RADIX_TREE_MAP_SIZE << shift) - 1;
233}
234
235static inline unsigned long node_maxindex(struct radix_tree_node *node)
236{
237 return shift_maxindex(node->shift);
216} 238}
217 239
218#ifndef __KERNEL__ 240#ifndef __KERNEL__
@@ -220,10 +242,11 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
220{ 242{
221 unsigned long i; 243 unsigned long i;
222 244
223 pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d exceptional %d parent %p\n", 245 pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n",
224 node, node->offset, 246 node, node->offset, index, index | node_maxindex(node),
247 node->parent,
225 node->tags[0][0], node->tags[1][0], node->tags[2][0], 248 node->tags[0][0], node->tags[1][0], node->tags[2][0],
226 node->shift, node->count, node->exceptional, node->parent); 249 node->shift, node->count, node->exceptional);
227 250
228 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { 251 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
229 unsigned long first = index | (i << node->shift); 252 unsigned long first = index | (i << node->shift);
@@ -231,14 +254,16 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
231 void *entry = node->slots[i]; 254 void *entry = node->slots[i];
232 if (!entry) 255 if (!entry)
233 continue; 256 continue;
234 if (is_sibling_entry(node, entry)) { 257 if (entry == RADIX_TREE_RETRY) {
235 pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n", 258 pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n",
236 entry, i, 259 i, first, last, node);
237 *(void **)entry_to_node(entry),
238 first, last);
239 } else if (!radix_tree_is_internal_node(entry)) { 260 } else if (!radix_tree_is_internal_node(entry)) {
240 pr_debug("radix entry %p offset %ld indices %ld-%ld\n", 261 pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
241 entry, i, first, last); 262 entry, i, first, last, node);
263 } else if (is_sibling_entry(node, entry)) {
264 pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
265 entry, i, first, last, node,
266 *(void **)entry_to_node(entry));
242 } else { 267 } else {
243 dump_node(entry_to_node(entry), first); 268 dump_node(entry_to_node(entry), first);
244 } 269 }
@@ -262,7 +287,10 @@ static void radix_tree_dump(struct radix_tree_root *root)
262 * that the caller has pinned this thread of control to the current CPU. 287 * that the caller has pinned this thread of control to the current CPU.
263 */ 288 */
264static struct radix_tree_node * 289static struct radix_tree_node *
265radix_tree_node_alloc(struct radix_tree_root *root) 290radix_tree_node_alloc(struct radix_tree_root *root,
291 struct radix_tree_node *parent,
292 unsigned int shift, unsigned int offset,
293 unsigned int count, unsigned int exceptional)
266{ 294{
267 struct radix_tree_node *ret = NULL; 295 struct radix_tree_node *ret = NULL;
268 gfp_t gfp_mask = root_gfp_mask(root); 296 gfp_t gfp_mask = root_gfp_mask(root);
@@ -307,6 +335,13 @@ radix_tree_node_alloc(struct radix_tree_root *root)
307 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); 335 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
308out: 336out:
309 BUG_ON(radix_tree_is_internal_node(ret)); 337 BUG_ON(radix_tree_is_internal_node(ret));
338 if (ret) {
339 ret->parent = parent;
340 ret->shift = shift;
341 ret->offset = offset;
342 ret->count = count;
343 ret->exceptional = exceptional;
344 }
310 return ret; 345 return ret;
311} 346}
312 347
@@ -314,17 +349,15 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
314{ 349{
315 struct radix_tree_node *node = 350 struct radix_tree_node *node =
316 container_of(head, struct radix_tree_node, rcu_head); 351 container_of(head, struct radix_tree_node, rcu_head);
317 int i;
318 352
319 /* 353 /*
320 * must only free zeroed nodes into the slab. radix_tree_shrink 354 * Must only free zeroed nodes into the slab. We can be left with
321 * can leave us with a non-NULL entry in the first slot, so clear 355 * non-NULL entries by radix_tree_free_nodes, so clear the entries
322 * that here to make sure. 356 * and tags here.
323 */ 357 */
324 for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) 358 memset(node->slots, 0, sizeof(node->slots));
325 tag_clear(node, i, 0); 359 memset(node->tags, 0, sizeof(node->tags));
326 360 INIT_LIST_HEAD(&node->private_list);
327 node->slots[0] = NULL;
328 361
329 kmem_cache_free(radix_tree_node_cachep, node); 362 kmem_cache_free(radix_tree_node_cachep, node);
330} 363}
@@ -344,7 +377,7 @@ radix_tree_node_free(struct radix_tree_node *node)
344 * To make use of this facility, the radix tree must be initialised without 377 * To make use of this facility, the radix tree must be initialised without
345 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). 378 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
346 */ 379 */
347static int __radix_tree_preload(gfp_t gfp_mask, int nr) 380static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
348{ 381{
349 struct radix_tree_preload *rtp; 382 struct radix_tree_preload *rtp;
350 struct radix_tree_node *node; 383 struct radix_tree_node *node;
@@ -410,6 +443,28 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
410} 443}
411EXPORT_SYMBOL(radix_tree_maybe_preload); 444EXPORT_SYMBOL(radix_tree_maybe_preload);
412 445
446#ifdef CONFIG_RADIX_TREE_MULTIORDER
447/*
448 * Preload with enough objects to ensure that we can split a single entry
449 * of order @old_order into many entries of size @new_order
450 */
451int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
452 gfp_t gfp_mask)
453{
454 unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
455 unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
456 (new_order / RADIX_TREE_MAP_SHIFT);
457 unsigned nr = 0;
458
459 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
460 BUG_ON(new_order >= old_order);
461
462 while (layers--)
463 nr = nr * RADIX_TREE_MAP_SIZE + 1;
464 return __radix_tree_preload(gfp_mask, top * nr);
465}
466#endif
467
413/* 468/*
414 * The same as function above, but preload number of nodes required to insert 469 * The same as function above, but preload number of nodes required to insert
415 * (1 << order) continuous naturally-aligned elements. 470 * (1 << order) continuous naturally-aligned elements.
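
To make the arithmetic in radix_tree_split_preload() above concrete, assume the usual RADIX_TREE_MAP_SHIFT of 6 (64 slots per node) and a split of one order-9 entry into order-0 entries: top = 1 << (9 % 6) = 8 and layers = 9/6 - 0/6 = 1, so nr ends up as 1 and the function preloads 8 * 1 = 8 nodes, one new leaf node for each of the eight 64-index chunks covered by the old 512-index entry.
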
@@ -455,19 +510,6 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
455 return __radix_tree_preload(gfp_mask, nr_nodes); 510 return __radix_tree_preload(gfp_mask, nr_nodes);
456} 511}
457 512
458/*
459 * The maximum index which can be stored in a radix tree
460 */
461static inline unsigned long shift_maxindex(unsigned int shift)
462{
463 return (RADIX_TREE_MAP_SIZE << shift) - 1;
464}
465
466static inline unsigned long node_maxindex(struct radix_tree_node *node)
467{
468 return shift_maxindex(node->shift);
469}
470
471static unsigned radix_tree_load_root(struct radix_tree_root *root, 513static unsigned radix_tree_load_root(struct radix_tree_root *root,
472 struct radix_tree_node **nodep, unsigned long *maxindex) 514 struct radix_tree_node **nodep, unsigned long *maxindex)
473{ 515{
@@ -505,8 +547,8 @@ static int radix_tree_extend(struct radix_tree_root *root,
505 goto out; 547 goto out;
506 548
507 do { 549 do {
508 struct radix_tree_node *node = radix_tree_node_alloc(root); 550 struct radix_tree_node *node = radix_tree_node_alloc(root,
509 551 NULL, shift, 0, 1, 0);
510 if (!node) 552 if (!node)
511 return -ENOMEM; 553 return -ENOMEM;
512 554
@@ -517,16 +559,11 @@ static int radix_tree_extend(struct radix_tree_root *root,
517 } 559 }
518 560
519 BUG_ON(shift > BITS_PER_LONG); 561 BUG_ON(shift > BITS_PER_LONG);
520 node->shift = shift;
521 node->offset = 0;
522 node->count = 1;
523 node->parent = NULL;
524 if (radix_tree_is_internal_node(slot)) { 562 if (radix_tree_is_internal_node(slot)) {
525 entry_to_node(slot)->parent = node; 563 entry_to_node(slot)->parent = node;
526 } else { 564 } else if (radix_tree_exceptional_entry(slot)) {
527 /* Moving an exceptional root->rnode to a node */ 565 /* Moving an exceptional root->rnode to a node */
528 if (radix_tree_exceptional_entry(slot)) 566 node->exceptional = 1;
529 node->exceptional = 1;
530 } 567 }
531 node->slots[0] = slot; 568 node->slots[0] = slot;
532 slot = node_to_entry(node); 569 slot = node_to_entry(node);
@@ -665,26 +702,24 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
665 shift = radix_tree_load_root(root, &child, &maxindex); 702 shift = radix_tree_load_root(root, &child, &maxindex);
666 703
667 /* Make sure the tree is high enough. */ 704 /* Make sure the tree is high enough. */
705 if (order > 0 && max == ((1UL << order) - 1))
706 max++;
668 if (max > maxindex) { 707 if (max > maxindex) {
669 int error = radix_tree_extend(root, max, shift); 708 int error = radix_tree_extend(root, max, shift);
670 if (error < 0) 709 if (error < 0)
671 return error; 710 return error;
672 shift = error; 711 shift = error;
673 child = root->rnode; 712 child = root->rnode;
674 if (order == shift)
675 shift += RADIX_TREE_MAP_SHIFT;
676 } 713 }
677 714
678 while (shift > order) { 715 while (shift > order) {
679 shift -= RADIX_TREE_MAP_SHIFT; 716 shift -= RADIX_TREE_MAP_SHIFT;
680 if (child == NULL) { 717 if (child == NULL) {
681 /* Have to add a child node. */ 718 /* Have to add a child node. */
682 child = radix_tree_node_alloc(root); 719 child = radix_tree_node_alloc(root, node, shift,
720 offset, 0, 0);
683 if (!child) 721 if (!child)
684 return -ENOMEM; 722 return -ENOMEM;
685 child->shift = shift;
686 child->offset = offset;
687 child->parent = node;
688 rcu_assign_pointer(*slot, node_to_entry(child)); 723 rcu_assign_pointer(*slot, node_to_entry(child));
689 if (node) 724 if (node)
690 node->count++; 725 node->count++;
@@ -697,31 +732,125 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
697 slot = &node->slots[offset]; 732 slot = &node->slots[offset];
698 } 733 }
699 734
735 if (nodep)
736 *nodep = node;
737 if (slotp)
738 *slotp = slot;
739 return 0;
740}
741
700#ifdef CONFIG_RADIX_TREE_MULTIORDER 742#ifdef CONFIG_RADIX_TREE_MULTIORDER
701 /* Insert pointers to the canonical entry */ 743/*
702 if (order > shift) { 744 * Free any nodes below this node. The tree is presumed to not need
703 unsigned i, n = 1 << (order - shift); 745 * shrinking, and any user data in the tree is presumed to not need a
746 * destructor called on it. If we need to add a destructor, we can
747 * add that functionality later. Note that we may not clear tags or
748 * slots from the tree as an RCU walker may still have a pointer into
749 * this subtree. We could replace the entries with RADIX_TREE_RETRY,
750 * but we'll still have to clear those in rcu_free.
751 */
752static void radix_tree_free_nodes(struct radix_tree_node *node)
753{
754 unsigned offset = 0;
755 struct radix_tree_node *child = entry_to_node(node);
756
757 for (;;) {
758 void *entry = child->slots[offset];
759 if (radix_tree_is_internal_node(entry) &&
760 !is_sibling_entry(child, entry)) {
761 child = entry_to_node(entry);
762 offset = 0;
763 continue;
764 }
765 offset++;
766 while (offset == RADIX_TREE_MAP_SIZE) {
767 struct radix_tree_node *old = child;
768 offset = child->offset + 1;
769 child = child->parent;
770 radix_tree_node_free(old);
771 if (old == entry_to_node(node))
772 return;
773 }
774 }
775}
776
777static inline int insert_entries(struct radix_tree_node *node, void **slot,
778 void *item, unsigned order, bool replace)
779{
780 struct radix_tree_node *child;
781 unsigned i, n, tag, offset, tags = 0;
782
783 if (node) {
784 if (order > node->shift)
785 n = 1 << (order - node->shift);
786 else
787 n = 1;
788 offset = get_slot_offset(node, slot);
789 } else {
790 n = 1;
791 offset = 0;
792 }
793
794 if (n > 1) {
704 offset = offset & ~(n - 1); 795 offset = offset & ~(n - 1);
705 slot = &node->slots[offset]; 796 slot = &node->slots[offset];
706 child = node_to_entry(slot); 797 }
707 for (i = 0; i < n; i++) { 798 child = node_to_entry(slot);
708 if (slot[i]) 799
800 for (i = 0; i < n; i++) {
801 if (slot[i]) {
802 if (replace) {
803 node->count--;
804 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
805 if (tag_get(node, tag, offset + i))
806 tags |= 1 << tag;
807 } else
709 return -EEXIST; 808 return -EEXIST;
710 } 809 }
810 }
711 811
712 for (i = 1; i < n; i++) { 812 for (i = 0; i < n; i++) {
813 struct radix_tree_node *old = slot[i];
814 if (i) {
713 rcu_assign_pointer(slot[i], child); 815 rcu_assign_pointer(slot[i], child);
714 node->count++; 816 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
817 if (tags & (1 << tag))
818 tag_clear(node, tag, offset + i);
819 } else {
820 rcu_assign_pointer(slot[i], item);
821 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
822 if (tags & (1 << tag))
823 tag_set(node, tag, offset);
715 } 824 }
825 if (radix_tree_is_internal_node(old) &&
826 !is_sibling_entry(node, old) &&
827 (old != RADIX_TREE_RETRY))
828 radix_tree_free_nodes(old);
829 if (radix_tree_exceptional_entry(old))
830 node->exceptional--;
716 } 831 }
717#endif 832 if (node) {
718 833 node->count += n;
719 if (nodep) 834 if (radix_tree_exceptional_entry(item))
720 *nodep = node; 835 node->exceptional += n;
721 if (slotp) 836 }
722 *slotp = slot; 837 return n;
723 return 0; 838}
839#else
840static inline int insert_entries(struct radix_tree_node *node, void **slot,
841 void *item, unsigned order, bool replace)
842{
843 if (*slot)
844 return -EEXIST;
845 rcu_assign_pointer(*slot, item);
846 if (node) {
847 node->count++;
848 if (radix_tree_exceptional_entry(item))
849 node->exceptional++;
850 }
851 return 1;
724} 852}
853#endif
725 854
726/** 855/**
727 * __radix_tree_insert - insert into a radix tree 856 * __radix_tree_insert - insert into a radix tree
@@ -744,15 +873,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
744 error = __radix_tree_create(root, index, order, &node, &slot); 873 error = __radix_tree_create(root, index, order, &node, &slot);
745 if (error) 874 if (error)
746 return error; 875 return error;
747 if (*slot != NULL) 876
748 return -EEXIST; 877 error = insert_entries(node, slot, item, order, false);
749 rcu_assign_pointer(*slot, item); 878 if (error < 0)
879 return error;
750 880
751 if (node) { 881 if (node) {
752 unsigned offset = get_slot_offset(node, slot); 882 unsigned offset = get_slot_offset(node, slot);
753 node->count++;
754 if (radix_tree_exceptional_entry(item))
755 node->exceptional++;
756 BUG_ON(tag_get(node, 0, offset)); 883 BUG_ON(tag_get(node, 0, offset));
757 BUG_ON(tag_get(node, 1, offset)); 884 BUG_ON(tag_get(node, 1, offset));
758 BUG_ON(tag_get(node, 2, offset)); 885 BUG_ON(tag_get(node, 2, offset));
@@ -850,6 +977,24 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
850} 977}
851EXPORT_SYMBOL(radix_tree_lookup); 978EXPORT_SYMBOL(radix_tree_lookup);
852 979
980static inline int slot_count(struct radix_tree_node *node,
981 void **slot)
982{
983 int n = 1;
984#ifdef CONFIG_RADIX_TREE_MULTIORDER
985 void *ptr = node_to_entry(slot);
986 unsigned offset = get_slot_offset(node, slot);
987 int i;
988
989 for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
990 if (node->slots[offset + i] != ptr)
991 break;
992 n++;
993 }
994#endif
995 return n;
996}
997
853static void replace_slot(struct radix_tree_root *root, 998static void replace_slot(struct radix_tree_root *root,
854 struct radix_tree_node *node, 999 struct radix_tree_node *node,
855 void **slot, void *item, 1000 void **slot, void *item,
@@ -868,12 +1013,35 @@ static void replace_slot(struct radix_tree_root *root,
868 1013
869 if (node) { 1014 if (node) {
870 node->count += count; 1015 node->count += count;
871 node->exceptional += exceptional; 1016 if (exceptional) {
1017 exceptional *= slot_count(node, slot);
1018 node->exceptional += exceptional;
1019 }
872 } 1020 }
873 1021
874 rcu_assign_pointer(*slot, item); 1022 rcu_assign_pointer(*slot, item);
875} 1023}
876 1024
1025static inline void delete_sibling_entries(struct radix_tree_node *node,
1026 void **slot)
1027{
1028#ifdef CONFIG_RADIX_TREE_MULTIORDER
1029 bool exceptional = radix_tree_exceptional_entry(*slot);
1030 void *ptr = node_to_entry(slot);
1031 unsigned offset = get_slot_offset(node, slot);
1032 int i;
1033
1034 for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
1035 if (node->slots[offset + i] != ptr)
1036 break;
1037 node->slots[offset + i] = NULL;
1038 node->count--;
1039 if (exceptional)
1040 node->exceptional--;
1041 }
1042#endif
1043}
1044
877/** 1045/**
878 * __radix_tree_replace - replace item in a slot 1046 * __radix_tree_replace - replace item in a slot
879 * @root: radix tree root 1047 * @root: radix tree root
@@ -891,6 +1059,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
891 void **slot, void *item, 1059 void **slot, void *item,
892 radix_tree_update_node_t update_node, void *private) 1060 radix_tree_update_node_t update_node, void *private)
893{ 1061{
1062 if (!item)
1063 delete_sibling_entries(node, slot);
894 /* 1064 /*
895 * This function supports replacing exceptional entries and 1065 * This function supports replacing exceptional entries and
896 * deleting entries, but that needs accounting against the 1066 * deleting entries, but that needs accounting against the
@@ -921,7 +1091,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
921 * NOTE: This cannot be used to switch between non-entries (empty slots), 1091 * NOTE: This cannot be used to switch between non-entries (empty slots),
922 * regular entries, and exceptional entries, as that requires accounting 1092 * regular entries, and exceptional entries, as that requires accounting
923 * inside the radix tree node. When switching from one type of entry or 1093 * inside the radix tree node. When switching from one type of entry or
924 * deleting, use __radix_tree_lookup() and __radix_tree_replace(). 1094 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
1095 * radix_tree_iter_replace().
925 */ 1096 */
926void radix_tree_replace_slot(struct radix_tree_root *root, 1097void radix_tree_replace_slot(struct radix_tree_root *root,
927 void **slot, void *item) 1098 void **slot, void *item)
@@ -930,6 +1101,164 @@ void radix_tree_replace_slot(struct radix_tree_root *root,
930} 1101}
931 1102
932/** 1103/**
1104 * radix_tree_iter_replace - replace item in a slot
1105 * @root: radix tree root
1106 * @slot: pointer to slot
1107 * @item: new item to store in the slot.
1108 *
1109 * For use with radix_tree_split() and radix_tree_for_each_slot().
1110 * Caller must hold tree write locked across split and replacement.
1111 */
1112void radix_tree_iter_replace(struct radix_tree_root *root,
1113 const struct radix_tree_iter *iter, void **slot, void *item)
1114{
1115 __radix_tree_replace(root, iter->node, slot, item, NULL, NULL);
1116}
1117
1118#ifdef CONFIG_RADIX_TREE_MULTIORDER
1119/**
1120 * radix_tree_join - replace multiple entries with one multiorder entry
1121 * @root: radix tree root
1122 * @index: an index inside the new entry
1123 * @order: order of the new entry
1124 * @item: new entry
1125 *
1126 * Call this function to replace several entries with one larger entry.
1127 * The existing entries are presumed to not need freeing as a result of
1128 * this call.
1129 *
1130 * The replacement entry will have all the tags set on it that were set
1131 * on any of the entries it is replacing.
1132 */
1133int radix_tree_join(struct radix_tree_root *root, unsigned long index,
1134 unsigned order, void *item)
1135{
1136 struct radix_tree_node *node;
1137 void **slot;
1138 int error;
1139
1140 BUG_ON(radix_tree_is_internal_node(item));
1141
1142 error = __radix_tree_create(root, index, order, &node, &slot);
1143 if (!error)
1144 error = insert_entries(node, slot, item, order, true);
1145 if (error > 0)
1146 error = 0;
1147
1148 return error;
1149}
1150
1151/**
1152 * radix_tree_split - Split an entry into smaller entries
1153 * @root: radix tree root
1154 * @index: An index within the large entry
1155 * @order: Order of new entries
1156 *
1157 * Call this function as the first step in replacing a multiorder entry
1158 * with several entries of lower order. After this function returns,
1159 * loop over the relevant portion of the tree using radix_tree_for_each_slot()
1160 * and call radix_tree_iter_replace() to set up each new entry.
1161 *
1162 * The tags from this entry are replicated to all the new entries.
1163 *
1164 * The radix tree should be locked against modification during the entire
1165 * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which
1166 * should prompt RCU walkers to restart the lookup from the root.
1167 */
1168int radix_tree_split(struct radix_tree_root *root, unsigned long index,
1169 unsigned order)
1170{
1171 struct radix_tree_node *parent, *node, *child;
1172 void **slot;
1173 unsigned int offset, end;
1174 unsigned n, tag, tags = 0;
1175
1176 if (!__radix_tree_lookup(root, index, &parent, &slot))
1177 return -ENOENT;
1178 if (!parent)
1179 return -ENOENT;
1180
1181 offset = get_slot_offset(parent, slot);
1182
1183 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1184 if (tag_get(parent, tag, offset))
1185 tags |= 1 << tag;
1186
1187 for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
1188 if (!is_sibling_entry(parent, parent->slots[end]))
1189 break;
1190 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1191 if (tags & (1 << tag))
1192 tag_set(parent, tag, end);
1193 /* rcu_assign_pointer ensures tags are set before RETRY */
1194 rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
1195 }
1196 rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
1197 parent->exceptional -= (end - offset);
1198
1199 if (order == parent->shift)
1200 return 0;
1201 if (order > parent->shift) {
1202 while (offset < end)
1203 offset += insert_entries(parent, &parent->slots[offset],
1204 RADIX_TREE_RETRY, order, true);
1205 return 0;
1206 }
1207
1208 node = parent;
1209
1210 for (;;) {
1211 if (node->shift > order) {
1212 child = radix_tree_node_alloc(root, node,
1213 node->shift - RADIX_TREE_MAP_SHIFT,
1214 offset, 0, 0);
1215 if (!child)
1216 goto nomem;
1217 if (node != parent) {
1218 node->count++;
1219 node->slots[offset] = node_to_entry(child);
1220 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1221 if (tags & (1 << tag))
1222 tag_set(node, tag, offset);
1223 }
1224
1225 node = child;
1226 offset = 0;
1227 continue;
1228 }
1229
1230 n = insert_entries(node, &node->slots[offset],
1231 RADIX_TREE_RETRY, order, false);
1232 BUG_ON(n > RADIX_TREE_MAP_SIZE);
1233
1234 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1235 if (tags & (1 << tag))
1236 tag_set(node, tag, offset);
1237 offset += n;
1238
1239 while (offset == RADIX_TREE_MAP_SIZE) {
1240 if (node == parent)
1241 break;
1242 offset = node->offset;
1243 child = node;
1244 node = node->parent;
1245 rcu_assign_pointer(node->slots[offset],
1246 node_to_entry(child));
1247 offset++;
1248 }
1249 if ((node == parent) && (offset == end))
1250 return 0;
1251 }
1252
1253 nomem:
1254 /* Shouldn't happen; did user forget to preload? */
1255 /* TODO: free all the allocated nodes */
1256 WARN_ON(1);
1257 return -ENOMEM;
1258}
1259#endif
1260
1261/**
933 * radix_tree_tag_set - set a tag on a radix tree node 1262 * radix_tree_tag_set - set a tag on a radix tree node
934 * @root: radix tree root 1263 * @root: radix tree root
935 * @index: index key 1264 * @index: index key
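
The kernel-doc for radix_tree_split() above describes a three-step protocol: preload, split, then walk the affected range replacing the RADIX_TREE_RETRY placeholders via radix_tree_iter_replace(). A rough sketch of such a caller, assuming the tree lock is held, preloading has been done, and 'root', 'start' and make_entry() are supplied by the caller (none of those are part of this patch):

	struct radix_tree_iter iter;
	void **slot;
	unsigned long end = start + (1UL << 9);	/* old entry was order 9 */
	int err = radix_tree_split(root, start, 0);

	if (err)
		return err;
	radix_tree_for_each_slot(slot, root, &iter, start) {
		if (iter.index >= end)
			break;
		/* each placeholder left by the split becomes a real order-0 entry */
		radix_tree_iter_replace(root, &iter, slot, make_entry(iter.index));
	}
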
@@ -990,6 +1319,34 @@ static void node_tag_clear(struct radix_tree_root *root,
990 root_tag_clear(root, tag); 1319 root_tag_clear(root, tag);
991} 1320}
992 1321
1322static void node_tag_set(struct radix_tree_root *root,
1323 struct radix_tree_node *node,
1324 unsigned int tag, unsigned int offset)
1325{
1326 while (node) {
1327 if (tag_get(node, tag, offset))
1328 return;
1329 tag_set(node, tag, offset);
1330 offset = node->offset;
1331 node = node->parent;
1332 }
1333
1334 if (!root_tag_get(root, tag))
1335 root_tag_set(root, tag);
1336}
1337
1338/**
1339 * radix_tree_iter_tag_set - set a tag on the current iterator entry
1340 * @root: radix tree root
1341 * @iter: iterator state
1342 * @tag: tag to set
1343 */
1344void radix_tree_iter_tag_set(struct radix_tree_root *root,
1345 const struct radix_tree_iter *iter, unsigned int tag)
1346{
1347 node_tag_set(root, iter->node, tag, iter_offset(iter));
1348}
1349
993/** 1350/**
994 * radix_tree_tag_clear - clear a tag on a radix tree node 1351 * radix_tree_tag_clear - clear a tag on a radix tree node
995 * @root: radix tree root 1352 * @root: radix tree root
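
radix_tree_iter_tag_set() above lets a walker tag the entry it is currently positioned on without a second lookup. A small sketch of that usage; MY_TAG (any tag index in 0..2) and the entry_is_dirty() predicate are made up for illustration:

	radix_tree_for_each_slot(slot, root, &iter, 0) {
		void *entry = radix_tree_deref_slot(slot);

		if (entry_is_dirty(entry))
			radix_tree_iter_tag_set(root, &iter, MY_TAG);
	}
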
@@ -1085,6 +1442,121 @@ static inline void __set_iter_shift(struct radix_tree_iter *iter,
1085#endif 1442#endif
1086} 1443}
1087 1444
1445/* Construct iter->tags bit-mask from node->tags[tag] array */
1446static void set_iter_tags(struct radix_tree_iter *iter,
1447 struct radix_tree_node *node, unsigned offset,
1448 unsigned tag)
1449{
1450 unsigned tag_long = offset / BITS_PER_LONG;
1451 unsigned tag_bit = offset % BITS_PER_LONG;
1452
1453 iter->tags = node->tags[tag][tag_long] >> tag_bit;
1454
1455 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
1456 if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
1457 /* Pick tags from next element */
1458 if (tag_bit)
1459 iter->tags |= node->tags[tag][tag_long + 1] <<
1460 (BITS_PER_LONG - tag_bit);
1461 /* Clip chunk size, here only BITS_PER_LONG tags */
1462 iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
1463 }
1464}
1465
1466#ifdef CONFIG_RADIX_TREE_MULTIORDER
1467static void **skip_siblings(struct radix_tree_node **nodep,
1468 void **slot, struct radix_tree_iter *iter)
1469{
1470 void *sib = node_to_entry(slot - 1);
1471
1472 while (iter->index < iter->next_index) {
1473 *nodep = rcu_dereference_raw(*slot);
1474 if (*nodep && *nodep != sib)
1475 return slot;
1476 slot++;
1477 iter->index = __radix_tree_iter_add(iter, 1);
1478 iter->tags >>= 1;
1479 }
1480
1481 *nodep = NULL;
1482 return NULL;
1483}
1484
1485void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
1486 unsigned flags)
1487{
1488 unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
1489 struct radix_tree_node *node = rcu_dereference_raw(*slot);
1490
1491 slot = skip_siblings(&node, slot, iter);
1492
1493 while (radix_tree_is_internal_node(node)) {
1494 unsigned offset;
1495 unsigned long next_index;
1496
1497 if (node == RADIX_TREE_RETRY)
1498 return slot;
1499 node = entry_to_node(node);
1500 iter->node = node;
1501 iter->shift = node->shift;
1502
1503 if (flags & RADIX_TREE_ITER_TAGGED) {
1504 offset = radix_tree_find_next_bit(node, tag, 0);
1505 if (offset == RADIX_TREE_MAP_SIZE)
1506 return NULL;
1507 slot = &node->slots[offset];
1508 iter->index = __radix_tree_iter_add(iter, offset);
1509 set_iter_tags(iter, node, offset, tag);
1510 node = rcu_dereference_raw(*slot);
1511 } else {
1512 offset = 0;
1513 slot = &node->slots[0];
1514 for (;;) {
1515 node = rcu_dereference_raw(*slot);
1516 if (node)
1517 break;
1518 slot++;
1519 offset++;
1520 if (offset == RADIX_TREE_MAP_SIZE)
1521 return NULL;
1522 }
1523 iter->index = __radix_tree_iter_add(iter, offset);
1524 }
1525 if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0))
1526 goto none;
1527 next_index = (iter->index | shift_maxindex(iter->shift)) + 1;
1528 if (next_index < iter->next_index)
1529 iter->next_index = next_index;
1530 }
1531
1532 return slot;
1533 none:
1534 iter->next_index = 0;
1535 return NULL;
1536}
1537EXPORT_SYMBOL(__radix_tree_next_slot);
1538#else
1539static void **skip_siblings(struct radix_tree_node **nodep,
1540 void **slot, struct radix_tree_iter *iter)
1541{
1542 return slot;
1543}
1544#endif
1545
1546void **radix_tree_iter_resume(void **slot, struct radix_tree_iter *iter)
1547{
1548 struct radix_tree_node *node;
1549
1550 slot++;
1551 iter->index = __radix_tree_iter_add(iter, 1);
1552 node = rcu_dereference_raw(*slot);
1553 skip_siblings(&node, slot, iter);
1554 iter->next_index = iter->index;
1555 iter->tags = 0;
1556 return NULL;
1557}
1558EXPORT_SYMBOL(radix_tree_iter_resume);
1559
1088/** 1560/**
1089 * radix_tree_next_chunk - find next chunk of slots for iteration 1561 * radix_tree_next_chunk - find next chunk of slots for iteration
1090 * 1562 *
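
radix_tree_iter_resume(), added above, is what lets a long walk step out of the current chunk (for example to reschedule or drop a lock) and pick up again from the next index on the following radix_tree_next_slot() call. The typical shape of such a loop looks roughly like the sketch below; process_entry() is an assumed placeholder, the rest are existing radix-tree and scheduler helpers:

	struct radix_tree_iter iter;
	void **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, root, &iter, 0) {
		process_entry(radix_tree_deref_slot(slot));
		if (need_resched()) {
			/* returns NULL; the next iteration restarts a chunk lookup */
			slot = radix_tree_iter_resume(slot, &iter);
			cond_resched_rcu();
		}
	}
	rcu_read_unlock();
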
@@ -1110,7 +1582,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
1110 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. 1582 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
1111 * 1583 *
1112 * This condition also used by radix_tree_next_slot() to stop 1584 * This condition also used by radix_tree_next_slot() to stop
1113 * contiguous iterating, and forbid swithing to the next chunk. 1585 * contiguous iterating, and forbid switching to the next chunk.
1114 */ 1586 */
1115 index = iter->next_index; 1587 index = iter->next_index;
1116 if (!index && iter->index) 1588 if (!index && iter->index)
@@ -1128,6 +1600,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
1128 iter->index = index; 1600 iter->index = index;
1129 iter->next_index = maxindex + 1; 1601 iter->next_index = maxindex + 1;
1130 iter->tags = 1; 1602 iter->tags = 1;
1603 iter->node = NULL;
1131 __set_iter_shift(iter, 0); 1604 __set_iter_shift(iter, 0);
1132 return (void **)&root->rnode; 1605 return (void **)&root->rnode;
1133 } 1606 }
@@ -1143,9 +1616,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
1143 return NULL; 1616 return NULL;
1144 1617
1145 if (flags & RADIX_TREE_ITER_TAGGED) 1618 if (flags & RADIX_TREE_ITER_TAGGED)
1146 offset = radix_tree_find_next_bit( 1619 offset = radix_tree_find_next_bit(node, tag,
1147 node->tags[tag],
1148 RADIX_TREE_MAP_SIZE,
1149 offset + 1); 1620 offset + 1);
1150 else 1621 else
1151 while (++offset < RADIX_TREE_MAP_SIZE) { 1622 while (++offset < RADIX_TREE_MAP_SIZE) {
@@ -1165,154 +1636,26 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
1165 child = rcu_dereference_raw(node->slots[offset]); 1636 child = rcu_dereference_raw(node->slots[offset]);
1166 } 1637 }
1167 1638
1168 if ((child == NULL) || (child == RADIX_TREE_RETRY)) 1639 if (!child)
1169 goto restart; 1640 goto restart;
1641 if (child == RADIX_TREE_RETRY)
1642 break;
1170 } while (radix_tree_is_internal_node(child)); 1643 } while (radix_tree_is_internal_node(child));
1171 1644
1172 /* Update the iterator state */ 1645 /* Update the iterator state */
1173 iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); 1646 iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
1174 iter->next_index = (index | node_maxindex(node)) + 1; 1647 iter->next_index = (index | node_maxindex(node)) + 1;
1648 iter->node = node;
1175 __set_iter_shift(iter, node->shift); 1649 __set_iter_shift(iter, node->shift);
1176 1650
1177 /* Construct iter->tags bit-mask from node->tags[tag] array */ 1651 if (flags & RADIX_TREE_ITER_TAGGED)
1178 if (flags & RADIX_TREE_ITER_TAGGED) { 1652 set_iter_tags(iter, node, offset, tag);
1179 unsigned tag_long, tag_bit;
1180
1181 tag_long = offset / BITS_PER_LONG;
1182 tag_bit = offset % BITS_PER_LONG;
1183 iter->tags = node->tags[tag][tag_long] >> tag_bit;
1184 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
1185 if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
1186 /* Pick tags from next element */
1187 if (tag_bit)
1188 iter->tags |= node->tags[tag][tag_long + 1] <<
1189 (BITS_PER_LONG - tag_bit);
1190 /* Clip chunk size, here only BITS_PER_LONG tags */
1191 iter->next_index = index + BITS_PER_LONG;
1192 }
1193 }
1194 1653
1195 return node->slots + offset; 1654 return node->slots + offset;
1196} 1655}
1197EXPORT_SYMBOL(radix_tree_next_chunk); 1656EXPORT_SYMBOL(radix_tree_next_chunk);
1198 1657
1199/** 1658/**
1200 * radix_tree_range_tag_if_tagged - for each item in given range set given
1201 * tag if item has another tag set
1202 * @root: radix tree root
1203 * @first_indexp: pointer to a starting index of a range to scan
1204 * @last_index: last index of a range to scan
1205 * @nr_to_tag: maximum number items to tag
1206 * @iftag: tag index to test
1207 * @settag: tag index to set if tested tag is set
1208 *
1209 * This function scans range of radix tree from first_index to last_index
1210 * (inclusive). For each item in the range if iftag is set, the function sets
1211 * also settag. The function stops either after tagging nr_to_tag items or
1212 * after reaching last_index.
1213 *
1214 * The tags must be set from the leaf level only and propagated back up the
1215 * path to the root. We must do this so that we resolve the full path before
1216 * setting any tags on intermediate nodes. If we set tags as we descend, then
1217 * we can get to the leaf node and find that the index that has the iftag
1218 * set is outside the range we are scanning. This reults in dangling tags and
1219 * can lead to problems with later tag operations (e.g. livelocks on lookups).
1220 *
1221 * The function returns the number of leaves where the tag was set and sets
1222 * *first_indexp to the first unscanned index.
1223 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
1224 * be prepared to handle that.
1225 */
1226unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
1227 unsigned long *first_indexp, unsigned long last_index,
1228 unsigned long nr_to_tag,
1229 unsigned int iftag, unsigned int settag)
1230{
1231 struct radix_tree_node *parent, *node, *child;
1232 unsigned long maxindex;
1233 unsigned long tagged = 0;
1234 unsigned long index = *first_indexp;
1235
1236 radix_tree_load_root(root, &child, &maxindex);
1237 last_index = min(last_index, maxindex);
1238 if (index > last_index)
1239 return 0;
1240 if (!nr_to_tag)
1241 return 0;
1242 if (!root_tag_get(root, iftag)) {
1243 *first_indexp = last_index + 1;
1244 return 0;
1245 }
1246 if (!radix_tree_is_internal_node(child)) {
1247 *first_indexp = last_index + 1;
1248 root_tag_set(root, settag);
1249 return 1;
1250 }
1251
1252 node = entry_to_node(child);
1253
1254 for (;;) {
1255 unsigned offset = radix_tree_descend(node, &child, index);
1256 if (!child)
1257 goto next;
1258 if (!tag_get(node, iftag, offset))
1259 goto next;
1260 /* Sibling slots never have tags set on them */
1261 if (radix_tree_is_internal_node(child)) {
1262 node = entry_to_node(child);
1263 continue;
1264 }
1265
1266 /* tag the leaf */
1267 tagged++;
1268 tag_set(node, settag, offset);
1269
1270 /* walk back up the path tagging interior nodes */
1271 parent = node;
1272 for (;;) {
1273 offset = parent->offset;
1274 parent = parent->parent;
1275 if (!parent)
1276 break;
1277 /* stop if we find a node with the tag already set */
1278 if (tag_get(parent, settag, offset))
1279 break;
1280 tag_set(parent, settag, offset);
1281 }
1282 next:
1283 /* Go to next entry in node */
1284 index = ((index >> node->shift) + 1) << node->shift;
1285 /* Overflow can happen when last_index is ~0UL... */
1286 if (index > last_index || !index)
1287 break;
1288 offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
1289 while (offset == 0) {
1290 /*
1291 * We've fully scanned this node. Go up. Because
1292 * last_index is guaranteed to be in the tree, what
1293 * we do below cannot wander astray.
1294 */
1295 node = node->parent;
1296 offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
1297 }
1298 if (is_sibling_entry(node, node->slots[offset]))
1299 goto next;
1300 if (tagged >= nr_to_tag)
1301 break;
1302 }
1303 /*
 1304 * There is no need to set the root tag if no leaf in the range from
 1305 * *first_indexp to last_index ended up tagged with settag.
1306 */
1307 if (tagged > 0)
1308 root_tag_set(root, settag);
1309 *first_indexp = index;
1310
1311 return tagged;
1312}
1313EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
1314
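For context on the interface removed above: radix_tree_range_tag_if_tagged() was normally driven in batches, with the caller restarting from *first_indexp until the whole range was covered (tag_pages_for_writeback() in mm/page-writeback.c was the main in-tree user). A minimal sketch of that pattern follows; the tree, the tag indices and the batch size are illustrative and not taken from this patch, and a real caller would additionally hold the tree's lock across each call.

	/*
	 * Hedged sketch: re-tag items carrying MY_IFTAG with MY_SETTAG,
	 * 64 at a time.  my_tree, MY_IFTAG and MY_SETTAG are hypothetical.
	 */
	unsigned long first = 0, last = ~0UL;
	unsigned long tagged;

	do {
		tagged = radix_tree_range_tag_if_tagged(&my_tree, &first, last,
							64, MY_IFTAG, MY_SETTAG);
		/*
		 * first now holds the first unscanned index; as the WARNING in
		 * the kernel-doc notes, it can wrap to 0 when last == ~0UL, so
		 * treat a wrap as "done".
		 */
	} while (tagged == 64 && first && first <= last);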
1315/**
1316 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 1659 * radix_tree_gang_lookup - perform multiple lookup on a radix tree
1317 * @root: radix tree root 1660 * @root: radix tree root
1318 * @results: where the results of the lookup are placed 1661 * @results: where the results of the lookup are placed
@@ -1477,105 +1820,6 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1477} 1820}
1478EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1821EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
1479 1822
1480#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
1481#include <linux/sched.h> /* for cond_resched() */
1482
1483struct locate_info {
1484 unsigned long found_index;
1485 bool stop;
1486};
1487
1488/*
1489 * This linear search is at present only useful to shmem_unuse_inode().
1490 */
1491static unsigned long __locate(struct radix_tree_node *slot, void *item,
1492 unsigned long index, struct locate_info *info)
1493{
1494 unsigned long i;
1495
1496 do {
1497 unsigned int shift = slot->shift;
1498
1499 for (i = (index >> shift) & RADIX_TREE_MAP_MASK;
1500 i < RADIX_TREE_MAP_SIZE;
1501 i++, index += (1UL << shift)) {
1502 struct radix_tree_node *node =
1503 rcu_dereference_raw(slot->slots[i]);
1504 if (node == RADIX_TREE_RETRY)
1505 goto out;
1506 if (!radix_tree_is_internal_node(node)) {
1507 if (node == item) {
1508 info->found_index = index;
1509 info->stop = true;
1510 goto out;
1511 }
1512 continue;
1513 }
1514 node = entry_to_node(node);
1515 if (is_sibling_entry(slot, node))
1516 continue;
1517 slot = node;
1518 break;
1519 }
1520 } while (i < RADIX_TREE_MAP_SIZE);
1521
1522out:
1523 if ((index == 0) && (i == RADIX_TREE_MAP_SIZE))
1524 info->stop = true;
1525 return index;
1526}
1527
1528/**
1529 * radix_tree_locate_item - search through radix tree for item
1530 * @root: radix tree root
1531 * @item: item to be found
1532 *
1533 * Returns index where item was found, or -1 if not found.
1534 * Caller must hold no lock (since this time-consuming function needs
1535 * to be preemptible), and must check afterwards if item is still there.
1536 */
1537unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1538{
1539 struct radix_tree_node *node;
1540 unsigned long max_index;
1541 unsigned long cur_index = 0;
1542 struct locate_info info = {
1543 .found_index = -1,
1544 .stop = false,
1545 };
1546
1547 do {
1548 rcu_read_lock();
1549 node = rcu_dereference_raw(root->rnode);
1550 if (!radix_tree_is_internal_node(node)) {
1551 rcu_read_unlock();
1552 if (node == item)
1553 info.found_index = 0;
1554 break;
1555 }
1556
1557 node = entry_to_node(node);
1558
1559 max_index = node_maxindex(node);
1560 if (cur_index > max_index) {
1561 rcu_read_unlock();
1562 break;
1563 }
1564
1565 cur_index = __locate(node, item, cur_index, &info);
1566 rcu_read_unlock();
1567 cond_resched();
1568 } while (!info.stop && cur_index <= max_index);
1569
1570 return info.found_index;
1571}
1572#else
1573unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1574{
1575 return -1;
1576}
1577#endif /* CONFIG_SHMEM && CONFIG_SWAP */
1578
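The lookup-and-revalidate pattern that the radix_tree_locate_item() comment above prescribes (no locks held during the slow search, then re-check under the lock before acting) looks roughly like the sketch below; my_tree, my_lock and item are hypothetical stand-ins.

	unsigned long index;

	index = radix_tree_locate_item(&my_tree, item);	/* no locks held */
	if (index != (unsigned long)-1) {
		spin_lock_irq(&my_lock);
		if (radix_tree_lookup(&my_tree, index) == item) {
			/* item is still at 'index'; safe to operate on it */
		}
		spin_unlock_irq(&my_lock);
	}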
1579/** 1823/**
1580 * __radix_tree_delete_node - try to free node after clearing a slot 1824 * __radix_tree_delete_node - try to free node after clearing a slot
1581 * @root: radix tree root 1825 * @root: radix tree root
@@ -1591,20 +1835,6 @@ void __radix_tree_delete_node(struct radix_tree_root *root,
1591 delete_node(root, node, NULL, NULL); 1835 delete_node(root, node, NULL, NULL);
1592} 1836}
1593 1837
1594static inline void delete_sibling_entries(struct radix_tree_node *node,
1595 void *ptr, unsigned offset)
1596{
1597#ifdef CONFIG_RADIX_TREE_MULTIORDER
1598 int i;
1599 for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
1600 if (node->slots[offset + i] != ptr)
1601 break;
1602 node->slots[offset + i] = NULL;
1603 node->count--;
1604 }
1605#endif
1606}
1607
1608/** 1838/**
1609 * radix_tree_delete_item - delete an item from a radix tree 1839 * radix_tree_delete_item - delete an item from a radix tree
1610 * @root: radix tree root 1840 * @root: radix tree root
@@ -1644,7 +1874,6 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
1644 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1874 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1645 node_tag_clear(root, node, tag, offset); 1875 node_tag_clear(root, node, tag, offset);
1646 1876
1647 delete_sibling_entries(node, node_to_entry(slot), offset);
1648 __radix_tree_replace(root, node, slot, NULL, NULL, NULL); 1877 __radix_tree_replace(root, node, slot, NULL, NULL, NULL);
1649 1878
1650 return entry; 1879 return entry;
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
index 76734004358d..20bca3d44f67 100644
--- a/lib/raid6/avx2.c
+++ b/lib/raid6/avx2.c
@@ -87,9 +87,57 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
87 kernel_fpu_end(); 87 kernel_fpu_end();
88} 88}
89 89
90static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
91 size_t bytes, void **ptrs)
92{
93 u8 **dptr = (u8 **)ptrs;
94 u8 *p, *q;
95 int d, z, z0;
96
97 z0 = stop; /* P/Q right side optimization */
98 p = dptr[disks-2]; /* XOR parity */
99 q = dptr[disks-1]; /* RS syndrome */
100
101 kernel_fpu_begin();
102
103 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
104
105 for (d = 0 ; d < bytes ; d += 32) {
106 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
107 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
108 asm volatile("vpxor %ymm4,%ymm2,%ymm2");
109 /* P/Q data pages */
110 for (z = z0-1 ; z >= start ; z--) {
111 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
112 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
113 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
114 asm volatile("vpand %ymm0,%ymm5,%ymm5");
115 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
116 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
117 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
118 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
119 }
120 /* P/Q left side optimization */
121 for (z = start-1 ; z >= 0 ; z--) {
122 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
123 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
124 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
125 asm volatile("vpand %ymm0,%ymm5,%ymm5");
126 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
127 }
128 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
129 /* Don't use movntdq for r/w memory area < cache line */
130 asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
131 asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
132 }
133
134 asm volatile("sfence" : : : "memory");
135 kernel_fpu_end();
136}
137
90const struct raid6_calls raid6_avx2x1 = { 138const struct raid6_calls raid6_avx2x1 = {
91 raid6_avx21_gen_syndrome, 139 raid6_avx21_gen_syndrome,
92 NULL, /* XOR not yet implemented */ 140 raid6_avx21_xor_syndrome,
93 raid6_have_avx2, 141 raid6_have_avx2,
94 "avx2x1", 142 "avx2x1",
95 1 /* Has cache hints */ 143 1 /* Has cache hints */
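The vpcmpgtb/vpaddb/vpand/vpxor sequence repeated per 32-byte lane in all three new xor_syndrome routines is the usual RAID-6 multiply-by-two in GF(2^8): shift every byte left by one and fold the polynomial constant 0x1d back in wherever the top bit was set (ymm0 holds 32 copies of 0x1d). A scalar sketch of one byte position, equivalent to what raid6_avx21_xor_syndrome() does per lane, is shown below; gf2_mul2() and xor_syndrome_byte() are illustrative names, not kernel functions.

	/* Multiply one byte by 2 in GF(2^8) with the RAID-6 polynomial 0x11d. */
	static inline u8 gf2_mul2(u8 v)
	{
		u8 mask = (v & 0x80) ? 0x1d : 0;	/* vpcmpgtb + vpand %ymm0 */

		return (u8)(v << 1) ^ mask;		/* vpaddb + vpxor */
	}

	/* Scalar equivalent of one xor_syndrome pass at a single byte offset. */
	static void xor_syndrome_byte(int disks, int start, int stop,
				      size_t off, u8 **dptr)
	{
		u8 *p = dptr[disks - 2], *q = dptr[disks - 1];
		u8 wp = p[off] ^ dptr[stop][off];	/* fold new data into P */
		u8 wq = dptr[stop][off];
		int z;

		for (z = stop - 1; z >= start; z--) {	/* P/Q data pages */
			wq = gf2_mul2(wq) ^ dptr[z][off];
			wp ^= dptr[z][off];
		}
		for (z = start - 1; z >= 0; z--)	/* P/Q left side optimization */
			wq = gf2_mul2(wq);

		p[off] = wp;
		q[off] = wq ^ q[off];			/* fold into existing Q */
	}

The x2 and x4 variants below only unroll the same computation across more ymm registers; the x4 variant streams 128 bytes per iteration, which is why it can add prefetchnta and non-temporal vmovntdq stores that the narrower variants avoid (see the "Don't use movntdq" comments).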
@@ -149,9 +197,77 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
149 kernel_fpu_end(); 197 kernel_fpu_end();
150} 198}
151 199
200static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
201 size_t bytes, void **ptrs)
202{
203 u8 **dptr = (u8 **)ptrs;
204 u8 *p, *q;
205 int d, z, z0;
206
207 z0 = stop; /* P/Q right side optimization */
208 p = dptr[disks-2]; /* XOR parity */
209 q = dptr[disks-1]; /* RS syndrome */
210
211 kernel_fpu_begin();
212
213 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
214
215 for (d = 0 ; d < bytes ; d += 64) {
216 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
217 asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
218 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
219 asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
220 asm volatile("vpxor %ymm4,%ymm2,%ymm2");
221 asm volatile("vpxor %ymm6,%ymm3,%ymm3");
222 /* P/Q data pages */
223 for (z = z0-1 ; z >= start ; z--) {
224 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
225 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
226 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
227 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
228 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
229 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
230 asm volatile("vpand %ymm0,%ymm5,%ymm5");
231 asm volatile("vpand %ymm0,%ymm7,%ymm7");
232 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
233 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
234 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
235 asm volatile("vmovdqa %0,%%ymm7"
236 :: "m" (dptr[z][d+32]));
237 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
238 asm volatile("vpxor %ymm7,%ymm3,%ymm3");
239 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
240 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
241 }
242 /* P/Q left side optimization */
243 for (z = start-1 ; z >= 0 ; z--) {
244 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
245 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
246 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
247 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
248 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
249 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
250 asm volatile("vpand %ymm0,%ymm5,%ymm5");
251 asm volatile("vpand %ymm0,%ymm7,%ymm7");
252 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
253 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
254 }
255 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
256 asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
257 /* Don't use movntdq for r/w memory area < cache line */
258 asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
259 asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
260 asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
261 asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
262 }
263
264 asm volatile("sfence" : : : "memory");
265 kernel_fpu_end();
266}
267
152const struct raid6_calls raid6_avx2x2 = { 268const struct raid6_calls raid6_avx2x2 = {
153 raid6_avx22_gen_syndrome, 269 raid6_avx22_gen_syndrome,
154 NULL, /* XOR not yet implemented */ 270 raid6_avx22_xor_syndrome,
155 raid6_have_avx2, 271 raid6_have_avx2,
156 "avx2x2", 272 "avx2x2",
157 1 /* Has cache hints */ 273 1 /* Has cache hints */
@@ -242,9 +358,119 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
242 kernel_fpu_end(); 358 kernel_fpu_end();
243} 359}
244 360
361static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
362 size_t bytes, void **ptrs)
363{
364 u8 **dptr = (u8 **)ptrs;
365 u8 *p, *q;
366 int d, z, z0;
367
368 z0 = stop; /* P/Q right side optimization */
369 p = dptr[disks-2]; /* XOR parity */
370 q = dptr[disks-1]; /* RS syndrome */
371
372 kernel_fpu_begin();
373
374 asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));
375
376 for (d = 0 ; d < bytes ; d += 128) {
377 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
378 asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
379 asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
380 asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
381 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
382 asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
383 asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
384 asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
385 asm volatile("vpxor %ymm4,%ymm2,%ymm2");
386 asm volatile("vpxor %ymm6,%ymm3,%ymm3");
387 asm volatile("vpxor %ymm12,%ymm10,%ymm10");
388 asm volatile("vpxor %ymm14,%ymm11,%ymm11");
389 /* P/Q data pages */
390 for (z = z0-1 ; z >= start ; z--) {
391 asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
392 asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
393 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
394 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
395 asm volatile("vpxor %ymm13,%ymm13,%ymm13");
396 asm volatile("vpxor %ymm15,%ymm15,%ymm15");
397 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
398 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
399 asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
400 asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
401 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
402 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
403 asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
404 asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
405 asm volatile("vpand %ymm0,%ymm5,%ymm5");
406 asm volatile("vpand %ymm0,%ymm7,%ymm7");
407 asm volatile("vpand %ymm0,%ymm13,%ymm13");
408 asm volatile("vpand %ymm0,%ymm15,%ymm15");
409 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
410 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
411 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
412 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
413 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
414 asm volatile("vmovdqa %0,%%ymm7"
415 :: "m" (dptr[z][d+32]));
416 asm volatile("vmovdqa %0,%%ymm13"
417 :: "m" (dptr[z][d+64]));
418 asm volatile("vmovdqa %0,%%ymm15"
419 :: "m" (dptr[z][d+96]));
420 asm volatile("vpxor %ymm5,%ymm2,%ymm2");
421 asm volatile("vpxor %ymm7,%ymm3,%ymm3");
422 asm volatile("vpxor %ymm13,%ymm10,%ymm10");
423 asm volatile("vpxor %ymm15,%ymm11,%ymm11");
424 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
425 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
426 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
427 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
428 }
429 asm volatile("prefetchnta %0" :: "m" (q[d]));
430 asm volatile("prefetchnta %0" :: "m" (q[d+64]));
431 /* P/Q left side optimization */
432 for (z = start-1 ; z >= 0 ; z--) {
433 asm volatile("vpxor %ymm5,%ymm5,%ymm5");
434 asm volatile("vpxor %ymm7,%ymm7,%ymm7");
435 asm volatile("vpxor %ymm13,%ymm13,%ymm13");
436 asm volatile("vpxor %ymm15,%ymm15,%ymm15");
437 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
438 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
439 asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
440 asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
441 asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
442 asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
443 asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
444 asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
445 asm volatile("vpand %ymm0,%ymm5,%ymm5");
446 asm volatile("vpand %ymm0,%ymm7,%ymm7");
447 asm volatile("vpand %ymm0,%ymm13,%ymm13");
448 asm volatile("vpand %ymm0,%ymm15,%ymm15");
449 asm volatile("vpxor %ymm5,%ymm4,%ymm4");
450 asm volatile("vpxor %ymm7,%ymm6,%ymm6");
451 asm volatile("vpxor %ymm13,%ymm12,%ymm12");
452 asm volatile("vpxor %ymm15,%ymm14,%ymm14");
453 }
454 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
455 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
456 asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
457 asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
458 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
459 asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
460 asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
461 asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
462 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
463 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
464 asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
465 asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
466 }
467 asm volatile("sfence" : : : "memory");
468 kernel_fpu_end();
469}
470
245const struct raid6_calls raid6_avx2x4 = { 471const struct raid6_calls raid6_avx2x4 = {
246 raid6_avx24_gen_syndrome, 472 raid6_avx24_gen_syndrome,
247 NULL, /* XOR not yet implemented */ 473 raid6_avx24_xor_syndrome,
248 raid6_have_avx2, 474 raid6_have_avx2,
249 "avx2x4", 475 "avx2x4",
250 1 /* Has cache hints */ 476 1 /* Has cache hints */
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index 782ae8ca2c06..adc6ee0a5126 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -48,7 +48,7 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
48 while (*p) { 48 while (*p) {
49 parent = *p; 49 parent = *p;
50 ptr = rb_entry(parent, struct timerqueue_node, node); 50 ptr = rb_entry(parent, struct timerqueue_node, node);
51 if (node->expires.tv64 < ptr->expires.tv64) 51 if (node->expires < ptr->expires)
52 p = &(*p)->rb_left; 52 p = &(*p)->rb_left;
53 else 53 else
54 p = &(*p)->rb_right; 54 p = &(*p)->rb_right;
@@ -56,7 +56,7 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
56 rb_link_node(&node->node, parent, p); 56 rb_link_node(&node->node, parent, p);
57 rb_insert_color(&node->node, &head->head); 57 rb_insert_color(&node->node, &head->head);
58 58
59 if (!head->next || node->expires.tv64 < head->next->expires.tv64) { 59 if (!head->next || node->expires < head->next->expires) {
60 head->next = node; 60 head->next = node;
61 return true; 61 return true;
62 } 62 }
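The two hunks above only drop the .tv64 accessor: with ktime_t now a plain 64-bit scalar in this release, expiry times compare directly. The ordering invariant they maintain, head->next always caching the node with the earliest expiry, can be exercised with a small sketch like the following (illustrative only, not part of the patch):

	#include <linux/ktime.h>
	#include <linux/timerqueue.h>

	static void timerqueue_sketch(void)
	{
		struct timerqueue_head head;
		struct timerqueue_node a, b, *next;

		timerqueue_init_head(&head);
		timerqueue_init(&a);
		timerqueue_init(&b);

		a.expires = ms_to_ktime(10);
		b.expires = ms_to_ktime(5);

		timerqueue_add(&head, &a);
		timerqueue_add(&head, &b);	/* earlier deadline: becomes head->next */

		next = timerqueue_getnext(&head);	/* returns &b */
		timerqueue_del(&head, next);
	}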