Diffstat (limited to 'lib')
 -rw-r--r--  lib/Kconfig.debug                |   70
 -rw-r--r--  lib/Kconfig.ubsan                |    3
 -rw-r--r--  lib/Makefile                     |    1
 -rw-r--r--  lib/cpu-notifier-error-inject.c  |   84
 -rw-r--r--  lib/debugobjects.c               |    2
 -rw-r--r--  lib/extable.c                    |    2
 -rw-r--r--  lib/idr.c                        |   11
 -rw-r--r--  lib/iov_iter.c                   |  150
 -rw-r--r--  lib/kobject_uevent.c             |    6
 -rw-r--r--  lib/kstrtox.c                    |    2
 -rw-r--r--  lib/list_debug.c                 |   99
 -rw-r--r--  lib/lockref.c                    |    2
 -rw-r--r--  lib/nlattr.c                     |    2
 -rw-r--r--  lib/parser.c                     |   47
 -rw-r--r--  lib/percpu_counter.c             |   25
 -rw-r--r--  lib/radix-tree.c                 | 1195
 -rw-r--r--  lib/raid6/avx2.c                 |  232
 -rw-r--r--  lib/rbtree.c                     |   23
 -rw-r--r--  lib/swiotlb.c                    |   81
 -rw-r--r--  lib/timerqueue.c                 |    4
20 files changed, 1314 insertions, 727 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a6c8db1d62f6..b06848a104e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
| @@ -13,7 +13,22 @@ config PRINTK_TIME | |||
| 13 | be included, not that the timestamp is recorded. | 13 | be included, not that the timestamp is recorded. |
| 14 | 14 | ||
| 15 | The behavior is also controlled by the kernel command line | 15 | The behavior is also controlled by the kernel command line |
| 16 | parameter printk.time=1. See Documentation/kernel-parameters.txt | 16 | parameter printk.time=1. See Documentation/admin-guide/kernel-parameters.rst |
| 17 | |||
| 18 | config CONSOLE_LOGLEVEL_DEFAULT | ||
| 19 | int "Default console loglevel (1-15)" | ||
| 20 | range 1 15 | ||
| 21 | default "7" | ||
| 22 | help | ||
| 23 | Default loglevel to determine what will be printed on the console. | ||
| 24 | |||
| 25 | Setting a default here is equivalent to passing in loglevel=<x> in | ||
| 26 | the kernel bootargs. loglevel=<x> continues to override whatever | ||
| 27 | value is specified here as well. | ||
| 28 | |||
| 29 | Note: This does not affect the log level of un-prefixed printk() | ||
| 30 | usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT | ||
| 31 | option. | ||
| 17 | 32 | ||
| 18 | config MESSAGE_LOGLEVEL_DEFAULT | 33 | config MESSAGE_LOGLEVEL_DEFAULT |
| 19 | int "Default message log level (1-7)" | 34 | int "Default message log level (1-7)" |
| @@ -26,6 +41,10 @@ config MESSAGE_LOGLEVEL_DEFAULT | |||
| 26 | that are auditing their logs closely may want to set it to a lower | 41 | that are auditing their logs closely may want to set it to a lower |
| 27 | priority. | 42 | priority. |
| 28 | 43 | ||
| 44 | Note: This does not affect what message level gets printed on the console | ||
| 45 | by default. To change that, use loglevel=<x> in the kernel bootargs, | ||
| 46 | or pick a different CONSOLE_LOGLEVEL_DEFAULT configuration value. | ||
| 47 | |||
| 29 | config BOOT_PRINTK_DELAY | 48 | config BOOT_PRINTK_DELAY |
| 30 | bool "Delay each boot printk message by N milliseconds" | 49 | bool "Delay each boot printk message by N milliseconds" |
| 31 | depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY | 50 | depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY |
| @@ -175,8 +194,8 @@ config GDB_SCRIPTS | |||
| 175 | build directory. If you load vmlinux into gdb, the helper | 194 | build directory. If you load vmlinux into gdb, the helper |
| 176 | scripts will be automatically imported by gdb as well, and | 195 | scripts will be automatically imported by gdb as well, and |
| 177 | additional functions are available to analyze a Linux kernel | 196 | additional functions are available to analyze a Linux kernel |
| 178 | instance. See Documentation/gdb-kernel-debugging.txt for further | 197 | instance. See Documentation/dev-tools/gdb-kernel-debugging.rst |
| 179 | details. | 198 | for further details. |
| 180 | 199 | ||
| 181 | config ENABLE_WARN_DEPRECATED | 200 | config ENABLE_WARN_DEPRECATED |
| 182 | bool "Enable __deprecated logic" | 201 | bool "Enable __deprecated logic" |
| @@ -523,7 +542,7 @@ config DEBUG_KMEMLEAK | |||
| 523 | difference being that the orphan objects are not freed but | 542 | difference being that the orphan objects are not freed but |
| 524 | only shown in /sys/kernel/debug/kmemleak. Enabling this | 543 | only shown in /sys/kernel/debug/kmemleak. Enabling this |
| 525 | feature will introduce an overhead to memory | 544 | feature will introduce an overhead to memory |
| 526 | allocations. See Documentation/kmemleak.txt for more | 545 | allocations. See Documentation/dev-tools/kmemleak.rst for more |
| 527 | details. | 546 | details. |
| 528 | 547 | ||
| 529 | Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances | 548 | Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances |
| @@ -720,7 +739,7 @@ config KCOV | |||
| 720 | different machines and across reboots. If you need stable PC values, | 739 | different machines and across reboots. If you need stable PC values, |
| 721 | disable RANDOMIZE_BASE. | 740 | disable RANDOMIZE_BASE. |
| 722 | 741 | ||
| 723 | For more details, see Documentation/kcov.txt. | 742 | For more details, see Documentation/dev-tools/kcov.rst. |
| 724 | 743 | ||
| 725 | config KCOV_INSTRUMENT_ALL | 744 | config KCOV_INSTRUMENT_ALL |
| 726 | bool "Instrument all code by default" | 745 | bool "Instrument all code by default" |
| @@ -1218,7 +1237,7 @@ config DEBUG_BUGVERBOSE | |||
| 1218 | 1237 | ||
| 1219 | config DEBUG_LIST | 1238 | config DEBUG_LIST |
| 1220 | bool "Debug linked list manipulation" | 1239 | bool "Debug linked list manipulation" |
| 1221 | depends on DEBUG_KERNEL | 1240 | depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION |
| 1222 | help | 1241 | help |
| 1223 | Enable this to turn on extended checks in the linked-list | 1242 | Enable this to turn on extended checks in the linked-list |
| 1224 | walking routines. | 1243 | walking routines. |
| @@ -1434,7 +1453,8 @@ config RCU_TRACE | |||
| 1434 | select TRACE_CLOCK | 1453 | select TRACE_CLOCK |
| 1435 | help | 1454 | help |
| 1436 | This option provides tracing in RCU which presents stats | 1455 | This option provides tracing in RCU which presents stats |
| 1437 | in debugfs for debugging RCU implementation. | 1456 | in debugfs for debugging RCU implementation. It also enables |
| 1457 | additional tracepoints for ftrace-style event tracing. | ||
| 1438 | 1458 | ||
| 1439 | Say Y here if you want to enable RCU tracing | 1459 | Say Y here if you want to enable RCU tracing |
| 1440 | Say N if you are unsure. | 1460 | Say N if you are unsure. |
| @@ -1518,30 +1538,6 @@ config NOTIFIER_ERROR_INJECTION | |||
| 1518 | 1538 | ||
| 1519 | Say N if unsure. | 1539 | Say N if unsure. |
| 1520 | 1540 | ||
| 1521 | config CPU_NOTIFIER_ERROR_INJECT | ||
| 1522 | tristate "CPU notifier error injection module" | ||
| 1523 | depends on HOTPLUG_CPU && NOTIFIER_ERROR_INJECTION | ||
| 1524 | help | ||
| 1525 | This option provides a kernel module that can be used to test | ||
| 1526 | the error handling of the cpu notifiers by injecting artificial | ||
| 1527 | errors to CPU notifier chain callbacks. It is controlled through | ||
| 1528 | debugfs interface under /sys/kernel/debug/notifier-error-inject/cpu | ||
| 1529 | |||
| 1530 | If the notifier call chain should be failed with some events | ||
| 1531 | notified, write the error code to "actions/<notifier event>/error". | ||
| 1532 | |||
| 1533 | Example: Inject CPU offline error (-1 == -EPERM) | ||
| 1534 | |||
| 1535 | # cd /sys/kernel/debug/notifier-error-inject/cpu | ||
| 1536 | # echo -1 > actions/CPU_DOWN_PREPARE/error | ||
| 1537 | # echo 0 > /sys/devices/system/cpu/cpu1/online | ||
| 1538 | bash: echo: write error: Operation not permitted | ||
| 1539 | |||
| 1540 | To compile this code as a module, choose M here: the module will | ||
| 1541 | be called cpu-notifier-error-inject. | ||
| 1542 | |||
| 1543 | If unsure, say N. | ||
| 1544 | |||
| 1545 | config PM_NOTIFIER_ERROR_INJECT | 1541 | config PM_NOTIFIER_ERROR_INJECT |
| 1546 | tristate "PM notifier error injection module" | 1542 | tristate "PM notifier error injection module" |
| 1547 | depends on PM && NOTIFIER_ERROR_INJECTION | 1543 | depends on PM && NOTIFIER_ERROR_INJECTION |
| @@ -1964,6 +1960,16 @@ config TEST_STATIC_KEYS | |||
| 1964 | 1960 | ||
| 1965 | If unsure, say N. | 1961 | If unsure, say N. |
| 1966 | 1962 | ||
| 1963 | config BUG_ON_DATA_CORRUPTION | ||
| 1964 | bool "Trigger a BUG when data corruption is detected" | ||
| 1965 | select DEBUG_LIST | ||
| 1966 | help | ||
| 1967 | Select this option if the kernel should BUG when it encounters | ||
| 1968 | data corruption in kernel memory structures when they get checked | ||
| 1969 | for validity. | ||
| 1970 | |||
| 1971 | If unsure, say N. | ||
| 1972 | |||
| 1967 | source "samples/Kconfig" | 1973 | source "samples/Kconfig" |
| 1968 | 1974 | ||
| 1969 | source "lib/Kconfig.kgdb" | 1975 | source "lib/Kconfig.kgdb" |
| @@ -1975,7 +1981,7 @@ config ARCH_HAS_DEVMEM_IS_ALLOWED | |||
| 1975 | 1981 | ||
| 1976 | config STRICT_DEVMEM | 1982 | config STRICT_DEVMEM |
| 1977 | bool "Filter access to /dev/mem" | 1983 | bool "Filter access to /dev/mem" |
| 1978 | depends on MMU | 1984 | depends on MMU && DEVMEM |
| 1979 | depends on ARCH_HAS_DEVMEM_IS_ALLOWED | 1985 | depends on ARCH_HAS_DEVMEM_IS_ALLOWED |
| 1980 | default y if TILE || PPC | 1986 | default y if TILE || PPC |
| 1981 | ---help--- | 1987 | ---help--- |
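For orientation, a minimal sketch of how the two loglevel defaults touched above interact, going only by the help texts: an un-prefixed printk() is assigned MESSAGE_LOGLEVEL_DEFAULT, and the console then emits only messages whose level is numerically below the console loglevel (CONSOLE_LOGLEVEL_DEFAULT unless loglevel= on the command line overrides it). The function and message strings are illustrative only.

    #include <linux/kernel.h>
    #include <linux/printk.h>

    static void loglevel_demo(void)
    {
            /*
             * Explicit level: reaches the console only if 6 (KERN_INFO) is
             * below the current console loglevel, whose boot-time default
             * is CONSOLE_LOGLEVEL_DEFAULT unless loglevel= overrides it.
             */
            printk(KERN_INFO "explicitly tagged as KERN_INFO\n");

            /*
             * No level prefix: the message first gets MESSAGE_LOGLEVEL_DEFAULT
             * and is only then compared against the console loglevel, which
             * is why the two Kconfig options above are independent knobs.
             */
            printk("no explicit level given\n");
    }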
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index bc6e651df68c..a669c193b878 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
| @@ -10,7 +10,8 @@ config UBSAN | |||
| 10 | This option enables undefined behaviour sanity checker | 10 | This option enables undefined behaviour sanity checker |
| 11 | Compile-time instrumentation is used to detect various undefined | 11 | Compile-time instrumentation is used to detect various undefined |
| 12 | behaviours in runtime. Various types of checks may be enabled | 12 | behaviours in runtime. Various types of checks may be enabled |
| 13 | via boot parameter ubsan_handle (see: Documentation/ubsan.txt). | 13 | via boot parameter ubsan_handle |
| 14 | (see: Documentation/dev-tools/ubsan.rst). | ||
| 14 | 15 | ||
| 15 | config UBSAN_SANITIZE_ALL | 16 | config UBSAN_SANITIZE_ALL |
| 16 | bool "Enable instrumentation for the entire kernel" | 17 | bool "Enable instrumentation for the entire kernel" |
diff --git a/lib/Makefile b/lib/Makefile
index 50144a3aeebd..bc4073a8cd08 100644
--- a/lib/Makefile
+++ b/lib/Makefile
| @@ -128,7 +128,6 @@ obj-$(CONFIG_SWIOTLB) += swiotlb.o | |||
| 128 | obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o | 128 | obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o |
| 129 | obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o | 129 | obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o |
| 130 | obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o | 130 | obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o |
| 131 | obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o | ||
| 132 | obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o | 131 | obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o |
| 133 | obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o | 132 | obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o |
| 134 | obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o | 133 | obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o |
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
deleted file mode 100644
index 0e2c9a1e958a..000000000000
--- a/lib/cpu-notifier-error-inject.c
+++ /dev/null
| @@ -1,84 +0,0 @@ | |||
| 1 | #include <linux/kernel.h> | ||
| 2 | #include <linux/module.h> | ||
| 3 | #include <linux/cpu.h> | ||
| 4 | |||
| 5 | #include "notifier-error-inject.h" | ||
| 6 | |||
| 7 | static int priority; | ||
| 8 | module_param(priority, int, 0); | ||
| 9 | MODULE_PARM_DESC(priority, "specify cpu notifier priority"); | ||
| 10 | |||
| 11 | #define UP_PREPARE 0 | ||
| 12 | #define UP_PREPARE_FROZEN 0 | ||
| 13 | #define DOWN_PREPARE 0 | ||
| 14 | #define DOWN_PREPARE_FROZEN 0 | ||
| 15 | |||
| 16 | static struct notifier_err_inject cpu_notifier_err_inject = { | ||
| 17 | .actions = { | ||
| 18 | { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE) }, | ||
| 19 | { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE_FROZEN) }, | ||
| 20 | { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE) }, | ||
| 21 | { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE_FROZEN) }, | ||
| 22 | {} | ||
| 23 | } | ||
| 24 | }; | ||
| 25 | |||
| 26 | static int notf_err_handle(struct notifier_err_inject_action *action) | ||
| 27 | { | ||
| 28 | int ret; | ||
| 29 | |||
| 30 | ret = action->error; | ||
| 31 | if (ret) | ||
| 32 | pr_info("Injecting error (%d) to %s\n", ret, action->name); | ||
| 33 | return ret; | ||
| 34 | } | ||
| 35 | |||
| 36 | static int notf_err_inj_up_prepare(unsigned int cpu) | ||
| 37 | { | ||
| 38 | if (!cpuhp_tasks_frozen) | ||
| 39 | return notf_err_handle(&cpu_notifier_err_inject.actions[0]); | ||
| 40 | else | ||
| 41 | return notf_err_handle(&cpu_notifier_err_inject.actions[1]); | ||
| 42 | } | ||
| 43 | |||
| 44 | static int notf_err_inj_dead(unsigned int cpu) | ||
| 45 | { | ||
| 46 | if (!cpuhp_tasks_frozen) | ||
| 47 | return notf_err_handle(&cpu_notifier_err_inject.actions[2]); | ||
| 48 | else | ||
| 49 | return notf_err_handle(&cpu_notifier_err_inject.actions[3]); | ||
| 50 | } | ||
| 51 | |||
| 52 | static struct dentry *dir; | ||
| 53 | |||
| 54 | static int err_inject_init(void) | ||
| 55 | { | ||
| 56 | int err; | ||
| 57 | |||
| 58 | dir = notifier_err_inject_init("cpu", notifier_err_inject_dir, | ||
| 59 | &cpu_notifier_err_inject, priority); | ||
| 60 | if (IS_ERR(dir)) | ||
| 61 | return PTR_ERR(dir); | ||
| 62 | |||
| 63 | err = cpuhp_setup_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE, | ||
| 64 | "cpu-err-notif:prepare", | ||
| 65 | notf_err_inj_up_prepare, | ||
| 66 | notf_err_inj_dead); | ||
| 67 | if (err) | ||
| 68 | debugfs_remove_recursive(dir); | ||
| 69 | |||
| 70 | return err; | ||
| 71 | } | ||
| 72 | |||
| 73 | static void err_inject_exit(void) | ||
| 74 | { | ||
| 75 | cpuhp_remove_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE); | ||
| 76 | debugfs_remove_recursive(dir); | ||
| 77 | } | ||
| 78 | |||
| 79 | module_init(err_inject_init); | ||
| 80 | module_exit(err_inject_exit); | ||
| 81 | |||
| 82 | MODULE_DESCRIPTION("CPU notifier error injection module"); | ||
| 83 | MODULE_LICENSE("GPL"); | ||
| 84 | MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); | ||
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 056052dc8e91..04c1ef717fe0 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
| @@ -199,7 +199,7 @@ static void free_object(struct debug_obj *obj) | |||
| 199 | * initialized: | 199 | * initialized: |
| 200 | */ | 200 | */ |
| 201 | if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) | 201 | if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) |
| 202 | sched = keventd_up(); | 202 | sched = 1; |
| 203 | hlist_add_head(&obj->node, &obj_pool); | 203 | hlist_add_head(&obj->node, &obj_pool); |
| 204 | obj_pool_free++; | 204 | obj_pool_free++; |
| 205 | obj_pool_used--; | 205 | obj_pool_used--; |
diff --git a/lib/extable.c b/lib/extable.c
index 0be02ad561e9..62968daa66a9 100644
--- a/lib/extable.c
+++ b/lib/extable.c
| @@ -12,7 +12,7 @@ | |||
| 12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
| 13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
| 14 | #include <linux/sort.h> | 14 | #include <linux/sort.h> |
| 15 | #include <asm/uaccess.h> | 15 | #include <linux/uaccess.h> |
| 16 | 16 | ||
| 17 | #ifndef ARCH_HAS_RELATIVE_EXTABLE | 17 | #ifndef ARCH_HAS_RELATIVE_EXTABLE |
| 18 | #define ex_to_insn(x) ((x)->insn) | 18 | #define ex_to_insn(x) ((x)->insn) |
diff --git a/lib/idr.c b/lib/idr.c
--- a/lib/idr.c
+++ b/lib/idr.c
| @@ -927,6 +927,9 @@ EXPORT_SYMBOL(ida_pre_get); | |||
| 927 | * and go back to the ida_pre_get() call. If the ida is full, it will | 927 | * and go back to the ida_pre_get() call. If the ida is full, it will |
| 928 | * return %-ENOSPC. | 928 | * return %-ENOSPC. |
| 929 | * | 929 | * |
| 930 | * Note that callers must ensure that concurrent access to @ida is not possible. | ||
| 931 | * See ida_simple_get() for a variant which takes care of locking. | ||
| 932 | * | ||
| 930 | * @p_id returns a value in the range @starting_id ... %0x7fffffff. | 933 | * @p_id returns a value in the range @starting_id ... %0x7fffffff. |
| 931 | */ | 934 | */ |
| 932 | int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) | 935 | int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) |
| @@ -1073,6 +1076,9 @@ EXPORT_SYMBOL(ida_destroy); | |||
| 1073 | * Allocates an id in the range start <= id < end, or returns -ENOSPC. | 1076 | * Allocates an id in the range start <= id < end, or returns -ENOSPC. |
| 1074 | * On memory allocation failure, returns -ENOMEM. | 1077 | * On memory allocation failure, returns -ENOMEM. |
| 1075 | * | 1078 | * |
| 1079 | * Compared to ida_get_new_above() this function does its own locking, and | ||
| 1080 | * should be used unless there are special requirements. | ||
| 1081 | * | ||
| 1076 | * Use ida_simple_remove() to get rid of an id. | 1082 | * Use ida_simple_remove() to get rid of an id. |
| 1077 | */ | 1083 | */ |
| 1078 | int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, | 1084 | int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, |
| @@ -1119,6 +1125,11 @@ EXPORT_SYMBOL(ida_simple_get); | |||
| 1119 | * ida_simple_remove - remove an allocated id. | 1125 | * ida_simple_remove - remove an allocated id. |
| 1120 | * @ida: the (initialized) ida. | 1126 | * @ida: the (initialized) ida. |
| 1121 | * @id: the id returned by ida_simple_get. | 1127 | * @id: the id returned by ida_simple_get. |
| 1128 | * | ||
| 1129 | * Use to release an id allocated with ida_simple_get(). | ||
| 1130 | * | ||
| 1131 | * Compared to ida_remove() this function does its own locking, and should be | ||
| 1132 | * used unless there are special requirements. | ||
| 1122 | */ | 1133 | */ |
| 1123 | void ida_simple_remove(struct ida *ida, unsigned int id) | 1134 | void ida_simple_remove(struct ida *ida, unsigned int id) |
| 1124 | { | 1135 | { |
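The kerneldoc added above contrasts the raw and the self-locking ida interfaces. A minimal usage sketch of the self-locking pair follows; the names are illustrative and error handling is reduced to the essentials.

    #include <linux/idr.h>
    #include <linux/gfp.h>

    static DEFINE_IDA(example_ida);         /* hypothetical ida instance */

    static int example_get_id(void)
    {
            /* ida_simple_get() does its own locking, per the new comment.
             * end == 0 means "no upper bound". */
            return ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);
    }

    static void example_put_id(int id)
    {
            /* Only for ids obtained from ida_simple_get(); no external lock. */
            ida_simple_remove(&example_ida, id);
    }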
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f2bd21b93dfc..25f572303801 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
| @@ -1,4 +1,5 @@ | |||
| 1 | #include <linux/export.h> | 1 | #include <linux/export.h> |
| 2 | #include <linux/bvec.h> | ||
| 2 | #include <linux/uio.h> | 3 | #include <linux/uio.h> |
| 3 | #include <linux/pagemap.h> | 4 | #include <linux/pagemap.h> |
| 4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| @@ -72,19 +73,21 @@ | |||
| 72 | } | 73 | } |
| 73 | 74 | ||
| 74 | #define iterate_all_kinds(i, n, v, I, B, K) { \ | 75 | #define iterate_all_kinds(i, n, v, I, B, K) { \ |
| 75 | size_t skip = i->iov_offset; \ | 76 | if (likely(n)) { \ |
| 76 | if (unlikely(i->type & ITER_BVEC)) { \ | 77 | size_t skip = i->iov_offset; \ |
| 77 | struct bio_vec v; \ | 78 | if (unlikely(i->type & ITER_BVEC)) { \ |
| 78 | struct bvec_iter __bi; \ | 79 | struct bio_vec v; \ |
| 79 | iterate_bvec(i, n, v, __bi, skip, (B)) \ | 80 | struct bvec_iter __bi; \ |
| 80 | } else if (unlikely(i->type & ITER_KVEC)) { \ | 81 | iterate_bvec(i, n, v, __bi, skip, (B)) \ |
| 81 | const struct kvec *kvec; \ | 82 | } else if (unlikely(i->type & ITER_KVEC)) { \ |
| 82 | struct kvec v; \ | 83 | const struct kvec *kvec; \ |
| 83 | iterate_kvec(i, n, v, kvec, skip, (K)) \ | 84 | struct kvec v; \ |
| 84 | } else { \ | 85 | iterate_kvec(i, n, v, kvec, skip, (K)) \ |
| 85 | const struct iovec *iov; \ | 86 | } else { \ |
| 86 | struct iovec v; \ | 87 | const struct iovec *iov; \ |
| 87 | iterate_iovec(i, n, v, iov, skip, (I)) \ | 88 | struct iovec v; \ |
| 89 | iterate_iovec(i, n, v, iov, skip, (I)) \ | ||
| 90 | } \ | ||
| 88 | } \ | 91 | } \ |
| 89 | } | 92 | } |
| 90 | 93 | ||
| @@ -568,6 +571,31 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) | |||
| 568 | } | 571 | } |
| 569 | EXPORT_SYMBOL(copy_from_iter); | 572 | EXPORT_SYMBOL(copy_from_iter); |
| 570 | 573 | ||
| 574 | bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) | ||
| 575 | { | ||
| 576 | char *to = addr; | ||
| 577 | if (unlikely(i->type & ITER_PIPE)) { | ||
| 578 | WARN_ON(1); | ||
| 579 | return false; | ||
| 580 | } | ||
| 581 | if (unlikely(i->count < bytes)) | ||
| 582 | return false; | ||
| 583 | |||
| 584 | iterate_all_kinds(i, bytes, v, ({ | ||
| 585 | if (__copy_from_user((to += v.iov_len) - v.iov_len, | ||
| 586 | v.iov_base, v.iov_len)) | ||
| 587 | return false; | ||
| 588 | 0;}), | ||
| 589 | memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, | ||
| 590 | v.bv_offset, v.bv_len), | ||
| 591 | memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) | ||
| 592 | ) | ||
| 593 | |||
| 594 | iov_iter_advance(i, bytes); | ||
| 595 | return true; | ||
| 596 | } | ||
| 597 | EXPORT_SYMBOL(copy_from_iter_full); | ||
| 598 | |||
| 571 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) | 599 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) |
| 572 | { | 600 | { |
| 573 | char *to = addr; | 601 | char *to = addr; |
| @@ -587,6 +615,30 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) | |||
| 587 | } | 615 | } |
| 588 | EXPORT_SYMBOL(copy_from_iter_nocache); | 616 | EXPORT_SYMBOL(copy_from_iter_nocache); |
| 589 | 617 | ||
| 618 | bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) | ||
| 619 | { | ||
| 620 | char *to = addr; | ||
| 621 | if (unlikely(i->type & ITER_PIPE)) { | ||
| 622 | WARN_ON(1); | ||
| 623 | return false; | ||
| 624 | } | ||
| 625 | if (unlikely(i->count < bytes)) | ||
| 626 | return false; | ||
| 627 | iterate_all_kinds(i, bytes, v, ({ | ||
| 628 | if (__copy_from_user_nocache((to += v.iov_len) - v.iov_len, | ||
| 629 | v.iov_base, v.iov_len)) | ||
| 630 | return false; | ||
| 631 | 0;}), | ||
| 632 | memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, | ||
| 633 | v.bv_offset, v.bv_len), | ||
| 634 | memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) | ||
| 635 | ) | ||
| 636 | |||
| 637 | iov_iter_advance(i, bytes); | ||
| 638 | return true; | ||
| 639 | } | ||
| 640 | EXPORT_SYMBOL(copy_from_iter_full_nocache); | ||
| 641 | |||
| 590 | size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, | 642 | size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, |
| 591 | struct iov_iter *i) | 643 | struct iov_iter *i) |
| 592 | { | 644 | { |
| @@ -787,11 +839,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) | |||
| 787 | unsigned long res = 0; | 839 | unsigned long res = 0; |
| 788 | size_t size = i->count; | 840 | size_t size = i->count; |
| 789 | 841 | ||
| 790 | if (!size) | ||
| 791 | return 0; | ||
| 792 | |||
| 793 | if (unlikely(i->type & ITER_PIPE)) { | 842 | if (unlikely(i->type & ITER_PIPE)) { |
| 794 | if (i->iov_offset && allocated(&i->pipe->bufs[i->idx])) | 843 | if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx])) |
| 795 | return size | i->iov_offset; | 844 | return size | i->iov_offset; |
| 796 | return size; | 845 | return size; |
| 797 | } | 846 | } |
| @@ -806,10 +855,8 @@ EXPORT_SYMBOL(iov_iter_alignment); | |||
| 806 | 855 | ||
| 807 | unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | 856 | unsigned long iov_iter_gap_alignment(const struct iov_iter *i) |
| 808 | { | 857 | { |
| 809 | unsigned long res = 0; | 858 | unsigned long res = 0; |
| 810 | size_t size = i->count; | 859 | size_t size = i->count; |
| 811 | if (!size) | ||
| 812 | return 0; | ||
| 813 | 860 | ||
| 814 | if (unlikely(i->type & ITER_PIPE)) { | 861 | if (unlikely(i->type & ITER_PIPE)) { |
| 815 | WARN_ON(1); | 862 | WARN_ON(1); |
| @@ -824,7 +871,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | |||
| 824 | (res |= (!res ? 0 : (unsigned long)v.iov_base) | | 871 | (res |= (!res ? 0 : (unsigned long)v.iov_base) | |
| 825 | (size != v.iov_len ? size : 0)) | 872 | (size != v.iov_len ? size : 0)) |
| 826 | ); | 873 | ); |
| 827 | return res; | 874 | return res; |
| 828 | } | 875 | } |
| 829 | EXPORT_SYMBOL(iov_iter_gap_alignment); | 876 | EXPORT_SYMBOL(iov_iter_gap_alignment); |
| 830 | 877 | ||
| @@ -858,6 +905,9 @@ static ssize_t pipe_get_pages(struct iov_iter *i, | |||
| 858 | size_t capacity; | 905 | size_t capacity; |
| 859 | int idx; | 906 | int idx; |
| 860 | 907 | ||
| 908 | if (!maxsize) | ||
| 909 | return 0; | ||
| 910 | |||
| 861 | if (!sanity(i)) | 911 | if (!sanity(i)) |
| 862 | return -EFAULT; | 912 | return -EFAULT; |
| 863 | 913 | ||
| @@ -876,9 +926,6 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, | |||
| 876 | if (maxsize > i->count) | 926 | if (maxsize > i->count) |
| 877 | maxsize = i->count; | 927 | maxsize = i->count; |
| 878 | 928 | ||
| 879 | if (!maxsize) | ||
| 880 | return 0; | ||
| 881 | |||
| 882 | if (unlikely(i->type & ITER_PIPE)) | 929 | if (unlikely(i->type & ITER_PIPE)) |
| 883 | return pipe_get_pages(i, pages, maxsize, maxpages, start); | 930 | return pipe_get_pages(i, pages, maxsize, maxpages, start); |
| 884 | iterate_all_kinds(i, maxsize, v, ({ | 931 | iterate_all_kinds(i, maxsize, v, ({ |
| @@ -925,6 +972,9 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, | |||
| 925 | int idx; | 972 | int idx; |
| 926 | int npages; | 973 | int npages; |
| 927 | 974 | ||
| 975 | if (!maxsize) | ||
| 976 | return 0; | ||
| 977 | |||
| 928 | if (!sanity(i)) | 978 | if (!sanity(i)) |
| 929 | return -EFAULT; | 979 | return -EFAULT; |
| 930 | 980 | ||
| @@ -956,9 +1006,6 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | |||
| 956 | if (maxsize > i->count) | 1006 | if (maxsize > i->count) |
| 957 | maxsize = i->count; | 1007 | maxsize = i->count; |
| 958 | 1008 | ||
| 959 | if (!maxsize) | ||
| 960 | return 0; | ||
| 961 | |||
| 962 | if (unlikely(i->type & ITER_PIPE)) | 1009 | if (unlikely(i->type & ITER_PIPE)) |
| 963 | return pipe_get_pages_alloc(i, pages, maxsize, start); | 1010 | return pipe_get_pages_alloc(i, pages, maxsize, start); |
| 964 | iterate_all_kinds(i, maxsize, v, ({ | 1011 | iterate_all_kinds(i, maxsize, v, ({ |
| @@ -1008,7 +1055,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, | |||
| 1008 | } | 1055 | } |
| 1009 | iterate_and_advance(i, bytes, v, ({ | 1056 | iterate_and_advance(i, bytes, v, ({ |
| 1010 | int err = 0; | 1057 | int err = 0; |
| 1011 | next = csum_and_copy_from_user(v.iov_base, | 1058 | next = csum_and_copy_from_user(v.iov_base, |
| 1012 | (to += v.iov_len) - v.iov_len, | 1059 | (to += v.iov_len) - v.iov_len, |
| 1013 | v.iov_len, 0, &err); | 1060 | v.iov_len, 0, &err); |
| 1014 | if (!err) { | 1061 | if (!err) { |
| @@ -1037,6 +1084,51 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, | |||
| 1037 | } | 1084 | } |
| 1038 | EXPORT_SYMBOL(csum_and_copy_from_iter); | 1085 | EXPORT_SYMBOL(csum_and_copy_from_iter); |
| 1039 | 1086 | ||
| 1087 | bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, | ||
| 1088 | struct iov_iter *i) | ||
| 1089 | { | ||
| 1090 | char *to = addr; | ||
| 1091 | __wsum sum, next; | ||
| 1092 | size_t off = 0; | ||
| 1093 | sum = *csum; | ||
| 1094 | if (unlikely(i->type & ITER_PIPE)) { | ||
| 1095 | WARN_ON(1); | ||
| 1096 | return false; | ||
| 1097 | } | ||
| 1098 | if (unlikely(i->count < bytes)) | ||
| 1099 | return false; | ||
| 1100 | iterate_all_kinds(i, bytes, v, ({ | ||
| 1101 | int err = 0; | ||
| 1102 | next = csum_and_copy_from_user(v.iov_base, | ||
| 1103 | (to += v.iov_len) - v.iov_len, | ||
| 1104 | v.iov_len, 0, &err); | ||
| 1105 | if (err) | ||
| 1106 | return false; | ||
| 1107 | sum = csum_block_add(sum, next, off); | ||
| 1108 | off += v.iov_len; | ||
| 1109 | 0; | ||
| 1110 | }), ({ | ||
| 1111 | char *p = kmap_atomic(v.bv_page); | ||
| 1112 | next = csum_partial_copy_nocheck(p + v.bv_offset, | ||
| 1113 | (to += v.bv_len) - v.bv_len, | ||
| 1114 | v.bv_len, 0); | ||
| 1115 | kunmap_atomic(p); | ||
| 1116 | sum = csum_block_add(sum, next, off); | ||
| 1117 | off += v.bv_len; | ||
| 1118 | }),({ | ||
| 1119 | next = csum_partial_copy_nocheck(v.iov_base, | ||
| 1120 | (to += v.iov_len) - v.iov_len, | ||
| 1121 | v.iov_len, 0); | ||
| 1122 | sum = csum_block_add(sum, next, off); | ||
| 1123 | off += v.iov_len; | ||
| 1124 | }) | ||
| 1125 | ) | ||
| 1126 | *csum = sum; | ||
| 1127 | iov_iter_advance(i, bytes); | ||
| 1128 | return true; | ||
| 1129 | } | ||
| 1130 | EXPORT_SYMBOL(csum_and_copy_from_iter_full); | ||
| 1131 | |||
| 1040 | size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, | 1132 | size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, |
| 1041 | struct iov_iter *i) | 1133 | struct iov_iter *i) |
| 1042 | { | 1134 | { |
| @@ -1051,7 +1143,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, | |||
| 1051 | iterate_and_advance(i, bytes, v, ({ | 1143 | iterate_and_advance(i, bytes, v, ({ |
| 1052 | int err = 0; | 1144 | int err = 0; |
| 1053 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, | 1145 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, |
| 1054 | v.iov_base, | 1146 | v.iov_base, |
| 1055 | v.iov_len, 0, &err); | 1147 | v.iov_len, 0, &err); |
| 1056 | if (!err) { | 1148 | if (!err) { |
| 1057 | sum = csum_block_add(sum, next, off); | 1149 | sum = csum_block_add(sum, next, off); |
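The new copy_from_iter_full()/csum_and_copy_from_iter_full() helpers above return a bool and advance the iterator only when the whole request was satisfied. A sketch of the call-site simplification they enable; the header struct and the function are hypothetical.

    #include <linux/types.h>
    #include <linux/uio.h>

    struct example_hdr {                    /* hypothetical wire header */
            u32 len;
            u32 type;
    };

    static int example_recv_hdr(struct example_hdr *hdr, struct iov_iter *from)
    {
            /*
             * Old pattern: compare the copied length against the request, e.g.
             *   if (copy_from_iter(hdr, sizeof(*hdr), from) != sizeof(*hdr))
             *           return -EFAULT;
             */

            /* New pattern: boolean result; the iterator is not advanced on failure. */
            if (!copy_from_iter_full(hdr, sizeof(*hdr), from))
                    return -EFAULT;
            return 0;
    }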
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index f6c2c1e7779c..9a2b811966eb 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
| @@ -56,7 +56,7 @@ static const char *kobject_actions[] = { | |||
| 56 | * kobject_action_type - translate action string to numeric type | 56 | * kobject_action_type - translate action string to numeric type |
| 57 | * | 57 | * |
| 58 | * @buf: buffer containing the action string, newline is ignored | 58 | * @buf: buffer containing the action string, newline is ignored |
| 59 | * @len: length of buffer | 59 | * @count: length of buffer |
| 60 | * @type: pointer to the location to store the action type | 60 | * @type: pointer to the location to store the action type |
| 61 | * | 61 | * |
| 62 | * Returns 0 if the action string was recognized. | 62 | * Returns 0 if the action string was recognized. |
| @@ -154,8 +154,8 @@ static void cleanup_uevent_env(struct subprocess_info *info) | |||
| 154 | /** | 154 | /** |
| 155 | * kobject_uevent_env - send an uevent with environmental data | 155 | * kobject_uevent_env - send an uevent with environmental data |
| 156 | * | 156 | * |
| 157 | * @action: action that is happening | ||
| 158 | * @kobj: struct kobject that the action is happening to | 157 | * @kobj: struct kobject that the action is happening to |
| 158 | * @action: action that is happening | ||
| 159 | * @envp_ext: pointer to environmental data | 159 | * @envp_ext: pointer to environmental data |
| 160 | * | 160 | * |
| 161 | * Returns 0 if kobject_uevent_env() is completed with success or the | 161 | * Returns 0 if kobject_uevent_env() is completed with success or the |
| @@ -363,8 +363,8 @@ EXPORT_SYMBOL_GPL(kobject_uevent_env); | |||
| 363 | /** | 363 | /** |
| 364 | * kobject_uevent - notify userspace by sending an uevent | 364 | * kobject_uevent - notify userspace by sending an uevent |
| 365 | * | 365 | * |
| 366 | * @action: action that is happening | ||
| 367 | * @kobj: struct kobject that the action is happening to | 366 | * @kobj: struct kobject that the action is happening to |
| 367 | * @action: action that is happening | ||
| 368 | * | 368 | * |
| 369 | * Returns 0 if kobject_uevent() is completed with success or the | 369 | * Returns 0 if kobject_uevent() is completed with success or the |
| 370 | * corresponding error when it fails. | 370 | * corresponding error when it fails. |
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index b8e2080c1a47..bf85e05ce858 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
| @@ -17,7 +17,7 @@ | |||
| 17 | #include <linux/math64.h> | 17 | #include <linux/math64.h> |
| 18 | #include <linux/export.h> | 18 | #include <linux/export.h> |
| 19 | #include <linux/types.h> | 19 | #include <linux/types.h> |
| 20 | #include <asm/uaccess.h> | 20 | #include <linux/uaccess.h> |
| 21 | #include "kstrtox.h" | 21 | #include "kstrtox.h" |
| 22 | 22 | ||
| 23 | const char *_parse_integer_fixup_radix(const char *s, unsigned int *base) | 23 | const char *_parse_integer_fixup_radix(const char *s, unsigned int *base) |
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 3859bf63561c..7f7bfa55eb6d 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
| @@ -2,8 +2,7 @@ | |||
| 2 | * Copyright 2006, Red Hat, Inc., Dave Jones | 2 | * Copyright 2006, Red Hat, Inc., Dave Jones |
| 3 | * Released under the General Public License (GPL). | 3 | * Released under the General Public License (GPL). |
| 4 | * | 4 | * |
| 5 | * This file contains the linked list implementations for | 5 | * This file contains the linked list validation for DEBUG_LIST. |
| 6 | * DEBUG_LIST. | ||
| 7 | */ | 6 | */ |
| 8 | 7 | ||
| 9 | #include <linux/export.h> | 8 | #include <linux/export.h> |
| @@ -13,88 +12,48 @@ | |||
| 13 | #include <linux/rculist.h> | 12 | #include <linux/rculist.h> |
| 14 | 13 | ||
| 15 | /* | 14 | /* |
| 16 | * Insert a new entry between two known consecutive entries. | 15 | * Check that the data structures for the list manipulations are reasonably |
| 17 | * | 16 | * valid. Failures here indicate memory corruption (and possibly an exploit |
| 18 | * This is only for internal list manipulation where we know | 17 | * attempt). |
| 19 | * the prev/next entries already! | ||
| 20 | */ | 18 | */ |
| 21 | 19 | ||
| 22 | void __list_add(struct list_head *new, | 20 | bool __list_add_valid(struct list_head *new, struct list_head *prev, |
| 23 | struct list_head *prev, | 21 | struct list_head *next) |
| 24 | struct list_head *next) | ||
| 25 | { | 22 | { |
| 26 | WARN(next->prev != prev, | 23 | CHECK_DATA_CORRUPTION(next->prev != prev, |
| 27 | "list_add corruption. next->prev should be " | 24 | "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", |
| 28 | "prev (%p), but was %p. (next=%p).\n", | ||
| 29 | prev, next->prev, next); | 25 | prev, next->prev, next); |
| 30 | WARN(prev->next != next, | 26 | CHECK_DATA_CORRUPTION(prev->next != next, |
| 31 | "list_add corruption. prev->next should be " | 27 | "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", |
| 32 | "next (%p), but was %p. (prev=%p).\n", | ||
| 33 | next, prev->next, prev); | 28 | next, prev->next, prev); |
| 34 | WARN(new == prev || new == next, | 29 | CHECK_DATA_CORRUPTION(new == prev || new == next, |
| 35 | "list_add double add: new=%p, prev=%p, next=%p.\n", | 30 | "list_add double add: new=%p, prev=%p, next=%p.\n", |
| 36 | new, prev, next); | 31 | new, prev, next); |
| 37 | next->prev = new; | 32 | |
| 38 | new->next = next; | 33 | return true; |
| 39 | new->prev = prev; | ||
| 40 | WRITE_ONCE(prev->next, new); | ||
| 41 | } | 34 | } |
| 42 | EXPORT_SYMBOL(__list_add); | 35 | EXPORT_SYMBOL(__list_add_valid); |
| 43 | 36 | ||
| 44 | void __list_del_entry(struct list_head *entry) | 37 | bool __list_del_entry_valid(struct list_head *entry) |
| 45 | { | 38 | { |
| 46 | struct list_head *prev, *next; | 39 | struct list_head *prev, *next; |
| 47 | 40 | ||
| 48 | prev = entry->prev; | 41 | prev = entry->prev; |
| 49 | next = entry->next; | 42 | next = entry->next; |
| 50 | 43 | ||
| 51 | if (WARN(next == LIST_POISON1, | 44 | CHECK_DATA_CORRUPTION(next == LIST_POISON1, |
| 52 | "list_del corruption, %p->next is LIST_POISON1 (%p)\n", | 45 | "list_del corruption, %p->next is LIST_POISON1 (%p)\n", |
| 53 | entry, LIST_POISON1) || | 46 | entry, LIST_POISON1); |
| 54 | WARN(prev == LIST_POISON2, | 47 | CHECK_DATA_CORRUPTION(prev == LIST_POISON2, |
| 55 | "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", | 48 | "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", |
| 56 | entry, LIST_POISON2) || | 49 | entry, LIST_POISON2); |
| 57 | WARN(prev->next != entry, | 50 | CHECK_DATA_CORRUPTION(prev->next != entry, |
| 58 | "list_del corruption. prev->next should be %p, " | 51 | "list_del corruption. prev->next should be %p, but was %p\n", |
| 59 | "but was %p\n", entry, prev->next) || | 52 | entry, prev->next); |
| 60 | WARN(next->prev != entry, | 53 | CHECK_DATA_CORRUPTION(next->prev != entry, |
| 61 | "list_del corruption. next->prev should be %p, " | 54 | "list_del corruption. next->prev should be %p, but was %p\n", |
| 62 | "but was %p\n", entry, next->prev)) | 55 | entry, next->prev); |
| 63 | return; | 56 | return true; |
| 64 | |||
| 65 | __list_del(prev, next); | ||
| 66 | } | ||
| 67 | EXPORT_SYMBOL(__list_del_entry); | ||
| 68 | 57 | ||
| 69 | /** | ||
| 70 | * list_del - deletes entry from list. | ||
| 71 | * @entry: the element to delete from the list. | ||
| 72 | * Note: list_empty on entry does not return true after this, the entry is | ||
| 73 | * in an undefined state. | ||
| 74 | */ | ||
| 75 | void list_del(struct list_head *entry) | ||
| 76 | { | ||
| 77 | __list_del_entry(entry); | ||
| 78 | entry->next = LIST_POISON1; | ||
| 79 | entry->prev = LIST_POISON2; | ||
| 80 | } | ||
| 81 | EXPORT_SYMBOL(list_del); | ||
| 82 | |||
| 83 | /* | ||
| 84 | * RCU variants. | ||
| 85 | */ | ||
| 86 | void __list_add_rcu(struct list_head *new, | ||
| 87 | struct list_head *prev, struct list_head *next) | ||
| 88 | { | ||
| 89 | WARN(next->prev != prev, | ||
| 90 | "list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", | ||
| 91 | prev, next->prev, next); | ||
| 92 | WARN(prev->next != next, | ||
| 93 | "list_add_rcu corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", | ||
| 94 | next, prev->next, prev); | ||
| 95 | new->next = next; | ||
| 96 | new->prev = prev; | ||
| 97 | rcu_assign_pointer(list_next_rcu(prev), new); | ||
| 98 | next->prev = new; | ||
| 99 | } | 58 | } |
| 100 | EXPORT_SYMBOL(__list_add_rcu); | 59 | EXPORT_SYMBOL(__list_del_entry_valid); |
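With this rewrite, lib/list_debug.c no longer carries its own copies of the list primitives; it only validates, and CHECK_DATA_CORRUPTION() (introduced alongside this patch, selected into BUG() behaviour by the new BUG_ON_DATA_CORRUPTION option, otherwise a WARN) reports the problem. A paraphrased sketch of how the inline helpers are expected to consume the validators when CONFIG_DEBUG_LIST is set; this mirrors, but is not copied from, the include/linux/list.h side of the series.

    /* Paraphrase of the list.h side (CONFIG_DEBUG_LIST=y); not standalone code. */
    static inline void __list_add(struct list_head *new,
                                  struct list_head *prev,
                                  struct list_head *next)
    {
            /* Refuse the operation rather than corrupt memory further. */
            if (!__list_add_valid(new, prev, next))
                    return;

            next->prev = new;
            new->next = next;
            new->prev = prev;
            WRITE_ONCE(prev->next, new);
    }

    static inline void __list_del_entry(struct list_head *entry)
    {
            if (!__list_del_entry_valid(entry))
                    return;

            __list_del(entry->prev, entry->next);
    }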
diff --git a/lib/lockref.c b/lib/lockref.c
index 5a92189ad711..c4bfcb8836cd 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
| @@ -20,7 +20,7 @@ | |||
| 20 | if (likely(old.lock_count == prev.lock_count)) { \ | 20 | if (likely(old.lock_count == prev.lock_count)) { \ |
| 21 | SUCCESS; \ | 21 | SUCCESS; \ |
| 22 | } \ | 22 | } \ |
| 23 | cpu_relax_lowlatency(); \ | 23 | cpu_relax(); \ |
| 24 | } \ | 24 | } \ |
| 25 | } while (0) | 25 | } while (0) |
| 26 | 26 | ||
diff --git a/lib/nlattr.c b/lib/nlattr.c
index fce1e9afc6d9..b42b8577fc23 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
| @@ -14,7 +14,7 @@ | |||
| 14 | #include <linux/types.h> | 14 | #include <linux/types.h> |
| 15 | #include <net/netlink.h> | 15 | #include <net/netlink.h> |
| 16 | 16 | ||
| 17 | static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = { | 17 | static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { |
| 18 | [NLA_U8] = sizeof(u8), | 18 | [NLA_U8] = sizeof(u8), |
| 19 | [NLA_U16] = sizeof(u16), | 19 | [NLA_U16] = sizeof(u16), |
| 20 | [NLA_U32] = sizeof(u32), | 20 | [NLA_U32] = sizeof(u32), |
diff --git a/lib/parser.c b/lib/parser.c
index b6d11631231b..3278958b472a 100644
--- a/lib/parser.c
+++ b/lib/parser.c
| @@ -152,6 +152,36 @@ static int match_number(substring_t *s, int *result, int base) | |||
| 152 | } | 152 | } |
| 153 | 153 | ||
| 154 | /** | 154 | /** |
| 155 | * match_u64int: scan a number in the given base from a substring_t | ||
| 156 | * @s: substring to be scanned | ||
| 157 | * @result: resulting u64 on success | ||
| 158 | * @base: base to use when converting string | ||
| 159 | * | ||
| 160 | * Description: Given a &substring_t and a base, attempts to parse the substring | ||
| 161 | * as a number in that base. On success, sets @result to the integer represented | ||
| 162 | * by the string and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure. | ||
| 163 | */ | ||
| 164 | static int match_u64int(substring_t *s, u64 *result, int base) | ||
| 165 | { | ||
| 166 | char *buf; | ||
| 167 | int ret; | ||
| 168 | u64 val; | ||
| 169 | size_t len = s->to - s->from; | ||
| 170 | |||
| 171 | buf = kmalloc(len + 1, GFP_KERNEL); | ||
| 172 | if (!buf) | ||
| 173 | return -ENOMEM; | ||
| 174 | memcpy(buf, s->from, len); | ||
| 175 | buf[len] = '\0'; | ||
| 176 | |||
| 177 | ret = kstrtoull(buf, base, &val); | ||
| 178 | if (!ret) | ||
| 179 | *result = val; | ||
| 180 | kfree(buf); | ||
| 181 | return ret; | ||
| 182 | } | ||
| 183 | |||
| 184 | /** | ||
| 155 | * match_int: - scan a decimal representation of an integer from a substring_t | 185 | * match_int: - scan a decimal representation of an integer from a substring_t |
| 156 | * @s: substring_t to be scanned | 186 | * @s: substring_t to be scanned |
| 157 | * @result: resulting integer on success | 187 | * @result: resulting integer on success |
| @@ -167,6 +197,23 @@ int match_int(substring_t *s, int *result) | |||
| 167 | EXPORT_SYMBOL(match_int); | 197 | EXPORT_SYMBOL(match_int); |
| 168 | 198 | ||
| 169 | /** | 199 | /** |
| 200 | * match_u64: - scan a decimal representation of a u64 from | ||
| 201 | * a substring_t | ||
| 202 | * @s: substring_t to be scanned | ||
| 203 | * @result: resulting unsigned long long on success | ||
| 204 | * | ||
| 205 | * Description: Attempts to parse the &substring_t @s as a long decimal | ||
| 206 | * integer. On success, sets @result to the integer represented by the | ||
| 207 | * string and returns 0. | ||
| 208 | * Returns -ENOMEM, -EINVAL, or -ERANGE on failure. | ||
| 209 | */ | ||
| 210 | int match_u64(substring_t *s, u64 *result) | ||
| 211 | { | ||
| 212 | return match_u64int(s, result, 0); | ||
| 213 | } | ||
| 214 | EXPORT_SYMBOL(match_u64); | ||
| 215 | |||
| 216 | /** | ||
| 170 | * match_octal: - scan an octal representation of an integer from a substring_t | 217 | * match_octal: - scan an octal representation of an integer from a substring_t |
| 171 | * @s: substring_t to be scanned | 218 | * @s: substring_t to be scanned |
| 172 | * @result: resulting integer on success | 219 | * @result: resulting integer on success |
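A short usage sketch for the new match_u64() helper in the usual match_token() pattern; the option table, token names, and function are made up for illustration.

    #include <linux/parser.h>
    #include <linux/types.h>
    #include <linux/errno.h>

    enum { Opt_size, Opt_err };             /* hypothetical option set */

    static const match_table_t example_tokens = {
            { Opt_size, "size=%s" },
            { Opt_err,  NULL },
    };

    static int example_parse_size(char *option, u64 *size)
    {
            substring_t args[MAX_OPT_ARGS];

            if (match_token(option, example_tokens, args) != Opt_size)
                    return -EINVAL;

            /* match_u64() hands the substring to kstrtoull() with base 0. */
            return match_u64(&args[0], size);
    }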
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 72d36113ccaa..c8cebb137076 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
| @@ -158,25 +158,21 @@ EXPORT_SYMBOL(percpu_counter_destroy); | |||
| 158 | int percpu_counter_batch __read_mostly = 32; | 158 | int percpu_counter_batch __read_mostly = 32; |
| 159 | EXPORT_SYMBOL(percpu_counter_batch); | 159 | EXPORT_SYMBOL(percpu_counter_batch); |
| 160 | 160 | ||
| 161 | static void compute_batch_value(void) | 161 | static int compute_batch_value(unsigned int cpu) |
| 162 | { | 162 | { |
| 163 | int nr = num_online_cpus(); | 163 | int nr = num_online_cpus(); |
| 164 | 164 | ||
| 165 | percpu_counter_batch = max(32, nr*2); | 165 | percpu_counter_batch = max(32, nr*2); |
| 166 | return 0; | ||
| 166 | } | 167 | } |
| 167 | 168 | ||
| 168 | static int percpu_counter_hotcpu_callback(struct notifier_block *nb, | 169 | static int percpu_counter_cpu_dead(unsigned int cpu) |
| 169 | unsigned long action, void *hcpu) | ||
| 170 | { | 170 | { |
| 171 | #ifdef CONFIG_HOTPLUG_CPU | 171 | #ifdef CONFIG_HOTPLUG_CPU |
| 172 | unsigned int cpu; | ||
| 173 | struct percpu_counter *fbc; | 172 | struct percpu_counter *fbc; |
| 174 | 173 | ||
| 175 | compute_batch_value(); | 174 | compute_batch_value(cpu); |
| 176 | if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) | ||
| 177 | return NOTIFY_OK; | ||
| 178 | 175 | ||
| 179 | cpu = (unsigned long)hcpu; | ||
| 180 | spin_lock_irq(&percpu_counters_lock); | 176 | spin_lock_irq(&percpu_counters_lock); |
| 181 | list_for_each_entry(fbc, &percpu_counters, list) { | 177 | list_for_each_entry(fbc, &percpu_counters, list) { |
| 182 | s32 *pcount; | 178 | s32 *pcount; |
| @@ -190,7 +186,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, | |||
| 190 | } | 186 | } |
| 191 | spin_unlock_irq(&percpu_counters_lock); | 187 | spin_unlock_irq(&percpu_counters_lock); |
| 192 | #endif | 188 | #endif |
| 193 | return NOTIFY_OK; | 189 | return 0; |
| 194 | } | 190 | } |
| 195 | 191 | ||
| 196 | /* | 192 | /* |
| @@ -222,8 +218,15 @@ EXPORT_SYMBOL(__percpu_counter_compare); | |||
| 222 | 218 | ||
| 223 | static int __init percpu_counter_startup(void) | 219 | static int __init percpu_counter_startup(void) |
| 224 | { | 220 | { |
| 225 | compute_batch_value(); | 221 | int ret; |
| 226 | hotcpu_notifier(percpu_counter_hotcpu_callback, 0); | 222 | |
| 223 | ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "lib/percpu_cnt:online", | ||
| 224 | compute_batch_value, NULL); | ||
| 225 | WARN_ON(ret < 0); | ||
| 226 | ret = cpuhp_setup_state_nocalls(CPUHP_PERCPU_CNT_DEAD, | ||
| 227 | "lib/percpu_cnt:dead", NULL, | ||
| 228 | percpu_counter_cpu_dead); | ||
| 229 | WARN_ON(ret < 0); | ||
| 227 | return 0; | 230 | return 0; |
| 228 | } | 231 | } |
| 229 | module_init(percpu_counter_startup); | 232 | module_init(percpu_counter_startup); |
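The percpu_counter changes above follow the general conversion from hotcpu_notifier() callbacks to the cpu-hotplug state machine. A generic sketch of that pattern with placeholder names; only cpuhp_setup_state() and CPUHP_AP_ONLINE_DYN are taken as given from the API used in this hunk.

    #include <linux/init.h>
    #include <linux/cpuhotplug.h>

    static int example_cpu_online(unsigned int cpu)
    {
            /* Invoked each time a CPU comes online; return 0 on success. */
            return 0;
    }

    static int __init example_init(void)
    {
            int ret;

            /*
             * CPUHP_AP_ONLINE_DYN hands out a dynamic state, so the online
             * callback needs no new enum entry.  A callback that must run
             * after a CPU is gone (like percpu_counter_cpu_dead() above)
             * instead needs a static entry in enum cpuhp_state, which is
             * what CPUHP_PERCPU_CNT_DEAD provides for this file.
             */
            ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "lib/example:online",
                                    example_cpu_online, NULL);
            return ret < 0 ? ret : 0;
    }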
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8e6d552c40dd..6f382e07de77 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
| @@ -22,6 +22,7 @@ | |||
| 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #include <linux/cpu.h> | ||
| 25 | #include <linux/errno.h> | 26 | #include <linux/errno.h> |
| 26 | #include <linux/init.h> | 27 | #include <linux/init.h> |
| 27 | #include <linux/kernel.h> | 28 | #include <linux/kernel.h> |
| @@ -30,7 +31,6 @@ | |||
| 30 | #include <linux/percpu.h> | 31 | #include <linux/percpu.h> |
| 31 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
| 32 | #include <linux/kmemleak.h> | 33 | #include <linux/kmemleak.h> |
| 33 | #include <linux/notifier.h> | ||
| 34 | #include <linux/cpu.h> | 34 | #include <linux/cpu.h> |
| 35 | #include <linux/string.h> | 35 | #include <linux/string.h> |
| 36 | #include <linux/bitops.h> | 36 | #include <linux/bitops.h> |
| @@ -69,6 +69,11 @@ struct radix_tree_preload { | |||
| 69 | }; | 69 | }; |
| 70 | static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; | 70 | static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; |
| 71 | 71 | ||
| 72 | static inline struct radix_tree_node *entry_to_node(void *ptr) | ||
| 73 | { | ||
| 74 | return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); | ||
| 75 | } | ||
| 76 | |||
| 72 | static inline void *node_to_entry(void *ptr) | 77 | static inline void *node_to_entry(void *ptr) |
| 73 | { | 78 | { |
| 74 | return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); | 79 | return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); |
| @@ -191,13 +196,12 @@ static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) | |||
| 191 | * Returns next bit offset, or size if nothing found. | 196 | * Returns next bit offset, or size if nothing found. |
| 192 | */ | 197 | */ |
| 193 | static __always_inline unsigned long | 198 | static __always_inline unsigned long |
| 194 | radix_tree_find_next_bit(const unsigned long *addr, | 199 | radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, |
| 195 | unsigned long size, unsigned long offset) | 200 | unsigned long offset) |
| 196 | { | 201 | { |
| 197 | if (!__builtin_constant_p(size)) | 202 | const unsigned long *addr = node->tags[tag]; |
| 198 | return find_next_bit(addr, size, offset); | ||
| 199 | 203 | ||
| 200 | if (offset < size) { | 204 | if (offset < RADIX_TREE_MAP_SIZE) { |
| 201 | unsigned long tmp; | 205 | unsigned long tmp; |
| 202 | 206 | ||
| 203 | addr += offset / BITS_PER_LONG; | 207 | addr += offset / BITS_PER_LONG; |
| @@ -205,14 +209,32 @@ radix_tree_find_next_bit(const unsigned long *addr, | |||
| 205 | if (tmp) | 209 | if (tmp) |
| 206 | return __ffs(tmp) + offset; | 210 | return __ffs(tmp) + offset; |
| 207 | offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); | 211 | offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); |
| 208 | while (offset < size) { | 212 | while (offset < RADIX_TREE_MAP_SIZE) { |
| 209 | tmp = *++addr; | 213 | tmp = *++addr; |
| 210 | if (tmp) | 214 | if (tmp) |
| 211 | return __ffs(tmp) + offset; | 215 | return __ffs(tmp) + offset; |
| 212 | offset += BITS_PER_LONG; | 216 | offset += BITS_PER_LONG; |
| 213 | } | 217 | } |
| 214 | } | 218 | } |
| 215 | return size; | 219 | return RADIX_TREE_MAP_SIZE; |
| 220 | } | ||
| 221 | |||
| 222 | static unsigned int iter_offset(const struct radix_tree_iter *iter) | ||
| 223 | { | ||
| 224 | return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK; | ||
| 225 | } | ||
| 226 | |||
| 227 | /* | ||
| 228 | * The maximum index which can be stored in a radix tree | ||
| 229 | */ | ||
| 230 | static inline unsigned long shift_maxindex(unsigned int shift) | ||
| 231 | { | ||
| 232 | return (RADIX_TREE_MAP_SIZE << shift) - 1; | ||
| 233 | } | ||
| 234 | |||
| 235 | static inline unsigned long node_maxindex(struct radix_tree_node *node) | ||
| 236 | { | ||
| 237 | return shift_maxindex(node->shift); | ||
| 216 | } | 238 | } |
| 217 | 239 | ||
| 218 | #ifndef __KERNEL__ | 240 | #ifndef __KERNEL__ |
| @@ -220,10 +242,11 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) | |||
| 220 | { | 242 | { |
| 221 | unsigned long i; | 243 | unsigned long i; |
| 222 | 244 | ||
| 223 | pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d parent %p\n", | 245 | pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n", |
| 224 | node, node->offset, | 246 | node, node->offset, index, index | node_maxindex(node), |
| 247 | node->parent, | ||
| 225 | node->tags[0][0], node->tags[1][0], node->tags[2][0], | 248 | node->tags[0][0], node->tags[1][0], node->tags[2][0], |
| 226 | node->shift, node->count, node->parent); | 249 | node->shift, node->count, node->exceptional); |
| 227 | 250 | ||
| 228 | for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { | 251 | for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { |
| 229 | unsigned long first = index | (i << node->shift); | 252 | unsigned long first = index | (i << node->shift); |
| @@ -231,14 +254,16 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) | |||
| 231 | void *entry = node->slots[i]; | 254 | void *entry = node->slots[i]; |
| 232 | if (!entry) | 255 | if (!entry) |
| 233 | continue; | 256 | continue; |
| 234 | if (is_sibling_entry(node, entry)) { | 257 | if (entry == RADIX_TREE_RETRY) { |
| 235 | pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n", | 258 | pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n", |
| 236 | entry, i, | 259 | i, first, last, node); |
| 237 | *(void **)entry_to_node(entry), | ||
| 238 | first, last); | ||
| 239 | } else if (!radix_tree_is_internal_node(entry)) { | 260 | } else if (!radix_tree_is_internal_node(entry)) { |
| 240 | pr_debug("radix entry %p offset %ld indices %ld-%ld\n", | 261 | pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n", |
| 241 | entry, i, first, last); | 262 | entry, i, first, last, node); |
| 263 | } else if (is_sibling_entry(node, entry)) { | ||
| 264 | pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n", | ||
| 265 | entry, i, first, last, node, | ||
| 266 | *(void **)entry_to_node(entry)); | ||
| 242 | } else { | 267 | } else { |
| 243 | dump_node(entry_to_node(entry), first); | 268 | dump_node(entry_to_node(entry), first); |
| 244 | } | 269 | } |
| @@ -262,7 +287,10 @@ static void radix_tree_dump(struct radix_tree_root *root) | |||
| 262 | * that the caller has pinned this thread of control to the current CPU. | 287 | * that the caller has pinned this thread of control to the current CPU. |
| 263 | */ | 288 | */ |
| 264 | static struct radix_tree_node * | 289 | static struct radix_tree_node * |
| 265 | radix_tree_node_alloc(struct radix_tree_root *root) | 290 | radix_tree_node_alloc(struct radix_tree_root *root, |
| 291 | struct radix_tree_node *parent, | ||
| 292 | unsigned int shift, unsigned int offset, | ||
| 293 | unsigned int count, unsigned int exceptional) | ||
| 266 | { | 294 | { |
| 267 | struct radix_tree_node *ret = NULL; | 295 | struct radix_tree_node *ret = NULL; |
| 268 | gfp_t gfp_mask = root_gfp_mask(root); | 296 | gfp_t gfp_mask = root_gfp_mask(root); |
| @@ -307,6 +335,13 @@ radix_tree_node_alloc(struct radix_tree_root *root) | |||
| 307 | ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); | 335 | ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); |
| 308 | out: | 336 | out: |
| 309 | BUG_ON(radix_tree_is_internal_node(ret)); | 337 | BUG_ON(radix_tree_is_internal_node(ret)); |
| 338 | if (ret) { | ||
| 339 | ret->parent = parent; | ||
| 340 | ret->shift = shift; | ||
| 341 | ret->offset = offset; | ||
| 342 | ret->count = count; | ||
| 343 | ret->exceptional = exceptional; | ||
| 344 | } | ||
| 310 | return ret; | 345 | return ret; |
| 311 | } | 346 | } |
| 312 | 347 | ||
| @@ -314,18 +349,15 @@ static void radix_tree_node_rcu_free(struct rcu_head *head) | |||
| 314 | { | 349 | { |
| 315 | struct radix_tree_node *node = | 350 | struct radix_tree_node *node = |
| 316 | container_of(head, struct radix_tree_node, rcu_head); | 351 | container_of(head, struct radix_tree_node, rcu_head); |
| 317 | int i; | ||
| 318 | 352 | ||
| 319 | /* | 353 | /* |
| 320 | * must only free zeroed nodes into the slab. radix_tree_shrink | 354 | * Must only free zeroed nodes into the slab. We can be left with |
| 321 | * can leave us with a non-NULL entry in the first slot, so clear | 355 | * non-NULL entries by radix_tree_free_nodes, so clear the entries |
| 322 | * that here to make sure. | 356 | * and tags here. |
| 323 | */ | 357 | */ |
| 324 | for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) | 358 | memset(node->slots, 0, sizeof(node->slots)); |
| 325 | tag_clear(node, i, 0); | 359 | memset(node->tags, 0, sizeof(node->tags)); |
| 326 | 360 | INIT_LIST_HEAD(&node->private_list); | |
| 327 | node->slots[0] = NULL; | ||
| 328 | node->count = 0; | ||
| 329 | 361 | ||
| 330 | kmem_cache_free(radix_tree_node_cachep, node); | 362 | kmem_cache_free(radix_tree_node_cachep, node); |
| 331 | } | 363 | } |
| @@ -345,7 +377,7 @@ radix_tree_node_free(struct radix_tree_node *node) | |||
| 345 | * To make use of this facility, the radix tree must be initialised without | 377 | * To make use of this facility, the radix tree must be initialised without |
| 346 | * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). | 378 | * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). |
| 347 | */ | 379 | */ |
| 348 | static int __radix_tree_preload(gfp_t gfp_mask, int nr) | 380 | static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) |
| 349 | { | 381 | { |
| 350 | struct radix_tree_preload *rtp; | 382 | struct radix_tree_preload *rtp; |
| 351 | struct radix_tree_node *node; | 383 | struct radix_tree_node *node; |
| @@ -411,6 +443,28 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) | |||
| 411 | } | 443 | } |
| 412 | EXPORT_SYMBOL(radix_tree_maybe_preload); | 444 | EXPORT_SYMBOL(radix_tree_maybe_preload); |
| 413 | 445 | ||
| 446 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 447 | /* | ||
| 448 | * Preload with enough objects to ensure that we can split a single entry | ||
| 449 | * of order @old_order into many entries of size @new_order | ||
| 450 | */ | ||
| 451 | int radix_tree_split_preload(unsigned int old_order, unsigned int new_order, | ||
| 452 | gfp_t gfp_mask) | ||
| 453 | { | ||
| 454 | unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT); | ||
| 455 | unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) - | ||
| 456 | (new_order / RADIX_TREE_MAP_SHIFT); | ||
| 457 | unsigned nr = 0; | ||
| 458 | |||
| 459 | WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); | ||
| 460 | BUG_ON(new_order >= old_order); | ||
| 461 | |||
| 462 | while (layers--) | ||
| 463 | nr = nr * RADIX_TREE_MAP_SIZE + 1; | ||
| 464 | return __radix_tree_preload(gfp_mask, top * nr); | ||
| 465 | } | ||
| 466 | #endif | ||
| 467 | |||
| 414 | /* | 468 | /* |
| 415 | * The same as function above, but preload number of nodes required to insert | 469 | * The same as function above, but preload number of nodes required to insert |
| 416 | * (1 << order) continuous naturally-aligned elements. | 470 | * (1 << order) continuous naturally-aligned elements. |
| @@ -456,19 +510,6 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) | |||
| 456 | return __radix_tree_preload(gfp_mask, nr_nodes); | 510 | return __radix_tree_preload(gfp_mask, nr_nodes); |
| 457 | } | 511 | } |
| 458 | 512 | ||
| 459 | /* | ||
| 460 | * The maximum index which can be stored in a radix tree | ||
| 461 | */ | ||
| 462 | static inline unsigned long shift_maxindex(unsigned int shift) | ||
| 463 | { | ||
| 464 | return (RADIX_TREE_MAP_SIZE << shift) - 1; | ||
| 465 | } | ||
| 466 | |||
| 467 | static inline unsigned long node_maxindex(struct radix_tree_node *node) | ||
| 468 | { | ||
| 469 | return shift_maxindex(node->shift); | ||
| 470 | } | ||
| 471 | |||
| 472 | static unsigned radix_tree_load_root(struct radix_tree_root *root, | 513 | static unsigned radix_tree_load_root(struct radix_tree_root *root, |
| 473 | struct radix_tree_node **nodep, unsigned long *maxindex) | 514 | struct radix_tree_node **nodep, unsigned long *maxindex) |
| 474 | { | 515 | { |
| @@ -506,8 +547,8 @@ static int radix_tree_extend(struct radix_tree_root *root, | |||
| 506 | goto out; | 547 | goto out; |
| 507 | 548 | ||
| 508 | do { | 549 | do { |
| 509 | struct radix_tree_node *node = radix_tree_node_alloc(root); | 550 | struct radix_tree_node *node = radix_tree_node_alloc(root, |
| 510 | 551 | NULL, shift, 0, 1, 0); | |
| 511 | if (!node) | 552 | if (!node) |
| 512 | return -ENOMEM; | 553 | return -ENOMEM; |
| 513 | 554 | ||
| @@ -518,12 +559,12 @@ static int radix_tree_extend(struct radix_tree_root *root, | |||
| 518 | } | 559 | } |
| 519 | 560 | ||
| 520 | BUG_ON(shift > BITS_PER_LONG); | 561 | BUG_ON(shift > BITS_PER_LONG); |
| 521 | node->shift = shift; | 562 | if (radix_tree_is_internal_node(slot)) { |
| 522 | node->offset = 0; | ||
| 523 | node->count = 1; | ||
| 524 | node->parent = NULL; | ||
| 525 | if (radix_tree_is_internal_node(slot)) | ||
| 526 | entry_to_node(slot)->parent = node; | 563 | entry_to_node(slot)->parent = node; |
| 564 | } else if (radix_tree_exceptional_entry(slot)) { | ||
| 565 | /* Moving an exceptional root->rnode to a node */ | ||
| 566 | node->exceptional = 1; | ||
| 567 | } | ||
| 527 | node->slots[0] = slot; | 568 | node->slots[0] = slot; |
| 528 | slot = node_to_entry(node); | 569 | slot = node_to_entry(node); |
| 529 | rcu_assign_pointer(root->rnode, slot); | 570 | rcu_assign_pointer(root->rnode, slot); |
| @@ -534,6 +575,104 @@ out: | |||
| 534 | } | 575 | } |
| 535 | 576 | ||
| 536 | /** | 577 | /** |
| 578 | * radix_tree_shrink - shrink radix tree to minimum height | ||
| 579 | * @root: radix tree root | ||
| 580 | */ | ||
| 581 | static inline void radix_tree_shrink(struct radix_tree_root *root, | ||
| 582 | radix_tree_update_node_t update_node, | ||
| 583 | void *private) | ||
| 584 | { | ||
| 585 | for (;;) { | ||
| 586 | struct radix_tree_node *node = root->rnode; | ||
| 587 | struct radix_tree_node *child; | ||
| 588 | |||
| 589 | if (!radix_tree_is_internal_node(node)) | ||
| 590 | break; | ||
| 591 | node = entry_to_node(node); | ||
| 592 | |||
| 593 | /* | ||
| 594 | * If the candidate node has more than one child, or its child | ||
| 595 | * is not at the leftmost slot, or the child is a multiorder | ||
| 596 | * entry, we cannot shrink. | ||
| 597 | */ | ||
| 598 | if (node->count != 1) | ||
| 599 | break; | ||
| 600 | child = node->slots[0]; | ||
| 601 | if (!child) | ||
| 602 | break; | ||
| 603 | if (!radix_tree_is_internal_node(child) && node->shift) | ||
| 604 | break; | ||
| 605 | |||
| 606 | if (radix_tree_is_internal_node(child)) | ||
| 607 | entry_to_node(child)->parent = NULL; | ||
| 608 | |||
| 609 | /* | ||
| 610 | * We don't need rcu_assign_pointer(), since we are simply | ||
| 611 | * moving the node from one part of the tree to another: if it | ||
| 612 | * was safe to dereference the old pointer to it | ||
| 613 | * (node->slots[0]), it will be safe to dereference the new | ||
| 614 | * one (root->rnode) as far as dependent read barriers go. | ||
| 615 | */ | ||
| 616 | root->rnode = child; | ||
| 617 | |||
| 618 | /* | ||
| 619 | * We have a dilemma here. The node's slot[0] must not be | ||
| 620 | * NULLed in case there are concurrent lookups expecting to | ||
| 621 | * find the item. However if this was a bottom-level node, | ||
| 622 | * then it may be subject to the slot pointer being visible | ||
| 623 | * to callers dereferencing it. If item corresponding to | ||
| 624 | * slot[0] is subsequently deleted, these callers would expect | ||
| 625 | * their slot to become empty sooner or later. | ||
| 626 | * | ||
| 627 | * For example, lockless pagecache will look up a slot, deref | ||
| 628 | * the page pointer, and if the page has 0 refcount it means it | ||
| 629 | * was concurrently deleted from pagecache so try the deref | ||
| 630 | * again. Fortunately there is already a requirement for logic | ||
| 631 | * to retry the entire slot lookup -- the indirect pointer | ||
| 632 | * problem (replacing direct root node with an indirect pointer | ||
| 633 | * also results in a stale slot). So tag the slot as indirect | ||
| 634 | * to force callers to retry. | ||
| 635 | */ | ||
| 636 | node->count = 0; | ||
| 637 | if (!radix_tree_is_internal_node(child)) { | ||
| 638 | node->slots[0] = RADIX_TREE_RETRY; | ||
| 639 | if (update_node) | ||
| 640 | update_node(node, private); | ||
| 641 | } | ||
| 642 | |||
| 643 | radix_tree_node_free(node); | ||
| 644 | } | ||
| 645 | } | ||
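The slot[0] = RADIX_TREE_RETRY trick above only works because lockless readers already retry on such slots. A minimal sketch of that caller-side convention, using the existing radix_tree_deref_slot()/radix_tree_deref_retry() helpers; the lookup function itself is illustrative and not taken from this patch.

```c
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

/* Illustrative lockless lookup that copes with RADIX_TREE_RETRY slots. */
static void *lockless_lookup(struct radix_tree_root *root, unsigned long index)
{
        void **slot;
        void *entry;

        rcu_read_lock();
repeat:
        entry = NULL;
        slot = radix_tree_lookup_slot(root, index);
        if (slot) {
                entry = radix_tree_deref_slot(slot);
                /* The slot went stale under us (shrink/split): start over. */
                if (radix_tree_deref_retry(entry))
                        goto repeat;
        }
        rcu_read_unlock();
        return entry;
}
```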
| 646 | |||
| 647 | static void delete_node(struct radix_tree_root *root, | ||
| 648 | struct radix_tree_node *node, | ||
| 649 | radix_tree_update_node_t update_node, void *private) | ||
| 650 | { | ||
| 651 | do { | ||
| 652 | struct radix_tree_node *parent; | ||
| 653 | |||
| 654 | if (node->count) { | ||
| 655 | if (node == entry_to_node(root->rnode)) | ||
| 656 | radix_tree_shrink(root, update_node, private); | ||
| 657 | return; | ||
| 658 | } | ||
| 659 | |||
| 660 | parent = node->parent; | ||
| 661 | if (parent) { | ||
| 662 | parent->slots[node->offset] = NULL; | ||
| 663 | parent->count--; | ||
| 664 | } else { | ||
| 665 | root_tag_clear_all(root); | ||
| 666 | root->rnode = NULL; | ||
| 667 | } | ||
| 668 | |||
| 669 | radix_tree_node_free(node); | ||
| 670 | |||
| 671 | node = parent; | ||
| 672 | } while (node); | ||
| 673 | } | ||
| 674 | |||
| 675 | /** | ||
| 537 | * __radix_tree_create - create a slot in a radix tree | 676 | * __radix_tree_create - create a slot in a radix tree |
| 538 | * @root: radix tree root | 677 | * @root: radix tree root |
| 539 | * @index: index key | 678 | * @index: index key |
| @@ -563,26 +702,24 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, | |||
| 563 | shift = radix_tree_load_root(root, &child, &maxindex); | 702 | shift = radix_tree_load_root(root, &child, &maxindex); |
| 564 | 703 | ||
| 565 | /* Make sure the tree is high enough. */ | 704 | /* Make sure the tree is high enough. */ |
| 705 | if (order > 0 && max == ((1UL << order) - 1)) | ||
| 706 | max++; | ||
| 566 | if (max > maxindex) { | 707 | if (max > maxindex) { |
| 567 | int error = radix_tree_extend(root, max, shift); | 708 | int error = radix_tree_extend(root, max, shift); |
| 568 | if (error < 0) | 709 | if (error < 0) |
| 569 | return error; | 710 | return error; |
| 570 | shift = error; | 711 | shift = error; |
| 571 | child = root->rnode; | 712 | child = root->rnode; |
| 572 | if (order == shift) | ||
| 573 | shift += RADIX_TREE_MAP_SHIFT; | ||
| 574 | } | 713 | } |
| 575 | 714 | ||
| 576 | while (shift > order) { | 715 | while (shift > order) { |
| 577 | shift -= RADIX_TREE_MAP_SHIFT; | 716 | shift -= RADIX_TREE_MAP_SHIFT; |
| 578 | if (child == NULL) { | 717 | if (child == NULL) { |
| 579 | /* Have to add a child node. */ | 718 | /* Have to add a child node. */ |
| 580 | child = radix_tree_node_alloc(root); | 719 | child = radix_tree_node_alloc(root, node, shift, |
| 720 | offset, 0, 0); | ||
| 581 | if (!child) | 721 | if (!child) |
| 582 | return -ENOMEM; | 722 | return -ENOMEM; |
| 583 | child->shift = shift; | ||
| 584 | child->offset = offset; | ||
| 585 | child->parent = node; | ||
| 586 | rcu_assign_pointer(*slot, node_to_entry(child)); | 723 | rcu_assign_pointer(*slot, node_to_entry(child)); |
| 587 | if (node) | 724 | if (node) |
| 588 | node->count++; | 725 | node->count++; |
| @@ -595,31 +732,125 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, | |||
| 595 | slot = &node->slots[offset]; | 732 | slot = &node->slots[offset]; |
| 596 | } | 733 | } |
| 597 | 734 | ||
| 735 | if (nodep) | ||
| 736 | *nodep = node; | ||
| 737 | if (slotp) | ||
| 738 | *slotp = slot; | ||
| 739 | return 0; | ||
| 740 | } | ||
| 741 | |||
| 598 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | 742 | #ifdef CONFIG_RADIX_TREE_MULTIORDER |
| 599 | /* Insert pointers to the canonical entry */ | 743 | /* |
| 600 | if (order > shift) { | 744 | * Free any nodes below this node. The tree is presumed to not need |
| 601 | unsigned i, n = 1 << (order - shift); | 745 | * shrinking, and any user data in the tree is presumed to not need a |
| 746 | * destructor called on it. If we need to add a destructor, we can | ||
| 747 | * add that functionality later. Note that we may not clear tags or | ||
| 748 | * slots from the tree as an RCU walker may still have a pointer into | ||
| 749 | * this subtree. We could replace the entries with RADIX_TREE_RETRY, | ||
| 750 | * but we'll still have to clear those in rcu_free. | ||
| 751 | */ | ||
| 752 | static void radix_tree_free_nodes(struct radix_tree_node *node) | ||
| 753 | { | ||
| 754 | unsigned offset = 0; | ||
| 755 | struct radix_tree_node *child = entry_to_node(node); | ||
| 756 | |||
| 757 | for (;;) { | ||
| 758 | void *entry = child->slots[offset]; | ||
| 759 | if (radix_tree_is_internal_node(entry) && | ||
| 760 | !is_sibling_entry(child, entry)) { | ||
| 761 | child = entry_to_node(entry); | ||
| 762 | offset = 0; | ||
| 763 | continue; | ||
| 764 | } | ||
| 765 | offset++; | ||
| 766 | while (offset == RADIX_TREE_MAP_SIZE) { | ||
| 767 | struct radix_tree_node *old = child; | ||
| 768 | offset = child->offset + 1; | ||
| 769 | child = child->parent; | ||
| 770 | radix_tree_node_free(old); | ||
| 771 | if (old == entry_to_node(node)) | ||
| 772 | return; | ||
| 773 | } | ||
| 774 | } | ||
| 775 | } | ||
| 776 | |||
| 777 | static inline int insert_entries(struct radix_tree_node *node, void **slot, | ||
| 778 | void *item, unsigned order, bool replace) | ||
| 779 | { | ||
| 780 | struct radix_tree_node *child; | ||
| 781 | unsigned i, n, tag, offset, tags = 0; | ||
| 782 | |||
| 783 | if (node) { | ||
| 784 | if (order > node->shift) | ||
| 785 | n = 1 << (order - node->shift); | ||
| 786 | else | ||
| 787 | n = 1; | ||
| 788 | offset = get_slot_offset(node, slot); | ||
| 789 | } else { | ||
| 790 | n = 1; | ||
| 791 | offset = 0; | ||
| 792 | } | ||
| 793 | |||
| 794 | if (n > 1) { | ||
| 602 | offset = offset & ~(n - 1); | 795 | offset = offset & ~(n - 1); |
| 603 | slot = &node->slots[offset]; | 796 | slot = &node->slots[offset]; |
| 604 | child = node_to_entry(slot); | 797 | } |
| 605 | for (i = 0; i < n; i++) { | 798 | child = node_to_entry(slot); |
| 606 | if (slot[i]) | 799 | |
| 800 | for (i = 0; i < n; i++) { | ||
| 801 | if (slot[i]) { | ||
| 802 | if (replace) { | ||
| 803 | node->count--; | ||
| 804 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 805 | if (tag_get(node, tag, offset + i)) | ||
| 806 | tags |= 1 << tag; | ||
| 807 | } else | ||
| 607 | return -EEXIST; | 808 | return -EEXIST; |
| 608 | } | 809 | } |
| 810 | } | ||
| 609 | 811 | ||
| 610 | for (i = 1; i < n; i++) { | 812 | for (i = 0; i < n; i++) { |
| 813 | struct radix_tree_node *old = slot[i]; | ||
| 814 | if (i) { | ||
| 611 | rcu_assign_pointer(slot[i], child); | 815 | rcu_assign_pointer(slot[i], child); |
| 612 | node->count++; | 816 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) |
| 817 | if (tags & (1 << tag)) | ||
| 818 | tag_clear(node, tag, offset + i); | ||
| 819 | } else { | ||
| 820 | rcu_assign_pointer(slot[i], item); | ||
| 821 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 822 | if (tags & (1 << tag)) | ||
| 823 | tag_set(node, tag, offset); | ||
| 613 | } | 824 | } |
| 825 | if (radix_tree_is_internal_node(old) && | ||
| 826 | !is_sibling_entry(node, old) && | ||
| 827 | (old != RADIX_TREE_RETRY)) | ||
| 828 | radix_tree_free_nodes(old); | ||
| 829 | if (radix_tree_exceptional_entry(old)) | ||
| 830 | node->exceptional--; | ||
| 614 | } | 831 | } |
| 615 | #endif | 832 | if (node) { |
| 616 | 833 | node->count += n; | |
| 617 | if (nodep) | 834 | if (radix_tree_exceptional_entry(item)) |
| 618 | *nodep = node; | 835 | node->exceptional += n; |
| 619 | if (slotp) | 836 | } |
| 620 | *slotp = slot; | 837 | return n; |
| 621 | return 0; | 838 | } |
| 839 | #else | ||
| 840 | static inline int insert_entries(struct radix_tree_node *node, void **slot, | ||
| 841 | void *item, unsigned order, bool replace) | ||
| 842 | { | ||
| 843 | if (*slot) | ||
| 844 | return -EEXIST; | ||
| 845 | rcu_assign_pointer(*slot, item); | ||
| 846 | if (node) { | ||
| 847 | node->count++; | ||
| 848 | if (radix_tree_exceptional_entry(item)) | ||
| 849 | node->exceptional++; | ||
| 850 | } | ||
| 851 | return 1; | ||
| 622 | } | 852 | } |
| 853 | #endif | ||
| 623 | 854 | ||
| 624 | /** | 855 | /** |
| 625 | * __radix_tree_insert - insert into a radix tree | 856 | * __radix_tree_insert - insert into a radix tree |
| @@ -642,13 +873,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, | |||
| 642 | error = __radix_tree_create(root, index, order, &node, &slot); | 873 | error = __radix_tree_create(root, index, order, &node, &slot); |
| 643 | if (error) | 874 | if (error) |
| 644 | return error; | 875 | return error; |
| 645 | if (*slot != NULL) | 876 | |
| 646 | return -EEXIST; | 877 | error = insert_entries(node, slot, item, order, false); |
| 647 | rcu_assign_pointer(*slot, item); | 878 | if (error < 0) |
| 879 | return error; | ||
| 648 | 880 | ||
| 649 | if (node) { | 881 | if (node) { |
| 650 | unsigned offset = get_slot_offset(node, slot); | 882 | unsigned offset = get_slot_offset(node, slot); |
| 651 | node->count++; | ||
| 652 | BUG_ON(tag_get(node, 0, offset)); | 883 | BUG_ON(tag_get(node, 0, offset)); |
| 653 | BUG_ON(tag_get(node, 1, offset)); | 884 | BUG_ON(tag_get(node, 1, offset)); |
| 654 | BUG_ON(tag_get(node, 2, offset)); | 885 | BUG_ON(tag_get(node, 2, offset)); |
| @@ -746,6 +977,287 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) | |||
| 746 | } | 977 | } |
| 747 | EXPORT_SYMBOL(radix_tree_lookup); | 978 | EXPORT_SYMBOL(radix_tree_lookup); |
| 748 | 979 | ||
| 980 | static inline int slot_count(struct radix_tree_node *node, | ||
| 981 | void **slot) | ||
| 982 | { | ||
| 983 | int n = 1; | ||
| 984 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 985 | void *ptr = node_to_entry(slot); | ||
| 986 | unsigned offset = get_slot_offset(node, slot); | ||
| 987 | int i; | ||
| 988 | |||
| 989 | for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { | ||
| 990 | if (node->slots[offset + i] != ptr) | ||
| 991 | break; | ||
| 992 | n++; | ||
| 993 | } | ||
| 994 | #endif | ||
| 995 | return n; | ||
| 996 | } | ||
| 997 | |||
| 998 | static void replace_slot(struct radix_tree_root *root, | ||
| 999 | struct radix_tree_node *node, | ||
| 1000 | void **slot, void *item, | ||
| 1001 | bool warn_typeswitch) | ||
| 1002 | { | ||
| 1003 | void *old = rcu_dereference_raw(*slot); | ||
| 1004 | int count, exceptional; | ||
| 1005 | |||
| 1006 | WARN_ON_ONCE(radix_tree_is_internal_node(item)); | ||
| 1007 | |||
| 1008 | count = !!item - !!old; | ||
| 1009 | exceptional = !!radix_tree_exceptional_entry(item) - | ||
| 1010 | !!radix_tree_exceptional_entry(old); | ||
| 1011 | |||
| 1012 | WARN_ON_ONCE(warn_typeswitch && (count || exceptional)); | ||
| 1013 | |||
| 1014 | if (node) { | ||
| 1015 | node->count += count; | ||
| 1016 | if (exceptional) { | ||
| 1017 | exceptional *= slot_count(node, slot); | ||
| 1018 | node->exceptional += exceptional; | ||
| 1019 | } | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | rcu_assign_pointer(*slot, item); | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | static inline void delete_sibling_entries(struct radix_tree_node *node, | ||
| 1026 | void **slot) | ||
| 1027 | { | ||
| 1028 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 1029 | bool exceptional = radix_tree_exceptional_entry(*slot); | ||
| 1030 | void *ptr = node_to_entry(slot); | ||
| 1031 | unsigned offset = get_slot_offset(node, slot); | ||
| 1032 | int i; | ||
| 1033 | |||
| 1034 | for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { | ||
| 1035 | if (node->slots[offset + i] != ptr) | ||
| 1036 | break; | ||
| 1037 | node->slots[offset + i] = NULL; | ||
| 1038 | node->count--; | ||
| 1039 | if (exceptional) | ||
| 1040 | node->exceptional--; | ||
| 1041 | } | ||
| 1042 | #endif | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | /** | ||
| 1046 | * __radix_tree_replace - replace item in a slot | ||
| 1047 | * @root: radix tree root | ||
| 1048 | * @node: pointer to tree node | ||
| 1049 | * @slot: pointer to slot in @node | ||
| 1050 | * @item: new item to store in the slot. | ||
| 1051 | * @update_node: callback for changing leaf nodes | ||
| 1052 | * @private: private data to pass to @update_node | ||
| 1053 | * | ||
| 1054 | * For use with __radix_tree_lookup(). Caller must hold tree write locked | ||
| 1055 | * across slot lookup and replacement. | ||
| 1056 | */ | ||
| 1057 | void __radix_tree_replace(struct radix_tree_root *root, | ||
| 1058 | struct radix_tree_node *node, | ||
| 1059 | void **slot, void *item, | ||
| 1060 | radix_tree_update_node_t update_node, void *private) | ||
| 1061 | { | ||
| 1062 | if (!item) | ||
| 1063 | delete_sibling_entries(node, slot); | ||
| 1064 | /* | ||
| 1065 | * This function supports replacing exceptional entries and | ||
| 1066 | * deleting entries, but that needs accounting against the | ||
| 1067 | * node unless the slot is root->rnode. | ||
| 1068 | */ | ||
| 1069 | replace_slot(root, node, slot, item, | ||
| 1070 | !node && slot != (void **)&root->rnode); | ||
| 1071 | |||
| 1072 | if (!node) | ||
| 1073 | return; | ||
| 1074 | |||
| 1075 | if (update_node) | ||
| 1076 | update_node(node, private); | ||
| 1077 | |||
| 1078 | delete_node(root, node, update_node, private); | ||
| 1079 | } | ||
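A hedged usage sketch for the __radix_tree_lookup()/__radix_tree_replace() pair documented above; replace_entry() and its spinlock parameter are placeholders for whatever write-side exclusion the caller really uses.

```c
#include <linux/radix-tree.h>
#include <linux/spinlock.h>

/* Swap the entry at @index for @item while holding the caller's tree lock. */
static void replace_entry(struct radix_tree_root *root, spinlock_t *lock,
                          unsigned long index, void *item)
{
        struct radix_tree_node *node;
        void **slot;

        spin_lock_irq(lock);
        if (__radix_tree_lookup(root, index, &node, &slot)) {
                /* No update_node callback and no private data needed here. */
                __radix_tree_replace(root, node, slot, item, NULL, NULL);
        }
        spin_unlock_irq(lock);
}
```

Passing a NULL @item deletes the entry, after which the delete_node() call above frees any nodes that become empty.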
| 1080 | |||
| 1081 | /** | ||
| 1082 | * radix_tree_replace_slot - replace item in a slot | ||
| 1083 | * @root: radix tree root | ||
| 1084 | * @slot: pointer to slot | ||
| 1085 | * @item: new item to store in the slot. | ||
| 1086 | * | ||
| 1087 | * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(), | ||
| 1088 | * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked | ||
| 1089 | * across slot lookup and replacement. | ||
| 1090 | * | ||
| 1091 | * NOTE: This cannot be used to switch between non-entries (empty slots), | ||
| 1092 | * regular entries, and exceptional entries, as that requires accounting | ||
| 1093 | * inside the radix tree node. When switching from one type of entry to | ||
| 1094 | * another, or when deleting an entry, use __radix_tree_lookup() and | ||
| 1095 | * __radix_tree_replace() or radix_tree_iter_replace(). | ||
| 1096 | */ | ||
| 1097 | void radix_tree_replace_slot(struct radix_tree_root *root, | ||
| 1098 | void **slot, void *item) | ||
| 1099 | { | ||
| 1100 | replace_slot(root, NULL, slot, item, true); | ||
| 1101 | } | ||
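A correspondingly small sketch for radix_tree_replace_slot(), which per the NOTE above may only swap one present entry for another of the same kind; swap_item() is illustrative and assumes the caller already holds the tree write lock.

```c
#include <linux/errno.h>
#include <linux/radix-tree.h>

/* Replace a present entry with another regular entry of the same kind. */
static int swap_item(struct radix_tree_root *root, unsigned long index,
                     void *item)
{
        void **slot;

        slot = radix_tree_lookup_slot(root, index);
        if (!slot)
                return -ENOENT;
        radix_tree_replace_slot(root, slot, item);
        return 0;
}
```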
| 1102 | |||
| 1103 | /** | ||
| 1104 | * radix_tree_iter_replace - replace item in a slot | ||
| 1105 | * @root: radix tree root | ||
| 1106 | * @slot: pointer to slot | ||
| 1107 | * @item: new item to store in the slot. | ||
| 1108 | * | ||
| 1109 | * For use with radix_tree_split() and radix_tree_for_each_slot(). | ||
| 1110 | * Caller must hold tree write locked across split and replacement. | ||
| 1111 | */ | ||
| 1112 | void radix_tree_iter_replace(struct radix_tree_root *root, | ||
| 1113 | const struct radix_tree_iter *iter, void **slot, void *item) | ||
| 1114 | { | ||
| 1115 | __radix_tree_replace(root, iter->node, slot, item, NULL, NULL); | ||
| 1116 | } | ||
| 1117 | |||
| 1118 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 1119 | /** | ||
| 1120 | * radix_tree_join - replace multiple entries with one multiorder entry | ||
| 1121 | * @root: radix tree root | ||
| 1122 | * @index: an index inside the new entry | ||
| 1123 | * @order: order of the new entry | ||
| 1124 | * @item: new entry | ||
| 1125 | * | ||
| 1126 | * Call this function to replace several entries with one larger entry. | ||
| 1127 | * The existing entries are presumed to not need freeing as a result of | ||
| 1128 | * this call. | ||
| 1129 | * | ||
| 1130 | * The replacement entry will have all the tags set on it that were set | ||
| 1131 | * on any of the entries it is replacing. | ||
| 1132 | */ | ||
| 1133 | int radix_tree_join(struct radix_tree_root *root, unsigned long index, | ||
| 1134 | unsigned order, void *item) | ||
| 1135 | { | ||
| 1136 | struct radix_tree_node *node; | ||
| 1137 | void **slot; | ||
| 1138 | int error; | ||
| 1139 | |||
| 1140 | BUG_ON(radix_tree_is_internal_node(item)); | ||
| 1141 | |||
| 1142 | error = __radix_tree_create(root, index, order, &node, &slot); | ||
| 1143 | if (!error) | ||
| 1144 | error = insert_entries(node, slot, item, order, true); | ||
| 1145 | if (error > 0) | ||
| 1146 | error = 0; | ||
| 1147 | |||
| 1148 | return error; | ||
| 1149 | } | ||
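A hedged example of radix_tree_join() in use (CONFIG_RADIX_TREE_MULTIORDER assumed, tree locking elided for brevity); join_range() and the @huge argument are placeholders for the caller's own multiorder entry.

```c
#include <linux/gfp.h>
#include <linux/radix-tree.h>

/* Collapse the entries covering an order-@order aligned range into one. */
static int join_range(struct radix_tree_root *root, unsigned long index,
                      unsigned order, void *huge)
{
        int err;

        err = radix_tree_preload(GFP_KERNEL);
        if (err)
                return err;
        /* The new entry inherits the union of the replaced entries' tags. */
        err = radix_tree_join(root, index, order, huge);
        radix_tree_preload_end();
        return err;
}
```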
| 1150 | |||
| 1151 | /** | ||
| 1152 | * radix_tree_split - Split an entry into smaller entries | ||
| 1153 | * @root: radix tree root | ||
| 1154 | * @index: An index within the large entry | ||
| 1155 | * @order: Order of new entries | ||
| 1156 | * | ||
| 1157 | * Call this function as the first step in replacing a multiorder entry | ||
| 1158 | * with several entries of lower order. After this function returns, | ||
| 1159 | * loop over the relevant portion of the tree using radix_tree_for_each_slot() | ||
| 1160 | * and call radix_tree_iter_replace() to set up each new entry. | ||
| 1161 | * | ||
| 1162 | * The tags from this entry are replicated to all the new entries. | ||
| 1163 | * | ||
| 1164 | * The radix tree should be locked against modification during the entire | ||
| 1165 | * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which | ||
| 1166 | * should prompt RCU walkers to restart the lookup from the root. | ||
| 1167 | */ | ||
| 1168 | int radix_tree_split(struct radix_tree_root *root, unsigned long index, | ||
| 1169 | unsigned order) | ||
| 1170 | { | ||
| 1171 | struct radix_tree_node *parent, *node, *child; | ||
| 1172 | void **slot; | ||
| 1173 | unsigned int offset, end; | ||
| 1174 | unsigned n, tag, tags = 0; | ||
| 1175 | |||
| 1176 | if (!__radix_tree_lookup(root, index, &parent, &slot)) | ||
| 1177 | return -ENOENT; | ||
| 1178 | if (!parent) | ||
| 1179 | return -ENOENT; | ||
| 1180 | |||
| 1181 | offset = get_slot_offset(parent, slot); | ||
| 1182 | |||
| 1183 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 1184 | if (tag_get(parent, tag, offset)) | ||
| 1185 | tags |= 1 << tag; | ||
| 1186 | |||
| 1187 | for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { | ||
| 1188 | if (!is_sibling_entry(parent, parent->slots[end])) | ||
| 1189 | break; | ||
| 1190 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 1191 | if (tags & (1 << tag)) | ||
| 1192 | tag_set(parent, tag, end); | ||
| 1193 | /* rcu_assign_pointer ensures tags are set before RETRY */ | ||
| 1194 | rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY); | ||
| 1195 | } | ||
| 1196 | rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY); | ||
| 1197 | parent->exceptional -= (end - offset); | ||
| 1198 | |||
| 1199 | if (order == parent->shift) | ||
| 1200 | return 0; | ||
| 1201 | if (order > parent->shift) { | ||
| 1202 | while (offset < end) | ||
| 1203 | offset += insert_entries(parent, &parent->slots[offset], | ||
| 1204 | RADIX_TREE_RETRY, order, true); | ||
| 1205 | return 0; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | node = parent; | ||
| 1209 | |||
| 1210 | for (;;) { | ||
| 1211 | if (node->shift > order) { | ||
| 1212 | child = radix_tree_node_alloc(root, node, | ||
| 1213 | node->shift - RADIX_TREE_MAP_SHIFT, | ||
| 1214 | offset, 0, 0); | ||
| 1215 | if (!child) | ||
| 1216 | goto nomem; | ||
| 1217 | if (node != parent) { | ||
| 1218 | node->count++; | ||
| 1219 | node->slots[offset] = node_to_entry(child); | ||
| 1220 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 1221 | if (tags & (1 << tag)) | ||
| 1222 | tag_set(node, tag, offset); | ||
| 1223 | } | ||
| 1224 | |||
| 1225 | node = child; | ||
| 1226 | offset = 0; | ||
| 1227 | continue; | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | n = insert_entries(node, &node->slots[offset], | ||
| 1231 | RADIX_TREE_RETRY, order, false); | ||
| 1232 | BUG_ON(n > RADIX_TREE_MAP_SIZE); | ||
| 1233 | |||
| 1234 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 1235 | if (tags & (1 << tag)) | ||
| 1236 | tag_set(node, tag, offset); | ||
| 1237 | offset += n; | ||
| 1238 | |||
| 1239 | while (offset == RADIX_TREE_MAP_SIZE) { | ||
| 1240 | if (node == parent) | ||
| 1241 | break; | ||
| 1242 | offset = node->offset; | ||
| 1243 | child = node; | ||
| 1244 | node = node->parent; | ||
| 1245 | rcu_assign_pointer(node->slots[offset], | ||
| 1246 | node_to_entry(child)); | ||
| 1247 | offset++; | ||
| 1248 | } | ||
| 1249 | if ((node == parent) && (offset == end)) | ||
| 1250 | return 0; | ||
| 1251 | } | ||
| 1252 | |||
| 1253 | nomem: | ||
| 1254 | /* Shouldn't happen; did user forget to preload? */ | ||
| 1255 | /* TODO: free all the allocated nodes */ | ||
| 1256 | WARN_ON(1); | ||
| 1257 | return -ENOMEM; | ||
| 1258 | } | ||
| 1259 | #endif | ||
| 1260 | |||
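Putting radix_tree_split_preload(), radix_tree_split() and radix_tree_iter_replace() together as the kerneldoc above prescribes. This is a sketch only: CONFIG_RADIX_TREE_MULTIORDER is assumed, the spinlock stands in for the caller's real tree lock, and get_piece() is a made-up callback that supplies the smaller replacement entries.

```c
#include <linux/gfp.h>
#include <linux/radix-tree.h>
#include <linux/spinlock.h>

/* Split the order-@old entry containing @index into order-@new pieces. */
static int split_entry(struct radix_tree_root *root, spinlock_t *lock,
                       unsigned long index, unsigned old, unsigned new,
                       void *(*get_piece)(unsigned long index))
{
        struct radix_tree_iter iter;
        void **slot;
        unsigned long start = index & ~((1UL << old) - 1);
        unsigned long end = start + (1UL << old) - 1;
        int err;

        err = radix_tree_split_preload(old, new, GFP_KERNEL);
        if (err)
                return err;

        spin_lock(lock);
        err = radix_tree_split(root, index, new);
        if (!err) {
                /* Swap every RADIX_TREE_RETRY placeholder for a real entry. */
                radix_tree_for_each_slot(slot, root, &iter, start) {
                        if (iter.index > end)
                                break;
                        radix_tree_iter_replace(root, &iter, slot,
                                                get_piece(iter.index));
                }
        }
        spin_unlock(lock);
        radix_tree_preload_end();
        return err;
}
```

Between the split and the replacement loop the range holds RADIX_TREE_RETRY placeholders, so lock-free lookups restart from the root until the final entries are installed, as the kerneldoc above notes.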
| 749 | /** | 1261 | /** |
| 750 | * radix_tree_tag_set - set a tag on a radix tree node | 1262 | * radix_tree_tag_set - set a tag on a radix tree node |
| 751 | * @root: radix tree root | 1263 | * @root: radix tree root |
| @@ -807,6 +1319,34 @@ static void node_tag_clear(struct radix_tree_root *root, | |||
| 807 | root_tag_clear(root, tag); | 1319 | root_tag_clear(root, tag); |
| 808 | } | 1320 | } |
| 809 | 1321 | ||
| 1322 | static void node_tag_set(struct radix_tree_root *root, | ||
| 1323 | struct radix_tree_node *node, | ||
| 1324 | unsigned int tag, unsigned int offset) | ||
| 1325 | { | ||
| 1326 | while (node) { | ||
| 1327 | if (tag_get(node, tag, offset)) | ||
| 1328 | return; | ||
| 1329 | tag_set(node, tag, offset); | ||
| 1330 | offset = node->offset; | ||
| 1331 | node = node->parent; | ||
| 1332 | } | ||
| 1333 | |||
| 1334 | if (!root_tag_get(root, tag)) | ||
| 1335 | root_tag_set(root, tag); | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | /** | ||
| 1339 | * radix_tree_iter_tag_set - set a tag on the current iterator entry | ||
| 1340 | * @root: radix tree root | ||
| 1341 | * @iter: iterator state | ||
| 1342 | * @tag: tag to set | ||
| 1343 | */ | ||
| 1344 | void radix_tree_iter_tag_set(struct radix_tree_root *root, | ||
| 1345 | const struct radix_tree_iter *iter, unsigned int tag) | ||
| 1346 | { | ||
| 1347 | node_tag_set(root, iter->node, tag, iter_offset(iter)); | ||
| 1348 | } | ||
| 1349 | |||
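A short sketch of the intended use of radix_tree_iter_tag_set(): tag entries while walking the tree under the tree lock, which is roughly how callers of the radix_tree_range_tag_if_tagged() helper removed further down can be rewritten. The match() predicate and the locking scheme are placeholders.

```c
#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/types.h>

/* Set @tag on every entry accepted by @match, under the caller's lock. */
static unsigned int tag_matching(struct radix_tree_root *root, spinlock_t *lock,
                                 unsigned int tag, bool (*match)(void *entry))
{
        struct radix_tree_iter iter;
        void **slot;
        unsigned int tagged = 0;

        spin_lock(lock);
        radix_tree_for_each_slot(slot, root, &iter, 0) {
                if (!match(radix_tree_deref_slot_protected(slot, lock)))
                        continue;
                radix_tree_iter_tag_set(root, &iter, tag);
                tagged++;
        }
        spin_unlock(lock);
        return tagged;
}
```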
| 810 | /** | 1350 | /** |
| 811 | * radix_tree_tag_clear - clear a tag on a radix tree node | 1351 | * radix_tree_tag_clear - clear a tag on a radix tree node |
| 812 | * @root: radix tree root | 1352 | * @root: radix tree root |
| @@ -902,6 +1442,121 @@ static inline void __set_iter_shift(struct radix_tree_iter *iter, | |||
| 902 | #endif | 1442 | #endif |
| 903 | } | 1443 | } |
| 904 | 1444 | ||
| 1445 | /* Construct iter->tags bit-mask from node->tags[tag] array */ | ||
| 1446 | static void set_iter_tags(struct radix_tree_iter *iter, | ||
| 1447 | struct radix_tree_node *node, unsigned offset, | ||
| 1448 | unsigned tag) | ||
| 1449 | { | ||
| 1450 | unsigned tag_long = offset / BITS_PER_LONG; | ||
| 1451 | unsigned tag_bit = offset % BITS_PER_LONG; | ||
| 1452 | |||
| 1453 | iter->tags = node->tags[tag][tag_long] >> tag_bit; | ||
| 1454 | |||
| 1455 | /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ | ||
| 1456 | if (tag_long < RADIX_TREE_TAG_LONGS - 1) { | ||
| 1457 | /* Pick tags from next element */ | ||
| 1458 | if (tag_bit) | ||
| 1459 | iter->tags |= node->tags[tag][tag_long + 1] << | ||
| 1460 | (BITS_PER_LONG - tag_bit); | ||
| 1461 | /* Clip chunk size, here only BITS_PER_LONG tags */ | ||
| 1462 | iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG); | ||
| 1463 | } | ||
| 1464 | } | ||
| 1465 | |||
| 1466 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 1467 | static void **skip_siblings(struct radix_tree_node **nodep, | ||
| 1468 | void **slot, struct radix_tree_iter *iter) | ||
| 1469 | { | ||
| 1470 | void *sib = node_to_entry(slot - 1); | ||
| 1471 | |||
| 1472 | while (iter->index < iter->next_index) { | ||
| 1473 | *nodep = rcu_dereference_raw(*slot); | ||
| 1474 | if (*nodep && *nodep != sib) | ||
| 1475 | return slot; | ||
| 1476 | slot++; | ||
| 1477 | iter->index = __radix_tree_iter_add(iter, 1); | ||
| 1478 | iter->tags >>= 1; | ||
| 1479 | } | ||
| 1480 | |||
| 1481 | *nodep = NULL; | ||
| 1482 | return NULL; | ||
| 1483 | } | ||
| 1484 | |||
| 1485 | void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, | ||
| 1486 | unsigned flags) | ||
| 1487 | { | ||
| 1488 | unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; | ||
| 1489 | struct radix_tree_node *node = rcu_dereference_raw(*slot); | ||
| 1490 | |||
| 1491 | slot = skip_siblings(&node, slot, iter); | ||
| 1492 | |||
| 1493 | while (radix_tree_is_internal_node(node)) { | ||
| 1494 | unsigned offset; | ||
| 1495 | unsigned long next_index; | ||
| 1496 | |||
| 1497 | if (node == RADIX_TREE_RETRY) | ||
| 1498 | return slot; | ||
| 1499 | node = entry_to_node(node); | ||
| 1500 | iter->node = node; | ||
| 1501 | iter->shift = node->shift; | ||
| 1502 | |||
| 1503 | if (flags & RADIX_TREE_ITER_TAGGED) { | ||
| 1504 | offset = radix_tree_find_next_bit(node, tag, 0); | ||
| 1505 | if (offset == RADIX_TREE_MAP_SIZE) | ||
| 1506 | return NULL; | ||
| 1507 | slot = &node->slots[offset]; | ||
| 1508 | iter->index = __radix_tree_iter_add(iter, offset); | ||
| 1509 | set_iter_tags(iter, node, offset, tag); | ||
| 1510 | node = rcu_dereference_raw(*slot); | ||
| 1511 | } else { | ||
| 1512 | offset = 0; | ||
| 1513 | slot = &node->slots[0]; | ||
| 1514 | for (;;) { | ||
| 1515 | node = rcu_dereference_raw(*slot); | ||
| 1516 | if (node) | ||
| 1517 | break; | ||
| 1518 | slot++; | ||
| 1519 | offset++; | ||
| 1520 | if (offset == RADIX_TREE_MAP_SIZE) | ||
| 1521 | return NULL; | ||
| 1522 | } | ||
| 1523 | iter->index = __radix_tree_iter_add(iter, offset); | ||
| 1524 | } | ||
| 1525 | if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0)) | ||
| 1526 | goto none; | ||
| 1527 | next_index = (iter->index | shift_maxindex(iter->shift)) + 1; | ||
| 1528 | if (next_index < iter->next_index) | ||
| 1529 | iter->next_index = next_index; | ||
| 1530 | } | ||
| 1531 | |||
| 1532 | return slot; | ||
| 1533 | none: | ||
| 1534 | iter->next_index = 0; | ||
| 1535 | return NULL; | ||
| 1536 | } | ||
| 1537 | EXPORT_SYMBOL(__radix_tree_next_slot); | ||
| 1538 | #else | ||
| 1539 | static void **skip_siblings(struct radix_tree_node **nodep, | ||
| 1540 | void **slot, struct radix_tree_iter *iter) | ||
| 1541 | { | ||
| 1542 | return slot; | ||
| 1543 | } | ||
| 1544 | #endif | ||
| 1545 | |||
| 1546 | void **radix_tree_iter_resume(void **slot, struct radix_tree_iter *iter) | ||
| 1547 | { | ||
| 1548 | struct radix_tree_node *node; | ||
| 1549 | |||
| 1550 | slot++; | ||
| 1551 | iter->index = __radix_tree_iter_add(iter, 1); | ||
| 1552 | node = rcu_dereference_raw(*slot); | ||
| 1553 | skip_siblings(&node, slot, iter); | ||
| 1554 | iter->next_index = iter->index; | ||
| 1555 | iter->tags = 0; | ||
| 1556 | return NULL; | ||
| 1557 | } | ||
| 1558 | EXPORT_SYMBOL(radix_tree_iter_resume); | ||
| 1559 | |||
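radix_tree_iter_resume() exists so a long walk can drop its lock, reschedule, and continue from the next index without carrying stale sibling and tag state in the iterator. A hedged sketch of that pattern follows; work(), the spinlock, and the assumption that dropping the lock cannot invalidate the entries being visited are all the caller's business.

```c
#include <linux/radix-tree.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

/* Walk every entry, periodically releasing the lock to stay preemptible. */
static void walk_with_breaks(struct radix_tree_root *root, spinlock_t *lock,
                             void (*work)(void *entry))
{
        struct radix_tree_iter iter;
        void **slot;

        spin_lock(lock);
        radix_tree_for_each_slot(slot, root, &iter, 0) {
                work(radix_tree_deref_slot_protected(slot, lock));
                if (need_resched()) {
                        /* Forget cached chunk state before unlocking. */
                        slot = radix_tree_iter_resume(slot, &iter);
                        spin_unlock(lock);
                        cond_resched();
                        spin_lock(lock);
                }
        }
        spin_unlock(lock);
}
```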
| 905 | /** | 1560 | /** |
| 906 | * radix_tree_next_chunk - find next chunk of slots for iteration | 1561 | * radix_tree_next_chunk - find next chunk of slots for iteration |
| 907 | * | 1562 | * |
| @@ -927,7 +1582,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
| 927 | * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. | 1582 | * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. |
| 928 | * | 1583 | * |
| 929 | * This condition also used by radix_tree_next_slot() to stop | 1584 | * This condition also used by radix_tree_next_slot() to stop |
| 930 | * contiguous iterating, and forbid swithing to the next chunk. | 1585 | * contiguous iterating, and forbid switching to the next chunk. |
| 931 | */ | 1586 | */ |
| 932 | index = iter->next_index; | 1587 | index = iter->next_index; |
| 933 | if (!index && iter->index) | 1588 | if (!index && iter->index) |
| @@ -945,6 +1600,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
| 945 | iter->index = index; | 1600 | iter->index = index; |
| 946 | iter->next_index = maxindex + 1; | 1601 | iter->next_index = maxindex + 1; |
| 947 | iter->tags = 1; | 1602 | iter->tags = 1; |
| 1603 | iter->node = NULL; | ||
| 948 | __set_iter_shift(iter, 0); | 1604 | __set_iter_shift(iter, 0); |
| 949 | return (void **)&root->rnode; | 1605 | return (void **)&root->rnode; |
| 950 | } | 1606 | } |
| @@ -960,9 +1616,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
| 960 | return NULL; | 1616 | return NULL; |
| 961 | 1617 | ||
| 962 | if (flags & RADIX_TREE_ITER_TAGGED) | 1618 | if (flags & RADIX_TREE_ITER_TAGGED) |
| 963 | offset = radix_tree_find_next_bit( | 1619 | offset = radix_tree_find_next_bit(node, tag, |
| 964 | node->tags[tag], | ||
| 965 | RADIX_TREE_MAP_SIZE, | ||
| 966 | offset + 1); | 1620 | offset + 1); |
| 967 | else | 1621 | else |
| 968 | while (++offset < RADIX_TREE_MAP_SIZE) { | 1622 | while (++offset < RADIX_TREE_MAP_SIZE) { |
| @@ -982,154 +1636,26 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
| 982 | child = rcu_dereference_raw(node->slots[offset]); | 1636 | child = rcu_dereference_raw(node->slots[offset]); |
| 983 | } | 1637 | } |
| 984 | 1638 | ||
| 985 | if ((child == NULL) || (child == RADIX_TREE_RETRY)) | 1639 | if (!child) |
| 986 | goto restart; | 1640 | goto restart; |
| 1641 | if (child == RADIX_TREE_RETRY) | ||
| 1642 | break; | ||
| 987 | } while (radix_tree_is_internal_node(child)); | 1643 | } while (radix_tree_is_internal_node(child)); |
| 988 | 1644 | ||
| 989 | /* Update the iterator state */ | 1645 | /* Update the iterator state */ |
| 990 | iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); | 1646 | iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); |
| 991 | iter->next_index = (index | node_maxindex(node)) + 1; | 1647 | iter->next_index = (index | node_maxindex(node)) + 1; |
| 1648 | iter->node = node; | ||
| 992 | __set_iter_shift(iter, node->shift); | 1649 | __set_iter_shift(iter, node->shift); |
| 993 | 1650 | ||
| 994 | /* Construct iter->tags bit-mask from node->tags[tag] array */ | 1651 | if (flags & RADIX_TREE_ITER_TAGGED) |
| 995 | if (flags & RADIX_TREE_ITER_TAGGED) { | 1652 | set_iter_tags(iter, node, offset, tag); |
| 996 | unsigned tag_long, tag_bit; | ||
| 997 | |||
| 998 | tag_long = offset / BITS_PER_LONG; | ||
| 999 | tag_bit = offset % BITS_PER_LONG; | ||
| 1000 | iter->tags = node->tags[tag][tag_long] >> tag_bit; | ||
| 1001 | /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ | ||
| 1002 | if (tag_long < RADIX_TREE_TAG_LONGS - 1) { | ||
| 1003 | /* Pick tags from next element */ | ||
| 1004 | if (tag_bit) | ||
| 1005 | iter->tags |= node->tags[tag][tag_long + 1] << | ||
| 1006 | (BITS_PER_LONG - tag_bit); | ||
| 1007 | /* Clip chunk size, here only BITS_PER_LONG tags */ | ||
| 1008 | iter->next_index = index + BITS_PER_LONG; | ||
| 1009 | } | ||
| 1010 | } | ||
| 1011 | 1653 | ||
| 1012 | return node->slots + offset; | 1654 | return node->slots + offset; |
| 1013 | } | 1655 | } |
| 1014 | EXPORT_SYMBOL(radix_tree_next_chunk); | 1656 | EXPORT_SYMBOL(radix_tree_next_chunk); |
| 1015 | 1657 | ||
| 1016 | /** | 1658 | /** |
| 1017 | * radix_tree_range_tag_if_tagged - for each item in given range set given | ||
| 1018 | * tag if item has another tag set | ||
| 1019 | * @root: radix tree root | ||
| 1020 | * @first_indexp: pointer to a starting index of a range to scan | ||
| 1021 | * @last_index: last index of a range to scan | ||
| 1022 | * @nr_to_tag: maximum number items to tag | ||
| 1023 | * @iftag: tag index to test | ||
| 1024 | * @settag: tag index to set if tested tag is set | ||
| 1025 | * | ||
| 1026 | * This function scans range of radix tree from first_index to last_index | ||
| 1027 | * (inclusive). For each item in the range if iftag is set, the function sets | ||
| 1028 | * also settag. The function stops either after tagging nr_to_tag items or | ||
| 1029 | * after reaching last_index. | ||
| 1030 | * | ||
| 1031 | * The tags must be set from the leaf level only and propagated back up the | ||
| 1032 | * path to the root. We must do this so that we resolve the full path before | ||
| 1033 | * setting any tags on intermediate nodes. If we set tags as we descend, then | ||
| 1034 | * we can get to the leaf node and find that the index that has the iftag | ||
| 1035 | * set is outside the range we are scanning. This results in dangling tags and | ||
| 1036 | * can lead to problems with later tag operations (e.g. livelocks on lookups). | ||
| 1037 | * | ||
| 1038 | * The function returns the number of leaves where the tag was set and sets | ||
| 1039 | * *first_indexp to the first unscanned index. | ||
| 1040 | * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must | ||
| 1041 | * be prepared to handle that. | ||
| 1042 | */ | ||
| 1043 | unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, | ||
| 1044 | unsigned long *first_indexp, unsigned long last_index, | ||
| 1045 | unsigned long nr_to_tag, | ||
| 1046 | unsigned int iftag, unsigned int settag) | ||
| 1047 | { | ||
| 1048 | struct radix_tree_node *parent, *node, *child; | ||
| 1049 | unsigned long maxindex; | ||
| 1050 | unsigned long tagged = 0; | ||
| 1051 | unsigned long index = *first_indexp; | ||
| 1052 | |||
| 1053 | radix_tree_load_root(root, &child, &maxindex); | ||
| 1054 | last_index = min(last_index, maxindex); | ||
| 1055 | if (index > last_index) | ||
| 1056 | return 0; | ||
| 1057 | if (!nr_to_tag) | ||
| 1058 | return 0; | ||
| 1059 | if (!root_tag_get(root, iftag)) { | ||
| 1060 | *first_indexp = last_index + 1; | ||
| 1061 | return 0; | ||
| 1062 | } | ||
| 1063 | if (!radix_tree_is_internal_node(child)) { | ||
| 1064 | *first_indexp = last_index + 1; | ||
| 1065 | root_tag_set(root, settag); | ||
| 1066 | return 1; | ||
| 1067 | } | ||
| 1068 | |||
| 1069 | node = entry_to_node(child); | ||
| 1070 | |||
| 1071 | for (;;) { | ||
| 1072 | unsigned offset = radix_tree_descend(node, &child, index); | ||
| 1073 | if (!child) | ||
| 1074 | goto next; | ||
| 1075 | if (!tag_get(node, iftag, offset)) | ||
| 1076 | goto next; | ||
| 1077 | /* Sibling slots never have tags set on them */ | ||
| 1078 | if (radix_tree_is_internal_node(child)) { | ||
| 1079 | node = entry_to_node(child); | ||
| 1080 | continue; | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | /* tag the leaf */ | ||
| 1084 | tagged++; | ||
| 1085 | tag_set(node, settag, offset); | ||
| 1086 | |||
| 1087 | /* walk back up the path tagging interior nodes */ | ||
| 1088 | parent = node; | ||
| 1089 | for (;;) { | ||
| 1090 | offset = parent->offset; | ||
| 1091 | parent = parent->parent; | ||
| 1092 | if (!parent) | ||
| 1093 | break; | ||
| 1094 | /* stop if we find a node with the tag already set */ | ||
| 1095 | if (tag_get(parent, settag, offset)) | ||
| 1096 | break; | ||
| 1097 | tag_set(parent, settag, offset); | ||
| 1098 | } | ||
| 1099 | next: | ||
| 1100 | /* Go to next entry in node */ | ||
| 1101 | index = ((index >> node->shift) + 1) << node->shift; | ||
| 1102 | /* Overflow can happen when last_index is ~0UL... */ | ||
| 1103 | if (index > last_index || !index) | ||
| 1104 | break; | ||
| 1105 | offset = (index >> node->shift) & RADIX_TREE_MAP_MASK; | ||
| 1106 | while (offset == 0) { | ||
| 1107 | /* | ||
| 1108 | * We've fully scanned this node. Go up. Because | ||
| 1109 | * last_index is guaranteed to be in the tree, what | ||
| 1110 | * we do below cannot wander astray. | ||
| 1111 | */ | ||
| 1112 | node = node->parent; | ||
| 1113 | offset = (index >> node->shift) & RADIX_TREE_MAP_MASK; | ||
| 1114 | } | ||
| 1115 | if (is_sibling_entry(node, node->slots[offset])) | ||
| 1116 | goto next; | ||
| 1117 | if (tagged >= nr_to_tag) | ||
| 1118 | break; | ||
| 1119 | } | ||
| 1120 | /* | ||
| 1121 | * There is no need to set the root tag with settag if no leaf within | ||
| 1122 | * the range from *first_indexp to last_index was tagged. | ||
| 1123 | */ | ||
| 1124 | if (tagged > 0) | ||
| 1125 | root_tag_set(root, settag); | ||
| 1126 | *first_indexp = index; | ||
| 1127 | |||
| 1128 | return tagged; | ||
| 1129 | } | ||
| 1130 | EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); | ||
| 1131 | |||
| 1132 | /** | ||
| 1133 | * radix_tree_gang_lookup - perform multiple lookup on a radix tree | 1659 | * radix_tree_gang_lookup - perform multiple lookup on a radix tree |
| 1134 | * @root: radix tree root | 1660 | * @root: radix tree root |
| 1135 | * @results: where the results of the lookup are placed | 1661 | * @results: where the results of the lookup are placed |
| @@ -1294,174 +1820,6 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, | |||
| 1294 | } | 1820 | } |
| 1295 | EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); | 1821 | EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); |
| 1296 | 1822 | ||
| 1297 | #if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) | ||
| 1298 | #include <linux/sched.h> /* for cond_resched() */ | ||
| 1299 | |||
| 1300 | struct locate_info { | ||
| 1301 | unsigned long found_index; | ||
| 1302 | bool stop; | ||
| 1303 | }; | ||
| 1304 | |||
| 1305 | /* | ||
| 1306 | * This linear search is at present only useful to shmem_unuse_inode(). | ||
| 1307 | */ | ||
| 1308 | static unsigned long __locate(struct radix_tree_node *slot, void *item, | ||
| 1309 | unsigned long index, struct locate_info *info) | ||
| 1310 | { | ||
| 1311 | unsigned long i; | ||
| 1312 | |||
| 1313 | do { | ||
| 1314 | unsigned int shift = slot->shift; | ||
| 1315 | |||
| 1316 | for (i = (index >> shift) & RADIX_TREE_MAP_MASK; | ||
| 1317 | i < RADIX_TREE_MAP_SIZE; | ||
| 1318 | i++, index += (1UL << shift)) { | ||
| 1319 | struct radix_tree_node *node = | ||
| 1320 | rcu_dereference_raw(slot->slots[i]); | ||
| 1321 | if (node == RADIX_TREE_RETRY) | ||
| 1322 | goto out; | ||
| 1323 | if (!radix_tree_is_internal_node(node)) { | ||
| 1324 | if (node == item) { | ||
| 1325 | info->found_index = index; | ||
| 1326 | info->stop = true; | ||
| 1327 | goto out; | ||
| 1328 | } | ||
| 1329 | continue; | ||
| 1330 | } | ||
| 1331 | node = entry_to_node(node); | ||
| 1332 | if (is_sibling_entry(slot, node)) | ||
| 1333 | continue; | ||
| 1334 | slot = node; | ||
| 1335 | break; | ||
| 1336 | } | ||
| 1337 | } while (i < RADIX_TREE_MAP_SIZE); | ||
| 1338 | |||
| 1339 | out: | ||
| 1340 | if ((index == 0) && (i == RADIX_TREE_MAP_SIZE)) | ||
| 1341 | info->stop = true; | ||
| 1342 | return index; | ||
| 1343 | } | ||
| 1344 | |||
| 1345 | /** | ||
| 1346 | * radix_tree_locate_item - search through radix tree for item | ||
| 1347 | * @root: radix tree root | ||
| 1348 | * @item: item to be found | ||
| 1349 | * | ||
| 1350 | * Returns index where item was found, or -1 if not found. | ||
| 1351 | * Caller must hold no lock (since this time-consuming function needs | ||
| 1352 | * to be preemptible), and must check afterwards if item is still there. | ||
| 1353 | */ | ||
| 1354 | unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) | ||
| 1355 | { | ||
| 1356 | struct radix_tree_node *node; | ||
| 1357 | unsigned long max_index; | ||
| 1358 | unsigned long cur_index = 0; | ||
| 1359 | struct locate_info info = { | ||
| 1360 | .found_index = -1, | ||
| 1361 | .stop = false, | ||
| 1362 | }; | ||
| 1363 | |||
| 1364 | do { | ||
| 1365 | rcu_read_lock(); | ||
| 1366 | node = rcu_dereference_raw(root->rnode); | ||
| 1367 | if (!radix_tree_is_internal_node(node)) { | ||
| 1368 | rcu_read_unlock(); | ||
| 1369 | if (node == item) | ||
| 1370 | info.found_index = 0; | ||
| 1371 | break; | ||
| 1372 | } | ||
| 1373 | |||
| 1374 | node = entry_to_node(node); | ||
| 1375 | |||
| 1376 | max_index = node_maxindex(node); | ||
| 1377 | if (cur_index > max_index) { | ||
| 1378 | rcu_read_unlock(); | ||
| 1379 | break; | ||
| 1380 | } | ||
| 1381 | |||
| 1382 | cur_index = __locate(node, item, cur_index, &info); | ||
| 1383 | rcu_read_unlock(); | ||
| 1384 | cond_resched(); | ||
| 1385 | } while (!info.stop && cur_index <= max_index); | ||
| 1386 | |||
| 1387 | return info.found_index; | ||
| 1388 | } | ||
| 1389 | #else | ||
| 1390 | unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) | ||
| 1391 | { | ||
| 1392 | return -1; | ||
| 1393 | } | ||
| 1394 | #endif /* CONFIG_SHMEM && CONFIG_SWAP */ | ||
| 1395 | |||
| 1396 | /** | ||
| 1397 | * radix_tree_shrink - shrink radix tree to minimum height | ||
| 1398 | * @root radix tree root | ||
| 1399 | */ | ||
| 1400 | static inline bool radix_tree_shrink(struct radix_tree_root *root) | ||
| 1401 | { | ||
| 1402 | bool shrunk = false; | ||
| 1403 | |||
| 1404 | for (;;) { | ||
| 1405 | struct radix_tree_node *node = root->rnode; | ||
| 1406 | struct radix_tree_node *child; | ||
| 1407 | |||
| 1408 | if (!radix_tree_is_internal_node(node)) | ||
| 1409 | break; | ||
| 1410 | node = entry_to_node(node); | ||
| 1411 | |||
| 1412 | /* | ||
| 1413 | * The candidate node has more than one child, or its child | ||
| 1414 | * is not at the leftmost slot, or the child is a multiorder | ||
| 1415 | * entry, we cannot shrink. | ||
| 1416 | */ | ||
| 1417 | if (node->count != 1) | ||
| 1418 | break; | ||
| 1419 | child = node->slots[0]; | ||
| 1420 | if (!child) | ||
| 1421 | break; | ||
| 1422 | if (!radix_tree_is_internal_node(child) && node->shift) | ||
| 1423 | break; | ||
| 1424 | |||
| 1425 | if (radix_tree_is_internal_node(child)) | ||
| 1426 | entry_to_node(child)->parent = NULL; | ||
| 1427 | |||
| 1428 | /* | ||
| 1429 | * We don't need rcu_assign_pointer(), since we are simply | ||
| 1430 | * moving the node from one part of the tree to another: if it | ||
| 1431 | * was safe to dereference the old pointer to it | ||
| 1432 | * (node->slots[0]), it will be safe to dereference the new | ||
| 1433 | * one (root->rnode) as far as dependent read barriers go. | ||
| 1434 | */ | ||
| 1435 | root->rnode = child; | ||
| 1436 | |||
| 1437 | /* | ||
| 1438 | * We have a dilemma here. The node's slot[0] must not be | ||
| 1439 | * NULLed in case there are concurrent lookups expecting to | ||
| 1440 | * find the item. However if this was a bottom-level node, | ||
| 1441 | * then it may be subject to the slot pointer being visible | ||
| 1442 | * to callers dereferencing it. If item corresponding to | ||
| 1443 | * slot[0] is subsequently deleted, these callers would expect | ||
| 1444 | * their slot to become empty sooner or later. | ||
| 1445 | * | ||
| 1446 | * For example, lockless pagecache will look up a slot, deref | ||
| 1447 | * the page pointer, and if the page has 0 refcount it means it | ||
| 1448 | * was concurrently deleted from pagecache so try the deref | ||
| 1449 | * again. Fortunately there is already a requirement for logic | ||
| 1450 | * to retry the entire slot lookup -- the indirect pointer | ||
| 1451 | * problem (replacing direct root node with an indirect pointer | ||
| 1452 | * also results in a stale slot). So tag the slot as indirect | ||
| 1453 | * to force callers to retry. | ||
| 1454 | */ | ||
| 1455 | if (!radix_tree_is_internal_node(child)) | ||
| 1456 | node->slots[0] = RADIX_TREE_RETRY; | ||
| 1457 | |||
| 1458 | radix_tree_node_free(node); | ||
| 1459 | shrunk = true; | ||
| 1460 | } | ||
| 1461 | |||
| 1462 | return shrunk; | ||
| 1463 | } | ||
| 1464 | |||
| 1465 | /** | 1823 | /** |
| 1466 | * __radix_tree_delete_node - try to free node after clearing a slot | 1824 | * __radix_tree_delete_node - try to free node after clearing a slot |
| 1467 | * @root: radix tree root | 1825 | * @root: radix tree root |
| @@ -1470,53 +1828,11 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) | |||
| 1470 | * After clearing the slot at @index in @node from radix tree | 1828 | * After clearing the slot at @index in @node from radix tree |
| 1471 | * rooted at @root, call this function to attempt freeing the | 1829 | * rooted at @root, call this function to attempt freeing the |
| 1472 | * node and shrinking the tree. | 1830 | * node and shrinking the tree. |
| 1473 | * | ||
| 1474 | * Returns %true if @node was freed, %false otherwise. | ||
| 1475 | */ | 1831 | */ |
| 1476 | bool __radix_tree_delete_node(struct radix_tree_root *root, | 1832 | void __radix_tree_delete_node(struct radix_tree_root *root, |
| 1477 | struct radix_tree_node *node) | 1833 | struct radix_tree_node *node) |
| 1478 | { | 1834 | { |
| 1479 | bool deleted = false; | 1835 | delete_node(root, node, NULL, NULL); |
| 1480 | |||
| 1481 | do { | ||
| 1482 | struct radix_tree_node *parent; | ||
| 1483 | |||
| 1484 | if (node->count) { | ||
| 1485 | if (node == entry_to_node(root->rnode)) | ||
| 1486 | deleted |= radix_tree_shrink(root); | ||
| 1487 | return deleted; | ||
| 1488 | } | ||
| 1489 | |||
| 1490 | parent = node->parent; | ||
| 1491 | if (parent) { | ||
| 1492 | parent->slots[node->offset] = NULL; | ||
| 1493 | parent->count--; | ||
| 1494 | } else { | ||
| 1495 | root_tag_clear_all(root); | ||
| 1496 | root->rnode = NULL; | ||
| 1497 | } | ||
| 1498 | |||
| 1499 | radix_tree_node_free(node); | ||
| 1500 | deleted = true; | ||
| 1501 | |||
| 1502 | node = parent; | ||
| 1503 | } while (node); | ||
| 1504 | |||
| 1505 | return deleted; | ||
| 1506 | } | ||
| 1507 | |||
| 1508 | static inline void delete_sibling_entries(struct radix_tree_node *node, | ||
| 1509 | void *ptr, unsigned offset) | ||
| 1510 | { | ||
| 1511 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 1512 | int i; | ||
| 1513 | for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { | ||
| 1514 | if (node->slots[offset + i] != ptr) | ||
| 1515 | break; | ||
| 1516 | node->slots[offset + i] = NULL; | ||
| 1517 | node->count--; | ||
| 1518 | } | ||
| 1519 | #endif | ||
| 1520 | } | 1836 | } |
| 1521 | 1837 | ||
| 1522 | /** | 1838 | /** |
| @@ -1558,11 +1874,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, | |||
| 1558 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | 1874 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) |
| 1559 | node_tag_clear(root, node, tag, offset); | 1875 | node_tag_clear(root, node, tag, offset); |
| 1560 | 1876 | ||
| 1561 | delete_sibling_entries(node, node_to_entry(slot), offset); | 1877 | __radix_tree_replace(root, node, slot, NULL, NULL, NULL); |
| 1562 | node->slots[offset] = NULL; | ||
| 1563 | node->count--; | ||
| 1564 | |||
| 1565 | __radix_tree_delete_node(root, node); | ||
| 1566 | 1878 | ||
| 1567 | return entry; | 1879 | return entry; |
| 1568 | } | 1880 | } |
| @@ -1642,32 +1954,31 @@ static __init void radix_tree_init_maxnodes(void) | |||
| 1642 | } | 1954 | } |
| 1643 | } | 1955 | } |
| 1644 | 1956 | ||
| 1645 | static int radix_tree_callback(struct notifier_block *nfb, | 1957 | static int radix_tree_cpu_dead(unsigned int cpu) |
| 1646 | unsigned long action, void *hcpu) | ||
| 1647 | { | 1958 | { |
| 1648 | int cpu = (long)hcpu; | ||
| 1649 | struct radix_tree_preload *rtp; | 1959 | struct radix_tree_preload *rtp; |
| 1650 | struct radix_tree_node *node; | 1960 | struct radix_tree_node *node; |
| 1651 | 1961 | ||
| 1652 | /* Free per-cpu pool of preloaded nodes */ | 1962 | /* Free per-cpu pool of preloaded nodes */ |
| 1653 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | 1963 | rtp = &per_cpu(radix_tree_preloads, cpu); |
| 1654 | rtp = &per_cpu(radix_tree_preloads, cpu); | 1964 | while (rtp->nr) { |
| 1655 | while (rtp->nr) { | 1965 | node = rtp->nodes; |
| 1656 | node = rtp->nodes; | 1966 | rtp->nodes = node->private_data; |
| 1657 | rtp->nodes = node->private_data; | 1967 | kmem_cache_free(radix_tree_node_cachep, node); |
| 1658 | kmem_cache_free(radix_tree_node_cachep, node); | 1968 | rtp->nr--; |
| 1659 | rtp->nr--; | ||
| 1660 | } | ||
| 1661 | } | 1969 | } |
| 1662 | return NOTIFY_OK; | 1970 | return 0; |
| 1663 | } | 1971 | } |
| 1664 | 1972 | ||
| 1665 | void __init radix_tree_init(void) | 1973 | void __init radix_tree_init(void) |
| 1666 | { | 1974 | { |
| 1975 | int ret; | ||
| 1667 | radix_tree_node_cachep = kmem_cache_create("radix_tree_node", | 1976 | radix_tree_node_cachep = kmem_cache_create("radix_tree_node", |
| 1668 | sizeof(struct radix_tree_node), 0, | 1977 | sizeof(struct radix_tree_node), 0, |
| 1669 | SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, | 1978 | SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, |
| 1670 | radix_tree_node_ctor); | 1979 | radix_tree_node_ctor); |
| 1671 | radix_tree_init_maxnodes(); | 1980 | radix_tree_init_maxnodes(); |
| 1672 | hotcpu_notifier(radix_tree_callback, 0); | 1981 | ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", |
| 1982 | NULL, radix_tree_cpu_dead); | ||
| 1983 | WARN_ON(ret < 0); | ||
| 1673 | } | 1984 | } |
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c index 76734004358d..20bca3d44f67 100644 --- a/lib/raid6/avx2.c +++ b/lib/raid6/avx2.c | |||
| @@ -87,9 +87,57 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 87 | kernel_fpu_end(); | 87 | kernel_fpu_end(); |
| 88 | } | 88 | } |
| 89 | 89 | ||
| 90 | static void raid6_avx21_xor_syndrome(int disks, int start, int stop, | ||
| 91 | size_t bytes, void **ptrs) | ||
| 92 | { | ||
| 93 | u8 **dptr = (u8 **)ptrs; | ||
| 94 | u8 *p, *q; | ||
| 95 | int d, z, z0; | ||
| 96 | |||
| 97 | z0 = stop; /* P/Q right side optimization */ | ||
| 98 | p = dptr[disks-2]; /* XOR parity */ | ||
| 99 | q = dptr[disks-1]; /* RS syndrome */ | ||
| 100 | |||
| 101 | kernel_fpu_begin(); | ||
| 102 | |||
| 103 | asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); | ||
| 104 | |||
| 105 | for (d = 0 ; d < bytes ; d += 32) { | ||
| 106 | asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d])); | ||
| 107 | asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d])); | ||
| 108 | asm volatile("vpxor %ymm4,%ymm2,%ymm2"); | ||
| 109 | /* P/Q data pages */ | ||
| 110 | for (z = z0-1 ; z >= start ; z--) { | ||
| 111 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 112 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 113 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 114 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 115 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 116 | asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d])); | ||
| 117 | asm volatile("vpxor %ymm5,%ymm2,%ymm2"); | ||
| 118 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 119 | } | ||
| 120 | /* P/Q left side optimization */ | ||
| 121 | for (z = start-1 ; z >= 0 ; z--) { | ||
| 122 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 123 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 124 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 125 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 126 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 127 | } | ||
| 128 | asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d])); | ||
| 129 | /* Don't use movntdq for r/w memory area < cache line */ | ||
| 130 | asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d])); | ||
| 131 | asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d])); | ||
| 132 | } | ||
| 133 | |||
| 134 | asm volatile("sfence" : : : "memory"); | ||
| 135 | kernel_fpu_end(); | ||
| 136 | } | ||
| 137 | |||
| 90 | const struct raid6_calls raid6_avx2x1 = { | 138 | const struct raid6_calls raid6_avx2x1 = { |
| 91 | raid6_avx21_gen_syndrome, | 139 | raid6_avx21_gen_syndrome, |
| 92 | NULL, /* XOR not yet implemented */ | 140 | raid6_avx21_xor_syndrome, |
| 93 | raid6_have_avx2, | 141 | raid6_have_avx2, |
| 94 | "avx2x1", | 142 | "avx2x1", |
| 95 | 1 /* Has cache hints */ | 143 | 1 /* Has cache hints */ |
| @@ -149,9 +197,77 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 149 | kernel_fpu_end(); | 197 | kernel_fpu_end(); |
| 150 | } | 198 | } |
| 151 | 199 | ||
| 200 | static void raid6_avx22_xor_syndrome(int disks, int start, int stop, | ||
| 201 | size_t bytes, void **ptrs) | ||
| 202 | { | ||
| 203 | u8 **dptr = (u8 **)ptrs; | ||
| 204 | u8 *p, *q; | ||
| 205 | int d, z, z0; | ||
| 206 | |||
| 207 | z0 = stop; /* P/Q right side optimization */ | ||
| 208 | p = dptr[disks-2]; /* XOR parity */ | ||
| 209 | q = dptr[disks-1]; /* RS syndrome */ | ||
| 210 | |||
| 211 | kernel_fpu_begin(); | ||
| 212 | |||
| 213 | asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); | ||
| 214 | |||
| 215 | for (d = 0 ; d < bytes ; d += 64) { | ||
| 216 | asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d])); | ||
| 217 | asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32])); | ||
| 218 | asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d])); | ||
| 219 | asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32])); | ||
| 220 | asm volatile("vpxor %ymm4,%ymm2,%ymm2"); | ||
| 221 | asm volatile("vpxor %ymm6,%ymm3,%ymm3"); | ||
| 222 | /* P/Q data pages */ | ||
| 223 | for (z = z0-1 ; z >= start ; z--) { | ||
| 224 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 225 | asm volatile("vpxor %ymm7,%ymm7,%ymm7"); | ||
| 226 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 227 | asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); | ||
| 228 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 229 | asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); | ||
| 230 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 231 | asm volatile("vpand %ymm0,%ymm7,%ymm7"); | ||
| 232 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 233 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 234 | asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d])); | ||
| 235 | asm volatile("vmovdqa %0,%%ymm7" | ||
| 236 | :: "m" (dptr[z][d+32])); | ||
| 237 | asm volatile("vpxor %ymm5,%ymm2,%ymm2"); | ||
| 238 | asm volatile("vpxor %ymm7,%ymm3,%ymm3"); | ||
| 239 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 240 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 241 | } | ||
| 242 | /* P/Q left side optimization */ | ||
| 243 | for (z = start-1 ; z >= 0 ; z--) { | ||
| 244 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 245 | asm volatile("vpxor %ymm7,%ymm7,%ymm7"); | ||
| 246 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 247 | asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); | ||
| 248 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 249 | asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); | ||
| 250 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 251 | asm volatile("vpand %ymm0,%ymm7,%ymm7"); | ||
| 252 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 253 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 254 | } | ||
| 255 | asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d])); | ||
| 256 | asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32])); | ||
| 257 | /* Don't use movntdq for r/w memory area < cache line */ | ||
| 258 | asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d])); | ||
| 259 | asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32])); | ||
| 260 | asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d])); | ||
| 261 | asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32])); | ||
| 262 | } | ||
| 263 | |||
| 264 | asm volatile("sfence" : : : "memory"); | ||
| 265 | kernel_fpu_end(); | ||
| 266 | } | ||
| 267 | |||
| 152 | const struct raid6_calls raid6_avx2x2 = { | 268 | const struct raid6_calls raid6_avx2x2 = { |
| 153 | raid6_avx22_gen_syndrome, | 269 | raid6_avx22_gen_syndrome, |
| 154 | NULL, /* XOR not yet implemented */ | 270 | raid6_avx22_xor_syndrome, |
| 155 | raid6_have_avx2, | 271 | raid6_have_avx2, |
| 156 | "avx2x2", | 272 | "avx2x2", |
| 157 | 1 /* Has cache hints */ | 273 | 1 /* Has cache hints */ |
| @@ -242,9 +358,119 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 242 | kernel_fpu_end(); | 358 | kernel_fpu_end(); |
| 243 | } | 359 | } |
| 244 | 360 | ||
| 361 | static void raid6_avx24_xor_syndrome(int disks, int start, int stop, | ||
| 362 | size_t bytes, void **ptrs) | ||
| 363 | { | ||
| 364 | u8 **dptr = (u8 **)ptrs; | ||
| 365 | u8 *p, *q; | ||
| 366 | int d, z, z0; | ||
| 367 | |||
| 368 | z0 = stop; /* P/Q right side optimization */ | ||
| 369 | p = dptr[disks-2]; /* XOR parity */ | ||
| 370 | q = dptr[disks-1]; /* RS syndrome */ | ||
| 371 | |||
| 372 | kernel_fpu_begin(); | ||
| 373 | |||
| 374 | asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0])); | ||
| 375 | |||
| 376 | for (d = 0 ; d < bytes ; d += 128) { | ||
| 377 | asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d])); | ||
| 378 | asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32])); | ||
| 379 | asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64])); | ||
| 380 | asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96])); | ||
| 381 | asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d])); | ||
| 382 | asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32])); | ||
| 383 | asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64])); | ||
| 384 | asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96])); | ||
| 385 | asm volatile("vpxor %ymm4,%ymm2,%ymm2"); | ||
| 386 | asm volatile("vpxor %ymm6,%ymm3,%ymm3"); | ||
| 387 | asm volatile("vpxor %ymm12,%ymm10,%ymm10"); | ||
| 388 | asm volatile("vpxor %ymm14,%ymm11,%ymm11"); | ||
| 389 | /* P/Q data pages */ | ||
| 390 | for (z = z0-1 ; z >= start ; z--) { | ||
| 391 | asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); | ||
| 392 | asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64])); | ||
| 393 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 394 | asm volatile("vpxor %ymm7,%ymm7,%ymm7"); | ||
| 395 | asm volatile("vpxor %ymm13,%ymm13,%ymm13"); | ||
| 396 | asm volatile("vpxor %ymm15,%ymm15,%ymm15"); | ||
| 397 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 398 | asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); | ||
| 399 | asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13"); | ||
| 400 | asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15"); | ||
| 401 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 402 | asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); | ||
| 403 | asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); | ||
| 404 | asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); | ||
| 405 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 406 | asm volatile("vpand %ymm0,%ymm7,%ymm7"); | ||
| 407 | asm volatile("vpand %ymm0,%ymm13,%ymm13"); | ||
| 408 | asm volatile("vpand %ymm0,%ymm15,%ymm15"); | ||
| 409 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 410 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 411 | asm volatile("vpxor %ymm13,%ymm12,%ymm12"); | ||
| 412 | asm volatile("vpxor %ymm15,%ymm14,%ymm14"); | ||
| 413 | asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d])); | ||
| 414 | asm volatile("vmovdqa %0,%%ymm7" | ||
| 415 | :: "m" (dptr[z][d+32])); | ||
| 416 | asm volatile("vmovdqa %0,%%ymm13" | ||
| 417 | :: "m" (dptr[z][d+64])); | ||
| 418 | asm volatile("vmovdqa %0,%%ymm15" | ||
| 419 | :: "m" (dptr[z][d+96])); | ||
| 420 | asm volatile("vpxor %ymm5,%ymm2,%ymm2"); | ||
| 421 | asm volatile("vpxor %ymm7,%ymm3,%ymm3"); | ||
| 422 | asm volatile("vpxor %ymm13,%ymm10,%ymm10"); | ||
| 423 | asm volatile("vpxor %ymm15,%ymm11,%ymm11"); | ||
| 424 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 425 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 426 | asm volatile("vpxor %ymm13,%ymm12,%ymm12"); | ||
| 427 | asm volatile("vpxor %ymm15,%ymm14,%ymm14"); | ||
| 428 | } | ||
| 429 | asm volatile("prefetchnta %0" :: "m" (q[d])); | ||
| 430 | asm volatile("prefetchnta %0" :: "m" (q[d+64])); | ||
| 431 | /* P/Q left side optimization */ | ||
| 432 | for (z = start-1 ; z >= 0 ; z--) { | ||
| 433 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 434 | asm volatile("vpxor %ymm7,%ymm7,%ymm7"); | ||
| 435 | asm volatile("vpxor %ymm13,%ymm13,%ymm13"); | ||
| 436 | asm volatile("vpxor %ymm15,%ymm15,%ymm15"); | ||
| 437 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 438 | asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); | ||
| 439 | asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13"); | ||
| 440 | asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15"); | ||
| 441 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 442 | asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); | ||
| 443 | asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); | ||
| 444 | asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); | ||
| 445 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 446 | asm volatile("vpand %ymm0,%ymm7,%ymm7"); | ||
| 447 | asm volatile("vpand %ymm0,%ymm13,%ymm13"); | ||
| 448 | asm volatile("vpand %ymm0,%ymm15,%ymm15"); | ||
| 449 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 450 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 451 | asm volatile("vpxor %ymm13,%ymm12,%ymm12"); | ||
| 452 | asm volatile("vpxor %ymm15,%ymm14,%ymm14"); | ||
| 453 | } | ||
| 454 | asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); | ||
| 455 | asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); | ||
| 456 | asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64])); | ||
| 457 | asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96])); | ||
| 458 | asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d])); | ||
| 459 | asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32])); | ||
| 460 | asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64])); | ||
| 461 | asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96])); | ||
| 462 | asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); | ||
| 463 | asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); | ||
| 464 | asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64])); | ||
| 465 | asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96])); | ||
| 466 | } | ||
| 467 | asm volatile("sfence" : : : "memory"); | ||
| 468 | kernel_fpu_end(); | ||
| 469 | } | ||
| 470 | |||
| 245 | const struct raid6_calls raid6_avx2x4 = { | 471 | const struct raid6_calls raid6_avx2x4 = { |
| 246 | raid6_avx24_gen_syndrome, | 472 | raid6_avx24_gen_syndrome, |
| 247 | NULL, /* XOR not yet implemented */ | 473 | raid6_avx24_xor_syndrome, |
| 248 | raid6_have_avx2, | 474 | raid6_have_avx2, |
| 249 | "avx2x4", | 475 | "avx2x4", |
| 250 | 1 /* Has cache hints */ | 476 | 1 /* Has cache hints */ |
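All three new routines perform the same read-modify-write update, just at different unroll widths: for the touched data disks in [start, stop], P accumulates plain XORs, while Q accumulates g^z-weighted contributions via Horner's rule, with the "left side" loop supplying the remaining doublings for disks below start before Q is folded back into memory. A byte-at-a-time reference sketch of that logic (illustrative C, not the kernel's generic implementation):

#include <stddef.h>
#include <stdint.h>

static inline uint8_t gf256_mul2(uint8_t v)
{
	return (uint8_t)((v << 1) ^ ((v & 0x80) ? 0x1d : 0x00));
}

/* Reference sketch of xor_syndrome: fold the contribution of data disks
 * start..stop into the existing P (disks-2) and Q (disks-1) blocks. */
static void xor_syndrome_ref(int disks, int start, int stop,
			     size_t bytes, uint8_t **dptr)
{
	uint8_t *p = dptr[disks - 2];
	uint8_t *q = dptr[disks - 1];
	size_t d;
	int z;

	for (d = 0; d < bytes; d++) {
		uint8_t wq = dptr[stop][d];		/* running Q term   */
		uint8_t wp = p[d] ^ wq;			/* P is a plain XOR */

		for (z = stop - 1; z >= start; z--) {	/* right side	    */
			wq = gf256_mul2(wq) ^ dptr[z][d];
			wp ^= dptr[z][d];
		}
		for (z = start - 1; z >= 0; z--)	/* left side	    */
			wq = gf256_mul2(wq);

		p[d] = wp;
		q[d] ^= wq;
	}
}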
diff --git a/lib/rbtree.c b/lib/rbtree.c index eb8a19fee110..1f8b112a7c35 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c | |||
| @@ -296,11 +296,26 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, | |||
| 296 | * | 296 | * |
| 297 | * (p) (p) | 297 | * (p) (p) |
| 298 | * / \ / \ | 298 | * / \ / \ |
| 299 | * N S --> N Sl | 299 | * N S --> N sl |
| 300 | * / \ \ | 300 | * / \ \ |
| 301 | * sl Sr s | 301 | * sl Sr S |
| 302 | * \ | 302 | * \ |
| 303 | * Sr | 303 | * Sr |
| 304 | * | ||
| 305 | * Note: p might be red, and then both | ||
| 306 | * p and sl are red after rotation (which | ||
| 307 | * breaks property 4). This is fixed in | ||
| 308 | * Case 4 (in __rb_rotate_set_parents() | ||
| 309 | * which sets sl to the color of p | ||
| 310 | * and sets p to RB_BLACK) | ||
| 311 | * | ||
| 312 | * (p) (sl) | ||
| 313 | * / \ / \ | ||
| 314 | * N sl --> P S | ||
| 315 | * \ / \ | ||
| 316 | * S N Sr | ||
| 317 | * \ | ||
| 318 | * Sr | ||
| 304 | */ | 319 | */ |
| 305 | tmp1 = tmp2->rb_right; | 320 | tmp1 = tmp2->rb_right; |
| 306 | WRITE_ONCE(sibling->rb_left, tmp1); | 321 | WRITE_ONCE(sibling->rb_left, tmp1); |
| @@ -365,7 +380,7 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, | |||
| 365 | } | 380 | } |
| 366 | break; | 381 | break; |
| 367 | } | 382 | } |
| 368 | /* Case 3 - right rotate at sibling */ | 383 | /* Case 3 - left rotate at sibling */ |
| 369 | tmp1 = tmp2->rb_left; | 384 | tmp1 = tmp2->rb_left; |
| 370 | WRITE_ONCE(sibling->rb_right, tmp1); | 385 | WRITE_ONCE(sibling->rb_right, tmp1); |
| 371 | WRITE_ONCE(tmp2->rb_left, sibling); | 386 | WRITE_ONCE(tmp2->rb_left, sibling); |
| @@ -377,7 +392,7 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, | |||
| 377 | tmp1 = sibling; | 392 | tmp1 = sibling; |
| 378 | sibling = tmp2; | 393 | sibling = tmp2; |
| 379 | } | 394 | } |
| 380 | /* Case 4 - left rotate at parent + color flips */ | 395 | /* Case 4 - right rotate at parent + color flips */ |
| 381 | tmp2 = sibling->rb_right; | 396 | tmp2 = sibling->rb_right; |
| 382 | WRITE_ONCE(parent->rb_left, tmp2); | 397 | WRITE_ONCE(parent->rb_left, tmp2); |
| 383 | WRITE_ONCE(sibling->rb_right, parent); | 398 | WRITE_ONCE(sibling->rb_right, parent); |
diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 22e13a0e19d7..cb1b54ee8527 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c | |||
| @@ -425,7 +425,8 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, | |||
| 425 | phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, | 425 | phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, |
| 426 | dma_addr_t tbl_dma_addr, | 426 | dma_addr_t tbl_dma_addr, |
| 427 | phys_addr_t orig_addr, size_t size, | 427 | phys_addr_t orig_addr, size_t size, |
| 428 | enum dma_data_direction dir) | 428 | enum dma_data_direction dir, |
| 429 | unsigned long attrs) | ||
| 429 | { | 430 | { |
| 430 | unsigned long flags; | 431 | unsigned long flags; |
| 431 | phys_addr_t tlb_addr; | 432 | phys_addr_t tlb_addr; |
| @@ -526,7 +527,8 @@ found: | |||
| 526 | */ | 527 | */ |
| 527 | for (i = 0; i < nslots; i++) | 528 | for (i = 0; i < nslots; i++) |
| 528 | io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); | 529 | io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); |
| 529 | if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) | 530 | if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && |
| 531 | (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) | ||
| 530 | swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); | 532 | swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); |
| 531 | 533 | ||
| 532 | return tlb_addr; | 534 | return tlb_addr; |
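With the new attrs argument, a mapping made with DMA_ATTR_SKIP_CPU_SYNC skips the map-time copy into the bounce buffer; the caller then owns the ordering and must sync explicitly before the device reads the data. A hedged sketch of how a driver might use this through the generic DMA API (device, buffer and function names are placeholders, not from this patch):

#include <linux/dma-mapping.h>
#include <linux/errno.h>

/* Illustrative only: map a TX buffer without the map-time bounce copy,
 * fill it, then let the explicit sync perform the copy that was skipped. */
static int example_map_tx(struct device *dev, void *buf, size_t len,
			  dma_addr_t *out)
{
	*out = dma_map_single_attrs(dev, buf, len, DMA_TO_DEVICE,
				    DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, *out))
		return -ENOMEM;

	/* ... CPU writes the payload into buf here ... */

	dma_sync_single_for_device(dev, *out, len, DMA_TO_DEVICE);
	return 0;
}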
| @@ -539,18 +541,20 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); | |||
| 539 | 541 | ||
| 540 | static phys_addr_t | 542 | static phys_addr_t |
| 541 | map_single(struct device *hwdev, phys_addr_t phys, size_t size, | 543 | map_single(struct device *hwdev, phys_addr_t phys, size_t size, |
| 542 | enum dma_data_direction dir) | 544 | enum dma_data_direction dir, unsigned long attrs) |
| 543 | { | 545 | { |
| 544 | dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); | 546 | dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); |
| 545 | 547 | ||
| 546 | return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); | 548 | return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, |
| 549 | dir, attrs); | ||
| 547 | } | 550 | } |
| 548 | 551 | ||
| 549 | /* | 552 | /* |
| 550 | * dma_addr is the kernel virtual address of the bounce buffer to unmap. | 553 | * dma_addr is the kernel virtual address of the bounce buffer to unmap. |
| 551 | */ | 554 | */ |
| 552 | void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, | 555 | void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, |
| 553 | size_t size, enum dma_data_direction dir) | 556 | size_t size, enum dma_data_direction dir, |
| 557 | unsigned long attrs) | ||
| 554 | { | 558 | { |
| 555 | unsigned long flags; | 559 | unsigned long flags; |
| 556 | int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; | 560 | int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; |
| @@ -561,6 +565,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, | |||
| 561 | * First, sync the memory before unmapping the entry | 565 | * First, sync the memory before unmapping the entry |
| 562 | */ | 566 | */ |
| 563 | if (orig_addr != INVALID_PHYS_ADDR && | 567 | if (orig_addr != INVALID_PHYS_ADDR && |
| 568 | !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && | ||
| 564 | ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) | 569 | ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) |
| 565 | swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); | 570 | swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); |
| 566 | 571 | ||
| @@ -654,7 +659,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, | |||
| 654 | * GFP_DMA memory; fall back on map_single(), which | 659 | * GFP_DMA memory; fall back on map_single(), which |
| 655 | * will grab memory from the lowest available address range. | 660 | * will grab memory from the lowest available address range. |
| 656 | */ | 661 | */ |
| 657 | phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE); | 662 | phys_addr_t paddr = map_single(hwdev, 0, size, |
| 663 | DMA_FROM_DEVICE, 0); | ||
| 658 | if (paddr == SWIOTLB_MAP_ERROR) | 664 | if (paddr == SWIOTLB_MAP_ERROR) |
| 659 | goto err_warn; | 665 | goto err_warn; |
| 660 | 666 | ||
| @@ -667,9 +673,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, | |||
| 667 | (unsigned long long)dma_mask, | 673 | (unsigned long long)dma_mask, |
| 668 | (unsigned long long)dev_addr); | 674 | (unsigned long long)dev_addr); |
| 669 | 675 | ||
| 670 | /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ | 676 | /* |
| 677 | * DMA_TO_DEVICE to avoid memcpy in unmap_single. | ||
| 678 | * DMA_ATTR_SKIP_CPU_SYNC is optional. | ||
| 679 | */ | ||
| 671 | swiotlb_tbl_unmap_single(hwdev, paddr, | 680 | swiotlb_tbl_unmap_single(hwdev, paddr, |
| 672 | size, DMA_TO_DEVICE); | 681 | size, DMA_TO_DEVICE, |
| 682 | DMA_ATTR_SKIP_CPU_SYNC); | ||
| 673 | goto err_warn; | 683 | goto err_warn; |
| 674 | } | 684 | } |
| 675 | } | 685 | } |
| @@ -698,8 +708,12 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, | |||
| 698 | if (!is_swiotlb_buffer(paddr)) | 708 | if (!is_swiotlb_buffer(paddr)) |
| 699 | free_pages((unsigned long)vaddr, get_order(size)); | 709 | free_pages((unsigned long)vaddr, get_order(size)); |
| 700 | else | 710 | else |
| 701 | /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ | 711 | /* |
| 702 | swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE); | 712 | * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. |
| 713 | * DMA_ATTR_SKIP_CPU_SYNC is optional. | ||
| 714 | */ | ||
| 715 | swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE, | ||
| 716 | DMA_ATTR_SKIP_CPU_SYNC); | ||
| 703 | } | 717 | } |
| 704 | EXPORT_SYMBOL(swiotlb_free_coherent); | 718 | EXPORT_SYMBOL(swiotlb_free_coherent); |
| 705 | 719 | ||
| @@ -714,8 +728,8 @@ swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, | |||
| 714 | * When the mapping is small enough return a static buffer to limit | 728 | * When the mapping is small enough return a static buffer to limit |
| 715 | * the damage, or panic when the transfer is too big. | 729 | * the damage, or panic when the transfer is too big. |
| 716 | */ | 730 | */ |
| 717 | printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at " | 731 | dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n", |
| 718 | "device %s\n", size, dev ? dev_name(dev) : "?"); | 732 | size); |
| 719 | 733 | ||
| 720 | if (size <= io_tlb_overflow || !do_panic) | 734 | if (size <= io_tlb_overflow || !do_panic) |
| 721 | return; | 735 | return; |
| @@ -755,7 +769,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, | |||
| 755 | trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); | 769 | trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); |
| 756 | 770 | ||
| 757 | /* Oh well, have to allocate and map a bounce buffer. */ | 771 | /* Oh well, have to allocate and map a bounce buffer. */ |
| 758 | map = map_single(dev, phys, size, dir); | 772 | map = map_single(dev, phys, size, dir, attrs); |
| 759 | if (map == SWIOTLB_MAP_ERROR) { | 773 | if (map == SWIOTLB_MAP_ERROR) { |
| 760 | swiotlb_full(dev, size, dir, 1); | 774 | swiotlb_full(dev, size, dir, 1); |
| 761 | return phys_to_dma(dev, io_tlb_overflow_buffer); | 775 | return phys_to_dma(dev, io_tlb_overflow_buffer); |
| @@ -764,12 +778,13 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, | |||
| 764 | dev_addr = phys_to_dma(dev, map); | 778 | dev_addr = phys_to_dma(dev, map); |
| 765 | 779 | ||
| 766 | /* Ensure that the address returned is DMA'ble */ | 780 | /* Ensure that the address returned is DMA'ble */ |
| 767 | if (!dma_capable(dev, dev_addr, size)) { | 781 | if (dma_capable(dev, dev_addr, size)) |
| 768 | swiotlb_tbl_unmap_single(dev, map, size, dir); | 782 | return dev_addr; |
| 769 | return phys_to_dma(dev, io_tlb_overflow_buffer); | ||
| 770 | } | ||
| 771 | 783 | ||
| 772 | return dev_addr; | 784 | attrs |= DMA_ATTR_SKIP_CPU_SYNC; |
| 785 | swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); | ||
| 786 | |||
| 787 | return phys_to_dma(dev, io_tlb_overflow_buffer); | ||
| 773 | } | 788 | } |
| 774 | EXPORT_SYMBOL_GPL(swiotlb_map_page); | 789 | EXPORT_SYMBOL_GPL(swiotlb_map_page); |
| 775 | 790 | ||
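The rewritten error path above is also the pattern callers can use directly: when a mapping is torn down and the device never produced valid data, passing DMA_ATTR_SKIP_CPU_SYNC to the unmap avoids copying stale bounce-buffer contents back over the original buffer. A minimal illustrative sketch (names are placeholders):

#include <linux/dma-mapping.h>

/* Illustrative cleanup: the device wrote nothing useful, so skip the
 * unmap-time copy-back from a possible swiotlb bounce buffer. */
static void example_abort_rx(struct device *dev, dma_addr_t addr, size_t len)
{
	dma_unmap_single_attrs(dev, addr, len, DMA_FROM_DEVICE,
			       DMA_ATTR_SKIP_CPU_SYNC);
}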
| @@ -782,14 +797,15 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); | |||
| 782 | * whatever the device wrote there. | 797 | * whatever the device wrote there. |
| 783 | */ | 798 | */ |
| 784 | static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, | 799 | static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, |
| 785 | size_t size, enum dma_data_direction dir) | 800 | size_t size, enum dma_data_direction dir, |
| 801 | unsigned long attrs) | ||
| 786 | { | 802 | { |
| 787 | phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); | 803 | phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); |
| 788 | 804 | ||
| 789 | BUG_ON(dir == DMA_NONE); | 805 | BUG_ON(dir == DMA_NONE); |
| 790 | 806 | ||
| 791 | if (is_swiotlb_buffer(paddr)) { | 807 | if (is_swiotlb_buffer(paddr)) { |
| 792 | swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); | 808 | swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); |
| 793 | return; | 809 | return; |
| 794 | } | 810 | } |
| 795 | 811 | ||
| @@ -809,7 +825,7 @@ void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, | |||
| 809 | size_t size, enum dma_data_direction dir, | 825 | size_t size, enum dma_data_direction dir, |
| 810 | unsigned long attrs) | 826 | unsigned long attrs) |
| 811 | { | 827 | { |
| 812 | unmap_single(hwdev, dev_addr, size, dir); | 828 | unmap_single(hwdev, dev_addr, size, dir, attrs); |
| 813 | } | 829 | } |
| 814 | EXPORT_SYMBOL_GPL(swiotlb_unmap_page); | 830 | EXPORT_SYMBOL_GPL(swiotlb_unmap_page); |
| 815 | 831 | ||
| @@ -891,11 +907,12 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, | |||
| 891 | if (swiotlb_force || | 907 | if (swiotlb_force || |
| 892 | !dma_capable(hwdev, dev_addr, sg->length)) { | 908 | !dma_capable(hwdev, dev_addr, sg->length)) { |
| 893 | phys_addr_t map = map_single(hwdev, sg_phys(sg), | 909 | phys_addr_t map = map_single(hwdev, sg_phys(sg), |
| 894 | sg->length, dir); | 910 | sg->length, dir, attrs); |
| 895 | if (map == SWIOTLB_MAP_ERROR) { | 911 | if (map == SWIOTLB_MAP_ERROR) { |
| 896 | /* Don't panic here, we expect map_sg users | 912 | /* Don't panic here, we expect map_sg users |
| 897 | to do proper error handling. */ | 913 | to do proper error handling. */ |
| 898 | swiotlb_full(hwdev, sg->length, dir, 0); | 914 | swiotlb_full(hwdev, sg->length, dir, 0); |
| 915 | attrs |= DMA_ATTR_SKIP_CPU_SYNC; | ||
| 899 | swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, | 916 | swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, |
| 900 | attrs); | 917 | attrs); |
| 901 | sg_dma_len(sgl) = 0; | 918 | sg_dma_len(sgl) = 0; |
| @@ -910,14 +927,6 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, | |||
| 910 | } | 927 | } |
| 911 | EXPORT_SYMBOL(swiotlb_map_sg_attrs); | 928 | EXPORT_SYMBOL(swiotlb_map_sg_attrs); |
| 912 | 929 | ||
| 913 | int | ||
| 914 | swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, | ||
| 915 | enum dma_data_direction dir) | ||
| 916 | { | ||
| 917 | return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, 0); | ||
| 918 | } | ||
| 919 | EXPORT_SYMBOL(swiotlb_map_sg); | ||
| 920 | |||
| 921 | /* | 930 | /* |
| 922 | * Unmap a set of streaming mode DMA translations. Again, cpu read rules | 931 | * Unmap a set of streaming mode DMA translations. Again, cpu read rules |
| 923 | * concerning calls here are the same as for swiotlb_unmap_page() above. | 932 | * concerning calls here are the same as for swiotlb_unmap_page() above. |
| @@ -933,19 +942,11 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, | |||
| 933 | BUG_ON(dir == DMA_NONE); | 942 | BUG_ON(dir == DMA_NONE); |
| 934 | 943 | ||
| 935 | for_each_sg(sgl, sg, nelems, i) | 944 | for_each_sg(sgl, sg, nelems, i) |
| 936 | unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir); | 945 | unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, |
| 937 | 946 | attrs); | |
| 938 | } | 947 | } |
| 939 | EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); | 948 | EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); |
| 940 | 949 | ||
| 941 | void | ||
| 942 | swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, | ||
| 943 | enum dma_data_direction dir) | ||
| 944 | { | ||
| 945 | return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, 0); | ||
| 946 | } | ||
| 947 | EXPORT_SYMBOL(swiotlb_unmap_sg); | ||
| 948 | |||
| 949 | /* | 950 | /* |
| 950 | * Make physical memory consistent for a set of streaming mode DMA translations | 951 | * Make physical memory consistent for a set of streaming mode DMA translations |
| 951 | * after a transfer. | 952 | * after a transfer. |
diff --git a/lib/timerqueue.c b/lib/timerqueue.c index 782ae8ca2c06..adc6ee0a5126 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c | |||
| @@ -48,7 +48,7 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) | |||
| 48 | while (*p) { | 48 | while (*p) { |
| 49 | parent = *p; | 49 | parent = *p; |
| 50 | ptr = rb_entry(parent, struct timerqueue_node, node); | 50 | ptr = rb_entry(parent, struct timerqueue_node, node); |
| 51 | if (node->expires.tv64 < ptr->expires.tv64) | 51 | if (node->expires < ptr->expires) |
| 52 | p = &(*p)->rb_left; | 52 | p = &(*p)->rb_left; |
| 53 | else | 53 | else |
| 54 | p = &(*p)->rb_right; | 54 | p = &(*p)->rb_right; |
| @@ -56,7 +56,7 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) | |||
| 56 | rb_link_node(&node->node, parent, p); | 56 | rb_link_node(&node->node, parent, p); |
| 57 | rb_insert_color(&node->node, &head->head); | 57 | rb_insert_color(&node->node, &head->head); |
| 58 | 58 | ||
| 59 | if (!head->next || node->expires.tv64 < head->next->expires.tv64) { | 59 | if (!head->next || node->expires < head->next->expires) { |
| 60 | head->next = node; | 60 | head->next = node; |
| 61 | return true; | 61 | return true; |
| 62 | } | 62 | } |
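The timerqueue change reflects ktime_t becoming a plain s64 (the union with a .tv64 member goes away), so expiry times are compared directly. Where a helper reads better, ktime_before()/ktime_after() express the same comparison; a trivial sketch (names are placeholders):

#include <linux/ktime.h>
#include <linux/types.h>

/* With ktime_t a plain s64 this is just "a < b"; ktime_before() keeps
 * the intent explicit at call sites. */
static bool example_expires_earlier(ktime_t a, ktime_t b)
{
	return ktime_before(a, b);
}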
