Diffstat (limited to 'lib')
 lib/Kconfig.debug                |  34
 lib/Kconfig.ubsan                |   3
 lib/Makefile                     |   1
 lib/cpu-notifier-error-inject.c  |  84
 lib/extable.c                    |   2
 lib/iov_iter.c                   | 149
 lib/kstrtox.c                    |   2
 lib/radix-tree.c                 | 891
 lib/raid6/avx2.c                 | 232
 lib/timerqueue.c                 |   4
10 files changed, 920 insertions, 482 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e6327d102184..b06848a104e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
| @@ -26,7 +26,7 @@ config CONSOLE_LOGLEVEL_DEFAULT | |||
| 26 | the kernel bootargs. loglevel=<x> continues to override whatever | 26 | the kernel bootargs. loglevel=<x> continues to override whatever |
| 27 | value is specified here as well. | 27 | value is specified here as well. |
| 28 | 28 | ||
| 29 | Note: This does not affect the log level of un-prefixed prink() | 29 | Note: This does not affect the log level of un-prefixed printk() |
| 30 | usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT | 30 | usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT |
| 31 | option. | 31 | option. |
| 32 | 32 | ||
| @@ -194,8 +194,8 @@ config GDB_SCRIPTS | |||
| 194 | build directory. If you load vmlinux into gdb, the helper | 194 | build directory. If you load vmlinux into gdb, the helper |
| 195 | scripts will be automatically imported by gdb as well, and | 195 | scripts will be automatically imported by gdb as well, and |
| 196 | additional functions are available to analyze a Linux kernel | 196 | additional functions are available to analyze a Linux kernel |
| 197 | instance. See Documentation/gdb-kernel-debugging.txt for further | 197 | instance. See Documentation/dev-tools/gdb-kernel-debugging.rst |
| 198 | details. | 198 | for further details. |
| 199 | 199 | ||
| 200 | config ENABLE_WARN_DEPRECATED | 200 | config ENABLE_WARN_DEPRECATED |
| 201 | bool "Enable __deprecated logic" | 201 | bool "Enable __deprecated logic" |
| @@ -542,7 +542,7 @@ config DEBUG_KMEMLEAK | |||
| 542 | difference being that the orphan objects are not freed but | 542 | difference being that the orphan objects are not freed but |
| 543 | only shown in /sys/kernel/debug/kmemleak. Enabling this | 543 | only shown in /sys/kernel/debug/kmemleak. Enabling this |
| 544 | feature will introduce an overhead to memory | 544 | feature will introduce an overhead to memory |
| 545 | allocations. See Documentation/kmemleak.txt for more | 545 | allocations. See Documentation/dev-tools/kmemleak.rst for more |
| 546 | details. | 546 | details. |
| 547 | 547 | ||
| 548 | Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances | 548 | Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances |
| @@ -739,7 +739,7 @@ config KCOV | |||
| 739 | different machines and across reboots. If you need stable PC values, | 739 | different machines and across reboots. If you need stable PC values, |
| 740 | disable RANDOMIZE_BASE. | 740 | disable RANDOMIZE_BASE. |
| 741 | 741 | ||
| 742 | For more details, see Documentation/kcov.txt. | 742 | For more details, see Documentation/dev-tools/kcov.rst. |
| 743 | 743 | ||
| 744 | config KCOV_INSTRUMENT_ALL | 744 | config KCOV_INSTRUMENT_ALL |
| 745 | bool "Instrument all code by default" | 745 | bool "Instrument all code by default" |
| @@ -1538,30 +1538,6 @@ config NOTIFIER_ERROR_INJECTION | |||
| 1538 | 1538 | ||
| 1539 | Say N if unsure. | 1539 | Say N if unsure. |
| 1540 | 1540 | ||
| 1541 | config CPU_NOTIFIER_ERROR_INJECT | ||
| 1542 | tristate "CPU notifier error injection module" | ||
| 1543 | depends on HOTPLUG_CPU && NOTIFIER_ERROR_INJECTION | ||
| 1544 | help | ||
| 1545 | This option provides a kernel module that can be used to test | ||
| 1546 | the error handling of the cpu notifiers by injecting artificial | ||
| 1547 | errors to CPU notifier chain callbacks. It is controlled through | ||
| 1548 | debugfs interface under /sys/kernel/debug/notifier-error-inject/cpu | ||
| 1549 | |||
| 1550 | If the notifier call chain should be failed with some events | ||
| 1551 | notified, write the error code to "actions/<notifier event>/error". | ||
| 1552 | |||
| 1553 | Example: Inject CPU offline error (-1 == -EPERM) | ||
| 1554 | |||
| 1555 | # cd /sys/kernel/debug/notifier-error-inject/cpu | ||
| 1556 | # echo -1 > actions/CPU_DOWN_PREPARE/error | ||
| 1557 | # echo 0 > /sys/devices/system/cpu/cpu1/online | ||
| 1558 | bash: echo: write error: Operation not permitted | ||
| 1559 | |||
| 1560 | To compile this code as a module, choose M here: the module will | ||
| 1561 | be called cpu-notifier-error-inject. | ||
| 1562 | |||
| 1563 | If unsure, say N. | ||
| 1564 | |||
| 1565 | config PM_NOTIFIER_ERROR_INJECT | 1541 | config PM_NOTIFIER_ERROR_INJECT |
| 1566 | tristate "PM notifier error injection module" | 1542 | tristate "PM notifier error injection module" |
| 1567 | depends on PM && NOTIFIER_ERROR_INJECTION | 1543 | depends on PM && NOTIFIER_ERROR_INJECTION |
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index bc6e651df68c..a669c193b878 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -10,7 +10,8 @@ config UBSAN
 	  This option enables undefined behaviour sanity checker
 	  Compile-time instrumentation is used to detect various undefined
 	  behaviours in runtime. Various types of checks may be enabled
-	  via boot parameter ubsan_handle (see: Documentation/ubsan.txt).
+	  via boot parameter ubsan_handle
+	  (see: Documentation/dev-tools/ubsan.rst).
 
 config UBSAN_SANITIZE_ALL
 	bool "Enable instrumentation for the entire kernel"
diff --git a/lib/Makefile b/lib/Makefile
index 50144a3aeebd..bc4073a8cd08 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -128,7 +128,6 @@ obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
-obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
 obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o
 obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
deleted file mode 100644
index 0e2c9a1e958a..000000000000
--- a/lib/cpu-notifier-error-inject.c
+++ /dev/null
| @@ -1,84 +0,0 @@ | |||
| 1 | #include <linux/kernel.h> | ||
| 2 | #include <linux/module.h> | ||
| 3 | #include <linux/cpu.h> | ||
| 4 | |||
| 5 | #include "notifier-error-inject.h" | ||
| 6 | |||
| 7 | static int priority; | ||
| 8 | module_param(priority, int, 0); | ||
| 9 | MODULE_PARM_DESC(priority, "specify cpu notifier priority"); | ||
| 10 | |||
| 11 | #define UP_PREPARE 0 | ||
| 12 | #define UP_PREPARE_FROZEN 0 | ||
| 13 | #define DOWN_PREPARE 0 | ||
| 14 | #define DOWN_PREPARE_FROZEN 0 | ||
| 15 | |||
| 16 | static struct notifier_err_inject cpu_notifier_err_inject = { | ||
| 17 | .actions = { | ||
| 18 | { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE) }, | ||
| 19 | { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE_FROZEN) }, | ||
| 20 | { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE) }, | ||
| 21 | { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE_FROZEN) }, | ||
| 22 | {} | ||
| 23 | } | ||
| 24 | }; | ||
| 25 | |||
| 26 | static int notf_err_handle(struct notifier_err_inject_action *action) | ||
| 27 | { | ||
| 28 | int ret; | ||
| 29 | |||
| 30 | ret = action->error; | ||
| 31 | if (ret) | ||
| 32 | pr_info("Injecting error (%d) to %s\n", ret, action->name); | ||
| 33 | return ret; | ||
| 34 | } | ||
| 35 | |||
| 36 | static int notf_err_inj_up_prepare(unsigned int cpu) | ||
| 37 | { | ||
| 38 | if (!cpuhp_tasks_frozen) | ||
| 39 | return notf_err_handle(&cpu_notifier_err_inject.actions[0]); | ||
| 40 | else | ||
| 41 | return notf_err_handle(&cpu_notifier_err_inject.actions[1]); | ||
| 42 | } | ||
| 43 | |||
| 44 | static int notf_err_inj_dead(unsigned int cpu) | ||
| 45 | { | ||
| 46 | if (!cpuhp_tasks_frozen) | ||
| 47 | return notf_err_handle(&cpu_notifier_err_inject.actions[2]); | ||
| 48 | else | ||
| 49 | return notf_err_handle(&cpu_notifier_err_inject.actions[3]); | ||
| 50 | } | ||
| 51 | |||
| 52 | static struct dentry *dir; | ||
| 53 | |||
| 54 | static int err_inject_init(void) | ||
| 55 | { | ||
| 56 | int err; | ||
| 57 | |||
| 58 | dir = notifier_err_inject_init("cpu", notifier_err_inject_dir, | ||
| 59 | &cpu_notifier_err_inject, priority); | ||
| 60 | if (IS_ERR(dir)) | ||
| 61 | return PTR_ERR(dir); | ||
| 62 | |||
| 63 | err = cpuhp_setup_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE, | ||
| 64 | "cpu-err-notif:prepare", | ||
| 65 | notf_err_inj_up_prepare, | ||
| 66 | notf_err_inj_dead); | ||
| 67 | if (err) | ||
| 68 | debugfs_remove_recursive(dir); | ||
| 69 | |||
| 70 | return err; | ||
| 71 | } | ||
| 72 | |||
| 73 | static void err_inject_exit(void) | ||
| 74 | { | ||
| 75 | cpuhp_remove_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE); | ||
| 76 | debugfs_remove_recursive(dir); | ||
| 77 | } | ||
| 78 | |||
| 79 | module_init(err_inject_init); | ||
| 80 | module_exit(err_inject_exit); | ||
| 81 | |||
| 82 | MODULE_DESCRIPTION("CPU notifier error injection module"); | ||
| 83 | MODULE_LICENSE("GPL"); | ||
| 84 | MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); | ||
diff --git a/lib/extable.c b/lib/extable.c
index 0be02ad561e9..62968daa66a9 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sort.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifndef ARCH_HAS_RELATIVE_EXTABLE
 #define ex_to_insn(x) ((x)->insn)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 691a52b634fe..25f572303801 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
| @@ -73,19 +73,21 @@ | |||
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | #define iterate_all_kinds(i, n, v, I, B, K) { \ | 75 | #define iterate_all_kinds(i, n, v, I, B, K) { \ |
| 76 | size_t skip = i->iov_offset; \ | 76 | if (likely(n)) { \ |
| 77 | if (unlikely(i->type & ITER_BVEC)) { \ | 77 | size_t skip = i->iov_offset; \ |
| 78 | struct bio_vec v; \ | 78 | if (unlikely(i->type & ITER_BVEC)) { \ |
| 79 | struct bvec_iter __bi; \ | 79 | struct bio_vec v; \ |
| 80 | iterate_bvec(i, n, v, __bi, skip, (B)) \ | 80 | struct bvec_iter __bi; \ |
| 81 | } else if (unlikely(i->type & ITER_KVEC)) { \ | 81 | iterate_bvec(i, n, v, __bi, skip, (B)) \ |
| 82 | const struct kvec *kvec; \ | 82 | } else if (unlikely(i->type & ITER_KVEC)) { \ |
| 83 | struct kvec v; \ | 83 | const struct kvec *kvec; \ |
| 84 | iterate_kvec(i, n, v, kvec, skip, (K)) \ | 84 | struct kvec v; \ |
| 85 | } else { \ | 85 | iterate_kvec(i, n, v, kvec, skip, (K)) \ |
| 86 | const struct iovec *iov; \ | 86 | } else { \ |
| 87 | struct iovec v; \ | 87 | const struct iovec *iov; \ |
| 88 | iterate_iovec(i, n, v, iov, skip, (I)) \ | 88 | struct iovec v; \ |
| 89 | iterate_iovec(i, n, v, iov, skip, (I)) \ | ||
| 90 | } \ | ||
| 89 | } \ | 91 | } \ |
| 90 | } | 92 | } |
| 91 | 93 | ||
| @@ -569,6 +571,31 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) | |||
| 569 | } | 571 | } |
| 570 | EXPORT_SYMBOL(copy_from_iter); | 572 | EXPORT_SYMBOL(copy_from_iter); |
| 571 | 573 | ||
| 574 | bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) | ||
| 575 | { | ||
| 576 | char *to = addr; | ||
| 577 | if (unlikely(i->type & ITER_PIPE)) { | ||
| 578 | WARN_ON(1); | ||
| 579 | return false; | ||
| 580 | } | ||
| 581 | if (unlikely(i->count < bytes)) | ||
| 582 | return false; | ||
| 583 | |||
| 584 | iterate_all_kinds(i, bytes, v, ({ | ||
| 585 | if (__copy_from_user((to += v.iov_len) - v.iov_len, | ||
| 586 | v.iov_base, v.iov_len)) | ||
| 587 | return false; | ||
| 588 | 0;}), | ||
| 589 | memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, | ||
| 590 | v.bv_offset, v.bv_len), | ||
| 591 | memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) | ||
| 592 | ) | ||
| 593 | |||
| 594 | iov_iter_advance(i, bytes); | ||
| 595 | return true; | ||
| 596 | } | ||
| 597 | EXPORT_SYMBOL(copy_from_iter_full); | ||
| 598 | |||
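[Editorial sketch, not part of the patch.] The new copy_from_iter_full() has all-or-nothing semantics: it returns a bool and only advances the iterator when every requested byte was copied, so callers no longer compare a returned length against the requested size. A minimal caller sketch, assuming a made-up struct and function name; only copy_from_iter_full() comes from this patch:

/*
 * Hedged usage sketch: "demo_hdr" and "demo_read" are illustrative names,
 * not kernel APIs.
 */
static int demo_read(struct iov_iter *from)
{
	struct demo_hdr {
		u32 type;
		u32 len;
	} hdr;

	/* old pattern: copy_from_iter(&hdr, sizeof(hdr), from) != sizeof(hdr) */
	if (!copy_from_iter_full(&hdr, sizeof(hdr), from))
		return -EFAULT;		/* iterator was not advanced */

	/* on success the iterator has been advanced by sizeof(hdr) */
	return 0;
}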
| 572 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) | 599 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) |
| 573 | { | 600 | { |
| 574 | char *to = addr; | 601 | char *to = addr; |
| @@ -588,6 +615,30 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) | |||
| 588 | } | 615 | } |
| 589 | EXPORT_SYMBOL(copy_from_iter_nocache); | 616 | EXPORT_SYMBOL(copy_from_iter_nocache); |
| 590 | 617 | ||
| 618 | bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) | ||
| 619 | { | ||
| 620 | char *to = addr; | ||
| 621 | if (unlikely(i->type & ITER_PIPE)) { | ||
| 622 | WARN_ON(1); | ||
| 623 | return false; | ||
| 624 | } | ||
| 625 | if (unlikely(i->count < bytes)) | ||
| 626 | return false; | ||
| 627 | iterate_all_kinds(i, bytes, v, ({ | ||
| 628 | if (__copy_from_user_nocache((to += v.iov_len) - v.iov_len, | ||
| 629 | v.iov_base, v.iov_len)) | ||
| 630 | return false; | ||
| 631 | 0;}), | ||
| 632 | memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, | ||
| 633 | v.bv_offset, v.bv_len), | ||
| 634 | memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) | ||
| 635 | ) | ||
| 636 | |||
| 637 | iov_iter_advance(i, bytes); | ||
| 638 | return true; | ||
| 639 | } | ||
| 640 | EXPORT_SYMBOL(copy_from_iter_full_nocache); | ||
| 641 | |||
| 591 | size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, | 642 | size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, |
| 592 | struct iov_iter *i) | 643 | struct iov_iter *i) |
| 593 | { | 644 | { |
| @@ -788,11 +839,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) | |||
| 788 | unsigned long res = 0; | 839 | unsigned long res = 0; |
| 789 | size_t size = i->count; | 840 | size_t size = i->count; |
| 790 | 841 | ||
| 791 | if (!size) | ||
| 792 | return 0; | ||
| 793 | |||
| 794 | if (unlikely(i->type & ITER_PIPE)) { | 842 | if (unlikely(i->type & ITER_PIPE)) { |
| 795 | if (i->iov_offset && allocated(&i->pipe->bufs[i->idx])) | 843 | if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx])) |
| 796 | return size | i->iov_offset; | 844 | return size | i->iov_offset; |
| 797 | return size; | 845 | return size; |
| 798 | } | 846 | } |
| @@ -807,10 +855,8 @@ EXPORT_SYMBOL(iov_iter_alignment); | |||
| 807 | 855 | ||
| 808 | unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | 856 | unsigned long iov_iter_gap_alignment(const struct iov_iter *i) |
| 809 | { | 857 | { |
| 810 | unsigned long res = 0; | 858 | unsigned long res = 0; |
| 811 | size_t size = i->count; | 859 | size_t size = i->count; |
| 812 | if (!size) | ||
| 813 | return 0; | ||
| 814 | 860 | ||
| 815 | if (unlikely(i->type & ITER_PIPE)) { | 861 | if (unlikely(i->type & ITER_PIPE)) { |
| 816 | WARN_ON(1); | 862 | WARN_ON(1); |
| @@ -825,7 +871,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | |||
| 825 | (res |= (!res ? 0 : (unsigned long)v.iov_base) | | 871 | (res |= (!res ? 0 : (unsigned long)v.iov_base) | |
| 826 | (size != v.iov_len ? size : 0)) | 872 | (size != v.iov_len ? size : 0)) |
| 827 | ); | 873 | ); |
| 828 | return res; | 874 | return res; |
| 829 | } | 875 | } |
| 830 | EXPORT_SYMBOL(iov_iter_gap_alignment); | 876 | EXPORT_SYMBOL(iov_iter_gap_alignment); |
| 831 | 877 | ||
| @@ -859,6 +905,9 @@ static ssize_t pipe_get_pages(struct iov_iter *i, | |||
| 859 | size_t capacity; | 905 | size_t capacity; |
| 860 | int idx; | 906 | int idx; |
| 861 | 907 | ||
| 908 | if (!maxsize) | ||
| 909 | return 0; | ||
| 910 | |||
| 862 | if (!sanity(i)) | 911 | if (!sanity(i)) |
| 863 | return -EFAULT; | 912 | return -EFAULT; |
| 864 | 913 | ||
| @@ -877,9 +926,6 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, | |||
| 877 | if (maxsize > i->count) | 926 | if (maxsize > i->count) |
| 878 | maxsize = i->count; | 927 | maxsize = i->count; |
| 879 | 928 | ||
| 880 | if (!maxsize) | ||
| 881 | return 0; | ||
| 882 | |||
| 883 | if (unlikely(i->type & ITER_PIPE)) | 929 | if (unlikely(i->type & ITER_PIPE)) |
| 884 | return pipe_get_pages(i, pages, maxsize, maxpages, start); | 930 | return pipe_get_pages(i, pages, maxsize, maxpages, start); |
| 885 | iterate_all_kinds(i, maxsize, v, ({ | 931 | iterate_all_kinds(i, maxsize, v, ({ |
| @@ -926,6 +972,9 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, | |||
| 926 | int idx; | 972 | int idx; |
| 927 | int npages; | 973 | int npages; |
| 928 | 974 | ||
| 975 | if (!maxsize) | ||
| 976 | return 0; | ||
| 977 | |||
| 929 | if (!sanity(i)) | 978 | if (!sanity(i)) |
| 930 | return -EFAULT; | 979 | return -EFAULT; |
| 931 | 980 | ||
| @@ -957,9 +1006,6 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | |||
| 957 | if (maxsize > i->count) | 1006 | if (maxsize > i->count) |
| 958 | maxsize = i->count; | 1007 | maxsize = i->count; |
| 959 | 1008 | ||
| 960 | if (!maxsize) | ||
| 961 | return 0; | ||
| 962 | |||
| 963 | if (unlikely(i->type & ITER_PIPE)) | 1009 | if (unlikely(i->type & ITER_PIPE)) |
| 964 | return pipe_get_pages_alloc(i, pages, maxsize, start); | 1010 | return pipe_get_pages_alloc(i, pages, maxsize, start); |
| 965 | iterate_all_kinds(i, maxsize, v, ({ | 1011 | iterate_all_kinds(i, maxsize, v, ({ |
| @@ -1009,7 +1055,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, | |||
| 1009 | } | 1055 | } |
| 1010 | iterate_and_advance(i, bytes, v, ({ | 1056 | iterate_and_advance(i, bytes, v, ({ |
| 1011 | int err = 0; | 1057 | int err = 0; |
| 1012 | next = csum_and_copy_from_user(v.iov_base, | 1058 | next = csum_and_copy_from_user(v.iov_base, |
| 1013 | (to += v.iov_len) - v.iov_len, | 1059 | (to += v.iov_len) - v.iov_len, |
| 1014 | v.iov_len, 0, &err); | 1060 | v.iov_len, 0, &err); |
| 1015 | if (!err) { | 1061 | if (!err) { |
| @@ -1038,6 +1084,51 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, | |||
| 1038 | } | 1084 | } |
| 1039 | EXPORT_SYMBOL(csum_and_copy_from_iter); | 1085 | EXPORT_SYMBOL(csum_and_copy_from_iter); |
| 1040 | 1086 | ||
| 1087 | bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, | ||
| 1088 | struct iov_iter *i) | ||
| 1089 | { | ||
| 1090 | char *to = addr; | ||
| 1091 | __wsum sum, next; | ||
| 1092 | size_t off = 0; | ||
| 1093 | sum = *csum; | ||
| 1094 | if (unlikely(i->type & ITER_PIPE)) { | ||
| 1095 | WARN_ON(1); | ||
| 1096 | return false; | ||
| 1097 | } | ||
| 1098 | if (unlikely(i->count < bytes)) | ||
| 1099 | return false; | ||
| 1100 | iterate_all_kinds(i, bytes, v, ({ | ||
| 1101 | int err = 0; | ||
| 1102 | next = csum_and_copy_from_user(v.iov_base, | ||
| 1103 | (to += v.iov_len) - v.iov_len, | ||
| 1104 | v.iov_len, 0, &err); | ||
| 1105 | if (err) | ||
| 1106 | return false; | ||
| 1107 | sum = csum_block_add(sum, next, off); | ||
| 1108 | off += v.iov_len; | ||
| 1109 | 0; | ||
| 1110 | }), ({ | ||
| 1111 | char *p = kmap_atomic(v.bv_page); | ||
| 1112 | next = csum_partial_copy_nocheck(p + v.bv_offset, | ||
| 1113 | (to += v.bv_len) - v.bv_len, | ||
| 1114 | v.bv_len, 0); | ||
| 1115 | kunmap_atomic(p); | ||
| 1116 | sum = csum_block_add(sum, next, off); | ||
| 1117 | off += v.bv_len; | ||
| 1118 | }),({ | ||
| 1119 | next = csum_partial_copy_nocheck(v.iov_base, | ||
| 1120 | (to += v.iov_len) - v.iov_len, | ||
| 1121 | v.iov_len, 0); | ||
| 1122 | sum = csum_block_add(sum, next, off); | ||
| 1123 | off += v.iov_len; | ||
| 1124 | }) | ||
| 1125 | ) | ||
| 1126 | *csum = sum; | ||
| 1127 | iov_iter_advance(i, bytes); | ||
| 1128 | return true; | ||
| 1129 | } | ||
| 1130 | EXPORT_SYMBOL(csum_and_copy_from_iter_full); | ||
| 1131 | |||
| 1041 | size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, | 1132 | size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, |
| 1042 | struct iov_iter *i) | 1133 | struct iov_iter *i) |
| 1043 | { | 1134 | { |
| @@ -1052,7 +1143,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, | |||
| 1052 | iterate_and_advance(i, bytes, v, ({ | 1143 | iterate_and_advance(i, bytes, v, ({ |
| 1053 | int err = 0; | 1144 | int err = 0; |
| 1054 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, | 1145 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, |
| 1055 | v.iov_base, | 1146 | v.iov_base, |
| 1056 | v.iov_len, 0, &err); | 1147 | v.iov_len, 0, &err); |
| 1057 | if (!err) { | 1148 | if (!err) { |
| 1058 | sum = csum_block_add(sum, next, off); | 1149 | sum = csum_block_add(sum, next, off); |
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index b8e2080c1a47..bf85e05ce858 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -17,7 +17,7 @@
 #include <linux/math64.h>
 #include <linux/export.h>
 #include <linux/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "kstrtox.h"
 
 const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 2e8c6f7aa56e..6f382e07de77 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
| @@ -22,6 +22,7 @@ | |||
| 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #include <linux/cpu.h> | ||
| 25 | #include <linux/errno.h> | 26 | #include <linux/errno.h> |
| 26 | #include <linux/init.h> | 27 | #include <linux/init.h> |
| 27 | #include <linux/kernel.h> | 28 | #include <linux/kernel.h> |
| @@ -30,7 +31,6 @@ | |||
| 30 | #include <linux/percpu.h> | 31 | #include <linux/percpu.h> |
| 31 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
| 32 | #include <linux/kmemleak.h> | 33 | #include <linux/kmemleak.h> |
| 33 | #include <linux/notifier.h> | ||
| 34 | #include <linux/cpu.h> | 34 | #include <linux/cpu.h> |
| 35 | #include <linux/string.h> | 35 | #include <linux/string.h> |
| 36 | #include <linux/bitops.h> | 36 | #include <linux/bitops.h> |
| @@ -69,6 +69,11 @@ struct radix_tree_preload { | |||
| 69 | }; | 69 | }; |
| 70 | static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; | 70 | static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; |
| 71 | 71 | ||
| 72 | static inline struct radix_tree_node *entry_to_node(void *ptr) | ||
| 73 | { | ||
| 74 | return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); | ||
| 75 | } | ||
| 76 | |||
| 72 | static inline void *node_to_entry(void *ptr) | 77 | static inline void *node_to_entry(void *ptr) |
| 73 | { | 78 | { |
| 74 | return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); | 79 | return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); |
| @@ -191,13 +196,12 @@ static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) | |||
| 191 | * Returns next bit offset, or size if nothing found. | 196 | * Returns next bit offset, or size if nothing found. |
| 192 | */ | 197 | */ |
| 193 | static __always_inline unsigned long | 198 | static __always_inline unsigned long |
| 194 | radix_tree_find_next_bit(const unsigned long *addr, | 199 | radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, |
| 195 | unsigned long size, unsigned long offset) | 200 | unsigned long offset) |
| 196 | { | 201 | { |
| 197 | if (!__builtin_constant_p(size)) | 202 | const unsigned long *addr = node->tags[tag]; |
| 198 | return find_next_bit(addr, size, offset); | ||
| 199 | 203 | ||
| 200 | if (offset < size) { | 204 | if (offset < RADIX_TREE_MAP_SIZE) { |
| 201 | unsigned long tmp; | 205 | unsigned long tmp; |
| 202 | 206 | ||
| 203 | addr += offset / BITS_PER_LONG; | 207 | addr += offset / BITS_PER_LONG; |
| @@ -205,14 +209,32 @@ radix_tree_find_next_bit(const unsigned long *addr, | |||
| 205 | if (tmp) | 209 | if (tmp) |
| 206 | return __ffs(tmp) + offset; | 210 | return __ffs(tmp) + offset; |
| 207 | offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); | 211 | offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); |
| 208 | while (offset < size) { | 212 | while (offset < RADIX_TREE_MAP_SIZE) { |
| 209 | tmp = *++addr; | 213 | tmp = *++addr; |
| 210 | if (tmp) | 214 | if (tmp) |
| 211 | return __ffs(tmp) + offset; | 215 | return __ffs(tmp) + offset; |
| 212 | offset += BITS_PER_LONG; | 216 | offset += BITS_PER_LONG; |
| 213 | } | 217 | } |
| 214 | } | 218 | } |
| 215 | return size; | 219 | return RADIX_TREE_MAP_SIZE; |
| 220 | } | ||
| 221 | |||
| 222 | static unsigned int iter_offset(const struct radix_tree_iter *iter) | ||
| 223 | { | ||
| 224 | return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK; | ||
| 225 | } | ||
| 226 | |||
| 227 | /* | ||
| 228 | * The maximum index which can be stored in a radix tree | ||
| 229 | */ | ||
| 230 | static inline unsigned long shift_maxindex(unsigned int shift) | ||
| 231 | { | ||
| 232 | return (RADIX_TREE_MAP_SIZE << shift) - 1; | ||
| 233 | } | ||
| 234 | |||
| 235 | static inline unsigned long node_maxindex(struct radix_tree_node *node) | ||
| 236 | { | ||
| 237 | return shift_maxindex(node->shift); | ||
| 216 | } | 238 | } |
| 217 | 239 | ||
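[Editorial note, not part of the patch.] shift_maxindex() is plain arithmetic on the node's shift; a worked example, assuming the common configuration of RADIX_TREE_MAP_SHIFT == 6 (64 slots per node):

/*
 * Assuming RADIX_TREE_MAP_SIZE == 64 (RADIX_TREE_MAP_SHIFT == 6):
 *
 *   shift_maxindex(0)  = (64 << 0)  - 1 = 63       one level of nodes
 *   shift_maxindex(6)  = (64 << 6)  - 1 = 4095     two levels
 *   shift_maxindex(12) = (64 << 12) - 1 = 262143   three levels
 */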
| 218 | #ifndef __KERNEL__ | 240 | #ifndef __KERNEL__ |
| @@ -220,10 +242,11 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) | |||
| 220 | { | 242 | { |
| 221 | unsigned long i; | 243 | unsigned long i; |
| 222 | 244 | ||
| 223 | pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d exceptional %d parent %p\n", | 245 | pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n", |
| 224 | node, node->offset, | 246 | node, node->offset, index, index | node_maxindex(node), |
| 247 | node->parent, | ||
| 225 | node->tags[0][0], node->tags[1][0], node->tags[2][0], | 248 | node->tags[0][0], node->tags[1][0], node->tags[2][0], |
| 226 | node->shift, node->count, node->exceptional, node->parent); | 249 | node->shift, node->count, node->exceptional); |
| 227 | 250 | ||
| 228 | for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { | 251 | for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { |
| 229 | unsigned long first = index | (i << node->shift); | 252 | unsigned long first = index | (i << node->shift); |
| @@ -231,14 +254,16 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) | |||
| 231 | void *entry = node->slots[i]; | 254 | void *entry = node->slots[i]; |
| 232 | if (!entry) | 255 | if (!entry) |
| 233 | continue; | 256 | continue; |
| 234 | if (is_sibling_entry(node, entry)) { | 257 | if (entry == RADIX_TREE_RETRY) { |
| 235 | pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n", | 258 | pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n", |
| 236 | entry, i, | 259 | i, first, last, node); |
| 237 | *(void **)entry_to_node(entry), | ||
| 238 | first, last); | ||
| 239 | } else if (!radix_tree_is_internal_node(entry)) { | 260 | } else if (!radix_tree_is_internal_node(entry)) { |
| 240 | pr_debug("radix entry %p offset %ld indices %ld-%ld\n", | 261 | pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n", |
| 241 | entry, i, first, last); | 262 | entry, i, first, last, node); |
| 263 | } else if (is_sibling_entry(node, entry)) { | ||
| 264 | pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n", | ||
| 265 | entry, i, first, last, node, | ||
| 266 | *(void **)entry_to_node(entry)); | ||
| 242 | } else { | 267 | } else { |
| 243 | dump_node(entry_to_node(entry), first); | 268 | dump_node(entry_to_node(entry), first); |
| 244 | } | 269 | } |
| @@ -262,7 +287,10 @@ static void radix_tree_dump(struct radix_tree_root *root) | |||
| 262 | * that the caller has pinned this thread of control to the current CPU. | 287 | * that the caller has pinned this thread of control to the current CPU. |
| 263 | */ | 288 | */ |
| 264 | static struct radix_tree_node * | 289 | static struct radix_tree_node * |
| 265 | radix_tree_node_alloc(struct radix_tree_root *root) | 290 | radix_tree_node_alloc(struct radix_tree_root *root, |
| 291 | struct radix_tree_node *parent, | ||
| 292 | unsigned int shift, unsigned int offset, | ||
| 293 | unsigned int count, unsigned int exceptional) | ||
| 266 | { | 294 | { |
| 267 | struct radix_tree_node *ret = NULL; | 295 | struct radix_tree_node *ret = NULL; |
| 268 | gfp_t gfp_mask = root_gfp_mask(root); | 296 | gfp_t gfp_mask = root_gfp_mask(root); |
| @@ -307,6 +335,13 @@ radix_tree_node_alloc(struct radix_tree_root *root) | |||
| 307 | ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); | 335 | ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); |
| 308 | out: | 336 | out: |
| 309 | BUG_ON(radix_tree_is_internal_node(ret)); | 337 | BUG_ON(radix_tree_is_internal_node(ret)); |
| 338 | if (ret) { | ||
| 339 | ret->parent = parent; | ||
| 340 | ret->shift = shift; | ||
| 341 | ret->offset = offset; | ||
| 342 | ret->count = count; | ||
| 343 | ret->exceptional = exceptional; | ||
| 344 | } | ||
| 310 | return ret; | 345 | return ret; |
| 311 | } | 346 | } |
| 312 | 347 | ||
| @@ -314,17 +349,15 @@ static void radix_tree_node_rcu_free(struct rcu_head *head) | |||
| 314 | { | 349 | { |
| 315 | struct radix_tree_node *node = | 350 | struct radix_tree_node *node = |
| 316 | container_of(head, struct radix_tree_node, rcu_head); | 351 | container_of(head, struct radix_tree_node, rcu_head); |
| 317 | int i; | ||
| 318 | 352 | ||
| 319 | /* | 353 | /* |
| 320 | * must only free zeroed nodes into the slab. radix_tree_shrink | 354 | * Must only free zeroed nodes into the slab. We can be left with |
| 321 | * can leave us with a non-NULL entry in the first slot, so clear | 355 | * non-NULL entries by radix_tree_free_nodes, so clear the entries |
| 322 | * that here to make sure. | 356 | * and tags here. |
| 323 | */ | 357 | */ |
| 324 | for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) | 358 | memset(node->slots, 0, sizeof(node->slots)); |
| 325 | tag_clear(node, i, 0); | 359 | memset(node->tags, 0, sizeof(node->tags)); |
| 326 | 360 | INIT_LIST_HEAD(&node->private_list); | |
| 327 | node->slots[0] = NULL; | ||
| 328 | 361 | ||
| 329 | kmem_cache_free(radix_tree_node_cachep, node); | 362 | kmem_cache_free(radix_tree_node_cachep, node); |
| 330 | } | 363 | } |
| @@ -344,7 +377,7 @@ radix_tree_node_free(struct radix_tree_node *node) | |||
| 344 | * To make use of this facility, the radix tree must be initialised without | 377 | * To make use of this facility, the radix tree must be initialised without |
| 345 | * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). | 378 | * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). |
| 346 | */ | 379 | */ |
| 347 | static int __radix_tree_preload(gfp_t gfp_mask, int nr) | 380 | static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) |
| 348 | { | 381 | { |
| 349 | struct radix_tree_preload *rtp; | 382 | struct radix_tree_preload *rtp; |
| 350 | struct radix_tree_node *node; | 383 | struct radix_tree_node *node; |
| @@ -410,6 +443,28 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) | |||
| 410 | } | 443 | } |
| 411 | EXPORT_SYMBOL(radix_tree_maybe_preload); | 444 | EXPORT_SYMBOL(radix_tree_maybe_preload); |
| 412 | 445 | ||
| 446 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 447 | /* | ||
| 448 | * Preload with enough objects to ensure that we can split a single entry | ||
| 449 | * of order @old_order into many entries of size @new_order | ||
| 450 | */ | ||
| 451 | int radix_tree_split_preload(unsigned int old_order, unsigned int new_order, | ||
| 452 | gfp_t gfp_mask) | ||
| 453 | { | ||
| 454 | unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT); | ||
| 455 | unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) - | ||
| 456 | (new_order / RADIX_TREE_MAP_SHIFT); | ||
| 457 | unsigned nr = 0; | ||
| 458 | |||
| 459 | WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); | ||
| 460 | BUG_ON(new_order >= old_order); | ||
| 461 | |||
| 462 | while (layers--) | ||
| 463 | nr = nr * RADIX_TREE_MAP_SIZE + 1; | ||
| 464 | return __radix_tree_preload(gfp_mask, top * nr); | ||
| 465 | } | ||
| 466 | #endif | ||
| 467 | |||
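[Editorial note, not part of the patch.] The preload count in radix_tree_split_preload() depends only on the two orders; a worked example of the formula above, assuming the usual RADIX_TREE_MAP_SHIFT of 6 (64-slot nodes), with the orders chosen purely for illustration:

/*
 * Splitting an order-9 entry into order-0 entries:
 *   top    = 1 << (9 % 6)       = 8    slots covered in the existing node
 *   layers = 9/6 - 0/6          = 1    new level needed under each slot
 *   nr     = 0*64 + 1           = 1    nodes per per-slot chain
 *   preload nodes = top * nr    = 8
 *
 * Splitting an order-12 entry into order-0 entries:
 *   top    = 1 << (12 % 6)      = 1
 *   layers = 12/6 - 0/6         = 2
 *   nr     = (0*64 + 1)*64 + 1  = 65
 *   preload nodes = 1 * 65      = 65
 */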
| 413 | /* | 468 | /* |
| 414 | * The same as function above, but preload number of nodes required to insert | 469 | * The same as function above, but preload number of nodes required to insert |
| 415 | * (1 << order) continuous naturally-aligned elements. | 470 | * (1 << order) continuous naturally-aligned elements. |
| @@ -455,19 +510,6 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) | |||
| 455 | return __radix_tree_preload(gfp_mask, nr_nodes); | 510 | return __radix_tree_preload(gfp_mask, nr_nodes); |
| 456 | } | 511 | } |
| 457 | 512 | ||
| 458 | /* | ||
| 459 | * The maximum index which can be stored in a radix tree | ||
| 460 | */ | ||
| 461 | static inline unsigned long shift_maxindex(unsigned int shift) | ||
| 462 | { | ||
| 463 | return (RADIX_TREE_MAP_SIZE << shift) - 1; | ||
| 464 | } | ||
| 465 | |||
| 466 | static inline unsigned long node_maxindex(struct radix_tree_node *node) | ||
| 467 | { | ||
| 468 | return shift_maxindex(node->shift); | ||
| 469 | } | ||
| 470 | |||
| 471 | static unsigned radix_tree_load_root(struct radix_tree_root *root, | 513 | static unsigned radix_tree_load_root(struct radix_tree_root *root, |
| 472 | struct radix_tree_node **nodep, unsigned long *maxindex) | 514 | struct radix_tree_node **nodep, unsigned long *maxindex) |
| 473 | { | 515 | { |
| @@ -505,8 +547,8 @@ static int radix_tree_extend(struct radix_tree_root *root, | |||
| 505 | goto out; | 547 | goto out; |
| 506 | 548 | ||
| 507 | do { | 549 | do { |
| 508 | struct radix_tree_node *node = radix_tree_node_alloc(root); | 550 | struct radix_tree_node *node = radix_tree_node_alloc(root, |
| 509 | 551 | NULL, shift, 0, 1, 0); | |
| 510 | if (!node) | 552 | if (!node) |
| 511 | return -ENOMEM; | 553 | return -ENOMEM; |
| 512 | 554 | ||
| @@ -517,16 +559,11 @@ static int radix_tree_extend(struct radix_tree_root *root, | |||
| 517 | } | 559 | } |
| 518 | 560 | ||
| 519 | BUG_ON(shift > BITS_PER_LONG); | 561 | BUG_ON(shift > BITS_PER_LONG); |
| 520 | node->shift = shift; | ||
| 521 | node->offset = 0; | ||
| 522 | node->count = 1; | ||
| 523 | node->parent = NULL; | ||
| 524 | if (radix_tree_is_internal_node(slot)) { | 562 | if (radix_tree_is_internal_node(slot)) { |
| 525 | entry_to_node(slot)->parent = node; | 563 | entry_to_node(slot)->parent = node; |
| 526 | } else { | 564 | } else if (radix_tree_exceptional_entry(slot)) { |
| 527 | /* Moving an exceptional root->rnode to a node */ | 565 | /* Moving an exceptional root->rnode to a node */ |
| 528 | if (radix_tree_exceptional_entry(slot)) | 566 | node->exceptional = 1; |
| 529 | node->exceptional = 1; | ||
| 530 | } | 567 | } |
| 531 | node->slots[0] = slot; | 568 | node->slots[0] = slot; |
| 532 | slot = node_to_entry(node); | 569 | slot = node_to_entry(node); |
| @@ -665,26 +702,24 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, | |||
| 665 | shift = radix_tree_load_root(root, &child, &maxindex); | 702 | shift = radix_tree_load_root(root, &child, &maxindex); |
| 666 | 703 | ||
| 667 | /* Make sure the tree is high enough. */ | 704 | /* Make sure the tree is high enough. */ |
| 705 | if (order > 0 && max == ((1UL << order) - 1)) | ||
| 706 | max++; | ||
| 668 | if (max > maxindex) { | 707 | if (max > maxindex) { |
| 669 | int error = radix_tree_extend(root, max, shift); | 708 | int error = radix_tree_extend(root, max, shift); |
| 670 | if (error < 0) | 709 | if (error < 0) |
| 671 | return error; | 710 | return error; |
| 672 | shift = error; | 711 | shift = error; |
| 673 | child = root->rnode; | 712 | child = root->rnode; |
| 674 | if (order == shift) | ||
| 675 | shift += RADIX_TREE_MAP_SHIFT; | ||
| 676 | } | 713 | } |
| 677 | 714 | ||
| 678 | while (shift > order) { | 715 | while (shift > order) { |
| 679 | shift -= RADIX_TREE_MAP_SHIFT; | 716 | shift -= RADIX_TREE_MAP_SHIFT; |
| 680 | if (child == NULL) { | 717 | if (child == NULL) { |
| 681 | /* Have to add a child node. */ | 718 | /* Have to add a child node. */ |
| 682 | child = radix_tree_node_alloc(root); | 719 | child = radix_tree_node_alloc(root, node, shift, |
| 720 | offset, 0, 0); | ||
| 683 | if (!child) | 721 | if (!child) |
| 684 | return -ENOMEM; | 722 | return -ENOMEM; |
| 685 | child->shift = shift; | ||
| 686 | child->offset = offset; | ||
| 687 | child->parent = node; | ||
| 688 | rcu_assign_pointer(*slot, node_to_entry(child)); | 723 | rcu_assign_pointer(*slot, node_to_entry(child)); |
| 689 | if (node) | 724 | if (node) |
| 690 | node->count++; | 725 | node->count++; |
| @@ -697,31 +732,125 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, | |||
| 697 | slot = &node->slots[offset]; | 732 | slot = &node->slots[offset]; |
| 698 | } | 733 | } |
| 699 | 734 | ||
| 735 | if (nodep) | ||
| 736 | *nodep = node; | ||
| 737 | if (slotp) | ||
| 738 | *slotp = slot; | ||
| 739 | return 0; | ||
| 740 | } | ||
| 741 | |||
| 700 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | 742 | #ifdef CONFIG_RADIX_TREE_MULTIORDER |
| 701 | /* Insert pointers to the canonical entry */ | 743 | /* |
| 702 | if (order > shift) { | 744 | * Free any nodes below this node. The tree is presumed to not need |
| 703 | unsigned i, n = 1 << (order - shift); | 745 | * shrinking, and any user data in the tree is presumed to not need a |
| 746 | * destructor called on it. If we need to add a destructor, we can | ||
| 747 | * add that functionality later. Note that we may not clear tags or | ||
| 748 | * slots from the tree as an RCU walker may still have a pointer into | ||
| 749 | * this subtree. We could replace the entries with RADIX_TREE_RETRY, | ||
| 750 | * but we'll still have to clear those in rcu_free. | ||
| 751 | */ | ||
| 752 | static void radix_tree_free_nodes(struct radix_tree_node *node) | ||
| 753 | { | ||
| 754 | unsigned offset = 0; | ||
| 755 | struct radix_tree_node *child = entry_to_node(node); | ||
| 756 | |||
| 757 | for (;;) { | ||
| 758 | void *entry = child->slots[offset]; | ||
| 759 | if (radix_tree_is_internal_node(entry) && | ||
| 760 | !is_sibling_entry(child, entry)) { | ||
| 761 | child = entry_to_node(entry); | ||
| 762 | offset = 0; | ||
| 763 | continue; | ||
| 764 | } | ||
| 765 | offset++; | ||
| 766 | while (offset == RADIX_TREE_MAP_SIZE) { | ||
| 767 | struct radix_tree_node *old = child; | ||
| 768 | offset = child->offset + 1; | ||
| 769 | child = child->parent; | ||
| 770 | radix_tree_node_free(old); | ||
| 771 | if (old == entry_to_node(node)) | ||
| 772 | return; | ||
| 773 | } | ||
| 774 | } | ||
| 775 | } | ||
| 776 | |||
| 777 | static inline int insert_entries(struct radix_tree_node *node, void **slot, | ||
| 778 | void *item, unsigned order, bool replace) | ||
| 779 | { | ||
| 780 | struct radix_tree_node *child; | ||
| 781 | unsigned i, n, tag, offset, tags = 0; | ||
| 782 | |||
| 783 | if (node) { | ||
| 784 | if (order > node->shift) | ||
| 785 | n = 1 << (order - node->shift); | ||
| 786 | else | ||
| 787 | n = 1; | ||
| 788 | offset = get_slot_offset(node, slot); | ||
| 789 | } else { | ||
| 790 | n = 1; | ||
| 791 | offset = 0; | ||
| 792 | } | ||
| 793 | |||
| 794 | if (n > 1) { | ||
| 704 | offset = offset & ~(n - 1); | 795 | offset = offset & ~(n - 1); |
| 705 | slot = &node->slots[offset]; | 796 | slot = &node->slots[offset]; |
| 706 | child = node_to_entry(slot); | 797 | } |
| 707 | for (i = 0; i < n; i++) { | 798 | child = node_to_entry(slot); |
| 708 | if (slot[i]) | 799 | |
| 800 | for (i = 0; i < n; i++) { | ||
| 801 | if (slot[i]) { | ||
| 802 | if (replace) { | ||
| 803 | node->count--; | ||
| 804 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 805 | if (tag_get(node, tag, offset + i)) | ||
| 806 | tags |= 1 << tag; | ||
| 807 | } else | ||
| 709 | return -EEXIST; | 808 | return -EEXIST; |
| 710 | } | 809 | } |
| 810 | } | ||
| 711 | 811 | ||
| 712 | for (i = 1; i < n; i++) { | 812 | for (i = 0; i < n; i++) { |
| 813 | struct radix_tree_node *old = slot[i]; | ||
| 814 | if (i) { | ||
| 713 | rcu_assign_pointer(slot[i], child); | 815 | rcu_assign_pointer(slot[i], child); |
| 714 | node->count++; | 816 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) |
| 817 | if (tags & (1 << tag)) | ||
| 818 | tag_clear(node, tag, offset + i); | ||
| 819 | } else { | ||
| 820 | rcu_assign_pointer(slot[i], item); | ||
| 821 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 822 | if (tags & (1 << tag)) | ||
| 823 | tag_set(node, tag, offset); | ||
| 715 | } | 824 | } |
| 825 | if (radix_tree_is_internal_node(old) && | ||
| 826 | !is_sibling_entry(node, old) && | ||
| 827 | (old != RADIX_TREE_RETRY)) | ||
| 828 | radix_tree_free_nodes(old); | ||
| 829 | if (radix_tree_exceptional_entry(old)) | ||
| 830 | node->exceptional--; | ||
| 716 | } | 831 | } |
| 717 | #endif | 832 | if (node) { |
| 718 | 833 | node->count += n; | |
| 719 | if (nodep) | 834 | if (radix_tree_exceptional_entry(item)) |
| 720 | *nodep = node; | 835 | node->exceptional += n; |
| 721 | if (slotp) | 836 | } |
| 722 | *slotp = slot; | 837 | return n; |
| 723 | return 0; | 838 | } |
| 839 | #else | ||
| 840 | static inline int insert_entries(struct radix_tree_node *node, void **slot, | ||
| 841 | void *item, unsigned order, bool replace) | ||
| 842 | { | ||
| 843 | if (*slot) | ||
| 844 | return -EEXIST; | ||
| 845 | rcu_assign_pointer(*slot, item); | ||
| 846 | if (node) { | ||
| 847 | node->count++; | ||
| 848 | if (radix_tree_exceptional_entry(item)) | ||
| 849 | node->exceptional++; | ||
| 850 | } | ||
| 851 | return 1; | ||
| 724 | } | 852 | } |
| 853 | #endif | ||
| 725 | 854 | ||
| 726 | /** | 855 | /** |
| 727 | * __radix_tree_insert - insert into a radix tree | 856 | * __radix_tree_insert - insert into a radix tree |
| @@ -744,15 +873,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, | |||
| 744 | error = __radix_tree_create(root, index, order, &node, &slot); | 873 | error = __radix_tree_create(root, index, order, &node, &slot); |
| 745 | if (error) | 874 | if (error) |
| 746 | return error; | 875 | return error; |
| 747 | if (*slot != NULL) | 876 | |
| 748 | return -EEXIST; | 877 | error = insert_entries(node, slot, item, order, false); |
| 749 | rcu_assign_pointer(*slot, item); | 878 | if (error < 0) |
| 879 | return error; | ||
| 750 | 880 | ||
| 751 | if (node) { | 881 | if (node) { |
| 752 | unsigned offset = get_slot_offset(node, slot); | 882 | unsigned offset = get_slot_offset(node, slot); |
| 753 | node->count++; | ||
| 754 | if (radix_tree_exceptional_entry(item)) | ||
| 755 | node->exceptional++; | ||
| 756 | BUG_ON(tag_get(node, 0, offset)); | 883 | BUG_ON(tag_get(node, 0, offset)); |
| 757 | BUG_ON(tag_get(node, 1, offset)); | 884 | BUG_ON(tag_get(node, 1, offset)); |
| 758 | BUG_ON(tag_get(node, 2, offset)); | 885 | BUG_ON(tag_get(node, 2, offset)); |
| @@ -850,6 +977,24 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) | |||
| 850 | } | 977 | } |
| 851 | EXPORT_SYMBOL(radix_tree_lookup); | 978 | EXPORT_SYMBOL(radix_tree_lookup); |
| 852 | 979 | ||
| 980 | static inline int slot_count(struct radix_tree_node *node, | ||
| 981 | void **slot) | ||
| 982 | { | ||
| 983 | int n = 1; | ||
| 984 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 985 | void *ptr = node_to_entry(slot); | ||
| 986 | unsigned offset = get_slot_offset(node, slot); | ||
| 987 | int i; | ||
| 988 | |||
| 989 | for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { | ||
| 990 | if (node->slots[offset + i] != ptr) | ||
| 991 | break; | ||
| 992 | n++; | ||
| 993 | } | ||
| 994 | #endif | ||
| 995 | return n; | ||
| 996 | } | ||
| 997 | |||
| 853 | static void replace_slot(struct radix_tree_root *root, | 998 | static void replace_slot(struct radix_tree_root *root, |
| 854 | struct radix_tree_node *node, | 999 | struct radix_tree_node *node, |
| 855 | void **slot, void *item, | 1000 | void **slot, void *item, |
| @@ -868,12 +1013,35 @@ static void replace_slot(struct radix_tree_root *root, | |||
| 868 | 1013 | ||
| 869 | if (node) { | 1014 | if (node) { |
| 870 | node->count += count; | 1015 | node->count += count; |
| 871 | node->exceptional += exceptional; | 1016 | if (exceptional) { |
| 1017 | exceptional *= slot_count(node, slot); | ||
| 1018 | node->exceptional += exceptional; | ||
| 1019 | } | ||
| 872 | } | 1020 | } |
| 873 | 1021 | ||
| 874 | rcu_assign_pointer(*slot, item); | 1022 | rcu_assign_pointer(*slot, item); |
| 875 | } | 1023 | } |
| 876 | 1024 | ||
| 1025 | static inline void delete_sibling_entries(struct radix_tree_node *node, | ||
| 1026 | void **slot) | ||
| 1027 | { | ||
| 1028 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 1029 | bool exceptional = radix_tree_exceptional_entry(*slot); | ||
| 1030 | void *ptr = node_to_entry(slot); | ||
| 1031 | unsigned offset = get_slot_offset(node, slot); | ||
| 1032 | int i; | ||
| 1033 | |||
| 1034 | for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { | ||
| 1035 | if (node->slots[offset + i] != ptr) | ||
| 1036 | break; | ||
| 1037 | node->slots[offset + i] = NULL; | ||
| 1038 | node->count--; | ||
| 1039 | if (exceptional) | ||
| 1040 | node->exceptional--; | ||
| 1041 | } | ||
| 1042 | #endif | ||
| 1043 | } | ||
| 1044 | |||
| 877 | /** | 1045 | /** |
| 878 | * __radix_tree_replace - replace item in a slot | 1046 | * __radix_tree_replace - replace item in a slot |
| 879 | * @root: radix tree root | 1047 | * @root: radix tree root |
| @@ -891,6 +1059,8 @@ void __radix_tree_replace(struct radix_tree_root *root, | |||
| 891 | void **slot, void *item, | 1059 | void **slot, void *item, |
| 892 | radix_tree_update_node_t update_node, void *private) | 1060 | radix_tree_update_node_t update_node, void *private) |
| 893 | { | 1061 | { |
| 1062 | if (!item) | ||
| 1063 | delete_sibling_entries(node, slot); | ||
| 894 | /* | 1064 | /* |
| 895 | * This function supports replacing exceptional entries and | 1065 | * This function supports replacing exceptional entries and |
| 896 | * deleting entries, but that needs accounting against the | 1066 | * deleting entries, but that needs accounting against the |
| @@ -921,7 +1091,8 @@ void __radix_tree_replace(struct radix_tree_root *root, | |||
| 921 | * NOTE: This cannot be used to switch between non-entries (empty slots), | 1091 | * NOTE: This cannot be used to switch between non-entries (empty slots), |
| 922 | * regular entries, and exceptional entries, as that requires accounting | 1092 | * regular entries, and exceptional entries, as that requires accounting |
| 923 | * inside the radix tree node. When switching from one type of entry or | 1093 | * inside the radix tree node. When switching from one type of entry or |
| 924 | * deleting, use __radix_tree_lookup() and __radix_tree_replace(). | 1094 | * deleting, use __radix_tree_lookup() and __radix_tree_replace() or |
| 1095 | * radix_tree_iter_replace(). | ||
| 925 | */ | 1096 | */ |
| 926 | void radix_tree_replace_slot(struct radix_tree_root *root, | 1097 | void radix_tree_replace_slot(struct radix_tree_root *root, |
| 927 | void **slot, void *item) | 1098 | void **slot, void *item) |
| @@ -930,6 +1101,164 @@ void radix_tree_replace_slot(struct radix_tree_root *root, | |||
| 930 | } | 1101 | } |
| 931 | 1102 | ||
| 932 | /** | 1103 | /** |
| 1104 | * radix_tree_iter_replace - replace item in a slot | ||
| 1105 | * @root: radix tree root | ||
| 1106 | * @slot: pointer to slot | ||
| 1107 | * @item: new item to store in the slot. | ||
| 1108 | * | ||
| 1109 | * For use with radix_tree_split() and radix_tree_for_each_slot(). | ||
| 1110 | * Caller must hold tree write locked across split and replacement. | ||
| 1111 | */ | ||
| 1112 | void radix_tree_iter_replace(struct radix_tree_root *root, | ||
| 1113 | const struct radix_tree_iter *iter, void **slot, void *item) | ||
| 1114 | { | ||
| 1115 | __radix_tree_replace(root, iter->node, slot, item, NULL, NULL); | ||
| 1116 | } | ||
| 1117 | |||
| 1118 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 1119 | /** | ||
| 1120 | * radix_tree_join - replace multiple entries with one multiorder entry | ||
| 1121 | * @root: radix tree root | ||
| 1122 | * @index: an index inside the new entry | ||
| 1123 | * @order: order of the new entry | ||
| 1124 | * @item: new entry | ||
| 1125 | * | ||
| 1126 | * Call this function to replace several entries with one larger entry. | ||
| 1127 | * The existing entries are presumed to not need freeing as a result of | ||
| 1128 | * this call. | ||
| 1129 | * | ||
| 1130 | * The replacement entry will have all the tags set on it that were set | ||
| 1131 | * on any of the entries it is replacing. | ||
| 1132 | */ | ||
| 1133 | int radix_tree_join(struct radix_tree_root *root, unsigned long index, | ||
| 1134 | unsigned order, void *item) | ||
| 1135 | { | ||
| 1136 | struct radix_tree_node *node; | ||
| 1137 | void **slot; | ||
| 1138 | int error; | ||
| 1139 | |||
| 1140 | BUG_ON(radix_tree_is_internal_node(item)); | ||
| 1141 | |||
| 1142 | error = __radix_tree_create(root, index, order, &node, &slot); | ||
| 1143 | if (!error) | ||
| 1144 | error = insert_entries(node, slot, item, order, true); | ||
| 1145 | if (error > 0) | ||
| 1146 | error = 0; | ||
| 1147 | |||
| 1148 | return error; | ||
| 1149 | } | ||
| 1150 | |||
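[Editorial sketch, not part of the patch.] A hedged usage sketch for radix_tree_join(): the function name, lock and parameters below are made up, preloading and locking are the caller's responsibility, and the preload shown is only an assumption about typical usage.

/*
 * Sketch: replace the order-0 entries covering indices 0..7 with one
 * order-3 entry "entry".  The caller serialises writers with a lock and
 * preloads nodes in case the join needs to allocate.
 */
static int join_example(struct radix_tree_root *tree, void *entry,
			spinlock_t *lock)
{
	int err;

	err = radix_tree_preload(GFP_KERNEL);	/* may need to allocate nodes */
	if (err)
		return err;

	spin_lock(lock);
	err = radix_tree_join(tree, 0, 3, entry);	/* index 0, order 3 */
	spin_unlock(lock);

	radix_tree_preload_end();
	return err;
}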
| 1151 | /** | ||
| 1152 | * radix_tree_split - Split an entry into smaller entries | ||
| 1153 | * @root: radix tree root | ||
| 1154 | * @index: An index within the large entry | ||
| 1155 | * @order: Order of new entries | ||
| 1156 | * | ||
| 1157 | * Call this function as the first step in replacing a multiorder entry | ||
| 1158 | * with several entries of lower order. After this function returns, | ||
| 1159 | * loop over the relevant portion of the tree using radix_tree_for_each_slot() | ||
| 1160 | * and call radix_tree_iter_replace() to set up each new entry. | ||
| 1161 | * | ||
| 1162 | * The tags from this entry are replicated to all the new entries. | ||
| 1163 | * | ||
| 1164 | * The radix tree should be locked against modification during the entire | ||
| 1165 | * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which | ||
| 1166 | * should prompt RCU walkers to restart the lookup from the root. | ||
| 1167 | */ | ||
| 1168 | int radix_tree_split(struct radix_tree_root *root, unsigned long index, | ||
| 1169 | unsigned order) | ||
| 1170 | { | ||
| 1171 | struct radix_tree_node *parent, *node, *child; | ||
| 1172 | void **slot; | ||
| 1173 | unsigned int offset, end; | ||
| 1174 | unsigned n, tag, tags = 0; | ||
| 1175 | |||
| 1176 | if (!__radix_tree_lookup(root, index, &parent, &slot)) | ||
| 1177 | return -ENOENT; | ||
| 1178 | if (!parent) | ||
| 1179 | return -ENOENT; | ||
| 1180 | |||
| 1181 | offset = get_slot_offset(parent, slot); | ||
| 1182 | |||
| 1183 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 1184 | if (tag_get(parent, tag, offset)) | ||
| 1185 | tags |= 1 << tag; | ||
| 1186 | |||
| 1187 | for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { | ||
| 1188 | if (!is_sibling_entry(parent, parent->slots[end])) | ||
| 1189 | break; | ||
| 1190 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 1191 | if (tags & (1 << tag)) | ||
| 1192 | tag_set(parent, tag, end); | ||
| 1193 | /* rcu_assign_pointer ensures tags are set before RETRY */ | ||
| 1194 | rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY); | ||
| 1195 | } | ||
| 1196 | rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY); | ||
| 1197 | parent->exceptional -= (end - offset); | ||
| 1198 | |||
| 1199 | if (order == parent->shift) | ||
| 1200 | return 0; | ||
| 1201 | if (order > parent->shift) { | ||
| 1202 | while (offset < end) | ||
| 1203 | offset += insert_entries(parent, &parent->slots[offset], | ||
| 1204 | RADIX_TREE_RETRY, order, true); | ||
| 1205 | return 0; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | node = parent; | ||
| 1209 | |||
| 1210 | for (;;) { | ||
| 1211 | if (node->shift > order) { | ||
| 1212 | child = radix_tree_node_alloc(root, node, | ||
| 1213 | node->shift - RADIX_TREE_MAP_SHIFT, | ||
| 1214 | offset, 0, 0); | ||
| 1215 | if (!child) | ||
| 1216 | goto nomem; | ||
| 1217 | if (node != parent) { | ||
| 1218 | node->count++; | ||
| 1219 | node->slots[offset] = node_to_entry(child); | ||
| 1220 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 1221 | if (tags & (1 << tag)) | ||
| 1222 | tag_set(node, tag, offset); | ||
| 1223 | } | ||
| 1224 | |||
| 1225 | node = child; | ||
| 1226 | offset = 0; | ||
| 1227 | continue; | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | n = insert_entries(node, &node->slots[offset], | ||
| 1231 | RADIX_TREE_RETRY, order, false); | ||
| 1232 | BUG_ON(n > RADIX_TREE_MAP_SIZE); | ||
| 1233 | |||
| 1234 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 1235 | if (tags & (1 << tag)) | ||
| 1236 | tag_set(node, tag, offset); | ||
| 1237 | offset += n; | ||
| 1238 | |||
| 1239 | while (offset == RADIX_TREE_MAP_SIZE) { | ||
| 1240 | if (node == parent) | ||
| 1241 | break; | ||
| 1242 | offset = node->offset; | ||
| 1243 | child = node; | ||
| 1244 | node = node->parent; | ||
| 1245 | rcu_assign_pointer(node->slots[offset], | ||
| 1246 | node_to_entry(child)); | ||
| 1247 | offset++; | ||
| 1248 | } | ||
| 1249 | if ((node == parent) && (offset == end)) | ||
| 1250 | return 0; | ||
| 1251 | } | ||
| 1252 | |||
| 1253 | nomem: | ||
| 1254 | /* Shouldn't happen; did user forget to preload? */ | ||
| 1255 | /* TODO: free all the allocated nodes */ | ||
| 1256 | WARN_ON(1); | ||
| 1257 | return -ENOMEM; | ||
| 1258 | } | ||
| 1259 | #endif | ||
| 1260 | |||
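[Editorial sketch, not part of the patch.] The kernel-doc above describes a two-step flow: split first, then walk the affected slots and install the new entries. A hedged sketch of what a caller might look like; everything except the radix tree APIs is made up, and the caller is assumed to hold the tree write lock and to have called radix_tree_split_preload() beforehand.

/*
 * Sketch: split one order-6 entry covering "index" into order-0 entries
 * and install one new entry per resulting slot from "new_items".
 */
static int split_example(struct radix_tree_root *root, unsigned long index,
			 void **new_items)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned long start = index & ~63UL;	/* an order-6 entry spans 64 indices */
	int err;

	/* step 1: replace the big entry with RADIX_TREE_RETRY placeholders */
	err = radix_tree_split(root, index, 0);
	if (err)
		return err;

	/* step 2: fill every slot the old entry used to cover */
	radix_tree_for_each_slot(slot, root, &iter, start) {
		if (iter.index > start + 63)
			break;
		radix_tree_iter_replace(root, &iter, slot,
					new_items[iter.index - start]);
	}
	return 0;
}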
| 1261 | /** | ||
| 933 | * radix_tree_tag_set - set a tag on a radix tree node | 1262 | * radix_tree_tag_set - set a tag on a radix tree node |
| 934 | * @root: radix tree root | 1263 | * @root: radix tree root |
| 935 | * @index: index key | 1264 | * @index: index key |
| @@ -990,6 +1319,34 @@ static void node_tag_clear(struct radix_tree_root *root, | |||
| 990 | root_tag_clear(root, tag); | 1319 | root_tag_clear(root, tag); |
| 991 | } | 1320 | } |
| 992 | 1321 | ||
| 1322 | static void node_tag_set(struct radix_tree_root *root, | ||
| 1323 | struct radix_tree_node *node, | ||
| 1324 | unsigned int tag, unsigned int offset) | ||
| 1325 | { | ||
| 1326 | while (node) { | ||
| 1327 | if (tag_get(node, tag, offset)) | ||
| 1328 | return; | ||
| 1329 | tag_set(node, tag, offset); | ||
| 1330 | offset = node->offset; | ||
| 1331 | node = node->parent; | ||
| 1332 | } | ||
| 1333 | |||
| 1334 | if (!root_tag_get(root, tag)) | ||
| 1335 | root_tag_set(root, tag); | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | /** | ||
| 1339 | * radix_tree_iter_tag_set - set a tag on the current iterator entry | ||
| 1340 | * @root: radix tree root | ||
| 1341 | * @iter: iterator state | ||
| 1342 | * @tag: tag to set | ||
| 1343 | */ | ||
| 1344 | void radix_tree_iter_tag_set(struct radix_tree_root *root, | ||
| 1345 | const struct radix_tree_iter *iter, unsigned int tag) | ||
| 1346 | { | ||
| 1347 | node_tag_set(root, iter->node, tag, iter_offset(iter)); | ||
| 1348 | } | ||
| 1349 | |||
| 993 | /** | 1350 | /** |
| 994 | * radix_tree_tag_clear - clear a tag on a radix tree node | 1351 | * radix_tree_tag_clear - clear a tag on a radix tree node |
| 995 | * @root: radix tree root | 1352 | * @root: radix tree root |
| @@ -1085,6 +1442,121 @@ static inline void __set_iter_shift(struct radix_tree_iter *iter, | |||
| 1085 | #endif | 1442 | #endif |
| 1086 | } | 1443 | } |
| 1087 | 1444 | ||
| 1445 | /* Construct iter->tags bit-mask from node->tags[tag] array */ | ||
| 1446 | static void set_iter_tags(struct radix_tree_iter *iter, | ||
| 1447 | struct radix_tree_node *node, unsigned offset, | ||
| 1448 | unsigned tag) | ||
| 1449 | { | ||
| 1450 | unsigned tag_long = offset / BITS_PER_LONG; | ||
| 1451 | unsigned tag_bit = offset % BITS_PER_LONG; | ||
| 1452 | |||
| 1453 | iter->tags = node->tags[tag][tag_long] >> tag_bit; | ||
| 1454 | |||
| 1455 | /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ | ||
| 1456 | if (tag_long < RADIX_TREE_TAG_LONGS - 1) { | ||
| 1457 | /* Pick tags from next element */ | ||
| 1458 | if (tag_bit) | ||
| 1459 | iter->tags |= node->tags[tag][tag_long + 1] << | ||
| 1460 | (BITS_PER_LONG - tag_bit); | ||
| 1461 | /* Clip chunk size, here only BITS_PER_LONG tags */ | ||
| 1462 | iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG); | ||
| 1463 | } | ||
| 1464 | } | ||
| 1465 | |||
| 1466 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 1467 | static void **skip_siblings(struct radix_tree_node **nodep, | ||
| 1468 | void **slot, struct radix_tree_iter *iter) | ||
| 1469 | { | ||
| 1470 | void *sib = node_to_entry(slot - 1); | ||
| 1471 | |||
| 1472 | while (iter->index < iter->next_index) { | ||
| 1473 | *nodep = rcu_dereference_raw(*slot); | ||
| 1474 | if (*nodep && *nodep != sib) | ||
| 1475 | return slot; | ||
| 1476 | slot++; | ||
| 1477 | iter->index = __radix_tree_iter_add(iter, 1); | ||
| 1478 | iter->tags >>= 1; | ||
| 1479 | } | ||
| 1480 | |||
| 1481 | *nodep = NULL; | ||
| 1482 | return NULL; | ||
| 1483 | } | ||
| 1484 | |||
| 1485 | void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, | ||
| 1486 | unsigned flags) | ||
| 1487 | { | ||
| 1488 | unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; | ||
| 1489 | struct radix_tree_node *node = rcu_dereference_raw(*slot); | ||
| 1490 | |||
| 1491 | slot = skip_siblings(&node, slot, iter); | ||
| 1492 | |||
| 1493 | while (radix_tree_is_internal_node(node)) { | ||
| 1494 | unsigned offset; | ||
| 1495 | unsigned long next_index; | ||
| 1496 | |||
| 1497 | if (node == RADIX_TREE_RETRY) | ||
| 1498 | return slot; | ||
| 1499 | node = entry_to_node(node); | ||
| 1500 | iter->node = node; | ||
| 1501 | iter->shift = node->shift; | ||
| 1502 | |||
| 1503 | if (flags & RADIX_TREE_ITER_TAGGED) { | ||
| 1504 | offset = radix_tree_find_next_bit(node, tag, 0); | ||
| 1505 | if (offset == RADIX_TREE_MAP_SIZE) | ||
| 1506 | return NULL; | ||
| 1507 | slot = &node->slots[offset]; | ||
| 1508 | iter->index = __radix_tree_iter_add(iter, offset); | ||
| 1509 | set_iter_tags(iter, node, offset, tag); | ||
| 1510 | node = rcu_dereference_raw(*slot); | ||
| 1511 | } else { | ||
| 1512 | offset = 0; | ||
| 1513 | slot = &node->slots[0]; | ||
| 1514 | for (;;) { | ||
| 1515 | node = rcu_dereference_raw(*slot); | ||
| 1516 | if (node) | ||
| 1517 | break; | ||
| 1518 | slot++; | ||
| 1519 | offset++; | ||
| 1520 | if (offset == RADIX_TREE_MAP_SIZE) | ||
| 1521 | return NULL; | ||
| 1522 | } | ||
| 1523 | iter->index = __radix_tree_iter_add(iter, offset); | ||
| 1524 | } | ||
| 1525 | if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0)) | ||
| 1526 | goto none; | ||
| 1527 | next_index = (iter->index | shift_maxindex(iter->shift)) + 1; | ||
| 1528 | if (next_index < iter->next_index) | ||
| 1529 | iter->next_index = next_index; | ||
| 1530 | } | ||
| 1531 | |||
| 1532 | return slot; | ||
| 1533 | none: | ||
| 1534 | iter->next_index = 0; | ||
| 1535 | return NULL; | ||
| 1536 | } | ||
| 1537 | EXPORT_SYMBOL(__radix_tree_next_slot); | ||
| 1538 | #else | ||
| 1539 | static void **skip_siblings(struct radix_tree_node **nodep, | ||
| 1540 | void **slot, struct radix_tree_iter *iter) | ||
| 1541 | { | ||
| 1542 | return slot; | ||
| 1543 | } | ||
| 1544 | #endif | ||
| 1545 | |||
| 1546 | void **radix_tree_iter_resume(void **slot, struct radix_tree_iter *iter) | ||
| 1547 | { | ||
| 1548 | struct radix_tree_node *node; | ||
| 1549 | |||
| 1550 | slot++; | ||
| 1551 | iter->index = __radix_tree_iter_add(iter, 1); | ||
| 1552 | node = rcu_dereference_raw(*slot); | ||
| 1553 | skip_siblings(&node, slot, iter); | ||
| 1554 | iter->next_index = iter->index; | ||
| 1555 | iter->tags = 0; | ||
| 1556 | return NULL; | ||
| 1557 | } | ||
| 1558 | EXPORT_SYMBOL(radix_tree_iter_resume); | ||
| 1559 | |||
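A hedged sketch of the intended calling pattern for radix_tree_iter_resume() (the loop body and rescheduling policy are illustrative): it repositions the iterator just past the current entry, so the caller can drop the lock and reschedule, and the next loop iteration restarts cleanly from a fresh chunk.

    void **slot;
    struct radix_tree_iter iter;

    rcu_read_lock();
    radix_tree_for_each_slot(slot, &tree, &iter, 0) {
            /* ... examine *slot ... */
            if (need_resched()) {
                    slot = radix_tree_iter_resume(slot, &iter);
                    cond_resched_rcu();     /* drops and re-takes rcu_read_lock() */
            }
    }
    rcu_read_unlock();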
| 1088 | /** | 1560 | /** |
| 1089 | * radix_tree_next_chunk - find next chunk of slots for iteration | 1561 | * radix_tree_next_chunk - find next chunk of slots for iteration |
| 1090 | * | 1562 | * |
| @@ -1110,7 +1582,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
| 1110 | * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. | 1582 | * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. |
| 1111 | * | 1583 | * |
| 1112 | * This condition also used by radix_tree_next_slot() to stop | 1584 | * This condition also used by radix_tree_next_slot() to stop |
| 1113 | * contiguous iterating, and forbid swithing to the next chunk. | 1585 | * contiguous iterating, and forbid switching to the next chunk. |
| 1114 | */ | 1586 | */ |
| 1115 | index = iter->next_index; | 1587 | index = iter->next_index; |
| 1116 | if (!index && iter->index) | 1588 | if (!index && iter->index) |
| @@ -1128,6 +1600,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
| 1128 | iter->index = index; | 1600 | iter->index = index; |
| 1129 | iter->next_index = maxindex + 1; | 1601 | iter->next_index = maxindex + 1; |
| 1130 | iter->tags = 1; | 1602 | iter->tags = 1; |
| 1603 | iter->node = NULL; | ||
| 1131 | __set_iter_shift(iter, 0); | 1604 | __set_iter_shift(iter, 0); |
| 1132 | return (void **)&root->rnode; | 1605 | return (void **)&root->rnode; |
| 1133 | } | 1606 | } |
| @@ -1143,9 +1616,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
| 1143 | return NULL; | 1616 | return NULL; |
| 1144 | 1617 | ||
| 1145 | if (flags & RADIX_TREE_ITER_TAGGED) | 1618 | if (flags & RADIX_TREE_ITER_TAGGED) |
| 1146 | offset = radix_tree_find_next_bit( | 1619 | offset = radix_tree_find_next_bit(node, tag, |
| 1147 | node->tags[tag], | ||
| 1148 | RADIX_TREE_MAP_SIZE, | ||
| 1149 | offset + 1); | 1620 | offset + 1); |
| 1150 | else | 1621 | else |
| 1151 | while (++offset < RADIX_TREE_MAP_SIZE) { | 1622 | while (++offset < RADIX_TREE_MAP_SIZE) { |
| @@ -1165,154 +1636,26 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
| 1165 | child = rcu_dereference_raw(node->slots[offset]); | 1636 | child = rcu_dereference_raw(node->slots[offset]); |
| 1166 | } | 1637 | } |
| 1167 | 1638 | ||
| 1168 | if ((child == NULL) || (child == RADIX_TREE_RETRY)) | 1639 | if (!child) |
| 1169 | goto restart; | 1640 | goto restart; |
| 1641 | if (child == RADIX_TREE_RETRY) | ||
| 1642 | break; | ||
| 1170 | } while (radix_tree_is_internal_node(child)); | 1643 | } while (radix_tree_is_internal_node(child)); |
| 1171 | 1644 | ||
| 1172 | /* Update the iterator state */ | 1645 | /* Update the iterator state */ |
| 1173 | iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); | 1646 | iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); |
| 1174 | iter->next_index = (index | node_maxindex(node)) + 1; | 1647 | iter->next_index = (index | node_maxindex(node)) + 1; |
| 1648 | iter->node = node; | ||
| 1175 | __set_iter_shift(iter, node->shift); | 1649 | __set_iter_shift(iter, node->shift); |
| 1176 | 1650 | ||
| 1177 | /* Construct iter->tags bit-mask from node->tags[tag] array */ | 1651 | if (flags & RADIX_TREE_ITER_TAGGED) |
| 1178 | if (flags & RADIX_TREE_ITER_TAGGED) { | 1652 | set_iter_tags(iter, node, offset, tag); |
| 1179 | unsigned tag_long, tag_bit; | ||
| 1180 | |||
| 1181 | tag_long = offset / BITS_PER_LONG; | ||
| 1182 | tag_bit = offset % BITS_PER_LONG; | ||
| 1183 | iter->tags = node->tags[tag][tag_long] >> tag_bit; | ||
| 1184 | /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ | ||
| 1185 | if (tag_long < RADIX_TREE_TAG_LONGS - 1) { | ||
| 1186 | /* Pick tags from next element */ | ||
| 1187 | if (tag_bit) | ||
| 1188 | iter->tags |= node->tags[tag][tag_long + 1] << | ||
| 1189 | (BITS_PER_LONG - tag_bit); | ||
| 1190 | /* Clip chunk size, here only BITS_PER_LONG tags */ | ||
| 1191 | iter->next_index = index + BITS_PER_LONG; | ||
| 1192 | } | ||
| 1193 | } | ||
| 1194 | 1653 | ||
| 1195 | return node->slots + offset; | 1654 | return node->slots + offset; |
| 1196 | } | 1655 | } |
| 1197 | EXPORT_SYMBOL(radix_tree_next_chunk); | 1656 | EXPORT_SYMBOL(radix_tree_next_chunk); |
| 1198 | 1657 | ||
| 1199 | /** | 1658 | /** |
| 1200 | * radix_tree_range_tag_if_tagged - for each item in given range set given | ||
| 1201 | * tag if item has another tag set | ||
| 1202 | * @root: radix tree root | ||
| 1203 | * @first_indexp: pointer to a starting index of a range to scan | ||
| 1204 | * @last_index: last index of a range to scan | ||
| 1205 | * @nr_to_tag: maximum number items to tag | ||
| 1206 | * @iftag: tag index to test | ||
| 1207 | * @settag: tag index to set if tested tag is set | ||
| 1208 | * | ||
| 1209 | * This function scans range of radix tree from first_index to last_index | ||
| 1210 | * (inclusive). For each item in the range if iftag is set, the function sets | ||
| 1211 | * also settag. The function stops either after tagging nr_to_tag items or | ||
| 1212 | * after reaching last_index. | ||
| 1213 | * | ||
| 1214 | * The tags must be set from the leaf level only and propagated back up the | ||
| 1215 | * path to the root. We must do this so that we resolve the full path before | ||
| 1216 | * setting any tags on intermediate nodes. If we set tags as we descend, then | ||
| 1217 | * we can get to the leaf node and find that the index that has the iftag | ||
| 1218 | set is outside the range we are scanning. This results in dangling tags and | ||
| 1219 | * can lead to problems with later tag operations (e.g. livelocks on lookups). | ||
| 1220 | * | ||
| 1221 | * The function returns the number of leaves where the tag was set and sets | ||
| 1222 | * *first_indexp to the first unscanned index. | ||
| 1223 | * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must | ||
| 1224 | * be prepared to handle that. | ||
| 1225 | */ | ||
| 1226 | unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, | ||
| 1227 | unsigned long *first_indexp, unsigned long last_index, | ||
| 1228 | unsigned long nr_to_tag, | ||
| 1229 | unsigned int iftag, unsigned int settag) | ||
| 1230 | { | ||
| 1231 | struct radix_tree_node *parent, *node, *child; | ||
| 1232 | unsigned long maxindex; | ||
| 1233 | unsigned long tagged = 0; | ||
| 1234 | unsigned long index = *first_indexp; | ||
| 1235 | |||
| 1236 | radix_tree_load_root(root, &child, &maxindex); | ||
| 1237 | last_index = min(last_index, maxindex); | ||
| 1238 | if (index > last_index) | ||
| 1239 | return 0; | ||
| 1240 | if (!nr_to_tag) | ||
| 1241 | return 0; | ||
| 1242 | if (!root_tag_get(root, iftag)) { | ||
| 1243 | *first_indexp = last_index + 1; | ||
| 1244 | return 0; | ||
| 1245 | } | ||
| 1246 | if (!radix_tree_is_internal_node(child)) { | ||
| 1247 | *first_indexp = last_index + 1; | ||
| 1248 | root_tag_set(root, settag); | ||
| 1249 | return 1; | ||
| 1250 | } | ||
| 1251 | |||
| 1252 | node = entry_to_node(child); | ||
| 1253 | |||
| 1254 | for (;;) { | ||
| 1255 | unsigned offset = radix_tree_descend(node, &child, index); | ||
| 1256 | if (!child) | ||
| 1257 | goto next; | ||
| 1258 | if (!tag_get(node, iftag, offset)) | ||
| 1259 | goto next; | ||
| 1260 | /* Sibling slots never have tags set on them */ | ||
| 1261 | if (radix_tree_is_internal_node(child)) { | ||
| 1262 | node = entry_to_node(child); | ||
| 1263 | continue; | ||
| 1264 | } | ||
| 1265 | |||
| 1266 | /* tag the leaf */ | ||
| 1267 | tagged++; | ||
| 1268 | tag_set(node, settag, offset); | ||
| 1269 | |||
| 1270 | /* walk back up the path tagging interior nodes */ | ||
| 1271 | parent = node; | ||
| 1272 | for (;;) { | ||
| 1273 | offset = parent->offset; | ||
| 1274 | parent = parent->parent; | ||
| 1275 | if (!parent) | ||
| 1276 | break; | ||
| 1277 | /* stop if we find a node with the tag already set */ | ||
| 1278 | if (tag_get(parent, settag, offset)) | ||
| 1279 | break; | ||
| 1280 | tag_set(parent, settag, offset); | ||
| 1281 | } | ||
| 1282 | next: | ||
| 1283 | /* Go to next entry in node */ | ||
| 1284 | index = ((index >> node->shift) + 1) << node->shift; | ||
| 1285 | /* Overflow can happen when last_index is ~0UL... */ | ||
| 1286 | if (index > last_index || !index) | ||
| 1287 | break; | ||
| 1288 | offset = (index >> node->shift) & RADIX_TREE_MAP_MASK; | ||
| 1289 | while (offset == 0) { | ||
| 1290 | /* | ||
| 1291 | * We've fully scanned this node. Go up. Because | ||
| 1292 | * last_index is guaranteed to be in the tree, what | ||
| 1293 | * we do below cannot wander astray. | ||
| 1294 | */ | ||
| 1295 | node = node->parent; | ||
| 1296 | offset = (index >> node->shift) & RADIX_TREE_MAP_MASK; | ||
| 1297 | } | ||
| 1298 | if (is_sibling_entry(node, node->slots[offset])) | ||
| 1299 | goto next; | ||
| 1300 | if (tagged >= nr_to_tag) | ||
| 1301 | break; | ||
| 1302 | } | ||
| 1303 | /* | ||
| 1304 | * We need not to tag the root tag if there is no tag which is set with | ||
| 1305 | * settag within the range from *first_indexp to last_index. | ||
| 1306 | */ | ||
| 1307 | if (tagged > 0) | ||
| 1308 | root_tag_set(root, settag); | ||
| 1309 | *first_indexp = index; | ||
| 1310 | |||
| 1311 | return tagged; | ||
| 1312 | } | ||
| 1313 | EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); | ||
| 1314 | |||
| 1315 | /** | ||
| 1316 | * radix_tree_gang_lookup - perform multiple lookup on a radix tree | 1659 | * radix_tree_gang_lookup - perform multiple lookup on a radix tree |
| 1317 | * @root: radix tree root | 1660 | * @root: radix tree root |
| 1318 | * @results: where the results of the lookup are placed | 1661 | * @results: where the results of the lookup are placed |
| @@ -1477,105 +1820,6 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, | |||
| 1477 | } | 1820 | } |
| 1478 | EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); | 1821 | EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); |
| 1479 | 1822 | ||
| 1480 | #if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) | ||
| 1481 | #include <linux/sched.h> /* for cond_resched() */ | ||
| 1482 | |||
| 1483 | struct locate_info { | ||
| 1484 | unsigned long found_index; | ||
| 1485 | bool stop; | ||
| 1486 | }; | ||
| 1487 | |||
| 1488 | /* | ||
| 1489 | * This linear search is at present only useful to shmem_unuse_inode(). | ||
| 1490 | */ | ||
| 1491 | static unsigned long __locate(struct radix_tree_node *slot, void *item, | ||
| 1492 | unsigned long index, struct locate_info *info) | ||
| 1493 | { | ||
| 1494 | unsigned long i; | ||
| 1495 | |||
| 1496 | do { | ||
| 1497 | unsigned int shift = slot->shift; | ||
| 1498 | |||
| 1499 | for (i = (index >> shift) & RADIX_TREE_MAP_MASK; | ||
| 1500 | i < RADIX_TREE_MAP_SIZE; | ||
| 1501 | i++, index += (1UL << shift)) { | ||
| 1502 | struct radix_tree_node *node = | ||
| 1503 | rcu_dereference_raw(slot->slots[i]); | ||
| 1504 | if (node == RADIX_TREE_RETRY) | ||
| 1505 | goto out; | ||
| 1506 | if (!radix_tree_is_internal_node(node)) { | ||
| 1507 | if (node == item) { | ||
| 1508 | info->found_index = index; | ||
| 1509 | info->stop = true; | ||
| 1510 | goto out; | ||
| 1511 | } | ||
| 1512 | continue; | ||
| 1513 | } | ||
| 1514 | node = entry_to_node(node); | ||
| 1515 | if (is_sibling_entry(slot, node)) | ||
| 1516 | continue; | ||
| 1517 | slot = node; | ||
| 1518 | break; | ||
| 1519 | } | ||
| 1520 | } while (i < RADIX_TREE_MAP_SIZE); | ||
| 1521 | |||
| 1522 | out: | ||
| 1523 | if ((index == 0) && (i == RADIX_TREE_MAP_SIZE)) | ||
| 1524 | info->stop = true; | ||
| 1525 | return index; | ||
| 1526 | } | ||
| 1527 | |||
| 1528 | /** | ||
| 1529 | * radix_tree_locate_item - search through radix tree for item | ||
| 1530 | * @root: radix tree root | ||
| 1531 | * @item: item to be found | ||
| 1532 | * | ||
| 1533 | * Returns index where item was found, or -1 if not found. | ||
| 1534 | * Caller must hold no lock (since this time-consuming function needs | ||
| 1535 | * to be preemptible), and must check afterwards if item is still there. | ||
| 1536 | */ | ||
| 1537 | unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) | ||
| 1538 | { | ||
| 1539 | struct radix_tree_node *node; | ||
| 1540 | unsigned long max_index; | ||
| 1541 | unsigned long cur_index = 0; | ||
| 1542 | struct locate_info info = { | ||
| 1543 | .found_index = -1, | ||
| 1544 | .stop = false, | ||
| 1545 | }; | ||
| 1546 | |||
| 1547 | do { | ||
| 1548 | rcu_read_lock(); | ||
| 1549 | node = rcu_dereference_raw(root->rnode); | ||
| 1550 | if (!radix_tree_is_internal_node(node)) { | ||
| 1551 | rcu_read_unlock(); | ||
| 1552 | if (node == item) | ||
| 1553 | info.found_index = 0; | ||
| 1554 | break; | ||
| 1555 | } | ||
| 1556 | |||
| 1557 | node = entry_to_node(node); | ||
| 1558 | |||
| 1559 | max_index = node_maxindex(node); | ||
| 1560 | if (cur_index > max_index) { | ||
| 1561 | rcu_read_unlock(); | ||
| 1562 | break; | ||
| 1563 | } | ||
| 1564 | |||
| 1565 | cur_index = __locate(node, item, cur_index, &info); | ||
| 1566 | rcu_read_unlock(); | ||
| 1567 | cond_resched(); | ||
| 1568 | } while (!info.stop && cur_index <= max_index); | ||
| 1569 | |||
| 1570 | return info.found_index; | ||
| 1571 | } | ||
| 1572 | #else | ||
| 1573 | unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) | ||
| 1574 | { | ||
| 1575 | return -1; | ||
| 1576 | } | ||
| 1577 | #endif /* CONFIG_SHMEM && CONFIG_SWAP */ | ||
| 1578 | |||
| 1579 | /** | 1823 | /** |
| 1580 | * __radix_tree_delete_node - try to free node after clearing a slot | 1824 | * __radix_tree_delete_node - try to free node after clearing a slot |
| 1581 | * @root: radix tree root | 1825 | * @root: radix tree root |
| @@ -1591,20 +1835,6 @@ void __radix_tree_delete_node(struct radix_tree_root *root, | |||
| 1591 | delete_node(root, node, NULL, NULL); | 1835 | delete_node(root, node, NULL, NULL); |
| 1592 | } | 1836 | } |
| 1593 | 1837 | ||
| 1594 | static inline void delete_sibling_entries(struct radix_tree_node *node, | ||
| 1595 | void *ptr, unsigned offset) | ||
| 1596 | { | ||
| 1597 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 1598 | int i; | ||
| 1599 | for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { | ||
| 1600 | if (node->slots[offset + i] != ptr) | ||
| 1601 | break; | ||
| 1602 | node->slots[offset + i] = NULL; | ||
| 1603 | node->count--; | ||
| 1604 | } | ||
| 1605 | #endif | ||
| 1606 | } | ||
| 1607 | |||
| 1608 | /** | 1838 | /** |
| 1609 | * radix_tree_delete_item - delete an item from a radix tree | 1839 | * radix_tree_delete_item - delete an item from a radix tree |
| 1610 | * @root: radix tree root | 1840 | * @root: radix tree root |
| @@ -1644,7 +1874,6 @@ void *radix_tree_delete_item(struct radix_tree_root *root, | |||
| 1644 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | 1874 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) |
| 1645 | node_tag_clear(root, node, tag, offset); | 1875 | node_tag_clear(root, node, tag, offset); |
| 1646 | 1876 | ||
| 1647 | delete_sibling_entries(node, node_to_entry(slot), offset); | ||
| 1648 | __radix_tree_replace(root, node, slot, NULL, NULL, NULL); | 1877 | __radix_tree_replace(root, node, slot, NULL, NULL, NULL); |
| 1649 | 1878 | ||
| 1650 | return entry; | 1879 | return entry; |
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c index 76734004358d..20bca3d44f67 100644 --- a/lib/raid6/avx2.c +++ b/lib/raid6/avx2.c | |||
| @@ -87,9 +87,57 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 87 | kernel_fpu_end(); | 87 | kernel_fpu_end(); |
| 88 | } | 88 | } |
| 89 | 89 | ||
| 90 | static void raid6_avx21_xor_syndrome(int disks, int start, int stop, | ||
| 91 | size_t bytes, void **ptrs) | ||
| 92 | { | ||
| 93 | u8 **dptr = (u8 **)ptrs; | ||
| 94 | u8 *p, *q; | ||
| 95 | int d, z, z0; | ||
| 96 | |||
| 97 | z0 = stop; /* P/Q right side optimization */ | ||
| 98 | p = dptr[disks-2]; /* XOR parity */ | ||
| 99 | q = dptr[disks-1]; /* RS syndrome */ | ||
| 100 | |||
| 101 | kernel_fpu_begin(); | ||
| 102 | |||
| 103 | asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); | ||
| 104 | |||
| 105 | for (d = 0 ; d < bytes ; d += 32) { | ||
| 106 | asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d])); | ||
| 107 | asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d])); | ||
| 108 | asm volatile("vpxor %ymm4,%ymm2,%ymm2"); | ||
| 109 | /* P/Q data pages */ | ||
| 110 | for (z = z0-1 ; z >= start ; z--) { | ||
| 111 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 112 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 113 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 114 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 115 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 116 | asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d])); | ||
| 117 | asm volatile("vpxor %ymm5,%ymm2,%ymm2"); | ||
| 118 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 119 | } | ||
| 120 | /* P/Q left side optimization */ | ||
| 121 | for (z = start-1 ; z >= 0 ; z--) { | ||
| 122 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 123 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 124 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 125 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 126 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 127 | } | ||
| 128 | asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d])); | ||
| 129 | /* Don't use movntdq for r/w memory area < cache line */ | ||
| 130 | asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d])); | ||
| 131 | asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d])); | ||
| 132 | } | ||
| 133 | |||
| 134 | asm volatile("sfence" : : : "memory"); | ||
| 135 | kernel_fpu_end(); | ||
| 136 | } | ||
| 137 | |||
| 90 | const struct raid6_calls raid6_avx2x1 = { | 138 | const struct raid6_calls raid6_avx2x1 = { |
| 91 | raid6_avx21_gen_syndrome, | 139 | raid6_avx21_gen_syndrome, |
| 92 | NULL, /* XOR not yet implemented */ | 140 | raid6_avx21_xor_syndrome, |
| 93 | raid6_have_avx2, | 141 | raid6_have_avx2, |
| 94 | "avx2x1", | 142 | "avx2x1", |
| 95 | 1 /* Has cache hints */ | 143 | 1 /* Has cache hints */ |
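For reference, a hedged byte-at-a-time sketch of what the new xor_syndrome routines compute (this scalar form is for illustration only and is not part of the patch; the AVX2 code performs the same arithmetic 32, 64 or 128 bytes per step): P is updated with the plain XOR of the rewritten data range, and Q with the matching partial Reed-Solomon syndrome, evaluated Horner-style and then shifted past the untouched low-numbered disks.

    /* multiply by x in GF(2^8) with the RAID-6 polynomial 0x11d */
    static inline u8 gf_mul2(u8 b)
    {
            return (b << 1) ^ ((b & 0x80) ? 0x1d : 0);
    }

    static void xor_syndrome_ref(int disks, int start, int stop,
                                 size_t bytes, void **ptrs)
    {
            u8 **dptr = (u8 **)ptrs;
            u8 *p = dptr[disks - 2];        /* XOR parity */
            u8 *q = dptr[disks - 1];        /* RS syndrome */
            size_t d;
            int z;

            for (d = 0; d < bytes; d++) {
                    u8 wp = dptr[stop][d];
                    u8 wq = dptr[stop][d];

                    /* P/Q over the rewritten data range, Horner-style for Q */
                    for (z = stop - 1; z >= start; z--) {
                            wp ^= dptr[z][d];
                            wq = gf_mul2(wq) ^ dptr[z][d];
                    }
                    /* P/Q left side optimization: only multiply below start */
                    for (z = start - 1; z >= 0; z--)
                            wq = gf_mul2(wq);

                    p[d] ^= wp;
                    q[d] ^= wq;
            }
    }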
| @@ -149,9 +197,77 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 149 | kernel_fpu_end(); | 197 | kernel_fpu_end(); |
| 150 | } | 198 | } |
| 151 | 199 | ||
| 200 | static void raid6_avx22_xor_syndrome(int disks, int start, int stop, | ||
| 201 | size_t bytes, void **ptrs) | ||
| 202 | { | ||
| 203 | u8 **dptr = (u8 **)ptrs; | ||
| 204 | u8 *p, *q; | ||
| 205 | int d, z, z0; | ||
| 206 | |||
| 207 | z0 = stop; /* P/Q right side optimization */ | ||
| 208 | p = dptr[disks-2]; /* XOR parity */ | ||
| 209 | q = dptr[disks-1]; /* RS syndrome */ | ||
| 210 | |||
| 211 | kernel_fpu_begin(); | ||
| 212 | |||
| 213 | asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); | ||
| 214 | |||
| 215 | for (d = 0 ; d < bytes ; d += 64) { | ||
| 216 | asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d])); | ||
| 217 | asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32])); | ||
| 218 | asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d])); | ||
| 219 | asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32])); | ||
| 220 | asm volatile("vpxor %ymm4,%ymm2,%ymm2"); | ||
| 221 | asm volatile("vpxor %ymm6,%ymm3,%ymm3"); | ||
| 222 | /* P/Q data pages */ | ||
| 223 | for (z = z0-1 ; z >= start ; z--) { | ||
| 224 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 225 | asm volatile("vpxor %ymm7,%ymm7,%ymm7"); | ||
| 226 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 227 | asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); | ||
| 228 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 229 | asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); | ||
| 230 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 231 | asm volatile("vpand %ymm0,%ymm7,%ymm7"); | ||
| 232 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 233 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 234 | asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d])); | ||
| 235 | asm volatile("vmovdqa %0,%%ymm7" | ||
| 236 | :: "m" (dptr[z][d+32])); | ||
| 237 | asm volatile("vpxor %ymm5,%ymm2,%ymm2"); | ||
| 238 | asm volatile("vpxor %ymm7,%ymm3,%ymm3"); | ||
| 239 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 240 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 241 | } | ||
| 242 | /* P/Q left side optimization */ | ||
| 243 | for (z = start-1 ; z >= 0 ; z--) { | ||
| 244 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 245 | asm volatile("vpxor %ymm7,%ymm7,%ymm7"); | ||
| 246 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 247 | asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); | ||
| 248 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 249 | asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); | ||
| 250 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 251 | asm volatile("vpand %ymm0,%ymm7,%ymm7"); | ||
| 252 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 253 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 254 | } | ||
| 255 | asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d])); | ||
| 256 | asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32])); | ||
| 257 | /* Don't use movntdq for r/w memory area < cache line */ | ||
| 258 | asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d])); | ||
| 259 | asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32])); | ||
| 260 | asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d])); | ||
| 261 | asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32])); | ||
| 262 | } | ||
| 263 | |||
| 264 | asm volatile("sfence" : : : "memory"); | ||
| 265 | kernel_fpu_end(); | ||
| 266 | } | ||
| 267 | |||
| 152 | const struct raid6_calls raid6_avx2x2 = { | 268 | const struct raid6_calls raid6_avx2x2 = { |
| 153 | raid6_avx22_gen_syndrome, | 269 | raid6_avx22_gen_syndrome, |
| 154 | NULL, /* XOR not yet implemented */ | 270 | raid6_avx22_xor_syndrome, |
| 155 | raid6_have_avx2, | 271 | raid6_have_avx2, |
| 156 | "avx2x2", | 272 | "avx2x2", |
| 157 | 1 /* Has cache hints */ | 273 | 1 /* Has cache hints */ |
| @@ -242,9 +358,119 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) | |||
| 242 | kernel_fpu_end(); | 358 | kernel_fpu_end(); |
| 243 | } | 359 | } |
| 244 | 360 | ||
| 361 | static void raid6_avx24_xor_syndrome(int disks, int start, int stop, | ||
| 362 | size_t bytes, void **ptrs) | ||
| 363 | { | ||
| 364 | u8 **dptr = (u8 **)ptrs; | ||
| 365 | u8 *p, *q; | ||
| 366 | int d, z, z0; | ||
| 367 | |||
| 368 | z0 = stop; /* P/Q right side optimization */ | ||
| 369 | p = dptr[disks-2]; /* XOR parity */ | ||
| 370 | q = dptr[disks-1]; /* RS syndrome */ | ||
| 371 | |||
| 372 | kernel_fpu_begin(); | ||
| 373 | |||
| 374 | asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0])); | ||
| 375 | |||
| 376 | for (d = 0 ; d < bytes ; d += 128) { | ||
| 377 | asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d])); | ||
| 378 | asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32])); | ||
| 379 | asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64])); | ||
| 380 | asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96])); | ||
| 381 | asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d])); | ||
| 382 | asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32])); | ||
| 383 | asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64])); | ||
| 384 | asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96])); | ||
| 385 | asm volatile("vpxor %ymm4,%ymm2,%ymm2"); | ||
| 386 | asm volatile("vpxor %ymm6,%ymm3,%ymm3"); | ||
| 387 | asm volatile("vpxor %ymm12,%ymm10,%ymm10"); | ||
| 388 | asm volatile("vpxor %ymm14,%ymm11,%ymm11"); | ||
| 389 | /* P/Q data pages */ | ||
| 390 | for (z = z0-1 ; z >= start ; z--) { | ||
| 391 | asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); | ||
| 392 | asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64])); | ||
| 393 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 394 | asm volatile("vpxor %ymm7,%ymm7,%ymm7"); | ||
| 395 | asm volatile("vpxor %ymm13,%ymm13,%ymm13"); | ||
| 396 | asm volatile("vpxor %ymm15,%ymm15,%ymm15"); | ||
| 397 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 398 | asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); | ||
| 399 | asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13"); | ||
| 400 | asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15"); | ||
| 401 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 402 | asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); | ||
| 403 | asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); | ||
| 404 | asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); | ||
| 405 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 406 | asm volatile("vpand %ymm0,%ymm7,%ymm7"); | ||
| 407 | asm volatile("vpand %ymm0,%ymm13,%ymm13"); | ||
| 408 | asm volatile("vpand %ymm0,%ymm15,%ymm15"); | ||
| 409 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 410 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 411 | asm volatile("vpxor %ymm13,%ymm12,%ymm12"); | ||
| 412 | asm volatile("vpxor %ymm15,%ymm14,%ymm14"); | ||
| 413 | asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d])); | ||
| 414 | asm volatile("vmovdqa %0,%%ymm7" | ||
| 415 | :: "m" (dptr[z][d+32])); | ||
| 416 | asm volatile("vmovdqa %0,%%ymm13" | ||
| 417 | :: "m" (dptr[z][d+64])); | ||
| 418 | asm volatile("vmovdqa %0,%%ymm15" | ||
| 419 | :: "m" (dptr[z][d+96])); | ||
| 420 | asm volatile("vpxor %ymm5,%ymm2,%ymm2"); | ||
| 421 | asm volatile("vpxor %ymm7,%ymm3,%ymm3"); | ||
| 422 | asm volatile("vpxor %ymm13,%ymm10,%ymm10"); | ||
| 423 | asm volatile("vpxor %ymm15,%ymm11,%ymm11"); | ||
| 424 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 425 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 426 | asm volatile("vpxor %ymm13,%ymm12,%ymm12"); | ||
| 427 | asm volatile("vpxor %ymm15,%ymm14,%ymm14"); | ||
| 428 | } | ||
| 429 | asm volatile("prefetchnta %0" :: "m" (q[d])); | ||
| 430 | asm volatile("prefetchnta %0" :: "m" (q[d+64])); | ||
| 431 | /* P/Q left side optimization */ | ||
| 432 | for (z = start-1 ; z >= 0 ; z--) { | ||
| 433 | asm volatile("vpxor %ymm5,%ymm5,%ymm5"); | ||
| 434 | asm volatile("vpxor %ymm7,%ymm7,%ymm7"); | ||
| 435 | asm volatile("vpxor %ymm13,%ymm13,%ymm13"); | ||
| 436 | asm volatile("vpxor %ymm15,%ymm15,%ymm15"); | ||
| 437 | asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); | ||
| 438 | asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); | ||
| 439 | asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13"); | ||
| 440 | asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15"); | ||
| 441 | asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); | ||
| 442 | asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); | ||
| 443 | asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); | ||
| 444 | asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); | ||
| 445 | asm volatile("vpand %ymm0,%ymm5,%ymm5"); | ||
| 446 | asm volatile("vpand %ymm0,%ymm7,%ymm7"); | ||
| 447 | asm volatile("vpand %ymm0,%ymm13,%ymm13"); | ||
| 448 | asm volatile("vpand %ymm0,%ymm15,%ymm15"); | ||
| 449 | asm volatile("vpxor %ymm5,%ymm4,%ymm4"); | ||
| 450 | asm volatile("vpxor %ymm7,%ymm6,%ymm6"); | ||
| 451 | asm volatile("vpxor %ymm13,%ymm12,%ymm12"); | ||
| 452 | asm volatile("vpxor %ymm15,%ymm14,%ymm14"); | ||
| 453 | } | ||
| 454 | asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); | ||
| 455 | asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); | ||
| 456 | asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64])); | ||
| 457 | asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96])); | ||
| 458 | asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d])); | ||
| 459 | asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32])); | ||
| 460 | asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64])); | ||
| 461 | asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96])); | ||
| 462 | asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); | ||
| 463 | asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); | ||
| 464 | asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64])); | ||
| 465 | asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96])); | ||
| 466 | } | ||
| 467 | asm volatile("sfence" : : : "memory"); | ||
| 468 | kernel_fpu_end(); | ||
| 469 | } | ||
| 470 | |||
| 245 | const struct raid6_calls raid6_avx2x4 = { | 471 | const struct raid6_calls raid6_avx2x4 = { |
| 246 | raid6_avx24_gen_syndrome, | 472 | raid6_avx24_gen_syndrome, |
| 247 | NULL, /* XOR not yet implemented */ | 473 | raid6_avx24_xor_syndrome, |
| 248 | raid6_have_avx2, | 474 | raid6_have_avx2, |
| 249 | "avx2x4", | 475 | "avx2x4", |
| 250 | 1 /* Has cache hints */ | 476 | 1 /* Has cache hints */ |
diff --git a/lib/timerqueue.c b/lib/timerqueue.c index 782ae8ca2c06..adc6ee0a5126 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c | |||
| @@ -48,7 +48,7 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) | |||
| 48 | while (*p) { | 48 | while (*p) { |
| 49 | parent = *p; | 49 | parent = *p; |
| 50 | ptr = rb_entry(parent, struct timerqueue_node, node); | 50 | ptr = rb_entry(parent, struct timerqueue_node, node); |
| 51 | if (node->expires.tv64 < ptr->expires.tv64) | 51 | if (node->expires < ptr->expires) |
| 52 | p = &(*p)->rb_left; | 52 | p = &(*p)->rb_left; |
| 53 | else | 53 | else |
| 54 | p = &(*p)->rb_right; | 54 | p = &(*p)->rb_right; |
| @@ -56,7 +56,7 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) | |||
| 56 | rb_link_node(&node->node, parent, p); | 56 | rb_link_node(&node->node, parent, p); |
| 57 | rb_insert_color(&node->node, &head->head); | 57 | rb_insert_color(&node->node, &head->head); |
| 58 | 58 | ||
| 59 | if (!head->next || node->expires.tv64 < head->next->expires.tv64) { | 59 | if (!head->next || node->expires < head->next->expires) { |
| 60 | head->next = node; | 60 | head->next = node; |
| 61 | return true; | 61 | return true; |
| 62 | } | 62 | } |
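With ktime_t now a plain scalar nanosecond value, the expires fields compare directly, as the hunks above show. A small hedged usage sketch of the queue this file maintains (the deadlines are illustrative):

    struct timerqueue_head head;
    struct timerqueue_node a, b;

    timerqueue_init_head(&head);
    timerqueue_init(&a);
    timerqueue_init(&b);
    a.expires = ktime_set(0, 500 * NSEC_PER_USEC);  /* 500 us */
    b.expires = ktime_set(1, 0);                    /* 1 s */
    timerqueue_add(&head, &a);
    timerqueue_add(&head, &b);
    /* timerqueue_getnext(&head) now returns &a, the earliest deadline */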
