Diffstat (limited to 'lib')
-rw-r--r--   lib/Kconfig              |   28
-rw-r--r--   lib/Kconfig.debug        |   20
-rw-r--r--   lib/Makefile             |   11
-rw-r--r--   lib/assoc_array.c        | 1746
-rw-r--r--   lib/crc32.c              |  456
-rw-r--r--   lib/debugobjects.c       |    2
-rw-r--r--   lib/digsig.c             |    2
-rw-r--r--   lib/genalloc.c           |   28
-rw-r--r--   lib/kfifo.c              |    4
-rw-r--r--   lib/kobject.c            |   90
-rw-r--r--   lib/llist.c              |   22
-rw-r--r--   lib/locking-selftest.c   |    2
-rw-r--r--   lib/lockref.c            |    3
-rw-r--r--   lib/mpi/mpiutil.c        |    3
-rw-r--r--   lib/percpu-rwsem.c       |  165
-rw-r--r--   lib/percpu_counter.c     |   15
-rw-r--r--   lib/percpu_ida.c         |   94
-rw-r--r--   lib/percpu_test.c        |  138
-rw-r--r--   lib/random32.c           |  311
-rw-r--r--   lib/rwsem-spinlock.c     |  296
-rw-r--r--   lib/rwsem.c              |  293
-rw-r--r--   lib/scatterlist.c        |    3
-rw-r--r--   lib/show_mem.c           |   39
-rw-r--r--   lib/smp_processor_id.c   |    3
-rw-r--r--   lib/spinlock_debug.c     |  302
-rw-r--r--   lib/swiotlb.c            |    6
-rw-r--r--   lib/vsprintf.c           |   55
27 files changed, 2752 insertions(+), 1385 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig
index b3c8be0da17f..991c98bc4a3f 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -51,13 +51,6 @@ config PERCPU_RWSEM
 config ARCH_USE_CMPXCHG_LOCKREF
 	bool
 
-config CMPXCHG_LOCKREF
-	def_bool y if ARCH_USE_CMPXCHG_LOCKREF
-	depends on SMP
-	depends on !GENERIC_LOCKBREAK
-	depends on !DEBUG_SPINLOCK
-	depends on !DEBUG_LOCK_ALLOC
-
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
@@ -189,6 +182,13 @@ config AUDIT_GENERIC
 	depends on AUDIT && !AUDIT_ARCH
 	default y
 
+config RANDOM32_SELFTEST
+	bool "PRNG perform self test on init"
+	default n
+	help
+	  This option enables the 32 bit PRNG library functions to perform a
+	  self test on initialization.
+
 #
 # compression support is select'ed if needed
 #
@@ -322,6 +322,20 @@ config TEXTSEARCH_FSM
 config BTREE
 	boolean
 
+config ASSOCIATIVE_ARRAY
+	bool
+	help
+	  Generic associative array. Can be searched and iterated over whilst
+	  it is being modified. It is also reasonably quick to search and
+	  modify. The algorithms are non-recursive, and the trees are highly
+	  capacious.
+
+	  See:
+
+		Documentation/assoc_array.txt
+
+	  for more information.
+
 config HAS_IOMEM
 	boolean
 	depends on !NO_IOMEM
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 06344d986eb9..db25707aa41b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -312,6 +312,15 @@ config MAGIC_SYSRQ
 	  keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
 	  unless you really know what this hack does.
 
+config MAGIC_SYSRQ_DEFAULT_ENABLE
+	hex "Enable magic SysRq key functions by default"
+	depends on MAGIC_SYSRQ
+	default 0x1
+	help
+	  Specifies which SysRq key functions are enabled by default.
+	  This may be set to 1 or 0 to enable or disable them all, or
+	  to a bitmask as described in Documentation/sysrq.txt.
+
 config DEBUG_KERNEL
 	bool "Kernel debugging"
 	help
@@ -983,7 +992,7 @@ config DEBUG_KOBJECT
 
 config DEBUG_KOBJECT_RELEASE
 	bool "kobject release debugging"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_OBJECTS_TIMERS
 	help
 	  kobjects are reference counted objects. This means that their
 	  last reference count put is not predictable, and the kobject can
@@ -1472,6 +1481,15 @@ config INTERVAL_TREE_TEST
 	help
 	  A benchmark measuring the performance of the interval tree library
 
+config PERCPU_TEST
+	tristate "Per cpu operations test"
+	depends on m && DEBUG_KERNEL
+	help
+	  Enable this option to build test module which validates per-cpu
+	  operations.
+
+	  If unsure, say N.
+
 config ATOMIC64_SELFTEST
 	bool "Perform an atomic64_t self-test at boot"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index f3bb2cb98adf..a459c31e8c6b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	 earlycpio.o percpu-refcount.o percpu_ida.o
+	 earlycpio.o
 
 obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
 lib-$(CONFIG_MMU) += ioremap.o
@@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
 	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
-	 percpu_ida.o
+	 percpu-refcount.o percpu_ida.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
@@ -42,15 +42,12 @@ obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o
 obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
-obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
-lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
-lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
-lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
 
 CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
@@ -157,6 +154,8 @@ obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o
 
 interval_tree_test-objs := interval_tree_test_main.o interval_tree.o
 
+obj-$(CONFIG_PERCPU_TEST) += percpu_test.o
+
 obj-$(CONFIG_ASN1) += asn1_decoder.o
 
 obj-$(CONFIG_FONT_SUPPORT) += fonts/
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
new file mode 100644
index 000000000000..17edeaf19180
--- /dev/null
+++ b/lib/assoc_array.c
@@ -0,0 +1,1746 @@
1/* Generic associative array implementation.
2 *
3 * See Documentation/assoc_array.txt for information.
4 *
5 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
6 * Written by David Howells (dhowells@redhat.com)
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public Licence
10 * as published by the Free Software Foundation; either version
11 * 2 of the Licence, or (at your option) any later version.
12 */
13//#define DEBUG
14#include <linux/slab.h>
15#include <linux/err.h>
16#include <linux/assoc_array_priv.h>
17
18/*
19 * Iterate over an associative array. The caller must hold the RCU read lock
20 * or better.
21 */
22static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
23 const struct assoc_array_ptr *stop,
24 int (*iterator)(const void *leaf,
25 void *iterator_data),
26 void *iterator_data)
27{
28 const struct assoc_array_shortcut *shortcut;
29 const struct assoc_array_node *node;
30 const struct assoc_array_ptr *cursor, *ptr, *parent;
31 unsigned long has_meta;
32 int slot, ret;
33
34 cursor = root;
35
36begin_node:
37 if (assoc_array_ptr_is_shortcut(cursor)) {
38 /* Descend through a shortcut */
39 shortcut = assoc_array_ptr_to_shortcut(cursor);
40 smp_read_barrier_depends();
41 cursor = ACCESS_ONCE(shortcut->next_node);
42 }
43
44 node = assoc_array_ptr_to_node(cursor);
45 smp_read_barrier_depends();
46 slot = 0;
47
48 /* We perform two passes of each node.
49 *
50 * The first pass does all the leaves in this node. This means we
51 * don't miss any leaves if the node is split up by insertion whilst
52 * we're iterating over the branches rooted here (we may, however, see
53 * some leaves twice).
54 */
55 has_meta = 0;
56 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
57 ptr = ACCESS_ONCE(node->slots[slot]);
58 has_meta |= (unsigned long)ptr;
59 if (ptr && assoc_array_ptr_is_leaf(ptr)) {
60 /* We need a barrier between the read of the pointer
61 * and dereferencing the pointer - but only if we are
62 * actually going to dereference it.
63 */
64 smp_read_barrier_depends();
65
66 /* Invoke the callback */
67 ret = iterator(assoc_array_ptr_to_leaf(ptr),
68 iterator_data);
69 if (ret)
70 return ret;
71 }
72 }
73
74 /* The second pass attends to all the metadata pointers. If we follow
75 * one of these we may find that we don't come back here, but rather go
76 * back to a replacement node with the leaves in a different layout.
77 *
78 * We are guaranteed to make progress, however, as the slot number for
79 * a particular portion of the key space cannot change - and we
80 * continue at the back pointer + 1.
81 */
82 if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
83 goto finished_node;
84 slot = 0;
85
86continue_node:
87 node = assoc_array_ptr_to_node(cursor);
88 smp_read_barrier_depends();
89
90 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
91 ptr = ACCESS_ONCE(node->slots[slot]);
92 if (assoc_array_ptr_is_meta(ptr)) {
93 cursor = ptr;
94 goto begin_node;
95 }
96 }
97
98finished_node:
99 /* Move up to the parent (may need to skip back over a shortcut) */
100 parent = ACCESS_ONCE(node->back_pointer);
101 slot = node->parent_slot;
102 if (parent == stop)
103 return 0;
104
105 if (assoc_array_ptr_is_shortcut(parent)) {
106 shortcut = assoc_array_ptr_to_shortcut(parent);
107 smp_read_barrier_depends();
108 cursor = parent;
109 parent = ACCESS_ONCE(shortcut->back_pointer);
110 slot = shortcut->parent_slot;
111 if (parent == stop)
112 return 0;
113 }
114
115 /* Ascend to next slot in parent node */
116 cursor = parent;
117 slot++;
118 goto continue_node;
119}
120
121/**
122 * assoc_array_iterate - Pass all objects in the array to a callback
123 * @array: The array to iterate over.
124 * @iterator: The callback function.
125 * @iterator_data: Private data for the callback function.
126 *
127 * Iterate over all the objects in an associative array. Each one will be
128 * presented to the iterator function.
129 *
130 * If the array is being modified concurrently with the iteration then it is
131 * possible that some objects in the array will be passed to the iterator
132 * callback more than once - though every object should be passed at least
133 * once. If this is undesirable then the caller must lock against modification
134 * for the duration of this function.
135 *
136 * The function will return 0 if no objects were in the array or else it will
137 * return the result of the last iterator function called. Iteration stops
138 * immediately if any call to the iteration function results in a non-zero
139 * return.
140 *
141 * The caller should hold the RCU read lock or better if concurrent
142 * modification is possible.
143 */
144int assoc_array_iterate(const struct assoc_array *array,
145 int (*iterator)(const void *object,
146 void *iterator_data),
147 void *iterator_data)
148{
149 struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
150
151 if (!root)
152 return 0;
153 return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
154}
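
As a rough illustration of the callback contract described above, a minimal sketch of a caller might look like the following; only assoc_array_iterate() and its callback signature come from this file, while the counter and the surrounding function are hypothetical.

static int count_one(const void *object, void *iterator_data)
{
	unsigned long *count = iterator_data;

	(*count)++;
	return 0;	/* returning non-zero here would stop the iteration */
}

static unsigned long count_objects(const struct assoc_array *array)
{
	unsigned long count = 0;

	/* Hypothetical caller: hold the RCU read lock across the walk. */
	rcu_read_lock();
	assoc_array_iterate(array, count_one, &count);
	rcu_read_unlock();
	return count;
}
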
155
156enum assoc_array_walk_status {
157 assoc_array_walk_tree_empty,
158 assoc_array_walk_found_terminal_node,
159 assoc_array_walk_found_wrong_shortcut,
160};
161
162struct assoc_array_walk_result {
163 struct {
164 struct assoc_array_node *node; /* Node in which leaf might be found */
165 int level;
166 int slot;
167 } terminal_node;
168 struct {
169 struct assoc_array_shortcut *shortcut;
170 int level;
171 int sc_level;
172 unsigned long sc_segments;
173 unsigned long dissimilarity;
174 } wrong_shortcut;
175};
176
177/*
178 * Navigate through the internal tree looking for the closest node to the key.
179 */
180static enum assoc_array_walk_status
181assoc_array_walk(const struct assoc_array *array,
182 const struct assoc_array_ops *ops,
183 const void *index_key,
184 struct assoc_array_walk_result *result)
185{
186 struct assoc_array_shortcut *shortcut;
187 struct assoc_array_node *node;
188 struct assoc_array_ptr *cursor, *ptr;
189 unsigned long sc_segments, dissimilarity;
190 unsigned long segments;
191 int level, sc_level, next_sc_level;
192 int slot;
193
194 pr_devel("-->%s()\n", __func__);
195
196 cursor = ACCESS_ONCE(array->root);
197 if (!cursor)
198 return assoc_array_walk_tree_empty;
199
200 level = 0;
201
202 /* Use segments from the key for the new leaf to navigate through the
203 * internal tree, skipping through nodes and shortcuts that are on
204 * route to the destination. Eventually we'll come to a slot that is
205 * either empty or contains a leaf at which point we've found a node in
206 * which the leaf we're looking for might be found or into which it
207 * should be inserted.
208 */
209jumped:
210 segments = ops->get_key_chunk(index_key, level);
211 pr_devel("segments[%d]: %lx\n", level, segments);
212
213 if (assoc_array_ptr_is_shortcut(cursor))
214 goto follow_shortcut;
215
216consider_node:
217 node = assoc_array_ptr_to_node(cursor);
218 smp_read_barrier_depends();
219
220 slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
221 slot &= ASSOC_ARRAY_FAN_MASK;
222 ptr = ACCESS_ONCE(node->slots[slot]);
223
224 pr_devel("consider slot %x [ix=%d type=%lu]\n",
225 slot, level, (unsigned long)ptr & 3);
226
227 if (!assoc_array_ptr_is_meta(ptr)) {
228 /* The node doesn't have a node/shortcut pointer in the slot
229 * corresponding to the index key that we have to follow.
230 */
231 result->terminal_node.node = node;
232 result->terminal_node.level = level;
233 result->terminal_node.slot = slot;
234 pr_devel("<--%s() = terminal_node\n", __func__);
235 return assoc_array_walk_found_terminal_node;
236 }
237
238 if (assoc_array_ptr_is_node(ptr)) {
239 /* There is a pointer to a node in the slot corresponding to
240 * this index key segment, so we need to follow it.
241 */
242 cursor = ptr;
243 level += ASSOC_ARRAY_LEVEL_STEP;
244 if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
245 goto consider_node;
246 goto jumped;
247 }
248
249 /* There is a shortcut in the slot corresponding to the index key
250 * segment. We follow the shortcut if its partial index key matches
251 * this leaf's. Otherwise we need to split the shortcut.
252 */
253 cursor = ptr;
254follow_shortcut:
255 shortcut = assoc_array_ptr_to_shortcut(cursor);
256 smp_read_barrier_depends();
257 pr_devel("shortcut to %d\n", shortcut->skip_to_level);
258 sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
259 BUG_ON(sc_level > shortcut->skip_to_level);
260
261 do {
262 /* Check the leaf against the shortcut's index key a word at a
263 * time, trimming the final word (the shortcut stores the index
264 * key completely from the root to the shortcut's target).
265 */
266 if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
267 segments = ops->get_key_chunk(index_key, sc_level);
268
269 sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
270 dissimilarity = segments ^ sc_segments;
271
272 if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
273 /* Trim segments that are beyond the shortcut */
274 int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
275 dissimilarity &= ~(ULONG_MAX << shift);
276 next_sc_level = shortcut->skip_to_level;
277 } else {
278 next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
279 next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
280 }
281
282 if (dissimilarity != 0) {
283 /* This shortcut points elsewhere */
284 result->wrong_shortcut.shortcut = shortcut;
285 result->wrong_shortcut.level = level;
286 result->wrong_shortcut.sc_level = sc_level;
287 result->wrong_shortcut.sc_segments = sc_segments;
288 result->wrong_shortcut.dissimilarity = dissimilarity;
289 return assoc_array_walk_found_wrong_shortcut;
290 }
291
292 sc_level = next_sc_level;
293 } while (sc_level < shortcut->skip_to_level);
294
295 /* The shortcut matches the leaf's index to this point. */
296 cursor = ACCESS_ONCE(shortcut->next_node);
297 if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
298 level = sc_level;
299 goto jumped;
300 } else {
301 level = sc_level;
302 goto consider_node;
303 }
304}
305
306/**
307 * assoc_array_find - Find an object by index key
308 * @array: The associative array to search.
309 * @ops: The operations to use.
310 * @index_key: The key to the object.
311 *
312 * Find an object in an associative array by walking through the internal tree
313 * to the node that should contain the object and then searching the leaves
314 * there. NULL is returned if the requested object was not found in the array.
315 *
316 * The caller must hold the RCU read lock or better.
317 */
318void *assoc_array_find(const struct assoc_array *array,
319 const struct assoc_array_ops *ops,
320 const void *index_key)
321{
322 struct assoc_array_walk_result result;
323 const struct assoc_array_node *node;
324 const struct assoc_array_ptr *ptr;
325 const void *leaf;
326 int slot;
327
328 if (assoc_array_walk(array, ops, index_key, &result) !=
329 assoc_array_walk_found_terminal_node)
330 return NULL;
331
332 node = result.terminal_node.node;
333 smp_read_barrier_depends();
334
335 /* If the target key is available to us, it has to be pointed to by
336 * the terminal node.
337 */
338 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
339 ptr = ACCESS_ONCE(node->slots[slot]);
340 if (ptr && assoc_array_ptr_is_leaf(ptr)) {
341 /* We need a barrier between the read of the pointer
342 * and dereferencing the pointer - but only if we are
343 * actually going to dereference it.
344 */
345 leaf = assoc_array_ptr_to_leaf(ptr);
346 smp_read_barrier_depends();
347 if (ops->compare_object(leaf, index_key))
348 return (void *)leaf;
349 }
350 }
351
352 return NULL;
353}
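
To show how an ops table plugs into assoc_array_find(), here is a hedged sketch for objects keyed by a single unsigned long. The member names mirror the ops-> calls made in this file; the exact prototypes live in include/linux/assoc_array.h, so the signatures below should be read as assumptions, and .diff_objects is left out of the sketch even though a real user must supply it.

/* Hypothetical object type keyed by one unsigned long. */
struct my_object {
	unsigned long key;
	/* ... payload ... */
};

static unsigned long my_get_key_chunk(const void *index_key, int level)
{
	return *(const unsigned long *)index_key;	/* the whole key fits one chunk */
}

static unsigned long my_get_object_key_chunk(const void *object, int level)
{
	return ((const struct my_object *)object)->key;
}

static bool my_compare_object(const void *object, const void *index_key)
{
	return ((const struct my_object *)object)->key ==
		*(const unsigned long *)index_key;
}

static void my_free_object(void *object)
{
	kfree(object);	/* hypothetical: objects assumed to be kmalloc'd */
}

static const struct assoc_array_ops my_ops = {
	.get_key_chunk		= my_get_key_chunk,
	.get_object_key_chunk	= my_get_object_key_chunk,
	.compare_object		= my_compare_object,
	.free_object		= my_free_object,
	/* .diff_objects omitted from this sketch */
};

static struct my_object *my_lookup(const struct assoc_array *array,
				   unsigned long key)
{
	struct my_object *obj;

	rcu_read_lock();
	obj = assoc_array_find(array, &my_ops, &key);
	rcu_read_unlock();
	return obj;
}
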
354
355/*
356 * Destructively iterate over an associative array. The caller must prevent
357 * other simultaneous accesses.
358 */
359static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
360 const struct assoc_array_ops *ops)
361{
362 struct assoc_array_shortcut *shortcut;
363 struct assoc_array_node *node;
364 struct assoc_array_ptr *cursor, *parent = NULL;
365 int slot = -1;
366
367 pr_devel("-->%s()\n", __func__);
368
369 cursor = root;
370 if (!cursor) {
371 pr_devel("empty\n");
372 return;
373 }
374
375move_to_meta:
376 if (assoc_array_ptr_is_shortcut(cursor)) {
377 /* Descend through a shortcut */
378 pr_devel("[%d] shortcut\n", slot);
379 BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
380 shortcut = assoc_array_ptr_to_shortcut(cursor);
381 BUG_ON(shortcut->back_pointer != parent);
382 BUG_ON(slot != -1 && shortcut->parent_slot != slot);
383 parent = cursor;
384 cursor = shortcut->next_node;
385 slot = -1;
386 BUG_ON(!assoc_array_ptr_is_node(cursor));
387 }
388
389 pr_devel("[%d] node\n", slot);
390 node = assoc_array_ptr_to_node(cursor);
391 BUG_ON(node->back_pointer != parent);
392 BUG_ON(slot != -1 && node->parent_slot != slot);
393 slot = 0;
394
395continue_node:
396 pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
397 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
398 struct assoc_array_ptr *ptr = node->slots[slot];
399 if (!ptr)
400 continue;
401 if (assoc_array_ptr_is_meta(ptr)) {
402 parent = cursor;
403 cursor = ptr;
404 goto move_to_meta;
405 }
406
407 if (ops) {
408 pr_devel("[%d] free leaf\n", slot);
409 ops->free_object(assoc_array_ptr_to_leaf(ptr));
410 }
411 }
412
413 parent = node->back_pointer;
414 slot = node->parent_slot;
415 pr_devel("free node\n");
416 kfree(node);
417 if (!parent)
418 return; /* Done */
419
420 /* Move back up to the parent (may need to free a shortcut on
421 * the way up) */
422 if (assoc_array_ptr_is_shortcut(parent)) {
423 shortcut = assoc_array_ptr_to_shortcut(parent);
424 BUG_ON(shortcut->next_node != cursor);
425 cursor = parent;
426 parent = shortcut->back_pointer;
427 slot = shortcut->parent_slot;
428 pr_devel("free shortcut\n");
429 kfree(shortcut);
430 if (!parent)
431 return;
432
433 BUG_ON(!assoc_array_ptr_is_node(parent));
434 }
435
436 /* Ascend to next slot in parent node */
437 pr_devel("ascend to %p[%d]\n", parent, slot);
438 cursor = parent;
439 node = assoc_array_ptr_to_node(cursor);
440 slot++;
441 goto continue_node;
442}
443
444/**
445 * assoc_array_destroy - Destroy an associative array
446 * @array: The array to destroy.
447 * @ops: The operations to use.
448 *
449 * Discard all metadata and free all objects in an associative array. The
450 * array will be empty and ready to use again upon completion. This function
451 * cannot fail.
452 *
453 * The caller must prevent all other accesses whilst this takes place as no
454 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
455 * accesses to continue. On the other hand, no memory allocation is required.
456 */
457void assoc_array_destroy(struct assoc_array *array,
458 const struct assoc_array_ops *ops)
459{
460 assoc_array_destroy_subtree(array->root, ops);
461 array->root = NULL;
462}
463
464/*
465 * Handle insertion into an empty tree.
466 */
467static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
468{
469 struct assoc_array_node *new_n0;
470
471 pr_devel("-->%s()\n", __func__);
472
473 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
474 if (!new_n0)
475 return false;
476
477 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
478 edit->leaf_p = &new_n0->slots[0];
479 edit->adjust_count_on = new_n0;
480 edit->set[0].ptr = &edit->array->root;
481 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
482
483 pr_devel("<--%s() = ok [no root]\n", __func__);
484 return true;
485}
486
487/*
488 * Handle insertion into a terminal node.
489 */
490static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
491 const struct assoc_array_ops *ops,
492 const void *index_key,
493 struct assoc_array_walk_result *result)
494{
495 struct assoc_array_shortcut *shortcut, *new_s0;
496 struct assoc_array_node *node, *new_n0, *new_n1, *side;
497 struct assoc_array_ptr *ptr;
498 unsigned long dissimilarity, base_seg, blank;
499 size_t keylen;
500 bool have_meta;
501 int level, diff;
502 int slot, next_slot, free_slot, i, j;
503
504 node = result->terminal_node.node;
505 level = result->terminal_node.level;
506 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
507
508 pr_devel("-->%s()\n", __func__);
509
510 /* We arrived at a node which doesn't have an onward node or shortcut
511 * pointer that we have to follow. This means that (a) the leaf we
512 * want must go here (either by insertion or replacement) or (b) we
513 * need to split this node and insert in one of the fragments.
514 */
515 free_slot = -1;
516
517 /* Firstly, we have to check the leaves in this node to see if there's
518 * a matching one we should replace in place.
519 */
520 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
521 ptr = node->slots[i];
522 if (!ptr) {
523 free_slot = i;
524 continue;
525 }
526 if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
527 pr_devel("replace in slot %d\n", i);
528 edit->leaf_p = &node->slots[i];
529 edit->dead_leaf = node->slots[i];
530 pr_devel("<--%s() = ok [replace]\n", __func__);
531 return true;
532 }
533 }
534
535 /* If there is a free slot in this node then we can just insert the
536 * leaf here.
537 */
538 if (free_slot >= 0) {
539 pr_devel("insert in free slot %d\n", free_slot);
540 edit->leaf_p = &node->slots[free_slot];
541 edit->adjust_count_on = node;
542 pr_devel("<--%s() = ok [insert]\n", __func__);
543 return true;
544 }
545
546 /* The node has no spare slots - so we're either going to have to split
547 * it or insert another node before it.
548 *
549 * Whatever, we're going to need at least two new nodes - so allocate
550 * those now. We may also need a new shortcut, but we deal with that
551 * when we need it.
552 */
553 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
554 if (!new_n0)
555 return false;
556 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
557 new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
558 if (!new_n1)
559 return false;
560 edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
561
562 /* We need to find out how similar the leaves are. */
563 pr_devel("no spare slots\n");
564 have_meta = false;
565 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
566 ptr = node->slots[i];
567 if (assoc_array_ptr_is_meta(ptr)) {
568 edit->segment_cache[i] = 0xff;
569 have_meta = true;
570 continue;
571 }
572 base_seg = ops->get_object_key_chunk(
573 assoc_array_ptr_to_leaf(ptr), level);
574 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
575 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
576 }
577
578 if (have_meta) {
579 pr_devel("have meta\n");
580 goto split_node;
581 }
582
583 /* The node contains only leaves */
584 dissimilarity = 0;
585 base_seg = edit->segment_cache[0];
586 for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
587 dissimilarity |= edit->segment_cache[i] ^ base_seg;
588
589 pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
590
591 if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
592 /* The old leaves all cluster in the same slot. We will need
593 * to insert a shortcut if the new node wants to cluster with them.
594 */
595 if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
596 goto all_leaves_cluster_together;
597
598 /* Otherwise we can just insert a new node ahead of the old
599 * one.
600 */
601 goto present_leaves_cluster_but_not_new_leaf;
602 }
603
604split_node:
605 pr_devel("split node\n");
606
607 /* We need to split the current node; we know that the node doesn't
608 * simply contain a full set of leaves that cluster together (it
609 * contains meta pointers and/or non-clustering leaves).
610 *
611 * We need to expel at least two leaves out of a set consisting of the
612 * leaves in the node and the new leaf.
613 *
614 * We need a new node (n0) to replace the current one and a new node to
615 * take the expelled nodes (n1).
616 */
617 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
618 new_n0->back_pointer = node->back_pointer;
619 new_n0->parent_slot = node->parent_slot;
620 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
621 new_n1->parent_slot = -1; /* Need to calculate this */
622
623do_split_node:
624 pr_devel("do_split_node\n");
625
626 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
627 new_n1->nr_leaves_on_branch = 0;
628
629 /* Begin by finding two matching leaves. There have to be at least two
630 * that match - even if there are meta pointers - because any leaf that
631 * would match a slot with a meta pointer in it must be somewhere
632 * behind that meta pointer and cannot be here. Further, given N
633 * remaining leaf slots, we now have N+1 leaves to go in them.
634 */
635 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
636 slot = edit->segment_cache[i];
637 if (slot != 0xff)
638 for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
639 if (edit->segment_cache[j] == slot)
640 goto found_slot_for_multiple_occupancy;
641 }
642found_slot_for_multiple_occupancy:
643 pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
644 BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
645 BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
646 BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
647
648 new_n1->parent_slot = slot;
649
650 /* Metadata pointers cannot change slot */
651 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
652 if (assoc_array_ptr_is_meta(node->slots[i]))
653 new_n0->slots[i] = node->slots[i];
654 else
655 new_n0->slots[i] = NULL;
656 BUG_ON(new_n0->slots[slot] != NULL);
657 new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
658
659 /* Filter the leaf pointers between the new nodes */
660 free_slot = -1;
661 next_slot = 0;
662 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
663 if (assoc_array_ptr_is_meta(node->slots[i]))
664 continue;
665 if (edit->segment_cache[i] == slot) {
666 new_n1->slots[next_slot++] = node->slots[i];
667 new_n1->nr_leaves_on_branch++;
668 } else {
669 do {
670 free_slot++;
671 } while (new_n0->slots[free_slot] != NULL);
672 new_n0->slots[free_slot] = node->slots[i];
673 }
674 }
675
676 pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
677
678 if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
679 do {
680 free_slot++;
681 } while (new_n0->slots[free_slot] != NULL);
682 edit->leaf_p = &new_n0->slots[free_slot];
683 edit->adjust_count_on = new_n0;
684 } else {
685 edit->leaf_p = &new_n1->slots[next_slot++];
686 edit->adjust_count_on = new_n1;
687 }
688
689 BUG_ON(next_slot <= 1);
690
691 edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
692 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
693 if (edit->segment_cache[i] == 0xff) {
694 ptr = node->slots[i];
695 BUG_ON(assoc_array_ptr_is_leaf(ptr));
696 if (assoc_array_ptr_is_node(ptr)) {
697 side = assoc_array_ptr_to_node(ptr);
698 edit->set_backpointers[i] = &side->back_pointer;
699 } else {
700 shortcut = assoc_array_ptr_to_shortcut(ptr);
701 edit->set_backpointers[i] = &shortcut->back_pointer;
702 }
703 }
704 }
705
706 ptr = node->back_pointer;
707 if (!ptr)
708 edit->set[0].ptr = &edit->array->root;
709 else if (assoc_array_ptr_is_node(ptr))
710 edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
711 else
712 edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
713 edit->excised_meta[0] = assoc_array_node_to_ptr(node);
714 pr_devel("<--%s() = ok [split node]\n", __func__);
715 return true;
716
717present_leaves_cluster_but_not_new_leaf:
718 /* All the old leaves cluster in the same slot, but the new leaf wants
719 * to go into a different slot, so we create a new node to hold the new
720 * leaf and a pointer to a new node holding all the old leaves.
721 */
722 pr_devel("present leaves cluster but not new leaf\n");
723
724 new_n0->back_pointer = node->back_pointer;
725 new_n0->parent_slot = node->parent_slot;
726 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
727 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
728 new_n1->parent_slot = edit->segment_cache[0];
729 new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
730 edit->adjust_count_on = new_n0;
731
732 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
733 new_n1->slots[i] = node->slots[i];
734
735 new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n1);
736 edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
737
738 edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot];
739 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
740 edit->excised_meta[0] = assoc_array_node_to_ptr(node);
741 pr_devel("<--%s() = ok [insert node before]\n", __func__);
742 return true;
743
744all_leaves_cluster_together:
745 /* All the leaves, new and old, want to cluster together in this node
746 * in the same slot, so we have to replace this node with a shortcut to
747 * skip over the identical parts of the key and then place a pair of
748 * nodes, one inside the other, at the end of the shortcut and
749 * distribute the keys between them.
750 *
751 * Firstly we need to work out where the leaves start diverging as a
752 * bit position into their keys so that we know how big the shortcut
753 * needs to be.
754 *
755 * We only need to make a single pass of N of the N+1 leaves because if
756 * any keys differ between themselves at bit X then at least one of
757 * them must also differ with the base key at bit X or before.
758 */
759 pr_devel("all leaves cluster together\n");
760 diff = INT_MAX;
761 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
762 int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
763 assoc_array_ptr_to_leaf(node->slots[i]));
764 if (x < diff) {
765 BUG_ON(x < 0);
766 diff = x;
767 }
768 }
769 BUG_ON(diff == INT_MAX);
770 BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
771
772 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
773 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
774
775 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
776 keylen * sizeof(unsigned long), GFP_KERNEL);
777 if (!new_s0)
778 return false;
779 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
780
781 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
782 new_s0->back_pointer = node->back_pointer;
783 new_s0->parent_slot = node->parent_slot;
784 new_s0->next_node = assoc_array_node_to_ptr(new_n0);
785 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
786 new_n0->parent_slot = 0;
787 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
788 new_n1->parent_slot = -1; /* Need to calculate this */
789
790 new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
791 pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
792 BUG_ON(level <= 0);
793
794 for (i = 0; i < keylen; i++)
795 new_s0->index_key[i] =
796 ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
797
798 blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
799 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
800 new_s0->index_key[keylen - 1] &= ~blank;
801
802 /* This now reduces to a node splitting exercise for which we'll need
803 * to regenerate the disparity table.
804 */
805 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
806 ptr = node->slots[i];
807 base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
808 level);
809 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
810 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
811 }
812
813 base_seg = ops->get_key_chunk(index_key, level);
814 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
815 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
816 goto do_split_node;
817}
818
819/*
820 * Handle insertion into the middle of a shortcut.
821 */
822static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
823 const struct assoc_array_ops *ops,
824 struct assoc_array_walk_result *result)
825{
826 struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
827 struct assoc_array_node *node, *new_n0, *side;
828 unsigned long sc_segments, dissimilarity, blank;
829 size_t keylen;
830 int level, sc_level, diff;
831 int sc_slot;
832
833 shortcut = result->wrong_shortcut.shortcut;
834 level = result->wrong_shortcut.level;
835 sc_level = result->wrong_shortcut.sc_level;
836 sc_segments = result->wrong_shortcut.sc_segments;
837 dissimilarity = result->wrong_shortcut.dissimilarity;
838
839 pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
840 __func__, level, dissimilarity, sc_level);
841
842 /* We need to split a shortcut and insert a node between the two
843 * pieces. Zero-length pieces will be dispensed with entirely.
844 *
845 * First of all, we need to find out in which level the first
846 * difference was.
847 */
848 diff = __ffs(dissimilarity);
849 diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
850 diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
851 pr_devel("diff=%d\n", diff);
852
853 if (!shortcut->back_pointer) {
854 edit->set[0].ptr = &edit->array->root;
855 } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
856 node = assoc_array_ptr_to_node(shortcut->back_pointer);
857 edit->set[0].ptr = &node->slots[shortcut->parent_slot];
858 } else {
859 BUG();
860 }
861
862 edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
863
864 /* Create a new node now since we're going to need it anyway */
865 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
866 if (!new_n0)
867 return false;
868 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
869 edit->adjust_count_on = new_n0;
870
871 /* Insert a new shortcut before the new node if this segment isn't of
872 * zero length - otherwise we just connect the new node directly to the
873 * parent.
874 */
875 level += ASSOC_ARRAY_LEVEL_STEP;
876 if (diff > level) {
877 pr_devel("pre-shortcut %d...%d\n", level, diff);
878 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
879 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
880
881 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
882 keylen * sizeof(unsigned long), GFP_KERNEL);
883 if (!new_s0)
884 return false;
885 edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
886 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
887 new_s0->back_pointer = shortcut->back_pointer;
888 new_s0->parent_slot = shortcut->parent_slot;
889 new_s0->next_node = assoc_array_node_to_ptr(new_n0);
890 new_s0->skip_to_level = diff;
891
892 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
893 new_n0->parent_slot = 0;
894
895 memcpy(new_s0->index_key, shortcut->index_key,
896 keylen * sizeof(unsigned long));
897
898 blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
899 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
900 new_s0->index_key[keylen - 1] &= ~blank;
901 } else {
902 pr_devel("no pre-shortcut\n");
903 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
904 new_n0->back_pointer = shortcut->back_pointer;
905 new_n0->parent_slot = shortcut->parent_slot;
906 }
907
908 side = assoc_array_ptr_to_node(shortcut->next_node);
909 new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
910
911 /* We need to know which slot in the new node is going to take a
912 * metadata pointer.
913 */
914 sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
915 sc_slot &= ASSOC_ARRAY_FAN_MASK;
916
917 pr_devel("new slot %lx >> %d -> %d\n",
918 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
919
920 /* Determine whether we need to follow the new node with a replacement
921 * for the current shortcut. We could in theory reuse the current
922 * shortcut if its parent slot number doesn't change - but that's a
923 * 1-in-16 chance so not worth expending the code upon.
924 */
925 level = diff + ASSOC_ARRAY_LEVEL_STEP;
926 if (level < shortcut->skip_to_level) {
927 pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
928 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
929 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
930
931 new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
932 keylen * sizeof(unsigned long), GFP_KERNEL);
933 if (!new_s1)
934 return false;
935 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
936
937 new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
938 new_s1->parent_slot = sc_slot;
939 new_s1->next_node = shortcut->next_node;
940 new_s1->skip_to_level = shortcut->skip_to_level;
941
942 new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
943
944 memcpy(new_s1->index_key, shortcut->index_key,
945 keylen * sizeof(unsigned long));
946
947 edit->set[1].ptr = &side->back_pointer;
948 edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
949 } else {
950 pr_devel("no post-shortcut\n");
951
952 /* We don't have to replace the pointed-to node as long as we
953 * use memory barriers to make sure the parent slot number is
954 * changed before the back pointer (the parent slot number is
955 * irrelevant to the old parent shortcut).
956 */
957 new_n0->slots[sc_slot] = shortcut->next_node;
958 edit->set_parent_slot[0].p = &side->parent_slot;
959 edit->set_parent_slot[0].to = sc_slot;
960 edit->set[1].ptr = &side->back_pointer;
961 edit->set[1].to = assoc_array_node_to_ptr(new_n0);
962 }
963
964 /* Install the new leaf in a spare slot in the new node. */
965 if (sc_slot == 0)
966 edit->leaf_p = &new_n0->slots[1];
967 else
968 edit->leaf_p = &new_n0->slots[0];
969
970 pr_devel("<--%s() = ok [split shortcut]\n", __func__);
971 return true;
972}
973
974/**
975 * assoc_array_insert - Script insertion of an object into an associative array
976 * @array: The array to insert into.
977 * @ops: The operations to use.
978 * @index_key: The key to insert at.
979 * @object: The object to insert.
980 *
981 * Precalculate and preallocate a script for the insertion or replacement of an
982 * object in an associative array. This results in an edit script that can
983 * either be applied or cancelled.
984 *
985 * The function returns a pointer to an edit script or -ENOMEM.
986 *
987 * The caller should lock against other modifications and must continue to hold
988 * the lock until assoc_array_apply_edit() has been called.
989 *
990 * Accesses to the tree may take place concurrently with this function,
991 * provided they hold the RCU read lock.
992 */
993struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
994 const struct assoc_array_ops *ops,
995 const void *index_key,
996 void *object)
997{
998 struct assoc_array_walk_result result;
999 struct assoc_array_edit *edit;
1000
1001 pr_devel("-->%s()\n", __func__);
1002
1003 /* The leaf pointer we're given must not have the bottom bit set as we
1004 * use those for type-marking the pointer. NULL pointers are also not
1005 * allowed as they indicate an empty slot but we have to allow them
1006 * here as they can be updated later.
1007 */
1008 BUG_ON(assoc_array_ptr_is_meta(object));
1009
1010 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1011 if (!edit)
1012 return ERR_PTR(-ENOMEM);
1013 edit->array = array;
1014 edit->ops = ops;
1015 edit->leaf = assoc_array_leaf_to_ptr(object);
1016 edit->adjust_count_by = 1;
1017
1018 switch (assoc_array_walk(array, ops, index_key, &result)) {
1019 case assoc_array_walk_tree_empty:
1020 /* Allocate a root node if there isn't one yet */
1021 if (!assoc_array_insert_in_empty_tree(edit))
1022 goto enomem;
1023 return edit;
1024
1025 case assoc_array_walk_found_terminal_node:
1026 /* We found a node that doesn't have a node/shortcut pointer in
1027 * the slot corresponding to the index key that we have to
1028 * follow.
1029 */
1030 if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
1031 &result))
1032 goto enomem;
1033 return edit;
1034
1035 case assoc_array_walk_found_wrong_shortcut:
1036 /* We found a shortcut that didn't match our key in a slot we
1037 * needed to follow.
1038 */
1039 if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
1040 goto enomem;
1041 return edit;
1042 }
1043
1044enomem:
1045 /* Clean up after an out of memory error */
1046 pr_devel("enomem\n");
1047 assoc_array_cancel_edit(edit);
1048 return ERR_PTR(-ENOMEM);
1049}
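
To make the edit-script flow concrete, a hedged sketch of a typical insertion under the caller's own lock follows; my_lock, my_ops and struct my_object are the hypothetical pieces from the earlier sketch, while the assoc_array_* calls are the ones defined in this file.

static DEFINE_MUTEX(my_lock);	/* hypothetical lock guarding modifications */

static int my_insert(struct assoc_array *array, struct my_object *obj)
{
	struct assoc_array_edit *edit;

	mutex_lock(&my_lock);
	edit = assoc_array_insert(array, &my_ops, &obj->key, obj);
	if (IS_ERR(edit)) {
		mutex_unlock(&my_lock);
		return PTR_ERR(edit);		/* -ENOMEM */
	}
	assoc_array_apply_edit(edit);		/* cannot fail */
	mutex_unlock(&my_lock);
	return 0;
}

Had the caller decided not to go ahead after scripting the change, it would call assoc_array_cancel_edit(edit) instead of applying it and would remain responsible for the leaf it meant to insert.
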
1050
1051/**
1052 * assoc_array_insert_set_object - Set the new object pointer in an edit script
1053 * @edit: The edit script to modify.
1054 * @object: The object pointer to set.
1055 *
1056 * Change the object to be inserted in an edit script. The object pointed to
1057 * by the old object is not freed. This must be done prior to applying the
1058 * script.
1059 */
1060void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
1061{
1062 BUG_ON(!object);
1063 edit->leaf = assoc_array_leaf_to_ptr(object);
1064}
1065
1066struct assoc_array_delete_collapse_context {
1067 struct assoc_array_node *node;
1068 const void *skip_leaf;
1069 int slot;
1070};
1071
1072/*
1073 * Subtree collapse to node iterator.
1074 */
1075static int assoc_array_delete_collapse_iterator(const void *leaf,
1076 void *iterator_data)
1077{
1078 struct assoc_array_delete_collapse_context *collapse = iterator_data;
1079
1080 if (leaf == collapse->skip_leaf)
1081 return 0;
1082
1083 BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
1084
1085 collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
1086 return 0;
1087}
1088
1089/**
1090 * assoc_array_delete - Script deletion of an object from an associative array
1091 * @array: The array to search.
1092 * @ops: The operations to use.
1093 * @index_key: The key to the object.
1094 *
1095 * Precalculate and preallocate a script for the deletion of an object from an
1096 * associative array. This results in an edit script that can either be
1097 * applied or cancelled.
1098 *
1099 * The function returns a pointer to an edit script if the object was found,
1100 * NULL if the object was not found or -ENOMEM.
1101 *
1102 * The caller should lock against other modifications and must continue to hold
1103 * the lock until assoc_array_apply_edit() has been called.
1104 *
1105 * Accesses to the tree may take place concurrently with this function,
1106 * provided they hold the RCU read lock.
1107 */
1108struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
1109 const struct assoc_array_ops *ops,
1110 const void *index_key)
1111{
1112 struct assoc_array_delete_collapse_context collapse;
1113 struct assoc_array_walk_result result;
1114 struct assoc_array_node *node, *new_n0;
1115 struct assoc_array_edit *edit;
1116 struct assoc_array_ptr *ptr;
1117 bool has_meta;
1118 int slot, i;
1119
1120 pr_devel("-->%s()\n", __func__);
1121
1122 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1123 if (!edit)
1124 return ERR_PTR(-ENOMEM);
1125 edit->array = array;
1126 edit->ops = ops;
1127 edit->adjust_count_by = -1;
1128
1129 switch (assoc_array_walk(array, ops, index_key, &result)) {
1130 case assoc_array_walk_found_terminal_node:
1131 /* We found a node that should contain the leaf we've been
1132 * asked to remove - *if* it's in the tree.
1133 */
1134 pr_devel("terminal_node\n");
1135 node = result.terminal_node.node;
1136
1137 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1138 ptr = node->slots[slot];
1139 if (ptr &&
1140 assoc_array_ptr_is_leaf(ptr) &&
1141 ops->compare_object(assoc_array_ptr_to_leaf(ptr),
1142 index_key))
1143 goto found_leaf;
1144 }
1145 case assoc_array_walk_tree_empty:
1146 case assoc_array_walk_found_wrong_shortcut:
1147 default:
1148 assoc_array_cancel_edit(edit);
1149 pr_devel("not found\n");
1150 return NULL;
1151 }
1152
1153found_leaf:
1154 BUG_ON(array->nr_leaves_on_tree <= 0);
1155
1156 /* In the simplest form of deletion we just clear the slot and release
1157 * the leaf after a suitable interval.
1158 */
1159 edit->dead_leaf = node->slots[slot];
1160 edit->set[0].ptr = &node->slots[slot];
1161 edit->set[0].to = NULL;
1162 edit->adjust_count_on = node;
1163
1164 /* If that concludes erasure of the last leaf, then delete the entire
1165 * internal array.
1166 */
1167 if (array->nr_leaves_on_tree == 1) {
1168 edit->set[1].ptr = &array->root;
1169 edit->set[1].to = NULL;
1170 edit->adjust_count_on = NULL;
1171 edit->excised_subtree = array->root;
1172 pr_devel("all gone\n");
1173 return edit;
1174 }
1175
1176 /* However, we'd also like to clear up some metadata blocks if we
1177 * possibly can.
1178 *
1179 * We go for a simple algorithm of: if this node has FAN_OUT or fewer
1180 * leaves in it, then attempt to collapse it - and attempt to
1181 * recursively collapse up the tree.
1182 *
1183 * We could also try and collapse in partially filled subtrees to take
1184 * up space in this node.
1185 */
1186 if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
1187 struct assoc_array_node *parent, *grandparent;
1188 struct assoc_array_ptr *ptr;
1189
1190 /* First of all, we need to know if this node has metadata so
1191 * that we don't try collapsing if all the leaves are already
1192 * here.
1193 */
1194 has_meta = false;
1195 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
1196 ptr = node->slots[i];
1197 if (assoc_array_ptr_is_meta(ptr)) {
1198 has_meta = true;
1199 break;
1200 }
1201 }
1202
1203 pr_devel("leaves: %ld [m=%d]\n",
1204 node->nr_leaves_on_branch - 1, has_meta);
1205
1206 /* Look further up the tree to see if we can collapse this node
1207 * into a more proximal node too.
1208 */
1209 parent = node;
1210 collapse_up:
1211 pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
1212
1213 ptr = parent->back_pointer;
1214 if (!ptr)
1215 goto do_collapse;
1216 if (assoc_array_ptr_is_shortcut(ptr)) {
1217 struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
1218 ptr = s->back_pointer;
1219 if (!ptr)
1220 goto do_collapse;
1221 }
1222
1223 grandparent = assoc_array_ptr_to_node(ptr);
1224 if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
1225 parent = grandparent;
1226 goto collapse_up;
1227 }
1228
1229 do_collapse:
1230 /* There's no point collapsing if the original node has no meta
1231 * pointers to discard and if we didn't merge into one of that
1232 * node's ancestry.
1233 */
1234 if (has_meta || parent != node) {
1235 node = parent;
1236
1237 /* Create a new node to collapse into */
1238 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
1239 if (!new_n0)
1240 goto enomem;
1241 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
1242
1243 new_n0->back_pointer = node->back_pointer;
1244 new_n0->parent_slot = node->parent_slot;
1245 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
1246 edit->adjust_count_on = new_n0;
1247
1248 collapse.node = new_n0;
1249 collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
1250 collapse.slot = 0;
1251 assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
1252 node->back_pointer,
1253 assoc_array_delete_collapse_iterator,
1254 &collapse);
1255 pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
1256 BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
1257
1258 if (!node->back_pointer) {
1259 edit->set[1].ptr = &array->root;
1260 } else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
1261 BUG();
1262 } else if (assoc_array_ptr_is_node(node->back_pointer)) {
1263 struct assoc_array_node *p =
1264 assoc_array_ptr_to_node(node->back_pointer);
1265 edit->set[1].ptr = &p->slots[node->parent_slot];
1266 } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
1267 struct assoc_array_shortcut *s =
1268 assoc_array_ptr_to_shortcut(node->back_pointer);
1269 edit->set[1].ptr = &s->next_node;
1270 }
1271 edit->set[1].to = assoc_array_node_to_ptr(new_n0);
1272 edit->excised_subtree = assoc_array_node_to_ptr(node);
1273 }
1274 }
1275
1276 return edit;
1277
1278enomem:
1279 /* Clean up after an out of memory error */
1280 pr_devel("enomem\n");
1281 assoc_array_cancel_edit(edit);
1282 return ERR_PTR(-ENOMEM);
1283}
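
Deletion follows the same script-then-apply pattern but has a three-way result (an edit script, NULL for not found, or an ERR_PTR); below is a hedged sketch reusing the hypothetical my_lock and my_ops from the sketches above.

static int my_remove(struct assoc_array *array, unsigned long key)
{
	struct assoc_array_edit *edit;

	mutex_lock(&my_lock);
	edit = assoc_array_delete(array, &my_ops, &key);
	if (IS_ERR(edit)) {
		mutex_unlock(&my_lock);
		return PTR_ERR(edit);		/* -ENOMEM */
	}
	if (!edit) {
		mutex_unlock(&my_lock);
		return -ENOENT;			/* no such object */
	}
	assoc_array_apply_edit(edit);	/* dead leaf is freed after an RCU grace period */
	mutex_unlock(&my_lock);
	return 0;
}
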
1284
1285/**
1286 * assoc_array_clear - Script deletion of all objects from an associative array
1287 * @array: The array to clear.
1288 * @ops: The operations to use.
1289 *
1290 * Precalculate and preallocate a script for the deletion of all the objects
1291 * from an associative array. This results in an edit script that can either
1292 * be applied or cancelled.
1293 *
1294 * The function returns a pointer to an edit script if there are objects to be
1295 * deleted, NULL if there are no objects in the array or -ENOMEM.
1296 *
1297 * The caller should lock against other modifications and must continue to hold
1298 * the lock until assoc_array_apply_edit() has been called.
1299 *
1300 * Accesses to the tree may take place concurrently with this function,
1301 * provided they hold the RCU read lock.
1302 */
1303struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
1304 const struct assoc_array_ops *ops)
1305{
1306 struct assoc_array_edit *edit;
1307
1308 pr_devel("-->%s()\n", __func__);
1309
1310 if (!array->root)
1311 return NULL;
1312
1313 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1314 if (!edit)
1315 return ERR_PTR(-ENOMEM);
1316 edit->array = array;
1317 edit->ops = ops;
1318 edit->set[1].ptr = &array->root;
1319 edit->set[1].to = NULL;
1320 edit->excised_subtree = array->root;
1321 edit->ops_for_excised_subtree = ops;
1322 pr_devel("all gone\n");
1323 return edit;
1324}
1325
1326/*
1327 * Handle the deferred destruction after an applied edit.
1328 */
1329static void assoc_array_rcu_cleanup(struct rcu_head *head)
1330{
1331 struct assoc_array_edit *edit =
1332 container_of(head, struct assoc_array_edit, rcu);
1333 int i;
1334
1335 pr_devel("-->%s()\n", __func__);
1336
1337 if (edit->dead_leaf)
1338 edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
1339 for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
1340 if (edit->excised_meta[i])
1341 kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
1342
1343 if (edit->excised_subtree) {
1344 BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
1345 if (assoc_array_ptr_is_node(edit->excised_subtree)) {
1346 struct assoc_array_node *n =
1347 assoc_array_ptr_to_node(edit->excised_subtree);
1348 n->back_pointer = NULL;
1349 } else {
1350 struct assoc_array_shortcut *s =
1351 assoc_array_ptr_to_shortcut(edit->excised_subtree);
1352 s->back_pointer = NULL;
1353 }
1354 assoc_array_destroy_subtree(edit->excised_subtree,
1355 edit->ops_for_excised_subtree);
1356 }
1357
1358 kfree(edit);
1359}
1360
1361/**
1362 * assoc_array_apply_edit - Apply an edit script to an associative array
1363 * @edit: The script to apply.
1364 *
1365 * Apply an edit script to an associative array to effect an insertion,
1366 * deletion or clearance. As the edit script includes preallocated memory,
1367 * this is guaranteed not to fail.
1368 *
1369 * The edit script, dead objects and dead metadata will be scheduled for
1370 * destruction after an RCU grace period to permit those doing read-only
1371 * accesses on the array to continue to do so under the RCU read lock whilst
1372 * the edit is taking place.
1373 */
1374void assoc_array_apply_edit(struct assoc_array_edit *edit)
1375{
1376 struct assoc_array_shortcut *shortcut;
1377 struct assoc_array_node *node;
1378 struct assoc_array_ptr *ptr;
1379 int i;
1380
1381 pr_devel("-->%s()\n", __func__);
1382
1383 smp_wmb();
1384 if (edit->leaf_p)
1385 *edit->leaf_p = edit->leaf;
1386
1387 smp_wmb();
1388 for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
1389 if (edit->set_parent_slot[i].p)
1390 *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
1391
1392 smp_wmb();
1393 for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
1394 if (edit->set_backpointers[i])
1395 *edit->set_backpointers[i] = edit->set_backpointers_to;
1396
1397 smp_wmb();
1398 for (i = 0; i < ARRAY_SIZE(edit->set); i++)
1399 if (edit->set[i].ptr)
1400 *edit->set[i].ptr = edit->set[i].to;
1401
1402 if (edit->array->root == NULL) {
1403 edit->array->nr_leaves_on_tree = 0;
1404 } else if (edit->adjust_count_on) {
1405 node = edit->adjust_count_on;
1406 for (;;) {
1407 node->nr_leaves_on_branch += edit->adjust_count_by;
1408
1409 ptr = node->back_pointer;
1410 if (!ptr)
1411 break;
1412 if (assoc_array_ptr_is_shortcut(ptr)) {
1413 shortcut = assoc_array_ptr_to_shortcut(ptr);
1414 ptr = shortcut->back_pointer;
1415 if (!ptr)
1416 break;
1417 }
1418 BUG_ON(!assoc_array_ptr_is_node(ptr));
1419 node = assoc_array_ptr_to_node(ptr);
1420 }
1421
1422 edit->array->nr_leaves_on_tree += edit->adjust_count_by;
1423 }
1424
1425 call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
1426}
1427
1428/**
1429 * assoc_array_cancel_edit - Discard an edit script.
1430 * @edit: The script to discard.
1431 *
1432 * Free an edit script and all the preallocated data it holds without making
1433 * any changes to the associative array it was intended for.
1434 *
1435 * NOTE! In the case of an insertion script, this does _not_ release the leaf
1436 * that was to be inserted. That is left to the caller.
1437 */
1438void assoc_array_cancel_edit(struct assoc_array_edit *edit)
1439{
1440 struct assoc_array_ptr *ptr;
1441 int i;
1442
1443 pr_devel("-->%s()\n", __func__);
1444
1445 /* Clean up after an out of memory error */
1446 for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
1447 ptr = edit->new_meta[i];
1448 if (ptr) {
1449 if (assoc_array_ptr_is_node(ptr))
1450 kfree(assoc_array_ptr_to_node(ptr));
1451 else
1452 kfree(assoc_array_ptr_to_shortcut(ptr));
1453 }
1454 }
1455 kfree(edit);
1456}
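
A similarly hedged sketch of the cancel path: the script is discarded without touching the array, and freeing the would-be leaf remains the caller's responsibility. The proceed flag is purely illustrative:

#include <linux/assoc_array.h>
#include <linux/err.h>
#include <linux/errno.h>

static int example_insert_or_back_out(struct assoc_array *array,
				      const struct assoc_array_ops *ops,
				      const void *index_key, void *object,
				      bool proceed)
{
	struct assoc_array_edit *edit;

	edit = assoc_array_insert(array, ops, index_key, object);
	if (IS_ERR(edit))
		return PTR_ERR(edit);

	if (!proceed) {
		/* Frees only the preallocated metadata, not @object. */
		assoc_array_cancel_edit(edit);
		return -ECANCELED;
	}

	assoc_array_apply_edit(edit);
	return 0;
}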
1457
1458/**
1459 * assoc_array_gc - Garbage collect an associative array.
1460 * @array: The array to clean.
1461 * @ops: The operations to use.
1462 * @iterator: A callback function to pass judgement on each object.
1463 * @iterator_data: Private data for the callback function.
1464 *
1465 * Collect garbage from an associative array and pack down the internal tree to
1466 * save memory.
1467 *
1468 * The iterator function is asked to pass judgement upon each object in the
 1469 * array. If it returns false, the object is discarded and if it returns true,
1470 * the object is kept. If it returns true, it must increment the object's
1471 * usage count (or whatever it needs to do to retain it) before returning.
1472 *
1473 * This function returns 0 if successful or -ENOMEM if out of memory. In the
1474 * latter case, the array is not changed.
1475 *
1476 * The caller should lock against other modifications and must continue to hold
1477 * the lock until assoc_array_apply_edit() has been called.
1478 *
1479 * Accesses to the tree may take place concurrently with this function,
1480 * provided they hold the RCU read lock.
1481 */
1482int assoc_array_gc(struct assoc_array *array,
1483 const struct assoc_array_ops *ops,
1484 bool (*iterator)(void *object, void *iterator_data),
1485 void *iterator_data)
1486{
1487 struct assoc_array_shortcut *shortcut, *new_s;
1488 struct assoc_array_node *node, *new_n;
1489 struct assoc_array_edit *edit;
1490 struct assoc_array_ptr *cursor, *ptr;
1491 struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
1492 unsigned long nr_leaves_on_tree;
1493 int keylen, slot, nr_free, next_slot, i;
1494
1495 pr_devel("-->%s()\n", __func__);
1496
1497 if (!array->root)
1498 return 0;
1499
1500 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1501 if (!edit)
1502 return -ENOMEM;
1503 edit->array = array;
1504 edit->ops = ops;
1505 edit->ops_for_excised_subtree = ops;
1506 edit->set[0].ptr = &array->root;
1507 edit->excised_subtree = array->root;
1508
1509 new_root = new_parent = NULL;
1510 new_ptr_pp = &new_root;
1511 cursor = array->root;
1512
1513descend:
1514 /* If this point is a shortcut, then we need to duplicate it and
1515 * advance the target cursor.
1516 */
1517 if (assoc_array_ptr_is_shortcut(cursor)) {
1518 shortcut = assoc_array_ptr_to_shortcut(cursor);
1519 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
1520 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
1521 new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
1522 keylen * sizeof(unsigned long), GFP_KERNEL);
1523 if (!new_s)
1524 goto enomem;
1525 pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
1526 memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
1527 keylen * sizeof(unsigned long)));
1528 new_s->back_pointer = new_parent;
1529 new_s->parent_slot = shortcut->parent_slot;
1530 *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
1531 new_ptr_pp = &new_s->next_node;
1532 cursor = shortcut->next_node;
1533 }
1534
1535 /* Duplicate the node at this position */
1536 node = assoc_array_ptr_to_node(cursor);
1537 new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
1538 if (!new_n)
1539 goto enomem;
1540 pr_devel("dup node %p -> %p\n", node, new_n);
1541 new_n->back_pointer = new_parent;
1542 new_n->parent_slot = node->parent_slot;
1543 *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
1544 new_ptr_pp = NULL;
1545 slot = 0;
1546
1547continue_node:
1548 /* Filter across any leaves and gc any subtrees */
1549 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1550 ptr = node->slots[slot];
1551 if (!ptr)
1552 continue;
1553
1554 if (assoc_array_ptr_is_leaf(ptr)) {
1555 if (iterator(assoc_array_ptr_to_leaf(ptr),
1556 iterator_data))
1557 /* The iterator will have done any reference
1558 * counting on the object for us.
1559 */
1560 new_n->slots[slot] = ptr;
1561 continue;
1562 }
1563
1564 new_ptr_pp = &new_n->slots[slot];
1565 cursor = ptr;
1566 goto descend;
1567 }
1568
1569 pr_devel("-- compress node %p --\n", new_n);
1570
1571 /* Count up the number of empty slots in this node and work out the
1572 * subtree leaf count.
1573 */
1574 new_n->nr_leaves_on_branch = 0;
1575 nr_free = 0;
1576 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1577 ptr = new_n->slots[slot];
1578 if (!ptr)
1579 nr_free++;
1580 else if (assoc_array_ptr_is_leaf(ptr))
1581 new_n->nr_leaves_on_branch++;
1582 }
1583 pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
1584
1585 /* See what we can fold in */
1586 next_slot = 0;
1587 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1588 struct assoc_array_shortcut *s;
1589 struct assoc_array_node *child;
1590
1591 ptr = new_n->slots[slot];
1592 if (!ptr || assoc_array_ptr_is_leaf(ptr))
1593 continue;
1594
1595 s = NULL;
1596 if (assoc_array_ptr_is_shortcut(ptr)) {
1597 s = assoc_array_ptr_to_shortcut(ptr);
1598 ptr = s->next_node;
1599 }
1600
1601 child = assoc_array_ptr_to_node(ptr);
1602 new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
1603
1604 if (child->nr_leaves_on_branch <= nr_free + 1) {
1605 /* Fold the child node into this one */
1606 pr_devel("[%d] fold node %lu/%d [nx %d]\n",
1607 slot, child->nr_leaves_on_branch, nr_free + 1,
1608 next_slot);
1609
1610 /* We would already have reaped an intervening shortcut
1611 * on the way back up the tree.
1612 */
1613 BUG_ON(s);
1614
1615 new_n->slots[slot] = NULL;
1616 nr_free++;
1617 if (slot < next_slot)
1618 next_slot = slot;
1619 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
1620 struct assoc_array_ptr *p = child->slots[i];
1621 if (!p)
1622 continue;
1623 BUG_ON(assoc_array_ptr_is_meta(p));
1624 while (new_n->slots[next_slot])
1625 next_slot++;
1626 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
1627 new_n->slots[next_slot++] = p;
1628 nr_free--;
1629 }
1630 kfree(child);
1631 } else {
1632 pr_devel("[%d] retain node %lu/%d [nx %d]\n",
1633 slot, child->nr_leaves_on_branch, nr_free + 1,
1634 next_slot);
1635 }
1636 }
1637
1638 pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
1639
1640 nr_leaves_on_tree = new_n->nr_leaves_on_branch;
1641
1642 /* Excise this node if it is singly occupied by a shortcut */
1643 if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
1644 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
1645 if ((ptr = new_n->slots[slot]))
1646 break;
1647
1648 if (assoc_array_ptr_is_meta(ptr) &&
1649 assoc_array_ptr_is_shortcut(ptr)) {
1650 pr_devel("excise node %p with 1 shortcut\n", new_n);
1651 new_s = assoc_array_ptr_to_shortcut(ptr);
1652 new_parent = new_n->back_pointer;
1653 slot = new_n->parent_slot;
1654 kfree(new_n);
1655 if (!new_parent) {
1656 new_s->back_pointer = NULL;
1657 new_s->parent_slot = 0;
1658 new_root = ptr;
1659 goto gc_complete;
1660 }
1661
1662 if (assoc_array_ptr_is_shortcut(new_parent)) {
1663 /* We can discard any preceding shortcut also */
1664 struct assoc_array_shortcut *s =
1665 assoc_array_ptr_to_shortcut(new_parent);
1666
1667 pr_devel("excise preceding shortcut\n");
1668
1669 new_parent = new_s->back_pointer = s->back_pointer;
1670 slot = new_s->parent_slot = s->parent_slot;
1671 kfree(s);
1672 if (!new_parent) {
1673 new_s->back_pointer = NULL;
1674 new_s->parent_slot = 0;
1675 new_root = ptr;
1676 goto gc_complete;
1677 }
1678 }
1679
1680 new_s->back_pointer = new_parent;
1681 new_s->parent_slot = slot;
1682 new_n = assoc_array_ptr_to_node(new_parent);
1683 new_n->slots[slot] = ptr;
1684 goto ascend_old_tree;
1685 }
1686 }
1687
1688 /* Excise any shortcuts we might encounter that point to nodes that
1689 * only contain leaves.
1690 */
1691 ptr = new_n->back_pointer;
1692 if (!ptr)
1693 goto gc_complete;
1694
1695 if (assoc_array_ptr_is_shortcut(ptr)) {
1696 new_s = assoc_array_ptr_to_shortcut(ptr);
1697 new_parent = new_s->back_pointer;
1698 slot = new_s->parent_slot;
1699
1700 if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
1701 struct assoc_array_node *n;
1702
1703 pr_devel("excise shortcut\n");
1704 new_n->back_pointer = new_parent;
1705 new_n->parent_slot = slot;
1706 kfree(new_s);
1707 if (!new_parent) {
1708 new_root = assoc_array_node_to_ptr(new_n);
1709 goto gc_complete;
1710 }
1711
1712 n = assoc_array_ptr_to_node(new_parent);
1713 n->slots[slot] = assoc_array_node_to_ptr(new_n);
1714 }
1715 } else {
1716 new_parent = ptr;
1717 }
1718 new_n = assoc_array_ptr_to_node(new_parent);
1719
1720ascend_old_tree:
1721 ptr = node->back_pointer;
1722 if (assoc_array_ptr_is_shortcut(ptr)) {
1723 shortcut = assoc_array_ptr_to_shortcut(ptr);
1724 slot = shortcut->parent_slot;
1725 cursor = shortcut->back_pointer;
1726 } else {
1727 slot = node->parent_slot;
1728 cursor = ptr;
1729 }
1730 BUG_ON(!ptr);
1731 node = assoc_array_ptr_to_node(cursor);
1732 slot++;
1733 goto continue_node;
1734
1735gc_complete:
1736 edit->set[0].to = new_root;
1737 assoc_array_apply_edit(edit);
1738 edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
1739 return 0;
1740
1741enomem:
1742 pr_devel("enomem\n");
1743 assoc_array_destroy_subtree(new_root, edit->ops);
1744 kfree(edit);
1745 return -ENOMEM;
1746}
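
The iterator contract described above can be illustrated with a hedged sketch; struct my_object, its fields and the ops pointer in the trailing comment are invented for the example:

#include <linux/atomic.h>
#include <linux/types.h>

struct my_object {
	atomic_t	usage;
	bool		defunct;
};

/* Return false to discard the object; take a reference before keeping it. */
static bool my_gc_iterator(void *object, void *iterator_data)
{
	struct my_object *obj = object;

	if (obj->defunct)
		return false;

	atomic_inc(&obj->usage);	/* retained: GC keeps this leaf */
	return true;
}

/* Typical call, under the caller's write lock:
 *	assoc_array_gc(array, ops, my_gc_iterator, NULL);
 */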
diff --git a/lib/crc32.c b/lib/crc32.c
index 410093dbe51c..70f00ca5ef1e 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -29,6 +29,7 @@
29#include <linux/crc32.h> 29#include <linux/crc32.h>
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/types.h> 31#include <linux/types.h>
32#include <linux/sched.h>
32#include "crc32defs.h" 33#include "crc32defs.h"
33 34
34#if CRC_LE_BITS > 8 35#if CRC_LE_BITS > 8
@@ -49,6 +50,30 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
49MODULE_DESCRIPTION("Various CRC32 calculations"); 50MODULE_DESCRIPTION("Various CRC32 calculations");
50MODULE_LICENSE("GPL"); 51MODULE_LICENSE("GPL");
51 52
53#define GF2_DIM 32
54
55static u32 gf2_matrix_times(u32 *mat, u32 vec)
56{
57 u32 sum = 0;
58
59 while (vec) {
60 if (vec & 1)
61 sum ^= *mat;
62 vec >>= 1;
63 mat++;
64 }
65
66 return sum;
67}
68
69static void gf2_matrix_square(u32 *square, u32 *mat)
70{
71 int i;
72
73 for (i = 0; i < GF2_DIM; i++)
74 square[i] = gf2_matrix_times(mat, mat[i]);
75}
76
52#if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 77#if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
53 78
54/* implements slicing-by-4 or slicing-by-8 algorithm */ 79/* implements slicing-by-4 or slicing-by-8 algorithm */
@@ -130,6 +155,52 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
130} 155}
131#endif 156#endif
132 157
158/* For conditions of distribution and use, see copyright notice in zlib.h */
159static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2,
160 u32 polynomial)
161{
162 u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */
163 u32 odd[GF2_DIM]; /* Odd-power-of-two zeros operator */
164 u32 row;
165 int i;
166
167 if (len2 <= 0)
168 return crc1;
169
170 /* Put operator for one zero bit in odd */
171 odd[0] = polynomial;
172 row = 1;
173 for (i = 1; i < GF2_DIM; i++) {
174 odd[i] = row;
175 row <<= 1;
176 }
177
178 gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */
179 gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */
180
181 /* Apply len2 zeros to crc1 (first square will put the operator for one
182 * zero byte, eight zero bits, in even).
183 */
184 do {
185 /* Apply zeros operator for this bit of len2 */
186 gf2_matrix_square(even, odd);
187 if (len2 & 1)
188 crc1 = gf2_matrix_times(even, crc1);
189 len2 >>= 1;
190 /* If no more bits set, then done */
191 if (len2 == 0)
192 break;
193 /* Another iteration of the loop with odd and even swapped */
194 gf2_matrix_square(odd, even);
195 if (len2 & 1)
196 crc1 = gf2_matrix_times(odd, crc1);
197 len2 >>= 1;
198 } while (len2 != 0);
199
200 crc1 ^= crc2;
201 return crc1;
202}
203
133/** 204/**
134 * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II 205 * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
135 * CRC32/CRC32C 206 * CRC32/CRC32C
@@ -200,8 +271,19 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
200 (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); 271 (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
201} 272}
202#endif 273#endif
274u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
275{
276 return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE);
277}
278
279u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
280{
281 return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE);
282}
203EXPORT_SYMBOL(crc32_le); 283EXPORT_SYMBOL(crc32_le);
284EXPORT_SYMBOL(crc32_le_combine);
204EXPORT_SYMBOL(__crc32c_le); 285EXPORT_SYMBOL(__crc32c_le);
286EXPORT_SYMBOL(__crc32c_le_combine);
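
The exported combine helpers satisfy the identity the self-tests added further down exercise: the CRC of a concatenated buffer equals the combination of the CRCs of its two halves. A minimal sketch, assuming only crc32_le() and the new crc32_le_combine():

#include <linux/crc32.h>
#include <linux/types.h>

/* True iff crc32 of buf[0..len) equals the combination of the two halves. */
static bool crc32_combine_holds(const u8 *buf, size_t len, size_t split)
{
	u32 whole = crc32_le(~0U, buf, len);
	u32 head  = crc32_le(~0U, buf, split);
	u32 tail  = crc32_le(0, buf + split, len - split);

	return whole == crc32_le_combine(head, tail, len - split);
}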
205 287
206/** 288/**
207 * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 289 * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
@@ -795,206 +877,106 @@ static struct crc_test {
795 u32 crc32c_le; /* expected crc32c_le result */ 877 u32 crc32c_le; /* expected crc32c_le result */
796} test[] = 878} test[] =
797{ 879{
798 {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 880 {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c},
799 0xf6e93d6c}, 881 {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca},
800 {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 882 {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 0x52e1ebb8},
801 0x0fe92aca}, 883 {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 0x0798af9a},
802 {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 884 {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 0x18eb3152},
803 0x52e1ebb8}, 885 {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 0xd00d08c7},
804 {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 886 {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 0x8ba966bc},
805 0x0798af9a}, 887 {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 0x11d694a2},
806 {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 888 {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 0x6ab3208d},
807 0x18eb3152}, 889 {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 0xba4603c5},
808 {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 890 {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 0xe6071c6f},
809 0xd00d08c7}, 891 {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 0x179ec30a},
810 {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 892 {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 0x0903beb8},
811 0x8ba966bc}, 893 {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 0x6a7cb4fa},
812 {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 894 {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 0xdb535801},
813 0x11d694a2}, 895 {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 0x92bed597},
814 {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 896 {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 0x192a3f1b},
815 0x6ab3208d}, 897 {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 0xccbaec1a},
816 {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 898 {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 0x7eabae4d},
817 0xba4603c5}, 899 {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 0x28c72982},
818 {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 900 {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 0xc3cd4d18},
819 0xe6071c6f}, 901 {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 0xbca8f0e7},
820 {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 902 {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 0x713f60b3},
821 0x179ec30a}, 903 {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 0xebd08fd5},
822 {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 904 {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 0x64406c59},
823 0x0903beb8}, 905 {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 0x7421890e},
824 {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 906 {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 0xe9347603},
825 0x6a7cb4fa}, 907 {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 0x1bef9060},
826 {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 908 {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 0x34720072},
827 0xdb535801}, 909 {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 0x48310f59},
828 {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 910 {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 0x783a4213},
829 0x92bed597}, 911 {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 0x9e8efd41},
830 {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 912 {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 0xfc3d34a5},
831 0x192a3f1b}, 913 {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 0x17a52ae2},
832 {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 914 {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 0x886d935a},
833 0xccbaec1a}, 915 {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 0xeaaeaeb2},
834 {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 916 {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 0x8e900a4b},
835 0x7eabae4d}, 917 {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 0xd74662b1},
836 {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 918 {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 0xd26752ba},
837 0x28c72982}, 919 {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 0x8b1fcd62},
838 {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 920 {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 0xf54342fe},
839 0xc3cd4d18}, 921 {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 0x5b95b988},
840 {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 922 {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 0x2e1176be},
841 0xbca8f0e7}, 923 {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 0x66120546},
842 {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 924 {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 0xf256a5cc},
843 0x713f60b3}, 925 {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 0x4af1dd69},
844 {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 926 {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 0x56f0a04a},
845 0xebd08fd5}, 927 {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 0x74f6b6b2},
846 {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 928 {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 0x085951fd},
847 0x64406c59}, 929 {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 0xc65387eb},
848 {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 930 {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, 0x1ca9257b},
849 0x7421890e}, 931 {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, 0xfd196d76},
850 {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 932 {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, 0x5ef88339},
851 0xe9347603}, 933 {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, 0x2c3714d9},
852 {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 934 {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, 0x58576548},
853 0x1bef9060}, 935 {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, 0xfd7c57de},
854 {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 936 {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, 0xd5fedd59},
855 0x34720072}, 937 {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, 0x1cc3b17b},
856 {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 938 {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, 0x270eed73},
857 0x48310f59}, 939 {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, 0x91ecbb11},
858 {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 940 {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, 0x05ed8d0c},
859 0x783a4213}, 941 {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, 0x0b09ad5b},
860 {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 942 {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, 0xf8d511fb},
861 0x9e8efd41}, 943 {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, 0x5ad832cc},
862 {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 944 {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, 0x1214d196},
863 0xfc3d34a5}, 945 {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, 0x5747218a},
864 {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 946 {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, 0xde8f14de},
865 0x17a52ae2}, 947 {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, 0x3563b7b9},
866 {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 948 {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, 0x071475d0},
867 0x886d935a}, 949 {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, 0x54c79d60},
868 {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 950 {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, 0x4c53eee6},
869 0xeaaeaeb2}, 951 {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, 0x10137a3c},
870 {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 952 {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, 0xaa9d6c73},
871 0x8e900a4b}, 953 {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, 0xb63d23e7},
872 {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 954 {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, 0x7f53e9cf},
873 0xd74662b1}, 955 {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, 0x13c1cd83},
874 {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 956 {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, 0x49ff5867},
875 0xd26752ba}, 957 {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, 0x8467f211},
876 {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 958 {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, 0x3f9683b2},
877 0x8b1fcd62}, 959 {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, 0x76a3f874},
878 {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 960 {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, 0x863b702f},
879 0xf54342fe}, 961 {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, 0xdc6c58ff},
880 {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 962 {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, 0x0622cc95},
881 0x5b95b988}, 963 {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, 0xe85605cd},
882 {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 964 {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, 0x31da5f06},
883 0x2e1176be}, 965 {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, 0xa1f2e784},
884 {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 966 {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, 0xb07cc616},
885 0x66120546}, 967 {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, 0xbf943b6c},
886 {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 968 {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, 0x2c01af1c},
887 0xf256a5cc}, 969 {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, 0x0fe5f56d},
888 {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 970 {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, 0xf8943b2d},
889 0x4af1dd69}, 971 {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, 0xe4d89272},
890 {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 972 {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, 0x7c2f6bbb},
891 0x56f0a04a}, 973 {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, 0xabbf388b},
892 {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 974 {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, 0x1dca1f4e},
893 0x74f6b6b2}, 975 {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, 0x5c170e23},
894 {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 976 {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, 0xc0e9d672},
895 0x085951fd}, 977 {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, 0xc18bdc86},
896 {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 978 {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, 0xa874fcdd},
897 0xc65387eb}, 979 {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, 0x9dc0bb48},
898 {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643,
899 0x1ca9257b},
900 {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10,
901 0xfd196d76},
902 {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d,
903 0x5ef88339},
904 {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5,
905 0x2c3714d9},
906 {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b,
907 0x58576548},
908 {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee,
909 0xfd7c57de},
910 {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14,
911 0xd5fedd59},
912 {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a,
913 0x1cc3b17b},
914 {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b,
915 0x270eed73},
916 {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3,
917 0x91ecbb11},
918 {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826,
919 0x05ed8d0c},
920 {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06,
921 0x0b09ad5b},
922 {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35,
923 0xf8d511fb},
924 {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801,
925 0x5ad832cc},
926 {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2,
927 0x1214d196},
928 {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d,
929 0x5747218a},
930 {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c,
931 0xde8f14de},
932 {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba,
933 0x3563b7b9},
934 {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5,
935 0x071475d0},
936 {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b,
937 0x54c79d60},
938 {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178,
939 0x4c53eee6},
940 {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3,
941 0x10137a3c},
942 {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605,
943 0xaa9d6c73},
944 {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1,
945 0xb63d23e7},
946 {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9,
947 0x7f53e9cf},
948 {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78,
949 0x13c1cd83},
950 {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9,
951 0x49ff5867},
952 {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd,
953 0x8467f211},
954 {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab,
955 0x3f9683b2},
956 {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb,
957 0x76a3f874},
958 {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77,
959 0x863b702f},
960 {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da,
961 0xdc6c58ff},
962 {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39,
963 0x0622cc95},
964 {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16,
965 0xe85605cd},
966 {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208,
967 0x31da5f06},
968 {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e,
969 0xa1f2e784},
970 {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5,
971 0xb07cc616},
972 {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892,
973 0xbf943b6c},
974 {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db,
975 0x2c01af1c},
976 {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43,
977 0x0fe5f56d},
978 {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac,
979 0xf8943b2d},
980 {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7,
981 0xe4d89272},
982 {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2,
983 0x7c2f6bbb},
984 {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2,
985 0xabbf388b},
986 {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640,
987 0x1dca1f4e},
988 {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f,
989 0x5c170e23},
990 {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99,
991 0xc0e9d672},
992 {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7,
993 0xc18bdc86},
994 {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499,
995 0xa874fcdd},
996 {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a,
997 0x9dc0bb48},
998}; 980};
999 981
1000#include <linux/time.h> 982#include <linux/time.h>
@@ -1050,6 +1032,41 @@ static int __init crc32c_test(void)
1050 return 0; 1032 return 0;
1051} 1033}
1052 1034
1035static int __init crc32c_combine_test(void)
1036{
1037 int i, j;
1038 int errors = 0, runs = 0;
1039
1040 for (i = 0; i < 10; i++) {
1041 u32 crc_full;
1042
1043 crc_full = __crc32c_le(test[i].crc, test_buf + test[i].start,
1044 test[i].length);
1045 for (j = 0; j <= test[i].length; ++j) {
1046 u32 crc1, crc2;
1047 u32 len1 = j, len2 = test[i].length - j;
1048
1049 crc1 = __crc32c_le(test[i].crc, test_buf +
1050 test[i].start, len1);
1051 crc2 = __crc32c_le(0, test_buf + test[i].start +
1052 len1, len2);
1053
1054 if (!(crc_full == __crc32c_le_combine(crc1, crc2, len2) &&
1055 crc_full == test[i].crc32c_le))
1056 errors++;
1057 runs++;
1058 cond_resched();
1059 }
1060 }
1061
1062 if (errors)
1063 pr_warn("crc32c_combine: %d/%d self tests failed\n", errors, runs);
1064 else
1065 pr_info("crc32c_combine: %d self tests passed\n", runs);
1066
1067 return 0;
1068}
1069
1053static int __init crc32_test(void) 1070static int __init crc32_test(void)
1054{ 1071{
1055 int i; 1072 int i;
@@ -1109,10 +1126,49 @@ static int __init crc32_test(void)
1109 return 0; 1126 return 0;
1110} 1127}
1111 1128
1129static int __init crc32_combine_test(void)
1130{
1131 int i, j;
1132 int errors = 0, runs = 0;
1133
1134 for (i = 0; i < 10; i++) {
1135 u32 crc_full;
1136
1137 crc_full = crc32_le(test[i].crc, test_buf + test[i].start,
1138 test[i].length);
1139 for (j = 0; j <= test[i].length; ++j) {
1140 u32 crc1, crc2;
1141 u32 len1 = j, len2 = test[i].length - j;
1142
1143 crc1 = crc32_le(test[i].crc, test_buf +
1144 test[i].start, len1);
1145 crc2 = crc32_le(0, test_buf + test[i].start +
1146 len1, len2);
1147
1148 if (!(crc_full == crc32_le_combine(crc1, crc2, len2) &&
1149 crc_full == test[i].crc_le))
1150 errors++;
1151 runs++;
1152 cond_resched();
1153 }
1154 }
1155
1156 if (errors)
1157 pr_warn("crc32_combine: %d/%d self tests failed\n", errors, runs);
1158 else
1159 pr_info("crc32_combine: %d self tests passed\n", runs);
1160
1161 return 0;
1162}
1163
1112static int __init crc32test_init(void) 1164static int __init crc32test_init(void)
1113{ 1165{
1114 crc32_test(); 1166 crc32_test();
1115 crc32c_test(); 1167 crc32c_test();
1168
1169 crc32_combine_test();
1170 crc32c_combine_test();
1171
1116 return 0; 1172 return 0;
1117} 1173}
1118 1174
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index bf2c8b1043d8..e0731c3db706 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -196,7 +196,7 @@ static void free_object(struct debug_obj *obj)
196 * initialized: 196 * initialized:
197 */ 197 */
198 if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) 198 if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache)
199 sched = keventd_up() && !work_pending(&debug_obj_work); 199 sched = keventd_up();
200 hlist_add_head(&obj->node, &obj_pool); 200 hlist_add_head(&obj->node, &obj_pool);
201 obj_pool_free++; 201 obj_pool_free++;
202 obj_pool_used--; 202 obj_pool_used--;
diff --git a/lib/digsig.c b/lib/digsig.c
index 2f31e6a45f0a..8793aeda30ca 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -209,7 +209,7 @@ int digsig_verify(struct key *keyring, const char *sig, int siglen,
209 kref = keyring_search(make_key_ref(keyring, 1UL), 209 kref = keyring_search(make_key_ref(keyring, 1UL),
210 &key_type_user, name); 210 &key_type_user, name);
211 if (IS_ERR(kref)) 211 if (IS_ERR(kref))
212 key = ERR_PTR(PTR_ERR(kref)); 212 key = ERR_CAST(kref);
213 else 213 else
214 key = key_ref_to_ptr(kref); 214 key = key_ref_to_ptr(kref);
215 } else { 215 } else {
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 26cf20be72b7..dda31168844f 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -313,6 +313,34 @@ retry:
313EXPORT_SYMBOL(gen_pool_alloc); 313EXPORT_SYMBOL(gen_pool_alloc);
314 314
315/** 315/**
316 * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage
317 * @pool: pool to allocate from
318 * @size: number of bytes to allocate from the pool
319 * @dma: dma-view physical address
320 *
321 * Allocate the requested number of bytes from the specified pool.
322 * Uses the pool allocation function (with first-fit algorithm by default).
 323 * Cannot be used in an NMI handler on architectures without an
 324 * NMI-safe cmpxchg implementation.
325 */
326void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
327{
328 unsigned long vaddr;
329
330 if (!pool)
331 return NULL;
332
333 vaddr = gen_pool_alloc(pool, size);
334 if (!vaddr)
335 return NULL;
336
337 *dma = gen_pool_virt_to_phys(pool, vaddr);
338
339 return (void *)vaddr;
340}
341EXPORT_SYMBOL(gen_pool_dma_alloc);
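
A hedged usage sketch for the new DMA helper; the pool is assumed to have been created and populated elsewhere (for example via gen_pool_create() and gen_pool_add_virt()):

#include <linux/genalloc.h>

static void *alloc_device_buffer(struct gen_pool *pool, size_t size,
				 dma_addr_t *dma)
{
	/* Returns the CPU address; *dma receives the device-view address. */
	void *vaddr = gen_pool_dma_alloc(pool, size, dma);

	if (!vaddr)
		return NULL;	/* pool exhausted */
	return vaddr;
}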
342
343/**
316 * gen_pool_free - free allocated special memory back to the pool 344 * gen_pool_free - free allocated special memory back to the pool
317 * @pool: pool to free to 345 * @pool: pool to free to
318 * @addr: starting address of memory to free back to pool 346 * @addr: starting address of memory to free back to pool
diff --git a/lib/kfifo.c b/lib/kfifo.c
index 7b7f83027b7b..d79b9d222065 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -215,7 +215,7 @@ static unsigned long kfifo_copy_from_user(struct __kfifo *fifo,
215 * incrementing the fifo->in index counter 215 * incrementing the fifo->in index counter
216 */ 216 */
217 smp_wmb(); 217 smp_wmb();
218 *copied = len - ret; 218 *copied = len - ret * esize;
219 /* return the number of elements which are not copied */ 219 /* return the number of elements which are not copied */
220 return ret; 220 return ret;
221} 221}
@@ -275,7 +275,7 @@ static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to,
275 * incrementing the fifo->out index counter 275 * incrementing the fifo->out index counter
276 */ 276 */
277 smp_wmb(); 277 smp_wmb();
278 *copied = len - ret; 278 *copied = len - ret * esize;
279 /* return the number of elements which are not copied */ 279 /* return the number of elements which are not copied */
280 return ret; 280 return ret;
281} 281}
diff --git a/lib/kobject.c b/lib/kobject.c
index 084f7b18d0c0..5b4b8886435e 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -13,11 +13,30 @@
13 */ 13 */
14 14
15#include <linux/kobject.h> 15#include <linux/kobject.h>
16#include <linux/kobj_completion.h>
16#include <linux/string.h> 17#include <linux/string.h>
17#include <linux/export.h> 18#include <linux/export.h>
18#include <linux/stat.h> 19#include <linux/stat.h>
19#include <linux/slab.h> 20#include <linux/slab.h>
20 21
22/**
23 * kobject_namespace - return @kobj's namespace tag
24 * @kobj: kobject in question
25 *
26 * Returns namespace tag of @kobj if its parent has namespace ops enabled
27 * and thus @kobj should have a namespace tag associated with it. Returns
28 * %NULL otherwise.
29 */
30const void *kobject_namespace(struct kobject *kobj)
31{
32 const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj);
33
34 if (!ns_ops || ns_ops->type == KOBJ_NS_TYPE_NONE)
35 return NULL;
36
37 return kobj->ktype->namespace(kobj);
38}
39
21/* 40/*
22 * populate_dir - populate directory with attributes. 41 * populate_dir - populate directory with attributes.
23 * @kobj: object we're working on. 42 * @kobj: object we're working on.
@@ -46,13 +65,21 @@ static int populate_dir(struct kobject *kobj)
46 65
47static int create_dir(struct kobject *kobj) 66static int create_dir(struct kobject *kobj)
48{ 67{
49 int error = 0; 68 int error;
50 error = sysfs_create_dir(kobj); 69
70 error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj));
51 if (!error) { 71 if (!error) {
52 error = populate_dir(kobj); 72 error = populate_dir(kobj);
53 if (error) 73 if (error)
54 sysfs_remove_dir(kobj); 74 sysfs_remove_dir(kobj);
55 } 75 }
76
77 /*
78 * @kobj->sd may be deleted by an ancestor going away. Hold an
79 * extra reference so that it stays until @kobj is gone.
80 */
81 sysfs_get(kobj->sd);
82
56 return error; 83 return error;
57} 84}
58 85
@@ -428,7 +455,7 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
428 goto out; 455 goto out;
429 } 456 }
430 457
431 error = sysfs_rename_dir(kobj, new_name); 458 error = sysfs_rename_dir_ns(kobj, new_name, kobject_namespace(kobj));
432 if (error) 459 if (error)
433 goto out; 460 goto out;
434 461
@@ -472,6 +499,7 @@ int kobject_move(struct kobject *kobj, struct kobject *new_parent)
472 if (kobj->kset) 499 if (kobj->kset)
473 new_parent = kobject_get(&kobj->kset->kobj); 500 new_parent = kobject_get(&kobj->kset->kobj);
474 } 501 }
502
475 /* old object path */ 503 /* old object path */
476 devpath = kobject_get_path(kobj, GFP_KERNEL); 504 devpath = kobject_get_path(kobj, GFP_KERNEL);
477 if (!devpath) { 505 if (!devpath) {
@@ -486,7 +514,7 @@ int kobject_move(struct kobject *kobj, struct kobject *new_parent)
486 sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); 514 sprintf(devpath_string, "DEVPATH_OLD=%s", devpath);
487 envp[0] = devpath_string; 515 envp[0] = devpath_string;
488 envp[1] = NULL; 516 envp[1] = NULL;
489 error = sysfs_move_dir(kobj, new_parent); 517 error = sysfs_move_dir_ns(kobj, new_parent, kobject_namespace(kobj));
490 if (error) 518 if (error)
491 goto out; 519 goto out;
492 old_parent = kobj->parent; 520 old_parent = kobj->parent;
@@ -508,10 +536,15 @@ out:
508 */ 536 */
509void kobject_del(struct kobject *kobj) 537void kobject_del(struct kobject *kobj)
510{ 538{
539 struct sysfs_dirent *sd;
540
511 if (!kobj) 541 if (!kobj)
512 return; 542 return;
513 543
544 sd = kobj->sd;
514 sysfs_remove_dir(kobj); 545 sysfs_remove_dir(kobj);
546 sysfs_put(sd);
547
515 kobj->state_in_sysfs = 0; 548 kobj->state_in_sysfs = 0;
516 kobj_kset_leave(kobj); 549 kobj_kset_leave(kobj);
517 kobject_put(kobj->parent); 550 kobject_put(kobj->parent);
@@ -727,6 +760,55 @@ const struct sysfs_ops kobj_sysfs_ops = {
727}; 760};
728 761
729/** 762/**
763 * kobj_completion_init - initialize a kobj_completion object.
764 * @kc: kobj_completion
765 * @ktype: type of kobject to initialize
766 *
767 * kobj_completion structures can be embedded within structures with different
768 * lifetime rules. During the release of the enclosing object, we can
769 * wait on the release of the kobject so that we don't free it while it's
770 * still busy.
771 */
772void kobj_completion_init(struct kobj_completion *kc, struct kobj_type *ktype)
773{
774 init_completion(&kc->kc_unregister);
775 kobject_init(&kc->kc_kobj, ktype);
776}
777EXPORT_SYMBOL_GPL(kobj_completion_init);
778
779/**
780 * kobj_completion_release - release a kobj_completion object
781 * @kobj: kobject embedded in kobj_completion
782 *
783 * Used with kobject_release to notify waiters that the kobject has been
784 * released.
785 */
786void kobj_completion_release(struct kobject *kobj)
787{
788 struct kobj_completion *kc = kobj_to_kobj_completion(kobj);
789 complete(&kc->kc_unregister);
790}
791EXPORT_SYMBOL_GPL(kobj_completion_release);
792
793/**
794 * kobj_completion_del_and_wait - release the kobject and wait for it
795 * @kc: kobj_completion object to release
796 *
797 * Delete the kobject from sysfs and drop the reference count. Then wait
798 * until any other outstanding references are also dropped. This routine
799 * is only necessary once other references may have been taken on the
800 * kobject. Typically this happens when the kobject has been published
801 * to sysfs via kobject_add.
802 */
803void kobj_completion_del_and_wait(struct kobj_completion *kc)
804{
805 kobject_del(&kc->kc_kobj);
806 kobject_put(&kc->kc_kobj);
807 wait_for_completion(&kc->kc_unregister);
808}
809EXPORT_SYMBOL_GPL(kobj_completion_del_and_wait);
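
A sketch of the embedding pattern these helpers are meant for; struct my_device, my_ktype and the parent kobject are assumptions, and my_ktype's release method is expected to call kobj_completion_release():

#include <linux/kobject.h>
#include <linux/kobj_completion.h>

struct my_device {
	struct kobj_completion	kc;
	/* ... driver-private data ... */
};

static int my_device_publish(struct my_device *d, struct kobj_type *my_ktype,
			     struct kobject *parent)
{
	kobj_completion_init(&d->kc, my_ktype);
	return kobject_add(&d->kc.kc_kobj, parent, "my_device");
}

static void my_device_unpublish(struct my_device *d)
{
	/* Removes the kobject from sysfs and waits for all references. */
	kobj_completion_del_and_wait(&d->kc);
}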
810
811/**
730 * kset_register - initialize and add a kset. 812 * kset_register - initialize and add a kset.
731 * @k: kset. 813 * @k: kset.
732 */ 814 */
diff --git a/lib/llist.c b/lib/llist.c
index 4a70d120138c..f76196d07409 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -81,3 +81,25 @@ struct llist_node *llist_del_first(struct llist_head *head)
81 return entry; 81 return entry;
82} 82}
83EXPORT_SYMBOL_GPL(llist_del_first); 83EXPORT_SYMBOL_GPL(llist_del_first);
84
85/**
86 * llist_reverse_order - reverse order of a llist chain
87 * @head: first item of the list to be reversed
88 *
89 * Reverse the order of a chain of llist entries and return the
90 * new first entry.
91 */
92struct llist_node *llist_reverse_order(struct llist_node *head)
93{
94 struct llist_node *new_head = NULL;
95
96 while (head) {
97 struct llist_node *tmp = head;
98 head = head->next;
99 tmp->next = new_head;
100 new_head = tmp;
101 }
102
103 return new_head;
104}
105EXPORT_SYMBOL_GPL(llist_reverse_order);
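
A hedged sketch of the usual pairing with llist_del_all(): entries come back newest-first, so reversing restores submission order before processing (struct my_item in the comment is a placeholder):

#include <linux/llist.h>

/* Drain a lock-less list in the order the entries were added. */
static void drain_fifo(struct llist_head *head)
{
	struct llist_node *node = llist_del_all(head);

	node = llist_reverse_order(node);	/* newest-first -> oldest-first */
	while (node) {
		struct llist_node *next = node->next;

		/* process llist_entry(node, struct my_item, llnode) here */
		node = next;
	}
}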
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 6dc09d8f4c24..872a15a2a637 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -1002,7 +1002,7 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
1002 * Some tests (e.g. double-unlock) might corrupt the preemption 1002 * Some tests (e.g. double-unlock) might corrupt the preemption
1003 * count, so restore it: 1003 * count, so restore it:
1004 */ 1004 */
1005 preempt_count() = saved_preempt_count; 1005 preempt_count_set(saved_preempt_count);
1006#ifdef CONFIG_TRACE_IRQFLAGS 1006#ifdef CONFIG_TRACE_IRQFLAGS
1007 if (softirq_count()) 1007 if (softirq_count())
1008 current->softirqs_enabled = 0; 1008 current->softirqs_enabled = 0;
diff --git a/lib/lockref.c b/lib/lockref.c
index 6f9d434c1521..d2b123f8456b 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -1,7 +1,7 @@
1#include <linux/export.h> 1#include <linux/export.h>
2#include <linux/lockref.h> 2#include <linux/lockref.h>
3 3
4#ifdef CONFIG_CMPXCHG_LOCKREF 4#if USE_CMPXCHG_LOCKREF
5 5
6/* 6/*
7 * Allow weakly-ordered memory architectures to provide barrier-less 7 * Allow weakly-ordered memory architectures to provide barrier-less
@@ -153,6 +153,7 @@ void lockref_mark_dead(struct lockref *lockref)
153 assert_spin_locked(&lockref->lock); 153 assert_spin_locked(&lockref->lock);
154 lockref->count = -128; 154 lockref->count = -128;
155} 155}
156EXPORT_SYMBOL(lockref_mark_dead);
156 157
157/** 158/**
158 * lockref_get_not_dead - Increments count unless the ref is dead 159 * lockref_get_not_dead - Increments count unless the ref is dead
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index 657979f71bef..bf076d281d40 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -121,3 +121,6 @@ void mpi_free(MPI a)
121 kfree(a); 121 kfree(a);
122} 122}
123EXPORT_SYMBOL_GPL(mpi_free); 123EXPORT_SYMBOL_GPL(mpi_free);
124
125MODULE_DESCRIPTION("Multiprecision maths library");
126MODULE_LICENSE("GPL");
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
deleted file mode 100644
index 652a8ee8efe9..000000000000
--- a/lib/percpu-rwsem.c
+++ /dev/null
@@ -1,165 +0,0 @@
1#include <linux/atomic.h>
2#include <linux/rwsem.h>
3#include <linux/percpu.h>
4#include <linux/wait.h>
5#include <linux/lockdep.h>
6#include <linux/percpu-rwsem.h>
7#include <linux/rcupdate.h>
8#include <linux/sched.h>
9#include <linux/errno.h>
10
11int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
12 const char *name, struct lock_class_key *rwsem_key)
13{
14 brw->fast_read_ctr = alloc_percpu(int);
15 if (unlikely(!brw->fast_read_ctr))
16 return -ENOMEM;
17
18 /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
19 __init_rwsem(&brw->rw_sem, name, rwsem_key);
20 atomic_set(&brw->write_ctr, 0);
21 atomic_set(&brw->slow_read_ctr, 0);
22 init_waitqueue_head(&brw->write_waitq);
23 return 0;
24}
25
26void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
27{
28 free_percpu(brw->fast_read_ctr);
29 brw->fast_read_ctr = NULL; /* catch use after free bugs */
30}
31
32/*
33 * This is the fast-path for down_read/up_read, it only needs to ensure
34 * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
35 * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
36 * serialize with the preempt-disabled section below.
37 *
38 * The nontrivial part is that we should guarantee acquire/release semantics
39 * in case when
40 *
41 * R_W: down_write() comes after up_read(), the writer should see all
42 * changes done by the reader
43 * or
44 * W_R: down_read() comes after up_write(), the reader should see all
45 * changes done by the writer
46 *
47 * If this helper fails the callers rely on the normal rw_semaphore and
48 * atomic_dec_and_test(), so in this case we have the necessary barriers.
49 *
50 * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
51 * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
52 * reader inside the critical section. See the comments in down_write and
53 * up_write below.
54 */
55static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
56{
57 bool success = false;
58
59 preempt_disable();
60 if (likely(!atomic_read(&brw->write_ctr))) {
61 __this_cpu_add(*brw->fast_read_ctr, val);
62 success = true;
63 }
64 preempt_enable();
65
66 return success;
67}
68
69/*
70 * Like the normal down_read() this is not recursive, the writer can
71 * come after the first percpu_down_read() and create the deadlock.
72 *
73 * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
74 * percpu_up_read() does rwsem_release(). This pairs with the usage
75 * of ->rw_sem in percpu_down/up_write().
76 */
77void percpu_down_read(struct percpu_rw_semaphore *brw)
78{
79 might_sleep();
80 if (likely(update_fast_ctr(brw, +1))) {
81 rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
82 return;
83 }
84
85 down_read(&brw->rw_sem);
86 atomic_inc(&brw->slow_read_ctr);
87 /* avoid up_read()->rwsem_release() */
88 __up_read(&brw->rw_sem);
89}
90
91void percpu_up_read(struct percpu_rw_semaphore *brw)
92{
93 rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
94
95 if (likely(update_fast_ctr(brw, -1)))
96 return;
97
98 /* false-positive is possible but harmless */
99 if (atomic_dec_and_test(&brw->slow_read_ctr))
100 wake_up_all(&brw->write_waitq);
101}
102
103static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
104{
105 unsigned int sum = 0;
106 int cpu;
107
108 for_each_possible_cpu(cpu) {
109 sum += per_cpu(*brw->fast_read_ctr, cpu);
110 per_cpu(*brw->fast_read_ctr, cpu) = 0;
111 }
112
113 return sum;
114}
115
116/*
117 * A writer increments ->write_ctr to force the readers to switch to the
118 * slow mode, note the atomic_read() check in update_fast_ctr().
119 *
120 * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
121 * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
122 * counter it represents the number of active readers.
123 *
124 * Finally the writer takes ->rw_sem for writing and blocks the new readers,
125 * then waits until the slow counter becomes zero.
126 */
127void percpu_down_write(struct percpu_rw_semaphore *brw)
128{
129 /* tell update_fast_ctr() there is a pending writer */
130 atomic_inc(&brw->write_ctr);
131 /*
132 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
133 * so that update_fast_ctr() can't succeed.
134 *
135 * 2. Ensures we see the result of every previous this_cpu_add() in
136 * update_fast_ctr().
137 *
138 * 3. Ensures that if any reader has exited its critical section via
139 * fast-path, it executes a full memory barrier before we return.
140 * See R_W case in the comment above update_fast_ctr().
141 */
142 synchronize_sched_expedited();
143
144 /* exclude other writers, and block the new readers completely */
145 down_write(&brw->rw_sem);
146
147 /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
148 atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
149
150 /* wait for all readers to complete their percpu_up_read() */
151 wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
152}
153
154void percpu_up_write(struct percpu_rw_semaphore *brw)
155{
156 /* release the lock, but the readers can't use the fast-path */
157 up_write(&brw->rw_sem);
158 /*
159 * Insert the barrier before the next fast-path in down_read,
160 * see W_R case in the comment above update_fast_ctr().
161 */
162 synchronize_sched_expedited();
163 /* the last writer unblocks update_fast_ctr() */
164 atomic_dec(&brw->write_ctr);
165}
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 93c5d5ecff4e..7473ee3b4ee7 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -60,14 +60,15 @@ static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
60void percpu_counter_set(struct percpu_counter *fbc, s64 amount) 60void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
61{ 61{
62 int cpu; 62 int cpu;
63 unsigned long flags;
63 64
64 raw_spin_lock(&fbc->lock); 65 raw_spin_lock_irqsave(&fbc->lock, flags);
65 for_each_possible_cpu(cpu) { 66 for_each_possible_cpu(cpu) {
66 s32 *pcount = per_cpu_ptr(fbc->counters, cpu); 67 s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
67 *pcount = 0; 68 *pcount = 0;
68 } 69 }
69 fbc->count = amount; 70 fbc->count = amount;
70 raw_spin_unlock(&fbc->lock); 71 raw_spin_unlock_irqrestore(&fbc->lock, flags);
71} 72}
72EXPORT_SYMBOL(percpu_counter_set); 73EXPORT_SYMBOL(percpu_counter_set);
73 74
@@ -78,9 +79,10 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
78 preempt_disable(); 79 preempt_disable();
79 count = __this_cpu_read(*fbc->counters) + amount; 80 count = __this_cpu_read(*fbc->counters) + amount;
80 if (count >= batch || count <= -batch) { 81 if (count >= batch || count <= -batch) {
81 raw_spin_lock(&fbc->lock); 82 unsigned long flags;
83 raw_spin_lock_irqsave(&fbc->lock, flags);
82 fbc->count += count; 84 fbc->count += count;
83 raw_spin_unlock(&fbc->lock); 85 raw_spin_unlock_irqrestore(&fbc->lock, flags);
84 __this_cpu_write(*fbc->counters, 0); 86 __this_cpu_write(*fbc->counters, 0);
85 } else { 87 } else {
86 __this_cpu_write(*fbc->counters, count); 88 __this_cpu_write(*fbc->counters, count);
@@ -97,14 +99,15 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
97{ 99{
98 s64 ret; 100 s64 ret;
99 int cpu; 101 int cpu;
102 unsigned long flags;
100 103
101 raw_spin_lock(&fbc->lock); 104 raw_spin_lock_irqsave(&fbc->lock, flags);
102 ret = fbc->count; 105 ret = fbc->count;
103 for_each_online_cpu(cpu) { 106 for_each_online_cpu(cpu) {
104 s32 *pcount = per_cpu_ptr(fbc->counters, cpu); 107 s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
105 ret += *pcount; 108 ret += *pcount;
106 } 109 }
107 raw_spin_unlock(&fbc->lock); 110 raw_spin_unlock_irqrestore(&fbc->lock, flags);
108 return ret; 111 return ret;
109} 112}
110EXPORT_SYMBOL(__percpu_counter_sum); 113EXPORT_SYMBOL(__percpu_counter_sum);
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index bab1ba2a4c71..9d054bf91d0f 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -30,15 +30,6 @@
30#include <linux/spinlock.h> 30#include <linux/spinlock.h>
31#include <linux/percpu_ida.h> 31#include <linux/percpu_ida.h>
32 32
33/*
34 * Number of tags we move between the percpu freelist and the global freelist at
35 * a time
36 */
37#define IDA_PCPU_BATCH_MOVE 32U
38
39/* Max size of percpu freelist, */
40#define IDA_PCPU_SIZE ((IDA_PCPU_BATCH_MOVE * 3) / 2)
41
42struct percpu_ida_cpu { 33struct percpu_ida_cpu {
43 /* 34 /*
44 * Even though this is percpu, we need a lock for tag stealing by remote 35 * Even though this is percpu, we need a lock for tag stealing by remote
@@ -78,7 +69,7 @@ static inline void steal_tags(struct percpu_ida *pool,
78 struct percpu_ida_cpu *remote; 69 struct percpu_ida_cpu *remote;
79 70
80 for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); 71 for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
81 cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2; 72 cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2;
82 cpus_have_tags--) { 73 cpus_have_tags--) {
83 cpu = cpumask_next(cpu, &pool->cpus_have_tags); 74 cpu = cpumask_next(cpu, &pool->cpus_have_tags);
84 75
@@ -123,11 +114,10 @@ static inline void alloc_global_tags(struct percpu_ida *pool,
123{ 114{
124 move_tags(tags->freelist, &tags->nr_free, 115 move_tags(tags->freelist, &tags->nr_free,
125 pool->freelist, &pool->nr_free, 116 pool->freelist, &pool->nr_free,
126 min(pool->nr_free, IDA_PCPU_BATCH_MOVE)); 117 min(pool->nr_free, pool->percpu_batch_size));
127} 118}
128 119
129static inline unsigned alloc_local_tag(struct percpu_ida *pool, 120static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
130 struct percpu_ida_cpu *tags)
131{ 121{
132 int tag = -ENOSPC; 122 int tag = -ENOSPC;
133 123
@@ -168,7 +158,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp)
168 tags = this_cpu_ptr(pool->tag_cpu); 158 tags = this_cpu_ptr(pool->tag_cpu);
169 159
170 /* Fastpath */ 160 /* Fastpath */
171 tag = alloc_local_tag(pool, tags); 161 tag = alloc_local_tag(tags);
172 if (likely(tag >= 0)) { 162 if (likely(tag >= 0)) {
173 local_irq_restore(flags); 163 local_irq_restore(flags);
174 return tag; 164 return tag;
@@ -245,17 +235,17 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
245 wake_up(&pool->wait); 235 wake_up(&pool->wait);
246 } 236 }
247 237
248 if (nr_free == IDA_PCPU_SIZE) { 238 if (nr_free == pool->percpu_max_size) {
249 spin_lock(&pool->lock); 239 spin_lock(&pool->lock);
250 240
251 /* 241 /*
252 * Global lock held and irqs disabled, don't need percpu 242 * Global lock held and irqs disabled, don't need percpu
253 * lock 243 * lock
254 */ 244 */
255 if (tags->nr_free == IDA_PCPU_SIZE) { 245 if (tags->nr_free == pool->percpu_max_size) {
256 move_tags(pool->freelist, &pool->nr_free, 246 move_tags(pool->freelist, &pool->nr_free,
257 tags->freelist, &tags->nr_free, 247 tags->freelist, &tags->nr_free,
258 IDA_PCPU_BATCH_MOVE); 248 pool->percpu_batch_size);
259 249
260 wake_up(&pool->wait); 250 wake_up(&pool->wait);
261 } 251 }
@@ -292,7 +282,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_destroy);
292 * Allocation is percpu, but sharding is limited by nr_tags - for best 282 * Allocation is percpu, but sharding is limited by nr_tags - for best
293 * performance, the workload should not span more cpus than nr_tags / 128. 283 * performance, the workload should not span more cpus than nr_tags / 128.
294 */ 284 */
295int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) 285int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
286 unsigned long max_size, unsigned long batch_size)
296{ 287{
297 unsigned i, cpu, order; 288 unsigned i, cpu, order;
298 289
@@ -301,6 +292,8 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
301 init_waitqueue_head(&pool->wait); 292 init_waitqueue_head(&pool->wait);
302 spin_lock_init(&pool->lock); 293 spin_lock_init(&pool->lock);
303 pool->nr_tags = nr_tags; 294 pool->nr_tags = nr_tags;
295 pool->percpu_max_size = max_size;
296 pool->percpu_batch_size = batch_size;
304 297
305 /* Guard against overflow */ 298 /* Guard against overflow */
306 if (nr_tags > (unsigned) INT_MAX + 1) { 299 if (nr_tags > (unsigned) INT_MAX + 1) {
@@ -319,7 +312,7 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
319 pool->nr_free = nr_tags; 312 pool->nr_free = nr_tags;
320 313
321 pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + 314 pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
322 IDA_PCPU_SIZE * sizeof(unsigned), 315 pool->percpu_max_size * sizeof(unsigned),
323 sizeof(unsigned)); 316 sizeof(unsigned));
324 if (!pool->tag_cpu) 317 if (!pool->tag_cpu)
325 goto err; 318 goto err;
@@ -332,4 +325,65 @@ err:
332 percpu_ida_destroy(pool); 325 percpu_ida_destroy(pool);
333 return -ENOMEM; 326 return -ENOMEM;
334} 327}
335EXPORT_SYMBOL_GPL(percpu_ida_init); 328EXPORT_SYMBOL_GPL(__percpu_ida_init);
329
330/**
331 * percpu_ida_for_each_free - iterate free ids of a pool
332 * @pool: pool to iterate
333 * @fn: iteration callback function
334 * @data: parameter for @fn
335 *
336 * Note: this does not guarantee to visit every free id exactly once. Some
337 * free ids might be missed, some might be visited more than once, and some
338 * might no longer be free by the time the callback sees them.
339 */
340int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
341 void *data)
342{
343 unsigned long flags;
344 struct percpu_ida_cpu *remote;
345 unsigned cpu, i, err = 0;
346
347 local_irq_save(flags);
348 for_each_possible_cpu(cpu) {
349 remote = per_cpu_ptr(pool->tag_cpu, cpu);
350 spin_lock(&remote->lock);
351 for (i = 0; i < remote->nr_free; i++) {
352 err = fn(remote->freelist[i], data);
353 if (err)
354 break;
355 }
356 spin_unlock(&remote->lock);
357 if (err)
358 goto out;
359 }
360
361 spin_lock(&pool->lock);
362 for (i = 0; i < pool->nr_free; i++) {
363 err = fn(pool->freelist[i], data);
364 if (err)
365 break;
366 }
367 spin_unlock(&pool->lock);
368out:
369 local_irq_restore(flags);
370 return err;
371}
372EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
373
374/**
375 * percpu_ida_free_tags - return the number of free tags on a specific cpu or in the global pool
376 * @pool: pool to inspect
377 * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids
378 *
379 * Note: this just returns a snapshot of the number of free tags.
380 */
381unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu)
382{
383 struct percpu_ida_cpu *remote;
384 if (cpu == nr_cpu_ids)
385 return pool->nr_free;
386 remote = per_cpu_ptr(pool->tag_cpu, cpu);
387 return remote->nr_free;
388}
389EXPORT_SYMBOL_GPL(percpu_ida_free_tags);
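
For orientation, a minimal caller of the reworked interface could look like the sketch below. It is illustrative only: the demo_* names, the 1024/64/32 sizing and the <linux/percpu_ida.h> include are assumptions rather than part of this patch; the function signatures follow the hunks above.

#include <linux/module.h>
#include <linux/percpu_ida.h>

static struct percpu_ida demo_pool;

static int demo_count_free(unsigned id, void *data)
{
	unsigned *nr = data;

	(*nr)++;
	return 0;		/* a non-zero return stops the walk */
}

static int __init percpu_ida_demo_init(void)
{
	unsigned nr_free = 0;
	int tag, err;

	/* 1024 tags; cache at most 64 per cpu, move them in batches of 32 */
	err = __percpu_ida_init(&demo_pool, 1024, 64, 32);
	if (err)
		return err;

	tag = percpu_ida_alloc(&demo_pool, GFP_KERNEL);
	if (tag >= 0)
		percpu_ida_free(&demo_pool, tag);

	percpu_ida_for_each_free(&demo_pool, demo_count_free, &nr_free);
	pr_info("free ids seen: %u, on the global freelist: %u\n",
		nr_free, percpu_ida_free_tags(&demo_pool, nr_cpu_ids));

	percpu_ida_destroy(&demo_pool);
	return 0;
}
module_init(percpu_ida_demo_init);

MODULE_LICENSE("GPL");
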
diff --git a/lib/percpu_test.c b/lib/percpu_test.c
new file mode 100644
index 000000000000..0b5d14dadd1a
--- /dev/null
+++ b/lib/percpu_test.c
@@ -0,0 +1,138 @@
1#include <linux/module.h>
2
3/* validate @native and @pcp counter values match @expected */
4#define CHECK(native, pcp, expected) \
5 do { \
6 WARN((native) != (expected), \
7 "raw %ld (0x%lx) != expected %lld (0x%llx)", \
8 (native), (native), \
9 (long long)(expected), (long long)(expected)); \
10 WARN(__this_cpu_read(pcp) != (expected), \
11 "pcp %ld (0x%lx) != expected %lld (0x%llx)", \
12 __this_cpu_read(pcp), __this_cpu_read(pcp), \
13 (long long)(expected), (long long)(expected)); \
14 } while (0)
15
16static DEFINE_PER_CPU(long, long_counter);
17static DEFINE_PER_CPU(unsigned long, ulong_counter);
18
19static int __init percpu_test_init(void)
20{
21 /*
22 * volatile prevents the compiler from optimizing its uses, otherwise the
23 * +ui_one/-ui_one operations below would be replaced with inc/dec instructions.
24 */
25 volatile unsigned int ui_one = 1;
26 long l = 0;
27 unsigned long ul = 0;
28
29 pr_info("percpu test start\n");
30
31 preempt_disable();
32
33 l += -1;
34 __this_cpu_add(long_counter, -1);
35 CHECK(l, long_counter, -1);
36
37 l += 1;
38 __this_cpu_add(long_counter, 1);
39 CHECK(l, long_counter, 0);
40
41 ul = 0;
42 __this_cpu_write(ulong_counter, 0);
43
44 ul += 1UL;
45 __this_cpu_add(ulong_counter, 1UL);
46 CHECK(ul, ulong_counter, 1);
47
48 ul += -1UL;
49 __this_cpu_add(ulong_counter, -1UL);
50 CHECK(ul, ulong_counter, 0);
51
52 ul += -(unsigned long)1;
53 __this_cpu_add(ulong_counter, -(unsigned long)1);
54 CHECK(ul, ulong_counter, -1);
55
56 ul = 0;
57 __this_cpu_write(ulong_counter, 0);
58
59 ul -= 1;
60 __this_cpu_dec(ulong_counter);
61 CHECK(ul, ulong_counter, -1);
62 CHECK(ul, ulong_counter, ULONG_MAX);
63
64 l += -ui_one;
65 __this_cpu_add(long_counter, -ui_one);
66 CHECK(l, long_counter, 0xffffffff);
67
68 l += ui_one;
69 __this_cpu_add(long_counter, ui_one);
70 CHECK(l, long_counter, (long)0x100000000LL);
71
72
73 l = 0;
74 __this_cpu_write(long_counter, 0);
75
76 l -= ui_one;
77 __this_cpu_sub(long_counter, ui_one);
78 CHECK(l, long_counter, -1);
79
80 l = 0;
81 __this_cpu_write(long_counter, 0);
82
83 l += ui_one;
84 __this_cpu_add(long_counter, ui_one);
85 CHECK(l, long_counter, 1);
86
87 l += -ui_one;
88 __this_cpu_add(long_counter, -ui_one);
89 CHECK(l, long_counter, (long)0x100000000LL);
90
91 l = 0;
92 __this_cpu_write(long_counter, 0);
93
94 l -= ui_one;
95 this_cpu_sub(long_counter, ui_one);
96 CHECK(l, long_counter, -1);
97 CHECK(l, long_counter, ULONG_MAX);
98
99 ul = 0;
100 __this_cpu_write(ulong_counter, 0);
101
102 ul += ui_one;
103 __this_cpu_add(ulong_counter, ui_one);
104 CHECK(ul, ulong_counter, 1);
105
106 ul = 0;
107 __this_cpu_write(ulong_counter, 0);
108
109 ul -= ui_one;
110 __this_cpu_sub(ulong_counter, ui_one);
111 CHECK(ul, ulong_counter, -1);
112 CHECK(ul, ulong_counter, ULONG_MAX);
113
114 ul = 3;
115 __this_cpu_write(ulong_counter, 3);
116
117 ul = this_cpu_sub_return(ulong_counter, ui_one);
118 CHECK(ul, ulong_counter, 2);
119
120 ul = __this_cpu_sub_return(ulong_counter, ui_one);
121 CHECK(ul, ulong_counter, 1);
122
123 preempt_enable();
124
125 pr_info("percpu test done\n");
126 return -EAGAIN; /* failing the init call unloads the module right away */
127}
128
129static void __exit percpu_test_exit(void)
130{
131}
132
133module_init(percpu_test_init)
134module_exit(percpu_test_exit)
135
136MODULE_LICENSE("GPL");
137MODULE_AUTHOR("Greg Thelen");
138MODULE_DESCRIPTION("percpu operations test");
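
The expected values above (0xffffffff, 0x100000000, ULONG_MAX) follow from C's usual arithmetic conversions: -ui_one is the unsigned int value UINT_MAX, which is zero-extended when combined with a 64-bit long. A small userspace sketch (assuming an LP64 target, i.e. a 64-bit long) shows the same effect without any percpu machinery:

#include <stdio.h>

int main(void)
{
	volatile unsigned int ui_one = 1;	/* volatile, as in the test above */
	long l = 0;
	unsigned long ul = 0;

	l += -ui_one;		/* -ui_one == UINT_MAX, zero-extended to long */
	printf("l  = %#lx\n", l);	/* 0xffffffff, not -1 */

	l += ui_one;
	printf("l  = %#lx\n", l);	/* 0x100000000 */

	ul -= 1;		/* plain unsigned wrap-around */
	printf("ul = %#lx\n", ul);	/* ULONG_MAX */
	return 0;
}
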
diff --git a/lib/random32.c b/lib/random32.c
index 52280d5526be..1e5b2df44291 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -2,19 +2,19 @@
2 This is a maximally equidistributed combined Tausworthe generator 2 This is a maximally equidistributed combined Tausworthe generator
3 based on code from GNU Scientific Library 1.5 (30 Jun 2004) 3 based on code from GNU Scientific Library 1.5 (30 Jun 2004)
4 4
5 x_n = (s1_n ^ s2_n ^ s3_n) 5 lfsr113 version:
6 6
7 s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) 7 x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n)
8 s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
9 s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
10 8
11 The period of this generator is about 2^88. 9 s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n << 6) ^ s1_n) >> 13))
10 s2_{n+1} = (((s2_n & 4294967288) << 2) ^ (((s2_n << 2) ^ s2_n) >> 27))
11 s3_{n+1} = (((s3_n & 4294967280) << 7) ^ (((s3_n << 13) ^ s3_n) >> 21))
12 s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n << 3) ^ s4_n) >> 12))
12 13
13 From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe 14 The period of this generator is about 2^113 (see erratum paper).
14 Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
15
16 This is available on the net from L'Ecuyer's home page,
17 15
16 From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
17 Generators", Mathematics of Computation, 65, 213 (1996), 203--213:
18 http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps 18 http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
19 ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps 19 ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
20 20
@@ -29,7 +29,7 @@
29 that paper.) 29 that paper.)
30 30
31 This affects the seeding procedure by imposing the requirement 31 This affects the seeding procedure by imposing the requirement
32 s1 > 1, s2 > 7, s3 > 15. 32 s1 > 1, s2 > 7, s3 > 15, s4 > 127.
33 33
34*/ 34*/
35 35
@@ -38,6 +38,11 @@
38#include <linux/export.h> 38#include <linux/export.h>
39#include <linux/jiffies.h> 39#include <linux/jiffies.h>
40#include <linux/random.h> 40#include <linux/random.h>
41#include <linux/sched.h>
42
43#ifdef CONFIG_RANDOM32_SELFTEST
44static void __init prandom_state_selftest(void);
45#endif
41 46
42static DEFINE_PER_CPU(struct rnd_state, net_rand_state); 47static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
43 48
@@ -52,11 +57,12 @@ u32 prandom_u32_state(struct rnd_state *state)
52{ 57{
53#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) 58#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
54 59
55 state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); 60 state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U);
56 state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); 61 state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U);
57 state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); 62 state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U);
63 state->s4 = TAUSWORTHE(state->s4, 3U, 12U, 4294967168U, 13U);
58 64
59 return (state->s1 ^ state->s2 ^ state->s3); 65 return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4);
60} 66}
61EXPORT_SYMBOL(prandom_u32_state); 67EXPORT_SYMBOL(prandom_u32_state);
62 68
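
For reference, the four-word recurrence can be exercised on its own. The userspace sketch below is illustrative only; it reuses the shift/mask parameters from the hunk above and a starting state that respects the seeding constraints s1 > 1, s2 > 7, s3 > 15, s4 > 127.

#include <stdint.h>
#include <stdio.h>

struct taus113 {
	uint32_t s1, s2, s3, s4;
};

/* one lfsr113 step, same parameters as the TAUSWORTHE calls above */
static uint32_t taus113_next(struct taus113 *st)
{
#define TAUS(s, a, b, c, d) ((((s) & (c)) << (d)) ^ ((((s) << (a)) ^ (s)) >> (b)))
	st->s1 = TAUS(st->s1,  6U, 13U, 4294967294U, 18U);
	st->s2 = TAUS(st->s2,  2U, 27U, 4294967288U,  2U);
	st->s3 = TAUS(st->s3, 13U, 21U, 4294967280U,  7U);
	st->s4 = TAUS(st->s4,  3U, 12U, 4294967168U, 13U);
#undef TAUS
	return st->s1 ^ st->s2 ^ st->s3 ^ st->s4;
}

int main(void)
{
	/* arbitrary state satisfying s1 > 1, s2 > 7, s3 > 15, s4 > 127 */
	struct taus113 st = { 2, 8, 16, 128 };
	int i;

	for (i = 0; i < 4; i++)
		printf("%u\n", taus113_next(&st));
	return 0;
}
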
@@ -126,6 +132,38 @@ void prandom_bytes(void *buf, int bytes)
126} 132}
127EXPORT_SYMBOL(prandom_bytes); 133EXPORT_SYMBOL(prandom_bytes);
128 134
135static void prandom_warmup(struct rnd_state *state)
136{
137 /* Calling RNG ten times to satisfy the recurrence condition */
138 prandom_u32_state(state);
139 prandom_u32_state(state);
140 prandom_u32_state(state);
141 prandom_u32_state(state);
142 prandom_u32_state(state);
143 prandom_u32_state(state);
144 prandom_u32_state(state);
145 prandom_u32_state(state);
146 prandom_u32_state(state);
147 prandom_u32_state(state);
148}
149
150static void prandom_seed_very_weak(struct rnd_state *state, u32 seed)
151{
152 /* Note: This sort of seeding is ONLY used in test cases and
153 * during boot, from core_initcall until late_initcall, as we don't
154 * have a stronger entropy source available yet.
155 * After late_initcall we have to (!) reseed the entire state,
156 * otherwise an attacker would only need to search a 32 bit seed space
157 * to probe for our internal 128 bit state if he knows a couple
158 * of prandom32 outputs!
159 */
160#define LCG(x) ((x) * 69069U) /* super-duper LCG */
161 state->s1 = __seed(LCG(seed), 2U);
162 state->s2 = __seed(LCG(state->s1), 8U);
163 state->s3 = __seed(LCG(state->s2), 16U);
164 state->s4 = __seed(LCG(state->s3), 128U);
165}
166
129/** 167/**
130 * prandom_seed - add entropy to pseudo random number generator 168 * prandom_seed - add entropy to pseudo random number generator
131 * @seed: seed value 169 * @seed: seed value
@@ -141,7 +179,9 @@ void prandom_seed(u32 entropy)
141 */ 179 */
142 for_each_possible_cpu (i) { 180 for_each_possible_cpu (i) {
143 struct rnd_state *state = &per_cpu(net_rand_state, i); 181 struct rnd_state *state = &per_cpu(net_rand_state, i);
144 state->s1 = __seed(state->s1 ^ entropy, 1); 182
183 state->s1 = __seed(state->s1 ^ entropy, 2U);
184 prandom_warmup(state);
145 } 185 }
146} 186}
147EXPORT_SYMBOL(prandom_seed); 187EXPORT_SYMBOL(prandom_seed);
@@ -154,46 +194,249 @@ static int __init prandom_init(void)
154{ 194{
155 int i; 195 int i;
156 196
197#ifdef CONFIG_RANDOM32_SELFTEST
198 prandom_state_selftest();
199#endif
200
157 for_each_possible_cpu(i) { 201 for_each_possible_cpu(i) {
158 struct rnd_state *state = &per_cpu(net_rand_state,i); 202 struct rnd_state *state = &per_cpu(net_rand_state,i);
159 203
160#define LCG(x) ((x) * 69069) /* super-duper LCG */ 204 prandom_seed_very_weak(state, (i + jiffies) ^ random_get_entropy());
161 state->s1 = __seed(LCG(i + jiffies), 1); 205 prandom_warmup(state);
162 state->s2 = __seed(LCG(state->s1), 7);
163 state->s3 = __seed(LCG(state->s2), 15);
164
165 /* "warm it up" */
166 prandom_u32_state(state);
167 prandom_u32_state(state);
168 prandom_u32_state(state);
169 prandom_u32_state(state);
170 prandom_u32_state(state);
171 prandom_u32_state(state);
172 } 206 }
173 return 0; 207 return 0;
174} 208}
175core_initcall(prandom_init); 209core_initcall(prandom_init);
176 210
211static void __prandom_timer(unsigned long dontcare);
212static DEFINE_TIMER(seed_timer, __prandom_timer, 0, 0);
213
214static void __prandom_timer(unsigned long dontcare)
215{
216 u32 entropy;
217 unsigned long expires;
218
219 get_random_bytes(&entropy, sizeof(entropy));
220 prandom_seed(entropy);
221
222 /* reseed every ~60 seconds, in [40 .. 80) interval with slack */
223 expires = 40 + (prandom_u32() % 40);
224 seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC);
225
226 add_timer(&seed_timer);
227}
228
229static void __init __prandom_start_seed_timer(void)
230{
231 set_timer_slack(&seed_timer, HZ);
232 seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC);
233 add_timer(&seed_timer);
234}
235
177/* 236/*
178 * Generate better values after random number generator 237 * Generate better values after random number generator
179 * is fully initialized. 238 * is fully initialized.
180 */ 239 */
181static int __init prandom_reseed(void) 240static void __prandom_reseed(bool late)
182{ 241{
183 int i; 242 int i;
243 unsigned long flags;
244 static bool latch = false;
245 static DEFINE_SPINLOCK(lock);
246
247 /* only allow initial seeding (late == false) once */
248 spin_lock_irqsave(&lock, flags);
249 if (latch && !late)
250 goto out;
251 latch = true;
184 252
185 for_each_possible_cpu(i) { 253 for_each_possible_cpu(i) {
186 struct rnd_state *state = &per_cpu(net_rand_state,i); 254 struct rnd_state *state = &per_cpu(net_rand_state,i);
187 u32 seeds[3]; 255 u32 seeds[4];
188 256
189 get_random_bytes(&seeds, sizeof(seeds)); 257 get_random_bytes(&seeds, sizeof(seeds));
190 state->s1 = __seed(seeds[0], 1); 258 state->s1 = __seed(seeds[0], 2U);
191 state->s2 = __seed(seeds[1], 7); 259 state->s2 = __seed(seeds[1], 8U);
192 state->s3 = __seed(seeds[2], 15); 260 state->s3 = __seed(seeds[2], 16U);
261 state->s4 = __seed(seeds[3], 128U);
193 262
194 /* mix it in */ 263 prandom_warmup(state);
195 prandom_u32_state(state);
196 } 264 }
265out:
266 spin_unlock_irqrestore(&lock, flags);
267}
268
269void prandom_reseed_late(void)
270{
271 __prandom_reseed(true);
272}
273
274static int __init prandom_reseed(void)
275{
276 __prandom_reseed(false);
277 __prandom_start_seed_timer();
197 return 0; 278 return 0;
198} 279}
199late_initcall(prandom_reseed); 280late_initcall(prandom_reseed);
281
282#ifdef CONFIG_RANDOM32_SELFTEST
283static struct prandom_test1 {
284 u32 seed;
285 u32 result;
286} test1[] = {
287 { 1U, 3484351685U },
288 { 2U, 2623130059U },
289 { 3U, 3125133893U },
290 { 4U, 984847254U },
291};
292
293static struct prandom_test2 {
294 u32 seed;
295 u32 iteration;
296 u32 result;
297} test2[] = {
298 /* Test cases against taus113 from GSL library. */
299 { 931557656U, 959U, 2975593782U },
300 { 1339693295U, 876U, 3887776532U },
301 { 1545556285U, 961U, 1615538833U },
302 { 601730776U, 723U, 1776162651U },
303 { 1027516047U, 687U, 511983079U },
304 { 416526298U, 700U, 916156552U },
305 { 1395522032U, 652U, 2222063676U },
306 { 366221443U, 617U, 2992857763U },
307 { 1539836965U, 714U, 3783265725U },
308 { 556206671U, 994U, 799626459U },
309 { 684907218U, 799U, 367789491U },
310 { 2121230701U, 931U, 2115467001U },
311 { 1668516451U, 644U, 3620590685U },
312 { 768046066U, 883U, 2034077390U },
313 { 1989159136U, 833U, 1195767305U },
314 { 536585145U, 996U, 3577259204U },
315 { 1008129373U, 642U, 1478080776U },
316 { 1740775604U, 939U, 1264980372U },
317 { 1967883163U, 508U, 10734624U },
318 { 1923019697U, 730U, 3821419629U },
319 { 442079932U, 560U, 3440032343U },
320 { 1961302714U, 845U, 841962572U },
321 { 2030205964U, 962U, 1325144227U },
322 { 1160407529U, 507U, 240940858U },
323 { 635482502U, 779U, 4200489746U },
324 { 1252788931U, 699U, 867195434U },
325 { 1961817131U, 719U, 668237657U },
326 { 1071468216U, 983U, 917876630U },
327 { 1281848367U, 932U, 1003100039U },
328 { 582537119U, 780U, 1127273778U },
329 { 1973672777U, 853U, 1071368872U },
330 { 1896756996U, 762U, 1127851055U },
331 { 847917054U, 500U, 1717499075U },
332 { 1240520510U, 951U, 2849576657U },
333 { 1685071682U, 567U, 1961810396U },
334 { 1516232129U, 557U, 3173877U },
335 { 1208118903U, 612U, 1613145022U },
336 { 1817269927U, 693U, 4279122573U },
337 { 1510091701U, 717U, 638191229U },
338 { 365916850U, 807U, 600424314U },
339 { 399324359U, 702U, 1803598116U },
340 { 1318480274U, 779U, 2074237022U },
341 { 697758115U, 840U, 1483639402U },
342 { 1696507773U, 840U, 577415447U },
343 { 2081979121U, 981U, 3041486449U },
344 { 955646687U, 742U, 3846494357U },
345 { 1250683506U, 749U, 836419859U },
346 { 595003102U, 534U, 366794109U },
347 { 47485338U, 558U, 3521120834U },
348 { 619433479U, 610U, 3991783875U },
349 { 704096520U, 518U, 4139493852U },
350 { 1712224984U, 606U, 2393312003U },
351 { 1318233152U, 922U, 3880361134U },
352 { 855572992U, 761U, 1472974787U },
353 { 64721421U, 703U, 683860550U },
354 { 678931758U, 840U, 380616043U },
355 { 692711973U, 778U, 1382361947U },
356 { 677703619U, 530U, 2826914161U },
357 { 92393223U, 586U, 1522128471U },
358 { 1222592920U, 743U, 3466726667U },
359 { 358288986U, 695U, 1091956998U },
360 { 1935056945U, 958U, 514864477U },
361 { 735675993U, 990U, 1294239989U },
362 { 1560089402U, 897U, 2238551287U },
363 { 70616361U, 829U, 22483098U },
364 { 368234700U, 731U, 2913875084U },
365 { 20221190U, 879U, 1564152970U },
366 { 539444654U, 682U, 1835141259U },
367 { 1314987297U, 840U, 1801114136U },
368 { 2019295544U, 645U, 3286438930U },
369 { 469023838U, 716U, 1637918202U },
370 { 1843754496U, 653U, 2562092152U },
371 { 400672036U, 809U, 4264212785U },
372 { 404722249U, 965U, 2704116999U },
373 { 600702209U, 758U, 584979986U },
374 { 519953954U, 667U, 2574436237U },
375 { 1658071126U, 694U, 2214569490U },
376 { 420480037U, 749U, 3430010866U },
377 { 690103647U, 969U, 3700758083U },
378 { 1029424799U, 937U, 3787746841U },
379 { 2012608669U, 506U, 3362628973U },
380 { 1535432887U, 998U, 42610943U },
381 { 1330635533U, 857U, 3040806504U },
382 { 1223800550U, 539U, 3954229517U },
383 { 1322411537U, 680U, 3223250324U },
384 { 1877847898U, 945U, 2915147143U },
385 { 1646356099U, 874U, 965988280U },
386 { 805687536U, 744U, 4032277920U },
387 { 1948093210U, 633U, 1346597684U },
388 { 392609744U, 783U, 1636083295U },
389 { 690241304U, 770U, 1201031298U },
390 { 1360302965U, 696U, 1665394461U },
391 { 1220090946U, 780U, 1316922812U },
392 { 447092251U, 500U, 3438743375U },
393 { 1613868791U, 592U, 828546883U },
394 { 523430951U, 548U, 2552392304U },
395 { 726692899U, 810U, 1656872867U },
396 { 1364340021U, 836U, 3710513486U },
397 { 1986257729U, 931U, 935013962U },
398 { 407983964U, 921U, 728767059U },
399};
400
401static void __init prandom_state_selftest(void)
402{
403 int i, j, errors = 0, runs = 0;
404 bool error = false;
405
406 for (i = 0; i < ARRAY_SIZE(test1); i++) {
407 struct rnd_state state;
408
409 prandom_seed_very_weak(&state, test1[i].seed);
410 prandom_warmup(&state);
411
412 if (test1[i].result != prandom_u32_state(&state))
413 error = true;
414 }
415
416 if (error)
417 pr_warn("prandom: seed boundary self test failed\n");
418 else
419 pr_info("prandom: seed boundary self test passed\n");
420
421 for (i = 0; i < ARRAY_SIZE(test2); i++) {
422 struct rnd_state state;
423
424 prandom_seed_very_weak(&state, test2[i].seed);
425 prandom_warmup(&state);
426
427 for (j = 0; j < test2[i].iteration - 1; j++)
428 prandom_u32_state(&state);
429
430 if (test2[i].result != prandom_u32_state(&state))
431 errors++;
432
433 runs++;
434 cond_resched();
435 }
436
437 if (errors)
438 pr_warn("prandom: %d/%d self tests failed\n", errors, runs);
439 else
440 pr_info("prandom: %d self tests passed\n", runs);
441}
442#endif
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
deleted file mode 100644
index 9be8a9144978..000000000000
--- a/lib/rwsem-spinlock.c
+++ /dev/null
@@ -1,296 +0,0 @@
1/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
2 * generic spinlock implementation
3 *
4 * Copyright (c) 2001 David Howells (dhowells@redhat.com).
5 * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
6 * - Derived also from comments by Linus
7 */
8#include <linux/rwsem.h>
9#include <linux/sched.h>
10#include <linux/export.h>
11
12enum rwsem_waiter_type {
13 RWSEM_WAITING_FOR_WRITE,
14 RWSEM_WAITING_FOR_READ
15};
16
17struct rwsem_waiter {
18 struct list_head list;
19 struct task_struct *task;
20 enum rwsem_waiter_type type;
21};
22
23int rwsem_is_locked(struct rw_semaphore *sem)
24{
25 int ret = 1;
26 unsigned long flags;
27
28 if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
29 ret = (sem->activity != 0);
30 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
31 }
32 return ret;
33}
34EXPORT_SYMBOL(rwsem_is_locked);
35
36/*
37 * initialise the semaphore
38 */
39void __init_rwsem(struct rw_semaphore *sem, const char *name,
40 struct lock_class_key *key)
41{
42#ifdef CONFIG_DEBUG_LOCK_ALLOC
43 /*
44 * Make sure we are not reinitializing a held semaphore:
45 */
46 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
47 lockdep_init_map(&sem->dep_map, name, key, 0);
48#endif
49 sem->activity = 0;
50 raw_spin_lock_init(&sem->wait_lock);
51 INIT_LIST_HEAD(&sem->wait_list);
52}
53EXPORT_SYMBOL(__init_rwsem);
54
55/*
56 * handle the lock release when processes blocked on it that can now run
57 * - if we come here, then:
58 * - the 'active count' _reached_ zero
59 * - the 'waiting count' is non-zero
60 * - the spinlock must be held by the caller
61 * - woken process blocks are discarded from the list after having task zeroed
62 * - writers are only woken if wakewrite is non-zero
63 */
64static inline struct rw_semaphore *
65__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
66{
67 struct rwsem_waiter *waiter;
68 struct task_struct *tsk;
69 int woken;
70
71 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
72
73 if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
74 if (wakewrite)
75 /* Wake up a writer. Note that we do not grant it the
76 * lock - it will have to acquire it when it runs. */
77 wake_up_process(waiter->task);
78 goto out;
79 }
80
81 /* grant an infinite number of read locks to the front of the queue */
82 woken = 0;
83 do {
84 struct list_head *next = waiter->list.next;
85
86 list_del(&waiter->list);
87 tsk = waiter->task;
88 smp_mb();
89 waiter->task = NULL;
90 wake_up_process(tsk);
91 put_task_struct(tsk);
92 woken++;
93 if (next == &sem->wait_list)
94 break;
95 waiter = list_entry(next, struct rwsem_waiter, list);
96 } while (waiter->type != RWSEM_WAITING_FOR_WRITE);
97
98 sem->activity += woken;
99
100 out:
101 return sem;
102}
103
104/*
105 * wake a single writer
106 */
107static inline struct rw_semaphore *
108__rwsem_wake_one_writer(struct rw_semaphore *sem)
109{
110 struct rwsem_waiter *waiter;
111
112 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
113 wake_up_process(waiter->task);
114
115 return sem;
116}
117
118/*
119 * get a read lock on the semaphore
120 */
121void __sched __down_read(struct rw_semaphore *sem)
122{
123 struct rwsem_waiter waiter;
124 struct task_struct *tsk;
125 unsigned long flags;
126
127 raw_spin_lock_irqsave(&sem->wait_lock, flags);
128
129 if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
130 /* granted */
131 sem->activity++;
132 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
133 goto out;
134 }
135
136 tsk = current;
137 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
138
139 /* set up my own style of waitqueue */
140 waiter.task = tsk;
141 waiter.type = RWSEM_WAITING_FOR_READ;
142 get_task_struct(tsk);
143
144 list_add_tail(&waiter.list, &sem->wait_list);
145
146 /* we don't need to touch the semaphore struct anymore */
147 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
148
149 /* wait to be given the lock */
150 for (;;) {
151 if (!waiter.task)
152 break;
153 schedule();
154 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
155 }
156
157 tsk->state = TASK_RUNNING;
158 out:
159 ;
160}
161
162/*
163 * trylock for reading -- returns 1 if successful, 0 if contention
164 */
165int __down_read_trylock(struct rw_semaphore *sem)
166{
167 unsigned long flags;
168 int ret = 0;
169
170
171 raw_spin_lock_irqsave(&sem->wait_lock, flags);
172
173 if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
174 /* granted */
175 sem->activity++;
176 ret = 1;
177 }
178
179 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
180
181 return ret;
182}
183
184/*
185 * get a write lock on the semaphore
186 */
187void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
188{
189 struct rwsem_waiter waiter;
190 struct task_struct *tsk;
191 unsigned long flags;
192
193 raw_spin_lock_irqsave(&sem->wait_lock, flags);
194
195 /* set up my own style of waitqueue */
196 tsk = current;
197 waiter.task = tsk;
198 waiter.type = RWSEM_WAITING_FOR_WRITE;
199 list_add_tail(&waiter.list, &sem->wait_list);
200
201 /* wait for someone to release the lock */
202 for (;;) {
203 /*
204 * That is the key to support write lock stealing: allows the
205 * task already on CPU to get the lock soon rather than put
206 * itself into sleep and waiting for system woke it or someone
207 * else in the head of the wait list up.
208 */
209 if (sem->activity == 0)
210 break;
211 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
212 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
213 schedule();
214 raw_spin_lock_irqsave(&sem->wait_lock, flags);
215 }
216 /* got the lock */
217 sem->activity = -1;
218 list_del(&waiter.list);
219
220 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
221}
222
223void __sched __down_write(struct rw_semaphore *sem)
224{
225 __down_write_nested(sem, 0);
226}
227
228/*
229 * trylock for writing -- returns 1 if successful, 0 if contention
230 */
231int __down_write_trylock(struct rw_semaphore *sem)
232{
233 unsigned long flags;
234 int ret = 0;
235
236 raw_spin_lock_irqsave(&sem->wait_lock, flags);
237
238 if (sem->activity == 0) {
239 /* got the lock */
240 sem->activity = -1;
241 ret = 1;
242 }
243
244 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
245
246 return ret;
247}
248
249/*
250 * release a read lock on the semaphore
251 */
252void __up_read(struct rw_semaphore *sem)
253{
254 unsigned long flags;
255
256 raw_spin_lock_irqsave(&sem->wait_lock, flags);
257
258 if (--sem->activity == 0 && !list_empty(&sem->wait_list))
259 sem = __rwsem_wake_one_writer(sem);
260
261 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
262}
263
264/*
265 * release a write lock on the semaphore
266 */
267void __up_write(struct rw_semaphore *sem)
268{
269 unsigned long flags;
270
271 raw_spin_lock_irqsave(&sem->wait_lock, flags);
272
273 sem->activity = 0;
274 if (!list_empty(&sem->wait_list))
275 sem = __rwsem_do_wake(sem, 1);
276
277 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
278}
279
280/*
281 * downgrade a write lock into a read lock
282 * - just wake up any readers at the front of the queue
283 */
284void __downgrade_write(struct rw_semaphore *sem)
285{
286 unsigned long flags;
287
288 raw_spin_lock_irqsave(&sem->wait_lock, flags);
289
290 sem->activity = 1;
291 if (!list_empty(&sem->wait_list))
292 sem = __rwsem_do_wake(sem, 0);
293
294 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
295}
296
diff --git a/lib/rwsem.c b/lib/rwsem.c
deleted file mode 100644
index 19c5fa95e0b4..000000000000
--- a/lib/rwsem.c
+++ /dev/null
@@ -1,293 +0,0 @@
1/* rwsem.c: R/W semaphores: contention handling functions
2 *
3 * Written by David Howells (dhowells@redhat.com).
4 * Derived from arch/i386/kernel/semaphore.c
5 *
6 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
7 * and Michel Lespinasse <walken@google.com>
8 */
9#include <linux/rwsem.h>
10#include <linux/sched.h>
11#include <linux/init.h>
12#include <linux/export.h>
13
14/*
15 * Initialize an rwsem:
16 */
17void __init_rwsem(struct rw_semaphore *sem, const char *name,
18 struct lock_class_key *key)
19{
20#ifdef CONFIG_DEBUG_LOCK_ALLOC
21 /*
22 * Make sure we are not reinitializing a held semaphore:
23 */
24 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
25 lockdep_init_map(&sem->dep_map, name, key, 0);
26#endif
27 sem->count = RWSEM_UNLOCKED_VALUE;
28 raw_spin_lock_init(&sem->wait_lock);
29 INIT_LIST_HEAD(&sem->wait_list);
30}
31
32EXPORT_SYMBOL(__init_rwsem);
33
34enum rwsem_waiter_type {
35 RWSEM_WAITING_FOR_WRITE,
36 RWSEM_WAITING_FOR_READ
37};
38
39struct rwsem_waiter {
40 struct list_head list;
41 struct task_struct *task;
42 enum rwsem_waiter_type type;
43};
44
45enum rwsem_wake_type {
46 RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
47 RWSEM_WAKE_READERS, /* Wake readers only */
48 RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
49};
50
51/*
52 * handle the lock release when processes blocked on it that can now run
53 * - if we come here from up_xxxx(), then:
54 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
55 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
56 * - there must be someone on the queue
57 * - the spinlock must be held by the caller
58 * - woken process blocks are discarded from the list after having task zeroed
59 * - writers are only woken if downgrading is false
60 */
61static struct rw_semaphore *
62__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
63{
64 struct rwsem_waiter *waiter;
65 struct task_struct *tsk;
66 struct list_head *next;
67 long oldcount, woken, loop, adjustment;
68
69 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
70 if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
71 if (wake_type == RWSEM_WAKE_ANY)
72 /* Wake writer at the front of the queue, but do not
73 * grant it the lock yet as we want other writers
74 * to be able to steal it. Readers, on the other hand,
75 * will block as they will notice the queued writer.
76 */
77 wake_up_process(waiter->task);
78 goto out;
79 }
80
81 /* Writers might steal the lock before we grant it to the next reader.
82 * We prefer to do the first reader grant before counting readers
83 * so we can bail out early if a writer stole the lock.
84 */
85 adjustment = 0;
86 if (wake_type != RWSEM_WAKE_READ_OWNED) {
87 adjustment = RWSEM_ACTIVE_READ_BIAS;
88 try_reader_grant:
89 oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
90 if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
91 /* A writer stole the lock. Undo our reader grant. */
92 if (rwsem_atomic_update(-adjustment, sem) &
93 RWSEM_ACTIVE_MASK)
94 goto out;
95 /* Last active locker left. Retry waking readers. */
96 goto try_reader_grant;
97 }
98 }
99
100 /* Grant an infinite number of read locks to the readers at the front
101 * of the queue. Note we increment the 'active part' of the count by
102 * the number of readers before waking any processes up.
103 */
104 woken = 0;
105 do {
106 woken++;
107
108 if (waiter->list.next == &sem->wait_list)
109 break;
110
111 waiter = list_entry(waiter->list.next,
112 struct rwsem_waiter, list);
113
114 } while (waiter->type != RWSEM_WAITING_FOR_WRITE);
115
116 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
117 if (waiter->type != RWSEM_WAITING_FOR_WRITE)
118 /* hit end of list above */
119 adjustment -= RWSEM_WAITING_BIAS;
120
121 if (adjustment)
122 rwsem_atomic_add(adjustment, sem);
123
124 next = sem->wait_list.next;
125 loop = woken;
126 do {
127 waiter = list_entry(next, struct rwsem_waiter, list);
128 next = waiter->list.next;
129 tsk = waiter->task;
130 smp_mb();
131 waiter->task = NULL;
132 wake_up_process(tsk);
133 put_task_struct(tsk);
134 } while (--loop);
135
136 sem->wait_list.next = next;
137 next->prev = &sem->wait_list;
138
139 out:
140 return sem;
141}
142
143/*
144 * wait for the read lock to be granted
145 */
146struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
147{
148 long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
149 struct rwsem_waiter waiter;
150 struct task_struct *tsk = current;
151
152 /* set up my own style of waitqueue */
153 waiter.task = tsk;
154 waiter.type = RWSEM_WAITING_FOR_READ;
155 get_task_struct(tsk);
156
157 raw_spin_lock_irq(&sem->wait_lock);
158 if (list_empty(&sem->wait_list))
159 adjustment += RWSEM_WAITING_BIAS;
160 list_add_tail(&waiter.list, &sem->wait_list);
161
162 /* we're now waiting on the lock, but no longer actively locking */
163 count = rwsem_atomic_update(adjustment, sem);
164
165 /* If there are no active locks, wake the front queued process(es).
166 *
167 * If there are no writers and we are first in the queue,
168 * wake our own waiter to join the existing active readers !
169 */
170 if (count == RWSEM_WAITING_BIAS ||
171 (count > RWSEM_WAITING_BIAS &&
172 adjustment != -RWSEM_ACTIVE_READ_BIAS))
173 sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
174
175 raw_spin_unlock_irq(&sem->wait_lock);
176
177 /* wait to be given the lock */
178 while (true) {
179 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
180 if (!waiter.task)
181 break;
182 schedule();
183 }
184
185 tsk->state = TASK_RUNNING;
186
187 return sem;
188}
189
190/*
191 * wait until we successfully acquire the write lock
192 */
193struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
194{
195 long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
196 struct rwsem_waiter waiter;
197 struct task_struct *tsk = current;
198
199 /* set up my own style of waitqueue */
200 waiter.task = tsk;
201 waiter.type = RWSEM_WAITING_FOR_WRITE;
202
203 raw_spin_lock_irq(&sem->wait_lock);
204 if (list_empty(&sem->wait_list))
205 adjustment += RWSEM_WAITING_BIAS;
206 list_add_tail(&waiter.list, &sem->wait_list);
207
208 /* we're now waiting on the lock, but no longer actively locking */
209 count = rwsem_atomic_update(adjustment, sem);
210
211 /* If there were already threads queued before us and there are no
212 * active writers, the lock must be read owned; so we try to wake
213 * any read locks that were queued ahead of us. */
214 if (count > RWSEM_WAITING_BIAS &&
215 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
216 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
217
218 /* wait until we successfully acquire the lock */
219 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
220 while (true) {
221 if (!(count & RWSEM_ACTIVE_MASK)) {
222 /* Try acquiring the write lock. */
223 count = RWSEM_ACTIVE_WRITE_BIAS;
224 if (!list_is_singular(&sem->wait_list))
225 count += RWSEM_WAITING_BIAS;
226
227 if (sem->count == RWSEM_WAITING_BIAS &&
228 cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
229 RWSEM_WAITING_BIAS)
230 break;
231 }
232
233 raw_spin_unlock_irq(&sem->wait_lock);
234
235 /* Block until there are no active lockers. */
236 do {
237 schedule();
238 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
239 } while ((count = sem->count) & RWSEM_ACTIVE_MASK);
240
241 raw_spin_lock_irq(&sem->wait_lock);
242 }
243
244 list_del(&waiter.list);
245 raw_spin_unlock_irq(&sem->wait_lock);
246 tsk->state = TASK_RUNNING;
247
248 return sem;
249}
250
251/*
252 * handle waking up a waiter on the semaphore
253 * - up_read/up_write has decremented the active part of count if we come here
254 */
255struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
256{
257 unsigned long flags;
258
259 raw_spin_lock_irqsave(&sem->wait_lock, flags);
260
261 /* do nothing if list empty */
262 if (!list_empty(&sem->wait_list))
263 sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
264
265 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
266
267 return sem;
268}
269
270/*
271 * downgrade a write lock into a read lock
272 * - caller incremented waiting part of count and discovered it still negative
273 * - just wake up any readers at the front of the queue
274 */
275struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
276{
277 unsigned long flags;
278
279 raw_spin_lock_irqsave(&sem->wait_lock, flags);
280
281 /* do nothing if list empty */
282 if (!list_empty(&sem->wait_list))
283 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
284
285 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
286
287 return sem;
288}
289
290EXPORT_SYMBOL(rwsem_down_read_failed);
291EXPORT_SYMBOL(rwsem_down_write_failed);
292EXPORT_SYMBOL(rwsem_wake);
293EXPORT_SYMBOL(rwsem_downgrade_wake);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index a685c8a79578..d16fa295ae1d 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -577,7 +577,8 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
577 miter->__offset += miter->consumed; 577 miter->__offset += miter->consumed;
578 miter->__remaining -= miter->consumed; 578 miter->__remaining -= miter->consumed;
579 579
580 if (miter->__flags & SG_MITER_TO_SG) 580 if ((miter->__flags & SG_MITER_TO_SG) &&
581 !PageSlab(miter->page))
581 flush_kernel_dcache_page(miter->page); 582 flush_kernel_dcache_page(miter->page);
582 583
583 if (miter->__flags & SG_MITER_ATOMIC) { 584 if (miter->__flags & SG_MITER_ATOMIC) {
diff --git a/lib/show_mem.c b/lib/show_mem.c
index b7c72311ad0c..5847a4921b8e 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -12,8 +12,7 @@
12void show_mem(unsigned int filter) 12void show_mem(unsigned int filter)
13{ 13{
14 pg_data_t *pgdat; 14 pg_data_t *pgdat;
15 unsigned long total = 0, reserved = 0, shared = 0, 15 unsigned long total = 0, reserved = 0, highmem = 0;
16 nonshared = 0, highmem = 0;
17 16
18 printk("Mem-Info:\n"); 17 printk("Mem-Info:\n");
19 show_free_areas(filter); 18 show_free_areas(filter);
@@ -22,43 +21,27 @@ void show_mem(unsigned int filter)
22 return; 21 return;
23 22
24 for_each_online_pgdat(pgdat) { 23 for_each_online_pgdat(pgdat) {
25 unsigned long i, flags; 24 unsigned long flags;
25 int zoneid;
26 26
27 pgdat_resize_lock(pgdat, &flags); 27 pgdat_resize_lock(pgdat, &flags);
28 for (i = 0; i < pgdat->node_spanned_pages; i++) { 28 for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
29 struct page *page; 29 struct zone *zone = &pgdat->node_zones[zoneid];
30 unsigned long pfn = pgdat->node_start_pfn + i; 30 if (!populated_zone(zone))
31
32 if (unlikely(!(i % MAX_ORDER_NR_PAGES)))
33 touch_nmi_watchdog();
34
35 if (!pfn_valid(pfn))
36 continue; 31 continue;
37 32
38 page = pfn_to_page(pfn); 33 total += zone->present_pages;
39 34 reserved += zone->present_pages - zone->managed_pages;
40 if (PageHighMem(page))
41 highmem++;
42 35
43 if (PageReserved(page)) 36 if (is_highmem_idx(zoneid))
44 reserved++; 37 highmem += zone->present_pages;
45 else if (page_count(page) == 1)
46 nonshared++;
47 else if (page_count(page) > 1)
48 shared += page_count(page) - 1;
49
50 total++;
51 } 38 }
52 pgdat_resize_unlock(pgdat, &flags); 39 pgdat_resize_unlock(pgdat, &flags);
53 } 40 }
54 41
55 printk("%lu pages RAM\n", total); 42 printk("%lu pages RAM\n", total);
56#ifdef CONFIG_HIGHMEM 43 printk("%lu pages HighMem/MovableOnly\n", highmem);
57 printk("%lu pages HighMem\n", highmem);
58#endif
59 printk("%lu pages reserved\n", reserved); 44 printk("%lu pages reserved\n", reserved);
60 printk("%lu pages shared\n", shared);
61 printk("%lu pages non-shared\n", nonshared);
62#ifdef CONFIG_QUICKLIST 45#ifdef CONFIG_QUICKLIST
63 printk("%lu pages in pagetable cache\n", 46 printk("%lu pages in pagetable cache\n",
64 quicklist_total_size()); 47 quicklist_total_size());
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 4c0d0e51d49e..04abe53f12a1 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -9,10 +9,9 @@
9 9
10notrace unsigned int debug_smp_processor_id(void) 10notrace unsigned int debug_smp_processor_id(void)
11{ 11{
12 unsigned long preempt_count = preempt_count();
13 int this_cpu = raw_smp_processor_id(); 12 int this_cpu = raw_smp_processor_id();
14 13
15 if (likely(preempt_count)) 14 if (likely(preempt_count()))
16 goto out; 15 goto out;
17 16
18 if (irqs_disabled()) 17 if (irqs_disabled())
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
deleted file mode 100644
index 0374a596cffa..000000000000
--- a/lib/spinlock_debug.c
+++ /dev/null
@@ -1,302 +0,0 @@
1/*
2 * Copyright 2005, Red Hat, Inc., Ingo Molnar
3 * Released under the General Public License (GPL).
4 *
5 * This file contains the spinlock/rwlock implementations for
6 * DEBUG_SPINLOCK.
7 */
8
9#include <linux/spinlock.h>
10#include <linux/nmi.h>
11#include <linux/interrupt.h>
12#include <linux/debug_locks.h>
13#include <linux/delay.h>
14#include <linux/export.h>
15
16void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
17 struct lock_class_key *key)
18{
19#ifdef CONFIG_DEBUG_LOCK_ALLOC
20 /*
21 * Make sure we are not reinitializing a held lock:
22 */
23 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
24 lockdep_init_map(&lock->dep_map, name, key, 0);
25#endif
26 lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
27 lock->magic = SPINLOCK_MAGIC;
28 lock->owner = SPINLOCK_OWNER_INIT;
29 lock->owner_cpu = -1;
30}
31
32EXPORT_SYMBOL(__raw_spin_lock_init);
33
34void __rwlock_init(rwlock_t *lock, const char *name,
35 struct lock_class_key *key)
36{
37#ifdef CONFIG_DEBUG_LOCK_ALLOC
38 /*
39 * Make sure we are not reinitializing a held lock:
40 */
41 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
42 lockdep_init_map(&lock->dep_map, name, key, 0);
43#endif
44 lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
45 lock->magic = RWLOCK_MAGIC;
46 lock->owner = SPINLOCK_OWNER_INIT;
47 lock->owner_cpu = -1;
48}
49
50EXPORT_SYMBOL(__rwlock_init);
51
52static void spin_dump(raw_spinlock_t *lock, const char *msg)
53{
54 struct task_struct *owner = NULL;
55
56 if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
57 owner = lock->owner;
58 printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
59 msg, raw_smp_processor_id(),
60 current->comm, task_pid_nr(current));
61 printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
62 ".owner_cpu: %d\n",
63 lock, lock->magic,
64 owner ? owner->comm : "<none>",
65 owner ? task_pid_nr(owner) : -1,
66 lock->owner_cpu);
67 dump_stack();
68}
69
70static void spin_bug(raw_spinlock_t *lock, const char *msg)
71{
72 if (!debug_locks_off())
73 return;
74
75 spin_dump(lock, msg);
76}
77
78#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
79
80static inline void
81debug_spin_lock_before(raw_spinlock_t *lock)
82{
83 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
84 SPIN_BUG_ON(lock->owner == current, lock, "recursion");
85 SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
86 lock, "cpu recursion");
87}
88
89static inline void debug_spin_lock_after(raw_spinlock_t *lock)
90{
91 lock->owner_cpu = raw_smp_processor_id();
92 lock->owner = current;
93}
94
95static inline void debug_spin_unlock(raw_spinlock_t *lock)
96{
97 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
98 SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked");
99 SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
100 SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
101 lock, "wrong CPU");
102 lock->owner = SPINLOCK_OWNER_INIT;
103 lock->owner_cpu = -1;
104}
105
106static void __spin_lock_debug(raw_spinlock_t *lock)
107{
108 u64 i;
109 u64 loops = loops_per_jiffy * HZ;
110
111 for (i = 0; i < loops; i++) {
112 if (arch_spin_trylock(&lock->raw_lock))
113 return;
114 __delay(1);
115 }
116 /* lockup suspected: */
117 spin_dump(lock, "lockup suspected");
118#ifdef CONFIG_SMP
119 trigger_all_cpu_backtrace();
120#endif
121
122 /*
123 * The trylock above was causing a livelock. Give the lower level arch
124 * specific lock code a chance to acquire the lock. We have already
125 * printed a warning/backtrace at this point. The non-debug arch
126 * specific code might actually succeed in acquiring the lock. If it is
127 * not successful, the end-result is the same - there is no forward
128 * progress.
129 */
130 arch_spin_lock(&lock->raw_lock);
131}
132
133void do_raw_spin_lock(raw_spinlock_t *lock)
134{
135 debug_spin_lock_before(lock);
136 if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
137 __spin_lock_debug(lock);
138 debug_spin_lock_after(lock);
139}
140
141int do_raw_spin_trylock(raw_spinlock_t *lock)
142{
143 int ret = arch_spin_trylock(&lock->raw_lock);
144
145 if (ret)
146 debug_spin_lock_after(lock);
147#ifndef CONFIG_SMP
148 /*
149 * Must not happen on UP:
150 */
151 SPIN_BUG_ON(!ret, lock, "trylock failure on UP");
152#endif
153 return ret;
154}
155
156void do_raw_spin_unlock(raw_spinlock_t *lock)
157{
158 debug_spin_unlock(lock);
159 arch_spin_unlock(&lock->raw_lock);
160}
161
162static void rwlock_bug(rwlock_t *lock, const char *msg)
163{
164 if (!debug_locks_off())
165 return;
166
167 printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
168 msg, raw_smp_processor_id(), current->comm,
169 task_pid_nr(current), lock);
170 dump_stack();
171}
172
173#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
174
175#if 0 /* __write_lock_debug() can lock up - maybe this can too? */
176static void __read_lock_debug(rwlock_t *lock)
177{
178 u64 i;
179 u64 loops = loops_per_jiffy * HZ;
180 int print_once = 1;
181
182 for (;;) {
183 for (i = 0; i < loops; i++) {
184 if (arch_read_trylock(&lock->raw_lock))
185 return;
186 __delay(1);
187 }
188 /* lockup suspected: */
189 if (print_once) {
190 print_once = 0;
191 printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, "
192 "%s/%d, %p\n",
193 raw_smp_processor_id(), current->comm,
194 current->pid, lock);
195 dump_stack();
196 }
197 }
198}
199#endif
200
201void do_raw_read_lock(rwlock_t *lock)
202{
203 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
204 arch_read_lock(&lock->raw_lock);
205}
206
207int do_raw_read_trylock(rwlock_t *lock)
208{
209 int ret = arch_read_trylock(&lock->raw_lock);
210
211#ifndef CONFIG_SMP
212 /*
213 * Must not happen on UP:
214 */
215 RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
216#endif
217 return ret;
218}
219
220void do_raw_read_unlock(rwlock_t *lock)
221{
222 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
223 arch_read_unlock(&lock->raw_lock);
224}
225
226static inline void debug_write_lock_before(rwlock_t *lock)
227{
228 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
229 RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
230 RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
231 lock, "cpu recursion");
232}
233
234static inline void debug_write_lock_after(rwlock_t *lock)
235{
236 lock->owner_cpu = raw_smp_processor_id();
237 lock->owner = current;
238}
239
240static inline void debug_write_unlock(rwlock_t *lock)
241{
242 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
243 RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
244 RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
245 lock, "wrong CPU");
246 lock->owner = SPINLOCK_OWNER_INIT;
247 lock->owner_cpu = -1;
248}
249
250#if 0 /* This can cause lockups */
251static void __write_lock_debug(rwlock_t *lock)
252{
253 u64 i;
254 u64 loops = loops_per_jiffy * HZ;
255 int print_once = 1;
256
257 for (;;) {
258 for (i = 0; i < loops; i++) {
259 if (arch_write_trylock(&lock->raw_lock))
260 return;
261 __delay(1);
262 }
263 /* lockup suspected: */
264 if (print_once) {
265 print_once = 0;
266 printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, "
267 "%s/%d, %p\n",
268 raw_smp_processor_id(), current->comm,
269 current->pid, lock);
270 dump_stack();
271 }
272 }
273}
274#endif
275
276void do_raw_write_lock(rwlock_t *lock)
277{
278 debug_write_lock_before(lock);
279 arch_write_lock(&lock->raw_lock);
280 debug_write_lock_after(lock);
281}
282
283int do_raw_write_trylock(rwlock_t *lock)
284{
285 int ret = arch_write_trylock(&lock->raw_lock);
286
287 if (ret)
288 debug_write_lock_after(lock);
289#ifndef CONFIG_SMP
290 /*
291 * Must not happen on UP:
292 */
293 RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
294#endif
295 return ret;
296}
297
298void do_raw_write_unlock(rwlock_t *lock)
299{
300 debug_write_unlock(lock);
301 arch_write_unlock(&lock->raw_lock);
302}
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 4e8686c7e5a4..e4399fa65ad6 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -38,6 +38,9 @@
38#include <linux/bootmem.h> 38#include <linux/bootmem.h>
39#include <linux/iommu-helper.h> 39#include <linux/iommu-helper.h>
40 40
41#define CREATE_TRACE_POINTS
42#include <trace/events/swiotlb.h>
43
41#define OFFSET(val,align) ((unsigned long) \ 44#define OFFSET(val,align) ((unsigned long) \
42 ( (val) & ( (align) - 1))) 45 ( (val) & ( (align) - 1)))
43 46
@@ -502,6 +505,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
502 505
503not_found: 506not_found:
504 spin_unlock_irqrestore(&io_tlb_lock, flags); 507 spin_unlock_irqrestore(&io_tlb_lock, flags);
508 dev_warn(hwdev, "swiotlb buffer is full\n");
505 return SWIOTLB_MAP_ERROR; 509 return SWIOTLB_MAP_ERROR;
506found: 510found:
507 spin_unlock_irqrestore(&io_tlb_lock, flags); 511 spin_unlock_irqrestore(&io_tlb_lock, flags);
@@ -726,6 +730,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
726 if (dma_capable(dev, dev_addr, size) && !swiotlb_force) 730 if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
727 return dev_addr; 731 return dev_addr;
728 732
733 trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
734
729 /* Oh well, have to allocate and map a bounce buffer. */ 735 /* Oh well, have to allocate and map a bounce buffer. */
730 map = map_single(dev, phys, size, dir); 736 map = map_single(dev, phys, size, dir);
731 if (map == SWIOTLB_MAP_ERROR) { 737 if (map == SWIOTLB_MAP_ERROR) {
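
CREATE_TRACE_POINTS must be defined in exactly one translation unit before the trace header is included, which is what the hunk above does for <trace/events/swiotlb.h>. Such a header broadly follows the shape sketched below; the field layout and printk format here are hypothetical, only the boilerplate conventions are the point.

#undef TRACE_SYSTEM
#define TRACE_SYSTEM swiotlb

#if !defined(_TRACE_SWIOTLB_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SWIOTLB_H

#include <linux/tracepoint.h>

TRACE_EVENT(swiotlb_bounced,
	TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size, int force),
	TP_ARGS(dev, dev_addr, size, force),

	TP_STRUCT__entry(
		__string(dev_name, dev_name(dev))
		__field(u64, dev_addr)
		__field(size_t, size)
		__field(int, force)
	),

	TP_fast_assign(
		__assign_str(dev_name, dev_name(dev));
		__entry->dev_addr = dev_addr;
		__entry->size = size;
		__entry->force = force;
	),

	TP_printk("dev_name: %s dev_addr: %llx size: %zu force: %d",
		  __get_str(dev_name), __entry->dev_addr,
		  __entry->size, __entry->force)
);

#endif /* _TRACE_SWIOTLB_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
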
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 26559bdb4c49..10909c571494 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -27,6 +27,7 @@
27#include <linux/uaccess.h> 27#include <linux/uaccess.h>
28#include <linux/ioport.h> 28#include <linux/ioport.h>
29#include <linux/dcache.h> 29#include <linux/dcache.h>
30#include <linux/cred.h>
30#include <net/addrconf.h> 31#include <net/addrconf.h>
31 32
32#include <asm/page.h> /* for PAGE_SIZE */ 33#include <asm/page.h> /* for PAGE_SIZE */
@@ -1218,6 +1219,8 @@ int kptr_restrict __read_mostly;
1218 * The maximum supported length is 64 bytes of the input. Consider 1219 * The maximum supported length is 64 bytes of the input. Consider
1219 * to use print_hex_dump() for the larger input. 1220 * to use print_hex_dump() for the larger input.
1220 * - 'a' For a phys_addr_t type and its derivative types (passed by reference) 1221 * - 'a' For a phys_addr_t type and its derivative types (passed by reference)
1222 * - 'd[234]' For a dentry name (optionally 2-4 last components)
1223 * - 'D[234]' Same as 'd' but for a struct file
1221 * 1224 *
1222 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 1225 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
1223 * function pointers are really function descriptors, which contain a 1226 * function pointers are really function descriptors, which contain a
@@ -1312,11 +1315,37 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
1312 spec.field_width = default_width; 1315 spec.field_width = default_width;
1313 return string(buf, end, "pK-error", spec); 1316 return string(buf, end, "pK-error", spec);
1314 } 1317 }
1315 if (!((kptr_restrict == 0) || 1318
1316 (kptr_restrict == 1 && 1319 switch (kptr_restrict) {
1317 has_capability_noaudit(current, CAP_SYSLOG)))) 1320 case 0:
1321 /* Always print %pK values */
1322 break;
1323 case 1: {
1324 /*
1325 * Only print the real pointer value if the current
1326 * process has CAP_SYSLOG and is running with the
1327 * same credentials it started with. This is because
1328 * access to files is checked at open() time, but %pK
1329 * checks permission at read() time. We don't want to
1330 * leak pointer values if a binary opens a file using
1331 * %pK and then elevates privileges before reading it.
1332 */
1333 const struct cred *cred = current_cred();
1334
1335 if (!has_capability_noaudit(current, CAP_SYSLOG) ||
1336 !uid_eq(cred->euid, cred->uid) ||
1337 !gid_eq(cred->egid, cred->gid))
1338 ptr = NULL;
1339 break;
1340 }
1341 case 2:
1342 default:
1343 /* Always print 0's for %pK */
1318 ptr = NULL; 1344 ptr = NULL;
1345 break;
1346 }
1319 break; 1347 break;
1348
1320 case 'N': 1349 case 'N':
1321 switch (fmt[1]) { 1350 switch (fmt[1]) {
1322 case 'F': 1351 case 'F':
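
As a usage note, %pK is meant for pointers that are useful to a privileged reader but must not leak otherwise; with the logic above they print as all zeroes when kptr_restrict is 1 and the reader lacks CAP_SYSLOG (or has changed credentials), and always when it is 2. A minimal, hypothetical caller:

#include <linux/printk.h>
#include <linux/spinlock.h>

/* hypothetical example -- the lock address is only revealed to readers
 * permitted by /proc/sys/kernel/kptr_restrict
 */
static void report_contended_lock(spinlock_t *lock)
{
	pr_info("contended lock at %pK\n", lock);
}
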
@@ -1683,18 +1712,16 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1683 break; 1712 break;
1684 1713
1685 case FORMAT_TYPE_NRCHARS: { 1714 case FORMAT_TYPE_NRCHARS: {
1686 u8 qualifier = spec.qualifier; 1715 /*
1716 * Since %n poses a greater security risk than
1717 * utility, ignore %n and skip its argument.
1718 */
1719 void *skip_arg;
1687 1720
1688 if (qualifier == 'l') { 1721 WARN_ONCE(1, "Please remove ignored %%n in '%s'\n",
1689 long *ip = va_arg(args, long *); 1722 old_fmt);
1690 *ip = (str - buf); 1723
1691 } else if (_tolower(qualifier) == 'z') { 1724 skip_arg = va_arg(args, void *);
1692 size_t *ip = va_arg(args, size_t *);
1693 *ip = (str - buf);
1694 } else {
1695 int *ip = va_arg(args, int *);
1696 *ip = (str - buf);
1697 }
1698 break; 1725 break;
1699 } 1726 }
1700 1727