Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig                    6
-rw-r--r--  lib/Kconfig.debug             11
-rw-r--r--  lib/Kconfig.ubsan             11
-rw-r--r--  lib/Makefile                   5
-rw-r--r--  lib/atomic64_test.c            4
-rw-r--r--  lib/bitmap.c                  50
-rw-r--r--  lib/dma-debug.c               52
-rw-r--r--  lib/iov_iter.c               420
-rw-r--r--  lib/irq_poll.c                 2
-rw-r--r--  lib/kstrtox.c                  6
-rw-r--r--  lib/nmi_backtrace.c           42
-rw-r--r--  lib/percpu-refcount.c        169
-rw-r--r--  lib/radix-tree.c              14
-rw-r--r--  lib/raid6/.gitignore           1
-rw-r--r--  lib/raid6/Makefile             8
-rw-r--r--  lib/raid6/algos.c             18
-rw-r--r--  lib/raid6/avx512.c           569
-rw-r--r--  lib/raid6/recov_avx512.c     388
-rw-r--r--  lib/raid6/recov_s390xc.c     116
-rw-r--r--  lib/raid6/s390vx.uc          168
-rw-r--r--  lib/raid6/test/Makefile        5
-rw-r--r--  lib/raid6/test/test.c          7
-rw-r--r--  lib/raid6/x86.h               10
-rw-r--r--  lib/random32.c                 6
-rw-r--r--  lib/rhashtable.c             300
-rw-r--r--  lib/sbitmap.c                347
-rw-r--r--  lib/strncpy_from_user.c        2
-rw-r--r--  lib/test_bpf.c                 1
-rw-r--r--  lib/win_minmax.c              98
29 files changed, 2620 insertions(+), 216 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig
index d79909dc01ec..260a80e313b9 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -457,9 +457,6 @@ config NLATTR
457config GENERIC_ATOMIC64 457config GENERIC_ATOMIC64
458 bool 458 bool
459 459
460config ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
461 def_bool y if GENERIC_ATOMIC64
462
463config LRU_CACHE 460config LRU_CACHE
464 tristate 461 tristate
465 462
@@ -550,4 +547,7 @@ config STACKDEPOT
550 bool 547 bool
551 select STACKTRACE 548 select STACKTRACE
552 549
550config SBITMAP
551 bool
552
553endmenu 553endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cab7405f48d2..33bc56cf60d7 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -305,7 +305,7 @@ config DEBUG_SECTION_MISMATCH
305 a larger kernel). 305 a larger kernel).
306 - Run the section mismatch analysis for each module/built-in.o file. 306 - Run the section mismatch analysis for each module/built-in.o file.
307 When we run the section mismatch analysis on vmlinux.o, we 307 When we run the section mismatch analysis on vmlinux.o, we
308 lose valueble information about where the mismatch was 308 lose valuable information about where the mismatch was
309 introduced. 309 introduced.
310 Running the analysis for each module/built-in.o file 310 Running the analysis for each module/built-in.o file
311 tells where the mismatch happens much closer to the 311 tells where the mismatch happens much closer to the
@@ -1857,15 +1857,6 @@ config PROVIDE_OHCI1394_DMA_INIT
1857 1857
1858 See Documentation/debugging-via-ohci1394.txt for more information. 1858 See Documentation/debugging-via-ohci1394.txt for more information.
1859 1859
1860config BUILD_DOCSRC
1861 bool "Build targets in Documentation/ tree"
1862 depends on HEADERS_CHECK
1863 help
1864 This option attempts to build objects from the source files in the
1865 kernel Documentation/ tree.
1866
1867 Say N if you are unsure.
1868
1869config DMA_API_DEBUG 1860config DMA_API_DEBUG
1870 bool "Enable debugging of DMA-API usage" 1861 bool "Enable debugging of DMA-API usage"
1871 depends on HAVE_DMA_API_DEBUG 1862 depends on HAVE_DMA_API_DEBUG
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 39494af9a84a..bc6e651df68c 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,6 +1,9 @@
1config ARCH_HAS_UBSAN_SANITIZE_ALL 1config ARCH_HAS_UBSAN_SANITIZE_ALL
2 bool 2 bool
3 3
4config ARCH_WANTS_UBSAN_NO_NULL
5 def_bool n
6
4config UBSAN 7config UBSAN
5 bool "Undefined behaviour sanity checker" 8 bool "Undefined behaviour sanity checker"
6 help 9 help
@@ -34,3 +37,11 @@ config UBSAN_ALIGNMENT
34 This option enables detection of unaligned memory accesses. 37 This option enables detection of unaligned memory accesses.
35 Enabling this option on architectures that support unaligned 38 Enabling this option on architectures that support unaligned
36 accesses may produce a lot of false positives. 39 accesses may produce a lot of false positives.
40
41config UBSAN_NULL
42 bool "Enable checking of null pointers"
43 depends on UBSAN
44 default y if !ARCH_WANTS_UBSAN_NO_NULL
45 help
46 This option enables detection of memory accesses via a
47 null pointer.
diff --git a/lib/Makefile b/lib/Makefile
index 5dc77a8ec297..50144a3aeebd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
22 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ 22 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
23 flex_proportions.o ratelimit.o show_mem.o \ 23 flex_proportions.o ratelimit.o show_mem.o \
24 is_single_threaded.o plist.o decompress.o kobject_uevent.o \ 24 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
25 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o 25 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
26 26
27lib-$(CONFIG_MMU) += ioremap.o 27lib-$(CONFIG_MMU) += ioremap.o
28lib-$(CONFIG_SMP) += cpumask.o 28lib-$(CONFIG_SMP) += cpumask.o
@@ -180,6 +180,7 @@ obj-$(CONFIG_IRQ_POLL) += irq_poll.o
180 180
181obj-$(CONFIG_STACKDEPOT) += stackdepot.o 181obj-$(CONFIG_STACKDEPOT) += stackdepot.o
182KASAN_SANITIZE_stackdepot.o := n 182KASAN_SANITIZE_stackdepot.o := n
183KCOV_INSTRUMENT_stackdepot.o := n
183 184
184libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ 185libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
185 fdt_empty_tree.o 186 fdt_empty_tree.o
@@ -227,3 +228,5 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
227obj-$(CONFIG_UBSAN) += ubsan.o 228obj-$(CONFIG_UBSAN) += ubsan.o
228 229
229UBSAN_SANITIZE_ubsan.o := n 230UBSAN_SANITIZE_ubsan.o := n
231
232obj-$(CONFIG_SBITMAP) += sbitmap.o
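
The Makefile hunks hook two new objects into the build: win_minmax.o (a windowed min/max estimator used by TCP BBR) and sbitmap.o (scalable bitmaps, gated by the new CONFIG_SBITMAP symbol above), and they disable KCOV instrumentation for stackdepot. A minimal sketch of the win_minmax API, assuming the helpers declared in include/linux/win_minmax.h; the variable names here are illustrative only:

#include <linux/win_minmax.h>

/* windowed max filter state */
static struct minmax bw;

static u32 update_bw(u32 now, u32 win, u32 sample)
{
        /* returns the maximum sample seen over the last "win" time units */
        return minmax_running_max(&bw, win, now, sample);
}
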
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index dbb369145dda..46042901130f 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -213,7 +213,6 @@ static __init void test_atomic64(void)
213 r += one; 213 r += one;
214 BUG_ON(v.counter != r); 214 BUG_ON(v.counter != r);
215 215
216#ifdef CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
217 INIT(onestwos); 216 INIT(onestwos);
218 BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); 217 BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
219 r -= one; 218 r -= one;
@@ -226,9 +225,6 @@ static __init void test_atomic64(void)
226 INIT(-one); 225 INIT(-one);
227 BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); 226 BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
228 BUG_ON(v.counter != r); 227 BUG_ON(v.counter != r);
229#else
230#warning Please implement atomic64_dec_if_positive for your architecture and select the above Kconfig symbol
231#endif
232 228
233 INIT(onestwos); 229 INIT(onestwos);
234 BUG_ON(!atomic64_inc_not_zero(&v)); 230 BUG_ON(!atomic64_inc_not_zero(&v));
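
With every architecture now providing atomic64_dec_if_positive(), the test drops the ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE guard. A sketch of the semantics the surviving assertions exercise, using an illustrative counter:

#include <linux/atomic.h>

static atomic64_t refs = ATOMIC64_INIT(1);

static bool put_ref(void)
{
        /*
         * atomic64_dec_if_positive() only stores the decrement when the
         * result stays >= 0 and returns old - 1 either way, so a zero
         * return means we just dropped the last reference.
         */
        return atomic64_dec_if_positive(&refs) == 0;
}
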
diff --git a/lib/bitmap.c b/lib/bitmap.c
index eca88087fa8a..0b66f0e5eb6b 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -496,6 +496,11 @@ EXPORT_SYMBOL(bitmap_print_to_pagebuf);
496 * ranges. Consecutively set bits are shown as two hyphen-separated 496 * ranges. Consecutively set bits are shown as two hyphen-separated
497 * decimal numbers, the smallest and largest bit numbers set in 497 * decimal numbers, the smallest and largest bit numbers set in
498 * the range. 498 * the range.
499 * Optionally each range can be postfixed to denote that only parts of it
500 * should be set. The range will divided to groups of specific size.
501 * From each group will be used only defined amount of bits.
502 * Syntax: range:used_size/group_size
503 * Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769
499 * 504 *
500 * Returns 0 on success, -errno on invalid input strings. 505 * Returns 0 on success, -errno on invalid input strings.
501 * Error values: 506 * Error values:
@@ -507,16 +512,20 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
507 int is_user, unsigned long *maskp, 512 int is_user, unsigned long *maskp,
508 int nmaskbits) 513 int nmaskbits)
509{ 514{
510 unsigned a, b; 515 unsigned int a, b, old_a, old_b;
516 unsigned int group_size, used_size;
511 int c, old_c, totaldigits, ndigits; 517 int c, old_c, totaldigits, ndigits;
512 const char __user __force *ubuf = (const char __user __force *)buf; 518 const char __user __force *ubuf = (const char __user __force *)buf;
513 int at_start, in_range; 519 int at_start, in_range, in_partial_range;
514 520
515 totaldigits = c = 0; 521 totaldigits = c = 0;
522 old_a = old_b = 0;
523 group_size = used_size = 0;
516 bitmap_zero(maskp, nmaskbits); 524 bitmap_zero(maskp, nmaskbits);
517 do { 525 do {
518 at_start = 1; 526 at_start = 1;
519 in_range = 0; 527 in_range = 0;
528 in_partial_range = 0;
520 a = b = 0; 529 a = b = 0;
521 ndigits = totaldigits; 530 ndigits = totaldigits;
522 531
@@ -547,6 +556,24 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
547 if ((totaldigits != ndigits) && isspace(old_c)) 556 if ((totaldigits != ndigits) && isspace(old_c))
548 return -EINVAL; 557 return -EINVAL;
549 558
559 if (c == '/') {
560 used_size = a;
561 at_start = 1;
562 in_range = 0;
563 a = b = 0;
564 continue;
565 }
566
567 if (c == ':') {
568 old_a = a;
569 old_b = b;
570 at_start = 1;
571 in_range = 0;
572 in_partial_range = 1;
573 a = b = 0;
574 continue;
575 }
576
550 if (c == '-') { 577 if (c == '-') {
551 if (at_start || in_range) 578 if (at_start || in_range)
552 return -EINVAL; 579 return -EINVAL;
@@ -567,15 +594,30 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
567 } 594 }
568 if (ndigits == totaldigits) 595 if (ndigits == totaldigits)
569 continue; 596 continue;
597 if (in_partial_range) {
598 group_size = a;
599 a = old_a;
600 b = old_b;
601 old_a = old_b = 0;
602 }
570 /* if no digit is after '-', it's wrong*/ 603 /* if no digit is after '-', it's wrong*/
571 if (at_start && in_range) 604 if (at_start && in_range)
572 return -EINVAL; 605 return -EINVAL;
573 if (!(a <= b)) 606 if (!(a <= b) || !(used_size <= group_size))
574 return -EINVAL; 607 return -EINVAL;
575 if (b >= nmaskbits) 608 if (b >= nmaskbits)
576 return -ERANGE; 609 return -ERANGE;
577 while (a <= b) { 610 while (a <= b) {
578 set_bit(a, maskp); 611 if (in_partial_range) {
612 static int pos_in_group = 1;
613
614 if (pos_in_group <= used_size)
615 set_bit(a, maskp);
616
617 if (a == b || ++pos_in_group > group_size)
618 pos_in_group = 1;
619 } else
620 set_bit(a, maskp);
579 a++; 621 a++;
580 } 622 }
581 } while (buflen && c == ','); 623 } while (buflen && c == ',');
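
__bitmap_parselist() gains an optional ":used_size/group_size" suffix that sets only the first used_size bits of every group_size-bit group inside a range. A sketch of how a caller could exercise it through the exported bitmap_parselist(); the bitmap name and size are illustrative:

#include <linux/bitmap.h>

static DECLARE_BITMAP(mask, 1024);

static int demo_parse(void)
{
        /* sets bits 0,1,256,257,512,513,768,769 per the example above */
        return bitmap_parselist("0-1023:2/256", mask, 1024);
}
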
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 06f02f6aecd2..8971370bfb16 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -44,6 +44,7 @@ enum {
44 dma_debug_page, 44 dma_debug_page,
45 dma_debug_sg, 45 dma_debug_sg,
46 dma_debug_coherent, 46 dma_debug_coherent,
47 dma_debug_resource,
47}; 48};
48 49
49enum map_err_types { 50enum map_err_types {
@@ -151,8 +152,9 @@ static const char *const maperr2str[] = {
151 [MAP_ERR_CHECKED] = "dma map error checked", 152 [MAP_ERR_CHECKED] = "dma map error checked",
152}; 153};
153 154
154static const char *type2name[4] = { "single", "page", 155static const char *type2name[5] = { "single", "page",
155 "scather-gather", "coherent" }; 156 "scather-gather", "coherent",
157 "resource" };
156 158
157static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", 159static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
158 "DMA_FROM_DEVICE", "DMA_NONE" }; 160 "DMA_FROM_DEVICE", "DMA_NONE" };
@@ -400,6 +402,9 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
400 402
401static unsigned long long phys_addr(struct dma_debug_entry *entry) 403static unsigned long long phys_addr(struct dma_debug_entry *entry)
402{ 404{
405 if (entry->type == dma_debug_resource)
406 return __pfn_to_phys(entry->pfn) + entry->offset;
407
403 return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; 408 return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
404} 409}
405 410
@@ -1519,6 +1524,49 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
1519} 1524}
1520EXPORT_SYMBOL(debug_dma_free_coherent); 1525EXPORT_SYMBOL(debug_dma_free_coherent);
1521 1526
1527void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
1528 int direction, dma_addr_t dma_addr)
1529{
1530 struct dma_debug_entry *entry;
1531
1532 if (unlikely(dma_debug_disabled()))
1533 return;
1534
1535 entry = dma_entry_alloc();
1536 if (!entry)
1537 return;
1538
1539 entry->type = dma_debug_resource;
1540 entry->dev = dev;
1541 entry->pfn = PHYS_PFN(addr);
1542 entry->offset = offset_in_page(addr);
1543 entry->size = size;
1544 entry->dev_addr = dma_addr;
1545 entry->direction = direction;
1546 entry->map_err_type = MAP_ERR_NOT_CHECKED;
1547
1548 add_dma_entry(entry);
1549}
1550EXPORT_SYMBOL(debug_dma_map_resource);
1551
1552void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
1553 size_t size, int direction)
1554{
1555 struct dma_debug_entry ref = {
1556 .type = dma_debug_resource,
1557 .dev = dev,
1558 .dev_addr = dma_addr,
1559 .size = size,
1560 .direction = direction,
1561 };
1562
1563 if (unlikely(dma_debug_disabled()))
1564 return;
1565
1566 check_unmap(&ref);
1567}
1568EXPORT_SYMBOL(debug_dma_unmap_resource);
1569
1522void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, 1570void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
1523 size_t size, int direction) 1571 size_t size, int direction)
1524{ 1572{
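
The new debug_dma_map_resource()/debug_dma_unmap_resource() hooks let DMA-debug track dma_map_resource() mappings, which target MMIO ranges and therefore have no struct page behind them (hence the __pfn_to_phys() path in phys_addr()). A sketch of the driver-side pairing these hooks shadow; dev, phys and len are illustrative:

#include <linux/dma-mapping.h>
#include <linux/errno.h>

static int map_device_window(struct device *dev, phys_addr_t phys, size_t len)
{
        dma_addr_t dma;

        /* tracked by debug_dma_map_resource() when DMA_API_DEBUG is on */
        dma = dma_map_resource(dev, phys, len, DMA_BIDIRECTIONAL, 0);
        if (dma_mapping_error(dev, dma))
                return -ENOMEM;

        /* ... hand "dma" to the peer device ... */

        dma_unmap_resource(dev, dma, len, DMA_BIDIRECTIONAL, 0);
        return 0;
}
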
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 7e3138cfc8c9..f0c7f1481bae 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -3,8 +3,11 @@
3#include <linux/pagemap.h> 3#include <linux/pagemap.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/vmalloc.h> 5#include <linux/vmalloc.h>
6#include <linux/splice.h>
6#include <net/checksum.h> 7#include <net/checksum.h>
7 8
9#define PIPE_PARANOIA /* for now */
10
8#define iterate_iovec(i, n, __v, __p, skip, STEP) { \ 11#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
9 size_t left; \ 12 size_t left; \
10 size_t wanted = n; \ 13 size_t wanted = n; \
@@ -290,6 +293,93 @@ done:
290 return wanted - bytes; 293 return wanted - bytes;
291} 294}
292 295
296#ifdef PIPE_PARANOIA
297static bool sanity(const struct iov_iter *i)
298{
299 struct pipe_inode_info *pipe = i->pipe;
300 int idx = i->idx;
301 int next = pipe->curbuf + pipe->nrbufs;
302 if (i->iov_offset) {
303 struct pipe_buffer *p;
304 if (unlikely(!pipe->nrbufs))
305 goto Bad; // pipe must be non-empty
306 if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
307 goto Bad; // must be at the last buffer...
308
309 p = &pipe->bufs[idx];
310 if (unlikely(p->offset + p->len != i->iov_offset))
311 goto Bad; // ... at the end of segment
312 } else {
313 if (idx != (next & (pipe->buffers - 1)))
314 goto Bad; // must be right after the last buffer
315 }
316 return true;
317Bad:
318 printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
319 printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
320 pipe->curbuf, pipe->nrbufs, pipe->buffers);
321 for (idx = 0; idx < pipe->buffers; idx++)
322 printk(KERN_ERR "[%p %p %d %d]\n",
323 pipe->bufs[idx].ops,
324 pipe->bufs[idx].page,
325 pipe->bufs[idx].offset,
326 pipe->bufs[idx].len);
327 WARN_ON(1);
328 return false;
329}
330#else
331#define sanity(i) true
332#endif
333
334static inline int next_idx(int idx, struct pipe_inode_info *pipe)
335{
336 return (idx + 1) & (pipe->buffers - 1);
337}
338
339static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
340 struct iov_iter *i)
341{
342 struct pipe_inode_info *pipe = i->pipe;
343 struct pipe_buffer *buf;
344 size_t off;
345 int idx;
346
347 if (unlikely(bytes > i->count))
348 bytes = i->count;
349
350 if (unlikely(!bytes))
351 return 0;
352
353 if (!sanity(i))
354 return 0;
355
356 off = i->iov_offset;
357 idx = i->idx;
358 buf = &pipe->bufs[idx];
359 if (off) {
360 if (offset == off && buf->page == page) {
361 /* merge with the last one */
362 buf->len += bytes;
363 i->iov_offset += bytes;
364 goto out;
365 }
366 idx = next_idx(idx, pipe);
367 buf = &pipe->bufs[idx];
368 }
369 if (idx == pipe->curbuf && pipe->nrbufs)
370 return 0;
371 pipe->nrbufs++;
372 buf->ops = &page_cache_pipe_buf_ops;
373 get_page(buf->page = page);
374 buf->offset = offset;
375 buf->len = bytes;
376 i->iov_offset = offset + bytes;
377 i->idx = idx;
378out:
379 i->count -= bytes;
380 return bytes;
381}
382
293/* 383/*
294 * Fault in one or more iovecs of the given iov_iter, to a maximum length of 384 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
295 * bytes. For each iovec, fault in each page that constitutes the iovec. 385 * bytes. For each iovec, fault in each page that constitutes the iovec.
@@ -306,8 +396,7 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
306 396
307 if (!(i->type & (ITER_BVEC|ITER_KVEC))) { 397 if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
308 iterate_iovec(i, bytes, v, iov, skip, ({ 398 iterate_iovec(i, bytes, v, iov, skip, ({
309 err = fault_in_multipages_readable(v.iov_base, 399 err = fault_in_pages_readable(v.iov_base, v.iov_len);
310 v.iov_len);
311 if (unlikely(err)) 400 if (unlikely(err))
312 return err; 401 return err;
313 0;})) 402 0;}))
@@ -356,9 +445,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
356 kunmap_atomic(addr); 445 kunmap_atomic(addr);
357} 446}
358 447
448static inline bool allocated(struct pipe_buffer *buf)
449{
450 return buf->ops == &default_pipe_buf_ops;
451}
452
453static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
454{
455 size_t off = i->iov_offset;
456 int idx = i->idx;
457 if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
458 idx = next_idx(idx, i->pipe);
459 off = 0;
460 }
461 *idxp = idx;
462 *offp = off;
463}
464
465static size_t push_pipe(struct iov_iter *i, size_t size,
466 int *idxp, size_t *offp)
467{
468 struct pipe_inode_info *pipe = i->pipe;
469 size_t off;
470 int idx;
471 ssize_t left;
472
473 if (unlikely(size > i->count))
474 size = i->count;
475 if (unlikely(!size))
476 return 0;
477
478 left = size;
479 data_start(i, &idx, &off);
480 *idxp = idx;
481 *offp = off;
482 if (off) {
483 left -= PAGE_SIZE - off;
484 if (left <= 0) {
485 pipe->bufs[idx].len += size;
486 return size;
487 }
488 pipe->bufs[idx].len = PAGE_SIZE;
489 idx = next_idx(idx, pipe);
490 }
491 while (idx != pipe->curbuf || !pipe->nrbufs) {
492 struct page *page = alloc_page(GFP_USER);
493 if (!page)
494 break;
495 pipe->nrbufs++;
496 pipe->bufs[idx].ops = &default_pipe_buf_ops;
497 pipe->bufs[idx].page = page;
498 pipe->bufs[idx].offset = 0;
499 if (left <= PAGE_SIZE) {
500 pipe->bufs[idx].len = left;
501 return size;
502 }
503 pipe->bufs[idx].len = PAGE_SIZE;
504 left -= PAGE_SIZE;
505 idx = next_idx(idx, pipe);
506 }
507 return size - left;
508}
509
510static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
511 struct iov_iter *i)
512{
513 struct pipe_inode_info *pipe = i->pipe;
514 size_t n, off;
515 int idx;
516
517 if (!sanity(i))
518 return 0;
519
520 bytes = n = push_pipe(i, bytes, &idx, &off);
521 if (unlikely(!n))
522 return 0;
523 for ( ; n; idx = next_idx(idx, pipe), off = 0) {
524 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
525 memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
526 i->idx = idx;
527 i->iov_offset = off + chunk;
528 n -= chunk;
529 addr += chunk;
530 }
531 i->count -= bytes;
532 return bytes;
533}
534
359size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) 535size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
360{ 536{
361 const char *from = addr; 537 const char *from = addr;
538 if (unlikely(i->type & ITER_PIPE))
539 return copy_pipe_to_iter(addr, bytes, i);
362 iterate_and_advance(i, bytes, v, 540 iterate_and_advance(i, bytes, v,
363 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, 541 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
364 v.iov_len), 542 v.iov_len),
@@ -374,6 +552,10 @@ EXPORT_SYMBOL(copy_to_iter);
374size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) 552size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
375{ 553{
376 char *to = addr; 554 char *to = addr;
555 if (unlikely(i->type & ITER_PIPE)) {
556 WARN_ON(1);
557 return 0;
558 }
377 iterate_and_advance(i, bytes, v, 559 iterate_and_advance(i, bytes, v,
378 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, 560 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
379 v.iov_len), 561 v.iov_len),
@@ -389,6 +571,10 @@ EXPORT_SYMBOL(copy_from_iter);
389size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) 571size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
390{ 572{
391 char *to = addr; 573 char *to = addr;
574 if (unlikely(i->type & ITER_PIPE)) {
575 WARN_ON(1);
576 return 0;
577 }
392 iterate_and_advance(i, bytes, v, 578 iterate_and_advance(i, bytes, v,
393 __copy_from_user_nocache((to += v.iov_len) - v.iov_len, 579 __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
394 v.iov_base, v.iov_len), 580 v.iov_base, v.iov_len),
@@ -409,14 +595,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
409 size_t wanted = copy_to_iter(kaddr + offset, bytes, i); 595 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
410 kunmap_atomic(kaddr); 596 kunmap_atomic(kaddr);
411 return wanted; 597 return wanted;
412 } else 598 } else if (likely(!(i->type & ITER_PIPE)))
413 return copy_page_to_iter_iovec(page, offset, bytes, i); 599 return copy_page_to_iter_iovec(page, offset, bytes, i);
600 else
601 return copy_page_to_iter_pipe(page, offset, bytes, i);
414} 602}
415EXPORT_SYMBOL(copy_page_to_iter); 603EXPORT_SYMBOL(copy_page_to_iter);
416 604
417size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, 605size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
418 struct iov_iter *i) 606 struct iov_iter *i)
419{ 607{
608 if (unlikely(i->type & ITER_PIPE)) {
609 WARN_ON(1);
610 return 0;
611 }
420 if (i->type & (ITER_BVEC|ITER_KVEC)) { 612 if (i->type & (ITER_BVEC|ITER_KVEC)) {
421 void *kaddr = kmap_atomic(page); 613 void *kaddr = kmap_atomic(page);
422 size_t wanted = copy_from_iter(kaddr + offset, bytes, i); 614 size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
@@ -427,8 +619,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
427} 619}
428EXPORT_SYMBOL(copy_page_from_iter); 620EXPORT_SYMBOL(copy_page_from_iter);
429 621
622static size_t pipe_zero(size_t bytes, struct iov_iter *i)
623{
624 struct pipe_inode_info *pipe = i->pipe;
625 size_t n, off;
626 int idx;
627
628 if (!sanity(i))
629 return 0;
630
631 bytes = n = push_pipe(i, bytes, &idx, &off);
632 if (unlikely(!n))
633 return 0;
634
635 for ( ; n; idx = next_idx(idx, pipe), off = 0) {
636 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
637 memzero_page(pipe->bufs[idx].page, off, chunk);
638 i->idx = idx;
639 i->iov_offset = off + chunk;
640 n -= chunk;
641 }
642 i->count -= bytes;
643 return bytes;
644}
645
430size_t iov_iter_zero(size_t bytes, struct iov_iter *i) 646size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
431{ 647{
648 if (unlikely(i->type & ITER_PIPE))
649 return pipe_zero(bytes, i);
432 iterate_and_advance(i, bytes, v, 650 iterate_and_advance(i, bytes, v,
433 __clear_user(v.iov_base, v.iov_len), 651 __clear_user(v.iov_base, v.iov_len),
434 memzero_page(v.bv_page, v.bv_offset, v.bv_len), 652 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
@@ -443,6 +661,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
443 struct iov_iter *i, unsigned long offset, size_t bytes) 661 struct iov_iter *i, unsigned long offset, size_t bytes)
444{ 662{
445 char *kaddr = kmap_atomic(page), *p = kaddr + offset; 663 char *kaddr = kmap_atomic(page), *p = kaddr + offset;
664 if (unlikely(i->type & ITER_PIPE)) {
665 kunmap_atomic(kaddr);
666 WARN_ON(1);
667 return 0;
668 }
446 iterate_all_kinds(i, bytes, v, 669 iterate_all_kinds(i, bytes, v,
447 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, 670 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
448 v.iov_base, v.iov_len), 671 v.iov_base, v.iov_len),
@@ -455,8 +678,49 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
455} 678}
456EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); 679EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
457 680
681static void pipe_advance(struct iov_iter *i, size_t size)
682{
683 struct pipe_inode_info *pipe = i->pipe;
684 struct pipe_buffer *buf;
685 int idx = i->idx;
686 size_t off = i->iov_offset;
687
688 if (unlikely(i->count < size))
689 size = i->count;
690
691 if (size) {
692 if (off) /* make it relative to the beginning of buffer */
693 size += off - pipe->bufs[idx].offset;
694 while (1) {
695 buf = &pipe->bufs[idx];
696 if (size <= buf->len)
697 break;
698 size -= buf->len;
699 idx = next_idx(idx, pipe);
700 }
701 buf->len = size;
702 i->idx = idx;
703 off = i->iov_offset = buf->offset + size;
704 }
705 if (off)
706 idx = next_idx(idx, pipe);
707 if (pipe->nrbufs) {
708 int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
709 /* [curbuf,unused) is in use. Free [idx,unused) */
710 while (idx != unused) {
711 pipe_buf_release(pipe, &pipe->bufs[idx]);
712 idx = next_idx(idx, pipe);
713 pipe->nrbufs--;
714 }
715 }
716}
717
458void iov_iter_advance(struct iov_iter *i, size_t size) 718void iov_iter_advance(struct iov_iter *i, size_t size)
459{ 719{
720 if (unlikely(i->type & ITER_PIPE)) {
721 pipe_advance(i, size);
722 return;
723 }
460 iterate_and_advance(i, size, v, 0, 0, 0) 724 iterate_and_advance(i, size, v, 0, 0, 0)
461} 725}
462EXPORT_SYMBOL(iov_iter_advance); 726EXPORT_SYMBOL(iov_iter_advance);
@@ -466,6 +730,8 @@ EXPORT_SYMBOL(iov_iter_advance);
466 */ 730 */
467size_t iov_iter_single_seg_count(const struct iov_iter *i) 731size_t iov_iter_single_seg_count(const struct iov_iter *i)
468{ 732{
733 if (unlikely(i->type & ITER_PIPE))
734 return i->count; // it is a silly place, anyway
469 if (i->nr_segs == 1) 735 if (i->nr_segs == 1)
470 return i->count; 736 return i->count;
471 else if (i->type & ITER_BVEC) 737 else if (i->type & ITER_BVEC)
@@ -501,6 +767,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction,
501} 767}
502EXPORT_SYMBOL(iov_iter_bvec); 768EXPORT_SYMBOL(iov_iter_bvec);
503 769
770void iov_iter_pipe(struct iov_iter *i, int direction,
771 struct pipe_inode_info *pipe,
772 size_t count)
773{
774 BUG_ON(direction != ITER_PIPE);
775 i->type = direction;
776 i->pipe = pipe;
777 i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
778 i->iov_offset = 0;
779 i->count = count;
780}
781EXPORT_SYMBOL(iov_iter_pipe);
782
504unsigned long iov_iter_alignment(const struct iov_iter *i) 783unsigned long iov_iter_alignment(const struct iov_iter *i)
505{ 784{
506 unsigned long res = 0; 785 unsigned long res = 0;
@@ -509,6 +788,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
509 if (!size) 788 if (!size)
510 return 0; 789 return 0;
511 790
791 if (unlikely(i->type & ITER_PIPE)) {
792 if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
793 return size | i->iov_offset;
794 return size;
795 }
512 iterate_all_kinds(i, size, v, 796 iterate_all_kinds(i, size, v,
513 (res |= (unsigned long)v.iov_base | v.iov_len, 0), 797 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
514 res |= v.bv_offset | v.bv_len, 798 res |= v.bv_offset | v.bv_len,
@@ -525,6 +809,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
525 if (!size) 809 if (!size)
526 return 0; 810 return 0;
527 811
812 if (unlikely(i->type & ITER_PIPE)) {
813 WARN_ON(1);
814 return ~0U;
815 }
816
528 iterate_all_kinds(i, size, v, 817 iterate_all_kinds(i, size, v,
529 (res |= (!res ? 0 : (unsigned long)v.iov_base) | 818 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
530 (size != v.iov_len ? size : 0), 0), 819 (size != v.iov_len ? size : 0), 0),
@@ -537,6 +826,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
537} 826}
538EXPORT_SYMBOL(iov_iter_gap_alignment); 827EXPORT_SYMBOL(iov_iter_gap_alignment);
539 828
829static inline size_t __pipe_get_pages(struct iov_iter *i,
830 size_t maxsize,
831 struct page **pages,
832 int idx,
833 size_t *start)
834{
835 struct pipe_inode_info *pipe = i->pipe;
836 ssize_t n = push_pipe(i, maxsize, &idx, start);
837 if (!n)
838 return -EFAULT;
839
840 maxsize = n;
841 n += *start;
842 while (n > 0) {
843 get_page(*pages++ = pipe->bufs[idx].page);
844 idx = next_idx(idx, pipe);
845 n -= PAGE_SIZE;
846 }
847
848 return maxsize;
849}
850
851static ssize_t pipe_get_pages(struct iov_iter *i,
852 struct page **pages, size_t maxsize, unsigned maxpages,
853 size_t *start)
854{
855 unsigned npages;
856 size_t capacity;
857 int idx;
858
859 if (!sanity(i))
860 return -EFAULT;
861
862 data_start(i, &idx, start);
863 /* some of this one + all after this one */
864 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
865 capacity = min(npages,maxpages) * PAGE_SIZE - *start;
866
867 return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
868}
869
540ssize_t iov_iter_get_pages(struct iov_iter *i, 870ssize_t iov_iter_get_pages(struct iov_iter *i,
541 struct page **pages, size_t maxsize, unsigned maxpages, 871 struct page **pages, size_t maxsize, unsigned maxpages,
542 size_t *start) 872 size_t *start)
@@ -547,6 +877,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
547 if (!maxsize) 877 if (!maxsize)
548 return 0; 878 return 0;
549 879
880 if (unlikely(i->type & ITER_PIPE))
881 return pipe_get_pages(i, pages, maxsize, maxpages, start);
550 iterate_all_kinds(i, maxsize, v, ({ 882 iterate_all_kinds(i, maxsize, v, ({
551 unsigned long addr = (unsigned long)v.iov_base; 883 unsigned long addr = (unsigned long)v.iov_base;
552 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); 884 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -582,6 +914,37 @@ static struct page **get_pages_array(size_t n)
582 return p; 914 return p;
583} 915}
584 916
917static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
918 struct page ***pages, size_t maxsize,
919 size_t *start)
920{
921 struct page **p;
922 size_t n;
923 int idx;
924 int npages;
925
926 if (!sanity(i))
927 return -EFAULT;
928
929 data_start(i, &idx, start);
930 /* some of this one + all after this one */
931 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
932 n = npages * PAGE_SIZE - *start;
933 if (maxsize > n)
934 maxsize = n;
935 else
936 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
937 p = get_pages_array(npages);
938 if (!p)
939 return -ENOMEM;
940 n = __pipe_get_pages(i, maxsize, p, idx, start);
941 if (n > 0)
942 *pages = p;
943 else
944 kvfree(p);
945 return n;
946}
947
585ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, 948ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
586 struct page ***pages, size_t maxsize, 949 struct page ***pages, size_t maxsize,
587 size_t *start) 950 size_t *start)
@@ -594,6 +957,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
594 if (!maxsize) 957 if (!maxsize)
595 return 0; 958 return 0;
596 959
960 if (unlikely(i->type & ITER_PIPE))
961 return pipe_get_pages_alloc(i, pages, maxsize, start);
597 iterate_all_kinds(i, maxsize, v, ({ 962 iterate_all_kinds(i, maxsize, v, ({
598 unsigned long addr = (unsigned long)v.iov_base; 963 unsigned long addr = (unsigned long)v.iov_base;
599 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); 964 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -635,6 +1000,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
635 __wsum sum, next; 1000 __wsum sum, next;
636 size_t off = 0; 1001 size_t off = 0;
637 sum = *csum; 1002 sum = *csum;
1003 if (unlikely(i->type & ITER_PIPE)) {
1004 WARN_ON(1);
1005 return 0;
1006 }
638 iterate_and_advance(i, bytes, v, ({ 1007 iterate_and_advance(i, bytes, v, ({
639 int err = 0; 1008 int err = 0;
640 next = csum_and_copy_from_user(v.iov_base, 1009 next = csum_and_copy_from_user(v.iov_base,
@@ -673,6 +1042,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
673 __wsum sum, next; 1042 __wsum sum, next;
674 size_t off = 0; 1043 size_t off = 0;
675 sum = *csum; 1044 sum = *csum;
1045 if (unlikely(i->type & ITER_PIPE)) {
1046 WARN_ON(1); /* for now */
1047 return 0;
1048 }
676 iterate_and_advance(i, bytes, v, ({ 1049 iterate_and_advance(i, bytes, v, ({
677 int err = 0; 1050 int err = 0;
678 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, 1051 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
@@ -712,7 +1085,20 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
712 if (!size) 1085 if (!size)
713 return 0; 1086 return 0;
714 1087
715 iterate_all_kinds(i, size, v, ({ 1088 if (unlikely(i->type & ITER_PIPE)) {
1089 struct pipe_inode_info *pipe = i->pipe;
1090 size_t off;
1091 int idx;
1092
1093 if (!sanity(i))
1094 return 0;
1095
1096 data_start(i, &idx, &off);
1097 /* some of this one + all after this one */
1098 npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1099 if (npages >= maxpages)
1100 return maxpages;
1101 } else iterate_all_kinds(i, size, v, ({
716 unsigned long p = (unsigned long)v.iov_base; 1102 unsigned long p = (unsigned long)v.iov_base;
717 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) 1103 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
718 - p / PAGE_SIZE; 1104 - p / PAGE_SIZE;
@@ -737,6 +1123,10 @@ EXPORT_SYMBOL(iov_iter_npages);
737const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) 1123const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
738{ 1124{
739 *new = *old; 1125 *new = *old;
1126 if (unlikely(new->type & ITER_PIPE)) {
1127 WARN_ON(1);
1128 return NULL;
1129 }
740 if (new->type & ITER_BVEC) 1130 if (new->type & ITER_BVEC)
741 return new->bvec = kmemdup(new->bvec, 1131 return new->bvec = kmemdup(new->bvec,
742 new->nr_segs * sizeof(struct bio_vec), 1132 new->nr_segs * sizeof(struct bio_vec),
@@ -749,6 +1139,28 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
749} 1139}
750EXPORT_SYMBOL(dup_iter); 1140EXPORT_SYMBOL(dup_iter);
751 1141
1142/**
1143 * import_iovec() - Copy an array of &struct iovec from userspace
1144 * into the kernel, check that it is valid, and initialize a new
1145 * &struct iov_iter iterator to access it.
1146 *
1147 * @type: One of %READ or %WRITE.
1148 * @uvector: Pointer to the userspace array.
1149 * @nr_segs: Number of elements in userspace array.
1150 * @fast_segs: Number of elements in @iov.
1151 * @iov: (input and output parameter) Pointer to pointer to (usually small
1152 * on-stack) kernel array.
1153 * @i: Pointer to iterator that will be initialized on success.
1154 *
1155 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1156 * then this function places %NULL in *@iov on return. Otherwise, a new
1157 * array will be allocated and the result placed in *@iov. This means that
1158 * the caller may call kfree() on *@iov regardless of whether the small
1159 * on-stack array was used or not (and regardless of whether this function
1160 * returns an error or not).
1161 *
1162 * Return: 0 on success or negative error code on error.
1163 */
752int import_iovec(int type, const struct iovec __user * uvector, 1164int import_iovec(int type, const struct iovec __user * uvector,
753 unsigned nr_segs, unsigned fast_segs, 1165 unsigned nr_segs, unsigned fast_segs,
754 struct iovec **iov, struct iov_iter *i) 1166 struct iovec **iov, struct iov_iter *i)
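
The iov_iter changes introduce the ITER_PIPE flavour: an iterator that lands data directly in a pipe's buffer ring, so splice reads can be served by the regular ->read_iter() path, with push_pipe() allocating pages and pipe_advance() trimming on short reads. A sketch of how a splice-read path might drive it, loosely modelled on generic_file_splice_read(); error handling and pipe-space accounting are omitted:

#include <linux/fs.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>

static ssize_t splice_read_sketch(struct file *in, loff_t *ppos,
                                  struct pipe_inode_info *pipe, size_t len)
{
        struct iov_iter to;
        struct kiocb kiocb;

        iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
        init_sync_kiocb(&kiocb, in);
        kiocb.ki_pos = *ppos;

        /* ->read_iter() fills pipe buffers via copy_page_to_iter_pipe() */
        return in->f_op->read_iter(&kiocb, &to);
}
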
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 2be55692aa43..1d6565e81030 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -74,7 +74,7 @@ void irq_poll_complete(struct irq_poll *iop)
74} 74}
75EXPORT_SYMBOL(irq_poll_complete); 75EXPORT_SYMBOL(irq_poll_complete);
76 76
77static void irq_poll_softirq(struct softirq_action *h) 77static void __latent_entropy irq_poll_softirq(struct softirq_action *h)
78{ 78{
79 struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); 79 struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
80 int rearm = 0, budget = irq_poll_budget; 80 int rearm = 0, budget = irq_poll_budget;
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index d8a5cf66c316..b8e2080c1a47 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -48,11 +48,9 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
48{ 48{
49 unsigned long long res; 49 unsigned long long res;
50 unsigned int rv; 50 unsigned int rv;
51 int overflow;
52 51
53 res = 0; 52 res = 0;
54 rv = 0; 53 rv = 0;
55 overflow = 0;
56 while (*s) { 54 while (*s) {
57 unsigned int val; 55 unsigned int val;
58 56
@@ -71,15 +69,13 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
71 */ 69 */
72 if (unlikely(res & (~0ull << 60))) { 70 if (unlikely(res & (~0ull << 60))) {
73 if (res > div_u64(ULLONG_MAX - val, base)) 71 if (res > div_u64(ULLONG_MAX - val, base))
74 overflow = 1; 72 rv |= KSTRTOX_OVERFLOW;
75 } 73 }
76 res = res * base + val; 74 res = res * base + val;
77 rv++; 75 rv++;
78 s++; 76 s++;
79 } 77 }
80 *p = res; 78 *p = res;
81 if (overflow)
82 rv |= KSTRTOX_OVERFLOW;
83 return rv; 79 return rv;
84} 80}
85 81
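
_parse_integer() now folds the overflow indication straight into its return value instead of carrying a separate local. A sketch of how a caller consumes that bit, mirroring the existing _kstrtoull() pattern; the wrapper name is illustrative:

#include <linux/errno.h>

#include "kstrtox.h"

static int parse_sketch(const char *s, unsigned int base,
                        unsigned long long *res)
{
        unsigned long long v;
        unsigned int rv = _parse_integer(s, base, &v);

        if (rv & KSTRTOX_OVERFLOW)
                return -ERANGE;
        if (rv == 0)            /* no digits consumed at all */
                return -EINVAL;
        *res = v;
        return 0;
}
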
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 26caf51cc238..75554754eadf 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -16,21 +16,23 @@
16#include <linux/delay.h> 16#include <linux/delay.h>
17#include <linux/kprobes.h> 17#include <linux/kprobes.h>
18#include <linux/nmi.h> 18#include <linux/nmi.h>
19#include <linux/cpu.h>
19 20
20#ifdef arch_trigger_all_cpu_backtrace 21#ifdef arch_trigger_cpumask_backtrace
21/* For reliability, we're prepared to waste bits here. */ 22/* For reliability, we're prepared to waste bits here. */
22static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; 23static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
23 24
24/* "in progress" flag of arch_trigger_all_cpu_backtrace */ 25/* "in progress" flag of arch_trigger_cpumask_backtrace */
25static unsigned long backtrace_flag; 26static unsigned long backtrace_flag;
26 27
27/* 28/*
28 * When raise() is called it will be is passed a pointer to the 29 * When raise() is called it will be passed a pointer to the
29 * backtrace_mask. Architectures that call nmi_cpu_backtrace() 30 * backtrace_mask. Architectures that call nmi_cpu_backtrace()
30 * directly from their raise() functions may rely on the mask 31 * directly from their raise() functions may rely on the mask
31 * they are passed being updated as a side effect of this call. 32 * they are passed being updated as a side effect of this call.
32 */ 33 */
33void nmi_trigger_all_cpu_backtrace(bool include_self, 34void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
35 bool exclude_self,
34 void (*raise)(cpumask_t *mask)) 36 void (*raise)(cpumask_t *mask))
35{ 37{
36 int i, this_cpu = get_cpu(); 38 int i, this_cpu = get_cpu();
@@ -44,13 +46,22 @@ void nmi_trigger_all_cpu_backtrace(bool include_self,
44 return; 46 return;
45 } 47 }
46 48
47 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); 49 cpumask_copy(to_cpumask(backtrace_mask), mask);
48 if (!include_self) 50 if (exclude_self)
49 cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); 51 cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
50 52
53 /*
54 * Don't try to send an NMI to this cpu; it may work on some
55 * architectures, but on others it may not, and we'll get
56 * information at least as useful just by doing a dump_stack() here.
57 * Note that nmi_cpu_backtrace(NULL) will clear the cpu bit.
58 */
59 if (cpumask_test_cpu(this_cpu, to_cpumask(backtrace_mask)))
60 nmi_cpu_backtrace(NULL);
61
51 if (!cpumask_empty(to_cpumask(backtrace_mask))) { 62 if (!cpumask_empty(to_cpumask(backtrace_mask))) {
52 pr_info("Sending NMI to %s CPUs:\n", 63 pr_info("Sending NMI from CPU %d to CPUs %*pbl:\n",
53 (include_self ? "all" : "other")); 64 this_cpu, nr_cpumask_bits, to_cpumask(backtrace_mask));
54 raise(to_cpumask(backtrace_mask)); 65 raise(to_cpumask(backtrace_mask));
55 } 66 }
56 67
@@ -77,11 +88,16 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
77 int cpu = smp_processor_id(); 88 int cpu = smp_processor_id();
78 89
79 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 90 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
80 pr_warn("NMI backtrace for cpu %d\n", cpu); 91 if (regs && cpu_in_idle(instruction_pointer(regs))) {
81 if (regs) 92 pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n",
82 show_regs(regs); 93 cpu, instruction_pointer(regs));
83 else 94 } else {
84 dump_stack(); 95 pr_warn("NMI backtrace for cpu %d\n", cpu);
96 if (regs)
97 show_regs(regs);
98 else
99 dump_stack();
100 }
85 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 101 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
86 return true; 102 return true;
87 } 103 }
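
nmi_trigger_all_cpu_backtrace() becomes nmi_trigger_cpumask_backtrace(): it takes an explicit cpumask plus an exclude_self flag, and the requesting CPU now dumps its own stack directly rather than NMI-ing itself. A sketch of the per-architecture glue that feeds it, loosely based on the x86 wiring; nmi_raise_cpus() stands in for the arch's "send NMI to this mask" primitive:

#include <linux/cpumask.h>
#include <linux/nmi.h>

static void nmi_raise_cpus(cpumask_t *mask)
{
        /* arch-specific: deliver an NMI (or IPI) to every CPU in @mask */
}

void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
        nmi_trigger_cpumask_backtrace(mask, exclude_self, nmi_raise_cpus);
}
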
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 27fe74948882..9ac959ef4cae 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -33,6 +33,7 @@
33 33
34#define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1)) 34#define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1))
35 35
36static DEFINE_SPINLOCK(percpu_ref_switch_lock);
36static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq); 37static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
37 38
38static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) 39static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
@@ -82,6 +83,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
82 atomic_long_set(&ref->count, start_count); 83 atomic_long_set(&ref->count, start_count);
83 84
84 ref->release = release; 85 ref->release = release;
86 ref->confirm_switch = NULL;
85 return 0; 87 return 0;
86} 88}
87EXPORT_SYMBOL_GPL(percpu_ref_init); 89EXPORT_SYMBOL_GPL(percpu_ref_init);
@@ -101,6 +103,8 @@ void percpu_ref_exit(struct percpu_ref *ref)
101 unsigned long __percpu *percpu_count = percpu_count_ptr(ref); 103 unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
102 104
103 if (percpu_count) { 105 if (percpu_count) {
106 /* non-NULL confirm_switch indicates switching in progress */
107 WARN_ON_ONCE(ref->confirm_switch);
104 free_percpu(percpu_count); 108 free_percpu(percpu_count);
105 ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD; 109 ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
106 } 110 }
@@ -161,66 +165,23 @@ static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
161static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, 165static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
162 percpu_ref_func_t *confirm_switch) 166 percpu_ref_func_t *confirm_switch)
163{ 167{
164 if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) { 168 if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
165 /* switching from percpu to atomic */ 169 if (confirm_switch)
166 ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; 170 confirm_switch(ref);
167 171 return;
168 /*
169 * Non-NULL ->confirm_switch is used to indicate that
170 * switching is in progress. Use noop one if unspecified.
171 */
172 WARN_ON_ONCE(ref->confirm_switch);
173 ref->confirm_switch =
174 confirm_switch ?: percpu_ref_noop_confirm_switch;
175
176 percpu_ref_get(ref); /* put after confirmation */
177 call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
178 } else if (confirm_switch) {
179 /*
180 * Somebody already set ATOMIC. Switching may still be in
181 * progress. @confirm_switch must be invoked after the
182 * switching is complete and a full sched RCU grace period
183 * has passed. Wait synchronously for the previous
184 * switching and schedule @confirm_switch invocation.
185 */
186 wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
187 ref->confirm_switch = confirm_switch;
188
189 percpu_ref_get(ref); /* put after confirmation */
190 call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
191 } 172 }
192}
193 173
194/** 174 /* switching from percpu to atomic */
195 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode 175 ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
196 * @ref: percpu_ref to switch to atomic mode 176
197 * @confirm_switch: optional confirmation callback 177 /*
198 * 178 * Non-NULL ->confirm_switch is used to indicate that switching is
199 * There's no reason to use this function for the usual reference counting. 179 * in progress. Use noop one if unspecified.
200 * Use percpu_ref_kill[_and_confirm](). 180 */
201 * 181 ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;
202 * Schedule switching of @ref to atomic mode. All its percpu counts will 182
203 * be collected to the main atomic counter. On completion, when all CPUs 183 percpu_ref_get(ref); /* put after confirmation */
204 * are guaraneed to be in atomic mode, @confirm_switch, which may not 184 call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
205 * block, is invoked. This function may be invoked concurrently with all
206 * the get/put operations and can safely be mixed with kill and reinit
207 * operations. Note that @ref will stay in atomic mode across kill/reinit
208 * cycles until percpu_ref_switch_to_percpu() is called.
209 *
210 * This function normally doesn't block and can be called from any context
211 * but it may block if @confirm_kill is specified and @ref is already in
212 * the process of switching to atomic mode. In such cases, @confirm_switch
213 * will be invoked after the switching is complete.
214 *
215 * Due to the way percpu_ref is implemented, @confirm_switch will be called
216 * after at least one full sched RCU grace period has passed but this is an
217 * implementation detail and must not be depended upon.
218 */
219void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
220 percpu_ref_func_t *confirm_switch)
221{
222 ref->force_atomic = true;
223 __percpu_ref_switch_to_atomic(ref, confirm_switch);
224} 185}
225 186
226static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) 187static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
@@ -233,8 +194,6 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
233 if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) 194 if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
234 return; 195 return;
235 196
236 wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
237
238 atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); 197 atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);
239 198
240 /* 199 /*
@@ -250,6 +209,58 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
250 ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); 209 ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
251} 210}
252 211
212static void __percpu_ref_switch_mode(struct percpu_ref *ref,
213 percpu_ref_func_t *confirm_switch)
214{
215 lockdep_assert_held(&percpu_ref_switch_lock);
216
217 /*
218 * If the previous ATOMIC switching hasn't finished yet, wait for
219 * its completion. If the caller ensures that ATOMIC switching
220 * isn't in progress, this function can be called from any context.
221 */
222 wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch,
223 percpu_ref_switch_lock);
224
225 if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
226 __percpu_ref_switch_to_atomic(ref, confirm_switch);
227 else
228 __percpu_ref_switch_to_percpu(ref);
229}
230
231/**
232 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
233 * @ref: percpu_ref to switch to atomic mode
234 * @confirm_switch: optional confirmation callback
235 *
236 * There's no reason to use this function for the usual reference counting.
237 * Use percpu_ref_kill[_and_confirm]().
238 *
239 * Schedule switching of @ref to atomic mode. All its percpu counts will
240 * be collected to the main atomic counter. On completion, when all CPUs
241 * are guaraneed to be in atomic mode, @confirm_switch, which may not
242 * block, is invoked. This function may be invoked concurrently with all
243 * the get/put operations and can safely be mixed with kill and reinit
244 * operations. Note that @ref will stay in atomic mode across kill/reinit
245 * cycles until percpu_ref_switch_to_percpu() is called.
246 *
247 * This function may block if @ref is in the process of switching to atomic
248 * mode. If the caller ensures that @ref is not in the process of
249 * switching to atomic mode, this function can be called from any context.
250 */
251void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
252 percpu_ref_func_t *confirm_switch)
253{
254 unsigned long flags;
255
256 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
257
258 ref->force_atomic = true;
259 __percpu_ref_switch_mode(ref, confirm_switch);
260
261 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
262}
263
253/** 264/**
254 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode 265 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
255 * @ref: percpu_ref to switch to percpu mode 266 * @ref: percpu_ref to switch to percpu mode
@@ -264,17 +275,20 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
264 * dying or dead, the actual switching takes place on the following 275 * dying or dead, the actual switching takes place on the following
265 * percpu_ref_reinit(). 276 * percpu_ref_reinit().
266 * 277 *
267 * This function normally doesn't block and can be called from any context 278 * This function may block if @ref is in the process of switching to atomic
268 * but it may block if @ref is in the process of switching to atomic mode 279 * mode. If the caller ensures that @ref is not in the process of
269 * by percpu_ref_switch_atomic(). 280 * switching to atomic mode, this function can be called from any context.
270 */ 281 */
271void percpu_ref_switch_to_percpu(struct percpu_ref *ref) 282void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
272{ 283{
284 unsigned long flags;
285
286 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
287
273 ref->force_atomic = false; 288 ref->force_atomic = false;
289 __percpu_ref_switch_mode(ref, NULL);
274 290
275 /* a dying or dead ref can't be switched to percpu mode w/o reinit */ 291 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
276 if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD))
277 __percpu_ref_switch_to_percpu(ref);
278} 292}
279 293
280/** 294/**
@@ -290,21 +304,23 @@ void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
290 * 304 *
291 * This function normally doesn't block and can be called from any context 305 * This function normally doesn't block and can be called from any context
292 * but it may block if @confirm_kill is specified and @ref is in the 306 * but it may block if @confirm_kill is specified and @ref is in the
293 * process of switching to atomic mode by percpu_ref_switch_atomic(). 307 * process of switching to atomic mode by percpu_ref_switch_to_atomic().
294 *
295 * Due to the way percpu_ref is implemented, @confirm_switch will be called
296 * after at least one full sched RCU grace period has passed but this is an
297 * implementation detail and must not be depended upon.
298 */ 308 */
299void percpu_ref_kill_and_confirm(struct percpu_ref *ref, 309void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
300 percpu_ref_func_t *confirm_kill) 310 percpu_ref_func_t *confirm_kill)
301{ 311{
312 unsigned long flags;
313
314 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
315
302 WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, 316 WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
303 "%s called more than once on %pf!", __func__, ref->release); 317 "%s called more than once on %pf!", __func__, ref->release);
304 318
305 ref->percpu_count_ptr |= __PERCPU_REF_DEAD; 319 ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
306 __percpu_ref_switch_to_atomic(ref, confirm_kill); 320 __percpu_ref_switch_mode(ref, confirm_kill);
307 percpu_ref_put(ref); 321 percpu_ref_put(ref);
322
323 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
308} 324}
309EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); 325EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
310 326
@@ -321,11 +337,16 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
321 */ 337 */
322void percpu_ref_reinit(struct percpu_ref *ref) 338void percpu_ref_reinit(struct percpu_ref *ref)
323{ 339{
340 unsigned long flags;
341
342 spin_lock_irqsave(&percpu_ref_switch_lock, flags);
343
324 WARN_ON_ONCE(!percpu_ref_is_zero(ref)); 344 WARN_ON_ONCE(!percpu_ref_is_zero(ref));
325 345
326 ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD; 346 ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
327 percpu_ref_get(ref); 347 percpu_ref_get(ref);
328 if (!ref->force_atomic) 348 __percpu_ref_switch_mode(ref, NULL);
329 __percpu_ref_switch_to_percpu(ref); 349
350 spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
330} 351}
331EXPORT_SYMBOL_GPL(percpu_ref_reinit); 352EXPORT_SYMBOL_GPL(percpu_ref_reinit);
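
The percpu-refcount rework serializes every mode switch behind percpu_ref_switch_lock and funnels switch-to-atomic, switch-to-percpu, kill and reinit through __percpu_ref_switch_mode(), so a caller can no longer race an in-flight confirm_switch. A sketch of the caller-visible usage these paths serve, loosely following the blk-mq freeze/unfreeze pattern; the names are illustrative:

#include <linux/gfp.h>
#include <linux/percpu-refcount.h>

static struct percpu_ref q_usage;

static void q_usage_release(struct percpu_ref *ref)
{
        /* last reference gone: safe to tear the object down */
}

static int q_init(void)
{
        return percpu_ref_init(&q_usage, q_usage_release, 0, GFP_KERNEL);
}

static void q_freeze(void)
{
        /* takes percpu_ref_switch_lock internally and goes atomic */
        percpu_ref_kill(&q_usage);
}

static void q_unfreeze(void)
{
        /* only valid once the killed ref has dropped to zero */
        percpu_ref_reinit(&q_usage);
}
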
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 91f0727e3cad..8e6d552c40dd 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1583,15 +1583,10 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1583} 1583}
1584EXPORT_SYMBOL(radix_tree_delete); 1584EXPORT_SYMBOL(radix_tree_delete);
1585 1585
1586struct radix_tree_node *radix_tree_replace_clear_tags( 1586void radix_tree_clear_tags(struct radix_tree_root *root,
1587 struct radix_tree_root *root, 1587 struct radix_tree_node *node,
1588 unsigned long index, void *entry) 1588 void **slot)
1589{ 1589{
1590 struct radix_tree_node *node;
1591 void **slot;
1592
1593 __radix_tree_lookup(root, index, &node, &slot);
1594
1595 if (node) { 1590 if (node) {
1596 unsigned int tag, offset = get_slot_offset(node, slot); 1591 unsigned int tag, offset = get_slot_offset(node, slot);
1597 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1592 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
@@ -1600,9 +1595,6 @@ struct radix_tree_node *radix_tree_replace_clear_tags(
1600 /* Clear root node tags */ 1595 /* Clear root node tags */
1601 root->gfp_mask &= __GFP_BITS_MASK; 1596 root->gfp_mask &= __GFP_BITS_MASK;
1602 } 1597 }
1603
1604 radix_tree_replace_slot(slot, entry);
1605 return node;
1606} 1598}
1607 1599
1608/** 1600/**
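
radix_tree_replace_clear_tags() is split up: the lookup and the slot replacement move out to the caller, and radix_tree_clear_tags() keeps only the tag clearing. A sketch of the resulting caller pattern, loosely following the page-cache replacement path; replace_entry() is illustrative:

#include <linux/radix-tree.h>

static void replace_entry(struct radix_tree_root *root, unsigned long index,
                          void *new)
{
        struct radix_tree_node *node;
        void **slot;

        __radix_tree_lookup(root, index, &node, &slot);
        radix_tree_clear_tags(root, node, slot);
        radix_tree_replace_slot(slot, new);
}
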
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index 0a7e494b2bcd..f01b1cb04f91 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -3,3 +3,4 @@ altivec*.c
3int*.c 3int*.c
4tables.c 4tables.c
5neon?.c 5neon?.c
6s390vx?.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3b10a48fa040..3057011f5599 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -3,10 +3,11 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ 3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
4 int8.o int16.o int32.o 4 int8.o int16.o int32.o
5 5
6raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o 6raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
7raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o 7raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
8raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o 8raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
9raid6_pq-$(CONFIG_TILEGX) += tilegx8.o 9raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
10raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
10 11
11hostprogs-y += mktables 12hostprogs-y += mktables
12 13
@@ -116,6 +117,11 @@ $(obj)/tilegx8.c: UNROLL := 8
116$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE 117$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE
117 $(call if_changed,unroll) 118 $(call if_changed,unroll)
118 119
120targets += s390vx8.c
121$(obj)/s390vx8.c: UNROLL := 8
122$(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE
123 $(call if_changed,unroll)
124
119quiet_cmd_mktable = TABLE $@ 125quiet_cmd_mktable = TABLE $@
120 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) 126 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
121 127
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 975c6e0434bd..7857049fd7d3 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -49,6 +49,10 @@ const struct raid6_calls * const raid6_algos[] = {
49 &raid6_avx2x1, 49 &raid6_avx2x1,
50 &raid6_avx2x2, 50 &raid6_avx2x2,
51#endif 51#endif
52#ifdef CONFIG_AS_AVX512
53 &raid6_avx512x1,
54 &raid6_avx512x2,
55#endif
52#endif 56#endif
53#if defined(__x86_64__) && !defined(__arch_um__) 57#if defined(__x86_64__) && !defined(__arch_um__)
54 &raid6_sse2x1, 58 &raid6_sse2x1,
@@ -59,6 +63,11 @@ const struct raid6_calls * const raid6_algos[] = {
59 &raid6_avx2x2, 63 &raid6_avx2x2,
60 &raid6_avx2x4, 64 &raid6_avx2x4,
61#endif 65#endif
66#ifdef CONFIG_AS_AVX512
67 &raid6_avx512x1,
68 &raid6_avx512x2,
69 &raid6_avx512x4,
70#endif
62#endif 71#endif
63#ifdef CONFIG_ALTIVEC 72#ifdef CONFIG_ALTIVEC
64 &raid6_altivec1, 73 &raid6_altivec1,
@@ -69,6 +78,9 @@ const struct raid6_calls * const raid6_algos[] = {
69#if defined(CONFIG_TILEGX) 78#if defined(CONFIG_TILEGX)
70 &raid6_tilegx8, 79 &raid6_tilegx8,
71#endif 80#endif
81#if defined(CONFIG_S390)
82 &raid6_s390vx8,
83#endif
72 &raid6_intx1, 84 &raid6_intx1,
73 &raid6_intx2, 85 &raid6_intx2,
74 &raid6_intx4, 86 &raid6_intx4,
@@ -89,12 +101,18 @@ void (*raid6_datap_recov)(int, size_t, int, void **);
89EXPORT_SYMBOL_GPL(raid6_datap_recov); 101EXPORT_SYMBOL_GPL(raid6_datap_recov);
90 102
91const struct raid6_recov_calls *const raid6_recov_algos[] = { 103const struct raid6_recov_calls *const raid6_recov_algos[] = {
104#ifdef CONFIG_AS_AVX512
105 &raid6_recov_avx512,
106#endif
92#ifdef CONFIG_AS_AVX2 107#ifdef CONFIG_AS_AVX2
93 &raid6_recov_avx2, 108 &raid6_recov_avx2,
94#endif 109#endif
95#ifdef CONFIG_AS_SSSE3 110#ifdef CONFIG_AS_SSSE3
96 &raid6_recov_ssse3, 111 &raid6_recov_ssse3,
97#endif 112#endif
113#ifdef CONFIG_S390
114 &raid6_recov_s390xc,
115#endif
98 &raid6_recov_intx1, 116 &raid6_recov_intx1,
99 NULL 117 NULL
100}; 118};
diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c
new file mode 100644
index 000000000000..f524a7972006
--- /dev/null
+++ b/lib/raid6/avx512.c
@@ -0,0 +1,569 @@
1/* -*- linux-c -*- --------------------------------------------------------
2 *
3 * Copyright (C) 2016 Intel Corporation
4 *
5 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
6 * Author: Megha Dey <megha.dey@linux.intel.com>
7 *
8 * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
9 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
14 * Boston MA 02111-1307, USA; either version 2 of the License, or
15 * (at your option) any later version; incorporated herein by reference.
16 *
17 * -----------------------------------------------------------------------
18 */
19
20/*
21 * AVX512 implementation of RAID-6 syndrome functions
22 *
23 */
24
25#ifdef CONFIG_AS_AVX512
26
27#include <linux/raid/pq.h>
28#include "x86.h"
29
30static const struct raid6_avx512_constants {
31 u64 x1d[8];
32} raid6_avx512_constants __aligned(512) = {
33 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
34 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
35 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
36 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
37};
38
39static int raid6_have_avx512(void)
40{
41 return boot_cpu_has(X86_FEATURE_AVX2) &&
42 boot_cpu_has(X86_FEATURE_AVX) &&
43 boot_cpu_has(X86_FEATURE_AVX512F) &&
44 boot_cpu_has(X86_FEATURE_AVX512BW) &&
45 boot_cpu_has(X86_FEATURE_AVX512VL) &&
46 boot_cpu_has(X86_FEATURE_AVX512DQ);
47}
48
49static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
50{
51 u8 **dptr = (u8 **)ptrs;
52 u8 *p, *q;
53 int d, z, z0;
54
55 z0 = disks - 3; /* Highest data disk */
56 p = dptr[z0+1]; /* XOR parity */
57 q = dptr[z0+2]; /* RS syndrome */
58
59 kernel_fpu_begin();
60
61 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
62 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
63 :
64 : "m" (raid6_avx512_constants.x1d[0]));
65
66 for (d = 0; d < bytes; d += 64) {
67 asm volatile("prefetchnta %0\n\t"
68 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
69 "prefetchnta %1\n\t"
70 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
71 "vmovdqa64 %1,%%zmm6"
72 :
73 : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
74 for (z = z0-2; z >= 0; z--) {
75 asm volatile("prefetchnta %0\n\t"
76 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
77 "vpmovm2b %%k1,%%zmm5\n\t"
78 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
79 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
80 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
81 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
82 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
83 "vmovdqa64 %0,%%zmm6"
84 :
85 : "m" (dptr[z][d]));
86 }
87 asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
88 "vpmovm2b %%k1,%%zmm5\n\t"
89 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
90 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
91 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
92 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
93 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
94 "vmovntdq %%zmm2,%0\n\t"
95 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
96 "vmovntdq %%zmm4,%1\n\t"
97 "vpxorq %%zmm4,%%zmm4,%%zmm4"
98 :
99 : "m" (p[d]), "m" (q[d]));
100 }
101
102 asm volatile("sfence" : : : "memory");
103 kernel_fpu_end();
104}
105
106static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
107 size_t bytes, void **ptrs)
108{
109 u8 **dptr = (u8 **)ptrs;
110 u8 *p, *q;
111 int d, z, z0;
112
113 z0 = stop; /* P/Q right side optimization */
114 p = dptr[disks-2]; /* XOR parity */
115 q = dptr[disks-1]; /* RS syndrome */
116
117 kernel_fpu_begin();
118
119 asm volatile("vmovdqa64 %0,%%zmm0"
120 : : "m" (raid6_avx512_constants.x1d[0]));
121
122 for (d = 0 ; d < bytes ; d += 64) {
123 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
124 "vmovdqa64 %1,%%zmm2\n\t"
125 "vpxorq %%zmm4,%%zmm2,%%zmm2"
126 :
127 : "m" (dptr[z0][d]), "m" (p[d]));
128 /* P/Q data pages */
129 for (z = z0-1 ; z >= start ; z--) {
130 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
131 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
132 "vpmovm2b %%k1,%%zmm5\n\t"
133 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
134 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
135 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
136 "vmovdqa64 %0,%%zmm5\n\t"
137 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
138 "vpxorq %%zmm5,%%zmm4,%%zmm4"
139 :
140 : "m" (dptr[z][d]));
141 }
142 /* P/Q left side optimization */
143 for (z = start-1 ; z >= 0 ; z--) {
144 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
145 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
146 "vpmovm2b %%k1,%%zmm5\n\t"
147 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
148 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
149 "vpxorq %%zmm5,%%zmm4,%%zmm4"
150 :
151 : );
152 }
153 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
154 /* Don't use movntdq for r/w memory area < cache line */
155 "vmovdqa64 %%zmm4,%0\n\t"
156 "vmovdqa64 %%zmm2,%1"
157 :
158 : "m" (q[d]), "m" (p[d]));
159 }
160
161 asm volatile("sfence" : : : "memory");
162 kernel_fpu_end();
163}
164
165const struct raid6_calls raid6_avx512x1 = {
166 raid6_avx5121_gen_syndrome,
167 raid6_avx5121_xor_syndrome,
168 raid6_have_avx512,
169 "avx512x1",
170 1 /* Has cache hints */
171};
172
173/*
174 * Unrolled-by-2 AVX512 implementation
175 */
176static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
177{
178 u8 **dptr = (u8 **)ptrs;
179 u8 *p, *q;
180 int d, z, z0;
181
182 z0 = disks - 3; /* Highest data disk */
183 p = dptr[z0+1]; /* XOR parity */
184 q = dptr[z0+2]; /* RS syndrome */
185
186 kernel_fpu_begin();
187
188 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
189 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
190 :
191 : "m" (raid6_avx512_constants.x1d[0]));
192
193 /* We uniformly assume a single prefetch covers at least 64 bytes */
194 for (d = 0; d < bytes; d += 128) {
195 asm volatile("prefetchnta %0\n\t"
196 "prefetchnta %1\n\t"
197 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
198 "vmovdqa64 %1,%%zmm3\n\t" /* P[1] */
199 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
200 "vmovdqa64 %%zmm3,%%zmm6" /* Q[1] */
201 :
202 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
203 for (z = z0-1; z >= 0; z--) {
204 asm volatile("prefetchnta %0\n\t"
205 "prefetchnta %1\n\t"
206 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
207 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
208 "vpmovm2b %%k1,%%zmm5\n\t"
209 "vpmovm2b %%k2,%%zmm7\n\t"
210 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
211 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
212 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
213 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
214 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
215 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
216 "vmovdqa64 %0,%%zmm5\n\t"
217 "vmovdqa64 %1,%%zmm7\n\t"
218 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
219 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
220 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
221 "vpxorq %%zmm7,%%zmm6,%%zmm6"
222 :
223 : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
224 }
225 asm volatile("vmovntdq %%zmm2,%0\n\t"
226 "vmovntdq %%zmm3,%1\n\t"
227 "vmovntdq %%zmm4,%2\n\t"
228 "vmovntdq %%zmm6,%3"
229 :
230 : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
231 "m" (q[d+64]));
232 }
233
234 asm volatile("sfence" : : : "memory");
235 kernel_fpu_end();
236}
237
238static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
239 size_t bytes, void **ptrs)
240{
241 u8 **dptr = (u8 **)ptrs;
242 u8 *p, *q;
243 int d, z, z0;
244
245 z0 = stop; /* P/Q right side optimization */
246 p = dptr[disks-2]; /* XOR parity */
247 q = dptr[disks-1]; /* RS syndrome */
248
249 kernel_fpu_begin();
250
251 asm volatile("vmovdqa64 %0,%%zmm0"
252 : : "m" (raid6_avx512_constants.x1d[0]));
253
254 for (d = 0 ; d < bytes ; d += 128) {
255 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
256 "vmovdqa64 %1,%%zmm6\n\t"
257 "vmovdqa64 %2,%%zmm2\n\t"
258 "vmovdqa64 %3,%%zmm3\n\t"
259 "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
260 "vpxorq %%zmm6,%%zmm3,%%zmm3"
261 :
262 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
263 "m" (p[d]), "m" (p[d+64]));
264 /* P/Q data pages */
265 for (z = z0-1 ; z >= start ; z--) {
266 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
267 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
268 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
269 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
270 "vpmovm2b %%k1,%%zmm5\n\t"
271 "vpmovm2b %%k2,%%zmm7\n\t"
272 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
273 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
274 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
275 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
276 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
277 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
278 "vmovdqa64 %0,%%zmm5\n\t"
279 "vmovdqa64 %1,%%zmm7\n\t"
280 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
281 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
282 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
283 "vpxorq %%zmm7,%%zmm6,%%zmm6"
284 :
285 : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
286 }
287 /* P/Q left side optimization */
288 for (z = start-1 ; z >= 0 ; z--) {
289 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
290 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
291 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
292 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
293 "vpmovm2b %%k1,%%zmm5\n\t"
294 "vpmovm2b %%k2,%%zmm7\n\t"
295 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
296 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
297 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
298 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
299 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
300 "vpxorq %%zmm7,%%zmm6,%%zmm6"
301 :
302 : );
303 }
304 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
305 "vpxorq %1,%%zmm6,%%zmm6\n\t"
306 /* Don't use movntdq for r/w
307 * memory area < cache line
308 */
309 "vmovdqa64 %%zmm4,%0\n\t"
310 "vmovdqa64 %%zmm6,%1\n\t"
311 "vmovdqa64 %%zmm2,%2\n\t"
312 "vmovdqa64 %%zmm3,%3"
313 :
314 : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
315 "m" (p[d+64]));
316 }
317
318 asm volatile("sfence" : : : "memory");
319 kernel_fpu_end();
320}
321
322const struct raid6_calls raid6_avx512x2 = {
323 raid6_avx5122_gen_syndrome,
324 raid6_avx5122_xor_syndrome,
325 raid6_have_avx512,
326 "avx512x2",
327 1 /* Has cache hints */
328};
329
330#ifdef CONFIG_X86_64
331
332/*
333 * Unrolled-by-4 AVX512 implementation
334 */
335static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
336{
337 u8 **dptr = (u8 **)ptrs;
338 u8 *p, *q;
339 int d, z, z0;
340
341 z0 = disks - 3; /* Highest data disk */
342 p = dptr[z0+1]; /* XOR parity */
343 q = dptr[z0+2]; /* RS syndrome */
344
345 kernel_fpu_begin();
346
347 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
348 "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t" /* Zero temp */
349 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" /* P[0] */
350 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" /* P[1] */
351 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" /* Q[0] */
352 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" /* Q[1] */
353 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */
354 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */
355 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */
356 "vpxorq %%zmm14,%%zmm14,%%zmm14" /* Q[3] */
357 :
358 : "m" (raid6_avx512_constants.x1d[0]));
359
360 for (d = 0; d < bytes; d += 256) {
361 for (z = z0; z >= 0; z--) {
362 asm volatile("prefetchnta %0\n\t"
363 "prefetchnta %1\n\t"
364 "prefetchnta %2\n\t"
365 "prefetchnta %3\n\t"
366 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
367 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
368 "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
369 "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
370 "vpmovm2b %%k1,%%zmm5\n\t"
371 "vpmovm2b %%k2,%%zmm7\n\t"
372 "vpmovm2b %%k3,%%zmm13\n\t"
373 "vpmovm2b %%k4,%%zmm15\n\t"
374 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
375 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
376 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
377 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
378 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
379 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
380 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
381 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
382 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
383 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
384 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
385 "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
386 "vmovdqa64 %0,%%zmm5\n\t"
387 "vmovdqa64 %1,%%zmm7\n\t"
388 "vmovdqa64 %2,%%zmm13\n\t"
389 "vmovdqa64 %3,%%zmm15\n\t"
390 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
391 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
392 "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
393 "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
394 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
395 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
396 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
397 "vpxorq %%zmm15,%%zmm14,%%zmm14"
398 :
399 : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
400 "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
401 }
402 asm volatile("vmovntdq %%zmm2,%0\n\t"
403 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
404 "vmovntdq %%zmm3,%1\n\t"
405 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
406 "vmovntdq %%zmm10,%2\n\t"
407 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
408 "vmovntdq %%zmm11,%3\n\t"
409 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
410 "vmovntdq %%zmm4,%4\n\t"
411 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
412 "vmovntdq %%zmm6,%5\n\t"
413 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
414 "vmovntdq %%zmm12,%6\n\t"
415 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
416 "vmovntdq %%zmm14,%7\n\t"
417 "vpxorq %%zmm14,%%zmm14,%%zmm14"
418 :
419 : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
420 "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
421 "m" (q[d+128]), "m" (q[d+192]));
422 }
423
424 asm volatile("sfence" : : : "memory");
425 kernel_fpu_end();
426}
427
428static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
429 size_t bytes, void **ptrs)
430{
431 u8 **dptr = (u8 **)ptrs;
432 u8 *p, *q;
433 int d, z, z0;
434
435 z0 = stop; /* P/Q right side optimization */
436 p = dptr[disks-2]; /* XOR parity */
437 q = dptr[disks-1]; /* RS syndrome */
438
439 kernel_fpu_begin();
440
441 asm volatile("vmovdqa64 %0,%%zmm0"
442 :: "m" (raid6_avx512_constants.x1d[0]));
443
444 for (d = 0 ; d < bytes ; d += 256) {
445 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
446 "vmovdqa64 %1,%%zmm6\n\t"
447 "vmovdqa64 %2,%%zmm12\n\t"
448 "vmovdqa64 %3,%%zmm14\n\t"
449 "vmovdqa64 %4,%%zmm2\n\t"
450 "vmovdqa64 %5,%%zmm3\n\t"
451 "vmovdqa64 %6,%%zmm10\n\t"
452 "vmovdqa64 %7,%%zmm11\n\t"
453 "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
454 "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
455 "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
456 "vpxorq %%zmm14,%%zmm11,%%zmm11"
457 :
458 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
459 "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
460 "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
461 "m" (p[d+192]));
462 /* P/Q data pages */
463 for (z = z0-1 ; z >= start ; z--) {
464 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
465 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
466 "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
467 "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
468 "prefetchnta %0\n\t"
469 "prefetchnta %2\n\t"
470 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
471 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
472 "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
473 "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
474 "vpmovm2b %%k1,%%zmm5\n\t"
475 "vpmovm2b %%k2,%%zmm7\n\t"
476 "vpmovm2b %%k3,%%zmm13\n\t"
477 "vpmovm2b %%k4,%%zmm15\n\t"
478 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
479 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
480 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
481 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
482 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
483 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
484 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
485 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
486 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
487 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
488 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
489 "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
490 "vmovdqa64 %0,%%zmm5\n\t"
491 "vmovdqa64 %1,%%zmm7\n\t"
492 "vmovdqa64 %2,%%zmm13\n\t"
493 "vmovdqa64 %3,%%zmm15\n\t"
494 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
495 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
496 "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
497 "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
498 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
499 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
500 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
501 "vpxorq %%zmm15,%%zmm14,%%zmm14"
502 :
503 : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
504 "m" (dptr[z][d+128]),
505 "m" (dptr[z][d+192]));
506 }
507 asm volatile("prefetchnta %0\n\t"
508 "prefetchnta %1\n\t"
509 :
510 : "m" (q[d]), "m" (q[d+128]));
511 /* P/Q left side optimization */
512 for (z = start-1 ; z >= 0 ; z--) {
513 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
514 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
515 "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
516 "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
517 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
518 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
519 "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
520 "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
521 "vpmovm2b %%k1,%%zmm5\n\t"
522 "vpmovm2b %%k2,%%zmm7\n\t"
523 "vpmovm2b %%k3,%%zmm13\n\t"
524 "vpmovm2b %%k4,%%zmm15\n\t"
525 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
526 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
527 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
528 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
529 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
530 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
531 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
532 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
533 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
534 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
535 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
536 "vpxorq %%zmm15,%%zmm14,%%zmm14"
537 :
538 : );
539 }
540 asm volatile("vmovntdq %%zmm2,%0\n\t"
541 "vmovntdq %%zmm3,%1\n\t"
542 "vmovntdq %%zmm10,%2\n\t"
543 "vmovntdq %%zmm11,%3\n\t"
544 "vpxorq %4,%%zmm4,%%zmm4\n\t"
545 "vpxorq %5,%%zmm6,%%zmm6\n\t"
546 "vpxorq %6,%%zmm12,%%zmm12\n\t"
547 "vpxorq %7,%%zmm14,%%zmm14\n\t"
548 "vmovntdq %%zmm4,%4\n\t"
549 "vmovntdq %%zmm6,%5\n\t"
550 "vmovntdq %%zmm12,%6\n\t"
551 "vmovntdq %%zmm14,%7"
552 :
553 : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
554 "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
555 "m" (q[d+128]), "m" (q[d+192]));
556 }
557 asm volatile("sfence" : : : "memory");
558 kernel_fpu_end();
559}
560const struct raid6_calls raid6_avx512x4 = {
561 raid6_avx5124_gen_syndrome,
562 raid6_avx5124_xor_syndrome,
563 raid6_have_avx512,
564 "avx512x4",
565 1 /* Has cache hints */
566};
567#endif
568
569#endif /* CONFIG_AS_AVX512 */
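The vpcmpgtb/vpmovm2b/vpaddb/vpandq/vpxorq sequences above all implement the same step: multiply the running Q value by 2 in GF(2^8) (generator polynomial 0x11d, hence the 0x1d constant) and fold in the next data block, while P is a plain XOR accumulation. A scalar sketch of that recurrence, for orientation only (the helper names are illustrative and not part of this patch):

	#include <stddef.h>

	/* Double a byte in GF(2^8) with the RAID-6 polynomial 0x11d:
	 * shift left by one and XOR in 0x1d when the top bit was set.
	 * This is what the compare/add/and/xor register dance computes,
	 * 64 bytes per zmm register at a time.
	 */
	static inline unsigned char gf2_mul2(unsigned char v)
	{
		return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
	}

	/* Scalar equivalent of gen_syndrome: P is the XOR of all data
	 * bytes, Q is the Horner-style sum of data bytes weighted by
	 * powers of 2 in GF(2^8).
	 */
	static void gen_syndrome_scalar(int disks, size_t bytes, unsigned char **d)
	{
		unsigned char *p = d[disks - 2], *q = d[disks - 1];
		size_t i;
		int z;

		for (i = 0; i < bytes; i++) {
			unsigned char wp = 0, wq = 0;

			for (z = disks - 3; z >= 0; z--) {
				wq = gf2_mul2(wq) ^ d[z][i];
				wp ^= d[z][i];
			}
			p[i] = wp;
			q[i] = wq;
		}
	}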
diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c
new file mode 100644
index 000000000000..625aafa33b61
--- /dev/null
+++ b/lib/raid6/recov_avx512.c
@@ -0,0 +1,388 @@
1/*
2 * Copyright (C) 2016 Intel Corporation
3 *
4 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
5 * Author: Megha Dey <megha.dey@linux.intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 *
12 */
13
14#ifdef CONFIG_AS_AVX512
15
16#include <linux/raid/pq.h>
17#include "x86.h"
18
19static int raid6_has_avx512(void)
20{
21 return boot_cpu_has(X86_FEATURE_AVX2) &&
22 boot_cpu_has(X86_FEATURE_AVX) &&
23 boot_cpu_has(X86_FEATURE_AVX512F) &&
24 boot_cpu_has(X86_FEATURE_AVX512BW) &&
25 boot_cpu_has(X86_FEATURE_AVX512VL) &&
26 boot_cpu_has(X86_FEATURE_AVX512DQ);
27}
28
29static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
30 int failb, void **ptrs)
31{
32 u8 *p, *q, *dp, *dq;
33 const u8 *pbmul; /* P multiplier table for B data */
34 const u8 *qmul; /* Q multiplier table (for both) */
35 const u8 x0f = 0x0f;
36
37 p = (u8 *)ptrs[disks-2];
38 q = (u8 *)ptrs[disks-1];
39
40 /*
41 * Compute syndrome with zero for the missing data pages
42 * Use the dead data pages as temporary storage for
43 * delta p and delta q
44 */
45
46 dp = (u8 *)ptrs[faila];
47 ptrs[faila] = (void *)raid6_empty_zero_page;
48 ptrs[disks-2] = dp;
49 dq = (u8 *)ptrs[failb];
50 ptrs[failb] = (void *)raid6_empty_zero_page;
51 ptrs[disks-1] = dq;
52
53 raid6_call.gen_syndrome(disks, bytes, ptrs);
54
55 /* Restore pointer table */
56 ptrs[faila] = dp;
57 ptrs[failb] = dq;
58 ptrs[disks-2] = p;
59 ptrs[disks-1] = q;
60
61 /* Now, pick the proper data tables */
62 pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
63 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
64 raid6_gfexp[failb]]];
65
66 kernel_fpu_begin();
67
68 /* zmm0 = x0f[16] */
69 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
70
71 while (bytes) {
72#ifdef CONFIG_X86_64
73 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
74 "vmovdqa64 %1, %%zmm9\n\t"
75 "vmovdqa64 %2, %%zmm0\n\t"
76 "vmovdqa64 %3, %%zmm8\n\t"
77 "vpxorq %4, %%zmm1, %%zmm1\n\t"
78 "vpxorq %5, %%zmm9, %%zmm9\n\t"
79 "vpxorq %6, %%zmm0, %%zmm0\n\t"
80 "vpxorq %7, %%zmm8, %%zmm8"
81 :
82 : "m" (q[0]), "m" (q[64]), "m" (p[0]),
83 "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
84 "m" (dp[0]), "m" (dp[64]));
85
86 /*
87 * 1 = dq[0] ^ q[0]
88 * 9 = dq[64] ^ q[64]
89 * 0 = dp[0] ^ p[0]
90 * 8 = dp[64] ^ p[64]
91 */
92
93 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
94 "vbroadcasti64x2 %1, %%zmm5"
95 :
96 : "m" (qmul[0]), "m" (qmul[16]));
97
98 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
99 "vpsraw $4, %%zmm9, %%zmm12\n\t"
100 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
101 "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
102 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
103 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
104 "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
105 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
106 "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
107 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
108 "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
109 "vpxorq %%zmm4, %%zmm5, %%zmm5"
110 :
111 : );
112
113 /*
114 * 5 = qx[0]
115 * 15 = qx[64]
116 */
117
118 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
119 "vbroadcasti64x2 %1, %%zmm1\n\t"
120 "vpsraw $4, %%zmm0, %%zmm2\n\t"
121 "vpsraw $4, %%zmm8, %%zmm6\n\t"
122 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
123 "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
124 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
125 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
126 "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
127 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
128 "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
129 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
130 "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
131 "vpxorq %%zmm12, %%zmm13, %%zmm13"
132 :
133 : "m" (pbmul[0]), "m" (pbmul[16]));
134
135 /*
136 * 1 = pbmul[px[0]]
137 * 13 = pbmul[px[64]]
138 */
139 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
140 "vpxorq %%zmm15, %%zmm13, %%zmm13"
141 :
142 : );
143
144 /*
145 * 1 = db = DQ
146 * 13 = db[64] = DQ[64]
147 */
148 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
149 "vmovdqa64 %%zmm13,%1\n\t"
150 "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
151 "vpxorq %%zmm13, %%zmm8, %%zmm8"
152 :
153 : "m" (dq[0]), "m" (dq[64]));
154
155 asm volatile("vmovdqa64 %%zmm0, %0\n\t"
156 "vmovdqa64 %%zmm8, %1"
157 :
158 : "m" (dp[0]), "m" (dp[64]));
159
160 bytes -= 128;
161 p += 128;
162 q += 128;
163 dp += 128;
164 dq += 128;
165#else
166 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
167 "vmovdqa64 %1, %%zmm0\n\t"
168 "vpxorq %2, %%zmm1, %%zmm1\n\t"
169 "vpxorq %3, %%zmm0, %%zmm0"
170 :
171 : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));
172
173 /* 1 = dq ^ q; 0 = dp ^ p */
174
175 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
176 "vbroadcasti64x2 %1, %%zmm5"
177 :
178 : "m" (qmul[0]), "m" (qmul[16]));
179
180 /*
181 * 1 = dq ^ q
182 * 3 = dq ^ p >> 4
183 */
184 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
185 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
186 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
187 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
188 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
189 "vpxorq %%zmm4, %%zmm5, %%zmm5"
190 :
191 : );
192
193 /* 5 = qx */
194
195 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
196 "vbroadcasti64x2 %1, %%zmm1"
197 :
198 : "m" (pbmul[0]), "m" (pbmul[16]));
199
200 asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
201 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
202 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
203 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
204 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
205 "vpxorq %%zmm4, %%zmm1, %%zmm1"
206 :
207 : );
208
209 /* 1 = pbmul[px] */
210 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
211 /* 1 = db = DQ */
212 "vmovdqa64 %%zmm1, %0\n\t"
213 :
214 : "m" (dq[0]));
215
216 asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
217 "vmovdqa64 %%zmm0, %0"
218 :
219 : "m" (dp[0]));
220
221 bytes -= 64;
222 p += 64;
223 q += 64;
224 dp += 64;
225 dq += 64;
226#endif
227 }
228
229 kernel_fpu_end();
230}
231
232static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
233 void **ptrs)
234{
235 u8 *p, *q, *dq;
236 const u8 *qmul; /* Q multiplier table */
237 const u8 x0f = 0x0f;
238
239 p = (u8 *)ptrs[disks-2];
240 q = (u8 *)ptrs[disks-1];
241
242 /*
243 * Compute syndrome with zero for the missing data page
244 * Use the dead data page as temporary storage for delta q
245 */
246
247 dq = (u8 *)ptrs[faila];
248 ptrs[faila] = (void *)raid6_empty_zero_page;
249 ptrs[disks-1] = dq;
250
251 raid6_call.gen_syndrome(disks, bytes, ptrs);
252
253 /* Restore pointer table */
254 ptrs[faila] = dq;
255 ptrs[disks-1] = q;
256
257 /* Now, pick the proper data tables */
258 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
259
260 kernel_fpu_begin();
261
262 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
263
264 while (bytes) {
265#ifdef CONFIG_X86_64
266 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
267 "vmovdqa64 %1, %%zmm8\n\t"
268 "vpxorq %2, %%zmm3, %%zmm3\n\t"
269 "vpxorq %3, %%zmm8, %%zmm8"
270 :
271 : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
272 "m" (q[64]));
273
274 /*
275 * 3 = q[0] ^ dq[0]
276 * 8 = q[64] ^ dq[64]
277 */
278 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
279 "vmovapd %%zmm0, %%zmm13\n\t"
280 "vbroadcasti64x2 %1, %%zmm1\n\t"
281 "vmovapd %%zmm1, %%zmm14"
282 :
283 : "m" (qmul[0]), "m" (qmul[16]));
284
285 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
286 "vpsraw $4, %%zmm8, %%zmm12\n\t"
287 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
288 "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
289 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
290 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
291 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
292 "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
293 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
294 "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
295 "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
296 "vpxorq %%zmm13, %%zmm14, %%zmm14"
297 :
298 : );
299
300 /*
301 * 1 = qmul[q[0] ^ dq[0]]
302 * 14 = qmul[q[64] ^ dq[64]]
303 */
304 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
305 "vmovdqa64 %1, %%zmm12\n\t"
306 "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
307 "vpxorq %%zmm14, %%zmm12, %%zmm12"
308 :
309 : "m" (p[0]), "m" (p[64]));
310
311 /*
312 * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
313 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
314 */
315
316 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
317 "vmovdqa64 %%zmm14, %1\n\t"
318 "vmovdqa64 %%zmm2, %2\n\t"
319 "vmovdqa64 %%zmm12,%3"
320 :
321 : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
322 "m" (p[64]));
323
324 bytes -= 128;
325 p += 128;
326 q += 128;
327 dq += 128;
328#else
329 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
330 "vpxorq %1, %%zmm3, %%zmm3"
331 :
332 : "m" (dq[0]), "m" (q[0]));
333
334 /* 3 = q ^ dq */
335
336 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
337 "vbroadcasti64x2 %1, %%zmm1"
338 :
339 : "m" (qmul[0]), "m" (qmul[16]));
340
341 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
342 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
343 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
344 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
345 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
346 "vpxorq %%zmm0, %%zmm1, %%zmm1"
347 :
348 : );
349
350 /* 1 = qmul[q ^ dq] */
351
352 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
353 "vpxorq %%zmm1, %%zmm2, %%zmm2"
354 :
355 : "m" (p[0]));
356
357 /* 2 = p ^ qmul[q ^ dq] */
358
359 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
360 "vmovdqa64 %%zmm2, %1"
361 :
362 : "m" (dq[0]), "m" (p[0]));
363
364 bytes -= 64;
365 p += 64;
366 q += 64;
367 dq += 64;
368#endif
369 }
370
371 kernel_fpu_end();
372}
373
374const struct raid6_recov_calls raid6_recov_avx512 = {
375 .data2 = raid6_2data_recov_avx512,
376 .datap = raid6_datap_recov_avx512,
377 .valid = raid6_has_avx512,
378#ifdef CONFIG_X86_64
379 .name = "avx512x2",
380#else
381 .name = "avx512x1",
382#endif
383 .priority = 3,
384};
385
386#else
387#warning "your version of binutils lacks AVX512 support"
388#endif
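The register-mapping comments in the loops above ("1 = dq[0] ^ q[0]", "1 = pbmul[px[0]]", "1 = db = DQ", ...) track the standard two-failure recovery algebra. With the two dead blocks replaced by the zero page and the syndrome regenerated, the per-byte computation reduces to the following (a summary of what the assembly computes; the scalar form of the same steps appears in recov_s390xc.c just below):

	dp = P ^ P'                    /* delta p: XOR of the two missing blocks  */
	dq = Q ^ Q'                    /* delta q: their GF(2^8)-weighted XOR     */
	Db = pbmul[dp] ^ qmul[dq]      /* table lookups selected from faila/failb */
	Da = dp ^ Db

raid6_datap_recov_avx512() is the degenerate single-data-plus-P case: only delta q is formed, the missing block is qmul[dq], and P is then repaired by XORing the recovered block back into it.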
diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c
new file mode 100644
index 000000000000..b042dac826cc
--- /dev/null
+++ b/lib/raid6/recov_s390xc.c
@@ -0,0 +1,116 @@
1/*
2 * RAID-6 data recovery in dual failure mode based on the XC instruction.
3 *
4 * Copyright IBM Corp. 2016
5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
6 */
7
8#include <linux/export.h>
9#include <linux/raid/pq.h>
10
11static inline void xor_block(u8 *p1, u8 *p2)
12{
13 typedef struct { u8 _[256]; } addrtype;
14
15 asm volatile(
16 " xc 0(256,%[p1]),0(%[p2])\n"
17 : "+m" (*(addrtype *) p1) : "m" (*(addrtype *) p2),
18 [p1] "a" (p1), [p2] "a" (p2) : "cc");
19}
20
21/* Recover two failed data blocks. */
22static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila,
23 int failb, void **ptrs)
24{
25 u8 *p, *q, *dp, *dq;
26 const u8 *pbmul; /* P multiplier table for B data */
27 const u8 *qmul; /* Q multiplier table (for both) */
28 int i;
29
30 p = (u8 *)ptrs[disks-2];
31 q = (u8 *)ptrs[disks-1];
32
33 /* Compute syndrome with zero for the missing data pages
34 Use the dead data pages as temporary storage for
35 delta p and delta q */
36 dp = (u8 *)ptrs[faila];
37 ptrs[faila] = (void *)raid6_empty_zero_page;
38 ptrs[disks-2] = dp;
39 dq = (u8 *)ptrs[failb];
40 ptrs[failb] = (void *)raid6_empty_zero_page;
41 ptrs[disks-1] = dq;
42
43 raid6_call.gen_syndrome(disks, bytes, ptrs);
44
45 /* Restore pointer table */
46 ptrs[faila] = dp;
47 ptrs[failb] = dq;
48 ptrs[disks-2] = p;
49 ptrs[disks-1] = q;
50
51 /* Now, pick the proper data tables */
52 pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
53 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
54
55 /* Now do it... */
56 while (bytes) {
57 xor_block(dp, p);
58 xor_block(dq, q);
59 for (i = 0; i < 256; i++)
60 dq[i] = pbmul[dp[i]] ^ qmul[dq[i]];
61 xor_block(dp, dq);
62 p += 256;
63 q += 256;
64 dp += 256;
65 dq += 256;
66 bytes -= 256;
67 }
68}
69
70/* Recover failure of one data block plus the P block */
71static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila,
72 void **ptrs)
73{
74 u8 *p, *q, *dq;
75 const u8 *qmul; /* Q multiplier table */
76 int i;
77
78 p = (u8 *)ptrs[disks-2];
79 q = (u8 *)ptrs[disks-1];
80
81 /* Compute syndrome with zero for the missing data page
82 Use the dead data page as temporary storage for delta q */
83 dq = (u8 *)ptrs[faila];
84 ptrs[faila] = (void *)raid6_empty_zero_page;
85 ptrs[disks-1] = dq;
86
87 raid6_call.gen_syndrome(disks, bytes, ptrs);
88
89 /* Restore pointer table */
90 ptrs[faila] = dq;
91 ptrs[disks-1] = q;
92
93 /* Now, pick the proper data tables */
94 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
95
96 /* Now do it... */
97 while (bytes) {
98 xor_block(dq, q);
99 for (i = 0; i < 256; i++)
100 dq[i] = qmul[dq[i]];
101 xor_block(p, dq);
102 p += 256;
103 q += 256;
104 dq += 256;
105 bytes -= 256;
106 }
107}
108
109
110const struct raid6_recov_calls raid6_recov_s390xc = {
111 .data2 = raid6_2data_recov_s390xc,
112 .datap = raid6_datap_recov_s390xc,
113 .valid = NULL,
114 .name = "s390xc",
115 .priority = 1,
116};
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
new file mode 100644
index 000000000000..7b45191a655f
--- /dev/null
+++ b/lib/raid6/s390vx.uc
@@ -0,0 +1,168 @@
1/*
2 * raid6_vx$#.c
3 *
4 * $#-way unrolled RAID6 gen/xor functions for s390
5 * based on the vector facility
6 *
7 * Copyright IBM Corp. 2016
8 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
9 *
10 * This file is postprocessed using unroll.awk.
11 */
12
13#include <linux/raid/pq.h>
14#include <asm/fpu/api.h>
15
16asm(".include \"asm/vx-insn.h\"\n");
17
18#define NSIZE 16
19
20static inline void LOAD_CONST(void)
21{
22 asm volatile("VREPIB %v24,7");
23 asm volatile("VREPIB %v25,0x1d");
24}
25
26/*
27 * The SHLBYTE() operation shifts each of the 16 bytes in
28 * vector register y left by 1 bit and stores the result in
29 * vector register x.
30 */
31static inline void SHLBYTE(int x, int y)
32{
33 asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y));
34}
35
36/*
37 * For each of the 16 bytes in the vector register y the MASK()
38 * operation returns 0xFF if the high bit of the byte is 1,
39 * or 0x00 if the high bit is 0. The result is stored in vector
40 * register x.
41 */
42static inline void MASK(int x, int y)
43{
44 asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y));
45}
46
47static inline void AND(int x, int y, int z)
48{
49 asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
50}
51
52static inline void XOR(int x, int y, int z)
53{
54 asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
55}
56
57static inline void LOAD_DATA(int x, int n, u8 *ptr)
58{
59 typedef struct { u8 _[16*n]; } addrtype;
60 register addrtype *__ptr asm("1") = (addrtype *) ptr;
61
62 asm volatile ("VLM %2,%3,0,%r1"
63 : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1));
64}
65
66static inline void STORE_DATA(int x, int n, u8 *ptr)
67{
68 typedef struct { u8 _[16*n]; } addrtype;
69 register addrtype *__ptr asm("1") = (addrtype *) ptr;
70
71 asm volatile ("VSTM %2,%3,0,1"
72 : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1));
73}
74
75static inline void COPY_VEC(int x, int y)
76{
77 asm volatile ("VLR %0,%1" : : "i" (x), "i" (y));
78}
79
80static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
81{
82 struct kernel_fpu vxstate;
83 u8 **dptr, *p, *q;
84 int d, z, z0;
85
86 kernel_fpu_begin(&vxstate, KERNEL_VXR);
87 LOAD_CONST();
88
89 dptr = (u8 **) ptrs;
90 z0 = disks - 3; /* Highest data disk */
91 p = dptr[z0 + 1]; /* XOR parity */
92 q = dptr[z0 + 2]; /* RS syndrome */
93
94 for (d = 0; d < bytes; d += $#*NSIZE) {
95 LOAD_DATA(0,$#,&dptr[z0][d]);
96 COPY_VEC(8+$$,0+$$);
97 for (z = z0 - 1; z >= 0; z--) {
98 MASK(16+$$,8+$$);
99 AND(16+$$,16+$$,25);
100 SHLBYTE(8+$$,8+$$);
101 XOR(8+$$,8+$$,16+$$);
102 LOAD_DATA(16,$#,&dptr[z][d]);
103 XOR(0+$$,0+$$,16+$$);
104 XOR(8+$$,8+$$,16+$$);
105 }
106 STORE_DATA(0,$#,&p[d]);
107 STORE_DATA(8,$#,&q[d]);
108 }
109 kernel_fpu_end(&vxstate, KERNEL_VXR);
110}
111
112static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
113 size_t bytes, void **ptrs)
114{
115 struct kernel_fpu vxstate;
116 u8 **dptr, *p, *q;
117 int d, z, z0;
118
119 dptr = (u8 **) ptrs;
120 z0 = stop; /* P/Q right side optimization */
121 p = dptr[disks - 2]; /* XOR parity */
122 q = dptr[disks - 1]; /* RS syndrome */
123
124 kernel_fpu_begin(&vxstate, KERNEL_VXR);
125 LOAD_CONST();
126
127 for (d = 0; d < bytes; d += $#*NSIZE) {
128 /* P/Q data pages */
129 LOAD_DATA(0,$#,&dptr[z0][d]);
130 COPY_VEC(8+$$,0+$$);
131 for (z = z0 - 1; z >= start; z--) {
132 MASK(16+$$,8+$$);
133 AND(16+$$,16+$$,25);
134 SHLBYTE(8+$$,8+$$);
135 XOR(8+$$,8+$$,16+$$);
136 LOAD_DATA(16,$#,&dptr[z][d]);
137 XOR(0+$$,0+$$,16+$$);
138 XOR(8+$$,8+$$,16+$$);
139 }
140 /* P/Q left side optimization */
141 for (z = start - 1; z >= 0; z--) {
142 MASK(16+$$,8+$$);
143 AND(16+$$,16+$$,25);
144 SHLBYTE(8+$$,8+$$);
145 XOR(8+$$,8+$$,16+$$);
146 }
147 LOAD_DATA(16,$#,&p[d]);
148 XOR(16+$$,16+$$,0+$$);
149 STORE_DATA(16,$#,&p[d]);
150 LOAD_DATA(16,$#,&q[d]);
151 XOR(16+$$,16+$$,8+$$);
152 STORE_DATA(16,$#,&q[d]);
153 }
154 kernel_fpu_end(&vxstate, KERNEL_VXR);
155}
156
157static int raid6_s390vx$#_valid(void)
158{
159 return MACHINE_HAS_VX;
160}
161
162const struct raid6_calls raid6_s390vx$# = {
163 raid6_s390vx$#_gen_syndrome,
164 raid6_s390vx$#_xor_syndrome,
165 raid6_s390vx$#_valid,
166 "vx128x$#",
167 1
168};
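As the Makefile hunk earlier in this diff shows, s390vx.uc is not compiled directly: s390vx8.c is generated from it by unroll.awk with UNROLL := 8. To a first approximation (a sketch of that preprocessing step, not code from this patch), $# is replaced by the unroll factor and every line containing $$ is emitted once for each value 0 .. $#-1, so a single source line expands into a straight-line block:

	/* source line in s390vx.uc */
	COPY_VEC(8+$$,0+$$);

	/* roughly what lands in the generated s390vx8.c */
	COPY_VEC(8+0,0+0);
	COPY_VEC(8+1,0+1);
	/* ... */
	COPY_VEC(8+7,0+7);

That yields the 8-way unrolled raid6_s390vx8_gen_syndrome()/raid6_s390vx8_xor_syndrome() pair that algos.c registers as raid6_s390vx8.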
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 29090f3db677..2c7b60edea04 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -32,10 +32,13 @@ ifeq ($(ARCH),arm64)
32endif 32endif
33 33
34ifeq ($(IS_X86),yes) 34ifeq ($(IS_X86),yes)
35 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o 35 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
36 CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ 36 CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \
37 gcc -c -x assembler - >&/dev/null && \ 37 gcc -c -x assembler - >&/dev/null && \
38 rm ./-.o && echo -DCONFIG_AS_AVX2=1) 38 rm ./-.o && echo -DCONFIG_AS_AVX2=1)
39 CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \
40 gcc -c -x assembler - >&/dev/null && \
41 rm ./-.o && echo -DCONFIG_AS_AVX512=1)
39else ifeq ($(HAS_NEON),yes) 42else ifeq ($(HAS_NEON),yes)
40 OBJS += neon.o neon1.o neon2.o neon4.o neon8.o 43 OBJS += neon.o neon1.o neon2.o neon4.o neon8.o
41 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 44 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 3bebbabdb510..b07f4d8e6b03 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -21,12 +21,13 @@
21 21
22#define NDISKS 16 /* Including P and Q */ 22#define NDISKS 16 /* Including P and Q */
23 23
24const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); 24const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
25struct raid6_calls raid6_call; 25struct raid6_calls raid6_call;
26 26
27char *dataptrs[NDISKS]; 27char *dataptrs[NDISKS];
28char data[NDISKS][PAGE_SIZE]; 28char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
29char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; 29char recovi[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
30char recovj[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
30 31
31static void makedata(int start, int stop) 32static void makedata(int start, int stop)
32{ 33{
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index 8fe9d9662abb..834d268a4b05 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -46,6 +46,16 @@ static inline void kernel_fpu_end(void)
46#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ 46#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
47#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ 47#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
48#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ 48#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
49#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */
50#define X86_FEATURE_AVX512DQ (9*32+17) /* AVX-512 DQ (Double/Quad granular)
51 * Instructions
52 */
53#define X86_FEATURE_AVX512BW (9*32+30) /* AVX-512 BW (Byte/Word granular)
54 * Instructions
55 */
56#define X86_FEATURE_AVX512VL (9*32+31) /* AVX-512 VL (128/256 Vector Length)
57 * Extensions
58 */
49#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ 59#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
50 60
51/* Should work well enough on modern CPUs for testing */ 61/* Should work well enough on modern CPUs for testing */
diff --git a/lib/random32.c b/lib/random32.c
index 69ed593aab07..fa594b1140e6 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -47,7 +47,7 @@ static inline void prandom_state_selftest(void)
47} 47}
48#endif 48#endif
49 49
50static DEFINE_PER_CPU(struct rnd_state, net_rand_state); 50static DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy;
51 51
52/** 52/**
53 * prandom_u32_state - seeded pseudo-random number generator. 53 * prandom_u32_state - seeded pseudo-random number generator.
@@ -81,7 +81,7 @@ u32 prandom_u32(void)
81 u32 res; 81 u32 res;
82 82
83 res = prandom_u32_state(state); 83 res = prandom_u32_state(state);
84 put_cpu_var(state); 84 put_cpu_var(net_rand_state);
85 85
86 return res; 86 return res;
87} 87}
@@ -128,7 +128,7 @@ void prandom_bytes(void *buf, size_t bytes)
128 struct rnd_state *state = &get_cpu_var(net_rand_state); 128 struct rnd_state *state = &get_cpu_var(net_rand_state);
129 129
130 prandom_bytes_state(state, buf, bytes); 130 prandom_bytes_state(state, buf, bytes);
131 put_cpu_var(state); 131 put_cpu_var(net_rand_state);
132} 132}
133EXPORT_SYMBOL(prandom_bytes); 133EXPORT_SYMBOL(prandom_bytes);
134 134
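The two random32.c hunks are a pairing fix: get_cpu_var() and put_cpu_var() are meant to name the same per-CPU variable (get yields this CPU's instance with preemption disabled, put re-enables preemption), so the put side now names net_rand_state rather than the local pointer it happened to be handed. A minimal sketch of the intended idiom, assuming only the standard percpu accessors (the wrapper name is illustrative):

	DEFINE_PER_CPU(struct rnd_state, net_rand_state);

	u32 example_prandom_u32(void)
	{
		/* get_cpu_var() disables preemption and yields this CPU's copy */
		struct rnd_state *state = &get_cpu_var(net_rand_state);
		u32 res = prandom_u32_state(state);

		/* put_cpu_var() takes the per-CPU variable, not the local pointer */
		put_cpu_var(net_rand_state);
		return res;
	}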
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 56054e541a0f..32d0ad058380 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -378,22 +378,8 @@ static void rht_deferred_worker(struct work_struct *work)
378 schedule_work(&ht->run_work); 378 schedule_work(&ht->run_work);
379} 379}
380 380
381static bool rhashtable_check_elasticity(struct rhashtable *ht, 381static int rhashtable_insert_rehash(struct rhashtable *ht,
382 struct bucket_table *tbl, 382 struct bucket_table *tbl)
383 unsigned int hash)
384{
385 unsigned int elasticity = ht->elasticity;
386 struct rhash_head *head;
387
388 rht_for_each(head, tbl, hash)
389 if (!--elasticity)
390 return true;
391
392 return false;
393}
394
395int rhashtable_insert_rehash(struct rhashtable *ht,
396 struct bucket_table *tbl)
397{ 383{
398 struct bucket_table *old_tbl; 384 struct bucket_table *old_tbl;
399 struct bucket_table *new_tbl; 385 struct bucket_table *new_tbl;
@@ -439,61 +425,172 @@ fail:
439 425
440 return err; 426 return err;
441} 427}
442EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
443 428
444struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, 429static void *rhashtable_lookup_one(struct rhashtable *ht,
445 const void *key, 430 struct bucket_table *tbl, unsigned int hash,
446 struct rhash_head *obj, 431 const void *key, struct rhash_head *obj)
447 struct bucket_table *tbl)
448{ 432{
433 struct rhashtable_compare_arg arg = {
434 .ht = ht,
435 .key = key,
436 };
437 struct rhash_head __rcu **pprev;
449 struct rhash_head *head; 438 struct rhash_head *head;
450 unsigned int hash; 439 int elasticity;
451 int err;
452 440
453 tbl = rhashtable_last_table(ht, tbl); 441 elasticity = ht->elasticity;
454 hash = head_hashfn(ht, tbl, obj); 442 pprev = &tbl->buckets[hash];
455 spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); 443 rht_for_each(head, tbl, hash) {
444 struct rhlist_head *list;
445 struct rhlist_head *plist;
456 446
457 err = -EEXIST; 447 elasticity--;
458 if (key && rhashtable_lookup_fast(ht, key, ht->p)) 448 if (!key ||
459 goto exit; 449 (ht->p.obj_cmpfn ?
450 ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
451 rhashtable_compare(&arg, rht_obj(ht, head))))
452 continue;
460 453
461 err = -E2BIG; 454 if (!ht->rhlist)
462 if (unlikely(rht_grow_above_max(ht, tbl))) 455 return rht_obj(ht, head);
463 goto exit; 456
457 list = container_of(obj, struct rhlist_head, rhead);
458 plist = container_of(head, struct rhlist_head, rhead);
459
460 RCU_INIT_POINTER(list->next, plist);
461 head = rht_dereference_bucket(head->next, tbl, hash);
462 RCU_INIT_POINTER(list->rhead.next, head);
463 rcu_assign_pointer(*pprev, obj);
464
465 return NULL;
466 }
467
468 if (elasticity <= 0)
469 return ERR_PTR(-EAGAIN);
470
471 return ERR_PTR(-ENOENT);
472}
473
474static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
475 struct bucket_table *tbl,
476 unsigned int hash,
477 struct rhash_head *obj,
478 void *data)
479{
480 struct bucket_table *new_tbl;
481 struct rhash_head *head;
482
483 if (!IS_ERR_OR_NULL(data))
484 return ERR_PTR(-EEXIST);
485
486 if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
487 return ERR_CAST(data);
464 488
465 err = -EAGAIN; 489 new_tbl = rcu_dereference(tbl->future_tbl);
466 if (rhashtable_check_elasticity(ht, tbl, hash) || 490 if (new_tbl)
467 rht_grow_above_100(ht, tbl)) 491 return new_tbl;
468 goto exit;
469 492
470 err = 0; 493 if (PTR_ERR(data) != -ENOENT)
494 return ERR_CAST(data);
495
496 if (unlikely(rht_grow_above_max(ht, tbl)))
497 return ERR_PTR(-E2BIG);
498
499 if (unlikely(rht_grow_above_100(ht, tbl)))
500 return ERR_PTR(-EAGAIN);
471 501
472 head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); 502 head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
473 503
474 RCU_INIT_POINTER(obj->next, head); 504 RCU_INIT_POINTER(obj->next, head);
505 if (ht->rhlist) {
506 struct rhlist_head *list;
507
508 list = container_of(obj, struct rhlist_head, rhead);
509 RCU_INIT_POINTER(list->next, NULL);
510 }
475 511
476 rcu_assign_pointer(tbl->buckets[hash], obj); 512 rcu_assign_pointer(tbl->buckets[hash], obj);
477 513
478 atomic_inc(&ht->nelems); 514 atomic_inc(&ht->nelems);
515 if (rht_grow_above_75(ht, tbl))
516 schedule_work(&ht->run_work);
479 517
480exit: 518 return NULL;
481 spin_unlock(rht_bucket_lock(tbl, hash)); 519}
482 520
483 if (err == 0) 521static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
484 return NULL; 522 struct rhash_head *obj)
485 else if (err == -EAGAIN) 523{
486 return tbl; 524 struct bucket_table *new_tbl;
487 else 525 struct bucket_table *tbl;
488 return ERR_PTR(err); 526 unsigned int hash;
527 spinlock_t *lock;
528 void *data;
529
530 tbl = rcu_dereference(ht->tbl);
531
532 /* All insertions must grab the oldest table containing
533 * the hashed bucket that is yet to be rehashed.
534 */
535 for (;;) {
536 hash = rht_head_hashfn(ht, tbl, obj, ht->p);
537 lock = rht_bucket_lock(tbl, hash);
538 spin_lock_bh(lock);
539
540 if (tbl->rehash <= hash)
541 break;
542
543 spin_unlock_bh(lock);
544 tbl = rcu_dereference(tbl->future_tbl);
545 }
546
547 data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
548 new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
549 if (PTR_ERR(new_tbl) != -EEXIST)
550 data = ERR_CAST(new_tbl);
551
552 while (!IS_ERR_OR_NULL(new_tbl)) {
553 tbl = new_tbl;
554 hash = rht_head_hashfn(ht, tbl, obj, ht->p);
555 spin_lock_nested(rht_bucket_lock(tbl, hash),
556 SINGLE_DEPTH_NESTING);
557
558 data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
559 new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
560 if (PTR_ERR(new_tbl) != -EEXIST)
561 data = ERR_CAST(new_tbl);
562
563 spin_unlock(rht_bucket_lock(tbl, hash));
564 }
565
566 spin_unlock_bh(lock);
567
568 if (PTR_ERR(data) == -EAGAIN)
569 data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
570 -EAGAIN);
571
572 return data;
573}
574
575void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
576 struct rhash_head *obj)
577{
578 void *data;
579
580 do {
581 rcu_read_lock();
582 data = rhashtable_try_insert(ht, key, obj);
583 rcu_read_unlock();
584 } while (PTR_ERR(data) == -EAGAIN);
585
586 return data;
489} 587}
490EXPORT_SYMBOL_GPL(rhashtable_insert_slow); 588EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
491 589
492/** 590/**
493 * rhashtable_walk_init - Initialise an iterator 591 * rhashtable_walk_enter - Initialise an iterator
494 * @ht: Table to walk over 592 * @ht: Table to walk over
495 * @iter: Hash table Iterator 593 * @iter: Hash table Iterator
496 * @gfp: GFP flags for allocations
497 * 594 *
498 * This function prepares a hash table walk. 595 * This function prepares a hash table walk.
499 * 596 *
@@ -508,30 +605,22 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
508 * This function may sleep so you must not call it from interrupt 605 * This function may sleep so you must not call it from interrupt
509 * context or with spin locks held. 606 * context or with spin locks held.
510 * 607 *
511 * You must call rhashtable_walk_exit if this function returns 608 * You must call rhashtable_walk_exit after this function returns.
512 * successfully.
513 */ 609 */
514int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, 610void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
515 gfp_t gfp)
516{ 611{
517 iter->ht = ht; 612 iter->ht = ht;
518 iter->p = NULL; 613 iter->p = NULL;
519 iter->slot = 0; 614 iter->slot = 0;
520 iter->skip = 0; 615 iter->skip = 0;
521 616
522 iter->walker = kmalloc(sizeof(*iter->walker), gfp);
523 if (!iter->walker)
524 return -ENOMEM;
525
526 spin_lock(&ht->lock); 617 spin_lock(&ht->lock);
527 iter->walker->tbl = 618 iter->walker.tbl =
528 rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); 619 rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
529 list_add(&iter->walker->list, &iter->walker->tbl->walkers); 620 list_add(&iter->walker.list, &iter->walker.tbl->walkers);
530 spin_unlock(&ht->lock); 621 spin_unlock(&ht->lock);
531
532 return 0;
533} 622}
534EXPORT_SYMBOL_GPL(rhashtable_walk_init); 623EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
535 624
536/** 625/**
537 * rhashtable_walk_exit - Free an iterator 626 * rhashtable_walk_exit - Free an iterator
@@ -542,10 +631,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
542void rhashtable_walk_exit(struct rhashtable_iter *iter) 631void rhashtable_walk_exit(struct rhashtable_iter *iter)
543{ 632{
544 spin_lock(&iter->ht->lock); 633 spin_lock(&iter->ht->lock);
545 if (iter->walker->tbl) 634 if (iter->walker.tbl)
546 list_del(&iter->walker->list); 635 list_del(&iter->walker.list);
547 spin_unlock(&iter->ht->lock); 636 spin_unlock(&iter->ht->lock);
548 kfree(iter->walker);
549} 637}
550EXPORT_SYMBOL_GPL(rhashtable_walk_exit); 638EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
551 639
@@ -571,12 +659,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter)
571 rcu_read_lock(); 659 rcu_read_lock();
572 660
573 spin_lock(&ht->lock); 661 spin_lock(&ht->lock);
574 if (iter->walker->tbl) 662 if (iter->walker.tbl)
575 list_del(&iter->walker->list); 663 list_del(&iter->walker.list);
576 spin_unlock(&ht->lock); 664 spin_unlock(&ht->lock);
577 665
578 if (!iter->walker->tbl) { 666 if (!iter->walker.tbl) {
579 iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); 667 iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
580 return -EAGAIN; 668 return -EAGAIN;
581 } 669 }
582 670
@@ -598,12 +686,17 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
598 */ 686 */
599void *rhashtable_walk_next(struct rhashtable_iter *iter) 687void *rhashtable_walk_next(struct rhashtable_iter *iter)
600{ 688{
601 struct bucket_table *tbl = iter->walker->tbl; 689 struct bucket_table *tbl = iter->walker.tbl;
690 struct rhlist_head *list = iter->list;
602 struct rhashtable *ht = iter->ht; 691 struct rhashtable *ht = iter->ht;
603 struct rhash_head *p = iter->p; 692 struct rhash_head *p = iter->p;
693 bool rhlist = ht->rhlist;
604 694
605 if (p) { 695 if (p) {
606 p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); 696 if (!rhlist || !(list = rcu_dereference(list->next))) {
697 p = rcu_dereference(p->next);
698 list = container_of(p, struct rhlist_head, rhead);
699 }
607 goto next; 700 goto next;
608 } 701 }
609 702
@@ -611,6 +704,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
611 int skip = iter->skip; 704 int skip = iter->skip;
612 705
613 rht_for_each_rcu(p, tbl, iter->slot) { 706 rht_for_each_rcu(p, tbl, iter->slot) {
707 if (rhlist) {
708 list = container_of(p, struct rhlist_head,
709 rhead);
710 do {
711 if (!skip)
712 goto next;
713 skip--;
714 list = rcu_dereference(list->next);
715 } while (list);
716
717 continue;
718 }
614 if (!skip) 719 if (!skip)
615 break; 720 break;
616 skip--; 721 skip--;
@@ -620,7 +725,8 @@ next:
620 if (!rht_is_a_nulls(p)) { 725 if (!rht_is_a_nulls(p)) {
621 iter->skip++; 726 iter->skip++;
622 iter->p = p; 727 iter->p = p;
623 return rht_obj(ht, p); 728 iter->list = list;
729 return rht_obj(ht, rhlist ? &list->rhead : p);
624 } 730 }
625 731
626 iter->skip = 0; 732 iter->skip = 0;
@@ -631,8 +737,8 @@ next:
631 /* Ensure we see any new tables. */ 737 /* Ensure we see any new tables. */
632 smp_rmb(); 738 smp_rmb();
633 739
634 iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht); 740 iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
635 if (iter->walker->tbl) { 741 if (iter->walker.tbl) {
636 iter->slot = 0; 742 iter->slot = 0;
637 iter->skip = 0; 743 iter->skip = 0;
638 return ERR_PTR(-EAGAIN); 744 return ERR_PTR(-EAGAIN);
@@ -652,7 +758,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
652 __releases(RCU) 758 __releases(RCU)
653{ 759{
654 struct rhashtable *ht; 760 struct rhashtable *ht;
655 struct bucket_table *tbl = iter->walker->tbl; 761 struct bucket_table *tbl = iter->walker.tbl;
656 762
657 if (!tbl) 763 if (!tbl)
658 goto out; 764 goto out;
@@ -661,9 +767,9 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
661 767
662 spin_lock(&ht->lock); 768 spin_lock(&ht->lock);
663 if (tbl->rehash < tbl->size) 769 if (tbl->rehash < tbl->size)
664 list_add(&iter->walker->list, &tbl->walkers); 770 list_add(&iter->walker.list, &tbl->walkers);
665 else 771 else
666 iter->walker->tbl = NULL; 772 iter->walker.tbl = NULL;
667 spin_unlock(&ht->lock); 773 spin_unlock(&ht->lock);
668 774
669 iter->p = NULL; 775 iter->p = NULL;
@@ -809,6 +915,48 @@ int rhashtable_init(struct rhashtable *ht,
809EXPORT_SYMBOL_GPL(rhashtable_init); 915EXPORT_SYMBOL_GPL(rhashtable_init);
810 916
811/** 917/**
918 * rhltable_init - initialize a new hash list table
919 * @hlt: hash list table to be initialized
920 * @params: configuration parameters
921 *
922 * Initializes a new hash list table.
923 *
924 * See documentation for rhashtable_init.
925 */
926int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
927{
928 int err;
929
930 /* No rhlist NULLs marking for now. */
931 if (params->nulls_base)
932 return -EINVAL;
933
934 err = rhashtable_init(&hlt->ht, params);
935 hlt->ht.rhlist = true;
936 return err;
937}
938EXPORT_SYMBOL_GPL(rhltable_init);
939
940static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
941 void (*free_fn)(void *ptr, void *arg),
942 void *arg)
943{
944 struct rhlist_head *list;
945
946 if (!ht->rhlist) {
947 free_fn(rht_obj(ht, obj), arg);
948 return;
949 }
950
951 list = container_of(obj, struct rhlist_head, rhead);
952 do {
953 obj = &list->rhead;
954 list = rht_dereference(list->next, ht);
955 free_fn(rht_obj(ht, obj), arg);
956 } while (list);
957}
958
959/**
812 * rhashtable_free_and_destroy - free elements and destroy hash table 960 * rhashtable_free_and_destroy - free elements and destroy hash table
813 * @ht: the hash table to destroy 961 * @ht: the hash table to destroy
814 * @free_fn: callback to release resources of element 962 * @free_fn: callback to release resources of element
@@ -845,7 +993,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
845 pos = next, 993 pos = next,
846 next = !rht_is_a_nulls(pos) ? 994 next = !rht_is_a_nulls(pos) ?
847 rht_dereference(pos->next, ht) : NULL) 995 rht_dereference(pos->next, ht) : NULL)
848 free_fn(rht_obj(ht, pos), arg); 996 rhashtable_free_one(ht, pos, free_fn, arg);
849 } 997 }
850 } 998 }
851 999
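Two user-visible changes run through these rhashtable.c hunks: the walker state moves from a separately kmalloc'ed object into the iterator itself (iter->walker. instead of iter->walker->), which is why rhashtable_walk_init() becomes the non-failing rhashtable_walk_enter(), and duplicate-key "rhlist" tables chain their entries through struct rhlist_head. A hedged sketch of the walk pattern after this change (error handling trimmed, caller-side names illustrative):

	struct rhashtable_iter iter;
	void *obj;

	rhashtable_walk_enter(&ht, &iter);	/* no allocation, cannot fail */

	rhashtable_walk_start(&iter);		/* may report -EAGAIN after a resize */
	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* table changed under us, keep going */
			break;
		}
		/* ... use obj ... */
	}
	rhashtable_walk_stop(&iter);

	rhashtable_walk_exit(&iter);		/* unhook the embedded walker */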
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
new file mode 100644
index 000000000000..2cecf05c82fd
--- /dev/null
+++ b/lib/sbitmap.c
@@ -0,0 +1,347 @@
1/*
2 * Copyright (C) 2016 Facebook
3 * Copyright (C) 2013-2014 Jens Axboe
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18#include <linux/random.h>
19#include <linux/sbitmap.h>
20
21int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
22 gfp_t flags, int node)
23{
24 unsigned int bits_per_word;
25 unsigned int i;
26
27 if (shift < 0) {
28 shift = ilog2(BITS_PER_LONG);
29 /*
30 * If the bitmap is small, shrink the number of bits per word so
31 * we spread over a few cachelines, at least. If less than 4
32 * bits, just forget about it, it's not going to work optimally
33 * anyway.
34 */
35 if (depth >= 4) {
36 while ((4U << shift) > depth)
37 shift--;
38 }
39 }
40 bits_per_word = 1U << shift;
41 if (bits_per_word > BITS_PER_LONG)
42 return -EINVAL;
43
44 sb->shift = shift;
45 sb->depth = depth;
46 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
47
48 if (depth == 0) {
49 sb->map = NULL;
50 return 0;
51 }
52
53 sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node);
54 if (!sb->map)
55 return -ENOMEM;
56
57 for (i = 0; i < sb->map_nr; i++) {
58 sb->map[i].depth = min(depth, bits_per_word);
59 depth -= sb->map[i].depth;
60 }
61 return 0;
62}
63EXPORT_SYMBOL_GPL(sbitmap_init_node);
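To make the shift heuristic above concrete (a worked example, not part of the patch): on a 64-bit build shift starts at ilog2(64) = 6; for depth = 128 the loop sees 4 << 6 = 256 > 128 and drops shift to 5, then 4 << 5 = 128 is not greater than 128 and stops. That gives 32 bits per word and map_nr = DIV_ROUND_UP(128, 32) = 4, so the 128 bits are spread over four sbitmap_word entries rather than packed into two unsigned longs, which is exactly the "spread over a few cachelines" goal the comment describes.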
64
65void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
66{
67 unsigned int bits_per_word = 1U << sb->shift;
68 unsigned int i;
69
70 sb->depth = depth;
71 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
72
73 for (i = 0; i < sb->map_nr; i++) {
74 sb->map[i].depth = min(depth, bits_per_word);
75 depth -= sb->map[i].depth;
76 }
77}
78EXPORT_SYMBOL_GPL(sbitmap_resize);
79
80static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint,
81 bool wrap)
82{
83 unsigned int orig_hint = hint;
84 int nr;
85
86 while (1) {
87 nr = find_next_zero_bit(&word->word, word->depth, hint);
88 if (unlikely(nr >= word->depth)) {
89 /*
90 * We started with an offset, and we didn't reset the
91 * offset to 0 in a failure case, so start from 0 to
92 * exhaust the map.
93 */
94 if (orig_hint && hint && wrap) {
95 hint = orig_hint = 0;
96 continue;
97 }
98 return -1;
99 }
100
101 if (!test_and_set_bit(nr, &word->word))
102 break;
103
104 hint = nr + 1;
105 if (hint >= word->depth - 1)
106 hint = 0;
107 }
108
109 return nr;
110}
111
112int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
113{
114 unsigned int i, index;
115 int nr = -1;
116
117 index = SB_NR_TO_INDEX(sb, alloc_hint);
118
119 for (i = 0; i < sb->map_nr; i++) {
120 nr = __sbitmap_get_word(&sb->map[index],
121 SB_NR_TO_BIT(sb, alloc_hint),
122 !round_robin);
123 if (nr != -1) {
124 nr += index << sb->shift;
125 break;
126 }
127
128 /* Jump to next index. */
129 index++;
130 alloc_hint = index << sb->shift;
131
132 if (index >= sb->map_nr) {
133 index = 0;
134 alloc_hint = 0;
135 }
136 }
137
138 return nr;
139}
140EXPORT_SYMBOL_GPL(sbitmap_get);
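/*
 * Editorial illustration (assuming SB_NR_TO_INDEX() shifts right by
 * sb->shift and SB_NR_TO_BIT() masks off the low bits, as in the header
 * added alongside this file): with shift == 5, alloc_hint == 70 starts the
 * scan at index == 70 >> 5 == 2, bit == 70 & 31 == 6, and a free bit b
 * found in word 2 is returned as b + (2 << 5).
 */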
141
142bool sbitmap_any_bit_set(const struct sbitmap *sb)
143{
144 unsigned int i;
145
146 for (i = 0; i < sb->map_nr; i++) {
147 if (sb->map[i].word)
148 return true;
149 }
150 return false;
151}
152EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);
153
154bool sbitmap_any_bit_clear(const struct sbitmap *sb)
155{
156 unsigned int i;
157
158 for (i = 0; i < sb->map_nr; i++) {
159 const struct sbitmap_word *word = &sb->map[i];
160 unsigned long ret;
161
162 ret = find_first_zero_bit(&word->word, word->depth);
163 if (ret < word->depth)
164 return true;
165 }
166 return false;
167}
168EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
169
170unsigned int sbitmap_weight(const struct sbitmap *sb)
171{
172 unsigned int i, weight = 0;
173
174 for (i = 0; i < sb->map_nr; i++) {
175 const struct sbitmap_word *word = &sb->map[i];
176
177 weight += bitmap_weight(&word->word, word->depth);
178 }
179 return weight;
180}
181EXPORT_SYMBOL_GPL(sbitmap_weight);
182
183static unsigned int sbq_calc_wake_batch(unsigned int depth)
184{
185 unsigned int wake_batch;
186
187 /*
188 * For each batch, we wake up one queue. We need to make sure that our
189 * batch size is small enough that the full depth of the bitmap is
190 * enough to wake up all of the queues.
191 */
192 wake_batch = SBQ_WAKE_BATCH;
193 if (wake_batch > depth / SBQ_WAIT_QUEUES)
194 wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);
195
196 return wake_batch;
197}
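/*
 * Worked example (editorial; assumes SBQ_WAIT_QUEUES == 8 and
 * SBQ_WAKE_BATCH == 8 as in the accompanying sbitmap.h):
 *   depth 256: 256 / 8 == 32 >= 8, so wake_batch stays 8
 *   depth  32:  32 / 8 ==  4,      so wake_batch becomes 4
 *   depth   4:   4 / 8 ==  0,      so wake_batch clamps to 1
 */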
198
199int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
200 int shift, bool round_robin, gfp_t flags, int node)
201{
202 int ret;
203 int i;
204
205 ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
206 if (ret)
207 return ret;
208
209 sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
210 if (!sbq->alloc_hint) {
211 sbitmap_free(&sbq->sb);
212 return -ENOMEM;
213 }
214
215 if (depth && !round_robin) {
216 for_each_possible_cpu(i)
217 *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
218 }
219
220 sbq->wake_batch = sbq_calc_wake_batch(depth);
221 atomic_set(&sbq->wake_index, 0);
222
223 sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
224 if (!sbq->ws) {
225 free_percpu(sbq->alloc_hint);
226 sbitmap_free(&sbq->sb);
227 return -ENOMEM;
228 }
229
230 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
231 init_waitqueue_head(&sbq->ws[i].wait);
232 atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
233 }
234
235 sbq->round_robin = round_robin;
236 return 0;
237}
238EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
239
240void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
241{
242 sbq->wake_batch = sbq_calc_wake_batch(depth);
243 sbitmap_resize(&sbq->sb, depth);
244}
245EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
246
247int __sbitmap_queue_get(struct sbitmap_queue *sbq)
248{
249 unsigned int hint, depth;
250 int nr;
251
252 hint = this_cpu_read(*sbq->alloc_hint);
253 depth = READ_ONCE(sbq->sb.depth);
254 if (unlikely(hint >= depth)) {
255 hint = depth ? prandom_u32() % depth : 0;
256 this_cpu_write(*sbq->alloc_hint, hint);
257 }
258 nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);
259
260 if (nr == -1) {
261 /* If the map is full, a hint won't do us much good. */
262 this_cpu_write(*sbq->alloc_hint, 0);
263 } else if (nr == hint || unlikely(sbq->round_robin)) {
264 /* Only update the hint if we used it. */
265 hint = nr + 1;
266 if (hint >= depth - 1)
267 hint = 0;
268 this_cpu_write(*sbq->alloc_hint, hint);
269 }
270
271 return nr;
272}
273EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
274
275static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
276{
277 int i, wake_index;
278
279 wake_index = atomic_read(&sbq->wake_index);
280 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
281 struct sbq_wait_state *ws = &sbq->ws[wake_index];
282
283 if (waitqueue_active(&ws->wait)) {
284 int o = atomic_read(&sbq->wake_index);
285
286 if (wake_index != o)
287 atomic_cmpxchg(&sbq->wake_index, o, wake_index);
288 return ws;
289 }
290
291 wake_index = sbq_index_inc(wake_index);
292 }
293
294 return NULL;
295}
296
297static void sbq_wake_up(struct sbitmap_queue *sbq)
298{
299 struct sbq_wait_state *ws;
300 int wait_cnt;
301
302 /* Ensure that the wait list checks occur after clear_bit(). */
303 smp_mb();
304
305 ws = sbq_wake_ptr(sbq);
306 if (!ws)
307 return;
308
309 wait_cnt = atomic_dec_return(&ws->wait_cnt);
310 if (unlikely(wait_cnt < 0))
311 wait_cnt = atomic_inc_return(&ws->wait_cnt);
312 if (wait_cnt == 0) {
313 atomic_add(sbq->wake_batch, &ws->wait_cnt);
314 sbq_index_atomic_inc(&sbq->wake_index);
315 wake_up(&ws->wait);
316 }
317}
318
319void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
320 unsigned int cpu)
321{
322 sbitmap_clear_bit(&sbq->sb, nr);
323 sbq_wake_up(sbq);
324 if (likely(!sbq->round_robin && nr < sbq->sb.depth))
325 *per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
326}
327EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
328
329void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
330{
331 int i, wake_index;
332
333 /*
334 * Make sure all changes prior to this are visible from other CPUs.
335 */
336 smp_mb();
337 wake_index = atomic_read(&sbq->wake_index);
338 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
339 struct sbq_wait_state *ws = &sbq->ws[wake_index];
340
341 if (waitqueue_active(&ws->wait))
342 wake_up(&ws->wait);
343
344 wake_index = sbq_index_inc(wake_index);
345 }
346}
347EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
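
Taken together, the sbitmap_queue functions above hand out and recycle small integer "tags". A hedged usage sketch (editorial; the surrounding tag consumer is hypothetical, error handling is trimmed, and teardown helpers such as sbitmap_queue_free() are assumed to live in the accompanying header):

struct sbitmap_queue tags;
unsigned int cpu;
int tag;

if (sbitmap_queue_init_node(&tags, 128, -1, false, GFP_KERNEL, NUMA_NO_NODE))
	return -ENOMEM;

cpu = get_cpu();
tag = __sbitmap_queue_get(&tags);	/* -1 when all 128 bits are in use */
put_cpu();

if (tag >= 0) {
	/* ... use the tag ... */
	sbitmap_queue_clear(&tags, tag, cpu);	/* clears the bit, wakes a batched waiter */
}
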
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 9c5fe8110413..7e35fc450c5b 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -1,6 +1,7 @@
1#include <linux/compiler.h> 1#include <linux/compiler.h>
2#include <linux/export.h> 2#include <linux/export.h>
3#include <linux/kasan-checks.h> 3#include <linux/kasan-checks.h>
4#include <linux/thread_info.h>
4#include <linux/uaccess.h> 5#include <linux/uaccess.h>
5#include <linux/kernel.h> 6#include <linux/kernel.h>
6#include <linux/errno.h> 7#include <linux/errno.h>
@@ -111,6 +112,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
111 long retval; 112 long retval;
112 113
113 kasan_check_write(dst, count); 114 kasan_check_write(dst, count);
115 check_object_size(dst, count, false);
114 user_access_begin(); 116 user_access_begin();
115 retval = do_strncpy_from_user(dst, src, count, max); 117 retval = do_strncpy_from_user(dst, src, count, max);
116 user_access_end(); 118 user_access_end();
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 93f45011a59d..94346b4d8984 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5485,6 +5485,7 @@ static struct sk_buff *populate_skb(char *buf, int size)
5485 skb->hash = SKB_HASH; 5485 skb->hash = SKB_HASH;
5486 skb->queue_mapping = SKB_QUEUE_MAP; 5486 skb->queue_mapping = SKB_QUEUE_MAP;
5487 skb->vlan_tci = SKB_VLAN_TCI; 5487 skb->vlan_tci = SKB_VLAN_TCI;
5488 skb->vlan_proto = htons(ETH_P_IP);
5488 skb->dev = &dev; 5489 skb->dev = &dev;
5489 skb->dev->ifindex = SKB_DEV_IFINDEX; 5490 skb->dev->ifindex = SKB_DEV_IFINDEX;
5490 skb->dev->type = SKB_DEV_TYPE; 5491 skb->dev->type = SKB_DEV_TYPE;
diff --git a/lib/win_minmax.c b/lib/win_minmax.c
new file mode 100644
index 000000000000..c8420d404926
--- /dev/null
+++ b/lib/win_minmax.c
@@ -0,0 +1,98 @@
1/**
2 * lib/win_minmax.c: windowed min/max tracker
3 *
4 * Kathleen Nichols' algorithm for tracking the minimum (or maximum)
5 * value of a data stream over some fixed time interval. (E.g.,
6 * the minimum RTT over the past five minutes.) It uses constant
7 * space and constant time per update yet almost always delivers
8 * the same minimum as an implementation that has to keep all the
9 * data in the window.
10 *
11 * The algorithm keeps track of the best, 2nd best & 3rd best min
12 * values, maintaining an invariant that the measurement time of
13 * the n'th best >= n-1'th best. It also makes sure that the three
14 * values are widely separated in the time window since that bounds
15 * the worst-case error when the data is monotonically increasing
16 * over the window.
17 *
18 * Upon getting a new min, we can forget everything earlier because
19 * it has no value - the new min is <= everything else in the window
20 * by definition and it's the most recent. So we restart fresh on
21 * every new min and overwrite the 2nd & 3rd choices. The same property
22 * holds for 2nd & 3rd best.
23 */
24#include <linux/module.h>
25#include <linux/win_minmax.h>
26
27/* As time advances, update the 1st, 2nd, and 3rd choices. */
28static u32 minmax_subwin_update(struct minmax *m, u32 win,
29 const struct minmax_sample *val)
30{
31 u32 dt = val->t - m->s[0].t;
32
33 if (unlikely(dt > win)) {
34 /*
35 * Passed entire window without a new val so make 2nd
36 * choice the new val & 3rd choice the new 2nd choice.
37 * We may have to iterate this since our 2nd choice
38 * may also be outside the window (we checked on entry
39 * that the third choice was in the window).
40 */
41 m->s[0] = m->s[1];
42 m->s[1] = m->s[2];
43 m->s[2] = *val;
44 if (unlikely(val->t - m->s[0].t > win)) {
45 m->s[0] = m->s[1];
46 m->s[1] = m->s[2];
47 m->s[2] = *val;
48 }
49 } else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) {
50 /*
51 * We've passed a quarter of the window without a new val
52 * so take a 2nd choice from the 2nd quarter of the window.
53 */
54 m->s[2] = m->s[1] = *val;
55 } else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) {
56 /*
57 * We've passed half the window without finding a new val
58 * so take a 3rd choice from the last half of the window.
59 */
60 m->s[2] = *val;
61 }
62 return m->s[0].v;
63}
64
65/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */
66u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas)
67{
68 struct minmax_sample val = { .t = t, .v = meas };
69
70 if (unlikely(val.v >= m->s[0].v) || /* found new max? */
71 unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */
72 return minmax_reset(m, t, meas); /* forget earlier samples */
73
74 if (unlikely(val.v >= m->s[1].v))
75 m->s[2] = m->s[1] = val;
76 else if (unlikely(val.v >= m->s[2].v))
77 m->s[2] = val;
78
79 return minmax_subwin_update(m, win, &val);
80}
81EXPORT_SYMBOL(minmax_running_max);
82
83/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */
84u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas)
85{
86 struct minmax_sample val = { .t = t, .v = meas };
87
88 if (unlikely(val.v <= m->s[0].v) || /* found new min? */
89 unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */
90 return minmax_reset(m, t, meas); /* forget earlier samples */
91
92 if (unlikely(val.v <= m->s[1].v))
93 m->s[2] = m->s[1] = val;
94 else if (unlikely(val.v <= m->s[2].v))
95 m->s[2] = val;
96
97 return minmax_subwin_update(m, win, &val);
98}
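
As a usage sketch of the tracker above (editorial; it assumes the minmax_reset() and minmax_get() helpers from the win_minmax.h header added alongside this file, and the now/rtt_us variables and jiffies-based window are hypothetical):

struct minmax min_rtt;
u32 cur_min;
u32 win = 10 * HZ;	/* window length, in the same units as the timestamps */

minmax_reset(&min_rtt, now, rtt_us);		/* seed with the first sample */

/* on every new measurement: */
minmax_running_min(&min_rtt, win, now, rtt_us);

/* current windowed minimum at any point: */
cur_min = minmax_get(&min_rtt);
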