author    Linus Torvalds <torvalds@linux-foundation.org>	2014-08-07 00:14:42 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>	2014-08-07 00:14:42 -0400
commit    33caee39925b887a99a2400dc5c980097c3573f9
tree      8e68ad97e1fee88c4a3f31453041f8d139f2027e
parent    6456a0438b984186a0c9c8ecc9fe3d97b7ac3613
parent    f84223087402c45179be5e7060c5736c17a7b271
Merge branch 'akpm' (patchbomb from Andrew Morton)
Merge incoming from Andrew Morton:
 - Various misc things.
 - arch/sh updates.
 - Part of ocfs2.  Review is slow.
 - Slab updates.
 - Most of -mm.
 - printk updates.
 - lib/ updates.
 - checkpatch updates.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (226 commits)
  checkpatch: update $declaration_macros, add uninitialized_var
  checkpatch: warn on missing spaces in broken up quoted
  checkpatch: fix false positives for --strict "space after cast" test
  checkpatch: fix false positive MISSING_BREAK warnings with --file
  checkpatch: add test for native c90 types in unusual order
  checkpatch: add signed generic types
  checkpatch: add short int to c variable types
  checkpatch: add for_each tests to indentation and brace tests
  checkpatch: fix brace style misuses of else and while
  checkpatch: add --fix option for a couple OPEN_BRACE misuses
  checkpatch: use the correct indentation for which()
  checkpatch: add fix_insert_line and fix_delete_line helpers
  checkpatch: add ability to insert and delete lines to patch/file
  checkpatch: add an index variable for fixed lines
  checkpatch: warn on break after goto or return with same tab indentation
  checkpatch: emit a warning on file add/move/delete
  checkpatch: add test for commit id formatting style in commit log
  checkpatch: emit fewer kmalloc_array/kcalloc conversion warnings
  checkpatch: improve "no space after cast" test
  checkpatch: allow multiple const * types
  ...
-rw-r--r--  Documentation/RCU/whatisRCU.txt | 2
-rw-r--r--  Documentation/kernel-parameters.txt | 8
-rw-r--r--  Documentation/trace/postprocess/trace-vmscan-postprocess.pl | 53
-rw-r--r--  Makefile | 19
-rw-r--r--  arch/arm/mm/dma-mapping.c | 1
-rw-r--r--  arch/ia64/mm/init.c | 3
-rw-r--r--  arch/powerpc/kvm/Makefile | 1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 19
-rw-r--r--  arch/powerpc/kvm/book3s_hv_cma.c | 240
-rw-r--r--  arch/powerpc/kvm/book3s_hv_cma.h | 27
-rw-r--r--  arch/powerpc/mm/mem.c | 3
-rw-r--r--  arch/score/include/uapi/asm/ptrace.h | 11
-rw-r--r--  arch/sh/drivers/dma/Kconfig | 5
-rw-r--r--  arch/sh/include/asm/io_noioport.h | 11
-rw-r--r--  arch/sh/include/cpu-sh4/cpu/dma-register.h | 1
-rw-r--r--  arch/sh/include/cpu-sh4a/cpu/dma.h | 3
-rw-r--r--  arch/sh/kernel/cpu/sh4a/clock-sh7724.c | 4
-rw-r--r--  arch/sh/kernel/time.c | 4
-rw-r--r--  arch/sh/mm/asids-debugfs.c | 4
-rw-r--r--  arch/sh/mm/init.c | 5
-rw-r--r--  arch/tile/kernel/module.c | 2
-rw-r--r--  arch/x86/mm/fault.c | 3
-rw-r--r--  arch/x86/mm/init_32.c | 3
-rw-r--r--  arch/x86/mm/init_64.c | 3
-rw-r--r--  drivers/ata/Kconfig | 1
-rw-r--r--  drivers/ata/libata-core.c | 72
-rw-r--r--  drivers/base/Kconfig | 10
-rw-r--r--  drivers/base/dma-contiguous.c | 220
-rw-r--r--  drivers/base/memory.c | 30
-rw-r--r--  drivers/base/node.c | 2
-rw-r--r--  drivers/block/zram/zram_drv.c | 71
-rw-r--r--  drivers/block/zram/zram_drv.h | 29
-rw-r--r--  drivers/firmware/memmap.c | 6
-rw-r--r--  drivers/gpu/drm/drm_hashtab.c | 2
-rw-r--r--  drivers/hwmon/asus_atk0110.c | 2
-rw-r--r--  drivers/lguest/core.c | 7
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2
-rw-r--r--  drivers/staging/android/binder.c | 4
-rw-r--r--  drivers/staging/lustre/lustre/libcfs/hash.c | 4
-rw-r--r--  drivers/tty/sysrq.c | 2
-rw-r--r--  fs/fscache/main.c | 4
-rw-r--r--  fs/logfs/readwrite.c | 15
-rw-r--r--  fs/namespace.c | 2
-rw-r--r--  fs/notify/fanotify/fanotify.c | 11
-rw-r--r--  fs/notify/fanotify/fanotify_user.c | 14
-rw-r--r--  fs/notify/inode_mark.c | 2
-rw-r--r--  fs/notify/inotify/inotify_fsnotify.c | 2
-rw-r--r--  fs/notify/inotify/inotify_user.c | 4
-rw-r--r--  fs/notify/notification.c | 37
-rw-r--r--  fs/notify/vfsmount_mark.c | 2
-rw-r--r--  fs/ntfs/file.c | 3
-rw-r--r--  fs/ocfs2/alloc.c | 15
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c | 5
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 4
-rw-r--r--  fs/ocfs2/move_extents.c | 2
-rw-r--r--  fs/ocfs2/refcounttree.c | 2
-rw-r--r--  fs/ocfs2/slot_map.c | 2
-rw-r--r--  fs/proc/meminfo.c | 2
-rw-r--r--  fs/proc/task_mmu.c | 27
-rw-r--r--  fs/squashfs/file_direct.c | 2
-rw-r--r--  fs/squashfs/super.c | 5
-rw-r--r--  include/linux/bitmap.h | 62
-rw-r--r--  include/linux/byteorder/generic.h | 2
-rw-r--r--  include/linux/cma.h | 27
-rw-r--r--  include/linux/dma-contiguous.h | 11
-rw-r--r--  include/linux/fs.h | 2
-rw-r--r--  include/linux/fsnotify_backend.h | 14
-rw-r--r--  include/linux/gfp.h | 2
-rw-r--r--  include/linux/glob.h | 9
-rw-r--r--  include/linux/highmem.h | 2
-rw-r--r--  include/linux/huge_mm.h | 4
-rw-r--r--  include/linux/hugetlb.h | 1
-rw-r--r--  include/linux/kernel.h | 5
-rw-r--r--  include/linux/klist.h | 2
-rw-r--r--  include/linux/list.h | 14
-rw-r--r--  include/linux/memblock.h | 4
-rw-r--r--  include/linux/memory_hotplug.h | 10
-rw-r--r--  include/linux/mmdebug.h | 2
-rw-r--r--  include/linux/mmu_notifier.h | 6
-rw-r--r--  include/linux/mmzone.h | 219
-rw-r--r--  include/linux/nodemask.h | 11
-rw-r--r--  include/linux/oom.h | 4
-rw-r--r--  include/linux/page-flags.h | 21
-rw-r--r--  include/linux/pagemap.h | 3
-rw-r--r--  include/linux/printk.h | 2
-rw-r--r--  include/linux/rculist.h | 8
-rw-r--r--  include/linux/swap.h | 1
-rw-r--r--  include/linux/vmalloc.h | 2
-rw-r--r--  include/linux/zbud.h | 2
-rw-r--r--  include/linux/zlib.h | 118
-rw-r--r--  include/linux/zpool.h | 106
-rw-r--r--  include/trace/events/migrate.h | 1
-rw-r--r--  include/trace/events/pagemap.h | 16
-rw-r--r--  init/Kconfig | 46
-rw-r--r--  kernel/auditfilter.c | 4
-rw-r--r--  kernel/exit.c | 1
-rw-r--r--  kernel/printk/printk.c | 157
-rw-r--r--  kernel/smp.c | 2
-rw-r--r--  kernel/sysctl.c | 9
-rw-r--r--  kernel/watchdog.c | 10
-rw-r--r--  lib/Kconfig | 33
-rw-r--r--  lib/Kconfig.debug | 2
-rw-r--r--  lib/Makefile | 2
-rw-r--r--  lib/bitmap.c | 111
-rw-r--r--  lib/cmdline.c | 15
-rw-r--r--  lib/glob.c | 287
-rw-r--r--  lib/klist.c | 6
-rw-r--r--  lib/list_sort.c | 71
-rw-r--r--  lib/string_helpers.c | 15
-rw-r--r--  lib/test-kstrtox.c | 2
-rw-r--r--  lib/zlib_deflate/deflate.c | 143
-rw-r--r--  lib/zlib_inflate/inflate.c | 132
-rw-r--r--  mm/Kconfig | 54
-rw-r--r--  mm/Makefile | 2
-rw-r--r--  mm/cma.c | 335
-rw-r--r--  mm/filemap.c | 27
-rw-r--r--  mm/gup.c | 18
-rw-r--r--  mm/highmem.c | 86
-rw-r--r--  mm/huge_memory.c | 38
-rw-r--r--  mm/hugetlb.c | 129
-rw-r--r--  mm/hwpoison-inject.c | 3
-rw-r--r--  mm/internal.h | 2
-rw-r--r--  mm/madvise.c | 3
-rw-r--r--  mm/memcontrol.c | 416
-rw-r--r--  mm/memory-failure.c | 10
-rw-r--r--  mm/memory.c | 70
-rw-r--r--  mm/memory_hotplug.c | 45
-rw-r--r--  mm/mlock.c | 9
-rw-r--r--  mm/mmap.c | 5
-rw-r--r--  mm/mmu_notifier.c | 40
-rw-r--r--  mm/oom_kill.c | 34
-rw-r--r--  mm/page-writeback.c | 5
-rw-r--r--  mm/page_alloc.c | 159
-rw-r--r--  mm/readahead.c | 3
-rw-r--r--  mm/shmem.c | 39
-rw-r--r--  mm/slab.c | 514
-rw-r--r--  mm/slab.h | 24
-rw-r--r--  mm/slab_common.c | 101
-rw-r--r--  mm/slub.c | 221
-rw-r--r--  mm/swap.c | 18
-rw-r--r--  mm/util.c | 102
-rw-r--r--  mm/vmalloc.c | 30
-rw-r--r--  mm/vmscan.c | 274
-rw-r--r--  mm/vmstat.c | 9
-rw-r--r--  mm/zbud.c | 98
-rw-r--r--  mm/zpool.c | 364
-rw-r--r--  mm/zsmalloc.c | 86
-rw-r--r--  mm/zswap.c | 75
-rw-r--r--  net/batman-adv/fragmentation.c | 2
-rw-r--r--  net/bridge/br_multicast.c | 2
-rw-r--r--  net/ipv4/fib_trie.c | 2
-rw-r--r--  net/ipv6/addrlabel.c | 2
-rw-r--r--  net/xfrm/xfrm_policy.c | 4
-rwxr-xr-x  scripts/checkpatch.pl | 581
156 files changed, 3930 insertions, 2919 deletions
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 49b8551a3b68..e48c57f1943b 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -818,7 +818,7 @@ RCU pointer/list update:
818 list_add_tail_rcu 818 list_add_tail_rcu
819 list_del_rcu 819 list_del_rcu
820 list_replace_rcu 820 list_replace_rcu
821 hlist_add_after_rcu 821 hlist_add_behind_rcu
822 hlist_add_before_rcu 822 hlist_add_before_rcu
823 hlist_add_head_rcu 823 hlist_add_head_rcu
824 hlist_del_rcu 824 hlist_del_rcu
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 883901b9ac4f..9344d833b7ea 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1716,8 +1716,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1716 7 (KERN_DEBUG) debug-level messages 1716 7 (KERN_DEBUG) debug-level messages
1717 1717
1718 log_buf_len=n[KMG] Sets the size of the printk ring buffer, 1718 log_buf_len=n[KMG] Sets the size of the printk ring buffer,
1719 in bytes. n must be a power of two. The default 1719 in bytes. n must be a power of two and greater
1720 size is set in the kernel config file. 1720 than the minimal size. The minimal size is defined
1721 by LOG_BUF_SHIFT kernel config parameter. There is
1722 also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter
1723 that allows to increase the default size depending on
1724 the number of CPUs. See init/Kconfig for more details.
1721 1725
1722 logo.nologo [FB] Disables display of the built-in Linux logo. 1726 logo.nologo [FB] Disables display of the built-in Linux logo.
1723 This may be used to provide more screen space for 1727 This may be used to provide more screen space for
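For illustration only (not part of the patch): with the wording added above, a boot command line asking for a 4 MiB ring buffer would pass a power-of-two size using the documented K/M/G suffixes, e.g.

    log_buf_len=4M

and the value remains subject to the LOG_BUF_SHIFT minimum and the CPU-count-based default from CONFIG_LOG_CPU_MAX_BUF_SHIFT described in the new text.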
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
index 78c9a7b2b58f..8f961ef2b457 100644
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -47,6 +47,10 @@ use constant HIGH_KSWAPD_REWAKEUP => 21;
47use constant HIGH_NR_SCANNED => 22; 47use constant HIGH_NR_SCANNED => 22;
48use constant HIGH_NR_TAKEN => 23; 48use constant HIGH_NR_TAKEN => 23;
49use constant HIGH_NR_RECLAIMED => 24; 49use constant HIGH_NR_RECLAIMED => 24;
50use constant HIGH_NR_FILE_SCANNED => 25;
51use constant HIGH_NR_ANON_SCANNED => 26;
52use constant HIGH_NR_FILE_RECLAIMED => 27;
53use constant HIGH_NR_ANON_RECLAIMED => 28;
50 54
51my %perprocesspid; 55my %perprocesspid;
52my %perprocess; 56my %perprocess;
@@ -56,14 +60,18 @@ my $opt_read_procstat;
56 60
57my $total_wakeup_kswapd; 61my $total_wakeup_kswapd;
58my ($total_direct_reclaim, $total_direct_nr_scanned); 62my ($total_direct_reclaim, $total_direct_nr_scanned);
63my ($total_direct_nr_file_scanned, $total_direct_nr_anon_scanned);
59my ($total_direct_latency, $total_kswapd_latency); 64my ($total_direct_latency, $total_kswapd_latency);
60my ($total_direct_nr_reclaimed); 65my ($total_direct_nr_reclaimed);
66my ($total_direct_nr_file_reclaimed, $total_direct_nr_anon_reclaimed);
61my ($total_direct_writepage_file_sync, $total_direct_writepage_file_async); 67my ($total_direct_writepage_file_sync, $total_direct_writepage_file_async);
62my ($total_direct_writepage_anon_sync, $total_direct_writepage_anon_async); 68my ($total_direct_writepage_anon_sync, $total_direct_writepage_anon_async);
63my ($total_kswapd_nr_scanned, $total_kswapd_wake); 69my ($total_kswapd_nr_scanned, $total_kswapd_wake);
70my ($total_kswapd_nr_file_scanned, $total_kswapd_nr_anon_scanned);
64my ($total_kswapd_writepage_file_sync, $total_kswapd_writepage_file_async); 71my ($total_kswapd_writepage_file_sync, $total_kswapd_writepage_file_async);
65my ($total_kswapd_writepage_anon_sync, $total_kswapd_writepage_anon_async); 72my ($total_kswapd_writepage_anon_sync, $total_kswapd_writepage_anon_async);
66my ($total_kswapd_nr_reclaimed); 73my ($total_kswapd_nr_reclaimed);
74my ($total_kswapd_nr_file_reclaimed, $total_kswapd_nr_anon_reclaimed);
67 75
68# Catch sigint and exit on request 76# Catch sigint and exit on request
69my $sigint_report = 0; 77my $sigint_report = 0;
@@ -374,6 +382,7 @@ EVENT_PROCESS:
374 } 382 }
375 my $isolate_mode = $1; 383 my $isolate_mode = $1;
376 my $nr_scanned = $4; 384 my $nr_scanned = $4;
385 my $file = $6;
377 386
378 # To closer match vmstat scanning statistics, only count isolate_both 387 # To closer match vmstat scanning statistics, only count isolate_both
379 # and isolate_inactive as scanning. isolate_active is rotation 388 # and isolate_inactive as scanning. isolate_active is rotation
@@ -382,6 +391,11 @@ EVENT_PROCESS:
382 # isolate_both == 3 391 # isolate_both == 3
383 if ($isolate_mode != 2) { 392 if ($isolate_mode != 2) {
384 $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; 393 $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
394 if ($file == 1) {
395 $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned;
396 } else {
397 $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned;
398 }
385 } 399 }
386 } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") { 400 } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
387 $details = $6; 401 $details = $6;
@@ -391,8 +405,19 @@ EVENT_PROCESS:
391 print " $regex_lru_shrink_inactive/o\n"; 405 print " $regex_lru_shrink_inactive/o\n";
392 next; 406 next;
393 } 407 }
408
394 my $nr_reclaimed = $4; 409 my $nr_reclaimed = $4;
410 my $flags = $6;
411 my $file = 0;
412 if ($flags =~ /RECLAIM_WB_FILE/) {
413 $file = 1;
414 }
395 $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed; 415 $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed;
416 if ($file) {
417 $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED} += $nr_reclaimed;
418 } else {
419 $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED} += $nr_reclaimed;
420 }
396 } elsif ($tracepoint eq "mm_vmscan_writepage") { 421 } elsif ($tracepoint eq "mm_vmscan_writepage") {
397 $details = $6; 422 $details = $6;
398 if ($details !~ /$regex_writepage/o) { 423 if ($details !~ /$regex_writepage/o) {
@@ -493,7 +518,11 @@ sub dump_stats {
493 $total_direct_reclaim += $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}; 518 $total_direct_reclaim += $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN};
494 $total_wakeup_kswapd += $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}; 519 $total_wakeup_kswapd += $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
495 $total_direct_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED}; 520 $total_direct_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
521 $total_direct_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
522 $total_direct_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
496 $total_direct_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED}; 523 $total_direct_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
524 $total_direct_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
525 $total_direct_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
497 $total_direct_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; 526 $total_direct_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
498 $total_direct_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; 527 $total_direct_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
499 $total_direct_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}; 528 $total_direct_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -513,7 +542,11 @@ sub dump_stats {
513 $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}, 542 $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN},
514 $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}, 543 $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD},
515 $stats{$process_pid}->{HIGH_NR_SCANNED}, 544 $stats{$process_pid}->{HIGH_NR_SCANNED},
545 $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
546 $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
516 $stats{$process_pid}->{HIGH_NR_RECLAIMED}, 547 $stats{$process_pid}->{HIGH_NR_RECLAIMED},
548 $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
549 $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
517 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}, 550 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
518 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}, 551 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC},
519 $this_reclaim_delay / 1000); 552 $this_reclaim_delay / 1000);
@@ -552,7 +585,11 @@ sub dump_stats {
552 585
553 $total_kswapd_wake += $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}; 586 $total_kswapd_wake += $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE};
554 $total_kswapd_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED}; 587 $total_kswapd_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
588 $total_kswapd_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
589 $total_kswapd_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
555 $total_kswapd_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED}; 590 $total_kswapd_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
591 $total_kswapd_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
592 $total_kswapd_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
556 $total_kswapd_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; 593 $total_kswapd_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
557 $total_kswapd_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; 594 $total_kswapd_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
558 $total_kswapd_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}; 595 $total_kswapd_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -563,7 +600,11 @@ sub dump_stats {
563 $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}, 600 $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE},
564 $stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP}, 601 $stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP},
565 $stats{$process_pid}->{HIGH_NR_SCANNED}, 602 $stats{$process_pid}->{HIGH_NR_SCANNED},
603 $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
604 $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
566 $stats{$process_pid}->{HIGH_NR_RECLAIMED}, 605 $stats{$process_pid}->{HIGH_NR_RECLAIMED},
606 $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
607 $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
567 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}, 608 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
568 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}); 609 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC});
569 610
@@ -594,7 +635,11 @@ sub dump_stats {
594 print "\nSummary\n"; 635 print "\nSummary\n";
595 print "Direct reclaims: $total_direct_reclaim\n"; 636 print "Direct reclaims: $total_direct_reclaim\n";
596 print "Direct reclaim pages scanned: $total_direct_nr_scanned\n"; 637 print "Direct reclaim pages scanned: $total_direct_nr_scanned\n";
638 print "Direct reclaim file pages scanned: $total_direct_nr_file_scanned\n";
639 print "Direct reclaim anon pages scanned: $total_direct_nr_anon_scanned\n";
597 print "Direct reclaim pages reclaimed: $total_direct_nr_reclaimed\n"; 640 print "Direct reclaim pages reclaimed: $total_direct_nr_reclaimed\n";
641 print "Direct reclaim file pages reclaimed: $total_direct_nr_file_reclaimed\n";
642 print "Direct reclaim anon pages reclaimed: $total_direct_nr_anon_reclaimed\n";
598 print "Direct reclaim write file sync I/O: $total_direct_writepage_file_sync\n"; 643 print "Direct reclaim write file sync I/O: $total_direct_writepage_file_sync\n";
599 print "Direct reclaim write anon sync I/O: $total_direct_writepage_anon_sync\n"; 644 print "Direct reclaim write anon sync I/O: $total_direct_writepage_anon_sync\n";
600 print "Direct reclaim write file async I/O: $total_direct_writepage_file_async\n"; 645 print "Direct reclaim write file async I/O: $total_direct_writepage_file_async\n";
@@ -604,7 +649,11 @@ sub dump_stats {
604 print "\n"; 649 print "\n";
605 print "Kswapd wakeups: $total_kswapd_wake\n"; 650 print "Kswapd wakeups: $total_kswapd_wake\n";
606 print "Kswapd pages scanned: $total_kswapd_nr_scanned\n"; 651 print "Kswapd pages scanned: $total_kswapd_nr_scanned\n";
652 print "Kswapd file pages scanned: $total_kswapd_nr_file_scanned\n";
653 print "Kswapd anon pages scanned: $total_kswapd_nr_anon_scanned\n";
607 print "Kswapd pages reclaimed: $total_kswapd_nr_reclaimed\n"; 654 print "Kswapd pages reclaimed: $total_kswapd_nr_reclaimed\n";
655 print "Kswapd file pages reclaimed: $total_kswapd_nr_file_reclaimed\n";
656 print "Kswapd anon pages reclaimed: $total_kswapd_nr_anon_reclaimed\n";
608 print "Kswapd reclaim write file sync I/O: $total_kswapd_writepage_file_sync\n"; 657 print "Kswapd reclaim write file sync I/O: $total_kswapd_writepage_file_sync\n";
609 print "Kswapd reclaim write anon sync I/O: $total_kswapd_writepage_anon_sync\n"; 658 print "Kswapd reclaim write anon sync I/O: $total_kswapd_writepage_anon_sync\n";
610 print "Kswapd reclaim write file async I/O: $total_kswapd_writepage_file_async\n"; 659 print "Kswapd reclaim write file async I/O: $total_kswapd_writepage_file_async\n";
@@ -629,7 +678,11 @@ sub aggregate_perprocesspid() {
629 $perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD} += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}; 678 $perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD} += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
630 $perprocess{$process}->{HIGH_KSWAPD_REWAKEUP} += $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP}; 679 $perprocess{$process}->{HIGH_KSWAPD_REWAKEUP} += $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP};
631 $perprocess{$process}->{HIGH_NR_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_SCANNED}; 680 $perprocess{$process}->{HIGH_NR_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_SCANNED};
681 $perprocess{$process}->{HIGH_NR_FILE_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED};
682 $perprocess{$process}->{HIGH_NR_ANON_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED};
632 $perprocess{$process}->{HIGH_NR_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED}; 683 $perprocess{$process}->{HIGH_NR_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED};
684 $perprocess{$process}->{HIGH_NR_FILE_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
685 $perprocess{$process}->{HIGH_NR_ANON_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
633 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; 686 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
634 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; 687 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
635 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}; 688 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
diff --git a/Makefile b/Makefile
index d0901b46b4bf..a897c50db515 100644
--- a/Makefile
+++ b/Makefile
@@ -621,6 +621,9 @@ else
621KBUILD_CFLAGS += -O2 621KBUILD_CFLAGS += -O2
622endif 622endif
623 623
624# Tell gcc to never replace conditional load with a non-conditional one
625KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0)
626
624ifdef CONFIG_READABLE_ASM 627ifdef CONFIG_READABLE_ASM
625# Disable optimizations that make assembler listings hard to read. 628# Disable optimizations that make assembler listings hard to read.
626# reorder blocks reorders the control in the function 629# reorder blocks reorders the control in the function
@@ -636,6 +639,22 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
636endif 639endif
637 640
638# Handle stack protector mode. 641# Handle stack protector mode.
642#
643# Since kbuild can potentially perform two passes (first with the old
644# .config values and then with updated .config values), we cannot error out
645# if a desired compiler option is unsupported. If we were to error, kbuild
646# could never get to the second pass and actually notice that we changed
647# the option to something that was supported.
648#
649# Additionally, we don't want to fallback and/or silently change which compiler
650# flags will be used, since that leads to producing kernels with different
651# security feature characteristics depending on the compiler used. ("But I
652# selected CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!")
653#
654# The middle ground is to warn here so that the failed option is obvious, but
655# to let the build fail with bad compiler flags so that we can't produce a
656# kernel when there is a CONFIG and compiler mismatch.
657#
639ifdef CONFIG_CC_STACKPROTECTOR_REGULAR 658ifdef CONFIG_CC_STACKPROTECTOR_REGULAR
640 stackp-flag := -fstack-protector 659 stackp-flag := -fstack-protector
641 ifeq ($(call cc-option, $(stackp-flag)),) 660 ifeq ($(call cc-option, $(stackp-flag)),)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1f88db06b133..7a996aaa061e 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -26,6 +26,7 @@
26#include <linux/io.h> 26#include <linux/io.h>
27#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
28#include <linux/sizes.h> 28#include <linux/sizes.h>
29#include <linux/cma.h>
29 30
30#include <asm/memory.h> 31#include <asm/memory.h>
31#include <asm/highmem.h> 32#include <asm/highmem.h>
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 25c350264a41..892d43e32f3b 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -631,7 +631,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
631 631
632 pgdat = NODE_DATA(nid); 632 pgdat = NODE_DATA(nid);
633 633
634 zone = pgdat->node_zones + ZONE_NORMAL; 634 zone = pgdat->node_zones +
635 zone_for_memory(nid, start, size, ZONE_NORMAL);
635 ret = __add_pages(nid, zone, start_pfn, nr_pages); 636 ret = __add_pages(nid, zone, start_pfn, nr_pages);
636 637
637 if (ret) 638 if (ret)
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index ce569b6bf4d8..72905c30082e 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -90,7 +90,6 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
90 book3s_hv_rm_mmu.o \ 90 book3s_hv_rm_mmu.o \
91 book3s_hv_ras.o \ 91 book3s_hv_ras.o \
92 book3s_hv_builtin.o \ 92 book3s_hv_builtin.o \
93 book3s_hv_cma.o \
94 $(kvm-book3s_64-builtin-xics-objs-y) 93 $(kvm-book3s_64-builtin-xics-objs-y)
95endif 94endif
96 95
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 68468d695f12..a01744fc3483 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,6 @@
37#include <asm/ppc-opcode.h> 37#include <asm/ppc-opcode.h>
38#include <asm/cputable.h> 38#include <asm/cputable.h>
39 39
40#include "book3s_hv_cma.h"
41
42/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ 40/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
43#define MAX_LPID_970 63 41#define MAX_LPID_970 63
44 42
@@ -64,10 +62,10 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
64 } 62 }
65 63
66 kvm->arch.hpt_cma_alloc = 0; 64 kvm->arch.hpt_cma_alloc = 0;
67 VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
68 page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); 65 page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
69 if (page) { 66 if (page) {
70 hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); 67 hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
68 memset((void *)hpt, 0, (1 << order));
71 kvm->arch.hpt_cma_alloc = 1; 69 kvm->arch.hpt_cma_alloc = 1;
72 } 70 }
73 71
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7cde8a665205..6cf498a9bc98 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -16,12 +16,14 @@
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/memblock.h> 17#include <linux/memblock.h>
18#include <linux/sizes.h> 18#include <linux/sizes.h>
19#include <linux/cma.h>
19 20
20#include <asm/cputable.h> 21#include <asm/cputable.h>
21#include <asm/kvm_ppc.h> 22#include <asm/kvm_ppc.h>
22#include <asm/kvm_book3s.h> 23#include <asm/kvm_book3s.h>
23 24
24#include "book3s_hv_cma.h" 25#define KVM_CMA_CHUNK_ORDER 18
26
25/* 27/*
26 * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) 28 * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
27 * should be power of 2. 29 * should be power of 2.
@@ -43,6 +45,8 @@ static unsigned long kvm_cma_resv_ratio = 5;
43unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ 45unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
44EXPORT_SYMBOL_GPL(kvm_rma_pages); 46EXPORT_SYMBOL_GPL(kvm_rma_pages);
45 47
48static struct cma *kvm_cma;
49
46/* Work out RMLS (real mode limit selector) field value for a given RMA size. 50/* Work out RMLS (real mode limit selector) field value for a given RMA size.
47 Assumes POWER7 or PPC970. */ 51 Assumes POWER7 or PPC970. */
48static inline int lpcr_rmls(unsigned long rma_size) 52static inline int lpcr_rmls(unsigned long rma_size)
@@ -97,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma()
97 ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); 101 ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
98 if (!ri) 102 if (!ri)
99 return NULL; 103 return NULL;
100 page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages); 104 page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
101 if (!page) 105 if (!page)
102 goto err_out; 106 goto err_out;
103 atomic_set(&ri->use_count, 1); 107 atomic_set(&ri->use_count, 1);
@@ -112,7 +116,7 @@ EXPORT_SYMBOL_GPL(kvm_alloc_rma);
112void kvm_release_rma(struct kvm_rma_info *ri) 116void kvm_release_rma(struct kvm_rma_info *ri)
113{ 117{
114 if (atomic_dec_and_test(&ri->use_count)) { 118 if (atomic_dec_and_test(&ri->use_count)) {
115 kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages); 119 cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
116 kfree(ri); 120 kfree(ri);
117 } 121 }
118} 122}
@@ -131,16 +135,18 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages)
131{ 135{
132 unsigned long align_pages = HPT_ALIGN_PAGES; 136 unsigned long align_pages = HPT_ALIGN_PAGES;
133 137
138 VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
139
134 /* Old CPUs require HPT aligned on a multiple of its size */ 140 /* Old CPUs require HPT aligned on a multiple of its size */
135 if (!cpu_has_feature(CPU_FTR_ARCH_206)) 141 if (!cpu_has_feature(CPU_FTR_ARCH_206))
136 align_pages = nr_pages; 142 align_pages = nr_pages;
137 return kvm_alloc_cma(nr_pages, align_pages); 143 return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
138} 144}
139EXPORT_SYMBOL_GPL(kvm_alloc_hpt); 145EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
140 146
141void kvm_release_hpt(struct page *page, unsigned long nr_pages) 147void kvm_release_hpt(struct page *page, unsigned long nr_pages)
142{ 148{
143 kvm_release_cma(page, nr_pages); 149 cma_release(kvm_cma, page, nr_pages);
144} 150}
145EXPORT_SYMBOL_GPL(kvm_release_hpt); 151EXPORT_SYMBOL_GPL(kvm_release_hpt);
146 152
@@ -179,7 +185,8 @@ void __init kvm_cma_reserve(void)
179 align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; 185 align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
180 186
181 align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); 187 align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
182 kvm_cma_declare_contiguous(selected_size, align_size); 188 cma_declare_contiguous(0, selected_size, 0, align_size,
189 KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
183 } 190 }
184} 191}
185 192
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
deleted file mode 100644
index d9d3d8553d51..000000000000
--- a/arch/powerpc/kvm/book3s_hv_cma.c
+++ /dev/null
@@ -1,240 +0,0 @@
1/*
2 * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
3 * for DMA mapping framework
4 *
5 * Copyright IBM Corporation, 2013
6 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License or (at your optional) any later version of the license.
12 *
13 */
14#define pr_fmt(fmt) "kvm_cma: " fmt
15
16#ifdef CONFIG_CMA_DEBUG
17#ifndef DEBUG
18# define DEBUG
19#endif
20#endif
21
22#include <linux/memblock.h>
23#include <linux/mutex.h>
24#include <linux/sizes.h>
25#include <linux/slab.h>
26
27#include "book3s_hv_cma.h"
28
29struct kvm_cma {
30 unsigned long base_pfn;
31 unsigned long count;
32 unsigned long *bitmap;
33};
34
35static DEFINE_MUTEX(kvm_cma_mutex);
36static struct kvm_cma kvm_cma_area;
37
38/**
39 * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
40 * for kvm hash pagetable
41 * @size: Size of the reserved memory.
42 * @alignment: Alignment for the contiguous memory area
43 *
44 * This function reserves memory for kvm cma area. It should be
45 * called by arch code when early allocator (memblock or bootmem)
46 * is still activate.
47 */
48long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
49{
50 long base_pfn;
51 phys_addr_t addr;
52 struct kvm_cma *cma = &kvm_cma_area;
53
54 pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
55
56 if (!size)
57 return -EINVAL;
58 /*
59 * Sanitise input arguments.
60 * We should be pageblock aligned for CMA.
61 */
62 alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
63 size = ALIGN(size, alignment);
64 /*
65 * Reserve memory
66 * Use __memblock_alloc_base() since
67 * memblock_alloc_base() panic()s.
68 */
69 addr = __memblock_alloc_base(size, alignment, 0);
70 if (!addr) {
71 base_pfn = -ENOMEM;
72 goto err;
73 } else
74 base_pfn = PFN_DOWN(addr);
75
76 /*
77 * Each reserved area must be initialised later, when more kernel
78 * subsystems (like slab allocator) are available.
79 */
80 cma->base_pfn = base_pfn;
81 cma->count = size >> PAGE_SHIFT;
82 pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
83 return 0;
84err:
85 pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
86 return base_pfn;
87}
88
89/**
90 * kvm_alloc_cma() - allocate pages from contiguous area
91 * @nr_pages: Requested number of pages.
92 * @align_pages: Requested alignment in number of pages
93 *
94 * This function allocates memory buffer for hash pagetable.
95 */
96struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
97{
98 int ret;
99 struct page *page = NULL;
100 struct kvm_cma *cma = &kvm_cma_area;
101 unsigned long chunk_count, nr_chunk;
102 unsigned long mask, pfn, pageno, start = 0;
103
104
105 if (!cma || !cma->count)
106 return NULL;
107
108 pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
109 (void *)cma, nr_pages, align_pages);
110
111 if (!nr_pages)
112 return NULL;
113 /*
114 * align mask with chunk size. The bit tracks pages in chunk size
115 */
116 VM_BUG_ON(!is_power_of_2(align_pages));
117 mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
118 BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
119
120 chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
121 nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
122
123 mutex_lock(&kvm_cma_mutex);
124 for (;;) {
125 pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
126 start, nr_chunk, mask);
127 if (pageno >= chunk_count)
128 break;
129
130 pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
131 ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
132 if (ret == 0) {
133 bitmap_set(cma->bitmap, pageno, nr_chunk);
134 page = pfn_to_page(pfn);
135 memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
136 break;
137 } else if (ret != -EBUSY) {
138 break;
139 }
140 pr_debug("%s(): memory range at %p is busy, retrying\n",
141 __func__, pfn_to_page(pfn));
142 /* try again with a bit different memory target */
143 start = pageno + mask + 1;
144 }
145 mutex_unlock(&kvm_cma_mutex);
146 pr_debug("%s(): returned %p\n", __func__, page);
147 return page;
148}
149
150/**
151 * kvm_release_cma() - release allocated pages for hash pagetable
152 * @pages: Allocated pages.
153 * @nr_pages: Number of allocated pages.
154 *
155 * This function releases memory allocated by kvm_alloc_cma().
156 * It returns false when provided pages do not belong to contiguous area and
157 * true otherwise.
158 */
159bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
160{
161 unsigned long pfn;
162 unsigned long nr_chunk;
163 struct kvm_cma *cma = &kvm_cma_area;
164
165 if (!cma || !pages)
166 return false;
167
168 pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
169
170 pfn = page_to_pfn(pages);
171
172 if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
173 return false;
174
175 VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
176 nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
177
178 mutex_lock(&kvm_cma_mutex);
179 bitmap_clear(cma->bitmap,
180 (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
181 nr_chunk);
182 free_contig_range(pfn, nr_pages);
183 mutex_unlock(&kvm_cma_mutex);
184
185 return true;
186}
187
188static int __init kvm_cma_activate_area(unsigned long base_pfn,
189 unsigned long count)
190{
191 unsigned long pfn = base_pfn;
192 unsigned i = count >> pageblock_order;
193 struct zone *zone;
194
195 WARN_ON_ONCE(!pfn_valid(pfn));
196 zone = page_zone(pfn_to_page(pfn));
197 do {
198 unsigned j;
199 base_pfn = pfn;
200 for (j = pageblock_nr_pages; j; --j, pfn++) {
201 WARN_ON_ONCE(!pfn_valid(pfn));
202 /*
203 * alloc_contig_range requires the pfn range
204 * specified to be in the same zone. Make this
205 * simple by forcing the entire CMA resv range
206 * to be in the same zone.
207 */
208 if (page_zone(pfn_to_page(pfn)) != zone)
209 return -EINVAL;
210 }
211 init_cma_reserved_pageblock(pfn_to_page(base_pfn));
212 } while (--i);
213 return 0;
214}
215
216static int __init kvm_cma_init_reserved_areas(void)
217{
218 int bitmap_size, ret;
219 unsigned long chunk_count;
220 struct kvm_cma *cma = &kvm_cma_area;
221
222 pr_debug("%s()\n", __func__);
223 if (!cma->count)
224 return 0;
225 chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
226 bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
227 cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
228 if (!cma->bitmap)
229 return -ENOMEM;
230
231 ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
232 if (ret)
233 goto error;
234 return 0;
235
236error:
237 kfree(cma->bitmap);
238 return ret;
239}
240core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
deleted file mode 100644
index 655144f75fa5..000000000000
--- a/arch/powerpc/kvm/book3s_hv_cma.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/*
2 * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
3 * for DMA mapping framework
4 *
5 * Copyright IBM Corporation, 2013
6 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License or (at your optional) any later version of the license.
12 *
13 */
14
15#ifndef __POWERPC_KVM_CMA_ALLOC_H__
16#define __POWERPC_KVM_CMA_ALLOC_H__
17/*
18 * Both RMA and Hash page allocation will be multiple of 256K.
19 */
20#define KVM_CMA_CHUNK_ORDER 18
21
22extern struct page *kvm_alloc_cma(unsigned long nr_pages,
23 unsigned long align_pages);
24extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
25extern long kvm_cma_declare_contiguous(phys_addr_t size,
26 phys_addr_t alignment) __init;
27#endif
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2c8e90f5789e..e0f7a189c48e 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -128,7 +128,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
128 return -EINVAL; 128 return -EINVAL;
129 129
130 /* this should work for most non-highmem platforms */ 130 /* this should work for most non-highmem platforms */
131 zone = pgdata->node_zones; 131 zone = pgdata->node_zones +
132 zone_for_memory(nid, start, size, 0);
132 133
133 return __add_pages(nid, zone, start_pfn, nr_pages); 134 return __add_pages(nid, zone, start_pfn, nr_pages);
134} 135}
diff --git a/arch/score/include/uapi/asm/ptrace.h b/arch/score/include/uapi/asm/ptrace.h
index f59771a3f127..5c5e794058be 100644
--- a/arch/score/include/uapi/asm/ptrace.h
+++ b/arch/score/include/uapi/asm/ptrace.h
@@ -4,17 +4,6 @@
4#define PTRACE_GETREGS 12 4#define PTRACE_GETREGS 12
5#define PTRACE_SETREGS 13 5#define PTRACE_SETREGS 13
6 6
7#define PC 32
8#define CONDITION 33
9#define ECR 34
10#define EMA 35
11#define CEH 36
12#define CEL 37
13#define COUNTER 38
14#define LDCR 39
15#define STCR 40
16#define PSR 41
17
18#define SINGLESTEP16_INSN 0x7006 7#define SINGLESTEP16_INSN 0x7006
19#define SINGLESTEP32_INSN 0x840C8000 8#define SINGLESTEP32_INSN 0x840C8000
20#define BREAKPOINT16_INSN 0x7002 /* work on SPG300 */ 9#define BREAKPOINT16_INSN 0x7002 /* work on SPG300 */
diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig
index cfd5b90a8628..78bc97b1d027 100644
--- a/arch/sh/drivers/dma/Kconfig
+++ b/arch/sh/drivers/dma/Kconfig
@@ -12,9 +12,8 @@ config SH_DMA_IRQ_MULTI
12 default y if CPU_SUBTYPE_SH7750 || CPU_SUBTYPE_SH7751 || \ 12 default y if CPU_SUBTYPE_SH7750 || CPU_SUBTYPE_SH7751 || \
13 CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \ 13 CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \
14 CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091 || \ 14 CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091 || \
15 CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7764 || \ 15 CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \
16 CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \ 16 CPU_SUBTYPE_SH7785 || CPU_SUBTYPE_SH7760
17 CPU_SUBTYPE_SH7760
18 17
19config SH_DMA_API 18config SH_DMA_API
20 depends on SH_DMA 19 depends on SH_DMA
diff --git a/arch/sh/include/asm/io_noioport.h b/arch/sh/include/asm/io_noioport.h
index 4d48f1436a63..c727e6ddf69e 100644
--- a/arch/sh/include/asm/io_noioport.h
+++ b/arch/sh/include/asm/io_noioport.h
@@ -34,6 +34,17 @@ static inline void outl(unsigned int x, unsigned long port)
34 BUG(); 34 BUG();
35} 35}
36 36
37static inline void __iomem *ioport_map(unsigned long port, unsigned int size)
38{
39 BUG();
40 return NULL;
41}
42
43static inline void ioport_unmap(void __iomem *addr)
44{
45 BUG();
46}
47
37#define inb_p(addr) inb(addr) 48#define inb_p(addr) inb(addr)
38#define inw_p(addr) inw(addr) 49#define inw_p(addr) inw(addr)
39#define inl_p(addr) inl(addr) 50#define inl_p(addr) inl(addr)
diff --git a/arch/sh/include/cpu-sh4/cpu/dma-register.h b/arch/sh/include/cpu-sh4/cpu/dma-register.h
index 02788b6a03b7..9cd81e54056a 100644
--- a/arch/sh/include/cpu-sh4/cpu/dma-register.h
+++ b/arch/sh/include/cpu-sh4/cpu/dma-register.h
@@ -32,7 +32,6 @@
32#define CHCR_TS_HIGH_SHIFT (20 - 2) /* 2 bits for shifted low TS */ 32#define CHCR_TS_HIGH_SHIFT (20 - 2) /* 2 bits for shifted low TS */
33#elif defined(CONFIG_CPU_SUBTYPE_SH7757) || \ 33#elif defined(CONFIG_CPU_SUBTYPE_SH7757) || \
34 defined(CONFIG_CPU_SUBTYPE_SH7763) || \ 34 defined(CONFIG_CPU_SUBTYPE_SH7763) || \
35 defined(CONFIG_CPU_SUBTYPE_SH7764) || \
36 defined(CONFIG_CPU_SUBTYPE_SH7780) || \ 35 defined(CONFIG_CPU_SUBTYPE_SH7780) || \
37 defined(CONFIG_CPU_SUBTYPE_SH7785) 36 defined(CONFIG_CPU_SUBTYPE_SH7785)
38#define CHCR_TS_LOW_MASK 0x00000018 37#define CHCR_TS_LOW_MASK 0x00000018
diff --git a/arch/sh/include/cpu-sh4a/cpu/dma.h b/arch/sh/include/cpu-sh4a/cpu/dma.h
index 89afb650ce25..8ceccceae844 100644
--- a/arch/sh/include/cpu-sh4a/cpu/dma.h
+++ b/arch/sh/include/cpu-sh4a/cpu/dma.h
@@ -14,8 +14,7 @@
14#define DMTE4_IRQ evt2irq(0xb80) 14#define DMTE4_IRQ evt2irq(0xb80)
15#define DMAE0_IRQ evt2irq(0xbc0) /* DMA Error IRQ*/ 15#define DMAE0_IRQ evt2irq(0xbc0) /* DMA Error IRQ*/
16#define SH_DMAC_BASE0 0xFE008020 16#define SH_DMAC_BASE0 0xFE008020
17#elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \ 17#elif defined(CONFIG_CPU_SUBTYPE_SH7763)
18 defined(CONFIG_CPU_SUBTYPE_SH7764)
19#define DMTE0_IRQ evt2irq(0x640) 18#define DMTE0_IRQ evt2irq(0x640)
20#define DMTE4_IRQ evt2irq(0x780) 19#define DMTE4_IRQ evt2irq(0x780)
21#define DMAE0_IRQ evt2irq(0x6c0) 20#define DMAE0_IRQ evt2irq(0x6c0)
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
index f579dd528198..c187b9579c21 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -307,7 +307,7 @@ static struct clk_lookup lookups[] = {
307 CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[HWBLK_TMU0]), 307 CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[HWBLK_TMU0]),
308 CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[HWBLK_TMU1]), 308 CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[HWBLK_TMU1]),
309 309
310 CLKDEV_ICK_ID("fck", "sh-cmt-16.0", &mstp_clks[HWBLK_CMT]), 310 CLKDEV_ICK_ID("fck", "sh-cmt-32.0", &mstp_clks[HWBLK_CMT]),
311 CLKDEV_DEV_ID("sh-wdt.0", &mstp_clks[HWBLK_RWDT]), 311 CLKDEV_DEV_ID("sh-wdt.0", &mstp_clks[HWBLK_RWDT]),
312 CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[HWBLK_DMAC1]), 312 CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[HWBLK_DMAC1]),
313 313
@@ -332,6 +332,8 @@ static struct clk_lookup lookups[] = {
332 CLKDEV_CON_ID("tsif0", &mstp_clks[HWBLK_TSIF]), 332 CLKDEV_CON_ID("tsif0", &mstp_clks[HWBLK_TSIF]),
333 CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[HWBLK_USB1]), 333 CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[HWBLK_USB1]),
334 CLKDEV_DEV_ID("renesas_usbhs.0", &mstp_clks[HWBLK_USB0]), 334 CLKDEV_DEV_ID("renesas_usbhs.0", &mstp_clks[HWBLK_USB0]),
335 CLKDEV_CON_ID("usb1", &mstp_clks[HWBLK_USB1]),
336 CLKDEV_CON_ID("usb0", &mstp_clks[HWBLK_USB0]),
335 CLKDEV_CON_ID("2dg0", &mstp_clks[HWBLK_2DG]), 337 CLKDEV_CON_ID("2dg0", &mstp_clks[HWBLK_2DG]),
336 CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[HWBLK_SDHI0]), 338 CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[HWBLK_SDHI0]),
337 CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[HWBLK_SDHI1]), 339 CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[HWBLK_SDHI1]),
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index 552c8fcf9416..d6d0a986c6e9 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -80,10 +80,8 @@ static int __init rtc_generic_init(void)
80 return -ENODEV; 80 return -ENODEV;
81 81
82 pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); 82 pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
83 if (IS_ERR(pdev))
84 return PTR_ERR(pdev);
85 83
86 return 0; 84 return PTR_ERR_OR_ZERO(pdev);
87} 85}
88module_init(rtc_generic_init); 86module_init(rtc_generic_init);
89 87
diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c
index 74c03ecc4871..ecfc6b0c1da1 100644
--- a/arch/sh/mm/asids-debugfs.c
+++ b/arch/sh/mm/asids-debugfs.c
@@ -67,10 +67,8 @@ static int __init asids_debugfs_init(void)
67 NULL, &asids_debugfs_fops); 67 NULL, &asids_debugfs_fops);
68 if (!asids_dentry) 68 if (!asids_dentry)
69 return -ENOMEM; 69 return -ENOMEM;
70 if (IS_ERR(asids_dentry))
71 return PTR_ERR(asids_dentry);
72 70
73 return 0; 71 return PTR_ERR_OR_ZERO(asids_dentry);
74} 72}
75module_init(asids_debugfs_init); 73module_init(asids_debugfs_init);
76 74
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 2d089fe2cba9..2790b6a64157 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -495,8 +495,9 @@ int arch_add_memory(int nid, u64 start, u64 size)
495 pgdat = NODE_DATA(nid); 495 pgdat = NODE_DATA(nid);
496 496
497 /* We only have ZONE_NORMAL, so this is easy.. */ 497 /* We only have ZONE_NORMAL, so this is easy.. */
498 ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL, 498 ret = __add_pages(nid, pgdat->node_zones +
499 start_pfn, nr_pages); 499 zone_for_memory(nid, start, size, ZONE_NORMAL),
500 start_pfn, nr_pages);
500 if (unlikely(ret)) 501 if (unlikely(ret))
501 printk("%s: Failed, __add_pages() == %d\n", __func__, ret); 502 printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
502 503
diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index 4918d91bc3a6..d19b13e3a59f 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -58,7 +58,7 @@ void *module_alloc(unsigned long size)
58 area->nr_pages = npages; 58 area->nr_pages = npages;
59 area->pages = pages; 59 area->pages = pages;
60 60
61 if (map_vm_area(area, prot_rwx, &pages)) { 61 if (map_vm_area(area, prot_rwx, pages)) {
62 vunmap(area->addr); 62 vunmap(area->addr);
63 goto error; 63 goto error;
64 } 64 }
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1dbade870f90..a24194681513 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1218,7 +1218,8 @@ good_area:
1218 /* 1218 /*
1219 * If for any reason at all we couldn't handle the fault, 1219 * If for any reason at all we couldn't handle the fault,
1220 * make sure we exit gracefully rather than endlessly redo 1220 * make sure we exit gracefully rather than endlessly redo
1221 * the fault: 1221 * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if
1222 * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
1222 */ 1223 */
1223 fault = handle_mm_fault(mm, vma, address, flags); 1224 fault = handle_mm_fault(mm, vma, address, flags);
1224 1225
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index e39504878aec..7d05565ba781 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -825,7 +825,8 @@ void __init mem_init(void)
825int arch_add_memory(int nid, u64 start, u64 size) 825int arch_add_memory(int nid, u64 start, u64 size)
826{ 826{
827 struct pglist_data *pgdata = NODE_DATA(nid); 827 struct pglist_data *pgdata = NODE_DATA(nid);
828 struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; 828 struct zone *zone = pgdata->node_zones +
829 zone_for_memory(nid, start, size, ZONE_HIGHMEM);
829 unsigned long start_pfn = start >> PAGE_SHIFT; 830 unsigned long start_pfn = start >> PAGE_SHIFT;
830 unsigned long nr_pages = size >> PAGE_SHIFT; 831 unsigned long nr_pages = size >> PAGE_SHIFT;
831 832
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index df1a9927ad29..5621c47d7a1a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -691,7 +691,8 @@ static void update_end_of_memory_vars(u64 start, u64 size)
691int arch_add_memory(int nid, u64 start, u64 size) 691int arch_add_memory(int nid, u64 start, u64 size)
692{ 692{
693 struct pglist_data *pgdat = NODE_DATA(nid); 693 struct pglist_data *pgdat = NODE_DATA(nid);
694 struct zone *zone = pgdat->node_zones + ZONE_NORMAL; 694 struct zone *zone = pgdat->node_zones +
695 zone_for_memory(nid, start, size, ZONE_NORMAL);
695 unsigned long start_pfn = start >> PAGE_SHIFT; 696 unsigned long start_pfn = start >> PAGE_SHIFT;
696 unsigned long nr_pages = size >> PAGE_SHIFT; 697 unsigned long nr_pages = size >> PAGE_SHIFT;
697 int ret; 698 int ret;
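Several architectures in this series (ia64, powerpc, sh and the x86 hunks above) get the same treatment: arch_add_memory() stops hard-coding ZONE_NORMAL/ZONE_HIGHMEM and instead asks the new zone_for_memory() helper which zone a hot-added range belongs to. A condensed sketch of the resulting shape, distilled from the hunks above rather than copied from any single architecture (error handling and arch-specific bookkeeping omitted):

    /* Condensed illustration of the post-series arch_add_memory() pattern. */
    int arch_add_memory(int nid, u64 start, u64 size)
    {
    	struct pglist_data *pgdat = NODE_DATA(nid);
    	/* zone_for_memory() chooses the target zone instead of the
    	 * architecture hard-coding ZONE_NORMAL here. */
    	struct zone *zone = pgdat->node_zones +
    			    zone_for_memory(nid, start, size, ZONE_NORMAL);
    	unsigned long start_pfn = start >> PAGE_SHIFT;
    	unsigned long nr_pages = size >> PAGE_SHIFT;

    	return __add_pages(nid, zone, start_pfn, nr_pages);
    }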
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index e65d400efd44..e1b92788c225 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -16,6 +16,7 @@ menuconfig ATA
16 depends on BLOCK 16 depends on BLOCK
17 depends on !(M32R || M68K || S390) || BROKEN 17 depends on !(M32R || M68K || S390) || BROKEN
18 select SCSI 18 select SCSI
19 select GLOB
19 ---help--- 20 ---help---
20 If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or 21 If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or
21 any other ATA device under Linux, say Y and make sure that you know 22 any other ATA device under Linux, say Y and make sure that you know
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 677c0c1b03bd..dbdc5d32343f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -59,6 +59,7 @@
59#include <linux/async.h> 59#include <linux/async.h>
60#include <linux/log2.h> 60#include <linux/log2.h>
61#include <linux/slab.h> 61#include <linux/slab.h>
62#include <linux/glob.h>
62#include <scsi/scsi.h> 63#include <scsi/scsi.h>
63#include <scsi/scsi_cmnd.h> 64#include <scsi/scsi_cmnd.h>
64#include <scsi/scsi_host.h> 65#include <scsi/scsi_host.h>
@@ -4250,73 +4251,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
4250 { } 4251 { }
4251}; 4252};
4252 4253
4253/**
4254 * glob_match - match a text string against a glob-style pattern
4255 * @text: the string to be examined
4256 * @pattern: the glob-style pattern to be matched against
4257 *
4258 * Either/both of text and pattern can be empty strings.
4259 *
4260 * Match text against a glob-style pattern, with wildcards and simple sets:
4261 *
4262 * ? matches any single character.
4263 * * matches any run of characters.
4264 * [xyz] matches a single character from the set: x, y, or z.
4265 * [a-d] matches a single character from the range: a, b, c, or d.
4266 * [a-d0-9] matches a single character from either range.
4267 *
4268 * The special characters ?, [, -, or *, can be matched using a set, eg. [*]
4269 * Behaviour with malformed patterns is undefined, though generally reasonable.
4270 *
4271 * Sample patterns: "SD1?", "SD1[0-5]", "*R0", "SD*1?[012]*xx"
4272 *
4273 * This function uses one level of recursion per '*' in pattern.
4274 * Since it calls _nothing_ else, and has _no_ explicit local variables,
4275 * this will not cause stack problems for any reasonable use here.
4276 *
4277 * RETURNS:
4278 * 0 on match, 1 otherwise.
4279 */
4280static int glob_match (const char *text, const char *pattern)
4281{
4282 do {
4283 /* Match single character or a '?' wildcard */
4284 if (*text == *pattern || *pattern == '?') {
4285 if (!*pattern++)
4286 return 0; /* End of both strings: match */
4287 } else {
4288 /* Match single char against a '[' bracketed ']' pattern set */
4289 if (!*text || *pattern != '[')
4290 break; /* Not a pattern set */
4291 while (*++pattern && *pattern != ']' && *text != *pattern) {
4292 if (*pattern == '-' && *(pattern - 1) != '[')
4293 if (*text > *(pattern - 1) && *text < *(pattern + 1)) {
4294 ++pattern;
4295 break;
4296 }
4297 }
4298 if (!*pattern || *pattern == ']')
4299 return 1; /* No match */
4300 while (*pattern && *pattern++ != ']');
4301 }
4302 } while (*++text && *pattern);
4303
4304 /* Match any run of chars against a '*' wildcard */
4305 if (*pattern == '*') {
4306 if (!*++pattern)
4307 return 0; /* Match: avoid recursion at end of pattern */
4308 /* Loop to handle additional pattern chars after the wildcard */
4309 while (*text) {
4310 if (glob_match(text, pattern) == 0)
4311 return 0; /* Remainder matched */
4312 ++text; /* Absorb (match) this char and try again */
4313 }
4314 }
4315 if (!*text && !*pattern)
4316 return 0; /* End of both strings: match */
4317 return 1; /* No match */
4318}
4319
4320static unsigned long ata_dev_blacklisted(const struct ata_device *dev) 4254static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
4321{ 4255{
4322 unsigned char model_num[ATA_ID_PROD_LEN + 1]; 4256 unsigned char model_num[ATA_ID_PROD_LEN + 1];
@@ -4327,10 +4261,10 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
4327 ata_id_c_string(dev->id, model_rev, ATA_ID_FW_REV, sizeof(model_rev)); 4261 ata_id_c_string(dev->id, model_rev, ATA_ID_FW_REV, sizeof(model_rev));
4328 4262
4329 while (ad->model_num) { 4263 while (ad->model_num) {
4330 if (!glob_match(model_num, ad->model_num)) { 4264 if (glob_match(model_num, ad->model_num)) {
4331 if (ad->model_rev == NULL) 4265 if (ad->model_rev == NULL)
4332 return ad->horkage; 4266 return ad->horkage;
4333 if (!glob_match(model_rev, ad->model_rev)) 4267 if (glob_match(model_rev, ad->model_rev))
4334 return ad->horkage; 4268 return ad->horkage;
4335 } 4269 }
4336 ad++; 4270 ad++;
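libata's private glob_match() (a 0-on-match helper) is deleted above in favour of the generic matcher provided by CONFIG_GLOB/lib/glob.c, which returns true on a match, hence the dropped negations in ata_dev_blacklisted(). A small sketch of using the shared helper, assuming lib/glob.c's glob_match(pattern, string) argument order; the pattern strings and function name are made up for illustration:

    #include <linux/glob.h>

    /* Hypothetical check against a few glob patterns; returns true when
     * model_num matches any of them. */
    static bool example_model_blacklisted(const char *model_num)
    {
    	static const char * const patterns[] = { "WDC*", "ST3[12]0*", NULL };
    	int i;

    	for (i = 0; patterns[i]; i++)
    		if (glob_match(patterns[i], model_num))
    			return true;
    	return false;
    }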
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 88500fed3c7a..4e7f0ff83ae7 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -289,16 +289,6 @@ config CMA_ALIGNMENT
289 289
290 If unsure, leave the default value "8". 290 If unsure, leave the default value "8".
291 291
292config CMA_AREAS
293 int "Maximum count of the CMA device-private areas"
294 default 7
295 help
296 CMA allows to create CMA areas for particular devices. This parameter
297 sets the maximum number of such device private CMA areas in the
298 system.
299
300 If unsure, leave the default value "7".
301
302endif 292endif
303 293
304endmenu 294endmenu
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 6467c919c509..6606abdf880c 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -24,23 +24,9 @@
24 24
25#include <linux/memblock.h> 25#include <linux/memblock.h>
26#include <linux/err.h> 26#include <linux/err.h>
27#include <linux/mm.h>
28#include <linux/mutex.h>
29#include <linux/page-isolation.h>
30#include <linux/sizes.h> 27#include <linux/sizes.h>
31#include <linux/slab.h>
32#include <linux/swap.h>
33#include <linux/mm_types.h>
34#include <linux/dma-contiguous.h> 28#include <linux/dma-contiguous.h>
35 29#include <linux/cma.h>
36struct cma {
37 unsigned long base_pfn;
38 unsigned long count;
39 unsigned long *bitmap;
40 struct mutex lock;
41};
42
43struct cma *dma_contiguous_default_area;
44 30
45#ifdef CONFIG_CMA_SIZE_MBYTES 31#ifdef CONFIG_CMA_SIZE_MBYTES
46#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES 32#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
@@ -48,6 +34,8 @@ struct cma *dma_contiguous_default_area;
48#define CMA_SIZE_MBYTES 0 34#define CMA_SIZE_MBYTES 0
49#endif 35#endif
50 36
37struct cma *dma_contiguous_default_area;
38
51/* 39/*
52 * Default global CMA area size can be defined in kernel's .config. 40 * Default global CMA area size can be defined in kernel's .config.
53 * This is useful mainly for distro maintainers to create a kernel 41 * This is useful mainly for distro maintainers to create a kernel
@@ -154,65 +142,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
154 } 142 }
155} 143}
156 144
157static DEFINE_MUTEX(cma_mutex);
158
159static int __init cma_activate_area(struct cma *cma)
160{
161 int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
162 unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
163 unsigned i = cma->count >> pageblock_order;
164 struct zone *zone;
165
166 cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
167
168 if (!cma->bitmap)
169 return -ENOMEM;
170
171 WARN_ON_ONCE(!pfn_valid(pfn));
172 zone = page_zone(pfn_to_page(pfn));
173
174 do {
175 unsigned j;
176 base_pfn = pfn;
177 for (j = pageblock_nr_pages; j; --j, pfn++) {
178 WARN_ON_ONCE(!pfn_valid(pfn));
179 /*
180 * alloc_contig_range requires the pfn range
181 * specified to be in the same zone. Make this
182 * simple by forcing the entire CMA resv range
183 * to be in the same zone.
184 */
185 if (page_zone(pfn_to_page(pfn)) != zone)
186 goto err;
187 }
188 init_cma_reserved_pageblock(pfn_to_page(base_pfn));
189 } while (--i);
190
191 mutex_init(&cma->lock);
192 return 0;
193
194err:
195 kfree(cma->bitmap);
196 return -EINVAL;
197}
198
199static struct cma cma_areas[MAX_CMA_AREAS];
200static unsigned cma_area_count;
201
202static int __init cma_init_reserved_areas(void)
203{
204 int i;
205
206 for (i = 0; i < cma_area_count; i++) {
207 int ret = cma_activate_area(&cma_areas[i]);
208 if (ret)
209 return ret;
210 }
211
212 return 0;
213}
214core_initcall(cma_init_reserved_areas);
215
216/** 145/**
217 * dma_contiguous_reserve_area() - reserve custom contiguous area 146 * dma_contiguous_reserve_area() - reserve custom contiguous area
218 * @size: Size of the reserved area (in bytes), 147 * @size: Size of the reserved area (in bytes),
@@ -234,72 +163,17 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
234 phys_addr_t limit, struct cma **res_cma, 163 phys_addr_t limit, struct cma **res_cma,
235 bool fixed) 164 bool fixed)
236{ 165{
237 struct cma *cma = &cma_areas[cma_area_count]; 166 int ret;
238 phys_addr_t alignment;
239 int ret = 0;
240
241 pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
242 (unsigned long)size, (unsigned long)base,
243 (unsigned long)limit);
244
245 /* Sanity checks */
246 if (cma_area_count == ARRAY_SIZE(cma_areas)) {
247 pr_err("Not enough slots for CMA reserved regions!\n");
248 return -ENOSPC;
249 }
250
251 if (!size)
252 return -EINVAL;
253
254 /* Sanitise input arguments */
255 alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
256 base = ALIGN(base, alignment);
257 size = ALIGN(size, alignment);
258 limit &= ~(alignment - 1);
259
260 /* Reserve memory */
261 if (base && fixed) {
262 if (memblock_is_region_reserved(base, size) ||
263 memblock_reserve(base, size) < 0) {
264 ret = -EBUSY;
265 goto err;
266 }
267 } else {
268 phys_addr_t addr = memblock_alloc_range(size, alignment, base,
269 limit);
270 if (!addr) {
271 ret = -ENOMEM;
272 goto err;
273 } else {
274 base = addr;
275 }
276 }
277
278 /*
279 * Each reserved area must be initialised later, when more kernel
280 * subsystems (like slab allocator) are available.
281 */
282 cma->base_pfn = PFN_DOWN(base);
283 cma->count = size >> PAGE_SHIFT;
284 *res_cma = cma;
285 cma_area_count++;
286 167
287 pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, 168 ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma);
288 (unsigned long)base); 169 if (ret)
170 return ret;
289 171
290 /* Architecture specific contiguous memory fixup. */ 172 /* Architecture specific contiguous memory fixup. */
291 dma_contiguous_early_fixup(base, size); 173 dma_contiguous_early_fixup(cma_get_base(*res_cma),
292 return 0; 174 cma_get_size(*res_cma));
293err:
294 pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
295 return ret;
296}
297 175
298static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count) 176 return 0;
299{
300 mutex_lock(&cma->lock);
301 bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
302 mutex_unlock(&cma->lock);
303} 177}
304 178
305/** 179/**
@@ -316,62 +190,10 @@ static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
316struct page *dma_alloc_from_contiguous(struct device *dev, int count, 190struct page *dma_alloc_from_contiguous(struct device *dev, int count,
317 unsigned int align) 191 unsigned int align)
318{ 192{
319 unsigned long mask, pfn, pageno, start = 0;
320 struct cma *cma = dev_get_cma_area(dev);
321 struct page *page = NULL;
322 int ret;
323
324 if (!cma || !cma->count)
325 return NULL;
326
327 if (align > CONFIG_CMA_ALIGNMENT) 193 if (align > CONFIG_CMA_ALIGNMENT)
328 align = CONFIG_CMA_ALIGNMENT; 194 align = CONFIG_CMA_ALIGNMENT;
329 195
330 pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, 196 return cma_alloc(dev_get_cma_area(dev), count, align);
331 count, align);
332
333 if (!count)
334 return NULL;
335
336 mask = (1 << align) - 1;
337
338
339 for (;;) {
340 mutex_lock(&cma->lock);
341 pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
342 start, count, mask);
343 if (pageno >= cma->count) {
344 mutex_unlock(&cma->lock);
345 break;
346 }
347 bitmap_set(cma->bitmap, pageno, count);
348 /*
349 * It's safe to drop the lock here. We've marked this region for
350 * our exclusive use. If the migration fails we will take the
351 * lock again and unmark it.
352 */
353 mutex_unlock(&cma->lock);
354
355 pfn = cma->base_pfn + pageno;
356 mutex_lock(&cma_mutex);
357 ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
358 mutex_unlock(&cma_mutex);
359 if (ret == 0) {
360 page = pfn_to_page(pfn);
361 break;
362 } else if (ret != -EBUSY) {
363 clear_cma_bitmap(cma, pfn, count);
364 break;
365 }
366 clear_cma_bitmap(cma, pfn, count);
367 pr_debug("%s(): memory range at %p is busy, retrying\n",
368 __func__, pfn_to_page(pfn));
369 /* try again with a bit different memory target */
370 start = pageno + mask + 1;
371 }
372
373 pr_debug("%s(): returned %p\n", __func__, page);
374 return page;
375} 197}
376 198
377/** 199/**
@@ -387,23 +209,5 @@ struct page *dma_alloc_from_contiguous(struct device *dev, int count,
387bool dma_release_from_contiguous(struct device *dev, struct page *pages, 209bool dma_release_from_contiguous(struct device *dev, struct page *pages,
388 int count) 210 int count)
389{ 211{
390 struct cma *cma = dev_get_cma_area(dev); 212 return cma_release(dev_get_cma_area(dev), pages, count);
391 unsigned long pfn;
392
393 if (!cma || !pages)
394 return false;
395
396 pr_debug("%s(page %p)\n", __func__, (void *)pages);
397
398 pfn = page_to_pfn(pages);
399
400 if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
401 return false;
402
403 VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
404
405 free_contig_range(pfn, count);
406 clear_cma_bitmap(cma, pfn, count);
407
408 return true;
409} 213}
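
The dma-contiguous.c rework above deletes the driver-private bitmap, mutex and alloc_contig_range() retry loop and forwards to the generic CMA layer (cma_declare_contiguous(), cma_alloc(), cma_release()), leaving dma_alloc_from_contiguous() and dma_release_from_contiguous() as thin wrappers. A rough userspace model of the bookkeeping that moved behind cma_alloc()/cma_release() (assumption: single-threaded, one flag per page, no page migration):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define AREA_PAGES 64

static bool bitmap[AREA_PAGES];	/* one in-use flag per page in the area */

/* Find a run of @count free pages, mark it used, return its offset. */
static long toy_cma_alloc(size_t count)
{
	for (size_t start = 0; start + count <= AREA_PAGES; start++) {
		size_t n;

		for (n = 0; n < count && !bitmap[start + n]; n++)
			;
		if (n == count) {		/* found a free run */
			for (n = 0; n < count; n++)
				bitmap[start + n] = true;
			return (long)start;
		}
		start += n;			/* skip past the busy page */
	}
	return -1;
}

static void toy_cma_release(long pageno, size_t count)
{
	for (size_t n = 0; n < count; n++)
		bitmap[pageno + n] = false;
}

int main(void)
{
	long a = toy_cma_alloc(8);
	long b = toy_cma_alloc(16);

	printf("a=%ld b=%ld\n", a, b);		/* expect 0 and 8 */
	toy_cma_release(a, 8);
	printf("again=%ld\n", toy_cma_alloc(4));	/* reuses the freed run: 0 */
	return 0;
}

The real code additionally serializes allocators, migrates any movable pages currently in the chosen range, and rolls the bitmap back and retries at a different offset when that fails.
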
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 89f752dd8465..a2e13e250bba 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -284,7 +284,7 @@ static int memory_subsys_online(struct device *dev)
284 * attribute and need to set the online_type. 284 * attribute and need to set the online_type.
285 */ 285 */
286 if (mem->online_type < 0) 286 if (mem->online_type < 0)
287 mem->online_type = ONLINE_KEEP; 287 mem->online_type = MMOP_ONLINE_KEEP;
288 288
289 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 289 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
290 290
@@ -315,23 +315,23 @@ store_mem_state(struct device *dev,
315 if (ret) 315 if (ret)
316 return ret; 316 return ret;
317 317
318 if (!strncmp(buf, "online_kernel", min_t(int, count, 13))) 318 if (sysfs_streq(buf, "online_kernel"))
319 online_type = ONLINE_KERNEL; 319 online_type = MMOP_ONLINE_KERNEL;
320 else if (!strncmp(buf, "online_movable", min_t(int, count, 14))) 320 else if (sysfs_streq(buf, "online_movable"))
321 online_type = ONLINE_MOVABLE; 321 online_type = MMOP_ONLINE_MOVABLE;
322 else if (!strncmp(buf, "online", min_t(int, count, 6))) 322 else if (sysfs_streq(buf, "online"))
323 online_type = ONLINE_KEEP; 323 online_type = MMOP_ONLINE_KEEP;
324 else if (!strncmp(buf, "offline", min_t(int, count, 7))) 324 else if (sysfs_streq(buf, "offline"))
325 online_type = -1; 325 online_type = MMOP_OFFLINE;
326 else { 326 else {
327 ret = -EINVAL; 327 ret = -EINVAL;
328 goto err; 328 goto err;
329 } 329 }
330 330
331 switch (online_type) { 331 switch (online_type) {
332 case ONLINE_KERNEL: 332 case MMOP_ONLINE_KERNEL:
333 case ONLINE_MOVABLE: 333 case MMOP_ONLINE_MOVABLE:
334 case ONLINE_KEEP: 334 case MMOP_ONLINE_KEEP:
335 /* 335 /*
336 * mem->online_type is not protected so there can be a 336 * mem->online_type is not protected so there can be a
337 * race here. However, when racing online, the first 337 * race here. However, when racing online, the first
@@ -342,7 +342,7 @@ store_mem_state(struct device *dev,
342 mem->online_type = online_type; 342 mem->online_type = online_type;
343 ret = device_online(&mem->dev); 343 ret = device_online(&mem->dev);
344 break; 344 break;
345 case -1: 345 case MMOP_OFFLINE:
346 ret = device_offline(&mem->dev); 346 ret = device_offline(&mem->dev);
347 break; 347 break;
348 default: 348 default:
@@ -406,7 +406,9 @@ memory_probe_store(struct device *dev, struct device_attribute *attr,
406 int i, ret; 406 int i, ret;
407 unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block; 407 unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
408 408
409 phys_addr = simple_strtoull(buf, NULL, 0); 409 ret = kstrtoull(buf, 0, &phys_addr);
410 if (ret)
411 return ret;
410 412
411 if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) 413 if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
412 return -EINVAL; 414 return -EINVAL;
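
Two cleanups in the memory.c hunk: the state strings are now compared with sysfs_streq(), which ignores the single trailing newline a sysfs write usually carries and otherwise requires an exact match (the old length-limited strncmp() would also accept strings that merely start with "online"), and the probed address is parsed with kstrtoull(), whose return value is actually checked. A userspace re-implementation of the comparison semantics, offered as a sketch of the documented behaviour rather than the kernel source:

#include <stdbool.h>
#include <stdio.h>

/* Exact string compare that also accepts one trailing '\n' on either side. */
static bool streq_newline(const char *s1, const char *s2)
{
	while (*s1 && *s1 == *s2) {
		s1++;
		s2++;
	}
	if (*s1 == *s2)
		return true;			/* both ended together */
	if (!*s1 && *s2 == '\n' && !s2[1])
		return true;			/* s2 has a trailing newline */
	if (*s1 == '\n' && !s1[1] && !*s2)
		return true;			/* s1 has a trailing newline */
	return false;
}

int main(void)
{
	printf("%d\n", streq_newline("online", "online\n"));	/* 1 */
	/* 0; the old strncmp(buf, "online", 6) test would have matched this. */
	printf("%d\n", streq_newline("online", "online_foo"));
	return 0;
}
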
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 8f7ed9933a7c..c6d3ae05f1ca 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -126,7 +126,7 @@ static ssize_t node_read_meminfo(struct device *dev,
126 nid, K(node_page_state(nid, NR_FILE_PAGES)), 126 nid, K(node_page_state(nid, NR_FILE_PAGES)),
127 nid, K(node_page_state(nid, NR_FILE_MAPPED)), 127 nid, K(node_page_state(nid, NR_FILE_MAPPED)),
128 nid, K(node_page_state(nid, NR_ANON_PAGES)), 128 nid, K(node_page_state(nid, NR_ANON_PAGES)),
129 nid, K(node_page_state(nid, NR_SHMEM)), 129 nid, K(i.sharedram),
130 nid, node_page_state(nid, NR_KERNEL_STACK) * 130 nid, node_page_state(nid, NR_KERNEL_STACK) *
131 THREAD_SIZE / 1024, 131 THREAD_SIZE / 1024,
132 nid, K(node_page_state(nid, NR_PAGETABLE)), 132 nid, K(node_page_state(nid, NR_PAGETABLE)),
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 36e54be402df..dfa4024c448a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -183,19 +183,32 @@ static ssize_t comp_algorithm_store(struct device *dev,
183static int zram_test_flag(struct zram_meta *meta, u32 index, 183static int zram_test_flag(struct zram_meta *meta, u32 index,
184 enum zram_pageflags flag) 184 enum zram_pageflags flag)
185{ 185{
186 return meta->table[index].flags & BIT(flag); 186 return meta->table[index].value & BIT(flag);
187} 187}
188 188
189static void zram_set_flag(struct zram_meta *meta, u32 index, 189static void zram_set_flag(struct zram_meta *meta, u32 index,
190 enum zram_pageflags flag) 190 enum zram_pageflags flag)
191{ 191{
192 meta->table[index].flags |= BIT(flag); 192 meta->table[index].value |= BIT(flag);
193} 193}
194 194
195static void zram_clear_flag(struct zram_meta *meta, u32 index, 195static void zram_clear_flag(struct zram_meta *meta, u32 index,
196 enum zram_pageflags flag) 196 enum zram_pageflags flag)
197{ 197{
198 meta->table[index].flags &= ~BIT(flag); 198 meta->table[index].value &= ~BIT(flag);
199}
200
201static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
202{
203 return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
204}
205
206static void zram_set_obj_size(struct zram_meta *meta,
207 u32 index, size_t size)
208{
209 unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
210
211 meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
199} 212}
200 213
201static inline int is_partial_io(struct bio_vec *bvec) 214static inline int is_partial_io(struct bio_vec *bvec)
@@ -255,7 +268,6 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
255 goto free_table; 268 goto free_table;
256 } 269 }
257 270
258 rwlock_init(&meta->tb_lock);
259 return meta; 271 return meta;
260 272
261free_table: 273free_table:
@@ -304,7 +316,12 @@ static void handle_zero_page(struct bio_vec *bvec)
304 flush_dcache_page(page); 316 flush_dcache_page(page);
305} 317}
306 318
307/* NOTE: caller should hold meta->tb_lock with write-side */ 319
320/*
 321 * To protect concurrent access to the same index entry, the
 322 * caller should hold this table index entry's bit_spinlock for
 323 * as long as the entry is being accessed.

324 */
308static void zram_free_page(struct zram *zram, size_t index) 325static void zram_free_page(struct zram *zram, size_t index)
309{ 326{
310 struct zram_meta *meta = zram->meta; 327 struct zram_meta *meta = zram->meta;
@@ -324,11 +341,12 @@ static void zram_free_page(struct zram *zram, size_t index)
324 341
325 zs_free(meta->mem_pool, handle); 342 zs_free(meta->mem_pool, handle);
326 343
327 atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size); 344 atomic64_sub(zram_get_obj_size(meta, index),
345 &zram->stats.compr_data_size);
328 atomic64_dec(&zram->stats.pages_stored); 346 atomic64_dec(&zram->stats.pages_stored);
329 347
330 meta->table[index].handle = 0; 348 meta->table[index].handle = 0;
331 meta->table[index].size = 0; 349 zram_set_obj_size(meta, index, 0);
332} 350}
333 351
334static int zram_decompress_page(struct zram *zram, char *mem, u32 index) 352static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
@@ -337,14 +355,14 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
337 unsigned char *cmem; 355 unsigned char *cmem;
338 struct zram_meta *meta = zram->meta; 356 struct zram_meta *meta = zram->meta;
339 unsigned long handle; 357 unsigned long handle;
340 u16 size; 358 size_t size;
341 359
342 read_lock(&meta->tb_lock); 360 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
343 handle = meta->table[index].handle; 361 handle = meta->table[index].handle;
344 size = meta->table[index].size; 362 size = zram_get_obj_size(meta, index);
345 363
346 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { 364 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
347 read_unlock(&meta->tb_lock); 365 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
348 clear_page(mem); 366 clear_page(mem);
349 return 0; 367 return 0;
350 } 368 }
@@ -355,7 +373,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
355 else 373 else
356 ret = zcomp_decompress(zram->comp, cmem, size, mem); 374 ret = zcomp_decompress(zram->comp, cmem, size, mem);
357 zs_unmap_object(meta->mem_pool, handle); 375 zs_unmap_object(meta->mem_pool, handle);
358 read_unlock(&meta->tb_lock); 376 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
359 377
360 /* Should NEVER happen. Return bio error if it does. */ 378 /* Should NEVER happen. Return bio error if it does. */
361 if (unlikely(ret)) { 379 if (unlikely(ret)) {
@@ -376,14 +394,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
376 struct zram_meta *meta = zram->meta; 394 struct zram_meta *meta = zram->meta;
377 page = bvec->bv_page; 395 page = bvec->bv_page;
378 396
379 read_lock(&meta->tb_lock); 397 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
380 if (unlikely(!meta->table[index].handle) || 398 if (unlikely(!meta->table[index].handle) ||
381 zram_test_flag(meta, index, ZRAM_ZERO)) { 399 zram_test_flag(meta, index, ZRAM_ZERO)) {
382 read_unlock(&meta->tb_lock); 400 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
383 handle_zero_page(bvec); 401 handle_zero_page(bvec);
384 return 0; 402 return 0;
385 } 403 }
386 read_unlock(&meta->tb_lock); 404 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
387 405
388 if (is_partial_io(bvec)) 406 if (is_partial_io(bvec))
389 /* Use a temporary buffer to decompress the page */ 407 /* Use a temporary buffer to decompress the page */
@@ -461,10 +479,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
461 if (page_zero_filled(uncmem)) { 479 if (page_zero_filled(uncmem)) {
462 kunmap_atomic(user_mem); 480 kunmap_atomic(user_mem);
463 /* Free memory associated with this sector now. */ 481 /* Free memory associated with this sector now. */
464 write_lock(&zram->meta->tb_lock); 482 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
465 zram_free_page(zram, index); 483 zram_free_page(zram, index);
466 zram_set_flag(meta, index, ZRAM_ZERO); 484 zram_set_flag(meta, index, ZRAM_ZERO);
467 write_unlock(&zram->meta->tb_lock); 485 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
468 486
469 atomic64_inc(&zram->stats.zero_pages); 487 atomic64_inc(&zram->stats.zero_pages);
470 ret = 0; 488 ret = 0;
@@ -514,12 +532,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
514 * Free memory associated with this sector 532 * Free memory associated with this sector
515 * before overwriting unused sectors. 533 * before overwriting unused sectors.
516 */ 534 */
517 write_lock(&zram->meta->tb_lock); 535 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
518 zram_free_page(zram, index); 536 zram_free_page(zram, index);
519 537
520 meta->table[index].handle = handle; 538 meta->table[index].handle = handle;
521 meta->table[index].size = clen; 539 zram_set_obj_size(meta, index, clen);
522 write_unlock(&zram->meta->tb_lock); 540 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
523 541
524 /* Update stats */ 542 /* Update stats */
525 atomic64_add(clen, &zram->stats.compr_data_size); 543 atomic64_add(clen, &zram->stats.compr_data_size);
@@ -560,6 +578,7 @@ static void zram_bio_discard(struct zram *zram, u32 index,
560 int offset, struct bio *bio) 578 int offset, struct bio *bio)
561{ 579{
562 size_t n = bio->bi_iter.bi_size; 580 size_t n = bio->bi_iter.bi_size;
581 struct zram_meta *meta = zram->meta;
563 582
564 /* 583 /*
565 * zram manages data in physical block size units. Because logical block 584 * zram manages data in physical block size units. Because logical block
@@ -580,13 +599,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,
580 } 599 }
581 600
582 while (n >= PAGE_SIZE) { 601 while (n >= PAGE_SIZE) {
583 /* 602 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
584 * Discard request can be large so the lock hold times could be
585 * lengthy. So take the lock once per page.
586 */
587 write_lock(&zram->meta->tb_lock);
588 zram_free_page(zram, index); 603 zram_free_page(zram, index);
589 write_unlock(&zram->meta->tb_lock); 604 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
590 index++; 605 index++;
591 n -= PAGE_SIZE; 606 n -= PAGE_SIZE;
592 } 607 }
@@ -821,9 +836,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
821 zram = bdev->bd_disk->private_data; 836 zram = bdev->bd_disk->private_data;
822 meta = zram->meta; 837 meta = zram->meta;
823 838
824 write_lock(&meta->tb_lock); 839 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
825 zram_free_page(zram, index); 840 zram_free_page(zram, index);
826 write_unlock(&meta->tb_lock); 841 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
827 atomic64_inc(&zram->stats.notify_free); 842 atomic64_inc(&zram->stats.notify_free);
828} 843}
829 844
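
The zram_drv.c changes drop the global meta->tb_lock rwlock and protect each table entry with bit_spin_lock() on the ZRAM_ACCESS bit of that entry's value word, so accesses to different indexes no longer contend on one lock. A userspace model of the idea using C11 atomics in place of the kernel's bit_spin_lock()/bit_spin_unlock() (assumption: a single word, lock bit 26 as in the zram_drv.h hunk below):

#include <stdatomic.h>
#include <stdio.h>

#define ZRAM_ACCESS_BIT 26UL	/* matches ZRAM_ACCESS in the header hunk */

static _Atomic unsigned long entry_value;

static void entry_lock(void)
{
	/* Spin until we are the ones who set the ZRAM_ACCESS bit. */
	while (atomic_fetch_or(&entry_value, 1UL << ZRAM_ACCESS_BIT) &
	       (1UL << ZRAM_ACCESS_BIT))
		;
}

static void entry_unlock(void)
{
	atomic_fetch_and(&entry_value, ~(1UL << ZRAM_ACCESS_BIT));
}

int main(void)
{
	entry_lock();
	entry_value |= 42;	/* touch the entry under its per-entry lock */
	entry_unlock();
	printf("value=%lu\n",
	       atomic_load(&entry_value) & ~(1UL << ZRAM_ACCESS_BIT));
	return 0;
}

Because the lock bit lives above ZRAM_FLAG_SHIFT, it coexists with the size bits and the other page flags in the same word, so no separate lock field is needed.
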
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 7f21c145e317..5b0afde729cd 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -43,7 +43,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
43/*-- End of configurable params */ 43/*-- End of configurable params */
44 44
45#define SECTOR_SHIFT 9 45#define SECTOR_SHIFT 9
46#define SECTOR_SIZE (1 << SECTOR_SHIFT)
47#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) 46#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
48#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) 47#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
49#define ZRAM_LOGICAL_BLOCK_SHIFT 12 48#define ZRAM_LOGICAL_BLOCK_SHIFT 12
@@ -51,10 +50,24 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
51#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ 50#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \
52 (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) 51 (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))
53 52
54/* Flags for zram pages (table[page_no].flags) */ 53
54/*
55 * The lower ZRAM_FLAG_SHIFT bits of table.value is for
56 * object size (excluding header), the higher bits is for
57 * zram_pageflags.
58 *
59 * zram is mainly used for memory efficiency so we want to keep memory
60 * footprint small so we can squeeze size and flags into a field.
61 * The lower ZRAM_FLAG_SHIFT bits is for object size (excluding header),
62 * the higher bits is for zram_pageflags.
63 */
64#define ZRAM_FLAG_SHIFT 24
65
66/* Flags for zram pages (table[page_no].value) */
55enum zram_pageflags { 67enum zram_pageflags {
56 /* Page consists entirely of zeros */ 68 /* Page consists entirely of zeros */
57 ZRAM_ZERO, 69 ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1,
 70 ZRAM_ACCESS, /* page is being accessed */
58 71
59 __NR_ZRAM_PAGEFLAGS, 72 __NR_ZRAM_PAGEFLAGS,
60}; 73};
@@ -62,11 +75,10 @@ enum zram_pageflags {
62/*-- Data structures */ 75/*-- Data structures */
63 76
64/* Allocated for each disk page */ 77/* Allocated for each disk page */
65struct table { 78struct zram_table_entry {
66 unsigned long handle; 79 unsigned long handle;
67 u16 size; /* object size (excluding header) */ 80 unsigned long value;
68 u8 flags; 81};
69} __aligned(4);
70 82
71struct zram_stats { 83struct zram_stats {
72 atomic64_t compr_data_size; /* compressed size of pages stored */ 84 atomic64_t compr_data_size; /* compressed size of pages stored */
@@ -81,8 +93,7 @@ struct zram_stats {
81}; 93};
82 94
83struct zram_meta { 95struct zram_meta {
84 rwlock_t tb_lock; /* protect table */ 96 struct zram_table_entry *table;
85 struct table *table;
86 struct zs_pool *mem_pool; 97 struct zs_pool *mem_pool;
87}; 98};
88 99
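
The header hunk replaces the separate u16 size and u8 flags members with a single unsigned long value: the low ZRAM_FLAG_SHIFT bits carry the compressed object size and the bits above it carry the zram_pageflags (including the ZRAM_ACCESS lock bit used above). A small userspace sketch of the packing arithmetic, mirroring zram_set_obj_size()/zram_get_obj_size() from the zram_drv.c hunk (assumption: 64-bit unsigned long, no locking):

#include <stdio.h>

#define ZRAM_FLAG_SHIFT	24
#define ZRAM_ZERO	(ZRAM_FLAG_SHIFT + 1)	/* same layout as the hunk above */

static unsigned long value;	/* stands in for zram_table_entry.value */

/* Mirror of zram_set_obj_size(): keep the flag bits, replace the size. */
static void set_obj_size(unsigned long size)
{
	unsigned long flags = value >> ZRAM_FLAG_SHIFT;

	value = (flags << ZRAM_FLAG_SHIFT) | size;
}

/* Mirror of zram_get_obj_size(): mask off everything above the size. */
static unsigned long get_obj_size(void)
{
	return value & ((1UL << ZRAM_FLAG_SHIFT) - 1);
}

int main(void)
{
	value |= 1UL << ZRAM_ZERO;	/* set a flag first ... */
	set_obj_size(3172);		/* ... then store a size */

	/* Both survive side by side: prints "size=3172 zero=1". */
	printf("size=%lu zero=%lu\n", get_obj_size(),
	       (value >> ZRAM_ZERO) & 1UL);
	return 0;
}
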
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 17cf96c45f2b..79f18e6d9c4f 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -286,7 +286,11 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type)
286{ 286{
287 struct firmware_map_entry *entry; 287 struct firmware_map_entry *entry;
288 288
289 entry = firmware_map_find_entry_bootmem(start, end, type); 289 entry = firmware_map_find_entry(start, end - 1, type);
290 if (entry)
291 return 0;
292
293 entry = firmware_map_find_entry_bootmem(start, end - 1, type);
290 if (!entry) { 294 if (!entry) {
291 entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); 295 entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
292 if (!entry) 296 if (!entry)
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 7e4bae760e27..c3b80fd65d62 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -125,7 +125,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
125 parent = &entry->head; 125 parent = &entry->head;
126 } 126 }
127 if (parent) { 127 if (parent) {
128 hlist_add_after_rcu(parent, &item->head); 128 hlist_add_behind_rcu(&item->head, parent);
129 } else { 129 } else {
130 hlist_add_head_rcu(&item->head, h_list); 130 hlist_add_head_rcu(&item->head, h_list);
131 } 131 }
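
This is the first of several call-site conversions in this series from hlist_add_after() to hlist_add_behind() (see also the i40e, ixgbe, lustre, fs/namespace and fsnotify hunks below): the operation is the same, but the argument order is swapped so the node being inserted comes first, matching list_add(). A deliberately simplified sketch of the convention (assumption: toy singly linked list, not the kernel's hlist):

#include <stdio.h>

struct node {
	int val;
	struct node *next;
};

/* Insert @new right after @prev, mirroring hlist_add_behind(new, prev). */
static void node_add_behind(struct node *new, struct node *prev)
{
	new->next = prev->next;
	prev->next = new;
}

int main(void)
{
	struct node a = { .val = 1, .next = NULL };
	struct node b = { .val = 2, .next = NULL };

	node_add_behind(&b, &a);	/* the old API read: add_after(&a, &b) */
	for (struct node *n = &a; n; n = n->next)
		printf("%d ", n->val);	/* 1 2 */
	printf("\n");
	return 0;
}

In the converted calls above, the first argument is always the new entry and the second the existing anchor it is placed behind.
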
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index ae208f612198..cccef87963e0 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -688,7 +688,7 @@ static int atk_debugfs_gitm_get(void *p, u64 *val)
688DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm, 688DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm,
689 atk_debugfs_gitm_get, 689 atk_debugfs_gitm_get,
690 NULL, 690 NULL,
691 "0x%08llx\n") 691 "0x%08llx\n");
692 692
693static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj) 693static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj)
694{ 694{
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 0bf1e4edf04d..6590558d1d31 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -42,7 +42,6 @@ DEFINE_MUTEX(lguest_lock);
42static __init int map_switcher(void) 42static __init int map_switcher(void)
43{ 43{
44 int i, err; 44 int i, err;
45 struct page **pagep;
46 45
47 /* 46 /*
48 * Map the Switcher in to high memory. 47 * Map the Switcher in to high memory.
@@ -110,11 +109,9 @@ static __init int map_switcher(void)
110 * This code actually sets up the pages we've allocated to appear at 109 * This code actually sets up the pages we've allocated to appear at
111 * switcher_addr. map_vm_area() takes the vma we allocated above, the 110 * switcher_addr. map_vm_area() takes the vma we allocated above, the
112 * kind of pages we're mapping (kernel pages), and a pointer to our 111 * kind of pages we're mapping (kernel pages), and a pointer to our
113 * array of struct pages. It increments that pointer, but we don't 112 * array of struct pages.
114 * care.
115 */ 113 */
116 pagep = lg_switcher_pages; 114 err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages);
117 err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
118 if (err) { 115 if (err) {
119 printk("lguest: map_vm_area failed: %i\n", err); 116 printk("lguest: map_vm_area failed: %i\n", err);
120 goto free_vma; 117 goto free_vma;
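
The lguest hunk (and the binder hunk further down) can drop their throwaway pagep/page_array_ptr variables because map_vm_area() apparently no longer takes a struct page *** cursor that it advances, just the page array itself. A generic illustration of the two calling conventions with toy functions (assumption: these are not the kernel APIs):

#include <stdio.h>

static void consume_old(int **cursor, int n)	/* advances *cursor */
{
	for (int i = 0; i < n; i++)
		printf("%d ", *(*cursor)++);
}

static void consume_new(int *array, int n)	/* caller keeps its pointer */
{
	for (int i = 0; i < n; i++)
		printf("%d ", array[i]);
}

int main(void)
{
	int pages[4] = { 1, 2, 3, 4 };
	int *cursor = pages;	/* the old style needed this throwaway copy */

	consume_old(&cursor, 4);
	printf("\n");
	consume_new(pages, 4);	/* new style: pass the array directly */
	printf("\n");
	return 0;
}
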
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 681a9e81ff51..e8ba7470700a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1948,7 +1948,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
1948 1948
1949 /* add filter to the list */ 1949 /* add filter to the list */
1950 if (parent) 1950 if (parent)
1951 hlist_add_after(&parent->fdir_node, &input->fdir_node); 1951 hlist_add_behind(&input->fdir_node, &parent->fdir_node);
1952 else 1952 else
1953 hlist_add_head(&input->fdir_node, 1953 hlist_add_head(&input->fdir_node,
1954 &pf->fdir_filter_list); 1954 &pf->fdir_filter_list);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 94a1c07efeb0..e4100b5737b6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2517,7 +2517,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
2517 2517
2518 /* add filter to the list */ 2518 /* add filter to the list */
2519 if (parent) 2519 if (parent)
2520 hlist_add_after(&parent->fdir_node, &input->fdir_node); 2520 hlist_add_behind(&input->fdir_node, &parent->fdir_node);
2521 else 2521 else
2522 hlist_add_head(&input->fdir_node, 2522 hlist_add_head(&input->fdir_node,
2523 &adapter->fdir_filter_list); 2523 &adapter->fdir_filter_list);
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 02b0379ae550..4f34dc0095b5 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -585,7 +585,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
585 585
586 for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { 586 for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
587 int ret; 587 int ret;
588 struct page **page_array_ptr;
589 588
590 page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; 589 page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
591 590
@@ -598,8 +597,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
598 } 597 }
599 tmp_area.addr = page_addr; 598 tmp_area.addr = page_addr;
600 tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */; 599 tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */;
601 page_array_ptr = page; 600 ret = map_vm_area(&tmp_area, PAGE_KERNEL, page);
602 ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr);
603 if (ret) { 601 if (ret) {
604 pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n", 602 pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n",
605 proc->pid, page_addr); 603 proc->pid, page_addr);
diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c
index 5dde79418297..8ef1deb59d4a 100644
--- a/drivers/staging/lustre/lustre/libcfs/hash.c
+++ b/drivers/staging/lustre/lustre/libcfs/hash.c
@@ -351,7 +351,7 @@ cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
351 cfs_hash_dhead_t, dh_head); 351 cfs_hash_dhead_t, dh_head);
352 352
353 if (dh->dh_tail != NULL) /* not empty */ 353 if (dh->dh_tail != NULL) /* not empty */
354 hlist_add_after(dh->dh_tail, hnode); 354 hlist_add_behind(hnode, dh->dh_tail);
355 else /* empty list */ 355 else /* empty list */
356 hlist_add_head(hnode, &dh->dh_head); 356 hlist_add_head(hnode, &dh->dh_head);
357 dh->dh_tail = hnode; 357 dh->dh_tail = hnode;
@@ -406,7 +406,7 @@ cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
406 cfs_hash_dhead_dep_t, dd_head); 406 cfs_hash_dhead_dep_t, dd_head);
407 407
408 if (dh->dd_tail != NULL) /* not empty */ 408 if (dh->dd_tail != NULL) /* not empty */
409 hlist_add_after(dh->dd_tail, hnode); 409 hlist_add_behind(hnode, dh->dd_tail);
410 else /* empty list */ 410 else /* empty list */
411 hlist_add_head(hnode, &dh->dd_head); 411 hlist_add_head(hnode, &dh->dd_head);
412 dh->dd_tail = hnode; 412 dh->dd_tail = hnode;
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 454b65898e2c..42bad18c66c9 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -355,7 +355,7 @@ static struct sysrq_key_op sysrq_term_op = {
355 355
356static void moom_callback(struct work_struct *ignored) 356static void moom_callback(struct work_struct *ignored)
357{ 357{
358 out_of_memory(node_zonelist(first_online_node, GFP_KERNEL), GFP_KERNEL, 358 out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL,
359 0, NULL, true); 359 0, NULL, true);
360} 360}
361 361
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index a31b83c5cbd9..b39d487ccfb0 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -67,7 +67,7 @@ static int fscache_max_active_sysctl(struct ctl_table *table, int write,
67 return ret; 67 return ret;
68} 68}
69 69
70struct ctl_table fscache_sysctls[] = { 70static struct ctl_table fscache_sysctls[] = {
71 { 71 {
72 .procname = "object_max_active", 72 .procname = "object_max_active",
73 .data = &fscache_object_max_active, 73 .data = &fscache_object_max_active,
@@ -87,7 +87,7 @@ struct ctl_table fscache_sysctls[] = {
87 {} 87 {}
88}; 88};
89 89
90struct ctl_table fscache_sysctls_root[] = { 90static struct ctl_table fscache_sysctls_root[] = {
91 { 91 {
92 .procname = "fscache", 92 .procname = "fscache",
93 .mode = 0555, 93 .mode = 0555,
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 48140315f627..380d86e1ab45 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1019,11 +1019,11 @@ static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs)
1019/** 1019/**
1020 * logfs_is_valid_block - check whether this block is still valid 1020 * logfs_is_valid_block - check whether this block is still valid
1021 * 1021 *
1022 * @sb - superblock 1022 * @sb: superblock
1023 * @ofs - block physical offset 1023 * @ofs: block physical offset
1024 * @ino - block inode number 1024 * @ino: block inode number
1025 * @bix - block index 1025 * @bix: block index
1026 * @level - block level 1026 * @gc_level: block level
1027 * 1027 *
1028 * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will 1028 * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will
1029 * become invalid once the journal is written. 1029 * become invalid once the journal is written.
@@ -2226,10 +2226,9 @@ void btree_write_block(struct logfs_block *block)
2226 * 2226 *
2227 * @inode: parent inode (ifile or directory) 2227 * @inode: parent inode (ifile or directory)
2228 * @buf: object to write (inode or dentry) 2228 * @buf: object to write (inode or dentry)
2229 * @n: object size 2229 * @count: object size
2230 * @_pos: object number (file position in blocks/objects) 2230 * @bix: block index
2231 * @flags: write flags 2231 * @flags: write flags
2232 * @lock: 0 if write lock is already taken, 1 otherwise
2233 * @shadow_tree: shadow below this inode 2232 * @shadow_tree: shadow below this inode
2234 * 2233 *
2235 * FIXME: All caller of this put a 200-300 byte variable on the stack, 2234 * FIXME: All caller of this put a 200-300 byte variable on the stack,
diff --git a/fs/namespace.c b/fs/namespace.c
index 182bc41cd887..2a1447c946e7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -798,7 +798,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
798 list_splice(&head, n->list.prev); 798 list_splice(&head, n->list.prev);
799 799
800 if (shadows) 800 if (shadows)
801 hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash); 801 hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
802 else 802 else
803 hlist_add_head_rcu(&mnt->mnt_hash, 803 hlist_add_head_rcu(&mnt->mnt_hash,
804 m_hash(&parent->mnt, mnt->mnt_mountpoint)); 804 m_hash(&parent->mnt, mnt->mnt_mountpoint));
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index ee9cb3795c2b..30d3addfad75 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -70,8 +70,15 @@ static int fanotify_get_response(struct fsnotify_group *group,
70 wait_event(group->fanotify_data.access_waitq, event->response || 70 wait_event(group->fanotify_data.access_waitq, event->response ||
71 atomic_read(&group->fanotify_data.bypass_perm)); 71 atomic_read(&group->fanotify_data.bypass_perm));
72 72
73 if (!event->response) /* bypass_perm set */ 73 if (!event->response) { /* bypass_perm set */
74 /*
75 * Event was canceled because group is being destroyed. Remove
76 * it from group's event list because we are responsible for
77 * freeing the permission event.
78 */
79 fsnotify_remove_event(group, &event->fae.fse);
74 return 0; 80 return 0;
81 }
75 82
76 /* userspace responded, convert to something usable */ 83 /* userspace responded, convert to something usable */
77 switch (event->response) { 84 switch (event->response) {
@@ -210,7 +217,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
210 return -ENOMEM; 217 return -ENOMEM;
211 218
212 fsn_event = &event->fse; 219 fsn_event = &event->fse;
213 ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); 220 ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
214 if (ret) { 221 if (ret) {
215 /* Permission events shouldn't be merged */ 222 /* Permission events shouldn't be merged */
216 BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); 223 BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 3fdc8a3e1134..b13992a41bd9 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -66,7 +66,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
66 66
67 /* held the notification_mutex the whole time, so this is the 67 /* held the notification_mutex the whole time, so this is the
68 * same event we peeked above */ 68 * same event we peeked above */
69 return fsnotify_remove_notify_event(group); 69 return fsnotify_remove_first_event(group);
70} 70}
71 71
72static int create_fd(struct fsnotify_group *group, 72static int create_fd(struct fsnotify_group *group,
@@ -359,6 +359,11 @@ static int fanotify_release(struct inode *ignored, struct file *file)
359#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 359#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
360 struct fanotify_perm_event_info *event, *next; 360 struct fanotify_perm_event_info *event, *next;
361 361
362 /*
 363 * New events may still be arriving in the notification queue, but
 364 * since userspace can no longer use the fanotify fd, no event can
 365 * enter or leave access_list from now on.
366 */
362 spin_lock(&group->fanotify_data.access_lock); 367 spin_lock(&group->fanotify_data.access_lock);
363 368
364 atomic_inc(&group->fanotify_data.bypass_perm); 369 atomic_inc(&group->fanotify_data.bypass_perm);
@@ -373,6 +378,13 @@ static int fanotify_release(struct inode *ignored, struct file *file)
373 } 378 }
374 spin_unlock(&group->fanotify_data.access_lock); 379 spin_unlock(&group->fanotify_data.access_lock);
375 380
381 /*
 382 * Since bypass_perm is set, newly queued events will not wait for
 383 * an access response. Wake up the ones already sleeping now.
 384 * synchronize_srcu() in fsnotify_destroy_group() will wait for all
 385 * processes sleeping in fanotify_handle_event() for an access
 386 * response, and thus also for all permission events to be freed.
387 */
376 wake_up(&group->fanotify_data.access_waitq); 388 wake_up(&group->fanotify_data.access_waitq);
377#endif 389#endif
378 390
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 74825be65b7b..9ce062218de9 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
232 232
233 BUG_ON(last == NULL); 233 BUG_ON(last == NULL);
234 /* mark should be the last entry. last is the current last entry */ 234 /* mark should be the last entry. last is the current last entry */
235 hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list); 235 hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list);
236out: 236out:
237 fsnotify_recalc_inode_mask_locked(inode); 237 fsnotify_recalc_inode_mask_locked(inode);
238 spin_unlock(&inode->i_lock); 238 spin_unlock(&inode->i_lock);
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 43ab1e1a07a2..0f88bc0b4e6c 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -108,7 +108,7 @@ int inotify_handle_event(struct fsnotify_group *group,
108 if (len) 108 if (len)
109 strcpy(event->name, file_name); 109 strcpy(event->name, file_name);
110 110
111 ret = fsnotify_add_notify_event(group, fsn_event, inotify_merge); 111 ret = fsnotify_add_event(group, fsn_event, inotify_merge);
112 if (ret) { 112 if (ret) {
113 /* Our event wasn't used in the end. Free it. */ 113 /* Our event wasn't used in the end. Free it. */
114 fsnotify_destroy_event(group, fsn_event); 114 fsnotify_destroy_event(group, fsn_event);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index cc423a30a0c8..daf76652fe58 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -149,7 +149,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
149 if (fsnotify_notify_queue_is_empty(group)) 149 if (fsnotify_notify_queue_is_empty(group))
150 return NULL; 150 return NULL;
151 151
152 event = fsnotify_peek_notify_event(group); 152 event = fsnotify_peek_first_event(group);
153 153
154 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 154 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
155 155
@@ -159,7 +159,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
159 159
160 /* held the notification_mutex the whole time, so this is the 160 /* held the notification_mutex the whole time, so this is the
161 * same event we peeked above */ 161 * same event we peeked above */
162 fsnotify_remove_notify_event(group); 162 fsnotify_remove_first_event(group);
163 163
164 return event; 164 return event;
165} 165}
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 1e58402171a5..a95d8e037aeb 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -73,7 +73,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
73 /* Overflow events are per-group and we don't want to free them */ 73 /* Overflow events are per-group and we don't want to free them */
74 if (!event || event->mask == FS_Q_OVERFLOW) 74 if (!event || event->mask == FS_Q_OVERFLOW)
75 return; 75 return;
76 76 /* If the event is still queued, we have a problem... */
77 WARN_ON(!list_empty(&event->list));
77 group->ops->free_event(event); 78 group->ops->free_event(event);
78} 79}
79 80
@@ -83,10 +84,10 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
83 * added to the queue, 1 if the event was merged with some other queued event, 84 * added to the queue, 1 if the event was merged with some other queued event,
84 * 2 if the queue of events has overflown. 85 * 2 if the queue of events has overflown.
85 */ 86 */
86int fsnotify_add_notify_event(struct fsnotify_group *group, 87int fsnotify_add_event(struct fsnotify_group *group,
87 struct fsnotify_event *event, 88 struct fsnotify_event *event,
88 int (*merge)(struct list_head *, 89 int (*merge)(struct list_head *,
89 struct fsnotify_event *)) 90 struct fsnotify_event *))
90{ 91{
91 int ret = 0; 92 int ret = 0;
92 struct list_head *list = &group->notification_list; 93 struct list_head *list = &group->notification_list;
@@ -125,10 +126,25 @@ queue:
125} 126}
126 127
127/* 128/*
129 * Remove @event from group's notification queue. It is the responsibility of
130 * the caller to destroy the event.
131 */
132void fsnotify_remove_event(struct fsnotify_group *group,
133 struct fsnotify_event *event)
134{
135 mutex_lock(&group->notification_mutex);
136 if (!list_empty(&event->list)) {
137 list_del_init(&event->list);
138 group->q_len--;
139 }
140 mutex_unlock(&group->notification_mutex);
141}
142
143/*
128 * Remove and return the first event from the notification list. It is the 144 * Remove and return the first event from the notification list. It is the
129 * responsibility of the caller to destroy the obtained event 145 * responsibility of the caller to destroy the obtained event
130 */ 146 */
131struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) 147struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
132{ 148{
133 struct fsnotify_event *event; 149 struct fsnotify_event *event;
134 150
@@ -140,7 +156,7 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
140 struct fsnotify_event, list); 156 struct fsnotify_event, list);
141 /* 157 /*
142 * We need to init list head for the case of overflow event so that 158 * We need to init list head for the case of overflow event so that
143 * check in fsnotify_add_notify_events() works 159 * check in fsnotify_add_event() works
144 */ 160 */
145 list_del_init(&event->list); 161 list_del_init(&event->list);
146 group->q_len--; 162 group->q_len--;
@@ -149,9 +165,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
149} 165}
150 166
151/* 167/*
152 * This will not remove the event, that must be done with fsnotify_remove_notify_event() 168 * This will not remove the event, that must be done with
169 * fsnotify_remove_first_event()
153 */ 170 */
154struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) 171struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
155{ 172{
156 BUG_ON(!mutex_is_locked(&group->notification_mutex)); 173 BUG_ON(!mutex_is_locked(&group->notification_mutex));
157 174
@@ -169,7 +186,7 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
169 186
170 mutex_lock(&group->notification_mutex); 187 mutex_lock(&group->notification_mutex);
171 while (!fsnotify_notify_queue_is_empty(group)) { 188 while (!fsnotify_notify_queue_is_empty(group)) {
172 event = fsnotify_remove_notify_event(group); 189 event = fsnotify_remove_first_event(group);
173 fsnotify_destroy_event(group, event); 190 fsnotify_destroy_event(group, event);
174 } 191 }
175 mutex_unlock(&group->notification_mutex); 192 mutex_unlock(&group->notification_mutex);
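
fsnotify_remove_event(), added above for the fanotify shutdown path, unlinks a specific event from the group's queue only if it is still queued; this works because events are always removed with list_del_init(), so list_empty() on the event's own list node answers "is it still queued?" and the helper is safe to call even after the event has already been dequeued. A userspace sketch of that idiom (assumption: minimal hand-rolled list, not the kernel's list.h):

#include <stdbool.h>
#include <stdio.h>

struct list_head {
	struct list_head *next, *prev;
};

static void INIT_LIST_HEAD(struct list_head *h)
{
	h->next = h->prev = h;
}

static bool list_empty(const struct list_head *h)
{
	return h->next == h;
}

static void list_add_tail(struct list_head *n, struct list_head *head)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

static void list_del_init(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	INIT_LIST_HEAD(n);
}

struct event {
	struct list_head list;
	int id;
};

/* Mirror of fsnotify_remove_event(): only unlink if still queued. */
static void remove_event(struct event *ev, int *q_len)
{
	if (!list_empty(&ev->list)) {
		list_del_init(&ev->list);
		(*q_len)--;
	}
}

int main(void)
{
	struct list_head queue;
	struct event ev = { .id = 1 };
	int q_len = 0;

	INIT_LIST_HEAD(&queue);
	INIT_LIST_HEAD(&ev.list);
	list_add_tail(&ev.list, &queue);
	q_len++;

	remove_event(&ev, &q_len);
	remove_event(&ev, &q_len);	/* second call is a harmless no-op */
	printf("q_len=%d queued=%d\n", q_len, !list_empty(&ev.list));	/* 0 0 */
	return 0;
}
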
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 68ca5a8704b5..ac851e8376b1 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
191 191
192 BUG_ON(last == NULL); 192 BUG_ON(last == NULL);
193 /* mark should be the last entry. last is the current last entry */ 193 /* mark should be the last entry. last is the current last entry */
194 hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list); 194 hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list);
195out: 195out:
196 fsnotify_recalc_vfsmount_mask_locked(mnt); 196 fsnotify_recalc_vfsmount_mask_locked(mnt);
197 spin_unlock(&mnt->mnt_root->d_lock); 197 spin_unlock(&mnt->mnt_root->d_lock);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5c9e2c81cb11..f5ec1ce7a532 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -74,8 +74,6 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
74 * ntfs_attr_extend_initialized - extend the initialized size of an attribute 74 * ntfs_attr_extend_initialized - extend the initialized size of an attribute
75 * @ni: ntfs inode of the attribute to extend 75 * @ni: ntfs inode of the attribute to extend
76 * @new_init_size: requested new initialized size in bytes 76 * @new_init_size: requested new initialized size in bytes
77 * @cached_page: store any allocated but unused page here
78 * @lru_pvec: lru-buffering pagevec of the caller
79 * 77 *
80 * Extend the initialized size of an attribute described by the ntfs inode @ni 78 * Extend the initialized size of an attribute described by the ntfs inode @ni
81 * to @new_init_size bytes. This involves zeroing any non-sparse space between 79 * to @new_init_size bytes. This involves zeroing any non-sparse space between
@@ -395,7 +393,6 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
395 * @nr_pages: number of page cache pages to obtain 393 * @nr_pages: number of page cache pages to obtain
396 * @pages: array of pages in which to return the obtained page cache pages 394 * @pages: array of pages in which to return the obtained page cache pages
397 * @cached_page: allocated but as yet unused page 395 * @cached_page: allocated but as yet unused page
398 * @lru_pvec: lru-buffering pagevec of caller
399 * 396 *
400 * Obtain @nr_pages locked page cache pages from the mapping @mapping and 397 * Obtain @nr_pages locked page cache pages from the mapping @mapping and
401 * starting at index @index. 398 * starting at index @index.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9d8fcf2f3b94..a93bf9892256 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4961,6 +4961,15 @@ leftright:
4961 4961
4962 el = path_leaf_el(path); 4962 el = path_leaf_el(path);
4963 split_index = ocfs2_search_extent_list(el, cpos); 4963 split_index = ocfs2_search_extent_list(el, cpos);
4964 if (split_index == -1) {
4965 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
4966 "Owner %llu has an extent at cpos %u "
4967 "which can no longer be found.\n",
4968 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
4969 cpos);
4970 ret = -EROFS;
4971 goto out;
4972 }
4964 goto leftright; 4973 goto leftright;
4965 } 4974 }
4966out: 4975out:
@@ -5135,7 +5144,7 @@ int ocfs2_change_extent_flag(handle_t *handle,
5135 el = path_leaf_el(left_path); 5144 el = path_leaf_el(left_path);
5136 5145
5137 index = ocfs2_search_extent_list(el, cpos); 5146 index = ocfs2_search_extent_list(el, cpos);
5138 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5147 if (index == -1) {
5139 ocfs2_error(sb, 5148 ocfs2_error(sb,
5140 "Owner %llu has an extent at cpos %u which can no " 5149 "Owner %llu has an extent at cpos %u which can no "
5141 "longer be found.\n", 5150 "longer be found.\n",
@@ -5491,7 +5500,7 @@ int ocfs2_remove_extent(handle_t *handle,
5491 5500
5492 el = path_leaf_el(path); 5501 el = path_leaf_el(path);
5493 index = ocfs2_search_extent_list(el, cpos); 5502 index = ocfs2_search_extent_list(el, cpos);
5494 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5503 if (index == -1) {
5495 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), 5504 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5496 "Owner %llu has an extent at cpos %u which can no " 5505 "Owner %llu has an extent at cpos %u which can no "
5497 "longer be found.\n", 5506 "longer be found.\n",
@@ -5557,7 +5566,7 @@ int ocfs2_remove_extent(handle_t *handle,
5557 5566
5558 el = path_leaf_el(path); 5567 el = path_leaf_el(path);
5559 index = ocfs2_search_extent_list(el, cpos); 5568 index = ocfs2_search_extent_list(el, cpos);
5560 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5569 if (index == -1) {
5561 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), 5570 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5562 "Owner %llu: split at cpos %u lost record.", 5571 "Owner %llu: split at cpos %u lost record.",
5563 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), 5572 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 39efc5057a36..3fcf205ee900 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1923,12 +1923,11 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
1923 goto bail; 1923 goto bail;
1924 } 1924 }
1925 1925
1926 if (total_backoff > 1926 if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) {
1927 msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
1928 status = -ERESTARTSYS; 1927 status = -ERESTARTSYS;
1929 mlog(ML_NOTICE, "Timed out joining dlm domain " 1928 mlog(ML_NOTICE, "Timed out joining dlm domain "
1930 "%s after %u msecs\n", dlm->name, 1929 "%s after %u msecs\n", dlm->name,
1931 jiffies_to_msecs(total_backoff)); 1930 total_backoff);
1932 goto bail; 1931 goto bail;
1933 } 1932 }
1934 1933
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 82abf0cc9a12..3ec906ef5d9a 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2405,6 +2405,10 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2405 if (res->state & DLM_LOCK_RES_MIGRATING) 2405 if (res->state & DLM_LOCK_RES_MIGRATING)
2406 return 0; 2406 return 0;
2407 2407
 2408 /* delay migration when the lockres is in RECOVERING state */
2409 if (res->state & DLM_LOCK_RES_RECOVERING)
2410 return 0;
2411
2408 if (res->owner != dlm->node_num) 2412 if (res->owner != dlm->node_num)
2409 return 0; 2413 return 0;
2410 2414
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 599eb4c4c8be..6219aaadeb08 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -98,7 +98,7 @@ static int __ocfs2_move_extent(handle_t *handle,
98 el = path_leaf_el(path); 98 el = path_leaf_el(path);
99 99
100 index = ocfs2_search_extent_list(el, cpos); 100 index = ocfs2_search_extent_list(el, cpos);
101 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 101 if (index == -1) {
102 ocfs2_error(inode->i_sb, 102 ocfs2_error(inode->i_sb,
103 "Inode %llu has an extent at cpos %u which can no " 103 "Inode %llu has an extent at cpos %u which can no "
104 "longer be found.\n", 104 "longer be found.\n",
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 636aab69ead5..d81f6e2a97f5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3109,7 +3109,7 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,
3109 el = path_leaf_el(path); 3109 el = path_leaf_el(path);
3110 3110
3111 index = ocfs2_search_extent_list(el, cpos); 3111 index = ocfs2_search_extent_list(el, cpos);
3112 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 3112 if (index == -1) {
3113 ocfs2_error(sb, 3113 ocfs2_error(sb,
3114 "Inode %llu has an extent at cpos %u which can no " 3114 "Inode %llu has an extent at cpos %u which can no "
3115 "longer be found.\n", 3115 "longer be found.\n",
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 1424c151cccc..a88b2a4fcc85 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -382,7 +382,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
382 382
383 trace_ocfs2_map_slot_buffers(bytes, si->si_blocks); 383 trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
384 384
385 si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks, 385 si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
386 GFP_KERNEL); 386 GFP_KERNEL);
387 if (!si->si_bh) { 387 if (!si->si_bh) {
388 status = -ENOMEM; 388 status = -ENOMEM;
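
The ocfs2 slot_map hunk (like the squashfs kmalloc_array() conversion further down) replaces an open-coded sizeof(x) * n allocation with kcalloc(), whose advantage is that the count-times-size multiplication is checked for overflow instead of silently allocating a short buffer. A userspace sketch of that check (assumption: simplified, no GFP flags):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Overflow-checked array allocation, roughly what kcalloc() adds. */
static void *alloc_array(size_t n, size_t size)
{
	if (size != 0 && n > SIZE_MAX / size)
		return NULL;		/* n * size would overflow */
	return calloc(n, size);		/* calloc() also zeroes, like kcalloc() */
}

int main(void)
{
	void *ok = alloc_array(16, sizeof(void *));
	void *bad = alloc_array(SIZE_MAX / 2, 4);

	printf("ok=%p bad=%p\n", ok, bad);	/* bad is NULL */
	free(ok);
	return 0;
}
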
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 7445af0b1aa3..aa1eee06420f 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -168,7 +168,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
168 K(global_page_state(NR_WRITEBACK)), 168 K(global_page_state(NR_WRITEBACK)),
169 K(global_page_state(NR_ANON_PAGES)), 169 K(global_page_state(NR_ANON_PAGES)),
170 K(global_page_state(NR_FILE_MAPPED)), 170 K(global_page_state(NR_FILE_MAPPED)),
171 K(global_page_state(NR_SHMEM)), 171 K(i.sharedram),
172 K(global_page_state(NR_SLAB_RECLAIMABLE) + 172 K(global_page_state(NR_SLAB_RECLAIMABLE) +
173 global_page_state(NR_SLAB_UNRECLAIMABLE)), 173 global_page_state(NR_SLAB_UNRECLAIMABLE)),
174 K(global_page_state(NR_SLAB_RECLAIMABLE)), 174 K(global_page_state(NR_SLAB_RECLAIMABLE)),
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index cfa63ee92c96..dfc791c42d64 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -925,15 +925,30 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
925 struct mm_walk *walk) 925 struct mm_walk *walk)
926{ 926{
927 struct pagemapread *pm = walk->private; 927 struct pagemapread *pm = walk->private;
928 unsigned long addr; 928 unsigned long addr = start;
929 int err = 0; 929 int err = 0;
930 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
931 930
932 for (addr = start; addr < end; addr += PAGE_SIZE) { 931 while (addr < end) {
933 err = add_to_pagemap(addr, &pme, pm); 932 struct vm_area_struct *vma = find_vma(walk->mm, addr);
934 if (err) 933 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
935 break; 934 unsigned long vm_end;
935
936 if (!vma) {
937 vm_end = end;
938 } else {
939 vm_end = min(end, vma->vm_end);
940 if (vma->vm_flags & VM_SOFTDIRTY)
941 pme.pme |= PM_STATUS2(pm->v2, __PM_SOFT_DIRTY);
942 }
943
944 for (; addr < vm_end; addr += PAGE_SIZE) {
945 err = add_to_pagemap(addr, &pme, pm);
946 if (err)
947 goto out;
948 }
936 } 949 }
950
951out:
937 return err; 952 return err;
938} 953}
939 954
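
The pagemap_pte_hole() rework above stops emitting one fixed "not present" entry per page and instead walks the hole in chunks bounded by the next VMA end, tagging each chunk with that VMA's soft-dirty flag. A userspace model of the chunked walk (assumption: VMAs reduced to a sorted interval table and the pagemap entry reduced to a soft-dirty bool):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 0x1000UL

struct vma {
	unsigned long vm_start, vm_end;	/* [vm_start, vm_end) */
	bool soft_dirty;
};

static const struct vma vmas[] = {
	{ 0x10000, 0x13000, true },
	{ 0x20000, 0x22000, false },
};

/* First VMA whose end lies above addr, like the kernel's find_vma(). */
static const struct vma *find_vma(unsigned long addr)
{
	for (size_t i = 0; i < sizeof(vmas) / sizeof(vmas[0]); i++)
		if (addr < vmas[i].vm_end)
			return &vmas[i];
	return NULL;
}

int main(void)
{
	unsigned long addr = 0x11000, end = 0x23000;

	while (addr < end) {
		const struct vma *vma = find_vma(addr);
		unsigned long vm_end = end;
		bool soft_dirty = false;

		if (vma) {
			vm_end = vma->vm_end < end ? vma->vm_end : end;
			soft_dirty = vma->soft_dirty;
		}
		for (; addr < vm_end; addr += PAGE_SIZE)
			printf("%#lx: not present, soft_dirty=%d\n",
			       addr, soft_dirty);
	}
	return 0;
}
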
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 62a0de6632e1..43e7a7eddac0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -44,7 +44,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
44 44
45 pages = end_index - start_index + 1; 45 pages = end_index - start_index + 1;
46 46
47 page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); 47 page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
48 if (page == NULL) 48 if (page == NULL)
49 return res; 49 return res;
50 50
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 031c8d67fd51..5056babe00df 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -27,6 +27,8 @@
27 * the filesystem. 27 * the filesystem.
28 */ 28 */
29 29
30#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
31
30#include <linux/fs.h> 32#include <linux/fs.h>
31#include <linux/vfs.h> 33#include <linux/vfs.h>
32#include <linux/slab.h> 34#include <linux/slab.h>
@@ -448,8 +450,7 @@ static int __init init_squashfs_fs(void)
448 return err; 450 return err;
449 } 451 }
450 452
451 printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) " 453 pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
452 "Phillip Lougher\n");
453 454
454 return 0; 455 return 0;
455} 456}
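
The squashfs cleanup leans on the pr_fmt() convention: a file that defines pr_fmt() before the printk headers are pulled in gets that prefix prepended to every pr_*() call, so the literal "squashfs: " string can go away. A hedged sketch of the pattern in a hypothetical module (demo_init is illustrative):

    /* Must appear before any include that drags in <linux/printk.h>. */
    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

    #include <linux/module.h>
    #include <linux/printk.h>

    static int __init demo_init(void)
    {
            pr_info("version 1.0 loaded\n");  /* prints "<modname>: version 1.0 loaded" */
            return 0;
    }
    module_init(demo_init);
    MODULE_LICENSE("GPL");
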
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 7ad634501e48..e1c8d080c427 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -88,32 +88,32 @@
88 * lib/bitmap.c provides these functions: 88 * lib/bitmap.c provides these functions:
89 */ 89 */
90 90
91extern int __bitmap_empty(const unsigned long *bitmap, int bits); 91extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits);
92extern int __bitmap_full(const unsigned long *bitmap, int bits); 92extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
93extern int __bitmap_equal(const unsigned long *bitmap1, 93extern int __bitmap_equal(const unsigned long *bitmap1,
94 const unsigned long *bitmap2, int bits); 94 const unsigned long *bitmap2, unsigned int nbits);
95extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, 95extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
96 int bits); 96 unsigned int nbits);
97extern void __bitmap_shift_right(unsigned long *dst, 97extern void __bitmap_shift_right(unsigned long *dst,
98 const unsigned long *src, int shift, int bits); 98 const unsigned long *src, int shift, int bits);
99extern void __bitmap_shift_left(unsigned long *dst, 99extern void __bitmap_shift_left(unsigned long *dst,
100 const unsigned long *src, int shift, int bits); 100 const unsigned long *src, int shift, int bits);
101extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, 101extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
102 const unsigned long *bitmap2, int bits); 102 const unsigned long *bitmap2, unsigned int nbits);
103extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, 103extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
104 const unsigned long *bitmap2, int bits); 104 const unsigned long *bitmap2, unsigned int nbits);
105extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, 105extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
106 const unsigned long *bitmap2, int bits); 106 const unsigned long *bitmap2, unsigned int nbits);
107extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, 107extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
108 const unsigned long *bitmap2, int bits); 108 const unsigned long *bitmap2, unsigned int nbits);
109extern int __bitmap_intersects(const unsigned long *bitmap1, 109extern int __bitmap_intersects(const unsigned long *bitmap1,
110 const unsigned long *bitmap2, int bits); 110 const unsigned long *bitmap2, unsigned int nbits);
111extern int __bitmap_subset(const unsigned long *bitmap1, 111extern int __bitmap_subset(const unsigned long *bitmap1,
112 const unsigned long *bitmap2, int bits); 112 const unsigned long *bitmap2, unsigned int nbits);
113extern int __bitmap_weight(const unsigned long *bitmap, int bits); 113extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
114 114
115extern void bitmap_set(unsigned long *map, int i, int len); 115extern void bitmap_set(unsigned long *map, unsigned int start, int len);
116extern void bitmap_clear(unsigned long *map, int start, int nr); 116extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
117extern unsigned long bitmap_find_next_zero_area(unsigned long *map, 117extern unsigned long bitmap_find_next_zero_area(unsigned long *map,
118 unsigned long size, 118 unsigned long size,
119 unsigned long start, 119 unsigned long start,
@@ -140,9 +140,9 @@ extern void bitmap_onto(unsigned long *dst, const unsigned long *orig,
140 const unsigned long *relmap, int bits); 140 const unsigned long *relmap, int bits);
141extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, 141extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
142 int sz, int bits); 142 int sz, int bits);
143extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); 143extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
144extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); 144extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
145extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); 145extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
146extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); 146extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
147extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); 147extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
148 148
@@ -188,15 +188,15 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
188} 188}
189 189
190static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, 190static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
191 const unsigned long *src2, int nbits) 191 const unsigned long *src2, unsigned int nbits)
192{ 192{
193 if (small_const_nbits(nbits)) 193 if (small_const_nbits(nbits))
194 return (*dst = *src1 & *src2) != 0; 194 return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
195 return __bitmap_and(dst, src1, src2, nbits); 195 return __bitmap_and(dst, src1, src2, nbits);
196} 196}
197 197
198static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, 198static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
199 const unsigned long *src2, int nbits) 199 const unsigned long *src2, unsigned int nbits)
200{ 200{
201 if (small_const_nbits(nbits)) 201 if (small_const_nbits(nbits))
202 *dst = *src1 | *src2; 202 *dst = *src1 | *src2;
@@ -205,7 +205,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
205} 205}
206 206
207static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, 207static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
208 const unsigned long *src2, int nbits) 208 const unsigned long *src2, unsigned int nbits)
209{ 209{
210 if (small_const_nbits(nbits)) 210 if (small_const_nbits(nbits))
211 *dst = *src1 ^ *src2; 211 *dst = *src1 ^ *src2;
@@ -214,24 +214,24 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
214} 214}
215 215
216static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, 216static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
217 const unsigned long *src2, int nbits) 217 const unsigned long *src2, unsigned int nbits)
218{ 218{
219 if (small_const_nbits(nbits)) 219 if (small_const_nbits(nbits))
220 return (*dst = *src1 & ~(*src2)) != 0; 220 return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
221 return __bitmap_andnot(dst, src1, src2, nbits); 221 return __bitmap_andnot(dst, src1, src2, nbits);
222} 222}
223 223
224static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, 224static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
225 int nbits) 225 unsigned int nbits)
226{ 226{
227 if (small_const_nbits(nbits)) 227 if (small_const_nbits(nbits))
228 *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); 228 *dst = ~(*src);
229 else 229 else
230 __bitmap_complement(dst, src, nbits); 230 __bitmap_complement(dst, src, nbits);
231} 231}
232 232
233static inline int bitmap_equal(const unsigned long *src1, 233static inline int bitmap_equal(const unsigned long *src1,
234 const unsigned long *src2, int nbits) 234 const unsigned long *src2, unsigned int nbits)
235{ 235{
236 if (small_const_nbits(nbits)) 236 if (small_const_nbits(nbits))
237 return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); 237 return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
@@ -240,7 +240,7 @@ static inline int bitmap_equal(const unsigned long *src1,
240} 240}
241 241
242static inline int bitmap_intersects(const unsigned long *src1, 242static inline int bitmap_intersects(const unsigned long *src1,
243 const unsigned long *src2, int nbits) 243 const unsigned long *src2, unsigned int nbits)
244{ 244{
245 if (small_const_nbits(nbits)) 245 if (small_const_nbits(nbits))
246 return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; 246 return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
@@ -249,7 +249,7 @@ static inline int bitmap_intersects(const unsigned long *src1,
249} 249}
250 250
251static inline int bitmap_subset(const unsigned long *src1, 251static inline int bitmap_subset(const unsigned long *src1,
252 const unsigned long *src2, int nbits) 252 const unsigned long *src2, unsigned int nbits)
253{ 253{
254 if (small_const_nbits(nbits)) 254 if (small_const_nbits(nbits))
255 return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); 255 return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
@@ -257,7 +257,7 @@ static inline int bitmap_subset(const unsigned long *src1,
257 return __bitmap_subset(src1, src2, nbits); 257 return __bitmap_subset(src1, src2, nbits);
258} 258}
259 259
260static inline int bitmap_empty(const unsigned long *src, int nbits) 260static inline int bitmap_empty(const unsigned long *src, unsigned nbits)
261{ 261{
262 if (small_const_nbits(nbits)) 262 if (small_const_nbits(nbits))
263 return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); 263 return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -265,7 +265,7 @@ static inline int bitmap_empty(const unsigned long *src, int nbits)
265 return __bitmap_empty(src, nbits); 265 return __bitmap_empty(src, nbits);
266} 266}
267 267
268static inline int bitmap_full(const unsigned long *src, int nbits) 268static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
269{ 269{
270 if (small_const_nbits(nbits)) 270 if (small_const_nbits(nbits))
271 return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); 271 return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
@@ -273,7 +273,7 @@ static inline int bitmap_full(const unsigned long *src, int nbits)
273 return __bitmap_full(src, nbits); 273 return __bitmap_full(src, nbits);
274} 274}
275 275
276static inline int bitmap_weight(const unsigned long *src, int nbits) 276static inline int bitmap_weight(const unsigned long *src, unsigned int nbits)
277{ 277{
278 if (small_const_nbits(nbits)) 278 if (small_const_nbits(nbits))
279 return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); 279 return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -284,7 +284,7 @@ static inline void bitmap_shift_right(unsigned long *dst,
284 const unsigned long *src, int n, int nbits) 284 const unsigned long *src, int n, int nbits)
285{ 285{
286 if (small_const_nbits(nbits)) 286 if (small_const_nbits(nbits))
287 *dst = *src >> n; 287 *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n;
288 else 288 else
289 __bitmap_shift_right(dst, src, n, nbits); 289 __bitmap_shift_right(dst, src, n, nbits);
290} 290}
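
The bitmap prototypes keep their semantics; the bit-count parameter simply becomes unsigned, and several small_const_nbits() fast paths now mask the last word so callers cannot observe bits beyond nbits. A short usage sketch with a compile-time-constant size, so the single-word inline paths shown above are the ones taken (count_common_bits is illustrative):

    #include <linux/bitmap.h>
    #include <linux/types.h>

    static int count_common_bits(void)
    {
            DECLARE_BITMAP(a, 16);
            DECLARE_BITMAP(b, 16);
            DECLARE_BITMAP(both, 16);

            bitmap_zero(a, 16);
            bitmap_zero(b, 16);
            bitmap_set(a, 0, 8);            /* bits 0..7  */
            bitmap_set(b, 4, 8);            /* bits 4..11 */

            bitmap_and(both, a, b, 16);     /* inline, masked single-word path */
            return bitmap_weight(both, 16); /* 4 */
    }
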
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 0846e6b931ce..89f67c1c3160 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -2,7 +2,7 @@
2#define _LINUX_BYTEORDER_GENERIC_H 2#define _LINUX_BYTEORDER_GENERIC_H
3 3
4/* 4/*
5 * linux/byteorder_generic.h 5 * linux/byteorder/generic.h
6 * Generic Byte-reordering support 6 * Generic Byte-reordering support
7 * 7 *
8 * The "... p" macros, like le64_to_cpup, can be used with pointers 8 * The "... p" macros, like le64_to_cpup, can be used with pointers
diff --git a/include/linux/cma.h b/include/linux/cma.h
new file mode 100644
index 000000000000..371b93042520
--- /dev/null
+++ b/include/linux/cma.h
@@ -0,0 +1,27 @@
1#ifndef __CMA_H__
2#define __CMA_H__
3
4/*
5 * There is always at least global CMA area and a few optional
6 * areas configured in kernel .config.
7 */
8#ifdef CONFIG_CMA_AREAS
9#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
10
11#else
12#define MAX_CMA_AREAS (0)
13
14#endif
15
16struct cma;
17
18extern phys_addr_t cma_get_base(struct cma *cma);
19extern unsigned long cma_get_size(struct cma *cma);
20
21extern int __init cma_declare_contiguous(phys_addr_t size,
22 phys_addr_t base, phys_addr_t limit,
23 phys_addr_t alignment, unsigned int order_per_bit,
24 bool fixed, struct cma **res_cma);
25extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align);
26extern bool cma_release(struct cma *cma, struct page *pages, int count);
27#endif
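
This header is the whole public CMA interface at this point in the series: reserve a region while memblock is still live, then hand out physically contiguous pages from it at runtime. A rough sketch using the signatures exactly as declared above (the size, the demo_* names, and the calling context are assumptions; later kernels reorder these parameters):

    #include <linux/cma.h>
    #include <linux/init.h>
    #include <linux/mm.h>
    #include <linux/sizes.h>

    static struct cma *demo_cma;

    /* Early boot: carve out 16 MiB anywhere below the default limit. */
    static int __init demo_cma_reserve(void)
    {
            return cma_declare_contiguous(SZ_16M, 0, 0, 0, 0, false, &demo_cma);
    }

    /* Runtime: grab and immediately release four contiguous pages. */
    static bool demo_cma_roundtrip(void)
    {
            struct page *pages = cma_alloc(demo_cma, 4, 0);

            return pages ? cma_release(demo_cma, pages, 4) : false;
    }
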
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 772eab5d524a..569bbd039896 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -53,18 +53,13 @@
53 53
54#ifdef __KERNEL__ 54#ifdef __KERNEL__
55 55
56#include <linux/device.h>
57
56struct cma; 58struct cma;
57struct page; 59struct page;
58struct device;
59 60
60#ifdef CONFIG_DMA_CMA 61#ifdef CONFIG_DMA_CMA
61 62
62/*
63 * There is always at least global CMA area and a few optional device
64 * private areas configured in kernel .config.
65 */
66#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
67
68extern struct cma *dma_contiguous_default_area; 63extern struct cma *dma_contiguous_default_area;
69 64
70static inline struct cma *dev_get_cma_area(struct device *dev) 65static inline struct cma *dev_get_cma_area(struct device *dev)
@@ -123,8 +118,6 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
123 118
124#else 119#else
125 120
126#define MAX_CMA_AREAS (0)
127
128static inline struct cma *dev_get_cma_area(struct device *dev) 121static inline struct cma *dev_get_cma_area(struct device *dev)
129{ 122{
130 return NULL; 123 return NULL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2daccaf4b547..1ab6c6913040 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2688,7 +2688,7 @@ static const struct file_operations __fops = { \
2688 .read = simple_attr_read, \ 2688 .read = simple_attr_read, \
2689 .write = simple_attr_write, \ 2689 .write = simple_attr_write, \
2690 .llseek = generic_file_llseek, \ 2690 .llseek = generic_file_llseek, \
2691}; 2691}
2692 2692
2693static inline __printf(1, 2) 2693static inline __printf(1, 2)
2694void __simple_attr_check_format(const char *fmt, ...) 2694void __simple_attr_check_format(const char *fmt, ...)
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index fc7718c6bd3e..ca060d7c4fa6 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -322,16 +322,18 @@ extern int fsnotify_fasync(int fd, struct file *file, int on);
322extern void fsnotify_destroy_event(struct fsnotify_group *group, 322extern void fsnotify_destroy_event(struct fsnotify_group *group,
323 struct fsnotify_event *event); 323 struct fsnotify_event *event);
324/* attach the event to the group notification queue */ 324/* attach the event to the group notification queue */
325extern int fsnotify_add_notify_event(struct fsnotify_group *group, 325extern int fsnotify_add_event(struct fsnotify_group *group,
326 struct fsnotify_event *event, 326 struct fsnotify_event *event,
327 int (*merge)(struct list_head *, 327 int (*merge)(struct list_head *,
328 struct fsnotify_event *)); 328 struct fsnotify_event *));
329/* Remove passed event from groups notification queue */
330extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event);
329/* true if the group notification queue is empty */ 331/* true if the group notification queue is empty */
330extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); 332extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
331/* return, but do not dequeue the first event on the notification queue */ 333/* return, but do not dequeue the first event on the notification queue */
332extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group); 334extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group);
333/* return AND dequeue the first event on the notification queue */ 335/* return AND dequeue the first event on the notification queue */
334extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group); 336extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group);
335 337
336/* functions used to manipulate the marks attached to inodes */ 338/* functions used to manipulate the marks attached to inodes */
337 339
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6eb1fb37de9a..5e7219dc0fae 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -360,7 +360,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask);
360void *alloc_pages_exact(size_t size, gfp_t gfp_mask); 360void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
361void free_pages_exact(void *virt, size_t size); 361void free_pages_exact(void *virt, size_t size);
362/* This is different from alloc_pages_exact_node !!! */ 362/* This is different from alloc_pages_exact_node !!! */
363void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); 363void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
364 364
365#define __get_free_page(gfp_mask) \ 365#define __get_free_page(gfp_mask) \
366 __get_free_pages((gfp_mask), 0) 366 __get_free_pages((gfp_mask), 0)
diff --git a/include/linux/glob.h b/include/linux/glob.h
new file mode 100644
index 000000000000..861d8347d08e
--- /dev/null
+++ b/include/linux/glob.h
@@ -0,0 +1,9 @@
1#ifndef _LINUX_GLOB_H
2#define _LINUX_GLOB_H
3
4#include <linux/types.h> /* For bool */
5#include <linux/compiler.h> /* For __pure */
6
7bool __pure glob_match(char const *pat, char const *str);
8
9#endif /* _LINUX_GLOB_H */
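
glob_match() gives kernel code shell-style wildcard matching (*, ? and [] ranges) over NUL-terminated strings, and __pure makes it cheap to call repeatedly in filter paths. A tiny sketch assuming only what the header declares (is_trace_file is illustrative):

    #include <linux/glob.h>

    static bool is_trace_file(const char *name)
    {
            /* true for "trace-0001.dat", false for "trace.txt" */
            return glob_match("trace-*.dat", name);
    }
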
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 7fb31da45d03..9286a46b7d69 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -93,7 +93,7 @@ static inline int kmap_atomic_idx_push(void)
93 93
94#ifdef CONFIG_DEBUG_HIGHMEM 94#ifdef CONFIG_DEBUG_HIGHMEM
95 WARN_ON_ONCE(in_irq() && !irqs_disabled()); 95 WARN_ON_ONCE(in_irq() && !irqs_disabled());
96 BUG_ON(idx > KM_TYPE_NR); 96 BUG_ON(idx >= KM_TYPE_NR);
97#endif 97#endif
98 return idx; 98 return idx;
99} 99}
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b826239bdce0..63579cb8d3dc 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
93#endif /* CONFIG_DEBUG_VM */ 93#endif /* CONFIG_DEBUG_VM */
94 94
95extern unsigned long transparent_hugepage_flags; 95extern unsigned long transparent_hugepage_flags;
96extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
97 pmd_t *dst_pmd, pmd_t *src_pmd,
98 struct vm_area_struct *vma,
99 unsigned long addr, unsigned long end);
100extern int split_huge_page_to_list(struct page *page, struct list_head *list); 96extern int split_huge_page_to_list(struct page *page, struct list_head *list);
101static inline int split_huge_page(struct page *page) 97static inline int split_huge_page(struct page *page)
102{ 98{
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a23c096b3080..6e6d338641fe 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -87,7 +87,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
87#endif 87#endif
88 88
89extern unsigned long hugepages_treat_as_movable; 89extern unsigned long hugepages_treat_as_movable;
90extern const unsigned long hugetlb_zero, hugetlb_infinity;
91extern int sysctl_hugetlb_shm_group; 90extern int sysctl_hugetlb_shm_group;
92extern struct list_head huge_boot_pages; 91extern struct list_head huge_boot_pages;
93 92
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a9e2268ecccb..3dc22abbc68a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -493,11 +493,6 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte)
493 return buf; 493 return buf;
494} 494}
495 495
496static inline char * __deprecated pack_hex_byte(char *buf, u8 byte)
497{
498 return hex_byte_pack(buf, byte);
499}
500
501extern int hex_to_bin(char ch); 496extern int hex_to_bin(char ch);
502extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); 497extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
503 498
diff --git a/include/linux/klist.h b/include/linux/klist.h
index a370ce57cf1d..61e5b723ae73 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -44,7 +44,7 @@ struct klist_node {
44 44
45extern void klist_add_tail(struct klist_node *n, struct klist *k); 45extern void klist_add_tail(struct klist_node *n, struct klist *k);
46extern void klist_add_head(struct klist_node *n, struct klist *k); 46extern void klist_add_head(struct klist_node *n, struct klist *k);
47extern void klist_add_after(struct klist_node *n, struct klist_node *pos); 47extern void klist_add_behind(struct klist_node *n, struct klist_node *pos);
48extern void klist_add_before(struct klist_node *n, struct klist_node *pos); 48extern void klist_add_before(struct klist_node *n, struct klist_node *pos);
49 49
50extern void klist_del(struct klist_node *n); 50extern void klist_del(struct klist_node *n);
diff --git a/include/linux/list.h b/include/linux/list.h
index ef9594171062..cbbb96fcead9 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -654,15 +654,15 @@ static inline void hlist_add_before(struct hlist_node *n,
654 *(n->pprev) = n; 654 *(n->pprev) = n;
655} 655}
656 656
657static inline void hlist_add_after(struct hlist_node *n, 657static inline void hlist_add_behind(struct hlist_node *n,
658 struct hlist_node *next) 658 struct hlist_node *prev)
659{ 659{
660 next->next = n->next; 660 n->next = prev->next;
661 n->next = next; 661 prev->next = n;
662 next->pprev = &n->next; 662 n->pprev = &prev->next;
663 663
664 if(next->next) 664 if (n->next)
665 next->next->pprev = &next->next; 665 n->next->pprev = &n->next;
666} 666}
667 667
668/* after that we'll appear to be on some hlist and hlist_del will work */ 668/* after that we'll appear to be on some hlist and hlist_del will work */
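
Besides the rename, hlist_add_behind() swaps the argument order so the node being inserted comes first, mirroring hlist_add_before(). A sketch of inserting after a known cursor node; struct demo_item and both function names are illustrative:

    #include <linux/list.h>

    struct demo_item {
            int value;
            struct hlist_node node;
    };

    /* Link @new_item directly after @cursor, which is already on an hlist. */
    static void demo_insert_behind(struct demo_item *new_item,
                                   struct demo_item *cursor)
    {
            hlist_add_behind(&new_item->node, &cursor->node);
    }
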
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index b660e05b63d4..e8cc45307f8f 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -249,7 +249,7 @@ phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
249/* 249/*
250 * Set the allocation direction to bottom-up or top-down. 250 * Set the allocation direction to bottom-up or top-down.
251 */ 251 */
252static inline void memblock_set_bottom_up(bool enable) 252static inline void __init memblock_set_bottom_up(bool enable)
253{ 253{
254 memblock.bottom_up = enable; 254 memblock.bottom_up = enable;
255} 255}
@@ -264,7 +264,7 @@ static inline bool memblock_bottom_up(void)
264 return memblock.bottom_up; 264 return memblock.bottom_up;
265} 265}
266#else 266#else
267static inline void memblock_set_bottom_up(bool enable) {} 267static inline void __init memblock_set_bottom_up(bool enable) {}
268static inline bool memblock_bottom_up(void) { return false; } 268static inline bool memblock_bottom_up(void) { return false; }
269#endif 269#endif
270 270
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 010d125bffbf..d9524c49d767 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -26,11 +26,12 @@ enum {
26 MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO, 26 MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
27}; 27};
28 28
29/* Types for control the zone type of onlined memory */ 29/* Types for control the zone type of onlined and offlined memory */
30enum { 30enum {
31 ONLINE_KEEP, 31 MMOP_OFFLINE = -1,
32 ONLINE_KERNEL, 32 MMOP_ONLINE_KEEP,
33 ONLINE_MOVABLE, 33 MMOP_ONLINE_KERNEL,
34 MMOP_ONLINE_MOVABLE,
34}; 35};
35 36
36/* 37/*
@@ -258,6 +259,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
258extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, 259extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
259 void *arg, int (*func)(struct memory_block *, void *)); 260 void *arg, int (*func)(struct memory_block *, void *));
260extern int add_memory(int nid, u64 start, u64 size); 261extern int add_memory(int nid, u64 start, u64 size);
262extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default);
261extern int arch_add_memory(int nid, u64 start, u64 size); 263extern int arch_add_memory(int nid, u64 start, u64 size);
262extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); 264extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
263extern bool is_memblock_offlined(struct memory_block *mem); 265extern bool is_memblock_offlined(struct memory_block *mem);
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index edd82a105220..2f348d02f640 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -20,11 +20,13 @@ extern void dump_page_badflags(struct page *page, const char *reason,
20 } while (0) 20 } while (0)
21#define VM_WARN_ON(cond) WARN_ON(cond) 21#define VM_WARN_ON(cond) WARN_ON(cond)
22#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond) 22#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
23#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
23#else 24#else
24#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) 25#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
25#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) 26#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
26#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) 27#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
27#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) 28#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
29#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
28#endif 30#endif
29 31
30#ifdef CONFIG_DEBUG_VIRTUAL 32#ifdef CONFIG_DEBUG_VIRTUAL
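
VM_WARN_ONCE() fills the gap next to VM_WARN_ON_ONCE(): with CONFIG_DEBUG_VM it behaves like WARN_ONCE() and prints the supplied message once, without it the condition is only type-checked at build time, so it must stay free of side effects. A one-line usage sketch (the specific check is illustrative, not from the patch):

    #include <linux/mm.h>
    #include <linux/mmdebug.h>

    static void demo_check_mapcount(struct page *page)
    {
            VM_WARN_ONCE(page_mapcount(page) < 0,
                         "negative mapcount on page %p\n", page);
    }
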
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index deca87452528..27288692241e 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -170,6 +170,8 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn,
170 struct mm_struct *mm); 170 struct mm_struct *mm);
171extern void mmu_notifier_unregister(struct mmu_notifier *mn, 171extern void mmu_notifier_unregister(struct mmu_notifier *mn,
172 struct mm_struct *mm); 172 struct mm_struct *mm);
173extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
174 struct mm_struct *mm);
173extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); 175extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
174extern void __mmu_notifier_release(struct mm_struct *mm); 176extern void __mmu_notifier_release(struct mm_struct *mm);
175extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, 177extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
@@ -288,6 +290,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
288 set_pte_at(___mm, ___address, __ptep, ___pte); \ 290 set_pte_at(___mm, ___address, __ptep, ___pte); \
289}) 291})
290 292
293extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
294 void (*func)(struct rcu_head *rcu));
295extern void mmu_notifier_synchronize(void);
296
291#else /* CONFIG_MMU_NOTIFIER */ 297#else /* CONFIG_MMU_NOTIFIER */
292 298
293static inline void mmu_notifier_release(struct mm_struct *mm) 299static inline void mmu_notifier_release(struct mm_struct *mm)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6cbd1b6c3d20..318df7051850 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -143,6 +143,7 @@ enum zone_stat_item {
143 NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ 143 NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
144 NR_DIRTIED, /* page dirtyings since bootup */ 144 NR_DIRTIED, /* page dirtyings since bootup */
145 NR_WRITTEN, /* page writings since bootup */ 145 NR_WRITTEN, /* page writings since bootup */
146 NR_PAGES_SCANNED, /* pages scanned since last reclaim */
146#ifdef CONFIG_NUMA 147#ifdef CONFIG_NUMA
147 NUMA_HIT, /* allocated in intended node */ 148 NUMA_HIT, /* allocated in intended node */
148 NUMA_MISS, /* allocated in non intended node */ 149 NUMA_MISS, /* allocated in non intended node */
@@ -324,19 +325,12 @@ enum zone_type {
324#ifndef __GENERATING_BOUNDS_H 325#ifndef __GENERATING_BOUNDS_H
325 326
326struct zone { 327struct zone {
327 /* Fields commonly accessed by the page allocator */ 328 /* Read-mostly fields */
328 329
329 /* zone watermarks, access with *_wmark_pages(zone) macros */ 330 /* zone watermarks, access with *_wmark_pages(zone) macros */
330 unsigned long watermark[NR_WMARK]; 331 unsigned long watermark[NR_WMARK];
331 332
332 /* 333 /*
333 * When free pages are below this point, additional steps are taken
334 * when reading the number of free pages to avoid per-cpu counter
335 * drift allowing watermarks to be breached
336 */
337 unsigned long percpu_drift_mark;
338
339 /*
340 * We don't know if the memory that we're going to allocate will be freeable 334 * We don't know if the memory that we're going to allocate will be freeable
341 * or/and it will be released eventually, so to avoid totally wasting several 335 * or/and it will be released eventually, so to avoid totally wasting several
342 * GB of ram we must reserve some of the lower zone memory (otherwise we risk 336 * GB of ram we must reserve some of the lower zone memory (otherwise we risk
@@ -344,41 +338,26 @@ struct zone {
344 * on the higher zones). This array is recalculated at runtime if the 338 * on the higher zones). This array is recalculated at runtime if the
345 * sysctl_lowmem_reserve_ratio sysctl changes. 339 * sysctl_lowmem_reserve_ratio sysctl changes.
346 */ 340 */
347 unsigned long lowmem_reserve[MAX_NR_ZONES]; 341 long lowmem_reserve[MAX_NR_ZONES];
348
349 /*
350 * This is a per-zone reserve of pages that should not be
351 * considered dirtyable memory.
352 */
353 unsigned long dirty_balance_reserve;
354 342
355#ifdef CONFIG_NUMA 343#ifdef CONFIG_NUMA
356 int node; 344 int node;
345#endif
346
357 /* 347 /*
358 * zone reclaim becomes active if more unmapped pages exist. 348 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
349 * this zone's LRU. Maintained by the pageout code.
359 */ 350 */
360 unsigned long min_unmapped_pages; 351 unsigned int inactive_ratio;
361 unsigned long min_slab_pages; 352
362#endif 353 struct pglist_data *zone_pgdat;
363 struct per_cpu_pageset __percpu *pageset; 354 struct per_cpu_pageset __percpu *pageset;
355
364 /* 356 /*
365 * free areas of different sizes 357 * This is a per-zone reserve of pages that should not be
358 * considered dirtyable memory.
366 */ 359 */
367 spinlock_t lock; 360 unsigned long dirty_balance_reserve;
368#if defined CONFIG_COMPACTION || defined CONFIG_CMA
369 /* Set to true when the PG_migrate_skip bits should be cleared */
370 bool compact_blockskip_flush;
371
372 /* pfn where compaction free scanner should start */
373 unsigned long compact_cached_free_pfn;
374 /* pfn where async and sync compaction migration scanner should start */
375 unsigned long compact_cached_migrate_pfn[2];
376#endif
377#ifdef CONFIG_MEMORY_HOTPLUG
378 /* see spanned/present_pages for more description */
379 seqlock_t span_seqlock;
380#endif
381 struct free_area free_area[MAX_ORDER];
382 361
383#ifndef CONFIG_SPARSEMEM 362#ifndef CONFIG_SPARSEMEM
384 /* 363 /*
@@ -388,74 +367,14 @@ struct zone {
388 unsigned long *pageblock_flags; 367 unsigned long *pageblock_flags;
389#endif /* CONFIG_SPARSEMEM */ 368#endif /* CONFIG_SPARSEMEM */
390 369
391#ifdef CONFIG_COMPACTION 370#ifdef CONFIG_NUMA
392 /*
393 * On compaction failure, 1<<compact_defer_shift compactions
394 * are skipped before trying again. The number attempted since
395 * last failure is tracked with compact_considered.
396 */
397 unsigned int compact_considered;
398 unsigned int compact_defer_shift;
399 int compact_order_failed;
400#endif
401
402 ZONE_PADDING(_pad1_)
403
404 /* Fields commonly accessed by the page reclaim scanner */
405 spinlock_t lru_lock;
406 struct lruvec lruvec;
407
408 /* Evictions & activations on the inactive file list */
409 atomic_long_t inactive_age;
410
411 unsigned long pages_scanned; /* since last reclaim */
412 unsigned long flags; /* zone flags, see below */
413
414 /* Zone statistics */
415 atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
416
417 /*
418 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
419 * this zone's LRU. Maintained by the pageout code.
420 */
421 unsigned int inactive_ratio;
422
423
424 ZONE_PADDING(_pad2_)
425 /* Rarely used or read-mostly fields */
426
427 /* 371 /*
428 * wait_table -- the array holding the hash table 372 * zone reclaim becomes active if more unmapped pages exist.
429 * wait_table_hash_nr_entries -- the size of the hash table array
430 * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
431 *
432 * The purpose of all these is to keep track of the people
433 * waiting for a page to become available and make them
434 * runnable again when possible. The trouble is that this
435 * consumes a lot of space, especially when so few things
436 * wait on pages at a given time. So instead of using
437 * per-page waitqueues, we use a waitqueue hash table.
438 *
439 * The bucket discipline is to sleep on the same queue when
440 * colliding and wake all in that wait queue when removing.
441 * When something wakes, it must check to be sure its page is
442 * truly available, a la thundering herd. The cost of a
443 * collision is great, but given the expected load of the
444 * table, they should be so rare as to be outweighed by the
445 * benefits from the saved space.
446 *
447 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
448 * primary users of these fields, and in mm/page_alloc.c
449 * free_area_init_core() performs the initialization of them.
450 */ 373 */
451 wait_queue_head_t * wait_table; 374 unsigned long min_unmapped_pages;
452 unsigned long wait_table_hash_nr_entries; 375 unsigned long min_slab_pages;
453 unsigned long wait_table_bits; 376#endif /* CONFIG_NUMA */
454 377
455 /*
456 * Discontig memory support fields.
457 */
458 struct pglist_data *zone_pgdat;
459 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ 378 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
460 unsigned long zone_start_pfn; 379 unsigned long zone_start_pfn;
461 380
@@ -500,9 +419,11 @@ struct zone {
500 * adjust_managed_page_count() should be used instead of directly 419 * adjust_managed_page_count() should be used instead of directly
501 * touching zone->managed_pages and totalram_pages. 420 * touching zone->managed_pages and totalram_pages.
502 */ 421 */
422 unsigned long managed_pages;
503 unsigned long spanned_pages; 423 unsigned long spanned_pages;
504 unsigned long present_pages; 424 unsigned long present_pages;
505 unsigned long managed_pages; 425
426 const char *name;
506 427
507 /* 428 /*
508 * Number of MIGRATE_RESEVE page block. To maintain for just 429 * Number of MIGRATE_RESEVE page block. To maintain for just
@@ -510,10 +431,94 @@ struct zone {
510 */ 431 */
511 int nr_migrate_reserve_block; 432 int nr_migrate_reserve_block;
512 433
434#ifdef CONFIG_MEMORY_HOTPLUG
435 /* see spanned/present_pages for more description */
436 seqlock_t span_seqlock;
437#endif
438
513 /* 439 /*
514 * rarely used fields: 440 * wait_table -- the array holding the hash table
441 * wait_table_hash_nr_entries -- the size of the hash table array
442 * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
443 *
444 * The purpose of all these is to keep track of the people
445 * waiting for a page to become available and make them
446 * runnable again when possible. The trouble is that this
447 * consumes a lot of space, especially when so few things
448 * wait on pages at a given time. So instead of using
449 * per-page waitqueues, we use a waitqueue hash table.
450 *
451 * The bucket discipline is to sleep on the same queue when
452 * colliding and wake all in that wait queue when removing.
453 * When something wakes, it must check to be sure its page is
454 * truly available, a la thundering herd. The cost of a
455 * collision is great, but given the expected load of the
456 * table, they should be so rare as to be outweighed by the
457 * benefits from the saved space.
458 *
459 * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
460 * primary users of these fields, and in mm/page_alloc.c
461 * free_area_init_core() performs the initialization of them.
515 */ 462 */
516 const char *name; 463 wait_queue_head_t *wait_table;
464 unsigned long wait_table_hash_nr_entries;
465 unsigned long wait_table_bits;
466
467 ZONE_PADDING(_pad1_)
468
469 /* Write-intensive fields used from the page allocator */
470 spinlock_t lock;
471
472 /* free areas of different sizes */
473 struct free_area free_area[MAX_ORDER];
474
475 /* zone flags, see below */
476 unsigned long flags;
477
478 ZONE_PADDING(_pad2_)
479
480 /* Write-intensive fields used by page reclaim */
481
482 /* Fields commonly accessed by the page reclaim scanner */
483 spinlock_t lru_lock;
484 struct lruvec lruvec;
485
486 /* Evictions & activations on the inactive file list */
487 atomic_long_t inactive_age;
488
489 /*
490 * When free pages are below this point, additional steps are taken
491 * when reading the number of free pages to avoid per-cpu counter
492 * drift allowing watermarks to be breached
493 */
494 unsigned long percpu_drift_mark;
495
496#if defined CONFIG_COMPACTION || defined CONFIG_CMA
497 /* pfn where compaction free scanner should start */
498 unsigned long compact_cached_free_pfn;
499 /* pfn where async and sync compaction migration scanner should start */
500 unsigned long compact_cached_migrate_pfn[2];
501#endif
502
503#ifdef CONFIG_COMPACTION
504 /*
505 * On compaction failure, 1<<compact_defer_shift compactions
506 * are skipped before trying again. The number attempted since
507 * last failure is tracked with compact_considered.
508 */
509 unsigned int compact_considered;
510 unsigned int compact_defer_shift;
511 int compact_order_failed;
512#endif
513
514#if defined CONFIG_COMPACTION || defined CONFIG_CMA
515 /* Set to true when the PG_migrate_skip bits should be cleared */
516 bool compact_blockskip_flush;
517#endif
518
519 ZONE_PADDING(_pad3_)
520 /* Zone statistics */
521 atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
517} ____cacheline_internodealigned_in_smp; 522} ____cacheline_internodealigned_in_smp;
518 523
519typedef enum { 524typedef enum {
@@ -529,6 +534,7 @@ typedef enum {
529 ZONE_WRITEBACK, /* reclaim scanning has recently found 534 ZONE_WRITEBACK, /* reclaim scanning has recently found
530 * many pages under writeback 535 * many pages under writeback
531 */ 536 */
537 ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
532} zone_flags_t; 538} zone_flags_t;
533 539
534static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) 540static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -566,6 +572,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone)
566 return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); 572 return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
567} 573}
568 574
575static inline int zone_is_fair_depleted(const struct zone *zone)
576{
577 return test_bit(ZONE_FAIR_DEPLETED, &zone->flags);
578}
579
569static inline int zone_is_oom_locked(const struct zone *zone) 580static inline int zone_is_oom_locked(const struct zone *zone)
570{ 581{
571 return test_bit(ZONE_OOM_LOCKED, &zone->flags); 582 return test_bit(ZONE_OOM_LOCKED, &zone->flags);
@@ -872,6 +883,8 @@ static inline int zone_movable_is_highmem(void)
872{ 883{
873#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) 884#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
874 return movable_zone == ZONE_HIGHMEM; 885 return movable_zone == ZONE_HIGHMEM;
886#elif defined(CONFIG_HIGHMEM)
887 return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
875#else 888#else
876 return 0; 889 return 0;
877#endif 890#endif
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 58b9a02c38d2..83a6aeda899d 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -430,7 +430,15 @@ static inline int num_node_state(enum node_states state)
430 for_each_node_mask((__node), node_states[__state]) 430 for_each_node_mask((__node), node_states[__state])
431 431
432#define first_online_node first_node(node_states[N_ONLINE]) 432#define first_online_node first_node(node_states[N_ONLINE])
433#define next_online_node(nid) next_node((nid), node_states[N_ONLINE]) 433#define first_memory_node first_node(node_states[N_MEMORY])
434static inline int next_online_node(int nid)
435{
436 return next_node(nid, node_states[N_ONLINE]);
437}
438static inline int next_memory_node(int nid)
439{
440 return next_node(nid, node_states[N_MEMORY]);
441}
434 442
435extern int nr_node_ids; 443extern int nr_node_ids;
436extern int nr_online_nodes; 444extern int nr_online_nodes;
@@ -471,6 +479,7 @@ static inline int num_node_state(enum node_states state)
471 for ( (node) = 0; (node) == 0; (node) = 1) 479 for ( (node) = 0; (node) == 0; (node) = 1)
472 480
473#define first_online_node 0 481#define first_online_node 0
482#define first_memory_node 0
474#define next_online_node(nid) (MAX_NUMNODES) 483#define next_online_node(nid) (MAX_NUMNODES)
475#define nr_node_ids 1 484#define nr_node_ids 1
476#define nr_online_nodes 1 485#define nr_online_nodes 1
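
first_memory_node and next_memory_node() give N_MEMORY the same open-coded iteration helpers that N_ONLINE already had. A sketch of walking every node that actually has memory, assuming a NUMA=y build where both helpers shown above are defined (demo_walk_memory_nodes is illustrative):

    #include <linux/nodemask.h>
    #include <linux/printk.h>

    static void demo_walk_memory_nodes(void)
    {
            int nid;

            for (nid = first_memory_node; nid < MAX_NUMNODES;
                 nid = next_memory_node(nid))
                    pr_info("node %d has memory\n", nid);
    }
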
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 4cd62677feb9..647395a1a550 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -55,8 +55,8 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
55 struct mem_cgroup *memcg, nodemask_t *nodemask, 55 struct mem_cgroup *memcg, nodemask_t *nodemask,
56 const char *message); 56 const char *message);
57 57
58extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); 58extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags);
59extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); 59extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags);
60 60
61extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, 61extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
62 int order, const nodemask_t *nodemask); 62 int order, const nodemask_t *nodemask);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 8304959ad336..e1f5fcd79792 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -171,13 +171,12 @@ static inline int __TestClearPage##uname(struct page *page) \
171#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ 171#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
172 __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname) 172 __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname)
173 173
174#define PAGEFLAG_FALSE(uname) \
175static inline int Page##uname(const struct page *page) \
176 { return 0; }
177
178#define TESTSCFLAG(uname, lname) \ 174#define TESTSCFLAG(uname, lname) \
179 TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname) 175 TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
180 176
177#define TESTPAGEFLAG_FALSE(uname) \
178static inline int Page##uname(const struct page *page) { return 0; }
179
181#define SETPAGEFLAG_NOOP(uname) \ 180#define SETPAGEFLAG_NOOP(uname) \
182static inline void SetPage##uname(struct page *page) { } 181static inline void SetPage##uname(struct page *page) { }
183 182
@@ -187,12 +186,21 @@ static inline void ClearPage##uname(struct page *page) { }
187#define __CLEARPAGEFLAG_NOOP(uname) \ 186#define __CLEARPAGEFLAG_NOOP(uname) \
188static inline void __ClearPage##uname(struct page *page) { } 187static inline void __ClearPage##uname(struct page *page) { }
189 188
189#define TESTSETFLAG_FALSE(uname) \
190static inline int TestSetPage##uname(struct page *page) { return 0; }
191
190#define TESTCLEARFLAG_FALSE(uname) \ 192#define TESTCLEARFLAG_FALSE(uname) \
191static inline int TestClearPage##uname(struct page *page) { return 0; } 193static inline int TestClearPage##uname(struct page *page) { return 0; }
192 194
193#define __TESTCLEARFLAG_FALSE(uname) \ 195#define __TESTCLEARFLAG_FALSE(uname) \
194static inline int __TestClearPage##uname(struct page *page) { return 0; } 196static inline int __TestClearPage##uname(struct page *page) { return 0; }
195 197
198#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \
199 SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
200
201#define TESTSCFLAG_FALSE(uname) \
202 TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
203
196struct page; /* forward declaration */ 204struct page; /* forward declaration */
197 205
198TESTPAGEFLAG(Locked, locked) 206TESTPAGEFLAG(Locked, locked)
@@ -248,7 +256,6 @@ PAGEFLAG_FALSE(HighMem)
248PAGEFLAG(SwapCache, swapcache) 256PAGEFLAG(SwapCache, swapcache)
249#else 257#else
250PAGEFLAG_FALSE(SwapCache) 258PAGEFLAG_FALSE(SwapCache)
251 SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
252#endif 259#endif
253 260
254PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) 261PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
@@ -258,8 +265,8 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
258PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) 265PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
259 TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked) 266 TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
260#else 267#else
261PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) 268PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
262 TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked) 269 TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
263#endif 270#endif
264 271
265#ifdef CONFIG_ARCH_USES_PG_UNCACHED 272#ifdef CONFIG_ARCH_USES_PG_UNCACHED
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e1474ae18c88..3df8c7db7a4e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page)
484/* 484/*
485 * lock_page_or_retry - Lock the page, unless this would block and the 485 * lock_page_or_retry - Lock the page, unless this would block and the
486 * caller indicated that it can handle a retry. 486 * caller indicated that it can handle a retry.
487 *
488 * Return value and mmap_sem implications depend on flags; see
489 * __lock_page_or_retry().
487 */ 490 */
488static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, 491static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
489 unsigned int flags) 492 unsigned int flags)
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 319ff7e53efb..0990997a5304 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -31,7 +31,7 @@ static inline const char *printk_skip_level(const char *buffer)
31} 31}
32 32
33/* printk's without a loglevel use this.. */ 33/* printk's without a loglevel use this.. */
34#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL 34#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
35 35
36/* We show everything that is MORE important than this.. */ 36/* We show everything that is MORE important than this.. */
37#define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ 37#define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8183b46fbaa2..372ad5e0dcb8 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -432,9 +432,9 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
432} 432}
433 433
434/** 434/**
435 * hlist_add_after_rcu 435 * hlist_add_behind_rcu
436 * @prev: the existing element to add the new element after.
437 * @n: the new element to add to the hash list. 436 * @n: the new element to add to the hash list.
437 * @prev: the existing element to add the new element after.
438 * 438 *
439 * Description: 439 * Description:
440 * Adds the specified element to the specified hlist 440 * Adds the specified element to the specified hlist
@@ -449,8 +449,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
449 * hlist_for_each_entry_rcu(), used to prevent memory-consistency 449 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
450 * problems on Alpha CPUs. 450 * problems on Alpha CPUs.
451 */ 451 */
452static inline void hlist_add_after_rcu(struct hlist_node *prev, 452static inline void hlist_add_behind_rcu(struct hlist_node *n,
453 struct hlist_node *n) 453 struct hlist_node *prev)
454{ 454{
455 n->next = prev->next; 455 n->next = prev->next;
456 n->pprev = &prev->next; 456 n->pprev = &prev->next;
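
As with the plain list helper, hlist_add_behind_rcu() now takes the new node first; readers walking the list under rcu_read_lock() see either the old or the new chain, never a half-linked one. A writer-side sketch (struct demo_entry and the function name are illustrative; serialization of concurrent writers is assumed to be handled elsewhere):

    #include <linux/rculist.h>

    struct demo_entry {
            int key;
            struct hlist_node node;
    };

    /* Writer: link @new_entry right after @prev_entry on an RCU-protected hlist. */
    static void demo_rcu_insert_behind(struct demo_entry *new_entry,
                                       struct demo_entry *prev_entry)
    {
            hlist_add_behind_rcu(&new_entry->node, &prev_entry->node);
    }
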
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4bdbee80eede..1eb64043c076 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -311,7 +311,6 @@ extern void lru_add_page_tail(struct page *page, struct page *page_tail,
311 struct lruvec *lruvec, struct list_head *head); 311 struct lruvec *lruvec, struct list_head *head);
312extern void activate_page(struct page *); 312extern void activate_page(struct page *);
313extern void mark_page_accessed(struct page *); 313extern void mark_page_accessed(struct page *);
314extern void init_page_accessed(struct page *page);
315extern void lru_add_drain(void); 314extern void lru_add_drain(void);
316extern void lru_add_drain_cpu(int cpu); 315extern void lru_add_drain_cpu(int cpu);
317extern void lru_add_drain_all(void); 316extern void lru_add_drain_all(void);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 4b8a89189a29..b87696fdf06a 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -113,7 +113,7 @@ extern struct vm_struct *remove_vm_area(const void *addr);
113extern struct vm_struct *find_vm_area(const void *addr); 113extern struct vm_struct *find_vm_area(const void *addr);
114 114
115extern int map_vm_area(struct vm_struct *area, pgprot_t prot, 115extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
116 struct page ***pages); 116 struct page **pages);
117#ifdef CONFIG_MMU 117#ifdef CONFIG_MMU
118extern int map_kernel_range_noflush(unsigned long start, unsigned long size, 118extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
119 pgprot_t prot, struct page **pages); 119 pgprot_t prot, struct page **pages);
diff --git a/include/linux/zbud.h b/include/linux/zbud.h
index 13af0d450bf6..f9d41a6e361f 100644
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@@ -11,7 +11,7 @@ struct zbud_ops {
11 11
12struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops); 12struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
13void zbud_destroy_pool(struct zbud_pool *pool); 13void zbud_destroy_pool(struct zbud_pool *pool);
14int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp, 14int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
15 unsigned long *handle); 15 unsigned long *handle);
16void zbud_free(struct zbud_pool *pool, unsigned long handle); 16void zbud_free(struct zbud_pool *pool, unsigned long handle);
17int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries); 17int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
diff --git a/include/linux/zlib.h b/include/linux/zlib.h
index 9c5a6b4de0a3..197abb2a54c5 100644
--- a/include/linux/zlib.h
+++ b/include/linux/zlib.h
@@ -493,64 +493,6 @@ extern int deflateInit2 (z_streamp strm,
493 method). msg is set to null if there is no error message. deflateInit2 does 493 method). msg is set to null if there is no error message. deflateInit2 does
494 not perform any compression: this will be done by deflate(). 494 not perform any compression: this will be done by deflate().
495*/ 495*/
496
497#if 0
498extern int zlib_deflateSetDictionary (z_streamp strm,
499 const Byte *dictionary,
500 uInt dictLength);
501#endif
502/*
503 Initializes the compression dictionary from the given byte sequence
504 without producing any compressed output. This function must be called
505 immediately after deflateInit, deflateInit2 or deflateReset, before any
506 call of deflate. The compressor and decompressor must use exactly the same
507 dictionary (see inflateSetDictionary).
508
509 The dictionary should consist of strings (byte sequences) that are likely
510 to be encountered later in the data to be compressed, with the most commonly
511 used strings preferably put towards the end of the dictionary. Using a
512 dictionary is most useful when the data to be compressed is short and can be
513 predicted with good accuracy; the data can then be compressed better than
514 with the default empty dictionary.
515
516 Depending on the size of the compression data structures selected by
517 deflateInit or deflateInit2, a part of the dictionary may in effect be
518 discarded, for example if the dictionary is larger than the window size in
519 deflate or deflate2. Thus the strings most likely to be useful should be
520 put at the end of the dictionary, not at the front.
521
522 Upon return of this function, strm->adler is set to the Adler32 value
523 of the dictionary; the decompressor may later use this value to determine
524 which dictionary has been used by the compressor. (The Adler32 value
525 applies to the whole dictionary even if only a subset of the dictionary is
526 actually used by the compressor.)
527
528 deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
529 parameter is invalid (such as NULL dictionary) or the stream state is
530 inconsistent (for example if deflate has already been called for this stream
531 or if the compression method is bsort). deflateSetDictionary does not
532 perform any compression: this will be done by deflate().
533*/
534
535#if 0
536extern int zlib_deflateCopy (z_streamp dest, z_streamp source);
537#endif
538
539/*
540 Sets the destination stream as a complete copy of the source stream.
541
542 This function can be useful when several compression strategies will be
543 tried, for example when there are several ways of pre-processing the input
544 data with a filter. The streams that will be discarded should then be freed
545 by calling deflateEnd. Note that deflateCopy duplicates the internal
546 compression state which can be quite large, so this strategy is slow and
547 can consume lots of memory.
548
549 deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
550 enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
551 (such as zalloc being NULL). msg is left unchanged in both source and
552 destination.
553*/
554 496
555extern int zlib_deflateReset (z_streamp strm); 497extern int zlib_deflateReset (z_streamp strm);
556/* 498/*
@@ -568,27 +510,6 @@ static inline unsigned long deflateBound(unsigned long s)
568 return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11; 510 return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
569} 511}
570 512
571#if 0
572extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
573#endif
574/*
575 Dynamically update the compression level and compression strategy. The
576 interpretation of level and strategy is as in deflateInit2. This can be
577 used to switch between compression and straight copy of the input data, or
578 to switch to a different kind of input data requiring a different
579 strategy. If the compression level is changed, the input available so far
580 is compressed with the old level (and may be flushed); the new level will
581 take effect only at the next call of deflate().
582
583 Before the call of deflateParams, the stream state must be set as for
584 a call of deflate(), since the currently available input may have to
585 be compressed and flushed. In particular, strm->avail_out must be non-zero.
586
587 deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
588 stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
589 if strm->avail_out was zero.
590*/
591
592/* 513/*
593extern int inflateInit2 (z_streamp strm, int windowBits); 514extern int inflateInit2 (z_streamp strm, int windowBits);
594 515
@@ -631,45 +552,6 @@ extern int inflateInit2 (z_streamp strm, int windowBits);
631 and avail_out are unchanged.) 552 and avail_out are unchanged.)
632*/ 553*/
633 554
634extern int zlib_inflateSetDictionary (z_streamp strm,
635 const Byte *dictionary,
636 uInt dictLength);
637/*
638 Initializes the decompression dictionary from the given uncompressed byte
639 sequence. This function must be called immediately after a call of inflate,
640 if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
641 can be determined from the adler32 value returned by that call of inflate.
642 The compressor and decompressor must use exactly the same dictionary (see
643 deflateSetDictionary). For raw inflate, this function can be called
644 immediately after inflateInit2() or inflateReset() and before any call of
645 inflate() to set the dictionary. The application must ensure that the
646 dictionary that was used for compression is provided.
647
648 inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
649 parameter is invalid (such as NULL dictionary) or the stream state is
650 inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
651 expected one (incorrect adler32 value). inflateSetDictionary does not
652 perform any decompression: this will be done by subsequent calls of
653 inflate().
654*/
655
656#if 0
657extern int zlib_inflateSync (z_streamp strm);
658#endif
659/*
660 Skips invalid compressed data until a full flush point (see above the
661 description of deflate with Z_FULL_FLUSH) can be found, or until all
662 available input is skipped. No output is provided.
663
664 inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
665 if no more input was provided, Z_DATA_ERROR if no flush point has been found,
666 or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
667 case, the application may save the current value of total_in which
668 indicates where valid compressed data was found. In the error case, the
669 application may repeatedly call inflateSync, providing more input each time,
670 until success or end of the input data.
671*/
672
673extern int zlib_inflateReset (z_streamp strm); 555extern int zlib_inflateReset (z_streamp strm);
674/* 556/*
675 This function is equivalent to inflateEnd followed by inflateInit, 557 This function is equivalent to inflateEnd followed by inflateInit,
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
new file mode 100644
index 000000000000..f14bd75f08b3
--- /dev/null
+++ b/include/linux/zpool.h
@@ -0,0 +1,106 @@
1/*
2 * zpool memory storage api
3 *
4 * Copyright (C) 2014 Dan Streetman
5 *
6 * This is a common frontend for the zbud and zsmalloc memory
7 * storage pool implementations. Typically, this is used to
8 * store compressed memory.
9 */
10
11#ifndef _ZPOOL_H_
12#define _ZPOOL_H_
13
14struct zpool;
15
16struct zpool_ops {
17 int (*evict)(struct zpool *pool, unsigned long handle);
18};
19
20/*
21 * Control how a handle is mapped. It will be ignored if the
22 * implementation does not support it. Its use is optional.
23 * Note that this does not refer to memory protection, it
24 * refers to how the memory will be copied in/out if copying
25 * is necessary during mapping; read-write is the safest as
26 * it copies the existing memory in on map, and copies the
27 * changed memory back out on unmap. Write-only does not copy
28 * in the memory and should only be used for initialization.
29 * If in doubt, use ZPOOL_MM_DEFAULT which is read-write.
30 */
31enum zpool_mapmode {
32 ZPOOL_MM_RW, /* normal read-write mapping */
33 ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */
34 ZPOOL_MM_WO, /* write-only (no copy-in at map time) */
35
36 ZPOOL_MM_DEFAULT = ZPOOL_MM_RW
37};
38
39struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops);
40
41char *zpool_get_type(struct zpool *pool);
42
43void zpool_destroy_pool(struct zpool *pool);
44
45int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
46 unsigned long *handle);
47
48void zpool_free(struct zpool *pool, unsigned long handle);
49
50int zpool_shrink(struct zpool *pool, unsigned int pages,
51 unsigned int *reclaimed);
52
53void *zpool_map_handle(struct zpool *pool, unsigned long handle,
54 enum zpool_mapmode mm);
55
56void zpool_unmap_handle(struct zpool *pool, unsigned long handle);
57
58u64 zpool_get_total_size(struct zpool *pool);
59
60
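A minimal usage sketch of the allocator-facing API above (not part of this patch). It assumes the "zbud" backend is built in and that a NULL zpool_ops is acceptable when eviction is not needed; error handling is trimmed to the essentials.

    #include <linux/errno.h>
    #include <linux/gfp.h>
    #include <linux/string.h>
    #include <linux/zpool.h>

    /* Store one buffer in a freshly created pool, then tear everything down. */
    static int zpool_store_example(const void *src, size_t len)
    {
    	struct zpool *pool;
    	unsigned long handle;
    	void *dst;
    	int ret;

    	pool = zpool_create_pool("zbud", GFP_KERNEL, NULL);
    	if (!pool)
    		return -ENOMEM;

    	ret = zpool_malloc(pool, len, GFP_KERNEL, &handle);
    	if (ret)
    		goto out;

    	/* write-only mapping: no copy-in, we overwrite the whole object */
    	dst = zpool_map_handle(pool, handle, ZPOOL_MM_WO);
    	memcpy(dst, src, len);
    	zpool_unmap_handle(pool, handle);

    	zpool_free(pool, handle);
    out:
    	zpool_destroy_pool(pool);
    	return ret;
    }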
61/**
62 * struct zpool_driver - driver implementation for zpool
63 * @type: name of the driver.
64 * @list: entry in the list of zpool drivers.
65 * @create: create a new pool.
66 * @destroy: destroy a pool.
67 * @malloc: allocate mem from a pool.
68 * @free: free mem from a pool.
69 * @shrink: shrink the pool.
70 * @map: map a handle.
71 * @unmap: unmap a handle.
72 * @total_size: get total size of a pool.
73 *
74 * This is created by a zpool implementation and registered
75 * with zpool.
76 */
77struct zpool_driver {
78 char *type;
79 struct module *owner;
80 atomic_t refcount;
81 struct list_head list;
82
83 void *(*create)(gfp_t gfp, struct zpool_ops *ops);
84 void (*destroy)(void *pool);
85
86 int (*malloc)(void *pool, size_t size, gfp_t gfp,
87 unsigned long *handle);
88 void (*free)(void *pool, unsigned long handle);
89
90 int (*shrink)(void *pool, unsigned int pages,
91 unsigned int *reclaimed);
92
93 void *(*map)(void *pool, unsigned long handle,
94 enum zpool_mapmode mm);
95 void (*unmap)(void *pool, unsigned long handle);
96
97 u64 (*total_size)(void *pool);
98};
99
100void zpool_register_driver(struct zpool_driver *driver);
101
102int zpool_unregister_driver(struct zpool_driver *driver);
103
104int zpool_evict(void *pool, unsigned long handle);
105
106#endif
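Conversely, a backend publishes itself by filling in struct zpool_driver and calling zpool_register_driver(). The skeleton below is a hypothetical toy (not from this patch) that merely wraps kmalloc() to show the registration flow; real backends such as zbud or zsmalloc implement proper pool management.

    #include <linux/errno.h>
    #include <linux/module.h>
    #include <linux/slab.h>
    #include <linux/zpool.h>

    static void *toy_create(gfp_t gfp, struct zpool_ops *ops)
    {
    	return (void *)1;	/* no per-pool state in this toy */
    }

    static void toy_destroy(void *pool) { }

    static int toy_malloc(void *pool, size_t size, gfp_t gfp,
    		      unsigned long *handle)
    {
    	void *obj = kmalloc(size, gfp);

    	if (!obj)
    		return -ENOMEM;
    	*handle = (unsigned long)obj;
    	return 0;
    }

    static void toy_free(void *pool, unsigned long handle)
    {
    	kfree((void *)handle);
    }

    static int toy_shrink(void *pool, unsigned int pages,
    		      unsigned int *reclaimed)
    {
    	return -EINVAL;		/* no eviction support */
    }

    static void *toy_map(void *pool, unsigned long handle,
    		     enum zpool_mapmode mm)
    {
    	return (void *)handle;	/* objects are directly addressable */
    }

    static void toy_unmap(void *pool, unsigned long handle) { }

    static u64 toy_total_size(void *pool)
    {
    	return 0;
    }

    static struct zpool_driver toy_zpool_driver = {
    	.type		= "toy",
    	.owner		= THIS_MODULE,
    	.create		= toy_create,
    	.destroy	= toy_destroy,
    	.malloc		= toy_malloc,
    	.free		= toy_free,
    	.shrink		= toy_shrink,
    	.map		= toy_map,
    	.unmap		= toy_unmap,
    	.total_size	= toy_total_size,
    };

    static int __init toy_zpool_init(void)
    {
    	zpool_register_driver(&toy_zpool_driver);
    	return 0;
    }
    module_init(toy_zpool_init);

    static void __exit toy_zpool_exit(void)
    {
    	zpool_unregister_driver(&toy_zpool_driver);
    }
    module_exit(toy_zpool_exit);

    MODULE_LICENSE("GPL");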
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 4e4f2f8b1ac2..dd2b5467d905 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -17,6 +17,7 @@
17 {MR_MEMORY_HOTPLUG, "memory_hotplug"}, \ 17 {MR_MEMORY_HOTPLUG, "memory_hotplug"}, \
18 {MR_SYSCALL, "syscall_or_cpuset"}, \ 18 {MR_SYSCALL, "syscall_or_cpuset"}, \
19 {MR_MEMPOLICY_MBIND, "mempolicy_mbind"}, \ 19 {MR_MEMPOLICY_MBIND, "mempolicy_mbind"}, \
20 {MR_NUMA_MISPLACED, "numa_misplaced"}, \
20 {MR_CMA, "cma"} 21 {MR_CMA, "cma"}
21 22
22TRACE_EVENT(mm_migrate_pages, 23TRACE_EVENT(mm_migrate_pages,
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
index 1c9fabde69e4..ce0803b8d05f 100644
--- a/include/trace/events/pagemap.h
+++ b/include/trace/events/pagemap.h
@@ -28,12 +28,10 @@ TRACE_EVENT(mm_lru_insertion,
28 28
29 TP_PROTO( 29 TP_PROTO(
30 struct page *page, 30 struct page *page,
31 unsigned long pfn, 31 int lru
32 int lru,
33 unsigned long flags
34 ), 32 ),
35 33
36 TP_ARGS(page, pfn, lru, flags), 34 TP_ARGS(page, lru),
37 35
38 TP_STRUCT__entry( 36 TP_STRUCT__entry(
39 __field(struct page *, page ) 37 __field(struct page *, page )
@@ -44,9 +42,9 @@ TRACE_EVENT(mm_lru_insertion,
44 42
45 TP_fast_assign( 43 TP_fast_assign(
46 __entry->page = page; 44 __entry->page = page;
47 __entry->pfn = pfn; 45 __entry->pfn = page_to_pfn(page);
48 __entry->lru = lru; 46 __entry->lru = lru;
49 __entry->flags = flags; 47 __entry->flags = trace_pagemap_flags(page);
50 ), 48 ),
51 49
52 /* Flag format is based on page-types.c formatting for pagemap */ 50 /* Flag format is based on page-types.c formatting for pagemap */
@@ -64,9 +62,9 @@ TRACE_EVENT(mm_lru_insertion,
64 62
65TRACE_EVENT(mm_lru_activate, 63TRACE_EVENT(mm_lru_activate,
66 64
67 TP_PROTO(struct page *page, unsigned long pfn), 65 TP_PROTO(struct page *page),
68 66
69 TP_ARGS(page, pfn), 67 TP_ARGS(page),
70 68
71 TP_STRUCT__entry( 69 TP_STRUCT__entry(
72 __field(struct page *, page ) 70 __field(struct page *, page )
@@ -75,7 +73,7 @@ TRACE_EVENT(mm_lru_activate,
75 73
76 TP_fast_assign( 74 TP_fast_assign(
77 __entry->page = page; 75 __entry->page = page;
78 __entry->pfn = pfn; 76 __entry->pfn = page_to_pfn(page);
79 ), 77 ),
80 78
81 /* Flag format is based on page-types.c formatting for pagemap */ 79 /* Flag format is based on page-types.c formatting for pagemap */
diff --git a/init/Kconfig b/init/Kconfig
index 41066e49e880..a291b7ef4738 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -807,15 +807,53 @@ config LOG_BUF_SHIFT
807 range 12 21 807 range 12 21
808 default 17 808 default 17
809 help 809 help
810 Select kernel log buffer size as a power of 2. 810 Select the minimal kernel log buffer size as a power of 2.
811 The final size is affected by the LOG_CPU_MAX_BUF_SHIFT config
812 parameter; see below. Any higher size might also be forced
813 by the "log_buf_len" boot parameter.
814
811 Examples: 815 Examples:
812 17 => 128 KB 816 17 => 128 KB
813 16 => 64 KB 817 16 => 64 KB
814 15 => 32 KB 818 15 => 32 KB
815 14 => 16 KB 819 14 => 16 KB
816 13 => 8 KB 820 13 => 8 KB
817 12 => 4 KB 821 12 => 4 KB
818 822
823config LOG_CPU_MAX_BUF_SHIFT
824 int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)"
825 range 0 21
826 default 12 if !BASE_SMALL
827 default 0 if BASE_SMALL
828 help
829 This option allows increasing the default ring buffer size
830 according to the number of CPUs. The value defines the contribution
831 of each CPU as a power of 2. The used space is typically only a few
832 lines; however, it might be much more when problems are reported,
833 e.g. backtraces.
834
835 The increased size means that a new buffer has to be allocated and
836 the original static one is unused. It makes sense only on systems
837 with many CPUs. Therefore this value is used only when the sum of
838 contributions is greater than half of the default kernel ring
839 buffer as defined by LOG_BUF_SHIFT. The default values are set
840 so that more than 64 CPUs are needed to trigger the allocation.
841
842 This option is also ignored when the "log_buf_len" kernel parameter is
843 used, as it forces an exact (power of two) size of the ring buffer.
844
845 The number of possible CPUs is used for this computation, ignoring
846 hotplugging, making the computation optimal for the worst case
847 scenario while allowing a simple algorithm to be used from bootup.
848
849 Example shift values and their meaning:
850 17 => 128 KB for each CPU
851 16 => 64 KB for each CPU
852 15 => 32 KB for each CPU
853 14 => 16 KB for each CPU
854 13 => 8 KB for each CPU
855 12 => 4 KB for each CPU
856
819# 857#
820# Architectures with an unreliable sched_clock() should select this: 858# Architectures with an unreliable sched_clock() should select this:
821# 859#
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 8e9bc9c3dbb7..c447cd9848d1 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -106,7 +106,7 @@ static inline struct audit_entry *audit_init_entry(u32 field_count)
106 if (unlikely(!entry)) 106 if (unlikely(!entry))
107 return NULL; 107 return NULL;
108 108
109 fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL); 109 fields = kcalloc(field_count, sizeof(*fields), GFP_KERNEL);
110 if (unlikely(!fields)) { 110 if (unlikely(!fields)) {
111 kfree(entry); 111 kfree(entry);
112 return NULL; 112 return NULL;
@@ -160,7 +160,7 @@ static __u32 *classes[AUDIT_SYSCALL_CLASSES];
160 160
161int __init audit_register_class(int class, unsigned *list) 161int __init audit_register_class(int class, unsigned *list)
162{ 162{
163 __u32 *p = kzalloc(AUDIT_BITMASK_SIZE * sizeof(__u32), GFP_KERNEL); 163 __u32 *p = kcalloc(AUDIT_BITMASK_SIZE, sizeof(__u32), GFP_KERNEL);
164 if (!p) 164 if (!p)
165 return -ENOMEM; 165 return -ENOMEM;
166 while (*list != ~0U) { 166 while (*list != ~0U) {
diff --git a/kernel/exit.c b/kernel/exit.c
index e5c4668f1799..88c6b3e42583 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -455,6 +455,7 @@ static void exit_mm(struct task_struct * tsk)
455 task_unlock(tsk); 455 task_unlock(tsk);
456 mm_update_next_owner(mm); 456 mm_update_next_owner(mm);
457 mmput(mm); 457 mmput(mm);
458 clear_thread_flag(TIF_MEMDIE);
458} 459}
459 460
460/* 461/*
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 13e839dbca07..de1a6bb6861d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -45,6 +45,7 @@
45#include <linux/poll.h> 45#include <linux/poll.h>
46#include <linux/irq_work.h> 46#include <linux/irq_work.h>
47#include <linux/utsname.h> 47#include <linux/utsname.h>
48#include <linux/ctype.h>
48 49
49#include <asm/uaccess.h> 50#include <asm/uaccess.h>
50 51
@@ -56,7 +57,7 @@
56 57
57int console_printk[4] = { 58int console_printk[4] = {
58 CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ 59 CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */
59 DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ 60 MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */
60 CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ 61 CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */
61 CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ 62 CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */
62}; 63};
@@ -113,9 +114,9 @@ static int __down_trylock_console_sem(unsigned long ip)
113 * This is used for debugging the mess that is the VT code by 114 * This is used for debugging the mess that is the VT code by
114 * keeping track if we have the console semaphore held. It's 115 * keeping track if we have the console semaphore held. It's
115 * definitely not the perfect debug tool (we don't know if _WE_ 116 * definitely not the perfect debug tool (we don't know if _WE_
116 * hold it are racing, but it helps tracking those weird code 117 * hold it and are racing, but it helps tracking those weird code
117 * path in the console code where we end up in places I want 118 * paths in the console code where we end up in places I want
118 * locked without the console semaphore held 119 * locked without the console semaphore held).
119 */ 120 */
120static int console_locked, console_suspended; 121static int console_locked, console_suspended;
121 122
@@ -146,8 +147,8 @@ static int console_may_schedule;
146 * the overall length of the record. 147 * the overall length of the record.
147 * 148 *
148 * The heads to the first and last entry in the buffer, as well as the 149 * The heads to the first and last entry in the buffer, as well as the
149 * sequence numbers of these both entries are maintained when messages 150 * sequence numbers of these entries are maintained when messages are
150 * are stored.. 151 * stored.
151 * 152 *
152 * If the heads indicate available messages, the length in the header 153 * If the heads indicate available messages, the length in the header
153 tells the start of the next message. A length == 0 for the next message 154 tells the start of the next message. A length == 0 for the next message
@@ -257,7 +258,7 @@ static u64 clear_seq;
257static u32 clear_idx; 258static u32 clear_idx;
258 259
259#define PREFIX_MAX 32 260#define PREFIX_MAX 32
260#define LOG_LINE_MAX 1024 - PREFIX_MAX 261#define LOG_LINE_MAX (1024 - PREFIX_MAX)
261 262
262/* record buffer */ 263/* record buffer */
263#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 264#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
@@ -266,6 +267,7 @@ static u32 clear_idx;
266#define LOG_ALIGN __alignof__(struct printk_log) 267#define LOG_ALIGN __alignof__(struct printk_log)
267#endif 268#endif
268#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 269#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
270#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)
269static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); 271static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
270static char *log_buf = __log_buf; 272static char *log_buf = __log_buf;
271static u32 log_buf_len = __LOG_BUF_LEN; 273static u32 log_buf_len = __LOG_BUF_LEN;
@@ -344,7 +346,7 @@ static int log_make_free_space(u32 msg_size)
344 while (log_first_seq < log_next_seq) { 346 while (log_first_seq < log_next_seq) {
345 if (logbuf_has_space(msg_size, false)) 347 if (logbuf_has_space(msg_size, false))
346 return 0; 348 return 0;
347 /* drop old messages until we have enough continuous space */ 349 /* drop old messages until we have enough contiguous space */
348 log_first_idx = log_next(log_first_idx); 350 log_first_idx = log_next(log_first_idx);
349 log_first_seq++; 351 log_first_seq++;
350 } 352 }
@@ -453,11 +455,7 @@ static int log_store(int facility, int level,
453 return msg->text_len; 455 return msg->text_len;
454} 456}
455 457
456#ifdef CONFIG_SECURITY_DMESG_RESTRICT 458int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);
457int dmesg_restrict = 1;
458#else
459int dmesg_restrict;
460#endif
461 459
462static int syslog_action_restricted(int type) 460static int syslog_action_restricted(int type)
463{ 461{
@@ -828,34 +826,74 @@ void log_buf_kexec_setup(void)
828/* requested log_buf_len from kernel cmdline */ 826/* requested log_buf_len from kernel cmdline */
829static unsigned long __initdata new_log_buf_len; 827static unsigned long __initdata new_log_buf_len;
830 828
831/* save requested log_buf_len since it's too early to process it */ 829/* we practice scaling the ring buffer by powers of 2 */
832static int __init log_buf_len_setup(char *str) 830static void __init log_buf_len_update(unsigned size)
833{ 831{
834 unsigned size = memparse(str, &str);
835
836 if (size) 832 if (size)
837 size = roundup_pow_of_two(size); 833 size = roundup_pow_of_two(size);
838 if (size > log_buf_len) 834 if (size > log_buf_len)
839 new_log_buf_len = size; 835 new_log_buf_len = size;
836}
837
838/* save requested log_buf_len since it's too early to process it */
839static int __init log_buf_len_setup(char *str)
840{
841 unsigned size = memparse(str, &str);
842
843 log_buf_len_update(size);
840 844
841 return 0; 845 return 0;
842} 846}
843early_param("log_buf_len", log_buf_len_setup); 847early_param("log_buf_len", log_buf_len_setup);
844 848
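For example, booting with "log_buf_len=5M" is parsed by memparse() and rounded up to the next power of two (8 MB) by log_buf_len_update(); the request only takes effect if it exceeds the current buffer size.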
849static void __init log_buf_add_cpu(void)
850{
851 unsigned int cpu_extra;
852
853 /*
854 * archs should set up cpu_possible_bits properly with
855 * set_cpu_possible() after setup_arch() but just in
856 * case lets ensure this is valid.
857 */
858 if (num_possible_cpus() == 1)
859 return;
860
861 cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;
862
863 /* by default this will only continue for large systems (> 64 CPUs) */
864 if (cpu_extra <= __LOG_BUF_LEN / 2)
865 return;
866
867 pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
868 __LOG_CPU_MAX_BUF_LEN);
869 pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
870 cpu_extra);
871 pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);
872
873 log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
874}
875
845void __init setup_log_buf(int early) 876void __init setup_log_buf(int early)
846{ 877{
847 unsigned long flags; 878 unsigned long flags;
848 char *new_log_buf; 879 char *new_log_buf;
849 int free; 880 int free;
850 881
882 if (log_buf != __log_buf)
883 return;
884
885 if (!early && !new_log_buf_len)
886 log_buf_add_cpu();
887
851 if (!new_log_buf_len) 888 if (!new_log_buf_len)
852 return; 889 return;
853 890
854 if (early) { 891 if (early) {
855 new_log_buf = 892 new_log_buf =
856 memblock_virt_alloc(new_log_buf_len, PAGE_SIZE); 893 memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
857 } else { 894 } else {
858 new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, 0); 895 new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
896 LOG_ALIGN);
859 } 897 }
860 898
861 if (unlikely(!new_log_buf)) { 899 if (unlikely(!new_log_buf)) {
@@ -872,7 +910,7 @@ void __init setup_log_buf(int early)
872 memcpy(log_buf, __log_buf, __LOG_BUF_LEN); 910 memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
873 raw_spin_unlock_irqrestore(&logbuf_lock, flags); 911 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
874 912
875 pr_info("log_buf_len: %d\n", log_buf_len); 913 pr_info("log_buf_len: %d bytes\n", log_buf_len);
876 pr_info("early log buf free: %d(%d%%)\n", 914 pr_info("early log buf free: %d(%d%%)\n",
877 free, (free * 100) / __LOG_BUF_LEN); 915 free, (free * 100) / __LOG_BUF_LEN);
878} 916}
@@ -881,7 +919,7 @@ static bool __read_mostly ignore_loglevel;
881 919
882static int __init ignore_loglevel_setup(char *str) 920static int __init ignore_loglevel_setup(char *str)
883{ 921{
884 ignore_loglevel = 1; 922 ignore_loglevel = true;
885 pr_info("debug: ignoring loglevel setting.\n"); 923 pr_info("debug: ignoring loglevel setting.\n");
886 924
887 return 0; 925 return 0;
@@ -947,11 +985,7 @@ static inline void boot_delay_msec(int level)
947} 985}
948#endif 986#endif
949 987
950#if defined(CONFIG_PRINTK_TIME) 988static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
951static bool printk_time = 1;
952#else
953static bool printk_time;
954#endif
955module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); 989module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
956 990
957static size_t print_time(u64 ts, char *buf) 991static size_t print_time(u64 ts, char *buf)
@@ -1310,7 +1344,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
1310 * for pending data, not the size; return the count of 1344 * for pending data, not the size; return the count of
1311 * records, not the length. 1345 * records, not the length.
1312 */ 1346 */
1313 error = log_next_idx - syslog_idx; 1347 error = log_next_seq - syslog_seq;
1314 } else { 1348 } else {
1315 u64 seq = syslog_seq; 1349 u64 seq = syslog_seq;
1316 u32 idx = syslog_idx; 1350 u32 idx = syslog_idx;
@@ -1416,10 +1450,9 @@ static int have_callable_console(void)
1416/* 1450/*
1417 * Can we actually use the console at this time on this cpu? 1451 * Can we actually use the console at this time on this cpu?
1418 * 1452 *
1419 * Console drivers may assume that per-cpu resources have 1453 * Console drivers may assume that per-cpu resources have been allocated. So
1420 * been allocated. So unless they're explicitly marked as 1454 * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
1421 * being able to cope (CON_ANYTIME) don't call them until 1455 * call them until this CPU is officially up.
1422 * this CPU is officially up.
1423 */ 1456 */
1424static inline int can_use_console(unsigned int cpu) 1457static inline int can_use_console(unsigned int cpu)
1425{ 1458{
@@ -1432,8 +1465,10 @@ static inline int can_use_console(unsigned int cpu)
1432 * console_lock held, and 'console_locked' set) if it 1465 * console_lock held, and 'console_locked' set) if it
1433 * is successful, false otherwise. 1466 * is successful, false otherwise.
1434 */ 1467 */
1435static int console_trylock_for_printk(unsigned int cpu) 1468static int console_trylock_for_printk(void)
1436{ 1469{
1470 unsigned int cpu = smp_processor_id();
1471
1437 if (!console_trylock()) 1472 if (!console_trylock())
1438 return 0; 1473 return 0;
1439 /* 1474 /*
@@ -1476,7 +1511,7 @@ static struct cont {
1476 struct task_struct *owner; /* task of first print*/ 1511 struct task_struct *owner; /* task of first print*/
1477 u64 ts_nsec; /* time of first print */ 1512 u64 ts_nsec; /* time of first print */
1478 u8 level; /* log level of first message */ 1513 u8 level; /* log level of first message */
1479 u8 facility; /* log level of first message */ 1514 u8 facility; /* log facility of first message */
1480 enum log_flags flags; /* prefix, newline flags */ 1515 enum log_flags flags; /* prefix, newline flags */
1481 bool flushed:1; /* buffer sealed and committed */ 1516 bool flushed:1; /* buffer sealed and committed */
1482} cont; 1517} cont;
@@ -1608,7 +1643,8 @@ asmlinkage int vprintk_emit(int facility, int level,
1608 */ 1643 */
1609 if (!oops_in_progress && !lockdep_recursing(current)) { 1644 if (!oops_in_progress && !lockdep_recursing(current)) {
1610 recursion_bug = 1; 1645 recursion_bug = 1;
1611 goto out_restore_irqs; 1646 local_irq_restore(flags);
1647 return 0;
1612 } 1648 }
1613 zap_locks(); 1649 zap_locks();
1614 } 1650 }
@@ -1716,21 +1752,30 @@ asmlinkage int vprintk_emit(int facility, int level,
1716 1752
1717 logbuf_cpu = UINT_MAX; 1753 logbuf_cpu = UINT_MAX;
1718 raw_spin_unlock(&logbuf_lock); 1754 raw_spin_unlock(&logbuf_lock);
1755 lockdep_on();
1756 local_irq_restore(flags);
1719 1757
1720 /* If called from the scheduler, we can not call up(). */ 1758 /* If called from the scheduler, we can not call up(). */
1721 if (!in_sched) { 1759 if (!in_sched) {
1760 lockdep_off();
1761 /*
1762 * Disable preemption to avoid being preempted while holding
1763 * console_sem which would prevent anyone from printing to
1764 * console
1765 */
1766 preempt_disable();
1767
1722 /* 1768 /*
1723 * Try to acquire and then immediately release the console 1769 * Try to acquire and then immediately release the console
1724 * semaphore. The release will print out buffers and wake up 1770 * semaphore. The release will print out buffers and wake up
1725 * /dev/kmsg and syslog() users. 1771 * /dev/kmsg and syslog() users.
1726 */ 1772 */
1727 if (console_trylock_for_printk(this_cpu)) 1773 if (console_trylock_for_printk())
1728 console_unlock(); 1774 console_unlock();
1775 preempt_enable();
1776 lockdep_on();
1729 } 1777 }
1730 1778
1731 lockdep_on();
1732out_restore_irqs:
1733 local_irq_restore(flags);
1734 return printed_len; 1779 return printed_len;
1735} 1780}
1736EXPORT_SYMBOL(vprintk_emit); 1781EXPORT_SYMBOL(vprintk_emit);
@@ -1802,7 +1847,7 @@ EXPORT_SYMBOL(printk);
1802 1847
1803#define LOG_LINE_MAX 0 1848#define LOG_LINE_MAX 0
1804#define PREFIX_MAX 0 1849#define PREFIX_MAX 0
1805#define LOG_LINE_MAX 0 1850
1806static u64 syslog_seq; 1851static u64 syslog_seq;
1807static u32 syslog_idx; 1852static u32 syslog_idx;
1808static u64 console_seq; 1853static u64 console_seq;
@@ -1881,11 +1926,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
1881 return 0; 1926 return 0;
1882} 1927}
1883/* 1928/*
1884 * Set up a list of consoles. Called from init/main.c 1929 * Set up a console. Called via do_early_param() in init/main.c
1930 * for each "console=" parameter in the boot command line.
1885 */ 1931 */
1886static int __init console_setup(char *str) 1932static int __init console_setup(char *str)
1887{ 1933{
1888 char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ 1934 char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */
1889 char *s, *options, *brl_options = NULL; 1935 char *s, *options, *brl_options = NULL;
1890 int idx; 1936 int idx;
1891 1937
@@ -1902,7 +1948,8 @@ static int __init console_setup(char *str)
1902 strncpy(buf, str, sizeof(buf) - 1); 1948 strncpy(buf, str, sizeof(buf) - 1);
1903 } 1949 }
1904 buf[sizeof(buf) - 1] = 0; 1950 buf[sizeof(buf) - 1] = 0;
1905 if ((options = strchr(str, ',')) != NULL) 1951 options = strchr(str, ',');
1952 if (options)
1906 *(options++) = 0; 1953 *(options++) = 0;
1907#ifdef __sparc__ 1954#ifdef __sparc__
1908 if (!strcmp(str, "ttya")) 1955 if (!strcmp(str, "ttya"))
@@ -1911,7 +1958,7 @@ static int __init console_setup(char *str)
1911 strcpy(buf, "ttyS1"); 1958 strcpy(buf, "ttyS1");
1912#endif 1959#endif
1913 for (s = buf; *s; s++) 1960 for (s = buf; *s; s++)
1914 if ((*s >= '0' && *s <= '9') || *s == ',') 1961 if (isdigit(*s) || *s == ',')
1915 break; 1962 break;
1916 idx = simple_strtoul(s, NULL, 10); 1963 idx = simple_strtoul(s, NULL, 10);
1917 *s = 0; 1964 *s = 0;
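For example, "console=ttyS0,115200n8" is split at the comma so that options points to "115200n8"; the scan then stops at the first digit, simple_strtoul() yields idx 0, and truncating at that digit leaves the name "ttyS".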
@@ -1950,7 +1997,6 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
1950 i++, c++) 1997 i++, c++)
1951 if (strcmp(c->name, name) == 0 && c->index == idx) { 1998 if (strcmp(c->name, name) == 0 && c->index == idx) {
1952 strlcpy(c->name, name_new, sizeof(c->name)); 1999 strlcpy(c->name, name_new, sizeof(c->name));
1953 c->name[sizeof(c->name) - 1] = 0;
1954 c->options = options; 2000 c->options = options;
1955 c->index = idx_new; 2001 c->index = idx_new;
1956 return i; 2002 return i;
@@ -1959,12 +2005,12 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
1959 return -1; 2005 return -1;
1960} 2006}
1961 2007
1962bool console_suspend_enabled = 1; 2008bool console_suspend_enabled = true;
1963EXPORT_SYMBOL(console_suspend_enabled); 2009EXPORT_SYMBOL(console_suspend_enabled);
1964 2010
1965static int __init console_suspend_disable(char *str) 2011static int __init console_suspend_disable(char *str)
1966{ 2012{
1967 console_suspend_enabled = 0; 2013 console_suspend_enabled = false;
1968 return 1; 2014 return 1;
1969} 2015}
1970__setup("no_console_suspend", console_suspend_disable); 2016__setup("no_console_suspend", console_suspend_disable);
@@ -2045,8 +2091,8 @@ EXPORT_SYMBOL(console_lock);
2045/** 2091/**
2046 * console_trylock - try to lock the console system for exclusive use. 2092 * console_trylock - try to lock the console system for exclusive use.
2047 * 2093 *
2048 * Tried to acquire a lock which guarantees that the caller has 2094 * Try to acquire a lock which guarantees that the caller has exclusive
2049 * exclusive access to the console system and the console_drivers list. 2095 * access to the console system and the console_drivers list.
2050 * 2096 *
2051 * returns 1 on success, and 0 on failure to acquire the lock. 2097 * returns 1 on success, and 0 on failure to acquire the lock.
2052 */ 2098 */
@@ -2618,14 +2664,13 @@ EXPORT_SYMBOL(__printk_ratelimit);
2618bool printk_timed_ratelimit(unsigned long *caller_jiffies, 2664bool printk_timed_ratelimit(unsigned long *caller_jiffies,
2619 unsigned int interval_msecs) 2665 unsigned int interval_msecs)
2620{ 2666{
2621 if (*caller_jiffies == 0 2667 unsigned long elapsed = jiffies - *caller_jiffies;
2622 || !time_in_range(jiffies, *caller_jiffies, 2668
2623 *caller_jiffies 2669 if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs))
2624 + msecs_to_jiffies(interval_msecs))) { 2670 return false;
2625 *caller_jiffies = jiffies; 2671
2626 return true; 2672 *caller_jiffies = jiffies;
2627 } 2673 return true;
2628 return false;
2629} 2674}
2630EXPORT_SYMBOL(printk_timed_ratelimit); 2675EXPORT_SYMBOL(printk_timed_ratelimit);
2631 2676
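A brief usage sketch (hypothetical caller, not part of this patch) of the simplified helper: the zero-initialised timestamp makes the first call succeed, and later calls succeed at most once per interval.

    #include <linux/printk.h>

    /* Warn about a congested queue at most once every five seconds. */
    static void report_congestion(void)
    {
    	static unsigned long last_warned;	/* 0: first call always prints */

    	if (printk_timed_ratelimit(&last_warned, 5000))
    		pr_warn("queue still congested\n");
    }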
diff --git a/kernel/smp.c b/kernel/smp.c
index 487653b5844f..aff8aa14f547 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -670,7 +670,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
670 if (cond_func(cpu, info)) { 670 if (cond_func(cpu, info)) {
671 ret = smp_call_function_single(cpu, func, 671 ret = smp_call_function_single(cpu, func,
672 info, wait); 672 info, wait);
673 WARN_ON_ONCE(!ret); 673 WARN_ON_ONCE(ret);
674 } 674 }
675 preempt_enable(); 675 preempt_enable();
676 } 676 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 75b22e22a72c..75875a741b5e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1240,8 +1240,7 @@ static struct ctl_table vm_table[] = {
1240 .maxlen = sizeof(unsigned long), 1240 .maxlen = sizeof(unsigned long),
1241 .mode = 0644, 1241 .mode = 0644,
1242 .proc_handler = hugetlb_sysctl_handler, 1242 .proc_handler = hugetlb_sysctl_handler,
1243 .extra1 = (void *)&hugetlb_zero, 1243 .extra1 = &zero,
1244 .extra2 = (void *)&hugetlb_infinity,
1245 }, 1244 },
1246#ifdef CONFIG_NUMA 1245#ifdef CONFIG_NUMA
1247 { 1246 {
@@ -1250,8 +1249,7 @@ static struct ctl_table vm_table[] = {
1250 .maxlen = sizeof(unsigned long), 1249 .maxlen = sizeof(unsigned long),
1251 .mode = 0644, 1250 .mode = 0644,
1252 .proc_handler = &hugetlb_mempolicy_sysctl_handler, 1251 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1253 .extra1 = (void *)&hugetlb_zero, 1252 .extra1 = &zero,
1254 .extra2 = (void *)&hugetlb_infinity,
1255 }, 1253 },
1256#endif 1254#endif
1257 { 1255 {
@@ -1274,8 +1272,7 @@ static struct ctl_table vm_table[] = {
1274 .maxlen = sizeof(unsigned long), 1272 .maxlen = sizeof(unsigned long),
1275 .mode = 0644, 1273 .mode = 0644,
1276 .proc_handler = hugetlb_overcommit_handler, 1274 .proc_handler = hugetlb_overcommit_handler,
1277 .extra1 = (void *)&hugetlb_zero, 1275 .extra1 = &zero,
1278 .extra2 = (void *)&hugetlb_infinity,
1279 }, 1276 },
1280#endif 1277#endif
1281 { 1278 {
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index c3319bd1b040..51b29e9d2ba6 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -260,9 +260,11 @@ static void watchdog_overflow_callback(struct perf_event *event,
260 return; 260 return;
261 261
262 if (hardlockup_panic) 262 if (hardlockup_panic)
263 panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu); 263 panic("Watchdog detected hard LOCKUP on cpu %d",
264 this_cpu);
264 else 265 else
265 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); 266 WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
267 this_cpu);
266 268
267 __this_cpu_write(hard_watchdog_warn, true); 269 __this_cpu_write(hard_watchdog_warn, true);
268 return; 270 return;
@@ -345,7 +347,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
345 } 347 }
346 } 348 }
347 349
348 printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", 350 pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
349 smp_processor_id(), duration, 351 smp_processor_id(), duration,
350 current->comm, task_pid_nr(current)); 352 current->comm, task_pid_nr(current));
351 print_modules(); 353 print_modules();
@@ -484,7 +486,7 @@ static int watchdog_nmi_enable(unsigned int cpu)
484 if (PTR_ERR(event) == -EOPNOTSUPP) 486 if (PTR_ERR(event) == -EOPNOTSUPP)
485 pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu); 487 pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
486 else if (PTR_ERR(event) == -ENOENT) 488 else if (PTR_ERR(event) == -ENOENT)
487 pr_warning("disabled (cpu%i): hardware events not enabled\n", 489 pr_warn("disabled (cpu%i): hardware events not enabled\n",
488 cpu); 490 cpu);
489 else 491 else
490 pr_err("disabled (cpu%i): unable to create perf event: %ld\n", 492 pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
diff --git a/lib/Kconfig b/lib/Kconfig
index a8a775730c09..df872659ddd3 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -396,6 +396,39 @@ config CPU_RMAP
396config DQL 396config DQL
397 bool 397 bool
398 398
399config GLOB
400 bool
401# This actually supports modular compilation, but the module overhead
402# is ridiculous for the amount of code involved. Until an out-of-tree
403# driver asks for it, we'll just link it directly into the kernel
404# when required. Since we're ignoring out-of-tree users, there's also
405# no need to bother prompting for a manual decision:
406# prompt "glob_match() function"
407 help
408 This option provides a glob_match function for performing
409 simple text pattern matching. It originated in the ATA code
410 to blacklist particular drive models, but other device drivers
411 may need similar functionality.
412
413 All drivers in the Linux kernel tree that require this function
414 should automatically select this option. Say N unless you
415 are compiling an out-of tree driver which tells you that it
416 depends on this.
417
418config GLOB_SELFTEST
419 bool "glob self-test on init"
420 default n
421 depends on GLOB
422 help
423 This option enables a simple self-test of the glob_match
424 function on startup. It is primarily useful for people
425 working on the code to ensure they haven't introduced any
426 regressions.
427
428 It only adds a little bit of code and slows kernel boot (or
429 module load) by a small amount, so you're welcome to play with
430 it, but you probably don't need it.
431
399# 432#
400# Netlink attribute parsing support is select'ed if needed 433# Netlink attribute parsing support is select'ed if needed
401# 434#
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cfe7df8f62cc..cb45f59685e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -15,7 +15,7 @@ config PRINTK_TIME
15 The behavior is also controlled by the kernel command line 15 The behavior is also controlled by the kernel command line
16 parameter printk.time=1. See Documentation/kernel-parameters.txt 16 parameter printk.time=1. See Documentation/kernel-parameters.txt
17 17
18config DEFAULT_MESSAGE_LOGLEVEL 18config MESSAGE_LOGLEVEL_DEFAULT
19 int "Default message log level (1-7)" 19 int "Default message log level (1-7)"
20 range 1 7 20 range 1 7
21 default "4" 21 default "4"
diff --git a/lib/Makefile b/lib/Makefile
index 8427df95dade..d6b4bc496408 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -137,6 +137,8 @@ obj-$(CONFIG_CORDIC) += cordic.o
137 137
138obj-$(CONFIG_DQL) += dynamic_queue_limits.o 138obj-$(CONFIG_DQL) += dynamic_queue_limits.o
139 139
140obj-$(CONFIG_GLOB) += glob.o
141
140obj-$(CONFIG_MPILIB) += mpi/ 142obj-$(CONFIG_MPILIB) += mpi/
141obj-$(CONFIG_SIGNATURE) += digsig.o 143obj-$(CONFIG_SIGNATURE) += digsig.o
142 144
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06f7e4fe8d2d..1e031f2c9aba 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -40,9 +40,9 @@
40 * for the best explanations of this ordering. 40 * for the best explanations of this ordering.
41 */ 41 */
42 42
43int __bitmap_empty(const unsigned long *bitmap, int bits) 43int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
44{ 44{
45 int k, lim = bits/BITS_PER_LONG; 45 unsigned int k, lim = bits/BITS_PER_LONG;
46 for (k = 0; k < lim; ++k) 46 for (k = 0; k < lim; ++k)
47 if (bitmap[k]) 47 if (bitmap[k])
48 return 0; 48 return 0;
@@ -55,9 +55,9 @@ int __bitmap_empty(const unsigned long *bitmap, int bits)
55} 55}
56EXPORT_SYMBOL(__bitmap_empty); 56EXPORT_SYMBOL(__bitmap_empty);
57 57
58int __bitmap_full(const unsigned long *bitmap, int bits) 58int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
59{ 59{
60 int k, lim = bits/BITS_PER_LONG; 60 unsigned int k, lim = bits/BITS_PER_LONG;
61 for (k = 0; k < lim; ++k) 61 for (k = 0; k < lim; ++k)
62 if (~bitmap[k]) 62 if (~bitmap[k])
63 return 0; 63 return 0;
@@ -71,9 +71,9 @@ int __bitmap_full(const unsigned long *bitmap, int bits)
71EXPORT_SYMBOL(__bitmap_full); 71EXPORT_SYMBOL(__bitmap_full);
72 72
73int __bitmap_equal(const unsigned long *bitmap1, 73int __bitmap_equal(const unsigned long *bitmap1,
74 const unsigned long *bitmap2, int bits) 74 const unsigned long *bitmap2, unsigned int bits)
75{ 75{
76 int k, lim = bits/BITS_PER_LONG; 76 unsigned int k, lim = bits/BITS_PER_LONG;
77 for (k = 0; k < lim; ++k) 77 for (k = 0; k < lim; ++k)
78 if (bitmap1[k] != bitmap2[k]) 78 if (bitmap1[k] != bitmap2[k])
79 return 0; 79 return 0;
@@ -86,14 +86,14 @@ int __bitmap_equal(const unsigned long *bitmap1,
86} 86}
87EXPORT_SYMBOL(__bitmap_equal); 87EXPORT_SYMBOL(__bitmap_equal);
88 88
89void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits) 89void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
90{ 90{
91 int k, lim = bits/BITS_PER_LONG; 91 unsigned int k, lim = bits/BITS_PER_LONG;
92 for (k = 0; k < lim; ++k) 92 for (k = 0; k < lim; ++k)
93 dst[k] = ~src[k]; 93 dst[k] = ~src[k];
94 94
95 if (bits % BITS_PER_LONG) 95 if (bits % BITS_PER_LONG)
96 dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); 96 dst[k] = ~src[k];
97} 97}
98EXPORT_SYMBOL(__bitmap_complement); 98EXPORT_SYMBOL(__bitmap_complement);
99 99
@@ -182,23 +182,26 @@ void __bitmap_shift_left(unsigned long *dst,
182EXPORT_SYMBOL(__bitmap_shift_left); 182EXPORT_SYMBOL(__bitmap_shift_left);
183 183
184int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, 184int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
185 const unsigned long *bitmap2, int bits) 185 const unsigned long *bitmap2, unsigned int bits)
186{ 186{
187 int k; 187 unsigned int k;
188 int nr = BITS_TO_LONGS(bits); 188 unsigned int lim = bits/BITS_PER_LONG;
189 unsigned long result = 0; 189 unsigned long result = 0;
190 190
191 for (k = 0; k < nr; k++) 191 for (k = 0; k < lim; k++)
192 result |= (dst[k] = bitmap1[k] & bitmap2[k]); 192 result |= (dst[k] = bitmap1[k] & bitmap2[k]);
193 if (bits % BITS_PER_LONG)
194 result |= (dst[k] = bitmap1[k] & bitmap2[k] &
195 BITMAP_LAST_WORD_MASK(bits));
193 return result != 0; 196 return result != 0;
194} 197}
195EXPORT_SYMBOL(__bitmap_and); 198EXPORT_SYMBOL(__bitmap_and);
196 199
197void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, 200void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
198 const unsigned long *bitmap2, int bits) 201 const unsigned long *bitmap2, unsigned int bits)
199{ 202{
200 int k; 203 unsigned int k;
201 int nr = BITS_TO_LONGS(bits); 204 unsigned int nr = BITS_TO_LONGS(bits);
202 205
203 for (k = 0; k < nr; k++) 206 for (k = 0; k < nr; k++)
204 dst[k] = bitmap1[k] | bitmap2[k]; 207 dst[k] = bitmap1[k] | bitmap2[k];
@@ -206,10 +209,10 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
206EXPORT_SYMBOL(__bitmap_or); 209EXPORT_SYMBOL(__bitmap_or);
207 210
208void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, 211void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
209 const unsigned long *bitmap2, int bits) 212 const unsigned long *bitmap2, unsigned int bits)
210{ 213{
211 int k; 214 unsigned int k;
212 int nr = BITS_TO_LONGS(bits); 215 unsigned int nr = BITS_TO_LONGS(bits);
213 216
214 for (k = 0; k < nr; k++) 217 for (k = 0; k < nr; k++)
215 dst[k] = bitmap1[k] ^ bitmap2[k]; 218 dst[k] = bitmap1[k] ^ bitmap2[k];
@@ -217,22 +220,25 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
217EXPORT_SYMBOL(__bitmap_xor); 220EXPORT_SYMBOL(__bitmap_xor);
218 221
219int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, 222int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
220 const unsigned long *bitmap2, int bits) 223 const unsigned long *bitmap2, unsigned int bits)
221{ 224{
222 int k; 225 unsigned int k;
223 int nr = BITS_TO_LONGS(bits); 226 unsigned int lim = bits/BITS_PER_LONG;
224 unsigned long result = 0; 227 unsigned long result = 0;
225 228
226 for (k = 0; k < nr; k++) 229 for (k = 0; k < lim; k++)
227 result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); 230 result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
231 if (bits % BITS_PER_LONG)
232 result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
233 BITMAP_LAST_WORD_MASK(bits));
228 return result != 0; 234 return result != 0;
229} 235}
230EXPORT_SYMBOL(__bitmap_andnot); 236EXPORT_SYMBOL(__bitmap_andnot);
231 237
232int __bitmap_intersects(const unsigned long *bitmap1, 238int __bitmap_intersects(const unsigned long *bitmap1,
233 const unsigned long *bitmap2, int bits) 239 const unsigned long *bitmap2, unsigned int bits)
234{ 240{
235 int k, lim = bits/BITS_PER_LONG; 241 unsigned int k, lim = bits/BITS_PER_LONG;
236 for (k = 0; k < lim; ++k) 242 for (k = 0; k < lim; ++k)
237 if (bitmap1[k] & bitmap2[k]) 243 if (bitmap1[k] & bitmap2[k])
238 return 1; 244 return 1;
@@ -245,9 +251,9 @@ int __bitmap_intersects(const unsigned long *bitmap1,
245EXPORT_SYMBOL(__bitmap_intersects); 251EXPORT_SYMBOL(__bitmap_intersects);
246 252
247int __bitmap_subset(const unsigned long *bitmap1, 253int __bitmap_subset(const unsigned long *bitmap1,
248 const unsigned long *bitmap2, int bits) 254 const unsigned long *bitmap2, unsigned int bits)
249{ 255{
250 int k, lim = bits/BITS_PER_LONG; 256 unsigned int k, lim = bits/BITS_PER_LONG;
251 for (k = 0; k < lim; ++k) 257 for (k = 0; k < lim; ++k)
252 if (bitmap1[k] & ~bitmap2[k]) 258 if (bitmap1[k] & ~bitmap2[k])
253 return 0; 259 return 0;
@@ -259,9 +265,10 @@ int __bitmap_subset(const unsigned long *bitmap1,
259} 265}
260EXPORT_SYMBOL(__bitmap_subset); 266EXPORT_SYMBOL(__bitmap_subset);
261 267
262int __bitmap_weight(const unsigned long *bitmap, int bits) 268int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
263{ 269{
264 int k, w = 0, lim = bits/BITS_PER_LONG; 270 unsigned int k, lim = bits/BITS_PER_LONG;
271 int w = 0;
265 272
266 for (k = 0; k < lim; k++) 273 for (k = 0; k < lim; k++)
267 w += hweight_long(bitmap[k]); 274 w += hweight_long(bitmap[k]);
@@ -273,42 +280,42 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
273} 280}
274EXPORT_SYMBOL(__bitmap_weight); 281EXPORT_SYMBOL(__bitmap_weight);
275 282
276void bitmap_set(unsigned long *map, int start, int nr) 283void bitmap_set(unsigned long *map, unsigned int start, int len)
277{ 284{
278 unsigned long *p = map + BIT_WORD(start); 285 unsigned long *p = map + BIT_WORD(start);
279 const int size = start + nr; 286 const unsigned int size = start + len;
280 int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); 287 int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
281 unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); 288 unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
282 289
283 while (nr - bits_to_set >= 0) { 290 while (len - bits_to_set >= 0) {
284 *p |= mask_to_set; 291 *p |= mask_to_set;
285 nr -= bits_to_set; 292 len -= bits_to_set;
286 bits_to_set = BITS_PER_LONG; 293 bits_to_set = BITS_PER_LONG;
287 mask_to_set = ~0UL; 294 mask_to_set = ~0UL;
288 p++; 295 p++;
289 } 296 }
290 if (nr) { 297 if (len) {
291 mask_to_set &= BITMAP_LAST_WORD_MASK(size); 298 mask_to_set &= BITMAP_LAST_WORD_MASK(size);
292 *p |= mask_to_set; 299 *p |= mask_to_set;
293 } 300 }
294} 301}
295EXPORT_SYMBOL(bitmap_set); 302EXPORT_SYMBOL(bitmap_set);
296 303
297void bitmap_clear(unsigned long *map, int start, int nr) 304void bitmap_clear(unsigned long *map, unsigned int start, int len)
298{ 305{
299 unsigned long *p = map + BIT_WORD(start); 306 unsigned long *p = map + BIT_WORD(start);
300 const int size = start + nr; 307 const unsigned int size = start + len;
301 int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); 308 int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
302 unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); 309 unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
303 310
304 while (nr - bits_to_clear >= 0) { 311 while (len - bits_to_clear >= 0) {
305 *p &= ~mask_to_clear; 312 *p &= ~mask_to_clear;
306 nr -= bits_to_clear; 313 len -= bits_to_clear;
307 bits_to_clear = BITS_PER_LONG; 314 bits_to_clear = BITS_PER_LONG;
308 mask_to_clear = ~0UL; 315 mask_to_clear = ~0UL;
309 p++; 316 p++;
310 } 317 }
311 if (nr) { 318 if (len) {
312 mask_to_clear &= BITMAP_LAST_WORD_MASK(size); 319 mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
313 *p &= ~mask_to_clear; 320 *p &= ~mask_to_clear;
314 } 321 }
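A short sketch (not from this patch) of the two helpers with their new start/len prototypes; DECLARE_BITMAP() and bitmap_weight() come from <linux/bitmap.h>.

    #include <linux/bitmap.h>

    static DECLARE_BITMAP(busy, 64);

    static void bitmap_set_clear_example(void)
    {
    	bitmap_set(busy, 8, 16);	/* mark bits 8..23 */
    	bitmap_clear(busy, 8, 8);	/* release bits 8..15 again */
    	/* bitmap_weight(busy, 64) is now 8 */
    }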
@@ -664,13 +671,8 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
664 671
665int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) 672int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
666{ 673{
667 char *nl = strchr(bp, '\n'); 674 char *nl = strchrnul(bp, '\n');
668 int len; 675 int len = nl - bp;
669
670 if (nl)
671 len = nl - bp;
672 else
673 len = strlen(bp);
674 676
675 return __bitmap_parselist(bp, len, 0, maskp, nmaskbits); 677 return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
676} 678}
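For illustration, a hypothetical caller (not part of the patch); parsing stops at the first newline, if any, so sysfs-style input works directly.

    #include <linux/bitmap.h>

    static int parse_cpus_example(void)
    {
    	static DECLARE_BITMAP(mask, 16);

    	/* sets bits 0-3 and 8; parsing stops at the '\n' */
    	return bitmap_parselist("0-3,8\n", mask, 16);
    }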
@@ -716,7 +718,7 @@ EXPORT_SYMBOL(bitmap_parselist_user);
716 * 718 *
717 * If for example, just bits 4 through 7 are set in @buf, then @pos 719 * If for example, just bits 4 through 7 are set in @buf, then @pos
718 * values 4 through 7 will get mapped to 0 through 3, respectively, 720 * values 4 through 7 will get mapped to 0 through 3, respectively,
719 * and other @pos values will get mapped to 0. When @pos value 7 721 * and other @pos values will get mapped to -1. When @pos value 7
720 * gets mapped to (returns) @ord value 3 in this example, that means 722 * gets mapped to (returns) @ord value 3 in this example, that means
721 * that bit 7 is the 3rd (starting with 0th) set bit in @buf. 723 * that bit 7 is the 3rd (starting with 0th) set bit in @buf.
722 * 724 *
@@ -1046,7 +1048,7 @@ enum {
1046 REG_OP_RELEASE, /* clear all bits in region */ 1048 REG_OP_RELEASE, /* clear all bits in region */
1047}; 1049};
1048 1050
1049static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op) 1051static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op)
1050{ 1052{
1051 int nbits_reg; /* number of bits in region */ 1053 int nbits_reg; /* number of bits in region */
1052 int index; /* index first long of region in bitmap */ 1054 int index; /* index first long of region in bitmap */
@@ -1112,11 +1114,11 @@ done:
1112 * Return the bit offset in bitmap of the allocated region, 1114 * Return the bit offset in bitmap of the allocated region,
1113 * or -errno on failure. 1115 * or -errno on failure.
1114 */ 1116 */
1115int bitmap_find_free_region(unsigned long *bitmap, int bits, int order) 1117int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
1116{ 1118{
1117 int pos, end; /* scans bitmap by regions of size order */ 1119 unsigned int pos, end; /* scans bitmap by regions of size order */
1118 1120
1119 for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) { 1121 for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) {
1120 if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) 1122 if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
1121 continue; 1123 continue;
1122 __reg_op(bitmap, pos, order, REG_OP_ALLOC); 1124 __reg_op(bitmap, pos, order, REG_OP_ALLOC);
@@ -1137,7 +1139,7 @@ EXPORT_SYMBOL(bitmap_find_free_region);
1137 * 1139 *
1138 * No return value. 1140 * No return value.
1139 */ 1141 */
1140void bitmap_release_region(unsigned long *bitmap, int pos, int order) 1142void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
1141{ 1143{
1142 __reg_op(bitmap, pos, order, REG_OP_RELEASE); 1144 __reg_op(bitmap, pos, order, REG_OP_RELEASE);
1143} 1145}
@@ -1154,12 +1156,11 @@ EXPORT_SYMBOL(bitmap_release_region);
1154 * Return 0 on success, or %-EBUSY if specified region wasn't 1156 * Return 0 on success, or %-EBUSY if specified region wasn't
1155 * free (not all bits were zero). 1157 * free (not all bits were zero).
1156 */ 1158 */
1157int bitmap_allocate_region(unsigned long *bitmap, int pos, int order) 1159int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
1158{ 1160{
1159 if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) 1161 if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
1160 return -EBUSY; 1162 return -EBUSY;
1161 __reg_op(bitmap, pos, order, REG_OP_ALLOC); 1163 return __reg_op(bitmap, pos, order, REG_OP_ALLOC);
1162 return 0;
1163} 1164}
1164EXPORT_SYMBOL(bitmap_allocate_region); 1165EXPORT_SYMBOL(bitmap_allocate_region);
1165 1166
diff --git a/lib/cmdline.c b/lib/cmdline.c
index d4932f745e92..76a712e6e20e 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -121,11 +121,7 @@ EXPORT_SYMBOL(get_options);
121 * @retptr: (output) Optional pointer to next char after parse completes 121 * @retptr: (output) Optional pointer to next char after parse completes
122 * 122 *
123 * Parses a string into a number. The number stored at @ptr is 123 * Parses a string into a number. The number stored at @ptr is
124 * potentially suffixed with %K (for kilobytes, or 1024 bytes), 124 * potentially suffixed with K, M, G, T, P, E.
125 * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
126 * 1073741824). If the number is suffixed with K, M, or G, then
127 * the return value is the number multiplied by one kilobyte, one
128 * megabyte, or one gigabyte, respectively.
129 */ 125 */
130 126
131unsigned long long memparse(const char *ptr, char **retptr) 127unsigned long long memparse(const char *ptr, char **retptr)
@@ -135,6 +131,15 @@ unsigned long long memparse(const char *ptr, char **retptr)
135 unsigned long long ret = simple_strtoull(ptr, &endptr, 0); 131 unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
136 132
137 switch (*endptr) { 133 switch (*endptr) {
134 case 'E':
135 case 'e':
136 ret <<= 10;
137 case 'P':
138 case 'p':
139 ret <<= 10;
140 case 'T':
141 case 't':
142 ret <<= 10;
138 case 'G': 143 case 'G':
139 case 'g': 144 case 'g':
140 ret <<= 10; 145 ret <<= 10;
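With the new cases, memparse("16T", NULL), for example, falls through the T, G, M and K shifts and returns 16ULL << 40; the absence of break statements is intentional.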
diff --git a/lib/glob.c b/lib/glob.c
new file mode 100644
index 000000000000..500fc80d23e1
--- /dev/null
+++ b/lib/glob.c
@@ -0,0 +1,287 @@
1#include <linux/module.h>
2#include <linux/glob.h>
3
4/*
5 * The only reason this code can be compiled as a module is because the
6 * ATA code that depends on it can be as well. In practice, they're
7 * both usually compiled in and the module overhead goes away.
8 */
9MODULE_DESCRIPTION("glob(7) matching");
10MODULE_LICENSE("Dual MIT/GPL");
11
12/**
13 * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
14 * @pat: Shell-style pattern to match, e.g. "*.[ch]".
15 * @str: String to match. The pattern must match the entire string.
16 *
17 * Perform shell-style glob matching, returning true (1) if the match
18 * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
19 *
20 * Pattern metacharacters are ?, *, [ and \.
21 * (And, inside character classes, !, - and ].)
22 *
23 * This is a small and simple implementation intended for device blacklists
24 * where a string is matched against a number of patterns. Thus, it
25 * does not preprocess the patterns. It is non-recursive, and run-time
26 * is at most quadratic: strlen(@str)*strlen(@pat).
27 *
28 * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
29 * it takes 6 passes over the pattern before matching the string.
30 *
31 * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
32 * treat / or leading . specially; it isn't actually used for pathnames.
33 *
34 * Note that according to glob(7) (and unlike bash), character classes
35 * are complemented by a leading !; this does not support the regex-style
36 * [^a-z] syntax.
37 *
38 * An opening bracket without a matching close is matched literally.
39 */
40bool __pure glob_match(char const *pat, char const *str)
41{
42 /*
43 * Backtrack to previous * on mismatch and retry starting one
44 * character later in the string. Because * matches all characters
45 * (no exception for /), it can be easily proved that there's
46 * never a need to backtrack multiple levels.
47 */
48 char const *back_pat = NULL, *back_str = back_str;
49
50 /*
51 * Loop over each token (character or class) in pat, matching
52 * it against the remaining unmatched tail of str. Return false
53 * on mismatch, or true after matching the trailing nul bytes.
54 */
55 for (;;) {
56 unsigned char c = *str++;
57 unsigned char d = *pat++;
58
59 switch (d) {
60 case '?': /* Wildcard: anything but nul */
61 if (c == '\0')
62 return false;
63 break;
64 case '*': /* Any-length wildcard */
65 if (*pat == '\0') /* Optimize trailing * case */
66 return true;
67 back_pat = pat;
68 back_str = --str; /* Allow zero-length match */
69 break;
70 case '[': { /* Character class */
71 bool match = false, inverted = (*pat == '!');
72 char const *class = pat + inverted;
73 unsigned char a = *class++;
74
75 /*
76 * Iterate over each span in the character class.
77 * A span is either a single character a, or a
78 * range a-b. The first span may begin with ']'.
79 */
80 do {
81 unsigned char b = a;
82
83 if (a == '\0') /* Malformed */
84 goto literal;
85
86 if (class[0] == '-' && class[1] != ']') {
87 b = class[1];
88
89 if (b == '\0')
90 goto literal;
91
92 class += 2;
93 /* Any special action if a > b? */
94 }
95 match |= (a <= c && c <= b);
96 } while ((a = *class++) != ']');
97
98 if (match == inverted)
99 goto backtrack;
100 pat = class;
101 }
102 break;
103 case '\\':
104 d = *pat++;
105 /*FALLTHROUGH*/
106 default: /* Literal character */
107literal:
108 if (c == d) {
109 if (d == '\0')
110 return true;
111 break;
112 }
113backtrack:
114 if (c == '\0' || !back_pat)
115 return false; /* No point continuing */
116 /* Try again from last *, one character later in str. */
117 pat = back_pat;
118 str = ++back_str;
119 break;
120 }
121 }
122}
123EXPORT_SYMBOL(glob_match);
124
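A hypothetical caller (not part of this patch), in the spirit of the drive-blacklist use case mentioned in the comment above:

    #include <linux/glob.h>

    static bool model_is_blacklisted(const char *model)
    {
    	/* the pattern must match the entire model string */
    	return glob_match("WDC WD??00JB-*", model) ||
    	       glob_match("OCZ-VERTEX*", model);
    }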
125
126#ifdef CONFIG_GLOB_SELFTEST
127
128#include <linux/printk.h>
129#include <linux/moduleparam.h>
130
131/* Boot with "glob.verbose=1" to show successful tests, too */
132static bool verbose = false;
133module_param(verbose, bool, 0);
134
135struct glob_test {
136 char const *pat, *str;
137 bool expected;
138};
139
140static bool __pure __init test(char const *pat, char const *str, bool expected)
141{
142 bool match = glob_match(pat, str);
143 bool success = match == expected;
144
145 /* Can't get string literals into a particular section, so... */
146 static char const msg_error[] __initconst =
147 KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
148 static char const msg_ok[] __initconst =
149 KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
150 static char const mismatch[] __initconst = "mismatch";
151 char const *message;
152
153 if (!success)
154 message = msg_error;
155 else if (verbose)
156 message = msg_ok;
157 else
158 return success;
159
160 printk(message, pat, str, mismatch + 3*match);
161 return success;
162}
163
164/*
165 * The tests are all jammed together in one array to make it simpler
166 * to place that array in the .init.rodata section. The obvious
167 * "array of structures containing char *" has no way to force the
168 * pointed-to strings to be in a particular section.
169 *
170 * Anyway, a test consists of:
171 * 1. Expected glob_match result: '1' or '0'.
172 * 2. Pattern to match: null-terminated string
173 * 3. String to match against: null-terminated string
174 *
175 * The list of tests is terminated with a final '\0' instead of
176 * a glob_match result character.
177 */
178static char const glob_tests[] __initconst =
179 /* Some basic tests */
180 "1" "a\0" "a\0"
181 "0" "a\0" "b\0"
182 "0" "a\0" "aa\0"
183 "0" "a\0" "\0"
184 "1" "\0" "\0"
185 "0" "\0" "a\0"
186 /* Simple character class tests */
187 "1" "[a]\0" "a\0"
188 "0" "[a]\0" "b\0"
189 "0" "[!a]\0" "a\0"
190 "1" "[!a]\0" "b\0"
191 "1" "[ab]\0" "a\0"
192 "1" "[ab]\0" "b\0"
193 "0" "[ab]\0" "c\0"
194 "1" "[!ab]\0" "c\0"
195 "1" "[a-c]\0" "b\0"
196 "0" "[a-c]\0" "d\0"
197 /* Corner cases in character class parsing */
198 "1" "[a-c-e-g]\0" "-\0"
199 "0" "[a-c-e-g]\0" "d\0"
200 "1" "[a-c-e-g]\0" "f\0"
201 "1" "[]a-ceg-ik[]\0" "a\0"
202 "1" "[]a-ceg-ik[]\0" "]\0"
203 "1" "[]a-ceg-ik[]\0" "[\0"
204 "1" "[]a-ceg-ik[]\0" "h\0"
205 "0" "[]a-ceg-ik[]\0" "f\0"
206 "0" "[!]a-ceg-ik[]\0" "h\0"
207 "0" "[!]a-ceg-ik[]\0" "]\0"
208 "1" "[!]a-ceg-ik[]\0" "f\0"
209 /* Simple wild cards */
210 "1" "?\0" "a\0"
211 "0" "?\0" "aa\0"
212 "0" "??\0" "a\0"
213 "1" "?x?\0" "axb\0"
214 "0" "?x?\0" "abx\0"
215 "0" "?x?\0" "xab\0"
216 /* Asterisk wild cards (backtracking) */
217 "0" "*??\0" "a\0"
218 "1" "*??\0" "ab\0"
219 "1" "*??\0" "abc\0"
220 "1" "*??\0" "abcd\0"
221 "0" "??*\0" "a\0"
222 "1" "??*\0" "ab\0"
223 "1" "??*\0" "abc\0"
224 "1" "??*\0" "abcd\0"
225 "0" "?*?\0" "a\0"
226 "1" "?*?\0" "ab\0"
227 "1" "?*?\0" "abc\0"
228 "1" "?*?\0" "abcd\0"
229 "1" "*b\0" "b\0"
230 "1" "*b\0" "ab\0"
231 "0" "*b\0" "ba\0"
232 "1" "*b\0" "bb\0"
233 "1" "*b\0" "abb\0"
234 "1" "*b\0" "bab\0"
235 "1" "*bc\0" "abbc\0"
236 "1" "*bc\0" "bc\0"
237 "1" "*bc\0" "bbc\0"
238 "1" "*bc\0" "bcbc\0"
239 /* Multiple asterisks (complex backtracking) */
240 "1" "*ac*\0" "abacadaeafag\0"
241 "1" "*ac*ae*ag*\0" "abacadaeafag\0"
242 "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
243 "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
244 "1" "*abcd*\0" "abcabcabcabcdefg\0"
245 "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
246 "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
247 "0" "*abcd*\0" "abcabcabcabcefg\0"
248 "0" "*ab*cd*\0" "abcabcabcabcefg\0";
249
250static int __init glob_init(void)
251{
252 unsigned successes = 0;
253 unsigned n = 0;
254 char const *p = glob_tests;
255 static char const message[] __initconst =
256 KERN_INFO "glob: %u self-tests passed, %u failed\n";
257
258 /*
259 * Tests are jammed together in a string. The first byte is '1'
260 * or '0' to indicate the expected outcome, or '\0' to indicate the
261 * end of the tests. Then come two null-terminated strings: the
262 * pattern and the string to match it against.
263 */
264 while (*p) {
265 bool expected = *p++ & 1;
266 char const *pat = p;
267
268 p += strlen(p) + 1;
269 successes += test(pat, p, expected);
270 p += strlen(p) + 1;
271 n++;
272 }
273
274 n -= successes;
275 printk(message, successes, n);
276
277 /* What's the errno for "kernel bug detected"? Guess... */
278 return n ? -ECANCELED : 0;
279}
280
281/* We need a dummy exit function to allow unload */
282static void __exit glob_fini(void) { }
283
284module_init(glob_init);
285module_exit(glob_fini);
286
287#endif /* CONFIG_GLOB_SELFTEST */
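
For illustration only: a caller of the glob_match() routine added above would look roughly like the sketch below. The helper name and the patterns are invented; only the glob_match() signature comes from this patch, and the <linux/glob.h> header is assumed.

#include <linux/glob.h>

/* Hypothetical example: match an ATA model string against simple globs.
 * '?' matches any single character, '*' matches any run of characters
 * (including none), and [a-z] / [!a-z] are character classes as described
 * in the header comment above. */
static bool model_matches_quirk(const char *model)
{
	return glob_match("WDC WD??00JD-*", model) ||
	       glob_match("ST3[0-9]*AS", model);
}
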
diff --git a/lib/klist.c b/lib/klist.c
index 358a368a2947..89b485a2a58d 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k)
140EXPORT_SYMBOL_GPL(klist_add_tail); 140EXPORT_SYMBOL_GPL(klist_add_tail);
141 141
142/** 142/**
143 * klist_add_after - Init a klist_node and add it after an existing node 143 * klist_add_behind - Init a klist_node and add it after an existing node
144 * @n: node we're adding. 144 * @n: node we're adding.
145 * @pos: node to put @n after 145 * @pos: node to put @n after
146 */ 146 */
147void klist_add_after(struct klist_node *n, struct klist_node *pos) 147void klist_add_behind(struct klist_node *n, struct klist_node *pos)
148{ 148{
149 struct klist *k = knode_klist(pos); 149 struct klist *k = knode_klist(pos);
150 150
@@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos)
153 list_add(&n->n_node, &pos->n_node); 153 list_add(&n->n_node, &pos->n_node);
154 spin_unlock(&k->k_lock); 154 spin_unlock(&k->k_lock);
155} 155}
156EXPORT_SYMBOL_GPL(klist_add_after); 156EXPORT_SYMBOL_GPL(klist_add_behind);
157 157
158/** 158/**
159 * klist_add_before - Init a klist_node and add it before an existing node 159 * klist_add_before - Init a klist_node and add it before an existing node
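
The klist change above is purely a rename of klist_add_after() to klist_add_behind(); the calling convention is unchanged. A minimal caller, sketched here with made-up nodes and assuming the klist was set up with klist_init():

#include <linux/klist.h>

static struct klist k;			/* assumed: klist_init(&k, NULL, NULL) ran earlier */
static struct klist_node a, b;

static void example(void)
{
	klist_add_head(&a, &k);		/* a becomes the first node */
	klist_add_behind(&b, &a);	/* insert b immediately after a */
}
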
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 1183fa70a44d..12bcba1c8612 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -1,3 +1,6 @@
1
2#define pr_fmt(fmt) "list_sort_test: " fmt
3
1#include <linux/kernel.h> 4#include <linux/kernel.h>
2#include <linux/module.h> 5#include <linux/module.h>
3#include <linux/list_sort.h> 6#include <linux/list_sort.h>
@@ -47,6 +50,7 @@ static void merge_and_restore_back_links(void *priv,
47 struct list_head *a, struct list_head *b) 50 struct list_head *a, struct list_head *b)
48{ 51{
49 struct list_head *tail = head; 52 struct list_head *tail = head;
53 u8 count = 0;
50 54
51 while (a && b) { 55 while (a && b) {
52 /* if equal, take 'a' -- important for sort stability */ 56 /* if equal, take 'a' -- important for sort stability */
@@ -70,7 +74,8 @@ static void merge_and_restore_back_links(void *priv,
70 * element comparison is needed, so the client's cmp() 74 * element comparison is needed, so the client's cmp()
71 * routine can invoke cond_resched() periodically. 75 * routine can invoke cond_resched() periodically.
72 */ 76 */
73 (*cmp)(priv, tail->next, tail->next); 77 if (unlikely(!(++count)))
78 (*cmp)(priv, tail->next, tail->next);
74 79
75 tail->next->prev = tail; 80 tail->next->prev = tail;
76 tail = tail->next; 81 tail = tail->next;
@@ -123,9 +128,7 @@ void list_sort(void *priv, struct list_head *head,
123 } 128 }
124 if (lev > max_lev) { 129 if (lev > max_lev) {
125 if (unlikely(lev >= ARRAY_SIZE(part)-1)) { 130 if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
126 printk_once(KERN_DEBUG "list passed to" 131 printk_once(KERN_DEBUG "list too long for efficiency\n");
127 " list_sort() too long for"
128 " efficiency\n");
129 lev--; 132 lev--;
130 } 133 }
131 max_lev = lev; 134 max_lev = lev;
@@ -168,27 +171,25 @@ static struct debug_el **elts __initdata;
168static int __init check(struct debug_el *ela, struct debug_el *elb) 171static int __init check(struct debug_el *ela, struct debug_el *elb)
169{ 172{
170 if (ela->serial >= TEST_LIST_LEN) { 173 if (ela->serial >= TEST_LIST_LEN) {
171 printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", 174 pr_err("error: incorrect serial %d\n", ela->serial);
172 ela->serial);
173 return -EINVAL; 175 return -EINVAL;
174 } 176 }
175 if (elb->serial >= TEST_LIST_LEN) { 177 if (elb->serial >= TEST_LIST_LEN) {
176 printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", 178 pr_err("error: incorrect serial %d\n", elb->serial);
177 elb->serial);
178 return -EINVAL; 179 return -EINVAL;
179 } 180 }
180 if (elts[ela->serial] != ela || elts[elb->serial] != elb) { 181 if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
181 printk(KERN_ERR "list_sort_test: error: phantom element\n"); 182 pr_err("error: phantom element\n");
182 return -EINVAL; 183 return -EINVAL;
183 } 184 }
184 if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) { 185 if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
185 printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", 186 pr_err("error: bad poison: %#x/%#x\n",
186 ela->poison1, ela->poison2); 187 ela->poison1, ela->poison2);
187 return -EINVAL; 188 return -EINVAL;
188 } 189 }
189 if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) { 190 if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
190 printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", 191 pr_err("error: bad poison: %#x/%#x\n",
191 elb->poison1, elb->poison2); 192 elb->poison1, elb->poison2);
192 return -EINVAL; 193 return -EINVAL;
193 } 194 }
194 return 0; 195 return 0;
@@ -207,25 +208,23 @@ static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
207 208
208static int __init list_sort_test(void) 209static int __init list_sort_test(void)
209{ 210{
210 int i, count = 1, err = -EINVAL; 211 int i, count = 1, err = -ENOMEM;
211 struct debug_el *el; 212 struct debug_el *el;
212 struct list_head *cur, *tmp; 213 struct list_head *cur;
213 LIST_HEAD(head); 214 LIST_HEAD(head);
214 215
215 printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n"); 216 pr_debug("start testing list_sort()\n");
216 217
217 elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL); 218 elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL);
218 if (!elts) { 219 if (!elts) {
219 printk(KERN_ERR "list_sort_test: error: cannot allocate " 220 pr_err("error: cannot allocate memory\n");
220 "memory\n"); 221 return err;
221 goto exit;
222 } 222 }
223 223
224 for (i = 0; i < TEST_LIST_LEN; i++) { 224 for (i = 0; i < TEST_LIST_LEN; i++) {
225 el = kmalloc(sizeof(*el), GFP_KERNEL); 225 el = kmalloc(sizeof(*el), GFP_KERNEL);
226 if (!el) { 226 if (!el) {
227 printk(KERN_ERR "list_sort_test: error: cannot " 227 pr_err("error: cannot allocate memory\n");
228 "allocate memory\n");
229 goto exit; 228 goto exit;
230 } 229 }
231 /* force some equivalencies */ 230 /* force some equivalencies */
@@ -239,52 +238,52 @@ static int __init list_sort_test(void)
239 238
240 list_sort(NULL, &head, cmp); 239 list_sort(NULL, &head, cmp);
241 240
241 err = -EINVAL;
242 for (cur = head.next; cur->next != &head; cur = cur->next) { 242 for (cur = head.next; cur->next != &head; cur = cur->next) {
243 struct debug_el *el1; 243 struct debug_el *el1;
244 int cmp_result; 244 int cmp_result;
245 245
246 if (cur->next->prev != cur) { 246 if (cur->next->prev != cur) {
247 printk(KERN_ERR "list_sort_test: error: list is " 247 pr_err("error: list is corrupted\n");
248 "corrupted\n");
249 goto exit; 248 goto exit;
250 } 249 }
251 250
252 cmp_result = cmp(NULL, cur, cur->next); 251 cmp_result = cmp(NULL, cur, cur->next);
253 if (cmp_result > 0) { 252 if (cmp_result > 0) {
254 printk(KERN_ERR "list_sort_test: error: list is not " 253 pr_err("error: list is not sorted\n");
255 "sorted\n");
256 goto exit; 254 goto exit;
257 } 255 }
258 256
259 el = container_of(cur, struct debug_el, list); 257 el = container_of(cur, struct debug_el, list);
260 el1 = container_of(cur->next, struct debug_el, list); 258 el1 = container_of(cur->next, struct debug_el, list);
261 if (cmp_result == 0 && el->serial >= el1->serial) { 259 if (cmp_result == 0 && el->serial >= el1->serial) {
262 printk(KERN_ERR "list_sort_test: error: order of " 260 pr_err("error: order of equivalent elements not "
263 "equivalent elements not preserved\n"); 261 "preserved\n");
264 goto exit; 262 goto exit;
265 } 263 }
266 264
267 if (check(el, el1)) { 265 if (check(el, el1)) {
268 printk(KERN_ERR "list_sort_test: error: element check " 266 pr_err("error: element check failed\n");
269 "failed\n");
270 goto exit; 267 goto exit;
271 } 268 }
272 count++; 269 count++;
273 } 270 }
271 if (head.prev != cur) {
272 pr_err("error: list is corrupted\n");
273 goto exit;
274 }
275
274 276
275 if (count != TEST_LIST_LEN) { 277 if (count != TEST_LIST_LEN) {
276 printk(KERN_ERR "list_sort_test: error: bad list length %d", 278 pr_err("error: bad list length %d", count);
277 count);
278 goto exit; 279 goto exit;
279 } 280 }
280 281
281 err = 0; 282 err = 0;
282exit: 283exit:
284 for (i = 0; i < TEST_LIST_LEN; i++)
285 kfree(elts[i]);
283 kfree(elts); 286 kfree(elts);
284 list_for_each_safe(cur, tmp, &head) {
285 list_del(cur);
286 kfree(container_of(cur, struct debug_el, list));
287 }
288 return err; 287 return err;
289} 288}
290module_init(list_sort_test); 289module_init(list_sort_test);
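
In the list_sort() hunk above, the new u8 counter rate-limits the debug-only cmp() invocation on the back-link pass to once every 256 nodes (the counter wraps to zero), while still letting the client's cmp() run cond_resched() on long lists. A sketch of such a client, with struct foo and foo_cmp() invented for the example:

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/list_sort.h>
#include <linux/sched.h>

struct foo {
	struct list_head list;
	int key;
};

static int foo_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct foo *fa = container_of(a, struct foo, list);
	struct foo *fb = container_of(b, struct foo, list);

	cond_resched();			/* the point of the periodic dummy cmp() call */
	return fa->key - fb->key;	/* <0, 0, >0 as usual for a cmp callback */
}

/* ...  list_sort(NULL, &my_list, foo_cmp);  ... */
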
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index ed5c1454dd62..29033f319aea 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -25,12 +25,15 @@
25int string_get_size(u64 size, const enum string_size_units units, 25int string_get_size(u64 size, const enum string_size_units units,
26 char *buf, int len) 26 char *buf, int len)
27{ 27{
28 static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB", 28 static const char *const units_10[] = {
29 "EB", "ZB", "YB", NULL}; 29 "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL
30 static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", 30 };
31 "EiB", "ZiB", "YiB", NULL }; 31 static const char *const units_2[] = {
32 static const char **units_str[] = { 32 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
33 [STRING_UNITS_10] = units_10, 33 NULL
34 };
35 static const char *const *const units_str[] = {
36 [STRING_UNITS_10] = units_10,
34 [STRING_UNITS_2] = units_2, 37 [STRING_UNITS_2] = units_2,
35 }; 38 };
36 static const unsigned int divisor[] = { 39 static const unsigned int divisor[] = {
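
The string_helpers hunk above only constifies the unit tables; the string_get_size() interface is unchanged. For reference, a sketch of how it is called (the wrapper name is invented for the example):

#include <linux/types.h>
#include <linux/string_helpers.h>

static void show_size(u64 bytes)
{
	char buf[16];

	string_get_size(bytes, STRING_UNITS_10, buf, sizeof(buf));	/* decimal: B, kB, MB, ... */
	string_get_size(bytes, STRING_UNITS_2, buf, sizeof(buf));	/* binary: B, KiB, MiB, ... */
}
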
diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c
index bea3f3fa3f02..4137bca5f8e8 100644
--- a/lib/test-kstrtox.c
+++ b/lib/test-kstrtox.c
@@ -3,7 +3,7 @@
3#include <linux/module.h> 3#include <linux/module.h>
4 4
5#define for_each_test(i, test) \ 5#define for_each_test(i, test) \
6 for (i = 0; i < sizeof(test) / sizeof(test[0]); i++) 6 for (i = 0; i < ARRAY_SIZE(test); i++)
7 7
8struct test_fail { 8struct test_fail {
9 const char *str; 9 const char *str;
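
The test-kstrtox hunk above swaps the open-coded sizeof division for ARRAY_SIZE(), which additionally refuses to compile if its argument is a pointer rather than an array. A trivial sketch of the same idiom:

#include <linux/kernel.h>	/* ARRAY_SIZE() */

static const int vals[] = { 1, 2, 3 };

static void walk_vals(void)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(vals); i++)	/* iterates exactly 3 times */
		(void)vals[i];
}
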
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
index d63381e8e333..d20ef458f137 100644
--- a/lib/zlib_deflate/deflate.c
+++ b/lib/zlib_deflate/deflate.c
@@ -250,52 +250,6 @@ int zlib_deflateInit2(
250} 250}
251 251
252/* ========================================================================= */ 252/* ========================================================================= */
253#if 0
254int zlib_deflateSetDictionary(
255 z_streamp strm,
256 const Byte *dictionary,
257 uInt dictLength
258)
259{
260 deflate_state *s;
261 uInt length = dictLength;
262 uInt n;
263 IPos hash_head = 0;
264
265 if (strm == NULL || strm->state == NULL || dictionary == NULL)
266 return Z_STREAM_ERROR;
267
268 s = (deflate_state *) strm->state;
269 if (s->status != INIT_STATE) return Z_STREAM_ERROR;
270
271 strm->adler = zlib_adler32(strm->adler, dictionary, dictLength);
272
273 if (length < MIN_MATCH) return Z_OK;
274 if (length > MAX_DIST(s)) {
275 length = MAX_DIST(s);
276#ifndef USE_DICT_HEAD
277 dictionary += dictLength - length; /* use the tail of the dictionary */
278#endif
279 }
280 memcpy((char *)s->window, dictionary, length);
281 s->strstart = length;
282 s->block_start = (long)length;
283
284 /* Insert all strings in the hash table (except for the last two bytes).
285 * s->lookahead stays null, so s->ins_h will be recomputed at the next
286 * call of fill_window.
287 */
288 s->ins_h = s->window[0];
289 UPDATE_HASH(s, s->ins_h, s->window[1]);
290 for (n = 0; n <= length - MIN_MATCH; n++) {
291 INSERT_STRING(s, n, hash_head);
292 }
293 if (hash_head) hash_head = 0; /* to make compiler happy */
294 return Z_OK;
295}
296#endif /* 0 */
297
298/* ========================================================================= */
299int zlib_deflateReset( 253int zlib_deflateReset(
300 z_streamp strm 254 z_streamp strm
301) 255)
@@ -326,45 +280,6 @@ int zlib_deflateReset(
326 return Z_OK; 280 return Z_OK;
327} 281}
328 282
329/* ========================================================================= */
330#if 0
331int zlib_deflateParams(
332 z_streamp strm,
333 int level,
334 int strategy
335)
336{
337 deflate_state *s;
338 compress_func func;
339 int err = Z_OK;
340
341 if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
342 s = (deflate_state *) strm->state;
343
344 if (level == Z_DEFAULT_COMPRESSION) {
345 level = 6;
346 }
347 if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
348 return Z_STREAM_ERROR;
349 }
350 func = configuration_table[s->level].func;
351
352 if (func != configuration_table[level].func && strm->total_in != 0) {
353 /* Flush the last buffer: */
354 err = zlib_deflate(strm, Z_PARTIAL_FLUSH);
355 }
356 if (s->level != level) {
357 s->level = level;
358 s->max_lazy_match = configuration_table[level].max_lazy;
359 s->good_match = configuration_table[level].good_length;
360 s->nice_match = configuration_table[level].nice_length;
361 s->max_chain_length = configuration_table[level].max_chain;
362 }
363 s->strategy = strategy;
364 return err;
365}
366#endif /* 0 */
367
368/* ========================================================================= 283/* =========================================================================
369 * Put a short in the pending buffer. The 16-bit value is put in MSB order. 284 * Put a short in the pending buffer. The 16-bit value is put in MSB order.
370 * IN assertion: the stream state is correct and there is enough room in 285 * IN assertion: the stream state is correct and there is enough room in
@@ -568,64 +483,6 @@ int zlib_deflateEnd(
568 return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; 483 return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
569} 484}
570 485
571/* =========================================================================
572 * Copy the source state to the destination state.
573 */
574#if 0
575int zlib_deflateCopy (
576 z_streamp dest,
577 z_streamp source
578)
579{
580#ifdef MAXSEG_64K
581 return Z_STREAM_ERROR;
582#else
583 deflate_state *ds;
584 deflate_state *ss;
585 ush *overlay;
586 deflate_workspace *mem;
587
588
589 if (source == NULL || dest == NULL || source->state == NULL) {
590 return Z_STREAM_ERROR;
591 }
592
593 ss = (deflate_state *) source->state;
594
595 *dest = *source;
596
597 mem = (deflate_workspace *) dest->workspace;
598
599 ds = &(mem->deflate_memory);
600
601 dest->state = (struct internal_state *) ds;
602 *ds = *ss;
603 ds->strm = dest;
604
605 ds->window = (Byte *) mem->window_memory;
606 ds->prev = (Pos *) mem->prev_memory;
607 ds->head = (Pos *) mem->head_memory;
608 overlay = (ush *) mem->overlay_memory;
609 ds->pending_buf = (uch *) overlay;
610
611 memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
612 memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
613 memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
614 memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
615
616 ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
617 ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
618 ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
619
620 ds->l_desc.dyn_tree = ds->dyn_ltree;
621 ds->d_desc.dyn_tree = ds->dyn_dtree;
622 ds->bl_desc.dyn_tree = ds->bl_tree;
623
624 return Z_OK;
625#endif
626}
627#endif /* 0 */
628
629/* =========================================================================== 486/* ===========================================================================
630 * Read a new buffer from the current input stream, update the adler32 487 * Read a new buffer from the current input stream, update the adler32
631 * and total number of bytes read. All deflate() input goes through 488 * and total number of bytes read. All deflate() input goes through
diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c
index f5ce87b0800e..58a733b10387 100644
--- a/lib/zlib_inflate/inflate.c
+++ b/lib/zlib_inflate/inflate.c
@@ -45,21 +45,6 @@ int zlib_inflateReset(z_streamp strm)
45 return Z_OK; 45 return Z_OK;
46} 46}
47 47
48#if 0
49int zlib_inflatePrime(z_streamp strm, int bits, int value)
50{
51 struct inflate_state *state;
52
53 if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
54 state = (struct inflate_state *)strm->state;
55 if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
56 value &= (1L << bits) - 1;
57 state->hold += value << state->bits;
58 state->bits += bits;
59 return Z_OK;
60}
61#endif
62
63int zlib_inflateInit2(z_streamp strm, int windowBits) 48int zlib_inflateInit2(z_streamp strm, int windowBits)
64{ 49{
65 struct inflate_state *state; 50 struct inflate_state *state;
@@ -761,123 +746,6 @@ int zlib_inflateEnd(z_streamp strm)
761 return Z_OK; 746 return Z_OK;
762} 747}
763 748
764#if 0
765int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary,
766 uInt dictLength)
767{
768 struct inflate_state *state;
769 unsigned long id;
770
771 /* check state */
772 if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
773 state = (struct inflate_state *)strm->state;
774 if (state->wrap != 0 && state->mode != DICT)
775 return Z_STREAM_ERROR;
776
777 /* check for correct dictionary id */
778 if (state->mode == DICT) {
779 id = zlib_adler32(0L, NULL, 0);
780 id = zlib_adler32(id, dictionary, dictLength);
781 if (id != state->check)
782 return Z_DATA_ERROR;
783 }
784
785 /* copy dictionary to window */
786 zlib_updatewindow(strm, strm->avail_out);
787
788 if (dictLength > state->wsize) {
789 memcpy(state->window, dictionary + dictLength - state->wsize,
790 state->wsize);
791 state->whave = state->wsize;
792 }
793 else {
794 memcpy(state->window + state->wsize - dictLength, dictionary,
795 dictLength);
796 state->whave = dictLength;
797 }
798 state->havedict = 1;
799 return Z_OK;
800}
801#endif
802
803#if 0
804/*
805 Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found
806 or when out of input. When called, *have is the number of pattern bytes
807 found in order so far, in 0..3. On return *have is updated to the new
808 state. If on return *have equals four, then the pattern was found and the
809 return value is how many bytes were read including the last byte of the
810 pattern. If *have is less than four, then the pattern has not been found
811 yet and the return value is len. In the latter case, zlib_syncsearch() can be
812 called again with more data and the *have state. *have is initialized to
813 zero for the first call.
814 */
815static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf,
816 unsigned len)
817{
818 unsigned got;
819 unsigned next;
820
821 got = *have;
822 next = 0;
823 while (next < len && got < 4) {
824 if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
825 got++;
826 else if (buf[next])
827 got = 0;
828 else
829 got = 4 - got;
830 next++;
831 }
832 *have = got;
833 return next;
834}
835#endif
836
837#if 0
838int zlib_inflateSync(z_streamp strm)
839{
840 unsigned len; /* number of bytes to look at or looked at */
841 unsigned long in, out; /* temporary to save total_in and total_out */
842 unsigned char buf[4]; /* to restore bit buffer to byte string */
843 struct inflate_state *state;
844
845 /* check parameters */
846 if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
847 state = (struct inflate_state *)strm->state;
848 if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
849
850 /* if first time, start search in bit buffer */
851 if (state->mode != SYNC) {
852 state->mode = SYNC;
853 state->hold <<= state->bits & 7;
854 state->bits -= state->bits & 7;
855 len = 0;
856 while (state->bits >= 8) {
857 buf[len++] = (unsigned char)(state->hold);
858 state->hold >>= 8;
859 state->bits -= 8;
860 }
861 state->have = 0;
862 zlib_syncsearch(&(state->have), buf, len);
863 }
864
865 /* search available input */
866 len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in);
867 strm->avail_in -= len;
868 strm->next_in += len;
869 strm->total_in += len;
870
871 /* return no joy or set up to restart inflate() on a new block */
872 if (state->have != 4) return Z_DATA_ERROR;
873 in = strm->total_in; out = strm->total_out;
874 zlib_inflateReset(strm);
875 strm->total_in = in; strm->total_out = out;
876 state->mode = TYPE;
877 return Z_OK;
878}
879#endif
880
881/* 749/*
882 * This subroutine adds the data at next_in/avail_in to the output history 750 * This subroutine adds the data at next_in/avail_in to the output history
883 * without performing any output. The output buffer must be "caught up"; 751 * without performing any output. The output buffer must be "caught up";
diff --git a/mm/Kconfig b/mm/Kconfig
index 3e9977a9d657..886db2158538 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -508,21 +508,34 @@ config CMA_DEBUG
508 processing calls such as dma_alloc_from_contiguous(). 508 processing calls such as dma_alloc_from_contiguous().
509 This option does not affect warning and error messages. 509 This option does not affect warning and error messages.
510 510
511config ZBUD 511config CMA_AREAS
512 tristate 512 int "Maximum count of the CMA areas"
513 default n 513 depends on CMA
514 default 7
514 help 515 help
515 A special purpose allocator for storing compressed pages. 516 CMA allows creating CMA areas for a particular purpose, mainly
516 It is designed to store up to two compressed pages per physical 517 used as a device private area. This parameter sets the maximum
517 page. While this design limits storage density, it has simple and 518 number of CMA areas in the system.
518 deterministic reclaim properties that make it preferable to a higher 519
519 density approach when reclaim will be used. 520 If unsure, leave the default value "7".
521
522config MEM_SOFT_DIRTY
523 bool "Track memory changes"
524 depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
525 select PROC_PAGE_MONITOR
526 help
527 This option enables memory changes tracking by introducing a
528 soft-dirty bit on pte-s. This bit is set when someone writes
529 into a page, just like the regular dirty bit, but unlike the latter
530 it can be cleared by hand.
531
532 See Documentation/vm/soft-dirty.txt for more details.
520 533
521config ZSWAP 534config ZSWAP
522 bool "Compressed cache for swap pages (EXPERIMENTAL)" 535 bool "Compressed cache for swap pages (EXPERIMENTAL)"
523 depends on FRONTSWAP && CRYPTO=y 536 depends on FRONTSWAP && CRYPTO=y
524 select CRYPTO_LZO 537 select CRYPTO_LZO
525 select ZBUD 538 select ZPOOL
526 default n 539 default n
527 help 540 help
528 A lightweight compressed cache for swap pages. It takes 541 A lightweight compressed cache for swap pages. It takes
@@ -538,17 +551,22 @@ config ZSWAP
538 they have not be fully explored on the large set of potential 551 they have not be fully explored on the large set of potential
539 configurations and workloads that exist. 552 configurations and workloads that exist.
540 553
541config MEM_SOFT_DIRTY 554config ZPOOL
542 bool "Track memory changes" 555 tristate "Common API for compressed memory storage"
543 depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS 556 default n
544 select PROC_PAGE_MONITOR
545 help 557 help
546 This option enables memory changes tracking by introducing a 558 Compressed memory storage API. This allows using either zbud or
547 soft-dirty bit on pte-s. This bit it set when someone writes 559 zsmalloc.
548 into a page just as regular dirty bit, but unlike the latter
549 it can be cleared by hands.
550 560
551 See Documentation/vm/soft-dirty.txt for more details. 561config ZBUD
562 tristate "Low density storage for compressed pages"
563 default n
564 help
565 A special purpose allocator for storing compressed pages.
566 It is designed to store up to two compressed pages per physical
567 page. While this design limits storage density, it has simple and
568 deterministic reclaim properties that make it preferable to a higher
569 density approach when reclaim will be used.
552 570
553config ZSMALLOC 571config ZSMALLOC
554 tristate "Memory allocator for compressed pages" 572 tristate "Memory allocator for compressed pages"
diff --git a/mm/Makefile b/mm/Makefile
index 4064f3ec145e..632ae77e6070 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -59,6 +59,8 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
59obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o 59obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
60obj-$(CONFIG_CLEANCACHE) += cleancache.o 60obj-$(CONFIG_CLEANCACHE) += cleancache.o
61obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o 61obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
62obj-$(CONFIG_ZPOOL) += zpool.o
62obj-$(CONFIG_ZBUD) += zbud.o 63obj-$(CONFIG_ZBUD) += zbud.o
63obj-$(CONFIG_ZSMALLOC) += zsmalloc.o 64obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
64obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o 65obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
66obj-$(CONFIG_CMA) += cma.o
diff --git a/mm/cma.c b/mm/cma.c
new file mode 100644
index 000000000000..c17751c0dcaf
--- /dev/null
+++ b/mm/cma.c
@@ -0,0 +1,335 @@
1/*
2 * Contiguous Memory Allocator
3 *
4 * Copyright (c) 2010-2011 by Samsung Electronics.
5 * Copyright IBM Corporation, 2013
6 * Copyright LG Electronics Inc., 2014
7 * Written by:
8 * Marek Szyprowski <m.szyprowski@samsung.com>
9 * Michal Nazarewicz <mina86@mina86.com>
10 * Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
11 * Joonsoo Kim <iamjoonsoo.kim@lge.com>
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License as
15 * published by the Free Software Foundation; either version 2 of the
16 * License or (at your option) any later version of the license.
17 */
18
19#define pr_fmt(fmt) "cma: " fmt
20
21#ifdef CONFIG_CMA_DEBUG
22#ifndef DEBUG
23# define DEBUG
24#endif
25#endif
26
27#include <linux/memblock.h>
28#include <linux/err.h>
29#include <linux/mm.h>
30#include <linux/mutex.h>
31#include <linux/sizes.h>
32#include <linux/slab.h>
33#include <linux/log2.h>
34#include <linux/cma.h>
35
36struct cma {
37 unsigned long base_pfn;
38 unsigned long count;
39 unsigned long *bitmap;
40 unsigned int order_per_bit; /* Order of pages represented by one bit */
41 struct mutex lock;
42};
43
44static struct cma cma_areas[MAX_CMA_AREAS];
45static unsigned cma_area_count;
46static DEFINE_MUTEX(cma_mutex);
47
48phys_addr_t cma_get_base(struct cma *cma)
49{
50 return PFN_PHYS(cma->base_pfn);
51}
52
53unsigned long cma_get_size(struct cma *cma)
54{
55 return cma->count << PAGE_SHIFT;
56}
57
58static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
59{
60 return (1UL << (align_order >> cma->order_per_bit)) - 1;
61}
62
63static unsigned long cma_bitmap_maxno(struct cma *cma)
64{
65 return cma->count >> cma->order_per_bit;
66}
67
68static unsigned long cma_bitmap_pages_to_bits(struct cma *cma,
69 unsigned long pages)
70{
71 return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
72}
73
74static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count)
75{
76 unsigned long bitmap_no, bitmap_count;
77
78 bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
79 bitmap_count = cma_bitmap_pages_to_bits(cma, count);
80
81 mutex_lock(&cma->lock);
82 bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
83 mutex_unlock(&cma->lock);
84}
85
86static int __init cma_activate_area(struct cma *cma)
87{
88 int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
89 unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
90 unsigned i = cma->count >> pageblock_order;
91 struct zone *zone;
92
93 cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
94
95 if (!cma->bitmap)
96 return -ENOMEM;
97
98 WARN_ON_ONCE(!pfn_valid(pfn));
99 zone = page_zone(pfn_to_page(pfn));
100
101 do {
102 unsigned j;
103
104 base_pfn = pfn;
105 for (j = pageblock_nr_pages; j; --j, pfn++) {
106 WARN_ON_ONCE(!pfn_valid(pfn));
107 /*
108 * alloc_contig_range requires the pfn range
109 * specified to be in the same zone. Make this
110 * simple by forcing the entire CMA resv range
111 * to be in the same zone.
112 */
113 if (page_zone(pfn_to_page(pfn)) != zone)
114 goto err;
115 }
116 init_cma_reserved_pageblock(pfn_to_page(base_pfn));
117 } while (--i);
118
119 mutex_init(&cma->lock);
120 return 0;
121
122err:
123 kfree(cma->bitmap);
124 return -EINVAL;
125}
126
127static int __init cma_init_reserved_areas(void)
128{
129 int i;
130
131 for (i = 0; i < cma_area_count; i++) {
132 int ret = cma_activate_area(&cma_areas[i]);
133
134 if (ret)
135 return ret;
136 }
137
138 return 0;
139}
140core_initcall(cma_init_reserved_areas);
141
142/**
143 * cma_declare_contiguous() - reserve custom contiguous area
144 * @base: Base address of the reserved area optional, use 0 for any
145 * @size: Size of the reserved area (in bytes),
146 * @limit: End address of the reserved memory (optional, 0 for any).
147 * @alignment: Alignment for the CMA area, should be power of 2 or zero
148 * @order_per_bit: Order of pages represented by one bit on bitmap.
149 * @fixed: hint about where to place the reserved area
150 * @res_cma: Pointer to store the created cma region.
151 *
152 * This function reserves memory from early allocator. It should be
153 * called by arch specific code once the early allocator (memblock or bootmem)
154 * has been activated and all other subsystems have already allocated/reserved
155 * memory. This function allows the creation of custom reserved areas.
156 *
157 * If @fixed is true, reserve contiguous area at exactly @base. If false,
158 * reserve in range from @base to @limit.
159 */
160int __init cma_declare_contiguous(phys_addr_t base,
161 phys_addr_t size, phys_addr_t limit,
162 phys_addr_t alignment, unsigned int order_per_bit,
163 bool fixed, struct cma **res_cma)
164{
165 struct cma *cma;
166 int ret = 0;
167
168 pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n",
169 __func__, (unsigned long)size, (unsigned long)base,
170 (unsigned long)limit, (unsigned long)alignment);
171
172 if (cma_area_count == ARRAY_SIZE(cma_areas)) {
173 pr_err("Not enough slots for CMA reserved regions!\n");
174 return -ENOSPC;
175 }
176
177 if (!size)
178 return -EINVAL;
179
180 if (alignment && !is_power_of_2(alignment))
181 return -EINVAL;
182
183 /*
184 * Sanitise input arguments.
185 * Pages at both ends of the CMA area could be merged into adjacent
186 * unmovable migratetype pages by the page allocator's buddy algorithm.
187 * In that case a contiguous allocation could fail, which is not what we want.
188 */
189 alignment = max(alignment,
190 (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
191 base = ALIGN(base, alignment);
192 size = ALIGN(size, alignment);
193 limit &= ~(alignment - 1);
194
195 /* size should be aligned with order_per_bit */
196 if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
197 return -EINVAL;
198
199 /* Reserve memory */
200 if (base && fixed) {
201 if (memblock_is_region_reserved(base, size) ||
202 memblock_reserve(base, size) < 0) {
203 ret = -EBUSY;
204 goto err;
205 }
206 } else {
207 phys_addr_t addr = memblock_alloc_range(size, alignment, base,
208 limit);
209 if (!addr) {
210 ret = -ENOMEM;
211 goto err;
212 } else {
213 base = addr;
214 }
215 }
216
217 /*
218 * Each reserved area must be initialised later, when more kernel
219 * subsystems (like slab allocator) are available.
220 */
221 cma = &cma_areas[cma_area_count];
222 cma->base_pfn = PFN_DOWN(base);
223 cma->count = size >> PAGE_SHIFT;
224 cma->order_per_bit = order_per_bit;
225 *res_cma = cma;
226 cma_area_count++;
227
228 pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
229 (unsigned long)base);
230 return 0;
231
232err:
233 pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
234 return ret;
235}
236
237/**
238 * cma_alloc() - allocate pages from contiguous area
239 * @cma: Contiguous memory region for which the allocation is performed.
240 * @count: Requested number of pages.
241 * @align: Requested alignment of pages (in PAGE_SIZE order).
242 *
243 * This function allocates part of contiguous memory on specific
244 * contiguous memory area.
245 */
246struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
247{
248 unsigned long mask, pfn, start = 0;
249 unsigned long bitmap_maxno, bitmap_no, bitmap_count;
250 struct page *page = NULL;
251 int ret;
252
253 if (!cma || !cma->count)
254 return NULL;
255
256 pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
257 count, align);
258
259 if (!count)
260 return NULL;
261
262 mask = cma_bitmap_aligned_mask(cma, align);
263 bitmap_maxno = cma_bitmap_maxno(cma);
264 bitmap_count = cma_bitmap_pages_to_bits(cma, count);
265
266 for (;;) {
267 mutex_lock(&cma->lock);
268 bitmap_no = bitmap_find_next_zero_area(cma->bitmap,
269 bitmap_maxno, start, bitmap_count, mask);
270 if (bitmap_no >= bitmap_maxno) {
271 mutex_unlock(&cma->lock);
272 break;
273 }
274 bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
275 /*
276 * It's safe to drop the lock here. We've marked this region for
277 * our exclusive use. If the migration fails we will take the
278 * lock again and unmark it.
279 */
280 mutex_unlock(&cma->lock);
281
282 pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
283 mutex_lock(&cma_mutex);
284 ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
285 mutex_unlock(&cma_mutex);
286 if (ret == 0) {
287 page = pfn_to_page(pfn);
288 break;
289 }
290
291 cma_clear_bitmap(cma, pfn, count);
292 if (ret != -EBUSY)
293 break;
294
295 pr_debug("%s(): memory range at %p is busy, retrying\n",
296 __func__, pfn_to_page(pfn));
297 /* try again with a bit different memory target */
298 start = bitmap_no + mask + 1;
299 }
300
301 pr_debug("%s(): returned %p\n", __func__, page);
302 return page;
303}
304
305/**
306 * cma_release() - release allocated pages
307 * @cma: Contiguous memory region for which the allocation is performed.
308 * @pages: Allocated pages.
309 * @count: Number of allocated pages.
310 *
311 * This function releases memory allocated by cma_alloc().
312 * It returns false when provided pages do not belong to contiguous area and
313 * true otherwise.
314 */
315bool cma_release(struct cma *cma, struct page *pages, int count)
316{
317 unsigned long pfn;
318
319 if (!cma || !pages)
320 return false;
321
322 pr_debug("%s(page %p)\n", __func__, (void *)pages);
323
324 pfn = page_to_pfn(pages);
325
326 if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
327 return false;
328
329 VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
330
331 free_contig_range(pfn, count);
332 cma_clear_bitmap(cma, pfn, count);
333
334 return true;
335}
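
Tying the new mm/cma.c together: early arch code reserves an area with cma_declare_contiguous(), and a driver later allocates and releases pages from it. The sketch below is illustrative only; my_cma and the sizes are invented, and per the kerneldoc above the reservation must happen while the early allocator (memblock or bootmem) is still active.

#include <linux/cma.h>
#include <linux/sizes.h>

static struct cma *my_cma;

/* Early reservation: 16 MiB, any base (0), any limit (0), 1 bitmap bit per page. */
static int __init my_reserve_cma(void)
{
	return cma_declare_contiguous(0, SZ_16M, 0, 0, 0, false, &my_cma);
}

/* Driver side: 64 physically contiguous pages, aligned to order 4. */
static struct page *my_alloc_buffer(void)
{
	return cma_alloc(my_cma, 64, 4);
}

static void my_free_buffer(struct page *pages)
{
	cma_release(my_cma, pages, 64);
}
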
diff --git a/mm/filemap.c b/mm/filemap.c
index 65d44fd88c78..af19a6b079f5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page)
808} 808}
809EXPORT_SYMBOL_GPL(__lock_page_killable); 809EXPORT_SYMBOL_GPL(__lock_page_killable);
810 810
811/*
812 * Return values:
813 * 1 - page is locked; mmap_sem is still held.
814 * 0 - page is not locked.
815 * mmap_sem has been released (up_read()), unless flags had both
816 * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
817 * which case mmap_sem is still held.
818 *
819 * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
820 * with the page locked and the mmap_sem unperturbed.
821 */
811int __lock_page_or_retry(struct page *page, struct mm_struct *mm, 822int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
812 unsigned int flags) 823 unsigned int flags)
813{ 824{
@@ -1091,9 +1102,9 @@ no_page:
1091 if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK))) 1102 if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
1092 fgp_flags |= FGP_LOCK; 1103 fgp_flags |= FGP_LOCK;
1093 1104
1094 /* Init accessed so avoit atomic mark_page_accessed later */ 1105 /* Init accessed so avoid atomic mark_page_accessed later */
1095 if (fgp_flags & FGP_ACCESSED) 1106 if (fgp_flags & FGP_ACCESSED)
1096 init_page_accessed(page); 1107 __SetPageReferenced(page);
1097 1108
1098 err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask); 1109 err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask);
1099 if (unlikely(err)) { 1110 if (unlikely(err)) {
@@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
1827 * The goto's are kind of ugly, but this streamlines the normal case of having 1838 * The goto's are kind of ugly, but this streamlines the normal case of having
1828 * it in the page cache, and handles the special cases reasonably without 1839 * it in the page cache, and handles the special cases reasonably without
1829 * having a lot of duplicated code. 1840 * having a lot of duplicated code.
1841 *
1842 * vma->vm_mm->mmap_sem must be held on entry.
1843 *
1844 * If our return value has VM_FAULT_RETRY set, it's because
1845 * lock_page_or_retry() returned 0.
1846 * The mmap_sem has usually been released in this case.
1847 * See __lock_page_or_retry() for the exception.
1848 *
1849 * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
1850 * has not been released.
1851 *
1852 * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
1830 */ 1853 */
1831int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1854int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1832{ 1855{
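
The comments added above spell out who owns mmap_sem when a fault returns VM_FAULT_RETRY. A caller of handle_mm_fault() therefore typically follows the pattern sketched here; the function name is invented, and a real caller must also check that find_vma() found a VMA covering the address and must handle the VM_FAULT_ERROR bits.

#include <linux/mm.h>
#include <linux/sched.h>

static int fault_in_one_page(struct mm_struct *mm, unsigned long address)
{
	struct vm_area_struct *vma;
	int ret;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	ret = handle_mm_fault(mm, vma, address, FAULT_FLAG_ALLOW_RETRY);
	if (ret & VM_FAULT_RETRY) {
		/* __lock_page_or_retry() released mmap_sem (see above):
		 * re-take it, look the VMA up afresh, retry without ALLOW_RETRY. */
		down_read(&mm->mmap_sem);
		vma = find_vma(mm, address);
		ret = handle_mm_fault(mm, vma, address, 0);
	}
	up_read(&mm->mmap_sem);
	return ret;
}
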
diff --git a/mm/gup.c b/mm/gup.c
index cc5a9e7adea7..91d044b1600d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -258,6 +258,11 @@ unmap:
258 return ret; 258 return ret;
259} 259}
260 260
261/*
262 * mmap_sem must be held on entry. If @nonblocking != NULL and
263 * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
264 * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
265 */
261static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, 266static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
262 unsigned long address, unsigned int *flags, int *nonblocking) 267 unsigned long address, unsigned int *flags, int *nonblocking)
263{ 268{
@@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
373 * with a put_page() call when it is finished with. vmas will only 378 * with a put_page() call when it is finished with. vmas will only
374 * remain valid while mmap_sem is held. 379 * remain valid while mmap_sem is held.
375 * 380 *
376 * Must be called with mmap_sem held for read or write. 381 * Must be called with mmap_sem held. It may be released. See below.
377 * 382 *
378 * __get_user_pages walks a process's page tables and takes a reference to 383 * __get_user_pages walks a process's page tables and takes a reference to
379 * each struct page that each user address corresponds to at a given 384 * each struct page that each user address corresponds to at a given
@@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
396 * 401 *
397 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO 402 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
398 * or mmap_sem contention, and if waiting is needed to pin all pages, 403 * or mmap_sem contention, and if waiting is needed to pin all pages,
399 * *@nonblocking will be set to 0. 404 * *@nonblocking will be set to 0. Further, if @gup_flags does not
405 * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
406 * this case.
407 *
408 * A caller using such a combination of @nonblocking and @gup_flags
409 * must therefore hold the mmap_sem for reading only, and recognize
410 * when it's been released. Otherwise, it must be held for either
411 * reading or writing and will not be released.
400 * 412 *
401 * In most cases, get_user_pages or get_user_pages_fast should be used 413 * In most cases, get_user_pages or get_user_pages_fast should be used
402 * instead of __get_user_pages. __get_user_pages should be used only if 414 * instead of __get_user_pages. __get_user_pages should be used only if
@@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages);
528 * such architectures, gup() will not be enough to make a subsequent access 540 * such architectures, gup() will not be enough to make a subsequent access
529 * succeed. 541 * succeed.
530 * 542 *
531 * This should be called with the mm_sem held for read. 543 * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
532 */ 544 */
533int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, 545int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
534 unsigned long address, unsigned int fault_flags) 546 unsigned long address, unsigned int fault_flags)
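
The reworked __get_user_pages() comment above defines the @nonblocking contract: if the caller passes a non-NULL int and does not set FOLL_NOWAIT, a zero in that int on return means mmap_sem has already been dropped. The matching caller pattern, sketched with an invented wrapper name:

#include <linux/mm.h>
#include <linux/sched.h>

static long pin_user_range(struct task_struct *tsk, struct mm_struct *mm,
			   unsigned long start, unsigned long nr_pages,
			   struct page **pages)
{
	int locked = 1;
	long ret;

	down_read(&mm->mmap_sem);
	ret = __get_user_pages(tsk, mm, start, nr_pages,
			       FOLL_WRITE | FOLL_TOUCH, pages, NULL, &locked);
	if (locked)
		up_read(&mm->mmap_sem);
	/* else: __get_user_pages() already did the up_read() while faulting */
	return ret;
}
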
diff --git a/mm/highmem.c b/mm/highmem.c
index b32b70cdaed6..123bcd3ed4f2 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -44,6 +44,66 @@ DEFINE_PER_CPU(int, __kmap_atomic_idx);
44 */ 44 */
45#ifdef CONFIG_HIGHMEM 45#ifdef CONFIG_HIGHMEM
46 46
47/*
48 * Architecture with aliasing data cache may define the following family of
49 * helper functions in its asm/highmem.h to control cache color of virtual
50 * addresses where physical memory pages are mapped by kmap.
51 */
52#ifndef get_pkmap_color
53
54/*
55 * Determine color of virtual address where the page should be mapped.
56 */
57static inline unsigned int get_pkmap_color(struct page *page)
58{
59 return 0;
60}
61#define get_pkmap_color get_pkmap_color
62
63/*
64 * Get next index for mapping inside PKMAP region for page with given color.
65 */
66static inline unsigned int get_next_pkmap_nr(unsigned int color)
67{
68 static unsigned int last_pkmap_nr;
69
70 last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
71 return last_pkmap_nr;
72}
73
74/*
75 * Determine if page index inside PKMAP region (pkmap_nr) of given color
76 * has wrapped around PKMAP region end. When this happens an attempt to
77 * flush all unused PKMAP slots is made.
78 */
79static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
80{
81 return pkmap_nr == 0;
82}
83
84/*
85 * Get the number of PKMAP entries of the given color. If no free slot is
86 * found after checking that many entries, kmap will sleep waiting for
87 * someone to call kunmap and free PKMAP slot.
88 */
89static inline int get_pkmap_entries_count(unsigned int color)
90{
91 return LAST_PKMAP;
92}
93
94/*
95 * Get head of a wait queue for PKMAP entries of the given color.
96 * Wait queues for different mapping colors should be independent to avoid
97 * unnecessary wakeups caused by freeing of slots of other colors.
98 */
99static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
100{
101 static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
102
103 return &pkmap_map_wait;
104}
105#endif
106
47unsigned long totalhigh_pages __read_mostly; 107unsigned long totalhigh_pages __read_mostly;
48EXPORT_SYMBOL(totalhigh_pages); 108EXPORT_SYMBOL(totalhigh_pages);
49 109
@@ -68,13 +128,10 @@ unsigned int nr_free_highpages (void)
68} 128}
69 129
70static int pkmap_count[LAST_PKMAP]; 130static int pkmap_count[LAST_PKMAP];
71static unsigned int last_pkmap_nr;
72static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); 131static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
73 132
74pte_t * pkmap_page_table; 133pte_t * pkmap_page_table;
75 134
76static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
77
78/* 135/*
79 * Most architectures have no use for kmap_high_get(), so let's abstract 136 * Most architectures have no use for kmap_high_get(), so let's abstract
80 * the disabling of IRQ out of the locking in that case to save on a 137 * the disabling of IRQ out of the locking in that case to save on a
@@ -161,15 +218,17 @@ static inline unsigned long map_new_virtual(struct page *page)
161{ 218{
162 unsigned long vaddr; 219 unsigned long vaddr;
163 int count; 220 int count;
221 unsigned int last_pkmap_nr;
222 unsigned int color = get_pkmap_color(page);
164 223
165start: 224start:
166 count = LAST_PKMAP; 225 count = get_pkmap_entries_count(color);
167 /* Find an empty entry */ 226 /* Find an empty entry */
168 for (;;) { 227 for (;;) {
169 last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; 228 last_pkmap_nr = get_next_pkmap_nr(color);
170 if (!last_pkmap_nr) { 229 if (no_more_pkmaps(last_pkmap_nr, color)) {
171 flush_all_zero_pkmaps(); 230 flush_all_zero_pkmaps();
172 count = LAST_PKMAP; 231 count = get_pkmap_entries_count(color);
173 } 232 }
174 if (!pkmap_count[last_pkmap_nr]) 233 if (!pkmap_count[last_pkmap_nr])
175 break; /* Found a usable entry */ 234 break; /* Found a usable entry */
@@ -181,12 +240,14 @@ start:
181 */ 240 */
182 { 241 {
183 DECLARE_WAITQUEUE(wait, current); 242 DECLARE_WAITQUEUE(wait, current);
243 wait_queue_head_t *pkmap_map_wait =
244 get_pkmap_wait_queue_head(color);
184 245
185 __set_current_state(TASK_UNINTERRUPTIBLE); 246 __set_current_state(TASK_UNINTERRUPTIBLE);
186 add_wait_queue(&pkmap_map_wait, &wait); 247 add_wait_queue(pkmap_map_wait, &wait);
187 unlock_kmap(); 248 unlock_kmap();
188 schedule(); 249 schedule();
189 remove_wait_queue(&pkmap_map_wait, &wait); 250 remove_wait_queue(pkmap_map_wait, &wait);
190 lock_kmap(); 251 lock_kmap();
191 252
192 /* Somebody else might have mapped it while we slept */ 253 /* Somebody else might have mapped it while we slept */
@@ -274,6 +335,8 @@ void kunmap_high(struct page *page)
274 unsigned long nr; 335 unsigned long nr;
275 unsigned long flags; 336 unsigned long flags;
276 int need_wakeup; 337 int need_wakeup;
338 unsigned int color = get_pkmap_color(page);
339 wait_queue_head_t *pkmap_map_wait;
277 340
278 lock_kmap_any(flags); 341 lock_kmap_any(flags);
279 vaddr = (unsigned long)page_address(page); 342 vaddr = (unsigned long)page_address(page);
@@ -299,13 +362,14 @@ void kunmap_high(struct page *page)
299 * no need for the wait-queue-head's lock. Simply 362 * no need for the wait-queue-head's lock. Simply
300 * test if the queue is empty. 363 * test if the queue is empty.
301 */ 364 */
302 need_wakeup = waitqueue_active(&pkmap_map_wait); 365 pkmap_map_wait = get_pkmap_wait_queue_head(color);
366 need_wakeup = waitqueue_active(pkmap_map_wait);
303 } 367 }
304 unlock_kmap_any(flags); 368 unlock_kmap_any(flags);
305 369
306 /* do wake-up, if needed, race-free outside of the spin lock */ 370 /* do wake-up, if needed, race-free outside of the spin lock */
307 if (need_wakeup) 371 if (need_wakeup)
308 wake_up(&pkmap_map_wait); 372 wake_up(pkmap_map_wait);
309} 373}
310 374
311EXPORT_SYMBOL(kunmap_high); 375EXPORT_SYMBOL(kunmap_high);
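
The get_pkmap_*() hooks above are meant to be overridden from asm/highmem.h by architectures whose data caches alias on virtual address. Purely as a hypothetical illustration, a 4-colour variant could look like the partial sketch below; no in-tree architecture is implied, and the per-colour wait-queue helper is only described in the trailing comment, since defining get_pkmap_color disables every generic fallback above and the whole family must then be provided.

#define PKMAP_COLORS	4	/* hypothetical 4-way aliasing data cache */

static inline unsigned int get_pkmap_color(struct page *page)
{
	return page_to_pfn(page) & (PKMAP_COLORS - 1);
}
#define get_pkmap_color get_pkmap_color

/* Step only through PKMAP slots whose index has the requested colour. */
static inline unsigned int get_next_pkmap_nr(unsigned int color)
{
	static unsigned int last[PKMAP_COLORS];

	last[color] = (last[color] + PKMAP_COLORS) & LAST_PKMAP_MASK;
	return last[color] + color;
}

/* We have wrapped when we are back at the first slot of this colour. */
static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
{
	return pkmap_nr == color;
}

static inline int get_pkmap_entries_count(unsigned int color)
{
	return LAST_PKMAP / PKMAP_COLORS;
}

/* get_pkmap_wait_queue_head(color) would return the address of one of
 * PKMAP_COLORS statically declared wait queues, one per colour. */
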
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33514d88fef9..3630d577e987 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -827,7 +827,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
827 count_vm_event(THP_FAULT_FALLBACK); 827 count_vm_event(THP_FAULT_FALLBACK);
828 return VM_FAULT_FALLBACK; 828 return VM_FAULT_FALLBACK;
829 } 829 }
830 if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) { 830 if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_TRANSHUGE))) {
831 put_page(page); 831 put_page(page);
832 count_vm_event(THP_FAULT_FALLBACK); 832 count_vm_event(THP_FAULT_FALLBACK);
833 return VM_FAULT_FALLBACK; 833 return VM_FAULT_FALLBACK;
@@ -1132,7 +1132,7 @@ alloc:
1132 goto out; 1132 goto out;
1133 } 1133 }
1134 1134
1135 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) { 1135 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) {
1136 put_page(new_page); 1136 put_page(new_page);
1137 if (page) { 1137 if (page) {
1138 split_huge_page(page); 1138 split_huge_page(page);
@@ -1681,7 +1681,7 @@ static void __split_huge_page_refcount(struct page *page,
1681 &page_tail->_count); 1681 &page_tail->_count);
1682 1682
1683 /* after clearing PageTail the gup refcount can be released */ 1683 /* after clearing PageTail the gup refcount can be released */
1684 smp_mb(); 1684 smp_mb__after_atomic();
1685 1685
1686 /* 1686 /*
1687 * retain hwpoison flag of the poisoned tail page: 1687 * retain hwpoison flag of the poisoned tail page:
@@ -1775,6 +1775,8 @@ static int __split_huge_page_map(struct page *page,
1775 if (pmd) { 1775 if (pmd) {
1776 pgtable = pgtable_trans_huge_withdraw(mm, pmd); 1776 pgtable = pgtable_trans_huge_withdraw(mm, pmd);
1777 pmd_populate(mm, &_pmd, pgtable); 1777 pmd_populate(mm, &_pmd, pgtable);
1778 if (pmd_write(*pmd))
1779 BUG_ON(page_mapcount(page) != 1);
1778 1780
1779 haddr = address; 1781 haddr = address;
1780 for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { 1782 for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -1784,8 +1786,6 @@ static int __split_huge_page_map(struct page *page,
1784 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 1786 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
1785 if (!pmd_write(*pmd)) 1787 if (!pmd_write(*pmd))
1786 entry = pte_wrprotect(entry); 1788 entry = pte_wrprotect(entry);
1787 else
1788 BUG_ON(page_mapcount(page) != 1);
1789 if (!pmd_young(*pmd)) 1789 if (!pmd_young(*pmd))
1790 entry = pte_mkold(entry); 1790 entry = pte_mkold(entry);
1791 if (pmd_numa(*pmd)) 1791 if (pmd_numa(*pmd))
@@ -2233,6 +2233,30 @@ static void khugepaged_alloc_sleep(void)
2233 2233
2234static int khugepaged_node_load[MAX_NUMNODES]; 2234static int khugepaged_node_load[MAX_NUMNODES];
2235 2235
2236static bool khugepaged_scan_abort(int nid)
2237{
2238 int i;
2239
2240 /*
2241 * If zone_reclaim_mode is disabled, then no extra effort is made to
2242 * allocate memory locally.
2243 */
2244 if (!zone_reclaim_mode)
2245 return false;
2246
2247 /* If there is a count for this node already, it must be acceptable */
2248 if (khugepaged_node_load[nid])
2249 return false;
2250
2251 for (i = 0; i < MAX_NUMNODES; i++) {
2252 if (!khugepaged_node_load[i])
2253 continue;
2254 if (node_distance(nid, i) > RECLAIM_DISTANCE)
2255 return true;
2256 }
2257 return false;
2258}
2259
2236#ifdef CONFIG_NUMA 2260#ifdef CONFIG_NUMA
2237static int khugepaged_find_target_node(void) 2261static int khugepaged_find_target_node(void)
2238{ 2262{
@@ -2399,7 +2423,7 @@ static void collapse_huge_page(struct mm_struct *mm,
2399 if (!new_page) 2423 if (!new_page)
2400 return; 2424 return;
2401 2425
2402 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) 2426 if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE)))
2403 return; 2427 return;
2404 2428
2405 /* 2429 /*
@@ -2545,6 +2569,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
2545 * hit record. 2569 * hit record.
2546 */ 2570 */
2547 node = page_to_nid(page); 2571 node = page_to_nid(page);
2572 if (khugepaged_scan_abort(node))
2573 goto out_unmap;
2548 khugepaged_node_load[node]++; 2574 khugepaged_node_load[node]++;
2549 VM_BUG_ON_PAGE(PageCompound(page), page); 2575 VM_BUG_ON_PAGE(PageCompound(page), page);
2550 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) 2576 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7a0a73d2fcff..eeceeeb09019 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,7 +35,6 @@
35#include <linux/node.h> 35#include <linux/node.h>
36#include "internal.h" 36#include "internal.h"
37 37
38const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
39unsigned long hugepages_treat_as_movable; 38unsigned long hugepages_treat_as_movable;
40 39
41int hugetlb_max_hstate __read_mostly; 40int hugetlb_max_hstate __read_mostly;
@@ -1089,6 +1088,9 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
1089 unsigned long pfn; 1088 unsigned long pfn;
1090 struct hstate *h; 1089 struct hstate *h;
1091 1090
1091 if (!hugepages_supported())
1092 return;
1093
1092 /* Set scan step to minimum hugepage size */ 1094 /* Set scan step to minimum hugepage size */
1093 for_each_hstate(h) 1095 for_each_hstate(h)
1094 if (order > huge_page_order(h)) 1096 if (order > huge_page_order(h))
@@ -1734,21 +1736,13 @@ static ssize_t nr_hugepages_show_common(struct kobject *kobj,
1734 return sprintf(buf, "%lu\n", nr_huge_pages); 1736 return sprintf(buf, "%lu\n", nr_huge_pages);
1735} 1737}
1736 1738
1737static ssize_t nr_hugepages_store_common(bool obey_mempolicy, 1739static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
1738 struct kobject *kobj, struct kobj_attribute *attr, 1740 struct hstate *h, int nid,
1739 const char *buf, size_t len) 1741 unsigned long count, size_t len)
1740{ 1742{
1741 int err; 1743 int err;
1742 int nid;
1743 unsigned long count;
1744 struct hstate *h;
1745 NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY); 1744 NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
1746 1745
1747 err = kstrtoul(buf, 10, &count);
1748 if (err)
1749 goto out;
1750
1751 h = kobj_to_hstate(kobj, &nid);
1752 if (hstate_is_gigantic(h) && !gigantic_page_supported()) { 1746 if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
1753 err = -EINVAL; 1747 err = -EINVAL;
1754 goto out; 1748 goto out;
@@ -1784,6 +1778,23 @@ out:
1784 return err; 1778 return err;
1785} 1779}
1786 1780
1781static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
1782 struct kobject *kobj, const char *buf,
1783 size_t len)
1784{
1785 struct hstate *h;
1786 unsigned long count;
1787 int nid;
1788 int err;
1789
1790 err = kstrtoul(buf, 10, &count);
1791 if (err)
1792 return err;
1793
1794 h = kobj_to_hstate(kobj, &nid);
1795 return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len);
1796}
1797
1787static ssize_t nr_hugepages_show(struct kobject *kobj, 1798static ssize_t nr_hugepages_show(struct kobject *kobj,
1788 struct kobj_attribute *attr, char *buf) 1799 struct kobj_attribute *attr, char *buf)
1789{ 1800{
@@ -1793,7 +1804,7 @@ static ssize_t nr_hugepages_show(struct kobject *kobj,
1793static ssize_t nr_hugepages_store(struct kobject *kobj, 1804static ssize_t nr_hugepages_store(struct kobject *kobj,
1794 struct kobj_attribute *attr, const char *buf, size_t len) 1805 struct kobj_attribute *attr, const char *buf, size_t len)
1795{ 1806{
1796 return nr_hugepages_store_common(false, kobj, attr, buf, len); 1807 return nr_hugepages_store_common(false, kobj, buf, len);
1797} 1808}
1798HSTATE_ATTR(nr_hugepages); 1809HSTATE_ATTR(nr_hugepages);
1799 1810
@@ -1812,7 +1823,7 @@ static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
1812static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj, 1823static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
1813 struct kobj_attribute *attr, const char *buf, size_t len) 1824 struct kobj_attribute *attr, const char *buf, size_t len)
1814{ 1825{
1815 return nr_hugepages_store_common(true, kobj, attr, buf, len); 1826 return nr_hugepages_store_common(true, kobj, buf, len);
1816} 1827}
1817HSTATE_ATTR(nr_hugepages_mempolicy); 1828HSTATE_ATTR(nr_hugepages_mempolicy);
1818#endif 1829#endif
@@ -2248,36 +2259,21 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
2248 void __user *buffer, size_t *length, loff_t *ppos) 2259 void __user *buffer, size_t *length, loff_t *ppos)
2249{ 2260{
2250 struct hstate *h = &default_hstate; 2261 struct hstate *h = &default_hstate;
2251 unsigned long tmp; 2262 unsigned long tmp = h->max_huge_pages;
2252 int ret; 2263 int ret;
2253 2264
2254 if (!hugepages_supported()) 2265 if (!hugepages_supported())
2255 return -ENOTSUPP; 2266 return -ENOTSUPP;
2256 2267
2257 tmp = h->max_huge_pages;
2258
2259 if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
2260 return -EINVAL;
2261
2262 table->data = &tmp; 2268 table->data = &tmp;
2263 table->maxlen = sizeof(unsigned long); 2269 table->maxlen = sizeof(unsigned long);
2264 ret = proc_doulongvec_minmax(table, write, buffer, length, ppos); 2270 ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
2265 if (ret) 2271 if (ret)
2266 goto out; 2272 goto out;
2267 2273
2268 if (write) { 2274 if (write)
2269 NODEMASK_ALLOC(nodemask_t, nodes_allowed, 2275 ret = __nr_hugepages_store_common(obey_mempolicy, h,
2270 GFP_KERNEL | __GFP_NORETRY); 2276 NUMA_NO_NODE, tmp, *length);
2271 if (!(obey_mempolicy &&
2272 init_nodemask_of_mempolicy(nodes_allowed))) {
2273 NODEMASK_FREE(nodes_allowed);
2274 nodes_allowed = &node_states[N_MEMORY];
2275 }
2276 h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
2277
2278 if (nodes_allowed != &node_states[N_MEMORY])
2279 NODEMASK_FREE(nodes_allowed);
2280 }
2281out: 2277out:
2282 return ret; 2278 return ret;
2283} 2279}
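The refactoring in the hunks above separates input parsing from the actual resize so that the sysfs store handlers and the sysctl handler can funnel into one helper. A toy sketch of that shape, with hypothetical names: set_count_common() stands in for __nr_hugepages_store_common(), and the resize itself is reduced to an assignment.

#include <stdio.h>
#include <stdlib.h>

static unsigned long max_huge_pages;

static long set_count_common(int nid, unsigned long count, long len)
{
        /* Stand-in for __nr_hugepages_store_common(): apply the count
         * against one node or all nodes, then report bytes consumed. */
        max_huge_pages = count;
        printf("nid=%d -> max_huge_pages=%lu\n", nid, max_huge_pages);
        return len;
}

static long sysfs_store(const char *buf, long len)
{
        unsigned long count = strtoul(buf, NULL, 10); /* kstrtoul() in the kernel */
        return set_count_common(0, count, len);       /* nid from kobj_to_hstate() */
}

static long sysctl_write(unsigned long tmp, long len)
{
        return set_count_common(-1, tmp, len);        /* -1 models NUMA_NO_NODE */
}

int main(void)
{
        sysfs_store("128", 4);
        sysctl_write(256, 4);
        return 0;
}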
@@ -2754,8 +2750,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
2754 * from other VMAs and let the children be SIGKILLed if they are faulting the 2750 * from other VMAs and let the children be SIGKILLed if they are faulting the
2755 * same region. 2751 * same region.
2756 */ 2752 */
2757static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, 2753static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
2758 struct page *page, unsigned long address) 2754 struct page *page, unsigned long address)
2759{ 2755{
2760 struct hstate *h = hstate_vma(vma); 2756 struct hstate *h = hstate_vma(vma);
2761 struct vm_area_struct *iter_vma; 2757 struct vm_area_struct *iter_vma;
@@ -2794,8 +2790,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
2794 address + huge_page_size(h), page); 2790 address + huge_page_size(h), page);
2795 } 2791 }
2796 mutex_unlock(&mapping->i_mmap_mutex); 2792 mutex_unlock(&mapping->i_mmap_mutex);
2797
2798 return 1;
2799} 2793}
2800 2794
2801/* 2795/*
@@ -2810,7 +2804,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
2810{ 2804{
2811 struct hstate *h = hstate_vma(vma); 2805 struct hstate *h = hstate_vma(vma);
2812 struct page *old_page, *new_page; 2806 struct page *old_page, *new_page;
2813 int outside_reserve = 0; 2807 int ret = 0, outside_reserve = 0;
2814 unsigned long mmun_start; /* For mmu_notifiers */ 2808 unsigned long mmun_start; /* For mmu_notifiers */
2815 unsigned long mmun_end; /* For mmu_notifiers */ 2809 unsigned long mmun_end; /* For mmu_notifiers */
2816 2810
@@ -2840,14 +2834,14 @@ retry_avoidcopy:
2840 2834
2841 page_cache_get(old_page); 2835 page_cache_get(old_page);
2842 2836
2843 /* Drop page table lock as buddy allocator may be called */ 2837 /*
2838 * Drop page table lock as buddy allocator may be called. It will
2839 * be acquired again before returning to the caller, as expected.
2840 */
2844 spin_unlock(ptl); 2841 spin_unlock(ptl);
2845 new_page = alloc_huge_page(vma, address, outside_reserve); 2842 new_page = alloc_huge_page(vma, address, outside_reserve);
2846 2843
2847 if (IS_ERR(new_page)) { 2844 if (IS_ERR(new_page)) {
2848 long err = PTR_ERR(new_page);
2849 page_cache_release(old_page);
2850
2851 /* 2845 /*
2852 * If a process owning a MAP_PRIVATE mapping fails to COW, 2846 * If a process owning a MAP_PRIVATE mapping fails to COW,
2853 * it is due to references held by a child and an insufficient 2847 * it is due to references held by a child and an insufficient
@@ -2856,29 +2850,25 @@ retry_avoidcopy:
2856 * may get SIGKILLed if it later faults. 2850 * may get SIGKILLed if it later faults.
2857 */ 2851 */
2858 if (outside_reserve) { 2852 if (outside_reserve) {
2853 page_cache_release(old_page);
2859 BUG_ON(huge_pte_none(pte)); 2854 BUG_ON(huge_pte_none(pte));
2860 if (unmap_ref_private(mm, vma, old_page, address)) { 2855 unmap_ref_private(mm, vma, old_page, address);
2861 BUG_ON(huge_pte_none(pte)); 2856 BUG_ON(huge_pte_none(pte));
2862 spin_lock(ptl); 2857 spin_lock(ptl);
2863 ptep = huge_pte_offset(mm, address & huge_page_mask(h)); 2858 ptep = huge_pte_offset(mm, address & huge_page_mask(h));
2864 if (likely(ptep && 2859 if (likely(ptep &&
2865 pte_same(huge_ptep_get(ptep), pte))) 2860 pte_same(huge_ptep_get(ptep), pte)))
2866 goto retry_avoidcopy; 2861 goto retry_avoidcopy;
2867 /* 2862 /*
2868 * race occurs while re-acquiring page table 2863 * race occurs while re-acquiring page table
2869 * lock, and our job is done. 2864 * lock, and our job is done.
2870 */ 2865 */
2871 return 0; 2866 return 0;
2872 }
2873 WARN_ON_ONCE(1);
2874 } 2867 }
2875 2868
2876 /* Caller expects lock to be held */ 2869 ret = (PTR_ERR(new_page) == -ENOMEM) ?
2877 spin_lock(ptl); 2870 VM_FAULT_OOM : VM_FAULT_SIGBUS;
2878 if (err == -ENOMEM) 2871 goto out_release_old;
2879 return VM_FAULT_OOM;
2880 else
2881 return VM_FAULT_SIGBUS;
2882 } 2872 }
2883 2873
2884 /* 2874 /*
@@ -2886,11 +2876,8 @@ retry_avoidcopy:
2886 * anon_vma prepared. 2876 * anon_vma prepared.
2887 */ 2877 */
2888 if (unlikely(anon_vma_prepare(vma))) { 2878 if (unlikely(anon_vma_prepare(vma))) {
2889 page_cache_release(new_page); 2879 ret = VM_FAULT_OOM;
2890 page_cache_release(old_page); 2880 goto out_release_all;
2891 /* Caller expects lock to be held */
2892 spin_lock(ptl);
2893 return VM_FAULT_OOM;
2894 } 2881 }
2895 2882
2896 copy_user_huge_page(new_page, old_page, address, vma, 2883 copy_user_huge_page(new_page, old_page, address, vma,
@@ -2900,6 +2887,7 @@ retry_avoidcopy:
2900 mmun_start = address & huge_page_mask(h); 2887 mmun_start = address & huge_page_mask(h);
2901 mmun_end = mmun_start + huge_page_size(h); 2888 mmun_end = mmun_start + huge_page_size(h);
2902 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); 2889 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
2890
2903 /* 2891 /*
2904 * Retake the page table lock to check for racing updates 2892 * Retake the page table lock to check for racing updates
2905 * before the page tables are altered 2893 * before the page tables are altered
@@ -2920,12 +2908,13 @@ retry_avoidcopy:
2920 } 2908 }
2921 spin_unlock(ptl); 2909 spin_unlock(ptl);
2922 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); 2910 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
2911out_release_all:
2923 page_cache_release(new_page); 2912 page_cache_release(new_page);
2913out_release_old:
2924 page_cache_release(old_page); 2914 page_cache_release(old_page);
2925 2915
2926 /* Caller expects lock to be held */ 2916 spin_lock(ptl); /* Caller expects lock to be held */
2927 spin_lock(ptl); 2917 return ret;
2928 return 0;
2929} 2918}
2930 2919
2931/* Return the pagecache page at a given address within a VMA */ 2920/* Return the pagecache page at a given address within a VMA */
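The hugetlb_cow() rework above replaces ad-hoc release/re-lock sequences on each failure path with two exit labels, so every path releases exactly what it took and retakes the lock the caller expects. A generic sketch of that unwind shape, with stub acquire/release/lock helpers; nothing here is hugetlb-specific.

#include <stdio.h>

static int  acquire(const char *what) { printf("get %s\n", what); return 1; }
static void release(const char *what) { printf("put %s\n", what); }
static void lock(void)   { printf("lock\n"); }
static void unlock(void) { printf("unlock\n"); }

static int cow(int fail_alloc, int fail_prepare)
{
        int ret = 0;

        acquire("old_page");
        unlock();                       /* drop the lock around allocation */

        if (fail_alloc) {
                ret = -12;              /* -ENOMEM */
                goto out_release_old;   /* new_page was never taken */
        }
        acquire("new_page");

        if (fail_prepare) {
                ret = -12;
                goto out_release_all;
        }
        /* success path falls through and drops its references too */

out_release_all:
        release("new_page");
out_release_old:
        release("old_page");
        lock();                         /* caller expects the lock held */
        return ret;
}

int main(void)
{
        printf("cow -> %d\n", cow(0, 1));
        return 0;
}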
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index 95487c71cad5..329caf56df22 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -72,8 +72,7 @@ DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");
72 72
73static void pfn_inject_exit(void) 73static void pfn_inject_exit(void)
74{ 74{
75 if (hwpoison_dir) 75 debugfs_remove_recursive(hwpoison_dir);
76 debugfs_remove_recursive(hwpoison_dir);
77} 76}
78 77
79static int pfn_inject_init(void) 78static int pfn_inject_init(void)
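The hwpoison change above works because debugfs_remove_recursive() tolerates a NULL dentry, so the caller-side check is redundant. The same idiom in a standalone sketch, with a hypothetical destroy() helper.

#include <stdlib.h>

struct dir { int dummy; };

static void destroy(struct dir *d)
{
        if (!d)                 /* tolerate NULL so callers need not check */
                return;
        free(d);
}

int main(void)
{
        struct dir *d = NULL;   /* e.g. when setup never ran */
        destroy(d);             /* safe either way */
        return 0;
}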
diff --git a/mm/internal.h b/mm/internal.h
index 7f22a11fcc66..a1b651b11c5f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -247,7 +247,7 @@ static inline void mlock_migrate_page(struct page *new, struct page *old) { }
247static inline struct page *mem_map_offset(struct page *base, int offset) 247static inline struct page *mem_map_offset(struct page *base, int offset)
248{ 248{
249 if (unlikely(offset >= MAX_ORDER_NR_PAGES)) 249 if (unlikely(offset >= MAX_ORDER_NR_PAGES))
250 return pfn_to_page(page_to_pfn(base) + offset); 250 return nth_page(base, offset);
251 return base + offset; 251 return base + offset;
252} 252}
253 253
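mem_map_offset() above now goes through nth_page() once an offset crosses MAX_ORDER_NR_PAGES, because raw pointer arithmetic is only valid within one contiguous chunk of the page array. A toy model of that round trip; the two fixed chunks stand in for a sparse mem_map and the sizes are illustrative.

#include <stdio.h>

#define CHUNK 4

struct page { int idx; };

static struct page chunk_a[CHUNK] = { {0}, {1}, {2}, {3} };
static struct page chunk_b[CHUNK] = { {4}, {5}, {6}, {7} };

static struct page *index_to_page(int idx)      /* pfn_to_page() stand-in */
{
        return idx < CHUNK ? &chunk_a[idx] : &chunk_b[idx - CHUNK];
}

static struct page *map_offset(struct page *base, int offset)
{
        if (base->idx + offset >= CHUNK)        /* would cross a chunk boundary */
                return index_to_page(base->idx + offset);
        return base + offset;                   /* safe inside one chunk */
}

int main(void)
{
        printf("%d\n", map_offset(&chunk_a[2], 3)->idx);   /* prints 5 */
        return 0;
}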
diff --git a/mm/madvise.c b/mm/madvise.c
index a402f8fdc68e..0938b30da4ab 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -292,9 +292,6 @@ static long madvise_dontneed(struct vm_area_struct *vma,
292/* 292/*
293 * Application wants to free up the pages and associated backing store. 293 * Application wants to free up the pages and associated backing store.
294 * This is effectively punching a hole into the middle of a file. 294 * This is effectively punching a hole into the middle of a file.
295 *
296 * NOTE: Currently, only shmfs/tmpfs is supported for this operation.
297 * Other filesystems return -ENOSYS.
298 */ 295 */
299static long madvise_remove(struct vm_area_struct *vma, 296static long madvise_remove(struct vm_area_struct *vma,
300 struct vm_area_struct **prev, 297 struct vm_area_struct **prev,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f009a14918d2..90dc501eaf3f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2551,55 +2551,72 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
2551 return NOTIFY_OK; 2551 return NOTIFY_OK;
2552} 2552}
2553 2553
2554 2554/**
2555/* See mem_cgroup_try_charge() for details */ 2555 * mem_cgroup_try_charge - try charging a memcg
2556enum { 2556 * @memcg: memcg to charge
2557 CHARGE_OK, /* success */ 2557 * @nr_pages: number of pages to charge
2558 CHARGE_RETRY, /* need to retry but retry is not bad */ 2558 *
2559 CHARGE_NOMEM, /* we can't do more. return -ENOMEM */ 2559 * Returns 0 if @memcg was charged successfully, -EINTR if the charge
2560 CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. */ 2560 * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
2561}; 2561 */
2562 2562static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
2563static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, 2563 gfp_t gfp_mask,
2564 unsigned int nr_pages, unsigned int min_pages, 2564 unsigned int nr_pages)
2565 bool invoke_oom)
2566{ 2565{
2567 unsigned long csize = nr_pages * PAGE_SIZE; 2566 unsigned int batch = max(CHARGE_BATCH, nr_pages);
2567 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
2568 struct mem_cgroup *mem_over_limit; 2568 struct mem_cgroup *mem_over_limit;
2569 struct res_counter *fail_res; 2569 struct res_counter *fail_res;
2570 unsigned long nr_reclaimed;
2570 unsigned long flags = 0; 2571 unsigned long flags = 0;
2571 int ret; 2572 unsigned long long size;
2573 int ret = 0;
2572 2574
2573 ret = res_counter_charge(&memcg->res, csize, &fail_res); 2575retry:
2576 if (consume_stock(memcg, nr_pages))
2577 goto done;
2574 2578
2575 if (likely(!ret)) { 2579 size = batch * PAGE_SIZE;
2580 if (!res_counter_charge(&memcg->res, size, &fail_res)) {
2576 if (!do_swap_account) 2581 if (!do_swap_account)
2577 return CHARGE_OK; 2582 goto done_restock;
2578 ret = res_counter_charge(&memcg->memsw, csize, &fail_res); 2583 if (!res_counter_charge(&memcg->memsw, size, &fail_res))
2579 if (likely(!ret)) 2584 goto done_restock;
2580 return CHARGE_OK; 2585 res_counter_uncharge(&memcg->res, size);
2581
2582 res_counter_uncharge(&memcg->res, csize);
2583 mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); 2586 mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
2584 flags |= MEM_CGROUP_RECLAIM_NOSWAP; 2587 flags |= MEM_CGROUP_RECLAIM_NOSWAP;
2585 } else 2588 } else
2586 mem_over_limit = mem_cgroup_from_res_counter(fail_res, res); 2589 mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
2590
2591 if (batch > nr_pages) {
2592 batch = nr_pages;
2593 goto retry;
2594 }
2595
2587 /* 2596 /*
2588 * Never reclaim on behalf of optional batching, retry with a 2597 * Unlike in global OOM situations, memcg is not in a physical
2589 * single page instead. 2598 * memory shortage. Allow dying and OOM-killed tasks to
2599 * bypass the last charges so that they can exit quickly and
2600 * free their memory.
2590 */ 2601 */
2591 if (nr_pages > min_pages) 2602 if (unlikely(test_thread_flag(TIF_MEMDIE) ||
2592 return CHARGE_RETRY; 2603 fatal_signal_pending(current) ||
2604 current->flags & PF_EXITING))
2605 goto bypass;
2606
2607 if (unlikely(task_in_memcg_oom(current)))
2608 goto nomem;
2593 2609
2594 if (!(gfp_mask & __GFP_WAIT)) 2610 if (!(gfp_mask & __GFP_WAIT))
2595 return CHARGE_WOULDBLOCK; 2611 goto nomem;
2596 2612
2597 if (gfp_mask & __GFP_NORETRY) 2613 nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
2598 return CHARGE_NOMEM;
2599 2614
2600 ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
2601 if (mem_cgroup_margin(mem_over_limit) >= nr_pages) 2615 if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
2602 return CHARGE_RETRY; 2616 goto retry;
2617
2618 if (gfp_mask & __GFP_NORETRY)
2619 goto nomem;
2603 /* 2620 /*
2604 * Even though the limit is exceeded at this point, reclaim 2621 * Even though the limit is exceeded at this point, reclaim
2605 * may have been able to free some pages. Retry the charge 2622 * may have been able to free some pages. Retry the charge
@@ -2609,96 +2626,38 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
2609 * unlikely to succeed so close to the limit, and we fall back 2626 * unlikely to succeed so close to the limit, and we fall back
2610 * to regular pages anyway in case of failure. 2627 * to regular pages anyway in case of failure.
2611 */ 2628 */
2612 if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret) 2629 if (nr_reclaimed && nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER))
2613 return CHARGE_RETRY; 2630 goto retry;
2614
2615 /* 2631 /*
2616 * At task move, charge accounts can be doubly counted. So, it's 2632 * At task move, charge accounts can be doubly counted. So, it's
2617 * better to wait until the end of task_move if something is going on. 2633 * better to wait until the end of task_move if something is going on.
2618 */ 2634 */
2619 if (mem_cgroup_wait_acct_move(mem_over_limit)) 2635 if (mem_cgroup_wait_acct_move(mem_over_limit))
2620 return CHARGE_RETRY; 2636 goto retry;
2621
2622 if (invoke_oom)
2623 mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize));
2624
2625 return CHARGE_NOMEM;
2626}
2627
2628/**
2629 * mem_cgroup_try_charge - try charging a memcg
2630 * @memcg: memcg to charge
2631 * @nr_pages: number of pages to charge
2632 * @oom: trigger OOM if reclaim fails
2633 *
2634 * Returns 0 if @memcg was charged successfully, -EINTR if the charge
2635 * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
2636 */
2637static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
2638 gfp_t gfp_mask,
2639 unsigned int nr_pages,
2640 bool oom)
2641{
2642 unsigned int batch = max(CHARGE_BATCH, nr_pages);
2643 int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
2644 int ret;
2645
2646 if (mem_cgroup_is_root(memcg))
2647 goto done;
2648 /*
2649 * Unlike in global OOM situations, memcg is not in a physical
2650 * memory shortage. Allow dying and OOM-killed tasks to
2651 * bypass the last charges so that they can exit quickly and
2652 * free their memory.
2653 */
2654 if (unlikely(test_thread_flag(TIF_MEMDIE) ||
2655 fatal_signal_pending(current) ||
2656 current->flags & PF_EXITING))
2657 goto bypass;
2658 2637
2659 if (unlikely(task_in_memcg_oom(current))) 2638 if (nr_retries--)
2660 goto nomem; 2639 goto retry;
2661 2640
2662 if (gfp_mask & __GFP_NOFAIL) 2641 if (gfp_mask & __GFP_NOFAIL)
2663 oom = false; 2642 goto bypass;
2664again:
2665 if (consume_stock(memcg, nr_pages))
2666 goto done;
2667
2668 do {
2669 bool invoke_oom = oom && !nr_oom_retries;
2670
2671 /* If killed, bypass charge */
2672 if (fatal_signal_pending(current))
2673 goto bypass;
2674 2643
2675 ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, 2644 if (fatal_signal_pending(current))
2676 nr_pages, invoke_oom); 2645 goto bypass;
2677 switch (ret) {
2678 case CHARGE_OK:
2679 break;
2680 case CHARGE_RETRY: /* not in OOM situation but retry */
2681 batch = nr_pages;
2682 goto again;
2683 case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
2684 goto nomem;
2685 case CHARGE_NOMEM: /* OOM routine works */
2686 if (!oom || invoke_oom)
2687 goto nomem;
2688 nr_oom_retries--;
2689 break;
2690 }
2691 } while (ret != CHARGE_OK);
2692 2646
2693 if (batch > nr_pages) 2647 mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages));
2694 refill_stock(memcg, batch - nr_pages);
2695done:
2696 return 0;
2697nomem: 2648nomem:
2698 if (!(gfp_mask & __GFP_NOFAIL)) 2649 if (!(gfp_mask & __GFP_NOFAIL))
2699 return -ENOMEM; 2650 return -ENOMEM;
2700bypass: 2651bypass:
2701 return -EINTR; 2652 memcg = root_mem_cgroup;
2653 ret = -EINTR;
2654 goto retry;
2655
2656done_restock:
2657 if (batch > nr_pages)
2658 refill_stock(memcg, batch - nr_pages);
2659done:
2660 return ret;
2702} 2661}
2703 2662
2704/** 2663/**
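The memcontrol rework above folds mem_cgroup_do_charge() into a single mem_cgroup_try_charge() with explicit retry / nomem / done_restock paths: charge a batch at once, shrink the batch before reclaiming, retry reclaim a bounded number of times, and restock any surplus. A condensed user-space model of that control flow follows; the integer budget, reclaim() behaviour and constants are illustrative, and the stock, bypass and OOM handling of the real function are omitted.

#include <stdbool.h>
#include <stdio.h>

#define BATCH   32
#define RETRIES 5

static long budget = 40;                 /* stand-in for the memcg limit */

static bool charge(long pages)           /* res_counter_charge() stand-in */
{
        if (budget < pages)
                return false;
        budget -= pages;
        return true;
}

static long reclaim(void) { budget += 8; return 8; }   /* pretend 8 pages freed */

static int try_charge(long nr_pages, bool can_block)
{
        long batch = nr_pages > BATCH ? nr_pages : BATCH;
        int retries = RETRIES;

retry:
        if (charge(batch))
                goto done_restock;
        if (batch > nr_pages) {          /* never reclaim for optional batching */
                batch = nr_pages;
                goto retry;
        }
        if (!can_block)
                return -1;               /* -ENOMEM without __GFP_WAIT */
        if (reclaim() && retries--)
                goto retry;
        return -1;

done_restock:
        if (batch > nr_pages)            /* surplus refills the per-cpu stock */
                printf("restock %ld pages\n", batch - nr_pages);
        return 0;
}

int main(void)
{
        int ret = try_charge(4, true);
        printf("charge 4: %d, budget left %ld\n", ret, budget);
        ret = try_charge(64, true);
        printf("charge 64: %d, budget left %ld\n", ret, budget);
        return 0;
}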
@@ -2712,15 +2671,14 @@ bypass:
2712 */ 2671 */
2713static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm, 2672static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
2714 gfp_t gfp_mask, 2673 gfp_t gfp_mask,
2715 unsigned int nr_pages, 2674 unsigned int nr_pages)
2716 bool oom)
2717 2675
2718{ 2676{
2719 struct mem_cgroup *memcg; 2677 struct mem_cgroup *memcg;
2720 int ret; 2678 int ret;
2721 2679
2722 memcg = get_mem_cgroup_from_mm(mm); 2680 memcg = get_mem_cgroup_from_mm(mm);
2723 ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom); 2681 ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
2724 css_put(&memcg->css); 2682 css_put(&memcg->css);
2725 if (ret == -EINTR) 2683 if (ret == -EINTR)
2726 memcg = root_mem_cgroup; 2684 memcg = root_mem_cgroup;
@@ -2738,13 +2696,11 @@ static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
2738static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, 2696static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
2739 unsigned int nr_pages) 2697 unsigned int nr_pages)
2740{ 2698{
2741 if (!mem_cgroup_is_root(memcg)) { 2699 unsigned long bytes = nr_pages * PAGE_SIZE;
2742 unsigned long bytes = nr_pages * PAGE_SIZE;
2743 2700
2744 res_counter_uncharge(&memcg->res, bytes); 2701 res_counter_uncharge(&memcg->res, bytes);
2745 if (do_swap_account) 2702 if (do_swap_account)
2746 res_counter_uncharge(&memcg->memsw, bytes); 2703 res_counter_uncharge(&memcg->memsw, bytes);
2747 }
2748} 2704}
2749 2705
2750/* 2706/*
@@ -2756,9 +2712,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
2756{ 2712{
2757 unsigned long bytes = nr_pages * PAGE_SIZE; 2713 unsigned long bytes = nr_pages * PAGE_SIZE;
2758 2714
2759 if (mem_cgroup_is_root(memcg))
2760 return;
2761
2762 res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); 2715 res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
2763 if (do_swap_account) 2716 if (do_swap_account)
2764 res_counter_uncharge_until(&memcg->memsw, 2717 res_counter_uncharge_until(&memcg->memsw,
@@ -2842,14 +2795,6 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2842 } 2795 }
2843 2796
2844 pc->mem_cgroup = memcg; 2797 pc->mem_cgroup = memcg;
2845 /*
2846 * We access a page_cgroup asynchronously without lock_page_cgroup().
2847 * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
2848 * is accessed after testing USED bit. To make pc->mem_cgroup visible
2849 * before USED bit, we need memory barrier here.
2850 * See mem_cgroup_add_lru_list(), etc.
2851 */
2852 smp_wmb();
2853 SetPageCgroupUsed(pc); 2798 SetPageCgroupUsed(pc);
2854 2799
2855 if (lrucare) { 2800 if (lrucare) {
@@ -2937,8 +2882,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
2937 if (ret) 2882 if (ret)
2938 return ret; 2883 return ret;
2939 2884
2940 ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT, 2885 ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT);
2941 oom_gfp_allowed(gfp));
2942 if (ret == -EINTR) { 2886 if (ret == -EINTR) {
2943 /* 2887 /*
 2944 * mem_cgroup_try_charge() chose to bypass to root due to 2888
@@ -3463,12 +3407,13 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
3463 memcg_uncharge_kmem(memcg, PAGE_SIZE << order); 3407 memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
3464 return; 3408 return;
3465 } 3409 }
3466 3410 /*
3411 * The page is freshly allocated and not visible to any
3412 * outside callers yet. Set up pc non-atomically.
3413 */
3467 pc = lookup_page_cgroup(page); 3414 pc = lookup_page_cgroup(page);
3468 lock_page_cgroup(pc);
3469 pc->mem_cgroup = memcg; 3415 pc->mem_cgroup = memcg;
3470 SetPageCgroupUsed(pc); 3416 pc->flags = PCG_USED;
3471 unlock_page_cgroup(pc);
3472} 3417}
3473 3418
3474void __memcg_kmem_uncharge_pages(struct page *page, int order) 3419void __memcg_kmem_uncharge_pages(struct page *page, int order)
@@ -3478,19 +3423,11 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
3478 3423
3479 3424
3480 pc = lookup_page_cgroup(page); 3425 pc = lookup_page_cgroup(page);
3481 /*
3482 * Fast unlocked return. Theoretically might have changed, have to
3483 * check again after locking.
3484 */
3485 if (!PageCgroupUsed(pc)) 3426 if (!PageCgroupUsed(pc))
3486 return; 3427 return;
3487 3428
3488 lock_page_cgroup(pc); 3429 memcg = pc->mem_cgroup;
3489 if (PageCgroupUsed(pc)) { 3430 pc->flags = 0;
3490 memcg = pc->mem_cgroup;
3491 ClearPageCgroupUsed(pc);
3492 }
3493 unlock_page_cgroup(pc);
3494 3431
3495 /* 3432 /*
3496 * We trust that only if there is a memcg associated with the page, it 3433 * We trust that only if there is a memcg associated with the page, it
@@ -3531,7 +3468,6 @@ void mem_cgroup_split_huge_fixup(struct page *head)
3531 for (i = 1; i < HPAGE_PMD_NR; i++) { 3468 for (i = 1; i < HPAGE_PMD_NR; i++) {
3532 pc = head_pc + i; 3469 pc = head_pc + i;
3533 pc->mem_cgroup = memcg; 3470 pc->mem_cgroup = memcg;
3534 smp_wmb();/* see __commit_charge() */
3535 pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; 3471 pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
3536 } 3472 }
3537 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], 3473 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
@@ -3687,7 +3623,6 @@ int mem_cgroup_charge_anon(struct page *page,
3687{ 3623{
3688 unsigned int nr_pages = 1; 3624 unsigned int nr_pages = 1;
3689 struct mem_cgroup *memcg; 3625 struct mem_cgroup *memcg;
3690 bool oom = true;
3691 3626
3692 if (mem_cgroup_disabled()) 3627 if (mem_cgroup_disabled())
3693 return 0; 3628 return 0;
@@ -3699,14 +3634,9 @@ int mem_cgroup_charge_anon(struct page *page,
3699 if (PageTransHuge(page)) { 3634 if (PageTransHuge(page)) {
3700 nr_pages <<= compound_order(page); 3635 nr_pages <<= compound_order(page);
3701 VM_BUG_ON_PAGE(!PageTransHuge(page), page); 3636 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
3702 /*
3703 * Never OOM-kill a process for a huge page. The
3704 * fault handler will fall back to regular pages.
3705 */
3706 oom = false;
3707 } 3637 }
3708 3638
3709 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom); 3639 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages);
3710 if (!memcg) 3640 if (!memcg)
3711 return -ENOMEM; 3641 return -ENOMEM;
3712 __mem_cgroup_commit_charge(memcg, page, nr_pages, 3642 __mem_cgroup_commit_charge(memcg, page, nr_pages,
@@ -3743,7 +3673,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
3743 memcg = try_get_mem_cgroup_from_page(page); 3673 memcg = try_get_mem_cgroup_from_page(page);
3744 if (!memcg) 3674 if (!memcg)
3745 memcg = get_mem_cgroup_from_mm(mm); 3675 memcg = get_mem_cgroup_from_mm(mm);
3746 ret = mem_cgroup_try_charge(memcg, mask, 1, true); 3676 ret = mem_cgroup_try_charge(memcg, mask, 1);
3747 css_put(&memcg->css); 3677 css_put(&memcg->css);
3748 if (ret == -EINTR) 3678 if (ret == -EINTR)
3749 memcg = root_mem_cgroup; 3679 memcg = root_mem_cgroup;
@@ -3770,7 +3700,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
3770 if (!PageSwapCache(page)) { 3700 if (!PageSwapCache(page)) {
3771 struct mem_cgroup *memcg; 3701 struct mem_cgroup *memcg;
3772 3702
3773 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); 3703 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
3774 if (!memcg) 3704 if (!memcg)
3775 return -ENOMEM; 3705 return -ENOMEM;
3776 *memcgp = memcg; 3706 *memcgp = memcg;
@@ -3839,7 +3769,7 @@ int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
3839 return 0; 3769 return 0;
3840 } 3770 }
3841 3771
3842 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); 3772 memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
3843 if (!memcg) 3773 if (!memcg)
3844 return -ENOMEM; 3774 return -ENOMEM;
3845 __mem_cgroup_commit_charge(memcg, page, 1, type, false); 3775 __mem_cgroup_commit_charge(memcg, page, 1, type, false);
@@ -3993,7 +3923,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
3993 * replacement page, so leave it alone when phasing out the 3923 * replacement page, so leave it alone when phasing out the
3994 * page that is unused after the migration. 3924 * page that is unused after the migration.
3995 */ 3925 */
3996 if (!end_migration && !mem_cgroup_is_root(memcg)) 3926 if (!end_migration)
3997 mem_cgroup_do_uncharge(memcg, nr_pages, ctype); 3927 mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
3998 3928
3999 return memcg; 3929 return memcg;
@@ -4126,8 +4056,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
4126 * We uncharge this because swap is freed. This memcg can 4056 * We uncharge this because swap is freed. This memcg can
4127 * be obsolete one. We avoid calling css_tryget_online(). 4057 * be obsolete one. We avoid calling css_tryget_online().
4128 */ 4058 */
4129 if (!mem_cgroup_is_root(memcg)) 4059 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
4130 res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
4131 mem_cgroup_swap_statistics(memcg, false); 4060 mem_cgroup_swap_statistics(memcg, false);
4132 css_put(&memcg->css); 4061 css_put(&memcg->css);
4133 } 4062 }
@@ -4817,78 +4746,24 @@ out:
4817 return retval; 4746 return retval;
4818} 4747}
4819 4748
4820
4821static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
4822 enum mem_cgroup_stat_index idx)
4823{
4824 struct mem_cgroup *iter;
4825 long val = 0;
4826
4827 /* Per-cpu values can be negative, use a signed accumulator */
4828 for_each_mem_cgroup_tree(iter, memcg)
4829 val += mem_cgroup_read_stat(iter, idx);
4830
4831 if (val < 0) /* race ? */
4832 val = 0;
4833 return val;
4834}
4835
4836static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
4837{
4838 u64 val;
4839
4840 if (!mem_cgroup_is_root(memcg)) {
4841 if (!swap)
4842 return res_counter_read_u64(&memcg->res, RES_USAGE);
4843 else
4844 return res_counter_read_u64(&memcg->memsw, RES_USAGE);
4845 }
4846
4847 /*
4848 * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
4849 * as well as in MEM_CGROUP_STAT_RSS_HUGE.
4850 */
4851 val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
4852 val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
4853
4854 if (swap)
4855 val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
4856
4857 return val << PAGE_SHIFT;
4858}
4859
4860static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, 4749static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
4861 struct cftype *cft) 4750 struct cftype *cft)
4862{ 4751{
4863 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 4752 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
4864 u64 val; 4753 enum res_type type = MEMFILE_TYPE(cft->private);
4865 int name; 4754 int name = MEMFILE_ATTR(cft->private);
4866 enum res_type type;
4867
4868 type = MEMFILE_TYPE(cft->private);
4869 name = MEMFILE_ATTR(cft->private);
4870 4755
4871 switch (type) { 4756 switch (type) {
4872 case _MEM: 4757 case _MEM:
4873 if (name == RES_USAGE) 4758 return res_counter_read_u64(&memcg->res, name);
4874 val = mem_cgroup_usage(memcg, false);
4875 else
4876 val = res_counter_read_u64(&memcg->res, name);
4877 break;
4878 case _MEMSWAP: 4759 case _MEMSWAP:
4879 if (name == RES_USAGE) 4760 return res_counter_read_u64(&memcg->memsw, name);
4880 val = mem_cgroup_usage(memcg, true);
4881 else
4882 val = res_counter_read_u64(&memcg->memsw, name);
4883 break;
4884 case _KMEM: 4761 case _KMEM:
4885 val = res_counter_read_u64(&memcg->kmem, name); 4762 return res_counter_read_u64(&memcg->kmem, name);
4886 break; 4763 break;
4887 default: 4764 default:
4888 BUG(); 4765 BUG();
4889 } 4766 }
4890
4891 return val;
4892} 4767}
4893 4768
4894#ifdef CONFIG_MEMCG_KMEM 4769#ifdef CONFIG_MEMCG_KMEM
@@ -5350,7 +5225,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
5350 if (!t) 5225 if (!t)
5351 goto unlock; 5226 goto unlock;
5352 5227
5353 usage = mem_cgroup_usage(memcg, swap); 5228 if (!swap)
5229 usage = res_counter_read_u64(&memcg->res, RES_USAGE);
5230 else
5231 usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
5354 5232
5355 /* 5233 /*
5356 * current_threshold points to threshold just below or equal to usage. 5234 * current_threshold points to threshold just below or equal to usage.
@@ -5446,15 +5324,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
5446 5324
5447 mutex_lock(&memcg->thresholds_lock); 5325 mutex_lock(&memcg->thresholds_lock);
5448 5326
5449 if (type == _MEM) 5327 if (type == _MEM) {
5450 thresholds = &memcg->thresholds; 5328 thresholds = &memcg->thresholds;
5451 else if (type == _MEMSWAP) 5329 usage = res_counter_read_u64(&memcg->res, RES_USAGE);
5330 } else if (type == _MEMSWAP) {
5452 thresholds = &memcg->memsw_thresholds; 5331 thresholds = &memcg->memsw_thresholds;
5453 else 5332 usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
5333 } else
5454 BUG(); 5334 BUG();
5455 5335
5456 usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
5457
5458 /* Check if a threshold crossed before adding a new one */ 5336 /* Check if a threshold crossed before adding a new one */
5459 if (thresholds->primary) 5337 if (thresholds->primary)
5460 __mem_cgroup_threshold(memcg, type == _MEMSWAP); 5338 __mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -5534,18 +5412,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
5534 int i, j, size; 5412 int i, j, size;
5535 5413
5536 mutex_lock(&memcg->thresholds_lock); 5414 mutex_lock(&memcg->thresholds_lock);
5537 if (type == _MEM) 5415
5416 if (type == _MEM) {
5538 thresholds = &memcg->thresholds; 5417 thresholds = &memcg->thresholds;
5539 else if (type == _MEMSWAP) 5418 usage = res_counter_read_u64(&memcg->res, RES_USAGE);
5419 } else if (type == _MEMSWAP) {
5540 thresholds = &memcg->memsw_thresholds; 5420 thresholds = &memcg->memsw_thresholds;
5541 else 5421 usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
5422 } else
5542 BUG(); 5423 BUG();
5543 5424
5544 if (!thresholds->primary) 5425 if (!thresholds->primary)
5545 goto unlock; 5426 goto unlock;
5546 5427
5547 usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
5548
5549 /* Check if a threshold crossed before removing */ 5428 /* Check if a threshold crossed before removing */
5550 __mem_cgroup_threshold(memcg, type == _MEMSWAP); 5429 __mem_cgroup_threshold(memcg, type == _MEMSWAP);
5551 5430
@@ -6299,9 +6178,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
6299 * core guarantees its existence. 6178 * core guarantees its existence.
6300 */ 6179 */
6301 } else { 6180 } else {
6302 res_counter_init(&memcg->res, NULL); 6181 res_counter_init(&memcg->res, &root_mem_cgroup->res);
6303 res_counter_init(&memcg->memsw, NULL); 6182 res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
6304 res_counter_init(&memcg->kmem, NULL); 6183 res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
6305 /* 6184 /*
6306 * Deeper hierachy with use_hierarchy == false doesn't make 6185 * Deeper hierachy with use_hierarchy == false doesn't make
6307 * much sense so let cgroup subsystem know about this 6186 * much sense so let cgroup subsystem know about this
@@ -6435,55 +6314,39 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
6435 6314
6436#ifdef CONFIG_MMU 6315#ifdef CONFIG_MMU
6437/* Handlers for move charge at task migration. */ 6316/* Handlers for move charge at task migration. */
6438#define PRECHARGE_COUNT_AT_ONCE 256
6439static int mem_cgroup_do_precharge(unsigned long count) 6317static int mem_cgroup_do_precharge(unsigned long count)
6440{ 6318{
6441 int ret = 0; 6319 int ret;
6442 int batch_count = PRECHARGE_COUNT_AT_ONCE;
6443 struct mem_cgroup *memcg = mc.to;
6444 6320
6445 if (mem_cgroup_is_root(memcg)) { 6321 /* Try a single bulk charge without reclaim first */
6322 ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
6323 if (!ret) {
6446 mc.precharge += count; 6324 mc.precharge += count;
6447 /* we don't need css_get for root */
6448 return ret; 6325 return ret;
6449 } 6326 }
6450 /* try to charge at once */ 6327 if (ret == -EINTR) {
6451 if (count > 1) { 6328 __mem_cgroup_cancel_charge(root_mem_cgroup, count);
6452 struct res_counter *dummy;
6453 /*
6454 * "memcg" cannot be under rmdir() because we've already checked
6455 * by cgroup_lock_live_cgroup() that it is not removed and we
6456 * are still under the same cgroup_mutex. So we can postpone
6457 * css_get().
6458 */
6459 if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
6460 goto one_by_one;
6461 if (do_swap_account && res_counter_charge(&memcg->memsw,
6462 PAGE_SIZE * count, &dummy)) {
6463 res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
6464 goto one_by_one;
6465 }
6466 mc.precharge += count;
6467 return ret; 6329 return ret;
6468 } 6330 }
6469one_by_one: 6331
6470 /* fall back to one by one charge */ 6332 /* Try charges one by one with reclaim */
6471 while (count--) { 6333 while (count--) {
6472 if (signal_pending(current)) { 6334 ret = mem_cgroup_try_charge(mc.to,
6473 ret = -EINTR; 6335 GFP_KERNEL & ~__GFP_NORETRY, 1);
6474 break; 6336 /*
6475 } 6337 * In case of failure, any residual charges against
6476 if (!batch_count--) { 6338 * mc.to will be dropped by mem_cgroup_clear_mc()
6477 batch_count = PRECHARGE_COUNT_AT_ONCE; 6339 * later on. However, cancel any charges that are
6478 cond_resched(); 6340 * bypassed to root right away or they'll be lost.
6479 } 6341 */
6480 ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false); 6342 if (ret == -EINTR)
6343 __mem_cgroup_cancel_charge(root_mem_cgroup, 1);
6481 if (ret) 6344 if (ret)
6482 /* mem_cgroup_clear_mc() will do uncharge later */
6483 return ret; 6345 return ret;
6484 mc.precharge++; 6346 mc.precharge++;
6347 cond_resched();
6485 } 6348 }
6486 return ret; 6349 return 0;
6487} 6350}
6488 6351
6489/** 6352/**
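mem_cgroup_do_precharge() above now tries one opportunistic bulk charge without reclaim and only falls back to page-by-page charging with reclaim if that fails. The strategy, modeled with a plain integer budget and a toy try_charge() helper:

#include <stdio.h>

static long budget = 10;

static int try_charge(long pages, int can_reclaim)
{
        if (budget >= pages) {
                budget -= pages;
                return 0;
        }
        if (can_reclaim && pages == 1 && budget >= 1) {
                budget -= 1;            /* pretend reclaim freed just enough */
                return 0;
        }
        return -1;
}

static long precharge(long count)
{
        long precharged = 0;

        if (!try_charge(count, 0))      /* one bulk charge, no reclaim */
                return count;

        while (count--) {               /* fall back to one page at a time */
                if (try_charge(1, 1))
                        break;
                precharged++;
        }
        return precharged;
}

int main(void)
{
        printf("precharged %ld of 16\n", precharge(16));
        return 0;
}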
@@ -6760,21 +6623,18 @@ static void __mem_cgroup_clear_mc(void)
6760 /* we must fixup refcnts and charges */ 6623 /* we must fixup refcnts and charges */
6761 if (mc.moved_swap) { 6624 if (mc.moved_swap) {
6762 /* uncharge swap account from the old cgroup */ 6625 /* uncharge swap account from the old cgroup */
6763 if (!mem_cgroup_is_root(mc.from)) 6626 res_counter_uncharge(&mc.from->memsw,
6764 res_counter_uncharge(&mc.from->memsw, 6627 PAGE_SIZE * mc.moved_swap);
6765 PAGE_SIZE * mc.moved_swap);
6766 6628
6767 for (i = 0; i < mc.moved_swap; i++) 6629 for (i = 0; i < mc.moved_swap; i++)
6768 css_put(&mc.from->css); 6630 css_put(&mc.from->css);
6769 6631
6770 if (!mem_cgroup_is_root(mc.to)) { 6632 /*
6771 /* 6633 * we charged both to->res and to->memsw, so we should
6772 * we charged both to->res and to->memsw, so we should 6634 * uncharge to->res.
6773 * uncharge to->res. 6635 */
6774 */ 6636 res_counter_uncharge(&mc.to->res,
6775 res_counter_uncharge(&mc.to->res, 6637 PAGE_SIZE * mc.moved_swap);
6776 PAGE_SIZE * mc.moved_swap);
6777 }
6778 /* we've already done css_get(mc.to) */ 6638 /* we've already done css_get(mc.to) */
6779 mc.moved_swap = 0; 6639 mc.moved_swap = 0;
6780 } 6640 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a013bc94ebbe..44c6bd201d3a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1173,6 +1173,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
1173 lock_page(hpage); 1173 lock_page(hpage);
1174 1174
1175 /* 1175 /*
1176 * The page could have changed compound pages during the locking.
1177 * If this happens just bail out.
1178 */
1179 if (compound_head(p) != hpage) {
1180 action_result(pfn, "different compound page after locking", IGNORED);
1181 res = -EBUSY;
1182 goto out;
1183 }
1184
1185 /*
1176 * We use page flags to determine what action should be taken, but 1186 * We use page flags to determine what action should be taken, but
1177 * the flags can be modified by the error containment action. One 1187 * the flags can be modified by the error containment action. One
1178 * example is an mlocked page, where PG_mlocked is cleared by 1188 * example is an mlocked page, where PG_mlocked is cleared by
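The memory_failure() hunk above is a check-after-lock: the compound head sampled before lock_page() may have changed by the time the lock is held, so it is re-read and the handler bails out with -EBUSY if it no longer matches. The same pattern in a small threaded sketch, with an integer standing in for compound_head(p).

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int head = 1;                     /* may be changed by other threads */

static int handle(int expected_head)
{
        pthread_mutex_lock(&lock);
        if (head != expected_head) {     /* re-check under the lock */
                pthread_mutex_unlock(&lock);
                return -16;              /* -EBUSY: someone raced with us */
        }
        /* ... safe to act on the page here ... */
        pthread_mutex_unlock(&lock);
        return 0;
}

int main(void)
{
        int expected = head;             /* sampled before locking */
        head = 2;                        /* simulate a racing split/merge */
        printf("handle: %d\n", handle(expected));
        return 0;
}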
diff --git a/mm/memory.c b/mm/memory.c
index 8b44f765b645..5c55270729f7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -884,7 +884,7 @@ out_set_pte:
884 return 0; 884 return 0;
885} 885}
886 886
887int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, 887static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
888 pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, 888 pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
889 unsigned long addr, unsigned long end) 889 unsigned long addr, unsigned long end)
890{ 890{
@@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
2399/* 2399/*
2400 * We enter with non-exclusive mmap_sem (to exclude vma changes, 2400 * We enter with non-exclusive mmap_sem (to exclude vma changes,
2401 * but allow concurrent faults), and pte mapped but not yet locked. 2401 * but allow concurrent faults), and pte mapped but not yet locked.
2402 * We return with mmap_sem still held, but pte unmapped and unlocked. 2402 * We return with pte unmapped and unlocked.
2403 *
2404 * We return with the mmap_sem locked or unlocked in the same cases
2405 * as does filemap_fault().
2403 */ 2406 */
2404static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, 2407static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2405 unsigned long address, pte_t *page_table, pmd_t *pmd, 2408 unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -2688,6 +2691,11 @@ oom:
2688 return VM_FAULT_OOM; 2691 return VM_FAULT_OOM;
2689} 2692}
2690 2693
2694/*
2695 * The mmap_sem must have been held on entry, and may have been
2696 * released depending on flags and vma->vm_ops->fault() return value.
 2697 * See filemap_fault() and __lock_page_or_retry().
2698 */
2691static int __do_fault(struct vm_area_struct *vma, unsigned long address, 2699static int __do_fault(struct vm_area_struct *vma, unsigned long address,
2692 pgoff_t pgoff, unsigned int flags, struct page **page) 2700 pgoff_t pgoff, unsigned int flags, struct page **page)
2693{ 2701{
@@ -2744,7 +2752,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
2744 if (write) 2752 if (write)
2745 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2753 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2746 else if (pte_file(*pte) && pte_file_soft_dirty(*pte)) 2754 else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
2747 pte_mksoft_dirty(entry); 2755 entry = pte_mksoft_dirty(entry);
2748 if (anon) { 2756 if (anon) {
2749 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); 2757 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
2750 page_add_new_anon_rmap(page, vma, address); 2758 page_add_new_anon_rmap(page, vma, address);
@@ -2758,17 +2766,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
2758 update_mmu_cache(vma, address, pte); 2766 update_mmu_cache(vma, address, pte);
2759} 2767}
2760 2768
2761static unsigned long fault_around_bytes = rounddown_pow_of_two(65536); 2769static unsigned long fault_around_bytes __read_mostly =
2762 2770 rounddown_pow_of_two(65536);
2763static inline unsigned long fault_around_pages(void)
2764{
2765 return fault_around_bytes >> PAGE_SHIFT;
2766}
2767
2768static inline unsigned long fault_around_mask(void)
2769{
2770 return ~(fault_around_bytes - 1) & PAGE_MASK;
2771}
2772 2771
2773#ifdef CONFIG_DEBUG_FS 2772#ifdef CONFIG_DEBUG_FS
2774static int fault_around_bytes_get(void *data, u64 *val) 2773static int fault_around_bytes_get(void *data, u64 *val)
@@ -2834,12 +2833,15 @@ late_initcall(fault_around_debugfs);
2834static void do_fault_around(struct vm_area_struct *vma, unsigned long address, 2833static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
2835 pte_t *pte, pgoff_t pgoff, unsigned int flags) 2834 pte_t *pte, pgoff_t pgoff, unsigned int flags)
2836{ 2835{
2837 unsigned long start_addr; 2836 unsigned long start_addr, nr_pages, mask;
2838 pgoff_t max_pgoff; 2837 pgoff_t max_pgoff;
2839 struct vm_fault vmf; 2838 struct vm_fault vmf;
2840 int off; 2839 int off;
2841 2840
2842 start_addr = max(address & fault_around_mask(), vma->vm_start); 2841 nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT;
2842 mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
2843
2844 start_addr = max(address & mask, vma->vm_start);
2843 off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); 2845 off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
2844 pte -= off; 2846 pte -= off;
2845 pgoff -= off; 2847 pgoff -= off;
@@ -2851,7 +2853,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
2851 max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + 2853 max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
2852 PTRS_PER_PTE - 1; 2854 PTRS_PER_PTE - 1;
2853 max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1, 2855 max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1,
2854 pgoff + fault_around_pages() - 1); 2856 pgoff + nr_pages - 1);
2855 2857
2856 /* Check if it makes any sense to call ->map_pages */ 2858 /* Check if it makes any sense to call ->map_pages */
2857 while (!pte_none(*pte)) { 2859 while (!pte_none(*pte)) {
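The do_fault_around() change above derives both the window size and the alignment mask from a single snapshot of fault_around_bytes, so a concurrent debugfs write cannot leave the two inconsistent. The arithmetic, pulled out into a standalone program with illustrative values (4 KiB pages, 64 KiB window):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long fault_around_bytes = 65536;       /* power of two */
        unsigned long nr_pages = fault_around_bytes >> PAGE_SHIFT;
        unsigned long mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
        unsigned long address = 0x7f1234567abcUL;

        printf("nr_pages=%lu mask=%#lx start=%#lx\n",
               nr_pages, mask, address & mask);
        return 0;
}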
@@ -2886,7 +2888,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2886 * something). 2888 * something).
2887 */ 2889 */
2888 if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) && 2890 if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) &&
2889 fault_around_pages() > 1) { 2891 fault_around_bytes >> PAGE_SHIFT > 1) {
2890 pte = pte_offset_map_lock(mm, pmd, address, &ptl); 2892 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
2891 do_fault_around(vma, address, pte, pgoff, flags); 2893 do_fault_around(vma, address, pte, pgoff, flags);
2892 if (!pte_same(*pte, orig_pte)) 2894 if (!pte_same(*pte, orig_pte))
@@ -3016,6 +3018,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3016 return ret; 3018 return ret;
3017} 3019}
3018 3020
3021/*
3022 * We enter with non-exclusive mmap_sem (to exclude vma changes,
3023 * but allow concurrent faults).
3024 * The mmap_sem may have been released depending on flags and our
3025 * return value. See filemap_fault() and __lock_page_or_retry().
3026 */
3019static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3027static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3020 unsigned long address, pte_t *page_table, pmd_t *pmd, 3028 unsigned long address, pte_t *page_table, pmd_t *pmd,
3021 unsigned int flags, pte_t orig_pte) 3029 unsigned int flags, pte_t orig_pte)
@@ -3040,7 +3048,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3040 * 3048 *
3041 * We enter with non-exclusive mmap_sem (to exclude vma changes, 3049 * We enter with non-exclusive mmap_sem (to exclude vma changes,
3042 * but allow concurrent faults), and pte mapped but not yet locked. 3050 * but allow concurrent faults), and pte mapped but not yet locked.
3043 * We return with mmap_sem still held, but pte unmapped and unlocked. 3051 * We return with pte unmapped and unlocked.
3052 * The mmap_sem may have been released depending on flags and our
3053 * return value. See filemap_fault() and __lock_page_or_retry().
3044 */ 3054 */
3045static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3055static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3046 unsigned long address, pte_t *page_table, pmd_t *pmd, 3056 unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -3172,7 +3182,10 @@ out:
3172 * 3182 *
3173 * We enter with non-exclusive mmap_sem (to exclude vma changes, 3183 * We enter with non-exclusive mmap_sem (to exclude vma changes,
3174 * but allow concurrent faults), and pte mapped but not yet locked. 3184 * but allow concurrent faults), and pte mapped but not yet locked.
3175 * We return with mmap_sem still held, but pte unmapped and unlocked. 3185 * We return with pte unmapped and unlocked.
3186 *
3187 * The mmap_sem may have been released depending on flags and our
3188 * return value. See filemap_fault() and __lock_page_or_retry().
3176 */ 3189 */
3177static int handle_pte_fault(struct mm_struct *mm, 3190static int handle_pte_fault(struct mm_struct *mm,
3178 struct vm_area_struct *vma, unsigned long address, 3191 struct vm_area_struct *vma, unsigned long address,
@@ -3181,7 +3194,7 @@ static int handle_pte_fault(struct mm_struct *mm,
3181 pte_t entry; 3194 pte_t entry;
3182 spinlock_t *ptl; 3195 spinlock_t *ptl;
3183 3196
3184 entry = *pte; 3197 entry = ACCESS_ONCE(*pte);
3185 if (!pte_present(entry)) { 3198 if (!pte_present(entry)) {
3186 if (pte_none(entry)) { 3199 if (pte_none(entry)) {
3187 if (vma->vm_ops) { 3200 if (vma->vm_ops) {
@@ -3232,6 +3245,9 @@ unlock:
3232 3245
3233/* 3246/*
3234 * By the time we get here, we already hold the mm semaphore 3247 * By the time we get here, we already hold the mm semaphore
3248 *
3249 * The mmap_sem may have been released depending on flags and our
3250 * return value. See filemap_fault() and __lock_page_or_retry().
3235 */ 3251 */
3236static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3252static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3237 unsigned long address, unsigned int flags) 3253 unsigned long address, unsigned int flags)
@@ -3313,6 +3329,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3313 return handle_pte_fault(mm, vma, address, pte, pmd, flags); 3329 return handle_pte_fault(mm, vma, address, pte, pmd, flags);
3314} 3330}
3315 3331
3332/*
3333 * By the time we get here, we already hold the mm semaphore
3334 *
3335 * The mmap_sem may have been released depending on flags and our
3336 * return value. See filemap_fault() and __lock_page_or_retry().
3337 */
3316int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3338int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3317 unsigned long address, unsigned int flags) 3339 unsigned long address, unsigned int flags)
3318{ 3340{
@@ -3591,11 +3613,13 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
3591 ret = get_user_pages(tsk, mm, addr, 1, 3613 ret = get_user_pages(tsk, mm, addr, 1,
3592 write, 1, &page, &vma); 3614 write, 1, &page, &vma);
3593 if (ret <= 0) { 3615 if (ret <= 0) {
3616#ifndef CONFIG_HAVE_IOREMAP_PROT
3617 break;
3618#else
3594 /* 3619 /*
3595 * Check if this is a VM_IO | VM_PFNMAP VMA, which 3620 * Check if this is a VM_IO | VM_PFNMAP VMA, which
3596 * we can access using slightly different code. 3621 * we can access using slightly different code.
3597 */ 3622 */
3598#ifdef CONFIG_HAVE_IOREMAP_PROT
3599 vma = find_vma(mm, addr); 3623 vma = find_vma(mm, addr);
3600 if (!vma || vma->vm_start > addr) 3624 if (!vma || vma->vm_start > addr)
3601 break; 3625 break;
@@ -3603,9 +3627,9 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
3603 ret = vma->vm_ops->access(vma, addr, buf, 3627 ret = vma->vm_ops->access(vma, addr, buf,
3604 len, write); 3628 len, write);
3605 if (ret <= 0) 3629 if (ret <= 0)
3606#endif
3607 break; 3630 break;
3608 bytes = ret; 3631 bytes = ret;
3632#endif
3609 } else { 3633 } else {
3610 bytes = len; 3634 bytes = len;
3611 offset = addr & (PAGE_SIZE-1); 3635 offset = addr & (PAGE_SIZE-1);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 469bbf505f85..2ff8c2325e96 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -284,8 +284,8 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
284} 284}
285#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ 285#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
286 286
287static void grow_zone_span(struct zone *zone, unsigned long start_pfn, 287static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn,
288 unsigned long end_pfn) 288 unsigned long end_pfn)
289{ 289{
290 unsigned long old_zone_end_pfn; 290 unsigned long old_zone_end_pfn;
291 291
@@ -427,8 +427,8 @@ out_fail:
427 return -1; 427 return -1;
428} 428}
429 429
430static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, 430static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
431 unsigned long end_pfn) 431 unsigned long end_pfn)
432{ 432{
433 unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat); 433 unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
434 434
@@ -977,15 +977,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
977 zone = page_zone(pfn_to_page(pfn)); 977 zone = page_zone(pfn_to_page(pfn));
978 978
979 ret = -EINVAL; 979 ret = -EINVAL;
980 if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) && 980 if ((zone_idx(zone) > ZONE_NORMAL ||
981 online_type == MMOP_ONLINE_MOVABLE) &&
981 !can_online_high_movable(zone)) 982 !can_online_high_movable(zone))
982 goto out; 983 goto out;
983 984
984 if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) { 985 if (online_type == MMOP_ONLINE_KERNEL &&
986 zone_idx(zone) == ZONE_MOVABLE) {
985 if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) 987 if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
986 goto out; 988 goto out;
987 } 989 }
988 if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) { 990 if (online_type == MMOP_ONLINE_MOVABLE &&
991 zone_idx(zone) == ZONE_MOVABLE - 1) {
989 if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) 992 if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
990 goto out; 993 goto out;
991 } 994 }
@@ -1156,6 +1159,34 @@ static int check_hotplug_memory_range(u64 start, u64 size)
1156 return 0; 1159 return 0;
1157} 1160}
1158 1161
1162/*
 1163 * If a movable zone has already been set up, newly added memory should be checked.
 1164 * If its address is at or above the movable zone's start, it should be added as movable.
 1165 * Without this check, the movable zone may overlap with another zone.
1166 */
1167static int should_add_memory_movable(int nid, u64 start, u64 size)
1168{
1169 unsigned long start_pfn = start >> PAGE_SHIFT;
1170 pg_data_t *pgdat = NODE_DATA(nid);
1171 struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
1172
1173 if (zone_is_empty(movable_zone))
1174 return 0;
1175
1176 if (movable_zone->zone_start_pfn <= start_pfn)
1177 return 1;
1178
1179 return 0;
1180}
1181
1182int zone_for_memory(int nid, u64 start, u64 size, int zone_default)
1183{
1184 if (should_add_memory_movable(nid, start, size))
1185 return ZONE_MOVABLE;
1186
1187 return zone_default;
1188}
1189
1159/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ 1190/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
1160int __ref add_memory(int nid, u64 start, u64 size) 1191int __ref add_memory(int nid, u64 start, u64 size)
1161{ 1192{
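The new zone_for_memory()/should_add_memory_movable() pair above keeps hot-added memory out of lower zones once it lies at or beyond the start of a populated ZONE_MOVABLE. The decision, modeled with plain pfn numbers and illustrative constants:

#include <stdio.h>

#define ZONE_NORMAL  2
#define ZONE_MOVABLE 3

static unsigned long movable_start_pfn = 0x100000;   /* 0 would mean "empty" */

static int zone_for_memory(unsigned long start_pfn, int zone_default)
{
        if (movable_start_pfn && movable_start_pfn <= start_pfn)
                return ZONE_MOVABLE;
        return zone_default;
}

int main(void)
{
        printf("%d\n", zone_for_memory(0x080000, ZONE_NORMAL));  /* below: 2 */
        printf("%d\n", zone_for_memory(0x180000, ZONE_NORMAL));  /* above: 3 */
        return 0;
}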
diff --git a/mm/mlock.c b/mm/mlock.c
index b1eb53634005..ce84cb0b83ef 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -210,12 +210,19 @@ out:
210 * @vma: target vma 210 * @vma: target vma
211 * @start: start address 211 * @start: start address
212 * @end: end address 212 * @end: end address
213 * @nonblocking:
213 * 214 *
214 * This takes care of making the pages present too. 215 * This takes care of making the pages present too.
215 * 216 *
216 * return 0 on success, negative error code on error. 217 * return 0 on success, negative error code on error.
217 * 218 *
218 * vma->vm_mm->mmap_sem must be held for at least read. 219 * vma->vm_mm->mmap_sem must be held.
220 *
221 * If @nonblocking is NULL, it may be held for read or write and will
222 * be unperturbed.
223 *
 224 * If @nonblocking is non-NULL, it must be held for read only and may be
225 * released. If it's released, *@nonblocking will be set to 0.
219 */ 226 */
220long __mlock_vma_pages_range(struct vm_area_struct *vma, 227long __mlock_vma_pages_range(struct vm_area_struct *vma,
221 unsigned long start, unsigned long end, int *nonblocking) 228 unsigned long start, unsigned long end, int *nonblocking)
diff --git a/mm/mmap.c b/mm/mmap.c
index 129b847d30cc..64c9d736155c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -31,6 +31,7 @@
31#include <linux/mempolicy.h> 31#include <linux/mempolicy.h>
32#include <linux/rmap.h> 32#include <linux/rmap.h>
33#include <linux/mmu_notifier.h> 33#include <linux/mmu_notifier.h>
34#include <linux/mmdebug.h>
34#include <linux/perf_event.h> 35#include <linux/perf_event.h>
35#include <linux/audit.h> 36#include <linux/audit.h>
36#include <linux/khugepaged.h> 37#include <linux/khugepaged.h>
@@ -134,6 +135,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
134{ 135{
135 unsigned long free, allowed, reserve; 136 unsigned long free, allowed, reserve;
136 137
138 VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
139 -(s64)vm_committed_as_batch * num_online_cpus(),
140 "memory commitment underflow");
141
137 vm_acct_memory(pages); 142 vm_acct_memory(pages);
138 143
139 /* 144 /*
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 41cefdf0aadd..950813b1eb36 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -23,6 +23,25 @@
23static struct srcu_struct srcu; 23static struct srcu_struct srcu;
24 24
25/* 25/*
26 * This function allows mmu_notifier::release callback to delay a call to
27 * a function that will free appropriate resources. The function must be
28 * quick and must not block.
29 */
30void mmu_notifier_call_srcu(struct rcu_head *rcu,
31 void (*func)(struct rcu_head *rcu))
32{
33 call_srcu(&srcu, rcu, func);
34}
35EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu);
36
37void mmu_notifier_synchronize(void)
38{
39 /* Wait for any running method to finish. */
40 srcu_barrier(&srcu);
41}
42EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
43
44/*
26 * This function can't run concurrently against mmu_notifier_register 45 * This function can't run concurrently against mmu_notifier_register
27 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap 46 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
28 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers 47 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
@@ -53,7 +72,6 @@ void __mmu_notifier_release(struct mm_struct *mm)
53 */ 72 */
54 if (mn->ops->release) 73 if (mn->ops->release)
55 mn->ops->release(mn, mm); 74 mn->ops->release(mn, mm);
56 srcu_read_unlock(&srcu, id);
57 75
58 spin_lock(&mm->mmu_notifier_mm->lock); 76 spin_lock(&mm->mmu_notifier_mm->lock);
59 while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { 77 while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
@@ -69,6 +87,7 @@ void __mmu_notifier_release(struct mm_struct *mm)
69 hlist_del_init_rcu(&mn->hlist); 87 hlist_del_init_rcu(&mn->hlist);
70 } 88 }
71 spin_unlock(&mm->mmu_notifier_mm->lock); 89 spin_unlock(&mm->mmu_notifier_mm->lock);
90 srcu_read_unlock(&srcu, id);
72 91
73 /* 92 /*
74 * synchronize_srcu here prevents mmu_notifier_release from returning to 93 * synchronize_srcu here prevents mmu_notifier_release from returning to
@@ -325,6 +344,25 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
325} 344}
326EXPORT_SYMBOL_GPL(mmu_notifier_unregister); 345EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
327 346
347/*
348 * Same as mmu_notifier_unregister but no callback and no srcu synchronization.
349 */
350void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
351 struct mm_struct *mm)
352{
353 spin_lock(&mm->mmu_notifier_mm->lock);
354 /*
355 * Can not use list_del_rcu() since __mmu_notifier_release
356 * can delete it before we hold the lock.
357 */
358 hlist_del_init_rcu(&mn->hlist);
359 spin_unlock(&mm->mmu_notifier_mm->lock);
360
361 BUG_ON(atomic_read(&mm->mm_count) <= 0);
362 mmdrop(mm);
363}
364EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
365
328static int __init mmu_notifier_init(void) 366static int __init mmu_notifier_init(void)
329{ 367{
330 return init_srcu_struct(&srcu); 368 return init_srcu_struct(&srcu);
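
For the two exported helpers added at the top of this file, a sketch of the intended driver-side usage. The example_* names and the context struct are assumptions; only mmu_notifier_call_srcu() and the ->release() semantics come from the patch.

#include <linux/mmu_notifier.h>
#include <linux/slab.h>

struct example_ctx {				/* hypothetical driver state */
	struct mmu_notifier mn;
	struct rcu_head rcu;
};

static void example_free_ctx(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct example_ctx, rcu));
}

/* ->release() must be quick and must not block, so defer the free until
 * SRCU readers of the notifier list are done.
 */
static void example_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct example_ctx *ctx = container_of(mn, struct example_ctx, mn);

	mmu_notifier_call_srcu(&ctx->rcu, example_free_ctx);
}

static const struct mmu_notifier_ops example_ops = {
	.release = example_release,
};

A teardown path that already knows ->release() has run can then detach with mmu_notifier_unregister_no_release() and free through the same SRCU callback, which appears to be the use case these helpers target.
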
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3291e82d4352..1e11df8fa7ec 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -258,8 +258,6 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
258 unsigned long totalpages, const nodemask_t *nodemask, 258 unsigned long totalpages, const nodemask_t *nodemask,
259 bool force_kill) 259 bool force_kill)
260{ 260{
261 if (task->exit_state)
262 return OOM_SCAN_CONTINUE;
263 if (oom_unkillable_task(task, NULL, nodemask)) 261 if (oom_unkillable_task(task, NULL, nodemask))
264 return OOM_SCAN_CONTINUE; 262 return OOM_SCAN_CONTINUE;
265 263
@@ -559,28 +557,25 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
559 * if a parallel OOM killing is already taking place that includes a zone in 557 * if a parallel OOM killing is already taking place that includes a zone in
560 * the zonelist. Otherwise, locks all zones in the zonelist and returns 1. 558 * the zonelist. Otherwise, locks all zones in the zonelist and returns 1.
561 */ 559 */
562int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) 560bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask)
563{ 561{
564 struct zoneref *z; 562 struct zoneref *z;
565 struct zone *zone; 563 struct zone *zone;
566 int ret = 1; 564 bool ret = true;
567 565
568 spin_lock(&zone_scan_lock); 566 spin_lock(&zone_scan_lock);
569 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 567 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
570 if (zone_is_oom_locked(zone)) { 568 if (zone_is_oom_locked(zone)) {
571 ret = 0; 569 ret = false;
572 goto out; 570 goto out;
573 } 571 }
574 }
575 572
576 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 573 /*
577 /* 574 * Lock each zone in the zonelist under zone_scan_lock so a parallel
578 * Lock each zone in the zonelist under zone_scan_lock so a 575 * call to oom_zonelist_trylock() doesn't succeed when it shouldn't.
579 * parallel invocation of try_set_zonelist_oom() doesn't succeed 576 */
580 * when it shouldn't. 577 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
581 */
582 zone_set_flag(zone, ZONE_OOM_LOCKED); 578 zone_set_flag(zone, ZONE_OOM_LOCKED);
583 }
584 579
585out: 580out:
586 spin_unlock(&zone_scan_lock); 581 spin_unlock(&zone_scan_lock);
@@ -592,15 +587,14 @@ out:
592 * allocation attempts with zonelists containing them may now recall the OOM 587 * allocation attempts with zonelists containing them may now recall the OOM
593 * killer, if necessary. 588 * killer, if necessary.
594 */ 589 */
595void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) 590void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
596{ 591{
597 struct zoneref *z; 592 struct zoneref *z;
598 struct zone *zone; 593 struct zone *zone;
599 594
600 spin_lock(&zone_scan_lock); 595 spin_lock(&zone_scan_lock);
601 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 596 for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
602 zone_clear_flag(zone, ZONE_OOM_LOCKED); 597 zone_clear_flag(zone, ZONE_OOM_LOCKED);
603 }
604 spin_unlock(&zone_scan_lock); 598 spin_unlock(&zone_scan_lock);
605} 599}
606 600
@@ -694,9 +688,9 @@ void pagefault_out_of_memory(void)
694 if (mem_cgroup_oom_synchronize(true)) 688 if (mem_cgroup_oom_synchronize(true))
695 return; 689 return;
696 690
697 zonelist = node_zonelist(first_online_node, GFP_KERNEL); 691 zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
698 if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) { 692 if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) {
699 out_of_memory(NULL, 0, 0, NULL, false); 693 out_of_memory(NULL, 0, 0, NULL, false);
700 clear_zonelist_oom(zonelist, GFP_KERNEL); 694 oom_zonelist_unlock(zonelist, GFP_KERNEL);
701 } 695 }
702} 696}
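
With try_set_zonelist_oom()/clear_zonelist_oom() renamed and switched to bool, the calling convention is worth spelling out once. A condensed sketch with an invented wrapper name; the out_of_memory() arguments mirror the hunk above:

/* Sketch: take the per-zone OOM locks for the whole zonelist, back off
 * if another OOM kill in these zones is already in flight, and always
 * unlock after out_of_memory().
 */
static void example_trigger_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
	if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
		schedule_timeout_uninterruptible(1);
		return;
	}
	out_of_memory(zonelist, gfp_mask, 0, NULL, false);
	oom_zonelist_unlock(zonelist, gfp_mask);
}
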
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e0c943014eb7..91d73ef1744d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -261,14 +261,11 @@ static unsigned long global_dirtyable_memory(void)
261 */ 261 */
262void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty) 262void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
263{ 263{
264 const unsigned long available_memory = global_dirtyable_memory();
264 unsigned long background; 265 unsigned long background;
265 unsigned long dirty; 266 unsigned long dirty;
266 unsigned long uninitialized_var(available_memory);
267 struct task_struct *tsk; 267 struct task_struct *tsk;
268 268
269 if (!vm_dirty_bytes || !dirty_background_bytes)
270 available_memory = global_dirtyable_memory();
271
272 if (vm_dirty_bytes) 269 if (vm_dirty_bytes)
273 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE); 270 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
274 else 271 else
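
The uninitialized_var() dance is gone because available_memory is now computed unconditionally. A sketch of the resulting shape of global_dirty_limits() (abridged; the percentage-of-dirtyable-memory arithmetic is reproduced from memory of this kernel, not from the hunk itself):

static void example_dirty_limits(unsigned long *pbackground,
				 unsigned long *pdirty)
{
	const unsigned long available_memory = global_dirtyable_memory();

	if (vm_dirty_bytes)
		*pdirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
	else
		*pdirty = (vm_dirty_ratio * available_memory) / 100;

	if (dirty_background_bytes)
		*pbackground = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
	else
		*pbackground = (dirty_background_ratio * available_memory) / 100;
}
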
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ef44ad736ca1..18cee0d4c8a2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -680,9 +680,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
680 int migratetype = 0; 680 int migratetype = 0;
681 int batch_free = 0; 681 int batch_free = 0;
682 int to_free = count; 682 int to_free = count;
683 unsigned long nr_scanned;
683 684
684 spin_lock(&zone->lock); 685 spin_lock(&zone->lock);
685 zone->pages_scanned = 0; 686 nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
687 if (nr_scanned)
688 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
686 689
687 while (to_free) { 690 while (to_free) {
688 struct page *page; 691 struct page *page;
@@ -731,8 +734,11 @@ static void free_one_page(struct zone *zone,
731 unsigned int order, 734 unsigned int order,
732 int migratetype) 735 int migratetype)
733{ 736{
737 unsigned long nr_scanned;
734 spin_lock(&zone->lock); 738 spin_lock(&zone->lock);
735 zone->pages_scanned = 0; 739 nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
740 if (nr_scanned)
741 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
736 742
737 __free_one_page(page, pfn, zone, order, migratetype); 743 __free_one_page(page, pfn, zone, order, migratetype);
738 if (unlikely(!is_migrate_isolate(migratetype))) 744 if (unlikely(!is_migrate_isolate(migratetype)))
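
Both hunks above replace the ad-hoc zone->pages_scanned field with the NR_PAGES_SCANNED vmstat counter, so "reset" becomes "subtract the current value". The recurring idiom, extracted into a hypothetical helper for clarity (callers hold zone->lock, as in the hunks):

static void example_reset_pages_scanned(struct zone *zone)
{
	unsigned long nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);

	if (nr_scanned)
		__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
}
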
@@ -1257,15 +1263,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
1257void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) 1263void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
1258{ 1264{
1259 unsigned long flags; 1265 unsigned long flags;
1260 int to_drain; 1266 int to_drain, batch;
1261 unsigned long batch;
1262 1267
1263 local_irq_save(flags); 1268 local_irq_save(flags);
1264 batch = ACCESS_ONCE(pcp->batch); 1269 batch = ACCESS_ONCE(pcp->batch);
1265 if (pcp->count >= batch) 1270 to_drain = min(pcp->count, batch);
1266 to_drain = batch;
1267 else
1268 to_drain = pcp->count;
1269 if (to_drain > 0) { 1271 if (to_drain > 0) {
1270 free_pcppages_bulk(zone, to_drain, pcp); 1272 free_pcppages_bulk(zone, to_drain, pcp);
1271 pcp->count -= to_drain; 1273 pcp->count -= to_drain;
@@ -1610,6 +1612,9 @@ again:
1610 } 1612 }
1611 1613
1612 __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); 1614 __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
1615 if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 &&
1616 !zone_is_fair_depleted(zone))
1617 zone_set_flag(zone, ZONE_FAIR_DEPLETED);
1613 1618
1614 __count_zone_vm_events(PGALLOC, zone, 1 << order); 1619 __count_zone_vm_events(PGALLOC, zone, 1 << order);
1615 zone_statistics(preferred_zone, zone, gfp_flags); 1620 zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1712,7 +1717,6 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
1712{ 1717{
1713 /* free_pages may go negative - that's OK */ 1718 /* free_pages may go negative - that's OK */
1714 long min = mark; 1719 long min = mark;
1715 long lowmem_reserve = z->lowmem_reserve[classzone_idx];
1716 int o; 1720 int o;
1717 long free_cma = 0; 1721 long free_cma = 0;
1718 1722
@@ -1727,7 +1731,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
1727 free_cma = zone_page_state(z, NR_FREE_CMA_PAGES); 1731 free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
1728#endif 1732#endif
1729 1733
1730 if (free_pages - free_cma <= min + lowmem_reserve) 1734 if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
1731 return false; 1735 return false;
1732 for (o = 0; o < order; o++) { 1736 for (o = 0; o < order; o++) {
1733 /* At the next order, this order's pages become unavailable */ 1737 /* At the next order, this order's pages become unavailable */
@@ -1922,6 +1926,18 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
1922 1926
1923#endif /* CONFIG_NUMA */ 1927#endif /* CONFIG_NUMA */
1924 1928
1929static void reset_alloc_batches(struct zone *preferred_zone)
1930{
1931 struct zone *zone = preferred_zone->zone_pgdat->node_zones;
1932
1933 do {
1934 mod_zone_page_state(zone, NR_ALLOC_BATCH,
1935 high_wmark_pages(zone) - low_wmark_pages(zone) -
1936 atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
1937 zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
1938 } while (zone++ != preferred_zone);
1939}
1940
1925/* 1941/*
1926 * get_page_from_freelist goes through the zonelist trying to allocate 1942 * get_page_from_freelist goes through the zonelist trying to allocate
1927 * a page. 1943 * a page.
@@ -1939,8 +1955,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
1939 int did_zlc_setup = 0; /* just call zlc_setup() one time */ 1955 int did_zlc_setup = 0; /* just call zlc_setup() one time */
1940 bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) && 1956 bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
1941 (gfp_mask & __GFP_WRITE); 1957 (gfp_mask & __GFP_WRITE);
1958 int nr_fair_skipped = 0;
1959 bool zonelist_rescan;
1942 1960
1943zonelist_scan: 1961zonelist_scan:
1962 zonelist_rescan = false;
1963
1944 /* 1964 /*
1945 * Scan zonelist, looking for a zone with enough free. 1965 * Scan zonelist, looking for a zone with enough free.
1946 * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c. 1966 * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
@@ -1964,9 +1984,11 @@ zonelist_scan:
1964 */ 1984 */
1965 if (alloc_flags & ALLOC_FAIR) { 1985 if (alloc_flags & ALLOC_FAIR) {
1966 if (!zone_local(preferred_zone, zone)) 1986 if (!zone_local(preferred_zone, zone))
1987 break;
1988 if (zone_is_fair_depleted(zone)) {
1989 nr_fair_skipped++;
1967 continue; 1990 continue;
1968 if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0) 1991 }
1969 continue;
1970 } 1992 }
1971 /* 1993 /*
1972 * When allocating a page cache page for writing, we 1994 * When allocating a page cache page for writing, we
@@ -2072,13 +2094,7 @@ this_zone_full:
2072 zlc_mark_zone_full(zonelist, z); 2094 zlc_mark_zone_full(zonelist, z);
2073 } 2095 }
2074 2096
2075 if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) { 2097 if (page) {
2076 /* Disable zlc cache for second zonelist scan */
2077 zlc_active = 0;
2078 goto zonelist_scan;
2079 }
2080
2081 if (page)
2082 /* 2098 /*
2083 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was 2099 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
2084 * necessary to allocate the page. The expectation is 2100 * necessary to allocate the page. The expectation is
@@ -2087,8 +2103,37 @@ this_zone_full:
2087 * for !PFMEMALLOC purposes. 2103 * for !PFMEMALLOC purposes.
2088 */ 2104 */
2089 page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); 2105 page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
2106 return page;
2107 }
2090 2108
2091 return page; 2109 /*
2110 * The first pass makes sure allocations are spread fairly within the
2111 * local node. However, the local node might have free pages left
2112 * after the fairness batches are exhausted, and remote zones haven't
2113 * even been considered yet. Try once more without fairness, and
2114 * include remote zones now, before entering the slowpath and waking
2115 * kswapd: prefer spilling to a remote zone over swapping locally.
2116 */
2117 if (alloc_flags & ALLOC_FAIR) {
2118 alloc_flags &= ~ALLOC_FAIR;
2119 if (nr_fair_skipped) {
2120 zonelist_rescan = true;
2121 reset_alloc_batches(preferred_zone);
2122 }
2123 if (nr_online_nodes > 1)
2124 zonelist_rescan = true;
2125 }
2126
2127 if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) {
2128 /* Disable zlc cache for second zonelist scan */
2129 zlc_active = 0;
2130 zonelist_rescan = true;
2131 }
2132
2133 if (zonelist_rescan)
2134 goto zonelist_scan;
2135
2136 return NULL;
2092} 2137}
2093 2138
2094/* 2139/*
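
Together with the removal of the retry: loop from __alloc_pages_nodemask() further down, the fairness retry now lives entirely inside get_page_from_freelist(). A condensed pseudo-C outline of the resulting two-pass logic; scan_zonelist() is a placeholder for the zone loop, everything else uses the names from the hunks above:

/* Pass 1 (ALLOC_FAIR): local zones only, skipping fairness-depleted ones.
 * Pass 2: if pass 1 found nothing, drop ALLOC_FAIR, refill the local
 * batches when any zone was skipped, and let remote zones join in.
 */
zonelist_scan:
	zonelist_rescan = false;
	page = scan_zonelist(...);		/* placeholder for the zone loop */
	if (page)
		return page;

	if (alloc_flags & ALLOC_FAIR) {
		alloc_flags &= ~ALLOC_FAIR;
		if (nr_fair_skipped) {
			reset_alloc_batches(preferred_zone);
			zonelist_rescan = true;
		}
		if (nr_online_nodes > 1)
			zonelist_rescan = true;
	}
	if (zonelist_rescan)
		goto zonelist_scan;
	return NULL;
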
@@ -2201,8 +2246,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
2201{ 2246{
2202 struct page *page; 2247 struct page *page;
2203 2248
2204 /* Acquire the OOM killer lock for the zones in zonelist */ 2249 /* Acquire the per-zone oom lock for each zone */
2205 if (!try_set_zonelist_oom(zonelist, gfp_mask)) { 2250 if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
2206 schedule_timeout_uninterruptible(1); 2251 schedule_timeout_uninterruptible(1);
2207 return NULL; 2252 return NULL;
2208 } 2253 }
@@ -2240,7 +2285,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
2240 out_of_memory(zonelist, gfp_mask, order, nodemask, false); 2285 out_of_memory(zonelist, gfp_mask, order, nodemask, false);
2241 2286
2242out: 2287out:
2243 clear_zonelist_oom(zonelist, gfp_mask); 2288 oom_zonelist_unlock(zonelist, gfp_mask);
2244 return page; 2289 return page;
2245} 2290}
2246 2291
@@ -2409,28 +2454,6 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
2409 return page; 2454 return page;
2410} 2455}
2411 2456
2412static void reset_alloc_batches(struct zonelist *zonelist,
2413 enum zone_type high_zoneidx,
2414 struct zone *preferred_zone)
2415{
2416 struct zoneref *z;
2417 struct zone *zone;
2418
2419 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
2420 /*
2421 * Only reset the batches of zones that were actually
2422 * considered in the fairness pass, we don't want to
2423 * trash fairness information for zones that are not
2424 * actually part of this zonelist's round-robin cycle.
2425 */
2426 if (!zone_local(preferred_zone, zone))
2427 continue;
2428 mod_zone_page_state(zone, NR_ALLOC_BATCH,
2429 high_wmark_pages(zone) - low_wmark_pages(zone) -
2430 atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
2431 }
2432}
2433
2434static void wake_all_kswapds(unsigned int order, 2457static void wake_all_kswapds(unsigned int order,
2435 struct zonelist *zonelist, 2458 struct zonelist *zonelist,
2436 enum zone_type high_zoneidx, 2459 enum zone_type high_zoneidx,
@@ -2616,14 +2639,6 @@ rebalance:
2616 goto got_pg; 2639 goto got_pg;
2617 2640
2618 /* 2641 /*
2619 * It can become very expensive to allocate transparent hugepages at
2620 * fault, so use asynchronous memory compaction for THP unless it is
2621 * khugepaged trying to collapse.
2622 */
2623 if (!(gfp_mask & __GFP_NO_KSWAPD) || (current->flags & PF_KTHREAD))
2624 migration_mode = MIGRATE_SYNC_LIGHT;
2625
2626 /*
2627 * If compaction is deferred for high-order allocations, it is because 2642 * If compaction is deferred for high-order allocations, it is because
2628 * sync compaction recently failed. If this is the case and the caller 2643 * sync compaction recently failed. If this is the case and the caller
2629 * requested a movable allocation that does not heavily disrupt the 2644 * requested a movable allocation that does not heavily disrupt the
@@ -2633,6 +2648,15 @@ rebalance:
2633 (gfp_mask & __GFP_NO_KSWAPD)) 2648 (gfp_mask & __GFP_NO_KSWAPD))
2634 goto nopage; 2649 goto nopage;
2635 2650
2651 /*
2652 * It can become very expensive to allocate transparent hugepages at
2653 * fault, so use asynchronous memory compaction for THP unless it is
2654 * khugepaged trying to collapse.
2655 */
2656 if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE ||
2657 (current->flags & PF_KTHREAD))
2658 migration_mode = MIGRATE_SYNC_LIGHT;
2659
2636 /* Try direct reclaim and then allocating */ 2660 /* Try direct reclaim and then allocating */
2637 page = __alloc_pages_direct_reclaim(gfp_mask, order, 2661 page = __alloc_pages_direct_reclaim(gfp_mask, order,
2638 zonelist, high_zoneidx, 2662 zonelist, high_zoneidx,
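
The THP heuristic has not changed in spirit, but the moved block now tests GFP_TRANSHUGE explicitly instead of relying on __GFP_NO_KSWAPD, and it runs after the deferred-compaction bailout. The condition, rewritten as a hypothetical predicate to make the intent explicit:

/* Use lightweight synchronous compaction unless this is a THP page
 * fault from a user task (khugepaged, a kthread, still gets sync-light).
 */
static inline bool example_use_sync_light_compaction(gfp_t gfp_mask)
{
	bool thp_fault = (gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE &&
			 !(current->flags & PF_KTHREAD);

	return !thp_fault;
}
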
@@ -2766,29 +2790,12 @@ retry_cpuset:
2766 if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) 2790 if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
2767 alloc_flags |= ALLOC_CMA; 2791 alloc_flags |= ALLOC_CMA;
2768#endif 2792#endif
2769retry:
2770 /* First allocation attempt */ 2793 /* First allocation attempt */
2771 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, 2794 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
2772 zonelist, high_zoneidx, alloc_flags, 2795 zonelist, high_zoneidx, alloc_flags,
2773 preferred_zone, classzone_idx, migratetype); 2796 preferred_zone, classzone_idx, migratetype);
2774 if (unlikely(!page)) { 2797 if (unlikely(!page)) {
2775 /* 2798 /*
2776 * The first pass makes sure allocations are spread
2777 * fairly within the local node. However, the local
2778 * node might have free pages left after the fairness
2779 * batches are exhausted, and remote zones haven't
2780 * even been considered yet. Try once more without
2781 * fairness, and include remote zones now, before
2782 * entering the slowpath and waking kswapd: prefer
2783 * spilling to a remote zone over swapping locally.
2784 */
2785 if (alloc_flags & ALLOC_FAIR) {
2786 reset_alloc_batches(zonelist, high_zoneidx,
2787 preferred_zone);
2788 alloc_flags &= ~ALLOC_FAIR;
2789 goto retry;
2790 }
2791 /*
2792 * Runtime PM, block IO and its error handling path 2799 * Runtime PM, block IO and its error handling path
2793 * can deadlock because I/O on the device might not 2800 * can deadlock because I/O on the device might not
2794 * complete. 2801 * complete.
@@ -2962,7 +2969,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
2962 * Note this is not alloc_pages_exact_node() which allocates on a specific node, 2969 * Note this is not alloc_pages_exact_node() which allocates on a specific node,
2963 * but is not exact. 2970 * but is not exact.
2964 */ 2971 */
2965void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) 2972void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
2966{ 2973{
2967 unsigned order = get_order(size); 2974 unsigned order = get_order(size);
2968 struct page *p = alloc_pages_node(nid, gfp_mask, order); 2975 struct page *p = alloc_pages_node(nid, gfp_mask, order);
@@ -2970,7 +2977,6 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
2970 return NULL; 2977 return NULL;
2971 return make_alloc_exact((unsigned long)page_address(p), order, size); 2978 return make_alloc_exact((unsigned long)page_address(p), order, size);
2972} 2979}
2973EXPORT_SYMBOL(alloc_pages_exact_nid);
2974 2980
2975/** 2981/**
2976 * free_pages_exact - release memory allocated via alloc_pages_exact() 2982 * free_pages_exact - release memory allocated via alloc_pages_exact()
@@ -3052,7 +3058,7 @@ static inline void show_node(struct zone *zone)
3052void si_meminfo(struct sysinfo *val) 3058void si_meminfo(struct sysinfo *val)
3053{ 3059{
3054 val->totalram = totalram_pages; 3060 val->totalram = totalram_pages;
3055 val->sharedram = 0; 3061 val->sharedram = global_page_state(NR_SHMEM);
3056 val->freeram = global_page_state(NR_FREE_PAGES); 3062 val->freeram = global_page_state(NR_FREE_PAGES);
3057 val->bufferram = nr_blockdev_pages(); 3063 val->bufferram = nr_blockdev_pages();
3058 val->totalhigh = totalhigh_pages; 3064 val->totalhigh = totalhigh_pages;
@@ -3072,6 +3078,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
3072 for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) 3078 for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
3073 managed_pages += pgdat->node_zones[zone_type].managed_pages; 3079 managed_pages += pgdat->node_zones[zone_type].managed_pages;
3074 val->totalram = managed_pages; 3080 val->totalram = managed_pages;
3081 val->sharedram = node_page_state(nid, NR_SHMEM);
3075 val->freeram = node_page_state(nid, NR_FREE_PAGES); 3082 val->freeram = node_page_state(nid, NR_FREE_PAGES);
3076#ifdef CONFIG_HIGHMEM 3083#ifdef CONFIG_HIGHMEM
3077 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages; 3084 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
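
With these two hunks, sysinfo()'s previously zeroed sharedram field reports shmem/tmpfs pages, globally and per node. A small, runnable userspace check (illustrative program, not part of the series):

#include <stdio.h>
#include <sys/sysinfo.h>

int main(void)
{
	struct sysinfo si;

	if (sysinfo(&si))
		return 1;
	/* Should track Shmem: in /proc/meminfo on a kernel with this change. */
	printf("shared: %lu kB\n", si.sharedram * si.mem_unit / 1024);
	return 0;
}
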
@@ -3253,12 +3260,12 @@ void show_free_areas(unsigned int filter)
3253 K(zone_page_state(zone, NR_BOUNCE)), 3260 K(zone_page_state(zone, NR_BOUNCE)),
3254 K(zone_page_state(zone, NR_FREE_CMA_PAGES)), 3261 K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
3255 K(zone_page_state(zone, NR_WRITEBACK_TEMP)), 3262 K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
3256 zone->pages_scanned, 3263 K(zone_page_state(zone, NR_PAGES_SCANNED)),
3257 (!zone_reclaimable(zone) ? "yes" : "no") 3264 (!zone_reclaimable(zone) ? "yes" : "no")
3258 ); 3265 );
3259 printk("lowmem_reserve[]:"); 3266 printk("lowmem_reserve[]:");
3260 for (i = 0; i < MAX_NR_ZONES; i++) 3267 for (i = 0; i < MAX_NR_ZONES; i++)
3261 printk(" %lu", zone->lowmem_reserve[i]); 3268 printk(" %ld", zone->lowmem_reserve[i]);
3262 printk("\n"); 3269 printk("\n");
3263 } 3270 }
3264 3271
@@ -5579,7 +5586,7 @@ static void calculate_totalreserve_pages(void)
5579 for_each_online_pgdat(pgdat) { 5586 for_each_online_pgdat(pgdat) {
5580 for (i = 0; i < MAX_NR_ZONES; i++) { 5587 for (i = 0; i < MAX_NR_ZONES; i++) {
5581 struct zone *zone = pgdat->node_zones + i; 5588 struct zone *zone = pgdat->node_zones + i;
5582 unsigned long max = 0; 5589 long max = 0;
5583 5590
5584 /* Find valid and maximum lowmem_reserve in the zone */ 5591 /* Find valid and maximum lowmem_reserve in the zone */
5585 for (j = i; j < MAX_NR_ZONES; j++) { 5592 for (j = i; j < MAX_NR_ZONES; j++) {
diff --git a/mm/readahead.c b/mm/readahead.c
index 0ca36a7770b1..17b9172ec37f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -326,7 +326,6 @@ static unsigned long get_next_ra_size(struct file_ra_state *ra,
326 * - thrashing threshold in memory tight systems 326 * - thrashing threshold in memory tight systems
327 */ 327 */
328static pgoff_t count_history_pages(struct address_space *mapping, 328static pgoff_t count_history_pages(struct address_space *mapping,
329 struct file_ra_state *ra,
330 pgoff_t offset, unsigned long max) 329 pgoff_t offset, unsigned long max)
331{ 330{
332 pgoff_t head; 331 pgoff_t head;
@@ -349,7 +348,7 @@ static int try_context_readahead(struct address_space *mapping,
349{ 348{
350 pgoff_t size; 349 pgoff_t size;
351 350
352 size = count_history_pages(mapping, ra, offset, max); 351 size = count_history_pages(mapping, offset, max);
353 352
354 /* 353 /*
355 * not enough history pages: 354 * not enough history pages:
diff --git a/mm/shmem.c b/mm/shmem.c
index af68b15a8fc1..302d1cf7ad07 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -149,6 +149,19 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
149 vm_unacct_memory(VM_ACCT(size)); 149 vm_unacct_memory(VM_ACCT(size));
150} 150}
151 151
152static inline int shmem_reacct_size(unsigned long flags,
153 loff_t oldsize, loff_t newsize)
154{
155 if (!(flags & VM_NORESERVE)) {
156 if (VM_ACCT(newsize) > VM_ACCT(oldsize))
157 return security_vm_enough_memory_mm(current->mm,
158 VM_ACCT(newsize) - VM_ACCT(oldsize));
159 else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
160 vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
161 }
162 return 0;
163}
164
152/* 165/*
153 * ... whereas tmpfs objects are accounted incrementally as 166 * ... whereas tmpfs objects are accounted incrementally as
154 * pages are allocated, in order to allow huge sparse files. 167 * pages are allocated, in order to allow huge sparse files.
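
shmem_reacct_size() charges or uncharges only the difference between the old and new object size for !VM_NORESERVE objects: growing a SysV SHM segment from 1 MiB to 3 MiB charges roughly 2 MiB of commit, and shrinking it back releases that again. A sketch of the truncate-path usage, a simplified version of the shmem_setattr() hunk further down (the function name is invented):

static int example_resize(struct inode *inode, loff_t newsize)
{
	loff_t oldsize = i_size_read(inode);
	int error;

	error = shmem_reacct_size(SHMEM_I(inode)->flags, oldsize, newsize);
	if (error)			/* growing would exceed the commit limit */
		return error;
	i_size_write(inode, newsize);
	return 0;
}
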
@@ -280,7 +293,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
280 */ 293 */
281static int shmem_add_to_page_cache(struct page *page, 294static int shmem_add_to_page_cache(struct page *page,
282 struct address_space *mapping, 295 struct address_space *mapping,
283 pgoff_t index, gfp_t gfp, void *expected) 296 pgoff_t index, void *expected)
284{ 297{
285 int error; 298 int error;
286 299
@@ -549,6 +562,10 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
549 loff_t newsize = attr->ia_size; 562 loff_t newsize = attr->ia_size;
550 563
551 if (newsize != oldsize) { 564 if (newsize != oldsize) {
565 error = shmem_reacct_size(SHMEM_I(inode)->flags,
566 oldsize, newsize);
567 if (error)
568 return error;
552 i_size_write(inode, newsize); 569 i_size_write(inode, newsize);
553 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 570 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
554 } 571 }
@@ -649,7 +666,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
649 */ 666 */
650 if (!error) 667 if (!error)
651 error = shmem_add_to_page_cache(*pagep, mapping, index, 668 error = shmem_add_to_page_cache(*pagep, mapping, index,
652 GFP_NOWAIT, radswap); 669 radswap);
653 if (error != -ENOMEM) { 670 if (error != -ENOMEM) {
654 /* 671 /*
655 * Truncation and eviction use free_swap_and_cache(), which 672 * Truncation and eviction use free_swap_and_cache(), which
@@ -1095,7 +1112,7 @@ repeat:
1095 gfp & GFP_RECLAIM_MASK); 1112 gfp & GFP_RECLAIM_MASK);
1096 if (!error) { 1113 if (!error) {
1097 error = shmem_add_to_page_cache(page, mapping, index, 1114 error = shmem_add_to_page_cache(page, mapping, index,
1098 gfp, swp_to_radix_entry(swap)); 1115 swp_to_radix_entry(swap));
1099 /* 1116 /*
1100 * We already confirmed swap under page lock, and make 1117 * We already confirmed swap under page lock, and make
1101 * no memory allocation here, so usually no possibility 1118 * no memory allocation here, so usually no possibility
@@ -1149,7 +1166,7 @@ repeat:
1149 __SetPageSwapBacked(page); 1166 __SetPageSwapBacked(page);
1150 __set_page_locked(page); 1167 __set_page_locked(page);
1151 if (sgp == SGP_WRITE) 1168 if (sgp == SGP_WRITE)
1152 init_page_accessed(page); 1169 __SetPageReferenced(page);
1153 1170
1154 error = mem_cgroup_charge_file(page, current->mm, 1171 error = mem_cgroup_charge_file(page, current->mm,
1155 gfp & GFP_RECLAIM_MASK); 1172 gfp & GFP_RECLAIM_MASK);
@@ -1158,7 +1175,7 @@ repeat:
1158 error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); 1175 error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
1159 if (!error) { 1176 if (!error) {
1160 error = shmem_add_to_page_cache(page, mapping, index, 1177 error = shmem_add_to_page_cache(page, mapping, index,
1161 gfp, NULL); 1178 NULL);
1162 radix_tree_preload_end(); 1179 radix_tree_preload_end();
1163 } 1180 }
1164 if (error) { 1181 if (error) {
@@ -2932,16 +2949,16 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
2932 this.len = strlen(name); 2949 this.len = strlen(name);
2933 this.hash = 0; /* will go */ 2950 this.hash = 0; /* will go */
2934 sb = shm_mnt->mnt_sb; 2951 sb = shm_mnt->mnt_sb;
2952 path.mnt = mntget(shm_mnt);
2935 path.dentry = d_alloc_pseudo(sb, &this); 2953 path.dentry = d_alloc_pseudo(sb, &this);
2936 if (!path.dentry) 2954 if (!path.dentry)
2937 goto put_memory; 2955 goto put_memory;
2938 d_set_d_op(path.dentry, &anon_ops); 2956 d_set_d_op(path.dentry, &anon_ops);
2939 path.mnt = mntget(shm_mnt);
2940 2957
2941 res = ERR_PTR(-ENOSPC); 2958 res = ERR_PTR(-ENOSPC);
2942 inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags); 2959 inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
2943 if (!inode) 2960 if (!inode)
2944 goto put_dentry; 2961 goto put_memory;
2945 2962
2946 inode->i_flags |= i_flags; 2963 inode->i_flags |= i_flags;
2947 d_instantiate(path.dentry, inode); 2964 d_instantiate(path.dentry, inode);
@@ -2949,19 +2966,19 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
2949 clear_nlink(inode); /* It is unlinked */ 2966 clear_nlink(inode); /* It is unlinked */
2950 res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size)); 2967 res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
2951 if (IS_ERR(res)) 2968 if (IS_ERR(res))
2952 goto put_dentry; 2969 goto put_path;
2953 2970
2954 res = alloc_file(&path, FMODE_WRITE | FMODE_READ, 2971 res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
2955 &shmem_file_operations); 2972 &shmem_file_operations);
2956 if (IS_ERR(res)) 2973 if (IS_ERR(res))
2957 goto put_dentry; 2974 goto put_path;
2958 2975
2959 return res; 2976 return res;
2960 2977
2961put_dentry:
2962 path_put(&path);
2963put_memory: 2978put_memory:
2964 shmem_unacct_size(flags, size); 2979 shmem_unacct_size(flags, size);
2980put_path:
2981 path_put(&path);
2965 return res; 2982 return res;
2966} 2983}
2967 2984
diff --git a/mm/slab.c b/mm/slab.c
index 3070b929a1bf..2e60bf3dedbb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -191,7 +191,6 @@ struct array_cache {
191 unsigned int limit; 191 unsigned int limit;
192 unsigned int batchcount; 192 unsigned int batchcount;
193 unsigned int touched; 193 unsigned int touched;
194 spinlock_t lock;
195 void *entry[]; /* 194 void *entry[]; /*
196 * Must have this definition in here for the proper 195 * Must have this definition in here for the proper
197 * alignment of array_cache. Also simplifies accessing 196 * alignment of array_cache. Also simplifies accessing
@@ -203,6 +202,11 @@ struct array_cache {
203 */ 202 */
204}; 203};
205 204
205struct alien_cache {
206 spinlock_t lock;
207 struct array_cache ac;
208};
209
206#define SLAB_OBJ_PFMEMALLOC 1 210#define SLAB_OBJ_PFMEMALLOC 1
207static inline bool is_obj_pfmemalloc(void *objp) 211static inline bool is_obj_pfmemalloc(void *objp)
208{ 212{
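
The spinlock removed from struct array_cache a few hunks up reappears here, but only in the new alien_cache wrapper: plain per-CPU array caches are touched with interrupts off on their own CPU and need no lock, while alien caches are shared by the CPUs of a node and keep one. A sketch of the resulting usage, mirroring the drain_alien_cache() conversion later in this file (the wrapper name is invented):

static void example_flush_alien(struct kmem_cache *cachep,
				struct alien_cache *alc, int node)
{
	unsigned long flags;
	LIST_HEAD(list);

	spin_lock_irqsave(&alc->lock, flags);
	__drain_alien_cache(cachep, &alc->ac, node, &list);
	spin_unlock_irqrestore(&alc->lock, flags);
	slabs_destroy(cachep, &list);	/* free slab pages outside the lock */
}
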
@@ -242,7 +246,8 @@ static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
242static int drain_freelist(struct kmem_cache *cache, 246static int drain_freelist(struct kmem_cache *cache,
243 struct kmem_cache_node *n, int tofree); 247 struct kmem_cache_node *n, int tofree);
244static void free_block(struct kmem_cache *cachep, void **objpp, int len, 248static void free_block(struct kmem_cache *cachep, void **objpp, int len,
245 int node); 249 int node, struct list_head *list);
250static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
246static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); 251static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
247static void cache_reap(struct work_struct *unused); 252static void cache_reap(struct work_struct *unused);
248 253
@@ -267,7 +272,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
267#define MAKE_LIST(cachep, listp, slab, nodeid) \ 272#define MAKE_LIST(cachep, listp, slab, nodeid) \
268 do { \ 273 do { \
269 INIT_LIST_HEAD(listp); \ 274 INIT_LIST_HEAD(listp); \
270 list_splice(&(cachep->node[nodeid]->slab), listp); \ 275 list_splice(&get_node(cachep, nodeid)->slab, listp); \
271 } while (0) 276 } while (0)
272 277
273#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ 278#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
@@ -465,143 +470,6 @@ static struct kmem_cache kmem_cache_boot = {
465 .name = "kmem_cache", 470 .name = "kmem_cache",
466}; 471};
467 472
468#define BAD_ALIEN_MAGIC 0x01020304ul
469
470#ifdef CONFIG_LOCKDEP
471
472/*
473 * Slab sometimes uses the kmalloc slabs to store the slab headers
474 * for other slabs "off slab".
475 * The locking for this is tricky in that it nests within the locks
476 * of all other slabs in a few places; to deal with this special
477 * locking we put on-slab caches into a separate lock-class.
478 *
479 * We set lock class for alien array caches which are up during init.
480 * The lock annotation will be lost if all cpus of a node goes down and
481 * then comes back up during hotplug
482 */
483static struct lock_class_key on_slab_l3_key;
484static struct lock_class_key on_slab_alc_key;
485
486static struct lock_class_key debugobj_l3_key;
487static struct lock_class_key debugobj_alc_key;
488
489static void slab_set_lock_classes(struct kmem_cache *cachep,
490 struct lock_class_key *l3_key, struct lock_class_key *alc_key,
491 int q)
492{
493 struct array_cache **alc;
494 struct kmem_cache_node *n;
495 int r;
496
497 n = cachep->node[q];
498 if (!n)
499 return;
500
501 lockdep_set_class(&n->list_lock, l3_key);
502 alc = n->alien;
503 /*
504 * FIXME: This check for BAD_ALIEN_MAGIC
505 * should go away when common slab code is taught to
506 * work even without alien caches.
507 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
508 * for alloc_alien_cache,
509 */
510 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
511 return;
512 for_each_node(r) {
513 if (alc[r])
514 lockdep_set_class(&alc[r]->lock, alc_key);
515 }
516}
517
518static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
519{
520 slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
521}
522
523static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
524{
525 int node;
526
527 for_each_online_node(node)
528 slab_set_debugobj_lock_classes_node(cachep, node);
529}
530
531static void init_node_lock_keys(int q)
532{
533 int i;
534
535 if (slab_state < UP)
536 return;
537
538 for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
539 struct kmem_cache_node *n;
540 struct kmem_cache *cache = kmalloc_caches[i];
541
542 if (!cache)
543 continue;
544
545 n = cache->node[q];
546 if (!n || OFF_SLAB(cache))
547 continue;
548
549 slab_set_lock_classes(cache, &on_slab_l3_key,
550 &on_slab_alc_key, q);
551 }
552}
553
554static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
555{
556 if (!cachep->node[q])
557 return;
558
559 slab_set_lock_classes(cachep, &on_slab_l3_key,
560 &on_slab_alc_key, q);
561}
562
563static inline void on_slab_lock_classes(struct kmem_cache *cachep)
564{
565 int node;
566
567 VM_BUG_ON(OFF_SLAB(cachep));
568 for_each_node(node)
569 on_slab_lock_classes_node(cachep, node);
570}
571
572static inline void init_lock_keys(void)
573{
574 int node;
575
576 for_each_node(node)
577 init_node_lock_keys(node);
578}
579#else
580static void init_node_lock_keys(int q)
581{
582}
583
584static inline void init_lock_keys(void)
585{
586}
587
588static inline void on_slab_lock_classes(struct kmem_cache *cachep)
589{
590}
591
592static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
593{
594}
595
596static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
597{
598}
599
600static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
601{
602}
603#endif
604
605static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); 473static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
606 474
607static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 475static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -792,13 +660,8 @@ static void start_cpu_timer(int cpu)
792 } 660 }
793} 661}
794 662
795static struct array_cache *alloc_arraycache(int node, int entries, 663static void init_arraycache(struct array_cache *ac, int limit, int batch)
796 int batchcount, gfp_t gfp)
797{ 664{
798 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
799 struct array_cache *nc = NULL;
800
801 nc = kmalloc_node(memsize, gfp, node);
802 /* 665 /*
803 * The array_cache structures contain pointers to free object. 666 * The array_cache structures contain pointers to free object.
804 * However, when such objects are allocated or transferred to another 667 * However, when such objects are allocated or transferred to another
@@ -806,15 +669,24 @@ static struct array_cache *alloc_arraycache(int node, int entries,
806 * valid references during a kmemleak scan. Therefore, kmemleak must 669 * valid references during a kmemleak scan. Therefore, kmemleak must
807 * not scan such objects. 670 * not scan such objects.
808 */ 671 */
809 kmemleak_no_scan(nc); 672 kmemleak_no_scan(ac);
810 if (nc) { 673 if (ac) {
811 nc->avail = 0; 674 ac->avail = 0;
812 nc->limit = entries; 675 ac->limit = limit;
813 nc->batchcount = batchcount; 676 ac->batchcount = batch;
814 nc->touched = 0; 677 ac->touched = 0;
815 spin_lock_init(&nc->lock);
816 } 678 }
817 return nc; 679}
680
681static struct array_cache *alloc_arraycache(int node, int entries,
682 int batchcount, gfp_t gfp)
683{
684 size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
685 struct array_cache *ac = NULL;
686
687 ac = kmalloc_node(memsize, gfp, node);
688 init_arraycache(ac, entries, batchcount);
689 return ac;
818} 690}
819 691
820static inline bool is_slab_pfmemalloc(struct page *page) 692static inline bool is_slab_pfmemalloc(struct page *page)
@@ -826,7 +698,7 @@ static inline bool is_slab_pfmemalloc(struct page *page)
826static void recheck_pfmemalloc_active(struct kmem_cache *cachep, 698static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
827 struct array_cache *ac) 699 struct array_cache *ac)
828{ 700{
829 struct kmem_cache_node *n = cachep->node[numa_mem_id()]; 701 struct kmem_cache_node *n = get_node(cachep, numa_mem_id());
830 struct page *page; 702 struct page *page;
831 unsigned long flags; 703 unsigned long flags;
832 704
@@ -881,7 +753,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
881 * If there are empty slabs on the slabs_free list and we are 753 * If there are empty slabs on the slabs_free list and we are
882 * being forced to refill the cache, mark this one !pfmemalloc. 754 * being forced to refill the cache, mark this one !pfmemalloc.
883 */ 755 */
884 n = cachep->node[numa_mem_id()]; 756 n = get_node(cachep, numa_mem_id());
885 if (!list_empty(&n->slabs_free) && force_refill) { 757 if (!list_empty(&n->slabs_free) && force_refill) {
886 struct page *page = virt_to_head_page(objp); 758 struct page *page = virt_to_head_page(objp);
887 ClearPageSlabPfmemalloc(page); 759 ClearPageSlabPfmemalloc(page);
@@ -961,12 +833,13 @@ static int transfer_objects(struct array_cache *to,
961#define drain_alien_cache(cachep, alien) do { } while (0) 833#define drain_alien_cache(cachep, alien) do { } while (0)
962#define reap_alien(cachep, n) do { } while (0) 834#define reap_alien(cachep, n) do { } while (0)
963 835
964static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) 836static inline struct alien_cache **alloc_alien_cache(int node,
837 int limit, gfp_t gfp)
965{ 838{
966 return (struct array_cache **)BAD_ALIEN_MAGIC; 839 return NULL;
967} 840}
968 841
969static inline void free_alien_cache(struct array_cache **ac_ptr) 842static inline void free_alien_cache(struct alien_cache **ac_ptr)
970{ 843{
971} 844}
972 845
@@ -992,46 +865,60 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
992static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); 865static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
993static void *alternate_node_alloc(struct kmem_cache *, gfp_t); 866static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
994 867
995static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) 868static struct alien_cache *__alloc_alien_cache(int node, int entries,
869 int batch, gfp_t gfp)
870{
871 size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
872 struct alien_cache *alc = NULL;
873
874 alc = kmalloc_node(memsize, gfp, node);
875 init_arraycache(&alc->ac, entries, batch);
876 spin_lock_init(&alc->lock);
877 return alc;
878}
879
880static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
996{ 881{
997 struct array_cache **ac_ptr; 882 struct alien_cache **alc_ptr;
998 int memsize = sizeof(void *) * nr_node_ids; 883 size_t memsize = sizeof(void *) * nr_node_ids;
999 int i; 884 int i;
1000 885
1001 if (limit > 1) 886 if (limit > 1)
1002 limit = 12; 887 limit = 12;
1003 ac_ptr = kzalloc_node(memsize, gfp, node); 888 alc_ptr = kzalloc_node(memsize, gfp, node);
1004 if (ac_ptr) { 889 if (!alc_ptr)
1005 for_each_node(i) { 890 return NULL;
1006 if (i == node || !node_online(i)) 891
1007 continue; 892 for_each_node(i) {
1008 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp); 893 if (i == node || !node_online(i))
1009 if (!ac_ptr[i]) { 894 continue;
1010 for (i--; i >= 0; i--) 895 alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
1011 kfree(ac_ptr[i]); 896 if (!alc_ptr[i]) {
1012 kfree(ac_ptr); 897 for (i--; i >= 0; i--)
1013 return NULL; 898 kfree(alc_ptr[i]);
1014 } 899 kfree(alc_ptr);
900 return NULL;
1015 } 901 }
1016 } 902 }
1017 return ac_ptr; 903 return alc_ptr;
1018} 904}
1019 905
1020static void free_alien_cache(struct array_cache **ac_ptr) 906static void free_alien_cache(struct alien_cache **alc_ptr)
1021{ 907{
1022 int i; 908 int i;
1023 909
1024 if (!ac_ptr) 910 if (!alc_ptr)
1025 return; 911 return;
1026 for_each_node(i) 912 for_each_node(i)
1027 kfree(ac_ptr[i]); 913 kfree(alc_ptr[i]);
1028 kfree(ac_ptr); 914 kfree(alc_ptr);
1029} 915}
1030 916
1031static void __drain_alien_cache(struct kmem_cache *cachep, 917static void __drain_alien_cache(struct kmem_cache *cachep,
1032 struct array_cache *ac, int node) 918 struct array_cache *ac, int node,
919 struct list_head *list)
1033{ 920{
1034 struct kmem_cache_node *n = cachep->node[node]; 921 struct kmem_cache_node *n = get_node(cachep, node);
1035 922
1036 if (ac->avail) { 923 if (ac->avail) {
1037 spin_lock(&n->list_lock); 924 spin_lock(&n->list_lock);
@@ -1043,7 +930,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
1043 if (n->shared) 930 if (n->shared)
1044 transfer_objects(n->shared, ac, ac->limit); 931 transfer_objects(n->shared, ac, ac->limit);
1045 932
1046 free_block(cachep, ac->entry, ac->avail, node); 933 free_block(cachep, ac->entry, ac->avail, node, list);
1047 ac->avail = 0; 934 ac->avail = 0;
1048 spin_unlock(&n->list_lock); 935 spin_unlock(&n->list_lock);
1049 } 936 }
@@ -1057,28 +944,40 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
1057 int node = __this_cpu_read(slab_reap_node); 944 int node = __this_cpu_read(slab_reap_node);
1058 945
1059 if (n->alien) { 946 if (n->alien) {
1060 struct array_cache *ac = n->alien[node]; 947 struct alien_cache *alc = n->alien[node];
948 struct array_cache *ac;
949
950 if (alc) {
951 ac = &alc->ac;
952 if (ac->avail && spin_trylock_irq(&alc->lock)) {
953 LIST_HEAD(list);
1061 954
1062 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { 955 __drain_alien_cache(cachep, ac, node, &list);
1063 __drain_alien_cache(cachep, ac, node); 956 spin_unlock_irq(&alc->lock);
1064 spin_unlock_irq(&ac->lock); 957 slabs_destroy(cachep, &list);
958 }
1065 } 959 }
1066 } 960 }
1067} 961}
1068 962
1069static void drain_alien_cache(struct kmem_cache *cachep, 963static void drain_alien_cache(struct kmem_cache *cachep,
1070 struct array_cache **alien) 964 struct alien_cache **alien)
1071{ 965{
1072 int i = 0; 966 int i = 0;
967 struct alien_cache *alc;
1073 struct array_cache *ac; 968 struct array_cache *ac;
1074 unsigned long flags; 969 unsigned long flags;
1075 970
1076 for_each_online_node(i) { 971 for_each_online_node(i) {
1077 ac = alien[i]; 972 alc = alien[i];
1078 if (ac) { 973 if (alc) {
1079 spin_lock_irqsave(&ac->lock, flags); 974 LIST_HEAD(list);
1080 __drain_alien_cache(cachep, ac, i); 975
1081 spin_unlock_irqrestore(&ac->lock, flags); 976 ac = &alc->ac;
977 spin_lock_irqsave(&alc->lock, flags);
978 __drain_alien_cache(cachep, ac, i, &list);
979 spin_unlock_irqrestore(&alc->lock, flags);
980 slabs_destroy(cachep, &list);
1082 } 981 }
1083 } 982 }
1084} 983}
@@ -1087,8 +986,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1087{ 986{
1088 int nodeid = page_to_nid(virt_to_page(objp)); 987 int nodeid = page_to_nid(virt_to_page(objp));
1089 struct kmem_cache_node *n; 988 struct kmem_cache_node *n;
1090 struct array_cache *alien = NULL; 989 struct alien_cache *alien = NULL;
990 struct array_cache *ac;
1091 int node; 991 int node;
992 LIST_HEAD(list);
1092 993
1093 node = numa_mem_id(); 994 node = numa_mem_id();
1094 995
@@ -1099,21 +1000,25 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1099 if (likely(nodeid == node)) 1000 if (likely(nodeid == node))
1100 return 0; 1001 return 0;
1101 1002
1102 n = cachep->node[node]; 1003 n = get_node(cachep, node);
1103 STATS_INC_NODEFREES(cachep); 1004 STATS_INC_NODEFREES(cachep);
1104 if (n->alien && n->alien[nodeid]) { 1005 if (n->alien && n->alien[nodeid]) {
1105 alien = n->alien[nodeid]; 1006 alien = n->alien[nodeid];
1007 ac = &alien->ac;
1106 spin_lock(&alien->lock); 1008 spin_lock(&alien->lock);
1107 if (unlikely(alien->avail == alien->limit)) { 1009 if (unlikely(ac->avail == ac->limit)) {
1108 STATS_INC_ACOVERFLOW(cachep); 1010 STATS_INC_ACOVERFLOW(cachep);
1109 __drain_alien_cache(cachep, alien, nodeid); 1011 __drain_alien_cache(cachep, ac, nodeid, &list);
1110 } 1012 }
1111 ac_put_obj(cachep, alien, objp); 1013 ac_put_obj(cachep, ac, objp);
1112 spin_unlock(&alien->lock); 1014 spin_unlock(&alien->lock);
1015 slabs_destroy(cachep, &list);
1113 } else { 1016 } else {
1114 spin_lock(&(cachep->node[nodeid])->list_lock); 1017 n = get_node(cachep, nodeid);
1115 free_block(cachep, &objp, 1, nodeid); 1018 spin_lock(&n->list_lock);
1116 spin_unlock(&(cachep->node[nodeid])->list_lock); 1019 free_block(cachep, &objp, 1, nodeid, &list);
1020 spin_unlock(&n->list_lock);
1021 slabs_destroy(cachep, &list);
1117 } 1022 }
1118 return 1; 1023 return 1;
1119} 1024}
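
The other recurring conversion in this file: free_block() no longer frees empty slab pages itself but collects them on a caller-supplied list, so the page allocator is only entered after n->list_lock has been dropped, via the new slabs_destroy(). The pattern, pulled out into a hypothetical helper:

static void example_flush_objects(struct kmem_cache *cachep,
				  void **objpp, int nr, int node)
{
	struct kmem_cache_node *n = get_node(cachep, node);
	LIST_HEAD(list);

	spin_lock(&n->list_lock);
	free_block(cachep, objpp, nr, node, &list);
	spin_unlock(&n->list_lock);
	slabs_destroy(cachep, &list);	/* slab_destroy() runs without the lock */
}
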
@@ -1132,7 +1037,7 @@ static int init_cache_node_node(int node)
1132{ 1037{
1133 struct kmem_cache *cachep; 1038 struct kmem_cache *cachep;
1134 struct kmem_cache_node *n; 1039 struct kmem_cache_node *n;
1135 const int memsize = sizeof(struct kmem_cache_node); 1040 const size_t memsize = sizeof(struct kmem_cache_node);
1136 1041
1137 list_for_each_entry(cachep, &slab_caches, list) { 1042 list_for_each_entry(cachep, &slab_caches, list) {
1138 /* 1043 /*
@@ -1140,7 +1045,8 @@ static int init_cache_node_node(int node)
1140 * begin anything. Make sure some other cpu on this 1045 * begin anything. Make sure some other cpu on this
1141 * node has not already allocated this 1046 * node has not already allocated this
1142 */ 1047 */
1143 if (!cachep->node[node]) { 1048 n = get_node(cachep, node);
1049 if (!n) {
1144 n = kmalloc_node(memsize, GFP_KERNEL, node); 1050 n = kmalloc_node(memsize, GFP_KERNEL, node);
1145 if (!n) 1051 if (!n)
1146 return -ENOMEM; 1052 return -ENOMEM;
@@ -1156,11 +1062,11 @@ static int init_cache_node_node(int node)
1156 cachep->node[node] = n; 1062 cachep->node[node] = n;
1157 } 1063 }
1158 1064
1159 spin_lock_irq(&cachep->node[node]->list_lock); 1065 spin_lock_irq(&n->list_lock);
1160 cachep->node[node]->free_limit = 1066 n->free_limit =
1161 (1 + nr_cpus_node(node)) * 1067 (1 + nr_cpus_node(node)) *
1162 cachep->batchcount + cachep->num; 1068 cachep->batchcount + cachep->num;
1163 spin_unlock_irq(&cachep->node[node]->list_lock); 1069 spin_unlock_irq(&n->list_lock);
1164 } 1070 }
1165 return 0; 1071 return 0;
1166} 1072}
@@ -1181,12 +1087,13 @@ static void cpuup_canceled(long cpu)
1181 list_for_each_entry(cachep, &slab_caches, list) { 1087 list_for_each_entry(cachep, &slab_caches, list) {
1182 struct array_cache *nc; 1088 struct array_cache *nc;
1183 struct array_cache *shared; 1089 struct array_cache *shared;
1184 struct array_cache **alien; 1090 struct alien_cache **alien;
1091 LIST_HEAD(list);
1185 1092
1186 /* cpu is dead; no one can alloc from it. */ 1093 /* cpu is dead; no one can alloc from it. */
1187 nc = cachep->array[cpu]; 1094 nc = cachep->array[cpu];
1188 cachep->array[cpu] = NULL; 1095 cachep->array[cpu] = NULL;
1189 n = cachep->node[node]; 1096 n = get_node(cachep, node);
1190 1097
1191 if (!n) 1098 if (!n)
1192 goto free_array_cache; 1099 goto free_array_cache;
@@ -1196,7 +1103,7 @@ static void cpuup_canceled(long cpu)
1196 /* Free limit for this kmem_cache_node */ 1103 /* Free limit for this kmem_cache_node */
1197 n->free_limit -= cachep->batchcount; 1104 n->free_limit -= cachep->batchcount;
1198 if (nc) 1105 if (nc)
1199 free_block(cachep, nc->entry, nc->avail, node); 1106 free_block(cachep, nc->entry, nc->avail, node, &list);
1200 1107
1201 if (!cpumask_empty(mask)) { 1108 if (!cpumask_empty(mask)) {
1202 spin_unlock_irq(&n->list_lock); 1109 spin_unlock_irq(&n->list_lock);
@@ -1206,7 +1113,7 @@ static void cpuup_canceled(long cpu)
1206 shared = n->shared; 1113 shared = n->shared;
1207 if (shared) { 1114 if (shared) {
1208 free_block(cachep, shared->entry, 1115 free_block(cachep, shared->entry,
1209 shared->avail, node); 1116 shared->avail, node, &list);
1210 n->shared = NULL; 1117 n->shared = NULL;
1211 } 1118 }
1212 1119
@@ -1221,6 +1128,7 @@ static void cpuup_canceled(long cpu)
1221 free_alien_cache(alien); 1128 free_alien_cache(alien);
1222 } 1129 }
1223free_array_cache: 1130free_array_cache:
1131 slabs_destroy(cachep, &list);
1224 kfree(nc); 1132 kfree(nc);
1225 } 1133 }
1226 /* 1134 /*
@@ -1229,7 +1137,7 @@ free_array_cache:
1229 * shrink each nodelist to its limit. 1137 * shrink each nodelist to its limit.
1230 */ 1138 */
1231 list_for_each_entry(cachep, &slab_caches, list) { 1139 list_for_each_entry(cachep, &slab_caches, list) {
1232 n = cachep->node[node]; 1140 n = get_node(cachep, node);
1233 if (!n) 1141 if (!n)
1234 continue; 1142 continue;
1235 drain_freelist(cachep, n, slabs_tofree(cachep, n)); 1143 drain_freelist(cachep, n, slabs_tofree(cachep, n));
@@ -1260,7 +1168,7 @@ static int cpuup_prepare(long cpu)
1260 list_for_each_entry(cachep, &slab_caches, list) { 1168 list_for_each_entry(cachep, &slab_caches, list) {
1261 struct array_cache *nc; 1169 struct array_cache *nc;
1262 struct array_cache *shared = NULL; 1170 struct array_cache *shared = NULL;
1263 struct array_cache **alien = NULL; 1171 struct alien_cache **alien = NULL;
1264 1172
1265 nc = alloc_arraycache(node, cachep->limit, 1173 nc = alloc_arraycache(node, cachep->limit,
1266 cachep->batchcount, GFP_KERNEL); 1174 cachep->batchcount, GFP_KERNEL);
@@ -1284,7 +1192,7 @@ static int cpuup_prepare(long cpu)
1284 } 1192 }
1285 } 1193 }
1286 cachep->array[cpu] = nc; 1194 cachep->array[cpu] = nc;
1287 n = cachep->node[node]; 1195 n = get_node(cachep, node);
1288 BUG_ON(!n); 1196 BUG_ON(!n);
1289 1197
1290 spin_lock_irq(&n->list_lock); 1198 spin_lock_irq(&n->list_lock);
@@ -1305,13 +1213,7 @@ static int cpuup_prepare(long cpu)
1305 spin_unlock_irq(&n->list_lock); 1213 spin_unlock_irq(&n->list_lock);
1306 kfree(shared); 1214 kfree(shared);
1307 free_alien_cache(alien); 1215 free_alien_cache(alien);
1308 if (cachep->flags & SLAB_DEBUG_OBJECTS)
1309 slab_set_debugobj_lock_classes_node(cachep, node);
1310 else if (!OFF_SLAB(cachep) &&
1311 !(cachep->flags & SLAB_DESTROY_BY_RCU))
1312 on_slab_lock_classes_node(cachep, node);
1313 } 1216 }
1314 init_node_lock_keys(node);
1315 1217
1316 return 0; 1218 return 0;
1317bad: 1219bad:
@@ -1395,7 +1297,7 @@ static int __meminit drain_cache_node_node(int node)
1395 list_for_each_entry(cachep, &slab_caches, list) { 1297 list_for_each_entry(cachep, &slab_caches, list) {
1396 struct kmem_cache_node *n; 1298 struct kmem_cache_node *n;
1397 1299
1398 n = cachep->node[node]; 1300 n = get_node(cachep, node);
1399 if (!n) 1301 if (!n)
1400 continue; 1302 continue;
1401 1303
@@ -1575,10 +1477,6 @@ void __init kmem_cache_init(void)
1575 1477
1576 memcpy(ptr, cpu_cache_get(kmem_cache), 1478 memcpy(ptr, cpu_cache_get(kmem_cache),
1577 sizeof(struct arraycache_init)); 1479 sizeof(struct arraycache_init));
1578 /*
1579 * Do not assume that spinlocks can be initialized via memcpy:
1580 */
1581 spin_lock_init(&ptr->lock);
1582 1480
1583 kmem_cache->array[smp_processor_id()] = ptr; 1481 kmem_cache->array[smp_processor_id()] = ptr;
1584 1482
@@ -1588,10 +1486,6 @@ void __init kmem_cache_init(void)
1588 != &initarray_generic.cache); 1486 != &initarray_generic.cache);
1589 memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]), 1487 memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
1590 sizeof(struct arraycache_init)); 1488 sizeof(struct arraycache_init));
1591 /*
1592 * Do not assume that spinlocks can be initialized via memcpy:
1593 */
1594 spin_lock_init(&ptr->lock);
1595 1489
1596 kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr; 1490 kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
1597 } 1491 }
@@ -1628,9 +1522,6 @@ void __init kmem_cache_init_late(void)
1628 BUG(); 1522 BUG();
1629 mutex_unlock(&slab_mutex); 1523 mutex_unlock(&slab_mutex);
1630 1524
1631 /* Annotate slab for lockdep -- annotate the malloc caches */
1632 init_lock_keys();
1633
1634 /* Done! */ 1525 /* Done! */
1635 slab_state = FULL; 1526 slab_state = FULL;
1636 1527
@@ -1690,14 +1581,10 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1690 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n", 1581 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
1691 cachep->name, cachep->size, cachep->gfporder); 1582 cachep->name, cachep->size, cachep->gfporder);
1692 1583
1693 for_each_online_node(node) { 1584 for_each_kmem_cache_node(cachep, node, n) {
1694 unsigned long active_objs = 0, num_objs = 0, free_objects = 0; 1585 unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
1695 unsigned long active_slabs = 0, num_slabs = 0; 1586 unsigned long active_slabs = 0, num_slabs = 0;
1696 1587
1697 n = cachep->node[node];
1698 if (!n)
1699 continue;
1700
1701 spin_lock_irqsave(&n->list_lock, flags); 1588 spin_lock_irqsave(&n->list_lock, flags);
1702 list_for_each_entry(page, &n->slabs_full, lru) { 1589 list_for_each_entry(page, &n->slabs_full, lru) {
1703 active_objs += cachep->num; 1590 active_objs += cachep->num;
@@ -1724,7 +1611,8 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1724} 1611}
1725 1612
1726/* 1613/*
1727 * Interface to system's page allocator. No need to hold the cache-lock. 1614 * Interface to system's page allocator. No need to hold the
1615 * kmem_cache_node ->list_lock.
1728 * 1616 *
1729 * If we requested dmaable memory, we will get it. Even if we 1617 * If we requested dmaable memory, we will get it. Even if we
1730 * did not request dmaable memory, we might get it, but that 1618 * did not request dmaable memory, we might get it, but that
@@ -2026,9 +1914,9 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
2026 * @cachep: cache pointer being destroyed 1914 * @cachep: cache pointer being destroyed
2027 * @page: page pointer being destroyed 1915 * @page: page pointer being destroyed
2028 * 1916 *
2029 * Destroy all the objs in a slab, and release the mem back to the system. 1917 * Destroy all the objs in a slab page, and release the mem back to the system.
2030 * Before calling the slab must have been unlinked from the cache. The 1918 * Before calling the slab page must have been unlinked from the cache. The
2031 * cache-lock is not held/needed. 1919 * kmem_cache_node ->list_lock is not held/needed.
2032 */ 1920 */
2033static void slab_destroy(struct kmem_cache *cachep, struct page *page) 1921static void slab_destroy(struct kmem_cache *cachep, struct page *page)
2034{ 1922{
@@ -2060,6 +1948,16 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
2060 kmem_cache_free(cachep->freelist_cache, freelist); 1948 kmem_cache_free(cachep->freelist_cache, freelist);
2061} 1949}
2062 1950
1951static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
1952{
1953 struct page *page, *n;
1954
1955 list_for_each_entry_safe(page, n, list, lru) {
1956 list_del(&page->lru);
1957 slab_destroy(cachep, page);
1958 }
1959}
1960
2063/** 1961/**
2064 * calculate_slab_order - calculate size (page order) of slabs 1962 * calculate_slab_order - calculate size (page order) of slabs
2065 * @cachep: pointer to the cache that is being created 1963 * @cachep: pointer to the cache that is being created
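
The hunk above introduces slabs_destroy(): free_block() now detaches fully-free slab pages onto a caller-supplied list while the node's list_lock is held, and the pages are only destroyed after the lock is dropped. A minimal userspace sketch of that collect-under-lock, free-after-unlock pattern, using a pthread mutex and a plain doubly linked list instead of the kernel's struct page/list_head machinery (all names below are illustrative, not kernel APIs):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
    struct item *next;
    struct item *prev;
    int payload;
};

/* A tiny circular doubly linked list, modeled loosely on list_head. */
static void list_init(struct item *head) { head->next = head->prev = head; }
static void list_add(struct item *head, struct item *it)
{
    it->next = head->next; it->prev = head;
    head->next->prev = it; head->next = it;
}
static void list_del(struct item *it)
{
    it->prev->next = it->next; it->next->prev = it->prev;
}
static int list_empty(struct item *head) { return head->next == head; }

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item pool;                 /* protected by pool_lock */

/* Move everything that should die onto 'graveyard' while holding the lock... */
static void detach_all(struct item *graveyard)
{
    pthread_mutex_lock(&pool_lock);
    while (!list_empty(&pool)) {
        struct item *it = pool.next;
        list_del(it);
        list_add(graveyard, it);         /* cheap: pure list surgery */
    }
    pthread_mutex_unlock(&pool_lock);
}

/* ...and do the expensive free() calls only after the lock is dropped. */
static void destroy_all(struct item *graveyard)
{
    while (!list_empty(graveyard)) {
        struct item *it = graveyard->next;
        list_del(it);
        free(it);
    }
}

int main(void)
{
    struct item graveyard;
    list_init(&pool);
    list_init(&graveyard);
    for (int i = 0; i < 4; i++) {
        struct item *it = malloc(sizeof(*it));
        if (!it)
            return 1;
        it->payload = i;
        list_add(&pool, it);
    }
    detach_all(&graveyard);
    destroy_all(&graveyard);
    puts("freed outside the lock");
    return 0;
}

Keeping slab_destroy() out of the locked region shortens list_lock hold times and lets the page-freeing path take locks of its own without nesting under the node lock; the later do_drain(), cache_flusharray() and drain_array() hunks all follow the same shape.
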
@@ -2405,17 +2303,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2405 return err; 2303 return err;
2406 } 2304 }
2407 2305
2408 if (flags & SLAB_DEBUG_OBJECTS) {
2409 /*
2410 * Would deadlock through slab_destroy()->call_rcu()->
2411 * debug_object_activate()->kmem_cache_alloc().
2412 */
2413 WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
2414
2415 slab_set_debugobj_lock_classes(cachep);
2416 } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
2417 on_slab_lock_classes(cachep);
2418
2419 return 0; 2306 return 0;
2420} 2307}
2421 2308
@@ -2434,7 +2321,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
2434{ 2321{
2435#ifdef CONFIG_SMP 2322#ifdef CONFIG_SMP
2436 check_irq_off(); 2323 check_irq_off();
2437 assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock); 2324 assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
2438#endif 2325#endif
2439} 2326}
2440 2327
@@ -2442,7 +2329,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2442{ 2329{
2443#ifdef CONFIG_SMP 2330#ifdef CONFIG_SMP
2444 check_irq_off(); 2331 check_irq_off();
2445 assert_spin_locked(&cachep->node[node]->list_lock); 2332 assert_spin_locked(&get_node(cachep, node)->list_lock);
2446#endif 2333#endif
2447} 2334}
2448 2335
@@ -2462,12 +2349,16 @@ static void do_drain(void *arg)
2462 struct kmem_cache *cachep = arg; 2349 struct kmem_cache *cachep = arg;
2463 struct array_cache *ac; 2350 struct array_cache *ac;
2464 int node = numa_mem_id(); 2351 int node = numa_mem_id();
2352 struct kmem_cache_node *n;
2353 LIST_HEAD(list);
2465 2354
2466 check_irq_off(); 2355 check_irq_off();
2467 ac = cpu_cache_get(cachep); 2356 ac = cpu_cache_get(cachep);
2468 spin_lock(&cachep->node[node]->list_lock); 2357 n = get_node(cachep, node);
2469 free_block(cachep, ac->entry, ac->avail, node); 2358 spin_lock(&n->list_lock);
2470 spin_unlock(&cachep->node[node]->list_lock); 2359 free_block(cachep, ac->entry, ac->avail, node, &list);
2360 spin_unlock(&n->list_lock);
2361 slabs_destroy(cachep, &list);
2471 ac->avail = 0; 2362 ac->avail = 0;
2472} 2363}
2473 2364
@@ -2478,17 +2369,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
2478 2369
2479 on_each_cpu(do_drain, cachep, 1); 2370 on_each_cpu(do_drain, cachep, 1);
2480 check_irq_on(); 2371 check_irq_on();
2481 for_each_online_node(node) { 2372 for_each_kmem_cache_node(cachep, node, n)
2482 n = cachep->node[node]; 2373 if (n->alien)
2483 if (n && n->alien)
2484 drain_alien_cache(cachep, n->alien); 2374 drain_alien_cache(cachep, n->alien);
2485 }
2486 2375
2487 for_each_online_node(node) { 2376 for_each_kmem_cache_node(cachep, node, n)
2488 n = cachep->node[node]; 2377 drain_array(cachep, n, n->shared, 1, node);
2489 if (n)
2490 drain_array(cachep, n, n->shared, 1, node);
2491 }
2492} 2378}
2493 2379
2494/* 2380/*
@@ -2534,17 +2420,14 @@ out:
2534 2420
2535int __kmem_cache_shrink(struct kmem_cache *cachep) 2421int __kmem_cache_shrink(struct kmem_cache *cachep)
2536{ 2422{
2537 int ret = 0, i = 0; 2423 int ret = 0;
2424 int node;
2538 struct kmem_cache_node *n; 2425 struct kmem_cache_node *n;
2539 2426
2540 drain_cpu_caches(cachep); 2427 drain_cpu_caches(cachep);
2541 2428
2542 check_irq_on(); 2429 check_irq_on();
2543 for_each_online_node(i) { 2430 for_each_kmem_cache_node(cachep, node, n) {
2544 n = cachep->node[i];
2545 if (!n)
2546 continue;
2547
2548 drain_freelist(cachep, n, slabs_tofree(cachep, n)); 2431 drain_freelist(cachep, n, slabs_tofree(cachep, n));
2549 2432
2550 ret += !list_empty(&n->slabs_full) || 2433 ret += !list_empty(&n->slabs_full) ||
@@ -2566,13 +2449,11 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
2566 kfree(cachep->array[i]); 2449 kfree(cachep->array[i]);
2567 2450
2568 /* NUMA: free the node structures */ 2451 /* NUMA: free the node structures */
2569 for_each_online_node(i) { 2452 for_each_kmem_cache_node(cachep, i, n) {
2570 n = cachep->node[i]; 2453 kfree(n->shared);
2571 if (n) { 2454 free_alien_cache(n->alien);
2572 kfree(n->shared); 2455 kfree(n);
2573 free_alien_cache(n->alien); 2456 cachep->node[i] = NULL;
2574 kfree(n);
2575 }
2576 } 2457 }
2577 return 0; 2458 return 0;
2578} 2459}
@@ -2751,7 +2632,7 @@ static int cache_grow(struct kmem_cache *cachep,
2751 2632
2752 /* Take the node list lock to change the colour_next on this node */ 2633 /* Take the node list lock to change the colour_next on this node */
2753 check_irq_off(); 2634 check_irq_off();
2754 n = cachep->node[nodeid]; 2635 n = get_node(cachep, nodeid);
2755 spin_lock(&n->list_lock); 2636 spin_lock(&n->list_lock);
2756 2637
2757 /* Get colour for the slab, and cal the next value. */ 2638 /* Get colour for the slab, and cal the next value. */
@@ -2920,7 +2801,7 @@ retry:
2920 */ 2801 */
2921 batchcount = BATCHREFILL_LIMIT; 2802 batchcount = BATCHREFILL_LIMIT;
2922 } 2803 }
2923 n = cachep->node[node]; 2804 n = get_node(cachep, node);
2924 2805
2925 BUG_ON(ac->avail > 0 || !n); 2806 BUG_ON(ac->avail > 0 || !n);
2926 spin_lock(&n->list_lock); 2807 spin_lock(&n->list_lock);
@@ -3060,7 +2941,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3060 2941
3061static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) 2942static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3062{ 2943{
3063 if (cachep == kmem_cache) 2944 if (unlikely(cachep == kmem_cache))
3064 return false; 2945 return false;
3065 2946
3066 return should_failslab(cachep->object_size, flags, cachep->flags); 2947 return should_failslab(cachep->object_size, flags, cachep->flags);
@@ -3169,8 +3050,8 @@ retry:
3169 nid = zone_to_nid(zone); 3050 nid = zone_to_nid(zone);
3170 3051
3171 if (cpuset_zone_allowed_hardwall(zone, flags) && 3052 if (cpuset_zone_allowed_hardwall(zone, flags) &&
3172 cache->node[nid] && 3053 get_node(cache, nid) &&
3173 cache->node[nid]->free_objects) { 3054 get_node(cache, nid)->free_objects) {
3174 obj = ____cache_alloc_node(cache, 3055 obj = ____cache_alloc_node(cache,
3175 flags | GFP_THISNODE, nid); 3056 flags | GFP_THISNODE, nid);
3176 if (obj) 3057 if (obj)
@@ -3233,7 +3114,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3233 int x; 3114 int x;
3234 3115
3235 VM_BUG_ON(nodeid > num_online_nodes()); 3116 VM_BUG_ON(nodeid > num_online_nodes());
3236 n = cachep->node[nodeid]; 3117 n = get_node(cachep, nodeid);
3237 BUG_ON(!n); 3118 BUG_ON(!n);
3238 3119
3239retry: 3120retry:
@@ -3304,7 +3185,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3304 if (nodeid == NUMA_NO_NODE) 3185 if (nodeid == NUMA_NO_NODE)
3305 nodeid = slab_node; 3186 nodeid = slab_node;
3306 3187
3307 if (unlikely(!cachep->node[nodeid])) { 3188 if (unlikely(!get_node(cachep, nodeid))) {
3308 /* Node not bootstrapped yet */ 3189 /* Node not bootstrapped yet */
3309 ptr = fallback_alloc(cachep, flags); 3190 ptr = fallback_alloc(cachep, flags);
3310 goto out; 3191 goto out;
@@ -3405,12 +3286,13 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
3405 3286
3406/* 3287/*
3407 * Caller needs to acquire correct kmem_cache_node's list_lock 3288 * Caller needs to acquire correct kmem_cache_node's list_lock
3289 * @list: List of detached free slabs should be freed by caller
3408 */ 3290 */
3409static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, 3291static void free_block(struct kmem_cache *cachep, void **objpp,
3410 int node) 3292 int nr_objects, int node, struct list_head *list)
3411{ 3293{
3412 int i; 3294 int i;
3413 struct kmem_cache_node *n; 3295 struct kmem_cache_node *n = get_node(cachep, node);
3414 3296
3415 for (i = 0; i < nr_objects; i++) { 3297 for (i = 0; i < nr_objects; i++) {
3416 void *objp; 3298 void *objp;
@@ -3420,7 +3302,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3420 objp = objpp[i]; 3302 objp = objpp[i];
3421 3303
3422 page = virt_to_head_page(objp); 3304 page = virt_to_head_page(objp);
3423 n = cachep->node[node];
3424 list_del(&page->lru); 3305 list_del(&page->lru);
3425 check_spinlock_acquired_node(cachep, node); 3306 check_spinlock_acquired_node(cachep, node);
3426 slab_put_obj(cachep, page, objp, node); 3307 slab_put_obj(cachep, page, objp, node);
@@ -3431,13 +3312,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3431 if (page->active == 0) { 3312 if (page->active == 0) {
3432 if (n->free_objects > n->free_limit) { 3313 if (n->free_objects > n->free_limit) {
3433 n->free_objects -= cachep->num; 3314 n->free_objects -= cachep->num;
3434 /* No need to drop any previously held 3315 list_add_tail(&page->lru, list);
3435 * lock here, even if we have a off-slab slab
3436 * descriptor it is guaranteed to come from
3437 * a different cache, refer to comments before
3438 * alloc_slabmgmt.
3439 */
3440 slab_destroy(cachep, page);
3441 } else { 3316 } else {
3442 list_add(&page->lru, &n->slabs_free); 3317 list_add(&page->lru, &n->slabs_free);
3443 } 3318 }
@@ -3456,13 +3331,14 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3456 int batchcount; 3331 int batchcount;
3457 struct kmem_cache_node *n; 3332 struct kmem_cache_node *n;
3458 int node = numa_mem_id(); 3333 int node = numa_mem_id();
3334 LIST_HEAD(list);
3459 3335
3460 batchcount = ac->batchcount; 3336 batchcount = ac->batchcount;
3461#if DEBUG 3337#if DEBUG
3462 BUG_ON(!batchcount || batchcount > ac->avail); 3338 BUG_ON(!batchcount || batchcount > ac->avail);
3463#endif 3339#endif
3464 check_irq_off(); 3340 check_irq_off();
3465 n = cachep->node[node]; 3341 n = get_node(cachep, node);
3466 spin_lock(&n->list_lock); 3342 spin_lock(&n->list_lock);
3467 if (n->shared) { 3343 if (n->shared) {
3468 struct array_cache *shared_array = n->shared; 3344 struct array_cache *shared_array = n->shared;
@@ -3477,7 +3353,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3477 } 3353 }
3478 } 3354 }
3479 3355
3480 free_block(cachep, ac->entry, batchcount, node); 3356 free_block(cachep, ac->entry, batchcount, node, &list);
3481free_done: 3357free_done:
3482#if STATS 3358#if STATS
3483 { 3359 {
@@ -3498,6 +3374,7 @@ free_done:
3498 } 3374 }
3499#endif 3375#endif
3500 spin_unlock(&n->list_lock); 3376 spin_unlock(&n->list_lock);
3377 slabs_destroy(cachep, &list);
3501 ac->avail -= batchcount; 3378 ac->avail -= batchcount;
3502 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); 3379 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3503} 3380}
@@ -3754,7 +3631,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
3754 int node; 3631 int node;
3755 struct kmem_cache_node *n; 3632 struct kmem_cache_node *n;
3756 struct array_cache *new_shared; 3633 struct array_cache *new_shared;
3757 struct array_cache **new_alien = NULL; 3634 struct alien_cache **new_alien = NULL;
3758 3635
3759 for_each_online_node(node) { 3636 for_each_online_node(node) {
3760 3637
@@ -3775,15 +3652,16 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
3775 } 3652 }
3776 } 3653 }
3777 3654
3778 n = cachep->node[node]; 3655 n = get_node(cachep, node);
3779 if (n) { 3656 if (n) {
3780 struct array_cache *shared = n->shared; 3657 struct array_cache *shared = n->shared;
3658 LIST_HEAD(list);
3781 3659
3782 spin_lock_irq(&n->list_lock); 3660 spin_lock_irq(&n->list_lock);
3783 3661
3784 if (shared) 3662 if (shared)
3785 free_block(cachep, shared->entry, 3663 free_block(cachep, shared->entry,
3786 shared->avail, node); 3664 shared->avail, node, &list);
3787 3665
3788 n->shared = new_shared; 3666 n->shared = new_shared;
3789 if (!n->alien) { 3667 if (!n->alien) {
@@ -3793,6 +3671,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
3793 n->free_limit = (1 + nr_cpus_node(node)) * 3671 n->free_limit = (1 + nr_cpus_node(node)) *
3794 cachep->batchcount + cachep->num; 3672 cachep->batchcount + cachep->num;
3795 spin_unlock_irq(&n->list_lock); 3673 spin_unlock_irq(&n->list_lock);
3674 slabs_destroy(cachep, &list);
3796 kfree(shared); 3675 kfree(shared);
3797 free_alien_cache(new_alien); 3676 free_alien_cache(new_alien);
3798 continue; 3677 continue;
@@ -3820,9 +3699,8 @@ fail:
3820 /* Cache is not active yet. Roll back what we did */ 3699 /* Cache is not active yet. Roll back what we did */
3821 node--; 3700 node--;
3822 while (node >= 0) { 3701 while (node >= 0) {
3823 if (cachep->node[node]) { 3702 n = get_node(cachep, node);
3824 n = cachep->node[node]; 3703 if (n) {
3825
3826 kfree(n->shared); 3704 kfree(n->shared);
3827 free_alien_cache(n->alien); 3705 free_alien_cache(n->alien);
3828 kfree(n); 3706 kfree(n);
@@ -3883,12 +3761,20 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
3883 cachep->shared = shared; 3761 cachep->shared = shared;
3884 3762
3885 for_each_online_cpu(i) { 3763 for_each_online_cpu(i) {
3764 LIST_HEAD(list);
3886 struct array_cache *ccold = new->new[i]; 3765 struct array_cache *ccold = new->new[i];
3766 int node;
3767 struct kmem_cache_node *n;
3768
3887 if (!ccold) 3769 if (!ccold)
3888 continue; 3770 continue;
3889 spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); 3771
3890 free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i)); 3772 node = cpu_to_mem(i);
3891 spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); 3773 n = get_node(cachep, node);
3774 spin_lock_irq(&n->list_lock);
3775 free_block(cachep, ccold->entry, ccold->avail, node, &list);
3776 spin_unlock_irq(&n->list_lock);
3777 slabs_destroy(cachep, &list);
3892 kfree(ccold); 3778 kfree(ccold);
3893 } 3779 }
3894 kfree(new); 3780 kfree(new);
@@ -3996,6 +3882,7 @@ skip_setup:
3996static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n, 3882static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
3997 struct array_cache *ac, int force, int node) 3883 struct array_cache *ac, int force, int node)
3998{ 3884{
3885 LIST_HEAD(list);
3999 int tofree; 3886 int tofree;
4000 3887
4001 if (!ac || !ac->avail) 3888 if (!ac || !ac->avail)
@@ -4008,12 +3895,13 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
4008 tofree = force ? ac->avail : (ac->limit + 4) / 5; 3895 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4009 if (tofree > ac->avail) 3896 if (tofree > ac->avail)
4010 tofree = (ac->avail + 1) / 2; 3897 tofree = (ac->avail + 1) / 2;
4011 free_block(cachep, ac->entry, tofree, node); 3898 free_block(cachep, ac->entry, tofree, node, &list);
4012 ac->avail -= tofree; 3899 ac->avail -= tofree;
4013 memmove(ac->entry, &(ac->entry[tofree]), 3900 memmove(ac->entry, &(ac->entry[tofree]),
4014 sizeof(void *) * ac->avail); 3901 sizeof(void *) * ac->avail);
4015 } 3902 }
4016 spin_unlock_irq(&n->list_lock); 3903 spin_unlock_irq(&n->list_lock);
3904 slabs_destroy(cachep, &list);
4017 } 3905 }
4018} 3906}
4019 3907
@@ -4048,7 +3936,7 @@ static void cache_reap(struct work_struct *w)
4048 * have established with reasonable certainty that 3936 * have established with reasonable certainty that
4049 * we can do some work if the lock was obtained. 3937 * we can do some work if the lock was obtained.
4050 */ 3938 */
4051 n = searchp->node[node]; 3939 n = get_node(searchp, node);
4052 3940
4053 reap_alien(searchp, n); 3941 reap_alien(searchp, n);
4054 3942
@@ -4100,10 +3988,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
4100 3988
4101 active_objs = 0; 3989 active_objs = 0;
4102 num_slabs = 0; 3990 num_slabs = 0;
4103 for_each_online_node(node) { 3991 for_each_kmem_cache_node(cachep, node, n) {
4104 n = cachep->node[node];
4105 if (!n)
4106 continue;
4107 3992
4108 check_irq_on(); 3993 check_irq_on();
4109 spin_lock_irq(&n->list_lock); 3994 spin_lock_irq(&n->list_lock);
@@ -4328,10 +4213,7 @@ static int leaks_show(struct seq_file *m, void *p)
4328 4213
4329 x[1] = 0; 4214 x[1] = 0;
4330 4215
4331 for_each_online_node(node) { 4216 for_each_kmem_cache_node(cachep, node, n) {
4332 n = cachep->node[node];
4333 if (!n)
4334 continue;
4335 4217
4336 check_irq_on(); 4218 check_irq_on();
4337 spin_lock_irq(&n->list_lock); 4219 spin_lock_irq(&n->list_lock);
diff --git a/mm/slab.h b/mm/slab.h
index 961a3fb1f5a2..0e0fdd365840 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -256,13 +256,12 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
256 return cachep; 256 return cachep;
257 257
258 pr_err("%s: Wrong slab cache. %s but object is from %s\n", 258 pr_err("%s: Wrong slab cache. %s but object is from %s\n",
259 __FUNCTION__, cachep->name, s->name); 259 __func__, cachep->name, s->name);
260 WARN_ON_ONCE(1); 260 WARN_ON_ONCE(1);
261 return s; 261 return s;
262} 262}
263#endif
264
265 263
264#ifndef CONFIG_SLOB
266/* 265/*
267 * The slab lists for all objects. 266 * The slab lists for all objects.
268 */ 267 */
@@ -277,7 +276,7 @@ struct kmem_cache_node {
277 unsigned int free_limit; 276 unsigned int free_limit;
278 unsigned int colour_next; /* Per-node cache coloring */ 277 unsigned int colour_next; /* Per-node cache coloring */
279 struct array_cache *shared; /* shared per node */ 278 struct array_cache *shared; /* shared per node */
280 struct array_cache **alien; /* on other nodes */ 279 struct alien_cache **alien; /* on other nodes */
281 unsigned long next_reap; /* updated without locking */ 280 unsigned long next_reap; /* updated without locking */
282 int free_touched; /* updated without locking */ 281 int free_touched; /* updated without locking */
283#endif 282#endif
@@ -294,5 +293,22 @@ struct kmem_cache_node {
294 293
295}; 294};
296 295
296static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
297{
298 return s->node[node];
299}
300
301/*
302 * Iterator over all nodes. The body will be executed for each node that has
303 * a kmem_cache_node structure allocated (which is true for all online nodes)
304 */
305#define for_each_kmem_cache_node(__s, __node, __n) \
306 for (__node = 0; __n = get_node(__s, __node), __node < nr_node_ids; __node++) \
307 if (__n)
308
309#endif
310
297void *slab_next(struct seq_file *m, void *p, loff_t *pos); 311void *slab_next(struct seq_file *m, void *p, loff_t *pos);
298void slab_stop(struct seq_file *m, void *p); 312void slab_stop(struct seq_file *m, void *p);
313
314#endif /* MM_SLAB_H */
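
mm/slab.h now provides get_node() and for_each_kmem_cache_node(), so callers stop open-coding the array lookup plus NULL check. The macro refreshes the node pointer through the comma operator in the loop condition and filters unallocated slots with a trailing if. A standalone sketch of the same shape over an invented sparse pointer array (the bound is checked before the lookup here simply to keep the sketch strictly in-bounds):

#include <stdio.h>

#define MAX_NODES 8

struct node_info { long objects; };

static struct node_info *nodes[MAX_NODES];   /* sparse: some slots stay NULL */

/*
 * Refresh __n via the comma operator each iteration and skip NULL slots
 * with the trailing if, mirroring for_each_kmem_cache_node().
 */
#define for_each_present_node(__i, __n)                                  \
    for (__i = 0; (__i) < MAX_NODES && ((__n) = nodes[__i], 1); (__i)++) \
        if (__n)

int main(void)
{
    static struct node_info a = { 10 }, b = { 32 };
    struct node_info *n;
    int i;
    long total = 0;

    nodes[1] = &a;
    nodes[5] = &b;

    for_each_present_node(i, n)
        total += n->objects;

    printf("total objects: %ld\n", total);   /* 42 */
    return 0;
}

Because both SLAB and SLUB need the helper, moving it into slab.h lets the dozens of for_each_online_node()/for_each_node_state() loops in the rest of this patch collapse into one-liners.
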
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d31c4bacc6a2..d319502b2403 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -19,6 +19,8 @@
19#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
20#include <asm/page.h> 20#include <asm/page.h>
21#include <linux/memcontrol.h> 21#include <linux/memcontrol.h>
22
23#define CREATE_TRACE_POINTS
22#include <trace/events/kmem.h> 24#include <trace/events/kmem.h>
23 25
24#include "slab.h" 26#include "slab.h"
@@ -787,3 +789,102 @@ static int __init slab_proc_init(void)
787} 789}
788module_init(slab_proc_init); 790module_init(slab_proc_init);
789#endif /* CONFIG_SLABINFO */ 791#endif /* CONFIG_SLABINFO */
792
793static __always_inline void *__do_krealloc(const void *p, size_t new_size,
794 gfp_t flags)
795{
796 void *ret;
797 size_t ks = 0;
798
799 if (p)
800 ks = ksize(p);
801
802 if (ks >= new_size)
803 return (void *)p;
804
805 ret = kmalloc_track_caller(new_size, flags);
806 if (ret && p)
807 memcpy(ret, p, ks);
808
809 return ret;
810}
811
812/**
813 * __krealloc - like krealloc() but don't free @p.
814 * @p: object to reallocate memory for.
815 * @new_size: how many bytes of memory are required.
816 * @flags: the type of memory to allocate.
817 *
818 * This function is like krealloc() except it never frees the originally
819 * allocated buffer. Use this if you don't want to free the buffer immediately
820 * like, for example, with RCU.
821 */
822void *__krealloc(const void *p, size_t new_size, gfp_t flags)
823{
824 if (unlikely(!new_size))
825 return ZERO_SIZE_PTR;
826
827 return __do_krealloc(p, new_size, flags);
828
829}
830EXPORT_SYMBOL(__krealloc);
831
832/**
833 * krealloc - reallocate memory. The contents will remain unchanged.
834 * @p: object to reallocate memory for.
835 * @new_size: how many bytes of memory are required.
836 * @flags: the type of memory to allocate.
837 *
838 * The contents of the object pointed to are preserved up to the
839 * lesser of the new and old sizes. If @p is %NULL, krealloc()
840 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
841 * %NULL pointer, the object pointed to is freed.
842 */
843void *krealloc(const void *p, size_t new_size, gfp_t flags)
844{
845 void *ret;
846
847 if (unlikely(!new_size)) {
848 kfree(p);
849 return ZERO_SIZE_PTR;
850 }
851
852 ret = __do_krealloc(p, new_size, flags);
853 if (ret && p != ret)
854 kfree(p);
855
856 return ret;
857}
858EXPORT_SYMBOL(krealloc);
859
860/**
861 * kzfree - like kfree but zero memory
862 * @p: object to free memory of
863 *
864 * The memory of the object @p points to is zeroed before freed.
865 * If @p is %NULL, kzfree() does nothing.
866 *
867 * Note: this function zeroes the whole allocated buffer which can be a good
868 * deal bigger than the requested buffer size passed to kmalloc(). So be
869 * careful when using this function in performance sensitive code.
870 */
871void kzfree(const void *p)
872{
873 size_t ks;
874 void *mem = (void *)p;
875
876 if (unlikely(ZERO_OR_NULL_PTR(mem)))
877 return;
878 ks = ksize(mem);
879 memset(mem, 0, ks);
880 kfree(mem);
881}
882EXPORT_SYMBOL(kzfree);
883
884/* Tracepoints definitions. */
885EXPORT_TRACEPOINT_SYMBOL(kmalloc);
886EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
887EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
888EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
889EXPORT_TRACEPOINT_SYMBOL(kfree);
890EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
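
The krealloc()/__krealloc()/kzfree() helpers, previously in mm/util.c, now live next to the allocator in mm/slab_common.c, together with the kmem tracepoint definitions. Outside the kernel there is no ksize(), so a userspace analogue has to carry the usable size itself; the sketch below models the same reuse-if-big-enough / copy-then-free and zero-before-free behaviour with an explicit size header (names are illustrative and the zero-length corner cases are omitted):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct hdr { size_t size; };

static void *xalloc(size_t size)
{
    struct hdr *h = malloc(sizeof(*h) + size);
    if (!h)
        return NULL;
    h->size = size;
    return h + 1;
}

static size_t xsize(const void *p) { return ((const struct hdr *)p - 1)->size; }
static void xfree(void *p) { if (p) free((struct hdr *)p - 1); }

/* Like krealloc(): reuse the buffer when it is already big enough. */
static void *xrealloc(void *p, size_t new_size)
{
    void *ret;

    if (p && xsize(p) >= new_size)
        return p;
    ret = xalloc(new_size);
    if (ret && p) {
        memcpy(ret, p, xsize(p));
        xfree(p);
    }
    return ret;
}

/* Like kzfree(): scrub the whole usable buffer before releasing it. */
static void xzfree(void *p)
{
    if (!p)
        return;
    memset(p, 0, xsize(p));
    xfree(p);
}

int main(void)
{
    char *s = xalloc(8);
    if (!s)
        return 1;
    strcpy(s, "secret");
    s = xrealloc(s, 4);          /* 8 >= 4: the same buffer comes back */
    s = xrealloc(s, 64);         /* grows: contents are copied over */
    printf("%s (%zu usable)\n", s, xsize(s));
    xzfree(s);                   /* zeroed before free */
    return 0;
}
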
diff --git a/mm/slub.c b/mm/slub.c
index 73004808537e..3e8afcc07a76 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -233,11 +233,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
233 * Core slab cache functions 233 * Core slab cache functions
234 *******************************************************************/ 234 *******************************************************************/
235 235
236static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
237{
238 return s->node[node];
239}
240
241/* Verify that a pointer has an address that is valid within a slab page */ 236/* Verify that a pointer has an address that is valid within a slab page */
242static inline int check_valid_pointer(struct kmem_cache *s, 237static inline int check_valid_pointer(struct kmem_cache *s,
243 struct page *page, const void *object) 238 struct page *page, const void *object)
@@ -288,6 +283,10 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
288 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ 283 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
289 __p += (__s)->size) 284 __p += (__s)->size)
290 285
286#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
287 for (__p = (__addr), __idx = 1; __idx <= __objects;\
288 __p += (__s)->size, __idx++)
289
291/* Determine object index from a given position */ 290/* Determine object index from a given position */
292static inline int slab_index(void *p, struct kmem_cache *s, void *addr) 291static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
293{ 292{
@@ -382,9 +381,9 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
382 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 381 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
383 if (s->flags & __CMPXCHG_DOUBLE) { 382 if (s->flags & __CMPXCHG_DOUBLE) {
384 if (cmpxchg_double(&page->freelist, &page->counters, 383 if (cmpxchg_double(&page->freelist, &page->counters,
385 freelist_old, counters_old, 384 freelist_old, counters_old,
386 freelist_new, counters_new)) 385 freelist_new, counters_new))
387 return 1; 386 return 1;
388 } else 387 } else
389#endif 388#endif
390 { 389 {
@@ -418,9 +417,9 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
418 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 417 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
419 if (s->flags & __CMPXCHG_DOUBLE) { 418 if (s->flags & __CMPXCHG_DOUBLE) {
420 if (cmpxchg_double(&page->freelist, &page->counters, 419 if (cmpxchg_double(&page->freelist, &page->counters,
421 freelist_old, counters_old, 420 freelist_old, counters_old,
422 freelist_new, counters_new)) 421 freelist_new, counters_new))
423 return 1; 422 return 1;
424 } else 423 } else
425#endif 424#endif
426 { 425 {
@@ -945,60 +944,6 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
945} 944}
946 945
947/* 946/*
948 * Hooks for other subsystems that check memory allocations. In a typical
949 * production configuration these hooks all should produce no code at all.
950 */
951static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
952{
953 kmemleak_alloc(ptr, size, 1, flags);
954}
955
956static inline void kfree_hook(const void *x)
957{
958 kmemleak_free(x);
959}
960
961static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
962{
963 flags &= gfp_allowed_mask;
964 lockdep_trace_alloc(flags);
965 might_sleep_if(flags & __GFP_WAIT);
966
967 return should_failslab(s->object_size, flags, s->flags);
968}
969
970static inline void slab_post_alloc_hook(struct kmem_cache *s,
971 gfp_t flags, void *object)
972{
973 flags &= gfp_allowed_mask;
974 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
975 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
976}
977
978static inline void slab_free_hook(struct kmem_cache *s, void *x)
979{
980 kmemleak_free_recursive(x, s->flags);
981
982 /*
983 * Trouble is that we may no longer disable interrupts in the fast path
984 * So in order to make the debug calls that expect irqs to be
985 * disabled we need to disable interrupts temporarily.
986 */
987#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
988 {
989 unsigned long flags;
990
991 local_irq_save(flags);
992 kmemcheck_slab_free(s, x, s->object_size);
993 debug_check_no_locks_freed(x, s->object_size);
994 local_irq_restore(flags);
995 }
996#endif
997 if (!(s->flags & SLAB_DEBUG_OBJECTS))
998 debug_check_no_obj_freed(x, s->object_size);
999}
1000
1001/*
1002 * Tracking of fully allocated slabs for debugging purposes. 947 * Tracking of fully allocated slabs for debugging purposes.
1003 */ 948 */
1004static void add_full(struct kmem_cache *s, 949static void add_full(struct kmem_cache *s,
@@ -1282,6 +1227,12 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
1282static inline void dec_slabs_node(struct kmem_cache *s, int node, 1227static inline void dec_slabs_node(struct kmem_cache *s, int node,
1283 int objects) {} 1228 int objects) {}
1284 1229
1230#endif /* CONFIG_SLUB_DEBUG */
1231
1232/*
1233 * Hooks for other subsystems that check memory allocations. In a typical
1234 * production configuration these hooks all should produce no code at all.
1235 */
1285static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) 1236static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1286{ 1237{
1287 kmemleak_alloc(ptr, size, 1, flags); 1238 kmemleak_alloc(ptr, size, 1, flags);
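
In mm/slub.c the kmemleak/kmemcheck/failslab hooks move out of the CONFIG_SLUB_DEBUG block so the call sites always compile; only the hook bodies depend on the individual debugging features, and in a production build they shrink to nothing. A small sketch of that pattern, where the hook always exists but collapses unless a feature macro is defined (the TRACE_ALLOC macro is invented for the example):

#include <stdio.h>
#include <stdlib.h>

/* Build with -DTRACE_ALLOC to get output; without it the hooks compile away. */
static inline void alloc_hook(void *obj, size_t size)
{
#ifdef TRACE_ALLOC
    fprintf(stderr, "alloc %p (%zu bytes)\n", obj, size);
#else
    (void)obj;
    (void)size;
#endif
}

static inline void free_hook(void *obj)
{
#ifdef TRACE_ALLOC
    fprintf(stderr, "free  %p\n", obj);
#else
    (void)obj;
#endif
}

/* Call sites stay identical in both configurations. */
int main(void)
{
    void *p = malloc(32);
    alloc_hook(p, 32);
    free_hook(p);
    free(p);
    return 0;
}
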
@@ -1293,21 +1244,44 @@ static inline void kfree_hook(const void *x)
1293} 1244}
1294 1245
1295static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) 1246static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1296 { return 0; } 1247{
1248 flags &= gfp_allowed_mask;
1249 lockdep_trace_alloc(flags);
1250 might_sleep_if(flags & __GFP_WAIT);
1251
1252 return should_failslab(s->object_size, flags, s->flags);
1253}
1297 1254
1298static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, 1255static inline void slab_post_alloc_hook(struct kmem_cache *s,
1299 void *object) 1256 gfp_t flags, void *object)
1300{ 1257{
1301 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, 1258 flags &= gfp_allowed_mask;
1302 flags & gfp_allowed_mask); 1259 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
1260 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
1303} 1261}
1304 1262
1305static inline void slab_free_hook(struct kmem_cache *s, void *x) 1263static inline void slab_free_hook(struct kmem_cache *s, void *x)
1306{ 1264{
1307 kmemleak_free_recursive(x, s->flags); 1265 kmemleak_free_recursive(x, s->flags);
1308}
1309 1266
1310#endif /* CONFIG_SLUB_DEBUG */ 1267 /*
1268 * Trouble is that we may no longer disable interrupts in the fast path
1269 * So in order to make the debug calls that expect irqs to be
1270 * disabled we need to disable interrupts temporarily.
1271 */
1272#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
1273 {
1274 unsigned long flags;
1275
1276 local_irq_save(flags);
1277 kmemcheck_slab_free(s, x, s->object_size);
1278 debug_check_no_locks_freed(x, s->object_size);
1279 local_irq_restore(flags);
1280 }
1281#endif
1282 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1283 debug_check_no_obj_freed(x, s->object_size);
1284}
1311 1285
1312/* 1286/*
1313 * Slab allocation and freeing 1287 * Slab allocation and freeing
@@ -1409,9 +1383,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1409{ 1383{
1410 struct page *page; 1384 struct page *page;
1411 void *start; 1385 void *start;
1412 void *last;
1413 void *p; 1386 void *p;
1414 int order; 1387 int order;
1388 int idx;
1415 1389
1416 BUG_ON(flags & GFP_SLAB_BUG_MASK); 1390 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1417 1391
@@ -1432,14 +1406,13 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1432 if (unlikely(s->flags & SLAB_POISON)) 1406 if (unlikely(s->flags & SLAB_POISON))
1433 memset(start, POISON_INUSE, PAGE_SIZE << order); 1407 memset(start, POISON_INUSE, PAGE_SIZE << order);
1434 1408
1435 last = start; 1409 for_each_object_idx(p, idx, s, start, page->objects) {
1436 for_each_object(p, s, start, page->objects) { 1410 setup_object(s, page, p);
1437 setup_object(s, page, last); 1411 if (likely(idx < page->objects))
1438 set_freepointer(s, last, p); 1412 set_freepointer(s, p, p + s->size);
1439 last = p; 1413 else
1414 set_freepointer(s, p, NULL);
1440 } 1415 }
1441 setup_object(s, page, last);
1442 set_freepointer(s, last, NULL);
1443 1416
1444 page->freelist = start; 1417 page->freelist = start;
1445 page->inuse = page->objects; 1418 page->inuse = page->objects;
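
new_slab() now uses for_each_object_idx() and the running index to decide whether the current object is the last one, instead of trailing a "last" pointer and patching it up after the loop: each object's free pointer simply points at the next object, and the final one gets NULL. A self-contained sketch that threads a freelist through a contiguous block of fixed-size objects the same way (the layout and helpers below are illustrative, not the real SLUB metadata):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define OBJ_SIZE   64u   /* object size; the free pointer lives at offset 0 */
#define NR_OBJECTS 8u

/* Write the free pointer at the start of an object (memcpy avoids aliasing issues). */
static void set_freepointer(char *obj, void *next)
{
    memcpy(obj, &next, sizeof(next));
}

static void *get_freepointer(char *obj)
{
    void *next;
    memcpy(&next, obj, sizeof(next));
    return next;
}

int main(void)
{
    char *start = calloc(NR_OBJECTS, OBJ_SIZE);
    char *p;
    unsigned int idx;
    unsigned int count = 0;

    if (!start)
        return 1;

    /* Index-based construction: the last object terminates the list. */
    for (p = start, idx = 1; idx <= NR_OBJECTS; p += OBJ_SIZE, idx++) {
        if (idx < NR_OBJECTS)
            set_freepointer(p, p + OBJ_SIZE);
        else
            set_freepointer(p, NULL);
    }

    /* Walk the freelist to show it covers every object exactly once. */
    for (p = start; p; p = get_freepointer(p))
        count++;
    printf("%u objects on the freelist\n", count);

    free(start);
    return 0;
}
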
@@ -2162,6 +2135,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2162 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL, 2135 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2163 DEFAULT_RATELIMIT_BURST); 2136 DEFAULT_RATELIMIT_BURST);
2164 int node; 2137 int node;
2138 struct kmem_cache_node *n;
2165 2139
2166 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs)) 2140 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2167 return; 2141 return;
@@ -2176,15 +2150,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2176 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n", 2150 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2177 s->name); 2151 s->name);
2178 2152
2179 for_each_online_node(node) { 2153 for_each_kmem_cache_node(s, node, n) {
2180 struct kmem_cache_node *n = get_node(s, node);
2181 unsigned long nr_slabs; 2154 unsigned long nr_slabs;
2182 unsigned long nr_objs; 2155 unsigned long nr_objs;
2183 unsigned long nr_free; 2156 unsigned long nr_free;
2184 2157
2185 if (!n)
2186 continue;
2187
2188 nr_free = count_partial(n, count_free); 2158 nr_free = count_partial(n, count_free);
2189 nr_slabs = node_nr_slabs(n); 2159 nr_slabs = node_nr_slabs(n);
2190 nr_objs = node_nr_objs(n); 2160 nr_objs = node_nr_objs(n);
@@ -2928,13 +2898,10 @@ static void early_kmem_cache_node_alloc(int node)
2928static void free_kmem_cache_nodes(struct kmem_cache *s) 2898static void free_kmem_cache_nodes(struct kmem_cache *s)
2929{ 2899{
2930 int node; 2900 int node;
2901 struct kmem_cache_node *n;
2931 2902
2932 for_each_node_state(node, N_NORMAL_MEMORY) { 2903 for_each_kmem_cache_node(s, node, n) {
2933 struct kmem_cache_node *n = s->node[node]; 2904 kmem_cache_free(kmem_cache_node, n);
2934
2935 if (n)
2936 kmem_cache_free(kmem_cache_node, n);
2937
2938 s->node[node] = NULL; 2905 s->node[node] = NULL;
2939 } 2906 }
2940} 2907}
@@ -3222,12 +3189,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3222static inline int kmem_cache_close(struct kmem_cache *s) 3189static inline int kmem_cache_close(struct kmem_cache *s)
3223{ 3190{
3224 int node; 3191 int node;
3192 struct kmem_cache_node *n;
3225 3193
3226 flush_all(s); 3194 flush_all(s);
3227 /* Attempt to free all objects */ 3195 /* Attempt to free all objects */
3228 for_each_node_state(node, N_NORMAL_MEMORY) { 3196 for_each_kmem_cache_node(s, node, n) {
3229 struct kmem_cache_node *n = get_node(s, node);
3230
3231 free_partial(s, n); 3197 free_partial(s, n);
3232 if (n->nr_partial || slabs_node(s, node)) 3198 if (n->nr_partial || slabs_node(s, node))
3233 return 1; 3199 return 1;
@@ -3412,9 +3378,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
3412 return -ENOMEM; 3378 return -ENOMEM;
3413 3379
3414 flush_all(s); 3380 flush_all(s);
3415 for_each_node_state(node, N_NORMAL_MEMORY) { 3381 for_each_kmem_cache_node(s, node, n) {
3416 n = get_node(s, node);
3417
3418 if (!n->nr_partial) 3382 if (!n->nr_partial)
3419 continue; 3383 continue;
3420 3384
@@ -3586,6 +3550,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
3586{ 3550{
3587 int node; 3551 int node;
3588 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); 3552 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
3553 struct kmem_cache_node *n;
3589 3554
3590 memcpy(s, static_cache, kmem_cache->object_size); 3555 memcpy(s, static_cache, kmem_cache->object_size);
3591 3556
@@ -3595,19 +3560,16 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
3595 * IPIs around. 3560 * IPIs around.
3596 */ 3561 */
3597 __flush_cpu_slab(s, smp_processor_id()); 3562 __flush_cpu_slab(s, smp_processor_id());
3598 for_each_node_state(node, N_NORMAL_MEMORY) { 3563 for_each_kmem_cache_node(s, node, n) {
3599 struct kmem_cache_node *n = get_node(s, node);
3600 struct page *p; 3564 struct page *p;
3601 3565
3602 if (n) { 3566 list_for_each_entry(p, &n->partial, lru)
3603 list_for_each_entry(p, &n->partial, lru) 3567 p->slab_cache = s;
3604 p->slab_cache = s;
3605 3568
3606#ifdef CONFIG_SLUB_DEBUG 3569#ifdef CONFIG_SLUB_DEBUG
3607 list_for_each_entry(p, &n->full, lru) 3570 list_for_each_entry(p, &n->full, lru)
3608 p->slab_cache = s; 3571 p->slab_cache = s;
3609#endif 3572#endif
3610 }
3611 } 3573 }
3612 list_add(&s->list, &slab_caches); 3574 list_add(&s->list, &slab_caches);
3613 return s; 3575 return s;
@@ -3960,16 +3922,14 @@ static long validate_slab_cache(struct kmem_cache *s)
3960 unsigned long count = 0; 3922 unsigned long count = 0;
3961 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 3923 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
3962 sizeof(unsigned long), GFP_KERNEL); 3924 sizeof(unsigned long), GFP_KERNEL);
3925 struct kmem_cache_node *n;
3963 3926
3964 if (!map) 3927 if (!map)
3965 return -ENOMEM; 3928 return -ENOMEM;
3966 3929
3967 flush_all(s); 3930 flush_all(s);
3968 for_each_node_state(node, N_NORMAL_MEMORY) { 3931 for_each_kmem_cache_node(s, node, n)
3969 struct kmem_cache_node *n = get_node(s, node);
3970
3971 count += validate_slab_node(s, n, map); 3932 count += validate_slab_node(s, n, map);
3972 }
3973 kfree(map); 3933 kfree(map);
3974 return count; 3934 return count;
3975} 3935}
@@ -4123,6 +4083,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
4123 int node; 4083 int node;
4124 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 4084 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4125 sizeof(unsigned long), GFP_KERNEL); 4085 sizeof(unsigned long), GFP_KERNEL);
4086 struct kmem_cache_node *n;
4126 4087
4127 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), 4088 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4128 GFP_TEMPORARY)) { 4089 GFP_TEMPORARY)) {
@@ -4132,8 +4093,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
4132 /* Push back cpu slabs */ 4093 /* Push back cpu slabs */
4133 flush_all(s); 4094 flush_all(s);
4134 4095
4135 for_each_node_state(node, N_NORMAL_MEMORY) { 4096 for_each_kmem_cache_node(s, node, n) {
4136 struct kmem_cache_node *n = get_node(s, node);
4137 unsigned long flags; 4097 unsigned long flags;
4138 struct page *page; 4098 struct page *page;
4139 4099
@@ -4205,7 +4165,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
4205#endif 4165#endif
4206 4166
4207#ifdef SLUB_RESILIENCY_TEST 4167#ifdef SLUB_RESILIENCY_TEST
4208static void resiliency_test(void) 4168static void __init resiliency_test(void)
4209{ 4169{
4210 u8 *p; 4170 u8 *p;
4211 4171
@@ -4332,8 +4292,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4332 get_online_mems(); 4292 get_online_mems();
4333#ifdef CONFIG_SLUB_DEBUG 4293#ifdef CONFIG_SLUB_DEBUG
4334 if (flags & SO_ALL) { 4294 if (flags & SO_ALL) {
4335 for_each_node_state(node, N_NORMAL_MEMORY) { 4295 struct kmem_cache_node *n;
4336 struct kmem_cache_node *n = get_node(s, node); 4296
4297 for_each_kmem_cache_node(s, node, n) {
4337 4298
4338 if (flags & SO_TOTAL) 4299 if (flags & SO_TOTAL)
4339 x = atomic_long_read(&n->total_objects); 4300 x = atomic_long_read(&n->total_objects);
@@ -4349,9 +4310,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4349 } else 4310 } else
4350#endif 4311#endif
4351 if (flags & SO_PARTIAL) { 4312 if (flags & SO_PARTIAL) {
4352 for_each_node_state(node, N_NORMAL_MEMORY) { 4313 struct kmem_cache_node *n;
4353 struct kmem_cache_node *n = get_node(s, node);
4354 4314
4315 for_each_kmem_cache_node(s, node, n) {
4355 if (flags & SO_TOTAL) 4316 if (flags & SO_TOTAL)
4356 x = count_partial(n, count_total); 4317 x = count_partial(n, count_total);
4357 else if (flags & SO_OBJECTS) 4318 else if (flags & SO_OBJECTS)
@@ -4364,7 +4325,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4364 } 4325 }
4365 x = sprintf(buf, "%lu", total); 4326 x = sprintf(buf, "%lu", total);
4366#ifdef CONFIG_NUMA 4327#ifdef CONFIG_NUMA
4367 for_each_node_state(node, N_NORMAL_MEMORY) 4328 for (node = 0; node < nr_node_ids; node++)
4368 if (nodes[node]) 4329 if (nodes[node])
4369 x += sprintf(buf + x, " N%d=%lu", 4330 x += sprintf(buf + x, " N%d=%lu",
4370 node, nodes[node]); 4331 node, nodes[node]);
@@ -4378,16 +4339,12 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4378static int any_slab_objects(struct kmem_cache *s) 4339static int any_slab_objects(struct kmem_cache *s)
4379{ 4340{
4380 int node; 4341 int node;
4342 struct kmem_cache_node *n;
4381 4343
4382 for_each_online_node(node) { 4344 for_each_kmem_cache_node(s, node, n)
4383 struct kmem_cache_node *n = get_node(s, node);
4384
4385 if (!n)
4386 continue;
4387
4388 if (atomic_long_read(&n->total_objects)) 4345 if (atomic_long_read(&n->total_objects))
4389 return 1; 4346 return 1;
4390 } 4347
4391 return 0; 4348 return 0;
4392} 4349}
4393#endif 4350#endif
@@ -4509,7 +4466,7 @@ SLAB_ATTR_RO(ctor);
4509 4466
4510static ssize_t aliases_show(struct kmem_cache *s, char *buf) 4467static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4511{ 4468{
4512 return sprintf(buf, "%d\n", s->refcount - 1); 4469 return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
4513} 4470}
4514SLAB_ATTR_RO(aliases); 4471SLAB_ATTR_RO(aliases);
4515 4472
@@ -5171,12 +5128,6 @@ static char *create_unique_id(struct kmem_cache *s)
5171 *p++ = '-'; 5128 *p++ = '-';
5172 p += sprintf(p, "%07d", s->size); 5129 p += sprintf(p, "%07d", s->size);
5173 5130
5174#ifdef CONFIG_MEMCG_KMEM
5175 if (!is_root_cache(s))
5176 p += sprintf(p, "-%08d",
5177 memcg_cache_id(s->memcg_params->memcg));
5178#endif
5179
5180 BUG_ON(p > name + ID_STR_LENGTH - 1); 5131 BUG_ON(p > name + ID_STR_LENGTH - 1);
5181 return name; 5132 return name;
5182} 5133}
@@ -5342,13 +5293,9 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5342 unsigned long nr_objs = 0; 5293 unsigned long nr_objs = 0;
5343 unsigned long nr_free = 0; 5294 unsigned long nr_free = 0;
5344 int node; 5295 int node;
5296 struct kmem_cache_node *n;
5345 5297
5346 for_each_online_node(node) { 5298 for_each_kmem_cache_node(s, node, n) {
5347 struct kmem_cache_node *n = get_node(s, node);
5348
5349 if (!n)
5350 continue;
5351
5352 nr_slabs += node_nr_slabs(n); 5299 nr_slabs += node_nr_slabs(n);
5353 nr_objs += node_nr_objs(n); 5300 nr_objs += node_nr_objs(n);
5354 nr_free += count_partial(n, count_free); 5301 nr_free += count_partial(n, count_free);
diff --git a/mm/swap.c b/mm/swap.c
index 9e8e3472248b..c789d01c9ec3 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -501,7 +501,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
501 SetPageActive(page); 501 SetPageActive(page);
502 lru += LRU_ACTIVE; 502 lru += LRU_ACTIVE;
503 add_page_to_lru_list(page, lruvec, lru); 503 add_page_to_lru_list(page, lruvec, lru);
504 trace_mm_lru_activate(page, page_to_pfn(page)); 504 trace_mm_lru_activate(page);
505 505
506 __count_vm_event(PGACTIVATE); 506 __count_vm_event(PGACTIVATE);
507 update_page_reclaim_stat(lruvec, file, 1); 507 update_page_reclaim_stat(lruvec, file, 1);
@@ -589,6 +589,9 @@ static void __lru_cache_activate_page(struct page *page)
589 * inactive,unreferenced -> inactive,referenced 589 * inactive,unreferenced -> inactive,referenced
590 * inactive,referenced -> active,unreferenced 590 * inactive,referenced -> active,unreferenced
591 * active,unreferenced -> active,referenced 591 * active,unreferenced -> active,referenced
592 *
593 * When a newly allocated page is not yet visible, so safe for non-atomic ops,
594 * __SetPageReferenced(page) may be substituted for mark_page_accessed(page).
592 */ 595 */
593void mark_page_accessed(struct page *page) 596void mark_page_accessed(struct page *page)
594{ 597{
@@ -614,17 +617,6 @@ void mark_page_accessed(struct page *page)
614} 617}
615EXPORT_SYMBOL(mark_page_accessed); 618EXPORT_SYMBOL(mark_page_accessed);
616 619
617/*
618 * Used to mark_page_accessed(page) that is not visible yet and when it is
619 * still safe to use non-atomic ops
620 */
621void init_page_accessed(struct page *page)
622{
623 if (!PageReferenced(page))
624 __SetPageReferenced(page);
625}
626EXPORT_SYMBOL(init_page_accessed);
627
628static void __lru_cache_add(struct page *page) 620static void __lru_cache_add(struct page *page)
629{ 621{
630 struct pagevec *pvec = &get_cpu_var(lru_add_pvec); 622 struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
@@ -996,7 +988,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
996 SetPageLRU(page); 988 SetPageLRU(page);
997 add_page_to_lru_list(page, lruvec, lru); 989 add_page_to_lru_list(page, lruvec, lru);
998 update_page_reclaim_stat(lruvec, file, active); 990 update_page_reclaim_stat(lruvec, file, active);
999 trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page)); 991 trace_mm_lru_insertion(page, lru);
1000} 992}
1001 993
1002/* 994/*
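
The new mark_page_accessed() comment and the removal of init_page_accessed() codify a simple rule: a page that is not yet visible to anyone else may use the non-atomic __SetPageReferenced() instead of the locked bit operation. The same publish-before-sharing reasoning applies to any flag word, so plain or relaxed stores suffice while an object is still thread-private, and atomic read-modify-write operations are needed only once it can be observed concurrently. A hedged C11 sketch of the distinction (the flag names are invented):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define FLAG_REFERENCED (1u << 0)
#define FLAG_ACTIVE     (1u << 1)

struct object {
    atomic_uint flags;
};

/* Only the creating thread can see 'obj' yet: a relaxed store is enough. */
static struct object *object_create(void)
{
    struct object *obj = malloc(sizeof(*obj));
    if (!obj)
        return NULL;
    atomic_init(&obj->flags, 0);
    atomic_store_explicit(&obj->flags, FLAG_REFERENCED, memory_order_relaxed);
    return obj;
}

/* After publication, concurrent updaters must use an atomic read-modify-write. */
static void object_mark_active(struct object *obj)
{
    atomic_fetch_or_explicit(&obj->flags, FLAG_ACTIVE, memory_order_relaxed);
}

int main(void)
{
    struct object *obj = object_create();
    if (!obj)
        return 1;
    /* ...publish obj to other threads here (release store or under a lock)... */
    object_mark_active(obj);
    printf("flags = 0x%x\n", atomic_load(&obj->flags));
    free(obj);
    return 0;
}
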
diff --git a/mm/util.c b/mm/util.c
index d5ea733c5082..7b6608df2ee8 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -16,9 +16,6 @@
16 16
17#include "internal.h" 17#include "internal.h"
18 18
19#define CREATE_TRACE_POINTS
20#include <trace/events/kmem.h>
21
22/** 19/**
23 * kstrdup - allocate space for and copy an existing string 20 * kstrdup - allocate space for and copy an existing string
24 * @s: the string to duplicate 21 * @s: the string to duplicate
@@ -112,97 +109,6 @@ void *memdup_user(const void __user *src, size_t len)
112} 109}
113EXPORT_SYMBOL(memdup_user); 110EXPORT_SYMBOL(memdup_user);
114 111
115static __always_inline void *__do_krealloc(const void *p, size_t new_size,
116 gfp_t flags)
117{
118 void *ret;
119 size_t ks = 0;
120
121 if (p)
122 ks = ksize(p);
123
124 if (ks >= new_size)
125 return (void *)p;
126
127 ret = kmalloc_track_caller(new_size, flags);
128 if (ret && p)
129 memcpy(ret, p, ks);
130
131 return ret;
132}
133
134/**
135 * __krealloc - like krealloc() but don't free @p.
136 * @p: object to reallocate memory for.
137 * @new_size: how many bytes of memory are required.
138 * @flags: the type of memory to allocate.
139 *
140 * This function is like krealloc() except it never frees the originally
141 * allocated buffer. Use this if you don't want to free the buffer immediately
142 * like, for example, with RCU.
143 */
144void *__krealloc(const void *p, size_t new_size, gfp_t flags)
145{
146 if (unlikely(!new_size))
147 return ZERO_SIZE_PTR;
148
149 return __do_krealloc(p, new_size, flags);
150
151}
152EXPORT_SYMBOL(__krealloc);
153
154/**
155 * krealloc - reallocate memory. The contents will remain unchanged.
156 * @p: object to reallocate memory for.
157 * @new_size: how many bytes of memory are required.
158 * @flags: the type of memory to allocate.
159 *
160 * The contents of the object pointed to are preserved up to the
161 * lesser of the new and old sizes. If @p is %NULL, krealloc()
162 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
163 * %NULL pointer, the object pointed to is freed.
164 */
165void *krealloc(const void *p, size_t new_size, gfp_t flags)
166{
167 void *ret;
168
169 if (unlikely(!new_size)) {
170 kfree(p);
171 return ZERO_SIZE_PTR;
172 }
173
174 ret = __do_krealloc(p, new_size, flags);
175 if (ret && p != ret)
176 kfree(p);
177
178 return ret;
179}
180EXPORT_SYMBOL(krealloc);
181
182/**
183 * kzfree - like kfree but zero memory
184 * @p: object to free memory of
185 *
186 * The memory of the object @p points to is zeroed before freed.
187 * If @p is %NULL, kzfree() does nothing.
188 *
189 * Note: this function zeroes the whole allocated buffer which can be a good
190 * deal bigger than the requested buffer size passed to kmalloc(). So be
191 * careful when using this function in performance sensitive code.
192 */
193void kzfree(const void *p)
194{
195 size_t ks;
196 void *mem = (void *)p;
197
198 if (unlikely(ZERO_OR_NULL_PTR(mem)))
199 return;
200 ks = ksize(mem);
201 memset(mem, 0, ks);
202 kfree(mem);
203}
204EXPORT_SYMBOL(kzfree);
205
206/* 112/*
207 * strndup_user - duplicate an existing string from user space 113 * strndup_user - duplicate an existing string from user space
208 * @s: The string to duplicate 114 * @s: The string to duplicate
@@ -504,11 +410,3 @@ out_mm:
504out: 410out:
505 return res; 411 return res;
506} 412}
507
508/* Tracepoints definitions. */
509EXPORT_TRACEPOINT_SYMBOL(kmalloc);
510EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
511EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
512EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
513EXPORT_TRACEPOINT_SYMBOL(kfree);
514EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f64632b67196..2b0aa5486092 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1270,19 +1270,15 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
1270} 1270}
1271EXPORT_SYMBOL_GPL(unmap_kernel_range); 1271EXPORT_SYMBOL_GPL(unmap_kernel_range);
1272 1272
1273int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) 1273int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
1274{ 1274{
1275 unsigned long addr = (unsigned long)area->addr; 1275 unsigned long addr = (unsigned long)area->addr;
1276 unsigned long end = addr + get_vm_area_size(area); 1276 unsigned long end = addr + get_vm_area_size(area);
1277 int err; 1277 int err;
1278 1278
1279 err = vmap_page_range(addr, end, prot, *pages); 1279 err = vmap_page_range(addr, end, prot, pages);
1280 if (err > 0) {
1281 *pages += err;
1282 err = 0;
1283 }
1284 1280
1285 return err; 1281 return err > 0 ? 0 : err;
1286} 1282}
1287EXPORT_SYMBOL_GPL(map_vm_area); 1283EXPORT_SYMBOL_GPL(map_vm_area);
1288 1284
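
map_vm_area() used to take struct page ***pages and advance the caller's cursor by however many pages it consumed; since it now always consumes the whole area, the patch flattens the parameter to a plain array and folds the old "positive return means progress" convention into err > 0 ? 0 : err. A small sketch of the same interface clean-up on an invented consume_all() helper:

#include <stdio.h>

/* Old shape: report progress by advancing the caller's cursor. */
static int consume_all_old(int **cursor, int count)
{
    int done = count;            /* pretend we processed everything */
    if (done > 0) {
        *cursor += done;
        done = 0;
    }
    return done;
}

/* New shape: take the array directly, return 0 on success or a negative error. */
static int consume_all(int *items, int count)
{
    int done = count;            /* same work as above */
    (void)items;
    return done > 0 ? 0 : done;
}

int main(void)
{
    int items[4] = { 1, 2, 3, 4 };
    int *cursor = items;

    printf("old: %d (cursor advanced by %td)\n",
           consume_all_old(&cursor, 4), cursor - items);
    printf("new: %d\n", consume_all(items, 4));
    return 0;
}

Dropping the extra level of indirection also lets the vmap() and __vmalloc_area_node() callers in the following hunks pass their page arrays straight through.
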
@@ -1548,7 +1544,7 @@ void *vmap(struct page **pages, unsigned int count,
1548 if (!area) 1544 if (!area)
1549 return NULL; 1545 return NULL;
1550 1546
1551 if (map_vm_area(area, prot, &pages)) { 1547 if (map_vm_area(area, prot, pages)) {
1552 vunmap(area->addr); 1548 vunmap(area->addr);
1553 return NULL; 1549 return NULL;
1554 } 1550 }
@@ -1566,7 +1562,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1566 const int order = 0; 1562 const int order = 0;
1567 struct page **pages; 1563 struct page **pages;
1568 unsigned int nr_pages, array_size, i; 1564 unsigned int nr_pages, array_size, i;
1569 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; 1565 const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1566 const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
1570 1567
1571 nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; 1568 nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
1572 array_size = (nr_pages * sizeof(struct page *)); 1569 array_size = (nr_pages * sizeof(struct page *));
@@ -1589,12 +1586,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1589 1586
1590 for (i = 0; i < area->nr_pages; i++) { 1587 for (i = 0; i < area->nr_pages; i++) {
1591 struct page *page; 1588 struct page *page;
1592 gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
1593 1589
1594 if (node == NUMA_NO_NODE) 1590 if (node == NUMA_NO_NODE)
1595 page = alloc_page(tmp_mask); 1591 page = alloc_page(alloc_mask);
1596 else 1592 else
1597 page = alloc_pages_node(node, tmp_mask, order); 1593 page = alloc_pages_node(node, alloc_mask, order);
1598 1594
1599 if (unlikely(!page)) { 1595 if (unlikely(!page)) {
1600 /* Successfully allocated i pages, free them in __vunmap() */ 1596 /* Successfully allocated i pages, free them in __vunmap() */
@@ -1602,9 +1598,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1602 goto fail; 1598 goto fail;
1603 } 1599 }
1604 area->pages[i] = page; 1600 area->pages[i] = page;
1601 if (gfp_mask & __GFP_WAIT)
1602 cond_resched();
1605 } 1603 }
1606 1604
1607 if (map_vm_area(area, prot, &pages)) 1605 if (map_vm_area(area, prot, pages))
1608 goto fail; 1606 goto fail;
1609 return area->addr; 1607 return area->addr;
1610 1608
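
__vmalloc_area_node() now hoists the __GFP_NOWARN mask out of the per-page loop and calls cond_resched() between allocations when the request is allowed to sleep, so very large vmalloc()s no longer monopolize the CPU. A rough userspace approximation of the same "yield periodically inside a long, sleepable loop" idea, with sched_yield() standing in for cond_resched() (the MAY_SLEEP flag is invented for the sketch):

#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

#define MAY_SLEEP 0x1u

static int fill_pages(void **pages, size_t nr, unsigned int flags)
{
    /* Loop-invariant setup hoisted out of the loop, like alloc_mask. */
    const size_t chunk = 4096;

    for (size_t i = 0; i < nr; i++) {
        pages[i] = malloc(chunk);
        if (!pages[i])
            return -1;           /* caller frees whatever was already allocated */
        if (flags & MAY_SLEEP)
            sched_yield();       /* be polite between expensive iterations */
    }
    return 0;
}

int main(void)
{
    enum { NR = 256 };
    void *pages[NR] = { 0 };

    if (fill_pages(pages, NR, MAY_SLEEP))
        fprintf(stderr, "allocation failed\n");
    for (size_t i = 0; i < NR; i++)
        free(pages[i]);
    return 0;
}
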
@@ -2690,14 +2688,14 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
2690 2688
2691 prev_end = VMALLOC_START; 2689 prev_end = VMALLOC_START;
2692 2690
2693 spin_lock(&vmap_area_lock); 2691 rcu_read_lock();
2694 2692
2695 if (list_empty(&vmap_area_list)) { 2693 if (list_empty(&vmap_area_list)) {
2696 vmi->largest_chunk = VMALLOC_TOTAL; 2694 vmi->largest_chunk = VMALLOC_TOTAL;
2697 goto out; 2695 goto out;
2698 } 2696 }
2699 2697
2700 list_for_each_entry(va, &vmap_area_list, list) { 2698 list_for_each_entry_rcu(va, &vmap_area_list, list) {
2701 unsigned long addr = va->va_start; 2699 unsigned long addr = va->va_start;
2702 2700
2703 /* 2701 /*
@@ -2724,7 +2722,7 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
2724 vmi->largest_chunk = VMALLOC_END - prev_end; 2722 vmi->largest_chunk = VMALLOC_END - prev_end;
2725 2723
2726out: 2724out:
2727 spin_unlock(&vmap_area_lock); 2725 rcu_read_unlock();
2728} 2726}
2729#endif 2727#endif
2730 2728
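
get_vmalloc_info() switches from taking vmap_area_lock to an RCU read-side critical section around list_for_each_entry_rcu(), so readers no longer serialize against updates. The underlying idea is that a read-mostly linked list can be traversed without a lock as long as insertions publish fully initialized nodes and reclamation waits for readers to finish; the kernel gets the latter from RCU grace periods. The sketch below shows only the lockless-reader half with C11 release/acquire publication, assumes a single writer, and deliberately never frees nodes because it has no grace-period machinery:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct area {
    unsigned long start, end;
    struct area *_Atomic next;
};

static struct area *_Atomic head;

/* Single writer: initialize the node fully, then publish it with a release store. */
static void add_area(unsigned long start, unsigned long end)
{
    struct area *a = malloc(sizeof(*a));
    if (!a)
        return;
    a->start = start;
    a->end = end;
    atomic_store_explicit(&a->next,
                          atomic_load_explicit(&head, memory_order_relaxed),
                          memory_order_relaxed);
    atomic_store_explicit(&head, a, memory_order_release);
}

/* Readers: acquire loads pair with the release store; no lock is taken. */
static unsigned long used_bytes(void)
{
    unsigned long total = 0;

    for (struct area *a = atomic_load_explicit(&head, memory_order_acquire);
         a;
         a = atomic_load_explicit(&a->next, memory_order_acquire))
        total += a->end - a->start;
    return total;
}

int main(void)
{
    add_area(0x1000, 0x3000);
    add_area(0x8000, 0x9000);
    printf("used: %lu bytes\n", used_bytes());
    return 0;
}
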
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0f16ffe8eb67..d2f65c856350 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -59,35 +59,20 @@
59#include <trace/events/vmscan.h> 59#include <trace/events/vmscan.h>
60 60
61struct scan_control { 61struct scan_control {
62 /* Incremented by the number of inactive pages that were scanned */
63 unsigned long nr_scanned;
64
65 /* Number of pages freed so far during a call to shrink_zones() */
66 unsigned long nr_reclaimed;
67
68 /* How many pages shrink_list() should reclaim */ 62 /* How many pages shrink_list() should reclaim */
69 unsigned long nr_to_reclaim; 63 unsigned long nr_to_reclaim;
70 64
71 unsigned long hibernation_mode;
72
73 /* This context's GFP mask */ 65 /* This context's GFP mask */
74 gfp_t gfp_mask; 66 gfp_t gfp_mask;
75 67
76 int may_writepage; 68 /* Allocation order */
77
78 /* Can mapped pages be reclaimed? */
79 int may_unmap;
80
81 /* Can pages be swapped as part of reclaim? */
82 int may_swap;
83
84 int order; 69 int order;
85 70
86 /* Scan (total_size >> priority) pages at once */ 71 /*
87 int priority; 72 * Nodemask of nodes allowed by the caller. If NULL, all nodes
88 73 * are scanned.
89 /* anon vs. file LRUs scanning "ratio" */ 74 */
90 int swappiness; 75 nodemask_t *nodemask;
91 76
92 /* 77 /*
93 * The memory cgroup that hit its limit and as a result is the 78 * The memory cgroup that hit its limit and as a result is the
@@ -95,11 +80,27 @@ struct scan_control {
95 */ 80 */
96 struct mem_cgroup *target_mem_cgroup; 81 struct mem_cgroup *target_mem_cgroup;
97 82
98 /* 83 /* Scan (total_size >> priority) pages at once */
99 * Nodemask of nodes allowed by the caller. If NULL, all nodes 84 int priority;
100 * are scanned. 85
101 */ 86 unsigned int may_writepage:1;
102 nodemask_t *nodemask; 87
88 /* Can mapped pages be reclaimed? */
89 unsigned int may_unmap:1;
90
91 /* Can pages be swapped as part of reclaim? */
92 unsigned int may_swap:1;
93
94 unsigned int hibernation_mode:1;
95
96 /* One of the zones is ready for compaction */
97 unsigned int compaction_ready:1;
98
99 /* Incremented by the number of inactive pages that were scanned */
100 unsigned long nr_scanned;
101
102 /* Number of pages freed so far during a call to shrink_zones() */
103 unsigned long nr_reclaimed;
103}; 104};
104 105
105#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) 106#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -136,7 +137,11 @@ struct scan_control {
136 * From 0 .. 100. Higher means more swappy. 137 * From 0 .. 100. Higher means more swappy.
137 */ 138 */
138int vm_swappiness = 60; 139int vm_swappiness = 60;
139unsigned long vm_total_pages; /* The total number of pages which the VM controls */ 140/*
141 * The total number of pages which are beyond the high watermark within all
142 * zones.
143 */
144unsigned long vm_total_pages;
140 145
141static LIST_HEAD(shrinker_list); 146static LIST_HEAD(shrinker_list);
142static DECLARE_RWSEM(shrinker_rwsem); 147static DECLARE_RWSEM(shrinker_rwsem);
@@ -169,7 +174,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone)
169 174
170bool zone_reclaimable(struct zone *zone) 175bool zone_reclaimable(struct zone *zone)
171{ 176{
172 return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; 177 return zone_page_state(zone, NR_PAGES_SCANNED) <
178 zone_reclaimable_pages(zone) * 6;
173} 179}
174 180
175static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) 181static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
@@ -1503,7 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
1503 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); 1509 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
1504 1510
1505 if (global_reclaim(sc)) { 1511 if (global_reclaim(sc)) {
1506 zone->pages_scanned += nr_scanned; 1512 __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
1507 if (current_is_kswapd()) 1513 if (current_is_kswapd())
1508 __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); 1514 __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
1509 else 1515 else
@@ -1693,7 +1699,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
1693 nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, 1699 nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
1694 &nr_scanned, sc, isolate_mode, lru); 1700 &nr_scanned, sc, isolate_mode, lru);
1695 if (global_reclaim(sc)) 1701 if (global_reclaim(sc))
1696 zone->pages_scanned += nr_scanned; 1702 __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
1697 1703
1698 reclaim_stat->recent_scanned[file] += nr_taken; 1704 reclaim_stat->recent_scanned[file] += nr_taken;
1699 1705
@@ -1750,7 +1756,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
1750 * Count referenced pages from currently used mappings as rotated, 1756 * Count referenced pages from currently used mappings as rotated,
1751 * even though only some of them are actually re-activated. This 1757 * even though only some of them are actually re-activated. This
1752 * helps balance scan pressure between file and anonymous pages in 1758 * helps balance scan pressure between file and anonymous pages in
1753 * get_scan_ratio. 1759 * get_scan_count.
1754 */ 1760 */
1755 reclaim_stat->recent_rotated[file] += nr_rotated; 1761 reclaim_stat->recent_rotated[file] += nr_rotated;
1756 1762
@@ -1865,8 +1871,8 @@ enum scan_balance {
1865 * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan 1871 * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
1866 * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan 1872 * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
1867 */ 1873 */
1868static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, 1874static void get_scan_count(struct lruvec *lruvec, int swappiness,
1869 unsigned long *nr) 1875 struct scan_control *sc, unsigned long *nr)
1870{ 1876{
1871 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; 1877 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
1872 u64 fraction[2]; 1878 u64 fraction[2];
@@ -1909,7 +1915,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1909 * using the memory controller's swap limit feature would be 1915 * using the memory controller's swap limit feature would be
1910 * too expensive. 1916 * too expensive.
1911 */ 1917 */
1912 if (!global_reclaim(sc) && !sc->swappiness) { 1918 if (!global_reclaim(sc) && !swappiness) {
1913 scan_balance = SCAN_FILE; 1919 scan_balance = SCAN_FILE;
1914 goto out; 1920 goto out;
1915 } 1921 }
@@ -1919,16 +1925,11 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1919 * system is close to OOM, scan both anon and file equally 1925 * system is close to OOM, scan both anon and file equally
1920 * (unless the swappiness setting disagrees with swapping). 1926 * (unless the swappiness setting disagrees with swapping).
1921 */ 1927 */
1922 if (!sc->priority && sc->swappiness) { 1928 if (!sc->priority && swappiness) {
1923 scan_balance = SCAN_EQUAL; 1929 scan_balance = SCAN_EQUAL;
1924 goto out; 1930 goto out;
1925 } 1931 }
1926 1932
1927 anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
1928 get_lru_size(lruvec, LRU_INACTIVE_ANON);
1929 file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
1930 get_lru_size(lruvec, LRU_INACTIVE_FILE);
1931
1932 /* 1933 /*
1933 * Prevent the reclaimer from falling into the cache trap: as 1934 * Prevent the reclaimer from falling into the cache trap: as
1934 * cache pages start out inactive, every cache fault will tip 1935 * cache pages start out inactive, every cache fault will tip
@@ -1939,9 +1940,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1939 * anon pages. Try to detect this based on file LRU size. 1940 * anon pages. Try to detect this based on file LRU size.
1940 */ 1941 */
1941 if (global_reclaim(sc)) { 1942 if (global_reclaim(sc)) {
1942 unsigned long free = zone_page_state(zone, NR_FREE_PAGES); 1943 unsigned long zonefile;
1944 unsigned long zonefree;
1943 1945
1944 if (unlikely(file + free <= high_wmark_pages(zone))) { 1946 zonefree = zone_page_state(zone, NR_FREE_PAGES);
1947 zonefile = zone_page_state(zone, NR_ACTIVE_FILE) +
1948 zone_page_state(zone, NR_INACTIVE_FILE);
1949
1950 if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) {
1945 scan_balance = SCAN_ANON; 1951 scan_balance = SCAN_ANON;
1946 goto out; 1952 goto out;
1947 } 1953 }
@@ -1962,7 +1968,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1962 * With swappiness at 100, anonymous and file have the same priority. 1968 * With swappiness at 100, anonymous and file have the same priority.
1963 * This scanning priority is essentially the inverse of IO cost. 1969 * This scanning priority is essentially the inverse of IO cost.
1964 */ 1970 */
1965 anon_prio = sc->swappiness; 1971 anon_prio = swappiness;
1966 file_prio = 200 - anon_prio; 1972 file_prio = 200 - anon_prio;
1967 1973
1968 /* 1974 /*
@@ -1976,6 +1982,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1976 * 1982 *
1977 * anon in [0], file in [1] 1983 * anon in [0], file in [1]
1978 */ 1984 */
1985
1986 anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
1987 get_lru_size(lruvec, LRU_INACTIVE_ANON);
1988 file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
1989 get_lru_size(lruvec, LRU_INACTIVE_FILE);
1990
1979 spin_lock_irq(&zone->lru_lock); 1991 spin_lock_irq(&zone->lru_lock);
1980 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { 1992 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
1981 reclaim_stat->recent_scanned[0] /= 2; 1993 reclaim_stat->recent_scanned[0] /= 2;
@@ -2052,7 +2064,8 @@ out:
2052/* 2064/*
2053 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. 2065 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
2054 */ 2066 */
2055static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) 2067static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
2068 struct scan_control *sc)
2056{ 2069{
2057 unsigned long nr[NR_LRU_LISTS]; 2070 unsigned long nr[NR_LRU_LISTS];
2058 unsigned long targets[NR_LRU_LISTS]; 2071 unsigned long targets[NR_LRU_LISTS];
@@ -2063,7 +2076,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
2063 struct blk_plug plug; 2076 struct blk_plug plug;
2064 bool scan_adjusted; 2077 bool scan_adjusted;
2065 2078
2066 get_scan_count(lruvec, sc, nr); 2079 get_scan_count(lruvec, swappiness, sc, nr);
2067 2080
2068 /* Record the original scan target for proportional adjustments later */ 2081 /* Record the original scan target for proportional adjustments later */
2069 memcpy(targets, nr, sizeof(nr)); 2082 memcpy(targets, nr, sizeof(nr));
@@ -2241,9 +2254,10 @@ static inline bool should_continue_reclaim(struct zone *zone,
2241 } 2254 }
2242} 2255}
2243 2256
2244static void shrink_zone(struct zone *zone, struct scan_control *sc) 2257static bool shrink_zone(struct zone *zone, struct scan_control *sc)
2245{ 2258{
2246 unsigned long nr_reclaimed, nr_scanned; 2259 unsigned long nr_reclaimed, nr_scanned;
2260 bool reclaimable = false;
2247 2261
2248 do { 2262 do {
2249 struct mem_cgroup *root = sc->target_mem_cgroup; 2263 struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2259,11 +2273,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
2259 memcg = mem_cgroup_iter(root, NULL, &reclaim); 2273 memcg = mem_cgroup_iter(root, NULL, &reclaim);
2260 do { 2274 do {
2261 struct lruvec *lruvec; 2275 struct lruvec *lruvec;
2276 int swappiness;
2262 2277
2263 lruvec = mem_cgroup_zone_lruvec(zone, memcg); 2278 lruvec = mem_cgroup_zone_lruvec(zone, memcg);
2279 swappiness = mem_cgroup_swappiness(memcg);
2264 2280
2265 sc->swappiness = mem_cgroup_swappiness(memcg); 2281 shrink_lruvec(lruvec, swappiness, sc);
2266 shrink_lruvec(lruvec, sc);
2267 2282
2268 /* 2283 /*
2269 * Direct reclaim and kswapd have to scan all memory 2284 * Direct reclaim and kswapd have to scan all memory
@@ -2287,20 +2302,21 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
2287 sc->nr_scanned - nr_scanned, 2302 sc->nr_scanned - nr_scanned,
2288 sc->nr_reclaimed - nr_reclaimed); 2303 sc->nr_reclaimed - nr_reclaimed);
2289 2304
2305 if (sc->nr_reclaimed - nr_reclaimed)
2306 reclaimable = true;
2307
2290 } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, 2308 } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
2291 sc->nr_scanned - nr_scanned, sc)); 2309 sc->nr_scanned - nr_scanned, sc));
2310
2311 return reclaimable;
2292} 2312}
2293 2313
2294/* Returns true if compaction should go ahead for a high-order request */ 2314/* Returns true if compaction should go ahead for a high-order request */
2295static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) 2315static inline bool compaction_ready(struct zone *zone, int order)
2296{ 2316{
2297 unsigned long balance_gap, watermark; 2317 unsigned long balance_gap, watermark;
2298 bool watermark_ok; 2318 bool watermark_ok;
2299 2319
2300 /* Do not consider compaction for orders reclaim is meant to satisfy */
2301 if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
2302 return false;
2303
2304 /* 2320 /*
2305 * Compaction takes time to run and there are potentially other 2321 * Compaction takes time to run and there are potentially other
2306 * callers using the pages just freed. Continue reclaiming until 2322 * callers using the pages just freed. Continue reclaiming until
@@ -2309,18 +2325,18 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
2309 */ 2325 */
2310 balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP( 2326 balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
2311 zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO)); 2327 zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
2312 watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order); 2328 watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
2313 watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0); 2329 watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
2314 2330
2315 /* 2331 /*
2316 * If compaction is deferred, reclaim up to a point where 2332 * If compaction is deferred, reclaim up to a point where
2317 * compaction will have a chance of success when re-enabled 2333 * compaction will have a chance of success when re-enabled
2318 */ 2334 */
2319 if (compaction_deferred(zone, sc->order)) 2335 if (compaction_deferred(zone, order))
2320 return watermark_ok; 2336 return watermark_ok;
2321 2337
2322 /* If compaction is not ready to start, keep reclaiming */ 2338 /* If compaction is not ready to start, keep reclaiming */
2323 if (!compaction_suitable(zone, sc->order)) 2339 if (!compaction_suitable(zone, order))
2324 return false; 2340 return false;
2325 2341
2326 return watermark_ok; 2342 return watermark_ok;
@@ -2342,10 +2358,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
2342 * If a zone is deemed to be full of pinned pages then just give it a light 2358 * If a zone is deemed to be full of pinned pages then just give it a light
2343 * scan then give up on it. 2359 * scan then give up on it.
2344 * 2360 *
2345 * This function returns true if a zone is being reclaimed for a costly 2361 * Returns true if a zone was reclaimable.
2346 * high-order allocation and compaction is ready to begin. This indicates to
2347 * the caller that it should consider retrying the allocation instead of
2348 * further reclaim.
2349 */ 2362 */
2350static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) 2363static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2351{ 2364{
@@ -2354,13 +2367,13 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2354 unsigned long nr_soft_reclaimed; 2367 unsigned long nr_soft_reclaimed;
2355 unsigned long nr_soft_scanned; 2368 unsigned long nr_soft_scanned;
2356 unsigned long lru_pages = 0; 2369 unsigned long lru_pages = 0;
2357 bool aborted_reclaim = false;
2358 struct reclaim_state *reclaim_state = current->reclaim_state; 2370 struct reclaim_state *reclaim_state = current->reclaim_state;
2359 gfp_t orig_mask; 2371 gfp_t orig_mask;
2360 struct shrink_control shrink = { 2372 struct shrink_control shrink = {
2361 .gfp_mask = sc->gfp_mask, 2373 .gfp_mask = sc->gfp_mask,
2362 }; 2374 };
2363 enum zone_type requested_highidx = gfp_zone(sc->gfp_mask); 2375 enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
2376 bool reclaimable = false;
2364 2377
2365 /* 2378 /*
2366 * If the number of buffer_heads in the machine exceeds the maximum 2379 * If the number of buffer_heads in the machine exceeds the maximum
@@ -2391,22 +2404,24 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2391 if (sc->priority != DEF_PRIORITY && 2404 if (sc->priority != DEF_PRIORITY &&
2392 !zone_reclaimable(zone)) 2405 !zone_reclaimable(zone))
2393 continue; /* Let kswapd poll it */ 2406 continue; /* Let kswapd poll it */
2394 if (IS_ENABLED(CONFIG_COMPACTION)) { 2407
2395 /* 2408 /*
2396 * If we already have plenty of memory free for 2409 * If we already have plenty of memory free for
2397 * compaction in this zone, don't free any more. 2410 * compaction in this zone, don't free any more.
2398 * Even though compaction is invoked for any 2411 * Even though compaction is invoked for any
2399 * non-zero order, only frequent costly order 2412 * non-zero order, only frequent costly order
2400 * reclamation is disruptive enough to become a 2413 * reclamation is disruptive enough to become a
2401 * noticeable problem, like transparent huge 2414 * noticeable problem, like transparent huge
2402 * page allocations. 2415 * page allocations.
2403 */ 2416 */
2404 if ((zonelist_zone_idx(z) <= requested_highidx) 2417 if (IS_ENABLED(CONFIG_COMPACTION) &&
2405 && compaction_ready(zone, sc)) { 2418 sc->order > PAGE_ALLOC_COSTLY_ORDER &&
2406 aborted_reclaim = true; 2419 zonelist_zone_idx(z) <= requested_highidx &&
2407 continue; 2420 compaction_ready(zone, sc->order)) {
2408 } 2421 sc->compaction_ready = true;
2422 continue;
2409 } 2423 }
2424
2410 /* 2425 /*
2411 * This steals pages from memory cgroups over softlimit 2426 * This steals pages from memory cgroups over softlimit
2412 * and returns the number of reclaimed pages and 2427 * and returns the number of reclaimed pages and
@@ -2419,10 +2434,17 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2419 &nr_soft_scanned); 2434 &nr_soft_scanned);
2420 sc->nr_reclaimed += nr_soft_reclaimed; 2435 sc->nr_reclaimed += nr_soft_reclaimed;
2421 sc->nr_scanned += nr_soft_scanned; 2436 sc->nr_scanned += nr_soft_scanned;
2437 if (nr_soft_reclaimed)
2438 reclaimable = true;
2422 /* need some check to avoid more shrink_zone() */ 2439 /* need some check to avoid more shrink_zone() */
2423 } 2440 }
2424 2441
2425 shrink_zone(zone, sc); 2442 if (shrink_zone(zone, sc))
2443 reclaimable = true;
2444
2445 if (global_reclaim(sc) &&
2446 !reclaimable && zone_reclaimable(zone))
2447 reclaimable = true;
2426 } 2448 }
2427 2449
2428 /* 2450 /*
@@ -2445,27 +2467,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2445 */ 2467 */
2446 sc->gfp_mask = orig_mask; 2468 sc->gfp_mask = orig_mask;
2447 2469
2448 return aborted_reclaim; 2470 return reclaimable;
2449}
2450
2451/* All zones in zonelist are unreclaimable? */
2452static bool all_unreclaimable(struct zonelist *zonelist,
2453 struct scan_control *sc)
2454{
2455 struct zoneref *z;
2456 struct zone *zone;
2457
2458 for_each_zone_zonelist_nodemask(zone, z, zonelist,
2459 gfp_zone(sc->gfp_mask), sc->nodemask) {
2460 if (!populated_zone(zone))
2461 continue;
2462 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
2463 continue;
2464 if (zone_reclaimable(zone))
2465 return false;
2466 }
2467
2468 return true;
2469} 2471}
2470 2472
2471/* 2473/*
@@ -2489,7 +2491,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2489{ 2491{
2490 unsigned long total_scanned = 0; 2492 unsigned long total_scanned = 0;
2491 unsigned long writeback_threshold; 2493 unsigned long writeback_threshold;
2492 bool aborted_reclaim; 2494 bool zones_reclaimable;
2493 2495
2494 delayacct_freepages_start(); 2496 delayacct_freepages_start();
2495 2497
@@ -2500,11 +2502,14 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2500 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, 2502 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
2501 sc->priority); 2503 sc->priority);
2502 sc->nr_scanned = 0; 2504 sc->nr_scanned = 0;
2503 aborted_reclaim = shrink_zones(zonelist, sc); 2505 zones_reclaimable = shrink_zones(zonelist, sc);
2504 2506
2505 total_scanned += sc->nr_scanned; 2507 total_scanned += sc->nr_scanned;
2506 if (sc->nr_reclaimed >= sc->nr_to_reclaim) 2508 if (sc->nr_reclaimed >= sc->nr_to_reclaim)
2507 goto out; 2509 break;
2510
2511 if (sc->compaction_ready)
2512 break;
2508 2513
2509 /* 2514 /*
2510 * If we're getting trouble reclaiming, start doing 2515 * If we're getting trouble reclaiming, start doing
@@ -2526,28 +2531,19 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2526 WB_REASON_TRY_TO_FREE_PAGES); 2531 WB_REASON_TRY_TO_FREE_PAGES);
2527 sc->may_writepage = 1; 2532 sc->may_writepage = 1;
2528 } 2533 }
2529 } while (--sc->priority >= 0 && !aborted_reclaim); 2534 } while (--sc->priority >= 0);
2530 2535
2531out:
2532 delayacct_freepages_end(); 2536 delayacct_freepages_end();
2533 2537
2534 if (sc->nr_reclaimed) 2538 if (sc->nr_reclaimed)
2535 return sc->nr_reclaimed; 2539 return sc->nr_reclaimed;
2536 2540
2537 /*
2538 * As hibernation is going on, kswapd is freezed so that it can't mark
2539 * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
2540 * check.
2541 */
2542 if (oom_killer_disabled)
2543 return 0;
2544
2545 /* Aborted reclaim to try compaction? don't OOM, then */ 2541 /* Aborted reclaim to try compaction? don't OOM, then */
2546 if (aborted_reclaim) 2542 if (sc->compaction_ready)
2547 return 1; 2543 return 1;
2548 2544
2549 /* top priority shrink_zones still had more to do? don't OOM, then */ 2545 /* Any of the zones still reclaimable? Don't OOM. */
2550 if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc)) 2546 if (zones_reclaimable)
2551 return 1; 2547 return 1;
2552 2548
2553 return 0; 2549 return 0;
@@ -2684,15 +2680,14 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
2684{ 2680{
2685 unsigned long nr_reclaimed; 2681 unsigned long nr_reclaimed;
2686 struct scan_control sc = { 2682 struct scan_control sc = {
2683 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2687 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), 2684 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
2685 .order = order,
2686 .nodemask = nodemask,
2687 .priority = DEF_PRIORITY,
2688 .may_writepage = !laptop_mode, 2688 .may_writepage = !laptop_mode,
2689 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2690 .may_unmap = 1, 2689 .may_unmap = 1,
2691 .may_swap = 1, 2690 .may_swap = 1,
2692 .order = order,
2693 .priority = DEF_PRIORITY,
2694 .target_mem_cgroup = NULL,
2695 .nodemask = nodemask,
2696 }; 2691 };
2697 2692
2698 /* 2693 /*
@@ -2722,17 +2717,14 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
2722 unsigned long *nr_scanned) 2717 unsigned long *nr_scanned)
2723{ 2718{
2724 struct scan_control sc = { 2719 struct scan_control sc = {
2725 .nr_scanned = 0,
2726 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2720 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2721 .target_mem_cgroup = memcg,
2727 .may_writepage = !laptop_mode, 2722 .may_writepage = !laptop_mode,
2728 .may_unmap = 1, 2723 .may_unmap = 1,
2729 .may_swap = !noswap, 2724 .may_swap = !noswap,
2730 .order = 0,
2731 .priority = 0,
2732 .swappiness = mem_cgroup_swappiness(memcg),
2733 .target_mem_cgroup = memcg,
2734 }; 2725 };
2735 struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); 2726 struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
2727 int swappiness = mem_cgroup_swappiness(memcg);
2736 2728
2737 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2729 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2738 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); 2730 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2748,7 +2740,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
2748 * will pick up pages from other mem cgroup's as well. We hack 2740 * will pick up pages from other mem cgroup's as well. We hack
2749 * the priority and make it zero. 2741 * the priority and make it zero.
2750 */ 2742 */
2751 shrink_lruvec(lruvec, &sc); 2743 shrink_lruvec(lruvec, swappiness, &sc);
2752 2744
2753 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); 2745 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
2754 2746
@@ -2764,16 +2756,14 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
2764 unsigned long nr_reclaimed; 2756 unsigned long nr_reclaimed;
2765 int nid; 2757 int nid;
2766 struct scan_control sc = { 2758 struct scan_control sc = {
2767 .may_writepage = !laptop_mode,
2768 .may_unmap = 1,
2769 .may_swap = !noswap,
2770 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2759 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2771 .order = 0,
2772 .priority = DEF_PRIORITY,
2773 .target_mem_cgroup = memcg,
2774 .nodemask = NULL, /* we don't care the placement */
2775 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2760 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2776 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), 2761 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
2762 .target_mem_cgroup = memcg,
2763 .priority = DEF_PRIORITY,
2764 .may_writepage = !laptop_mode,
2765 .may_unmap = 1,
2766 .may_swap = !noswap,
2777 }; 2767 };
2778 2768
2779 /* 2769 /*
@@ -3031,12 +3021,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
3031 unsigned long nr_soft_scanned; 3021 unsigned long nr_soft_scanned;
3032 struct scan_control sc = { 3022 struct scan_control sc = {
3033 .gfp_mask = GFP_KERNEL, 3023 .gfp_mask = GFP_KERNEL,
3024 .order = order,
3034 .priority = DEF_PRIORITY, 3025 .priority = DEF_PRIORITY,
3026 .may_writepage = !laptop_mode,
3035 .may_unmap = 1, 3027 .may_unmap = 1,
3036 .may_swap = 1, 3028 .may_swap = 1,
3037 .may_writepage = !laptop_mode,
3038 .order = order,
3039 .target_mem_cgroup = NULL,
3040 }; 3029 };
3041 count_vm_event(PAGEOUTRUN); 3030 count_vm_event(PAGEOUTRUN);
3042 3031
@@ -3417,14 +3406,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
3417{ 3406{
3418 struct reclaim_state reclaim_state; 3407 struct reclaim_state reclaim_state;
3419 struct scan_control sc = { 3408 struct scan_control sc = {
3409 .nr_to_reclaim = nr_to_reclaim,
3420 .gfp_mask = GFP_HIGHUSER_MOVABLE, 3410 .gfp_mask = GFP_HIGHUSER_MOVABLE,
3421 .may_swap = 1, 3411 .priority = DEF_PRIORITY,
3422 .may_unmap = 1,
3423 .may_writepage = 1, 3412 .may_writepage = 1,
3424 .nr_to_reclaim = nr_to_reclaim, 3413 .may_unmap = 1,
3414 .may_swap = 1,
3425 .hibernation_mode = 1, 3415 .hibernation_mode = 1,
3426 .order = 0,
3427 .priority = DEF_PRIORITY,
3428 }; 3416 };
3429 struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); 3417 struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
3430 struct task_struct *p = current; 3418 struct task_struct *p = current;
@@ -3604,13 +3592,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
3604 struct task_struct *p = current; 3592 struct task_struct *p = current;
3605 struct reclaim_state reclaim_state; 3593 struct reclaim_state reclaim_state;
3606 struct scan_control sc = { 3594 struct scan_control sc = {
3607 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
3608 .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
3609 .may_swap = 1,
3610 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), 3595 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
3611 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), 3596 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
3612 .order = order, 3597 .order = order,
3613 .priority = ZONE_RECLAIM_PRIORITY, 3598 .priority = ZONE_RECLAIM_PRIORITY,
3599 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
3600 .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
3601 .may_swap = 1,
3614 }; 3602 };
3615 struct shrink_control shrink = { 3603 struct shrink_control shrink = {
3616 .gfp_mask = sc.gfp_mask, 3604 .gfp_mask = sc.gfp_mask,
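
The scan_control rework above turns the may_writepage/may_unmap/may_swap flags into single-bit bitfields and drops the explicit zero assignments (.order = 0, .nodemask = NULL, .target_mem_cgroup = NULL and so on) from the initializers. That is safe because C designated initializers zero-fill every member that is not named. A minimal standalone sketch of that guarantee, using an invented struct rather than the kernel's scan_control:

#include <stdio.h>

struct ctl {
	unsigned long nr_to_reclaim;
	int order;
	unsigned int may_swap:1;	/* single-bit flag, as in the patch */
	unsigned int may_unmap:1;
};

int main(void)
{
	/* Members not named here (order, may_unmap) are zero-initialized. */
	struct ctl sc = {
		.nr_to_reclaim = 32,
		.may_swap = 1,
	};

	printf("order=%d may_unmap=%u\n", sc.order, sc.may_unmap);	/* prints 0 0 */
	return 0;
}
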
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b37bd49bfd55..e9ab104b956f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -200,7 +200,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
200 continue; 200 continue;
201 201
202 threshold = (*calculate_pressure)(zone); 202 threshold = (*calculate_pressure)(zone);
203 for_each_possible_cpu(cpu) 203 for_each_online_cpu(cpu)
204 per_cpu_ptr(zone->pageset, cpu)->stat_threshold 204 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
205 = threshold; 205 = threshold;
206 } 206 }
@@ -763,6 +763,7 @@ const char * const vmstat_text[] = {
763 "nr_shmem", 763 "nr_shmem",
764 "nr_dirtied", 764 "nr_dirtied",
765 "nr_written", 765 "nr_written",
766 "nr_pages_scanned",
766 767
767#ifdef CONFIG_NUMA 768#ifdef CONFIG_NUMA
768 "numa_hit", 769 "numa_hit",
@@ -1067,7 +1068,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1067 min_wmark_pages(zone), 1068 min_wmark_pages(zone),
1068 low_wmark_pages(zone), 1069 low_wmark_pages(zone),
1069 high_wmark_pages(zone), 1070 high_wmark_pages(zone),
1070 zone->pages_scanned, 1071 zone_page_state(zone, NR_PAGES_SCANNED),
1071 zone->spanned_pages, 1072 zone->spanned_pages,
1072 zone->present_pages, 1073 zone->present_pages,
1073 zone->managed_pages); 1074 zone->managed_pages);
@@ -1077,10 +1078,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1077 zone_page_state(zone, i)); 1078 zone_page_state(zone, i));
1078 1079
1079 seq_printf(m, 1080 seq_printf(m,
1080 "\n protection: (%lu", 1081 "\n protection: (%ld",
1081 zone->lowmem_reserve[0]); 1082 zone->lowmem_reserve[0]);
1082 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) 1083 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
1083 seq_printf(m, ", %lu", zone->lowmem_reserve[i]); 1084 seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
1084 seq_printf(m, 1085 seq_printf(m,
1085 ")" 1086 ")"
1086 "\n pagesets"); 1087 "\n pagesets");
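
The zoneinfo change from "%lu" to "%ld" for the lowmem_reserve (protection) values matters because printing a signed quantity with an unsigned conversion turns a negative value into a huge positive one; the switch suggests these values are treated as signed here. A tiny standalone illustration, unrelated to kernel internals:

#include <stdio.h>

int main(void)
{
	long reserve = -1;	/* e.g. a protection value that has gone negative */

	/* What an unsigned conversion shows for that bit pattern on 64-bit: */
	printf("unsigned: %lu\n", (unsigned long)reserve);	/* 18446744073709551615 */
	/* What the signed conversion shows: */
	printf("signed:   %ld\n", reserve);			/* -1 */
	return 0;
}
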
diff --git a/mm/zbud.c b/mm/zbud.c
index 01df13a7e2e1..a05790b1915e 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -51,6 +51,7 @@
51#include <linux/slab.h> 51#include <linux/slab.h>
52#include <linux/spinlock.h> 52#include <linux/spinlock.h>
53#include <linux/zbud.h> 53#include <linux/zbud.h>
54#include <linux/zpool.h>
54 55
55/***************** 56/*****************
56 * Structures 57 * Structures
@@ -113,6 +114,90 @@ struct zbud_header {
113}; 114};
114 115
115/***************** 116/*****************
117 * zpool
118 ****************/
119
120#ifdef CONFIG_ZPOOL
121
122static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle)
123{
124 return zpool_evict(pool, handle);
125}
126
127static struct zbud_ops zbud_zpool_ops = {
128 .evict = zbud_zpool_evict
129};
130
131static void *zbud_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
132{
133 return zbud_create_pool(gfp, &zbud_zpool_ops);
134}
135
136static void zbud_zpool_destroy(void *pool)
137{
138 zbud_destroy_pool(pool);
139}
140
141static int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp,
142 unsigned long *handle)
143{
144 return zbud_alloc(pool, size, gfp, handle);
145}
146static void zbud_zpool_free(void *pool, unsigned long handle)
147{
148 zbud_free(pool, handle);
149}
150
151static int zbud_zpool_shrink(void *pool, unsigned int pages,
152 unsigned int *reclaimed)
153{
154 unsigned int total = 0;
155 int ret = -EINVAL;
156
157 while (total < pages) {
158 ret = zbud_reclaim_page(pool, 8);
159 if (ret < 0)
160 break;
161 total++;
162 }
163
164 if (reclaimed)
165 *reclaimed = total;
166
167 return ret;
168}
169
170static void *zbud_zpool_map(void *pool, unsigned long handle,
171 enum zpool_mapmode mm)
172{
173 return zbud_map(pool, handle);
174}
175static void zbud_zpool_unmap(void *pool, unsigned long handle)
176{
177 zbud_unmap(pool, handle);
178}
179
180static u64 zbud_zpool_total_size(void *pool)
181{
182 return zbud_get_pool_size(pool) * PAGE_SIZE;
183}
184
185static struct zpool_driver zbud_zpool_driver = {
186 .type = "zbud",
187 .owner = THIS_MODULE,
188 .create = zbud_zpool_create,
189 .destroy = zbud_zpool_destroy,
190 .malloc = zbud_zpool_malloc,
191 .free = zbud_zpool_free,
192 .shrink = zbud_zpool_shrink,
193 .map = zbud_zpool_map,
194 .unmap = zbud_zpool_unmap,
195 .total_size = zbud_zpool_total_size,
196};
197
198#endif /* CONFIG_ZPOOL */
199
200/*****************
116 * Helpers 201 * Helpers
117*****************/ 202*****************/
118/* Just to make the code easier to read */ 203/* Just to make the code easier to read */
@@ -122,7 +207,7 @@ enum buddy {
122}; 207};
123 208
124/* Converts an allocation size in bytes to size in zbud chunks */ 209/* Converts an allocation size in bytes to size in zbud chunks */
125static int size_to_chunks(int size) 210static int size_to_chunks(size_t size)
126{ 211{
127 return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; 212 return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
128} 213}
@@ -247,7 +332,7 @@ void zbud_destroy_pool(struct zbud_pool *pool)
247 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate 332 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
248 * a new page. 333 * a new page.
249 */ 334 */
250int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp, 335int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
251 unsigned long *handle) 336 unsigned long *handle)
252{ 337{
253 int chunks, i, freechunks; 338 int chunks, i, freechunks;
@@ -511,11 +596,20 @@ static int __init init_zbud(void)
511 /* Make sure the zbud header will fit in one chunk */ 596 /* Make sure the zbud header will fit in one chunk */
512 BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED); 597 BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
513 pr_info("loaded\n"); 598 pr_info("loaded\n");
599
600#ifdef CONFIG_ZPOOL
601 zpool_register_driver(&zbud_zpool_driver);
602#endif
603
514 return 0; 604 return 0;
515} 605}
516 606
517static void __exit exit_zbud(void) 607static void __exit exit_zbud(void)
518{ 608{
609#ifdef CONFIG_ZPOOL
610 zpool_unregister_driver(&zbud_zpool_driver);
611#endif
612
519 pr_info("unloaded\n"); 613 pr_info("unloaded\n");
520} 614}
521 615
diff --git a/mm/zpool.c b/mm/zpool.c
new file mode 100644
index 000000000000..e40612a1df00
--- /dev/null
+++ b/mm/zpool.c
@@ -0,0 +1,364 @@
1/*
2 * zpool memory storage api
3 *
4 * Copyright (C) 2014 Dan Streetman
5 *
6 * This is a common frontend for memory storage pool implementations.
7 * Typically, this is used to store compressed memory.
8 */
9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12#include <linux/list.h>
13#include <linux/types.h>
14#include <linux/mm.h>
15#include <linux/slab.h>
16#include <linux/spinlock.h>
17#include <linux/module.h>
18#include <linux/zpool.h>
19
20struct zpool {
21 char *type;
22
23 struct zpool_driver *driver;
24 void *pool;
25 struct zpool_ops *ops;
26
27 struct list_head list;
28};
29
30static LIST_HEAD(drivers_head);
31static DEFINE_SPINLOCK(drivers_lock);
32
33static LIST_HEAD(pools_head);
34static DEFINE_SPINLOCK(pools_lock);
35
36/**
37 * zpool_register_driver() - register a zpool implementation.
38 * @driver: driver to register
39 */
40void zpool_register_driver(struct zpool_driver *driver)
41{
42 spin_lock(&drivers_lock);
43 atomic_set(&driver->refcount, 0);
44 list_add(&driver->list, &drivers_head);
45 spin_unlock(&drivers_lock);
46}
47EXPORT_SYMBOL(zpool_register_driver);
48
49/**
50 * zpool_unregister_driver() - unregister a zpool implementation.
51 * @driver: driver to unregister.
52 *
53 * Module usage counting is used to prevent using a driver
54 * while/after unloading, so if this is called from module
55 * exit function, this should never fail; if called from
56 * other than the module exit function, and this returns
57 * failure, the driver is in use and must remain available.
58 */
59int zpool_unregister_driver(struct zpool_driver *driver)
60{
61 int ret = 0, refcount;
62
63 spin_lock(&drivers_lock);
64 refcount = atomic_read(&driver->refcount);
65 WARN_ON(refcount < 0);
66 if (refcount > 0)
67 ret = -EBUSY;
68 else
69 list_del(&driver->list);
70 spin_unlock(&drivers_lock);
71
72 return ret;
73}
74EXPORT_SYMBOL(zpool_unregister_driver);
75
76/**
77 * zpool_evict() - evict callback from a zpool implementation.
78 * @pool: pool to evict from.
79 * @handle: handle to evict.
80 *
81 * This can be used by zpool implementations to call the
82 * user's evict zpool_ops struct evict callback.
83 */
84int zpool_evict(void *pool, unsigned long handle)
85{
86 struct zpool *zpool;
87
88 spin_lock(&pools_lock);
89 list_for_each_entry(zpool, &pools_head, list) {
90 if (zpool->pool == pool) {
91 spin_unlock(&pools_lock);
92 if (!zpool->ops || !zpool->ops->evict)
93 return -EINVAL;
94 return zpool->ops->evict(zpool, handle);
95 }
96 }
97 spin_unlock(&pools_lock);
98
99 return -ENOENT;
100}
101EXPORT_SYMBOL(zpool_evict);
102
103static struct zpool_driver *zpool_get_driver(char *type)
104{
105 struct zpool_driver *driver;
106
107 spin_lock(&drivers_lock);
108 list_for_each_entry(driver, &drivers_head, list) {
109 if (!strcmp(driver->type, type)) {
110 bool got = try_module_get(driver->owner);
111
112 if (got)
113 atomic_inc(&driver->refcount);
114 spin_unlock(&drivers_lock);
115 return got ? driver : NULL;
116 }
117 }
118
119 spin_unlock(&drivers_lock);
120 return NULL;
121}
122
123static void zpool_put_driver(struct zpool_driver *driver)
124{
125 atomic_dec(&driver->refcount);
126 module_put(driver->owner);
127}
128
129/**
130 * zpool_create_pool() - Create a new zpool
131 * @type The type of the zpool to create (e.g. zbud, zsmalloc)
132 * @gfp The GFP flags to use when allocating the pool.
133 * @ops The optional ops callback.
134 *
135 * This creates a new zpool of the specified type. The gfp flags will be
136 * used when allocating memory, if the implementation supports it. If the
137 * ops param is NULL, then the created zpool will not be shrinkable.
138 *
139 * Implementations must guarantee this to be thread-safe.
140 *
141 * Returns: New zpool on success, NULL on failure.
142 */
143struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops)
144{
145 struct zpool_driver *driver;
146 struct zpool *zpool;
147
148 pr_info("creating pool type %s\n", type);
149
150 driver = zpool_get_driver(type);
151
152 if (!driver) {
153 request_module(type);
154 driver = zpool_get_driver(type);
155 }
156
157 if (!driver) {
158 pr_err("no driver for type %s\n", type);
159 return NULL;
160 }
161
162 zpool = kmalloc(sizeof(*zpool), gfp);
163 if (!zpool) {
164 pr_err("couldn't create zpool - out of memory\n");
165 zpool_put_driver(driver);
166 return NULL;
167 }
168
169 zpool->type = driver->type;
170 zpool->driver = driver;
171 zpool->pool = driver->create(gfp, ops);
172 zpool->ops = ops;
173
174 if (!zpool->pool) {
175 pr_err("couldn't create %s pool\n", type);
176 zpool_put_driver(driver);
177 kfree(zpool);
178 return NULL;
179 }
180
181 pr_info("created %s pool\n", type);
182
183 spin_lock(&pools_lock);
184 list_add(&zpool->list, &pools_head);
185 spin_unlock(&pools_lock);
186
187 return zpool;
188}
189
190/**
191 * zpool_destroy_pool() - Destroy a zpool
192 * @pool The zpool to destroy.
193 *
194 * Implementations must guarantee this to be thread-safe,
195 * however only when destroying different pools. The same
196 * pool should only be destroyed once, and should not be used
197 * after it is destroyed.
198 *
199 * This destroys an existing zpool. The zpool should not be in use.
200 */
201void zpool_destroy_pool(struct zpool *zpool)
202{
203 pr_info("destroying pool type %s\n", zpool->type);
204
205 spin_lock(&pools_lock);
206 list_del(&zpool->list);
207 spin_unlock(&pools_lock);
208 zpool->driver->destroy(zpool->pool);
209 zpool_put_driver(zpool->driver);
210 kfree(zpool);
211}
212
213/**
214 * zpool_get_type() - Get the type of the zpool
215 * @pool The zpool to check
216 *
217 * This returns the type of the pool.
218 *
219 * Implementations must guarantee this to be thread-safe.
220 *
221 * Returns: The type of zpool.
222 */
223char *zpool_get_type(struct zpool *zpool)
224{
225 return zpool->type;
226}
227
228/**
229 * zpool_malloc() - Allocate memory
230 * @pool The zpool to allocate from.
231 * @size The amount of memory to allocate.
232 * @gfp The GFP flags to use when allocating memory.
233 * @handle Pointer to the handle to set
234 *
235 * This allocates the requested amount of memory from the pool.
236 * The gfp flags will be used when allocating memory, if the
237 * implementation supports it. The provided @handle will be
238 * set to the allocated object handle.
239 *
240 * Implementations must guarantee this to be thread-safe.
241 *
242 * Returns: 0 on success, negative value on error.
243 */
244int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp,
245 unsigned long *handle)
246{
247 return zpool->driver->malloc(zpool->pool, size, gfp, handle);
248}
249
250/**
251 * zpool_free() - Free previously allocated memory
252 * @pool The zpool that allocated the memory.
253 * @handle The handle to the memory to free.
254 *
255 * This frees previously allocated memory. This does not guarantee
256 * that the pool will actually free memory, only that the memory
257 * in the pool will become available for use by the pool.
258 *
259 * Implementations must guarantee this to be thread-safe,
260 * however only when freeing different handles. The same
261 * handle should only be freed once, and should not be used
262 * after freeing.
263 */
264void zpool_free(struct zpool *zpool, unsigned long handle)
265{
266 zpool->driver->free(zpool->pool, handle);
267}
268
269/**
270 * zpool_shrink() - Shrink the pool size
271 * @pool The zpool to shrink.
272 * @pages The number of pages to shrink the pool.
273 * @reclaimed The number of pages successfully evicted.
274 *
275 * This attempts to shrink the actual memory size of the pool
276 * by evicting currently used handle(s). If the pool was
277 * created with no zpool_ops, or the evict call fails for any
278 * of the handles, this will fail. If non-NULL, the @reclaimed
279 * parameter will be set to the number of pages reclaimed,
280 * which may be more than the number of pages requested.
281 *
282 * Implementations must guarantee this to be thread-safe.
283 *
284 * Returns: 0 on success, negative value on error/failure.
285 */
286int zpool_shrink(struct zpool *zpool, unsigned int pages,
287 unsigned int *reclaimed)
288{
289 return zpool->driver->shrink(zpool->pool, pages, reclaimed);
290}
291
292/**
293 * zpool_map_handle() - Map a previously allocated handle into memory
294 * @pool The zpool that the handle was allocated from
295 * @handle The handle to map
296 * @mm How the memory should be mapped
297 *
298 * This maps a previously allocated handle into memory. The @mm
299 * param indicates to the implementation how the memory will be
300 * used, i.e. read-only, write-only, read-write. If the
301 * implementation does not support it, the memory will be treated
302 * as read-write.
303 *
304 * This may hold locks, disable interrupts, and/or preemption,
305 * and the zpool_unmap_handle() must be called to undo those
306 * actions. The code that uses the mapped handle should complete
307 * its operations on the mapped handle memory quickly and unmap
308 * as soon as possible. As the implementation may use per-cpu
309 * data, multiple handles should not be mapped concurrently on
310 * any cpu.
311 *
312 * Returns: A pointer to the handle's mapped memory area.
313 */
314void *zpool_map_handle(struct zpool *zpool, unsigned long handle,
315 enum zpool_mapmode mapmode)
316{
317 return zpool->driver->map(zpool->pool, handle, mapmode);
318}
319
320/**
321 * zpool_unmap_handle() - Unmap a previously mapped handle
322 * @pool The zpool that the handle was allocated from
323 * @handle The handle to unmap
324 *
325 * This unmaps a previously mapped handle. Any locks or other
326 * actions that the implementation took in zpool_map_handle()
327 * will be undone here. The memory area returned from
328 * zpool_map_handle() should no longer be used after this.
329 */
330void zpool_unmap_handle(struct zpool *zpool, unsigned long handle)
331{
332 zpool->driver->unmap(zpool->pool, handle);
333}
334
335/**
336 * zpool_get_total_size() - The total size of the pool
337 * @pool The zpool to check
338 *
339 * This returns the total size in bytes of the pool.
340 *
341 * Returns: Total size of the zpool in bytes.
342 */
343u64 zpool_get_total_size(struct zpool *zpool)
344{
345 return zpool->driver->total_size(zpool->pool);
346}
347
348static int __init init_zpool(void)
349{
350 pr_info("loaded\n");
351 return 0;
352}
353
354static void __exit exit_zpool(void)
355{
356 pr_info("unloaded\n");
357}
358
359module_init(init_zpool);
360module_exit(exit_zpool);
361
362MODULE_LICENSE("GPL");
363MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
364MODULE_DESCRIPTION("Common API for compressed memory storage");
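
The kernel-doc above amounts to a small create/allocate/map/unmap lifecycle. A minimal sketch of a hypothetical in-kernel client of this API, assuming CONFIG_ZPOOL and a registered "zbud" backend; the function and buffer names are invented for illustration and error handling is abbreviated:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/zpool.h>

static int store_blob(const void *src, size_t len)
{
	struct zpool *pool;
	unsigned long handle;
	void *dst;

	/* NULL ops means the pool will not be shrinkable. */
	pool = zpool_create_pool("zbud", GFP_KERNEL, NULL);
	if (!pool)
		return -ENOMEM;

	if (zpool_malloc(pool, len, __GFP_NORETRY | __GFP_NOWARN, &handle)) {
		zpool_destroy_pool(pool);
		return -ENOMEM;
	}

	/* Map write-only, copy the data in, and unmap promptly. */
	dst = zpool_map_handle(pool, handle, ZPOOL_MM_WO);
	memcpy(dst, src, len);
	zpool_unmap_handle(pool, handle);

	pr_info("pool now holds %llu bytes\n",
		(unsigned long long)zpool_get_total_size(pool));

	zpool_free(pool, handle);
	zpool_destroy_pool(pool);
	return 0;
}

zswap, converted below, follows the same pattern, except that it keeps one long-lived pool and passes a zpool_ops with an evict callback so the pool can be shrunk.
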
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index fe78189624cf..4e2fc83cb394 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -92,6 +92,7 @@
92#include <linux/spinlock.h> 92#include <linux/spinlock.h>
93#include <linux/types.h> 93#include <linux/types.h>
94#include <linux/zsmalloc.h> 94#include <linux/zsmalloc.h>
95#include <linux/zpool.h>
95 96
96/* 97/*
97 * This must be a power of 2 and greater than or equal to sizeof(link_free). 98 * This must be a power of 2 and greater than or equal to sizeof(link_free).
@@ -240,6 +241,81 @@ struct mapping_area {
240 enum zs_mapmode vm_mm; /* mapping mode */ 241 enum zs_mapmode vm_mm; /* mapping mode */
241}; 242};
242 243
244/* zpool driver */
245
246#ifdef CONFIG_ZPOOL
247
248static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
249{
250 return zs_create_pool(gfp);
251}
252
253static void zs_zpool_destroy(void *pool)
254{
255 zs_destroy_pool(pool);
256}
257
258static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
259 unsigned long *handle)
260{
261 *handle = zs_malloc(pool, size);
262 return *handle ? 0 : -1;
263}
264static void zs_zpool_free(void *pool, unsigned long handle)
265{
266 zs_free(pool, handle);
267}
268
269static int zs_zpool_shrink(void *pool, unsigned int pages,
270 unsigned int *reclaimed)
271{
272 return -EINVAL;
273}
274
275static void *zs_zpool_map(void *pool, unsigned long handle,
276 enum zpool_mapmode mm)
277{
278 enum zs_mapmode zs_mm;
279
280 switch (mm) {
281 case ZPOOL_MM_RO:
282 zs_mm = ZS_MM_RO;
283 break;
284 case ZPOOL_MM_WO:
285 zs_mm = ZS_MM_WO;
286 break;
287 case ZPOOL_MM_RW: /* fallthru */
288 default:
289 zs_mm = ZS_MM_RW;
290 break;
291 }
292
293 return zs_map_object(pool, handle, zs_mm);
294}
295static void zs_zpool_unmap(void *pool, unsigned long handle)
296{
297 zs_unmap_object(pool, handle);
298}
299
300static u64 zs_zpool_total_size(void *pool)
301{
302 return zs_get_total_size_bytes(pool);
303}
304
305static struct zpool_driver zs_zpool_driver = {
306 .type = "zsmalloc",
307 .owner = THIS_MODULE,
308 .create = zs_zpool_create,
309 .destroy = zs_zpool_destroy,
310 .malloc = zs_zpool_malloc,
311 .free = zs_zpool_free,
312 .shrink = zs_zpool_shrink,
313 .map = zs_zpool_map,
314 .unmap = zs_zpool_unmap,
315 .total_size = zs_zpool_total_size,
316};
317
318#endif /* CONFIG_ZPOOL */
243 319
244/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ 320/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
245static DEFINE_PER_CPU(struct mapping_area, zs_map_area); 321static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
@@ -690,7 +766,7 @@ static inline void __zs_cpu_down(struct mapping_area *area)
690static inline void *__zs_map_object(struct mapping_area *area, 766static inline void *__zs_map_object(struct mapping_area *area,
691 struct page *pages[2], int off, int size) 767 struct page *pages[2], int off, int size)
692{ 768{
693 BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages)); 769 BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
694 area->vm_addr = area->vm->addr; 770 area->vm_addr = area->vm->addr;
695 return area->vm_addr + off; 771 return area->vm_addr + off;
696} 772}
@@ -814,6 +890,10 @@ static void zs_exit(void)
814{ 890{
815 int cpu; 891 int cpu;
816 892
893#ifdef CONFIG_ZPOOL
894 zpool_unregister_driver(&zs_zpool_driver);
895#endif
896
817 cpu_notifier_register_begin(); 897 cpu_notifier_register_begin();
818 898
819 for_each_online_cpu(cpu) 899 for_each_online_cpu(cpu)
@@ -840,6 +920,10 @@ static int zs_init(void)
840 920
841 cpu_notifier_register_done(); 921 cpu_notifier_register_done();
842 922
923#ifdef CONFIG_ZPOOL
924 zpool_register_driver(&zs_zpool_driver);
925#endif
926
843 return 0; 927 return 0;
844fail: 928fail:
845 zs_exit(); 929 zs_exit();
diff --git a/mm/zswap.c b/mm/zswap.c
index 008388fe7b0f..032c21eeab2b 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -34,7 +34,7 @@
34#include <linux/swap.h> 34#include <linux/swap.h>
35#include <linux/crypto.h> 35#include <linux/crypto.h>
36#include <linux/mempool.h> 36#include <linux/mempool.h>
37#include <linux/zbud.h> 37#include <linux/zpool.h>
38 38
39#include <linux/mm_types.h> 39#include <linux/mm_types.h>
40#include <linux/page-flags.h> 40#include <linux/page-flags.h>
@@ -45,8 +45,8 @@
45/********************************* 45/*********************************
46* statistics 46* statistics
47**********************************/ 47**********************************/
48/* Number of memory pages used by the compressed pool */ 48/* Total bytes used by the compressed storage */
49static u64 zswap_pool_pages; 49static u64 zswap_pool_total_size;
50/* The number of compressed pages currently stored in zswap */ 50/* The number of compressed pages currently stored in zswap */
51static atomic_t zswap_stored_pages = ATOMIC_INIT(0); 51static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
52 52
@@ -89,8 +89,13 @@ static unsigned int zswap_max_pool_percent = 20;
89module_param_named(max_pool_percent, 89module_param_named(max_pool_percent,
90 zswap_max_pool_percent, uint, 0644); 90 zswap_max_pool_percent, uint, 0644);
91 91
92/* zbud_pool is shared by all of zswap backend */ 92/* Compressed storage to use */
93static struct zbud_pool *zswap_pool; 93#define ZSWAP_ZPOOL_DEFAULT "zbud"
94static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
95module_param_named(zpool, zswap_zpool_type, charp, 0444);
96
97/* zpool is shared by all of zswap backend */
98static struct zpool *zswap_pool;
94 99
95/********************************* 100/*********************************
96* compression functions 101* compression functions
@@ -168,7 +173,7 @@ static void zswap_comp_exit(void)
168 * be held while changing the refcount. Since the lock must 173 * be held while changing the refcount. Since the lock must
169 * be held, there is no reason to also make refcount atomic. 174 * be held, there is no reason to also make refcount atomic.
170 * offset - the swap offset for the entry. Index into the red-black tree. 175 * offset - the swap offset for the entry. Index into the red-black tree.
171 * handle - zbud allocation handle that stores the compressed page data 176 * handle - zpool allocation handle that stores the compressed page data
172 * length - the length in bytes of the compressed page data. Needed during 177 * length - the length in bytes of the compressed page data. Needed during
173 * decompression 178 * decompression
174 */ 179 */
@@ -284,15 +289,15 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
284} 289}
285 290
286/* 291/*
287 * Carries out the common pattern of freeing an entry's zbud allocation, 292 * Carries out the common pattern of freeing an entry's zpool allocation,
288 * freeing the entry itself, and decrementing the number of stored pages. 293 * freeing the entry itself, and decrementing the number of stored pages.
289 */ 294 */
290static void zswap_free_entry(struct zswap_entry *entry) 295static void zswap_free_entry(struct zswap_entry *entry)
291{ 296{
292 zbud_free(zswap_pool, entry->handle); 297 zpool_free(zswap_pool, entry->handle);
293 zswap_entry_cache_free(entry); 298 zswap_entry_cache_free(entry);
294 atomic_dec(&zswap_stored_pages); 299 atomic_dec(&zswap_stored_pages);
295 zswap_pool_pages = zbud_get_pool_size(zswap_pool); 300 zswap_pool_total_size = zpool_get_total_size(zswap_pool);
296} 301}
297 302
298/* caller must hold the tree lock */ 303/* caller must hold the tree lock */
@@ -409,7 +414,7 @@ cleanup:
409static bool zswap_is_full(void) 414static bool zswap_is_full(void)
410{ 415{
411 return totalram_pages * zswap_max_pool_percent / 100 < 416 return totalram_pages * zswap_max_pool_percent / 100 <
412 zswap_pool_pages; 417 DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
413} 418}
414 419
415/********************************* 420/*********************************
@@ -525,7 +530,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
525 * the swap cache, the compressed version stored by zswap can be 530 * the swap cache, the compressed version stored by zswap can be
526 * freed. 531 * freed.
527 */ 532 */
528static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle) 533static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
529{ 534{
530 struct zswap_header *zhdr; 535 struct zswap_header *zhdr;
531 swp_entry_t swpentry; 536 swp_entry_t swpentry;
@@ -541,9 +546,9 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
541 }; 546 };
542 547
543 /* extract swpentry from data */ 548 /* extract swpentry from data */
544 zhdr = zbud_map(pool, handle); 549 zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
545 swpentry = zhdr->swpentry; /* here */ 550 swpentry = zhdr->swpentry; /* here */
546 zbud_unmap(pool, handle); 551 zpool_unmap_handle(pool, handle);
547 tree = zswap_trees[swp_type(swpentry)]; 552 tree = zswap_trees[swp_type(swpentry)];
548 offset = swp_offset(swpentry); 553 offset = swp_offset(swpentry);
549 554
@@ -573,13 +578,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
573 case ZSWAP_SWAPCACHE_NEW: /* page is locked */ 578 case ZSWAP_SWAPCACHE_NEW: /* page is locked */
574 /* decompress */ 579 /* decompress */
575 dlen = PAGE_SIZE; 580 dlen = PAGE_SIZE;
576 src = (u8 *)zbud_map(zswap_pool, entry->handle) + 581 src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
577 sizeof(struct zswap_header); 582 ZPOOL_MM_RO) + sizeof(struct zswap_header);
578 dst = kmap_atomic(page); 583 dst = kmap_atomic(page);
579 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, 584 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
580 entry->length, dst, &dlen); 585 entry->length, dst, &dlen);
581 kunmap_atomic(dst); 586 kunmap_atomic(dst);
582 zbud_unmap(zswap_pool, entry->handle); 587 zpool_unmap_handle(zswap_pool, entry->handle);
583 BUG_ON(ret); 588 BUG_ON(ret);
584 BUG_ON(dlen != PAGE_SIZE); 589 BUG_ON(dlen != PAGE_SIZE);
585 590
@@ -652,7 +657,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
652 /* reclaim space if needed */ 657 /* reclaim space if needed */
653 if (zswap_is_full()) { 658 if (zswap_is_full()) {
654 zswap_pool_limit_hit++; 659 zswap_pool_limit_hit++;
655 if (zbud_reclaim_page(zswap_pool, 8)) { 660 if (zpool_shrink(zswap_pool, 1, NULL)) {
656 zswap_reject_reclaim_fail++; 661 zswap_reject_reclaim_fail++;
657 ret = -ENOMEM; 662 ret = -ENOMEM;
658 goto reject; 663 goto reject;
@@ -679,7 +684,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
679 684
680 /* store */ 685 /* store */
681 len = dlen + sizeof(struct zswap_header); 686 len = dlen + sizeof(struct zswap_header);
682 ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN, 687 ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
683 &handle); 688 &handle);
684 if (ret == -ENOSPC) { 689 if (ret == -ENOSPC) {
685 zswap_reject_compress_poor++; 690 zswap_reject_compress_poor++;
@@ -689,11 +694,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
689 zswap_reject_alloc_fail++; 694 zswap_reject_alloc_fail++;
690 goto freepage; 695 goto freepage;
691 } 696 }
692 zhdr = zbud_map(zswap_pool, handle); 697 zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
693 zhdr->swpentry = swp_entry(type, offset); 698 zhdr->swpentry = swp_entry(type, offset);
694 buf = (u8 *)(zhdr + 1); 699 buf = (u8 *)(zhdr + 1);
695 memcpy(buf, dst, dlen); 700 memcpy(buf, dst, dlen);
696 zbud_unmap(zswap_pool, handle); 701 zpool_unmap_handle(zswap_pool, handle);
697 put_cpu_var(zswap_dstmem); 702 put_cpu_var(zswap_dstmem);
698 703
699 /* populate entry */ 704 /* populate entry */
@@ -716,7 +721,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
716 721
717 /* update stats */ 722 /* update stats */
718 atomic_inc(&zswap_stored_pages); 723 atomic_inc(&zswap_stored_pages);
719 zswap_pool_pages = zbud_get_pool_size(zswap_pool); 724 zswap_pool_total_size = zpool_get_total_size(zswap_pool);
720 725
721 return 0; 726 return 0;
722 727
@@ -752,13 +757,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
752 757
753 /* decompress */ 758 /* decompress */
754 dlen = PAGE_SIZE; 759 dlen = PAGE_SIZE;
755 src = (u8 *)zbud_map(zswap_pool, entry->handle) + 760 src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
756 sizeof(struct zswap_header); 761 ZPOOL_MM_RO) + sizeof(struct zswap_header);
757 dst = kmap_atomic(page); 762 dst = kmap_atomic(page);
758 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length, 763 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
759 dst, &dlen); 764 dst, &dlen);
760 kunmap_atomic(dst); 765 kunmap_atomic(dst);
761 zbud_unmap(zswap_pool, entry->handle); 766 zpool_unmap_handle(zswap_pool, entry->handle);
762 BUG_ON(ret); 767 BUG_ON(ret);
763 768
764 spin_lock(&tree->lock); 769 spin_lock(&tree->lock);
@@ -811,7 +816,7 @@ static void zswap_frontswap_invalidate_area(unsigned type)
811 zswap_trees[type] = NULL; 816 zswap_trees[type] = NULL;
812} 817}
813 818
814static struct zbud_ops zswap_zbud_ops = { 819static struct zpool_ops zswap_zpool_ops = {
815 .evict = zswap_writeback_entry 820 .evict = zswap_writeback_entry
816}; 821};
817 822
@@ -869,8 +874,8 @@ static int __init zswap_debugfs_init(void)
869 zswap_debugfs_root, &zswap_written_back_pages); 874 zswap_debugfs_root, &zswap_written_back_pages);
870 debugfs_create_u64("duplicate_entry", S_IRUGO, 875 debugfs_create_u64("duplicate_entry", S_IRUGO,
871 zswap_debugfs_root, &zswap_duplicate_entry); 876 zswap_debugfs_root, &zswap_duplicate_entry);
872 debugfs_create_u64("pool_pages", S_IRUGO, 877 debugfs_create_u64("pool_total_size", S_IRUGO,
873 zswap_debugfs_root, &zswap_pool_pages); 878 zswap_debugfs_root, &zswap_pool_total_size);
874 debugfs_create_atomic_t("stored_pages", S_IRUGO, 879 debugfs_create_atomic_t("stored_pages", S_IRUGO,
875 zswap_debugfs_root, &zswap_stored_pages); 880 zswap_debugfs_root, &zswap_stored_pages);
876 881
@@ -895,16 +900,26 @@ static void __exit zswap_debugfs_exit(void) { }
895**********************************/ 900**********************************/
896static int __init init_zswap(void) 901static int __init init_zswap(void)
897{ 902{
903 gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
904
898 if (!zswap_enabled) 905 if (!zswap_enabled)
899 return 0; 906 return 0;
900 907
901 pr_info("loading zswap\n"); 908 pr_info("loading zswap\n");
902 909
903 zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops); 910 zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, &zswap_zpool_ops);
911 if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
912 pr_info("%s zpool not available\n", zswap_zpool_type);
913 zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
914 zswap_pool = zpool_create_pool(zswap_zpool_type, gfp,
915 &zswap_zpool_ops);
916 }
904 if (!zswap_pool) { 917 if (!zswap_pool) {
905 pr_err("zbud pool creation failed\n"); 918 pr_err("%s zpool not available\n", zswap_zpool_type);
919 pr_err("zpool creation failed\n");
906 goto error; 920 goto error;
907 } 921 }
922 pr_info("using %s pool\n", zswap_zpool_type);
908 923
909 if (zswap_entry_cache_create()) { 924 if (zswap_entry_cache_create()) {
910 pr_err("entry cache creation failed\n"); 925 pr_err("entry cache creation failed\n");
@@ -928,7 +943,7 @@ pcpufail:
928compfail: 943compfail:
929 zswap_entry_cache_destory(); 944 zswap_entry_cache_destory();
930cachefail: 945cachefail:
931 zbud_destroy_pool(zswap_pool); 946 zpool_destroy_pool(zswap_pool);
932error: 947error:
933 return -ENOMEM; 948 return -ENOMEM;
934} 949}
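
The conversion above also shows the reclaim half of the zpool contract: a client that wants a shrinkable pool passes a struct zpool_ops with an evict callback at creation time, and later calls zpool_shrink() when it is over budget, as zswap_frontswap_store() now does. A hedged sketch of that wiring; my_evict and my_pool are invented names, not part of the patch:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/zpool.h>

static struct zpool *my_pool;	/* hypothetical long-lived client pool */

/* Invoked by the backend for each handle it wants written back. */
static int my_evict(struct zpool *pool, unsigned long handle)
{
	/*
	 * A real client would write the object identified by @handle back to
	 * its backing store here (compare zswap_writeback_entry()), then drop
	 * it.  Returning nonzero tells the backend the eviction failed.
	 */
	zpool_free(pool, handle);
	return 0;
}

static struct zpool_ops my_zpool_ops = {
	.evict = my_evict,
};

static int my_pool_init(void)
{
	my_pool = zpool_create_pool("zbud", __GFP_NORETRY | __GFP_NOWARN,
				    &my_zpool_ops);
	return my_pool ? 0 : -ENOMEM;
}

static int my_reclaim_one(void)
{
	/* Ask the backend to evict roughly one page's worth of handles. */
	return zpool_shrink(my_pool, 1, NULL);
}
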
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 022d18ab27a6..52c43f904220 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -188,7 +188,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
188 188
189 /* Reached the end of the list, so insert after 'frag_entry_last'. */ 189 /* Reached the end of the list, so insert after 'frag_entry_last'. */
190 if (likely(frag_entry_last)) { 190 if (likely(frag_entry_last)) {
191 hlist_add_after(&frag_entry_last->list, &frag_entry_new->list); 191 hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list);
192 chain->size += skb->len - hdr_size; 192 chain->size += skb->len - hdr_size;
193 chain->timestamp = jiffies; 193 chain->timestamp = jiffies;
194 ret = true; 194 ret = true;
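
This and the following networking hunks are mechanical fallout of the list API rename: hlist_add_after(_rcu)() becomes hlist_add_behind(_rcu)(), and the argument order flips so the node being inserted comes first and the node it is placed behind comes second. Roughly, with variable names echoing the hunks below:

	/* old: existing node first, node being added second */
	hlist_add_after(&last->list, &new->list);
	hlist_add_after_rcu(&last->list, &new->list);

	/* new: node being added first, the node it goes behind second */
	hlist_add_behind(&new->list, &last->list);
	hlist_add_behind_rcu(&new->list, &last->list);
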
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b4845f4b2bb4..7751c92c8c57 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br,
1174 } 1174 }
1175 1175
1176 if (slot) 1176 if (slot)
1177 hlist_add_after_rcu(slot, &port->rlist); 1177 hlist_add_behind_rcu(&port->rlist, slot);
1178 else 1178 else
1179 hlist_add_head_rcu(&port->rlist, &br->router_list); 1179 hlist_add_head_rcu(&port->rlist, &br->router_list);
1180} 1180}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5afeb5aa4c7c..e9cb2588e416 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -940,7 +940,7 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
940 last = li; 940 last = li;
941 } 941 }
942 if (last) 942 if (last)
943 hlist_add_after_rcu(&last->hlist, &new->hlist); 943 hlist_add_behind_rcu(&new->hlist, &last->hlist);
944 else 944 else
945 hlist_add_before_rcu(&new->hlist, &li->hlist); 945 hlist_add_before_rcu(&new->hlist, &li->hlist);
946 } 946 }
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 731e1e1722d9..fd0dc47f471d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -277,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
277 last = p; 277 last = p;
278 } 278 }
279 if (last) 279 if (last)
280 hlist_add_after_rcu(&last->list, &newp->list); 280 hlist_add_behind_rcu(&newp->list, &last->list);
281 else 281 else
282 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 282 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
283out: 283out:
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0525d78ba328..beeed602aeb3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -389,7 +389,7 @@ redo:
389 if (h != h0) 389 if (h != h0)
390 continue; 390 continue;
391 hlist_del(&pol->bydst); 391 hlist_del(&pol->bydst);
392 hlist_add_after(entry0, &pol->bydst); 392 hlist_add_behind(&pol->bydst, entry0);
393 } 393 }
394 entry0 = &pol->bydst; 394 entry0 = &pol->bydst;
395 } 395 }
@@ -654,7 +654,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
654 break; 654 break;
655 } 655 }
656 if (newpos) 656 if (newpos)
657 hlist_add_after(newpos, &policy->bydst); 657 hlist_add_behind(&policy->bydst, newpos);
658 else 658 else
659 hlist_add_head(&policy->bydst, chain); 659 hlist_add_head(&policy->bydst, chain);
660 xfrm_pol_hold(policy); 660 xfrm_pol_hold(policy);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 182be0f12407..31a731e06f50 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -309,9 +309,12 @@ our $Operators = qr{
309our $c90_Keywords = qr{do|for|while|if|else|return|goto|continue|switch|default|case|break}x; 309our $c90_Keywords = qr{do|for|while|if|else|return|goto|continue|switch|default|case|break}x;
310 310
311our $NonptrType; 311our $NonptrType;
312our $NonptrTypeMisordered;
312our $NonptrTypeWithAttr; 313our $NonptrTypeWithAttr;
313our $Type; 314our $Type;
315our $TypeMisordered;
314our $Declare; 316our $Declare;
317our $DeclareMisordered;
315 318
316our $NON_ASCII_UTF8 = qr{ 319our $NON_ASCII_UTF8 = qr{
317 [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte 320 [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
@@ -353,16 +356,36 @@ our $signature_tags = qr{(?xi:
353 Cc: 356 Cc:
354)}; 357)};
355 358
359our @typeListMisordered = (
360 qr{char\s+(?:un)?signed},
361 qr{int\s+(?:(?:un)?signed\s+)?short\s},
362 qr{int\s+short(?:\s+(?:un)?signed)},
363 qr{short\s+int(?:\s+(?:un)?signed)},
364 qr{(?:un)?signed\s+int\s+short},
365 qr{short\s+(?:un)?signed},
366 qr{long\s+int\s+(?:un)?signed},
367 qr{int\s+long\s+(?:un)?signed},
368 qr{long\s+(?:un)?signed\s+int},
369 qr{int\s+(?:un)?signed\s+long},
370 qr{int\s+(?:un)?signed},
371 qr{int\s+long\s+long\s+(?:un)?signed},
372 qr{long\s+long\s+int\s+(?:un)?signed},
373 qr{long\s+long\s+(?:un)?signed\s+int},
374 qr{long\s+long\s+(?:un)?signed},
375 qr{long\s+(?:un)?signed},
376);
377
356our @typeList = ( 378our @typeList = (
357 qr{void}, 379 qr{void},
358 qr{(?:unsigned\s+)?char}, 380 qr{(?:(?:un)?signed\s+)?char},
359 qr{(?:unsigned\s+)?short}, 381 qr{(?:(?:un)?signed\s+)?short\s+int},
360 qr{(?:unsigned\s+)?int}, 382 qr{(?:(?:un)?signed\s+)?short},
361 qr{(?:unsigned\s+)?long}, 383 qr{(?:(?:un)?signed\s+)?int},
362 qr{(?:unsigned\s+)?long\s+int}, 384 qr{(?:(?:un)?signed\s+)?long\s+int},
363 qr{(?:unsigned\s+)?long\s+long}, 385 qr{(?:(?:un)?signed\s+)?long\s+long\s+int},
364 qr{(?:unsigned\s+)?long\s+long\s+int}, 386 qr{(?:(?:un)?signed\s+)?long\s+long},
365 qr{unsigned}, 387 qr{(?:(?:un)?signed\s+)?long},
388 qr{(?:un)?signed},
366 qr{float}, 389 qr{float},
367 qr{double}, 390 qr{double},
368 qr{bool}, 391 qr{bool},
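
The new @typeListMisordered table (and the $TypeMisordered/$DeclareMisordered regexes built from it) enumerates spellings where signed/unsigned or int appear out of the canonical order; it feeds the MISORDERED_TYPE warning added further down in this patch. A few declarations it is meant to catch, with illustrative variable names:

	int unsigned counter;		/* warned: write "unsigned int counter" */
	short signed int depth;		/* warned: write "signed short int depth" */
	long long unsigned int bytes;	/* warned: write "unsigned long long int bytes" */
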
@@ -372,6 +395,7 @@ our @typeList = (
372 qr{${Ident}_t}, 395 qr{${Ident}_t},
373 qr{${Ident}_handler}, 396 qr{${Ident}_handler},
374 qr{${Ident}_handler_fn}, 397 qr{${Ident}_handler_fn},
398 @typeListMisordered,
375); 399);
376our @typeListWithAttr = ( 400our @typeListWithAttr = (
377 @typeList, 401 @typeList,
@@ -399,11 +423,6 @@ foreach my $entry (@mode_permission_funcs) {
399 $mode_perms_search .= $entry->[0]; 423 $mode_perms_search .= $entry->[0];
400} 424}
401 425
402our $declaration_macros = qr{(?x:
403 (?:$Storage\s+)?(?:DECLARE|DEFINE)_[A-Z]+\s*\(|
404 (?:$Storage\s+)?LIST_HEAD\s*\(
405)};
406
407our $allowed_asm_includes = qr{(?x: 426our $allowed_asm_includes = qr{(?x:
408 irq| 427 irq|
409 memory 428 memory
@@ -413,6 +432,7 @@ our $allowed_asm_includes = qr{(?x:
413sub build_types { 432sub build_types {
414 my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)"; 433 my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)";
415 my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)"; 434 my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)";
435 my $Misordered = "(?x: \n" . join("|\n ", @typeListMisordered) . "\n)";
416 my $allWithAttr = "(?x: \n" . join("|\n ", @typeListWithAttr) . "\n)"; 436 my $allWithAttr = "(?x: \n" . join("|\n ", @typeListWithAttr) . "\n)";
417 $Modifier = qr{(?:$Attribute|$Sparse|$mods)}; 437 $Modifier = qr{(?:$Attribute|$Sparse|$mods)};
418 $NonptrType = qr{ 438 $NonptrType = qr{
@@ -424,6 +444,13 @@ sub build_types {
424 ) 444 )
425 (?:\s+$Modifier|\s+const)* 445 (?:\s+$Modifier|\s+const)*
426 }x; 446 }x;
447 $NonptrTypeMisordered = qr{
448 (?:$Modifier\s+|const\s+)*
449 (?:
450 (?:${Misordered}\b)
451 )
452 (?:\s+$Modifier|\s+const)*
453 }x;
427 $NonptrTypeWithAttr = qr{ 454 $NonptrTypeWithAttr = qr{
428 (?:$Modifier\s+|const\s+)* 455 (?:$Modifier\s+|const\s+)*
429 (?: 456 (?:
@@ -435,10 +462,16 @@ sub build_types {
435 }x; 462 }x;
436 $Type = qr{ 463 $Type = qr{
437 $NonptrType 464 $NonptrType
438 (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)? 465 (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)?
466 (?:\s+$Inline|\s+$Modifier)*
467 }x;
468 $TypeMisordered = qr{
469 $NonptrTypeMisordered
470 (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)?
439 (?:\s+$Inline|\s+$Modifier)* 471 (?:\s+$Inline|\s+$Modifier)*
440 }x; 472 }x;
441 $Declare = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type}; 473 $Declare = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type};
474 $DeclareMisordered = qr{(?:$Storage\s+(?:$Inline\s+)?)?$TypeMisordered};
442} 475}
443build_types(); 476build_types();
444 477
@@ -452,6 +485,12 @@ our $balanced_parens = qr/(\((?:[^\(\)]++|(?-1))*\))/;
452our $LvalOrFunc = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*}; 485our $LvalOrFunc = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*};
453our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)}; 486our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)};
454 487
488our $declaration_macros = qr{(?x:
489 (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,2}\s*\(|
490 (?:$Storage\s+)?LIST_HEAD\s*\(|
491 (?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(
492)};
493
455sub deparenthesize { 494sub deparenthesize {
456 my ($string) = @_; 495 my ($string) = @_;
457 return "" if (!defined($string)); 496 return "" if (!defined($string));
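
The $declaration_macros pattern moves below build_types(), apparently because it now interpolates ${Type}, and it grows two cases: DEFINE/DECLARE macros with optional uppercase prefixes and suffixes, and uninitialized_var() wrappers. Lines like the following (identifiers illustrative) are now treated as declarations by the blank-line checks:

	static DEFINE_MUTEX(probe_lock);
	static LIST_HEAD(pending_list);
	unsigned long uninitialized_var(flags);
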
@@ -550,11 +589,43 @@ sub seed_camelcase_includes {
550 } 589 }
551} 590}
552 591
592sub git_commit_info {
593 my ($commit, $id, $desc) = @_;
594
595 return ($id, $desc) if ((which("git") eq "") || !(-e ".git"));
596
597 my $output = `git log --no-color --format='%H %s' -1 $commit 2>&1`;
598 $output =~ s/^\s*//gm;
599 my @lines = split("\n", $output);
600
601 if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous\./) {
602# Maybe one day convert this block of bash into something that returns
603# all matching commit ids, but it's very slow...
604#
605# echo "checking commits $1..."
606# git rev-list --remotes | grep -i "^$1" |
607# while read line ; do
608# git log --format='%H %s' -1 $line |
609# echo "commit $(cut -c 1-12,41-)"
610# done
611 } elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./) {
612 } else {
613 $id = substr($lines[0], 0, 12);
614 $desc = substr($lines[0], 41);
615 }
616
617 return ($id, $desc);
618}
619
553$chk_signoff = 0 if ($file); 620$chk_signoff = 0 if ($file);
554 621
555my @rawlines = (); 622my @rawlines = ();
556my @lines = (); 623my @lines = ();
557my @fixed = (); 624my @fixed = ();
625my @fixed_inserted = ();
626my @fixed_deleted = ();
627my $fixlinenr = -1;
628
558my $vname; 629my $vname;
559for my $filename (@ARGV) { 630for my $filename (@ARGV) {
560 my $FILE; 631 my $FILE;
@@ -583,6 +654,9 @@ for my $filename (@ARGV) {
583 @rawlines = (); 654 @rawlines = ();
584 @lines = (); 655 @lines = ();
585 @fixed = (); 656 @fixed = ();
657 @fixed_inserted = ();
658 @fixed_deleted = ();
659 $fixlinenr = -1;
586} 660}
587 661
588exit($exit); 662exit($exit);
@@ -674,6 +748,18 @@ sub format_email {
674 return $formatted_email; 748 return $formatted_email;
675} 749}
676 750
751sub which {
752 my ($bin) = @_;
753
754 foreach my $path (split(/:/, $ENV{PATH})) {
755 if (-e "$path/$bin") {
756 return "$path/$bin";
757 }
758 }
759
760 return "";
761}
762
677sub which_conf { 763sub which_conf {
678 my ($conf) = @_; 764 my ($conf) = @_;
679 765
@@ -1483,6 +1569,90 @@ sub report_dump {
1483 our @report; 1569 our @report;
1484} 1570}
1485 1571
1572sub fixup_current_range {
1573 my ($lineRef, $offset, $length) = @_;
1574
1575 if ($$lineRef =~ /^\@\@ -\d+,\d+ \+(\d+),(\d+) \@\@/) {
1576 my $o = $1;
1577 my $l = $2;
1578 my $no = $o + $offset;
1579 my $nl = $l + $length;
1580 $$lineRef =~ s/\+$o,$l \@\@/\+$no,$nl \@\@/;
1581 }
1582}
1583
1584sub fix_inserted_deleted_lines {
1585 my ($linesRef, $insertedRef, $deletedRef) = @_;
1586
1587 my $range_last_linenr = 0;
1588 my $delta_offset = 0;
1589
1590 my $old_linenr = 0;
1591 my $new_linenr = 0;
1592
1593 my $next_insert = 0;
1594 my $next_delete = 0;
1595
1596 my @lines = ();
1597
1598 my $inserted = @{$insertedRef}[$next_insert++];
1599 my $deleted = @{$deletedRef}[$next_delete++];
1600
1601 foreach my $old_line (@{$linesRef}) {
1602 my $save_line = 1;
1603 my $line = $old_line; #don't modify the array
1604 if ($line =~ /^(?:\+\+\+\|\-\-\-)\s+\S+/) { #new filename
1605 $delta_offset = 0;
1606 } elsif ($line =~ /^\@\@ -\d+,\d+ \+\d+,\d+ \@\@/) { #new hunk
1607 $range_last_linenr = $new_linenr;
1608 fixup_current_range(\$line, $delta_offset, 0);
1609 }
1610
1611 while (defined($deleted) && ${$deleted}{'LINENR'} == $old_linenr) {
1612 $deleted = @{$deletedRef}[$next_delete++];
1613 $save_line = 0;
1614 fixup_current_range(\$lines[$range_last_linenr], $delta_offset--, -1);
1615 }
1616
1617 while (defined($inserted) && ${$inserted}{'LINENR'} == $old_linenr) {
1618 push(@lines, ${$inserted}{'LINE'});
1619 $inserted = @{$insertedRef}[$next_insert++];
1620 $new_linenr++;
1621 fixup_current_range(\$lines[$range_last_linenr], $delta_offset++, 1);
1622 }
1623
1624 if ($save_line) {
1625 push(@lines, $line);
1626 $new_linenr++;
1627 }
1628
1629 $old_linenr++;
1630 }
1631
1632 return @lines;
1633}
1634
1635sub fix_insert_line {
1636 my ($linenr, $line) = @_;
1637
1638 my $inserted = {
1639 LINENR => $linenr,
1640 LINE => $line,
1641 };
1642 push(@fixed_inserted, $inserted);
1643}
1644
1645sub fix_delete_line {
1646 my ($linenr, $line) = @_;
1647
1648 my $deleted = {
1649 LINENR => $linenr,
1650 LINE => $line,
1651 };
1652
1653 push(@fixed_deleted, $deleted);
1654}
1655
1486sub ERROR { 1656sub ERROR {
1487 my ($type, $msg) = @_; 1657 my ($type, $msg) = @_;
1488 1658
@@ -1637,11 +1807,13 @@ sub process {
1637 my $signoff = 0; 1807 my $signoff = 0;
1638 my $is_patch = 0; 1808 my $is_patch = 0;
1639 1809
1640 my $in_header_lines = 1; 1810 my $in_header_lines = $file ? 0 : 1;
1641 my $in_commit_log = 0; #Scanning lines before patch 1811 my $in_commit_log = 0; #Scanning lines before patch
1642 1812 my $reported_maintainer_file = 0;
1643 my $non_utf8_charset = 0; 1813 my $non_utf8_charset = 0;
1644 1814
1815 my $last_blank_line = 0;
1816
1645 our @report = (); 1817 our @report = ();
1646 our $cnt_lines = 0; 1818 our $cnt_lines = 0;
1647 our $cnt_error = 0; 1819 our $cnt_error = 0;
@@ -1759,8 +1931,10 @@ sub process {
1759 1931
1760 $realcnt = 0; 1932 $realcnt = 0;
1761 $linenr = 0; 1933 $linenr = 0;
1934 $fixlinenr = -1;
1762 foreach my $line (@lines) { 1935 foreach my $line (@lines) {
1763 $linenr++; 1936 $linenr++;
1937 $fixlinenr++;
1764 my $sline = $line; #copy of $line 1938 my $sline = $line; #copy of $line
1765 $sline =~ s/$;/ /g; #with comments as spaces 1939 $sline =~ s/$;/ /g; #with comments as spaces
1766 1940
@@ -1891,7 +2065,7 @@ sub process {
1891 if (WARN("BAD_SIGN_OFF", 2065 if (WARN("BAD_SIGN_OFF",
1892 "Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) && 2066 "Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) &&
1893 $fix) { 2067 $fix) {
1894 $fixed[$linenr - 1] = 2068 $fixed[$fixlinenr] =
1895 "$ucfirst_sign_off $email"; 2069 "$ucfirst_sign_off $email";
1896 } 2070 }
1897 } 2071 }
@@ -1899,7 +2073,7 @@ sub process {
1899 if (WARN("BAD_SIGN_OFF", 2073 if (WARN("BAD_SIGN_OFF",
1900 "'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) && 2074 "'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) &&
1901 $fix) { 2075 $fix) {
1902 $fixed[$linenr - 1] = 2076 $fixed[$fixlinenr] =
1903 "$ucfirst_sign_off $email"; 2077 "$ucfirst_sign_off $email";
1904 } 2078 }
1905 2079
@@ -1908,7 +2082,7 @@ sub process {
1908 if (WARN("BAD_SIGN_OFF", 2082 if (WARN("BAD_SIGN_OFF",
1909 "Use a single space after $ucfirst_sign_off\n" . $herecurr) && 2083 "Use a single space after $ucfirst_sign_off\n" . $herecurr) &&
1910 $fix) { 2084 $fix) {
1911 $fixed[$linenr - 1] = 2085 $fixed[$fixlinenr] =
1912 "$ucfirst_sign_off $email"; 2086 "$ucfirst_sign_off $email";
1913 } 2087 }
1914 } 2088 }
@@ -1956,6 +2130,31 @@ sub process {
1956 "Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr); 2130 "Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr);
1957 } 2131 }
1958 2132
2133# Check for improperly formed commit descriptions
2134 if ($in_commit_log &&
2135 $line =~ /\bcommit\s+[0-9a-f]{5,}/i &&
2136 $line !~ /\b[Cc]ommit [0-9a-f]{12,16} \("/) {
2137 $line =~ /\b(c)ommit\s+([0-9a-f]{5,})/i;
2138 my $init_char = $1;
2139 my $orig_commit = lc($2);
2140 my $id = '01234567890ab';
2141 my $desc = 'commit description';
2142 ($id, $desc) = git_commit_info($orig_commit, $id, $desc);
2143 ERROR("GIT_COMMIT_ID",
2144 "Please use 12 to 16 chars for the git commit ID like: '${init_char}ommit $id (\"$desc\")'\n" . $herecurr);
2145 }
2146
2147# Check for added, moved or deleted files
2148 if (!$reported_maintainer_file && !$in_commit_log &&
2149 ($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ ||
2150 $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ ||
2151 ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ &&
2152 (defined($1) || defined($2))))) {
2153 $reported_maintainer_file = 1;
2154 WARN("FILE_PATH_CHANGES",
2155 "added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr);
2156 }
2157
1959# Check for wrappage within a valid hunk of the file 2158# Check for wrappage within a valid hunk of the file
1960 if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) { 2159 if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) {
1961 ERROR("CORRUPTED_PATCH", 2160 ERROR("CORRUPTED_PATCH",
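
The two checks added in the hunk above look at the patch metadata rather than the code: a bare SHA-1 reference in the changelog now has to use 12 to 16 characters plus the quoted subject, in the form commit 0123456789ab ("subsystem: short description") (id and subject here are placeholders), and the first added, renamed or deleted file in a patch triggers a single FILE_PATH_CHANGES reminder to check whether MAINTAINERS needs updating.
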
@@ -1993,7 +2192,8 @@ sub process {
1993# Check if it's the start of a commit log 2192# Check if it's the start of a commit log
1994# (not a header line and we haven't seen the patch filename) 2193# (not a header line and we haven't seen the patch filename)
1995 if ($in_header_lines && $realfile =~ /^$/ && 2194 if ($in_header_lines && $realfile =~ /^$/ &&
1996 $rawline !~ /^(commit\b|from\b|[\w-]+:).+$/i) { 2195 !($rawline =~ /^\s+\S/ ||
2196 $rawline =~ /^(commit\b|from\b|[\w-]+:).*$/i)) {
1997 $in_header_lines = 0; 2197 $in_header_lines = 0;
1998 $in_commit_log = 1; 2198 $in_commit_log = 1;
1999 } 2199 }
@@ -2021,14 +2221,14 @@ sub process {
2021 if (ERROR("DOS_LINE_ENDINGS", 2221 if (ERROR("DOS_LINE_ENDINGS",
2022 "DOS line endings\n" . $herevet) && 2222 "DOS line endings\n" . $herevet) &&
2023 $fix) { 2223 $fix) {
2024 $fixed[$linenr - 1] =~ s/[\s\015]+$//; 2224 $fixed[$fixlinenr] =~ s/[\s\015]+$//;
2025 } 2225 }
2026 } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) { 2226 } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) {
2027 my $herevet = "$here\n" . cat_vet($rawline) . "\n"; 2227 my $herevet = "$here\n" . cat_vet($rawline) . "\n";
2028 if (ERROR("TRAILING_WHITESPACE", 2228 if (ERROR("TRAILING_WHITESPACE",
2029 "trailing whitespace\n" . $herevet) && 2229 "trailing whitespace\n" . $herevet) &&
2030 $fix) { 2230 $fix) {
2031 $fixed[$linenr - 1] =~ s/\s+$//; 2231 $fixed[$fixlinenr] =~ s/\s+$//;
2032 } 2232 }
2033 2233
2034 $rpt_cleaners = 1; 2234 $rpt_cleaners = 1;
@@ -2049,7 +2249,7 @@ sub process {
2049# Only applies when adding the entry originally, after that we do not have 2249# Only applies when adding the entry originally, after that we do not have
2050# sufficient context to determine whether it is indeed long enough. 2250# sufficient context to determine whether it is indeed long enough.
2051 if ($realfile =~ /Kconfig/ && 2251 if ($realfile =~ /Kconfig/ &&
2052 $line =~ /.\s*config\s+/) { 2252 $line =~ /^\+\s*config\s+/) {
2053 my $length = 0; 2253 my $length = 0;
2054 my $cnt = $realcnt; 2254 my $cnt = $realcnt;
2055 my $ln = $linenr + 1; 2255 my $ln = $linenr + 1;
@@ -2062,10 +2262,11 @@ sub process {
2062 $is_end = $lines[$ln - 1] =~ /^\+/; 2262 $is_end = $lines[$ln - 1] =~ /^\+/;
2063 2263
2064 next if ($f =~ /^-/); 2264 next if ($f =~ /^-/);
2265 last if (!$file && $f =~ /^\@\@/);
2065 2266
2066 if ($lines[$ln - 1] =~ /.\s*(?:bool|tristate)\s*\"/) { 2267 if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate)\s*\"/) {
2067 $is_start = 1; 2268 $is_start = 1;
2068 } elsif ($lines[$ln - 1] =~ /.\s*(?:---)?help(?:---)?$/) { 2269 } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) {
2069 $length = -1; 2270 $length = -1;
2070 } 2271 }
2071 2272
@@ -2161,12 +2362,18 @@ sub process {
2161 "quoted string split across lines\n" . $hereprev); 2362 "quoted string split across lines\n" . $hereprev);
2162 } 2363 }
2163 2364
2365# check for missing a space in a string concatination
2366 if ($prevrawline =~ /[^\\]\w"$/ && $rawline =~ /^\+[\t ]+"\w/) {
2367 WARN('MISSING_SPACE',
2368 "break quoted strings at a space character\n" . $hereprev);
2369 }
2370
2164# check for spaces before a quoted newline 2371# check for spaces before a quoted newline
2165 if ($rawline =~ /^.*\".*\s\\n/) { 2372 if ($rawline =~ /^.*\".*\s\\n/) {
2166 if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE", 2373 if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
2167 "unnecessary whitespace before a quoted newline\n" . $herecurr) && 2374 "unnecessary whitespace before a quoted newline\n" . $herecurr) &&
2168 $fix) { 2375 $fix) {
2169 $fixed[$linenr - 1] =~ s/^(\+.*\".*)\s+\\n/$1\\n/; 2376 $fixed[$fixlinenr] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
2170 } 2377 }
2171 2378
2172 } 2379 }
@@ -2203,7 +2410,7 @@ sub process {
2203 if (ERROR("CODE_INDENT", 2410 if (ERROR("CODE_INDENT",
2204 "code indent should use tabs where possible\n" . $herevet) && 2411 "code indent should use tabs where possible\n" . $herevet) &&
2205 $fix) { 2412 $fix) {
2206 $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e; 2413 $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
2207 } 2414 }
2208 } 2415 }
2209 2416
@@ -2213,9 +2420,9 @@ sub process {
2213 if (WARN("SPACE_BEFORE_TAB", 2420 if (WARN("SPACE_BEFORE_TAB",
2214 "please, no space before tabs\n" . $herevet) && 2421 "please, no space before tabs\n" . $herevet) &&
2215 $fix) { 2422 $fix) {
2216 while ($fixed[$linenr - 1] =~ 2423 while ($fixed[$fixlinenr] =~
2217 s/(^\+.*) {8,8}+\t/$1\t\t/) {} 2424 s/(^\+.*) {8,8}+\t/$1\t\t/) {}
2218 while ($fixed[$linenr - 1] =~ 2425 while ($fixed[$fixlinenr] =~
2219 s/(^\+.*) +\t/$1\t/) {} 2426 s/(^\+.*) +\t/$1\t/) {}
2220 } 2427 }
2221 } 2428 }
@@ -2249,19 +2456,19 @@ sub process {
2249 if (CHK("PARENTHESIS_ALIGNMENT", 2456 if (CHK("PARENTHESIS_ALIGNMENT",
2250 "Alignment should match open parenthesis\n" . $hereprev) && 2457 "Alignment should match open parenthesis\n" . $hereprev) &&
2251 $fix && $line =~ /^\+/) { 2458 $fix && $line =~ /^\+/) {
2252 $fixed[$linenr - 1] =~ 2459 $fixed[$fixlinenr] =~
2253 s/^\+[ \t]*/\+$goodtabindent/; 2460 s/^\+[ \t]*/\+$goodtabindent/;
2254 } 2461 }
2255 } 2462 }
2256 } 2463 }
2257 } 2464 }
2258 2465
2259 if ($line =~ /^\+.*\*[ \t]*\)[ \t]+(?!$Assignment|$Arithmetic)/) { 2466 if ($line =~ /^\+.*\(\s*$Type\s*\)[ \t]+(?!$Assignment|$Arithmetic|{)/) {
2260 if (CHK("SPACING", 2467 if (CHK("SPACING",
2261 "No space is necessary after a cast\n" . $hereprev) && 2468 "No space is necessary after a cast\n" . $herecurr) &&
2262 $fix) { 2469 $fix) {
2263 $fixed[$linenr - 1] =~ 2470 $fixed[$fixlinenr] =~
2264 s/^(\+.*\*[ \t]*\))[ \t]+/$1/; 2471 s/(\(\s*$Type\s*\))[ \t]+/$1/;
2265 } 2472 }
2266 } 2473 }
2267 2474
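
The space-after-cast test above now keys on a full ($Type) cast rather than the old pointer-only "*)" heuristic, reports against the current line, and declines to fire when the next token is an assignment or arithmetic operator or an open brace, so parenthesized operands and compound literals are not mistaken for casts. With illustrative names:

	len = (size_t) strlen(name);	/* CHK SPACING; --fix rewrites it to (size_t)strlen(name) */
	pt  = (struct point) { 0, 0 };	/* not flagged: '{' follows, a compound literal */
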
@@ -2291,10 +2498,44 @@ sub process {
2291 "networking block comments put the trailing */ on a separate line\n" . $herecurr); 2498 "networking block comments put the trailing */ on a separate line\n" . $herecurr);
2292 } 2499 }
2293 2500
2501# check for missing blank lines after struct/union declarations
2502# with exceptions for various attributes and macros
2503 if ($prevline =~ /^[\+ ]};?\s*$/ &&
2504 $line =~ /^\+/ &&
2505 !($line =~ /^\+\s*$/ ||
2506 $line =~ /^\+\s*EXPORT_SYMBOL/ ||
2507 $line =~ /^\+\s*MODULE_/i ||
2508 $line =~ /^\+\s*\#\s*(?:end|elif|else)/ ||
2509 $line =~ /^\+[a-z_]*init/ ||
2510 $line =~ /^\+\s*(?:static\s+)?[A-Z_]*ATTR/ ||
2511 $line =~ /^\+\s*DECLARE/ ||
2512 $line =~ /^\+\s*__setup/)) {
2513 if (CHK("LINE_SPACING",
2514 "Please use a blank line after function/struct/union/enum declarations\n" . $hereprev) &&
2515 $fix) {
2516 fix_insert_line($fixlinenr, "\+");
2517 }
2518 }
2519
2520# check for multiple consecutive blank lines
2521 if ($prevline =~ /^[\+ ]\s*$/ &&
2522 $line =~ /^\+\s*$/ &&
2523 $last_blank_line != ($linenr - 1)) {
2524 if (CHK("LINE_SPACING",
2525 "Please don't use multiple blank lines\n" . $hereprev) &&
2526 $fix) {
2527 fix_delete_line($fixlinenr, $rawline);
2528 }
2529
2530 $last_blank_line = $linenr;
2531 }
2532
2294# check for missing blank lines after declarations 2533# check for missing blank lines after declarations
2295 if ($sline =~ /^\+\s+\S/ && #Not at char 1 2534 if ($sline =~ /^\+\s+\S/ && #Not at char 1
2296 # actual declarations 2535 # actual declarations
2297 ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ || 2536 ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
2537 # function pointer declarations
2538 $prevline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
2298 # foo bar; where foo is some local typedef or #define 2539 # foo bar; where foo is some local typedef or #define
2299 $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ || 2540 $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
2300 # known declaration macros 2541 # known declaration macros
@@ -2307,6 +2548,8 @@ sub process {
2307 $prevline =~ /(?:\{\s*|\\)$/) && 2548 $prevline =~ /(?:\{\s*|\\)$/) &&
2308 # looks like a declaration 2549 # looks like a declaration
2309 !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ || 2550 !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
2551 # function pointer declarations
2552 $sline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
2310 # foo bar; where foo is some local typedef or #define 2553 # foo bar; where foo is some local typedef or #define
2311 $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ || 2554 $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
2312 # known declaration macros 2555 # known declaration macros
@@ -2321,8 +2564,11 @@ sub process {
2321 $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) && 2564 $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
2322 # indentation of previous and current line are the same 2565 # indentation of previous and current line are the same
2323 (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) { 2566 (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
2324 WARN("SPACING", 2567 if (WARN("LINE_SPACING",
2325 "Missing a blank line after declarations\n" . $hereprev); 2568 "Missing a blank line after declarations\n" . $hereprev) &&
2569 $fix) {
2570 fix_insert_line($fixlinenr, "\+");
2571 }
2326 } 2572 }
2327 2573
2328# check for spaces at the beginning of a line. 2574# check for spaces at the beginning of a line.
@@ -2335,13 +2581,33 @@ sub process {
2335 if (WARN("LEADING_SPACE", 2581 if (WARN("LEADING_SPACE",
2336 "please, no spaces at the start of a line\n" . $herevet) && 2582 "please, no spaces at the start of a line\n" . $herevet) &&
2337 $fix) { 2583 $fix) {
2338 $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e; 2584 $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
2339 } 2585 }
2340 } 2586 }
2341 2587
2342# check we are in a valid C source file if not then ignore this hunk 2588# check we are in a valid C source file if not then ignore this hunk
2343 next if ($realfile !~ /\.(h|c)$/); 2589 next if ($realfile !~ /\.(h|c)$/);
2344 2590
2591# check indentation of any line with a bare else
2592# if the previous line is a break or return and is indented 1 tab more...
2593 if ($sline =~ /^\+([\t]+)(?:}[ \t]*)?else(?:[ \t]*{)?\s*$/) {
2594 my $tabs = length($1) + 1;
2595 if ($prevline =~ /^\+\t{$tabs,$tabs}(?:break|return)\b/) {
2596 WARN("UNNECESSARY_ELSE",
2597 "else is not generally useful after a break or return\n" . $hereprev);
2598 }
2599 }
2600
2601# check indentation of a line with a break;
2602# if the previous line is a goto or return and is indented the same # of tabs
2603 if ($sline =~ /^\+([\t]+)break\s*;\s*$/) {
2604 my $tabs = $1;
2605 if ($prevline =~ /^\+$tabs(?:goto|return)\b/) {
2606 WARN("UNNECESSARY_BREAK",
2607 "break is not useful after a goto or return\n" . $hereprev);
2608 }
2609 }
2610
2345# discourage the addition of CONFIG_EXPERIMENTAL in #if(def). 2611# discourage the addition of CONFIG_EXPERIMENTAL in #if(def).
2346 if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) { 2612 if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) {
2347 WARN("CONFIG_EXPERIMENTAL", 2613 WARN("CONFIG_EXPERIMENTAL",
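
The two new indentation checks above flag control flow that serves no purpose: an else whose preceding branch ends in break or return one tab deeper, and a break that directly follows a goto or return at the same tab indent. Tab-indented illustration with hypothetical names:

	if (err)
		return err;
	else				/* WARN UNNECESSARY_ELSE */
		err = do_fallback();

	switch (cmd) {
	case CMD_RESET:
		return do_reset(dev);
		break;			/* WARN UNNECESSARY_BREAK */
	}
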
@@ -2477,7 +2743,7 @@ sub process {
2477 2743
2478# if/while/etc brace do not go on next line, unless defining a do while loop, 2744# if/while/etc brace do not go on next line, unless defining a do while loop,
2479# or if that brace on the next line is for something else 2745# or if that brace on the next line is for something else
2480 if ($line =~ /(.*)\b((?:if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) { 2746 if ($line =~ /(.*)\b((?:if|while|for|switch|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
2481 my $pre_ctx = "$1$2"; 2747 my $pre_ctx = "$1$2";
2482 2748
2483 my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0); 2749 my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0);
@@ -2504,7 +2770,7 @@ sub process {
2504 #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n"; 2770 #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
2505 #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n"; 2771 #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
2506 2772
2507 if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) { 2773 if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
2508 ERROR("OPEN_BRACE", 2774 ERROR("OPEN_BRACE",
2509 "that open brace { should be on the previous line\n" . 2775 "that open brace { should be on the previous line\n" .
2510 "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n"); 2776 "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
@@ -2523,7 +2789,7 @@ sub process {
2523 } 2789 }
2524 2790
2525# Check relative indent for conditionals and blocks. 2791# Check relative indent for conditionals and blocks.
2526 if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) { 2792 if ($line =~ /\b(?:(?:if|while|for|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
2527 ($stat, $cond, $line_nr_next, $remain_next, $off_next) = 2793 ($stat, $cond, $line_nr_next, $remain_next, $off_next) =
2528 ctx_statement_block($linenr, $realcnt, 0) 2794 ctx_statement_block($linenr, $realcnt, 0)
2529 if (!defined $stat); 2795 if (!defined $stat);
@@ -2654,8 +2920,18 @@ sub process {
2654# check for initialisation to aggregates open brace on the next line 2920# check for initialisation to aggregates open brace on the next line
2655 if ($line =~ /^.\s*{/ && 2921 if ($line =~ /^.\s*{/ &&
2656 $prevline =~ /(?:^|[^=])=\s*$/) { 2922 $prevline =~ /(?:^|[^=])=\s*$/) {
2657 ERROR("OPEN_BRACE", 2923 if (ERROR("OPEN_BRACE",
2658 "that open brace { should be on the previous line\n" . $hereprev); 2924 "that open brace { should be on the previous line\n" . $hereprev) &&
2925 $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
2926 fix_delete_line($fixlinenr - 1, $prevrawline);
2927 fix_delete_line($fixlinenr, $rawline);
2928 my $fixedline = $prevrawline;
2929 $fixedline =~ s/\s*=\s*$/ = {/;
2930 fix_insert_line($fixlinenr, $fixedline);
2931 $fixedline = $line;
2932 $fixedline =~ s/^(.\s*){\s*/$1/;
2933 fix_insert_line($fixlinenr, $fixedline);
2934 }
2659 } 2935 }
2660 2936
2661# 2937#
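
The aggregate-initialiser OPEN_BRACE error above becomes fixable: when both lines are additions, --fix folds the brace back onto the "=" line. Roughly, with a hypothetical structure:

	/* before */
	static const struct foo_ops default_ops =
	{
		.start	= foo_start,
	};

	/* after --fix */
	static const struct foo_ops default_ops = {
		.start	= foo_start,
	};
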
@@ -2680,10 +2956,10 @@ sub process {
2680 if (ERROR("C99_COMMENTS", 2956 if (ERROR("C99_COMMENTS",
2681 "do not use C99 // comments\n" . $herecurr) && 2957 "do not use C99 // comments\n" . $herecurr) &&
2682 $fix) { 2958 $fix) {
2683 my $line = $fixed[$linenr - 1]; 2959 my $line = $fixed[$fixlinenr];
2684 if ($line =~ /\/\/(.*)$/) { 2960 if ($line =~ /\/\/(.*)$/) {
2685 my $comment = trim($1); 2961 my $comment = trim($1);
2686 $fixed[$linenr - 1] =~ s@\/\/(.*)$@/\* $comment \*/@; 2962 $fixed[$fixlinenr] =~ s@\/\/(.*)$@/\* $comment \*/@;
2687 } 2963 }
2688 } 2964 }
2689 } 2965 }
@@ -2742,7 +3018,7 @@ sub process {
2742 "do not initialise globals to 0 or NULL\n" . 3018 "do not initialise globals to 0 or NULL\n" .
2743 $herecurr) && 3019 $herecurr) &&
2744 $fix) { 3020 $fix) {
2745 $fixed[$linenr - 1] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/; 3021 $fixed[$fixlinenr] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
2746 } 3022 }
2747 } 3023 }
2748# check for static initialisers. 3024# check for static initialisers.
@@ -2751,10 +3027,17 @@ sub process {
2751 "do not initialise statics to 0 or NULL\n" . 3027 "do not initialise statics to 0 or NULL\n" .
2752 $herecurr) && 3028 $herecurr) &&
2753 $fix) { 3029 $fix) {
2754 $fixed[$linenr - 1] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/; 3030 $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
2755 } 3031 }
2756 } 3032 }
2757 3033
3034# check for misordered declarations of char/short/int/long with signed/unsigned
3035 while ($sline =~ m{(\b$TypeMisordered\b)}g) {
3036 my $tmp = trim($1);
3037 WARN("MISORDERED_TYPE",
3038 "type '$tmp' should be specified in [[un]signed] [short|int|long|long long] order\n" . $herecurr);
3039 }
3040
2758# check for static const char * arrays. 3041# check for static const char * arrays.
2759 if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) { 3042 if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) {
2760 WARN("STATIC_CONST_CHAR_ARRAY", 3043 WARN("STATIC_CONST_CHAR_ARRAY",
@@ -2781,7 +3064,7 @@ sub process {
2781 if (ERROR("FUNCTION_WITHOUT_ARGS", 3064 if (ERROR("FUNCTION_WITHOUT_ARGS",
2782 "Bad function definition - $1() should probably be $1(void)\n" . $herecurr) && 3065 "Bad function definition - $1() should probably be $1(void)\n" . $herecurr) &&
2783 $fix) { 3066 $fix) {
2784 $fixed[$linenr - 1] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/; 3067 $fixed[$fixlinenr] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
2785 } 3068 }
2786 } 3069 }
2787 3070
@@ -2790,7 +3073,7 @@ sub process {
2790 if (WARN("DEFINE_PCI_DEVICE_TABLE", 3073 if (WARN("DEFINE_PCI_DEVICE_TABLE",
2791 "Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) && 3074 "Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) &&
2792 $fix) { 3075 $fix) {
2793 $fixed[$linenr - 1] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /; 3076 $fixed[$fixlinenr] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
2794 } 3077 }
2795 } 3078 }
2796 3079
@@ -2827,7 +3110,7 @@ sub process {
2827 my $sub_from = $ident; 3110 my $sub_from = $ident;
2828 my $sub_to = $ident; 3111 my $sub_to = $ident;
2829 $sub_to =~ s/\Q$from\E/$to/; 3112 $sub_to =~ s/\Q$from\E/$to/;
2830 $fixed[$linenr - 1] =~ 3113 $fixed[$fixlinenr] =~
2831 s@\Q$sub_from\E@$sub_to@; 3114 s@\Q$sub_from\E@$sub_to@;
2832 } 3115 }
2833 } 3116 }
@@ -2855,7 +3138,7 @@ sub process {
2855 my $sub_from = $match; 3138 my $sub_from = $match;
2856 my $sub_to = $match; 3139 my $sub_to = $match;
2857 $sub_to =~ s/\Q$from\E/$to/; 3140 $sub_to =~ s/\Q$from\E/$to/;
2858 $fixed[$linenr - 1] =~ 3141 $fixed[$fixlinenr] =~
2859 s@\Q$sub_from\E@$sub_to@; 3142 s@\Q$sub_from\E@$sub_to@;
2860 } 3143 }
2861 } 3144 }
@@ -2917,7 +3200,7 @@ sub process {
2917 if (WARN("PREFER_PR_LEVEL", 3200 if (WARN("PREFER_PR_LEVEL",
2918 "Prefer pr_warn(... to pr_warning(...\n" . $herecurr) && 3201 "Prefer pr_warn(... to pr_warning(...\n" . $herecurr) &&
2919 $fix) { 3202 $fix) {
2920 $fixed[$linenr - 1] =~ 3203 $fixed[$fixlinenr] =~
2921 s/\bpr_warning\b/pr_warn/; 3204 s/\bpr_warning\b/pr_warn/;
2922 } 3205 }
2923 } 3206 }
@@ -2933,17 +3216,40 @@ sub process {
2933 3216
2934# function brace can't be on same line, except for #defines of do while, 3217# function brace can't be on same line, except for #defines of do while,
2935# or if closed on same line 3218# or if closed on same line
2936 if (($line=~/$Type\s*$Ident\(.*\).*\s{/) and 3219 if (($line=~/$Type\s*$Ident\(.*\).*\s*{/) and
2937 !($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) { 3220 !($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) {
2938 ERROR("OPEN_BRACE", 3221 if (ERROR("OPEN_BRACE",
2939 "open brace '{' following function declarations go on the next line\n" . $herecurr); 3222 "open brace '{' following function declarations go on the next line\n" . $herecurr) &&
3223 $fix) {
3224 fix_delete_line($fixlinenr, $rawline);
3225 my $fixed_line = $rawline;
3226 $fixed_line =~ /(^..*$Type\s*$Ident\(.*\)\s*){(.*)$/;
3227 my $line1 = $1;
3228 my $line2 = $2;
3229 fix_insert_line($fixlinenr, ltrim($line1));
3230 fix_insert_line($fixlinenr, "\+{");
3231 if ($line2 !~ /^\s*$/) {
3232 fix_insert_line($fixlinenr, "\+\t" . trim($line2));
3233 }
3234 }
2940 } 3235 }
2941 3236
2942# open braces for enum, union and struct go on the same line. 3237# open braces for enum, union and struct go on the same line.
2943 if ($line =~ /^.\s*{/ && 3238 if ($line =~ /^.\s*{/ &&
2944 $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) { 3239 $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) {
2945 ERROR("OPEN_BRACE", 3240 if (ERROR("OPEN_BRACE",
2946 "open brace '{' following $1 go on the same line\n" . $hereprev); 3241 "open brace '{' following $1 go on the same line\n" . $hereprev) &&
3242 $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
3243 fix_delete_line($fixlinenr - 1, $prevrawline);
3244 fix_delete_line($fixlinenr, $rawline);
3245 my $fixedline = rtrim($prevrawline) . " {";
3246 fix_insert_line($fixlinenr, $fixedline);
3247 $fixedline = $rawline;
3248 $fixedline =~ s/^(.\s*){\s*/$1\t/;
3249 if ($fixedline !~ /^\+\s*$/) {
3250 fix_insert_line($fixlinenr, $fixedline);
3251 }
3252 }
2947 } 3253 }
2948 3254
2949# missing space after union, struct or enum definition 3255# missing space after union, struct or enum definition
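
For function definitions, the OPEN_BRACE error above now also matches a brace glued to the parameter list (the pattern went from "\s{" to "\s*{"), and with --fix the line is split so the brace lands on its own line, with any trailing statement indented below it. Illustrative:

	/* before */
	static int foo_probe(struct platform_device *pdev){
		return foo_init(pdev);
	}

	/* after --fix */
	static int foo_probe(struct platform_device *pdev)
	{
		return foo_init(pdev);
	}
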
@@ -2951,7 +3257,7 @@ sub process {
2951 if (WARN("SPACING", 3257 if (WARN("SPACING",
2952 "missing space after $1 definition\n" . $herecurr) && 3258 "missing space after $1 definition\n" . $herecurr) &&
2953 $fix) { 3259 $fix) {
2954 $fixed[$linenr - 1] =~ 3260 $fixed[$fixlinenr] =~
2955 s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/; 3261 s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/;
2956 } 3262 }
2957 } 3263 }
@@ -3021,7 +3327,7 @@ sub process {
3021 } 3327 }
3022 3328
3023 if (show_type("SPACING") && $fix) { 3329 if (show_type("SPACING") && $fix) {
3024 $fixed[$linenr - 1] =~ 3330 $fixed[$fixlinenr] =~
3025 s/^(.\s*)$Declare\s*\(\s*\*\s*$Ident\s*\)\s*\(/$1 . $declare . $post_declare_space . '(*' . $funcname . ')('/ex; 3331 s/^(.\s*)$Declare\s*\(\s*\*\s*$Ident\s*\)\s*\(/$1 . $declare . $post_declare_space . '(*' . $funcname . ')('/ex;
3026 } 3332 }
3027 } 3333 }
@@ -3038,7 +3344,7 @@ sub process {
3038 if (ERROR("BRACKET_SPACE", 3344 if (ERROR("BRACKET_SPACE",
3039 "space prohibited before open square bracket '['\n" . $herecurr) && 3345 "space prohibited before open square bracket '['\n" . $herecurr) &&
3040 $fix) { 3346 $fix) {
3041 $fixed[$linenr - 1] =~ 3347 $fixed[$fixlinenr] =~
3042 s/^(\+.*?)\s+\[/$1\[/; 3348 s/^(\+.*?)\s+\[/$1\[/;
3043 } 3349 }
3044 } 3350 }
@@ -3073,7 +3379,7 @@ sub process {
3073 if (WARN("SPACING", 3379 if (WARN("SPACING",
3074 "space prohibited between function name and open parenthesis '('\n" . $herecurr) && 3380 "space prohibited between function name and open parenthesis '('\n" . $herecurr) &&
3075 $fix) { 3381 $fix) {
3076 $fixed[$linenr - 1] =~ 3382 $fixed[$fixlinenr] =~
3077 s/\b$name\s+\(/$name\(/; 3383 s/\b$name\s+\(/$name\(/;
3078 } 3384 }
3079 } 3385 }
@@ -3341,8 +3647,8 @@ sub process {
3341 $fixed_line = $fixed_line . $fix_elements[$#elements]; 3647 $fixed_line = $fixed_line . $fix_elements[$#elements];
3342 } 3648 }
3343 3649
3344 if ($fix && $line_fixed && $fixed_line ne $fixed[$linenr - 1]) { 3650 if ($fix && $line_fixed && $fixed_line ne $fixed[$fixlinenr]) {
3345 $fixed[$linenr - 1] = $fixed_line; 3651 $fixed[$fixlinenr] = $fixed_line;
3346 } 3652 }
3347 3653
3348 3654
@@ -3353,7 +3659,7 @@ sub process {
3353 if (WARN("SPACING", 3659 if (WARN("SPACING",
3354 "space prohibited before semicolon\n" . $herecurr) && 3660 "space prohibited before semicolon\n" . $herecurr) &&
3355 $fix) { 3661 $fix) {
3356 1 while $fixed[$linenr - 1] =~ 3662 1 while $fixed[$fixlinenr] =~
3357 s/^(\+.*\S)\s+;/$1;/; 3663 s/^(\+.*\S)\s+;/$1;/;
3358 } 3664 }
3359 } 3665 }
@@ -3386,7 +3692,7 @@ sub process {
3386 if (ERROR("SPACING", 3692 if (ERROR("SPACING",
3387 "space required before the open brace '{'\n" . $herecurr) && 3693 "space required before the open brace '{'\n" . $herecurr) &&
3388 $fix) { 3694 $fix) {
3389 $fixed[$linenr - 1] =~ s/^(\+.*(?:do|\))){/$1 {/; 3695 $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/;
3390 } 3696 }
3391 } 3697 }
3392 3698
@@ -3404,7 +3710,7 @@ sub process {
3404 if (ERROR("SPACING", 3710 if (ERROR("SPACING",
3405 "space required after that close brace '}'\n" . $herecurr) && 3711 "space required after that close brace '}'\n" . $herecurr) &&
3406 $fix) { 3712 $fix) {
3407 $fixed[$linenr - 1] =~ 3713 $fixed[$fixlinenr] =~
3408 s/}((?!(?:,|;|\)))\S)/} $1/; 3714 s/}((?!(?:,|;|\)))\S)/} $1/;
3409 } 3715 }
3410 } 3716 }
@@ -3414,7 +3720,7 @@ sub process {
3414 if (ERROR("SPACING", 3720 if (ERROR("SPACING",
3415 "space prohibited after that open square bracket '['\n" . $herecurr) && 3721 "space prohibited after that open square bracket '['\n" . $herecurr) &&
3416 $fix) { 3722 $fix) {
3417 $fixed[$linenr - 1] =~ 3723 $fixed[$fixlinenr] =~
3418 s/\[\s+/\[/; 3724 s/\[\s+/\[/;
3419 } 3725 }
3420 } 3726 }
@@ -3422,7 +3728,7 @@ sub process {
3422 if (ERROR("SPACING", 3728 if (ERROR("SPACING",
3423 "space prohibited before that close square bracket ']'\n" . $herecurr) && 3729 "space prohibited before that close square bracket ']'\n" . $herecurr) &&
3424 $fix) { 3730 $fix) {
3425 $fixed[$linenr - 1] =~ 3731 $fixed[$fixlinenr] =~
3426 s/\s+\]/\]/; 3732 s/\s+\]/\]/;
3427 } 3733 }
3428 } 3734 }
@@ -3433,7 +3739,7 @@ sub process {
3433 if (ERROR("SPACING", 3739 if (ERROR("SPACING",
3434 "space prohibited after that open parenthesis '('\n" . $herecurr) && 3740 "space prohibited after that open parenthesis '('\n" . $herecurr) &&
3435 $fix) { 3741 $fix) {
3436 $fixed[$linenr - 1] =~ 3742 $fixed[$fixlinenr] =~
3437 s/\(\s+/\(/; 3743 s/\(\s+/\(/;
3438 } 3744 }
3439 } 3745 }
@@ -3443,18 +3749,27 @@ sub process {
3443 if (ERROR("SPACING", 3749 if (ERROR("SPACING",
3444 "space prohibited before that close parenthesis ')'\n" . $herecurr) && 3750 "space prohibited before that close parenthesis ')'\n" . $herecurr) &&
3445 $fix) { 3751 $fix) {
3446 $fixed[$linenr - 1] =~ 3752 print("fixlinenr: <$fixlinenr> fixed[fixlinenr]: <$fixed[$fixlinenr]>\n");
3753 $fixed[$fixlinenr] =~
3447 s/\s+\)/\)/; 3754 s/\s+\)/\)/;
3448 } 3755 }
3449 } 3756 }
3450 3757
3758# check unnecessary parentheses around addressof/dereference single $Lvals
3759# ie: &(foo->bar) should be &foo->bar and *(foo->bar) should be *foo->bar
3760
3761 while ($line =~ /(?:[^&]&\s*|\*)\(\s*($Ident\s*(?:$Member\s*)+)\s*\)/g) {
3762 CHK("UNNECESSARY_PARENTHESES",
3763 "Unnecessary parentheses around $1\n" . $herecurr);
3764 }
3765
3451#goto labels aren't indented, allow a single space however 3766#goto labels aren't indented, allow a single space however
3452 if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and 3767 if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
3453 !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) { 3768 !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
3454 if (WARN("INDENTED_LABEL", 3769 if (WARN("INDENTED_LABEL",
3455 "labels should not be indented\n" . $herecurr) && 3770 "labels should not be indented\n" . $herecurr) &&
3456 $fix) { 3771 $fix) {
3457 $fixed[$linenr - 1] =~ 3772 $fixed[$fixlinenr] =~
3458 s/^(.)\s+/$1/; 3773 s/^(.)\s+/$1/;
3459 } 3774 }
3460 } 3775 }
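
The new UNNECESSARY_PARENTHESES --strict check above covers taking the address of, or dereferencing, a single member expression. With hypothetical fields:

	stats = &(dev->stats);		/* CHK: write &dev->stats */
	count = *(info->counter);	/* CHK: write *info->counter */
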
@@ -3516,7 +3831,7 @@ sub process {
3516 if (ERROR("SPACING", 3831 if (ERROR("SPACING",
3517 "space required before the open parenthesis '('\n" . $herecurr) && 3832 "space required before the open parenthesis '('\n" . $herecurr) &&
3518 $fix) { 3833 $fix) {
3519 $fixed[$linenr - 1] =~ 3834 $fixed[$fixlinenr] =~
3520 s/\b(if|while|for|switch)\(/$1 \(/; 3835 s/\b(if|while|for|switch)\(/$1 \(/;
3521 } 3836 }
3522 } 3837 }
@@ -3606,7 +3921,7 @@ sub process {
3606# if should not continue a brace 3921# if should not continue a brace
3607 if ($line =~ /}\s*if\b/) { 3922 if ($line =~ /}\s*if\b/) {
3608 ERROR("TRAILING_STATEMENTS", 3923 ERROR("TRAILING_STATEMENTS",
3609 "trailing statements should be on next line\n" . 3924 "trailing statements should be on next line (or did you mean 'else if'?)\n" .
3610 $herecurr); 3925 $herecurr);
3611 } 3926 }
3612# case and default should not have general statements after them 3927# case and default should not have general statements after them
@@ -3622,14 +3937,26 @@ sub process {
3622 3937
3623 # Check for }<nl>else {, these must be at the same 3938 # Check for }<nl>else {, these must be at the same
3624 # indent level to be relevant to each other. 3939 # indent level to be relevant to each other.
3625 if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and 3940 if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ &&
3626 $previndent == $indent) { 3941 $previndent == $indent) {
3627 ERROR("ELSE_AFTER_BRACE", 3942 if (ERROR("ELSE_AFTER_BRACE",
3628 "else should follow close brace '}'\n" . $hereprev); 3943 "else should follow close brace '}'\n" . $hereprev) &&
3944 $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
3945 fix_delete_line($fixlinenr - 1, $prevrawline);
3946 fix_delete_line($fixlinenr, $rawline);
3947 my $fixedline = $prevrawline;
3948 $fixedline =~ s/}\s*$//;
3949 if ($fixedline !~ /^\+\s*$/) {
3950 fix_insert_line($fixlinenr, $fixedline);
3951 }
3952 $fixedline = $rawline;
3953 $fixedline =~ s/^(.\s*)else/$1} else/;
3954 fix_insert_line($fixlinenr, $fixedline);
3955 }
3629 } 3956 }
3630 3957
3631 if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and 3958 if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ &&
3632 $previndent == $indent) { 3959 $previndent == $indent) {
3633 my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0); 3960 my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0);
3634 3961
3635 # Find out what is on the end of the line after the 3962 # Find out what is on the end of the line after the
@@ -3638,8 +3965,18 @@ sub process {
3638 $s =~ s/\n.*//g; 3965 $s =~ s/\n.*//g;
3639 3966
3640 if ($s =~ /^\s*;/) { 3967 if ($s =~ /^\s*;/) {
3641 ERROR("WHILE_AFTER_BRACE", 3968 if (ERROR("WHILE_AFTER_BRACE",
3642 "while should follow close brace '}'\n" . $hereprev); 3969 "while should follow close brace '}'\n" . $hereprev) &&
3970 $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
3971 fix_delete_line($fixlinenr - 1, $prevrawline);
3972 fix_delete_line($fixlinenr, $rawline);
3973 my $fixedline = $prevrawline;
3974 my $trailing = $rawline;
3975 $trailing =~ s/^\+//;
3976 $trailing = trim($trailing);
3977 $fixedline =~ s/}\s*$/} $trailing/;
3978 fix_insert_line($fixlinenr, $fixedline);
3979 }
3643 } 3980 }
3644 } 3981 }
3645 3982
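
Both close-brace placement errors above (else and do/while) gain --fix support when the two lines are additions: the dangling "}" and the following keyword end up on one line. For the else case, roughly:

	/* before */
	if (ret < 0) {
		goto unwind;
	}
	else {
		ret = 0;
	}

	/* after --fix */
	if (ret < 0) {
		goto unwind;
	} else {
		ret = 0;
	}
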
@@ -3653,7 +3990,7 @@ sub process {
3653 "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) && 3990 "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) &&
3654 $fix) { 3991 $fix) {
3655 my $hexval = sprintf("0x%x", oct($var)); 3992 my $hexval = sprintf("0x%x", oct($var));
3656 $fixed[$linenr - 1] =~ 3993 $fixed[$fixlinenr] =~
3657 s/\b$var\b/$hexval/; 3994 s/\b$var\b/$hexval/;
3658 } 3995 }
3659 } 3996 }
@@ -3689,7 +4026,7 @@ sub process {
3689 if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION", 4026 if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION",
3690 "Whitespace after \\ makes next lines useless\n" . $herecurr) && 4027 "Whitespace after \\ makes next lines useless\n" . $herecurr) &&
3691 $fix) { 4028 $fix) {
3692 $fixed[$linenr - 1] =~ s/\s+$//; 4029 $fixed[$fixlinenr] =~ s/\s+$//;
3693 } 4030 }
3694 } 4031 }
3695 4032
@@ -3762,7 +4099,7 @@ sub process {
3762 $dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(), 4099 $dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(),
3763 $dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo(); 4100 $dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo();
3764 $dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz 4101 $dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz
3765 $dstat !~ /^'X'$/ && # character constants 4102 $dstat !~ /^'X'$/ && $dstat !~ /^'XX'$/ && # character constants
3766 $dstat !~ /$exceptions/ && 4103 $dstat !~ /$exceptions/ &&
3767 $dstat !~ /^\.$Ident\s*=/ && # .foo = 4104 $dstat !~ /^\.$Ident\s*=/ && # .foo =
3768 $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo 4105 $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo
@@ -4014,6 +4351,23 @@ sub process {
4014 } 4351 }
4015 } 4352 }
4016 4353
4354# check for unnecessary "Out of Memory" messages
4355 if ($line =~ /^\+.*\b$logFunctions\s*\(/ &&
4356 $prevline =~ /^[ \+]\s*if\s*\(\s*(\!\s*|NULL\s*==\s*)?($Lval)(\s*==\s*NULL\s*)?\s*\)/ &&
4357 (defined $1 || defined $3) &&
4358 $linenr > 3) {
4359 my $testval = $2;
4360 my $testline = $lines[$linenr - 3];
4361
4362 my ($s, $c) = ctx_statement_block($linenr - 3, $realcnt, 0);
4363# print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n");
4364
4365 if ($c =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|(?:dev_)?alloc_skb)/) {
4366 WARN("OOM_MESSAGE",
4367 "Possible unnecessary 'out of memory' message\n" . $hereprev);
4368 }
4369 }
4370
4017# check for bad placement of section $InitAttribute (e.g.: __initdata) 4371# check for bad placement of section $InitAttribute (e.g.: __initdata)
4018 if ($line =~ /(\b$InitAttribute\b)/) { 4372 if ($line =~ /(\b$InitAttribute\b)/) {
4019 my $attr = $1; 4373 my $attr = $1;
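
The OOM_MESSAGE check above walks back from a print call to the preceding NULL test and, a couple of lines further, to the allocation that produced the pointer; when that allocation is one of the k*alloc/kstrdup/alloc_skb family the message is reported as redundant, since those allocators already log failures unless __GFP_NOWARN is passed. Illustrative shape of what it warns about:

	buf = kmalloc(len, GFP_KERNEL);
	if (!buf) {
		pr_err("failed to allocate buffer\n");	/* WARN OOM_MESSAGE */
		return -ENOMEM;
	}
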
@@ -4027,7 +4381,7 @@ sub process {
4027 WARN("MISPLACED_INIT", 4381 WARN("MISPLACED_INIT",
4028 "$attr should be placed after $var\n" . $herecurr))) && 4382 "$attr should be placed after $var\n" . $herecurr))) &&
4029 $fix) { 4383 $fix) {
4030 $fixed[$linenr - 1] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e; 4384 $fixed[$fixlinenr] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
4031 } 4385 }
4032 } 4386 }
4033 } 4387 }
@@ -4041,7 +4395,7 @@ sub process {
4041 if (ERROR("INIT_ATTRIBUTE", 4395 if (ERROR("INIT_ATTRIBUTE",
4042 "Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) && 4396 "Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) &&
4043 $fix) { 4397 $fix) {
4044 $fixed[$linenr - 1] =~ 4398 $fixed[$fixlinenr] =~
4045 s/$InitAttributeData/${attr_prefix}initconst/; 4399 s/$InitAttributeData/${attr_prefix}initconst/;
4046 } 4400 }
4047 } 4401 }
@@ -4052,12 +4406,12 @@ sub process {
4052 if (ERROR("INIT_ATTRIBUTE", 4406 if (ERROR("INIT_ATTRIBUTE",
4053 "Use of $attr requires a separate use of const\n" . $herecurr) && 4407 "Use of $attr requires a separate use of const\n" . $herecurr) &&
4054 $fix) { 4408 $fix) {
4055 my $lead = $fixed[$linenr - 1] =~ 4409 my $lead = $fixed[$fixlinenr] =~
4056 /(^\+\s*(?:static\s+))/; 4410 /(^\+\s*(?:static\s+))/;
4057 $lead = rtrim($1); 4411 $lead = rtrim($1);
4058 $lead = "$lead " if ($lead !~ /^\+$/); 4412 $lead = "$lead " if ($lead !~ /^\+$/);
4059 $lead = "${lead}const "; 4413 $lead = "${lead}const ";
4060 $fixed[$linenr - 1] =~ s/(^\+\s*(?:static\s+))/$lead/; 4414 $fixed[$fixlinenr] =~ s/(^\+\s*(?:static\s+))/$lead/;
4061 } 4415 }
4062 } 4416 }
4063 4417
@@ -4070,7 +4424,7 @@ sub process {
4070 if (WARN("CONSTANT_CONVERSION", 4424 if (WARN("CONSTANT_CONVERSION",
4071 "$constant_func should be $func\n" . $herecurr) && 4425 "$constant_func should be $func\n" . $herecurr) &&
4072 $fix) { 4426 $fix) {
4073 $fixed[$linenr - 1] =~ s/\b$constant_func\b/$func/g; 4427 $fixed[$fixlinenr] =~ s/\b$constant_func\b/$func/g;
4074 } 4428 }
4075 } 4429 }
4076 4430
@@ -4120,7 +4474,7 @@ sub process {
4120 if (ERROR("SPACING", 4474 if (ERROR("SPACING",
4121 "exactly one space required after that #$1\n" . $herecurr) && 4475 "exactly one space required after that #$1\n" . $herecurr) &&
4122 $fix) { 4476 $fix) {
4123 $fixed[$linenr - 1] =~ 4477 $fixed[$fixlinenr] =~
4124 s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /; 4478 s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /;
4125 } 4479 }
4126 4480
@@ -4168,7 +4522,7 @@ sub process {
4168 if (WARN("INLINE", 4522 if (WARN("INLINE",
4169 "plain inline is preferred over $1\n" . $herecurr) && 4523 "plain inline is preferred over $1\n" . $herecurr) &&
4170 $fix) { 4524 $fix) {
4171 $fixed[$linenr - 1] =~ s/\b(__inline__|__inline)\b/inline/; 4525 $fixed[$fixlinenr] =~ s/\b(__inline__|__inline)\b/inline/;
4172 4526
4173 } 4527 }
4174 } 4528 }
@@ -4193,7 +4547,7 @@ sub process {
4193 if (WARN("PREFER_PRINTF", 4547 if (WARN("PREFER_PRINTF",
4194 "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) && 4548 "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) &&
4195 $fix) { 4549 $fix) {
4196 $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex; 4550 $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
4197 4551
4198 } 4552 }
4199 } 4553 }
@@ -4204,7 +4558,7 @@ sub process {
4204 if (WARN("PREFER_SCANF", 4558 if (WARN("PREFER_SCANF",
4205 "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) && 4559 "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) &&
4206 $fix) { 4560 $fix) {
4207 $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex; 4561 $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
4208 } 4562 }
4209 } 4563 }
4210 4564
@@ -4219,7 +4573,7 @@ sub process {
4219 if (WARN("SIZEOF_PARENTHESIS", 4573 if (WARN("SIZEOF_PARENTHESIS",
4220 "sizeof $1 should be sizeof($1)\n" . $herecurr) && 4574 "sizeof $1 should be sizeof($1)\n" . $herecurr) &&
4221 $fix) { 4575 $fix) {
4222 $fixed[$linenr - 1] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex; 4576 $fixed[$fixlinenr] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
4223 } 4577 }
4224 } 4578 }
4225 4579
@@ -4242,7 +4596,7 @@ sub process {
4242 if (WARN("PREFER_SEQ_PUTS", 4596 if (WARN("PREFER_SEQ_PUTS",
4243 "Prefer seq_puts to seq_printf\n" . $herecurr) && 4597 "Prefer seq_puts to seq_printf\n" . $herecurr) &&
4244 $fix) { 4598 $fix) {
4245 $fixed[$linenr - 1] =~ s/\bseq_printf\b/seq_puts/; 4599 $fixed[$fixlinenr] =~ s/\bseq_printf\b/seq_puts/;
4246 } 4600 }
4247 } 4601 }
4248 } 4602 }
@@ -4271,7 +4625,7 @@ sub process {
4271 if (WARN("PREFER_ETHER_ADDR_COPY", 4625 if (WARN("PREFER_ETHER_ADDR_COPY",
4272 "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . $herecurr) && 4626 "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . $herecurr) &&
4273 $fix) { 4627 $fix) {
4274 $fixed[$linenr - 1] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/; 4628 $fixed[$fixlinenr] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
4275 } 4629 }
4276 } 4630 }
4277 4631
@@ -4359,7 +4713,7 @@ sub process {
4359 if (CHK("AVOID_EXTERNS", 4713 if (CHK("AVOID_EXTERNS",
4360 "extern prototypes should be avoided in .h files\n" . $herecurr) && 4714 "extern prototypes should be avoided in .h files\n" . $herecurr) &&
4361 $fix) { 4715 $fix) {
4362 $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/; 4716 $fixed[$fixlinenr] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
4363 } 4717 }
4364 } 4718 }
4365 4719
@@ -4419,23 +4773,24 @@ sub process {
4419 4773
4420# check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc 4774# check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc
4421 if ($^V && $^V ge 5.10.0 && 4775 if ($^V && $^V ge 5.10.0 &&
4422 $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/) { 4776 $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) {
4423 my $oldfunc = $3; 4777 my $oldfunc = $3;
4424 my $a1 = $4; 4778 my $a1 = $4;
4425 my $a2 = $10; 4779 my $a2 = $10;
4426 my $newfunc = "kmalloc_array"; 4780 my $newfunc = "kmalloc_array";
4427 $newfunc = "kcalloc" if ($oldfunc eq "kzalloc"); 4781 $newfunc = "kcalloc" if ($oldfunc eq "kzalloc");
4428 if ($a1 =~ /^sizeof\s*\S/ || $a2 =~ /^sizeof\s*\S/) { 4782 my $r1 = $a1;
4783 my $r2 = $a2;
4784 if ($a1 =~ /^sizeof\s*\S/) {
4785 $r1 = $a2;
4786 $r2 = $a1;
4787 }
4788 if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ &&
4789 !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) {
4429 if (WARN("ALLOC_WITH_MULTIPLY", 4790 if (WARN("ALLOC_WITH_MULTIPLY",
4430 "Prefer $newfunc over $oldfunc with multiply\n" . $herecurr) && 4791 "Prefer $newfunc over $oldfunc with multiply\n" . $herecurr) &&
4431 $fix) { 4792 $fix) {
4432 my $r1 = $a1; 4793 $fixed[$fixlinenr] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
4433 my $r2 = $a2;
4434 if ($a1 =~ /^sizeof\s*\S/) {
4435 $r1 = $a2;
4436 $r2 = $a1;
4437 }
4438 $fixed[$linenr - 1] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
4439 4794
4440 } 4795 }
4441 } 4796 }
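This hunk both narrows the ALLOC_WITH_MULTIPLY warning and hoists the operand reordering out of the --fix branch: the warning now fires only when exactly one factor is a sizeof and the other factor is neither a numeric constant nor an ALL_CAPS macro, the sizeof operand always becomes the second argument of kmalloc_array()/kcalloc(), and the extra \s*, appended to the match requires the product to run right up to the next-argument comma. A minimal standalone sketch of the new reorder-and-filter logic (variable contents are hypothetical, and /^\d+$/ is a simplified stand-in for checkpatch's $Constant):

    my ($oldfunc, $a1, $a2) = ('kzalloc', 'sizeof(*tbl)', 'nr_entries');
    my $newfunc = $oldfunc eq 'kzalloc' ? 'kcalloc' : 'kmalloc_array';
    my ($r1, $r2) = ($a1, $a2);
    ($r1, $r2) = ($a2, $a1) if ($a1 =~ /^sizeof\s*\S/);      # sizeof goes second
    if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ &&
        !($r1 =~ /^\d+$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) {  # skip constants and macros
        print "suggest: $newfunc($r1, $r2, ...)\n";
    }
    # -> suggest: kcalloc(nr_entries, sizeof(*tbl), ...)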
@@ -4459,17 +4814,17 @@ sub process {
4459 if (WARN("ONE_SEMICOLON", 4814 if (WARN("ONE_SEMICOLON",
4460 "Statements terminations use 1 semicolon\n" . $herecurr) && 4815 "Statements terminations use 1 semicolon\n" . $herecurr) &&
4461 $fix) { 4816 $fix) {
4462 $fixed[$linenr - 1] =~ s/(\s*;\s*){2,}$/;/g; 4817 $fixed[$fixlinenr] =~ s/(\s*;\s*){2,}$/;/g;
4463 } 4818 }
4464 } 4819 }
4465 4820
4466 # check for case / default statements not preceeded by break/fallthrough/switch 4821 # check for case / default statements not preceded by break/fallthrough/switch
4467 if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) { 4822 if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) {
4468 my $has_break = 0; 4823 my $has_break = 0;
4469 my $has_statement = 0; 4824 my $has_statement = 0;
4470 my $count = 0; 4825 my $count = 0;
4471 my $prevline = $linenr; 4826 my $prevline = $linenr;
4472 while ($prevline > 1 && $count < 3 && !$has_break) { 4827 while ($prevline > 1 && ($file || $count < 3) && !$has_break) {
4473 $prevline--; 4828 $prevline--;
4474 my $rline = $rawlines[$prevline - 1]; 4829 my $rline = $rawlines[$prevline - 1];
4475 my $fline = $lines[$prevline - 1]; 4830 my $fline = $lines[$prevline - 1];
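Besides the ONE_SEMICOLON fixup switching to $fixlinenr, the functional change in this hunk is the look-back bound in the case/default check: when checkpatch is run with --file, the scan for a preceding break/fallthrough/switch is no longer capped at three lines. A trivial sketch of the bound only (values hypothetical; the real loop also classifies each earlier line before giving up):

    my $file = 1;                     # set when checkpatch is run with --file
    my ($count, $prevline, $has_break) = (0, 20, 0);
    while ($prevline > 1 && ($file || $count < 3) && !$has_break) {
        $prevline--;
        $count++;                     # stand-in for the per-line bookkeeping
    }
    print "scanned back to line $prevline\n";   # reaches line 1 here; stops after 3 lines in patch mode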
@@ -4507,7 +4862,7 @@ sub process {
4507 if (WARN("USE_FUNC", 4862 if (WARN("USE_FUNC",
4508 "__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr) && 4863 "__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr) &&
4509 $fix) { 4864 $fix) {
4510 $fixed[$linenr - 1] =~ s/\b__FUNCTION__\b/__func__/g; 4865 $fixed[$fixlinenr] =~ s/\b__FUNCTION__\b/__func__/g;
4511 } 4866 }
4512 } 4867 }
4513 4868
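Another straight token replacement, sketched on a hypothetical line:

    my $stmt = 'pr_err("%s: timeout\n", __FUNCTION__);';
    $stmt =~ s/\b__FUNCTION__\b/__func__/g;
    print "$stmt\n";   # -> pr_err("%s: timeout\n", __func__);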
@@ -4750,12 +5105,16 @@ sub process {
4750 hash_show_words(\%use_type, "Used"); 5105 hash_show_words(\%use_type, "Used");
4751 hash_show_words(\%ignore_type, "Ignored"); 5106 hash_show_words(\%ignore_type, "Ignored");
4752 5107
4753 if ($clean == 0 && $fix && "@rawlines" ne "@fixed") { 5108 if ($clean == 0 && $fix &&
5109 ("@rawlines" ne "@fixed" ||
5110 $#fixed_inserted >= 0 || $#fixed_deleted >= 0)) {
4754 my $newfile = $filename; 5111 my $newfile = $filename;
4755 $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace); 5112 $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace);
4756 my $linecount = 0; 5113 my $linecount = 0;
4757 my $f; 5114 my $f;
4758 5115
5116 @fixed = fix_inserted_deleted_lines(\@fixed, \@fixed_inserted, \@fixed_deleted);
5117
4759 open($f, '>', $newfile) 5118 open($f, '>', $newfile)
4760 or die "$P: Can't open $newfile for write\n"; 5119 or die "$P: Can't open $newfile for write\n";
4761 foreach my $fixed_line (@fixed) { 5120 foreach my $fixed_line (@fixed) {
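Two things change in this hunk: the fixed copy of the input is now written out even when the only pending changes are whole-line insertions or deletions, and @fixed is first passed through fix_inserted_deleted_lines() so those lines are spliced in before the file is emitted. A standalone sketch of the widened trigger condition (array contents are hypothetical placeholders; the real inserted/deleted entries carry position information):

    my @rawlines       = ('line one', 'line two');
    my @fixed          = @rawlines;                      # no in-place edits this run
    my @fixed_inserted = ('a pending inserted line');
    my @fixed_deleted  = ();
    if ("@rawlines" ne "@fixed" ||
        $#fixed_inserted >= 0 || $#fixed_deleted >= 0) {
        print "would write the fixed file\n";            # previously skipped in this case
    }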
@@ -4763,7 +5122,7 @@ sub process {
4763 if ($file) { 5122 if ($file) {
4764 if ($linecount > 3) { 5123 if ($linecount > 3) {
4765 $fixed_line =~ s/^\+//; 5124 $fixed_line =~ s/^\+//;
4766 print $f $fixed_line. "\n"; 5125 print $f $fixed_line . "\n";
4767 } 5126 }
4768 } else { 5127 } else {
4769 print $f $fixed_line . "\n"; 5128 print $f $fixed_line . "\n";