author     Linus Torvalds <torvalds@linux-foundation.org>   2018-04-06 17:19:26 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-04-06 17:19:26 -0400
commit     3b54765cca23152ec0cc254b75c877c10f6e2870
tree       795785d2a9d7498df9452be138867bd996c4cea5
parent     3fd14cdcc05a682b03743683ce3a726898b20555
parent     97b1255cb27c551d7c3c5c496d787da40772da99

Merge branch 'akpm' (patches from Andrew)

Merge updates from Andrew Morton:

 - a few misc things

 - ocfs2 updates

 - the v9fs maintainers have been missing for a long time. I've taken
   over v9fs patch slinging.

 - most of MM

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (116 commits)
  mm,oom_reaper: check for MMF_OOM_SKIP before complaining
  mm/ksm: fix interaction with THP
  mm/memblock.c: cast constant ULLONG_MAX to phys_addr_t
  headers: untangle kmemleak.h from mm.h
  include/linux/mmdebug.h: make VM_WARN* non-rvals
  mm/page_isolation.c: make start_isolate_page_range() fail if already isolated
  mm: change return type to vm_fault_t
  mm, oom: remove 3% bonus for CAP_SYS_ADMIN processes
  mm, page_alloc: wakeup kcompactd even if kswapd cannot free more memory
  kernel/fork.c: detect early free of a live mm
  mm: make counting of list_lru_one::nr_items lockless
  mm/swap_state.c: make bool enable_vma_readahead and swap_vma_readahead() static
  block_invalidatepage(): only release page if the full page was invalidated
  mm: kernel-doc: add missing parameter descriptions
  mm/swap.c: remove @cold parameter description for release_pages()
  mm/nommu: remove description of alloc_vm_area
  zram: drop max_zpage_size and use zs_huge_class_size()
  zsmalloc: introduce zs_huge_class_size()
  mm: fix races between swapoff and flush dcache
  fs/direct-io.c: minor cleanups in do_blockdev_direct_IO
  ...

-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 54
-rw-r--r--  Documentation/trace/postprocess/trace-vmscan-postprocess.pl | 4
-rw-r--r--  arch/arc/mm/cache.c | 2
-rw-r--r--  arch/arm/boot/compressed/misc.c | 9
-rw-r--r--  arch/arm/mm/copypage-v4mc.c | 2
-rw-r--r--  arch/arm/mm/copypage-v6.c | 2
-rw-r--r--  arch/arm/mm/copypage-xscale.c | 2
-rw-r--r--  arch/arm/mm/fault-armv.c | 2
-rw-r--r--  arch/arm/mm/flush.c | 6
-rw-r--r--  arch/mips/boot/compressed/decompress.c | 9
-rw-r--r--  arch/mips/mm/cache.c | 2
-rw-r--r--  arch/nios2/mm/cacheflush.c | 4
-rw-r--r--  arch/parisc/kernel/cache.c | 5
-rw-r--r--  arch/powerpc/include/asm/hugetlb.h | 6
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 5
-rw-r--r--  arch/powerpc/mm/mmu_context_iommu.c | 2
-rw-r--r--  arch/powerpc/sysdev/dart_iommu.c | 1
-rw-r--r--  arch/powerpc/sysdev/msi_bitmap.c | 1
-rw-r--r--  arch/s390/kernel/nmi.c | 2
-rw-r--r--  arch/s390/kernel/smp.c | 1
-rw-r--r--  arch/sh/boot/compressed/misc.c | 9
-rw-r--r--  arch/sh/mm/cache-sh4.c | 2
-rw-r--r--  arch/sh/mm/cache-sh7705.c | 2
-rw-r--r--  arch/sparc/kernel/irq_64.c | 1
-rw-r--r--  arch/sparc/kernel/smp_64.c | 8
-rw-r--r--  arch/sparc/mm/init_64.c | 6
-rw-r--r--  arch/sparc/mm/tlb.c | 2
-rw-r--r--  arch/unicore32/mm/flush.c | 2
-rw-r--r--  arch/unicore32/mm/mmu.c | 2
-rw-r--r--  arch/x86/kernel/pci-dma.c | 1
-rw-r--r--  arch/x86/mm/init_64.c | 33
-rw-r--r--  arch/xtensa/mm/cache.c | 2
-rw-r--r--  drivers/base/memory.c | 40
-rw-r--r--  drivers/base/node.c | 24
-rw-r--r--  drivers/block/zram/zram_drv.c | 9
-rw-r--r--  drivers/block/zram/zram_drv.h | 16
-rw-r--r--  drivers/dax/device.c | 10
-rw-r--r--  drivers/iommu/exynos-iommu.c | 1
-rw-r--r--  drivers/iommu/mtk_iommu_v1.c | 1
-rw-r--r--  drivers/net/ethernet/ti/cpsw.c | 1
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/pci.c | 1
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c | 1
-rw-r--r--  drivers/staging/rtl8188eu/hal/fw.c | 2
-rw-r--r--  drivers/staging/rtlwifi/pci.c | 1
-rw-r--r--  drivers/virtio/virtio_ring.c | 1
-rw-r--r--  fs/9p/v9fs.c | 7
-rw-r--r--  fs/9p/vfs_inode.c | 26
-rw-r--r--  fs/9p/vfs_super.c | 2
-rw-r--r--  fs/block_dev.c | 6
-rw-r--r--  fs/buffer.c | 2
-rw-r--r--  fs/direct-io.c | 9
-rw-r--r--  fs/hugetlbfs/inode.c | 10
-rw-r--r--  fs/ocfs2/alloc.c | 2
-rw-r--r--  fs/ocfs2/aops.c | 4
-rw-r--r--  fs/ocfs2/aops.h | 2
-rw-r--r--  fs/ocfs2/cluster/heartbeat.c | 11
-rw-r--r--  fs/ocfs2/dir.c | 2
-rw-r--r--  fs/ocfs2/dlm/dlmast.c | 2
-rw-r--r--  fs/ocfs2/dlm/dlmcommon.h | 4
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c | 29
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.h | 25
-rw-r--r--  fs/ocfs2/dlm/dlmlock.c | 3
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 25
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c | 41
-rw-r--r--  fs/ocfs2/dlmglue.c | 21
-rw-r--r--  fs/ocfs2/file.c | 16
-rw-r--r--  fs/ocfs2/filecheck.c | 358
-rw-r--r--  fs/ocfs2/filecheck.h | 29
-rw-r--r--  fs/ocfs2/inode.c | 8
-rw-r--r--  fs/ocfs2/namei.c | 6
-rw-r--r--  fs/ocfs2/ocfs2.h | 8
-rw-r--r--  fs/ocfs2/ocfs2_trace.h | 6
-rw-r--r--  fs/ocfs2/refcounttree.c | 10
-rw-r--r--  fs/ocfs2/suballoc.c | 53
-rw-r--r--  fs/ocfs2/super.c | 49
-rw-r--r--  fs/ocfs2/uptodate.c | 3
-rw-r--r--  fs/ocfs2/xattr.c | 2
-rw-r--r--  include/linux/fault-inject.h | 5
-rw-r--r--  include/linux/kasan.h | 4
-rw-r--r--  include/linux/list_lru.h | 3
-rw-r--r--  include/linux/memblock.h | 10
-rw-r--r--  include/linux/memory.h | 3
-rw-r--r--  include/linux/memory_hotplug.h | 53
-rw-r--r--  include/linux/migrate.h | 2
-rw-r--r--  include/linux/mm.h | 56
-rw-r--r--  include/linux/mm_types.h | 2
-rw-r--r--  include/linux/mmdebug.h | 8
-rw-r--r--  include/linux/mmzone.h | 8
-rw-r--r--  include/linux/node.h | 4
-rw-r--r--  include/linux/page-flags.h | 22
-rw-r--r--  include/linux/page_ref.h | 3
-rw-r--r--  include/linux/slab.h | 20
-rw-r--r--  include/linux/slab_def.h | 4
-rw-r--r--  include/linux/slub_def.h | 28
-rw-r--r--  include/linux/swap.h | 38
-rw-r--r--  include/linux/zsmalloc.h | 2
-rw-r--r--  include/net/sock.h | 4
-rw-r--r--  include/trace/events/migrate.h | 2
-rw-r--r--  include/trace/events/vmscan.h | 17
-rw-r--r--  kernel/fork.c | 2
-rw-r--r--  kernel/sched/core.c | 1
-rw-r--r--  kernel/ucount.c | 1
-rw-r--r--  lib/bitmap.c | 2
-rw-r--r--  lib/test_bitmap.c | 4
-rw-r--r--  lib/test_firmware.c | 1
-rw-r--r--  mm/Makefile | 4
-rw-r--r--  mm/backing-dev.c | 13
-rw-r--r--  mm/cma.c | 6
-rw-r--r--  mm/compaction.c | 9
-rw-r--r--  mm/failslab.c | 2
-rw-r--r--  mm/gup.c | 4
-rw-r--r--  mm/huge_memory.c | 36
-rw-r--r--  mm/hugetlb.c | 27
-rw-r--r--  mm/kasan/kasan.c | 15
-rw-r--r--  mm/kmemleak.c | 12
-rw-r--r--  mm/ksm.c | 36
-rw-r--r--  mm/list_lru.c | 67
-rw-r--r--  mm/memblock.c | 43
-rw-r--r--  mm/memory-failure.c | 16
-rw-r--r--  mm/memory.c | 33
-rw-r--r--  mm/memory_hotplug.c | 50
-rw-r--r--  mm/mmap.c | 14
-rw-r--r--  mm/nommu.c | 12
-rw-r--r--  mm/oom_kill.c | 12
-rw-r--r--  mm/page_alloc.c | 386
-rw-r--r--  mm/page_idle.c | 12
-rw-r--r--  mm/page_isolation.c | 18
-rw-r--r--  mm/page_owner.c | 2
-rw-r--r--  mm/page_poison.c | 2
-rw-r--r--  mm/pagewalk.c | 3
-rw-r--r--  mm/percpu-stats.c | 13
-rw-r--r--  mm/rmap.c | 1
-rw-r--r--  mm/shmem.c | 5
-rw-r--r--  mm/slab.c | 18
-rw-r--r--  mm/slab.h | 27
-rw-r--r--  mm/slab_common.c | 96
-rw-r--r--  mm/slub.c | 150
-rw-r--r--  mm/sparse.c | 8
-rw-r--r--  mm/swap.c | 1
-rw-r--r--  mm/swap_slots.c | 4
-rw-r--r--  mm/swap_state.c | 144
-rw-r--r--  mm/util.c | 10
-rw-r--r--  mm/vmscan.c | 50
-rw-r--r--  mm/z3fold.c | 35
-rw-r--r--  mm/zsmalloc.c | 58
-rw-r--r--  net/9p/client.c | 11
-rw-r--r--  net/core/sysctl_net_core.c | 1
-rw-r--r--  net/ipv4/route.c | 1
-rwxr-xr-x  scripts/faddr2line | 12
-rw-r--r--  security/apparmor/lsm.c | 1
-rw-r--r--  security/keys/big_key.c | 1
151 files changed, 1601 insertions, 1269 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 34dac7cef4cf..3c87a69cffcb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1840,30 +1840,29 @@
1840 keepinitrd [HW,ARM] 1840 keepinitrd [HW,ARM]
1841 1841
1842 kernelcore= [KNL,X86,IA-64,PPC] 1842 kernelcore= [KNL,X86,IA-64,PPC]
1843 Format: nn[KMGTPE] | "mirror" 1843 Format: nn[KMGTPE] | nn% | "mirror"
1844 This parameter 1844 This parameter specifies the amount of memory usable by
1845 specifies the amount of memory usable by the kernel 1845 the kernel for non-movable allocations. The requested
1846 for non-movable allocations. The requested amount is 1846 amount is spread evenly throughout all nodes in the
1847 spread evenly throughout all nodes in the system. The 1847 system as ZONE_NORMAL. The remaining memory is used for
1848 remaining memory in each node is used for Movable 1848 movable memory in its own zone, ZONE_MOVABLE. In the
1849 pages. In the event, a node is too small to have both 1849 event, a node is too small to have both ZONE_NORMAL and
1850 kernelcore and Movable pages, kernelcore pages will 1850 ZONE_MOVABLE, kernelcore memory will take priority and
1851 take priority and other nodes will have a larger number 1851 other nodes will have a larger ZONE_MOVABLE.
1852 of Movable pages. The Movable zone is used for the 1852
1853 allocation of pages that may be reclaimed or moved 1853 ZONE_MOVABLE is used for the allocation of pages that
1854 by the page migration subsystem. This means that 1854 may be reclaimed or moved by the page migration
1855 HugeTLB pages may not be allocated from this zone. 1855 subsystem. Note that allocations like PTEs-from-HighMem
1856 Note that allocations like PTEs-from-HighMem still 1856 still use the HighMem zone if it exists, and the Normal
1857 use the HighMem zone if it exists, and the Normal
1858 zone if it does not. 1857 zone if it does not.
1859 1858
1860 Instead of specifying the amount of memory (nn[KMGTPE]), 1859 It is possible to specify the exact amount of memory in
1861 you can specify "mirror" option. In case "mirror" 1860 the form of "nn[KMGTPE]", a percentage of total system
1861 memory in the form of "nn%", or "mirror". If "mirror"
1862 option is specified, mirrored (reliable) memory is used 1862 option is specified, mirrored (reliable) memory is used
1863 for non-movable allocations and remaining memory is used 1863 for non-movable allocations and remaining memory is used
1864 for Movable pages. nn[KMGTPE] and "mirror" are exclusive, 1864 for Movable pages. "nn[KMGTPE]", "nn%", and "mirror"
1865 so you can NOT specify nn[KMGTPE] and "mirror" at the same 1865 are exclusive, so you cannot specify multiple forms.
1866 time.
1867 1866
1868 kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port. 1867 kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port.
1869 Format: <Controller#>[,poll interval] 1868 Format: <Controller#>[,poll interval]
@@ -2377,13 +2376,14 @@
2377 mousedev.yres= [MOUSE] Vertical screen resolution, used for devices 2376 mousedev.yres= [MOUSE] Vertical screen resolution, used for devices
2378 reporting absolute coordinates, such as tablets 2377 reporting absolute coordinates, such as tablets
2379 2378
2380 movablecore=nn[KMG] [KNL,X86,IA-64,PPC] This parameter 2379 movablecore= [KNL,X86,IA-64,PPC]
2381 is similar to kernelcore except it specifies the 2380 Format: nn[KMGTPE] | nn%
2382 amount of memory used for migratable allocations. 2381 This parameter is the complement to kernelcore=, it
2383 If both kernelcore and movablecore is specified, 2382 specifies the amount of memory used for migratable
2384 then kernelcore will be at *least* the specified 2383 allocations. If both kernelcore and movablecore is
2385 value but may be more. If movablecore on its own 2384 specified, then kernelcore will be at *least* the
2386 is specified, the administrator must be careful 2385 specified value but may be more. If movablecore on its
2386 own is specified, the administrator must be careful
2387 that the amount of memory usable for all allocations 2387 that the amount of memory usable for all allocations
2388 is not too small. 2388 is not too small.
2389 2389
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
index ba976805853a..66bfd8396877 100644
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -111,7 +111,7 @@ my $regex_direct_begin_default = 'order=([0-9]*) may_writepage=([0-9]*) gfp_flag
111my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)'; 111my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)';
112my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)'; 112my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)';
113my $regex_kswapd_sleep_default = 'nid=([0-9]*)'; 113my $regex_kswapd_sleep_default = 'nid=([0-9]*)';
114my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*)'; 114my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*) gfp_flags=([A-Z_|]*)';
115my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone_idx=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; 115my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone_idx=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)';
116my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)'; 116my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)';
117my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)'; 117my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)';
@@ -201,7 +201,7 @@ $regex_kswapd_sleep = generate_traceevent_regex(
201$regex_wakeup_kswapd = generate_traceevent_regex( 201$regex_wakeup_kswapd = generate_traceevent_regex(
202 "vmscan/mm_vmscan_wakeup_kswapd", 202 "vmscan/mm_vmscan_wakeup_kswapd",
203 $regex_wakeup_kswapd_default, 203 $regex_wakeup_kswapd_default,
204 "nid", "zid", "order"); 204 "nid", "zid", "order", "gfp_flags");
205$regex_lru_isolate = generate_traceevent_regex( 205$regex_lru_isolate = generate_traceevent_regex(
206 "vmscan/mm_vmscan_lru_isolate", 206 "vmscan/mm_vmscan_lru_isolate",
207 $regex_lru_isolate_default, 207 $regex_lru_isolate_default,
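
As a rough illustration (the line is constructed from the regex above, not copied from a real trace),
an mm_vmscan_wakeup_kswapd event that the updated default pattern now parses would look something like:

    mm_vmscan_wakeup_kswapd: nid=0 zid=2 order=3 gfp_flags=GFP_HIGHUSER_MOVABLE

The extra capture group records the gfp_flags field the tracepoint now emits; its definition in
include/trace/events/vmscan.h is updated elsewhere in this series (see the diffstat above).
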
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 2072f3451e9c..9dbe645ee127 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -833,7 +833,7 @@ void flush_dcache_page(struct page *page)
833 } 833 }
834 834
835 /* don't handle anon pages here */ 835 /* don't handle anon pages here */
836 mapping = page_mapping(page); 836 mapping = page_mapping_file(page);
837 if (!mapping) 837 if (!mapping)
838 return; 838 return;
839 839
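
This conversion, and the matching page_mapping() to page_mapping_file() changes in the other
architecture cache-flush hunks below, relies on a small helper added elsewhere in this series
(in include/linux/mm.h, apparently by the "mm: fix races between swapoff and flush dcache" commit
listed above). A minimal sketch of what that helper is expected to look like, for orientation only:

    /*
     * Sketch of the helper these call sites switch to: unlike page_mapping(),
     * it returns NULL for swap-cache pages rather than the swapper address
     * space, so arch dcache-flush code only ever sees file-backed mappings
     * and cannot race with swapoff.
     */
    static inline struct address_space *page_mapping_file(struct page *page)
    {
            if (unlikely(PageSwapCache(page)))
                    return NULL;
            return page_mapping(page);
    }
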
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 16a8a804e958..e8fe51f4e97a 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -128,12 +128,7 @@ asmlinkage void __div0(void)
128 error("Attempting division by 0!"); 128 error("Attempting division by 0!");
129} 129}
130 130
131unsigned long __stack_chk_guard; 131const unsigned long __stack_chk_guard = 0x000a0dff;
132
133void __stack_chk_guard_setup(void)
134{
135 __stack_chk_guard = 0x000a0dff;
136}
137 132
138void __stack_chk_fail(void) 133void __stack_chk_fail(void)
139{ 134{
@@ -150,8 +145,6 @@ decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
150{ 145{
151 int ret; 146 int ret;
152 147
153 __stack_chk_guard_setup();
154
155 output_data = (unsigned char *)output_start; 148 output_data = (unsigned char *)output_start;
156 free_mem_ptr = free_mem_ptr_p; 149 free_mem_ptr = free_mem_ptr_p;
157 free_mem_end_ptr = free_mem_ptr_end_p; 150 free_mem_end_ptr = free_mem_ptr_end_p;
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
index 1267e64133b9..0224416cba3c 100644
--- a/arch/arm/mm/copypage-v4mc.c
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -70,7 +70,7 @@ void v4_mc_copy_user_highpage(struct page *to, struct page *from,
70 void *kto = kmap_atomic(to); 70 void *kto = kmap_atomic(to);
71 71
72 if (!test_and_set_bit(PG_dcache_clean, &from->flags)) 72 if (!test_and_set_bit(PG_dcache_clean, &from->flags))
73 __flush_dcache_page(page_mapping(from), from); 73 __flush_dcache_page(page_mapping_file(from), from);
74 74
75 raw_spin_lock(&minicache_lock); 75 raw_spin_lock(&minicache_lock);
76 76
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index 70423345da26..a698e575e321 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -76,7 +76,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to,
76 unsigned long kfrom, kto; 76 unsigned long kfrom, kto;
77 77
78 if (!test_and_set_bit(PG_dcache_clean, &from->flags)) 78 if (!test_and_set_bit(PG_dcache_clean, &from->flags))
79 __flush_dcache_page(page_mapping(from), from); 79 __flush_dcache_page(page_mapping_file(from), from);
80 80
81 /* FIXME: not highmem safe */ 81 /* FIXME: not highmem safe */
82 discard_old_kernel_data(page_address(to)); 82 discard_old_kernel_data(page_address(to));
diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c
index 0fb85025344d..97972379f4d6 100644
--- a/arch/arm/mm/copypage-xscale.c
+++ b/arch/arm/mm/copypage-xscale.c
@@ -90,7 +90,7 @@ void xscale_mc_copy_user_highpage(struct page *to, struct page *from,
90 void *kto = kmap_atomic(to); 90 void *kto = kmap_atomic(to);
91 91
92 if (!test_and_set_bit(PG_dcache_clean, &from->flags)) 92 if (!test_and_set_bit(PG_dcache_clean, &from->flags))
93 __flush_dcache_page(page_mapping(from), from); 93 __flush_dcache_page(page_mapping_file(from), from);
94 94
95 raw_spin_lock(&minicache_lock); 95 raw_spin_lock(&minicache_lock);
96 96
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index d9e0d00a6699..4d75dae5ac96 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -195,7 +195,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
195 if (page == ZERO_PAGE(0)) 195 if (page == ZERO_PAGE(0))
196 return; 196 return;
197 197
198 mapping = page_mapping(page); 198 mapping = page_mapping_file(page);
199 if (!test_and_set_bit(PG_dcache_clean, &page->flags)) 199 if (!test_and_set_bit(PG_dcache_clean, &page->flags))
200 __flush_dcache_page(mapping, page); 200 __flush_dcache_page(mapping, page);
201 if (mapping) { 201 if (mapping) {
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index f1e6190aa7ea..58469623b015 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -285,7 +285,7 @@ void __sync_icache_dcache(pte_t pteval)
285 285
286 page = pfn_to_page(pfn); 286 page = pfn_to_page(pfn);
287 if (cache_is_vipt_aliasing()) 287 if (cache_is_vipt_aliasing())
288 mapping = page_mapping(page); 288 mapping = page_mapping_file(page);
289 else 289 else
290 mapping = NULL; 290 mapping = NULL;
291 291
@@ -333,7 +333,7 @@ void flush_dcache_page(struct page *page)
333 return; 333 return;
334 } 334 }
335 335
336 mapping = page_mapping(page); 336 mapping = page_mapping_file(page);
337 337
338 if (!cache_ops_need_broadcast() && 338 if (!cache_ops_need_broadcast() &&
339 mapping && !page_mapcount(page)) 339 mapping && !page_mapcount(page))
@@ -363,7 +363,7 @@ void flush_kernel_dcache_page(struct page *page)
363 if (cache_is_vivt() || cache_is_vipt_aliasing()) { 363 if (cache_is_vivt() || cache_is_vipt_aliasing()) {
364 struct address_space *mapping; 364 struct address_space *mapping;
365 365
366 mapping = page_mapping(page); 366 mapping = page_mapping_file(page);
367 367
368 if (!mapping || mapping_mapped(mapping)) { 368 if (!mapping || mapping_mapped(mapping)) {
369 void *addr; 369 void *addr;
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index fdf99e9dd4c3..81df9047e110 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -76,12 +76,7 @@ void error(char *x)
76#include "../../../../lib/decompress_unxz.c" 76#include "../../../../lib/decompress_unxz.c"
77#endif 77#endif
78 78
79unsigned long __stack_chk_guard; 79const unsigned long __stack_chk_guard = 0x000a0dff;
80
81void __stack_chk_guard_setup(void)
82{
83 __stack_chk_guard = 0x000a0dff;
84}
85 80
86void __stack_chk_fail(void) 81void __stack_chk_fail(void)
87{ 82{
@@ -92,8 +87,6 @@ void decompress_kernel(unsigned long boot_heap_start)
92{ 87{
93 unsigned long zimage_start, zimage_size; 88 unsigned long zimage_start, zimage_size;
94 89
95 __stack_chk_guard_setup();
96
97 zimage_start = (unsigned long)(&__image_begin); 90 zimage_start = (unsigned long)(&__image_begin);
98 zimage_size = (unsigned long)(&__image_end) - 91 zimage_size = (unsigned long)(&__image_end) -
99 (unsigned long)(&__image_begin); 92 (unsigned long)(&__image_begin);
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 44ac64d51827..0d3c656feba0 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -86,7 +86,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
86 86
87void __flush_dcache_page(struct page *page) 87void __flush_dcache_page(struct page *page)
88{ 88{
89 struct address_space *mapping = page_mapping(page); 89 struct address_space *mapping = page_mapping_file(page);
90 unsigned long addr; 90 unsigned long addr;
91 91
92 if (mapping && !mapping_mapped(mapping)) { 92 if (mapping && !mapping_mapped(mapping)) {
diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c
index 87bf88ed04c6..506f6e1c86d5 100644
--- a/arch/nios2/mm/cacheflush.c
+++ b/arch/nios2/mm/cacheflush.c
@@ -180,7 +180,7 @@ void flush_dcache_page(struct page *page)
180 if (page == ZERO_PAGE(0)) 180 if (page == ZERO_PAGE(0))
181 return; 181 return;
182 182
183 mapping = page_mapping(page); 183 mapping = page_mapping_file(page);
184 184
185 /* Flush this page if there are aliases. */ 185 /* Flush this page if there are aliases. */
186 if (mapping && !mapping_mapped(mapping)) { 186 if (mapping && !mapping_mapped(mapping)) {
@@ -215,7 +215,7 @@ void update_mmu_cache(struct vm_area_struct *vma,
215 if (page == ZERO_PAGE(0)) 215 if (page == ZERO_PAGE(0))
216 return; 216 return;
217 217
218 mapping = page_mapping(page); 218 mapping = page_mapping_file(page);
219 if (!test_and_set_bit(PG_dcache_clean, &page->flags)) 219 if (!test_and_set_bit(PG_dcache_clean, &page->flags))
220 __flush_dcache_page(mapping, page); 220 __flush_dcache_page(mapping, page);
221 221
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index e3b45546d589..a99da95fc9fd 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -88,7 +88,8 @@ update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
88 return; 88 return;
89 89
90 page = pfn_to_page(pfn); 90 page = pfn_to_page(pfn);
91 if (page_mapping(page) && test_bit(PG_dcache_dirty, &page->flags)) { 91 if (page_mapping_file(page) &&
92 test_bit(PG_dcache_dirty, &page->flags)) {
92 flush_kernel_dcache_page_addr(pfn_va(pfn)); 93 flush_kernel_dcache_page_addr(pfn_va(pfn));
93 clear_bit(PG_dcache_dirty, &page->flags); 94 clear_bit(PG_dcache_dirty, &page->flags);
94 } else if (parisc_requires_coherency()) 95 } else if (parisc_requires_coherency())
@@ -304,7 +305,7 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
304 305
305void flush_dcache_page(struct page *page) 306void flush_dcache_page(struct page *page)
306{ 307{
307 struct address_space *mapping = page_mapping(page); 308 struct address_space *mapping = page_mapping_file(page);
308 struct vm_area_struct *mpnt; 309 struct vm_area_struct *mpnt;
309 unsigned long offset; 310 unsigned long offset;
310 unsigned long addr, old_addr = 0; 311 unsigned long addr, old_addr = 0;
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 1a4847f67ea8..6f6751d3eba9 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -118,12 +118,6 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
118 unsigned long ceiling); 118 unsigned long ceiling);
119 119
120/* 120/*
121 * The version of vma_mmu_pagesize() in arch/powerpc/mm/hugetlbpage.c needs
122 * to override the version in mm/hugetlb.c
123 */
124#define vma_mmu_pagesize vma_mmu_pagesize
125
126/*
127 * If the arch doesn't supply something else, assume that hugepage 121 * If the arch doesn't supply something else, assume that hugepage
128 * size aligned regions are ok without further preparation. 122 * size aligned regions are ok without further preparation.
129 */ 123 */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 876da2bc1796..3a08d211d2ee 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -568,10 +568,7 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
568 if (!radix_enabled()) 568 if (!radix_enabled())
569 return 1UL << mmu_psize_to_shift(psize); 569 return 1UL << mmu_psize_to_shift(psize);
570#endif 570#endif
571 if (!is_vm_hugetlb_page(vma)) 571 return vma_kernel_pagesize(vma);
572 return PAGE_SIZE;
573
574 return huge_page_size(hstate_vma(vma));
575} 572}
576 573
577static inline bool is_power_of_4(unsigned long x) 574static inline bool is_power_of_4(unsigned long x)
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index e0a2d8e806ed..9a8a084e4aba 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -112,7 +112,7 @@ static int mm_iommu_move_page_from_cma(struct page *page)
112 put_page(page); /* Drop the gup reference */ 112 put_page(page); /* Drop the gup reference */
113 113
114 ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page, 114 ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
115 NULL, 0, MIGRATE_SYNC, MR_CMA); 115 NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE);
116 if (ret) { 116 if (ret) {
117 if (!list_empty(&cma_migrate_pages)) 117 if (!list_empty(&cma_migrate_pages))
118 putback_movable_pages(&cma_migrate_pages); 118 putback_movable_pages(&cma_migrate_pages);
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index a6198d4f0f03..5ca3e22d0512 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -38,6 +38,7 @@
38#include <linux/suspend.h> 38#include <linux/suspend.h>
39#include <linux/memblock.h> 39#include <linux/memblock.h>
40#include <linux/gfp.h> 40#include <linux/gfp.h>
41#include <linux/kmemleak.h>
41#include <asm/io.h> 42#include <asm/io.h>
42#include <asm/prom.h> 43#include <asm/prom.h>
43#include <asm/iommu.h> 44#include <asm/iommu.h>
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index c4dae27172b3..6243a7e537d0 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/kmemleak.h>
13#include <linux/bitmap.h> 14#include <linux/bitmap.h>
14#include <linux/bootmem.h> 15#include <linux/bootmem.h>
15#include <asm/msi_bitmap.h> 16#include <asm/msi_bitmap.h>
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index c7a627620e5e..8c867b43c8eb 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -15,7 +15,7 @@
15#include <linux/hardirq.h> 15#include <linux/hardirq.h>
16#include <linux/log2.h> 16#include <linux/log2.h>
17#include <linux/kprobes.h> 17#include <linux/kprobes.h>
18#include <linux/slab.h> 18#include <linux/kmemleak.h>
19#include <linux/time.h> 19#include <linux/time.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/sched/signal.h> 21#include <linux/sched/signal.h>
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a4a9fe1934e9..2f8f7d7dd9a8 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -27,7 +27,6 @@
27#include <linux/err.h> 27#include <linux/err.h>
28#include <linux/spinlock.h> 28#include <linux/spinlock.h>
29#include <linux/kernel_stat.h> 29#include <linux/kernel_stat.h>
30#include <linux/kmemleak.h>
31#include <linux/delay.h> 30#include <linux/delay.h>
32#include <linux/interrupt.h> 31#include <linux/interrupt.h>
33#include <linux/irqflags.h> 32#include <linux/irqflags.h>
diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c
index 627ce8e75e01..c15cac9251b9 100644
--- a/arch/sh/boot/compressed/misc.c
+++ b/arch/sh/boot/compressed/misc.c
@@ -104,12 +104,7 @@ static void error(char *x)
104 while(1); /* Halt */ 104 while(1); /* Halt */
105} 105}
106 106
107unsigned long __stack_chk_guard; 107const unsigned long __stack_chk_guard = 0x000a0dff;
108
109void __stack_chk_guard_setup(void)
110{
111 __stack_chk_guard = 0x000a0dff;
112}
113 108
114void __stack_chk_fail(void) 109void __stack_chk_fail(void)
115{ 110{
@@ -130,8 +125,6 @@ void decompress_kernel(void)
130{ 125{
131 unsigned long output_addr; 126 unsigned long output_addr;
132 127
133 __stack_chk_guard_setup();
134
135#ifdef CONFIG_SUPERH64 128#ifdef CONFIG_SUPERH64
136 output_addr = (CONFIG_MEMORY_START + 0x2000); 129 output_addr = (CONFIG_MEMORY_START + 0x2000);
137#else 130#else
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 58aaa4f33b81..eee911422cf9 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -112,7 +112,7 @@ static void sh4_flush_dcache_page(void *arg)
112 struct page *page = arg; 112 struct page *page = arg;
113 unsigned long addr = (unsigned long)page_address(page); 113 unsigned long addr = (unsigned long)page_address(page);
114#ifndef CONFIG_SMP 114#ifndef CONFIG_SMP
115 struct address_space *mapping = page_mapping(page); 115 struct address_space *mapping = page_mapping_file(page);
116 116
117 if (mapping && !mapping_mapped(mapping)) 117 if (mapping && !mapping_mapped(mapping))
118 clear_bit(PG_dcache_clean, &page->flags); 118 clear_bit(PG_dcache_clean, &page->flags);
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index 6cd2aa395817..ed25eba80667 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -136,7 +136,7 @@ static void __flush_dcache_page(unsigned long phys)
136static void sh7705_flush_dcache_page(void *arg) 136static void sh7705_flush_dcache_page(void *arg)
137{ 137{
138 struct page *page = arg; 138 struct page *page = arg;
139 struct address_space *mapping = page_mapping(page); 139 struct address_space *mapping = page_mapping_file(page);
140 140
141 if (mapping && !mapping_mapped(mapping)) 141 if (mapping && !mapping_mapped(mapping))
142 clear_bit(PG_dcache_clean, &page->flags); 142 clear_bit(PG_dcache_clean, &page->flags);
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index d66dde833f5e..713670e6d13d 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -22,7 +22,6 @@
22#include <linux/seq_file.h> 22#include <linux/seq_file.h>
23#include <linux/ftrace.h> 23#include <linux/ftrace.h>
24#include <linux/irq.h> 24#include <linux/irq.h>
25#include <linux/kmemleak.h>
26 25
27#include <asm/ptrace.h> 26#include <asm/ptrace.h>
28#include <asm/processor.h> 27#include <asm/processor.h>
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index c50182cd2f64..d3ea1f3c06a0 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -929,9 +929,9 @@ static inline void __local_flush_dcache_page(struct page *page)
929#ifdef DCACHE_ALIASING_POSSIBLE 929#ifdef DCACHE_ALIASING_POSSIBLE
930 __flush_dcache_page(page_address(page), 930 __flush_dcache_page(page_address(page),
931 ((tlb_type == spitfire) && 931 ((tlb_type == spitfire) &&
932 page_mapping(page) != NULL)); 932 page_mapping_file(page) != NULL));
933#else 933#else
934 if (page_mapping(page) != NULL && 934 if (page_mapping_file(page) != NULL &&
935 tlb_type == spitfire) 935 tlb_type == spitfire)
936 __flush_icache_page(__pa(page_address(page))); 936 __flush_icache_page(__pa(page_address(page)));
937#endif 937#endif
@@ -958,7 +958,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
958 958
959 if (tlb_type == spitfire) { 959 if (tlb_type == spitfire) {
960 data0 = ((u64)&xcall_flush_dcache_page_spitfire); 960 data0 = ((u64)&xcall_flush_dcache_page_spitfire);
961 if (page_mapping(page) != NULL) 961 if (page_mapping_file(page) != NULL)
962 data0 |= ((u64)1 << 32); 962 data0 |= ((u64)1 << 32);
963 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { 963 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
964#ifdef DCACHE_ALIASING_POSSIBLE 964#ifdef DCACHE_ALIASING_POSSIBLE
@@ -994,7 +994,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
994 pg_addr = page_address(page); 994 pg_addr = page_address(page);
995 if (tlb_type == spitfire) { 995 if (tlb_type == spitfire) {
996 data0 = ((u64)&xcall_flush_dcache_page_spitfire); 996 data0 = ((u64)&xcall_flush_dcache_page_spitfire);
997 if (page_mapping(page) != NULL) 997 if (page_mapping_file(page) != NULL)
998 data0 |= ((u64)1 << 32); 998 data0 |= ((u64)1 << 32);
999 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { 999 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
1000#ifdef DCACHE_ALIASING_POSSIBLE 1000#ifdef DCACHE_ALIASING_POSSIBLE
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index cb9ebac6663f..8aeb1aabe76e 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -206,9 +206,9 @@ inline void flush_dcache_page_impl(struct page *page)
206#ifdef DCACHE_ALIASING_POSSIBLE 206#ifdef DCACHE_ALIASING_POSSIBLE
207 __flush_dcache_page(page_address(page), 207 __flush_dcache_page(page_address(page),
208 ((tlb_type == spitfire) && 208 ((tlb_type == spitfire) &&
209 page_mapping(page) != NULL)); 209 page_mapping_file(page) != NULL));
210#else 210#else
211 if (page_mapping(page) != NULL && 211 if (page_mapping_file(page) != NULL &&
212 tlb_type == spitfire) 212 tlb_type == spitfire)
213 __flush_icache_page(__pa(page_address(page))); 213 __flush_icache_page(__pa(page_address(page)));
214#endif 214#endif
@@ -490,7 +490,7 @@ void flush_dcache_page(struct page *page)
490 490
491 this_cpu = get_cpu(); 491 this_cpu = get_cpu();
492 492
493 mapping = page_mapping(page); 493 mapping = page_mapping_file(page);
494 if (mapping && !mapping_mapped(mapping)) { 494 if (mapping && !mapping_mapped(mapping)) {
495 int dirty = test_bit(PG_dcache_dirty, &page->flags); 495 int dirty = test_bit(PG_dcache_dirty, &page->flags);
496 if (dirty) { 496 if (dirty) {
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index b5cfab711651..3d72d2deb13b 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -128,7 +128,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
128 goto no_cache_flush; 128 goto no_cache_flush;
129 129
130 /* A real file page? */ 130 /* A real file page? */
131 mapping = page_mapping(page); 131 mapping = page_mapping_file(page);
132 if (!mapping) 132 if (!mapping)
133 goto no_cache_flush; 133 goto no_cache_flush;
134 134
diff --git a/arch/unicore32/mm/flush.c b/arch/unicore32/mm/flush.c
index 6d4c096ffa2a..74f4d636df2d 100644
--- a/arch/unicore32/mm/flush.c
+++ b/arch/unicore32/mm/flush.c
@@ -83,7 +83,7 @@ void flush_dcache_page(struct page *page)
83 if (page == ZERO_PAGE(0)) 83 if (page == ZERO_PAGE(0))
84 return; 84 return;
85 85
86 mapping = page_mapping(page); 86 mapping = page_mapping_file(page);
87 87
88 if (mapping && !mapping_mapped(mapping)) 88 if (mapping && !mapping_mapped(mapping))
89 clear_bit(PG_dcache_clean, &page->flags); 89 clear_bit(PG_dcache_clean, &page->flags);
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
index 4f5a532bee13..0c94b7b4514d 100644
--- a/arch/unicore32/mm/mmu.c
+++ b/arch/unicore32/mm/mmu.c
@@ -503,7 +503,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
503 if (page == ZERO_PAGE(0)) 503 if (page == ZERO_PAGE(0))
504 return; 504 return;
505 505
506 mapping = page_mapping(page); 506 mapping = page_mapping_file(page);
507 if (!test_and_set_bit(PG_dcache_clean, &page->flags)) 507 if (!test_and_set_bit(PG_dcache_clean, &page->flags))
508 __flush_dcache_page(mapping, page); 508 __flush_dcache_page(mapping, page);
509 if (mapping) 509 if (mapping)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 14437116ffea..77625b60a510 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -6,7 +6,6 @@
6#include <linux/bootmem.h> 6#include <linux/bootmem.h>
7#include <linux/gfp.h> 7#include <linux/gfp.h>
8#include <linux/pci.h> 8#include <linux/pci.h>
9#include <linux/kmemleak.h>
10 9
11#include <asm/proto.h> 10#include <asm/proto.h>
12#include <asm/dma.h> 11#include <asm/dma.h>
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 45241de66785..dca9abf2b85c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1328,14 +1328,39 @@ int kern_addr_valid(unsigned long addr)
1328 return pfn_valid(pte_pfn(*pte)); 1328 return pfn_valid(pte_pfn(*pte));
1329} 1329}
1330 1330
1331/*
1332 * Block size is the minimum amount of memory which can be hotplugged or
1333 * hotremoved. It must be power of two and must be equal or larger than
1334 * MIN_MEMORY_BLOCK_SIZE.
1335 */
1336#define MAX_BLOCK_SIZE (2UL << 30)
1337
1338/* Amount of ram needed to start using large blocks */
1339#define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30)
1340
1331static unsigned long probe_memory_block_size(void) 1341static unsigned long probe_memory_block_size(void)
1332{ 1342{
1333 unsigned long bz = MIN_MEMORY_BLOCK_SIZE; 1343 unsigned long boot_mem_end = max_pfn << PAGE_SHIFT;
1344 unsigned long bz;
1334 1345
1335 /* if system is UV or has 64GB of RAM or more, use large blocks */ 1346 /* If this is UV system, always set 2G block size */
1336 if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30))) 1347 if (is_uv_system()) {
1337 bz = 2UL << 30; /* 2GB */ 1348 bz = MAX_BLOCK_SIZE;
1349 goto done;
1350 }
1338 1351
1352 /* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */
1353 if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) {
1354 bz = MIN_MEMORY_BLOCK_SIZE;
1355 goto done;
1356 }
1357
1358 /* Find the largest allowed block size that aligns to memory end */
1359 for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) {
1360 if (IS_ALIGNED(boot_mem_end, bz))
1361 break;
1362 }
1363done:
1339 pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20); 1364 pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);
1340 1365
1341 return bz; 1366 return bz;
diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 57dc231a0709..9220dcde7520 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -127,7 +127,7 @@ EXPORT_SYMBOL(copy_user_highpage);
127 127
128void flush_dcache_page(struct page *page) 128void flush_dcache_page(struct page *page)
129{ 129{
130 struct address_space *mapping = page_mapping(page); 130 struct address_space *mapping = page_mapping_file(page);
131 131
132 /* 132 /*
133 * If we have a mapping but the page is not mapped to user-space 133 * If we have a mapping but the page is not mapped to user-space
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index fe4b24f05f6a..79fcd2bae96b 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -187,13 +187,14 @@ int memory_isolate_notify(unsigned long val, void *v)
187} 187}
188 188
189/* 189/*
190 * The probe routines leave the pages reserved, just as the bootmem code does. 190 * The probe routines leave the pages uninitialized, just as the bootmem code
191 * Make sure they're still that way. 191 * does. Make sure we do not access them, but instead use only information from
192 * within sections.
192 */ 193 */
193static bool pages_correctly_reserved(unsigned long start_pfn) 194static bool pages_correctly_probed(unsigned long start_pfn)
194{ 195{
195 int i, j; 196 unsigned long section_nr = pfn_to_section_nr(start_pfn);
196 struct page *page; 197 unsigned long section_nr_end = section_nr + sections_per_block;
197 unsigned long pfn = start_pfn; 198 unsigned long pfn = start_pfn;
198 199
199 /* 200 /*
@@ -201,21 +202,24 @@ static bool pages_correctly_reserved(unsigned long start_pfn)
201 * SPARSEMEM_VMEMMAP. We lookup the page once per section 202 * SPARSEMEM_VMEMMAP. We lookup the page once per section
202 * and assume memmap is contiguous within each section 203 * and assume memmap is contiguous within each section
203 */ 204 */
204 for (i = 0; i < sections_per_block; i++, pfn += PAGES_PER_SECTION) { 205 for (; section_nr < section_nr_end; section_nr++) {
205 if (WARN_ON_ONCE(!pfn_valid(pfn))) 206 if (WARN_ON_ONCE(!pfn_valid(pfn)))
206 return false; 207 return false;
207 page = pfn_to_page(pfn);
208
209 for (j = 0; j < PAGES_PER_SECTION; j++) {
210 if (PageReserved(page + j))
211 continue;
212
213 printk(KERN_WARNING "section number %ld page number %d "
214 "not reserved, was it already online?\n",
215 pfn_to_section_nr(pfn), j);
216 208
209 if (!present_section_nr(section_nr)) {
210 pr_warn("section %ld pfn[%lx, %lx) not present",
211 section_nr, pfn, pfn + PAGES_PER_SECTION);
212 return false;
213 } else if (!valid_section_nr(section_nr)) {
214 pr_warn("section %ld pfn[%lx, %lx) no valid memmap",
215 section_nr, pfn, pfn + PAGES_PER_SECTION);
216 return false;
217 } else if (online_section_nr(section_nr)) {
218 pr_warn("section %ld pfn[%lx, %lx) is already online",
219 section_nr, pfn, pfn + PAGES_PER_SECTION);
217 return false; 220 return false;
218 } 221 }
222 pfn += PAGES_PER_SECTION;
219 } 223 }
220 224
221 return true; 225 return true;
@@ -237,7 +241,7 @@ memory_block_action(unsigned long phys_index, unsigned long action, int online_t
237 241
238 switch (action) { 242 switch (action) {
239 case MEM_ONLINE: 243 case MEM_ONLINE:
240 if (!pages_correctly_reserved(start_pfn)) 244 if (!pages_correctly_probed(start_pfn))
241 return -EBUSY; 245 return -EBUSY;
242 246
243 ret = online_pages(start_pfn, nr_pages, online_type); 247 ret = online_pages(start_pfn, nr_pages, online_type);
@@ -708,7 +712,7 @@ static int add_memory_block(int base_section_nr)
708 * need an interface for the VM to add new memory regions, 712 * need an interface for the VM to add new memory regions,
709 * but without onlining it. 713 * but without onlining it.
710 */ 714 */
711int register_new_memory(int nid, struct mem_section *section) 715int hotplug_memory_register(int nid, struct mem_section *section)
712{ 716{
713 int ret = 0; 717 int ret = 0;
714 struct memory_block *mem; 718 struct memory_block *mem;
@@ -727,7 +731,7 @@ int register_new_memory(int nid, struct mem_section *section)
727 } 731 }
728 732
729 if (mem->section_count == sections_per_block) 733 if (mem->section_count == sections_per_block)
730 ret = register_mem_sect_under_node(mem, nid); 734 ret = register_mem_sect_under_node(mem, nid, false);
731out: 735out:
732 mutex_unlock(&mem_sysfs_mutex); 736 mutex_unlock(&mem_sysfs_mutex);
733 return ret; 737 return ret;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index c5f81fc621ac..7a3a580821e0 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -399,13 +399,16 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
399} 399}
400 400
401/* register memory section under specified node if it spans that node */ 401/* register memory section under specified node if it spans that node */
402int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) 402int register_mem_sect_under_node(struct memory_block *mem_blk, int nid,
403 bool check_nid)
403{ 404{
404 int ret; 405 int ret;
405 unsigned long pfn, sect_start_pfn, sect_end_pfn; 406 unsigned long pfn, sect_start_pfn, sect_end_pfn;
406 407
407 if (!mem_blk) 408 if (!mem_blk)
408 return -EFAULT; 409 return -EFAULT;
410
411 mem_blk->nid = nid;
409 if (!node_online(nid)) 412 if (!node_online(nid))
410 return 0; 413 return 0;
411 414
@@ -425,11 +428,18 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
425 continue; 428 continue;
426 } 429 }
427 430
428 page_nid = get_nid_for_pfn(pfn); 431 /*
429 if (page_nid < 0) 432 * We need to check if page belongs to nid only for the boot
430 continue; 433 * case, during hotplug we know that all pages in the memory
431 if (page_nid != nid) 434 * block belong to the same node.
432 continue; 435 */
436 if (check_nid) {
437 page_nid = get_nid_for_pfn(pfn);
438 if (page_nid < 0)
439 continue;
440 if (page_nid != nid)
441 continue;
442 }
433 ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, 443 ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
434 &mem_blk->dev.kobj, 444 &mem_blk->dev.kobj,
435 kobject_name(&mem_blk->dev.kobj)); 445 kobject_name(&mem_blk->dev.kobj));
@@ -504,7 +514,7 @@ int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages)
504 514
505 mem_blk = find_memory_block_hinted(mem_sect, mem_blk); 515 mem_blk = find_memory_block_hinted(mem_sect, mem_blk);
506 516
507 ret = register_mem_sect_under_node(mem_blk, nid); 517 ret = register_mem_sect_under_node(mem_blk, nid, true);
508 if (!err) 518 if (!err)
509 err = ret; 519 err = ret;
510 520
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 71b449613cfa..0f3fadd71230 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -44,6 +44,11 @@ static const char *default_compressor = "lzo";
44 44
45/* Module params (documentation at end) */ 45/* Module params (documentation at end) */
46static unsigned int num_devices = 1; 46static unsigned int num_devices = 1;
47/*
48 * Pages that compress to sizes equals or greater than this are stored
49 * uncompressed in memory.
50 */
51static size_t huge_class_size;
47 52
48static void zram_free_page(struct zram *zram, size_t index); 53static void zram_free_page(struct zram *zram, size_t index);
49 54
@@ -786,6 +791,8 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
786 return false; 791 return false;
787 } 792 }
788 793
794 if (!huge_class_size)
795 huge_class_size = zs_huge_class_size(zram->mem_pool);
789 return true; 796 return true;
790} 797}
791 798
@@ -965,7 +972,7 @@ compress_again:
965 return ret; 972 return ret;
966 } 973 }
967 974
968 if (unlikely(comp_len > max_zpage_size)) { 975 if (unlikely(comp_len >= huge_class_size)) {
969 if (zram_wb_enabled(zram) && allow_wb) { 976 if (zram_wb_enabled(zram) && allow_wb) {
970 zcomp_stream_put(zram->comp); 977 zcomp_stream_put(zram->comp);
971 ret = write_to_bdev(zram, bvec, index, bio, &element); 978 ret = write_to_bdev(zram, bvec, index, bio, &element);
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 1e9bf65c0bfb..008861220723 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -21,22 +21,6 @@
21 21
22#include "zcomp.h" 22#include "zcomp.h"
23 23
24/*-- Configurable parameters */
25
26/*
27 * Pages that compress to size greater than this are stored
28 * uncompressed in memory.
29 */
30static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
31
32/*
33 * NOTE: max_zpage_size must be less than or equal to:
34 * ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would
35 * always return failure.
36 */
37
38/*-- End of configurable params */
39
40#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) 24#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
41#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) 25#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
42#define ZRAM_LOGICAL_BLOCK_SHIFT 12 26#define ZRAM_LOGICAL_BLOCK_SHIFT 12
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 2137dbc29877..0b61f48f21a6 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -439,10 +439,20 @@ static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr)
439 return 0; 439 return 0;
440} 440}
441 441
442static unsigned long dev_dax_pagesize(struct vm_area_struct *vma)
443{
444 struct file *filp = vma->vm_file;
445 struct dev_dax *dev_dax = filp->private_data;
446 struct dax_region *dax_region = dev_dax->region;
447
448 return dax_region->align;
449}
450
442static const struct vm_operations_struct dax_vm_ops = { 451static const struct vm_operations_struct dax_vm_ops = {
443 .fault = dev_dax_fault, 452 .fault = dev_dax_fault,
444 .huge_fault = dev_dax_huge_fault, 453 .huge_fault = dev_dax_huge_fault,
445 .split = dev_dax_split, 454 .split = dev_dax_split,
455 .pagesize = dev_dax_pagesize,
446}; 456};
447 457
448static int dax_mmap(struct file *filp, struct vm_area_struct *vma) 458static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 2138102ef611..c5f4f7691b57 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -17,6 +17,7 @@
17#include <linux/io.h> 17#include <linux/io.h>
18#include <linux/iommu.h> 18#include <linux/iommu.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/kmemleak.h>
20#include <linux/list.h> 21#include <linux/list.h>
21#include <linux/of.h> 22#include <linux/of.h>
22#include <linux/of_iommu.h> 23#include <linux/of_iommu.h>
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 542930cd183d..5a96fd14ac22 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -25,7 +25,6 @@
25#include <linux/io.h> 25#include <linux/io.h>
26#include <linux/iommu.h> 26#include <linux/iommu.h>
27#include <linux/iopoll.h> 27#include <linux/iopoll.h>
28#include <linux/kmemleak.h>
29#include <linux/list.h> 28#include <linux/list.h>
30#include <linux/of_address.h> 29#include <linux/of_address.h>
31#include <linux/of_iommu.h> 30#include <linux/of_iommu.h>
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 1b4af54a4968..30371274409d 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -35,6 +35,7 @@
35#include <linux/of_net.h> 35#include <linux/of_net.h>
36#include <linux/of_device.h> 36#include <linux/of_device.h>
37#include <linux/if_vlan.h> 37#include <linux/if_vlan.h>
38#include <linux/kmemleak.h>
38 39
39#include <linux/pinctrl/consumer.h> 40#include <linux/pinctrl/consumer.h>
40 41
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
index 2437422625bf..57bb8f049e59 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
@@ -31,7 +31,6 @@
31#include "efuse.h" 31#include "efuse.h"
32#include <linux/interrupt.h> 32#include <linux/interrupt.h>
33#include <linux/export.h> 33#include <linux/export.h>
34#include <linux/kmemleak.h>
35#include <linux/module.h> 34#include <linux/module.h>
36 35
37MODULE_AUTHOR("lizhaoming <chaoming_li@realsil.com.cn>"); 36MODULE_AUTHOR("lizhaoming <chaoming_li@realsil.com.cn>");
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c
index 015476e3f7e5..f3bff66e85d0 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c
@@ -32,7 +32,6 @@
32#include "../rtl8192ce/def.h" 32#include "../rtl8192ce/def.h"
33#include "fw_common.h" 33#include "fw_common.h"
34#include <linux/export.h> 34#include <linux/export.h>
35#include <linux/kmemleak.h>
36 35
37static void _rtl92c_enable_fw_download(struct ieee80211_hw *hw, bool enable) 36static void _rtl92c_enable_fw_download(struct ieee80211_hw *hw, bool enable)
38{ 37{
diff --git a/drivers/staging/rtl8188eu/hal/fw.c b/drivers/staging/rtl8188eu/hal/fw.c
index 03d091bad13a..6b67b38a6a9f 100644
--- a/drivers/staging/rtl8188eu/hal/fw.c
+++ b/drivers/staging/rtl8188eu/hal/fw.c
@@ -30,7 +30,7 @@
30#include "rtl8188e_hal.h" 30#include "rtl8188e_hal.h"
31 31
32#include <linux/firmware.h> 32#include <linux/firmware.h>
33#include <linux/kmemleak.h> 33#include <linux/slab.h>
34 34
35static void _rtl88e_enable_fw_download(struct adapter *adapt, bool enable) 35static void _rtl88e_enable_fw_download(struct adapter *adapt, bool enable)
36{ 36{
diff --git a/drivers/staging/rtlwifi/pci.c b/drivers/staging/rtlwifi/pci.c
index 70a64a5f564a..d56810eabde7 100644
--- a/drivers/staging/rtlwifi/pci.c
+++ b/drivers/staging/rtlwifi/pci.c
@@ -31,7 +31,6 @@
31#include "efuse.h" 31#include "efuse.h"
32#include <linux/interrupt.h> 32#include <linux/interrupt.h>
33#include <linux/export.h> 33#include <linux/export.h>
34#include <linux/kmemleak.h>
35#include <linux/module.h> 34#include <linux/module.h>
36 35
37MODULE_AUTHOR("lizhaoming <chaoming_li@realsil.com.cn>"); 36MODULE_AUTHOR("lizhaoming <chaoming_li@realsil.com.cn>");
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 71458f493cf8..21d464a29cf8 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -23,7 +23,6 @@
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/hrtimer.h> 25#include <linux/hrtimer.h>
26#include <linux/kmemleak.h>
27#include <linux/dma-mapping.h> 26#include <linux/dma-mapping.h>
28#include <xen/xen.h> 27#include <xen/xen.h>
29 28
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 8fb89ddc6cc7..e622f0f10502 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -292,6 +292,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
292#ifdef CONFIG_9P_FSCACHE 292#ifdef CONFIG_9P_FSCACHE
293 kfree(v9ses->cachetag); 293 kfree(v9ses->cachetag);
294 v9ses->cachetag = match_strdup(&args[0]); 294 v9ses->cachetag = match_strdup(&args[0]);
295 if (!v9ses->cachetag) {
296 ret = -ENOMEM;
297 goto free_and_return;
298 }
295#endif 299#endif
296 break; 300 break;
297 case Opt_cache: 301 case Opt_cache:
@@ -471,6 +475,9 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
471 return fid; 475 return fid;
472 476
473err_clnt: 477err_clnt:
478#ifdef CONFIG_9P_FSCACHE
479 kfree(v9ses->cachetag);
480#endif
474 p9_client_destroy(v9ses->clnt); 481 p9_client_destroy(v9ses->clnt);
475err_names: 482err_names:
476 kfree(v9ses->uname); 483 kfree(v9ses->uname);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index bdabb2765d1b..9ee534159cc6 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -579,6 +579,24 @@ static int v9fs_at_to_dotl_flags(int flags)
579} 579}
580 580
581/** 581/**
582 * v9fs_dec_count - helper functon to drop i_nlink.
583 *
584 * If a directory had nlink <= 2 (including . and ..), then we should not drop
585 * the link count, which indicates the underlying exported fs doesn't maintain
586 * nlink accurately. e.g.
587 * - overlayfs sets nlink to 1 for merged dir
588 * - ext4 (with dir_nlink feature enabled) sets nlink to 1 if a dir has more
589 * than EXT4_LINK_MAX (65000) links.
590 *
591 * @inode: inode whose nlink is being dropped
592 */
593static void v9fs_dec_count(struct inode *inode)
594{
595 if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
596 drop_nlink(inode);
597}
598
599/**
582 * v9fs_remove - helper function to remove files and directories 600 * v9fs_remove - helper function to remove files and directories
583 * @dir: directory inode that is being deleted 601 * @dir: directory inode that is being deleted
584 * @dentry: dentry that is being deleted 602 * @dentry: dentry that is being deleted
@@ -621,9 +639,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
621 */ 639 */
622 if (flags & AT_REMOVEDIR) { 640 if (flags & AT_REMOVEDIR) {
623 clear_nlink(inode); 641 clear_nlink(inode);
624 drop_nlink(dir); 642 v9fs_dec_count(dir);
625 } else 643 } else
626 drop_nlink(inode); 644 v9fs_dec_count(inode);
627 645
628 v9fs_invalidate_inode_attr(inode); 646 v9fs_invalidate_inode_attr(inode);
629 v9fs_invalidate_inode_attr(dir); 647 v9fs_invalidate_inode_attr(dir);
@@ -1024,12 +1042,12 @@ clunk_newdir:
1024 if (S_ISDIR(new_inode->i_mode)) 1042 if (S_ISDIR(new_inode->i_mode))
1025 clear_nlink(new_inode); 1043 clear_nlink(new_inode);
1026 else 1044 else
1027 drop_nlink(new_inode); 1045 v9fs_dec_count(new_inode);
1028 } 1046 }
1029 if (S_ISDIR(old_inode->i_mode)) { 1047 if (S_ISDIR(old_inode->i_mode)) {
1030 if (!new_inode) 1048 if (!new_inode)
1031 inc_nlink(new_dir); 1049 inc_nlink(new_dir);
1032 drop_nlink(old_dir); 1050 v9fs_dec_count(old_dir);
1033 } 1051 }
1034 v9fs_invalidate_inode_attr(old_inode); 1052 v9fs_invalidate_inode_attr(old_inode);
1035 v9fs_invalidate_inode_attr(old_dir); 1053 v9fs_invalidate_inode_attr(old_dir);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index af03c2a901eb..48ce50484e80 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -94,7 +94,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
94 if (v9ses->cache) 94 if (v9ses->cache)
95 sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE; 95 sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE;
96 96
97 sb->s_flags |= SB_ACTIVE | SB_DIRSYNC | SB_NOATIME; 97 sb->s_flags |= SB_ACTIVE | SB_DIRSYNC;
98 if (!v9ses->cache) 98 if (!v9ses->cache)
99 sb->s_flags |= SB_SYNCHRONOUS; 99 sb->s_flags |= SB_SYNCHRONOUS;
100 100
diff --git a/fs/block_dev.c b/fs/block_dev.c
index fe09ef9c21f3..7a506c55a993 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1324,7 +1324,8 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
1324 * @bdev: struct bdev to adjust. 1324 * @bdev: struct bdev to adjust.
1325 * 1325 *
1326 * This routine checks to see if the bdev size does not match the disk size 1326 * This routine checks to see if the bdev size does not match the disk size
1327 * and adjusts it if it differs. 1327 * and adjusts it if it differs. When shrinking the bdev size, all its
1328 * caches are freed.
1328 */ 1329 */
1329void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) 1330void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
1330{ 1331{
@@ -1337,7 +1338,8 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
1337 "%s: detected capacity change from %lld to %lld\n", 1338 "%s: detected capacity change from %lld to %lld\n",
1338 disk->disk_name, bdev_size, disk_size); 1339 disk->disk_name, bdev_size, disk_size);
1339 i_size_write(bdev->bd_inode, disk_size); 1340 i_size_write(bdev->bd_inode, disk_size);
1340 flush_disk(bdev, false); 1341 if (bdev_size > disk_size)
1342 flush_disk(bdev, false);
1341 } 1343 }
1342} 1344}
1343EXPORT_SYMBOL(check_disk_size_change); 1345EXPORT_SYMBOL(check_disk_size_change);
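A minimal sketch, with made-up state variables, of the behaviour change in check_disk_size_change(): the inode size is always brought in line with the disk, but cached data is only flushed when the device got smaller.

#include <stdio.h>

static long long bdev_size = 100;	/* hypothetical current bdev size */
static int flushed;

static void check_disk_size_change(long long disk_size)
{
	if (bdev_size != disk_size) {
		long long old_size = bdev_size;

		bdev_size = disk_size;		/* i_size_write() equivalent */
		if (old_size > disk_size)
			flushed = 1;		/* flush_disk() only on shrink */
	}
}

int main(void)
{
	check_disk_size_change(200);
	printf("after grow:   flushed=%d\n", flushed);	/* 0 */
	check_disk_size_change(50);
	printf("after shrink: flushed=%d\n", flushed);	/* 1 */
	return 0;
}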
diff --git a/fs/buffer.c b/fs/buffer.c
index 9a73924db22f..ec5dd39071e6 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1511,7 +1511,7 @@ void block_invalidatepage(struct page *page, unsigned int offset,
1511 * The get_block cached value has been unconditionally invalidated, 1511 * The get_block cached value has been unconditionally invalidated,
1512 * so real IO is not possible anymore. 1512 * so real IO is not possible anymore.
1513 */ 1513 */
1514 if (offset == 0) 1514 if (length == PAGE_SIZE)
1515 try_to_release_page(page, 0); 1515 try_to_release_page(page, 0);
1516out: 1516out:
1517 return; 1517 return;
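Sketch of the predicate change in block_invalidatepage(): buffers may only be released when the whole page is invalidated, whereas a partial invalidation that merely starts at offset 0 must keep the page's buffers. The 4096-byte PAGE_SIZE below is an assumption for illustration.

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

static bool may_release_old(unsigned int offset, unsigned int length)
{
	(void)length;
	return offset == 0;		/* wrongly true for a partial head range */
}

static bool may_release_new(unsigned int offset, unsigned int length)
{
	(void)offset;
	return length == PAGE_SIZE;	/* only a full-page invalidation */
}

int main(void)
{
	/* Invalidate just the first 512 bytes of the page. */
	printf("old: %d, new: %d\n", may_release_old(0, 512), may_release_new(0, 512));
	return 0;	/* prints "old: 1, new: 0" */
}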
diff --git a/fs/direct-io.c b/fs/direct-io.c
index ba12ee659673..874607bb6e02 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1177,9 +1177,9 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1177 unsigned blkbits = i_blkbits; 1177 unsigned blkbits = i_blkbits;
1178 unsigned blocksize_mask = (1 << blkbits) - 1; 1178 unsigned blocksize_mask = (1 << blkbits) - 1;
1179 ssize_t retval = -EINVAL; 1179 ssize_t retval = -EINVAL;
1180 size_t count = iov_iter_count(iter); 1180 const size_t count = iov_iter_count(iter);
1181 loff_t offset = iocb->ki_pos; 1181 loff_t offset = iocb->ki_pos;
1182 loff_t end = offset + count; 1182 const loff_t end = offset + count;
1183 struct dio *dio; 1183 struct dio *dio;
1184 struct dio_submit sdio = { 0, }; 1184 struct dio_submit sdio = { 0, };
1185 struct buffer_head map_bh = { 0, }; 1185 struct buffer_head map_bh = { 0, };
@@ -1200,7 +1200,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1200 } 1200 }
1201 1201
1202 /* watch out for a 0 len io from a tricksy fs */ 1202 /* watch out for a 0 len io from a tricksy fs */
1203 if (iov_iter_rw(iter) == READ && !iov_iter_count(iter)) 1203 if (iov_iter_rw(iter) == READ && !count)
1204 return 0; 1204 return 0;
1205 1205
1206 dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); 1206 dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
@@ -1315,8 +1315,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1315 1315
1316 dio->should_dirty = (iter->type == ITER_IOVEC); 1316 dio->should_dirty = (iter->type == ITER_IOVEC);
1317 sdio.iter = iter; 1317 sdio.iter = iter;
1318 sdio.final_block_in_request = 1318 sdio.final_block_in_request = end >> blkbits;
1319 (offset + iov_iter_count(iter)) >> blkbits;
1320 1319
1321 /* 1320 /*
1322 * In case of non-aligned buffers, we may need 2 more 1321 * In case of non-aligned buffers, we may need 2 more
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b9a254dcc0e7..d508c7844681 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -138,10 +138,14 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
138 138
139 /* 139 /*
140 * page based offset in vm_pgoff could be sufficiently large to 140 * page based offset in vm_pgoff could be sufficiently large to
141 * overflow a (l)off_t when converted to byte offset. 141 * overflow a loff_t when converted to byte offset. This can
142 * only happen on architectures where sizeof(loff_t) ==
143 * sizeof(unsigned long). So, only check in those instances.
142 */ 144 */
143 if (vma->vm_pgoff & PGOFF_LOFFT_MAX) 145 if (sizeof(unsigned long) == sizeof(loff_t)) {
144 return -EINVAL; 146 if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
147 return -EINVAL;
148 }
145 149
146 /* must be huge page aligned */ 150 /* must be huge page aligned */
147 if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) 151 if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
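Sketch of why the extra hugetlbfs check is only needed when sizeof(unsigned long) == sizeof(loff_t): on 64-bit, a page-based vm_pgoff can be large enough that (pgoff << PAGE_SHIFT) no longer fits in a signed 64-bit byte offset. PAGE_SHIFT == 12 is an assumption for illustration.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	/* Model a 64-bit vm_pgoff; on a 32-bit arch vm_pgoff is only 32 bits
	 * wide, so the shifted value always fits in loff_t and no check is
	 * needed. */
	uint64_t pgoff = UINT64_C(1) << 51;
	uint64_t bytes = pgoff << PAGE_SHIFT;		/* 2^63 */

	printf("pgoff=%#" PRIx64 " -> %#" PRIx64 " bytes, fits in loff_t: %s\n",
	       pgoff, bytes, bytes <= (uint64_t)INT64_MAX ? "yes" : "no");
	return 0;	/* prints "... fits in loff_t: no" */
}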
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9a876bb07cac..0f157bbd3e0f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7119,7 +7119,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
7119 goto out_commit; 7119 goto out_commit;
7120 did_quota = 1; 7120 did_quota = 1;
7121 7121
7122 data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; 7122 data_ac->ac_resv = &oi->ip_la_data_resv;
7123 7123
7124 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, 7124 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
7125 &num); 7125 &num);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index e8e205bf2e41..302cd7caa4a7 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -346,7 +346,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
346 unlock = 0; 346 unlock = 0;
347 347
348out_alloc: 348out_alloc:
349 up_read(&OCFS2_I(inode)->ip_alloc_sem); 349 up_read(&oi->ip_alloc_sem);
350out_inode_unlock: 350out_inode_unlock:
351 ocfs2_inode_unlock(inode, 0); 351 ocfs2_inode_unlock(inode, 0);
352out: 352out:
@@ -2213,7 +2213,7 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
2213 down_write(&oi->ip_alloc_sem); 2213 down_write(&oi->ip_alloc_sem);
2214 2214
2215 if (first_get_block) { 2215 if (first_get_block) {
2216 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 2216 if (ocfs2_sparse_alloc(osb))
2217 ret = ocfs2_zero_tail(inode, di_bh, pos); 2217 ret = ocfs2_zero_tail(inode, di_bh, pos);
2218 else 2218 else
2219 ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, 2219 ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos,
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 8614ff069d99..3494a62ed749 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -78,7 +78,7 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
78/* 78/*
79 * Using a named enum representing lock types in terms of #N bit stored in 79 * Using a named enum representing lock types in terms of #N bit stored in
80 * iocb->private, which is going to be used for communication between 80 * iocb->private, which is going to be used for communication between
81 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read(). 81 * ocfs2_dio_end_io() and ocfs2_file_write/read_iter().
82 */ 82 */
83enum ocfs2_iocb_lock_bits { 83enum ocfs2_iocb_lock_bits {
84 OCFS2_IOCB_RW_LOCK = 0, 84 OCFS2_IOCB_RW_LOCK = 0,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index ea8c551bcd7e..91a8889abf9b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -570,7 +570,16 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
570 current_page, vec_len, vec_start); 570 current_page, vec_len, vec_start);
571 571
572 len = bio_add_page(bio, page, vec_len, vec_start); 572 len = bio_add_page(bio, page, vec_len, vec_start);
573 if (len != vec_len) break; 573 if (len != vec_len) {
574 mlog(ML_ERROR, "Adding page[%d] to bio failed, "
575 "page %p, len %d, vec_len %u, vec_start %u, "
576 "bi_sector %llu\n", current_page, page, len,
577 vec_len, vec_start,
578 (unsigned long long)bio->bi_iter.bi_sector);
579 bio_put(bio);
580 bio = ERR_PTR(-EIO);
581 return bio;
582 }
574 583
575 cs += vec_len / (PAGE_SIZE/spp); 584 cs += vec_len / (PAGE_SIZE/spp);
576 vec_start = 0; 585 vec_start = 0;
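A userspace rendition of the error-pointer convention the heartbeat fix adopts: when bio_add_page() comes up short, release the bio and hand the caller an encoded error instead of a silently truncated bio. ERR_PTR, IS_ERR and setup_one_bio() below are simplified stand-ins, not the kernel definitions.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095
static void *ERR_PTR(long err) { return (void *)err; }
static long PTR_ERR(const void *p) { return (long)p; }
static int IS_ERR(const void *p) { return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO; }

static int fake_bio;				/* stand-in for a real bio */

static void *setup_one_bio(int add_len, int want_len)
{
	if (add_len != want_len)		/* bio_add_page() came up short */
		return ERR_PTR(-EIO);		/* report, don't truncate */
	return &fake_bio;
}

int main(void)
{
	void *bio = setup_one_bio(0, 4096);

	if (IS_ERR(bio))
		printf("bio setup failed: %ld\n", PTR_ERR(bio));
	return 0;
}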
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 977763d4c27d..b048d4fa3959 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3072,7 +3072,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3072 * We need to return the correct block within the 3072 * We need to return the correct block within the
3073 * cluster which should hold our entry. 3073 * cluster which should hold our entry.
3074 */ 3074 */
3075 off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), 3075 off = ocfs2_dx_dir_hash_idx(osb,
3076 &lookup->dl_hinfo); 3076 &lookup->dl_hinfo);
3077 get_bh(dx_leaves[off]); 3077 get_bh(dx_leaves[off]);
3078 lookup->dl_dx_leaf_bh = dx_leaves[off]; 3078 lookup->dl_dx_leaf_bh = dx_leaves[off];
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index fd6bbbbd7d78..39831fc2fd52 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -224,14 +224,12 @@ void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
224 struct dlm_lock *lock) 224 struct dlm_lock *lock)
225{ 225{
226 dlm_astlockfunc_t *fn; 226 dlm_astlockfunc_t *fn;
227 struct dlm_lockstatus *lksb;
228 227
229 mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name, 228 mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name,
230 res->lockname.len, res->lockname.name, 229 res->lockname.len, res->lockname.name,
231 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), 230 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
232 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); 231 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
233 232
234 lksb = lock->lksb;
235 fn = lock->ast; 233 fn = lock->ast;
236 BUG_ON(lock->ml.node != dlm->node_num); 234 BUG_ON(lock->ml.node != dlm->node_num);
237 235
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index e9f3705c4c9f..d06e27ec4be4 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -140,6 +140,7 @@ struct dlm_ctxt
140 u8 node_num; 140 u8 node_num;
141 u32 key; 141 u32 key;
142 u8 joining_node; 142 u8 joining_node;
143 u8 migrate_done; /* set to 1 once the node has migrated all lock resources */
143 wait_queue_head_t dlm_join_events; 144 wait_queue_head_t dlm_join_events;
144 unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 145 unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
145 unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 146 unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
@@ -960,13 +961,10 @@ static inline int dlm_send_proxy_ast(struct dlm_ctxt *dlm,
960void dlm_print_one_lock_resource(struct dlm_lock_resource *res); 961void dlm_print_one_lock_resource(struct dlm_lock_resource *res);
961void __dlm_print_one_lock_resource(struct dlm_lock_resource *res); 962void __dlm_print_one_lock_resource(struct dlm_lock_resource *res);
962 963
963u8 dlm_nm_this_node(struct dlm_ctxt *dlm);
964void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); 964void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
965void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); 965void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
966 966
967 967
968int dlm_nm_init(struct dlm_ctxt *dlm);
969int dlm_heartbeat_init(struct dlm_ctxt *dlm);
970void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data); 968void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data);
971void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data); 969void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data);
972 970
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index e1fea149f50b..425081be6161 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -461,6 +461,19 @@ redo_bucket:
461 cond_resched_lock(&dlm->spinlock); 461 cond_resched_lock(&dlm->spinlock);
462 num += n; 462 num += n;
463 } 463 }
464
465 if (!num) {
466 if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
467 mlog(0, "%s: perhaps there are more lock resources "
468 "that need to be migrated after dlm recovery\n", dlm->name);
469 ret = -EAGAIN;
470 } else {
471 mlog(0, "%s: we won't do dlm recovery after migrating "
472 "all lock resources\n", dlm->name);
473 dlm->migrate_done = 1;
474 }
475 }
476
464 spin_unlock(&dlm->spinlock); 477 spin_unlock(&dlm->spinlock);
465 wake_up(&dlm->dlm_thread_wq); 478 wake_up(&dlm->dlm_thread_wq);
466 479
@@ -675,20 +688,6 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm)
675 spin_unlock(&dlm->spinlock); 688 spin_unlock(&dlm->spinlock);
676} 689}
677 690
678int dlm_shutting_down(struct dlm_ctxt *dlm)
679{
680 int ret = 0;
681
682 spin_lock(&dlm_domain_lock);
683
684 if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
685 ret = 1;
686
687 spin_unlock(&dlm_domain_lock);
688
689 return ret;
690}
691
692void dlm_unregister_domain(struct dlm_ctxt *dlm) 691void dlm_unregister_domain(struct dlm_ctxt *dlm)
693{ 692{
694 int leave = 0; 693 int leave = 0;
@@ -2052,6 +2051,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
2052 dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; 2051 dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
2053 init_waitqueue_head(&dlm->dlm_join_events); 2052 init_waitqueue_head(&dlm->dlm_join_events);
2054 2053
2054 dlm->migrate_done = 0;
2055
2055 dlm->reco.new_master = O2NM_INVALID_NODE_NUM; 2056 dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
2056 dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; 2057 dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
2057 2058
diff --git a/fs/ocfs2/dlm/dlmdomain.h b/fs/ocfs2/dlm/dlmdomain.h
index fd6122a38dbd..8a9281411c18 100644
--- a/fs/ocfs2/dlm/dlmdomain.h
+++ b/fs/ocfs2/dlm/dlmdomain.h
@@ -28,7 +28,30 @@
28extern spinlock_t dlm_domain_lock; 28extern spinlock_t dlm_domain_lock;
29extern struct list_head dlm_domains; 29extern struct list_head dlm_domains;
30 30
31int dlm_shutting_down(struct dlm_ctxt *dlm); 31static inline int dlm_joined(struct dlm_ctxt *dlm)
32{
33 int ret = 0;
34
35 spin_lock(&dlm_domain_lock);
36 if (dlm->dlm_state == DLM_CTXT_JOINED)
37 ret = 1;
38 spin_unlock(&dlm_domain_lock);
39
40 return ret;
41}
42
43static inline int dlm_shutting_down(struct dlm_ctxt *dlm)
44{
45 int ret = 0;
46
47 spin_lock(&dlm_domain_lock);
48 if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
49 ret = 1;
50 spin_unlock(&dlm_domain_lock);
51
52 return ret;
53}
54
32void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, 55void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm,
33 int node_num); 56 int node_num);
34 57
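Sketch of the pattern the new dlm_joined()/dlm_shutting_down() helpers follow: snapshot a shared state field under the lock that protects it and return the snapshot. A pthread mutex stands in for dlm_domain_lock; the names below are illustrative only.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

enum ctxt_state { CTXT_NEW, CTXT_JOINED, CTXT_IN_SHUTDOWN };

static pthread_mutex_t domain_lock = PTHREAD_MUTEX_INITIALIZER;
static enum ctxt_state dlm_state = CTXT_NEW;

static bool state_is(enum ctxt_state s)	/* ~ dlm_joined() / dlm_shutting_down() */
{
	bool ret;

	pthread_mutex_lock(&domain_lock);
	ret = (dlm_state == s);
	pthread_mutex_unlock(&domain_lock);
	return ret;
}

int main(void)
{
	pthread_mutex_lock(&domain_lock);
	dlm_state = CTXT_JOINED;
	pthread_mutex_unlock(&domain_lock);

	printf("joined=%d shutting_down=%d\n",
	       state_is(CTXT_JOINED), state_is(CTXT_IN_SHUTDOWN));
	return 0;
}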
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 66c2a491f68d..74962315794e 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -77,8 +77,7 @@ int dlm_init_lock_cache(void)
77 77
78void dlm_destroy_lock_cache(void) 78void dlm_destroy_lock_cache(void)
79{ 79{
80 if (dlm_lock_cache) 80 kmem_cache_destroy(dlm_lock_cache);
81 kmem_cache_destroy(dlm_lock_cache);
82} 81}
83 82
84/* Tell us whether we can grant a new lock request. 83/* Tell us whether we can grant a new lock request.
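A brief sketch of the idiom behind dropping the "if (cache)" guards here and in dlmmaster.c below: like kfree(), kmem_cache_destroy() simply returns for a NULL cache, so the caller-side check is redundant. destroy_cache() is a hypothetical illustration of that NULL-tolerant contract.

#include <stdio.h>
#include <stdlib.h>

struct cache { const char *name; };

static void destroy_cache(struct cache *c)
{
	if (!c)				/* tolerate NULL, as the kernel API does */
		return;
	printf("destroying %s\n", c->name);
	free(c);
}

int main(void)
{
	struct cache *never_created = NULL;

	destroy_cache(never_created);	/* safe without an if (...) guard */
	return 0;
}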
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a7df226f9449..aaca0949fe53 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -414,8 +414,7 @@ int dlm_init_mle_cache(void)
414 414
415void dlm_destroy_mle_cache(void) 415void dlm_destroy_mle_cache(void)
416{ 416{
417 if (dlm_mle_cache) 417 kmem_cache_destroy(dlm_mle_cache);
418 kmem_cache_destroy(dlm_mle_cache);
419} 418}
420 419
421static void dlm_mle_release(struct kref *kref) 420static void dlm_mle_release(struct kref *kref)
@@ -472,15 +471,11 @@ bail:
472 471
473void dlm_destroy_master_caches(void) 472void dlm_destroy_master_caches(void)
474{ 473{
475 if (dlm_lockname_cache) { 474 kmem_cache_destroy(dlm_lockname_cache);
476 kmem_cache_destroy(dlm_lockname_cache); 475 dlm_lockname_cache = NULL;
477 dlm_lockname_cache = NULL;
478 }
479 476
480 if (dlm_lockres_cache) { 477 kmem_cache_destroy(dlm_lockres_cache);
481 kmem_cache_destroy(dlm_lockres_cache); 478 dlm_lockres_cache = NULL;
482 dlm_lockres_cache = NULL;
483 }
484} 479}
485 480
486static void dlm_lockres_release(struct kref *kref) 481static void dlm_lockres_release(struct kref *kref)
@@ -2495,13 +2490,13 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
2495} 2490}
2496 2491
2497/* 2492/*
2498 * A migrateable resource is one that is : 2493 * A migratable resource is one that is :
2499 * 1. locally mastered, and, 2494 * 1. locally mastered, and,
2500 * 2. zero local locks, and, 2495 * 2. zero local locks, and,
2501 * 3. one or more non-local locks, or, one or more references 2496 * 3. one or more non-local locks, or, one or more references
2502 * Returns 1 if yes, 0 if not. 2497 * Returns 1 if yes, 0 if not.
2503 */ 2498 */
2504static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, 2499static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm,
2505 struct dlm_lock_resource *res) 2500 struct dlm_lock_resource *res)
2506{ 2501{
2507 enum dlm_lockres_list idx; 2502 enum dlm_lockres_list idx;
@@ -2532,7 +2527,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2532 continue; 2527 continue;
2533 } 2528 }
2534 cookie = be64_to_cpu(lock->ml.cookie); 2529 cookie = be64_to_cpu(lock->ml.cookie);
2535 mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on " 2530 mlog(0, "%s: Not migratable res %.*s, lock %u:%llu on "
2536 "%s list\n", dlm->name, res->lockname.len, 2531 "%s list\n", dlm->name, res->lockname.len,
2537 res->lockname.name, 2532 res->lockname.name,
2538 dlm_get_lock_cookie_node(cookie), 2533 dlm_get_lock_cookie_node(cookie),
@@ -2548,7 +2543,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2548 return 0; 2543 return 0;
2549 } 2544 }
2550 2545
2551 mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len, 2546 mlog(0, "%s: res %.*s, Migratable\n", dlm->name, res->lockname.len,
2552 res->lockname.name); 2547 res->lockname.name);
2553 2548
2554 return 1; 2549 return 1;
@@ -2792,7 +2787,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2792 assert_spin_locked(&dlm->spinlock); 2787 assert_spin_locked(&dlm->spinlock);
2793 2788
2794 spin_lock(&res->spinlock); 2789 spin_lock(&res->spinlock);
2795 if (dlm_is_lockres_migrateable(dlm, res)) 2790 if (dlm_is_lockres_migratable(dlm, res))
2796 target = dlm_pick_migration_target(dlm, res); 2791 target = dlm_pick_migration_target(dlm, res);
2797 spin_unlock(&res->spinlock); 2792 spin_unlock(&res->spinlock);
2798 2793
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index ec8f75813beb..802636d50365 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -62,7 +62,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node);
62static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node); 62static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node);
63static int dlm_request_all_locks(struct dlm_ctxt *dlm, 63static int dlm_request_all_locks(struct dlm_ctxt *dlm,
64 u8 request_from, u8 dead_node); 64 u8 request_from, u8 dead_node);
65static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node); 65static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm);
66 66
67static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res); 67static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res);
68static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres, 68static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
@@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
423 423
424static void dlm_begin_recovery(struct dlm_ctxt *dlm) 424static void dlm_begin_recovery(struct dlm_ctxt *dlm)
425{ 425{
426 spin_lock(&dlm->spinlock); 426 assert_spin_locked(&dlm->spinlock);
427 BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); 427 BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
428 printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n", 428 printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
429 dlm->name, dlm->reco.dead_node); 429 dlm->name, dlm->reco.dead_node);
430 dlm->reco.state |= DLM_RECO_STATE_ACTIVE; 430 dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
431 spin_unlock(&dlm->spinlock);
432} 431}
433 432
434static void dlm_end_recovery(struct dlm_ctxt *dlm) 433static void dlm_end_recovery(struct dlm_ctxt *dlm)
@@ -456,6 +455,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
456 455
457 spin_lock(&dlm->spinlock); 456 spin_lock(&dlm->spinlock);
458 457
458 if (dlm->migrate_done) {
459 mlog(0, "%s: no need to do recovery after migrating all "
460 "lock resources\n", dlm->name);
461 spin_unlock(&dlm->spinlock);
462 return 0;
463 }
464
459 /* check to see if the new master has died */ 465 /* check to see if the new master has died */
460 if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM && 466 if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
461 test_bit(dlm->reco.new_master, dlm->recovery_map)) { 467 test_bit(dlm->reco.new_master, dlm->recovery_map)) {
@@ -490,12 +496,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
490 mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n", 496 mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
491 dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), 497 dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
492 dlm->reco.dead_node); 498 dlm->reco.dead_node);
493 spin_unlock(&dlm->spinlock);
494 499
495 /* take write barrier */ 500 /* take write barrier */
496 /* (stops the list reshuffling thread, proxy ast handling) */ 501 /* (stops the list reshuffling thread, proxy ast handling) */
497 dlm_begin_recovery(dlm); 502 dlm_begin_recovery(dlm);
498 503
504 spin_unlock(&dlm->spinlock);
505
499 if (dlm->reco.new_master == dlm->node_num) 506 if (dlm->reco.new_master == dlm->node_num)
500 goto master_here; 507 goto master_here;
501 508
@@ -739,7 +746,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
739 } 746 }
740 747
741 if (destroy) 748 if (destroy)
742 dlm_destroy_recovery_area(dlm, dead_node); 749 dlm_destroy_recovery_area(dlm);
743 750
744 return status; 751 return status;
745} 752}
@@ -764,7 +771,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
764 771
765 ndata = kzalloc(sizeof(*ndata), GFP_NOFS); 772 ndata = kzalloc(sizeof(*ndata), GFP_NOFS);
766 if (!ndata) { 773 if (!ndata) {
767 dlm_destroy_recovery_area(dlm, dead_node); 774 dlm_destroy_recovery_area(dlm);
768 return -ENOMEM; 775 return -ENOMEM;
769 } 776 }
770 ndata->node_num = num; 777 ndata->node_num = num;
@@ -778,7 +785,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
778 return 0; 785 return 0;
779} 786}
780 787
781static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) 788static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm)
782{ 789{
783 struct dlm_reco_node_data *ndata, *next; 790 struct dlm_reco_node_data *ndata, *next;
784 LIST_HEAD(tmplist); 791 LIST_HEAD(tmplist);
@@ -1378,6 +1385,15 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
1378 if (!dlm_grab(dlm)) 1385 if (!dlm_grab(dlm))
1379 return -EINVAL; 1386 return -EINVAL;
1380 1387
1388 if (!dlm_joined(dlm)) {
1389 mlog(ML_ERROR, "Domain %s not joined! "
1390 "lockres %.*s, master %u\n",
1391 dlm->name, mres->lockname_len,
1392 mres->lockname, mres->master);
1393 dlm_put(dlm);
1394 return -EINVAL;
1395 }
1396
1381 BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION))); 1397 BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION)));
1382 1398
1383 real_master = mres->master; 1399 real_master = mres->master;
@@ -1807,7 +1823,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1807 int i, j, bad; 1823 int i, j, bad;
1808 struct dlm_lock *lock; 1824 struct dlm_lock *lock;
1809 u8 from = O2NM_MAX_NODES; 1825 u8 from = O2NM_MAX_NODES;
1810 unsigned int added = 0;
1811 __be64 c; 1826 __be64 c;
1812 1827
1813 mlog(0, "running %d locks for this lockres\n", mres->num_locks); 1828 mlog(0, "running %d locks for this lockres\n", mres->num_locks);
@@ -1823,7 +1838,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1823 spin_lock(&res->spinlock); 1838 spin_lock(&res->spinlock);
1824 dlm_lockres_set_refmap_bit(dlm, res, from); 1839 dlm_lockres_set_refmap_bit(dlm, res, from);
1825 spin_unlock(&res->spinlock); 1840 spin_unlock(&res->spinlock);
1826 added++;
1827 break; 1841 break;
1828 } 1842 }
1829 BUG_ON(ml->highest_blocked != LKM_IVMODE); 1843 BUG_ON(ml->highest_blocked != LKM_IVMODE);
@@ -1911,7 +1925,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1911 /* do not alter lock refcount. switching lists. */ 1925 /* do not alter lock refcount. switching lists. */
1912 list_move_tail(&lock->list, queue); 1926 list_move_tail(&lock->list, queue);
1913 spin_unlock(&res->spinlock); 1927 spin_unlock(&res->spinlock);
1914 added++;
1915 1928
1916 mlog(0, "just reordered a local lock!\n"); 1929 mlog(0, "just reordered a local lock!\n");
1917 continue; 1930 continue;
@@ -2037,7 +2050,6 @@ skip_lvb:
2037 "setting refmap bit\n", dlm->name, 2050 "setting refmap bit\n", dlm->name,
2038 res->lockname.len, res->lockname.name, ml->node); 2051 res->lockname.len, res->lockname.name, ml->node);
2039 dlm_lockres_set_refmap_bit(dlm, res, ml->node); 2052 dlm_lockres_set_refmap_bit(dlm, res, ml->node);
2040 added++;
2041 } 2053 }
2042 spin_unlock(&res->spinlock); 2054 spin_unlock(&res->spinlock);
2043 } 2055 }
@@ -2331,13 +2343,6 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2331 __dlm_dirty_lockres(dlm, res); 2343 __dlm_dirty_lockres(dlm, res);
2332} 2344}
2333 2345
2334/* if this node is the recovery master, and there are no
2335 * locks for a given lockres owned by this node that are in
2336 * either PR or EX mode, zero out the lvb before requesting.
2337 *
2338 */
2339
2340
2341static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) 2346static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2342{ 2347{
2343 struct dlm_lock_resource *res; 2348 struct dlm_lock_resource *res;
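A toy sketch of the locking change around dlm_begin_recovery(): instead of taking and dropping dlm->spinlock itself, the function now asserts that the caller already holds it, so the migrate_done/new_master checks and the DLM_RECO_STATE_ACTIVE transition share one critical section. The boolean lock_held flag is only a stand-in for assert_spin_locked().

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t dlm_lock = PTHREAD_MUTEX_INITIALIZER;
static bool lock_held;
static bool reco_active;

static void begin_recovery(void)
{
	assert(lock_held);		/* caller must already hold dlm_lock */
	reco_active = true;
}

int main(void)
{
	pthread_mutex_lock(&dlm_lock);
	lock_held = true;
	/* ... checks that must be atomic with the state change go here ... */
	begin_recovery();
	lock_held = false;
	pthread_mutex_unlock(&dlm_lock);

	return reco_active ? 0 : 1;
}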
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index b552d1f8508c..97a972efab83 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1756,8 +1756,7 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1756 1756
1757 level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1757 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1758 1758
1759 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1759 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
1760 0);
1761 if (status < 0) 1760 if (status < 0)
1762 mlog_errno(status); 1761 mlog_errno(status);
1763 1762
@@ -1796,7 +1795,7 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
1796 write ? "EXMODE" : "PRMODE"); 1795 write ? "EXMODE" : "PRMODE");
1797 1796
1798 if (!ocfs2_mount_local(osb)) 1797 if (!ocfs2_mount_local(osb))
1799 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1798 ocfs2_cluster_unlock(osb, lockres, level);
1800} 1799}
1801 1800
1802/* 1801/*
@@ -1816,8 +1815,7 @@ int ocfs2_open_lock(struct inode *inode)
1816 1815
1817 lockres = &OCFS2_I(inode)->ip_open_lockres; 1816 lockres = &OCFS2_I(inode)->ip_open_lockres;
1818 1817
1819 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1818 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_PR, 0, 0);
1820 DLM_LOCK_PR, 0, 0);
1821 if (status < 0) 1819 if (status < 0)
1822 mlog_errno(status); 1820 mlog_errno(status);
1823 1821
@@ -1854,8 +1852,7 @@ int ocfs2_try_open_lock(struct inode *inode, int write)
1854 * other nodes and the -EAGAIN will indicate to the caller that 1852 * other nodes and the -EAGAIN will indicate to the caller that
1855 * this inode is still in use. 1853 * this inode is still in use.
1856 */ 1854 */
1857 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1855 status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0);
1858 level, DLM_LKF_NOQUEUE, 0);
1859 1856
1860out: 1857out:
1861 return status; 1858 return status;
@@ -1876,11 +1873,9 @@ void ocfs2_open_unlock(struct inode *inode)
1876 goto out; 1873 goto out;
1877 1874
1878 if(lockres->l_ro_holders) 1875 if(lockres->l_ro_holders)
1879 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1876 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_PR);
1880 DLM_LOCK_PR);
1881 if(lockres->l_ex_holders) 1877 if(lockres->l_ex_holders)
1882 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1878 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
1883 DLM_LOCK_EX);
1884 1879
1885out: 1880out:
1886 return; 1881 return;
@@ -2601,9 +2596,9 @@ void ocfs2_inode_unlock(struct inode *inode,
2601 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2596 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2602 ex ? "EXMODE" : "PRMODE"); 2597 ex ? "EXMODE" : "PRMODE");
2603 2598
2604 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2599 if (!ocfs2_is_hard_readonly(osb) &&
2605 !ocfs2_mount_local(osb)) 2600 !ocfs2_mount_local(osb))
2606 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2601 ocfs2_cluster_unlock(osb, lockres, level);
2607} 2602}
2608 2603
2609/* 2604/*
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 5d1784a365a3..6ee94bc23f5b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -101,7 +101,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
101 struct ocfs2_inode_info *oi = OCFS2_I(inode); 101 struct ocfs2_inode_info *oi = OCFS2_I(inode);
102 102
103 trace_ocfs2_file_open(inode, file, file->f_path.dentry, 103 trace_ocfs2_file_open(inode, file, file->f_path.dentry,
104 (unsigned long long)OCFS2_I(inode)->ip_blkno, 104 (unsigned long long)oi->ip_blkno,
105 file->f_path.dentry->d_name.len, 105 file->f_path.dentry->d_name.len,
106 file->f_path.dentry->d_name.name, mode); 106 file->f_path.dentry->d_name.name, mode);
107 107
@@ -116,7 +116,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
116 /* Check that the inode hasn't been wiped from disk by another 116 /* Check that the inode hasn't been wiped from disk by another
117 * node. If it hasn't then we're safe as long as we hold the 117 * node. If it hasn't then we're safe as long as we hold the
118 * spin lock until our increment of open count. */ 118 * spin lock until our increment of open count. */
119 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 119 if (oi->ip_flags & OCFS2_INODE_DELETED) {
120 spin_unlock(&oi->ip_lock); 120 spin_unlock(&oi->ip_lock);
121 121
122 status = -ENOENT; 122 status = -ENOENT;
@@ -190,7 +190,7 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
190 bool needs_barrier = false; 190 bool needs_barrier = false;
191 191
192 trace_ocfs2_sync_file(inode, file, file->f_path.dentry, 192 trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
193 OCFS2_I(inode)->ip_blkno, 193 oi->ip_blkno,
194 file->f_path.dentry->d_name.len, 194 file->f_path.dentry->d_name.len,
195 file->f_path.dentry->d_name.name, 195 file->f_path.dentry->d_name.name,
196 (unsigned long long)datasync); 196 (unsigned long long)datasync);
@@ -296,7 +296,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
296 ocfs2_journal_dirty(handle, bh); 296 ocfs2_journal_dirty(handle, bh);
297 297
298out_commit: 298out_commit:
299 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 299 ocfs2_commit_trans(osb, handle);
300out: 300out:
301 return ret; 301 return ret;
302} 302}
@@ -2257,7 +2257,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2257 int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; 2257 int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
2258 int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; 2258 int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
2259 2259
2260 trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, 2260 trace_ocfs2_file_write_iter(inode, file, file->f_path.dentry,
2261 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2261 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2262 file->f_path.dentry->d_name.len, 2262 file->f_path.dentry->d_name.len,
2263 file->f_path.dentry->d_name.name, 2263 file->f_path.dentry->d_name.name,
@@ -2405,7 +2405,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
2405 int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; 2405 int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
2406 int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; 2406 int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
2407 2407
2408 trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, 2408 trace_ocfs2_file_read_iter(inode, filp, filp->f_path.dentry,
2409 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2409 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2410 filp->f_path.dentry->d_name.len, 2410 filp->f_path.dentry->d_name.len,
2411 filp->f_path.dentry->d_name.name, 2411 filp->f_path.dentry->d_name.name,
@@ -2448,7 +2448,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
2448 * 2448 *
2449 * Take and drop the meta data lock to update inode fields 2449 * Take and drop the meta data lock to update inode fields
2450 * like i_size. This allows the checks down below 2450 * like i_size. This allows the checks down below
2451 * generic_file_aio_read() a chance of actually working. 2451 * generic_file_read_iter() a chance of actually working.
2452 */ 2452 */
2453 ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level, 2453 ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
2454 !nowait); 2454 !nowait);
@@ -2460,7 +2460,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
2460 ocfs2_inode_unlock(inode, lock_level); 2460 ocfs2_inode_unlock(inode, lock_level);
2461 2461
2462 ret = generic_file_read_iter(iocb, to); 2462 ret = generic_file_read_iter(iocb, to);
2463 trace_generic_file_aio_read_ret(ret); 2463 trace_generic_file_read_iter_ret(ret);
2464 2464
2465 /* buffered aio wouldn't have proper lock coverage today */ 2465 /* buffered aio wouldn't have proper lock coverage today */
2466 BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); 2466 BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 6b92cb241138..f65f2b2f594d 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -53,36 +53,6 @@ static const char * const ocfs2_filecheck_errs[] = {
53 "UNSUPPORTED" 53 "UNSUPPORTED"
54}; 54};
55 55
56static DEFINE_SPINLOCK(ocfs2_filecheck_sysfs_lock);
57static LIST_HEAD(ocfs2_filecheck_sysfs_list);
58
59struct ocfs2_filecheck {
60 struct list_head fc_head; /* File check entry list head */
61 spinlock_t fc_lock;
62 unsigned int fc_max; /* Maximum number of entry in list */
63 unsigned int fc_size; /* Current entry count in list */
64 unsigned int fc_done; /* Finished entry count in list */
65};
66
67struct ocfs2_filecheck_sysfs_entry { /* sysfs entry per mounting */
68 struct list_head fs_list;
69 atomic_t fs_count;
70 struct super_block *fs_sb;
71 struct kset *fs_devicekset;
72 struct kset *fs_fcheckkset;
73 struct ocfs2_filecheck *fs_fcheck;
74};
75
76#define OCFS2_FILECHECK_MAXSIZE 100
77#define OCFS2_FILECHECK_MINSIZE 10
78
79/* File check operation type */
80enum {
81 OCFS2_FILECHECK_TYPE_CHK = 0, /* Check a file(inode) */
82 OCFS2_FILECHECK_TYPE_FIX, /* Fix a file(inode) */
83 OCFS2_FILECHECK_TYPE_SET = 100 /* Set entry list maximum size */
84};
85
86struct ocfs2_filecheck_entry { 56struct ocfs2_filecheck_entry {
87 struct list_head fe_list; 57 struct list_head fe_list;
88 unsigned long fe_ino; 58 unsigned long fe_ino;
@@ -110,35 +80,84 @@ ocfs2_filecheck_error(int errno)
110 return ocfs2_filecheck_errs[errno - OCFS2_FILECHECK_ERR_START + 1]; 80 return ocfs2_filecheck_errs[errno - OCFS2_FILECHECK_ERR_START + 1];
111} 81}
112 82
113static ssize_t ocfs2_filecheck_show(struct kobject *kobj, 83static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj,
114 struct kobj_attribute *attr, 84 struct kobj_attribute *attr,
115 char *buf); 85 char *buf);
116static ssize_t ocfs2_filecheck_store(struct kobject *kobj, 86static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj,
117 struct kobj_attribute *attr, 87 struct kobj_attribute *attr,
118 const char *buf, size_t count); 88 const char *buf, size_t count);
119static struct kobj_attribute ocfs2_attr_filecheck_chk = 89static struct kobj_attribute ocfs2_filecheck_attr_chk =
120 __ATTR(check, S_IRUSR | S_IWUSR, 90 __ATTR(check, S_IRUSR | S_IWUSR,
121 ocfs2_filecheck_show, 91 ocfs2_filecheck_attr_show,
122 ocfs2_filecheck_store); 92 ocfs2_filecheck_attr_store);
123static struct kobj_attribute ocfs2_attr_filecheck_fix = 93static struct kobj_attribute ocfs2_filecheck_attr_fix =
124 __ATTR(fix, S_IRUSR | S_IWUSR, 94 __ATTR(fix, S_IRUSR | S_IWUSR,
125 ocfs2_filecheck_show, 95 ocfs2_filecheck_attr_show,
126 ocfs2_filecheck_store); 96 ocfs2_filecheck_attr_store);
127static struct kobj_attribute ocfs2_attr_filecheck_set = 97static struct kobj_attribute ocfs2_filecheck_attr_set =
128 __ATTR(set, S_IRUSR | S_IWUSR, 98 __ATTR(set, S_IRUSR | S_IWUSR,
129 ocfs2_filecheck_show, 99 ocfs2_filecheck_attr_show,
130 ocfs2_filecheck_store); 100 ocfs2_filecheck_attr_store);
101static struct attribute *ocfs2_filecheck_attrs[] = {
102 &ocfs2_filecheck_attr_chk.attr,
103 &ocfs2_filecheck_attr_fix.attr,
104 &ocfs2_filecheck_attr_set.attr,
105 NULL
106};
107
108static void ocfs2_filecheck_release(struct kobject *kobj)
109{
110 struct ocfs2_filecheck_sysfs_entry *entry = container_of(kobj,
111 struct ocfs2_filecheck_sysfs_entry, fs_kobj);
112
113 complete(&entry->fs_kobj_unregister);
114}
115
116static ssize_t
117ocfs2_filecheck_show(struct kobject *kobj, struct attribute *attr, char *buf)
118{
119 ssize_t ret = -EIO;
120 struct kobj_attribute *kattr = container_of(attr,
121 struct kobj_attribute, attr);
122
123 kobject_get(kobj);
124 if (kattr->show)
125 ret = kattr->show(kobj, kattr, buf);
126 kobject_put(kobj);
127 return ret;
128}
129
130static ssize_t
131ocfs2_filecheck_store(struct kobject *kobj, struct attribute *attr,
132 const char *buf, size_t count)
133{
134 ssize_t ret = -EIO;
135 struct kobj_attribute *kattr = container_of(attr,
136 struct kobj_attribute, attr);
137
138 kobject_get(kobj);
139 if (kattr->store)
140 ret = kattr->store(kobj, kattr, buf, count);
141 kobject_put(kobj);
142 return ret;
143}
144
145static const struct sysfs_ops ocfs2_filecheck_ops = {
146 .show = ocfs2_filecheck_show,
147 .store = ocfs2_filecheck_store,
148};
149
150static struct kobj_type ocfs2_ktype_filecheck = {
151 .default_attrs = ocfs2_filecheck_attrs,
152 .sysfs_ops = &ocfs2_filecheck_ops,
153 .release = ocfs2_filecheck_release,
154};
131 155
132static void 156static void
133ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry) 157ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
134{ 158{
135 struct ocfs2_filecheck_entry *p; 159 struct ocfs2_filecheck_entry *p;
136 160
137 if (!atomic_dec_and_test(&entry->fs_count)) {
138 wait_var_event(&entry->fs_count,
139 !atomic_read(&entry->fs_count));
140 }
141
142 spin_lock(&entry->fs_fcheck->fc_lock); 161 spin_lock(&entry->fs_fcheck->fc_lock);
143 while (!list_empty(&entry->fs_fcheck->fc_head)) { 162 while (!list_empty(&entry->fs_fcheck->fc_head)) {
144 p = list_first_entry(&entry->fs_fcheck->fc_head, 163 p = list_first_entry(&entry->fs_fcheck->fc_head,
@@ -149,151 +168,48 @@ ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
149 } 168 }
150 spin_unlock(&entry->fs_fcheck->fc_lock); 169 spin_unlock(&entry->fs_fcheck->fc_lock);
151 170
152 kset_unregister(entry->fs_fcheckkset);
153 kset_unregister(entry->fs_devicekset);
154 kfree(entry->fs_fcheck); 171 kfree(entry->fs_fcheck);
155 kfree(entry); 172 entry->fs_fcheck = NULL;
156}
157
158static void
159ocfs2_filecheck_sysfs_add(struct ocfs2_filecheck_sysfs_entry *entry)
160{
161 spin_lock(&ocfs2_filecheck_sysfs_lock);
162 list_add_tail(&entry->fs_list, &ocfs2_filecheck_sysfs_list);
163 spin_unlock(&ocfs2_filecheck_sysfs_lock);
164} 173}
165 174
166static int ocfs2_filecheck_sysfs_del(const char *devname) 175int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb)
167{ 176{
168 struct ocfs2_filecheck_sysfs_entry *p; 177 int ret;
169 178 struct ocfs2_filecheck *fcheck;
170 spin_lock(&ocfs2_filecheck_sysfs_lock); 179 struct ocfs2_filecheck_sysfs_entry *entry = &osb->osb_fc_ent;
171 list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
172 if (!strcmp(p->fs_sb->s_id, devname)) {
173 list_del(&p->fs_list);
174 spin_unlock(&ocfs2_filecheck_sysfs_lock);
175 ocfs2_filecheck_sysfs_free(p);
176 return 0;
177 }
178 }
179 spin_unlock(&ocfs2_filecheck_sysfs_lock);
180 return 1;
181}
182
183static void
184ocfs2_filecheck_sysfs_put(struct ocfs2_filecheck_sysfs_entry *entry)
185{
186 if (atomic_dec_and_test(&entry->fs_count))
187 wake_up_var(&entry->fs_count);
188}
189
190static struct ocfs2_filecheck_sysfs_entry *
191ocfs2_filecheck_sysfs_get(const char *devname)
192{
193 struct ocfs2_filecheck_sysfs_entry *p = NULL;
194
195 spin_lock(&ocfs2_filecheck_sysfs_lock);
196 list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
197 if (!strcmp(p->fs_sb->s_id, devname)) {
198 atomic_inc(&p->fs_count);
199 spin_unlock(&ocfs2_filecheck_sysfs_lock);
200 return p;
201 }
202 }
203 spin_unlock(&ocfs2_filecheck_sysfs_lock);
204 return NULL;
205}
206
207int ocfs2_filecheck_create_sysfs(struct super_block *sb)
208{
209 int ret = 0;
210 struct kset *device_kset = NULL;
211 struct kset *fcheck_kset = NULL;
212 struct ocfs2_filecheck *fcheck = NULL;
213 struct ocfs2_filecheck_sysfs_entry *entry = NULL;
214 struct attribute **attrs = NULL;
215 struct attribute_group attrgp;
216
217 if (!ocfs2_kset)
218 return -ENOMEM;
219
220 attrs = kmalloc(sizeof(struct attribute *) * 4, GFP_NOFS);
221 if (!attrs) {
222 ret = -ENOMEM;
223 goto error;
224 } else {
225 attrs[0] = &ocfs2_attr_filecheck_chk.attr;
226 attrs[1] = &ocfs2_attr_filecheck_fix.attr;
227 attrs[2] = &ocfs2_attr_filecheck_set.attr;
228 attrs[3] = NULL;
229 memset(&attrgp, 0, sizeof(attrgp));
230 attrgp.attrs = attrs;
231 }
232 180
233 fcheck = kmalloc(sizeof(struct ocfs2_filecheck), GFP_NOFS); 181 fcheck = kmalloc(sizeof(struct ocfs2_filecheck), GFP_NOFS);
234 if (!fcheck) { 182 if (!fcheck)
235 ret = -ENOMEM; 183 return -ENOMEM;
236 goto error;
237 } else {
238 INIT_LIST_HEAD(&fcheck->fc_head);
239 spin_lock_init(&fcheck->fc_lock);
240 fcheck->fc_max = OCFS2_FILECHECK_MINSIZE;
241 fcheck->fc_size = 0;
242 fcheck->fc_done = 0;
243 }
244
245 if (strlen(sb->s_id) <= 0) {
246 mlog(ML_ERROR,
247 "Cannot get device basename when create filecheck sysfs\n");
248 ret = -ENODEV;
249 goto error;
250 }
251
252 device_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj);
253 if (!device_kset) {
254 ret = -ENOMEM;
255 goto error;
256 }
257
258 fcheck_kset = kset_create_and_add("filecheck", NULL,
259 &device_kset->kobj);
260 if (!fcheck_kset) {
261 ret = -ENOMEM;
262 goto error;
263 }
264
265 ret = sysfs_create_group(&fcheck_kset->kobj, &attrgp);
266 if (ret)
267 goto error;
268 184
269 entry = kmalloc(sizeof(struct ocfs2_filecheck_sysfs_entry), GFP_NOFS); 185 INIT_LIST_HEAD(&fcheck->fc_head);
270 if (!entry) { 186 spin_lock_init(&fcheck->fc_lock);
271 ret = -ENOMEM; 187 fcheck->fc_max = OCFS2_FILECHECK_MINSIZE;
272 goto error; 188 fcheck->fc_size = 0;
273 } else { 189 fcheck->fc_done = 0;
274 atomic_set(&entry->fs_count, 1); 190
275 entry->fs_sb = sb; 191 entry->fs_kobj.kset = osb->osb_dev_kset;
276 entry->fs_devicekset = device_kset; 192 init_completion(&entry->fs_kobj_unregister);
277 entry->fs_fcheckkset = fcheck_kset; 193 ret = kobject_init_and_add(&entry->fs_kobj, &ocfs2_ktype_filecheck,
278 entry->fs_fcheck = fcheck; 194 NULL, "filecheck");
279 ocfs2_filecheck_sysfs_add(entry); 195 if (ret) {
196 kfree(fcheck);
197 return ret;
280 } 198 }
281 199
282 kfree(attrs); 200 entry->fs_fcheck = fcheck;
283 return 0; 201 return 0;
284
285error:
286 kfree(attrs);
287 kfree(entry);
288 kfree(fcheck);
289 kset_unregister(fcheck_kset);
290 kset_unregister(device_kset);
291 return ret;
292} 202}
293 203
294int ocfs2_filecheck_remove_sysfs(struct super_block *sb) 204void ocfs2_filecheck_remove_sysfs(struct ocfs2_super *osb)
295{ 205{
296 return ocfs2_filecheck_sysfs_del(sb->s_id); 206 if (!osb->osb_fc_ent.fs_fcheck)
207 return;
208
209 kobject_del(&osb->osb_fc_ent.fs_kobj);
210 kobject_put(&osb->osb_fc_ent.fs_kobj);
211 wait_for_completion(&osb->osb_fc_ent.fs_kobj_unregister);
212 ocfs2_filecheck_sysfs_free(&osb->osb_fc_ent);
297} 213}
298 214
299static int 215static int
@@ -310,7 +226,7 @@ ocfs2_filecheck_adjust_max(struct ocfs2_filecheck_sysfs_entry *ent,
310 226
311 spin_lock(&ent->fs_fcheck->fc_lock); 227 spin_lock(&ent->fs_fcheck->fc_lock);
312 if (len < (ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done)) { 228 if (len < (ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done)) {
313 mlog(ML_ERROR, 229 mlog(ML_NOTICE,
314 "Cannot set online file check maximum entry number " 230 "Cannot set online file check maximum entry number "
315 "to %u due to too many pending entries(%u)\n", 231 "to %u due to too many pending entries(%u)\n",
316 len, ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done); 232 len, ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done);
@@ -387,7 +303,7 @@ ocfs2_filecheck_args_parse(const char *name, const char *buf, size_t count,
387 return 0; 303 return 0;
388} 304}
389 305
390static ssize_t ocfs2_filecheck_show(struct kobject *kobj, 306static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj,
391 struct kobj_attribute *attr, 307 struct kobj_attribute *attr,
392 char *buf) 308 char *buf)
393{ 309{
@@ -395,19 +311,12 @@ static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
395 ssize_t ret = 0, total = 0, remain = PAGE_SIZE; 311 ssize_t ret = 0, total = 0, remain = PAGE_SIZE;
396 unsigned int type; 312 unsigned int type;
397 struct ocfs2_filecheck_entry *p; 313 struct ocfs2_filecheck_entry *p;
398 struct ocfs2_filecheck_sysfs_entry *ent; 314 struct ocfs2_filecheck_sysfs_entry *ent = container_of(kobj,
315 struct ocfs2_filecheck_sysfs_entry, fs_kobj);
399 316
400 if (ocfs2_filecheck_type_parse(attr->attr.name, &type)) 317 if (ocfs2_filecheck_type_parse(attr->attr.name, &type))
401 return -EINVAL; 318 return -EINVAL;
402 319
403 ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
404 if (!ent) {
405 mlog(ML_ERROR,
406 "Cannot get the corresponding entry via device basename %s\n",
407 kobj->name);
408 return -ENODEV;
409 }
410
411 if (type == OCFS2_FILECHECK_TYPE_SET) { 320 if (type == OCFS2_FILECHECK_TYPE_SET) {
412 spin_lock(&ent->fs_fcheck->fc_lock); 321 spin_lock(&ent->fs_fcheck->fc_lock);
413 total = snprintf(buf, remain, "%u\n", ent->fs_fcheck->fc_max); 322 total = snprintf(buf, remain, "%u\n", ent->fs_fcheck->fc_max);
@@ -441,11 +350,26 @@ static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
441 spin_unlock(&ent->fs_fcheck->fc_lock); 350 spin_unlock(&ent->fs_fcheck->fc_lock);
442 351
443exit: 352exit:
444 ocfs2_filecheck_sysfs_put(ent);
445 return total; 353 return total;
446} 354}
447 355
448static int 356static inline int
357ocfs2_filecheck_is_dup_entry(struct ocfs2_filecheck_sysfs_entry *ent,
358 unsigned long ino)
359{
360 struct ocfs2_filecheck_entry *p;
361
362 list_for_each_entry(p, &ent->fs_fcheck->fc_head, fe_list) {
363 if (!p->fe_done) {
364 if (p->fe_ino == ino)
365 return 1;
366 }
367 }
368
369 return 0;
370}
371
372static inline int
449ocfs2_filecheck_erase_entry(struct ocfs2_filecheck_sysfs_entry *ent) 373ocfs2_filecheck_erase_entry(struct ocfs2_filecheck_sysfs_entry *ent)
450{ 374{
451 struct ocfs2_filecheck_entry *p; 375 struct ocfs2_filecheck_entry *p;
@@ -484,21 +408,21 @@ static void
484ocfs2_filecheck_done_entry(struct ocfs2_filecheck_sysfs_entry *ent, 408ocfs2_filecheck_done_entry(struct ocfs2_filecheck_sysfs_entry *ent,
485 struct ocfs2_filecheck_entry *entry) 409 struct ocfs2_filecheck_entry *entry)
486{ 410{
487 entry->fe_done = 1;
488 spin_lock(&ent->fs_fcheck->fc_lock); 411 spin_lock(&ent->fs_fcheck->fc_lock);
412 entry->fe_done = 1;
489 ent->fs_fcheck->fc_done++; 413 ent->fs_fcheck->fc_done++;
490 spin_unlock(&ent->fs_fcheck->fc_lock); 414 spin_unlock(&ent->fs_fcheck->fc_lock);
491} 415}
492 416
493static unsigned int 417static unsigned int
494ocfs2_filecheck_handle(struct super_block *sb, 418ocfs2_filecheck_handle(struct ocfs2_super *osb,
495 unsigned long ino, unsigned int flags) 419 unsigned long ino, unsigned int flags)
496{ 420{
497 unsigned int ret = OCFS2_FILECHECK_ERR_SUCCESS; 421 unsigned int ret = OCFS2_FILECHECK_ERR_SUCCESS;
498 struct inode *inode = NULL; 422 struct inode *inode = NULL;
499 int rc; 423 int rc;
500 424
501 inode = ocfs2_iget(OCFS2_SB(sb), ino, flags, 0); 425 inode = ocfs2_iget(osb, ino, flags, 0);
502 if (IS_ERR(inode)) { 426 if (IS_ERR(inode)) {
503 rc = (int)(-(long)inode); 427 rc = (int)(-(long)inode);
504 if (rc >= OCFS2_FILECHECK_ERR_START && 428 if (rc >= OCFS2_FILECHECK_ERR_START &&
@@ -516,11 +440,14 @@ static void
516ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent, 440ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent,
517 struct ocfs2_filecheck_entry *entry) 441 struct ocfs2_filecheck_entry *entry)
518{ 442{
443 struct ocfs2_super *osb = container_of(ent, struct ocfs2_super,
444 osb_fc_ent);
445
519 if (entry->fe_type == OCFS2_FILECHECK_TYPE_CHK) 446 if (entry->fe_type == OCFS2_FILECHECK_TYPE_CHK)
520 entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb, 447 entry->fe_status = ocfs2_filecheck_handle(osb,
521 entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_CHK); 448 entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_CHK);
522 else if (entry->fe_type == OCFS2_FILECHECK_TYPE_FIX) 449 else if (entry->fe_type == OCFS2_FILECHECK_TYPE_FIX)
523 entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb, 450 entry->fe_status = ocfs2_filecheck_handle(osb,
524 entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_FIX); 451 entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_FIX);
525 else 452 else
526 entry->fe_status = OCFS2_FILECHECK_ERR_UNSUPPORTED; 453 entry->fe_status = OCFS2_FILECHECK_ERR_UNSUPPORTED;
@@ -528,30 +455,21 @@ ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent,
528 ocfs2_filecheck_done_entry(ent, entry); 455 ocfs2_filecheck_done_entry(ent, entry);
529} 456}
530 457
531static ssize_t ocfs2_filecheck_store(struct kobject *kobj, 458static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj,
532 struct kobj_attribute *attr, 459 struct kobj_attribute *attr,
533 const char *buf, size_t count) 460 const char *buf, size_t count)
534{ 461{
462 ssize_t ret = 0;
535 struct ocfs2_filecheck_args args; 463 struct ocfs2_filecheck_args args;
536 struct ocfs2_filecheck_entry *entry; 464 struct ocfs2_filecheck_entry *entry;
537 struct ocfs2_filecheck_sysfs_entry *ent; 465 struct ocfs2_filecheck_sysfs_entry *ent = container_of(kobj,
538 ssize_t ret = 0; 466 struct ocfs2_filecheck_sysfs_entry, fs_kobj);
539 467
540 if (count == 0) 468 if (count == 0)
541 return count; 469 return count;
542 470
543 if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args)) { 471 if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args))
544 mlog(ML_ERROR, "Invalid arguments for online file check\n");
545 return -EINVAL; 472 return -EINVAL;
546 }
547
548 ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
549 if (!ent) {
550 mlog(ML_ERROR,
551 "Cannot get the corresponding entry via device basename %s\n",
552 kobj->parent->name);
553 return -ENODEV;
554 }
555 473
556 if (args.fa_type == OCFS2_FILECHECK_TYPE_SET) { 474 if (args.fa_type == OCFS2_FILECHECK_TYPE_SET) {
557 ret = ocfs2_filecheck_adjust_max(ent, args.fa_len); 475 ret = ocfs2_filecheck_adjust_max(ent, args.fa_len);
@@ -565,13 +483,16 @@ static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
565 } 483 }
566 484
567 spin_lock(&ent->fs_fcheck->fc_lock); 485 spin_lock(&ent->fs_fcheck->fc_lock);
568 if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) && 486 if (ocfs2_filecheck_is_dup_entry(ent, args.fa_ino)) {
569 (ent->fs_fcheck->fc_done == 0)) { 487 ret = -EEXIST;
570 mlog(ML_ERROR, 488 kfree(entry);
489 } else if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
490 (ent->fs_fcheck->fc_done == 0)) {
491 mlog(ML_NOTICE,
571 "Cannot do more file check " 492 "Cannot do more file check "
572 "since file check queue(%u) is full now\n", 493 "since file check queue(%u) is full now\n",
573 ent->fs_fcheck->fc_max); 494 ent->fs_fcheck->fc_max);
574 ret = -EBUSY; 495 ret = -EAGAIN;
575 kfree(entry); 496 kfree(entry);
576 } else { 497 } else {
577 if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) && 498 if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
@@ -596,6 +517,5 @@ static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
596 ocfs2_filecheck_handle_entry(ent, entry); 517 ocfs2_filecheck_handle_entry(ent, entry);
597 518
598exit: 519exit:
599 ocfs2_filecheck_sysfs_put(ent);
600 return (!ret ? count : ret); 520 return (!ret ? count : ret);
601} 521}
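Plain C sketch of the container_of() pattern the reworked filecheck sysfs code relies on: the per-mount entry is recovered from the kobject embedded in it, replacing the old global-list lookup by device name. The structures below are simplified stand-ins for the kernel types.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kobj { const char *name; };

struct sysfs_entry {
	unsigned int fc_max;
	struct kobj fs_kobj;		/* embedded object handed to callbacks */
};

int main(void)
{
	struct sysfs_entry ent = { .fc_max = 10, .fs_kobj = { "filecheck" } };
	struct kobj *kobj = &ent.fs_kobj;	/* all a show/store callback gets */
	struct sysfs_entry *back = container_of(kobj, struct sysfs_entry, fs_kobj);

	printf("%s fc_max=%u\n", back->fs_kobj.name, back->fc_max);
	return 0;	/* prints "filecheck fc_max=10" */
}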
diff --git a/fs/ocfs2/filecheck.h b/fs/ocfs2/filecheck.h
index e5cd002a2c09..6a22ee79e8d0 100644
--- a/fs/ocfs2/filecheck.h
+++ b/fs/ocfs2/filecheck.h
@@ -43,7 +43,32 @@ enum {
43#define OCFS2_FILECHECK_ERR_START OCFS2_FILECHECK_ERR_FAILED 43#define OCFS2_FILECHECK_ERR_START OCFS2_FILECHECK_ERR_FAILED
44#define OCFS2_FILECHECK_ERR_END OCFS2_FILECHECK_ERR_UNSUPPORTED 44#define OCFS2_FILECHECK_ERR_END OCFS2_FILECHECK_ERR_UNSUPPORTED
45 45
46int ocfs2_filecheck_create_sysfs(struct super_block *sb); 46struct ocfs2_filecheck {
47int ocfs2_filecheck_remove_sysfs(struct super_block *sb); 47 struct list_head fc_head; /* File check entry list head */
48 spinlock_t fc_lock;
49 unsigned int fc_max; /* Maximum number of entry in list */
50 unsigned int fc_size; /* Current entry count in list */
51 unsigned int fc_done; /* Finished entry count in list */
52};
53
54#define OCFS2_FILECHECK_MAXSIZE 100
55#define OCFS2_FILECHECK_MINSIZE 10
56
57/* File check operation type */
58enum {
59 OCFS2_FILECHECK_TYPE_CHK = 0, /* Check a file(inode) */
60 OCFS2_FILECHECK_TYPE_FIX, /* Fix a file(inode) */
61 OCFS2_FILECHECK_TYPE_SET = 100 /* Set entry list maximum size */
62};
63
64struct ocfs2_filecheck_sysfs_entry { /* sysfs entry per partition */
65 struct kobject fs_kobj;
66 struct completion fs_kobj_unregister;
67 struct ocfs2_filecheck *fs_fcheck;
68};
69
70
71int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb);
72void ocfs2_filecheck_remove_sysfs(struct ocfs2_super *osb);
48 73
49#endif /* FILECHECK_H */ 74#endif /* FILECHECK_H */
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index d51b80edd972..ddc3e9470c87 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1135,7 +1135,7 @@ static void ocfs2_clear_inode(struct inode *inode)
1135 trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, 1135 trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
1136 inode->i_nlink); 1136 inode->i_nlink);
1137 1137
1138 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, 1138 mlog_bug_on_msg(osb == NULL,
1139 "Inode=%lu\n", inode->i_ino); 1139 "Inode=%lu\n", inode->i_ino);
1140 1140
1141 dquot_drop(inode); 1141 dquot_drop(inode);
@@ -1150,7 +1150,7 @@ static void ocfs2_clear_inode(struct inode *inode)
1150 ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres); 1150 ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres);
1151 ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres); 1151 ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres);
1152 1152
1153 ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap, 1153 ocfs2_resv_discard(&osb->osb_la_resmap,
1154 &oi->ip_la_data_resv); 1154 &oi->ip_la_data_resv);
1155 ocfs2_resv_init_once(&oi->ip_la_data_resv); 1155 ocfs2_resv_init_once(&oi->ip_la_data_resv);
1156 1156
@@ -1160,7 +1160,7 @@ static void ocfs2_clear_inode(struct inode *inode)
1160 * exception here are successfully wiped inodes - their 1160 * exception here are successfully wiped inodes - their
1161 * metadata can now be considered to be part of the system 1161 * metadata can now be considered to be part of the system
1162 * inodes from which it came. */ 1162 * inodes from which it came. */
1163 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED)) 1163 if (!(oi->ip_flags & OCFS2_INODE_DELETED))
1164 ocfs2_checkpoint_inode(inode); 1164 ocfs2_checkpoint_inode(inode);
1165 1165
1166 mlog_bug_on_msg(!list_empty(&oi->ip_io_markers), 1166 mlog_bug_on_msg(!list_empty(&oi->ip_io_markers),
@@ -1223,7 +1223,7 @@ static void ocfs2_clear_inode(struct inode *inode)
1223 * the journal is flushed before journal shutdown. Thus it is safe to 1223 * the journal is flushed before journal shutdown. Thus it is safe to
1224 * have inodes get cleaned up after journal shutdown. 1224 * have inodes get cleaned up after journal shutdown.
1225 */ 1225 */
1226 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, 1226 jbd2_journal_release_jbd_inode(osb->journal->j_journal,
1227 &oi->ip_jinode); 1227 &oi->ip_jinode);
1228} 1228}
1229 1229
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index c801eddc4bf3..8dd6f703c819 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -525,7 +525,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
525 * these are used by the support functions here and in 525 * these are used by the support functions here and in
526 * callers. */ 526 * callers. */
527 inode->i_ino = ino_from_blkno(osb->sb, fe_blkno); 527 inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
528 OCFS2_I(inode)->ip_blkno = fe_blkno; 528 oi->ip_blkno = fe_blkno;
529 spin_lock(&osb->osb_lock); 529 spin_lock(&osb->osb_lock);
530 inode->i_generation = osb->s_next_generation++; 530 inode->i_generation = osb->s_next_generation++;
531 spin_unlock(&osb->osb_lock); 531 spin_unlock(&osb->osb_lock);
@@ -1186,8 +1186,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
1186 } 1186 }
1187 1187
1188 trace_ocfs2_double_lock_end( 1188 trace_ocfs2_double_lock_end(
1189 (unsigned long long)OCFS2_I(inode1)->ip_blkno, 1189 (unsigned long long)oi1->ip_blkno,
1190 (unsigned long long)OCFS2_I(inode2)->ip_blkno); 1190 (unsigned long long)oi2->ip_blkno);
1191 1191
1192bail: 1192bail:
1193 if (status) 1193 if (status)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 6867eef2e06b..4f86ac0027b5 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -50,6 +50,8 @@
50 50
51#include "reservations.h" 51#include "reservations.h"
52 52
53#include "filecheck.h"
54
53/* Caching of metadata buffers */ 55/* Caching of metadata buffers */
54 56
55/* Most user visible OCFS2 inodes will have very few pieces of 57/* Most user visible OCFS2 inodes will have very few pieces of
@@ -472,6 +474,12 @@ struct ocfs2_super
472 * workqueue and schedule on our own. 474 * workqueue and schedule on our own.
473 */ 475 */
474 struct workqueue_struct *ocfs2_wq; 476 struct workqueue_struct *ocfs2_wq;
477
478 /* sysfs directory per partition */
479 struct kset *osb_dev_kset;
480
481 /* file check related stuff */
482 struct ocfs2_filecheck_sysfs_entry osb_fc_ent;
475}; 483};
476 484
477#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) 485#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index e2a11aaece10..2ee76a90ba8f 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -1311,11 +1311,11 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_release);
1311 1311
1312DEFINE_OCFS2_FILE_OPS(ocfs2_sync_file); 1312DEFINE_OCFS2_FILE_OPS(ocfs2_sync_file);
1313 1313
1314DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write); 1314DEFINE_OCFS2_FILE_OPS(ocfs2_file_write_iter);
1315 1315
1316DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write); 1316DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
1317 1317
1318DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read); 1318DEFINE_OCFS2_FILE_OPS(ocfs2_file_read_iter);
1319 1319
1320DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file); 1320DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
1321 1321
@@ -1467,7 +1467,7 @@ TRACE_EVENT(ocfs2_prepare_inode_for_write,
1467 __entry->saved_pos, __entry->count, __entry->wait) 1467 __entry->saved_pos, __entry->count, __entry->wait)
1468); 1468);
1469 1469
1470DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret); 1470DEFINE_OCFS2_INT_EVENT(generic_file_read_iter_ret);
1471 1471
1472/* End of trace events for fs/ocfs2/file.c. */ 1472/* End of trace events for fs/ocfs2/file.c. */
1473 1473
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ab156e35ec00..01c6b3894406 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -573,7 +573,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
573 BUG_ON(ocfs2_is_refcount_inode(inode)); 573 BUG_ON(ocfs2_is_refcount_inode(inode));
574 574
575 trace_ocfs2_create_refcount_tree( 575 trace_ocfs2_create_refcount_tree(
576 (unsigned long long)OCFS2_I(inode)->ip_blkno); 576 (unsigned long long)oi->ip_blkno);
577 577
578 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); 578 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
579 if (ret) { 579 if (ret) {
@@ -3359,7 +3359,7 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
3359 unsigned int ext_flags; 3359 unsigned int ext_flags;
3360 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3360 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3361 3361
3362 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { 3362 if (!ocfs2_refcount_tree(osb)) {
3363 return ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n", 3363 return ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
3364 inode->i_ino); 3364 inode->i_ino);
3365 } 3365 }
@@ -3707,7 +3707,7 @@ int ocfs2_add_refcount_flag(struct inode *inode,
3707 trace_ocfs2_add_refcount_flag(ref_blocks, credits); 3707 trace_ocfs2_add_refcount_flag(ref_blocks, credits);
3708 3708
3709 if (ref_blocks) { 3709 if (ref_blocks) {
3710 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 3710 ret = ocfs2_reserve_new_metadata_blocks(osb,
3711 ref_blocks, &meta_ac); 3711 ref_blocks, &meta_ac);
3712 if (ret) { 3712 if (ret) {
3713 mlog_errno(ret); 3713 mlog_errno(ret);
@@ -4766,8 +4766,8 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
4766 *bh2 = *bh1; 4766 *bh2 = *bh1;
4767 4767
4768 trace_ocfs2_double_lock_end( 4768 trace_ocfs2_double_lock_end(
4769 (unsigned long long)OCFS2_I(inode1)->ip_blkno, 4769 (unsigned long long)oi1->ip_blkno,
4770 (unsigned long long)OCFS2_I(inode2)->ip_blkno); 4770 (unsigned long long)oi2->ip_blkno);
4771 4771
4772 return 0; 4772 return 0;
4773 4773
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index d8f5f6ce99dc..f7c972fbed6a 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -79,8 +79,6 @@ static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
79 return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset); 79 return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset);
80} 80}
81 81
82static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
83static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
84static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 82static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
85static int ocfs2_block_group_fill(handle_t *handle, 83static int ocfs2_block_group_fill(handle_t *handle,
86 struct inode *alloc_inode, 84 struct inode *alloc_inode,
@@ -387,7 +385,7 @@ static int ocfs2_block_group_fill(handle_t *handle,
387 385
388 memset(bg, 0, sb->s_blocksize); 386 memset(bg, 0, sb->s_blocksize);
389 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); 387 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
390 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 388 bg->bg_generation = cpu_to_le32(osb->fs_generation);
391 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1, 389 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1,
392 osb->s_feature_incompat)); 390 osb->s_feature_incompat));
393 bg->bg_chain = cpu_to_le16(my_chain); 391 bg->bg_chain = cpu_to_le16(my_chain);
@@ -1521,7 +1519,7 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1521 OCFS2_I(inode)->ip_clusters, max_bits); 1519 OCFS2_I(inode)->ip_clusters, max_bits);
1522 } 1520 }
1523 1521
1524 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1522 ret = ocfs2_block_group_find_clear_bits(osb,
1525 group_bh, bits_wanted, 1523 group_bh, bits_wanted,
1526 max_bits, res); 1524 max_bits, res);
1527 if (ret) 1525 if (ret)
@@ -2626,53 +2624,6 @@ int ocfs2_release_clusters(handle_t *handle,
2626 _ocfs2_clear_bit); 2624 _ocfs2_clear_bit);
2627} 2625}
2628 2626
2629static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
2630{
2631 printk("Block Group:\n");
2632 printk("bg_signature: %s\n", bg->bg_signature);
2633 printk("bg_size: %u\n", bg->bg_size);
2634 printk("bg_bits: %u\n", bg->bg_bits);
2635 printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
2636 printk("bg_chain: %u\n", bg->bg_chain);
2637 printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation));
2638 printk("bg_next_group: %llu\n",
2639 (unsigned long long)bg->bg_next_group);
2640 printk("bg_parent_dinode: %llu\n",
2641 (unsigned long long)bg->bg_parent_dinode);
2642 printk("bg_blkno: %llu\n",
2643 (unsigned long long)bg->bg_blkno);
2644}
2645
2646static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
2647{
2648 int i;
2649
2650 printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
2651 printk("i_signature: %s\n", fe->i_signature);
2652 printk("i_size: %llu\n",
2653 (unsigned long long)fe->i_size);
2654 printk("i_clusters: %u\n", fe->i_clusters);
2655 printk("i_generation: %u\n",
2656 le32_to_cpu(fe->i_generation));
2657 printk("id1.bitmap1.i_used: %u\n",
2658 le32_to_cpu(fe->id1.bitmap1.i_used));
2659 printk("id1.bitmap1.i_total: %u\n",
2660 le32_to_cpu(fe->id1.bitmap1.i_total));
2661 printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg);
2662 printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc);
2663 printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count);
2664 printk("id2.i_chain.cl_next_free_rec: %u\n",
2665 fe->id2.i_chain.cl_next_free_rec);
2666 for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) {
2667 printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i,
2668 fe->id2.i_chain.cl_recs[i].c_free);
2669 printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
2670 fe->id2.i_chain.cl_recs[i].c_total);
2671 printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
2672 (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
2673 }
2674}
2675
2676/* 2627/*
2677 * For a given allocation, determine which allocators will need to be 2628 * For a given allocation, determine which allocators will need to be
2678 * accessed, and lock them, reserving the appropriate number of bits. 2629 * accessed, and lock them, reserving the appropriate number of bits.
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ffa4952d432b..3415e0b09398 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -423,10 +423,10 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait)
423 ocfs2_schedule_truncate_log_flush(osb, 0); 423 ocfs2_schedule_truncate_log_flush(osb, 0);
424 } 424 }
425 425
426 if (jbd2_journal_start_commit(OCFS2_SB(sb)->journal->j_journal, 426 if (jbd2_journal_start_commit(osb->journal->j_journal,
427 &target)) { 427 &target)) {
428 if (wait) 428 if (wait)
429 jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal, 429 jbd2_log_wait_commit(osb->journal->j_journal,
430 target); 430 target);
431 } 431 }
432 return 0; 432 return 0;
@@ -1161,6 +1161,23 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1161 1161
1162 ocfs2_complete_mount_recovery(osb); 1162 ocfs2_complete_mount_recovery(osb);
1163 1163
1164 osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL,
1165 &ocfs2_kset->kobj);
1166 if (!osb->osb_dev_kset) {
1167 status = -ENOMEM;
1168 mlog(ML_ERROR, "Unable to create device kset %s.\n", sb->s_id);
1169 goto read_super_error;
1170 }
1171
1172 /* Create filecheck sysfs related directories/files at
1173 * /sys/fs/ocfs2/<devname>/filecheck */
1174 if (ocfs2_filecheck_create_sysfs(osb)) {
1175 status = -ENOMEM;
1176 mlog(ML_ERROR, "Unable to create filecheck sysfs directory at "
1177 "/sys/fs/ocfs2/%s/filecheck.\n", sb->s_id);
1178 goto read_super_error;
1179 }
1180
1164 if (ocfs2_mount_local(osb)) 1181 if (ocfs2_mount_local(osb))
1165 snprintf(nodestr, sizeof(nodestr), "local"); 1182 snprintf(nodestr, sizeof(nodestr), "local");
1166 else 1183 else
@@ -1199,9 +1216,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1199 /* Start this when the mount is almost sure of being successful */ 1216 /* Start this when the mount is almost sure of being successful */
1200 ocfs2_orphan_scan_start(osb); 1217 ocfs2_orphan_scan_start(osb);
1201 1218
1202 /* Create filecheck sysfile /sys/fs/ocfs2/<devname>/filecheck */
1203 ocfs2_filecheck_create_sysfs(sb);
1204
1205 return status; 1219 return status;
1206 1220
1207read_super_error: 1221read_super_error:
@@ -1653,7 +1667,6 @@ static void ocfs2_put_super(struct super_block *sb)
1653 1667
1654 ocfs2_sync_blockdev(sb); 1668 ocfs2_sync_blockdev(sb);
1655 ocfs2_dismount_volume(sb, 0); 1669 ocfs2_dismount_volume(sb, 0);
1656 ocfs2_filecheck_remove_sysfs(sb);
1657} 1670}
1658 1671
1659static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) 1672static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1768,12 +1781,9 @@ static int ocfs2_initialize_mem_caches(void)
1768 NULL); 1781 NULL);
1769 if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep || 1782 if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
1770 !ocfs2_qf_chunk_cachep) { 1783 !ocfs2_qf_chunk_cachep) {
1771 if (ocfs2_inode_cachep) 1784 kmem_cache_destroy(ocfs2_inode_cachep);
1772 kmem_cache_destroy(ocfs2_inode_cachep); 1785 kmem_cache_destroy(ocfs2_dquot_cachep);
1773 if (ocfs2_dquot_cachep) 1786 kmem_cache_destroy(ocfs2_qf_chunk_cachep);
1774 kmem_cache_destroy(ocfs2_dquot_cachep);
1775 if (ocfs2_qf_chunk_cachep)
1776 kmem_cache_destroy(ocfs2_qf_chunk_cachep);
1777 return -ENOMEM; 1787 return -ENOMEM;
1778 } 1788 }
1779 1789
@@ -1787,16 +1797,13 @@ static void ocfs2_free_mem_caches(void)
1787 * destroy cache. 1797 * destroy cache.
1788 */ 1798 */
1789 rcu_barrier(); 1799 rcu_barrier();
1790 if (ocfs2_inode_cachep) 1800 kmem_cache_destroy(ocfs2_inode_cachep);
1791 kmem_cache_destroy(ocfs2_inode_cachep);
1792 ocfs2_inode_cachep = NULL; 1801 ocfs2_inode_cachep = NULL;
1793 1802
1794 if (ocfs2_dquot_cachep) 1803 kmem_cache_destroy(ocfs2_dquot_cachep);
1795 kmem_cache_destroy(ocfs2_dquot_cachep);
1796 ocfs2_dquot_cachep = NULL; 1804 ocfs2_dquot_cachep = NULL;
1797 1805
1798 if (ocfs2_qf_chunk_cachep) 1806 kmem_cache_destroy(ocfs2_qf_chunk_cachep);
1799 kmem_cache_destroy(ocfs2_qf_chunk_cachep);
1800 ocfs2_qf_chunk_cachep = NULL; 1807 ocfs2_qf_chunk_cachep = NULL;
1801} 1808}
1802 1809
@@ -1899,6 +1906,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1899 osb = OCFS2_SB(sb); 1906 osb = OCFS2_SB(sb);
1900 BUG_ON(!osb); 1907 BUG_ON(!osb);
1901 1908
1909 /* Remove file check sysfs related directores/files,
1910 * and wait for the pending file check operations */
1911 ocfs2_filecheck_remove_sysfs(osb);
1912
1913 kset_unregister(osb->osb_dev_kset);
1914
1902 debugfs_remove(osb->osb_ctxt); 1915 debugfs_remove(osb->osb_ctxt);
1903 1916
1904 /* Orphan scan should be stopped as early as possible */ 1917 /* Orphan scan should be stopped as early as possible */
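The per-device kset added above is plain kobject-core plumbing; a minimal sketch of the same create/tear-down pairing, assuming ocfs2_kset is the kset behind /sys/fs/ocfs2 and "example" stands in for sb->s_id:

	static struct kset *example_dev_kset;

	static int example_register_dev_kset(struct kset *fs_kset)
	{
		/* Creates /sys/fs/ocfs2/example/; per-device entries such
		 * as the filecheck directory hang off this kset. */
		example_dev_kset = kset_create_and_add("example", NULL,
						       &fs_kset->kobj);
		return example_dev_kset ? 0 : -ENOMEM;
	}

	static void example_unregister_dev_kset(void)
	{
		kset_unregister(example_dev_kset);
	}
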
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 82e17b076ce7..78f09c76ab3c 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -633,6 +633,5 @@ int __init init_ocfs2_uptodate_cache(void)
633 633
634void exit_ocfs2_uptodate_cache(void) 634void exit_ocfs2_uptodate_cache(void)
635{ 635{
636 if (ocfs2_uptodate_cachep) 636 kmem_cache_destroy(ocfs2_uptodate_cachep);
637 kmem_cache_destroy(ocfs2_uptodate_cachep);
638} 637}
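This cleanup, like the earlier ones in super.c, relies on kmem_cache_destroy() treating a NULL cache as a no-op, so error and exit paths need no per-cache guards; a minimal sketch of the resulting pattern, with foo_cachep as a hypothetical cache:

	#include <linux/slab.h>

	static struct kmem_cache *foo_cachep;

	static int foo_caches_init(void)
	{
		foo_cachep = kmem_cache_create("foo_cache", 128, 0,
					       SLAB_HWCACHE_ALIGN, NULL);
		return foo_cachep ? 0 : -ENOMEM;
	}

	static void foo_caches_exit(void)
	{
		kmem_cache_destroy(foo_cachep);	/* NULL is silently ignored */
		foo_cachep = NULL;
	}
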
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index c261c1dfd374..3a24ce3deb01 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3564,7 +3564,7 @@ int ocfs2_xattr_set(struct inode *inode,
3564 .not_found = -ENODATA, 3564 .not_found = -ENODATA,
3565 }; 3565 };
3566 3566
3567 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 3567 if (!ocfs2_supports_xattr(osb))
3568 return -EOPNOTSUPP; 3568 return -EOPNOTSUPP;
3569 3569
3570 /* 3570 /*
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index c3c95d18bf43..7e6c77740413 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -64,10 +64,11 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,
64 64
65struct kmem_cache; 65struct kmem_cache;
66 66
67int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
67#ifdef CONFIG_FAILSLAB 68#ifdef CONFIG_FAILSLAB
68extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); 69extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
69#else 70#else
70static inline bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) 71static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
71{ 72{
72 return false; 73 return false;
73} 74}
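With should_failslab() now declared unconditionally, the configuration-dependent check moves behind __should_failslab(); one plausible shape for the always-built entry point (a sketch, not necessarily the exact mm/ implementation) is a noinline wrapper that reports failure as an errno so error injection can hook it:

	/* Sketch only: forward to the CONFIG_FAILSLAB-only helper. */
	noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
	{
		if (__should_failslab(s, gfpflags))
			return -ENOMEM;
		return 0;
	}
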
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index d6459bd1376d..de784fd11d12 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -43,7 +43,7 @@ void kasan_unpoison_stack_above_sp_to(const void *watermark);
43void kasan_alloc_pages(struct page *page, unsigned int order); 43void kasan_alloc_pages(struct page *page, unsigned int order);
44void kasan_free_pages(struct page *page, unsigned int order); 44void kasan_free_pages(struct page *page, unsigned int order);
45 45
46void kasan_cache_create(struct kmem_cache *cache, size_t *size, 46void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
47 slab_flags_t *flags); 47 slab_flags_t *flags);
48void kasan_cache_shrink(struct kmem_cache *cache); 48void kasan_cache_shrink(struct kmem_cache *cache);
49void kasan_cache_shutdown(struct kmem_cache *cache); 49void kasan_cache_shutdown(struct kmem_cache *cache);
@@ -92,7 +92,7 @@ static inline void kasan_alloc_pages(struct page *page, unsigned int order) {}
92static inline void kasan_free_pages(struct page *page, unsigned int order) {} 92static inline void kasan_free_pages(struct page *page, unsigned int order) {}
93 93
94static inline void kasan_cache_create(struct kmem_cache *cache, 94static inline void kasan_cache_create(struct kmem_cache *cache,
95 size_t *size, 95 unsigned int *size,
96 slab_flags_t *flags) {} 96 slab_flags_t *flags) {}
97static inline void kasan_cache_shrink(struct kmem_cache *cache) {} 97static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
98static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} 98static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index bb8129a3474d..96def9d15b1b 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -32,6 +32,7 @@ struct list_lru_one {
32}; 32};
33 33
34struct list_lru_memcg { 34struct list_lru_memcg {
35 struct rcu_head rcu;
35 /* array of per cgroup lists, indexed by memcg_cache_id */ 36 /* array of per cgroup lists, indexed by memcg_cache_id */
36 struct list_lru_one *lru[0]; 37 struct list_lru_one *lru[0];
37}; 38};
@@ -43,7 +44,7 @@ struct list_lru_node {
43 struct list_lru_one lru; 44 struct list_lru_one lru;
44#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) 45#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
45 /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ 46 /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
46 struct list_lru_memcg *memcg_lrus; 47 struct list_lru_memcg __rcu *memcg_lrus;
47#endif 48#endif
48 long nr_items; 49 long nr_items;
49} ____cacheline_aligned_in_smp; 50} ____cacheline_aligned_in_smp;
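With memcg_lrus now annotated __rcu and freed via the embedded rcu_head, lock-free readers must dereference it under RCU; a minimal sketch of the read side, with list_lru_from_memcg_idx() used here purely as an illustrative helper name:

	static struct list_lru_one *
	list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
	{
		struct list_lru_memcg *memcg_lrus;

		/* Caller holds rcu_read_lock() or nlru->lock. */
		memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
						   lockdep_is_held(&nlru->lock));
		if (memcg_lrus && idx >= 0)
			return memcg_lrus->lru[idx];
		return &nlru->lru;
	}
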
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f92ea7783652..0257aee7ab4b 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -416,21 +416,11 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end)
416{ 416{
417} 417}
418#endif 418#endif
419
420extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
421 phys_addr_t end_addr);
422#else 419#else
423static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) 420static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
424{ 421{
425 return 0; 422 return 0;
426} 423}
427
428static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
429 phys_addr_t end_addr)
430{
431 return 0;
432}
433
434#endif /* CONFIG_HAVE_MEMBLOCK */ 424#endif /* CONFIG_HAVE_MEMBLOCK */
435 425
436#endif /* __KERNEL__ */ 426#endif /* __KERNEL__ */
diff --git a/include/linux/memory.h b/include/linux/memory.h
index f71e732c77b2..31ca3e28b0eb 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -33,6 +33,7 @@ struct memory_block {
33 void *hw; /* optional pointer to fw/hw data */ 33 void *hw; /* optional pointer to fw/hw data */
34 int (*phys_callback)(struct memory_block *); 34 int (*phys_callback)(struct memory_block *);
35 struct device dev; 35 struct device dev;
36 int nid; /* NID for this memory block */
36}; 37};
37 38
38int arch_get_memory_phys_device(unsigned long start_pfn); 39int arch_get_memory_phys_device(unsigned long start_pfn);
@@ -109,7 +110,7 @@ extern int register_memory_notifier(struct notifier_block *nb);
109extern void unregister_memory_notifier(struct notifier_block *nb); 110extern void unregister_memory_notifier(struct notifier_block *nb);
110extern int register_memory_isolate_notifier(struct notifier_block *nb); 111extern int register_memory_isolate_notifier(struct notifier_block *nb);
111extern void unregister_memory_isolate_notifier(struct notifier_block *nb); 112extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
112extern int register_new_memory(int, struct mem_section *); 113int hotplug_memory_register(int nid, struct mem_section *section);
113#ifdef CONFIG_MEMORY_HOTREMOVE 114#ifdef CONFIG_MEMORY_HOTREMOVE
114extern int unregister_memory_section(struct mem_section *); 115extern int unregister_memory_section(struct mem_section *);
115#endif 116#endif
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index aba5f86eb038..2b0265265c28 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -52,24 +52,6 @@ enum {
52}; 52};
53 53
54/* 54/*
55 * pgdat resizing functions
56 */
57static inline
58void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
59{
60 spin_lock_irqsave(&pgdat->node_size_lock, *flags);
61}
62static inline
63void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
64{
65 spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
66}
67static inline
68void pgdat_resize_init(struct pglist_data *pgdat)
69{
70 spin_lock_init(&pgdat->node_size_lock);
71}
72/*
73 * Zone resizing functions 55 * Zone resizing functions
74 * 56 *
75 * Note: any attempt to resize a zone should has pgdat_resize_lock() 57 * Note: any attempt to resize a zone should has pgdat_resize_lock()
@@ -246,13 +228,6 @@ extern void clear_zone_contiguous(struct zone *zone);
246 ___page; \ 228 ___page; \
247 }) 229 })
248 230
249/*
250 * Stub functions for when hotplug is off
251 */
252static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
253static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
254static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
255
256static inline unsigned zone_span_seqbegin(struct zone *zone) 231static inline unsigned zone_span_seqbegin(struct zone *zone)
257{ 232{
258 return 0; 233 return 0;
@@ -293,6 +268,34 @@ static inline bool movable_node_is_enabled(void)
293} 268}
294#endif /* ! CONFIG_MEMORY_HOTPLUG */ 269#endif /* ! CONFIG_MEMORY_HOTPLUG */
295 270
271#if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
272/*
273 * pgdat resizing functions
274 */
275static inline
276void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
277{
278 spin_lock_irqsave(&pgdat->node_size_lock, *flags);
279}
280static inline
281void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
282{
283 spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
284}
285static inline
286void pgdat_resize_init(struct pglist_data *pgdat)
287{
288 spin_lock_init(&pgdat->node_size_lock);
289}
290#else /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */
291/*
292 * Stub functions for when hotplug is off
293 */
294static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
295static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
296static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
297#endif /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */
298
296#ifdef CONFIG_MEMORY_HOTREMOVE 299#ifdef CONFIG_MEMORY_HOTREMOVE
297 300
298extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); 301extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
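Moving the resize helpers under CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT lets deferred struct-page initialization serialize against hotplug with the same primitive; the calling convention is unchanged (sketch):

	static void example_update_node_span(pg_data_t *pgdat,
					     unsigned long start_pfn)
	{
		unsigned long flags;

		pgdat_resize_lock(pgdat, &flags);
		/* node_start_pfn / node_spanned_pages may safely be read
		 * or updated while the resize lock is held. */
		if (start_pfn < pgdat->node_start_pfn)
			pgdat->node_start_pfn = start_pfn;
		pgdat_resize_unlock(pgdat, &flags);
	}
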
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index a2246cf670ba..ab45f8a0d288 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -25,7 +25,7 @@ enum migrate_reason {
25 MR_SYSCALL, /* also applies to cpusets */ 25 MR_SYSCALL, /* also applies to cpusets */
26 MR_MEMPOLICY_MBIND, 26 MR_MEMPOLICY_MBIND,
27 MR_NUMA_MISPLACED, 27 MR_NUMA_MISPLACED,
28 MR_CMA, 28 MR_CONTIG_RANGE,
29 MR_TYPES 29 MR_TYPES
30}; 30};
31 31
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f945dff34925..3ad632366973 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -386,17 +386,19 @@ struct vm_operations_struct {
386 void (*close)(struct vm_area_struct * area); 386 void (*close)(struct vm_area_struct * area);
387 int (*split)(struct vm_area_struct * area, unsigned long addr); 387 int (*split)(struct vm_area_struct * area, unsigned long addr);
388 int (*mremap)(struct vm_area_struct * area); 388 int (*mremap)(struct vm_area_struct * area);
389 int (*fault)(struct vm_fault *vmf); 389 vm_fault_t (*fault)(struct vm_fault *vmf);
390 int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); 390 vm_fault_t (*huge_fault)(struct vm_fault *vmf,
391 enum page_entry_size pe_size);
391 void (*map_pages)(struct vm_fault *vmf, 392 void (*map_pages)(struct vm_fault *vmf,
392 pgoff_t start_pgoff, pgoff_t end_pgoff); 393 pgoff_t start_pgoff, pgoff_t end_pgoff);
394 unsigned long (*pagesize)(struct vm_area_struct * area);
393 395
394 /* notification that a previously read-only page is about to become 396 /* notification that a previously read-only page is about to become
395 * writable, if an error is returned it will cause a SIGBUS */ 397 * writable, if an error is returned it will cause a SIGBUS */
396 int (*page_mkwrite)(struct vm_fault *vmf); 398 vm_fault_t (*page_mkwrite)(struct vm_fault *vmf);
397 399
398 /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ 400 /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
399 int (*pfn_mkwrite)(struct vm_fault *vmf); 401 vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf);
400 402
401 /* called by access_process_vm when get_user_pages() fails, typically 403 /* called by access_process_vm when get_user_pages() fails, typically
402 * for use by special VMAs that can switch between memory and hardware 404 * for use by special VMAs that can switch between memory and hardware
@@ -903,7 +905,9 @@ extern int page_to_nid(const struct page *page);
903#else 905#else
904static inline int page_to_nid(const struct page *page) 906static inline int page_to_nid(const struct page *page)
905{ 907{
906 return (page->flags >> NODES_PGSHIFT) & NODES_MASK; 908 struct page *p = (struct page *)page;
909
910 return (PF_POISONED_CHECK(p)->flags >> NODES_PGSHIFT) & NODES_MASK;
907} 911}
908#endif 912#endif
909 913
@@ -1152,6 +1156,7 @@ static inline pgoff_t page_index(struct page *page)
1152 1156
1153bool page_mapped(struct page *page); 1157bool page_mapped(struct page *page);
1154struct address_space *page_mapping(struct page *page); 1158struct address_space *page_mapping(struct page *page);
1159struct address_space *page_mapping_file(struct page *page);
1155 1160
1156/* 1161/*
1157 * Return true only if the page has been allocated with 1162 * Return true only if the page has been allocated with
@@ -2420,6 +2425,44 @@ int vm_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr,
2420 pfn_t pfn); 2425 pfn_t pfn);
2421int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); 2426int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
2422 2427
2428static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma,
2429 unsigned long addr, struct page *page)
2430{
2431 int err = vm_insert_page(vma, addr, page);
2432
2433 if (err == -ENOMEM)
2434 return VM_FAULT_OOM;
2435 if (err < 0 && err != -EBUSY)
2436 return VM_FAULT_SIGBUS;
2437
2438 return VM_FAULT_NOPAGE;
2439}
2440
2441static inline vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma,
2442 unsigned long addr, pfn_t pfn)
2443{
2444 int err = vm_insert_mixed(vma, addr, pfn);
2445
2446 if (err == -ENOMEM)
2447 return VM_FAULT_OOM;
2448 if (err < 0 && err != -EBUSY)
2449 return VM_FAULT_SIGBUS;
2450
2451 return VM_FAULT_NOPAGE;
2452}
2453
2454static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma,
2455 unsigned long addr, unsigned long pfn)
2456{
2457 int err = vm_insert_pfn(vma, addr, pfn);
2458
2459 if (err == -ENOMEM)
2460 return VM_FAULT_OOM;
2461 if (err < 0 && err != -EBUSY)
2462 return VM_FAULT_SIGBUS;
2463
2464 return VM_FAULT_NOPAGE;
2465}
2423 2466
2424struct page *follow_page_mask(struct vm_area_struct *vma, 2467struct page *follow_page_mask(struct vm_area_struct *vma,
2425 unsigned long address, unsigned int foll_flags, 2468 unsigned long address, unsigned int foll_flags,
@@ -2589,7 +2632,7 @@ extern int get_hwpoison_page(struct page *page);
2589extern int sysctl_memory_failure_early_kill; 2632extern int sysctl_memory_failure_early_kill;
2590extern int sysctl_memory_failure_recovery; 2633extern int sysctl_memory_failure_recovery;
2591extern void shake_page(struct page *p, int access); 2634extern void shake_page(struct page *p, int access);
2592extern atomic_long_t num_poisoned_pages; 2635extern atomic_long_t num_poisoned_pages __read_mostly;
2593extern int soft_offline_page(struct page *page, int flags); 2636extern int soft_offline_page(struct page *page, int flags);
2594 2637
2595 2638
@@ -2611,6 +2654,7 @@ enum mf_action_page_type {
2611 MF_MSG_POISONED_HUGE, 2654 MF_MSG_POISONED_HUGE,
2612 MF_MSG_HUGE, 2655 MF_MSG_HUGE,
2613 MF_MSG_FREE_HUGE, 2656 MF_MSG_FREE_HUGE,
2657 MF_MSG_NON_PMD_HUGE,
2614 MF_MSG_UNMAP_FAILED, 2658 MF_MSG_UNMAP_FAILED,
2615 MF_MSG_DIRTY_SWAPCACHE, 2659 MF_MSG_DIRTY_SWAPCACHE,
2616 MF_MSG_CLEAN_SWAPCACHE, 2660 MF_MSG_CLEAN_SWAPCACHE,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fd1af6b9591d..21612347d311 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -22,6 +22,8 @@
22#endif 22#endif
23#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) 23#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
24 24
25typedef int vm_fault_t;
26
25struct address_space; 27struct address_space;
26struct mem_cgroup; 28struct mem_cgroup;
27struct hmm; 29struct hmm;
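Together with the vm_fault_t typedef added here, the vmf_insert_*() wrappers in mm.h above let fault handlers return fault codes directly instead of hand-translating errnos; a minimal sketch of a ->fault implementation for a hypothetical driver that maps one pfn per page offset:

	struct example_dev {			/* hypothetical driver state */
		unsigned long base_pfn;
	};

	static vm_fault_t example_dev_fault(struct vm_fault *vmf)
	{
		struct example_dev *dev = vmf->vma->vm_private_data;

		/* errno -> VM_FAULT_* conversion happens in the wrapper. */
		return vmf_insert_pfn(vmf->vma, vmf->address,
				      dev->base_pfn + vmf->pgoff);
	}

	static const struct vm_operations_struct example_dev_vm_ops = {
		.fault = example_dev_fault,
	};
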
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 57b0030d3800..2ad72d2c8cc5 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -37,10 +37,10 @@ void dump_mm(const struct mm_struct *mm);
37 BUG(); \ 37 BUG(); \
38 } \ 38 } \
39 } while (0) 39 } while (0)
40#define VM_WARN_ON(cond) WARN_ON(cond) 40#define VM_WARN_ON(cond) (void)WARN_ON(cond)
41#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond) 41#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
42#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) 42#define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
43#define VM_WARN(cond, format...) WARN(cond, format) 43#define VM_WARN(cond, format...) (void)WARN(cond, format)
44#else 44#else
45#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) 45#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
46#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) 46#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
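The (void) casts make the VM_WARN* macros statement-only: unlike WARN_ON(), their result can no longer feed an if(), which keeps CONFIG_DEBUG_VM and non-debug builds in agreement. Sketch of the allowed form:

	static void example_check(struct page *page)
	{
		/* Fine: used purely as a statement. */
		VM_WARN_ON_ONCE(page_mapcount(page) < 0);

		/*
		 * No longer accepted with CONFIG_DEBUG_VM=y:
		 *	if (VM_WARN_ON(page_mapcount(page) < 0))
		 *		return;
		 */
	}
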
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a2db4576e499..f11ae29005f1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -633,14 +633,15 @@ typedef struct pglist_data {
633#ifndef CONFIG_NO_BOOTMEM 633#ifndef CONFIG_NO_BOOTMEM
634 struct bootmem_data *bdata; 634 struct bootmem_data *bdata;
635#endif 635#endif
636#ifdef CONFIG_MEMORY_HOTPLUG 636#if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
637 /* 637 /*
638 * Must be held any time you expect node_start_pfn, node_present_pages 638 * Must be held any time you expect node_start_pfn, node_present_pages
639 * or node_spanned_pages stay constant. Holding this will also 639 * or node_spanned_pages stay constant. Holding this will also
640 * guarantee that any pfn_valid() stays that way. 640 * guarantee that any pfn_valid() stays that way.
641 * 641 *
642 * pgdat_resize_lock() and pgdat_resize_unlock() are provided to 642 * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
643 * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG. 643 * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
644 * or CONFIG_DEFERRED_STRUCT_PAGE_INIT.
644 * 645 *
645 * Nests above zone->lock and zone->span_seqlock 646 * Nests above zone->lock and zone->span_seqlock
646 */ 647 */
@@ -775,7 +776,8 @@ static inline bool is_dev_zone(const struct zone *zone)
775#include <linux/memory_hotplug.h> 776#include <linux/memory_hotplug.h>
776 777
777void build_all_zonelists(pg_data_t *pgdat); 778void build_all_zonelists(pg_data_t *pgdat);
778void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); 779void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
780 enum zone_type classzone_idx);
779bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, 781bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
780 int classzone_idx, unsigned int alloc_flags, 782 int classzone_idx, unsigned int alloc_flags,
781 long free_pages); 783 long free_pages);
diff --git a/include/linux/node.h b/include/linux/node.h
index 4ece0fee0ffc..41f171861dcc 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -67,7 +67,7 @@ extern void unregister_one_node(int nid);
67extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); 67extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
68extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); 68extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
69extern int register_mem_sect_under_node(struct memory_block *mem_blk, 69extern int register_mem_sect_under_node(struct memory_block *mem_blk,
70 int nid); 70 int nid, bool check_nid);
71extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, 71extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
72 unsigned long phys_index); 72 unsigned long phys_index);
73 73
@@ -97,7 +97,7 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
97 return 0; 97 return 0;
98} 98}
99static inline int register_mem_sect_under_node(struct memory_block *mem_blk, 99static inline int register_mem_sect_under_node(struct memory_block *mem_blk,
100 int nid) 100 int nid, bool check_nid)
101{ 101{
102 return 0; 102 return 0;
103} 103}
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 50c2b8786831..e34a27727b9a 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -156,9 +156,18 @@ static __always_inline int PageCompound(struct page *page)
156 return test_bit(PG_head, &page->flags) || PageTail(page); 156 return test_bit(PG_head, &page->flags) || PageTail(page);
157} 157}
158 158
159#define PAGE_POISON_PATTERN -1l
160static inline int PagePoisoned(const struct page *page)
161{
162 return page->flags == PAGE_POISON_PATTERN;
163}
164
159/* 165/*
160 * Page flags policies wrt compound pages 166 * Page flags policies wrt compound pages
161 * 167 *
168 * PF_POISONED_CHECK
169 * check if this struct page poisoned/uninitialized
170 *
162 * PF_ANY: 171 * PF_ANY:
163 * the page flag is relevant for small, head and tail pages. 172 * the page flag is relevant for small, head and tail pages.
164 * 173 *
@@ -176,17 +185,20 @@ static __always_inline int PageCompound(struct page *page)
176 * PF_NO_COMPOUND: 185 * PF_NO_COMPOUND:
177 * the page flag is not relevant for compound pages. 186 * the page flag is not relevant for compound pages.
178 */ 187 */
179#define PF_ANY(page, enforce) page 188#define PF_POISONED_CHECK(page) ({ \
180#define PF_HEAD(page, enforce) compound_head(page) 189 VM_BUG_ON_PGFLAGS(PagePoisoned(page), page); \
190 page; })
191#define PF_ANY(page, enforce) PF_POISONED_CHECK(page)
192#define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page))
181#define PF_ONLY_HEAD(page, enforce) ({ \ 193#define PF_ONLY_HEAD(page, enforce) ({ \
182 VM_BUG_ON_PGFLAGS(PageTail(page), page); \ 194 VM_BUG_ON_PGFLAGS(PageTail(page), page); \
183 page;}) 195 PF_POISONED_CHECK(page); })
184#define PF_NO_TAIL(page, enforce) ({ \ 196#define PF_NO_TAIL(page, enforce) ({ \
185 VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ 197 VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \
186 compound_head(page);}) 198 PF_POISONED_CHECK(compound_head(page)); })
187#define PF_NO_COMPOUND(page, enforce) ({ \ 199#define PF_NO_COMPOUND(page, enforce) ({ \
188 VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \ 200 VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \
189 page;}) 201 PF_POISONED_CHECK(page); })
190 202
191/* 203/*
192 * Macros to create function definitions for page flags 204 * Macros to create function definitions for page flags
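Routing every page-flag policy through PF_POISONED_CHECK() means a flag test on a struct page whose flags still hold the all-ones poison pattern trips VM_BUG_ON_PGFLAGS() (with CONFIG_DEBUG_VM_PGFLAGS) instead of silently returning garbage; sketch:

	static bool example_page_on_lru(unsigned long pfn)
	{
		struct page *page = pfn_to_page(pfn);	/* pfn assumed valid */

		/* PageLRU() expands through PF_HEAD, so an uninitialized
		 * (poisoned) struct page is caught right here. */
		return PageLRU(page);
	}
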
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 760d74a0e9a9..14d14beb1f7f 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -175,8 +175,7 @@ static inline void page_ref_unfreeze(struct page *page, int count)
175 VM_BUG_ON_PAGE(page_count(page) != 0, page); 175 VM_BUG_ON_PAGE(page_count(page) != 0, page);
176 VM_BUG_ON(count == 0); 176 VM_BUG_ON(count == 0);
177 177
178 smp_mb(); 178 atomic_set_release(&page->_refcount, count);
179 atomic_set(&page->_refcount, count);
180 if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze)) 179 if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze))
181 __page_ref_unfreeze(page, count); 180 __page_ref_unfreeze(page, count);
182} 181}
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 231abc8976c5..81ebd71f8c03 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -125,7 +125,6 @@
125#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ 125#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
126 (unsigned long)ZERO_SIZE_PTR) 126 (unsigned long)ZERO_SIZE_PTR)
127 127
128#include <linux/kmemleak.h>
129#include <linux/kasan.h> 128#include <linux/kasan.h>
130 129
131struct mem_cgroup; 130struct mem_cgroup;
@@ -137,12 +136,13 @@ bool slab_is_available(void);
137 136
138extern bool usercopy_fallback; 137extern bool usercopy_fallback;
139 138
140struct kmem_cache *kmem_cache_create(const char *name, size_t size, 139struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
141 size_t align, slab_flags_t flags, 140 unsigned int align, slab_flags_t flags,
142 void (*ctor)(void *)); 141 void (*ctor)(void *));
143struct kmem_cache *kmem_cache_create_usercopy(const char *name, 142struct kmem_cache *kmem_cache_create_usercopy(const char *name,
144 size_t size, size_t align, slab_flags_t flags, 143 unsigned int size, unsigned int align,
145 size_t useroffset, size_t usersize, 144 slab_flags_t flags,
145 unsigned int useroffset, unsigned int usersize,
146 void (*ctor)(void *)); 146 void (*ctor)(void *));
147void kmem_cache_destroy(struct kmem_cache *); 147void kmem_cache_destroy(struct kmem_cache *);
148int kmem_cache_shrink(struct kmem_cache *); 148int kmem_cache_shrink(struct kmem_cache *);
@@ -308,7 +308,7 @@ extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
308 * 2 = 129 .. 192 bytes 308 * 2 = 129 .. 192 bytes
309 * n = 2^(n-1)+1 .. 2^n 309 * n = 2^(n-1)+1 .. 2^n
310 */ 310 */
311static __always_inline int kmalloc_index(size_t size) 311static __always_inline unsigned int kmalloc_index(size_t size)
312{ 312{
313 if (!size) 313 if (!size)
314 return 0; 314 return 0;
@@ -504,7 +504,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
504 return kmalloc_large(size, flags); 504 return kmalloc_large(size, flags);
505#ifndef CONFIG_SLOB 505#ifndef CONFIG_SLOB
506 if (!(flags & GFP_DMA)) { 506 if (!(flags & GFP_DMA)) {
507 int index = kmalloc_index(size); 507 unsigned int index = kmalloc_index(size);
508 508
509 if (!index) 509 if (!index)
510 return ZERO_SIZE_PTR; 510 return ZERO_SIZE_PTR;
@@ -522,11 +522,11 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
522 * return size or 0 if a kmalloc cache for that 522 * return size or 0 if a kmalloc cache for that
523 * size does not exist 523 * size does not exist
524 */ 524 */
525static __always_inline int kmalloc_size(int n) 525static __always_inline unsigned int kmalloc_size(unsigned int n)
526{ 526{
527#ifndef CONFIG_SLOB 527#ifndef CONFIG_SLOB
528 if (n > 2) 528 if (n > 2)
529 return 1 << n; 529 return 1U << n;
530 530
531 if (n == 1 && KMALLOC_MIN_SIZE <= 32) 531 if (n == 1 && KMALLOC_MIN_SIZE <= 32)
532 return 96; 532 return 96;
@@ -542,7 +542,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
542#ifndef CONFIG_SLOB 542#ifndef CONFIG_SLOB
543 if (__builtin_constant_p(size) && 543 if (__builtin_constant_p(size) &&
544 size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) { 544 size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) {
545 int i = kmalloc_index(size); 545 unsigned int i = kmalloc_index(size);
546 546
547 if (!i) 547 if (!i)
548 return ZERO_SIZE_PTR; 548 return ZERO_SIZE_PTR;
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 7385547c04b1..d9228e4d0320 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -85,8 +85,8 @@ struct kmem_cache {
85 unsigned int *random_seq; 85 unsigned int *random_seq;
86#endif 86#endif
87 87
88 size_t useroffset; /* Usercopy region offset */ 88 unsigned int useroffset; /* Usercopy region offset */
89 size_t usersize; /* Usercopy region size */ 89 unsigned int usersize; /* Usercopy region size */
90 90
91 struct kmem_cache_node *node[MAX_NUMNODES]; 91 struct kmem_cache_node *node[MAX_NUMNODES];
92}; 92};
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 8ad99c47b19c..3773e26c08c1 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -73,7 +73,7 @@ struct kmem_cache_cpu {
73 * given order would contain. 73 * given order would contain.
74 */ 74 */
75struct kmem_cache_order_objects { 75struct kmem_cache_order_objects {
76 unsigned long x; 76 unsigned int x;
77}; 77};
78 78
79/* 79/*
@@ -84,11 +84,12 @@ struct kmem_cache {
84 /* Used for retriving partial slabs etc */ 84 /* Used for retriving partial slabs etc */
85 slab_flags_t flags; 85 slab_flags_t flags;
86 unsigned long min_partial; 86 unsigned long min_partial;
87 int size; /* The size of an object including meta data */ 87 unsigned int size; /* The size of an object including meta data */
88 int object_size; /* The size of an object without meta data */ 88 unsigned int object_size;/* The size of an object without meta data */
89 int offset; /* Free pointer offset. */ 89 unsigned int offset; /* Free pointer offset. */
90#ifdef CONFIG_SLUB_CPU_PARTIAL 90#ifdef CONFIG_SLUB_CPU_PARTIAL
91 int cpu_partial; /* Number of per cpu partial objects to keep around */ 91 /* Number of per cpu partial objects to keep around */
92 unsigned int cpu_partial;
92#endif 93#endif
93 struct kmem_cache_order_objects oo; 94 struct kmem_cache_order_objects oo;
94 95
@@ -98,10 +99,10 @@ struct kmem_cache {
98 gfp_t allocflags; /* gfp flags to use on each alloc */ 99 gfp_t allocflags; /* gfp flags to use on each alloc */
99 int refcount; /* Refcount for slab cache destroy */ 100 int refcount; /* Refcount for slab cache destroy */
100 void (*ctor)(void *); 101 void (*ctor)(void *);
101 int inuse; /* Offset to metadata */ 102 unsigned int inuse; /* Offset to metadata */
102 int align; /* Alignment */ 103 unsigned int align; /* Alignment */
103 int reserved; /* Reserved bytes at the end of slabs */ 104 unsigned int reserved; /* Reserved bytes at the end of slabs */
104 int red_left_pad; /* Left redzone padding size */ 105 unsigned int red_left_pad; /* Left redzone padding size */
105 const char *name; /* Name (only for display!) */ 106 const char *name; /* Name (only for display!) */
106 struct list_head list; /* List of slab caches */ 107 struct list_head list; /* List of slab caches */
107#ifdef CONFIG_SYSFS 108#ifdef CONFIG_SYSFS
@@ -110,7 +111,8 @@ struct kmem_cache {
110#endif 111#endif
111#ifdef CONFIG_MEMCG 112#ifdef CONFIG_MEMCG
112 struct memcg_cache_params memcg_params; 113 struct memcg_cache_params memcg_params;
113 int max_attr_size; /* for propagation, maximum size of a stored attr */ 114 /* for propagation, maximum size of a stored attr */
115 unsigned int max_attr_size;
114#ifdef CONFIG_SYSFS 116#ifdef CONFIG_SYSFS
115 struct kset *memcg_kset; 117 struct kset *memcg_kset;
116#endif 118#endif
@@ -124,7 +126,7 @@ struct kmem_cache {
124 /* 126 /*
125 * Defragmentation by allocating from a remote node. 127 * Defragmentation by allocating from a remote node.
126 */ 128 */
127 int remote_node_defrag_ratio; 129 unsigned int remote_node_defrag_ratio;
128#endif 130#endif
129 131
130#ifdef CONFIG_SLAB_FREELIST_RANDOM 132#ifdef CONFIG_SLAB_FREELIST_RANDOM
@@ -135,8 +137,8 @@ struct kmem_cache {
135 struct kasan_cache kasan_info; 137 struct kasan_cache kasan_info;
136#endif 138#endif
137 139
138 size_t useroffset; /* Usercopy region offset */ 140 unsigned int useroffset; /* Usercopy region offset */
139 size_t usersize; /* Usercopy region size */ 141 unsigned int usersize; /* Usercopy region size */
140 142
141 struct kmem_cache_node *node[MAX_NUMNODES]; 143 struct kmem_cache_node *node[MAX_NUMNODES];
142}; 144};
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a1a3f4ed94ce..2417d288e016 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -400,7 +400,6 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *,
400#define SWAP_ADDRESS_SPACE_SHIFT 14 400#define SWAP_ADDRESS_SPACE_SHIFT 14
401#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) 401#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
402extern struct address_space *swapper_spaces[]; 402extern struct address_space *swapper_spaces[];
403extern bool swap_vma_readahead;
404#define swap_address_space(entry) \ 403#define swap_address_space(entry) \
405 (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ 404 (&swapper_spaces[swp_type(entry)][swp_offset(entry) \
406 >> SWAP_ADDRESS_SPACE_SHIFT]) 405 >> SWAP_ADDRESS_SPACE_SHIFT])
@@ -422,14 +421,10 @@ extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
422extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, 421extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
423 struct vm_area_struct *vma, unsigned long addr, 422 struct vm_area_struct *vma, unsigned long addr,
424 bool *new_page_allocated); 423 bool *new_page_allocated);
425extern struct page *swapin_readahead(swp_entry_t, gfp_t, 424extern struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
426 struct vm_area_struct *vma, unsigned long addr); 425 struct vm_fault *vmf);
427 426extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
428extern struct page *swap_readahead_detect(struct vm_fault *vmf, 427 struct vm_fault *vmf);
429 struct vma_swap_readahead *swap_ra);
430extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
431 struct vm_fault *vmf,
432 struct vma_swap_readahead *swap_ra);
433 428
434/* linux/mm/swapfile.c */ 429/* linux/mm/swapfile.c */
435extern atomic_long_t nr_swap_pages; 430extern atomic_long_t nr_swap_pages;
@@ -437,11 +432,6 @@ extern long total_swap_pages;
437extern atomic_t nr_rotate_swap; 432extern atomic_t nr_rotate_swap;
438extern bool has_usable_swap(void); 433extern bool has_usable_swap(void);
439 434
440static inline bool swap_use_vma_readahead(void)
441{
442 return READ_ONCE(swap_vma_readahead) && !atomic_read(&nr_rotate_swap);
443}
444
445/* Swap 50% full? Release swapcache more aggressively.. */ 435/* Swap 50% full? Release swapcache more aggressively.. */
446static inline bool vm_swap_full(void) 436static inline bool vm_swap_full(void)
447{ 437{
@@ -537,26 +527,14 @@ static inline void put_swap_page(struct page *page, swp_entry_t swp)
537{ 527{
538} 528}
539 529
540static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, 530static inline struct page *swap_cluster_readahead(swp_entry_t entry,
541 struct vm_area_struct *vma, unsigned long addr) 531 gfp_t gfp_mask, struct vm_fault *vmf)
542{ 532{
543 return NULL; 533 return NULL;
544} 534}
545 535
546static inline bool swap_use_vma_readahead(void) 536static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
547{ 537 struct vm_fault *vmf)
548 return false;
549}
550
551static inline struct page *swap_readahead_detect(
552 struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
553{
554 return NULL;
555}
556
557static inline struct page *do_swap_page_readahead(
558 swp_entry_t fentry, gfp_t gfp_mask,
559 struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
560{ 538{
561 return NULL; 539 return NULL;
562} 540}
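After the consolidation, callers no longer pick between cluster and VMA readahead themselves; fault-path code simply hands over the fault descriptor (sketch, with entry being the swp_entry_t decoded from the faulting PTE):

	static struct page *example_swapin(struct vm_fault *vmf, swp_entry_t entry)
	{
		return swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf);
	}
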
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 57a8e98f2708..2219cce81ca4 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -47,6 +47,8 @@ void zs_destroy_pool(struct zs_pool *pool);
47unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags); 47unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags);
48void zs_free(struct zs_pool *pool, unsigned long obj); 48void zs_free(struct zs_pool *pool, unsigned long obj);
49 49
50size_t zs_huge_class_size(struct zs_pool *pool);
51
50void *zs_map_object(struct zs_pool *pool, unsigned long handle, 52void *zs_map_object(struct zs_pool *pool, unsigned long handle,
51 enum zs_mapmode mm); 53 enum zs_mapmode mm);
52void zs_unmap_object(struct zs_pool *pool, unsigned long handle); 54void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
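zram can now ask zsmalloc for the threshold above which an object occupies a whole zspage, rather than hard-coding max_zpage_size; a hedged sketch of the intended use:

	/* Store the page uncompressed once compression stops paying off. */
	static bool example_store_huge(struct zs_pool *pool, size_t comp_len)
	{
		return comp_len >= zs_huge_class_size(pool);
	}
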
diff --git a/include/net/sock.h b/include/net/sock.h
index 49bd2c1796b0..74d725fdbe0f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1114,8 +1114,8 @@ struct proto {
1114 struct kmem_cache *slab; 1114 struct kmem_cache *slab;
1115 unsigned int obj_size; 1115 unsigned int obj_size;
1116 slab_flags_t slab_flags; 1116 slab_flags_t slab_flags;
1117 size_t useroffset; /* Usercopy region offset */ 1117 unsigned int useroffset; /* Usercopy region offset */
1118 size_t usersize; /* Usercopy region size */ 1118 unsigned int usersize; /* Usercopy region size */
1119 1119
1120 struct percpu_counter *orphan_count; 1120 struct percpu_counter *orphan_count;
1121 1121
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index bcf4daccd6be..711372845945 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -20,7 +20,7 @@
20 EM( MR_SYSCALL, "syscall_or_cpuset") \ 20 EM( MR_SYSCALL, "syscall_or_cpuset") \
21 EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \ 21 EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \
22 EM( MR_NUMA_MISPLACED, "numa_misplaced") \ 22 EM( MR_NUMA_MISPLACED, "numa_misplaced") \
23 EMe(MR_CMA, "cma") 23 EMe(MR_CONTIG_RANGE, "contig_range")
24 24
25/* 25/*
26 * First define the enums in the above macros to be exported to userspace 26 * First define the enums in the above macros to be exported to userspace
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index e0b8b9173e1c..6570c5b45ba1 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -78,26 +78,29 @@ TRACE_EVENT(mm_vmscan_kswapd_wake,
78 78
79TRACE_EVENT(mm_vmscan_wakeup_kswapd, 79TRACE_EVENT(mm_vmscan_wakeup_kswapd,
80 80
81 TP_PROTO(int nid, int zid, int order), 81 TP_PROTO(int nid, int zid, int order, gfp_t gfp_flags),
82 82
83 TP_ARGS(nid, zid, order), 83 TP_ARGS(nid, zid, order, gfp_flags),
84 84
85 TP_STRUCT__entry( 85 TP_STRUCT__entry(
86 __field( int, nid ) 86 __field( int, nid )
87 __field( int, zid ) 87 __field( int, zid )
88 __field( int, order ) 88 __field( int, order )
89 __field( gfp_t, gfp_flags )
89 ), 90 ),
90 91
91 TP_fast_assign( 92 TP_fast_assign(
92 __entry->nid = nid; 93 __entry->nid = nid;
93 __entry->zid = zid; 94 __entry->zid = zid;
94 __entry->order = order; 95 __entry->order = order;
96 __entry->gfp_flags = gfp_flags;
95 ), 97 ),
96 98
97 TP_printk("nid=%d zid=%d order=%d", 99 TP_printk("nid=%d zid=%d order=%d gfp_flags=%s",
98 __entry->nid, 100 __entry->nid,
99 __entry->zid, 101 __entry->zid,
100 __entry->order) 102 __entry->order,
103 show_gfp_flags(__entry->gfp_flags))
101); 104);
102 105
103DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template, 106DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template,
diff --git a/kernel/fork.c b/kernel/fork.c
index f71b67dc156d..242c8c93d285 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -595,6 +595,8 @@ static void check_mm(struct mm_struct *mm)
595void __mmdrop(struct mm_struct *mm) 595void __mmdrop(struct mm_struct *mm)
596{ 596{
597 BUG_ON(mm == &init_mm); 597 BUG_ON(mm == &init_mm);
598 WARN_ON_ONCE(mm == current->mm);
599 WARN_ON_ONCE(mm == current->active_mm);
598 mm_free_pgd(mm); 600 mm_free_pgd(mm);
599 destroy_context(mm); 601 destroy_context(mm);
600 hmm_mm_destroy(mm); 602 hmm_mm_destroy(mm);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 28b68995a417..e8afd6086f23 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5560,6 +5560,7 @@ void idle_task_exit(void)
5560 5560
5561 if (mm != &init_mm) { 5561 if (mm != &init_mm) {
5562 switch_mm(mm, &init_mm, current); 5562 switch_mm(mm, &init_mm, current);
5563 current->active_mm = &init_mm;
5563 finish_arch_post_lock_switch(); 5564 finish_arch_post_lock_switch();
5564 } 5565 }
5565 mmdrop(mm); 5566 mmdrop(mm);
diff --git a/kernel/ucount.c b/kernel/ucount.c
index b4eeee03934f..f48d1b6376a4 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -10,6 +10,7 @@
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/cred.h> 11#include <linux/cred.h>
12#include <linux/hash.h> 12#include <linux/hash.h>
13#include <linux/kmemleak.h>
13#include <linux/user_namespace.h> 14#include <linux/user_namespace.h>
14 15
15#define UCOUNTS_HASHTABLE_BITS 10 16#define UCOUNTS_HASHTABLE_BITS 10
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 9e498c77ed0e..a42eff7e8c48 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -607,7 +607,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
607 /* if no digit is after '-', it's wrong*/ 607 /* if no digit is after '-', it's wrong*/
608 if (at_start && in_range) 608 if (at_start && in_range)
609 return -EINVAL; 609 return -EINVAL;
610 if (!(a <= b) || !(used_size <= group_size)) 610 if (!(a <= b) || group_size == 0 || !(used_size <= group_size))
611 return -EINVAL; 611 return -EINVAL;
612 if (b >= nmaskbits) 612 if (b >= nmaskbits)
613 return -ERANGE; 613 return -ERANGE;
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index b3f235baa05d..413367cf569e 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -255,6 +255,10 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
255 {-EINVAL, "-1", NULL, 8, 0}, 255 {-EINVAL, "-1", NULL, 8, 0},
256 {-EINVAL, "-0", NULL, 8, 0}, 256 {-EINVAL, "-0", NULL, 8, 0},
257 {-EINVAL, "10-1", NULL, 8, 0}, 257 {-EINVAL, "10-1", NULL, 8, 0},
258 {-EINVAL, "0-31:", NULL, 8, 0},
259 {-EINVAL, "0-31:0", NULL, 8, 0},
260 {-EINVAL, "0-31:0/0", NULL, 8, 0},
261 {-EINVAL, "0-31:1/0", NULL, 8, 0},
258 {-EINVAL, "0-31:10/1", NULL, 8, 0}, 262 {-EINVAL, "0-31:10/1", NULL, 8, 0},
259}; 263};
260 264
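The new test vectors exercise the N-M:U/G grouping syntax with an empty or zero-sized group, which __bitmap_parselist() now rejects; a valid string and the kernel-side call look like this (sketch):

	#include <linux/bitmap.h>

	static int example_parse(void)
	{
		DECLARE_BITMAP(mask, 32);

		/* Sets bits 0,4,8,...,28: 1 bit used per group of 4 over 0-31. */
		return bitmap_parselist("0-31:1/4", mask, 32);
		/* A zero group size such as "0-31:1/0" now fails with -EINVAL. */
	}
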
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 078a61480573..cee000ac54d8 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -21,6 +21,7 @@
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/delay.h> 22#include <linux/delay.h>
23#include <linux/kthread.h> 23#include <linux/kthread.h>
24#include <linux/vmalloc.h>
24 25
25#define TEST_FIRMWARE_NAME "test-firmware.bin" 26#define TEST_FIRMWARE_NAME "test-firmware.bin"
26#define TEST_FIRMWARE_NUM_REQS 4 27#define TEST_FIRMWARE_NUM_REQS 4
diff --git a/mm/Makefile b/mm/Makefile
index e669f02c5a54..b4e54a9ae9c5 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -37,7 +37,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
37 readahead.o swap.o truncate.o vmscan.o shmem.o \ 37 readahead.o swap.o truncate.o vmscan.o shmem.o \
38 util.o mmzone.o vmstat.o backing-dev.o \ 38 util.o mmzone.o vmstat.o backing-dev.o \
39 mm_init.o mmu_context.o percpu.o slab_common.o \ 39 mm_init.o mmu_context.o percpu.o slab_common.o \
40 compaction.o vmacache.o swap_slots.o \ 40 compaction.o vmacache.o \
41 interval_tree.o list_lru.o workingset.o \ 41 interval_tree.o list_lru.o workingset.o \
42 debug.o $(mmu-y) 42 debug.o $(mmu-y)
43 43
@@ -55,7 +55,7 @@ ifdef CONFIG_MMU
55endif 55endif
56obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 56obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
57 57
58obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o 58obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o swap_slots.o
59obj-$(CONFIG_FRONTSWAP) += frontswap.o 59obj-$(CONFIG_FRONTSWAP) += frontswap.o
60obj-$(CONFIG_ZSWAP) += zswap.o 60obj-$(CONFIG_ZSWAP) += zswap.o
61obj-$(CONFIG_HAS_DMA) += dmapool.o 61obj-$(CONFIG_HAS_DMA) += dmapool.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d2984e9fcf08..08b9aab631ab 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -100,18 +100,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
100 100
101 return 0; 101 return 0;
102} 102}
103 103DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats);
104static int bdi_debug_stats_open(struct inode *inode, struct file *file)
105{
106 return single_open(file, bdi_debug_stats_show, inode->i_private);
107}
108
109static const struct file_operations bdi_debug_stats_fops = {
110 .open = bdi_debug_stats_open,
111 .read = seq_read,
112 .llseek = seq_lseek,
113 .release = single_release,
114};
115 104
116static int bdi_debug_register(struct backing_dev_info *bdi, const char *name) 105static int bdi_debug_register(struct backing_dev_info *bdi, const char *name)
117{ 106{
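
For readers unfamiliar with the helper: DEFINE_SHOW_ATTRIBUTE() from <linux/seq_file.h> generates roughly the boilerplate that the deleted lines spelled out by hand, an open wrapper around single_open() plus the matching file_operations. A sketch of the macro's effect for this caller (approximate, not the literal header text):

/* Roughly what DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats) provides: */
static int bdi_debug_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, bdi_debug_stats_show, inode->i_private);
}

static const struct file_operations bdi_debug_stats_fops = {
	.owner		= THIS_MODULE,
	.open		= bdi_debug_stats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
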
diff --git a/mm/cma.c b/mm/cma.c
index 0607729abf3b..5809bbe360d7 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -35,6 +35,7 @@
35#include <linux/cma.h> 35#include <linux/cma.h>
36#include <linux/highmem.h> 36#include <linux/highmem.h>
37#include <linux/io.h> 37#include <linux/io.h>
38#include <linux/kmemleak.h>
38#include <trace/events/cma.h> 39#include <trace/events/cma.h>
39 40
40#include "cma.h" 41#include "cma.h"
@@ -165,6 +166,9 @@ core_initcall(cma_init_reserved_areas);
165 * @base: Base address of the reserved area 166 * @base: Base address of the reserved area
166 * @size: Size of the reserved area (in bytes), 167 * @size: Size of the reserved area (in bytes),
167 * @order_per_bit: Order of pages represented by one bit on bitmap. 168 * @order_per_bit: Order of pages represented by one bit on bitmap.
169 * @name: The name of the area. If this parameter is NULL, the name of
170 * the area will be set to "cmaN", where N is a running counter of
171 * used areas.
168 * @res_cma: Pointer to store the created cma region. 172 * @res_cma: Pointer to store the created cma region.
169 * 173 *
170 * This function creates custom contiguous area from already reserved memory. 174 * This function creates custom contiguous area from already reserved memory.
@@ -227,6 +231,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
227 * @alignment: Alignment for the CMA area, should be power of 2 or zero 231 * @alignment: Alignment for the CMA area, should be power of 2 or zero
228 * @order_per_bit: Order of pages represented by one bit on bitmap. 232 * @order_per_bit: Order of pages represented by one bit on bitmap.
229 * @fixed: hint about where to place the reserved area 233 * @fixed: hint about where to place the reserved area
234 * @name: The name of the area. See function cma_init_reserved_mem()
230 * @res_cma: Pointer to store the created cma region. 235 * @res_cma: Pointer to store the created cma region.
231 * 236 *
232 * This function reserves memory from early allocator. It should be 237 * This function reserves memory from early allocator. It should be
@@ -390,6 +395,7 @@ static inline void cma_debug_show_areas(struct cma *cma) { }
390 * @cma: Contiguous memory region for which the allocation is performed. 395 * @cma: Contiguous memory region for which the allocation is performed.
391 * @count: Requested number of pages. 396 * @count: Requested number of pages.
392 * @align: Requested alignment of pages (in PAGE_SIZE order). 397 * @align: Requested alignment of pages (in PAGE_SIZE order).
398 * @gfp_mask: GFP mask to use during compaction
393 * 399 *
394 * This function allocates part of contiguous memory on specific 400 * This function allocates part of contiguous memory on specific
395 * contiguous memory area. 401 * contiguous memory area.
diff --git a/mm/compaction.c b/mm/compaction.c
index 2c8999d027ab..88d01a50a015 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -576,6 +576,7 @@ isolate_fail:
576 576
577/** 577/**
578 * isolate_freepages_range() - isolate free pages. 578 * isolate_freepages_range() - isolate free pages.
579 * @cc: Compaction control structure.
579 * @start_pfn: The first PFN to start isolating. 580 * @start_pfn: The first PFN to start isolating.
580 * @end_pfn: The one-past-last PFN. 581 * @end_pfn: The one-past-last PFN.
581 * 582 *
@@ -1988,6 +1989,14 @@ static void kcompactd_do_work(pg_data_t *pgdat)
1988 compaction_defer_reset(zone, cc.order, false); 1989 compaction_defer_reset(zone, cc.order, false);
1989 } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) { 1990 } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
1990 /* 1991 /*
1992 * Buddy pages may become stranded on pcps that could
1993 * otherwise coalesce on the zone's free area for
1994 * order >= cc.order. This is ratelimited by the
1995 * upcoming deferral.
1996 */
1997 drain_all_pages(zone);
1998
1999 /*
1991 * We use sync migration mode here, so we defer like 2000 * We use sync migration mode here, so we defer like
1992 * sync direct compaction does. 2001 * sync direct compaction does.
1993 */ 2002 */
diff --git a/mm/failslab.c b/mm/failslab.c
index 8087d976a809..1f2f248e3601 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -14,7 +14,7 @@ static struct {
14 .cache_filter = false, 14 .cache_filter = false,
15}; 15};
16 16
17bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) 17bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
18{ 18{
19 /* No fault-injection for bootstrap cache */ 19 /* No fault-injection for bootstrap cache */
20 if (unlikely(s == kmem_cache)) 20 if (unlikely(s == kmem_cache))
diff --git a/mm/gup.c b/mm/gup.c
index 6afae32571ca..f296df6cf666 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -531,7 +531,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
531 * reCOWed by userspace write). 531 * reCOWed by userspace write).
532 */ 532 */
533 if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) 533 if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
534 *flags |= FOLL_COW; 534 *flags |= FOLL_COW;
535 return 0; 535 return 0;
536} 536}
537 537
@@ -1638,7 +1638,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
1638 PMD_SHIFT, next, write, pages, nr)) 1638 PMD_SHIFT, next, write, pages, nr))
1639 return 0; 1639 return 0;
1640 } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) 1640 } else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
1641 return 0; 1641 return 0;
1642 } while (pmdp++, addr = next, addr != end); 1642 } while (pmdp++, addr = next, addr != end);
1643 1643
1644 return 1; 1644 return 1;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5a68730eebd6..f0ae8d1d4329 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2356,26 +2356,13 @@ static void __split_huge_page_tail(struct page *head, int tail,
2356 struct page *page_tail = head + tail; 2356 struct page *page_tail = head + tail;
2357 2357
2358 VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); 2358 VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail);
2359 VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail);
2360 2359
2361 /* 2360 /*
2362 * tail_page->_refcount is zero and not changing from under us. But 2361 * Clone page flags before unfreezing refcount.
2363 * get_page_unless_zero() may be running from under us on the 2362 *
2364 * tail_page. If we used atomic_set() below instead of atomic_inc() or 2363 * After successful get_page_unless_zero() might follow flags change,
2365 * atomic_add(), we would then run atomic_set() concurrently with 2364 * for exmaple lock_page() which set PG_waiters.
2366 * get_page_unless_zero(), and atomic_set() is implemented in C not
2367 * using locked ops. spin_unlock on x86 sometime uses locked ops
2368 * because of PPro errata 66, 92, so unless somebody can guarantee
2369 * atomic_set() here would be safe on all archs (and not only on x86),
2370 * it's safer to use atomic_inc()/atomic_add().
2371 */ 2365 */
2372 if (PageAnon(head) && !PageSwapCache(head)) {
2373 page_ref_inc(page_tail);
2374 } else {
2375 /* Additional pin to radix tree */
2376 page_ref_add(page_tail, 2);
2377 }
2378
2379 page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; 2366 page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
2380 page_tail->flags |= (head->flags & 2367 page_tail->flags |= (head->flags &
2381 ((1L << PG_referenced) | 2368 ((1L << PG_referenced) |
@@ -2388,14 +2375,21 @@ static void __split_huge_page_tail(struct page *head, int tail,
2388 (1L << PG_unevictable) | 2375 (1L << PG_unevictable) |
2389 (1L << PG_dirty))); 2376 (1L << PG_dirty)));
2390 2377
2391 /* 2378 /* Page flags must be visible before we make the page non-compound. */
2392 * After clearing PageTail the gup refcount can be released.
2393 * Page flags also must be visible before we make the page non-compound.
2394 */
2395 smp_wmb(); 2379 smp_wmb();
2396 2380
2381 /*
2382 * Clear PageTail before unfreezing page refcount.
2383 *
2384 * After successful get_page_unless_zero() might follow put_page()
2385 * which needs correct compound_head().
2386 */
2397 clear_compound_head(page_tail); 2387 clear_compound_head(page_tail);
2398 2388
2389 /* Finally unfreeze refcount. Additional reference from page cache. */
2390 page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) ||
2391 PageSwapCache(head)));
2392
2399 if (page_is_young(head)) 2393 if (page_is_young(head))
2400 set_page_young(page_tail); 2394 set_page_young(page_tail);
2401 if (page_is_idle(head)) 2395 if (page_is_idle(head))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 976bbc5646fe..218679138255 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -637,29 +637,22 @@ EXPORT_SYMBOL_GPL(linear_hugepage_index);
637 */ 637 */
638unsigned long vma_kernel_pagesize(struct vm_area_struct *vma) 638unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
639{ 639{
640 struct hstate *hstate; 640 if (vma->vm_ops && vma->vm_ops->pagesize)
641 641 return vma->vm_ops->pagesize(vma);
642 if (!is_vm_hugetlb_page(vma)) 642 return PAGE_SIZE;
643 return PAGE_SIZE;
644
645 hstate = hstate_vma(vma);
646
647 return 1UL << huge_page_shift(hstate);
648} 643}
649EXPORT_SYMBOL_GPL(vma_kernel_pagesize); 644EXPORT_SYMBOL_GPL(vma_kernel_pagesize);
650 645
651/* 646/*
652 * Return the page size being used by the MMU to back a VMA. In the majority 647 * Return the page size being used by the MMU to back a VMA. In the majority
653 * of cases, the page size used by the kernel matches the MMU size. On 648 * of cases, the page size used by the kernel matches the MMU size. On
654 * architectures where it differs, an architecture-specific version of this 649 * architectures where it differs, an architecture-specific 'strong'
655 * function is required. 650 * version of this symbol is required.
656 */ 651 */
657#ifndef vma_mmu_pagesize 652__weak unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
658unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
659{ 653{
660 return vma_kernel_pagesize(vma); 654 return vma_kernel_pagesize(vma);
661} 655}
662#endif
663 656
664/* 657/*
665 * Flags for MAP_PRIVATE reservations. These are stored in the bottom 658 * Flags for MAP_PRIVATE reservations. These are stored in the bottom
@@ -3153,6 +3146,13 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
3153 return 0; 3146 return 0;
3154} 3147}
3155 3148
3149static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
3150{
3151 struct hstate *hstate = hstate_vma(vma);
3152
3153 return 1UL << huge_page_shift(hstate);
3154}
3155
3156/* 3156/*
3157 * We cannot handle pagefaults against hugetlb pages at all. They cause 3157 * We cannot handle pagefaults against hugetlb pages at all. They cause
3158 * handle_mm_fault() to try to instantiate regular-sized pages in the 3158 * handle_mm_fault() to try to instantiate regular-sized pages in the
@@ -3170,6 +3170,7 @@ const struct vm_operations_struct hugetlb_vm_ops = {
3170 .open = hugetlb_vm_op_open, 3170 .open = hugetlb_vm_op_open,
3171 .close = hugetlb_vm_op_close, 3171 .close = hugetlb_vm_op_close,
3172 .split = hugetlb_vm_op_split, 3172 .split = hugetlb_vm_op_split,
3173 .pagesize = hugetlb_vm_op_pagesize,
3173}; 3174};
3174 3175
3175static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, 3176static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
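
Any mapping backed by pages larger than PAGE_SIZE can now report its size through the new ->pagesize() hook, so vma_kernel_pagesize() no longer needs hugetlb-specific knowledge. A hypothetical driver could wire it up the same way hugetlb_vm_op_pagesize() does; the names and the 2MB figure below are illustrative only, not taken from this series:

#include <linux/mm.h>
#include <linux/sizes.h>

/* Hypothetical: every VMA created by this driver is backed by 2MB pages. */
static unsigned long mydrv_vm_pagesize(struct vm_area_struct *vma)
{
	return SZ_2M;
}

static const struct vm_operations_struct mydrv_vm_ops = {
	/* a real driver would also set .fault, .open, .close, ... */
	.pagesize	= mydrv_vm_pagesize,
};
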
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index e13d911251e7..bc0e68f7dc75 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -323,9 +323,9 @@ void kasan_free_pages(struct page *page, unsigned int order)
323 * Adaptive redzone policy taken from the userspace AddressSanitizer runtime. 323 * Adaptive redzone policy taken from the userspace AddressSanitizer runtime.
324 * For larger allocations larger redzones are used. 324 * For larger allocations larger redzones are used.
325 */ 325 */
326static size_t optimal_redzone(size_t object_size) 326static unsigned int optimal_redzone(unsigned int object_size)
327{ 327{
328 int rz = 328 return
329 object_size <= 64 - 16 ? 16 : 329 object_size <= 64 - 16 ? 16 :
330 object_size <= 128 - 32 ? 32 : 330 object_size <= 128 - 32 ? 32 :
331 object_size <= 512 - 64 ? 64 : 331 object_size <= 512 - 64 ? 64 :
@@ -333,14 +333,13 @@ static size_t optimal_redzone(size_t object_size)
333 object_size <= (1 << 14) - 256 ? 256 : 333 object_size <= (1 << 14) - 256 ? 256 :
334 object_size <= (1 << 15) - 512 ? 512 : 334 object_size <= (1 << 15) - 512 ? 512 :
335 object_size <= (1 << 16) - 1024 ? 1024 : 2048; 335 object_size <= (1 << 16) - 1024 ? 1024 : 2048;
336 return rz;
337} 336}
338 337
339void kasan_cache_create(struct kmem_cache *cache, size_t *size, 338void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
340 slab_flags_t *flags) 339 slab_flags_t *flags)
341{ 340{
341 unsigned int orig_size = *size;
342 int redzone_adjust; 342 int redzone_adjust;
343 int orig_size = *size;
344 343
345 /* Add alloc meta. */ 344 /* Add alloc meta. */
346 cache->kasan_info.alloc_meta_offset = *size; 345 cache->kasan_info.alloc_meta_offset = *size;
@@ -358,7 +357,8 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size,
358 if (redzone_adjust > 0) 357 if (redzone_adjust > 0)
359 *size += redzone_adjust; 358 *size += redzone_adjust;
360 359
361 *size = min(KMALLOC_MAX_SIZE, max(*size, cache->object_size + 360 *size = min_t(unsigned int, KMALLOC_MAX_SIZE,
361 max(*size, cache->object_size +
362 optimal_redzone(cache->object_size))); 362 optimal_redzone(cache->object_size)));
363 363
364 /* 364 /*
@@ -382,7 +382,8 @@ void kasan_cache_shrink(struct kmem_cache *cache)
382 382
383void kasan_cache_shutdown(struct kmem_cache *cache) 383void kasan_cache_shutdown(struct kmem_cache *cache)
384{ 384{
385 quarantine_remove_cache(cache); 385 if (!__kmem_cache_empty(cache))
386 quarantine_remove_cache(cache);
386} 387}
387 388
388size_t kasan_metadata_size(struct kmem_cache *cache) 389size_t kasan_metadata_size(struct kmem_cache *cache)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 46c2290a08f1..9a085d525bbc 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1187,6 +1187,11 @@ EXPORT_SYMBOL(kmemleak_no_scan);
1187/** 1187/**
1188 * kmemleak_alloc_phys - similar to kmemleak_alloc but taking a physical 1188 * kmemleak_alloc_phys - similar to kmemleak_alloc but taking a physical
1189 * address argument 1189 * address argument
1190 * @phys: physical address of the object
1191 * @size: size of the object
1192 * @min_count: minimum number of references to this object.
1193 * See kmemleak_alloc()
1194 * @gfp: kmalloc() flags used for kmemleak internal memory allocations
1190 */ 1195 */
1191void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, 1196void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
1192 gfp_t gfp) 1197 gfp_t gfp)
@@ -1199,6 +1204,9 @@ EXPORT_SYMBOL(kmemleak_alloc_phys);
1199/** 1204/**
1200 * kmemleak_free_part_phys - similar to kmemleak_free_part but taking a 1205 * kmemleak_free_part_phys - similar to kmemleak_free_part but taking a
1201 * physical address argument 1206 * physical address argument
1207 * @phys: physical address of the beginning or inside an object. This
1208 * also represents the start of the range to be freed
1209 * @size: size to be unregistered
1202 */ 1210 */
1203void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size) 1211void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size)
1204{ 1212{
@@ -1210,6 +1218,7 @@ EXPORT_SYMBOL(kmemleak_free_part_phys);
1210/** 1218/**
1211 * kmemleak_not_leak_phys - similar to kmemleak_not_leak but taking a physical 1219 * kmemleak_not_leak_phys - similar to kmemleak_not_leak but taking a physical
1212 * address argument 1220 * address argument
1221 * @phys: physical address of the object
1213 */ 1222 */
1214void __ref kmemleak_not_leak_phys(phys_addr_t phys) 1223void __ref kmemleak_not_leak_phys(phys_addr_t phys)
1215{ 1224{
@@ -1221,6 +1230,7 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys);
1221/** 1230/**
1222 * kmemleak_ignore_phys - similar to kmemleak_ignore but taking a physical 1231 * kmemleak_ignore_phys - similar to kmemleak_ignore but taking a physical
1223 * address argument 1232 * address argument
1233 * @phys: physical address of the object
1224 */ 1234 */
1225void __ref kmemleak_ignore_phys(phys_addr_t phys) 1235void __ref kmemleak_ignore_phys(phys_addr_t phys)
1226{ 1236{
@@ -1963,7 +1973,7 @@ static void kmemleak_disable(void)
1963/* 1973/*
1964 * Allow boot-time kmemleak disabling (enabled by default). 1974 * Allow boot-time kmemleak disabling (enabled by default).
1965 */ 1975 */
1966static int kmemleak_boot_config(char *str) 1976static int __init kmemleak_boot_config(char *str)
1967{ 1977{
1968 if (!str) 1978 if (!str)
1969 return -EINVAL; 1979 return -EINVAL;
diff --git a/mm/ksm.c b/mm/ksm.c
index adb5f991da8e..e8d6c6210b80 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1318,10 +1318,10 @@ bool is_page_sharing_candidate(struct stable_node *stable_node)
1318 return __is_page_sharing_candidate(stable_node, 0); 1318 return __is_page_sharing_candidate(stable_node, 0);
1319} 1319}
1320 1320
1321struct page *stable_node_dup(struct stable_node **_stable_node_dup, 1321static struct page *stable_node_dup(struct stable_node **_stable_node_dup,
1322 struct stable_node **_stable_node, 1322 struct stable_node **_stable_node,
1323 struct rb_root *root, 1323 struct rb_root *root,
1324 bool prune_stale_stable_nodes) 1324 bool prune_stale_stable_nodes)
1325{ 1325{
1326 struct stable_node *dup, *found = NULL, *stable_node = *_stable_node; 1326 struct stable_node *dup, *found = NULL, *stable_node = *_stable_node;
1327 struct hlist_node *hlist_safe; 1327 struct hlist_node *hlist_safe;
@@ -2082,8 +2082,22 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
2082 tree_rmap_item = 2082 tree_rmap_item =
2083 unstable_tree_search_insert(rmap_item, page, &tree_page); 2083 unstable_tree_search_insert(rmap_item, page, &tree_page);
2084 if (tree_rmap_item) { 2084 if (tree_rmap_item) {
2085 bool split;
2086
2085 kpage = try_to_merge_two_pages(rmap_item, page, 2087 kpage = try_to_merge_two_pages(rmap_item, page,
2086 tree_rmap_item, tree_page); 2088 tree_rmap_item, tree_page);
2089 /*
2090 * If both pages we tried to merge belong to the same compound
2091 * page, then we actually ended up increasing the reference
2092 * count of the same compound page twice, and split_huge_page
2093 * failed.
2094 * Here we set a flag if that happened, and we use it later to
2095 * try split_huge_page again. Since we call put_page right
2096 * afterwards, the reference count will be correct and
2097 * split_huge_page should succeed.
2098 */
2099 split = PageTransCompound(page)
2100 && compound_head(page) == compound_head(tree_page);
2087 put_page(tree_page); 2101 put_page(tree_page);
2088 if (kpage) { 2102 if (kpage) {
2089 /* 2103 /*
@@ -2110,6 +2124,20 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
2110 break_cow(tree_rmap_item); 2124 break_cow(tree_rmap_item);
2111 break_cow(rmap_item); 2125 break_cow(rmap_item);
2112 } 2126 }
2127 } else if (split) {
2128 /*
2129 * We are here if we tried to merge two pages and
2130 * failed because they both belonged to the same
2131 * compound page. We will split the page now, but no
2132 * merging will take place.
2133 * We do not want to add the cost of a full lock; if
2134 * the page is locked, it is better to skip it and
2135 * perhaps try again later.
2136 */
2137 if (!trylock_page(page))
2138 return;
2139 split_huge_page(page);
2140 unlock_page(page);
2113 } 2141 }
2114 } 2142 }
2115} 2143}
diff --git a/mm/list_lru.c b/mm/list_lru.c
index fd41e969ede5..fcfb6c89ed47 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -52,14 +52,15 @@ static inline bool list_lru_memcg_aware(struct list_lru *lru)
52static inline struct list_lru_one * 52static inline struct list_lru_one *
53list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx) 53list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
54{ 54{
55 struct list_lru_memcg *memcg_lrus;
55 /* 56 /*
56 * The lock protects the array of per cgroup lists from relocation 57 * Either lock or RCU protects the array of per cgroup lists
57 * (see memcg_update_list_lru_node). 58 * from relocation (see memcg_update_list_lru_node).
58 */ 59 */
59 lockdep_assert_held(&nlru->lock); 60 memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
60 if (nlru->memcg_lrus && idx >= 0) 61 lockdep_is_held(&nlru->lock));
61 return nlru->memcg_lrus->lru[idx]; 62 if (memcg_lrus && idx >= 0)
62 63 return memcg_lrus->lru[idx];
63 return &nlru->lru; 64 return &nlru->lru;
64} 65}
65 66
@@ -168,10 +169,10 @@ static unsigned long __list_lru_count_one(struct list_lru *lru,
168 struct list_lru_one *l; 169 struct list_lru_one *l;
169 unsigned long count; 170 unsigned long count;
170 171
171 spin_lock(&nlru->lock); 172 rcu_read_lock();
172 l = list_lru_from_memcg_idx(nlru, memcg_idx); 173 l = list_lru_from_memcg_idx(nlru, memcg_idx);
173 count = l->nr_items; 174 count = l->nr_items;
174 spin_unlock(&nlru->lock); 175 rcu_read_unlock();
175 176
176 return count; 177 return count;
177} 178}
@@ -324,24 +325,41 @@ fail:
324 325
325static int memcg_init_list_lru_node(struct list_lru_node *nlru) 326static int memcg_init_list_lru_node(struct list_lru_node *nlru)
326{ 327{
328 struct list_lru_memcg *memcg_lrus;
327 int size = memcg_nr_cache_ids; 329 int size = memcg_nr_cache_ids;
328 330
329 nlru->memcg_lrus = kvmalloc(size * sizeof(void *), GFP_KERNEL); 331 memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
330 if (!nlru->memcg_lrus) 332 size * sizeof(void *), GFP_KERNEL);
333 if (!memcg_lrus)
331 return -ENOMEM; 334 return -ENOMEM;
332 335
333 if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) { 336 if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
334 kvfree(nlru->memcg_lrus); 337 kvfree(memcg_lrus);
335 return -ENOMEM; 338 return -ENOMEM;
336 } 339 }
340 RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);
337 341
338 return 0; 342 return 0;
339} 343}
340 344
341static void memcg_destroy_list_lru_node(struct list_lru_node *nlru) 345static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
342{ 346{
343 __memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids); 347 struct list_lru_memcg *memcg_lrus;
344 kvfree(nlru->memcg_lrus); 348 /*
349 * This is called when shrinker has already been unregistered,
350 * and nobody can use it. So, there is no need to use kvfree_rcu().
351 */
352 memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
353 __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
354 kvfree(memcg_lrus);
355}
356
357static void kvfree_rcu(struct rcu_head *head)
358{
359 struct list_lru_memcg *mlru;
360
361 mlru = container_of(head, struct list_lru_memcg, rcu);
362 kvfree(mlru);
345} 363}
346 364
347static int memcg_update_list_lru_node(struct list_lru_node *nlru, 365static int memcg_update_list_lru_node(struct list_lru_node *nlru,
@@ -351,8 +369,9 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
351 369
352 BUG_ON(old_size > new_size); 370 BUG_ON(old_size > new_size);
353 371
354 old = nlru->memcg_lrus; 372 old = rcu_dereference_protected(nlru->memcg_lrus,
355 new = kvmalloc(new_size * sizeof(void *), GFP_KERNEL); 373 lockdep_is_held(&list_lrus_mutex));
374 new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
356 if (!new) 375 if (!new)
357 return -ENOMEM; 376 return -ENOMEM;
358 377
@@ -361,29 +380,33 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
361 return -ENOMEM; 380 return -ENOMEM;
362 } 381 }
363 382
364 memcpy(new, old, old_size * sizeof(void *)); 383 memcpy(&new->lru, &old->lru, old_size * sizeof(void *));
365 384
366 /* 385 /*
367 * The lock guarantees that we won't race with a reader 386 * The locking below allows readers that hold nlru->lock avoid taking
368 * (see list_lru_from_memcg_idx). 387 * rcu_read_lock (see list_lru_from_memcg_idx).
369 * 388 *
370 * Since list_lru_{add,del} may be called under an IRQ-safe lock, 389 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
371 * we have to use IRQ-safe primitives here to avoid deadlock. 390 * we have to use IRQ-safe primitives here to avoid deadlock.
372 */ 391 */
373 spin_lock_irq(&nlru->lock); 392 spin_lock_irq(&nlru->lock);
374 nlru->memcg_lrus = new; 393 rcu_assign_pointer(nlru->memcg_lrus, new);
375 spin_unlock_irq(&nlru->lock); 394 spin_unlock_irq(&nlru->lock);
376 395
377 kvfree(old); 396 call_rcu(&old->rcu, kvfree_rcu);
378 return 0; 397 return 0;
379} 398}
380 399
381static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru, 400static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
382 int old_size, int new_size) 401 int old_size, int new_size)
383{ 402{
403 struct list_lru_memcg *memcg_lrus;
404
405 memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
406 lockdep_is_held(&list_lrus_mutex));
384 /* do not bother shrinking the array back to the old size, because we 407 /* do not bother shrinking the array back to the old size, because we
385 * cannot handle allocation failures here */ 408 * cannot handle allocation failures here */
386 __memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size); 409 __memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
387} 410}
388 411
389static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) 412static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
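
The lockless __list_lru_count_one() path above works because nlru->memcg_lrus is now an RCU-protected pointer: readers take rcu_read_lock() and may observe a slightly stale array, while updaters publish the replacement with rcu_assign_pointer() under nlru->lock and free the old array only after a grace period (here via call_rcu() and a custom callback, since the array may be kvmalloc()-ed and kfree_rcu() only handles kmalloc()-ed memory). A generic sketch of the same pattern, with illustrative names unrelated to list_lru:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct cfg {
	struct rcu_head rcu;
	int nr;
};

static struct cfg __rcu *active_cfg;
static DEFINE_SPINLOCK(cfg_lock);

/* Reader: no lock; tolerates seeing the old object for one grace period. */
static int cfg_read_nr(void)
{
	struct cfg *c;
	int nr;

	rcu_read_lock();
	c = rcu_dereference(active_cfg);
	nr = c ? c->nr : 0;
	rcu_read_unlock();
	return nr;
}

/* Updater: publish the new object, free the old one after a grace period. */
static void cfg_replace(struct cfg *new)
{
	struct cfg *old;

	spin_lock(&cfg_lock);
	old = rcu_dereference_protected(active_cfg,
					lockdep_is_held(&cfg_lock));
	rcu_assign_pointer(active_cfg, new);
	spin_unlock(&cfg_lock);

	if (old)
		kfree_rcu(old, rcu);	/* kmalloc()-ed here, so kfree_rcu() suffices */
}
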
diff --git a/mm/memblock.c b/mm/memblock.c
index 48376bd33274..9b04568ad42a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -17,6 +17,7 @@
17#include <linux/poison.h> 17#include <linux/poison.h>
18#include <linux/pfn.h> 18#include <linux/pfn.h>
19#include <linux/debugfs.h> 19#include <linux/debugfs.h>
20#include <linux/kmemleak.h>
20#include <linux/seq_file.h> 21#include <linux/seq_file.h>
21#include <linux/memblock.h> 22#include <linux/memblock.h>
22 23
@@ -924,7 +925,7 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
924 r = &type_b->regions[idx_b]; 925 r = &type_b->regions[idx_b];
925 r_start = idx_b ? r[-1].base + r[-1].size : 0; 926 r_start = idx_b ? r[-1].base + r[-1].size : 0;
926 r_end = idx_b < type_b->cnt ? 927 r_end = idx_b < type_b->cnt ?
927 r->base : ULLONG_MAX; 928 r->base : (phys_addr_t)ULLONG_MAX;
928 929
929 /* 930 /*
930 * if idx_b advanced past idx_a, 931 * if idx_b advanced past idx_a,
@@ -1040,7 +1041,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
1040 r = &type_b->regions[idx_b]; 1041 r = &type_b->regions[idx_b];
1041 r_start = idx_b ? r[-1].base + r[-1].size : 0; 1042 r_start = idx_b ? r[-1].base + r[-1].size : 0;
1042 r_end = idx_b < type_b->cnt ? 1043 r_end = idx_b < type_b->cnt ?
1043 r->base : ULLONG_MAX; 1044 r->base : (phys_addr_t)ULLONG_MAX;
1044 /* 1045 /*
1045 * if idx_b advanced past idx_a, 1046 * if idx_b advanced past idx_a,
1046 * break out to advance idx_a 1047 * break out to advance idx_a
@@ -1345,7 +1346,7 @@ void * __init memblock_virt_alloc_try_nid_raw(
1345 min_addr, max_addr, nid); 1346 min_addr, max_addr, nid);
1346#ifdef CONFIG_DEBUG_VM 1347#ifdef CONFIG_DEBUG_VM
1347 if (ptr && size > 0) 1348 if (ptr && size > 0)
1348 memset(ptr, 0xff, size); 1349 memset(ptr, PAGE_POISON_PATTERN, size);
1349#endif 1350#endif
1350 return ptr; 1351 return ptr;
1351} 1352}
@@ -1750,29 +1751,6 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
1750 } 1751 }
1751} 1752}
1752 1753
1753extern unsigned long __init_memblock
1754memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr)
1755{
1756 struct memblock_region *rgn;
1757 unsigned long size = 0;
1758 int idx;
1759
1760 for_each_memblock_type(idx, (&memblock.reserved), rgn) {
1761 phys_addr_t start, end;
1762
1763 if (rgn->base + rgn->size < start_addr)
1764 continue;
1765 if (rgn->base > end_addr)
1766 continue;
1767
1768 start = rgn->base;
1769 end = start + rgn->size;
1770 size += end - start;
1771 }
1772
1773 return size;
1774}
1775
1776void __init_memblock __memblock_dump_all(void) 1754void __init_memblock __memblock_dump_all(void)
1777{ 1755{
1778 pr_info("MEMBLOCK configuration:\n"); 1756 pr_info("MEMBLOCK configuration:\n");
@@ -1818,18 +1796,7 @@ static int memblock_debug_show(struct seq_file *m, void *private)
1818 } 1796 }
1819 return 0; 1797 return 0;
1820} 1798}
1821 1799DEFINE_SHOW_ATTRIBUTE(memblock_debug);
1822static int memblock_debug_open(struct inode *inode, struct file *file)
1823{
1824 return single_open(file, memblock_debug_show, inode->i_private);
1825}
1826
1827static const struct file_operations memblock_debug_fops = {
1828 .open = memblock_debug_open,
1829 .read = seq_read,
1830 .llseek = seq_lseek,
1831 .release = single_release,
1832};
1833 1800
1834static int __init memblock_init_debugfs(void) 1801static int __init memblock_init_debugfs(void)
1835{ 1802{
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 8291b75f42c8..2d4bf647cf01 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -502,6 +502,7 @@ static const char * const action_page_types[] = {
502 [MF_MSG_POISONED_HUGE] = "huge page already hardware poisoned", 502 [MF_MSG_POISONED_HUGE] = "huge page already hardware poisoned",
503 [MF_MSG_HUGE] = "huge page", 503 [MF_MSG_HUGE] = "huge page",
504 [MF_MSG_FREE_HUGE] = "free huge page", 504 [MF_MSG_FREE_HUGE] = "free huge page",
505 [MF_MSG_NON_PMD_HUGE] = "non-pmd-sized huge page",
505 [MF_MSG_UNMAP_FAILED] = "unmapping failed page", 506 [MF_MSG_UNMAP_FAILED] = "unmapping failed page",
506 [MF_MSG_DIRTY_SWAPCACHE] = "dirty swapcache page", 507 [MF_MSG_DIRTY_SWAPCACHE] = "dirty swapcache page",
507 [MF_MSG_CLEAN_SWAPCACHE] = "clean swapcache page", 508 [MF_MSG_CLEAN_SWAPCACHE] = "clean swapcache page",
@@ -1084,6 +1085,21 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
1084 return 0; 1085 return 0;
1085 } 1086 }
1086 1087
1088 /*
1089 * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
1090 * simply disable it. In order to make it work properly, we need to
1091 * make sure that:
1092 * - conversion of a pud that maps an error hugetlb into hwpoison
1093 * entry properly works, and
1094 * - other mm code walking over page table is aware of pud-aligned
1095 * hwpoison entries.
1096 */
1097 if (huge_page_size(page_hstate(head)) > PMD_SIZE) {
1098 action_result(pfn, MF_MSG_NON_PMD_HUGE, MF_IGNORED);
1099 res = -EBUSY;
1100 goto out;
1101 }
1102
1087 if (!hwpoison_user_mappings(p, pfn, flags, &head)) { 1103 if (!hwpoison_user_mappings(p, pfn, flags, &head)) {
1088 action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); 1104 action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
1089 res = -EBUSY; 1105 res = -EBUSY;
diff --git a/mm/memory.c b/mm/memory.c
index aed37325d94e..01f5464e0fd2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2883,26 +2883,16 @@ EXPORT_SYMBOL(unmap_mapping_range);
2883int do_swap_page(struct vm_fault *vmf) 2883int do_swap_page(struct vm_fault *vmf)
2884{ 2884{
2885 struct vm_area_struct *vma = vmf->vma; 2885 struct vm_area_struct *vma = vmf->vma;
2886 struct page *page = NULL, *swapcache = NULL; 2886 struct page *page = NULL, *swapcache;
2887 struct mem_cgroup *memcg; 2887 struct mem_cgroup *memcg;
2888 struct vma_swap_readahead swap_ra;
2889 swp_entry_t entry; 2888 swp_entry_t entry;
2890 pte_t pte; 2889 pte_t pte;
2891 int locked; 2890 int locked;
2892 int exclusive = 0; 2891 int exclusive = 0;
2893 int ret = 0; 2892 int ret = 0;
2894 bool vma_readahead = swap_use_vma_readahead();
2895 2893
2896 if (vma_readahead) { 2894 if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
2897 page = swap_readahead_detect(vmf, &swap_ra);
2898 swapcache = page;
2899 }
2900
2901 if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
2902 if (page)
2903 put_page(page);
2904 goto out; 2895 goto out;
2905 }
2906 2896
2907 entry = pte_to_swp_entry(vmf->orig_pte); 2897 entry = pte_to_swp_entry(vmf->orig_pte);
2908 if (unlikely(non_swap_entry(entry))) { 2898 if (unlikely(non_swap_entry(entry))) {
@@ -2928,11 +2918,8 @@ int do_swap_page(struct vm_fault *vmf)
2928 2918
2929 2919
2930 delayacct_set_flag(DELAYACCT_PF_SWAPIN); 2920 delayacct_set_flag(DELAYACCT_PF_SWAPIN);
2931 if (!page) { 2921 page = lookup_swap_cache(entry, vma, vmf->address);
2932 page = lookup_swap_cache(entry, vma_readahead ? vma : NULL, 2922 swapcache = page;
2933 vmf->address);
2934 swapcache = page;
2935 }
2936 2923
2937 if (!page) { 2924 if (!page) {
2938 struct swap_info_struct *si = swp_swap_info(entry); 2925 struct swap_info_struct *si = swp_swap_info(entry);
@@ -2940,7 +2927,8 @@ int do_swap_page(struct vm_fault *vmf)
2940 if (si->flags & SWP_SYNCHRONOUS_IO && 2927 if (si->flags & SWP_SYNCHRONOUS_IO &&
2941 __swap_count(si, entry) == 1) { 2928 __swap_count(si, entry) == 1) {
2942 /* skip swapcache */ 2929 /* skip swapcache */
2943 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); 2930 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
2931 vmf->address);
2944 if (page) { 2932 if (page) {
2945 __SetPageLocked(page); 2933 __SetPageLocked(page);
2946 __SetPageSwapBacked(page); 2934 __SetPageSwapBacked(page);
@@ -2949,12 +2937,8 @@ int do_swap_page(struct vm_fault *vmf)
2949 swap_readpage(page, true); 2937 swap_readpage(page, true);
2950 } 2938 }
2951 } else { 2939 } else {
2952 if (vma_readahead) 2940 page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
2953 page = do_swap_page_readahead(entry, 2941 vmf);
2954 GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
2955 else
2956 page = swapin_readahead(entry,
2957 GFP_HIGHUSER_MOVABLE, vma, vmf->address);
2958 swapcache = page; 2942 swapcache = page;
2959 } 2943 }
2960 2944
@@ -2982,7 +2966,6 @@ int do_swap_page(struct vm_fault *vmf)
2982 */ 2966 */
2983 ret = VM_FAULT_HWPOISON; 2967 ret = VM_FAULT_HWPOISON;
2984 delayacct_clear_flag(DELAYACCT_PF_SWAPIN); 2968 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2985 swapcache = page;
2986 goto out_release; 2969 goto out_release;
2987 } 2970 }
2988 2971
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b2bd52ff7605..cc6dfa5832ca 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -250,7 +250,6 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
250 struct vmem_altmap *altmap, bool want_memblock) 250 struct vmem_altmap *altmap, bool want_memblock)
251{ 251{
252 int ret; 252 int ret;
253 int i;
254 253
255 if (pfn_valid(phys_start_pfn)) 254 if (pfn_valid(phys_start_pfn))
256 return -EEXIST; 255 return -EEXIST;
@@ -259,27 +258,10 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
259 if (ret < 0) 258 if (ret < 0)
260 return ret; 259 return ret;
261 260
262 /*
263 * Make all the pages reserved so that nobody will stumble over half
264 * initialized state.
265 * FIXME: We also have to associate it with a node because page_to_nid
266 * relies on having page with the proper node.
267 */
268 for (i = 0; i < PAGES_PER_SECTION; i++) {
269 unsigned long pfn = phys_start_pfn + i;
270 struct page *page;
271 if (!pfn_valid(pfn))
272 continue;
273
274 page = pfn_to_page(pfn);
275 set_page_node(page, nid);
276 SetPageReserved(page);
277 }
278
279 if (!want_memblock) 261 if (!want_memblock)
280 return 0; 262 return 0;
281 263
282 return register_new_memory(nid, __pfn_to_section(phys_start_pfn)); 264 return hotplug_memory_register(nid, __pfn_to_section(phys_start_pfn));
283} 265}
284 266
285/* 267/*
@@ -559,6 +541,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
559 * @zone: zone from which pages need to be removed 541 * @zone: zone from which pages need to be removed
560 * @phys_start_pfn: starting pageframe (must be aligned to start of a section) 542 * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
561 * @nr_pages: number of pages to remove (must be multiple of section size) 543 * @nr_pages: number of pages to remove (must be multiple of section size)
544 * @altmap: alternative device page map or %NULL if default memmap is used
562 * 545 *
563 * Generic helper function to remove section mappings and sysfs entries 546 * Generic helper function to remove section mappings and sysfs entries
564 * for the section of the memory we are removing. Caller needs to make 547 * for the section of the memory we are removing. Caller needs to make
@@ -908,8 +891,15 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
908 int nid; 891 int nid;
909 int ret; 892 int ret;
910 struct memory_notify arg; 893 struct memory_notify arg;
894 struct memory_block *mem;
895
896 /*
897 * We can't use pfn_to_nid() because nid might be stored in struct page
898 * which is not yet initialized. Instead, we find nid from memory block.
899 */
900 mem = find_memory_block(__pfn_to_section(pfn));
901 nid = mem->nid;
911 902
912 nid = pfn_to_nid(pfn);
913 /* associate pfn range with the zone */ 903 /* associate pfn range with the zone */
914 zone = move_pfn_range(online_type, nid, pfn, nr_pages); 904 zone = move_pfn_range(online_type, nid, pfn, nr_pages);
915 905
@@ -1055,6 +1045,7 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
1055 1045
1056/** 1046/**
1057 * try_online_node - online a node if offlined 1047 * try_online_node - online a node if offlined
1048 * @nid: the node ID
1058 * 1049 *
1059 * called by cpu_up() to online a node without onlined memory. 1050 * called by cpu_up() to online a node without onlined memory.
1060 */ 1051 */
@@ -1083,15 +1074,16 @@ out:
1083 1074
1084static int check_hotplug_memory_range(u64 start, u64 size) 1075static int check_hotplug_memory_range(u64 start, u64 size)
1085{ 1076{
1086 u64 start_pfn = PFN_DOWN(start); 1077 unsigned long block_sz = memory_block_size_bytes();
1078 u64 block_nr_pages = block_sz >> PAGE_SHIFT;
1087 u64 nr_pages = size >> PAGE_SHIFT; 1079 u64 nr_pages = size >> PAGE_SHIFT;
1080 u64 start_pfn = PFN_DOWN(start);
1088 1081
1089 /* Memory range must be aligned with section */ 1082 /* memory range must be block size aligned */
1090 if ((start_pfn & ~PAGE_SECTION_MASK) || 1083 if (!nr_pages || !IS_ALIGNED(start_pfn, block_nr_pages) ||
1091 (nr_pages % PAGES_PER_SECTION) || (!nr_pages)) { 1084 !IS_ALIGNED(nr_pages, block_nr_pages)) {
1092 pr_err("Section-unaligned hotplug range: start 0x%llx, size 0x%llx\n", 1085 pr_err("Block size [%#lx] unaligned hotplug range: start %#llx, size %#llx",
1093 (unsigned long long)start, 1086 block_sz, start, size);
1094 (unsigned long long)size);
1095 return -EINVAL; 1087 return -EINVAL;
1096 } 1088 }
1097 1089
@@ -1814,6 +1806,7 @@ static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
1814 1806
1815/** 1807/**
1816 * try_offline_node 1808 * try_offline_node
1809 * @nid: the node ID
1817 * 1810 *
1818 * Offline a node if all memory sections and cpus of the node are removed. 1811 * Offline a node if all memory sections and cpus of the node are removed.
1819 * 1812 *
@@ -1857,6 +1850,9 @@ EXPORT_SYMBOL(try_offline_node);
1857 1850
1858/** 1851/**
1859 * remove_memory 1852 * remove_memory
1853 * @nid: the node ID
1854 * @start: physical address of the region to remove
1855 * @size: size of the region to remove
1860 * 1856 *
1861 * NOTE: The caller must call lock_device_hotplug() to serialize hotplug 1857 * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
1862 * and online/offline operations before this call, as required by 1858 * and online/offline operations before this call, as required by
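
The rewritten check_hotplug_memory_range() requires both the start pfn and the page count to be aligned to the memory block size rather than just the section size. A small, self-contained illustration of the arithmetic; the 128MB block and 4K page size are assumptions here, the real values come from memory_block_size_bytes() and PAGE_SHIFT:

#include <linux/kernel.h>	/* IS_ALIGNED() */
#include <linux/types.h>

/* Illustration only, not the kernel's helper. */
static bool example_hotplug_range_ok(u64 start, u64 size)
{
	unsigned long block_sz = 128UL << 20;	/* assumed 128MB block size  */
	u64 block_nr_pages = block_sz >> 12;	/* assumed 4K pages -> 32768 */
	u64 start_pfn = start >> 12;
	u64 nr_pages = size >> 12;

	return nr_pages && IS_ALIGNED(start_pfn, block_nr_pages) &&
	       IS_ALIGNED(nr_pages, block_nr_pages);
}

/*
 * example_hotplug_range_ok(256ULL << 20, 256ULL << 20) -> true
 * example_hotplug_range_ok(256ULL << 20,  64ULL << 20) -> false, the -EINVAL path
 */
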
diff --git a/mm/mmap.c b/mm/mmap.c
index aa0dc8231c0d..f2154fc2548b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3191,13 +3191,15 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
3191 if (rlimit(RLIMIT_DATA) == 0 && 3191 if (rlimit(RLIMIT_DATA) == 0 &&
3192 mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT) 3192 mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
3193 return true; 3193 return true;
3194 if (!ignore_rlimit_data) { 3194
3195 pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits or use boot option ignore_rlimit_data.\n", 3195 pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n",
3196 current->comm, current->pid, 3196 current->comm, current->pid,
3197 (mm->data_vm + npages) << PAGE_SHIFT, 3197 (mm->data_vm + npages) << PAGE_SHIFT,
3198 rlimit(RLIMIT_DATA)); 3198 rlimit(RLIMIT_DATA),
3199 ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");
3200
3201 if (!ignore_rlimit_data)
3199 return false; 3202 return false;
3200 }
3201 } 3203 }
3202 3204
3203 return true; 3205 return true;
diff --git a/mm/nommu.c b/mm/nommu.c
index 4f8720243ae7..13723736d38f 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -457,18 +457,6 @@ void __weak vmalloc_sync_all(void)
457{ 457{
458} 458}
459 459
460/**
461 * alloc_vm_area - allocate a range of kernel address space
462 * @size: size of the area
463 *
464 * Returns: NULL on failure, vm_struct on success
465 *
466 * This function reserves a range of kernel address space, and
467 * allocates pagetables to map that range. No actual mappings
468 * are created. If the kernel address space is not shared
469 * between processes, it syncs the pagetable across all
470 * processes.
471 */
472struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) 460struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
473{ 461{
474 BUG(); 462 BUG();
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f2e7dfb81eee..ff992fa8760a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -185,6 +185,8 @@ static bool is_dump_unreclaim_slabs(void)
185 * oom_badness - heuristic function to determine which candidate task to kill 185 * oom_badness - heuristic function to determine which candidate task to kill
186 * @p: task struct of which task we should calculate 186 * @p: task struct of which task we should calculate
187 * @totalpages: total present RAM allowed for page allocation 187 * @totalpages: total present RAM allowed for page allocation
188 * @memcg: task's memory controller, if constrained
189 * @nodemask: nodemask passed to page allocator for mempolicy ooms
188 * 190 *
189 * The heuristic for determining which task to kill is made to be as simple and 191 * The heuristic for determining which task to kill is made to be as simple and
190 * predictable as possible. The goal is to return the highest value for the 192 * predictable as possible. The goal is to return the highest value for the
@@ -224,13 +226,6 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
224 mm_pgtables_bytes(p->mm) / PAGE_SIZE; 226 mm_pgtables_bytes(p->mm) / PAGE_SIZE;
225 task_unlock(p); 227 task_unlock(p);
226 228
227 /*
228 * Root processes get 3% bonus, just like the __vm_enough_memory()
229 * implementation used by LSMs.
230 */
231 if (has_capability_noaudit(p, CAP_SYS_ADMIN))
232 points -= (points * 3) / 100;
233
234 /* Normalize to oom_score_adj units */ 229 /* Normalize to oom_score_adj units */
235 adj *= totalpages / 1000; 230 adj *= totalpages / 1000;
236 points += adj; 231 points += adj;
@@ -595,7 +590,8 @@ static void oom_reap_task(struct task_struct *tsk)
595 while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm)) 590 while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
596 schedule_timeout_idle(HZ/10); 591 schedule_timeout_idle(HZ/10);
597 592
598 if (attempts <= MAX_OOM_REAP_RETRIES) 593 if (attempts <= MAX_OOM_REAP_RETRIES ||
594 test_bit(MMF_OOM_SKIP, &mm->flags))
599 goto done; 595 goto done;
600 596
601 597
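
To put the removed CAP_SYS_ADMIN bonus in oom_badness() above into numbers: with the old "points -= (points * 3) / 100;", a root-owned task whose footprint scored 1,000,000 badness points was credited 30,000 points, so it could consume roughly 3% more memory than an identical unprivileged task before becoming the preferred OOM victim. With the bonus gone, equal usage now yields equal scores regardless of CAP_SYS_ADMIN.
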
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4ea018263210..0b97b8ece4a9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -265,17 +265,19 @@ int min_free_kbytes = 1024;
265int user_min_free_kbytes = -1; 265int user_min_free_kbytes = -1;
266int watermark_scale_factor = 10; 266int watermark_scale_factor = 10;
267 267
268static unsigned long __meminitdata nr_kernel_pages; 268static unsigned long nr_kernel_pages __meminitdata;
269static unsigned long __meminitdata nr_all_pages; 269static unsigned long nr_all_pages __meminitdata;
270static unsigned long __meminitdata dma_reserve; 270static unsigned long dma_reserve __meminitdata;
271 271
272#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP 272#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
273static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; 273static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __meminitdata;
274static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; 274static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __meminitdata;
275static unsigned long __initdata required_kernelcore; 275static unsigned long required_kernelcore __initdata;
276static unsigned long __initdata required_movablecore; 276static unsigned long required_kernelcore_percent __initdata;
277static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; 277static unsigned long required_movablecore __initdata;
278static bool mirrored_kernelcore; 278static unsigned long required_movablecore_percent __initdata;
279static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata;
280static bool mirrored_kernelcore __meminitdata;
279 281
280/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ 282/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
281int movable_zone; 283int movable_zone;
@@ -292,40 +294,6 @@ EXPORT_SYMBOL(nr_online_nodes);
292int page_group_by_mobility_disabled __read_mostly; 294int page_group_by_mobility_disabled __read_mostly;
293 295
294#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 296#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
295
296/*
297 * Determine how many pages need to be initialized during early boot
298 * (non-deferred initialization).
299 * The value of first_deferred_pfn will be set later, once non-deferred pages
300 * are initialized, but for now set it ULONG_MAX.
301 */
302static inline void reset_deferred_meminit(pg_data_t *pgdat)
303{
304 phys_addr_t start_addr, end_addr;
305 unsigned long max_pgcnt;
306 unsigned long reserved;
307
308 /*
309 * Initialise at least 2G of a node but also take into account that
310 * two large system hashes that can take up 1GB for 0.25TB/node.
311 */
312 max_pgcnt = max(2UL << (30 - PAGE_SHIFT),
313 (pgdat->node_spanned_pages >> 8));
314
315 /*
316 * Compensate the all the memblock reservations (e.g. crash kernel)
317 * from the initial estimation to make sure we will initialize enough
318 * memory to boot.
319 */
320 start_addr = PFN_PHYS(pgdat->node_start_pfn);
321 end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt);
322 reserved = memblock_reserved_memory_within(start_addr, end_addr);
323 max_pgcnt += PHYS_PFN(reserved);
324
325 pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages);
326 pgdat->first_deferred_pfn = ULONG_MAX;
327}
328
329/* Returns true if the struct page for the pfn is uninitialised */ 297/* Returns true if the struct page for the pfn is uninitialised */
330static inline bool __meminit early_page_uninitialised(unsigned long pfn) 298static inline bool __meminit early_page_uninitialised(unsigned long pfn)
331{ 299{
@@ -361,10 +329,6 @@ static inline bool update_defer_init(pg_data_t *pgdat,
361 return true; 329 return true;
362} 330}
363#else 331#else
364static inline void reset_deferred_meminit(pg_data_t *pgdat)
365{
366}
367
368static inline bool early_page_uninitialised(unsigned long pfn) 332static inline bool early_page_uninitialised(unsigned long pfn)
369{ 333{
370 return false; 334 return false;
@@ -1099,6 +1063,15 @@ static bool bulkfree_pcp_prepare(struct page *page)
1099} 1063}
1100#endif /* CONFIG_DEBUG_VM */ 1064#endif /* CONFIG_DEBUG_VM */
1101 1065
1066static inline void prefetch_buddy(struct page *page)
1067{
1068 unsigned long pfn = page_to_pfn(page);
1069 unsigned long buddy_pfn = __find_buddy_pfn(pfn, 0);
1070 struct page *buddy = page + (buddy_pfn - pfn);
1071
1072 prefetch(buddy);
1073}
1074
1102/* 1075/*
1103 * Frees a number of pages from the PCP lists 1076 * Frees a number of pages from the PCP lists
1104 * Assumes all pages on list are in same zone, and of same order. 1077 * Assumes all pages on list are in same zone, and of same order.
@@ -1115,13 +1088,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
1115{ 1088{
1116 int migratetype = 0; 1089 int migratetype = 0;
1117 int batch_free = 0; 1090 int batch_free = 0;
1091 int prefetch_nr = 0;
1118 bool isolated_pageblocks; 1092 bool isolated_pageblocks;
1119 1093 struct page *page, *tmp;
1120 spin_lock(&zone->lock); 1094 LIST_HEAD(head);
1121 isolated_pageblocks = has_isolate_pageblock(zone);
1122 1095
1123 while (count) { 1096 while (count) {
1124 struct page *page;
1125 struct list_head *list; 1097 struct list_head *list;
1126 1098
1127 /* 1099 /*
@@ -1143,26 +1115,48 @@ static void free_pcppages_bulk(struct zone *zone, int count,
1143 batch_free = count; 1115 batch_free = count;
1144 1116
1145 do { 1117 do {
1146 int mt; /* migratetype of the to-be-freed page */
1147
1148 page = list_last_entry(list, struct page, lru); 1118 page = list_last_entry(list, struct page, lru);
1149 /* must delete as __free_one_page list manipulates */ 1119 /* must delete to avoid corrupting pcp list */
1150 list_del(&page->lru); 1120 list_del(&page->lru);
1151 1121 pcp->count--;
1152 mt = get_pcppage_migratetype(page);
1153 /* MIGRATE_ISOLATE page should not go to pcplists */
1154 VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
1155 /* Pageblock could have been isolated meanwhile */
1156 if (unlikely(isolated_pageblocks))
1157 mt = get_pageblock_migratetype(page);
1158 1122
1159 if (bulkfree_pcp_prepare(page)) 1123 if (bulkfree_pcp_prepare(page))
1160 continue; 1124 continue;
1161 1125
1162 __free_one_page(page, page_to_pfn(page), zone, 0, mt); 1126 list_add_tail(&page->lru, &head);
1163 trace_mm_page_pcpu_drain(page, 0, mt); 1127
1128 /*
1129 * We are going to put the page back to the global
1130 * pool, prefetch its buddy to speed up later access
1131 * under zone->lock. It is believed the overhead of
1132 * an additional test and calculating buddy_pfn here
1133 * can be offset by reduced memory latency later. To
1134 * avoid excessive prefetching due to large count, only
1135 * prefetch buddy for the first pcp->batch nr of pages.
1136 */
1137 if (prefetch_nr++ < pcp->batch)
1138 prefetch_buddy(page);
1164 } while (--count && --batch_free && !list_empty(list)); 1139 } while (--count && --batch_free && !list_empty(list));
1165 } 1140 }
1141
1142 spin_lock(&zone->lock);
1143 isolated_pageblocks = has_isolate_pageblock(zone);
1144
1145 /*
1146 * Use safe version since after __free_one_page(),
1147 * page->lru.next will not point to original list.
1148 */
1149 list_for_each_entry_safe(page, tmp, &head, lru) {
1150 int mt = get_pcppage_migratetype(page);
1151 /* MIGRATE_ISOLATE page should not go to pcplists */
1152 VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
1153 /* Pageblock could have been isolated meanwhile */
1154 if (unlikely(isolated_pageblocks))
1155 mt = get_pageblock_migratetype(page);
1156
1157 __free_one_page(page, page_to_pfn(page), zone, 0, mt);
1158 trace_mm_page_pcpu_drain(page, 0, mt);
1159 }
1166 spin_unlock(&zone->lock); 1160 spin_unlock(&zone->lock);
1167} 1161}
1168 1162
@@ -1181,10 +1175,9 @@ static void free_one_page(struct zone *zone,
1181} 1175}
1182 1176
1183static void __meminit __init_single_page(struct page *page, unsigned long pfn, 1177static void __meminit __init_single_page(struct page *page, unsigned long pfn,
1184 unsigned long zone, int nid, bool zero) 1178 unsigned long zone, int nid)
1185{ 1179{
1186 if (zero) 1180 mm_zero_struct_page(page);
1187 mm_zero_struct_page(page);
1188 set_page_links(page, zone, nid, pfn); 1181 set_page_links(page, zone, nid, pfn);
1189 init_page_count(page); 1182 init_page_count(page);
1190 page_mapcount_reset(page); 1183 page_mapcount_reset(page);
@@ -1198,12 +1191,6 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
1198#endif 1191#endif
1199} 1192}
1200 1193
1201static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
1202 int nid, bool zero)
1203{
1204 return __init_single_page(pfn_to_page(pfn), pfn, zone, nid, zero);
1205}
1206
1207#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 1194#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
1208static void __meminit init_reserved_page(unsigned long pfn) 1195static void __meminit init_reserved_page(unsigned long pfn)
1209{ 1196{
@@ -1222,7 +1209,7 @@ static void __meminit init_reserved_page(unsigned long pfn)
1222 if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone)) 1209 if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
1223 break; 1210 break;
1224 } 1211 }
1225 __init_single_pfn(pfn, zid, nid, true); 1212 __init_single_page(pfn_to_page(pfn), pfn, zid, nid);
1226} 1213}
1227#else 1214#else
1228static inline void init_reserved_page(unsigned long pfn) 1215static inline void init_reserved_page(unsigned long pfn)
@@ -1506,7 +1493,7 @@ static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
1506 } else if (!(pfn & nr_pgmask)) { 1493 } else if (!(pfn & nr_pgmask)) {
1507 deferred_free_range(pfn - nr_free, nr_free); 1494 deferred_free_range(pfn - nr_free, nr_free);
1508 nr_free = 1; 1495 nr_free = 1;
1509 cond_resched(); 1496 touch_nmi_watchdog();
1510 } else { 1497 } else {
1511 nr_free++; 1498 nr_free++;
1512 } 1499 }
@@ -1535,11 +1522,11 @@ static unsigned long __init deferred_init_pages(int nid, int zid,
1535 continue; 1522 continue;
1536 } else if (!page || !(pfn & nr_pgmask)) { 1523 } else if (!page || !(pfn & nr_pgmask)) {
1537 page = pfn_to_page(pfn); 1524 page = pfn_to_page(pfn);
1538 cond_resched(); 1525 touch_nmi_watchdog();
1539 } else { 1526 } else {
1540 page++; 1527 page++;
1541 } 1528 }
1542 __init_single_page(page, pfn, zid, nid, true); 1529 __init_single_page(page, pfn, zid, nid);
1543 nr_pages++; 1530 nr_pages++;
1544 } 1531 }
1545 return (nr_pages); 1532 return (nr_pages);
@@ -1552,23 +1539,25 @@ static int __init deferred_init_memmap(void *data)
 	int nid = pgdat->node_id;
 	unsigned long start = jiffies;
 	unsigned long nr_pages = 0;
-	unsigned long spfn, epfn;
+	unsigned long spfn, epfn, first_init_pfn, flags;
 	phys_addr_t spa, epa;
 	int zid;
 	struct zone *zone;
-	unsigned long first_init_pfn = pgdat->first_deferred_pfn;
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 	u64 i;
 
+	/* Bind memory initialisation thread to a local node if possible */
+	if (!cpumask_empty(cpumask))
+		set_cpus_allowed_ptr(current, cpumask);
+
+	pgdat_resize_lock(pgdat, &flags);
+	first_init_pfn = pgdat->first_deferred_pfn;
 	if (first_init_pfn == ULONG_MAX) {
+		pgdat_resize_unlock(pgdat, &flags);
 		pgdat_init_report_one_done();
 		return 0;
 	}
 
-	/* Bind memory initialisation thread to a local node if possible */
-	if (!cpumask_empty(cpumask))
-		set_cpus_allowed_ptr(current, cpumask);
-
 	/* Sanity check boundaries */
 	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
 	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
@@ -1598,6 +1587,7 @@ static int __init deferred_init_memmap(void *data)
 		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
 		deferred_free_pages(nid, zid, spfn, epfn);
 	}
+	pgdat_resize_unlock(pgdat, &flags);
 
 	/* Sanity check that the next zone really is unpopulated */
 	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
@@ -1608,6 +1598,117 @@ static int __init deferred_init_memmap(void *data)
1608 pgdat_init_report_one_done(); 1598 pgdat_init_report_one_done();
1609 return 0; 1599 return 0;
1610} 1600}
1601
1602/*
1603 * During boot we initialize deferred pages on-demand, as needed, but once
1604 * page_alloc_init_late() has finished, the deferred pages are all initialized,
1605 * and we can permanently disable that path.
1606 */
1607static DEFINE_STATIC_KEY_TRUE(deferred_pages);
1608
1609/*
1610 * If this zone has deferred pages, try to grow it by initializing enough
1611 * deferred pages to satisfy the allocation specified by order, rounded up to
1612 * the nearest PAGES_PER_SECTION boundary. So we're adding memory in increments
1613 * of SECTION_SIZE bytes by initializing struct pages in increments of
1614 * PAGES_PER_SECTION * sizeof(struct page) bytes.
1615 *
1616 * Return true when zone was grown, otherwise return false. We return true even
1617 * when we grow less than requested, to let the caller decide if there are
1618 * enough pages to satisfy the allocation.
1619 *
1620 * Note: We use noinline because this function is needed only during boot, and
1621 * it is called from a __ref function _deferred_grow_zone. This way we are
1622 * making sure that it is not inlined into permanent text section.
1623 */
1624static noinline bool __init
1625deferred_grow_zone(struct zone *zone, unsigned int order)
1626{
1627 int zid = zone_idx(zone);
1628 int nid = zone_to_nid(zone);
1629 pg_data_t *pgdat = NODE_DATA(nid);
1630 unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
1631 unsigned long nr_pages = 0;
1632 unsigned long first_init_pfn, spfn, epfn, t, flags;
1633 unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
1634 phys_addr_t spa, epa;
1635 u64 i;
1636
1637 /* Only the last zone may have deferred pages */
1638 if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
1639 return false;
1640
1641 pgdat_resize_lock(pgdat, &flags);
1642
1643 /*
1644 * If deferred pages have been initialized while we were waiting for
1645 * the lock, return true, as the zone was grown. The caller will retry
1646 * this zone. We won't return to this function since the caller also
1647 * has this static branch.
1648 */
1649 if (!static_branch_unlikely(&deferred_pages)) {
1650 pgdat_resize_unlock(pgdat, &flags);
1651 return true;
1652 }
1653
1654 /*
1655 * If someone grew this zone while we were waiting for spinlock, return
1656 * true, as there might be enough pages already.
1657 */
1658 if (first_deferred_pfn != pgdat->first_deferred_pfn) {
1659 pgdat_resize_unlock(pgdat, &flags);
1660 return true;
1661 }
1662
1663 first_init_pfn = max(zone->zone_start_pfn, first_deferred_pfn);
1664
1665 if (first_init_pfn >= pgdat_end_pfn(pgdat)) {
1666 pgdat_resize_unlock(pgdat, &flags);
1667 return false;
1668 }
1669
1670 for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
1671 spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
1672 epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
1673
1674 while (spfn < epfn && nr_pages < nr_pages_needed) {
1675 t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION);
1676 first_deferred_pfn = min(t, epfn);
1677 nr_pages += deferred_init_pages(nid, zid, spfn,
1678 first_deferred_pfn);
1679 spfn = first_deferred_pfn;
1680 }
1681
1682 if (nr_pages >= nr_pages_needed)
1683 break;
1684 }
1685
1686 for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
1687 spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
1688 epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa));
1689 deferred_free_pages(nid, zid, spfn, epfn);
1690
1691 if (first_deferred_pfn == epfn)
1692 break;
1693 }
1694 pgdat->first_deferred_pfn = first_deferred_pfn;
1695 pgdat_resize_unlock(pgdat, &flags);
1696
1697 return nr_pages > 0;
1698}
1699
1700/*
1701 * deferred_grow_zone() is __init, but it is called from
1702 * get_page_from_freelist() during early boot until deferred_pages permanently
1703 * disables this call. This is why we have refdata wrapper to avoid warning,
1704 * and to ensure that the function body gets unloaded.
1705 */
1706static bool __ref
1707_deferred_grow_zone(struct zone *zone, unsigned int order)
1708{
1709 return deferred_grow_zone(zone, order);
1710}
1711
1611#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ 1712#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
1612 1713
1613void __init page_alloc_init_late(void) 1714void __init page_alloc_init_late(void)
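The growth step used by deferred_grow_zone() above is nothing more than the request rounded up to whole memory sections. A minimal standalone sketch of that arithmetic, assuming 4 KiB pages and PAGES_PER_SECTION == 32768 (an x86_64 value; the real constant is arch- and config-dependent), with ALIGN re-derived locally rather than taken from kernel headers:

#include <stdio.h>

#define PAGES_PER_SECTION 32768UL			/* assumed: 128 MiB sections of 4 KiB pages */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))	/* same rounding as the kernel macro */

int main(void)
{
	for (unsigned int order = 0; order <= 10; order += 5) {
		unsigned long want = 1UL << order;
		unsigned long grow = ALIGN(want, PAGES_PER_SECTION);

		printf("order %2u: %4lu pages requested -> zone grown by %lu pages\n",
		       order, want, grow);
	}
	return 0;
}

Under these assumptions every request below one section grows the zone by exactly one section (32768 pages, 128 MiB), which is why the comment talks about SECTION_SIZE increments.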
@@ -1626,6 +1727,12 @@ void __init page_alloc_init_late(void)
1626 /* Block until all are initialised */ 1727 /* Block until all are initialised */
1627 wait_for_completion(&pgdat_init_all_done_comp); 1728 wait_for_completion(&pgdat_init_all_done_comp);
1628 1729
1730 /*
1731 * We initialized the rest of the deferred pages. Permanently disable
1732 * on-demand struct page initialization.
1733 */
1734 static_branch_disable(&deferred_pages);
1735
1629 /* Reinit limits that are based on free pages after the kernel is up */ 1736 /* Reinit limits that are based on free pages after the kernel is up */
1630 files_maxfiles_init(); 1737 files_maxfiles_init();
1631#endif 1738#endif
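For readers unfamiliar with static keys: the deferred_pages key above behaves like a boolean that starts true and is cleared exactly once, except that disabling it patches the test out of the generated code. A userspace analogue with a plain flag, purely illustrative and not the jump-label mechanism itself:

#include <stdbool.h>
#include <stdio.h>

static bool deferred_pages = true;		/* stands in for the static key */

static bool grow_zone_if_deferred(void)
{
	if (deferred_pages) {			/* static_branch_unlikely() in the kernel */
		printf("watermark miss: growing zone from deferred pages\n");
		return true;
	}
	return false;				/* steady state: do nothing extra */
}

int main(void)
{
	grow_zone_if_deferred();		/* early boot allocation */
	deferred_pages = false;			/* page_alloc_init_late() finished */
	grow_zone_if_deferred();		/* from now on the check is inert */
	return 0;
}

In the kernel version, once static_branch_disable() has run, the static_branch_unlikely(&deferred_pages) checks added to get_page_from_freelist() cost a patched-out jump rather than a load and compare.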
@@ -2418,10 +2525,8 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	local_irq_save(flags);
 	batch = READ_ONCE(pcp->batch);
 	to_drain = min(pcp->count, batch);
-	if (to_drain > 0) {
+	if (to_drain > 0)
 		free_pcppages_bulk(zone, to_drain, pcp);
-		pcp->count -= to_drain;
-	}
 	local_irq_restore(flags);
 }
 #endif
@@ -2443,10 +2548,8 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 	pset = per_cpu_ptr(zone->pageset, cpu);
 
 	pcp = &pset->pcp;
-	if (pcp->count) {
+	if (pcp->count)
 		free_pcppages_bulk(zone, pcp->count, pcp);
-		pcp->count = 0;
-	}
 	local_irq_restore(flags);
 }
 
@@ -2670,7 +2773,6 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = READ_ONCE(pcp->batch);
 		free_pcppages_bulk(zone, batch, pcp);
-		pcp->count -= batch;
 	}
 }
 
@@ -3205,6 +3307,16 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
3205 ac_classzone_idx(ac), alloc_flags)) { 3307 ac_classzone_idx(ac), alloc_flags)) {
3206 int ret; 3308 int ret;
3207 3309
3310#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
3311 /*
3312 * Watermark failed for this zone, but see if we can
3313 * grow this zone if it contains deferred pages.
3314 */
3315 if (static_branch_unlikely(&deferred_pages)) {
3316 if (_deferred_grow_zone(zone, order))
3317 goto try_this_zone;
3318 }
3319#endif
3208 /* Checked here to keep the fast path fast */ 3320 /* Checked here to keep the fast path fast */
3209 BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); 3321 BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
3210 if (alloc_flags & ALLOC_NO_WATERMARKS) 3322 if (alloc_flags & ALLOC_NO_WATERMARKS)
@@ -3246,6 +3358,14 @@ try_this_zone:
3246 reserve_highatomic_pageblock(page, zone, order); 3358 reserve_highatomic_pageblock(page, zone, order);
3247 3359
3248 return page; 3360 return page;
3361 } else {
3362#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
3363 /* Try again if zone has deferred pages */
3364 if (static_branch_unlikely(&deferred_pages)) {
3365 if (_deferred_grow_zone(zone, order))
3366 goto try_this_zone;
3367 }
3368#endif
3249 } 3369 }
3250 } 3370 }
3251 3371
@@ -3685,16 +3805,18 @@ retry:
 	return page;
 }
 
-static void wake_all_kswapds(unsigned int order, const struct alloc_context *ac)
+static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
+			     const struct alloc_context *ac)
 {
 	struct zoneref *z;
 	struct zone *zone;
 	pg_data_t *last_pgdat = NULL;
+	enum zone_type high_zoneidx = ac->high_zoneidx;
 
-	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
-					ac->high_zoneidx, ac->nodemask) {
+	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, high_zoneidx,
+					ac->nodemask) {
 		if (last_pgdat != zone->zone_pgdat)
-			wakeup_kswapd(zone, order, ac->high_zoneidx);
+			wakeup_kswapd(zone, gfp_mask, order, high_zoneidx);
 		last_pgdat = zone->zone_pgdat;
 	}
 }
@@ -3973,7 +4095,7 @@ retry_cpuset:
 		goto nopage;
 
 	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
-		wake_all_kswapds(order, ac);
+		wake_all_kswapds(order, gfp_mask, ac);
 
 	/*
 	 * The adjusted alloc_flags might result in immediate success, so try
@@ -4031,7 +4153,7 @@ retry_cpuset:
 retry:
 	/* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
 	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
-		wake_all_kswapds(order, ac);
+		wake_all_kswapds(order, gfp_mask, ac);
 
 	reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
 	if (reserve_flags)
@@ -5334,6 +5456,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
5334 pg_data_t *pgdat = NODE_DATA(nid); 5456 pg_data_t *pgdat = NODE_DATA(nid);
5335 unsigned long pfn; 5457 unsigned long pfn;
5336 unsigned long nr_initialised = 0; 5458 unsigned long nr_initialised = 0;
5459 struct page *page;
5337#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP 5460#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
5338 struct memblock_region *r = NULL, *tmp; 5461 struct memblock_region *r = NULL, *tmp;
5339#endif 5462#endif
@@ -5386,6 +5509,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
5386#endif 5509#endif
5387 5510
5388not_early: 5511not_early:
5512 page = pfn_to_page(pfn);
5513 __init_single_page(page, pfn, zone, nid);
5514 if (context == MEMMAP_HOTPLUG)
5515 SetPageReserved(page);
5516
5389 /* 5517 /*
5390 * Mark the block movable so that blocks are reserved for 5518 * Mark the block movable so that blocks are reserved for
5391 * movable at startup. This will force kernel allocations 5519 * movable at startup. This will force kernel allocations
@@ -5402,15 +5530,8 @@ not_early:
5402 * because this is done early in sparse_add_one_section 5530 * because this is done early in sparse_add_one_section
5403 */ 5531 */
5404 if (!(pfn & (pageblock_nr_pages - 1))) { 5532 if (!(pfn & (pageblock_nr_pages - 1))) {
5405 struct page *page = pfn_to_page(pfn);
5406
5407 __init_single_page(page, pfn, zone, nid,
5408 context != MEMMAP_HOTPLUG);
5409 set_pageblock_migratetype(page, MIGRATE_MOVABLE); 5533 set_pageblock_migratetype(page, MIGRATE_MOVABLE);
5410 cond_resched(); 5534 cond_resched();
5411 } else {
5412 __init_single_pfn(pfn, zone, nid,
5413 context != MEMMAP_HOTPLUG);
5414 } 5535 }
5415 } 5536 }
5416} 5537}
@@ -6241,7 +6362,15 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
6241 6362
6242 alloc_node_mem_map(pgdat); 6363 alloc_node_mem_map(pgdat);
6243 6364
6244 reset_deferred_meminit(pgdat); 6365#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
6366 /*
6367 * We start only with one section of pages, more pages are added as
6368 * needed until the rest of deferred pages are initialized.
6369 */
6370 pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
6371 pgdat->node_spanned_pages);
6372 pgdat->first_deferred_pfn = ULONG_MAX;
6373#endif
6245 free_area_init_core(pgdat); 6374 free_area_init_core(pgdat);
6246} 6375}
6247 6376
@@ -6471,7 +6600,18 @@ static void __init find_zone_movable_pfns_for_nodes(void)
6471 } 6600 }
6472 6601
6473 /* 6602 /*
6474 * If movablecore=nn[KMG] was specified, calculate what size of 6603 * If kernelcore=nn% or movablecore=nn% was specified, calculate the
6604 * amount of necessary memory.
6605 */
6606 if (required_kernelcore_percent)
6607 required_kernelcore = (totalpages * 100 * required_kernelcore_percent) /
6608 10000UL;
6609 if (required_movablecore_percent)
6610 required_movablecore = (totalpages * 100 * required_movablecore_percent) /
6611 10000UL;
6612
6613 /*
6614 * If movablecore= was specified, calculate what size of
6475 * kernelcore that corresponds so that memory usable for 6615 * kernelcore that corresponds so that memory usable for
6476 * any allocation type is evenly spread. If both kernelcore 6616 * any allocation type is evenly spread. If both kernelcore
6477 * and movablecore are specified, then the value of kernelcore 6617 * and movablecore are specified, then the value of kernelcore
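The percentage handling added above reduces to one line of integer arithmetic. A standalone sketch with made-up inputs (1 Mi pages of 4 KiB, i.e. 4 GiB, and kernelcore=30%), mirroring the expression from the hunk:

#include <stdio.h>

int main(void)
{
	unsigned long totalpages = 1UL << 20;	/* 4 GiB of 4 KiB pages (made up) */
	unsigned long percent = 30;		/* kernelcore=30% */
	unsigned long required_kernelcore =
		(totalpages * 100 * percent) / 10000UL;	/* expression from the hunk */

	printf("%lu of %lu pages reserved for kernelcore\n",
	       required_kernelcore, totalpages);
	return 0;
}

This prints 314572 pages, roughly 1.2 GiB reserved for kernelcore under those assumed inputs.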
@@ -6711,18 +6851,30 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
6711 zero_resv_unavail(); 6851 zero_resv_unavail();
6712} 6852}
6713 6853
6714static int __init cmdline_parse_core(char *p, unsigned long *core) 6854static int __init cmdline_parse_core(char *p, unsigned long *core,
6855 unsigned long *percent)
6715{ 6856{
6716 unsigned long long coremem; 6857 unsigned long long coremem;
6858 char *endptr;
6859
6717 if (!p) 6860 if (!p)
6718 return -EINVAL; 6861 return -EINVAL;
6719 6862
6720 coremem = memparse(p, &p); 6863 /* Value may be a percentage of total memory, otherwise bytes */
6721 *core = coremem >> PAGE_SHIFT; 6864 coremem = simple_strtoull(p, &endptr, 0);
6865 if (*endptr == '%') {
6866 /* Paranoid check for percent values greater than 100 */
6867 WARN_ON(coremem > 100);
6722 6868
6723 /* Paranoid check that UL is enough for the coremem value */ 6869 *percent = coremem;
6724 WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX); 6870 } else {
6871 coremem = memparse(p, &p);
6872 /* Paranoid check that UL is enough for the coremem value */
6873 WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX);
6725 6874
6875 *core = coremem >> PAGE_SHIFT;
6876 *percent = 0UL;
6877 }
6726 return 0; 6878 return 0;
6727} 6879}
6728 6880
@@ -6738,7 +6890,8 @@ static int __init cmdline_parse_kernelcore(char *p)
6738 return 0; 6890 return 0;
6739 } 6891 }
6740 6892
6741 return cmdline_parse_core(p, &required_kernelcore); 6893 return cmdline_parse_core(p, &required_kernelcore,
6894 &required_kernelcore_percent);
6742} 6895}
6743 6896
6744/* 6897/*
@@ -6747,7 +6900,8 @@ static int __init cmdline_parse_kernelcore(char *p)
6747 */ 6900 */
6748static int __init cmdline_parse_movablecore(char *p) 6901static int __init cmdline_parse_movablecore(char *p)
6749{ 6902{
6750 return cmdline_parse_core(p, &required_movablecore); 6903 return cmdline_parse_core(p, &required_movablecore,
6904 &required_movablecore_percent);
6751} 6905}
6752 6906
6753early_param("kernelcore", cmdline_parse_kernelcore); 6907early_param("kernelcore", cmdline_parse_kernelcore);
@@ -7591,7 +7745,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
7591 cc->nr_migratepages -= nr_reclaimed; 7745 cc->nr_migratepages -= nr_reclaimed;
7592 7746
7593 ret = migrate_pages(&cc->migratepages, alloc_migrate_target, 7747 ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
7594 NULL, 0, cc->mode, MR_CMA); 7748 NULL, 0, cc->mode, MR_CONTIG_RANGE);
7595 } 7749 }
7596 if (ret < 0) { 7750 if (ret < 0) {
7597 putback_movable_pages(&cc->migratepages); 7751 putback_movable_pages(&cc->migratepages);
@@ -7611,11 +7765,11 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
7611 * @gfp_mask: GFP mask to use during compaction 7765 * @gfp_mask: GFP mask to use during compaction
7612 * 7766 *
7613 * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES 7767 * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
7614 * aligned, however it's the caller's responsibility to guarantee that 7768 * aligned. The PFN range must belong to a single zone.
7615 * we are the only thread that changes migrate type of pageblocks the
7616 * pages fall in.
7617 * 7769 *
7618 * The PFN range must belong to a single zone. 7770 * The first thing this routine does is attempt to MIGRATE_ISOLATE all
7771 * pageblocks in the range. Once isolated, the pageblocks should not
7772 * be modified by others.
7619 * 7773 *
7620 * Returns zero on success or negative error code. On success all 7774 * Returns zero on success or negative error code. On success all
7621 * pages which PFN is in [start, end) are allocated for the caller and 7775 * pages which PFN is in [start, end) are allocated for the caller and
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 0a49374e6931..e412a63b2b74 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -65,11 +65,15 @@ static bool page_idle_clear_pte_refs_one(struct page *page,
65 while (page_vma_mapped_walk(&pvmw)) { 65 while (page_vma_mapped_walk(&pvmw)) {
66 addr = pvmw.address; 66 addr = pvmw.address;
67 if (pvmw.pte) { 67 if (pvmw.pte) {
68 referenced = ptep_clear_young_notify(vma, addr, 68 /*
69 pvmw.pte); 69 * For PTE-mapped THP, one sub page is referenced,
70 * the whole THP is referenced.
71 */
72 if (ptep_clear_young_notify(vma, addr, pvmw.pte))
73 referenced = true;
70 } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 74 } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
71 referenced = pmdp_clear_young_notify(vma, addr, 75 if (pmdp_clear_young_notify(vma, addr, pvmw.pmd))
72 pvmw.pmd); 76 referenced = true;
73 } else { 77 } else {
74 /* unexpected pmd-mapped page? */ 78 /* unexpected pmd-mapped page? */
75 WARN_ON_ONCE(1); 79 WARN_ON_ONCE(1);
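The page_idle change above is the classic difference between assigning a flag on every loop iteration and latching it. A toy standalone program showing why the old assignment lost earlier hits when several PTEs map one THP:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	/* pretend three PTEs map one THP; only the first subpage is young */
	bool young[] = { true, false, false };
	bool overwrite = false, accumulate = false;

	for (int i = 0; i < 3; i++) {
		overwrite = young[i];		/* old pattern: last result wins */
		if (young[i])			/* new pattern: any hit sticks */
			accumulate = true;
	}
	printf("overwrite=%d accumulate=%d\n", overwrite, accumulate);
	return 0;
}

The output is overwrite=0 accumulate=1: only the OR-style accumulation remembers that one subpage was referenced, which is what the new comment means by "the whole THP is referenced".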
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 165ed8117bd1..61dee77bb211 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -28,6 +28,14 @@ static int set_migratetype_isolate(struct page *page, int migratetype,
28 28
29 spin_lock_irqsave(&zone->lock, flags); 29 spin_lock_irqsave(&zone->lock, flags);
30 30
31 /*
32 * We assume the caller intended to SET migrate type to isolate.
33 * If it is already set, then someone else must have raced and
34 * set it before us. Return -EBUSY
35 */
36 if (is_migrate_isolate_page(page))
37 goto out;
38
31 pfn = page_to_pfn(page); 39 pfn = page_to_pfn(page);
32 arg.start_pfn = pfn; 40 arg.start_pfn = pfn;
33 arg.nr_pages = pageblock_nr_pages; 41 arg.nr_pages = pageblock_nr_pages;
@@ -166,7 +174,15 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
166 * future will not be allocated again. 174 * future will not be allocated again.
167 * 175 *
168 * start_pfn/end_pfn must be aligned to pageblock_order. 176 * start_pfn/end_pfn must be aligned to pageblock_order.
169 * Returns 0 on success and -EBUSY if any part of range cannot be isolated. 177 * Return 0 on success and -EBUSY if any part of range cannot be isolated.
178 *
179 * There is no high level synchronization mechanism that prevents two threads
180 * from trying to isolate overlapping ranges. If this happens, one thread
181 * will notice pageblocks in the overlapping range already set to isolate.
182 * This happens in set_migratetype_isolate, and set_migratetype_isolate
183 * returns an error. We then clean up by restoring the migration type on
184 * pageblocks we may have modified and return -EBUSY to caller. This
185 * prevents two threads from simultaneously working on overlapping ranges.
170 */ 186 */
171int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, 187int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
172 unsigned migratetype, bool skip_hwpoisoned_pages) 188 unsigned migratetype, bool skip_hwpoisoned_pages)
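The comment added above describes a back-off protocol rather than a lock-free trick. A toy single-process model of it (plain C with a pthread mutex, not kernel code; block and range sizes are made up):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int isolated[4];				/* four pretend pageblocks */

static int set_isolate(int blk)
{
	int ret = 0;

	pthread_mutex_lock(&lock);
	if (isolated[blk])
		ret = -EBUSY;			/* another range got here first */
	else
		isolated[blk] = 1;
	pthread_mutex_unlock(&lock);
	return ret;
}

static void unset_isolate(int blk)
{
	pthread_mutex_lock(&lock);
	isolated[blk] = 0;
	pthread_mutex_unlock(&lock);
}

static int isolate_range(int start, int end)
{
	for (int i = start; i < end; i++) {
		if (set_isolate(i)) {
			while (--i >= start)	/* roll back our own blocks */
				unset_isolate(i);
			return -EBUSY;
		}
	}
	return 0;
}

int main(void)
{
	printf("first caller : %d\n", isolate_range(0, 3));
	printf("second caller: %d\n", isolate_range(2, 4));	/* overlaps -> -EBUSY */
	return 0;
}

The first caller isolates blocks 0-2 and returns 0; the second, overlapping caller sees block 2 already isolated, rolls back anything it had set, and returns -EBUSY, which is the behaviour the new start_isolate_page_range() comment documents.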
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 7172e0a80e13..75d21a2259b3 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -35,7 +35,7 @@ static depot_stack_handle_t early_handle;
35 35
36static void init_early_allocated_pages(void); 36static void init_early_allocated_pages(void);
37 37
38static int early_page_owner_param(char *buf) 38static int __init early_page_owner_param(char *buf)
39{ 39{
40 if (!buf) 40 if (!buf)
41 return -EINVAL; 41 return -EINVAL;
diff --git a/mm/page_poison.c b/mm/page_poison.c
index e83fd44867de..aa2b3d34e8ea 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -9,7 +9,7 @@
9 9
10static bool want_page_poisoning __read_mostly; 10static bool want_page_poisoning __read_mostly;
11 11
12static int early_page_poison_param(char *buf) 12static int __init early_page_poison_param(char *buf)
13{ 13{
14 if (!buf) 14 if (!buf)
15 return -EINVAL; 15 return -EINVAL;
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 8d2da5dec1e0..c3084ff2569d 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -258,6 +258,9 @@ static int __walk_page_range(unsigned long start, unsigned long end,
258 258
259/** 259/**
260 * walk_page_range - walk page table with caller specific callbacks 260 * walk_page_range - walk page table with caller specific callbacks
261 * @start: start address of the virtual address range
262 * @end: end address of the virtual address range
263 * @walk: mm_walk structure defining the callbacks and the target address space
261 * 264 *
262 * Recursively walk the page table tree of the process represented by @walk->mm 265 * Recursively walk the page table tree of the process represented by @walk->mm
263 * within the virtual address range [@start, @end). During walking, we can do 266 * within the virtual address range [@start, @end). During walking, we can do
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index 7a58460bfd27..063ff60ecd90 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -223,18 +223,7 @@ alloc_buffer:
223 223
224 return 0; 224 return 0;
225} 225}
226 226DEFINE_SHOW_ATTRIBUTE(percpu_stats);
227static int percpu_stats_open(struct inode *inode, struct file *filp)
228{
229 return single_open(filp, percpu_stats_show, NULL);
230}
231
232static const struct file_operations percpu_stats_fops = {
233 .open = percpu_stats_open,
234 .read = seq_read,
235 .llseek = seq_lseek,
236 .release = single_release,
237};
238 227
239static int __init init_percpu_stats_debugfs(void) 228static int __init init_percpu_stats_debugfs(void)
240{ 229{
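DEFINE_SHOW_ATTRIBUTE(percpu_stats) regenerates essentially what the deleted lines contained. Roughly what the macro expands to, paraphrased from include/linux/seq_file.h of this era and trimmed (the header is authoritative; this sketch is for orientation only and is not standalone-compilable):

static int percpu_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, percpu_stats_show, inode->i_private);
}

static const struct file_operations percpu_stats_fops = {
	.owner   = THIS_MODULE,
	.open    = percpu_stats_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

So the conversion is purely a de-duplication: the same single_open()/seq_read() plumbing, now stamped out by one macro per debugfs "show" file.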
diff --git a/mm/rmap.c b/mm/rmap.c
index 144c66e688a9..9122787c4947 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1171,6 +1171,7 @@ void page_add_new_anon_rmap(struct page *page,
1171/** 1171/**
1172 * page_add_file_rmap - add pte mapping to a file page 1172 * page_add_file_rmap - add pte mapping to a file page
1173 * @page: the page to add the mapping to 1173 * @page: the page to add the mapping to
1174 * @compound: charge the page as compound or small page
1174 * 1175 *
1175 * The caller needs to hold the pte lock. 1176 * The caller needs to hold the pte lock.
1176 */ 1177 */
diff --git a/mm/shmem.c b/mm/shmem.c
index b85919243399..4424fc0c33aa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1422,9 +1422,12 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
1422{ 1422{
1423 struct vm_area_struct pvma; 1423 struct vm_area_struct pvma;
1424 struct page *page; 1424 struct page *page;
1425 struct vm_fault vmf;
1425 1426
1426 shmem_pseudo_vma_init(&pvma, info, index); 1427 shmem_pseudo_vma_init(&pvma, info, index);
1427 page = swapin_readahead(swap, gfp, &pvma, 0); 1428 vmf.vma = &pvma;
1429 vmf.address = 0;
1430 page = swap_cluster_readahead(swap, gfp, &vmf);
1428 shmem_pseudo_vma_destroy(&pvma); 1431 shmem_pseudo_vma_destroy(&pvma);
1429 1432
1430 return page; 1433 return page;
diff --git a/mm/slab.c b/mm/slab.c
index 9095c3945425..e3a9b8e23306 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1869,7 +1869,7 @@ static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
1869 return 0; 1869 return 0;
1870} 1870}
1871 1871
1872slab_flags_t kmem_cache_flags(unsigned long object_size, 1872slab_flags_t kmem_cache_flags(unsigned int object_size,
1873 slab_flags_t flags, const char *name, 1873 slab_flags_t flags, const char *name,
1874 void (*ctor)(void *)) 1874 void (*ctor)(void *))
1875{ 1875{
@@ -1877,7 +1877,7 @@ slab_flags_t kmem_cache_flags(unsigned long object_size,
1877} 1877}
1878 1878
1879struct kmem_cache * 1879struct kmem_cache *
1880__kmem_cache_alias(const char *name, size_t size, size_t align, 1880__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
1881 slab_flags_t flags, void (*ctor)(void *)) 1881 slab_flags_t flags, void (*ctor)(void *))
1882{ 1882{
1883 struct kmem_cache *cachep; 1883 struct kmem_cache *cachep;
@@ -1994,7 +1994,7 @@ int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
1994 size_t ralign = BYTES_PER_WORD; 1994 size_t ralign = BYTES_PER_WORD;
1995 gfp_t gfp; 1995 gfp_t gfp;
1996 int err; 1996 int err;
1997 size_t size = cachep->size; 1997 unsigned int size = cachep->size;
1998 1998
1999#if DEBUG 1999#if DEBUG
2000#if FORCED_DEBUG 2000#if FORCED_DEBUG
@@ -2291,6 +2291,18 @@ out:
2291 return nr_freed; 2291 return nr_freed;
2292} 2292}
2293 2293
2294bool __kmem_cache_empty(struct kmem_cache *s)
2295{
2296 int node;
2297 struct kmem_cache_node *n;
2298
2299 for_each_kmem_cache_node(s, node, n)
2300 if (!list_empty(&n->slabs_full) ||
2301 !list_empty(&n->slabs_partial))
2302 return false;
2303 return true;
2304}
2305
2294int __kmem_cache_shrink(struct kmem_cache *cachep) 2306int __kmem_cache_shrink(struct kmem_cache *cachep)
2295{ 2307{
2296 int ret = 0; 2308 int ret = 0;
diff --git a/mm/slab.h b/mm/slab.h
index 51813236e773..68bdf498da3b 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -22,8 +22,8 @@ struct kmem_cache {
22 unsigned int size; /* The aligned/padded/added on size */ 22 unsigned int size; /* The aligned/padded/added on size */
23 unsigned int align; /* Alignment as calculated */ 23 unsigned int align; /* Alignment as calculated */
24 slab_flags_t flags; /* Active flags on the slab */ 24 slab_flags_t flags; /* Active flags on the slab */
25 size_t useroffset; /* Usercopy region offset */ 25 unsigned int useroffset;/* Usercopy region offset */
26 size_t usersize; /* Usercopy region size */ 26 unsigned int usersize; /* Usercopy region size */
27 const char *name; /* Slab name for sysfs */ 27 const char *name; /* Slab name for sysfs */
28 int refcount; /* Use counter */ 28 int refcount; /* Use counter */
29 void (*ctor)(void *); /* Called on object slot creation */ 29 void (*ctor)(void *); /* Called on object slot creation */
@@ -77,7 +77,7 @@ extern struct kmem_cache *kmem_cache;
77/* A table of kmalloc cache names and sizes */ 77/* A table of kmalloc cache names and sizes */
78extern const struct kmalloc_info_struct { 78extern const struct kmalloc_info_struct {
79 const char *name; 79 const char *name;
80 unsigned long size; 80 unsigned int size;
81} kmalloc_info[]; 81} kmalloc_info[];
82 82
83#ifndef CONFIG_SLOB 83#ifndef CONFIG_SLOB
@@ -93,31 +93,31 @@ struct kmem_cache *kmalloc_slab(size_t, gfp_t);
93/* Functions provided by the slab allocators */ 93/* Functions provided by the slab allocators */
94int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags); 94int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
95 95
96extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size, 96struct kmem_cache *create_kmalloc_cache(const char *name, unsigned int size,
97 slab_flags_t flags, size_t useroffset, 97 slab_flags_t flags, unsigned int useroffset,
98 size_t usersize); 98 unsigned int usersize);
99extern void create_boot_cache(struct kmem_cache *, const char *name, 99extern void create_boot_cache(struct kmem_cache *, const char *name,
100 size_t size, slab_flags_t flags, size_t useroffset, 100 unsigned int size, slab_flags_t flags,
101 size_t usersize); 101 unsigned int useroffset, unsigned int usersize);
102 102
103int slab_unmergeable(struct kmem_cache *s); 103int slab_unmergeable(struct kmem_cache *s);
104struct kmem_cache *find_mergeable(size_t size, size_t align, 104struct kmem_cache *find_mergeable(unsigned size, unsigned align,
105 slab_flags_t flags, const char *name, void (*ctor)(void *)); 105 slab_flags_t flags, const char *name, void (*ctor)(void *));
106#ifndef CONFIG_SLOB 106#ifndef CONFIG_SLOB
107struct kmem_cache * 107struct kmem_cache *
108__kmem_cache_alias(const char *name, size_t size, size_t align, 108__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
109 slab_flags_t flags, void (*ctor)(void *)); 109 slab_flags_t flags, void (*ctor)(void *));
110 110
111slab_flags_t kmem_cache_flags(unsigned long object_size, 111slab_flags_t kmem_cache_flags(unsigned int object_size,
112 slab_flags_t flags, const char *name, 112 slab_flags_t flags, const char *name,
113 void (*ctor)(void *)); 113 void (*ctor)(void *));
114#else 114#else
115static inline struct kmem_cache * 115static inline struct kmem_cache *
116__kmem_cache_alias(const char *name, size_t size, size_t align, 116__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
117 slab_flags_t flags, void (*ctor)(void *)) 117 slab_flags_t flags, void (*ctor)(void *))
118{ return NULL; } 118{ return NULL; }
119 119
120static inline slab_flags_t kmem_cache_flags(unsigned long object_size, 120static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
121 slab_flags_t flags, const char *name, 121 slab_flags_t flags, const char *name,
122 void (*ctor)(void *)) 122 void (*ctor)(void *))
123{ 123{
@@ -166,6 +166,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned long object_size,
166 SLAB_TEMPORARY | \ 166 SLAB_TEMPORARY | \
167 SLAB_ACCOUNT) 167 SLAB_ACCOUNT)
168 168
169bool __kmem_cache_empty(struct kmem_cache *);
169int __kmem_cache_shutdown(struct kmem_cache *); 170int __kmem_cache_shutdown(struct kmem_cache *);
170void __kmem_cache_release(struct kmem_cache *); 171void __kmem_cache_release(struct kmem_cache *);
171int __kmem_cache_shrink(struct kmem_cache *); 172int __kmem_cache_shrink(struct kmem_cache *);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 10f127b2de7c..98dcdc352062 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -10,6 +10,7 @@
10#include <linux/poison.h> 10#include <linux/poison.h>
11#include <linux/interrupt.h> 11#include <linux/interrupt.h>
12#include <linux/memory.h> 12#include <linux/memory.h>
13#include <linux/cache.h>
13#include <linux/compiler.h> 14#include <linux/compiler.h>
14#include <linux/module.h> 15#include <linux/module.h>
15#include <linux/cpu.h> 16#include <linux/cpu.h>
@@ -81,38 +82,19 @@ unsigned int kmem_cache_size(struct kmem_cache *s)
81EXPORT_SYMBOL(kmem_cache_size); 82EXPORT_SYMBOL(kmem_cache_size);
82 83
83#ifdef CONFIG_DEBUG_VM 84#ifdef CONFIG_DEBUG_VM
84static int kmem_cache_sanity_check(const char *name, size_t size) 85static int kmem_cache_sanity_check(const char *name, unsigned int size)
85{ 86{
86 struct kmem_cache *s = NULL;
87
88 if (!name || in_interrupt() || size < sizeof(void *) || 87 if (!name || in_interrupt() || size < sizeof(void *) ||
89 size > KMALLOC_MAX_SIZE) { 88 size > KMALLOC_MAX_SIZE) {
90 pr_err("kmem_cache_create(%s) integrity check failed\n", name); 89 pr_err("kmem_cache_create(%s) integrity check failed\n", name);
91 return -EINVAL; 90 return -EINVAL;
92 } 91 }
93 92
94 list_for_each_entry(s, &slab_caches, list) {
95 char tmp;
96 int res;
97
98 /*
99 * This happens when the module gets unloaded and doesn't
100 * destroy its slab cache and no-one else reuses the vmalloc
101 * area of the module. Print a warning.
102 */
103 res = probe_kernel_address(s->name, tmp);
104 if (res) {
105 pr_err("Slab cache with size %d has lost its name\n",
106 s->object_size);
107 continue;
108 }
109 }
110
111 WARN_ON(strchr(name, ' ')); /* It confuses parsers */ 93 WARN_ON(strchr(name, ' ')); /* It confuses parsers */
112 return 0; 94 return 0;
113} 95}
114#else 96#else
115static inline int kmem_cache_sanity_check(const char *name, size_t size) 97static inline int kmem_cache_sanity_check(const char *name, unsigned int size)
116{ 98{
117 return 0; 99 return 0;
118} 100}
@@ -279,8 +261,8 @@ static inline void memcg_unlink_cache(struct kmem_cache *s)
279 * Figure out what the alignment of the objects will be given a set of 261 * Figure out what the alignment of the objects will be given a set of
280 * flags, a user specified alignment and the size of the objects. 262 * flags, a user specified alignment and the size of the objects.
281 */ 263 */
282static unsigned long calculate_alignment(unsigned long flags, 264static unsigned int calculate_alignment(slab_flags_t flags,
283 unsigned long align, unsigned long size) 265 unsigned int align, unsigned int size)
284{ 266{
285 /* 267 /*
286 * If the user wants hardware cache aligned objects then follow that 268 * If the user wants hardware cache aligned objects then follow that
@@ -290,7 +272,7 @@ static unsigned long calculate_alignment(unsigned long flags,
290 * alignment though. If that is greater then use it. 272 * alignment though. If that is greater then use it.
291 */ 273 */
292 if (flags & SLAB_HWCACHE_ALIGN) { 274 if (flags & SLAB_HWCACHE_ALIGN) {
293 unsigned long ralign; 275 unsigned int ralign;
294 276
295 ralign = cache_line_size(); 277 ralign = cache_line_size();
296 while (size <= ralign / 2) 278 while (size <= ralign / 2)
@@ -330,7 +312,7 @@ int slab_unmergeable(struct kmem_cache *s)
330 return 0; 312 return 0;
331} 313}
332 314
333struct kmem_cache *find_mergeable(size_t size, size_t align, 315struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
334 slab_flags_t flags, const char *name, void (*ctor)(void *)) 316 slab_flags_t flags, const char *name, void (*ctor)(void *))
335{ 317{
336 struct kmem_cache *s; 318 struct kmem_cache *s;
@@ -378,9 +360,9 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
378} 360}
379 361
380static struct kmem_cache *create_cache(const char *name, 362static struct kmem_cache *create_cache(const char *name,
381 size_t object_size, size_t size, size_t align, 363 unsigned int object_size, unsigned int align,
382 slab_flags_t flags, size_t useroffset, 364 slab_flags_t flags, unsigned int useroffset,
383 size_t usersize, void (*ctor)(void *), 365 unsigned int usersize, void (*ctor)(void *),
384 struct mem_cgroup *memcg, struct kmem_cache *root_cache) 366 struct mem_cgroup *memcg, struct kmem_cache *root_cache)
385{ 367{
386 struct kmem_cache *s; 368 struct kmem_cache *s;
@@ -395,8 +377,7 @@ static struct kmem_cache *create_cache(const char *name,
395 goto out; 377 goto out;
396 378
397 s->name = name; 379 s->name = name;
398 s->object_size = object_size; 380 s->size = s->object_size = object_size;
399 s->size = size;
400 s->align = align; 381 s->align = align;
401 s->ctor = ctor; 382 s->ctor = ctor;
402 s->useroffset = useroffset; 383 s->useroffset = useroffset;
@@ -451,8 +432,10 @@ out_free_cache:
451 * as davem. 432 * as davem.
452 */ 433 */
453struct kmem_cache * 434struct kmem_cache *
454kmem_cache_create_usercopy(const char *name, size_t size, size_t align, 435kmem_cache_create_usercopy(const char *name,
455 slab_flags_t flags, size_t useroffset, size_t usersize, 436 unsigned int size, unsigned int align,
437 slab_flags_t flags,
438 unsigned int useroffset, unsigned int usersize,
456 void (*ctor)(void *)) 439 void (*ctor)(void *))
457{ 440{
458 struct kmem_cache *s = NULL; 441 struct kmem_cache *s = NULL;
@@ -500,7 +483,7 @@ kmem_cache_create_usercopy(const char *name, size_t size, size_t align,
500 goto out_unlock; 483 goto out_unlock;
501 } 484 }
502 485
503 s = create_cache(cache_name, size, size, 486 s = create_cache(cache_name, size,
504 calculate_alignment(flags, align, size), 487 calculate_alignment(flags, align, size),
505 flags, useroffset, usersize, ctor, NULL, NULL); 488 flags, useroffset, usersize, ctor, NULL, NULL);
506 if (IS_ERR(s)) { 489 if (IS_ERR(s)) {
@@ -531,7 +514,7 @@ out_unlock:
531EXPORT_SYMBOL(kmem_cache_create_usercopy); 514EXPORT_SYMBOL(kmem_cache_create_usercopy);
532 515
533struct kmem_cache * 516struct kmem_cache *
534kmem_cache_create(const char *name, size_t size, size_t align, 517kmem_cache_create(const char *name, unsigned int size, unsigned int align,
535 slab_flags_t flags, void (*ctor)(void *)) 518 slab_flags_t flags, void (*ctor)(void *))
536{ 519{
537 return kmem_cache_create_usercopy(name, size, align, flags, 0, 0, 520 return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
@@ -647,7 +630,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
647 goto out_unlock; 630 goto out_unlock;
648 631
649 s = create_cache(cache_name, root_cache->object_size, 632 s = create_cache(cache_name, root_cache->object_size,
650 root_cache->size, root_cache->align, 633 root_cache->align,
651 root_cache->flags & CACHE_CREATE_MASK, 634 root_cache->flags & CACHE_CREATE_MASK,
652 root_cache->useroffset, root_cache->usersize, 635 root_cache->useroffset, root_cache->usersize,
653 root_cache->ctor, memcg, root_cache); 636 root_cache->ctor, memcg, root_cache);
@@ -916,8 +899,9 @@ bool slab_is_available(void)
916 899
917#ifndef CONFIG_SLOB 900#ifndef CONFIG_SLOB
918/* Create a cache during boot when no slab services are available yet */ 901/* Create a cache during boot when no slab services are available yet */
919void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size, 902void __init create_boot_cache(struct kmem_cache *s, const char *name,
920 slab_flags_t flags, size_t useroffset, size_t usersize) 903 unsigned int size, slab_flags_t flags,
904 unsigned int useroffset, unsigned int usersize)
921{ 905{
922 int err; 906 int err;
923 907
@@ -932,15 +916,15 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz
932 err = __kmem_cache_create(s, flags); 916 err = __kmem_cache_create(s, flags);
933 917
934 if (err) 918 if (err)
935 panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n", 919 panic("Creation of kmalloc slab %s size=%u failed. Reason %d\n",
936 name, size, err); 920 name, size, err);
937 921
938 s->refcount = -1; /* Exempt from merging for now */ 922 s->refcount = -1; /* Exempt from merging for now */
939} 923}
940 924
941struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, 925struct kmem_cache *__init create_kmalloc_cache(const char *name,
942 slab_flags_t flags, size_t useroffset, 926 unsigned int size, slab_flags_t flags,
943 size_t usersize) 927 unsigned int useroffset, unsigned int usersize)
944{ 928{
945 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); 929 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
946 930
@@ -954,11 +938,11 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
954 return s; 938 return s;
955} 939}
956 940
957struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; 941struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
958EXPORT_SYMBOL(kmalloc_caches); 942EXPORT_SYMBOL(kmalloc_caches);
959 943
960#ifdef CONFIG_ZONE_DMA 944#ifdef CONFIG_ZONE_DMA
961struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; 945struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
962EXPORT_SYMBOL(kmalloc_dma_caches); 946EXPORT_SYMBOL(kmalloc_dma_caches);
963#endif 947#endif
964 948
@@ -968,7 +952,7 @@ EXPORT_SYMBOL(kmalloc_dma_caches);
968 * of two cache sizes there. The size of larger slabs can be determined using 952 * of two cache sizes there. The size of larger slabs can be determined using
969 * fls. 953 * fls.
970 */ 954 */
971static s8 size_index[24] = { 955static u8 size_index[24] __ro_after_init = {
972 3, /* 8 */ 956 3, /* 8 */
973 4, /* 16 */ 957 4, /* 16 */
974 5, /* 24 */ 958 5, /* 24 */
@@ -995,7 +979,7 @@ static s8 size_index[24] = {
995 2 /* 192 */ 979 2 /* 192 */
996}; 980};
997 981
998static inline int size_index_elem(size_t bytes) 982static inline unsigned int size_index_elem(unsigned int bytes)
999{ 983{
1000 return (bytes - 1) / 8; 984 return (bytes - 1) / 8;
1001} 985}
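The index arithmetic being retyped here is easy to demo on its own: a small kmalloc() request of n bytes (n <= 192) selects its cache via size_index[(n - 1) / 8]. A standalone illustration that reproduces only the index calculation, not the bucket contents:

#include <stdio.h>

static unsigned int size_index_elem(unsigned int bytes)
{
	return (bytes - 1) / 8;			/* same arithmetic as above */
}

int main(void)
{
	const unsigned int sizes[] = { 1, 8, 9, 24, 100, 192 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("kmalloc(%3u) -> size_index[%2u]\n",
		       sizes[i], size_index_elem(sizes[i]));
	return 0;
}

kmalloc(9), for example, lands in size_index[1], the same slot as every other size from 9 to 16 bytes; larger requests fall back to the fls()-based power-of-two lookup mentioned in the comment.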
@@ -1006,7 +990,7 @@ static inline int size_index_elem(size_t bytes)
1006 */ 990 */
1007struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) 991struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
1008{ 992{
1009 int index; 993 unsigned int index;
1010 994
1011 if (unlikely(size > KMALLOC_MAX_SIZE)) { 995 if (unlikely(size > KMALLOC_MAX_SIZE)) {
1012 WARN_ON_ONCE(!(flags & __GFP_NOWARN)); 996 WARN_ON_ONCE(!(flags & __GFP_NOWARN));
@@ -1064,13 +1048,13 @@ const struct kmalloc_info_struct kmalloc_info[] __initconst = {
1064 */ 1048 */
1065void __init setup_kmalloc_cache_index_table(void) 1049void __init setup_kmalloc_cache_index_table(void)
1066{ 1050{
1067 int i; 1051 unsigned int i;
1068 1052
1069 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || 1053 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
1070 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); 1054 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
1071 1055
1072 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { 1056 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
1073 int elem = size_index_elem(i); 1057 unsigned int elem = size_index_elem(i);
1074 1058
1075 if (elem >= ARRAY_SIZE(size_index)) 1059 if (elem >= ARRAY_SIZE(size_index))
1076 break; 1060 break;
@@ -1137,9 +1121,9 @@ void __init create_kmalloc_caches(slab_flags_t flags)
1137 struct kmem_cache *s = kmalloc_caches[i]; 1121 struct kmem_cache *s = kmalloc_caches[i];
1138 1122
1139 if (s) { 1123 if (s) {
1140 int size = kmalloc_size(i); 1124 unsigned int size = kmalloc_size(i);
1141 char *n = kasprintf(GFP_NOWAIT, 1125 char *n = kasprintf(GFP_NOWAIT,
1142 "dma-kmalloc-%d", size); 1126 "dma-kmalloc-%u", size);
1143 1127
1144 BUG_ON(!n); 1128 BUG_ON(!n);
1145 kmalloc_dma_caches[i] = create_kmalloc_cache(n, 1129 kmalloc_dma_caches[i] = create_kmalloc_cache(n,
@@ -1182,10 +1166,10 @@ EXPORT_SYMBOL(kmalloc_order_trace);
1182#ifdef CONFIG_SLAB_FREELIST_RANDOM 1166#ifdef CONFIG_SLAB_FREELIST_RANDOM
1183/* Randomize a generic freelist */ 1167/* Randomize a generic freelist */
1184static void freelist_randomize(struct rnd_state *state, unsigned int *list, 1168static void freelist_randomize(struct rnd_state *state, unsigned int *list,
1185 size_t count) 1169 unsigned int count)
1186{ 1170{
1187 size_t i;
1188 unsigned int rand; 1171 unsigned int rand;
1172 unsigned int i;
1189 1173
1190 for (i = 0; i < count; i++) 1174 for (i = 0; i < count; i++)
1191 list[i] = i; 1175 list[i] = i;
@@ -1532,3 +1516,11 @@ EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
1532EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); 1516EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
1533EXPORT_TRACEPOINT_SYMBOL(kfree); 1517EXPORT_TRACEPOINT_SYMBOL(kfree);
1534EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); 1518EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
1519
1520int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
1521{
1522 if (__should_failslab(s, gfpflags))
1523 return -ENOMEM;
1524 return 0;
1525}
1526ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
diff --git a/mm/slub.c b/mm/slub.c
index e381728a3751..4fb037c98782 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -311,18 +311,18 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
311 __p += (__s)->size, __idx++) 311 __p += (__s)->size, __idx++)
312 312
313/* Determine object index from a given position */ 313/* Determine object index from a given position */
314static inline int slab_index(void *p, struct kmem_cache *s, void *addr) 314static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
315{ 315{
316 return (p - addr) / s->size; 316 return (p - addr) / s->size;
317} 317}
318 318
319static inline int order_objects(int order, unsigned long size, int reserved) 319static inline unsigned int order_objects(unsigned int order, unsigned int size, unsigned int reserved)
320{ 320{
321 return ((PAGE_SIZE << order) - reserved) / size; 321 return (((unsigned int)PAGE_SIZE << order) - reserved) / size;
322} 322}
323 323
324static inline struct kmem_cache_order_objects oo_make(int order, 324static inline struct kmem_cache_order_objects oo_make(unsigned int order,
325 unsigned long size, int reserved) 325 unsigned int size, unsigned int reserved)
326{ 326{
327 struct kmem_cache_order_objects x = { 327 struct kmem_cache_order_objects x = {
328 (order << OO_SHIFT) + order_objects(order, size, reserved) 328 (order << OO_SHIFT) + order_objects(order, size, reserved)
@@ -331,12 +331,12 @@ static inline struct kmem_cache_order_objects oo_make(int order,
331 return x; 331 return x;
332} 332}
333 333
334static inline int oo_order(struct kmem_cache_order_objects x) 334static inline unsigned int oo_order(struct kmem_cache_order_objects x)
335{ 335{
336 return x.x >> OO_SHIFT; 336 return x.x >> OO_SHIFT;
337} 337}
338 338
339static inline int oo_objects(struct kmem_cache_order_objects x) 339static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
340{ 340{
341 return x.x & OO_MASK; 341 return x.x & OO_MASK;
342} 342}
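The kmem_cache_order_objects word touched above packs two values into one integer: the page order in the high bits and the object count in the low OO_SHIFT bits. A standalone sketch of the packing, taking OO_SHIFT == 16 and 4 KiB pages as assumptions for the demo rather than reading them from kernel headers:

#include <stdio.h>

#define PAGE_SIZE 4096u				/* assumed */
#define OO_SHIFT  16				/* assumed to match mm/slub.c */
#define OO_MASK   ((1u << OO_SHIFT) - 1)

static unsigned int order_objects(unsigned int order, unsigned int size,
				  unsigned int reserved)
{
	return ((PAGE_SIZE << order) - reserved) / size;
}

int main(void)
{
	unsigned int order = 3, size = 256, reserved = 0;
	unsigned int x = (order << OO_SHIFT) + order_objects(order, size, reserved);

	printf("order=%u objects=%u packed=0x%x\n",
	       x >> OO_SHIFT, x & OO_MASK, x);
	return 0;
}

An order-3 slab of 256-byte objects packs to 0x30080: order 3 in the top half, 128 objects in the bottom half, exactly what oo_order() and oo_objects() unpack.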
@@ -466,7 +466,7 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
466 set_bit(slab_index(p, s, addr), map); 466 set_bit(slab_index(p, s, addr), map);
467} 467}
468 468
469static inline int size_from_object(struct kmem_cache *s) 469static inline unsigned int size_from_object(struct kmem_cache *s)
470{ 470{
471 if (s->flags & SLAB_RED_ZONE) 471 if (s->flags & SLAB_RED_ZONE)
472 return s->size - s->red_left_pad; 472 return s->size - s->red_left_pad;
@@ -598,13 +598,13 @@ static void init_tracking(struct kmem_cache *s, void *object)
598 set_track(s, object, TRACK_ALLOC, 0UL); 598 set_track(s, object, TRACK_ALLOC, 0UL);
599} 599}
600 600
601static void print_track(const char *s, struct track *t) 601static void print_track(const char *s, struct track *t, unsigned long pr_time)
602{ 602{
603 if (!t->addr) 603 if (!t->addr)
604 return; 604 return;
605 605
606 pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n", 606 pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
607 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); 607 s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
608#ifdef CONFIG_STACKTRACE 608#ifdef CONFIG_STACKTRACE
609 { 609 {
610 int i; 610 int i;
@@ -619,11 +619,12 @@ static void print_track(const char *s, struct track *t)
619 619
620static void print_tracking(struct kmem_cache *s, void *object) 620static void print_tracking(struct kmem_cache *s, void *object)
621{ 621{
622 unsigned long pr_time = jiffies;
622 if (!(s->flags & SLAB_STORE_USER)) 623 if (!(s->flags & SLAB_STORE_USER))
623 return; 624 return;
624 625
625 print_track("Allocated", get_track(s, object, TRACK_ALLOC)); 626 print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
626 print_track("Freed", get_track(s, object, TRACK_FREE)); 627 print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
627} 628}
628 629
629static void print_page_info(struct page *page) 630static void print_page_info(struct page *page)
@@ -680,7 +681,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
680 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16); 681 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
681 682
682 print_section(KERN_ERR, "Object ", p, 683 print_section(KERN_ERR, "Object ", p,
683 min_t(unsigned long, s->object_size, PAGE_SIZE)); 684 min_t(unsigned int, s->object_size, PAGE_SIZE));
684 if (s->flags & SLAB_RED_ZONE) 685 if (s->flags & SLAB_RED_ZONE)
685 print_section(KERN_ERR, "Redzone ", p + s->object_size, 686 print_section(KERN_ERR, "Redzone ", p + s->object_size,
686 s->inuse - s->object_size); 687 s->inuse - s->object_size);
@@ -1292,7 +1293,7 @@ out:
1292 1293
1293__setup("slub_debug", setup_slub_debug); 1294__setup("slub_debug", setup_slub_debug);
1294 1295
1295slab_flags_t kmem_cache_flags(unsigned long object_size, 1296slab_flags_t kmem_cache_flags(unsigned int object_size,
1296 slab_flags_t flags, const char *name, 1297 slab_flags_t flags, const char *name,
1297 void (*ctor)(void *)) 1298 void (*ctor)(void *))
1298{ 1299{
@@ -1325,7 +1326,7 @@ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1325 struct page *page) {} 1326 struct page *page) {}
1326static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, 1327static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1327 struct page *page) {} 1328 struct page *page) {}
1328slab_flags_t kmem_cache_flags(unsigned long object_size, 1329slab_flags_t kmem_cache_flags(unsigned int object_size,
1329 slab_flags_t flags, const char *name, 1330 slab_flags_t flags, const char *name,
1330 void (*ctor)(void *)) 1331 void (*ctor)(void *))
1331{ 1332{
@@ -1435,7 +1436,7 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
1435 gfp_t flags, int node, struct kmem_cache_order_objects oo) 1436 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1436{ 1437{
1437 struct page *page; 1438 struct page *page;
1438 int order = oo_order(oo); 1439 unsigned int order = oo_order(oo);
1439 1440
1440 if (node == NUMA_NO_NODE) 1441 if (node == NUMA_NO_NODE)
1441 page = alloc_pages(flags, order); 1442 page = alloc_pages(flags, order);
@@ -1454,8 +1455,8 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
1454/* Pre-initialize the random sequence cache */ 1455/* Pre-initialize the random sequence cache */
1455static int init_cache_random_seq(struct kmem_cache *s) 1456static int init_cache_random_seq(struct kmem_cache *s)
1456{ 1457{
1458 unsigned int count = oo_objects(s->oo);
1457 int err; 1459 int err;
1458 unsigned long i, count = oo_objects(s->oo);
1459 1460
1460 /* Bailout if already initialised */ 1461 /* Bailout if already initialised */
1461 if (s->random_seq) 1462 if (s->random_seq)
@@ -1470,6 +1471,8 @@ static int init_cache_random_seq(struct kmem_cache *s)
1470 1471
1471 /* Transform to an offset on the set of pages */ 1472 /* Transform to an offset on the set of pages */
1472 if (s->random_seq) { 1473 if (s->random_seq) {
1474 unsigned int i;
1475
1473 for (i = 0; i < count; i++) 1476 for (i = 0; i < count; i++)
1474 s->random_seq[i] *= s->size; 1477 s->random_seq[i] *= s->size;
1475 } 1478 }
@@ -1811,7 +1814,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1811{ 1814{
1812 struct page *page, *page2; 1815 struct page *page, *page2;
1813 void *object = NULL; 1816 void *object = NULL;
1814 int available = 0; 1817 unsigned int available = 0;
1815 int objects; 1818 int objects;
1816 1819
1817 /* 1820 /*
@@ -2398,7 +2401,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2398 2401
2399 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n", 2402 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2400 nid, gfpflags, &gfpflags); 2403 nid, gfpflags, &gfpflags);
2401 pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n", 2404 pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2402 s->name, s->object_size, s->size, oo_order(s->oo), 2405 s->name, s->object_size, s->size, oo_order(s->oo),
2403 oo_order(s->min)); 2406 oo_order(s->min));
2404 2407
@@ -3181,9 +3184,9 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk);
3181 * and increases the number of allocations possible without having to 3184 * and increases the number of allocations possible without having to
3182 * take the list_lock. 3185 * take the list_lock.
3183 */ 3186 */
3184static int slub_min_order; 3187static unsigned int slub_min_order;
3185static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; 3188static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3186static int slub_min_objects; 3189static unsigned int slub_min_objects;
3187 3190
3188/* 3191/*
3189 * Calculate the order of allocation given an slab object size. 3192 * Calculate the order of allocation given an slab object size.
@@ -3210,20 +3213,21 @@ static int slub_min_objects;
3210 * requested a higher mininum order then we start with that one instead of 3213 * requested a higher mininum order then we start with that one instead of
3211 * the smallest order which will fit the object. 3214 * the smallest order which will fit the object.
3212 */ 3215 */
3213static inline int slab_order(int size, int min_objects, 3216static inline unsigned int slab_order(unsigned int size,
3214 int max_order, int fract_leftover, int reserved) 3217 unsigned int min_objects, unsigned int max_order,
3218 unsigned int fract_leftover, unsigned int reserved)
3215{ 3219{
3216 int order; 3220 unsigned int min_order = slub_min_order;
3217 int rem; 3221 unsigned int order;
3218 int min_order = slub_min_order;
3219 3222
3220 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) 3223 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
3221 return get_order(size * MAX_OBJS_PER_PAGE) - 1; 3224 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3222 3225
3223 for (order = max(min_order, get_order(min_objects * size + reserved)); 3226 for (order = max(min_order, (unsigned int)get_order(min_objects * size + reserved));
3224 order <= max_order; order++) { 3227 order <= max_order; order++) {
3225 3228
3226 unsigned long slab_size = PAGE_SIZE << order; 3229 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3230 unsigned int rem;
3227 3231
3228 rem = (slab_size - reserved) % size; 3232 rem = (slab_size - reserved) % size;
3229 3233
@@ -3234,12 +3238,11 @@ static inline int slab_order(int size, int min_objects,
3234 return order; 3238 return order;
3235} 3239}
3236 3240
3237static inline int calculate_order(int size, int reserved) 3241static inline int calculate_order(unsigned int size, unsigned int reserved)
3238{ 3242{
3239 int order; 3243 unsigned int order;
3240 int min_objects; 3244 unsigned int min_objects;
3241 int fraction; 3245 unsigned int max_objects;
3242 int max_objects;
3243 3246
3244 /* 3247 /*
3245 * Attempt to find best configuration for a slab. This 3248 * Attempt to find best configuration for a slab. This
@@ -3256,6 +3259,8 @@ static inline int calculate_order(int size, int reserved)
3256 min_objects = min(min_objects, max_objects); 3259 min_objects = min(min_objects, max_objects);
3257 3260
3258 while (min_objects > 1) { 3261 while (min_objects > 1) {
3262 unsigned int fraction;
3263
3259 fraction = 16; 3264 fraction = 16;
3260 while (fraction >= 4) { 3265 while (fraction >= 4) {
3261 order = slab_order(size, min_objects, 3266 order = slab_order(size, min_objects,
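The search that calculate_order() and slab_order() perform above can be followed with a toy version: walk the orders from small to large and stop at the first one whose leftover space is no more than size/fraction. This simplified sketch drops the reserved bytes and the get_order() starting point, so it is an approximation of the real routine, not a copy:

#include <stdio.h>

#define PAGE_SIZE 4096u				/* assumed page size */

static unsigned int slab_order(unsigned int size, unsigned int min_objects,
			       unsigned int max_order, unsigned int fract_leftover)
{
	unsigned int order;

	for (order = 0; order <= max_order; order++) {
		unsigned int slab_size = PAGE_SIZE << order;
		unsigned int rem;

		if (slab_size < min_objects * size)
			continue;		/* too small to hold min_objects */
		rem = slab_size % size;
		if (rem <= slab_size / fract_leftover)
			break;			/* waste is acceptable */
	}
	return order;				/* max_order + 1 means nothing fit */
}

int main(void)
{
	printf("700-byte objects, >=16 per slab -> order %u\n",
	       slab_order(700, 16, 3, 16));
	return 0;
}

For 700-byte objects with at least 16 objects per slab and a fraction of 16, it settles on order 2: a 16 KiB slab holds 23 objects and wastes only 284 bytes.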
@@ -3457,8 +3462,8 @@ static void set_cpu_partial(struct kmem_cache *s)
3457static int calculate_sizes(struct kmem_cache *s, int forced_order) 3462static int calculate_sizes(struct kmem_cache *s, int forced_order)
3458{ 3463{
3459 slab_flags_t flags = s->flags; 3464 slab_flags_t flags = s->flags;
3460 size_t size = s->object_size; 3465 unsigned int size = s->object_size;
3461 int order; 3466 unsigned int order;
3462 3467
3463 /* 3468 /*
3464 * Round up object size to the next word boundary. We can only 3469 * Round up object size to the next word boundary. We can only
@@ -3548,7 +3553,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
3548 else 3553 else
3549 order = calculate_order(size, s->reserved); 3554 order = calculate_order(size, s->reserved);
3550 3555
3551 if (order < 0) 3556 if ((int)order < 0)
3552 return 0; 3557 return 0;
3553 3558
3554 s->allocflags = 0; 3559 s->allocflags = 0;
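calculate_order() still reports failure with a negative value returned through a plain int, but `order` in calculate_sizes() is now unsigned, so a bare `order < 0` would be constant-false; the `(int)order < 0` cast keeps the error check meaningful. A minimal user-space demonstration of the pitfall:

/* Why the (int) cast matters once "order" is unsigned: a user-space demo. */
#include <stdio.h>

static int calc(void)
{
	return -1;		/* stand-in for a negative error from calculate_order() */
}

int main(void)
{
	unsigned int order = calc();	/* -1 converts to UINT_MAX */

	if (order < 0)			/* always false (compilers usually warn) */
		puts("unsigned compare caught the error (never printed)");
	if ((int)order < 0)		/* true: the cast restores the sign */
		puts("(int) cast caught the error");
	return 0;
}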
@@ -3632,8 +3637,8 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
3632 free_kmem_cache_nodes(s); 3637 free_kmem_cache_nodes(s);
3633error: 3638error:
3634 if (flags & SLAB_PANIC) 3639 if (flags & SLAB_PANIC)
3635 panic("Cannot create slab %s size=%lu realsize=%u order=%u offset=%u flags=%lx\n", 3640 panic("Cannot create slab %s size=%u realsize=%u order=%u offset=%u flags=%lx\n",
3636 s->name, (unsigned long)s->size, s->size, 3641 s->name, s->size, s->size,
3637 oo_order(s->oo), s->offset, (unsigned long)flags); 3642 oo_order(s->oo), s->offset, (unsigned long)flags);
3638 return -EINVAL; 3643 return -EINVAL;
3639} 3644}
@@ -3691,6 +3696,17 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3691 discard_slab(s, page); 3696 discard_slab(s, page);
3692} 3697}
3693 3698
3699bool __kmem_cache_empty(struct kmem_cache *s)
3700{
3701 int node;
3702 struct kmem_cache_node *n;
3703
3704 for_each_kmem_cache_node(s, node, n)
3705 if (n->nr_partial || slabs_node(s, node))
3706 return false;
3707 return true;
3708}
3709
3694/* 3710/*
3695 * Release all resources used by a slab cache. 3711 * Release all resources used by a slab cache.
3696 */ 3712 */
@@ -3716,7 +3732,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
3716 3732
3717static int __init setup_slub_min_order(char *str) 3733static int __init setup_slub_min_order(char *str)
3718{ 3734{
3719 get_option(&str, &slub_min_order); 3735 get_option(&str, (int *)&slub_min_order);
3720 3736
3721 return 1; 3737 return 1;
3722} 3738}
@@ -3725,8 +3741,8 @@ __setup("slub_min_order=", setup_slub_min_order);
3725 3741
3726static int __init setup_slub_max_order(char *str) 3742static int __init setup_slub_max_order(char *str)
3727{ 3743{
3728 get_option(&str, &slub_max_order); 3744 get_option(&str, (int *)&slub_max_order);
3729 slub_max_order = min(slub_max_order, MAX_ORDER - 1); 3745 slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
3730 3746
3731 return 1; 3747 return 1;
3732} 3748}
@@ -3735,7 +3751,7 @@ __setup("slub_max_order=", setup_slub_max_order);
3735 3751
3736static int __init setup_slub_min_objects(char *str) 3752static int __init setup_slub_min_objects(char *str)
3737{ 3753{
3738 get_option(&str, &slub_min_objects); 3754 get_option(&str, (int *)&slub_min_objects);
3739 3755
3740 return 1; 3756 return 1;
3741} 3757}
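The slub_min_order/slub_max_order/slub_min_objects parameters are now unsigned while get_option() still writes through an `int *`, hence the casts. An alternative that avoids the cast is to parse into a temporary int and assign after a sanity check; a hedged kernel-context sketch for a made-up boot parameter:

/* Kernel-context sketch (not standalone): parsing an unsigned boot parameter
 * with get_option(), which only knows about int.  "my_min_order" is a made-up
 * example mirroring setup_slub_min_order() above. */
#include <linux/kernel.h>
#include <linux/init.h>

static unsigned int my_min_order;

static int __init setup_my_min_order(char *str)
{
	int val;

	if (get_option(&str, &val) && val >= 0)
		my_min_order = val;	/* keep the variable unsigned, reject negatives */

	return 1;
}
__setup("my_min_order=", setup_my_min_order);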
@@ -3824,7 +3840,7 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
3824 bool to_user) 3840 bool to_user)
3825{ 3841{
3826 struct kmem_cache *s; 3842 struct kmem_cache *s;
3827 unsigned long offset; 3843 unsigned int offset;
3828 size_t object_size; 3844 size_t object_size;
3829 3845
3830 /* Find object and usable object size. */ 3846 /* Find object and usable object size. */
@@ -4230,7 +4246,7 @@ void __init kmem_cache_init(void)
4230 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL, 4246 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4231 slub_cpu_dead); 4247 slub_cpu_dead);
4232 4248
4233 pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%u, Nodes=%d\n", 4249 pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%d\n",
4234 cache_line_size(), 4250 cache_line_size(),
4235 slub_min_order, slub_max_order, slub_min_objects, 4251 slub_min_order, slub_max_order, slub_min_objects,
4236 nr_cpu_ids, nr_node_ids); 4252 nr_cpu_ids, nr_node_ids);
@@ -4241,7 +4257,7 @@ void __init kmem_cache_init_late(void)
4241} 4257}
4242 4258
4243struct kmem_cache * 4259struct kmem_cache *
4244__kmem_cache_alias(const char *name, size_t size, size_t align, 4260__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
4245 slab_flags_t flags, void (*ctor)(void *)) 4261 slab_flags_t flags, void (*ctor)(void *))
4246{ 4262{
4247 struct kmem_cache *s, *c; 4263 struct kmem_cache *s, *c;
@@ -4254,13 +4270,12 @@ __kmem_cache_alias(const char *name, size_t size, size_t align,
4254 * Adjust the object sizes so that we clear 4270 * Adjust the object sizes so that we clear
4255 * the complete object on kzalloc. 4271 * the complete object on kzalloc.
4256 */ 4272 */
4257 s->object_size = max(s->object_size, (int)size); 4273 s->object_size = max(s->object_size, size);
4258 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 4274 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4259 4275
4260 for_each_memcg_cache(c, s) { 4276 for_each_memcg_cache(c, s) {
4261 c->object_size = s->object_size; 4277 c->object_size = s->object_size;
4262 c->inuse = max_t(int, c->inuse, 4278 c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
4263 ALIGN(size, sizeof(void *)));
4264 } 4279 }
4265 4280
4266 if (sysfs_slab_alias(s, name)) { 4281 if (sysfs_slab_alias(s, name)) {
@@ -4889,35 +4904,35 @@ struct slab_attribute {
4889 4904
4890static ssize_t slab_size_show(struct kmem_cache *s, char *buf) 4905static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4891{ 4906{
4892 return sprintf(buf, "%d\n", s->size); 4907 return sprintf(buf, "%u\n", s->size);
4893} 4908}
4894SLAB_ATTR_RO(slab_size); 4909SLAB_ATTR_RO(slab_size);
4895 4910
4896static ssize_t align_show(struct kmem_cache *s, char *buf) 4911static ssize_t align_show(struct kmem_cache *s, char *buf)
4897{ 4912{
4898 return sprintf(buf, "%d\n", s->align); 4913 return sprintf(buf, "%u\n", s->align);
4899} 4914}
4900SLAB_ATTR_RO(align); 4915SLAB_ATTR_RO(align);
4901 4916
4902static ssize_t object_size_show(struct kmem_cache *s, char *buf) 4917static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4903{ 4918{
4904 return sprintf(buf, "%d\n", s->object_size); 4919 return sprintf(buf, "%u\n", s->object_size);
4905} 4920}
4906SLAB_ATTR_RO(object_size); 4921SLAB_ATTR_RO(object_size);
4907 4922
4908static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf) 4923static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4909{ 4924{
4910 return sprintf(buf, "%d\n", oo_objects(s->oo)); 4925 return sprintf(buf, "%u\n", oo_objects(s->oo));
4911} 4926}
4912SLAB_ATTR_RO(objs_per_slab); 4927SLAB_ATTR_RO(objs_per_slab);
4913 4928
4914static ssize_t order_store(struct kmem_cache *s, 4929static ssize_t order_store(struct kmem_cache *s,
4915 const char *buf, size_t length) 4930 const char *buf, size_t length)
4916{ 4931{
4917 unsigned long order; 4932 unsigned int order;
4918 int err; 4933 int err;
4919 4934
4920 err = kstrtoul(buf, 10, &order); 4935 err = kstrtouint(buf, 10, &order);
4921 if (err) 4936 if (err)
4922 return err; 4937 return err;
4923 4938
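order_store() above, and cpu_partial_store() in the next hunk, switch from kstrtoul() into a temporary unsigned long to kstrtouint() straight into an unsigned int, which also rejects values that do not fit. A minimal kernel-context sketch of the parse-then-range-check shape for a hypothetical attribute:

/* Kernel-context sketch: parse an unsigned sysfs value with kstrtouint() and
 * range-check it before storing.  "demo_limit" and DEMO_MAX are hypothetical. */
#include <linux/kernel.h>
#include <linux/errno.h>

#define DEMO_MAX	64U

static unsigned int demo_limit;

static ssize_t demo_limit_store(const char *buf, size_t length)
{
	unsigned int val;
	int err;

	err = kstrtouint(buf, 10, &val);	/* -EINVAL / -ERANGE on bad input */
	if (err)
		return err;
	if (val > DEMO_MAX)
		return -ERANGE;

	demo_limit = val;
	return length;
}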
@@ -4930,7 +4945,7 @@ static ssize_t order_store(struct kmem_cache *s,
4930 4945
4931static ssize_t order_show(struct kmem_cache *s, char *buf) 4946static ssize_t order_show(struct kmem_cache *s, char *buf)
4932{ 4947{
4933 return sprintf(buf, "%d\n", oo_order(s->oo)); 4948 return sprintf(buf, "%u\n", oo_order(s->oo));
4934} 4949}
4935SLAB_ATTR(order); 4950SLAB_ATTR(order);
4936 4951
@@ -4962,10 +4977,10 @@ static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4962static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, 4977static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4963 size_t length) 4978 size_t length)
4964{ 4979{
4965 unsigned long objects; 4980 unsigned int objects;
4966 int err; 4981 int err;
4967 4982
4968 err = kstrtoul(buf, 10, &objects); 4983 err = kstrtouint(buf, 10, &objects);
4969 if (err) 4984 if (err)
4970 return err; 4985 return err;
4971 if (objects && !kmem_cache_has_cpu_partial(s)) 4986 if (objects && !kmem_cache_has_cpu_partial(s))
@@ -5081,7 +5096,7 @@ SLAB_ATTR_RO(cache_dma);
5081 5096
5082static ssize_t usersize_show(struct kmem_cache *s, char *buf) 5097static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5083{ 5098{
5084 return sprintf(buf, "%zu\n", s->usersize); 5099 return sprintf(buf, "%u\n", s->usersize);
5085} 5100}
5086SLAB_ATTR_RO(usersize); 5101SLAB_ATTR_RO(usersize);
5087 5102
@@ -5093,7 +5108,7 @@ SLAB_ATTR_RO(destroy_by_rcu);
5093 5108
5094static ssize_t reserved_show(struct kmem_cache *s, char *buf) 5109static ssize_t reserved_show(struct kmem_cache *s, char *buf)
5095{ 5110{
5096 return sprintf(buf, "%d\n", s->reserved); 5111 return sprintf(buf, "%u\n", s->reserved);
5097} 5112}
5098SLAB_ATTR_RO(reserved); 5113SLAB_ATTR_RO(reserved);
5099 5114
@@ -5288,21 +5303,22 @@ SLAB_ATTR(shrink);
5288#ifdef CONFIG_NUMA 5303#ifdef CONFIG_NUMA
5289static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) 5304static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
5290{ 5305{
5291 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10); 5306 return sprintf(buf, "%u\n", s->remote_node_defrag_ratio / 10);
5292} 5307}
5293 5308
5294static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, 5309static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5295 const char *buf, size_t length) 5310 const char *buf, size_t length)
5296{ 5311{
5297 unsigned long ratio; 5312 unsigned int ratio;
5298 int err; 5313 int err;
5299 5314
5300 err = kstrtoul(buf, 10, &ratio); 5315 err = kstrtouint(buf, 10, &ratio);
5301 if (err) 5316 if (err)
5302 return err; 5317 return err;
5318 if (ratio > 100)
5319 return -ERANGE;
5303 5320
5304 if (ratio <= 100) 5321 s->remote_node_defrag_ratio = ratio * 10;
5305 s->remote_node_defrag_ratio = ratio * 10;
5306 5322
5307 return length; 5323 return length;
5308} 5324}
@@ -5663,7 +5679,7 @@ static char *create_unique_id(struct kmem_cache *s)
5663 *p++ = 'A'; 5679 *p++ = 'A';
5664 if (p != name + 1) 5680 if (p != name + 1)
5665 *p++ = '-'; 5681 *p++ = '-';
5666 p += sprintf(p, "%07d", s->size); 5682 p += sprintf(p, "%07u", s->size);
5667 5683
5668 BUG_ON(p > name + ID_STR_LENGTH - 1); 5684 BUG_ON(p > name + ID_STR_LENGTH - 1);
5669 return name; 5685 return name;
diff --git a/mm/sparse.c b/mm/sparse.c
index 58cab483e81b..62eef264a7bd 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -779,7 +779,13 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
779 goto out; 779 goto out;
780 } 780 }
781 781
782 memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION); 782#ifdef CONFIG_DEBUG_VM
783 /*
784 * Poison uninitialized struct pages in order to catch invalid flags
785 * combinations.
786 */
787 memset(memmap, PAGE_POISON_PATTERN, sizeof(struct page) * PAGES_PER_SECTION);
788#endif
783 789
784 section_mark_present(ms); 790 section_mark_present(ms);
785 791
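Under CONFIG_DEBUG_VM the hot-added section's memmap is now poisoned rather than zeroed, so code that inspects a struct page before the onlining path initializes it hits an obviously bogus bit pattern instead of silently reading zeros. A user-space sketch of the poison-then-verify idea (the all-ones pattern below is an assumption standing in for PAGE_POISON_PATTERN):

/* User-space sketch of poison-then-verify; the all-ones byte is an assumption
 * standing in for PAGE_POISON_PATTERN. */
#include <assert.h>
#include <string.h>

#define POISON_BYTE 0xff

struct fake_page { unsigned long flags; void *mapping; };

int main(void)
{
	struct fake_page section[8];

	memset(section, POISON_BYTE, sizeof(section));

	/* An initialization pass is expected to overwrite every field; a
	 * checker can then assert that no poisoned word survived. */
	for (unsigned int i = 0; i < 8; i++)
		section[i] = (struct fake_page){ .flags = 0, .mapping = NULL };

	for (unsigned int i = 0; i < 8; i++)
		assert(section[i].flags != ~0UL);
	return 0;
}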
diff --git a/mm/swap.c b/mm/swap.c
index 0f17330dd0e5..3dd518832096 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -707,7 +707,6 @@ void lru_add_drain_all(void)
707 * release_pages - batched put_page() 707 * release_pages - batched put_page()
708 * @pages: array of pages to release 708 * @pages: array of pages to release
709 * @nr: number of pages 709 * @nr: number of pages
710 * @cold: whether the pages are cache cold
711 * 710 *
712 * Decrement the reference count on all the pages in @pages. If it 711 * Decrement the reference count on all the pages in @pages. If it
713 * fell to zero, remove the page from the LRU and free it. 712 * fell to zero, remove the page from the LRU and free it.
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index bebc19292018..f2641894f440 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -34,8 +34,6 @@
34#include <linux/mutex.h> 34#include <linux/mutex.h>
35#include <linux/mm.h> 35#include <linux/mm.h>
36 36
37#ifdef CONFIG_SWAP
38
39static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots); 37static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);
40static bool swap_slot_cache_active; 38static bool swap_slot_cache_active;
41bool swap_slot_cache_enabled; 39bool swap_slot_cache_enabled;
@@ -356,5 +354,3 @@ repeat:
356 354
357 return entry; 355 return entry;
358} 356}
359
360#endif /* CONFIG_SWAP */
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 39ae7cfad90f..f233dccd3b1b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -38,7 +38,7 @@ static const struct address_space_operations swap_aops = {
38 38
39struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly; 39struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
40static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly; 40static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
41bool swap_vma_readahead __read_mostly = true; 41static bool enable_vma_readahead __read_mostly = true;
42 42
43#define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2) 43#define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2)
44#define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1) 44#define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1)
@@ -322,6 +322,11 @@ void free_pages_and_swap_cache(struct page **pages, int nr)
322 release_pages(pagep, nr); 322 release_pages(pagep, nr);
323} 323}
324 324
325static inline bool swap_use_vma_readahead(void)
326{
327 return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap);
328}
329
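swap_use_vma_readahead() reads enable_vma_readahead with READ_ONCE() because the sysfs knob can flip the flag concurrently with faults; the annotation forces a single load so each decision sees one consistent value. A tiny kernel-context sketch of that pattern with an illustrative flag:

/* Kernel-context sketch: read a concurrently-writable flag exactly once per
 * decision.  "feature_enabled" is an illustrative flag, not a kernel symbol. */
#include <linux/compiler.h>
#include <linux/cache.h>
#include <linux/types.h>

static bool feature_enabled __read_mostly = true;

static bool feature_should_run(void)
{
	/* One load; the compiler may not re-read the flag mid-decision. */
	return READ_ONCE(feature_enabled);
}

static void feature_set(bool on)
{
	WRITE_ONCE(feature_enabled, on);	/* paired store, e.g. from a sysfs handler */
}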
325/* 330/*
326 * Lookup a swap entry in the swap cache. A found page will be returned 331 * Lookup a swap entry in the swap cache. A found page will be returned
327 * unlocked and with its refcount incremented - we rely on the kernel 332 * unlocked and with its refcount incremented - we rely on the kernel
@@ -332,32 +337,43 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
332 unsigned long addr) 337 unsigned long addr)
333{ 338{
334 struct page *page; 339 struct page *page;
335 unsigned long ra_info;
336 int win, hits, readahead;
337 340
338 page = find_get_page(swap_address_space(entry), swp_offset(entry)); 341 page = find_get_page(swap_address_space(entry), swp_offset(entry));
339 342
340 INC_CACHE_INFO(find_total); 343 INC_CACHE_INFO(find_total);
341 if (page) { 344 if (page) {
345 bool vma_ra = swap_use_vma_readahead();
346 bool readahead;
347
342 INC_CACHE_INFO(find_success); 348 INC_CACHE_INFO(find_success);
349 /*
350 * At the moment, we don't support PG_readahead for anon THP
351 * so let's bail out rather than confusing the readahead stat.
352 */
343 if (unlikely(PageTransCompound(page))) 353 if (unlikely(PageTransCompound(page)))
344 return page; 354 return page;
355
345 readahead = TestClearPageReadahead(page); 356 readahead = TestClearPageReadahead(page);
346 if (vma) { 357 if (vma && vma_ra) {
347 ra_info = GET_SWAP_RA_VAL(vma); 358 unsigned long ra_val;
348 win = SWAP_RA_WIN(ra_info); 359 int win, hits;
349 hits = SWAP_RA_HITS(ra_info); 360
361 ra_val = GET_SWAP_RA_VAL(vma);
362 win = SWAP_RA_WIN(ra_val);
363 hits = SWAP_RA_HITS(ra_val);
350 if (readahead) 364 if (readahead)
351 hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX); 365 hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
352 atomic_long_set(&vma->swap_readahead_info, 366 atomic_long_set(&vma->swap_readahead_info,
353 SWAP_RA_VAL(addr, win, hits)); 367 SWAP_RA_VAL(addr, win, hits));
354 } 368 }
369
355 if (readahead) { 370 if (readahead) {
356 count_vm_event(SWAP_RA_HIT); 371 count_vm_event(SWAP_RA_HIT);
357 if (!vma) 372 if (!vma || !vma_ra)
358 atomic_inc(&swapin_readahead_hits); 373 atomic_inc(&swapin_readahead_hits);
359 } 374 }
360 } 375 }
376
361 return page; 377 return page;
362} 378}
363 379
@@ -533,11 +549,10 @@ static unsigned long swapin_nr_pages(unsigned long offset)
533} 549}
534 550
535/** 551/**
536 * swapin_readahead - swap in pages in hope we need them soon 552 * swap_cluster_readahead - swap in pages in hope we need them soon
537 * @entry: swap entry of this memory 553 * @entry: swap entry of this memory
538 * @gfp_mask: memory allocation flags 554 * @gfp_mask: memory allocation flags
539 * @vma: user vma this address belongs to 555 * @vmf: fault information
540 * @addr: target address for mempolicy
541 * 556 *
542 * Returns the struct page for entry and addr, after queueing swapin. 557 * Returns the struct page for entry and addr, after queueing swapin.
543 * 558 *
@@ -549,10 +564,10 @@ static unsigned long swapin_nr_pages(unsigned long offset)
549 * This has been extended to use the NUMA policies from the mm triggering 564 * This has been extended to use the NUMA policies from the mm triggering
550 * the readahead. 565 * the readahead.
551 * 566 *
552 * Caller must hold down_read on the vma->vm_mm if vma is not NULL. 567 * Caller must hold down_read on the vma->vm_mm if vmf->vma is not NULL.
553 */ 568 */
554struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, 569struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
555 struct vm_area_struct *vma, unsigned long addr) 570 struct vm_fault *vmf)
556{ 571{
557 struct page *page; 572 struct page *page;
558 unsigned long entry_offset = swp_offset(entry); 573 unsigned long entry_offset = swp_offset(entry);
@@ -562,6 +577,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
562 struct swap_info_struct *si = swp_swap_info(entry); 577 struct swap_info_struct *si = swp_swap_info(entry);
563 struct blk_plug plug; 578 struct blk_plug plug;
564 bool do_poll = true, page_allocated; 579 bool do_poll = true, page_allocated;
580 struct vm_area_struct *vma = vmf->vma;
581 unsigned long addr = vmf->address;
565 582
566 mask = swapin_nr_pages(offset) - 1; 583 mask = swapin_nr_pages(offset) - 1;
567 if (!mask) 584 if (!mask)
@@ -586,8 +603,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
586 continue; 603 continue;
587 if (page_allocated) { 604 if (page_allocated) {
588 swap_readpage(page, false); 605 swap_readpage(page, false);
589 if (offset != entry_offset && 606 if (offset != entry_offset) {
590 likely(!PageTransCompound(page))) {
591 SetPageReadahead(page); 607 SetPageReadahead(page);
592 count_vm_event(SWAP_RA); 608 count_vm_event(SWAP_RA);
593 } 609 }
@@ -649,16 +665,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
649 PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE)); 665 PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
650} 666}
651 667
652struct page *swap_readahead_detect(struct vm_fault *vmf, 668static void swap_ra_info(struct vm_fault *vmf,
653 struct vma_swap_readahead *swap_ra) 669 struct vma_swap_readahead *ra_info)
654{ 670{
655 struct vm_area_struct *vma = vmf->vma; 671 struct vm_area_struct *vma = vmf->vma;
656 unsigned long swap_ra_info; 672 unsigned long ra_val;
657 struct page *page;
658 swp_entry_t entry; 673 swp_entry_t entry;
659 unsigned long faddr, pfn, fpfn; 674 unsigned long faddr, pfn, fpfn;
660 unsigned long start, end; 675 unsigned long start, end;
661 pte_t *pte; 676 pte_t *pte, *orig_pte;
662 unsigned int max_win, hits, prev_win, win, left; 677 unsigned int max_win, hits, prev_win, win, left;
663#ifndef CONFIG_64BIT 678#ifndef CONFIG_64BIT
664 pte_t *tpte; 679 pte_t *tpte;
@@ -667,30 +682,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
667 max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), 682 max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
668 SWAP_RA_ORDER_CEILING); 683 SWAP_RA_ORDER_CEILING);
669 if (max_win == 1) { 684 if (max_win == 1) {
670 swap_ra->win = 1; 685 ra_info->win = 1;
671 return NULL; 686 return;
672 } 687 }
673 688
674 faddr = vmf->address; 689 faddr = vmf->address;
675 entry = pte_to_swp_entry(vmf->orig_pte); 690 orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
676 if ((unlikely(non_swap_entry(entry)))) 691 entry = pte_to_swp_entry(*pte);
677 return NULL; 692 if ((unlikely(non_swap_entry(entry)))) {
678 page = lookup_swap_cache(entry, vma, faddr); 693 pte_unmap(orig_pte);
679 if (page) 694 return;
680 return page; 695 }
681 696
682 fpfn = PFN_DOWN(faddr); 697 fpfn = PFN_DOWN(faddr);
683 swap_ra_info = GET_SWAP_RA_VAL(vma); 698 ra_val = GET_SWAP_RA_VAL(vma);
684 pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info)); 699 pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
685 prev_win = SWAP_RA_WIN(swap_ra_info); 700 prev_win = SWAP_RA_WIN(ra_val);
686 hits = SWAP_RA_HITS(swap_ra_info); 701 hits = SWAP_RA_HITS(ra_val);
687 swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits, 702 ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
688 max_win, prev_win); 703 max_win, prev_win);
689 atomic_long_set(&vma->swap_readahead_info, 704 atomic_long_set(&vma->swap_readahead_info,
690 SWAP_RA_VAL(faddr, win, 0)); 705 SWAP_RA_VAL(faddr, win, 0));
691 706
692 if (win == 1) 707 if (win == 1) {
693 return NULL; 708 pte_unmap(orig_pte);
709 return;
710 }
694 711
695 /* Copy the PTEs because the page table may be unmapped */ 712 /* Copy the PTEs because the page table may be unmapped */
696 if (fpfn == pfn + 1) 713 if (fpfn == pfn + 1)
@@ -703,23 +720,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
703 swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left, 720 swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
704 &start, &end); 721 &start, &end);
705 } 722 }
706 swap_ra->nr_pte = end - start; 723 ra_info->nr_pte = end - start;
707 swap_ra->offset = fpfn - start; 724 ra_info->offset = fpfn - start;
708 pte = vmf->pte - swap_ra->offset; 725 pte -= ra_info->offset;
709#ifdef CONFIG_64BIT 726#ifdef CONFIG_64BIT
710 swap_ra->ptes = pte; 727 ra_info->ptes = pte;
711#else 728#else
712 tpte = swap_ra->ptes; 729 tpte = ra_info->ptes;
713 for (pfn = start; pfn != end; pfn++) 730 for (pfn = start; pfn != end; pfn++)
714 *tpte++ = *pte++; 731 *tpte++ = *pte++;
715#endif 732#endif
716 733 pte_unmap(orig_pte);
717 return NULL;
718} 734}
719 735
720struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, 736static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
721 struct vm_fault *vmf, 737 struct vm_fault *vmf)
722 struct vma_swap_readahead *swap_ra)
723{ 738{
724 struct blk_plug plug; 739 struct blk_plug plug;
725 struct vm_area_struct *vma = vmf->vma; 740 struct vm_area_struct *vma = vmf->vma;
@@ -728,12 +743,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
728 swp_entry_t entry; 743 swp_entry_t entry;
729 unsigned int i; 744 unsigned int i;
730 bool page_allocated; 745 bool page_allocated;
746 struct vma_swap_readahead ra_info = {0,};
731 747
732 if (swap_ra->win == 1) 748 swap_ra_info(vmf, &ra_info);
749 if (ra_info.win == 1)
733 goto skip; 750 goto skip;
734 751
735 blk_start_plug(&plug); 752 blk_start_plug(&plug);
736 for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte; 753 for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
737 i++, pte++) { 754 i++, pte++) {
738 pentry = *pte; 755 pentry = *pte;
739 if (pte_none(pentry)) 756 if (pte_none(pentry))
@@ -749,8 +766,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
749 continue; 766 continue;
750 if (page_allocated) { 767 if (page_allocated) {
751 swap_readpage(page, false); 768 swap_readpage(page, false);
752 if (i != swap_ra->offset && 769 if (i != ra_info.offset) {
753 likely(!PageTransCompound(page))) {
754 SetPageReadahead(page); 770 SetPageReadahead(page);
755 count_vm_event(SWAP_RA); 771 count_vm_event(SWAP_RA);
756 } 772 }
@@ -761,23 +777,43 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
761 lru_add_drain(); 777 lru_add_drain();
762skip: 778skip:
763 return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address, 779 return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
764 swap_ra->win == 1); 780 ra_info.win == 1);
781}
782
783/**
784 * swapin_readahead - swap in pages in hope we need them soon
785 * @entry: swap entry of this memory
786 * @gfp_mask: memory allocation flags
787 * @vmf: fault information
788 *
789 * Returns the struct page for entry and addr, after queueing swapin.
790 *
 791 * It's the main entry function for swap readahead. Depending on the config,
 792 * it reads ahead blocks using cluster-based (i.e. physical disk based) or
 793 * VMA-based (i.e. virtual address based on the faulting address) readahead.
794 */
795struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
796 struct vm_fault *vmf)
797{
798 return swap_use_vma_readahead() ?
799 swap_vma_readahead(entry, gfp_mask, vmf) :
800 swap_cluster_readahead(entry, gfp_mask, vmf);
765} 801}
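swapin_readahead() now takes the whole struct vm_fault rather than a vma/address pair, so either readahead flavour can pull what it needs from the fault. A hedged sketch of the shape of a caller (this is not the real do_swap_page() code; error handling and locking are omitted):

/* Kernel-context sketch of a caller; simplified, not the real fault path. */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapops.h>

static struct page *demo_swap_in(struct vm_fault *vmf)
{
	swp_entry_t entry = pte_to_swp_entry(vmf->orig_pte);
	struct page *page;

	/* Try the swap cache first ... */
	page = lookup_swap_cache(entry, vmf->vma, vmf->address);
	if (page)
		return page;

	/* ... then let swapin_readahead() pick cluster- or VMA-based readahead. */
	return swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf);
}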
766 802
767#ifdef CONFIG_SYSFS 803#ifdef CONFIG_SYSFS
768static ssize_t vma_ra_enabled_show(struct kobject *kobj, 804static ssize_t vma_ra_enabled_show(struct kobject *kobj,
769 struct kobj_attribute *attr, char *buf) 805 struct kobj_attribute *attr, char *buf)
770{ 806{
771 return sprintf(buf, "%s\n", swap_vma_readahead ? "true" : "false"); 807 return sprintf(buf, "%s\n", enable_vma_readahead ? "true" : "false");
772} 808}
773static ssize_t vma_ra_enabled_store(struct kobject *kobj, 809static ssize_t vma_ra_enabled_store(struct kobject *kobj,
774 struct kobj_attribute *attr, 810 struct kobj_attribute *attr,
775 const char *buf, size_t count) 811 const char *buf, size_t count)
776{ 812{
777 if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1)) 813 if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
778 swap_vma_readahead = true; 814 enable_vma_readahead = true;
779 else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1)) 815 else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
780 swap_vma_readahead = false; 816 enable_vma_readahead = false;
781 else 817 else
782 return -EINVAL; 818 return -EINVAL;
783 819
diff --git a/mm/util.c b/mm/util.c
index c1250501364f..029fc2f3b395 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -515,6 +515,16 @@ struct address_space *page_mapping(struct page *page)
515} 515}
516EXPORT_SYMBOL(page_mapping); 516EXPORT_SYMBOL(page_mapping);
517 517
518/*
519 * For file cache pages, return the address_space, otherwise return NULL
520 */
521struct address_space *page_mapping_file(struct page *page)
522{
523 if (unlikely(PageSwapCache(page)))
524 return NULL;
525 return page_mapping(page);
526}
527
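page_mapping_file() lets callers that only care about file-backed pages skip swap-cache pages, whose page_mapping() would be the swap address_space. A hedged sketch of the kind of cache-maintenance check this helper is aimed at (the function below is made up):

/* Kernel-context sketch: only act on pages that map a file, ignoring the
 * swap cache.  "demo_flush_needed" is an illustrative helper. */
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/fs.h>

static bool demo_flush_needed(struct page *page)
{
	struct address_space *mapping = page_mapping_file(page);

	/* NULL for anonymous and swap-cache pages: nothing file-backed to sync. */
	if (!mapping)
		return false;

	/* Only bother if somebody has the file mapped into user space. */
	return mapping_mapped(mapping);
}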
518/* Slow path of page_mapcount() for compound pages */ 528/* Slow path of page_mapcount() for compound pages */
519int __page_mapcount(struct page *page) 529int __page_mapcount(struct page *page)
520{ 530{
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cd5dc3faaa57..4390a8d5be41 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -442,16 +442,8 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
442 if (memcg && (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))) 442 if (memcg && (!memcg_kmem_enabled() || !mem_cgroup_online(memcg)))
443 return 0; 443 return 0;
444 444
445 if (!down_read_trylock(&shrinker_rwsem)) { 445 if (!down_read_trylock(&shrinker_rwsem))
446 /*
447 * If we would return 0, our callers would understand that we
448 * have nothing else to shrink and give up trying. By returning
449 * 1 we keep it going and assume we'll be able to shrink next
450 * time.
451 */
452 freed = 1;
453 goto out; 446 goto out;
454 }
455 447
456 list_for_each_entry(shrinker, &shrinker_list, list) { 448 list_for_each_entry(shrinker, &shrinker_list, list) {
457 struct shrink_control sc = { 449 struct shrink_control sc = {
@@ -3547,16 +3539,21 @@ kswapd_try_sleep:
3547} 3539}
3548 3540
3549/* 3541/*
3550 * A zone is low on free memory, so wake its kswapd task to service it. 3542 * A zone is low on free memory or too fragmented for high-order memory. If
3543 * kswapd should reclaim (direct reclaim is deferred), wake it up for the zone's
3544 * pgdat. It will wake up kcompactd after reclaiming memory. If kswapd reclaim
3545 * has failed or is not needed, still wake up kcompactd if only compaction is
3546 * needed.
3551 */ 3547 */
3552void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) 3548void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
3549 enum zone_type classzone_idx)
3553{ 3550{
3554 pg_data_t *pgdat; 3551 pg_data_t *pgdat;
3555 3552
3556 if (!managed_zone(zone)) 3553 if (!managed_zone(zone))
3557 return; 3554 return;
3558 3555
3559 if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL)) 3556 if (!cpuset_zone_allowed(zone, gfp_flags))
3560 return; 3557 return;
3561 pgdat = zone->zone_pgdat; 3558 pgdat = zone->zone_pgdat;
3562 pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, 3559 pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat,
@@ -3565,14 +3562,23 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
3565 if (!waitqueue_active(&pgdat->kswapd_wait)) 3562 if (!waitqueue_active(&pgdat->kswapd_wait))
3566 return; 3563 return;
3567 3564
3568 /* Hopeless node, leave it to direct reclaim */ 3565 /* Hopeless node, leave it to direct reclaim if possible */
3569 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) 3566 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ||
3570 return; 3567 pgdat_balanced(pgdat, order, classzone_idx)) {
3571 3568 /*
3572 if (pgdat_balanced(pgdat, order, classzone_idx)) 3569 * There may be plenty of free memory available, but it's too
3570 * fragmented for high-order allocations. Wake up kcompactd
3571 * and rely on compaction_suitable() to determine if it's
3572 * needed. If it fails, it will defer subsequent attempts to
3573 * ratelimit its work.
3574 */
3575 if (!(gfp_flags & __GFP_DIRECT_RECLAIM))
3576 wakeup_kcompactd(pgdat, order, classzone_idx);
3573 return; 3577 return;
3578 }
3574 3579
3575 trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, classzone_idx, order); 3580 trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, classzone_idx, order,
3581 gfp_flags);
3576 wake_up_interruptible(&pgdat->kswapd_wait); 3582 wake_up_interruptible(&pgdat->kswapd_wait);
3577} 3583}
3578 3584
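Taken together, the new wakeup_kswapd() answers two questions: should kswapd reclaim at all, and if not, is kicking kcompactd still worthwhile for a caller that cannot reclaim directly? A distilled in-file sketch of that decision flow (it leans on the same mm/vmscan.c helpers as the hunk above and omits the early checks and tracing; not a drop-in replacement):

/* In-file sketch for mm/vmscan.c: distilled decision flow of wakeup_kswapd(). */
static void wakeup_kswapd_sketch(struct zone *zone, gfp_t gfp_flags, int order,
				 enum zone_type classzone_idx)
{
	pg_data_t *pgdat = zone->zone_pgdat;

	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ||
	    pgdat_balanced(pgdat, order, classzone_idx)) {
		/* Reclaim is hopeless or not needed; memory may still be too
		 * fragmented, so wake kcompactd on behalf of callers that
		 * cannot enter direct reclaim themselves. */
		if (!(gfp_flags & __GFP_DIRECT_RECLAIM))
			wakeup_kcompactd(pgdat, order, classzone_idx);
		return;
	}

	wake_up_interruptible(&pgdat->kswapd_wait);	/* normal kswapd wakeup */
}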
@@ -3877,7 +3883,13 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
3877 */ 3883 */
3878int page_evictable(struct page *page) 3884int page_evictable(struct page *page)
3879{ 3885{
3880 return !mapping_unevictable(page_mapping(page)) && !PageMlocked(page); 3886 int ret;
3887
3888 /* Prevent address_space of inode and swap cache from being freed */
3889 rcu_read_lock();
3890 ret = !mapping_unevictable(page_mapping(page)) && !PageMlocked(page);
3891 rcu_read_unlock();
3892 return ret;
3881} 3893}
3882 3894
3883#ifdef CONFIG_SHMEM 3895#ifdef CONFIG_SHMEM
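page_evictable() can race with the inode or swap cache tearing down the page's address_space, so the mapping is now inspected under rcu_read_lock(), which keeps the structure alive for the duration of the check. The same pin-inspect-unpin idiom in a hedged kernel-context sketch:

/* Kernel-context sketch of the pin-inspect-unpin pattern used above; the
 * predicate mirrors page_evictable() purely for illustration. */
#include <linux/rcupdate.h>
#include <linux/pagemap.h>
#include <linux/mm.h>

static bool demo_page_is_unevictable(struct page *page)
{
	bool ret;

	rcu_read_lock();	/* pin: the address_space may otherwise be freed */
	ret = mapping_unevictable(page_mapping(page)) || PageMlocked(page);
	rcu_read_unlock();	/* unpin */

	return ret;
}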
diff --git a/mm/z3fold.c b/mm/z3fold.c
index d589d318727f..f579ad4a8100 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -620,24 +620,27 @@ lookup:
620 bud = FIRST; 620 bud = FIRST;
621 } 621 }
622 622
623 spin_lock(&pool->stale_lock); 623 page = NULL;
624 zhdr = list_first_entry_or_null(&pool->stale, 624 if (can_sleep) {
625 struct z3fold_header, buddy); 625 spin_lock(&pool->stale_lock);
626 /* 626 zhdr = list_first_entry_or_null(&pool->stale,
627 * Before allocating a page, let's see if we can take one from the 627 struct z3fold_header, buddy);
628 * stale pages list. cancel_work_sync() can sleep so we must make 628 /*
629 * sure it won't be called in case we're in atomic context. 629 * Before allocating a page, let's see if we can take one from
630 */ 630 * the stale pages list. cancel_work_sync() can sleep so we
631 if (zhdr && (can_sleep || !work_pending(&zhdr->work))) { 631 * limit this case to the contexts where we can sleep
632 list_del(&zhdr->buddy); 632 */
633 spin_unlock(&pool->stale_lock); 633 if (zhdr) {
634 if (can_sleep) 634 list_del(&zhdr->buddy);
635 spin_unlock(&pool->stale_lock);
635 cancel_work_sync(&zhdr->work); 636 cancel_work_sync(&zhdr->work);
636 page = virt_to_page(zhdr); 637 page = virt_to_page(zhdr);
637 } else { 638 } else {
638 spin_unlock(&pool->stale_lock); 639 spin_unlock(&pool->stale_lock);
639 page = alloc_page(gfp); 640 }
640 } 641 }
642 if (!page)
643 page = alloc_page(gfp);
641 644
642 if (!page) 645 if (!page)
643 return -ENOMEM; 646 return -ENOMEM;
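The reworked z3fold allocation path only consults the stale list when the caller may sleep, because recycling a stale page can involve cancel_work_sync(); otherwise it falls straight through to alloc_page(). A hedged sketch of that gating, assuming can_sleep is derived from gfpflags_allow_blocking() (the derivation is not shown in this hunk):

/* Kernel-context sketch: only take the code path that may sleep when the gfp
 * mask allows blocking. */
#include <linux/gfp.h>

static struct page *demo_get_page(gfp_t gfp)
{
	struct page *page = NULL;

	if (gfpflags_allow_blocking(gfp)) {
		/* Safe to call sleeping primitives such as cancel_work_sync()
		 * here, e.g. to recycle a stale page before allocating. */
	}

	if (!page)			/* common fallback for both cases */
		page = alloc_page(gfp);

	return page;
}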
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index b7f61cd1c709..61cb05dc950c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -193,6 +193,7 @@ static struct vfsmount *zsmalloc_mnt;
193 * (see: fix_fullness_group()) 193 * (see: fix_fullness_group())
194 */ 194 */
195static const int fullness_threshold_frac = 4; 195static const int fullness_threshold_frac = 4;
196static size_t huge_class_size;
196 197
197struct size_class { 198struct size_class {
198 spinlock_t lock; 199 spinlock_t lock;
@@ -642,18 +643,7 @@ static int zs_stats_size_show(struct seq_file *s, void *v)
642 643
643 return 0; 644 return 0;
644} 645}
645 646DEFINE_SHOW_ATTRIBUTE(zs_stats_size);
646static int zs_stats_size_open(struct inode *inode, struct file *file)
647{
648 return single_open(file, zs_stats_size_show, inode->i_private);
649}
650
651static const struct file_operations zs_stat_size_ops = {
652 .open = zs_stats_size_open,
653 .read = seq_read,
654 .llseek = seq_lseek,
655 .release = single_release,
656};
657 647
658static void zs_pool_stat_create(struct zs_pool *pool, const char *name) 648static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
659{ 649{
@@ -672,7 +662,7 @@ static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
672 pool->stat_dentry = entry; 662 pool->stat_dentry = entry;
673 663
674 entry = debugfs_create_file("classes", S_IFREG | S_IRUGO, 664 entry = debugfs_create_file("classes", S_IFREG | S_IRUGO,
675 pool->stat_dentry, pool, &zs_stat_size_ops); 665 pool->stat_dentry, pool, &zs_stats_size_fops);
676 if (!entry) { 666 if (!entry) {
677 pr_warn("%s: debugfs file entry <%s> creation failed\n", 667 pr_warn("%s: debugfs file entry <%s> creation failed\n",
678 name, "classes"); 668 name, "classes");
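DEFINE_SHOW_ATTRIBUTE(zs_stats_size) generates the open handler and file_operations that were previously written out by hand; the generated names end in _open and _fops, which is why debugfs_create_file() now passes &zs_stats_size_fops. Roughly, the macro expands to the equivalent of the deleted boilerplate (see include/linux/seq_file.h for the authoritative definition):

/* Approximate expansion of DEFINE_SHOW_ATTRIBUTE(zs_stats_size); compare with
 * the boilerplate removed in the hunk above. */
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/fs.h>

static int zs_stats_size_open(struct inode *inode, struct file *file)
{
	return single_open(file, zs_stats_size_show, inode->i_private);
}

static const struct file_operations zs_stats_size_fops = {
	.owner		= THIS_MODULE,
	.open		= zs_stats_size_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};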
@@ -861,6 +851,7 @@ static struct page *get_next_page(struct page *page)
861 851
862/** 852/**
863 * obj_to_location - get (<page>, <obj_idx>) from encoded object value 853 * obj_to_location - get (<page>, <obj_idx>) from encoded object value
854 * @obj: the encoded object value
864 * @page: page object resides in zspage 855 * @page: page object resides in zspage
865 * @obj_idx: object index 856 * @obj_idx: object index
866 */ 857 */
@@ -1311,6 +1302,7 @@ EXPORT_SYMBOL_GPL(zs_get_total_pages);
1311 * zs_map_object - get address of allocated object from handle. 1302 * zs_map_object - get address of allocated object from handle.
1312 * @pool: pool from which the object was allocated 1303 * @pool: pool from which the object was allocated
1313 * @handle: handle returned from zs_malloc 1304 * @handle: handle returned from zs_malloc
 1305 * @mm: mapping mode to use
1314 * 1306 *
1315 * Before using an object allocated from zs_malloc, it must be mapped using 1307 * Before using an object allocated from zs_malloc, it must be mapped using
1316 * this function. When done with the object, it must be unmapped using 1308 * this function. When done with the object, it must be unmapped using
@@ -1418,6 +1410,25 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
1418} 1410}
1419EXPORT_SYMBOL_GPL(zs_unmap_object); 1411EXPORT_SYMBOL_GPL(zs_unmap_object);
1420 1412
1413/**
1414 * zs_huge_class_size() - Returns the size (in bytes) of the first huge
1415 * zsmalloc &size_class.
1416 * @pool: zsmalloc pool to use
1417 *
1418 * The function returns the size of the first huge class - any object of equal
 1419 * or bigger size will be stored in a zspage consisting of a single physical
1420 * page.
1421 *
1422 * Context: Any context.
1423 *
1424 * Return: the size (in bytes) of the first huge zsmalloc &size_class.
1425 */
1426size_t zs_huge_class_size(struct zs_pool *pool)
1427{
1428 return huge_class_size;
1429}
1430EXPORT_SYMBOL_GPL(zs_huge_class_size);
1431
1421static unsigned long obj_malloc(struct size_class *class, 1432static unsigned long obj_malloc(struct size_class *class,
1422 struct zspage *zspage, unsigned long handle) 1433 struct zspage *zspage, unsigned long handle)
1423{ 1434{
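With zs_huge_class_size() exported, a zsmalloc user such as zram can ask the pool where the huge classes begin instead of hard-coding a max_zpage_size threshold. A hedged kernel-context usage sketch (what the caller does with the value is up to it; error handling trimmed):

/* Kernel-context sketch: query the first huge class size after creating a
 * pool and use it as a "don't bother compressing further" threshold. */
#include <linux/zsmalloc.h>
#include <linux/kernel.h>

static size_t demo_huge_threshold(void)
{
	struct zs_pool *pool;
	size_t huge;

	pool = zs_create_pool("demo");
	if (!pool)
		return 0;

	/* Objects of this size or larger occupy a whole page each. */
	huge = zs_huge_class_size(pool);
	pr_info("zsmalloc huge class starts at %zu bytes\n", huge);

	zs_destroy_pool(pool);
	return huge;
}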
@@ -2375,6 +2386,27 @@ struct zs_pool *zs_create_pool(const char *name)
2375 objs_per_zspage = pages_per_zspage * PAGE_SIZE / size; 2386 objs_per_zspage = pages_per_zspage * PAGE_SIZE / size;
2376 2387
2377 /* 2388 /*
2389 * We iterate from biggest down to smallest classes,
2390 * so huge_class_size holds the size of the first huge
2391 * class. Any object bigger than or equal to that will
 2392 * end up in the huge class.
2393 */
2394 if (pages_per_zspage != 1 && objs_per_zspage != 1 &&
2395 !huge_class_size) {
2396 huge_class_size = size;
2397 /*
2398 * The object uses ZS_HANDLE_SIZE bytes to store the
2399 * handle. We need to subtract it, because zs_malloc()
2400 * unconditionally adds handle size before it performs
2401 * size class search - so object may be smaller than
2402 * huge class size, yet it still can end up in the huge
2403 * class because it grows by ZS_HANDLE_SIZE extra bytes
2404 * right before class lookup.
2405 */
2406 huge_class_size -= (ZS_HANDLE_SIZE - 1);
2407 }
2408
2409 /*
2378 * size_class is used for normal zsmalloc operation such 2410 * size_class is used for normal zsmalloc operation such
2379 * as alloc/free for that size. Although it is natural that we 2411 * as alloc/free for that size. Although it is natural that we
2380 * have one size_class for each size, there is a chance that we 2412 * have one size_class for each size, there is a chance that we
diff --git a/net/9p/client.c b/net/9p/client.c
index b433aff5ff13..21e6df1cc70f 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -190,7 +190,9 @@ static int parse_opts(char *opts, struct p9_client *clnt)
190 p9_debug(P9_DEBUG_ERROR, 190 p9_debug(P9_DEBUG_ERROR,
191 "problem allocating copy of trans arg\n"); 191 "problem allocating copy of trans arg\n");
192 goto free_and_return; 192 goto free_and_return;
193 } 193 }
194
195 v9fs_put_trans(clnt->trans_mod);
194 clnt->trans_mod = v9fs_get_trans_by_name(s); 196 clnt->trans_mod = v9fs_get_trans_by_name(s);
195 if (clnt->trans_mod == NULL) { 197 if (clnt->trans_mod == NULL) {
196 pr_info("Could not find request transport: %s\n", 198 pr_info("Could not find request transport: %s\n",
@@ -226,6 +228,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
226 } 228 }
227 229
228free_and_return: 230free_and_return:
231 v9fs_put_trans(clnt->trans_mod);
229 kfree(tmp_options); 232 kfree(tmp_options);
230 return ret; 233 return ret;
231} 234}
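The parse_opts() hunks pair v9fs_get_trans_by_name() with v9fs_put_trans() so that overwriting clnt->trans_mod no longer leaks a reference to the previously selected transport module. A simplified kernel-context sketch of that replace-and-release discipline (not the full option parser):

/* Kernel-context sketch of the get/put balancing for clnt->trans_mod. */
#include <net/9p/client.h>
#include <net/9p/transport.h>

static int demo_set_transport(struct p9_client *clnt, char *name)
{
	/* Drop whatever transport we were holding before overwriting it ... */
	v9fs_put_trans(clnt->trans_mod);

	/* ... and take a reference on the newly requested one. */
	clnt->trans_mod = v9fs_get_trans_by_name(name);
	if (!clnt->trans_mod)
		return -EINVAL;

	return 0;
}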
@@ -769,7 +772,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
769 if (err < 0) { 772 if (err < 0) {
770 if (err != -ERESTARTSYS && err != -EFAULT) 773 if (err != -ERESTARTSYS && err != -EFAULT)
771 c->status = Disconnected; 774 c->status = Disconnected;
772 goto reterr; 775 goto recalc_sigpending;
773 } 776 }
774again: 777again:
775 /* Wait for the response */ 778 /* Wait for the response */
@@ -804,6 +807,7 @@ again:
804 if (req->status == REQ_STATUS_RCVD) 807 if (req->status == REQ_STATUS_RCVD)
805 err = 0; 808 err = 0;
806 } 809 }
810recalc_sigpending:
807 if (sigpending) { 811 if (sigpending) {
808 spin_lock_irqsave(&current->sighand->siglock, flags); 812 spin_lock_irqsave(&current->sighand->siglock, flags);
809 recalc_sigpending(); 813 recalc_sigpending();
@@ -867,7 +871,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
867 if (err == -EIO) 871 if (err == -EIO)
868 c->status = Disconnected; 872 c->status = Disconnected;
869 if (err != -ERESTARTSYS) 873 if (err != -ERESTARTSYS)
870 goto reterr; 874 goto recalc_sigpending;
871 } 875 }
872 if (req->status == REQ_STATUS_ERROR) { 876 if (req->status == REQ_STATUS_ERROR) {
873 p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); 877 p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
@@ -885,6 +889,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
885 if (req->status == REQ_STATUS_RCVD) 889 if (req->status == REQ_STATUS_RCVD)
886 err = 0; 890 err = 0;
887 } 891 }
892recalc_sigpending:
888 if (sigpending) { 893 if (sigpending) {
889 spin_lock_irqsave(&current->sighand->siglock, flags); 894 spin_lock_irqsave(&current->sighand->siglock, flags);
890 recalc_sigpending(); 895 recalc_sigpending();
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b3b609f0eeb5..b1a2c5e38530 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -15,7 +15,6 @@
15#include <linux/vmalloc.h> 15#include <linux/vmalloc.h>
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/kmemleak.h>
19 18
20#include <net/ip.h> 19#include <net/ip.h>
21#include <net/sock.h> 20#include <net/sock.h>
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8322e479f299..594a1c605c92 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -108,7 +108,6 @@
108#include <net/rtnetlink.h> 108#include <net/rtnetlink.h>
109#ifdef CONFIG_SYSCTL 109#ifdef CONFIG_SYSCTL
110#include <linux/sysctl.h> 110#include <linux/sysctl.h>
111#include <linux/kmemleak.h>
112#endif 111#endif
113#include <net/secure_seq.h> 112#include <net/secure_seq.h>
114#include <net/ip_tunnels.h> 113#include <net/ip_tunnels.h>
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 7721d5b2b0c0..9e5735a4d3a5 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -163,7 +163,17 @@ __faddr2line() {
163 163
164 # pass real address to addr2line 164 # pass real address to addr2line
165 echo "$func+$offset/$sym_size:" 165 echo "$func+$offset/$sym_size:"
166 ${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" 166 local file_lines=$(${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;")
167 [[ -z $file_lines ]] && return
168
169 # show each line with context
170 echo "$file_lines" | while read -r line
171 do
172 echo $line
173 eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}')
174 awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f
175 done
176
167 DONE=1 177 DONE=1
168 178
169 done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') 179 done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }')
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 9a65eeaf7dfa..6134302c143c 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -23,7 +23,6 @@
23#include <linux/sysctl.h> 23#include <linux/sysctl.h>
24#include <linux/audit.h> 24#include <linux/audit.h>
25#include <linux/user_namespace.h> 25#include <linux/user_namespace.h>
26#include <linux/kmemleak.h>
27#include <net/sock.h> 26#include <net/sock.h>
28 27
29#include "include/apparmor.h" 28#include "include/apparmor.h"
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index fa728f662a6f..933623784ccd 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -18,6 +18,7 @@
18#include <linux/err.h> 18#include <linux/err.h>
19#include <linux/scatterlist.h> 19#include <linux/scatterlist.h>
20#include <linux/random.h> 20#include <linux/random.h>
21#include <linux/vmalloc.h>
21#include <keys/user-type.h> 22#include <keys/user-type.h>
22#include <keys/big_key-type.h> 23#include <keys/big_key-type.h>
23#include <crypto/aead.h> 24#include <crypto/aead.h>