path: root/mm
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig             3
-rw-r--r--  mm/Makefile            2
-rw-r--r--  mm/backing-dev.c      18
-rw-r--r--  mm/bootmem.c          24
-rw-r--r--  mm/highmem.c           7
-rw-r--r--  mm/memblock.c        541
-rw-r--r--  mm/memcontrol.c        4
-rw-r--r--  mm/memory-failure.c   33
-rw-r--r--  mm/memory.c           16
-rw-r--r--  mm/mempolicy.c         9
-rw-r--r--  mm/mmap.c              6
-rw-r--r--  mm/page-writeback.c   38
-rw-r--r--  mm/page_alloc.c        8
-rw-r--r--  mm/page_cgroup.c       7
-rw-r--r--  mm/percpu.c          121
-rw-r--r--  mm/slab.c              3
-rw-r--r--  mm/slob.c             14
-rw-r--r--  mm/slub.c             87
-rw-r--r--  mm/vmalloc.c           2
-rw-r--r--  mm/vmscan.c           10
20 files changed, 815 insertions, 138 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 527136b22384..f4e516e9c37c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -128,6 +128,9 @@ config SPARSEMEM_VMEMMAP
128 pfn_to_page and page_to_pfn operations. This is the most 128 pfn_to_page and page_to_pfn operations. This is the most
129 efficient option when sufficient kernel resources are available. 129 efficient option when sufficient kernel resources are available.
130 130
131config HAVE_MEMBLOCK
132 boolean
133
131# eventually, we can have this option just 'select SPARSEMEM' 134# eventually, we can have this option just 'select SPARSEMEM'
132config MEMORY_HOTPLUG 135config MEMORY_HOTPLUG
133 bool "Allow for memory hot-add" 136 bool "Allow for memory hot-add"
diff --git a/mm/Makefile b/mm/Makefile
index 8982504bd03b..34b2546a9e37 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -15,6 +15,8 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
15 $(mmu-y) 15 $(mmu-y)
16obj-y += init-mm.o 16obj-y += init-mm.o
17 17
18obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
19
18obj-$(CONFIG_BOUNCE) += bounce.o 20obj-$(CONFIG_BOUNCE) += bounce.o
19obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o 21obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
20obj-$(CONFIG_HAS_DMA) += dmapool.o 22obj-$(CONFIG_HAS_DMA) += dmapool.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 660a87a22511..f9fd3dd3916b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -104,15 +104,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
104 "b_more_io: %8lu\n" 104 "b_more_io: %8lu\n"
105 "bdi_list: %8u\n" 105 "bdi_list: %8u\n"
106 "state: %8lx\n" 106 "state: %8lx\n"
107 "wb_mask: %8lx\n" 107 "wb_list: %8u\n",
108 "wb_list: %8u\n"
109 "wb_cnt: %8u\n",
110 (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), 108 (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
111 (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)), 109 (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
112 K(bdi_thresh), K(dirty_thresh), 110 K(bdi_thresh), K(dirty_thresh),
113 K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io, 111 K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
114 !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask, 112 !list_empty(&bdi->bdi_list), bdi->state,
115 !list_empty(&bdi->wb_list), bdi->wb_cnt); 113 !list_empty(&bdi->wb_list));
116#undef K 114#undef K
117 115
118 return 0; 116 return 0;
@@ -340,14 +338,13 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
340static void bdi_flush_io(struct backing_dev_info *bdi) 338static void bdi_flush_io(struct backing_dev_info *bdi)
341{ 339{
342 struct writeback_control wbc = { 340 struct writeback_control wbc = {
343 .bdi = bdi,
344 .sync_mode = WB_SYNC_NONE, 341 .sync_mode = WB_SYNC_NONE,
345 .older_than_this = NULL, 342 .older_than_this = NULL,
346 .range_cyclic = 1, 343 .range_cyclic = 1,
347 .nr_to_write = 1024, 344 .nr_to_write = 1024,
348 }; 345 };
349 346
350 writeback_inodes_wbc(&wbc); 347 writeback_inodes_wb(&bdi->wb, &wbc);
351} 348}
352 349
353/* 350/*
@@ -668,19 +665,12 @@ int bdi_init(struct backing_dev_info *bdi)
668 bdi->max_ratio = 100; 665 bdi->max_ratio = 100;
669 bdi->max_prop_frac = PROP_FRAC_BASE; 666 bdi->max_prop_frac = PROP_FRAC_BASE;
670 spin_lock_init(&bdi->wb_lock); 667 spin_lock_init(&bdi->wb_lock);
671 INIT_RCU_HEAD(&bdi->rcu_head);
672 INIT_LIST_HEAD(&bdi->bdi_list); 668 INIT_LIST_HEAD(&bdi->bdi_list);
673 INIT_LIST_HEAD(&bdi->wb_list); 669 INIT_LIST_HEAD(&bdi->wb_list);
674 INIT_LIST_HEAD(&bdi->work_list); 670 INIT_LIST_HEAD(&bdi->work_list);
675 671
676 bdi_wb_init(&bdi->wb, bdi); 672 bdi_wb_init(&bdi->wb, bdi);
677 673
678 /*
679 * Just one thread support for now, hard code mask and count
680 */
681 bdi->wb_mask = 1;
682 bdi->wb_cnt = 1;
683
684 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { 674 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
685 err = percpu_counter_init(&bdi->bdi_stat[i], 0); 675 err = percpu_counter_init(&bdi->bdi_stat[i], 0);
686 if (err) 676 if (err)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 58c66cc5056a..142c84a54993 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
833void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, 833void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
834 unsigned long align, unsigned long goal) 834 unsigned long align, unsigned long goal)
835{ 835{
836 void *ptr;
837
836 if (WARN_ON_ONCE(slab_is_available())) 838 if (WARN_ON_ONCE(slab_is_available()))
837 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 839 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
838 840
839#ifdef CONFIG_NO_BOOTMEM 841#ifdef CONFIG_NO_BOOTMEM
840 return __alloc_memory_core_early(pgdat->node_id, size, align, 842 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
843 goal, -1ULL);
844 if (ptr)
845 return ptr;
846
847 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
841 goal, -1ULL); 848 goal, -1ULL);
842#else 849#else
843 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); 850 ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
844#endif 851#endif
852
853 return ptr;
845} 854}
846 855
847void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, 856void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
977void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, 986void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
978 unsigned long align, unsigned long goal) 987 unsigned long align, unsigned long goal)
979{ 988{
989 void *ptr;
990
980 if (WARN_ON_ONCE(slab_is_available())) 991 if (WARN_ON_ONCE(slab_is_available()))
981 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 992 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
982 993
983#ifdef CONFIG_NO_BOOTMEM 994#ifdef CONFIG_NO_BOOTMEM
984 return __alloc_memory_core_early(pgdat->node_id, size, align, 995 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
996 goal, ARCH_LOW_ADDRESS_LIMIT);
997 if (ptr)
998 return ptr;
999 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
985 goal, ARCH_LOW_ADDRESS_LIMIT); 1000 goal, ARCH_LOW_ADDRESS_LIMIT);
986#else 1001#else
987 return ___alloc_bootmem_node(pgdat->bdata, size, align, 1002 ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
988 goal, ARCH_LOW_ADDRESS_LIMIT); 1003 goal, ARCH_LOW_ADDRESS_LIMIT);
989#endif 1004#endif
1005 return ptr;
990} 1006}
diff --git a/mm/highmem.c b/mm/highmem.c
index 66baa20f78f5..7a0aa1be4993 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -26,6 +26,7 @@
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/hash.h> 27#include <linux/hash.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/kgdb.h>
29#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
30 31
31/* 32/*
@@ -470,6 +471,12 @@ void debug_kmap_atomic(enum km_type type)
470 warn_count--; 471 warn_count--;
471 } 472 }
472 } 473 }
474#ifdef CONFIG_KGDB_KDB
475 if (unlikely(type == KM_KDB && atomic_read(&kgdb_active) == -1)) {
476 WARN_ON(1);
477 warn_count--;
478 }
479#endif /* CONFIG_KGDB_KDB */
473} 480}
474 481
475#endif 482#endif
diff --git a/mm/memblock.c b/mm/memblock.c
new file mode 100644
index 000000000000..3024eb30fc27
--- /dev/null
+++ b/mm/memblock.c
@@ -0,0 +1,541 @@
1/*
2 * Procedures for maintaining information about logical memory blocks.
3 *
4 * Peter Bergner, IBM Corp. June 2001.
5 * Copyright (C) 2001 Peter Bergner.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/bitops.h>
16#include <linux/memblock.h>
17
18#define MEMBLOCK_ALLOC_ANYWHERE 0
19
20struct memblock memblock;
21
22static int memblock_debug;
23
24static int __init early_memblock(char *p)
25{
26 if (p && strstr(p, "debug"))
27 memblock_debug = 1;
28 return 0;
29}
30early_param("memblock", early_memblock);
31
32static void memblock_dump(struct memblock_region *region, char *name)
33{
34 unsigned long long base, size;
35 int i;
36
37 pr_info(" %s.cnt = 0x%lx\n", name, region->cnt);
38
39 for (i = 0; i < region->cnt; i++) {
40 base = region->region[i].base;
41 size = region->region[i].size;
42
43 pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
44 name, i, base, base + size - 1, size);
45 }
46}
47
48void memblock_dump_all(void)
49{
50 if (!memblock_debug)
51 return;
52
53 pr_info("MEMBLOCK configuration:\n");
54 pr_info(" rmo_size = 0x%llx\n", (unsigned long long)memblock.rmo_size);
55 pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size);
56
57 memblock_dump(&memblock.memory, "memory");
58 memblock_dump(&memblock.reserved, "reserved");
59}
60
61static unsigned long memblock_addrs_overlap(u64 base1, u64 size1, u64 base2,
62 u64 size2)
63{
64 return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
65}
66
67static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
68{
69 if (base2 == base1 + size1)
70 return 1;
71 else if (base1 == base2 + size2)
72 return -1;
73
74 return 0;
75}
76
77static long memblock_regions_adjacent(struct memblock_region *rgn,
78 unsigned long r1, unsigned long r2)
79{
80 u64 base1 = rgn->region[r1].base;
81 u64 size1 = rgn->region[r1].size;
82 u64 base2 = rgn->region[r2].base;
83 u64 size2 = rgn->region[r2].size;
84
85 return memblock_addrs_adjacent(base1, size1, base2, size2);
86}
87
88static void memblock_remove_region(struct memblock_region *rgn, unsigned long r)
89{
90 unsigned long i;
91
92 for (i = r; i < rgn->cnt - 1; i++) {
93 rgn->region[i].base = rgn->region[i + 1].base;
94 rgn->region[i].size = rgn->region[i + 1].size;
95 }
96 rgn->cnt--;
97}
98
99/* Assumption: base addr of region 1 < base addr of region 2 */
100static void memblock_coalesce_regions(struct memblock_region *rgn,
101 unsigned long r1, unsigned long r2)
102{
103 rgn->region[r1].size += rgn->region[r2].size;
104 memblock_remove_region(rgn, r2);
105}
106
107void __init memblock_init(void)
108{
109 /* Create a dummy zero size MEMBLOCK which will get coalesced away later.
110 * This simplifies the memblock_add() code below...
111 */
112 memblock.memory.region[0].base = 0;
113 memblock.memory.region[0].size = 0;
114 memblock.memory.cnt = 1;
115
116 /* Ditto. */
117 memblock.reserved.region[0].base = 0;
118 memblock.reserved.region[0].size = 0;
119 memblock.reserved.cnt = 1;
120}
121
122void __init memblock_analyze(void)
123{
124 int i;
125
126 memblock.memory.size = 0;
127
128 for (i = 0; i < memblock.memory.cnt; i++)
129 memblock.memory.size += memblock.memory.region[i].size;
130}
131
132static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size)
133{
134 unsigned long coalesced = 0;
135 long adjacent, i;
136
137 if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
138 rgn->region[0].base = base;
139 rgn->region[0].size = size;
140 return 0;
141 }
142
143 /* First try and coalesce this MEMBLOCK with another. */
144 for (i = 0; i < rgn->cnt; i++) {
145 u64 rgnbase = rgn->region[i].base;
146 u64 rgnsize = rgn->region[i].size;
147
148 if ((rgnbase == base) && (rgnsize == size))
149 /* Already have this region, so we're done */
150 return 0;
151
152 adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize);
153 if (adjacent > 0) {
154 rgn->region[i].base -= size;
155 rgn->region[i].size += size;
156 coalesced++;
157 break;
158 } else if (adjacent < 0) {
159 rgn->region[i].size += size;
160 coalesced++;
161 break;
162 }
163 }
164
165 if ((i < rgn->cnt - 1) && memblock_regions_adjacent(rgn, i, i+1)) {
166 memblock_coalesce_regions(rgn, i, i+1);
167 coalesced++;
168 }
169
170 if (coalesced)
171 return coalesced;
172 if (rgn->cnt >= MAX_MEMBLOCK_REGIONS)
173 return -1;
174
175 /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */
176 for (i = rgn->cnt - 1; i >= 0; i--) {
177 if (base < rgn->region[i].base) {
178 rgn->region[i+1].base = rgn->region[i].base;
179 rgn->region[i+1].size = rgn->region[i].size;
180 } else {
181 rgn->region[i+1].base = base;
182 rgn->region[i+1].size = size;
183 break;
184 }
185 }
186
187 if (base < rgn->region[0].base) {
188 rgn->region[0].base = base;
189 rgn->region[0].size = size;
190 }
191 rgn->cnt++;
192
193 return 0;
194}
195
196long memblock_add(u64 base, u64 size)
197{
198 struct memblock_region *_rgn = &memblock.memory;
199
200 /* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. */
201 if (base == 0)
202 memblock.rmo_size = size;
203
204 return memblock_add_region(_rgn, base, size);
205
206}
207
208static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size)
209{
210 u64 rgnbegin, rgnend;
211 u64 end = base + size;
212 int i;
213
214 rgnbegin = rgnend = 0; /* supress gcc warnings */
215
216 /* Find the region where (base, size) belongs to */
217 for (i=0; i < rgn->cnt; i++) {
218 rgnbegin = rgn->region[i].base;
219 rgnend = rgnbegin + rgn->region[i].size;
220
221 if ((rgnbegin <= base) && (end <= rgnend))
222 break;
223 }
224
225 /* Didn't find the region */
226 if (i == rgn->cnt)
227 return -1;
228
229 /* Check to see if we are removing entire region */
230 if ((rgnbegin == base) && (rgnend == end)) {
231 memblock_remove_region(rgn, i);
232 return 0;
233 }
234
235 /* Check to see if region is matching at the front */
236 if (rgnbegin == base) {
237 rgn->region[i].base = end;
238 rgn->region[i].size -= size;
239 return 0;
240 }
241
242 /* Check to see if the region is matching at the end */
243 if (rgnend == end) {
244 rgn->region[i].size -= size;
245 return 0;
246 }
247
248 /*
249 * We need to split the entry - adjust the current one to the
250 * beginging of the hole and add the region after hole.
251 */
252 rgn->region[i].size = base - rgn->region[i].base;
253 return memblock_add_region(rgn, end, rgnend - end);
254}
255
256long memblock_remove(u64 base, u64 size)
257{
258 return __memblock_remove(&memblock.memory, base, size);
259}
260
261long __init memblock_free(u64 base, u64 size)
262{
263 return __memblock_remove(&memblock.reserved, base, size);
264}
265
266long __init memblock_reserve(u64 base, u64 size)
267{
268 struct memblock_region *_rgn = &memblock.reserved;
269
270 BUG_ON(0 == size);
271
272 return memblock_add_region(_rgn, base, size);
273}
274
275long memblock_overlaps_region(struct memblock_region *rgn, u64 base, u64 size)
276{
277 unsigned long i;
278
279 for (i = 0; i < rgn->cnt; i++) {
280 u64 rgnbase = rgn->region[i].base;
281 u64 rgnsize = rgn->region[i].size;
282 if (memblock_addrs_overlap(base, size, rgnbase, rgnsize))
283 break;
284 }
285
286 return (i < rgn->cnt) ? i : -1;
287}
288
289static u64 memblock_align_down(u64 addr, u64 size)
290{
291 return addr & ~(size - 1);
292}
293
294static u64 memblock_align_up(u64 addr, u64 size)
295{
296 return (addr + (size - 1)) & ~(size - 1);
297}
298
299static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end,
300 u64 size, u64 align)
301{
302 u64 base, res_base;
303 long j;
304
305 base = memblock_align_down((end - size), align);
306 while (start <= base) {
307 j = memblock_overlaps_region(&memblock.reserved, base, size);
308 if (j < 0) {
309 /* this area isn't reserved, take it */
310 if (memblock_add_region(&memblock.reserved, base, size) < 0)
311 base = ~(u64)0;
312 return base;
313 }
314 res_base = memblock.reserved.region[j].base;
315 if (res_base < size)
316 break;
317 base = memblock_align_down(res_base - size, align);
318 }
319
320 return ~(u64)0;
321}
322
323static u64 __init memblock_alloc_nid_region(struct memblock_property *mp,
324 u64 (*nid_range)(u64, u64, int *),
325 u64 size, u64 align, int nid)
326{
327 u64 start, end;
328
329 start = mp->base;
330 end = start + mp->size;
331
332 start = memblock_align_up(start, align);
333 while (start < end) {
334 u64 this_end;
335 int this_nid;
336
337 this_end = nid_range(start, end, &this_nid);
338 if (this_nid == nid) {
339 u64 ret = memblock_alloc_nid_unreserved(start, this_end,
340 size, align);
341 if (ret != ~(u64)0)
342 return ret;
343 }
344 start = this_end;
345 }
346
347 return ~(u64)0;
348}
349
350u64 __init memblock_alloc_nid(u64 size, u64 align, int nid,
351 u64 (*nid_range)(u64 start, u64 end, int *nid))
352{
353 struct memblock_region *mem = &memblock.memory;
354 int i;
355
356 BUG_ON(0 == size);
357
358 size = memblock_align_up(size, align);
359
360 for (i = 0; i < mem->cnt; i++) {
361 u64 ret = memblock_alloc_nid_region(&mem->region[i],
362 nid_range,
363 size, align, nid);
364 if (ret != ~(u64)0)
365 return ret;
366 }
367
368 return memblock_alloc(size, align);
369}
370
371u64 __init memblock_alloc(u64 size, u64 align)
372{
373 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE);
374}
375
376u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr)
377{
378 u64 alloc;
379
380 alloc = __memblock_alloc_base(size, align, max_addr);
381
382 if (alloc == 0)
383 panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
384 (unsigned long long) size, (unsigned long long) max_addr);
385
386 return alloc;
387}
388
389u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr)
390{
391 long i, j;
392 u64 base = 0;
393 u64 res_base;
394
395 BUG_ON(0 == size);
396
397 size = memblock_align_up(size, align);
398
399 /* On some platforms, make sure we allocate lowmem */
400 /* Note that MEMBLOCK_REAL_LIMIT may be MEMBLOCK_ALLOC_ANYWHERE */
401 if (max_addr == MEMBLOCK_ALLOC_ANYWHERE)
402 max_addr = MEMBLOCK_REAL_LIMIT;
403
404 for (i = memblock.memory.cnt - 1; i >= 0; i--) {
405 u64 memblockbase = memblock.memory.region[i].base;
406 u64 memblocksize = memblock.memory.region[i].size;
407
408 if (memblocksize < size)
409 continue;
410 if (max_addr == MEMBLOCK_ALLOC_ANYWHERE)
411 base = memblock_align_down(memblockbase + memblocksize - size, align);
412 else if (memblockbase < max_addr) {
413 base = min(memblockbase + memblocksize, max_addr);
414 base = memblock_align_down(base - size, align);
415 } else
416 continue;
417
418 while (base && memblockbase <= base) {
419 j = memblock_overlaps_region(&memblock.reserved, base, size);
420 if (j < 0) {
421 /* this area isn't reserved, take it */
422 if (memblock_add_region(&memblock.reserved, base, size) < 0)
423 return 0;
424 return base;
425 }
426 res_base = memblock.reserved.region[j].base;
427 if (res_base < size)
428 break;
429 base = memblock_align_down(res_base - size, align);
430 }
431 }
432 return 0;
433}
434
435/* You must call memblock_analyze() before this. */
436u64 __init memblock_phys_mem_size(void)
437{
438 return memblock.memory.size;
439}
440
441u64 memblock_end_of_DRAM(void)
442{
443 int idx = memblock.memory.cnt - 1;
444
445 return (memblock.memory.region[idx].base + memblock.memory.region[idx].size);
446}
447
448/* You must call memblock_analyze() after this. */
449void __init memblock_enforce_memory_limit(u64 memory_limit)
450{
451 unsigned long i;
452 u64 limit;
453 struct memblock_property *p;
454
455 if (!memory_limit)
456 return;
457
458 /* Truncate the memblock regions to satisfy the memory limit. */
459 limit = memory_limit;
460 for (i = 0; i < memblock.memory.cnt; i++) {
461 if (limit > memblock.memory.region[i].size) {
462 limit -= memblock.memory.region[i].size;
463 continue;
464 }
465
466 memblock.memory.region[i].size = limit;
467 memblock.memory.cnt = i + 1;
468 break;
469 }
470
471 if (memblock.memory.region[0].size < memblock.rmo_size)
472 memblock.rmo_size = memblock.memory.region[0].size;
473
474 memory_limit = memblock_end_of_DRAM();
475
476 /* And truncate any reserves above the limit also. */
477 for (i = 0; i < memblock.reserved.cnt; i++) {
478 p = &memblock.reserved.region[i];
479
480 if (p->base > memory_limit)
481 p->size = 0;
482 else if ((p->base + p->size) > memory_limit)
483 p->size = memory_limit - p->base;
484
485 if (p->size == 0) {
486 memblock_remove_region(&memblock.reserved, i);
487 i--;
488 }
489 }
490}
491
492int __init memblock_is_reserved(u64 addr)
493{
494 int i;
495
496 for (i = 0; i < memblock.reserved.cnt; i++) {
497 u64 upper = memblock.reserved.region[i].base +
498 memblock.reserved.region[i].size - 1;
499 if ((addr >= memblock.reserved.region[i].base) && (addr <= upper))
500 return 1;
501 }
502 return 0;
503}
504
505int memblock_is_region_reserved(u64 base, u64 size)
506{
507 return memblock_overlaps_region(&memblock.reserved, base, size);
508}
509
510/*
511 * Given a <base, len>, find which memory regions belong to this range.
512 * Adjust the request and return a contiguous chunk.
513 */
514int memblock_find(struct memblock_property *res)
515{
516 int i;
517 u64 rstart, rend;
518
519 rstart = res->base;
520 rend = rstart + res->size - 1;
521
522 for (i = 0; i < memblock.memory.cnt; i++) {
523 u64 start = memblock.memory.region[i].base;
524 u64 end = start + memblock.memory.region[i].size - 1;
525
526 if (start > rend)
527 return -1;
528
529 if ((end >= rstart) && (start < rend)) {
530 /* adjust the request */
531 if (rstart < start)
532 rstart = start;
533 if (rend > end)
534 rend = end;
535 res->base = rstart;
536 res->size = rend - rstart + 1;
537 return 0;
538 }
539 }
540 return -1;
541}
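
For context, a minimal sketch of how architecture setup code might drive the interface introduced in mm/memblock.c above. It is not part of this patch: the function name example_memblock_setup() and all region addresses and sizes are invented for illustration; only the memblock_*() calls come from the file above.

/* Illustrative sketch only -- not part of this patch. */
static void __init example_memblock_setup(void)
{
	u64 buf;

	memblock_init();			/* start with the dummy zero-size regions */

	/* Describe the RAM reported by firmware (example values). */
	memblock_add(0x00000000ULL, 0x40000000ULL);	/* 1 GiB of RAM */

	/* Keep the early allocator away from the kernel image (example range). */
	memblock_reserve(0x00100000ULL, 0x00800000ULL);

	memblock_analyze();			/* recompute memblock.memory.size */

	/* Grab an early, page-aligned 1 MiB scratch area; panics on failure. */
	buf = memblock_alloc(0x100000ULL, PAGE_SIZE);

	pr_info("example: %llu bytes of RAM, DRAM ends at 0x%llx, buf at 0x%llx\n",
		(unsigned long long)memblock_phys_mem_size(),
		(unsigned long long)memblock_end_of_DRAM(),
		(unsigned long long)buf);

	memblock_dump_all();			/* verbose only with memblock=debug */
}
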
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c6ece0a57595..20a8193a7af8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1370,7 +1370,7 @@ static void memcg_wakeup_oom(struct mem_cgroup *mem)
1370 1370
1371static void memcg_oom_recover(struct mem_cgroup *mem) 1371static void memcg_oom_recover(struct mem_cgroup *mem)
1372{ 1372{
1373 if (mem->oom_kill_disable && atomic_read(&mem->oom_lock)) 1373 if (atomic_read(&mem->oom_lock))
1374 memcg_wakeup_oom(mem); 1374 memcg_wakeup_oom(mem);
1375} 1375}
1376 1376
@@ -3781,6 +3781,8 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
3781 return -EINVAL; 3781 return -EINVAL;
3782 } 3782 }
3783 mem->oom_kill_disable = val; 3783 mem->oom_kill_disable = val;
3784 if (!val)
3785 memcg_oom_recover(mem);
3784 cgroup_unlock(); 3786 cgroup_unlock();
3785 return 0; 3787 return 0;
3786} 3788}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 620b0b461593..6b44e52cacaa 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -45,6 +45,7 @@
45#include <linux/page-isolation.h> 45#include <linux/page-isolation.h>
46#include <linux/suspend.h> 46#include <linux/suspend.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/swapops.h>
48#include "internal.h" 49#include "internal.h"
49 50
50int sysctl_memory_failure_early_kill __read_mostly = 0; 51int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -1296,3 +1297,35 @@ done:
1296 /* keep elevated page count for bad page */ 1297 /* keep elevated page count for bad page */
1297 return ret; 1298 return ret;
1298} 1299}
1300
1301/*
1302 * The caller must hold current->mm->mmap_sem in read mode.
1303 */
1304int is_hwpoison_address(unsigned long addr)
1305{
1306 pgd_t *pgdp;
1307 pud_t pud, *pudp;
1308 pmd_t pmd, *pmdp;
1309 pte_t pte, *ptep;
1310 swp_entry_t entry;
1311
1312 pgdp = pgd_offset(current->mm, addr);
1313 if (!pgd_present(*pgdp))
1314 return 0;
1315 pudp = pud_offset(pgdp, addr);
1316 pud = *pudp;
1317 if (!pud_present(pud) || pud_large(pud))
1318 return 0;
1319 pmdp = pmd_offset(pudp, addr);
1320 pmd = *pmdp;
1321 if (!pmd_present(pmd) || pmd_large(pmd))
1322 return 0;
1323 ptep = pte_offset_map(pmdp, addr);
1324 pte = *ptep;
1325 pte_unmap(ptep);
1326 if (!is_swap_pte(pte))
1327 return 0;
1328 entry = pte_to_swp_entry(pte);
1329 return is_hwpoison_entry(entry);
1330}
1331EXPORT_SYMBOL_GPL(is_hwpoison_address);
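
The helper added above documents that current->mm->mmap_sem must be held for read across the call. A hypothetical caller might look like the sketch below; the wrapper name is invented, and only is_hwpoison_address() comes from this patch.

/* Illustrative sketch only -- not part of this patch. */
static int example_addr_is_poisoned(unsigned long addr)
{
	int poisoned;

	down_read(&current->mm->mmap_sem);	/* required by is_hwpoison_address() */
	poisoned = is_hwpoison_address(addr);
	up_read(&current->mm->mmap_sem);

	return poisoned;
}
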
diff --git a/mm/memory.c b/mm/memory.c
index 119b7ccdf39b..bde42c6d3633 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1394,10 +1394,20 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1394 return i ? : -EFAULT; 1394 return i ? : -EFAULT;
1395 } 1395 }
1396 if (pages) { 1396 if (pages) {
1397 struct page *page = vm_normal_page(gate_vma, start, *pte); 1397 struct page *page;
1398
1399 page = vm_normal_page(gate_vma, start, *pte);
1400 if (!page) {
1401 if (!(gup_flags & FOLL_DUMP) &&
1402 is_zero_pfn(pte_pfn(*pte)))
1403 page = pte_page(*pte);
1404 else {
1405 pte_unmap(pte);
1406 return i ? : -EFAULT;
1407 }
1408 }
1398 pages[i] = page; 1409 pages[i] = page;
1399 if (page) 1410 get_page(page);
1400 get_page(page);
1401 } 1411 }
1402 pte_unmap(pte); 1412 pte_unmap(pte);
1403 if (vmas) 1413 if (vmas)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5d6fb339de03..5bc0a96beb51 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2094,7 +2094,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
2094 NODEMASK_SCRATCH(scratch); 2094 NODEMASK_SCRATCH(scratch);
2095 2095
2096 if (!scratch) 2096 if (!scratch)
2097 return; 2097 goto put_mpol;
2098 /* contextualize the tmpfs mount point mempolicy */ 2098 /* contextualize the tmpfs mount point mempolicy */
2099 new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); 2099 new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
2100 if (IS_ERR(new)) 2100 if (IS_ERR(new))
@@ -2103,19 +2103,20 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
2103 task_lock(current); 2103 task_lock(current);
2104 ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch); 2104 ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch);
2105 task_unlock(current); 2105 task_unlock(current);
2106 mpol_put(mpol); /* drop our ref on sb mpol */
2107 if (ret) 2106 if (ret)
2108 goto put_free; 2107 goto put_new;
2109 2108
2110 /* Create pseudo-vma that contains just the policy */ 2109 /* Create pseudo-vma that contains just the policy */
2111 memset(&pvma, 0, sizeof(struct vm_area_struct)); 2110 memset(&pvma, 0, sizeof(struct vm_area_struct));
2112 pvma.vm_end = TASK_SIZE; /* policy covers entire file */ 2111 pvma.vm_end = TASK_SIZE; /* policy covers entire file */
2113 mpol_set_shared_policy(sp, &pvma, new); /* adds ref */ 2112 mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
2114 2113
2115put_free: 2114put_new:
2116 mpol_put(new); /* drop initial ref */ 2115 mpol_put(new); /* drop initial ref */
2117free_scratch: 2116free_scratch:
2118 NODEMASK_SCRATCH_FREE(scratch); 2117 NODEMASK_SCRATCH_FREE(scratch);
2118put_mpol:
2119 mpol_put(mpol); /* drop our incoming ref on sb mpol */
2119 } 2120 }
2120} 2121}
2121 2122
diff --git a/mm/mmap.c b/mm/mmap.c
index 456ec6f27889..e38e910cb756 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1734,8 +1734,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1734 grow = (address - vma->vm_end) >> PAGE_SHIFT; 1734 grow = (address - vma->vm_end) >> PAGE_SHIFT;
1735 1735
1736 error = acct_stack_growth(vma, size, grow); 1736 error = acct_stack_growth(vma, size, grow);
1737 if (!error) 1737 if (!error) {
1738 vma->vm_end = address; 1738 vma->vm_end = address;
1739 perf_event_mmap(vma);
1740 }
1739 } 1741 }
1740 anon_vma_unlock(vma); 1742 anon_vma_unlock(vma);
1741 return error; 1743 return error;
@@ -1781,6 +1783,7 @@ static int expand_downwards(struct vm_area_struct *vma,
1781 if (!error) { 1783 if (!error) {
1782 vma->vm_start = address; 1784 vma->vm_start = address;
1783 vma->vm_pgoff -= grow; 1785 vma->vm_pgoff -= grow;
1786 perf_event_mmap(vma);
1784 } 1787 }
1785 } 1788 }
1786 anon_vma_unlock(vma); 1789 anon_vma_unlock(vma);
@@ -2208,6 +2211,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
2208 vma->vm_page_prot = vm_get_page_prot(flags); 2211 vma->vm_page_prot = vm_get_page_prot(flags);
2209 vma_link(mm, vma, prev, rb_link, rb_parent); 2212 vma_link(mm, vma, prev, rb_link, rb_parent);
2210out: 2213out:
2214 perf_event_mmap(vma);
2211 mm->total_vm += len >> PAGE_SHIFT; 2215 mm->total_vm += len >> PAGE_SHIFT;
2212 if (flags & VM_LOCKED) { 2216 if (flags & VM_LOCKED) {
2213 if (!mlock_vma_pages_range(vma, addr, addr + len)) 2217 if (!mlock_vma_pages_range(vma, addr, addr + len))
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5fa63bdf52e4..37498ef61548 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -495,7 +495,6 @@ static void balance_dirty_pages(struct address_space *mapping,
495 495
496 for (;;) { 496 for (;;) {
497 struct writeback_control wbc = { 497 struct writeback_control wbc = {
498 .bdi = bdi,
499 .sync_mode = WB_SYNC_NONE, 498 .sync_mode = WB_SYNC_NONE,
500 .older_than_this = NULL, 499 .older_than_this = NULL,
501 .nr_to_write = write_chunk, 500 .nr_to_write = write_chunk,
@@ -537,7 +536,7 @@ static void balance_dirty_pages(struct address_space *mapping,
537 * up. 536 * up.
538 */ 537 */
539 if (bdi_nr_reclaimable > bdi_thresh) { 538 if (bdi_nr_reclaimable > bdi_thresh) {
540 writeback_inodes_wbc(&wbc); 539 writeback_inodes_wb(&bdi->wb, &wbc);
541 pages_written += write_chunk - wbc.nr_to_write; 540 pages_written += write_chunk - wbc.nr_to_write;
542 get_dirty_limits(&background_thresh, &dirty_thresh, 541 get_dirty_limits(&background_thresh, &dirty_thresh,
543 &bdi_thresh, bdi); 542 &bdi_thresh, bdi);
@@ -597,7 +596,7 @@ static void balance_dirty_pages(struct address_space *mapping,
597 (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) 596 (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
598 + global_page_state(NR_UNSTABLE_NFS)) 597 + global_page_state(NR_UNSTABLE_NFS))
599 > background_thresh))) 598 > background_thresh)))
600 bdi_start_writeback(bdi, NULL, 0); 599 bdi_start_background_writeback(bdi);
601} 600}
602 601
603void set_page_dirty_balance(struct page *page, int page_mkwrite) 602void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -705,9 +704,8 @@ void laptop_mode_timer_fn(unsigned long data)
705 * We want to write everything out, not just down to the dirty 704 * We want to write everything out, not just down to the dirty
706 * threshold 705 * threshold
707 */ 706 */
708
709 if (bdi_has_dirty_io(&q->backing_dev_info)) 707 if (bdi_has_dirty_io(&q->backing_dev_info))
710 bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); 708 bdi_start_writeback(&q->backing_dev_info, nr_pages);
711} 709}
712 710
713/* 711/*
@@ -835,7 +833,6 @@ int write_cache_pages(struct address_space *mapping,
835 pgoff_t done_index; 833 pgoff_t done_index;
836 int cycled; 834 int cycled;
837 int range_whole = 0; 835 int range_whole = 0;
838 long nr_to_write = wbc->nr_to_write;
839 836
840 pagevec_init(&pvec, 0); 837 pagevec_init(&pvec, 0);
841 if (wbc->range_cyclic) { 838 if (wbc->range_cyclic) {
@@ -852,7 +849,22 @@ int write_cache_pages(struct address_space *mapping,
852 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 849 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
853 range_whole = 1; 850 range_whole = 1;
854 cycled = 1; /* ignore range_cyclic tests */ 851 cycled = 1; /* ignore range_cyclic tests */
852
853 /*
854 * If this is a data integrity sync, cap the writeback to the
855 * current end of file. Any extension to the file that occurs
856 * after this is a new write and we don't need to write those
857 * pages out to fulfil our data integrity requirements. If we
858 * try to write them out, we can get stuck in this scan until
859 * the concurrent writer stops adding dirty pages and extending
860 * EOF.
861 */
862 if (wbc->sync_mode == WB_SYNC_ALL &&
863 wbc->range_end == LLONG_MAX) {
864 end = i_size_read(mapping->host) >> PAGE_CACHE_SHIFT;
865 }
855 } 866 }
867
856retry: 868retry:
857 done_index = index; 869 done_index = index;
858 while (!done && (index <= end)) { 870 while (!done && (index <= end)) {
@@ -935,11 +947,10 @@ continue_unlock:
935 done = 1; 947 done = 1;
936 break; 948 break;
937 } 949 }
938 } 950 }
939 951
940 if (nr_to_write > 0) { 952 if (wbc->nr_to_write > 0) {
941 nr_to_write--; 953 if (--wbc->nr_to_write == 0 &&
942 if (nr_to_write == 0 &&
943 wbc->sync_mode == WB_SYNC_NONE) { 954 wbc->sync_mode == WB_SYNC_NONE) {
944 /* 955 /*
945 * We stop writing back only if we are 956 * We stop writing back only if we are
@@ -970,11 +981,8 @@ continue_unlock:
970 end = writeback_index - 1; 981 end = writeback_index - 1;
971 goto retry; 982 goto retry;
972 } 983 }
973 if (!wbc->no_nrwrite_index_update) { 984 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
974 if (wbc->range_cyclic || (range_whole && nr_to_write > 0)) 985 mapping->writeback_index = done_index;
975 mapping->writeback_index = done_index;
976 wbc->nr_to_write = nr_to_write;
977 }
978 986
979 return ret; 987 return ret;
980} 988}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 431214b941ac..9bd339eb04c6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
3634 int i; 3634 int i;
3635 void *ptr; 3635 void *ptr;
3636 3636
3637 if (limit > get_max_mapped())
3638 limit = get_max_mapped();
3639
3637 /* need to go over early_node_map to find out good range for node */ 3640 /* need to go over early_node_map to find out good range for node */
3638 for_each_active_range_index_in_nid(i, nid) { 3641 for_each_active_range_index_in_nid(i, nid) {
3639 u64 addr; 3642 u64 addr;
@@ -3659,6 +3662,11 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
3659 ptr = phys_to_virt(addr); 3662 ptr = phys_to_virt(addr);
3660 memset(ptr, 0, size); 3663 memset(ptr, 0, size);
3661 reserve_early_without_check(addr, addr + size, "BOOTMEM"); 3664 reserve_early_without_check(addr, addr + size, "BOOTMEM");
3665 /*
3666 * The min_count is set to 0 so that bootmem allocated blocks
3667 * are never reported as leaks.
3668 */
3669 kmemleak_alloc(ptr, size, 0, 0);
3662 return ptr; 3670 return ptr;
3663 } 3671 }
3664 3672
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 6c0081441a32..5bffada7cde1 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -9,6 +9,7 @@
9#include <linux/vmalloc.h> 9#include <linux/vmalloc.h>
10#include <linux/cgroup.h> 10#include <linux/cgroup.h>
11#include <linux/swapops.h> 11#include <linux/swapops.h>
12#include <linux/kmemleak.h>
12 13
13static void __meminit 14static void __meminit
14__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) 15__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
@@ -126,6 +127,12 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
126 if (!base) 127 if (!base)
127 base = vmalloc(table_size); 128 base = vmalloc(table_size);
128 } 129 }
130 /*
131 * The value stored in section->page_cgroup is (base - pfn)
132 * and it does not point to the memory block allocated above,
133 * causing kmemleak false positives.
134 */
135 kmemleak_not_leak(base);
129 } else { 136 } else {
130 /* 137 /*
131 * We don't have to allocate page_cgroup again, but 138 * We don't have to allocate page_cgroup again, but
diff --git a/mm/percpu.c b/mm/percpu.c
index 39f7dfd59585..e61dc2cc5873 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -229,8 +229,8 @@ static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx)
229 return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx; 229 return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
230} 230}
231 231
232static unsigned long __maybe_unused pcpu_chunk_addr(struct pcpu_chunk *chunk, 232static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
233 unsigned int cpu, int page_idx) 233 unsigned int cpu, int page_idx)
234{ 234{
235 return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] + 235 return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] +
236 (page_idx << PAGE_SHIFT); 236 (page_idx << PAGE_SHIFT);
@@ -282,6 +282,9 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
282 */ 282 */
283static void *pcpu_mem_alloc(size_t size) 283static void *pcpu_mem_alloc(size_t size)
284{ 284{
285 if (WARN_ON_ONCE(!slab_is_available()))
286 return NULL;
287
285 if (size <= PAGE_SIZE) 288 if (size <= PAGE_SIZE)
286 return kzalloc(size, GFP_KERNEL); 289 return kzalloc(size, GFP_KERNEL);
287 else { 290 else {
@@ -392,13 +395,6 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
392 old_size = chunk->map_alloc * sizeof(chunk->map[0]); 395 old_size = chunk->map_alloc * sizeof(chunk->map[0]);
393 memcpy(new, chunk->map, old_size); 396 memcpy(new, chunk->map, old_size);
394 397
395 /*
396 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
397 * one of the first chunks and still using static map.
398 */
399 if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
400 old = chunk->map;
401
402 chunk->map_alloc = new_alloc; 398 chunk->map_alloc = new_alloc;
403 chunk->map = new; 399 chunk->map = new;
404 new = NULL; 400 new = NULL;
@@ -604,7 +600,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
604{ 600{
605 struct pcpu_chunk *chunk; 601 struct pcpu_chunk *chunk;
606 602
607 chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL); 603 chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
608 if (!chunk) 604 if (!chunk)
609 return NULL; 605 return NULL;
610 606
@@ -978,7 +974,32 @@ bool is_kernel_percpu_address(unsigned long addr)
978 */ 974 */
979phys_addr_t per_cpu_ptr_to_phys(void *addr) 975phys_addr_t per_cpu_ptr_to_phys(void *addr)
980{ 976{
981 if (pcpu_addr_in_first_chunk(addr)) { 977 void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
978 bool in_first_chunk = false;
979 unsigned long first_start, first_end;
980 unsigned int cpu;
981
982 /*
983 * The following test on first_start/end isn't strictly
984 * necessary but will speed up lookups of addresses which
985 * aren't in the first chunk.
986 */
987 first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0);
988 first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu,
989 pcpu_unit_pages);
990 if ((unsigned long)addr >= first_start &&
991 (unsigned long)addr < first_end) {
992 for_each_possible_cpu(cpu) {
993 void *start = per_cpu_ptr(base, cpu);
994
995 if (addr >= start && addr < start + pcpu_unit_size) {
996 in_first_chunk = true;
997 break;
998 }
999 }
1000 }
1001
1002 if (in_first_chunk) {
982 if ((unsigned long)addr < VMALLOC_START || 1003 if ((unsigned long)addr < VMALLOC_START ||
983 (unsigned long)addr >= VMALLOC_END) 1004 (unsigned long)addr >= VMALLOC_END)
984 return __pa(addr); 1005 return __pa(addr);
@@ -988,20 +1009,6 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
988 return page_to_phys(pcpu_addr_to_page(addr)); 1009 return page_to_phys(pcpu_addr_to_page(addr));
989} 1010}
990 1011
991static inline size_t pcpu_calc_fc_sizes(size_t static_size,
992 size_t reserved_size,
993 ssize_t *dyn_sizep)
994{
995 size_t size_sum;
996
997 size_sum = PFN_ALIGN(static_size + reserved_size +
998 (*dyn_sizep >= 0 ? *dyn_sizep : 0));
999 if (*dyn_sizep != 0)
1000 *dyn_sizep = size_sum - static_size - reserved_size;
1001
1002 return size_sum;
1003}
1004
1005/** 1012/**
1006 * pcpu_alloc_alloc_info - allocate percpu allocation info 1013 * pcpu_alloc_alloc_info - allocate percpu allocation info
1007 * @nr_groups: the number of groups 1014 * @nr_groups: the number of groups
@@ -1060,7 +1067,7 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
1060/** 1067/**
1061 * pcpu_build_alloc_info - build alloc_info considering distances between CPUs 1068 * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
1062 * @reserved_size: the size of reserved percpu area in bytes 1069 * @reserved_size: the size of reserved percpu area in bytes
1063 * @dyn_size: free size for dynamic allocation in bytes, -1 for auto 1070 * @dyn_size: minimum free size for dynamic allocation in bytes
1064 * @atom_size: allocation atom size 1071 * @atom_size: allocation atom size
1065 * @cpu_distance_fn: callback to determine distance between cpus, optional 1072 * @cpu_distance_fn: callback to determine distance between cpus, optional
1066 * 1073 *
@@ -1078,15 +1085,15 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
1078 * On success, pointer to the new allocation_info is returned. On 1085 * On success, pointer to the new allocation_info is returned. On
1079 * failure, ERR_PTR value is returned. 1086 * failure, ERR_PTR value is returned.
1080 */ 1087 */
1081struct pcpu_alloc_info * __init pcpu_build_alloc_info( 1088static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
1082 size_t reserved_size, ssize_t dyn_size, 1089 size_t reserved_size, size_t dyn_size,
1083 size_t atom_size, 1090 size_t atom_size,
1084 pcpu_fc_cpu_distance_fn_t cpu_distance_fn) 1091 pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
1085{ 1092{
1086 static int group_map[NR_CPUS] __initdata; 1093 static int group_map[NR_CPUS] __initdata;
1087 static int group_cnt[NR_CPUS] __initdata; 1094 static int group_cnt[NR_CPUS] __initdata;
1088 const size_t static_size = __per_cpu_end - __per_cpu_start; 1095 const size_t static_size = __per_cpu_end - __per_cpu_start;
1089 int group_cnt_max = 0, nr_groups = 1, nr_units = 0; 1096 int nr_groups = 1, nr_units = 0;
1090 size_t size_sum, min_unit_size, alloc_size; 1097 size_t size_sum, min_unit_size, alloc_size;
1091 int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ 1098 int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */
1092 int last_allocs, group, unit; 1099 int last_allocs, group, unit;
@@ -1096,7 +1103,12 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
1096 1103
1097 /* this function may be called multiple times */ 1104 /* this function may be called multiple times */
1098 memset(group_map, 0, sizeof(group_map)); 1105 memset(group_map, 0, sizeof(group_map));
1099 memset(group_cnt, 0, sizeof(group_map)); 1106 memset(group_cnt, 0, sizeof(group_cnt));
1107
1108 /* calculate size_sum and ensure dyn_size is enough for early alloc */
1109 size_sum = PFN_ALIGN(static_size + reserved_size +
1110 max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
1111 dyn_size = size_sum - static_size - reserved_size;
1100 1112
1101 /* 1113 /*
1102 * Determine min_unit_size, alloc_size and max_upa such that 1114 * Determine min_unit_size, alloc_size and max_upa such that
@@ -1104,7 +1116,6 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
1104 * which can accomodate 4k aligned segments which are equal to 1116 * which can accomodate 4k aligned segments which are equal to
1105 * or larger than min_unit_size. 1117 * or larger than min_unit_size.
1106 */ 1118 */
1107 size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
1108 min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); 1119 min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
1109 1120
1110 alloc_size = roundup(min_unit_size, atom_size); 1121 alloc_size = roundup(min_unit_size, atom_size);
@@ -1130,7 +1141,6 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
1130 } 1141 }
1131 group_map[cpu] = group; 1142 group_map[cpu] = group;
1132 group_cnt[group]++; 1143 group_cnt[group]++;
1133 group_cnt_max = max(group_cnt_max, group_cnt[group]);
1134 } 1144 }
1135 1145
1136 /* 1146 /*
@@ -1326,7 +1336,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1326 void *base_addr) 1336 void *base_addr)
1327{ 1337{
1328 static char cpus_buf[4096] __initdata; 1338 static char cpus_buf[4096] __initdata;
1329 static int smap[2], dmap[2]; 1339 static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
1340 static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
1330 size_t dyn_size = ai->dyn_size; 1341 size_t dyn_size = ai->dyn_size;
1331 size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; 1342 size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
1332 struct pcpu_chunk *schunk, *dchunk = NULL; 1343 struct pcpu_chunk *schunk, *dchunk = NULL;
@@ -1349,14 +1360,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1349} while (0) 1360} while (0)
1350 1361
1351 /* sanity checks */ 1362 /* sanity checks */
1352 BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
1353 ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
1354 PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); 1363 PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
1355 PCPU_SETUP_BUG_ON(!ai->static_size); 1364 PCPU_SETUP_BUG_ON(!ai->static_size);
1356 PCPU_SETUP_BUG_ON(!base_addr); 1365 PCPU_SETUP_BUG_ON(!base_addr);
1357 PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); 1366 PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
1358 PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); 1367 PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
1359 PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); 1368 PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
1369 PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
1360 PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0); 1370 PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
1361 1371
1362 /* process group information and build config tables accordingly */ 1372 /* process group information and build config tables accordingly */
@@ -1508,7 +1518,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
1508/** 1518/**
1509 * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem 1519 * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
1510 * @reserved_size: the size of reserved percpu area in bytes 1520 * @reserved_size: the size of reserved percpu area in bytes
1511 * @dyn_size: free size for dynamic allocation in bytes, -1 for auto 1521 * @dyn_size: minimum free size for dynamic allocation in bytes
1512 * @atom_size: allocation atom size 1522 * @atom_size: allocation atom size
1513 * @cpu_distance_fn: callback to determine distance between cpus, optional 1523 * @cpu_distance_fn: callback to determine distance between cpus, optional
1514 * @alloc_fn: function to allocate percpu page 1524 * @alloc_fn: function to allocate percpu page
@@ -1529,10 +1539,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
1529 * vmalloc space is not orders of magnitude larger than distances 1539 * vmalloc space is not orders of magnitude larger than distances
1530 * between node memory addresses (ie. 32bit NUMA machines). 1540 * between node memory addresses (ie. 32bit NUMA machines).
1531 * 1541 *
1532 * When @dyn_size is positive, dynamic area might be larger than 1542 * @dyn_size specifies the minimum dynamic area size.
1533 * specified to fill page alignment. When @dyn_size is auto,
1534 * @dyn_size is just big enough to fill page alignment after static
1535 * and reserved areas.
1536 * 1543 *
1537 * If the needed size is smaller than the minimum or specified unit 1544 * If the needed size is smaller than the minimum or specified unit
1538 * size, the leftover is returned using @free_fn. 1545 * size, the leftover is returned using @free_fn.
@@ -1540,7 +1547,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
1540 * RETURNS: 1547 * RETURNS:
1541 * 0 on success, -errno on failure. 1548 * 0 on success, -errno on failure.
1542 */ 1549 */
1543int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, 1550int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
1544 size_t atom_size, 1551 size_t atom_size,
1545 pcpu_fc_cpu_distance_fn_t cpu_distance_fn, 1552 pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
1546 pcpu_fc_alloc_fn_t alloc_fn, 1553 pcpu_fc_alloc_fn_t alloc_fn,
@@ -1671,7 +1678,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
1671 1678
1672 snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); 1679 snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);
1673 1680
1674 ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL); 1681 ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
1675 if (IS_ERR(ai)) 1682 if (IS_ERR(ai))
1676 return PTR_ERR(ai); 1683 return PTR_ERR(ai);
1677 BUG_ON(ai->nr_groups != 1); 1684 BUG_ON(ai->nr_groups != 1);
@@ -1797,3 +1804,33 @@ void __init setup_per_cpu_areas(void)
1797 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; 1804 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
1798} 1805}
1799#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ 1806#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
1807
1808/*
1809 * First and reserved chunks are initialized with temporary allocation
1810 * map in initdata so that they can be used before slab is online.
1811 * This function is called after slab is brought up and replaces those
1812 * with properly allocated maps.
1813 */
1814void __init percpu_init_late(void)
1815{
1816 struct pcpu_chunk *target_chunks[] =
1817 { pcpu_first_chunk, pcpu_reserved_chunk, NULL };
1818 struct pcpu_chunk *chunk;
1819 unsigned long flags;
1820 int i;
1821
1822 for (i = 0; (chunk = target_chunks[i]); i++) {
1823 int *map;
1824 const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);
1825
1826 BUILD_BUG_ON(size > PAGE_SIZE);
1827
1828 map = pcpu_mem_alloc(size);
1829 BUG_ON(!map);
1830
1831 spin_lock_irqsave(&pcpu_lock, flags);
1832 memcpy(map, chunk->map, size);
1833 chunk->map = map;
1834 spin_unlock_irqrestore(&pcpu_lock, flags);
1835 }
1836}
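
The new percpu_init_late() above replaces the initdata allocation maps of the first and reserved chunks with slab-backed ones, so it must run once the slab allocator is usable. The actual generic-init call site is outside this mm/ diff; the helper below is only a hypothetical placement sketch.

/* Illustrative sketch only -- the real call site lives in generic init code. */
void __init example_post_slab_setup(void)
{
	/*
	 * Safe point: kmalloc()/kzalloc() work here, so pcpu_mem_alloc()
	 * can hand out slab memory for the replacement maps.
	 */
	percpu_init_late();
}
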
diff --git a/mm/slab.c b/mm/slab.c
index e49f8f46f46d..736e497733d6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,6 @@
102#include <linux/cpu.h> 102#include <linux/cpu.h>
103#include <linux/sysctl.h> 103#include <linux/sysctl.h>
104#include <linux/module.h> 104#include <linux/module.h>
105#include <linux/kmemtrace.h>
106#include <linux/rcupdate.h> 105#include <linux/rcupdate.h>
107#include <linux/string.h> 106#include <linux/string.h>
108#include <linux/uaccess.h> 107#include <linux/uaccess.h>
@@ -861,7 +860,7 @@ static void __cpuinit start_cpu_timer(int cpu)
861 */ 860 */
862 if (keventd_up() && reap_work->work.func == NULL) { 861 if (keventd_up() && reap_work->work.func == NULL) {
863 init_reap_node(cpu); 862 init_reap_node(cpu);
864 INIT_DELAYED_WORK(reap_work, cache_reap); 863 INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
865 schedule_delayed_work_on(cpu, reap_work, 864 schedule_delayed_work_on(cpu, reap_work,
866 __round_jiffies_relative(HZ, cpu)); 865 __round_jiffies_relative(HZ, cpu));
867 } 866 }
diff --git a/mm/slob.c b/mm/slob.c
index 23631e2bb57a..d582171c8101 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -66,8 +66,10 @@
66#include <linux/module.h> 66#include <linux/module.h>
67#include <linux/rcupdate.h> 67#include <linux/rcupdate.h>
68#include <linux/list.h> 68#include <linux/list.h>
69#include <linux/kmemtrace.h>
70#include <linux/kmemleak.h> 69#include <linux/kmemleak.h>
70
71#include <trace/events/kmem.h>
72
71#include <asm/atomic.h> 73#include <asm/atomic.h>
72 74
73/* 75/*
@@ -394,6 +396,7 @@ static void slob_free(void *block, int size)
394 slob_t *prev, *next, *b = (slob_t *)block; 396 slob_t *prev, *next, *b = (slob_t *)block;
395 slobidx_t units; 397 slobidx_t units;
396 unsigned long flags; 398 unsigned long flags;
399 struct list_head *slob_list;
397 400
398 if (unlikely(ZERO_OR_NULL_PTR(block))) 401 if (unlikely(ZERO_OR_NULL_PTR(block)))
399 return; 402 return;
@@ -422,7 +425,13 @@ static void slob_free(void *block, int size)
422 set_slob(b, units, 425 set_slob(b, units,
423 (void *)((unsigned long)(b + 426 (void *)((unsigned long)(b +
424 SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK)); 427 SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
425 set_slob_page_free(sp, &free_slob_small); 428 if (size < SLOB_BREAK1)
429 slob_list = &free_slob_small;
430 else if (size < SLOB_BREAK2)
431 slob_list = &free_slob_medium;
432 else
433 slob_list = &free_slob_large;
434 set_slob_page_free(sp, slob_list);
426 goto out; 435 goto out;
427 } 436 }
428 437
@@ -639,7 +648,6 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
639 if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) { 648 if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
640 struct slob_rcu *slob_rcu; 649 struct slob_rcu *slob_rcu;
641 slob_rcu = b + (c->size - sizeof(struct slob_rcu)); 650 slob_rcu = b + (c->size - sizeof(struct slob_rcu));
642 INIT_RCU_HEAD(&slob_rcu->head);
643 slob_rcu->size = c->size; 651 slob_rcu->size = c->size;
644 call_rcu(&slob_rcu->head, kmem_rcu_free); 652 call_rcu(&slob_rcu->head, kmem_rcu_free);
645 } else { 653 } else {
diff --git a/mm/slub.c b/mm/slub.c
index 578f68f3c51f..13fffe1f0f3d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -17,7 +17,6 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/proc_fs.h> 18#include <linux/proc_fs.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/kmemtrace.h>
21#include <linux/kmemcheck.h> 20#include <linux/kmemcheck.h>
22#include <linux/cpu.h> 21#include <linux/cpu.h>
23#include <linux/cpuset.h> 22#include <linux/cpuset.h>
@@ -107,11 +106,17 @@
107 * the fast path and disables lockless freelists. 106 * the fast path and disables lockless freelists.
108 */ 107 */
109 108
109#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
110 SLAB_TRACE | SLAB_DEBUG_FREE)
111
112static inline int kmem_cache_debug(struct kmem_cache *s)
113{
110#ifdef CONFIG_SLUB_DEBUG 114#ifdef CONFIG_SLUB_DEBUG
111#define SLABDEBUG 1 115 return unlikely(s->flags & SLAB_DEBUG_FLAGS);
112#else 116#else
113#define SLABDEBUG 0 117 return 0;
114#endif 118#endif
119}
115 120
116/* 121/*
117 * Issues still to be resolved: 122 * Issues still to be resolved:
@@ -162,8 +167,8 @@
162#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */ 167#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */
163 168
164/* Internal SLUB flags */ 169/* Internal SLUB flags */
165#define __OBJECT_POISON 0x80000000 /* Poison object */ 170#define __OBJECT_POISON 0x80000000UL /* Poison object */
166#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ 171#define __SYSFS_ADD_DEFERRED 0x40000000UL /* Not yet visible via sysfs */
167 172
168static int kmem_size = sizeof(struct kmem_cache); 173static int kmem_size = sizeof(struct kmem_cache);
169 174
@@ -1073,7 +1078,7 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node,
1073 1078
1074 flags |= __GFP_NOTRACK; 1079 flags |= __GFP_NOTRACK;
1075 1080
1076 if (node == -1) 1081 if (node == NUMA_NO_NODE)
1077 return alloc_pages(flags, order); 1082 return alloc_pages(flags, order);
1078 else 1083 else
1079 return alloc_pages_exact_node(node, flags, order); 1084 return alloc_pages_exact_node(node, flags, order);
@@ -1157,9 +1162,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1157 inc_slabs_node(s, page_to_nid(page), page->objects); 1162 inc_slabs_node(s, page_to_nid(page), page->objects);
1158 page->slab = s; 1163 page->slab = s;
1159 page->flags |= 1 << PG_slab; 1164 page->flags |= 1 << PG_slab;
1160 if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
1161 SLAB_STORE_USER | SLAB_TRACE))
1162 __SetPageSlubDebug(page);
1163 1165
1164 start = page_address(page); 1166 start = page_address(page);
1165 1167
@@ -1186,14 +1188,13 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1186 int order = compound_order(page); 1188 int order = compound_order(page);
1187 int pages = 1 << order; 1189 int pages = 1 << order;
1188 1190
1189 if (unlikely(SLABDEBUG && PageSlubDebug(page))) { 1191 if (kmem_cache_debug(s)) {
1190 void *p; 1192 void *p;
1191 1193
1192 slab_pad_check(s, page); 1194 slab_pad_check(s, page);
1193 for_each_object(p, s, page_address(page), 1195 for_each_object(p, s, page_address(page),
1194 page->objects) 1196 page->objects)
1195 check_object(s, page, p, 0); 1197 check_object(s, page, p, 0);
1196 __ClearPageSlubDebug(page);
1197 } 1198 }
1198 1199
1199 kmemcheck_free_shadow(page, compound_order(page)); 1200 kmemcheck_free_shadow(page, compound_order(page));
@@ -1387,10 +1388,10 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1387static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) 1388static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
1388{ 1389{
1389 struct page *page; 1390 struct page *page;
1390 int searchnode = (node == -1) ? numa_node_id() : node; 1391 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1391 1392
1392 page = get_partial_node(get_node(s, searchnode)); 1393 page = get_partial_node(get_node(s, searchnode));
1393 if (page || (flags & __GFP_THISNODE)) 1394 if (page || node != -1)
1394 return page; 1395 return page;
1395 1396
1396 return get_any_partial(s, flags); 1397 return get_any_partial(s, flags);
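
After this hunk the cross-node fallback in get_partial() runs only when the caller passed no node at all; previously it was suppressed only by __GFP_THISNODE. The standalone sketch below mirrors that search order with stand-in names (and uses the named constant throughout): try the requested, or local, node first, and roam to other nodes only when no node was requested.

/* Sketch of the partial-slab search order: look on the requested (or
 * local) node first, and fall back to other nodes only when the caller
 * expressed no node preference at all.  Names are illustrative. */
#include <stdio.h>

#define NO_NODE (-1)

static const char *get_partial_node(int node)
{
        return (node == 0) ? "partial slab from node 0" : NULL;
}

static const char *get_any_partial(void)
{
        return "partial slab from some other node";
}

static const char *get_partial(int node, int local_node)
{
        int searchnode = (node == NO_NODE) ? local_node : node;
        const char *page = get_partial_node(searchnode);

        if (page || node != NO_NODE)    /* explicit request: do not roam */
                return page;
        return get_any_partial();
}

int main(void)
{
        const char *p;

        p = get_partial(NO_NODE, 1);    /* no preference, node 1 is local */
        printf("%s\n", p ? p : "none");

        p = get_partial(2, 1);          /* node 2 requested but empty     */
        printf("%s\n", p ? p : "none (stays on requested node)");
        return 0;
}
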
@@ -1415,8 +1416,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1415 stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); 1416 stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1416 } else { 1417 } else {
1417 stat(s, DEACTIVATE_FULL); 1418 stat(s, DEACTIVATE_FULL);
1418 if (SLABDEBUG && PageSlubDebug(page) && 1419 if (kmem_cache_debug(s) && (s->flags & SLAB_STORE_USER))
1419 (s->flags & SLAB_STORE_USER))
1420 add_full(n, page); 1420 add_full(n, page);
1421 } 1421 }
1422 slab_unlock(page); 1422 slab_unlock(page);
@@ -1515,7 +1515,7 @@ static void flush_all(struct kmem_cache *s)
1515static inline int node_match(struct kmem_cache_cpu *c, int node) 1515static inline int node_match(struct kmem_cache_cpu *c, int node)
1516{ 1516{
1517#ifdef CONFIG_NUMA 1517#ifdef CONFIG_NUMA
1518 if (node != -1 && c->node != node) 1518 if (node != NUMA_NO_NODE && c->node != node)
1519 return 0; 1519 return 0;
1520#endif 1520#endif
1521 return 1; 1521 return 1;
@@ -1624,7 +1624,7 @@ load_freelist:
1624 object = c->page->freelist; 1624 object = c->page->freelist;
1625 if (unlikely(!object)) 1625 if (unlikely(!object))
1626 goto another_slab; 1626 goto another_slab;
1627 if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) 1627 if (kmem_cache_debug(s))
1628 goto debug; 1628 goto debug;
1629 1629
1630 c->freelist = get_freepointer(s, object); 1630 c->freelist = get_freepointer(s, object);
@@ -1727,7 +1727,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1727 1727
1728void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 1728void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
1729{ 1729{
1730 void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_); 1730 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
1731 1731
1732 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); 1732 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
1733 1733
@@ -1738,7 +1738,7 @@ EXPORT_SYMBOL(kmem_cache_alloc);
1738#ifdef CONFIG_TRACING 1738#ifdef CONFIG_TRACING
1739void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) 1739void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
1740{ 1740{
1741 return slab_alloc(s, gfpflags, -1, _RET_IP_); 1741 return slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
1742} 1742}
1743EXPORT_SYMBOL(kmem_cache_alloc_notrace); 1743EXPORT_SYMBOL(kmem_cache_alloc_notrace);
1744#endif 1744#endif
@@ -1783,7 +1783,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
1783 stat(s, FREE_SLOWPATH); 1783 stat(s, FREE_SLOWPATH);
1784 slab_lock(page); 1784 slab_lock(page);
1785 1785
1786 if (unlikely(SLABDEBUG && PageSlubDebug(page))) 1786 if (kmem_cache_debug(s))
1787 goto debug; 1787 goto debug;
1788 1788
1789checks_ok: 1789checks_ok:
@@ -2490,7 +2490,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
2490 s->refcount--; 2490 s->refcount--;
2491 if (!s->refcount) { 2491 if (!s->refcount) {
2492 list_del(&s->list); 2492 list_del(&s->list);
2493 up_write(&slub_lock);
2494 if (kmem_cache_close(s)) { 2493 if (kmem_cache_close(s)) {
2495 printk(KERN_ERR "SLUB %s: %s called for cache that " 2494 printk(KERN_ERR "SLUB %s: %s called for cache that "
2496 "still has objects.\n", s->name, __func__); 2495 "still has objects.\n", s->name, __func__);
@@ -2499,8 +2498,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
2499 if (s->flags & SLAB_DESTROY_BY_RCU) 2498 if (s->flags & SLAB_DESTROY_BY_RCU)
2500 rcu_barrier(); 2499 rcu_barrier();
2501 sysfs_slab_remove(s); 2500 sysfs_slab_remove(s);
2502 } else 2501 }
2503 up_write(&slub_lock); 2502 up_write(&slub_lock);
2504} 2503}
2505EXPORT_SYMBOL(kmem_cache_destroy); 2504EXPORT_SYMBOL(kmem_cache_destroy);
2506 2505
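
The kmem_cache_destroy() change keeps slub_lock held across kmem_cache_close() and the sysfs removal instead of dropping it as soon as the cache is unlinked, so the whole teardown runs in one critical section and the lock is released exactly once at the end. Below is a hedged pthread sketch of that widened-critical-section shape; the lock and helper names are invented for the example (build with -pthread).

/* Sketch of widening a teardown critical section: the registry lock is
 * held across both the unlink and the teardown work, so other lock
 * holders never run in the middle of a half-destroyed entry. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t registry_lock = PTHREAD_RWLOCK_INITIALIZER;

struct cache { const char *name; int refcount; int live; };

static void cache_close(struct cache *c)
{
        c->live = 0;                    /* free per-node structures etc. */
}

static void cache_destroy(struct cache *c)
{
        pthread_rwlock_wrlock(&registry_lock);
        if (--c->refcount == 0) {
                /* unlink from the registry list would happen here */
                cache_close(c);         /* still under the lock */
        }
        pthread_rwlock_unlock(&registry_lock);
        printf("%s destroyed, live=%d\n", c->name, c->live);
}

int main(void)
{
        struct cache c = { "demo", 1, 1 };

        cache_destroy(&c);
        return 0;
}
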
@@ -2728,7 +2727,7 @@ void *__kmalloc(size_t size, gfp_t flags)
2728 if (unlikely(ZERO_OR_NULL_PTR(s))) 2727 if (unlikely(ZERO_OR_NULL_PTR(s)))
2729 return s; 2728 return s;
2730 2729
2731 ret = slab_alloc(s, flags, -1, _RET_IP_); 2730 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_);
2732 2731
2733 trace_kmalloc(_RET_IP_, ret, size, s->size, flags); 2732 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
2734 2733
@@ -3118,9 +3117,12 @@ void __init kmem_cache_init(void)
3118 slab_state = UP; 3117 slab_state = UP;
3119 3118
3120 /* Provide the correct kmalloc names now that the caches are up */ 3119 /* Provide the correct kmalloc names now that the caches are up */
3121 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) 3120 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3122 kmalloc_caches[i]. name = 3121 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3123 kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); 3122
3123 BUG_ON(!s);
3124 kmalloc_caches[i].name = s;
3125 }
3124 3126
3125#ifdef CONFIG_SMP 3127#ifdef CONFIG_SMP
3126 register_cpu_notifier(&slab_notifier); 3128 register_cpu_notifier(&slab_notifier);
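
The renaming loop in kmem_cache_init() now captures the kasprintf() result in a local, BUG_ON()s a NULL return, and only then assigns the name, rather than storing a possibly-NULL pointer straight into the cache. The standalone sketch below mirrors that shape with asprintf() and an assert; it is illustrative only.

/* Sketch: build formatted names in a loop and refuse to continue if the
 * allocation fails, instead of silently storing a NULL name. */
#define _GNU_SOURCE
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define LOW_SHIFT  3
#define HIGH_SHIFT 12

int main(void)
{
        char *names[HIGH_SHIFT] = { 0 };

        for (int i = LOW_SHIFT; i < HIGH_SHIFT; i++) {
                char *s = NULL;

                if (asprintf(&s, "kmalloc-%d", 1 << i) < 0)
                        s = NULL;
                assert(s);              /* the kernel uses BUG_ON(!s) here */
                names[i] = s;
        }

        for (int i = LOW_SHIFT; i < HIGH_SHIFT; i++)
                printf("%s\n", names[i]);

        for (int i = LOW_SHIFT; i < HIGH_SHIFT; i++)
                free(names[i]);
        return 0;
}
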
@@ -3223,14 +3225,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3223 */ 3225 */
3224 s->objsize = max(s->objsize, (int)size); 3226 s->objsize = max(s->objsize, (int)size);
3225 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3227 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3226 up_write(&slub_lock);
3227 3228
3228 if (sysfs_slab_alias(s, name)) { 3229 if (sysfs_slab_alias(s, name)) {
3229 down_write(&slub_lock);
3230 s->refcount--; 3230 s->refcount--;
3231 up_write(&slub_lock);
3232 goto err; 3231 goto err;
3233 } 3232 }
3233 up_write(&slub_lock);
3234 return s; 3234 return s;
3235 } 3235 }
3236 3236
@@ -3239,14 +3239,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3239 if (kmem_cache_open(s, GFP_KERNEL, name, 3239 if (kmem_cache_open(s, GFP_KERNEL, name,
3240 size, align, flags, ctor)) { 3240 size, align, flags, ctor)) {
3241 list_add(&s->list, &slab_caches); 3241 list_add(&s->list, &slab_caches);
3242 up_write(&slub_lock);
3243 if (sysfs_slab_add(s)) { 3242 if (sysfs_slab_add(s)) {
3244 down_write(&slub_lock);
3245 list_del(&s->list); 3243 list_del(&s->list);
3246 up_write(&slub_lock);
3247 kfree(s); 3244 kfree(s);
3248 goto err; 3245 goto err;
3249 } 3246 }
3247 up_write(&slub_lock);
3250 return s; 3248 return s;
3251 } 3249 }
3252 kfree(s); 3250 kfree(s);
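
kmem_cache_create() gets the same treatment: the list insertion, the sysfs registration, and the error-path unlink now all happen before slub_lock is released, removing the drop-and-retake dance that each failure path needed before. A compressed sketch of that publish-or-unwind-under-one-lock shape follows, with invented names and a pthread mutex standing in for the rwsem.

/* Sketch: publish a new object and attempt its external registration
 * inside one critical section, unwinding in place if registration
 * fails, instead of dropping and re-taking the lock around each step. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

struct cache { char name[32]; struct cache *next; };
static struct cache *caches;            /* the published list */

static int register_externally(struct cache *c)
{
        return strcmp(c->name, "bad") ? 0 : -1;   /* pretend failure mode */
}

static struct cache *cache_create(const char *name)
{
        struct cache *c = calloc(1, sizeof(*c));

        if (!c)
                return NULL;
        snprintf(c->name, sizeof(c->name), "%s", name);

        pthread_mutex_lock(&registry_lock);
        c->next = caches;
        caches = c;                     /* publish at the head */
        if (register_externally(c)) {
                /* still at the head because the lock was never dropped */
                caches = c->next;
                pthread_mutex_unlock(&registry_lock);
                free(c);
                return NULL;
        }
        pthread_mutex_unlock(&registry_lock);
        return c;
}

int main(void)
{
        printf("good: %p\n", (void *)cache_create("good"));
        printf("bad:  %p\n", (void *)cache_create("bad"));
        return 0;
}
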
@@ -3312,7 +3310,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3312 if (unlikely(ZERO_OR_NULL_PTR(s))) 3310 if (unlikely(ZERO_OR_NULL_PTR(s)))
3313 return s; 3311 return s;
3314 3312
3315 ret = slab_alloc(s, gfpflags, -1, caller); 3313 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
3316 3314
3317 /* Honor the call site pointer we recieved. */ 3315 /* Honor the call site pointer we recieved. */
3318 trace_kmalloc(caller, ret, size, s->size, gfpflags); 3316 trace_kmalloc(caller, ret, size, s->size, gfpflags);
@@ -3395,16 +3393,6 @@ static void validate_slab_slab(struct kmem_cache *s, struct page *page,
3395 } else 3393 } else
3396 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n", 3394 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
3397 s->name, page); 3395 s->name, page);
3398
3399 if (s->flags & DEBUG_DEFAULT_FLAGS) {
3400 if (!PageSlubDebug(page))
3401 printk(KERN_ERR "SLUB %s: SlubDebug not set "
3402 "on slab 0x%p\n", s->name, page);
3403 } else {
3404 if (PageSlubDebug(page))
3405 printk(KERN_ERR "SLUB %s: SlubDebug set on "
3406 "slab 0x%p\n", s->name, page);
3407 }
3408} 3396}
3409 3397
3410static int validate_slab_node(struct kmem_cache *s, 3398static int validate_slab_node(struct kmem_cache *s,
@@ -4504,6 +4492,13 @@ static int sysfs_slab_add(struct kmem_cache *s)
4504 4492
4505static void sysfs_slab_remove(struct kmem_cache *s) 4493static void sysfs_slab_remove(struct kmem_cache *s)
4506{ 4494{
4495 if (slab_state < SYSFS)
4496 /*
4497 * Sysfs has not been setup yet so no need to remove the
4498 * cache from sysfs.
4499 */
4500 return;
4501
4507 kobject_uevent(&s->kobj, KOBJ_REMOVE); 4502 kobject_uevent(&s->kobj, KOBJ_REMOVE);
4508 kobject_del(&s->kobj); 4503 kobject_del(&s->kobj);
4509 kobject_put(&s->kobj); 4504 kobject_put(&s->kobj);
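
sysfs_slab_remove() now returns early while slab_state is still below SYSFS: caches destroyed before the slab kset exists were never registered, so there is nothing to unregister. The sketch below shows the same state-guarded teardown with invented names.

/* Sketch of guarding teardown on an initialisation state: skip the
 * unregister step for objects created before the registry existed. */
#include <stdio.h>

enum state { DOWN, PARTIAL, UP, SYSFS_READY };

static enum state slab_state = UP;      /* sysfs not initialised yet */

struct cache { const char *name; };

static void sysfs_remove(struct cache *c)
{
        if (slab_state < SYSFS_READY) {
                /* never registered, nothing to remove */
                return;
        }
        printf("unregistering %s from sysfs\n", c->name);
}

int main(void)
{
        struct cache c = { "early-cache" };

        sysfs_remove(&c);               /* quietly skipped */
        slab_state = SYSFS_READY;
        sysfs_remove(&c);               /* now does the real work */
        return 0;
}
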
@@ -4549,8 +4544,11 @@ static int __init slab_sysfs_init(void)
4549 struct kmem_cache *s; 4544 struct kmem_cache *s;
4550 int err; 4545 int err;
4551 4546
4547 down_write(&slub_lock);
4548
4552 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); 4549 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
4553 if (!slab_kset) { 4550 if (!slab_kset) {
4551 up_write(&slub_lock);
4554 printk(KERN_ERR "Cannot register slab subsystem.\n"); 4552 printk(KERN_ERR "Cannot register slab subsystem.\n");
4555 return -ENOSYS; 4553 return -ENOSYS;
4556 } 4554 }
@@ -4575,6 +4573,7 @@ static int __init slab_sysfs_init(void)
4575 kfree(al); 4573 kfree(al);
4576 } 4574 }
4577 4575
4576 up_write(&slub_lock);
4578 resiliency_test(); 4577 resiliency_test();
4579 return 0; 4578 return 0;
4580} 4579}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ae007462b7f6..b7e314b1009f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2403,7 +2403,7 @@ static int s_show(struct seq_file *m, void *p)
2403 seq_printf(m, " pages=%d", v->nr_pages); 2403 seq_printf(m, " pages=%d", v->nr_pages);
2404 2404
2405 if (v->phys_addr) 2405 if (v->phys_addr)
2406 seq_printf(m, " phys=%lx", v->phys_addr); 2406 seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2407 2407
2408 if (v->flags & VM_IOREMAP) 2408 if (v->flags & VM_IOREMAP)
2409 seq_printf(m, " ioremap"); 2409 seq_printf(m, " ioremap");
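
The /proc/vmallocinfo change prints the physical address with %llx and an explicit cast because the field can be wider than unsigned long on 32-bit kernels, where %lx would truncate it. The portable-printing idiom, in plain C, looks like the sketch below; the typedef is only a stand-in for phys_addr_t.

/* Printing a possibly-64-bit address type portably: cast to the widest
 * standard unsigned type and use a matching format specifier. */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t phys_addr_like_t;      /* may exceed long on 32-bit */

int main(void)
{
        phys_addr_like_t phys = 0x1ffff0000ULL;   /* above 4 GiB */

        /* printf("phys=%lx", phys) would truncate where long is 32 bits */
        printf("phys=%llx\n", (unsigned long long)phys);
        return 0;
}
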
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9c7e57cc63a3..b94fe1b3da43 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -213,8 +213,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
213 list_for_each_entry(shrinker, &shrinker_list, list) { 213 list_for_each_entry(shrinker, &shrinker_list, list) {
214 unsigned long long delta; 214 unsigned long long delta;
215 unsigned long total_scan; 215 unsigned long total_scan;
216 unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask); 216 unsigned long max_pass;
217 217
218 max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
218 delta = (4 * scanned) / shrinker->seeks; 219 delta = (4 * scanned) / shrinker->seeks;
219 delta *= max_pass; 220 delta *= max_pass;
220 do_div(delta, lru_pages + 1); 221 do_div(delta, lru_pages + 1);
@@ -242,8 +243,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
242 int shrink_ret; 243 int shrink_ret;
243 int nr_before; 244 int nr_before;
244 245
245 nr_before = (*shrinker->shrink)(0, gfp_mask); 246 nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
246 shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask); 247 shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
248 gfp_mask);
247 if (shrink_ret == -1) 249 if (shrink_ret == -1)
248 break; 250 break;
249 if (shrink_ret < nr_before) 251 if (shrink_ret < nr_before)
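
Both shrink_slab() call sites now pass the struct shrinker itself as the first argument of ->shrink(), so a callback can locate its own per-instance state (commonly via container_of on an embedded descriptor) instead of relying on globals. A hedged userspace sketch of that callback shape, with illustrative names and a simplified signature, is below.

/* Sketch of a shrink-style callback that receives its own descriptor,
 * letting one function serve several caches via container_of-style
 * pointer arithmetic. */
#include <stddef.h>
#include <stdio.h>

struct shrinker {
        long (*shrink)(struct shrinker *self, unsigned long nr_to_scan);
};

struct my_cache {
        struct shrinker shrinker;       /* embedded descriptor */
        long objects;
};

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static long my_shrink(struct shrinker *self, unsigned long nr_to_scan)
{
        struct my_cache *c = container_of(self, struct my_cache, shrinker);

        if (nr_to_scan == 0)            /* query: report current size */
                return c->objects;
        if (nr_to_scan > (unsigned long)c->objects)
                nr_to_scan = c->objects;
        c->objects -= nr_to_scan;       /* pretend to reclaim */
        return c->objects;
}

int main(void)
{
        struct my_cache c = { { my_shrink }, 128 };

        printf("before: %ld\n", c.shrinker.shrink(&c.shrinker, 0));
        c.shrinker.shrink(&c.shrinker, 32);
        printf("after:  %ld\n", c.shrinker.shrink(&c.shrinker, 0));
        return 0;
}
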
@@ -296,7 +298,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi)
296static void handle_write_error(struct address_space *mapping, 298static void handle_write_error(struct address_space *mapping,
297 struct page *page, int error) 299 struct page *page, int error)
298{ 300{
299 lock_page(page); 301 lock_page_nosync(page);
300 if (page_mapping(page) == mapping) 302 if (page_mapping(page) == mapping)
301 mapping_set_error(mapping, error); 303 mapping_set_error(mapping, error);
302 unlock_page(page); 304 unlock_page(page);