commit 4eebf60b7452fbd551fd7dece855ba7825a49cbc
tree   490b4d194ba09c90e10201ab7fc084a0bda0ed27
parent 8f9cb50789e76f3e224e8861adf650e55c747af4
parent 2c6625cd545bdd66acff14f3394865d43920a5c7
author    Dave Airlie <airlied@redhat.com> 2015-08-17 00:13:53 -0400
committer Dave Airlie <airlied@redhat.com> 2015-08-17 00:13:53 -0400

    Merge tag 'v4.2-rc7' into drm-next

    Linux 4.2-rc7

    Backmerge master for i915 fixes
Diffstat (limited to 'mm'):

 mm/cma.h            |  2
 mm/huge_memory.c    |  7
 mm/kasan/kasan.c    |  2
 mm/kasan/report.c   |  2
 mm/memory-failure.c | 54
 mm/memory_hotplug.c | 13
 mm/migrate.c        |  8
 mm/page-writeback.c |  4
 mm/page_alloc.c     | 53
 mm/shmem.c          |  4
 mm/slab_common.c    |  3
 mm/vmscan.c         | 16
 12 files changed, 102 insertions(+), 66 deletions(-)
diff --git a/mm/cma.h b/mm/cma.h
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -16,7 +16,7 @@ struct cma {
 extern struct cma cma_areas[MAX_CMA_AREAS];
 extern unsigned cma_area_count;
 
-static unsigned long cma_bitmap_maxno(struct cma *cma)
+static inline unsigned long cma_bitmap_maxno(struct cma *cma)
 {
 	return cma->count >> cma->order_per_bit;
 }
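The change above matters because cma_bitmap_maxno() is defined in a header: as plain `static`, every C file that includes mm/cma.h without calling it gets its own unused copy and a -Wunused-function warning, while `static inline` lets each inclusion carry a quietly discardable definition. A minimal sketch of the same pattern in a hypothetical header (names are illustrative, not kernel API):

    /* area.h -- hypothetical header, safe to include from many .c files */
    #ifndef AREA_H
    #define AREA_H

    struct area {
    	unsigned long count;
    	unsigned int order_per_bit;
    };

    /* static inline: no unused-function warning in files that never call it */
    static inline unsigned long area_bitmap_maxno(const struct area *a)
    {
    	return a->count >> a->order_per_bit;
    }

    #endif /* AREA_H */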
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c107094f79ba..097c7a4bfbd9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1676,12 +1676,7 @@ static void __split_huge_page_refcount(struct page *page,
 	/* after clearing PageTail the gup refcount can be released */
 	smp_mb__after_atomic();
 
-	/*
-	 * retain hwpoison flag of the poisoned tail page:
-	 *   fix for the unsuitable process killed on Guest Machine(KVM)
-	 *   by the memory-failure.
-	 */
-	page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
+	page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	page_tail->flags |= (page->flags &
 			     ((1L << PG_referenced) |
 			      (1L << PG_swapbacked) |
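The deleted line relied on a precedence subtlety: `~` binds tighter than `|`, so `~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON` builds a mask that clears the prep-checked flags while deliberately keeping the poison bit on the tail page. With the backmerged memory-failure rework the allocator now checks `__PG_HWPOISON` itself (see the check_new_page() hunk below), so the plain mask suffices. A small standalone demonstration (bit values are made up, not the kernel's page-flag layout):

    #include <stdio.h>

    #define CHECK_AT_PREP 0x0FUL  /* flags cleared when a page is re-prepped */
    #define HWPOISON      0x04UL  /* poison bit lives inside that range here */

    int main(void)
    {
    	unsigned long flags = 0x3C;

    	/* (~A) | B: clear A's bits but retain B -- the old behaviour */
    	unsigned long keep = flags & (~CHECK_AT_PREP | HWPOISON);

    	/* ~A: B is cleared with the rest -- the new behaviour */
    	unsigned long drop = flags & ~CHECK_AT_PREP;

    	printf("keep=%#lx drop=%#lx\n", keep, drop); /* keep=0x34 drop=0x30 */
    	return 0;
    }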
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 6c513a63ea84..7b28e9cdf1c7 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -2,7 +2,7 @@
  * This file contains shadow memory manipulation code.
  *
  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
- * Author: Andrey Ryabinin <a.ryabinin@samsung.com>
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
  *
  * Some of code borrowed from https://github.com/xairy/linux by
  *        Andrey Konovalov <adech.fo@gmail.com>
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 680ceedf810a..e07c94fbd0ac 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -2,7 +2,7 @@
 * This file contains error reporting code.
 *
 * Copyright (c) 2014 Samsung Electronics Co., Ltd.
- * Author: Andrey Ryabinin <a.ryabinin@samsung.com>
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
 *
 * Some of code borrowed from https://github.com/xairy/linux by
 *        Andrey Konovalov <adech.fo@gmail.com>
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c53543d89282..1f4446a90cef 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -909,6 +909,18 @@ int get_hwpoison_page(struct page *page)
 	 * directly for tail pages.
 	 */
 	if (PageTransHuge(head)) {
+		/*
+		 * Non anonymous thp exists only in allocation/free time. We
+		 * can't handle such a case correctly, so let's give it up.
+		 * This should be better than triggering BUG_ON when kernel
+		 * tries to touch the "partially handled" page.
+		 */
+		if (!PageAnon(head)) {
+			pr_err("MCE: %#lx: non anonymous thp\n",
+			       page_to_pfn(page));
+			return 0;
+		}
+
 		if (get_page_unless_zero(head)) {
 			if (PageTail(page))
 				get_page(page);
@@ -1134,17 +1146,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	}
 
 	if (!PageHuge(p) && PageTransHuge(hpage)) {
-		if (!PageAnon(hpage)) {
-			pr_err("MCE: %#lx: non anonymous thp\n", pfn);
-			if (TestClearPageHWPoison(p))
-				atomic_long_sub(nr_pages, &num_poisoned_pages);
-			put_page(p);
-			if (p != hpage)
-				put_page(hpage);
-			return -EBUSY;
-		}
-		if (unlikely(split_huge_page(hpage))) {
-			pr_err("MCE: %#lx: thp split failed\n", pfn);
+		if (!PageAnon(hpage) || unlikely(split_huge_page(hpage))) {
+			if (!PageAnon(hpage))
+				pr_err("MCE: %#lx: non anonymous thp\n", pfn);
+			else
+				pr_err("MCE: %#lx: thp split failed\n", pfn);
 			if (TestClearPageHWPoison(p))
 				atomic_long_sub(nr_pages, &num_poisoned_pages);
 			put_page(p);
@@ -1209,9 +1215,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	if (!PageHWPoison(p)) {
 		printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
 		atomic_long_sub(nr_pages, &num_poisoned_pages);
+		unlock_page(hpage);
 		put_page(hpage);
-		res = 0;
-		goto out;
+		return 0;
 	}
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
@@ -1535,6 +1541,8 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
 		 */
 		ret = __get_any_page(page, pfn, 0);
 		if (!PageLRU(page)) {
+			/* Drop page reference which is from __get_any_page() */
+			put_page(page);
 			pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
 				pfn, page->flags);
 			return -EIO;
@@ -1564,13 +1572,12 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	unlock_page(hpage);
 
 	ret = isolate_huge_page(hpage, &pagelist);
-	if (ret) {
-		/*
-		 * get_any_page() and isolate_huge_page() takes a refcount each,
-		 * so need to drop one here.
-		 */
-		put_page(hpage);
-	} else {
+	/*
+	 * get_any_page() and isolate_huge_page() takes a refcount each,
+	 * so need to drop one here.
+	 */
+	put_page(hpage);
+	if (!ret) {
 		pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
 		return -EBUSY;
 	}
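Before this hunk, the get_any_page() reference was only dropped when isolation succeeded; the failure branch returned -EBUSY still holding it, leaking the hugepage. The rewrite balances the lookup reference on every path and keeps only the reference isolation itself took. A sketch of that rule with hypothetical helpers (not kernel API):

    #include <stdbool.h>

    struct obj { int refs; };

    static void obj_get(struct obj *o) { o->refs++; }
    static void obj_put(struct obj *o) { o->refs--; }

    /* isolation takes its own reference, like isolate_huge_page() */
    static bool isolate(struct obj *o) { obj_get(o); return true; }

    static int offline(struct obj *o)
    {
    	obj_get(o);            /* stands in for get_any_page() */
    	bool ok = isolate(o);
    	obj_put(o);            /* balance the lookup ref unconditionally:
    	                        * skipping this on failure was the leak */
    	if (!ok)
    		return -1;
    	/* ... migration then consumes the isolation reference ... */
    	return 0;
    }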
@@ -1656,6 +1663,8 @@ static int __soft_offline_page(struct page *page, int flags)
 		inc_zone_page_state(page, NR_ISOLATED_ANON +
 					page_is_file_cache(page));
 		list_add(&page->lru, &pagelist);
+		if (!TestSetPageHWPoison(page))
+			atomic_long_inc(&num_poisoned_pages);
 		ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
 					MIGRATE_SYNC, MR_MEMORY_FAILURE);
 		if (ret) {
@@ -1670,9 +1679,8 @@ static int __soft_offline_page(struct page *page, int flags)
 				pfn, ret, page->flags);
 			if (ret > 0)
 				ret = -EIO;
-		} else {
-			SetPageHWPoison(page);
-			atomic_long_inc(&num_poisoned_pages);
+			if (TestClearPageHWPoison(page))
+				atomic_long_dec(&num_poisoned_pages);
 		}
 	} else {
 		pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
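These last two hunks reorder poisoning around migration in __soft_offline_page(): PG_hwpoison is now set (and counted) before migrate_pages() runs, closing the window where the page could be reused unmarked, and is cleared again only if migration fails. The same set-first/undo-on-failure shape, sketched with C11 atomics standing in for the kernel's test-and-set page-flag helpers:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_long num_poisoned;
    static atomic_bool poisoned;

    static bool migrate(void) { return true; }  /* migrate_pages() stand-in */

    static int soft_offline(void)
    {
    	/* TestSetPageHWPoison analogue: count only the 0 -> 1 edge */
    	if (!atomic_exchange(&poisoned, true))
    		atomic_fetch_add(&num_poisoned, 1);

    	if (!migrate()) {
    		/* TestClearPageHWPoison analogue: roll back on failure */
    		if (atomic_exchange(&poisoned, false))
    			atomic_fetch_sub(&num_poisoned, 1);
    		return -1;
    	}
    	return 0;
    }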
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 26fbba7d888f..6da82bcb0a8b 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -446,7 +446,7 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 	int nr_pages = PAGES_PER_SECTION;
 	int nid = pgdat->node_id;
 	int zone_type;
-	unsigned long flags;
+	unsigned long flags, pfn;
 	int ret;
 
 	zone_type = zone - pgdat->node_zones;
@@ -461,6 +461,14 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 	pgdat_resize_unlock(zone->zone_pgdat, &flags);
 	memmap_init_zone(nr_pages, nid, zone_type,
 			 phys_start_pfn, MEMMAP_HOTPLUG);
+
+	/* online_page_range is called later and expects pages reserved */
+	for (pfn = phys_start_pfn; pfn < phys_start_pfn + nr_pages; pfn++) {
+		if (!pfn_valid(pfn))
+			continue;
+
+		SetPageReserved(pfn_to_page(pfn));
+	}
 	return 0;
 }
 
@@ -1269,6 +1277,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
 
 	/* create new memmap entry */
 	firmware_map_add_hotplug(start, start + size, "System RAM");
+	memblock_add_node(start, size, nid);
 
 	goto out;
 
@@ -2005,6 +2014,8 @@ void __ref remove_memory(int nid, u64 start, u64 size)
 
 	/* remove memmap entry */
 	firmware_map_remove(start, start + size, "System RAM");
+	memblock_free(start, size);
+	memblock_remove(start, size);
 
 	arch_remove_memory(start, size);
 
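The add_memory()/remove_memory() hunks are a symmetry fix: hot-added RAM is now registered with memblock, and hot-removal frees and deletes the same range again, so the memblock view cannot drift from what the firmware map reports. A toy registry showing the paired add/remove discipline (hypothetical structures, not memblock's real API):

    #include <string.h>

    #define MAX_REGIONS 16

    struct region { unsigned long long start, size; };
    static struct region regions[MAX_REGIONS];
    static int nr_regions;

    /* analogue of memblock_add_node(): register a hot-added range */
    static int region_add(unsigned long long start, unsigned long long size)
    {
    	if (nr_regions == MAX_REGIONS)
    		return -1;
    	regions[nr_regions++] = (struct region){ start, size };
    	return 0;
    }

    /* analogue of memblock_remove(): unregister the same range on removal */
    static void region_remove(unsigned long long start, unsigned long long size)
    {
    	for (int i = 0; i < nr_regions; i++)
    		if (regions[i].start == start && regions[i].size == size) {
    			memmove(&regions[i], &regions[i + 1],
    				(size_t)(nr_regions - i - 1) * sizeof(*regions));
    			nr_regions--;
    			return;
    		}
    }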
diff --git a/mm/migrate.c b/mm/migrate.c
index ee401e4e5ef1..eb4267107d1f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -880,7 +880,8 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	/* Establish migration ptes or remove ptes */
 	if (page_mapped(page)) {
 		try_to_unmap(page,
-			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS|
+			TTU_IGNORE_HWPOISON);
 		page_was_mapped = 1;
 	}
 
@@ -950,7 +951,10 @@ out:
 		list_del(&page->lru);
 		dec_zone_page_state(page, NR_ISOLATED_ANON +
 				page_is_file_cache(page));
-		if (reason != MR_MEMORY_FAILURE)
+		/* Soft-offlined page shouldn't go through lru cache list */
+		if (reason == MR_MEMORY_FAILURE)
+			put_page(page);
+		else
 			putback_lru_page(page);
 	}
 
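Both migrate.c hunks serve memory-failure: try_to_unmap() no longer balks at the poison bit while building migration ptes, and a page migrated for MR_MEMORY_FAILURE is freed with put_page() rather than returned to the LRU, since a hwpoisoned page must not re-enter circulation through the LRU cache. The branch shape, with stand-in types (not kernel API):

    enum reason { COMPACTION, MEMORY_FAILURE };

    struct page_stub { int on_lru; int refs; };

    static void put_page_stub(struct page_stub *p)    { p->refs--; }
    static void putback_lru_stub(struct page_stub *p) { p->on_lru = 1; p->refs--; }

    static void undo_isolation(struct page_stub *p, enum reason why)
    {
    	if (why == MEMORY_FAILURE)
    		put_page_stub(p);      /* drop it; never back on the LRU */
    	else
    		putback_lru_stub(p);   /* normal path: return to the LRU */
    }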
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 22cddd3e5de8..5cccc127ef81 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2063,10 +2063,10 @@ static struct notifier_block ratelimit_nb = {
  */
 void __init page_writeback_init(void)
 {
+	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
+
 	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
-
-	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
 }
 
 /**
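This hunk is purely an ordering fix: wb_domain_init() zero-initialises the domain, so running it after writeback_set_ratelimit() wiped the dirty-limit state that call had just stored into global_wb_domain. Initialising the domain first preserves it. The bug reduced to a few lines (names and values illustrative):

    #include <assert.h>

    static struct { unsigned long dirty_limit; } domain;

    static void domain_init(void)   { domain.dirty_limit = 0;    /* memset-like */ }
    static void set_ratelimit(void) { domain.dirty_limit = 1000; /* computed */ }

    int main(void)
    {
    	domain_init();     /* initialise first... */
    	set_ratelimit();   /* ...so the computed limit is not wiped */
    	assert(domain.dirty_limit == 1000);
    	return 0;
    }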
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ef19f22b2b7d..df959b7d6085 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -18,7 +18,6 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/interrupt.h>
-#include <linux/rwsem.h>
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
 #include <linux/bootmem.h>
@@ -981,21 +980,21 @@ static void __init __free_pages_boot_core(struct page *page,
 
 #if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
 	defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
-/* Only safe to use early in boot when initialisation is single-threaded */
+
 static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
 
 int __meminit early_pfn_to_nid(unsigned long pfn)
 {
+	static DEFINE_SPINLOCK(early_pfn_lock);
 	int nid;
 
-	/* The system will behave unpredictably otherwise */
-	BUG_ON(system_state != SYSTEM_BOOTING);
-
+	spin_lock(&early_pfn_lock);
 	nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
-	if (nid >= 0)
-		return nid;
-	/* just returns 0 */
-	return 0;
+	if (nid < 0)
+		nid = 0;
+	spin_unlock(&early_pfn_lock);
+
+	return nid;
 }
 #endif
 
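With deferred struct-page initialisation, early_pfn_to_nid() is no longer a strictly single-threaded boot-time helper: the per-node pgdatinit threads can race on the shared early_pfnnid_cache, so the `BUG_ON(system_state != SYSTEM_BOOTING)` assertion gives way to a static spinlock around the cached lookup, with a plain node-0 fallback on a miss. A userspace analogue of the locked-cache pattern (pthread mutex in place of the spinlock; the cache layout is invented):

    #include <pthread.h>

    struct nid_cache { unsigned long start, end; int nid; };

    static struct nid_cache cache = { .nid = -1 };
    static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

    static int cache_lookup(unsigned long pfn)
    {
    	if (pfn >= cache.start && pfn < cache.end)
    		return cache.nid;  /* hit; real code refills on a miss */
    	return -1;
    }

    int pfn_to_nid(unsigned long pfn)
    {
    	int nid;

    	pthread_mutex_lock(&cache_lock);
    	nid = cache_lookup(pfn);
    	if (nid < 0)
    		nid = 0;           /* default node, as the new code does */
    	pthread_mutex_unlock(&cache_lock);

    	return nid;
    }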
@@ -1060,7 +1059,15 @@ static void __init deferred_free_range(struct page *page,
 		__free_pages_boot_core(page, pfn, 0);
 }
 
-static __initdata DECLARE_RWSEM(pgdat_init_rwsem);
+/* Completion tracking for deferred_init_memmap() threads */
+static atomic_t pgdat_init_n_undone __initdata;
+static __initdata DECLARE_COMPLETION(pgdat_init_all_done_comp);
+
+static inline void __init pgdat_init_report_one_done(void)
+{
+	if (atomic_dec_and_test(&pgdat_init_n_undone))
+		complete(&pgdat_init_all_done_comp);
+}
 
 /* Initialise remaining memory on a node */
 static int __init deferred_init_memmap(void *data)
@@ -1077,7 +1084,7 @@ static int __init deferred_init_memmap(void *data)
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 
 	if (first_init_pfn == ULONG_MAX) {
-		up_read(&pgdat_init_rwsem);
+		pgdat_init_report_one_done();
 		return 0;
 	}
 
@@ -1177,7 +1184,8 @@ free_range:
 
 	pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
 					jiffies_to_msecs(jiffies - start));
-	up_read(&pgdat_init_rwsem);
+
+	pgdat_init_report_one_done();
 	return 0;
 }
 
@@ -1185,14 +1193,17 @@ void __init page_alloc_init_late(void)
 {
 	int nid;
 
+	/* There will be num_node_state(N_MEMORY) threads */
+	atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY));
 	for_each_node_state(nid, N_MEMORY) {
-		down_read(&pgdat_init_rwsem);
 		kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
 	}
 
 	/* Block until all are initialised */
-	down_write(&pgdat_init_rwsem);
-	up_write(&pgdat_init_rwsem);
+	wait_for_completion(&pgdat_init_all_done_comp);
+
+	/* Reinit limits that are based on free pages after the kernel is up */
+	files_maxfiles_init();
 }
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
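These four hunks swap the pgdat_init_rwsem trick (each worker holds the semaphore for reading; the boot thread's down_write blocks until every reader finishes) for an explicit counted completion: the launcher sets pgdat_init_n_undone to the number of memory nodes, each deferred_init_memmap() thread decrements it, and the last one fires the completion the boot thread waits on. files_maxfiles_init() then re-derives the file-table limit once the deferred pages are free. A runnable pthread analogue of the counted completion (a condition variable standing in for struct completion):

    #include <pthread.h>
    #include <stdio.h>

    #define NWORKERS 4

    static int n_undone = NWORKERS;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t all_done = PTHREAD_COND_INITIALIZER;

    static void report_one_done(void)  /* pgdat_init_report_one_done() analogue */
    {
    	pthread_mutex_lock(&lock);
    	if (--n_undone == 0)
    		pthread_cond_signal(&all_done);  /* complete() analogue */
    	pthread_mutex_unlock(&lock);
    }

    static void *worker(void *arg)
    {
    	printf("node %ld initialised\n", (long)arg);
    	report_one_done();
    	return NULL;
    }

    int main(void)
    {
    	pthread_t t[NWORKERS];

    	for (long i = 0; i < NWORKERS; i++)
    		pthread_create(&t[i], NULL, worker, (void *)i);

    	pthread_mutex_lock(&lock);  /* wait_for_completion() analogue */
    	while (n_undone > 0)
    		pthread_cond_wait(&all_done, &lock);
    	pthread_mutex_unlock(&lock);

    	for (int i = 0; i < NWORKERS; i++)
    		pthread_join(t[i], NULL);
    	return 0;
    }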
@@ -1285,6 +1296,10 @@ static inline int check_new_page(struct page *page)
 		bad_reason = "non-NULL mapping";
 	if (unlikely(atomic_read(&page->_count) != 0))
 		bad_reason = "nonzero _count";
+	if (unlikely(page->flags & __PG_HWPOISON)) {
+		bad_reason = "HWPoisoned (hardware-corrupted)";
+		bad_flags = __PG_HWPOISON;
+	}
 	if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
 		bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
 		bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
@@ -5045,6 +5060,10 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
 {
 	unsigned long zone_start_pfn, zone_end_pfn;
 
+	/* When hotadd a new node, the node should be empty */
+	if (!node_start_pfn && !node_end_pfn)
+		return 0;
+
 	/* Get the start and end of the zone */
 	zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
 	zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
@@ -5108,6 +5127,10 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 	unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
 	unsigned long zone_start_pfn, zone_end_pfn;
 
+	/* When hotadd a new node, the node should be empty */
+	if (!node_start_pfn && !node_end_pfn)
+		return 0;
+
 	zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
 	zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 4caf8ed24d65..dbe0c1e8349c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3363,8 +3363,8 @@ put_path:
 * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
 * kernel internal.  There will be NO LSM permission checks against the
 * underlying inode.  So users of this interface must do LSM checks at a
- * higher layer.  The one user is the big_key implementation.  LSM checks
- * are provided at the key level rather than the inode level.
+ * higher layer.  The users are the big_key and shm implementations.  LSM
+ * checks are provided at the key or shm level rather than the inode.
 * @name: name for dentry (to be seen in /proc/<pid>/maps
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3e5f8f29c286..86831105a09f 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -37,8 +37,7 @@ struct kmem_cache *kmem_cache;
 		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
 		SLAB_FAILSLAB)
 
-#define SLAB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
-		SLAB_CACHE_DMA | SLAB_NOTRACK)
+#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | SLAB_NOTRACK)
 
 /*
  * Merge control. If this is set then no merging of slab caches will occur.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e61445dce04e..8286938c70de 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -973,22 +973,18 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 *    caller can stall after page list has been processed.
 		 *
 		 * 2) Global or new memcg reclaim encounters a page that is
-		 *    not marked for immediate reclaim or the caller does not
-		 *    have __GFP_IO. In this case mark the page for immediate
+		 *    not marked for immediate reclaim, or the caller does not
+		 *    have __GFP_FS (or __GFP_IO if it's simply going to swap,
+		 *    not to fs). In this case mark the page for immediate
 		 *    reclaim and continue scanning.
 		 *
-		 *    __GFP_IO is checked because a loop driver thread might
+		 *    Require may_enter_fs because we would wait on fs, which
+		 *    may not have submitted IO yet. And the loop driver might
 		 *    enter reclaim, and deadlock if it waits on a page for
 		 *    which it is needed to do the write (loop masks off
 		 *    __GFP_IO|__GFP_FS for this reason); but more thought
 		 *    would probably show more reasons.
 		 *
-		 *    Don't require __GFP_FS, since we're not going into the
-		 *    FS, just waiting on its writeback completion. Worryingly,
-		 *    ext4 gfs2 and xfs allocate pages with
-		 *    grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing
-		 *    may_enter_fs here is liable to OOM on them.
-		 *
 		 * 3) Legacy memcg encounters a page that is not already marked
 		 *    PageReclaim. memcg does not have any dirty pages
 		 *    throttling so we could easily OOM just because too many
@@ -1005,7 +1001,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 			/* Case 2 above */
 			} else if (sane_reclaim(sc) ||
-			    !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) {
+			    !PageReclaim(page) || !may_enter_fs) {
 				/*
 				 * This is slightly racy - end_page_writeback()
 				 * might have just cleared PageReclaim, then
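The key change is testing the function's existing may_enter_fs flag instead of raw `sc->gfp_mask & __GFP_IO`: throttling on a writeback page is only safe when the caller could actually enter the filesystem (or, for a swap-bound page, do IO); otherwise a loop-device thread that masked those bits off can deadlock waiting on a page it must itself write. In shrink_page_list() of this era the flag appears to be computed roughly as below (flag values here are illustrative, not the kernel's):

    #include <stdbool.h>

    #define GFP_IO 0x40u
    #define GFP_FS 0x80u

    static bool may_enter_fs(unsigned int gfp_mask, bool page_in_swapcache)
    {
    	/* filesystem writeback needs __GFP_FS; a page headed only to
    	 * swap needs just __GFP_IO */
    	return (gfp_mask & GFP_FS) ||
    	       (page_in_swapcache && (gfp_mask & GFP_IO));
    }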