author     Linus Torvalds <torvalds@linux-foundation.org>  2016-05-27 17:56:59 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-05-27 17:56:59 -0400
commit     af7d93729c7c2beadea8ec5a6e66c53bef0e6290 (patch)
tree       d807ab034c13fe7e758c8ca11fb8ee38e9ceb38c
parent     564884fbdecaea56fb65f2f32963059d3049b967 (diff)
parent     11e685672a0861ce136cc4e7f6fdd11e5390b1fa (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc updates and fixes from Andrew Morton:
 - late-breaking ocfs2 updates
 - random bunch of fixes

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: disable DEFERRED_STRUCT_PAGE_INIT on !NO_BOOTMEM
  mm/memcontrol.c: move comments for get_mctgt_type() to proper position
  mm/memcontrol.c: fix the margin computation in mem_cgroup_margin()
  mm/cma: silence warnings due to max() usage
  mm: thp: avoid false positive VM_BUG_ON_PAGE in page_move_anon_rmap()
  oom_reaper: close race with exiting task
  mm: use early_pfn_to_nid in register_page_bootmem_info_node
  mm: use early_pfn_to_nid in page_ext_init
  MAINTAINERS: Kdump maintainers update
  MAINTAINERS: add kexec_core.c and kexec_file.c
  mm: oom: do not reap task if there are live threads in threadgroup
  direct-io: fix direct write stale data exposure from concurrent buffered read
  ocfs2: bump up o2cb network protocol version
  ocfs2: o2hb: fix hb hung time
  ocfs2: o2hb: don't negotiate if last hb fail
  ocfs2: o2hb: add some user/debug log
  ocfs2: o2hb: add NEGOTIATE_APPROVE message
  ocfs2: o2hb: add NEGO_TIMEOUT message
  ocfs2: o2hb: add negotiate timer
-rw-r--r--  MAINTAINERS                      |    7
-rw-r--r--  fs/direct-io.c                   |   14
-rw-r--r--  fs/ocfs2/cluster/heartbeat.c     |  180
-rw-r--r--  fs/ocfs2/cluster/tcp_internal.h  |    5
-rw-r--r--  init/main.c                      |    3
-rw-r--r--  mm/Kconfig                       |    2
-rw-r--r--  mm/cma.c                         |    7
-rw-r--r--  mm/memcontrol.c                  |   39
-rw-r--r--  mm/memory_hotplug.c              |    2
-rw-r--r--  mm/oom_kill.c                    |   32
-rw-r--r--  mm/page_ext.c                    |    4
-rw-r--r--  mm/rmap.c                        |    2
12 files changed, 246 insertions(+), 51 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index f2d7337ebdb3..f466673f86ff 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6421,8 +6421,9 @@ F:	Documentation/kbuild/kconfig-language.txt
 F:	scripts/kconfig/
 
 KDUMP
-M:	Vivek Goyal <vgoyal@redhat.com>
-M:	Haren Myneni <hbabu@us.ibm.com>
+M:	Dave Young <dyoung@redhat.com>
+M:	Baoquan He <bhe@redhat.com>
+R:	Vivek Goyal <vgoyal@redhat.com>
 L:	kexec@lists.infradead.org
 W:	http://lse.sourceforge.net/kdump/
 S:	Maintained
@@ -6568,7 +6569,7 @@ L:	kexec@lists.infradead.org
 S:	Maintained
 F:	include/linux/kexec.h
 F:	include/uapi/linux/kexec.h
-F:	kernel/kexec.c
+F:	kernel/kexec*
 
 KEYS/KEYRINGS:
 M:	David Howells <dhowells@redhat.com>
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 3bf3f20f8ecc..f3b4408be590 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -628,11 +628,11 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
 	map_bh->b_size = fs_count << i_blkbits;
 
 	/*
-	 * For writes inside i_size on a DIO_SKIP_HOLES filesystem we
-	 * forbid block creations: only overwrites are permitted.
-	 * We will return early to the caller once we see an
-	 * unmapped buffer head returned, and the caller will fall
-	 * back to buffered I/O.
+	 * For writes that could fill holes inside i_size on a
+	 * DIO_SKIP_HOLES filesystem we forbid block creations: only
+	 * overwrites are permitted. We will return early to the caller
+	 * once we see an unmapped buffer head returned, and the caller
+	 * will fall back to buffered I/O.
 	 *
 	 * Otherwise the decision is left to the get_blocks method,
 	 * which may decide to handle it or also return an unmapped
@@ -640,8 +640,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
 	 */
 	create = dio->rw & WRITE;
 	if (dio->flags & DIO_SKIP_HOLES) {
-		if (sdio->block_in_file < (i_size_read(dio->inode) >>
-					sdio->blkbits))
+		if (fs_startblk <= ((i_size_read(dio->inode) - 1) >>
+					i_blkbits))
 			create = 0;
 	}
 
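The hunk above changes the comparison's units: sdio->block_in_file counts dio-sized sub-blocks, so for a sparse file shorter than one sub-block the old test degenerated to 0 < 0, the write was treated as extending, and a block could be mapped inside i_size before any data reached it, which a concurrent buffered read could then expose. A standalone arithmetic sketch of the two checks, with illustrative sizes (a 256-byte sparse file, 4K fs blocks, 512-byte dio blocks; not taken from any real trace):

/* Illustrative arithmetic only: shows why the old sub-block check in
 * get_more_blocks() mislabels a write as "beyond i_size" while the new
 * fs-block check does not. All constants below are hypothetical. */
#include <stdio.h>

int main(void)
{
	unsigned long long i_size = 256;      /* sparse file, smaller than one fs block */
	unsigned i_blkbits = 12;              /* 4096-byte fs blocks */
	unsigned dio_blkbits = 9;             /* 512-byte dio blocks */
	unsigned long long block_in_file = 0; /* write starts at offset 0 */
	unsigned long long fs_startblk = block_in_file >> (i_blkbits - dio_blkbits);

	/* Old check: dio-block index against i_size in dio blocks. */
	int old_create = !(block_in_file < (i_size >> dio_blkbits));
	/* New check: fs-block index against the block holding the last byte. */
	int new_create = !(fs_startblk <= ((i_size - 1) >> i_blkbits));

	printf("old: create=%d (wrongly allocates inside i_size)\n", old_create);
	printf("new: create=%d (overwrite only, no stale-data window)\n", new_create);
	return 0;
}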
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a8d15beee5cb..6aaf3e351391 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -272,10 +272,21 @@ struct o2hb_region {
 	struct delayed_work	hr_write_timeout_work;
 	unsigned long		hr_last_timeout_start;
 
+	/* negotiate timer, used to negotiate extending hb timeout. */
+	struct delayed_work	hr_nego_timeout_work;
+	unsigned long		hr_nego_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
+
 	/* Used during o2hb_check_slot to hold a copy of the block
 	 * being checked because we temporarily have to zero out the
 	 * crc field. */
 	struct o2hb_disk_heartbeat_block *hr_tmp_block;
+
+	/* Message key for negotiate timeout message. */
+	unsigned int		hr_key;
+	struct list_head	hr_handler_list;
+
+	/* last hb status, 0 for success, other value for error. */
+	int			hr_last_hb_status;
 };
 
 struct o2hb_bio_wait_ctxt {
@@ -284,6 +295,17 @@ struct o2hb_bio_wait_ctxt {
 	int			wc_error;
 };
 
+#define O2HB_NEGO_TIMEOUT_MS (O2HB_MAX_WRITE_TIMEOUT_MS/2)
+
+enum {
+	O2HB_NEGO_TIMEOUT_MSG = 1,
+	O2HB_NEGO_APPROVE_MSG = 2,
+};
+
+struct o2hb_nego_msg {
+	u8 node_num;
+};
+
 static void o2hb_write_timeout(struct work_struct *work)
 {
 	int failed, quorum;
@@ -319,7 +341,7 @@ static void o2hb_write_timeout(struct work_struct *work)
 	o2quo_disk_timeout();
 }
 
-static void o2hb_arm_write_timeout(struct o2hb_region *reg)
+static void o2hb_arm_timeout(struct o2hb_region *reg)
 {
 	/* Arm writeout only after thread reaches steady state */
 	if (atomic_read(&reg->hr_steady_iterations) != 0)
@@ -334,14 +356,132 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg)
 		spin_unlock(&o2hb_live_lock);
 	}
 	cancel_delayed_work(&reg->hr_write_timeout_work);
-	reg->hr_last_timeout_start = jiffies;
 	schedule_delayed_work(&reg->hr_write_timeout_work,
 			      msecs_to_jiffies(O2HB_MAX_WRITE_TIMEOUT_MS));
+
+	cancel_delayed_work(&reg->hr_nego_timeout_work);
+	/* negotiate timeout must be less than write timeout. */
+	schedule_delayed_work(&reg->hr_nego_timeout_work,
+			      msecs_to_jiffies(O2HB_NEGO_TIMEOUT_MS));
+	memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap));
 }
 
-static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
+static void o2hb_disarm_timeout(struct o2hb_region *reg)
 {
 	cancel_delayed_work_sync(&reg->hr_write_timeout_work);
+	cancel_delayed_work_sync(&reg->hr_nego_timeout_work);
+}
+
+static int o2hb_send_nego_msg(int key, int type, u8 target)
+{
+	struct o2hb_nego_msg msg;
+	int status, ret;
+
+	msg.node_num = o2nm_this_node();
+again:
+	ret = o2net_send_message(type, key, &msg, sizeof(msg),
+			target, &status);
+
+	if (ret == -EAGAIN || ret == -ENOMEM) {
+		msleep(100);
+		goto again;
+	}
+
+	return ret;
+}
+
+static void o2hb_nego_timeout(struct work_struct *work)
+{
+	unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
+	int master_node, i, ret;
+	struct o2hb_region *reg;
+
+	reg = container_of(work, struct o2hb_region, hr_nego_timeout_work.work);
+	/* don't negotiate timeout if last hb failed since it is very
+	 * possible io failed. Should let write timeout fence self.
+	 */
+	if (reg->hr_last_hb_status)
+		return;
+
+	o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
+	/* lowest node as master node to make negotiate decision. */
+	master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0);
+
+	if (master_node == o2nm_this_node()) {
+		if (!test_bit(master_node, reg->hr_nego_node_bitmap)) {
+			printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s).\n",
+				o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000,
+				config_item_name(&reg->hr_item), reg->hr_dev_name);
+			set_bit(master_node, reg->hr_nego_node_bitmap);
+		}
+		if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap,
+				sizeof(reg->hr_nego_node_bitmap))) {
+			/* check negotiate bitmap every second to do timeout
+			 * approve decision.
+			 */
+			schedule_delayed_work(&reg->hr_nego_timeout_work,
+				msecs_to_jiffies(1000));
+
+			return;
+		}
+
+		printk(KERN_NOTICE "o2hb: all nodes hb write hung, maybe region %s (%s) is down.\n",
+			config_item_name(&reg->hr_item), reg->hr_dev_name);
+		/* approve negotiate timeout request. */
+		o2hb_arm_timeout(reg);
+
+		i = -1;
+		while ((i = find_next_bit(live_node_bitmap,
+				O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
+			if (i == master_node)
+				continue;
+
+			mlog(ML_HEARTBEAT, "send NEGO_APPROVE msg to node %d\n", i);
+			ret = o2hb_send_nego_msg(reg->hr_key,
+					O2HB_NEGO_APPROVE_MSG, i);
+			if (ret)
+				mlog(ML_ERROR, "send NEGO_APPROVE msg to node %d fail %d\n",
+					i, ret);
+		}
+	} else {
+		/* negotiate timeout with master node. */
+		printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s), negotiate timeout with node %d.\n",
+			o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, config_item_name(&reg->hr_item),
+			reg->hr_dev_name, master_node);
+		ret = o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG,
+				master_node);
+		if (ret)
+			mlog(ML_ERROR, "send NEGO_TIMEOUT msg to node %d fail %d\n",
+				master_node, ret);
+	}
+}
+
+static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data,
+				void **ret_data)
+{
+	struct o2hb_region *reg = data;
+	struct o2hb_nego_msg *nego_msg;
+
+	nego_msg = (struct o2hb_nego_msg *)msg->buf;
+	printk(KERN_NOTICE "o2hb: receive negotiate timeout message from node %d on region %s (%s).\n",
+		nego_msg->node_num, config_item_name(&reg->hr_item), reg->hr_dev_name);
+	if (nego_msg->node_num < O2NM_MAX_NODES)
+		set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap);
+	else
+		mlog(ML_ERROR, "got nego timeout message from bad node.\n");
+
+	return 0;
+}
+
+static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data,
+				void **ret_data)
+{
+	struct o2hb_region *reg = data;
+
+	printk(KERN_NOTICE "o2hb: negotiate timeout approved by master node on region %s (%s).\n",
+		config_item_name(&reg->hr_item), reg->hr_dev_name);
+	o2hb_arm_timeout(reg);
+	return 0;
 }
 
 static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
@@ -1032,7 +1172,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
 	/* Skip disarming the timeout if own slot has stale/bad data */
 	if (own_slot_ok) {
 		o2hb_set_quorum_device(reg);
-		o2hb_arm_write_timeout(reg);
+		o2hb_arm_timeout(reg);
+		reg->hr_last_timeout_start = jiffies;
 	}
 
 bail:
@@ -1096,6 +1237,7 @@ static int o2hb_thread(void *data)
 		before_hb = ktime_get_real();
 
 		ret = o2hb_do_disk_heartbeat(reg);
+		reg->hr_last_hb_status = ret;
 
 		after_hb = ktime_get_real();
 
@@ -1114,7 +1256,7 @@ static int o2hb_thread(void *data)
 		}
 	}
 
-	o2hb_disarm_write_timeout(reg);
+	o2hb_disarm_timeout(reg);
 
 	/* unclean stop is only used in very bad situation */
 	for(i = 0; !reg->hr_unclean_stop && i < reg->hr_blocks; i++)
@@ -1451,6 +1593,7 @@ static void o2hb_region_release(struct config_item *item)
 	list_del(&reg->hr_all_item);
 	spin_unlock(&o2hb_live_lock);
 
+	o2net_unregister_handler_list(&reg->hr_handler_list);
 	kfree(reg);
 }
 
@@ -1762,6 +1905,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
 	}
 
 	INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
+	INIT_DELAYED_WORK(&reg->hr_nego_timeout_work, o2hb_nego_timeout);
 
 	/*
 	 * A node is considered live after it has beat LIVE_THRESHOLD
@@ -1995,13 +2139,37 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
 
 	config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type);
 
+	/* this is the same way to generate msg key as dlm, for local heartbeat,
+	 * name is also the same, so make initial crc value different to avoid
+	 * message key conflict.
+	 */
+	reg->hr_key = crc32_le(reg->hr_region_num + O2NM_MAX_REGIONS,
+		name, strlen(name));
+	INIT_LIST_HEAD(&reg->hr_handler_list);
+	ret = o2net_register_handler(O2HB_NEGO_TIMEOUT_MSG, reg->hr_key,
+			sizeof(struct o2hb_nego_msg),
+			o2hb_nego_timeout_handler,
+			reg, NULL, &reg->hr_handler_list);
+	if (ret)
+		goto free;
+
+	ret = o2net_register_handler(O2HB_NEGO_APPROVE_MSG, reg->hr_key,
+			sizeof(struct o2hb_nego_msg),
+			o2hb_nego_approve_handler,
+			reg, NULL, &reg->hr_handler_list);
+	if (ret)
+		goto unregister_handler;
+
 	ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
 	if (ret) {
 		config_item_put(&reg->hr_item);
-		goto free;
+		goto unregister_handler;
 	}
 
 	return &reg->hr_item;
+
+unregister_handler:
+	o2net_unregister_handler_list(&reg->hr_handler_list);
 free:
 	kfree(reg);
 	return ERR_PTR(ret);
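Taken together, the additions above form a small two-message protocol: each node whose heartbeat write hangs sends O2HB_NEGO_TIMEOUT_MSG to the lowest-numbered live node (the master), which records requests in hr_nego_node_bitmap and re-checks every second; only once every live node has reported a hang (storage is likely down for everyone, so fencing any one node is pointless) does the master re-arm its own timers and broadcast O2HB_NEGO_APPROVE_MSG. A toy userspace model of just the master's decision, with an 8-slot array standing in for the O2NM_MAX_NODES bitmap (sizes and node numbers are illustrative, not the kernel's):

#include <stdio.h>

#define MAX_NODES 8

/* Toy model: extend the timeout only after every live node has
 * reported a hung heartbeat write. */
static int all_live_nodes_hung(const unsigned char *live,
			       const unsigned char *nego)
{
	for (int i = 0; i < MAX_NODES; i++)
		if (live[i] && !nego[i])
			return 0;
	return 1;
}

int main(void)
{
	unsigned char live[MAX_NODES] = {1, 1, 1}; /* nodes 0..2 alive */
	unsigned char nego[MAX_NODES] = {0};

	/* Nodes report hung hb writes one by one (NEGO_TIMEOUT msgs). */
	for (int n = 0; n < 3; n++) {
		nego[n] = 1;
		if (all_live_nodes_hung(live, nego))
			printf("master: all live nodes hung, send NEGO_APPROVE\n");
		else
			printf("master: node %d hung, keep waiting\n", n);
	}
	return 0;
}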
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index b95e7df5b76a..94b18369b1cc 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,6 +44,9 @@
  * version here in tcp_internal.h should not need to be bumped for
  * filesystem locking changes.
  *
+ * New in version 12
+ *	- Negotiate hb timeout when storage is down.
+ *
  * New in version 11
  *	- Negotiation of filesystem locking in the dlm join.
  *
@@ -75,7 +78,7 @@
  * - full 64 bit i_size in the metadata lock lvbs
  * - introduction of "rw" lock and pushing meta/data locking down
  */
-#define O2NET_PROTOCOL_VERSION 11ULL
+#define O2NET_PROTOCOL_VERSION 12ULL
 struct o2net_handshake {
 	__be64	protocol_version;
 	__be64	connector_id;
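Bumping O2NET_PROTOCOL_VERSION acts as a compatibility fence: peers exchange struct o2net_handshake at connect time and a version mismatch ends the connection, so a pre-negotiation node is never handed the new NEGO_* messages. A minimal sketch of that style of gate (illustrative only, not the kernel's actual handshake code):

#include <stdint.h>
#include <stdio.h>

#define O2NET_PROTOCOL_VERSION 12ULL

/* Sketch of a handshake version gate: accept only an exact match. */
static int check_handshake(uint64_t peer_version)
{
	if (peer_version != O2NET_PROTOCOL_VERSION) {
		fprintf(stderr, "peer speaks v%llu, we speak v%llu: disconnect\n",
			(unsigned long long)peer_version,
			(unsigned long long)O2NET_PROTOCOL_VERSION);
		return -1;
	}
	return 0;
}

int main(void)
{
	check_handshake(11);			/* old node: refused */
	return check_handshake(12) ? 1 : 0;	/* same version: accepted */
}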
diff --git a/init/main.c b/init/main.c
index bc0f9e0bcf22..4c17fda5c2ff 100644
--- a/init/main.c
+++ b/init/main.c
@@ -607,6 +607,7 @@ asmlinkage __visible void __init start_kernel(void)
 		initrd_start = 0;
 	}
 #endif
+	page_ext_init();
 	debug_objects_mem_init();
 	kmemleak_init();
 	setup_per_cpu_pageset();
@@ -1003,8 +1004,6 @@ static noinline void __init kernel_init_freeable(void)
 	sched_init_smp();
 
 	page_alloc_init_late();
-	/* Initialize page ext after all struct pages are initializaed */
-	page_ext_init();
 
 	do_basic_setup();
 
diff --git a/mm/Kconfig b/mm/Kconfig
index 22fa8189e4fc..3e2daef3c946 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -648,7 +648,7 @@ config DEFERRED_STRUCT_PAGE_INIT
 	bool "Defer initialisation of struct pages to kthreads"
 	default n
 	depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
-	depends on MEMORY_HOTPLUG
+	depends on NO_BOOTMEM && MEMORY_HOTPLUG
 	depends on !FLATMEM
 	help
 	  Ordinarily all struct pages are initialised during early boot in a
diff --git a/mm/cma.c b/mm/cma.c
index ea506eb18cd6..bd0e1412475e 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -183,7 +183,8 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 		return -EINVAL;
 
 	/* ensure minimal alignment required by mm core */
-	alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
+	alignment = PAGE_SIZE <<
+			max_t(unsigned long, MAX_ORDER - 1, pageblock_order);
 
 	/* alignment should be aligned with order_per_bit */
 	if (!IS_ALIGNED(alignment >> PAGE_SHIFT, 1 << order_per_bit))
@@ -266,8 +267,8 @@ int __init cma_declare_contiguous(phys_addr_t base,
 	 * migratetype page by page allocator's buddy algorithm. In the case,
 	 * you couldn't get a contiguous memory, which is not what we want.
 	 */
-	alignment = max(alignment,
-		(phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
+	alignment = max(alignment, (phys_addr_t)PAGE_SIZE <<
+			  max_t(unsigned long, MAX_ORDER - 1, pageblock_order));
 	base = ALIGN(base, alignment);
 	size = ALIGN(size, alignment);
 	limit &= ~(alignment - 1);
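The warning both hunks silence comes from the kernel's max(), which rejects arguments of mismatched types: MAX_ORDER - 1 is an int while pageblock_order can be an unsigned long variable (or a plain constant, depending on config), so the patch switches to max_t(), which casts both sides to a named type first. A userspace sketch with a simplified max_t and illustrative values for the two quantities:

/* Simplified imitation of the kernel's max_t() (uses the GCC/Clang
 * statement-expression extension, as include/linux/kernel.h does).
 * The numeric values below are hypothetical. */
#include <stdio.h>

#define max_t(type, x, y) ({			\
	type __x = (x);				\
	type __y = (y);				\
	__x > __y ? __x : __y; })

int main(void)
{
	int max_order_m1 = 11 - 1;          /* MAX_ORDER - 1 on many configs */
	unsigned long pageblock_order = 9;  /* illustrative value */

	/* Both operands are cast to unsigned long before comparing. */
	unsigned long shift = max_t(unsigned long, max_order_m1, pageblock_order);
	printf("alignment = PAGE_SIZE << %lu\n", shift);
	return 0;
}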
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f6477a9dbe7a..925b431f3f03 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1108,6 +1108,8 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
 		limit = READ_ONCE(memcg->memsw.limit);
 		if (count <= limit)
 			margin = min(margin, limit - count);
+		else
+			margin = 0;
 	}
 
 	return margin;
@@ -4307,24 +4309,6 @@ static int mem_cgroup_do_precharge(unsigned long count)
 	return 0;
 }
 
-/**
- * get_mctgt_type - get target type of moving charge
- * @vma: the vma the pte to be checked belongs
- * @addr: the address corresponding to the pte to be checked
- * @ptent: the pte to be checked
- * @target: the pointer the target page or swap ent will be stored(can be NULL)
- *
- * Returns
- *   0(MC_TARGET_NONE): if the pte is not a target for move charge.
- *   1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
- *     move charge. if @target is not NULL, the page is stored in target->page
- *     with extra refcnt got(Callers should handle it).
- *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
- *     target for charge migration. if @target is not NULL, the entry is stored
- *     in target->ent.
- *
- * Called with pte lock held.
- */
 union mc_target {
 	struct page *page;
 	swp_entry_t ent;
@@ -4513,6 +4497,25 @@ out:
 	return ret;
 }
 
+/**
+ * get_mctgt_type - get target type of moving charge
+ * @vma: the vma the pte to be checked belongs
+ * @addr: the address corresponding to the pte to be checked
+ * @ptent: the pte to be checked
+ * @target: the pointer the target page or swap ent will be stored(can be NULL)
+ *
+ * Returns
+ *   0(MC_TARGET_NONE): if the pte is not a target for move charge.
+ *   1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
+ *     move charge. if @target is not NULL, the page is stored in target->page
+ *     with extra refcnt got(Callers should handle it).
+ *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
+ *     target for charge migration. if @target is not NULL, the entry is stored
+ *     in target->ent.
+ *
+ * Called with pte lock held.
+ */
+
 static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
 		unsigned long addr, pte_t ptent, union mc_target *target)
 {
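The else branch added to mem_cgroup_margin() matters when the memsw counter is already over its limit: the old code skipped the min() entirely and returned the plain-memory margin, overstating how much can still be charged. Worked numbers (illustrative page counts, not from any real cgroup):

/* Standalone model of the fixed margin computation. */
#include <stdio.h>

static unsigned long margin_fixed(unsigned long mem_count, unsigned long mem_limit,
				  unsigned long sw_count, unsigned long sw_limit)
{
	unsigned long margin = 0;

	if (mem_count <= mem_limit)
		margin = mem_limit - mem_count;

	if (sw_count <= sw_limit)
		margin = margin < (sw_limit - sw_count) ? margin : (sw_limit - sw_count);
	else
		margin = 0;	/* the added branch: memsw already over its limit */

	return margin;
}

int main(void)
{
	/* memory: 100 of 200 pages used; memsw: 300 of 250 used (over limit). */
	printf("margin = %lu (old code would have returned 100)\n",
	       margin_fixed(100, 200, 300, 250));
	return 0;
}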
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index caf2a14c37ad..b8ee0806415f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -300,7 +300,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
 		 * multiple nodes we check that this pfn does not already
 		 * reside in some other nodes.
 		 */
-		if (pfn_valid(pfn) && (pfn_to_nid(pfn) == node))
+		if (pfn_valid(pfn) && (early_pfn_to_nid(pfn) == node))
 			register_page_bootmem_info_section(pfn);
 	}
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 5bb2f7698ad7..dfb1ab61fb23 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -443,13 +443,29 @@ static bool __oom_reap_task(struct task_struct *tsk)
 {
 	struct mmu_gather tlb;
 	struct vm_area_struct *vma;
-	struct mm_struct *mm;
+	struct mm_struct *mm = NULL;
 	struct task_struct *p;
 	struct zap_details details = {.check_swap_entries = true,
 				      .ignore_dirty = true};
 	bool ret = true;
 
 	/*
+	 * We have to make sure to not race with the victim exit path
+	 * and cause premature new oom victim selection:
+	 * __oom_reap_task		exit_mm
+	 *   atomic_inc_not_zero
+	 *				  mmput
+	 *				    atomic_dec_and_test
+	 *				  exit_oom_victim
+	 *				[...]
+	 *				out_of_memory
+	 *				  select_bad_process
+	 *				    # no TIF_MEMDIE task selects new victim
+	 *  unmap_page_range # frees some memory
+	 */
+	mutex_lock(&oom_lock);
+
+	/*
 	 * Make sure we find the associated mm_struct even when the particular
 	 * thread has already terminated and cleared its mm.
 	 * We might have race with exit path so consider our work done if there
@@ -457,19 +473,19 @@ static bool __oom_reap_task(struct task_struct *tsk)
 	 */
 	p = find_lock_task_mm(tsk);
 	if (!p)
-		return true;
+		goto unlock_oom;
 
 	mm = p->mm;
 	if (!atomic_inc_not_zero(&mm->mm_users)) {
 		task_unlock(p);
-		return true;
+		goto unlock_oom;
 	}
 
 	task_unlock(p);
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		ret = false;
-		goto out;
+		goto unlock_oom;
 	}
 
 	tlb_gather_mmu(&tlb, mm, 0, -1);
@@ -511,13 +527,15 @@ static bool __oom_reap_task(struct task_struct *tsk)
 	 * to release its memory.
 	 */
 	set_bit(MMF_OOM_REAPED, &mm->flags);
-out:
+unlock_oom:
+	mutex_unlock(&oom_lock);
 	/*
 	 * Drop our reference but make sure the mmput slow path is called from a
 	 * different context because we shouldn't risk we get stuck there and
 	 * put the oom_reaper out of the way.
 	 */
-	mmput_async(mm);
+	if (mm)
+		mmput_async(mm);
 	return ret;
 }
 
@@ -611,8 +629,6 @@ void try_oom_reaper(struct task_struct *tsk)
 
 		if (!process_shares_mm(p, mm))
 			continue;
-		if (same_thread_group(p, tsk))
-			continue;
 		if (fatal_signal_pending(p))
 			continue;
 
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 2d864e64f7fe..44a4c029c8e7 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -390,8 +390,10 @@ void __init page_ext_init(void)
 		 * We know some arch can have a nodes layout such as
 		 * -------------pfn-------------->
 		 * N0 | N1 | N2 | N0 | N1 | N2|....
+		 *
+		 * Take into account DEFERRED_STRUCT_PAGE_INIT.
 		 */
-		if (pfn_to_nid(pfn) != nid)
+		if (early_pfn_to_nid(pfn) != nid)
 			continue;
 		if (init_section_page_ext(pfn, nid))
 			goto oom;
diff --git a/mm/rmap.c b/mm/rmap.c
index 8a839935b18c..0ea5d9071b32 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1098,6 +1098,8 @@ void page_move_anon_rmap(struct page *page,
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_VMA(!anon_vma, vma);
+	if (IS_ENABLED(CONFIG_DEBUG_VM) && PageTransHuge(page))
+		address &= HPAGE_PMD_MASK;
 	VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
 
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
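For a THP, page->index refers to the head page, while the address passed to page_move_anon_rmap() can point anywhere in the huge-page range, so the unmasked linear_page_index() comparison could trip the VM_BUG_ON_PAGE spuriously; masking with HPAGE_PMD_MASK realigns the address to the head. Illustrative arithmetic, assuming x86-64-style 4K base pages and 2M huge pages (addresses are made up):

#include <stdio.h>

int main(void)
{
	unsigned long page_shift = 12;			/* 4K base pages */
	unsigned long hpage_pmd_size = 2UL << 20;	/* 2M THP */
	unsigned long hpage_pmd_mask = ~(hpage_pmd_size - 1);

	unsigned long vm_start = 0x40000000;	/* vma->vm_start, vm_pgoff = 0 */
	unsigned long head_index = 0;		/* page->index of the THP head */

	/* Fault address lands in the middle of the huge page. */
	unsigned long address = vm_start + 0x123000;

	unsigned long idx_unmasked = (address - vm_start) >> page_shift;
	unsigned long idx_masked = ((address & hpage_pmd_mask) - vm_start) >> page_shift;

	printf("unmasked linear index: %lu (false VM_BUG_ON trigger)\n", idx_unmasked);
	printf("masked linear index:   %lu == page->index %lu\n",
	       idx_masked, head_index);
	return 0;
}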