Diffstat (limited to 'mm')
-rw-r--r--  mm/gup.c         |   7
-rw-r--r--  mm/hugetlb.c     |   2
-rw-r--r--  mm/memblock.c    |  10
-rw-r--r--  mm/mlock.c       |   6
-rw-r--r--  mm/page_alloc.c  |  13
-rw-r--r--  mm/swap.c        |  84
-rw-r--r--  mm/vmalloc.c     |  10
-rw-r--r--  mm/vmscan.c      |  59
-rw-r--r--  mm/zpool.c       |   2
-rw-r--r--  mm/zswap.c       |   6
10 files changed, 91 insertions, 108 deletions
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -516,7 +516,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 	}
 
 	if (ret & VM_FAULT_RETRY) {
-		if (nonblocking)
+		if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
 			*nonblocking = 0;
 		return -EBUSY;
 	}
@@ -890,7 +890,10 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
 				break;
 		}
 		if (*locked) {
-			/* VM_FAULT_RETRY didn't trigger */
+			/*
+			 * VM_FAULT_RETRY didn't trigger or it was a
+			 * FOLL_NOWAIT.
+			 */
 			if (!pages_done)
 				pages_done = ret;
 			break;
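Aside (not part of the diff): the mm/gup.c change only reports the mmap_sem as dropped when the fault handler was actually allowed to drop it; a NOWAIT fault returns VM_FAULT_RETRY while still holding the lock. The standalone sketch below models just that decision -- the flag values and the report_retry() helper are invented for illustration, this is not kernel code.

/* Standalone model of the check above; flag values are illustrative only. */
#include <errno.h>
#include <stdio.h>

#define VM_FAULT_RETRY			0x1
#define FAULT_FLAG_RETRY_NOWAIT		0x2

/* Clear *nonblocking (i.e. "mmap_sem was dropped") only for a real retry. */
static int report_retry(unsigned int ret, unsigned int fault_flags,
			int *nonblocking)
{
	if (ret & VM_FAULT_RETRY) {
		if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
			*nonblocking = 0;
		return -EBUSY;
	}
	return 0;
}

int main(void)
{
	int locked = 1;

	/* NOWAIT fault: VM_FAULT_RETRY comes back but the lock was never dropped. */
	report_retry(VM_FAULT_RETRY, FAULT_FLAG_RETRY_NOWAIT, &locked);
	printf("after NOWAIT retry:   locked = %d\n", locked);	/* still 1 */

	/* Ordinary retry: the handler dropped the lock, so tell the caller. */
	report_retry(VM_FAULT_RETRY, 0, &locked);
	printf("after blocking retry: locked = %d\n", locked);	/* now 0 */
	return 0;
}

A FOLL_NOWAIT caller therefore keeps *locked == 1 across the -EBUSY and knows it never lost the semaphore.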
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7c204e3d132b..a963f2034dfc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1583,7 +1583,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 		page = NULL;
 	} else {
 		h->surplus_huge_pages++;
-		h->nr_huge_pages_node[page_to_nid(page)]++;
+		h->surplus_huge_pages_node[page_to_nid(page)]++;
 	}
 
 out_unlock:
diff --git a/mm/memblock.c b/mm/memblock.c
index 5a9ca2a1751b..b6ba6b7adadc 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1107,7 +1107,7 @@ unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
 	struct memblock_type *type = &memblock.memory;
 	unsigned int right = type->cnt;
 	unsigned int mid, left = 0;
-	phys_addr_t addr = PFN_PHYS(pfn + 1);
+	phys_addr_t addr = PFN_PHYS(++pfn);
 
 	do {
 		mid = (right + left) / 2;
@@ -1118,15 +1118,15 @@ unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
 				  type->regions[mid].size))
 			left = mid + 1;
 		else {
-			/* addr is within the region, so pfn + 1 is valid */
-			return min(pfn + 1, max_pfn);
+			/* addr is within the region, so pfn is valid */
+			return pfn;
 		}
 	} while (left < right);
 
 	if (right == type->cnt)
-		return max_pfn;
+		return -1UL;
 	else
-		return min(PHYS_PFN(type->regions[right].base), max_pfn);
+		return PHYS_PFN(type->regions[right].base);
 }
 
 /**
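Aside (not part of the diff): memblock_next_valid_pfn() now answers purely in terms of the region table, returning -1UL past the last region instead of clamping to max_pfn; the clamping and alignment move to the caller in memmap_init_zone(). A userspace model of the binary search, assuming sorted, non-overlapping regions and a 4K page size -- the struct region here is a simplified stand-in, not the kernel's memblock_region:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

struct region { uint64_t base, size; };	/* byte address + length */

static unsigned long next_valid_pfn(unsigned long pfn,
				    const struct region *regions,
				    unsigned int cnt)
{
	unsigned int right = cnt, left = 0, mid;
	uint64_t addr = (uint64_t)(++pfn) << PAGE_SHIFT;

	do {
		mid = (right + left) / 2;

		if (addr < regions[mid].base)
			right = mid;
		else if (addr >= regions[mid].base + regions[mid].size)
			left = mid + 1;
		else
			return pfn;		/* addr falls inside regions[mid] */
	} while (left < right);

	if (right == cnt)
		return -1UL;			/* past the last region */
	return regions[right].base >> PAGE_SHIFT;  /* first pfn of next region */
}

int main(void)
{
	const struct region mem[] = {
		{ 0x0000000, 0x100000 },	/* pfns 0x0000..0x00ff */
		{ 0x4000000, 0x100000 },	/* pfns 0x4000..0x40ff */
	};

	printf("%lx\n", next_valid_pfn(0x10, mem, 2));	/* 0x11: still inside region 0 */
	printf("%lx\n", next_valid_pfn(0xff, mem, 2));	/* 0x4000: jumps over the hole */
	printf("%lx\n", next_valid_pfn(0x40ff, mem, 2));	/* -1UL: past the end (all f's) */
	return 0;
}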
diff --git a/mm/mlock.c b/mm/mlock.c
index 79398200e423..74e5a6547c3d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -64,6 +64,12 @@ void clear_page_mlock(struct page *page)
 	mod_zone_page_state(page_zone(page), NR_MLOCK,
 			    -hpage_nr_pages(page));
 	count_vm_event(UNEVICTABLE_PGCLEARED);
+	/*
+	 * The previous TestClearPageMlocked() corresponds to the smp_mb()
+	 * in __pagevec_lru_add_fn().
+	 *
+	 * See __pagevec_lru_add_fn for more explanation.
+	 */
 	if (!isolate_lru_page(page)) {
 		putback_lru_page(page);
 	} else {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 81e18ceef579..3d974cb2a1a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -46,6 +46,7 @@
 #include <linux/stop_machine.h>
 #include <linux/sort.h>
 #include <linux/pfn.h>
+#include <xen/xen.h>
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
@@ -347,6 +348,9 @@ static inline bool update_defer_init(pg_data_t *pgdat,
 	/* Always populate low zones for address-constrained allocations */
 	if (zone_end < pgdat_end_pfn(pgdat))
 		return true;
+	/* Xen PV domains need page structures early */
+	if (xen_pv_domain())
+		return true;
 	(*nr_initialised)++;
 	if ((*nr_initialised > pgdat->static_init_pgcnt) &&
 	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
@@ -5355,9 +5359,14 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 			/*
 			 * Skip to the pfn preceding the next valid one (or
 			 * end_pfn), such that we hit a valid pfn (or end_pfn)
-			 * on our next iteration of the loop.
+			 * on our next iteration of the loop. Note that it needs
+			 * to be pageblock aligned even when the region itself
+			 * is not. move_freepages_block() can shift ahead of
+			 * the valid region but still depends on correct page
+			 * metadata.
 			 */
-			pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
+			pfn = (memblock_next_valid_pfn(pfn, end_pfn) &
+					~(pageblock_nr_pages-1)) - 1;
 #endif
 			continue;
 		}
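Aside (not part of the diff): the new masking in memmap_init_zone() rounds the skip target down to a pageblock boundary so move_freepages_block() never walks uninitialised struct pages in a partially valid pageblock. A standalone arithmetic check; pageblock_nr_pages == 512 (4K pages, 2MB pageblocks) is an assumption typical of x86_64 but configuration dependent:

#include <stdio.h>

#define pageblock_nr_pages 512UL	/* assumed: 2MB pageblocks, 4K pages */

int main(void)
{
	/* Pretend memblock_next_valid_pfn() reported 0x4321 as the next valid pfn. */
	unsigned long next_valid = 0x4321;

	/* Round down to the pageblock start, then step back one... */
	unsigned long pfn = (next_valid & ~(pageblock_nr_pages - 1)) - 1;

	/*
	 * ...so the loop's pfn++ resumes at the aligned 0x4200 and initialises
	 * the whole pageblock containing 0x4321, not just its valid tail.
	 */
	printf("skip to 0x%lx, resume at 0x%lx\n", pfn, pfn + 1);	/* 0x41ff, 0x4200 */
	return 0;
}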
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -446,30 +446,6 @@ void lru_cache_add(struct page *page)
 }
 
 /**
- * add_page_to_unevictable_list - add a page to the unevictable list
- * @page: the page to be added to the unevictable list
- *
- * Add page directly to its zone's unevictable list. To avoid races with
- * tasks that might be making the page evictable, through eg. munlock,
- * munmap or exit, while it's not on the lru, we want to add the page
- * while it's locked or otherwise "invisible" to other tasks. This is
- * difficult to do when using the pagevec cache, so bypass that.
- */
-void add_page_to_unevictable_list(struct page *page)
-{
-	struct pglist_data *pgdat = page_pgdat(page);
-	struct lruvec *lruvec;
-
-	spin_lock_irq(&pgdat->lru_lock);
-	lruvec = mem_cgroup_page_lruvec(page, pgdat);
-	ClearPageActive(page);
-	SetPageUnevictable(page);
-	SetPageLRU(page);
-	add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
-	spin_unlock_irq(&pgdat->lru_lock);
-}
-
-/**
  * lru_cache_add_active_or_unevictable
  * @page: the page to be added to LRU
  * @vma: vma in which page is mapped for determining reclaimability
@@ -484,13 +460,9 @@ void lru_cache_add_active_or_unevictable(struct page *page,
 {
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 
-	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
+	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
 		SetPageActive(page);
-		lru_cache_add(page);
-		return;
-	}
-
-	if (!TestSetPageMlocked(page)) {
+	else if (!TestSetPageMlocked(page)) {
 		/*
 		 * We use the irq-unsafe __mod_zone_page_stat because this
 		 * counter is not modified from interrupt context, and the pte
@@ -500,7 +472,7 @@ void lru_cache_add_active_or_unevictable(struct page *page,
 				    hpage_nr_pages(page));
 		count_vm_event(UNEVICTABLE_PGMLOCKED);
 	}
-	add_page_to_unevictable_list(page);
+	lru_cache_add(page);
 }
 
 /*
@@ -886,15 +858,55 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
 static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
 				 void *arg)
 {
-	int file = page_is_file_cache(page);
-	int active = PageActive(page);
-	enum lru_list lru = page_lru(page);
+	enum lru_list lru;
+	int was_unevictable = TestClearPageUnevictable(page);
 
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 
 	SetPageLRU(page);
+	/*
+	 * Page becomes evictable in two ways:
+	 * 1) Within LRU lock [munlock_vma_pages() and __munlock_pagevec()].
+	 * 2) Before acquiring LRU lock to put the page to correct LRU and then
+	 *    a) do PageLRU check with lock [check_move_unevictable_pages]
+	 *    b) do PageLRU check before lock [clear_page_mlock]
+	 *
+	 * (1) & (2a) are ok as LRU lock will serialize them. For (2b), we need
+	 * following strict ordering:
+	 *
+	 * #0: __pagevec_lru_add_fn		#1: clear_page_mlock
+	 *
+	 * SetPageLRU()				TestClearPageMlocked()
+	 * smp_mb() // explicit ordering	// above provides strict
+	 *					// ordering
+	 * PageMlocked()			PageLRU()
+	 *
+	 *
+	 * if '#1' does not observe setting of PG_lru by '#0' and fails
+	 * isolation, the explicit barrier will make sure that page_evictable
+	 * check will put the page in correct LRU. Without smp_mb(), SetPageLRU
+	 * can be reordered after PageMlocked check and can make '#1' to fail
+	 * the isolation of the page whose Mlocked bit is cleared (#0 is also
+	 * looking at the same page) and the evictable page will be stranded
+	 * in an unevictable LRU.
+	 */
+	smp_mb();
+
+	if (page_evictable(page)) {
+		lru = page_lru(page);
+		update_page_reclaim_stat(lruvec, page_is_file_cache(page),
+					 PageActive(page));
+		if (was_unevictable)
+			count_vm_event(UNEVICTABLE_PGRESCUED);
+	} else {
+		lru = LRU_UNEVICTABLE;
+		ClearPageActive(page);
+		SetPageUnevictable(page);
+		if (!was_unevictable)
+			count_vm_event(UNEVICTABLE_PGCULLED);
+	}
+
 	add_page_to_lru_list(page, lruvec, lru);
-	update_page_reclaim_stat(lruvec, file, active);
 	trace_mm_lru_insertion(page, lru);
 }
 
@@ -913,7 +925,7 @@ EXPORT_SYMBOL(__pagevec_lru_add);
  * @pvec:	Where the resulting entries are placed
  * @mapping:	The address_space to search
  * @start:	The starting entry index
- * @nr_pages:	The maximum number of pages
+ * @nr_entries:	The maximum number of pages
  * @indices:	The cache indices corresponding to the entries in @pvec
  *
  * pagevec_lookup_entries() will search for and return a group of up
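Aside (not part of the diff): the SetPageLRU()/smp_mb()/PageMlocked() versus TestClearPageMlocked()/PageLRU() pairing documented in the comment above is the classic store-buffering pattern. The userspace model below (pthreads + C11 atomics, build with cc -pthread) is only an illustration; the helper names and messages are invented, and the explicit fence on the mlock side stands in for the full-barrier semantics the kernel's test-and-clear already provides.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool lru;			/* models PG_lru, initially clear */
static atomic_bool mlocked = true;	/* models PG_mlocked, initially set */

static void *pagevec_side(void *arg)	/* plays #0: __pagevec_lru_add_fn() */
{
	(void)arg;
	atomic_store_explicit(&lru, true, memory_order_relaxed);	/* SetPageLRU()	  */
	atomic_thread_fence(memory_order_seq_cst);			/* smp_mb()	  */
	if (!atomic_load_explicit(&mlocked, memory_order_relaxed))	/* PageMlocked()? */
		puts("#0 sees Mlocked cleared -> files the page on an evictable LRU");
	return NULL;
}

static void *mlock_side(void *arg)	/* plays #1: clear_page_mlock() */
{
	(void)arg;
	bool was_mlocked = atomic_exchange(&mlocked, false);	/* TestClearPageMlocked() */
	atomic_thread_fence(memory_order_seq_cst);		/* barrier implied by the RMW */
	if (was_mlocked &&
	    atomic_load_explicit(&lru, memory_order_relaxed))	/* PageLRU()? */
		puts("#1 sees PG_lru -> isolates the page and moves it itself");
	return NULL;
}

int main(void)
{
	pthread_t t0, t1;

	pthread_create(&t0, NULL, pagevec_side, NULL);
	pthread_create(&t1, NULL, mlock_side, NULL);
	pthread_join(t0, NULL);
	pthread_join(t1, NULL);
	/*
	 * With both fences in place at least one of the two messages always
	 * prints, so a page whose Mlocked bit was cleared cannot be left
	 * stranded on the unevictable LRU. Drop the fence in pagevec_side()
	 * and the store-buffering reordering lets both loads miss.
	 */
	return 0;
}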
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 673942094328..ebff729cc956 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1943,11 +1943,15 @@ void *vmalloc_exec(unsigned long size)
 }
 
 #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
-#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
+#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
 #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
-#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
+#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
 #else
-#define GFP_VMALLOC32 GFP_KERNEL
+/*
+ * 64b systems should always have either DMA or DMA32 zones. For others
+ * GFP_DMA32 should do the right thing and use the normal zone.
+ */
+#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
 #endif
 
 /**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 444749669187..bee53495a829 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -769,64 +769,7 @@ int remove_mapping(struct address_space *mapping, struct page *page)
  */
 void putback_lru_page(struct page *page)
 {
-	bool is_unevictable;
-	int was_unevictable = PageUnevictable(page);
-
-	VM_BUG_ON_PAGE(PageLRU(page), page);
-
-redo:
-	ClearPageUnevictable(page);
-
-	if (page_evictable(page)) {
-		/*
-		 * For evictable pages, we can use the cache.
-		 * In event of a race, worst case is we end up with an
-		 * unevictable page on [in]active list.
-		 * We know how to handle that.
-		 */
-		is_unevictable = false;
-		lru_cache_add(page);
-	} else {
-		/*
-		 * Put unevictable pages directly on zone's unevictable
-		 * list.
-		 */
-		is_unevictable = true;
-		add_page_to_unevictable_list(page);
-		/*
-		 * When racing with an mlock or AS_UNEVICTABLE clearing
-		 * (page is unlocked) make sure that if the other thread
-		 * does not observe our setting of PG_lru and fails
-		 * isolation/check_move_unevictable_pages,
-		 * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
-		 * the page back to the evictable list.
-		 *
-		 * The other side is TestClearPageMlocked() or shmem_lock().
-		 */
-		smp_mb();
-	}
-
-	/*
-	 * page's status can change while we move it among lru. If an evictable
-	 * page is on unevictable list, it never be freed. To avoid that,
-	 * check after we added it to the list, again.
-	 */
-	if (is_unevictable && page_evictable(page)) {
-		if (!isolate_lru_page(page)) {
-			put_page(page);
-			goto redo;
-		}
-		/* This means someone else dropped this page from LRU
-		 * So, it will be freed or putback to LRU again. There is
-		 * nothing to do here.
-		 */
-	}
-
-	if (was_unevictable && !is_unevictable)
-		count_vm_event(UNEVICTABLE_PGRESCUED);
-	else if (!was_unevictable && is_unevictable)
-		count_vm_event(UNEVICTABLE_PGCULLED);
-
+	lru_cache_add(page);
 	put_page(page);		/* drop ref from isolate */
 }
 
diff --git a/mm/zpool.c b/mm/zpool.c
index f8cb83e7699b..01a771e304fa 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -360,7 +360,7 @@ u64 zpool_get_total_size(struct zpool *zpool)
 
 /**
  * zpool_evictable() - Test if zpool is potentially evictable
- * @pool The zpool to test
+ * @zpool: The zpool to test
  *
  * Zpool is only potentially evictable when it's created with struct
  * zpool_ops.evict and its driver implements struct zpool_driver.shrink.
diff --git a/mm/zswap.c b/mm/zswap.c
index c004aa4fd3f4..61a5c41972db 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1007,6 +1007,12 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	u8 *src, *dst;
 	struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
 
+	/* THP isn't supported */
+	if (PageTransHuge(page)) {
+		ret = -EINVAL;
+		goto reject;
+	}
+
 	if (!zswap_enabled || !tree) {
 		ret = -ENODEV;
 		goto reject;