author    Corey Minyard <cminyard@mvista.com>  2017-11-02 12:19:15 -0400
committer Corey Minyard <cminyard@mvista.com>  2017-11-02 12:19:15 -0400
commit    6297fabd93f93182245383ba7de56bef829a796b (patch)
tree      804f5d28ada61b402d56281c9a047308d26347f4 /mm
parent    d7e17fe4f7a7d961cc4375c7d868bd353a039bc7 (diff)
parent    ece1996a21eeb344b49200e627c6660111009c10 (diff)
Merge branch 'modules-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux into for-next
The IPMI SI driver was split into different pieces; merge the module tree to account for that.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Diffstat (limited to 'mm')
-rw-r--r--  mm/cma.c              2
-rw-r--r--  mm/compaction.c      13
-rw-r--r--  mm/filemap.c          8
-rw-r--r--  mm/ksm.c              5
-rw-r--r--  mm/list_lru.c        12
-rw-r--r--  mm/madvise.c         19
-rw-r--r--  mm/memcontrol.c      23
-rw-r--r--  mm/memory.c           2
-rw-r--r--  mm/memory_hotplug.c   7
-rw-r--r--  mm/mempolicy.c        7
-rw-r--r--  mm/migrate.c          3
-rw-r--r--  mm/oom_kill.c        16
-rw-r--r--  mm/page_alloc.c       3
-rw-r--r--  mm/page_vma_mapped.c 28
-rw-r--r--  mm/percpu-stats.c     2
-rw-r--r--  mm/percpu.c           4
-rw-r--r--  mm/rodata_test.c      2
-rw-r--r--  mm/slab_common.c     22
-rw-r--r--  mm/swap.c             4
-rw-r--r--  mm/swap_state.c      52
-rw-r--r--  mm/vmalloc.c          6
-rw-r--r--  mm/z3fold.c          10
22 files changed, 144 insertions, 106 deletions
diff --git a/mm/cma.c b/mm/cma.c
index c0da318c020e..022e52bd8370 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -460,7 +460,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
 
 	trace_cma_alloc(pfn, page, count, align);
 
-	if (ret) {
+	if (ret && !(gfp_mask & __GFP_NOWARN)) {
 		pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n",
 			__func__, count, ret);
 		cma_debug_show_areas(cma);
diff --git a/mm/compaction.c b/mm/compaction.c
index fb548e4c7bd4..03d31a875341 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1999,17 +1999,14 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
 	if (pgdat->kcompactd_max_order < order)
 		pgdat->kcompactd_max_order = order;
 
-	/*
-	 * Pairs with implicit barrier in wait_event_freezable()
-	 * such that wakeups are not missed in the lockless
-	 * waitqueue_active() call.
-	 */
-	smp_acquire__after_ctrl_dep();
-
 	if (pgdat->kcompactd_classzone_idx > classzone_idx)
 		pgdat->kcompactd_classzone_idx = classzone_idx;
 
-	if (!waitqueue_active(&pgdat->kcompactd_wait))
+	/*
+	 * Pairs with implicit barrier in wait_event_freezable()
+	 * such that wakeups are not missed.
+	 */
+	if (!wq_has_sleeper(&pgdat->kcompactd_wait))
 		return;
 
 	if (!kcompactd_node_suitable(pgdat))
diff --git a/mm/filemap.c b/mm/filemap.c
index db250d0e0565..594d73fef8b4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -620,6 +620,14 @@ int file_check_and_advance_wb_err(struct file *file)
 		trace_file_check_and_advance_wb_err(file, old);
 		spin_unlock(&file->f_lock);
 	}
+
+	/*
+	 * We're mostly using this function as a drop in replacement for
+	 * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect
+	 * that the legacy code would have had on these flags.
+	 */
+	clear_bit(AS_EIO, &mapping->flags);
+	clear_bit(AS_ENOSPC, &mapping->flags);
 	return err;
 }
 EXPORT_SYMBOL(file_check_and_advance_wb_err);
diff --git a/mm/ksm.c b/mm/ksm.c
index 15dd7415f7b3..6cb60f46cce5 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1990,6 +1990,7 @@ static void stable_tree_append(struct rmap_item *rmap_item,
  */
 static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 {
+	struct mm_struct *mm = rmap_item->mm;
 	struct rmap_item *tree_rmap_item;
 	struct page *tree_page = NULL;
 	struct stable_node *stable_node;
@@ -2062,9 +2063,11 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	if (ksm_use_zero_pages && (checksum == zero_checksum)) {
 		struct vm_area_struct *vma;
 
-		vma = find_mergeable_vma(rmap_item->mm, rmap_item->address);
+		down_read(&mm->mmap_sem);
+		vma = find_mergeable_vma(mm, rmap_item->address);
 		err = try_to_merge_one_page(vma, page,
 					ZERO_PAGE(rmap_item->address));
+		up_read(&mm->mmap_sem);
 		/*
 		 * In case of failure, the page was not really empty, so we
 		 * need to continue. Otherwise we're done.
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 7a40fa2be858..f141f0c80ff3 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -325,12 +325,12 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
 {
 	int size = memcg_nr_cache_ids;
 
-	nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
+	nlru->memcg_lrus = kvmalloc(size * sizeof(void *), GFP_KERNEL);
 	if (!nlru->memcg_lrus)
 		return -ENOMEM;
 
 	if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
-		kfree(nlru->memcg_lrus);
+		kvfree(nlru->memcg_lrus);
 		return -ENOMEM;
 	}
 
@@ -340,7 +340,7 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
 static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
 {
 	__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
-	kfree(nlru->memcg_lrus);
+	kvfree(nlru->memcg_lrus);
 }
 
 static int memcg_update_list_lru_node(struct list_lru_node *nlru,
@@ -351,12 +351,12 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
 	BUG_ON(old_size > new_size);
 
 	old = nlru->memcg_lrus;
-	new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
+	new = kvmalloc(new_size * sizeof(void *), GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
 
 	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
-		kfree(new);
+		kvfree(new);
 		return -ENOMEM;
 	}
 
@@ -373,7 +373,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
 	nlru->memcg_lrus = new;
 	spin_unlock_irq(&nlru->lock);
 
-	kfree(old);
+	kvfree(old);
 	return 0;
 }
 
diff --git a/mm/madvise.c b/mm/madvise.c
index 21261ff0466f..fd70d6aabc3e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -625,18 +625,26 @@ static int madvise_inject_error(int behavior,
 {
 	struct page *page;
 	struct zone *zone;
+	unsigned int order;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	for (; start < end; start += PAGE_SIZE <<
-				compound_order(compound_head(page))) {
+
+	for (; start < end; start += PAGE_SIZE << order) {
 		int ret;
 
 		ret = get_user_pages_fast(start, 1, 0, &page);
 		if (ret != 1)
 			return ret;
 
+		/*
+		 * When soft offlining hugepages, after migrating the page
+		 * we dissolve it, therefore in the second loop "page" will
+		 * no longer be a compound page, and order will be 0.
+		 */
+		order = compound_order(compound_head(page));
+
 		if (PageHWPoison(page)) {
 			put_page(page);
 			continue;
@@ -749,6 +757,9 @@ madvise_behavior_valid(int behavior)
  * MADV_DONTFORK - omit this area from child's address space when forking:
  *		typically, to avoid COWing pages pinned by get_user_pages().
  * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
+ * MADV_WIPEONFORK - present the child process with zero-filled memory in this
+ *		range after a fork.
+ * MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK
  * MADV_HWPOISON - trigger memory error handler as if the given memory range
  *		were corrupted by unrecoverable hardware memory failure.
  * MADV_SOFT_OFFLINE - try to soft-offline the given range of memory.
@@ -769,7 +780,9 @@ madvise_behavior_valid(int behavior)
  *  zero    - success
  *  -EINVAL - start + len < 0, start is not page-aligned,
  *		"behavior" is not a valid value, or application
- *		is attempting to release locked or shared pages.
+ *		is attempting to release locked or shared pages,
+ *		or the specified address range includes file, Huge TLB,
+ *		MAP_SHARED or VMPFNMAP range.
  *  -ENOMEM - addresses in the specified range are not currently
  *		mapped, or are outside the AS of the process.
  *  -EIO    - an I/O error occurred while paging in data.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 15af3da5af02..d5f3a62887cf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1777,6 +1777,10 @@ static void drain_local_stock(struct work_struct *dummy)
 	struct memcg_stock_pcp *stock;
 	unsigned long flags;
 
+	/*
+	 * The only protection from memory hotplug vs. drain_stock races is
+	 * that we always operate on local CPU stock here with IRQ disabled
+	 */
 	local_irq_save(flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
@@ -1821,27 +1825,33 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 	/* If someone's already draining, avoid adding running more workers. */
 	if (!mutex_trylock(&percpu_charge_mutex))
 		return;
-	/* Notify other cpus that system-wide "drain" is running */
-	get_online_cpus();
+	/*
+	 * Notify other cpus that system-wide "drain" is running
+	 * We do not care about races with the cpu hotplug because cpu down
+	 * as well as workers from this path always operate on the local
+	 * per-cpu data. CPU up doesn't touch memcg_stock at all.
+	 */
 	curcpu = get_cpu();
 	for_each_online_cpu(cpu) {
 		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
 		struct mem_cgroup *memcg;
 
 		memcg = stock->cached;
-		if (!memcg || !stock->nr_pages)
+		if (!memcg || !stock->nr_pages || !css_tryget(&memcg->css))
 			continue;
-		if (!mem_cgroup_is_descendant(memcg, root_memcg))
+		if (!mem_cgroup_is_descendant(memcg, root_memcg)) {
+			css_put(&memcg->css);
 			continue;
+		}
 		if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
 			if (cpu == curcpu)
 				drain_local_stock(&stock->work);
 			else
 				schedule_work_on(cpu, &stock->work);
 		}
+		css_put(&memcg->css);
 	}
 	put_cpu();
-	put_online_cpus();
 	mutex_unlock(&percpu_charge_mutex);
 }
 
@@ -5648,7 +5658,8 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 {
 	VM_BUG_ON_PAGE(PageLRU(page), page);
-	VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
+	VM_BUG_ON_PAGE(page_count(page) && !is_zone_device_page(page) &&
+			!PageHWPoison(page) , page);
 
 	if (!page->mem_cgroup)
 		return;
diff --git a/mm/memory.c b/mm/memory.c
index ec4e15494901..a728bed16c20 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -845,7 +845,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		 * vm_normal_page() so that we do not have to special case all
 		 * call site of vm_normal_page().
 		 */
-		if (likely(pfn < highest_memmap_pfn)) {
+		if (likely(pfn <= highest_memmap_pfn)) {
 			struct page *page = pfn_to_page(pfn);
 
 			if (is_device_public_page(page)) {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e882cb6da994..d4b5f29906b9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -328,6 +328,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 		if (err && (err != -EEXIST))
 			break;
 		err = 0;
+		cond_resched();
 	}
 	vmemmap_populate_print_last();
 out:
@@ -337,7 +338,7 @@ EXPORT_SYMBOL_GPL(__add_pages);
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
-static int find_smallest_section_pfn(int nid, struct zone *zone,
+static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
 				     unsigned long start_pfn,
 				     unsigned long end_pfn)
 {
@@ -362,7 +363,7 @@ static int find_smallest_section_pfn(int nid, struct zone *zone,
 }
 
 /* find the biggest valid pfn in the range [start_pfn, end_pfn). */
-static int find_biggest_section_pfn(int nid, struct zone *zone,
+static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
 				    unsigned long start_pfn,
 				    unsigned long end_pfn)
 {
@@ -550,7 +551,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
 		return ret;
 
 	scn_nr = __section_nr(ms);
-	start_pfn = section_nr_to_pfn(scn_nr);
+	start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
 	__remove_zone(zone, start_pfn);
 
 	sparse_remove_one_section(zone, ms, map_offset);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 006ba625c0b8..a2af6d58a68f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1920,8 +1920,11 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 	struct page *page;
 
 	page = __alloc_pages(gfp, order, nid);
-	if (page && page_to_nid(page) == nid)
-		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
+	if (page && page_to_nid(page) == nid) {
+		preempt_disable();
+		__inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT);
+		preempt_enable();
+	}
 	return page;
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 6954c1435833..e00814ca390e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2146,8 +2146,9 @@ static int migrate_vma_collect_hole(unsigned long start,
 	unsigned long addr;
 
 	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		migrate->src[migrate->npages++] = MIGRATE_PFN_MIGRATE;
+		migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
 		migrate->dst[migrate->npages] = 0;
+		migrate->npages++;
 		migrate->cpages++;
 	}
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 99736e026712..dee0f75c3013 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -40,6 +40,7 @@
 #include <linux/ratelimit.h>
 #include <linux/kthread.h>
 #include <linux/init.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlb.h>
 #include "internal.h"
@@ -495,6 +496,21 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	}
 
 	/*
+	 * If the mm has notifiers then we would need to invalidate them around
+	 * unmap_page_range and that is risky because notifiers can sleep and
+	 * what they do is basically undeterministic. So let's have a short
+	 * sleep to give the oom victim some more time.
+	 * TODO: we really want to get rid of this ugly hack and make sure that
+	 * notifiers cannot block for unbounded amount of time and add
+	 * mmu_notifier_invalidate_range_{start,end} around unmap_page_range
+	 */
+	if (mm_has_notifiers(mm)) {
+		up_read(&mm->mmap_sem);
+		schedule_timeout_idle(HZ);
+		goto unlock_oom;
+	}
+
+	/*
 	 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
 	 * work on the mm anymore. The check for MMF_OOM_SKIP must run
 	 * under mmap_sem for reading because it serializes against the
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c841af88836a..77e4d3c5c57b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1190,7 +1190,7 @@ static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
 }
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-static void init_reserved_page(unsigned long pfn)
+static void __meminit init_reserved_page(unsigned long pfn)
 {
 	pg_data_t *pgdat;
 	int nid, zid;
@@ -5367,6 +5367,7 @@ not_early:
 
 			__init_single_page(page, pfn, zone, nid);
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+			cond_resched();
 		} else {
 			__init_single_pfn(pfn, zone, nid);
 		}
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 6a03946469a9..53afbb919a1c 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -6,17 +6,6 @@
 
 #include "internal.h"
 
-static inline bool check_pmd(struct page_vma_mapped_walk *pvmw)
-{
-	pmd_t pmde;
-	/*
-	 * Make sure we don't re-load pmd between present and !trans_huge check.
-	 * We need a consistent view.
-	 */
-	pmde = READ_ONCE(*pvmw->pmd);
-	return pmd_present(pmde) && !pmd_trans_huge(pmde);
-}
-
 static inline bool not_found(struct page_vma_mapped_walk *pvmw)
 {
 	page_vma_mapped_walk_done(pvmw);
@@ -116,6 +105,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
+	pmd_t pmde;
 
 	/* The only possible pmd mapping has been handled on last iteration */
 	if (pvmw->pmd && !pvmw->pte)
@@ -148,7 +138,13 @@ restart:
 	if (!pud_present(*pud))
 		return false;
 	pvmw->pmd = pmd_offset(pud, pvmw->address);
-	if (pmd_trans_huge(*pvmw->pmd) || is_pmd_migration_entry(*pvmw->pmd)) {
+	/*
+	 * Make sure the pmd value isn't cached in a register by the
+	 * compiler and used as a stale value after we've observed a
+	 * subsequent update.
+	 */
+	pmde = READ_ONCE(*pvmw->pmd);
+	if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
 		pvmw->ptl = pmd_lock(mm, pvmw->pmd);
 		if (likely(pmd_trans_huge(*pvmw->pmd))) {
 			if (pvmw->flags & PVMW_MIGRATION)
@@ -167,17 +163,15 @@ restart:
 						return not_found(pvmw);
 					return true;
 				}
-			} else
-				WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
+			}
 			return not_found(pvmw);
 		} else {
 			/* THP pmd was split under us: handle on pte level */
 			spin_unlock(pvmw->ptl);
 			pvmw->ptl = NULL;
 		}
-	} else {
-		if (!check_pmd(pvmw))
-			return false;
-	}
+	} else if (!pmd_present(pmde)) {
+		return false;
+	}
 	if (!map_pte(pvmw))
 		goto next_pte;
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index 6142484e88f7..7a58460bfd27 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -73,7 +73,7 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
 				last_alloc + 1 : 0;
 
 	as_len = 0;
-	start = chunk->start_offset;
+	start = chunk->start_offset / PCPU_MIN_ALLOC_SIZE;
 
 	/*
 	 * If a bit is set in the allocation map, the bound_map identifies
diff --git a/mm/percpu.c b/mm/percpu.c
index 59d44d61f5f1..aa121cef76de 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -353,6 +353,8 @@ static void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off,
 					block->contig_hint_start);
 			return;
 		}
+		/* reset to satisfy the second predicate above */
+		block_off = 0;
 
 		*bits = block->right_free;
 		*bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free;
@@ -407,6 +409,8 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
 			*bit_off = pcpu_block_off_to_off(i, block->first_free);
 			return;
 		}
+		/* reset to satisfy the second predicate above */
+		block_off = 0;
 
 		*bit_off = ALIGN(PCPU_BITMAP_BLOCK_BITS - block->right_free,
 				 align);
diff --git a/mm/rodata_test.c b/mm/rodata_test.c
index 6bb4deb12e78..d908c8769b48 100644
--- a/mm/rodata_test.c
+++ b/mm/rodata_test.c
@@ -14,7 +14,7 @@
 #include <linux/uaccess.h>
 #include <asm/sections.h>
 
-const int rodata_test_data = 0xC3;
+static const int rodata_test_data = 0xC3;
 
 void rodata_test(void)
 {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 904a83be82de..80164599ca5d 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -165,9 +165,9 @@ static int init_memcg_params(struct kmem_cache *s,
 	if (!memcg_nr_cache_ids)
 		return 0;
 
-	arr = kzalloc(sizeof(struct memcg_cache_array) +
-		      memcg_nr_cache_ids * sizeof(void *),
-		      GFP_KERNEL);
+	arr = kvzalloc(sizeof(struct memcg_cache_array) +
+		       memcg_nr_cache_ids * sizeof(void *),
+		       GFP_KERNEL);
 	if (!arr)
 		return -ENOMEM;
 
@@ -178,15 +178,23 @@ static int init_memcg_params(struct kmem_cache *s,
 static void destroy_memcg_params(struct kmem_cache *s)
 {
 	if (is_root_cache(s))
-		kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
+		kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
+}
+
+static void free_memcg_params(struct rcu_head *rcu)
+{
+	struct memcg_cache_array *old;
+
+	old = container_of(rcu, struct memcg_cache_array, rcu);
+	kvfree(old);
 }
 
 static int update_memcg_params(struct kmem_cache *s, int new_array_size)
 {
 	struct memcg_cache_array *old, *new;
 
-	new = kzalloc(sizeof(struct memcg_cache_array) +
-		      new_array_size * sizeof(void *), GFP_KERNEL);
+	new = kvzalloc(sizeof(struct memcg_cache_array) +
+		       new_array_size * sizeof(void *), GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
 
@@ -198,7 +206,7 @@ static int update_memcg_params(struct kmem_cache *s, int new_array_size)
 
 	rcu_assign_pointer(s->memcg_params.memcg_caches, new);
 	if (old)
-		kfree_rcu(old, rcu);
+		call_rcu(&old->rcu, free_memcg_params);
 	return 0;
 }
 
diff --git a/mm/swap.c b/mm/swap.c
index 9295ae960d66..a77d68f2c1b6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -575,7 +575,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
 			    void *arg)
 {
 	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
-	    !PageUnevictable(page)) {
+	    !PageSwapCache(page) && !PageUnevictable(page)) {
 		bool active = PageActive(page);
 
 		del_page_from_lru_list(page, lruvec,
@@ -665,7 +665,7 @@ void deactivate_file_page(struct page *page)
 void mark_page_lazyfree(struct page *page)
 {
 	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
-	    !PageUnevictable(page)) {
+	    !PageSwapCache(page) && !PageUnevictable(page)) {
 		struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
 
 		get_page(page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 71ce2d1ccbf7..05b6803f0cce 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,10 +39,6 @@ struct address_space *swapper_spaces[MAX_SWAPFILES];
 static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
 bool swap_vma_readahead = true;
 
-#define SWAP_RA_MAX_ORDER_DEFAULT	3
-
-static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT;
-
 #define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
 #define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)
 #define SWAP_RA_HITS_MAX	SWAP_RA_HITS_MASK
@@ -242,6 +238,17 @@ int add_to_swap(struct page *page)
 		 * clear SWAP_HAS_CACHE flag.
 		 */
 		goto fail;
+	/*
+	 * Normally the page will be dirtied in unmap because its pte should be
+	 * dirty. A special case is MADV_FREE page. The page'e pte could have
+	 * dirty bit cleared but the page's SwapBacked bit is still set because
+	 * clearing the dirty bit and SwapBacked bit has no lock protected. For
+	 * such page, unmap will not set dirty bit for it, so page reclaim will
+	 * not write the page out. This can cause data corruption when the page
+	 * is swap in later. Always setting the dirty bit for the page solves
+	 * the problem.
+	 */
+	set_page_dirty(page);
 
 	return 1;
 
@@ -653,6 +660,13 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	pte_t *tpte;
 #endif
 
+	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
+			     SWAP_RA_ORDER_CEILING);
+	if (max_win == 1) {
+		swap_ra->win = 1;
+		return NULL;
+	}
+
 	faddr = vmf->address;
 	entry = pte_to_swp_entry(vmf->orig_pte);
 	if ((unlikely(non_swap_entry(entry))))
@@ -661,12 +675,6 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	if (page)
 		return page;
 
-	max_win = 1 << READ_ONCE(swap_ra_max_order);
-	if (max_win == 1) {
-		swap_ra->win = 1;
-		return NULL;
-	}
-
 	fpfn = PFN_DOWN(faddr);
 	swap_ra_info = GET_SWAP_RA_VAL(vma);
 	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
@@ -775,32 +783,8 @@ static struct kobj_attribute vma_ra_enabled_attr =
 	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
 	       vma_ra_enabled_store);
 
-static ssize_t vma_ra_max_order_show(struct kobject *kobj,
-				     struct kobj_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%d\n", swap_ra_max_order);
-}
-static ssize_t vma_ra_max_order_store(struct kobject *kobj,
-				      struct kobj_attribute *attr,
-				      const char *buf, size_t count)
-{
-	int err, v;
-
-	err = kstrtoint(buf, 10, &v);
-	if (err || v > SWAP_RA_ORDER_CEILING || v <= 0)
-		return -EINVAL;
-
-	swap_ra_max_order = v;
-
-	return count;
-}
-static struct kobj_attribute vma_ra_max_order_attr =
-	__ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show,
-	       vma_ra_max_order_store);
-
 static struct attribute *swap_attrs[] = {
 	&vma_ra_enabled_attr.attr,
-	&vma_ra_max_order_attr.attr,
 	NULL,
 };
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 8a43db6284eb..673942094328 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1695,11 +1695,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	for (i = 0; i < area->nr_pages; i++) {
 		struct page *page;
 
-		if (fatal_signal_pending(current)) {
-			area->nr_pages = i;
-			goto fail_no_warn;
-		}
-
 		if (node == NUMA_NO_NODE)
 			page = alloc_page(alloc_mask|highmem_mask);
 		else
@@ -1723,7 +1718,6 @@ fail:
 	warn_alloc(gfp_mask, NULL,
 		   "vmalloc: allocation failure, allocated %ld of %ld bytes",
 		   (area->nr_pages*PAGE_SIZE), area->size);
-fail_no_warn:
 	vfree(area->addr);
 	return NULL;
 }
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 486550df32be..b2ba2ba585f3 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -250,6 +250,7 @@ static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
 
 	WARN_ON(!list_empty(&zhdr->buddy));
 	set_bit(PAGE_STALE, &page->private);
+	clear_bit(NEEDS_COMPACTING, &page->private);
 	spin_lock(&pool->lock);
 	if (!list_empty(&page->lru))
 		list_del(&page->lru);
@@ -303,7 +304,6 @@ static void free_pages_work(struct work_struct *w)
 		list_del(&zhdr->buddy);
 		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
 			continue;
-		clear_bit(NEEDS_COMPACTING, &page->private);
 		spin_unlock(&pool->stale_lock);
 		cancel_work_sync(&zhdr->work);
 		free_z3fold_page(page);
@@ -624,10 +624,8 @@ lookup:
 		 * stale pages list. cancel_work_sync() can sleep so we must make
 		 * sure it won't be called in case we're in atomic context.
 		 */
-		if (zhdr && (can_sleep || !work_pending(&zhdr->work) ||
-		    !unlikely(work_busy(&zhdr->work)))) {
+		if (zhdr && (can_sleep || !work_pending(&zhdr->work))) {
 			list_del(&zhdr->buddy);
-			clear_bit(NEEDS_COMPACTING, &page->private);
 			spin_unlock(&pool->stale_lock);
 			if (can_sleep)
 				cancel_work_sync(&zhdr->work);
@@ -875,16 +873,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 				goto next;
 		}
 next:
+		spin_lock(&pool->lock);
 		if (test_bit(PAGE_HEADLESS, &page->private)) {
 			if (ret == 0) {
+				spin_unlock(&pool->lock);
 				free_z3fold_page(page);
 				return 0;
 			}
 		} else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
 			atomic64_dec(&pool->pages_nr);
+			spin_unlock(&pool->lock);
 			return 0;
 		}
-		spin_lock(&pool->lock);
 
 		/*
 		 * Add to the beginning of LRU.