author     Linus Torvalds <torvalds@linux-foundation.org>   2015-02-10 19:45:56 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-02-10 19:45:56 -0500
commit     992de5a8eca7cbd3215e3eb2c439b2c11582a58b (patch)
tree       863988f84c1dd57a02fa337ecbce49263a3b9511 /mm
parent     b2718bffb4088faf13092db30c1ebf088ddee52e (diff)
parent     d5b3cf7139b8770af4ed8bb36a1ab9d290ac39e9 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
 "Bite-sized chunks this time, to avoid the MTA ratelimiting woes.

  - fs/notify updates

  - ocfs2

  - some of MM"

That laconic "some MM" is mainly the removal of remap_file_pages(), which
is a big simplification of the VM, and which gets rid of a *lot* of random
cruft and special cases because we no longer support the non-linear
mappings that it used.

From a user interface perspective, nothing has changed, because the
remap_file_pages() syscall still exists, it's just done by emulating the
old behavior by creating a lot of individual small mappings instead of one
non-linear one.

The emulation is slower than the old "native" non-linear mappings, but
nobody really uses or cares about remap_file_pages(), and simplifying the
VM is a big advantage.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (78 commits)
  memcg: zap memcg_slab_caches and memcg_slab_mutex
  memcg: zap memcg_name argument of memcg_create_kmem_cache
  memcg: zap __memcg_{charge,uncharge}_slab
  mm/page_alloc.c: place zone_id check before VM_BUG_ON_PAGE check
  mm: hugetlb: fix type of hugetlb_treat_as_movable variable
  mm, hugetlb: remove unnecessary lower bound on sysctl handlers"?
  mm: memory: merge shared-writable dirtying branches in do_wp_page()
  mm: memory: remove ->vm_file check on shared writable vmas
  xtensa: drop _PAGE_FILE and pte_file()-related helpers
  x86: drop _PAGE_FILE and pte_file()-related helpers
  unicore32: drop pte_file()-related helpers
  um: drop _PAGE_FILE and pte_file()-related helpers
  tile: drop pte_file()-related helpers
  sparc: drop pte_file()-related helpers
  sh: drop _PAGE_FILE and pte_file()-related helpers
  score: drop _PAGE_FILE and pte_file()-related helpers
  s390: drop pte_file()-related helpers
  parisc: drop _PAGE_FILE and pte_file()-related helpers
  openrisc: drop _PAGE_FILE and pte_file()-related helpers
  nios2: drop _PAGE_FILE and pte_file()-related helpers
  ...
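Since the syscall survives only as an emulation, the user-visible difference is easiest to see from user space. The following is a minimal, hypothetical C sketch, not code from this merge: the backing file name, window size and page offsets are invented for illustration. It contrasts what the old native call did (rewire page tables inside one non-linear vma) with what the emulation now effectively produces (each rewired page gets its own small linear mapping).

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	int fd = open("datafile", O_RDWR);	/* hypothetical backing file */
	if (fd < 0)
		return 1;

	/* One linear VM_SHARED window over the first four file pages. */
	char *win = mmap(NULL, 4 * psz, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);
	if (win == MAP_FAILED)
		return 1;

	/*
	 * Old native behaviour: rewire the first page of the window to file
	 * page 3; the kernel kept a single non-linear vma and only changed
	 * page table entries.
	 */
	remap_file_pages(win, psz, 0, 3, 0);

	/*
	 * Roughly what the emulation amounts to: a small MAP_FIXED mapping
	 * over the same page, so the rewired page now has its own ordinary
	 * linear vma pointing at file offset 3 * psz.
	 */
	if (mmap(win, psz, PROT_READ | PROT_WRITE,
		 MAP_SHARED | MAP_FIXED, fd, 3 * psz) == MAP_FAILED)
		return 1;

	close(fd);
	return 0;
}

Because every such small mapping needs its own vma, a workload that used to scatter many pages through one non-linear vma now carries many vmas instead, which is the slowdown the commit message above refers to.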
Diffstat (limited to 'mm')
-rw-r--r--  mm/Makefile            2
-rw-r--r--  mm/debug.c             1
-rw-r--r--  mm/filemap.c           1
-rw-r--r--  mm/filemap_xip.c       1
-rw-r--r--  mm/fremap.c          283
-rw-r--r--  mm/gup.c               2
-rw-r--r--  mm/hugetlb.c           2
-rw-r--r--  mm/interval_tree.c    34
-rw-r--r--  mm/ksm.c               2
-rw-r--r--  mm/madvise.c          13
-rw-r--r--  mm/memcontrol.c      187
-rw-r--r--  mm/memory.c          276
-rw-r--r--  mm/migrate.c          32
-rw-r--r--  mm/mincore.c           9
-rw-r--r--  mm/mmap.c             93
-rw-r--r--  mm/mprotect.c          2
-rw-r--r--  mm/mremap.c            2
-rw-r--r--  mm/msync.c             5
-rw-r--r--  mm/nommu.c             8
-rw-r--r--  mm/page_alloc.c        8
-rw-r--r--  mm/rmap.c            225
-rw-r--r--  mm/shmem.c             1
-rw-r--r--  mm/slab.h              4
-rw-r--r--  mm/slab_common.c     151
-rw-r--r--  mm/slub.c             37
-rw-r--r--  mm/swap.c              4
-rw-r--r--  mm/vmstat.c          124
27 files changed, 411 insertions, 1098 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 4bf586e66378..3548460ab7b6 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -3,7 +3,7 @@
 #

 mmu-y			:= nommu.o
-mmu-$(CONFIG_MMU)	:= fremap.o gup.o highmem.o memory.o mincore.o \
+mmu-$(CONFIG_MMU)	:= gup.o highmem.o memory.o mincore.o \
			   mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
			   vmalloc.o pagewalk.o pgtable-generic.o

diff --git a/mm/debug.c b/mm/debug.c
index 0e58f3211f89..d69cb5a7ba9a 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -130,7 +130,6 @@ static const struct trace_print_flags vmaflags_names[] = {
 	{VM_ACCOUNT,	"account"	},
 	{VM_NORESERVE,	"noreserve"	},
 	{VM_HUGETLB,	"hugetlb"	},
-	{VM_NONLINEAR,	"nonlinear"	},
 #if defined(CONFIG_X86)
 	{VM_PAT,	"pat"		},
 #elif defined(CONFIG_PPC)
diff --git a/mm/filemap.c b/mm/filemap.c
index 673e4581a2e5..bf7a27142704 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2087,7 +2087,6 @@ const struct vm_operations_struct generic_file_vm_ops = {
 	.fault		= filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite	= filemap_page_mkwrite,
-	.remap_pages	= generic_file_remap_pages,
 };

 /* This is used for a general mmap of a disk file */
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 0d105aeff82f..70c09da1a419 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -301,7 +301,6 @@ out:
 static const struct vm_operations_struct xip_file_vm_ops = {
 	.fault	= xip_file_fault,
 	.page_mkwrite	= filemap_page_mkwrite,
-	.remap_pages = generic_file_remap_pages,
 };

 int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
diff --git a/mm/fremap.c b/mm/fremap.c
deleted file mode 100644
index 2805d71cf476..000000000000
--- a/mm/fremap.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * linux/mm/fremap.c
- *
- * Explicit pagetable population and nonlinear (random) mappings support.
- *
- * started by Ingo Molnar, Copyright (C) 2002, 2003
- */
-#include <linux/export.h>
-#include <linux/backing-dev.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/file.h>
-#include <linux/mman.h>
-#include <linux/pagemap.h>
-#include <linux/swapops.h>
-#include <linux/rmap.h>
-#include <linux/syscalls.h>
-#include <linux/mmu_notifier.h>
-
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
-#include "internal.h"
-
-static int mm_counter(struct page *page)
-{
-	return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES;
-}
-
-static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long addr, pte_t *ptep)
-{
-	pte_t pte = *ptep;
-	struct page *page;
-	swp_entry_t entry;
-
-	if (pte_present(pte)) {
-		flush_cache_page(vma, addr, pte_pfn(pte));
-		pte = ptep_clear_flush_notify(vma, addr, ptep);
-		page = vm_normal_page(vma, addr, pte);
-		if (page) {
-			if (pte_dirty(pte))
-				set_page_dirty(page);
-			update_hiwater_rss(mm);
-			dec_mm_counter(mm, mm_counter(page));
-			page_remove_rmap(page);
-			page_cache_release(page);
-		}
-	} else {	/* zap_pte() is not called when pte_none() */
-		if (!pte_file(pte)) {
-			update_hiwater_rss(mm);
-			entry = pte_to_swp_entry(pte);
-			if (non_swap_entry(entry)) {
-				if (is_migration_entry(entry)) {
-					page = migration_entry_to_page(entry);
-					dec_mm_counter(mm, mm_counter(page));
-				}
-			} else {
-				free_swap_and_cache(entry);
-				dec_mm_counter(mm, MM_SWAPENTS);
-			}
-		}
-		pte_clear_not_present_full(mm, addr, ptep, 0);
-	}
-}
-
-/*
- * Install a file pte to a given virtual memory address, release any
- * previously existing mapping.
- */
-static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long addr, unsigned long pgoff, pgprot_t prot)
-{
-	int err = -ENOMEM;
-	pte_t *pte, ptfile;
-	spinlock_t *ptl;
-
-	pte = get_locked_pte(mm, addr, &ptl);
-	if (!pte)
-		goto out;
-
-	ptfile = pgoff_to_pte(pgoff);
-
-	if (!pte_none(*pte))
-		zap_pte(mm, vma, addr, pte);
-
-	set_pte_at(mm, addr, pte, pte_file_mksoft_dirty(ptfile));
-	/*
-	 * We don't need to run update_mmu_cache() here because the "file pte"
-	 * being installed by install_file_pte() is not a real pte - it's a
-	 * non-present entry (like a swap entry), noting what file offset should
-	 * be mapped there when there's a fault (in a non-linear vma where
-	 * that's not obvious).
-	 */
-	pte_unmap_unlock(pte, ptl);
-	err = 0;
-out:
-	return err;
-}
-
-int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
-			     unsigned long size, pgoff_t pgoff)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	int err;
-
-	do {
-		err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
-		if (err)
-			return err;
-
-		size -= PAGE_SIZE;
-		addr += PAGE_SIZE;
-		pgoff++;
-	} while (size);
-
-	return 0;
-}
-EXPORT_SYMBOL(generic_file_remap_pages);
-
-/**
- * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma
- * @start: start of the remapped virtual memory range
- * @size: size of the remapped virtual memory range
- * @prot: new protection bits of the range (see NOTE)
- * @pgoff: to-be-mapped page of the backing store file
- * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO.
- *
- * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma
- * (shared backing store file).
- *
- * This syscall works purely via pagetables, so it's the most efficient
- * way to map the same (large) file into a given virtual window. Unlike
- * mmap()/mremap() it does not create any new vmas. The new mappings are
- * also safe across swapout.
- *
- * NOTE: the @prot parameter right now is ignored (but must be zero),
- * and the vma's default protection is used. Arbitrary protections
- * might be implemented in the future.
- */
-SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
-		unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
-{
-	struct mm_struct *mm = current->mm;
-	struct address_space *mapping;
-	struct vm_area_struct *vma;
-	int err = -EINVAL;
-	int has_write_lock = 0;
-	vm_flags_t vm_flags = 0;
-
-	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
-			"See Documentation/vm/remap_file_pages.txt.\n",
-			current->comm, current->pid);
-
-	if (prot)
-		return err;
-	/*
-	 * Sanitize the syscall parameters:
-	 */
-	start = start & PAGE_MASK;
-	size = size & PAGE_MASK;
-
-	/* Does the address range wrap, or is the span zero-sized? */
-	if (start + size <= start)
-		return err;
-
-	/* Does pgoff wrap? */
-	if (pgoff + (size >> PAGE_SHIFT) < pgoff)
-		return err;
-
-	/* Can we represent this offset inside this architecture's pte's? */
-#if PTE_FILE_MAX_BITS < BITS_PER_LONG
-	if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
-		return err;
-#endif
-
-	/* We need down_write() to change vma->vm_flags. */
-	down_read(&mm->mmap_sem);
- retry:
-	vma = find_vma(mm, start);
-
-	/*
-	 * Make sure the vma is shared, that it supports prefaulting,
-	 * and that the remapped range is valid and fully within
-	 * the single existing vma.
-	 */
-	if (!vma || !(vma->vm_flags & VM_SHARED))
-		goto out;
-
-	if (!vma->vm_ops || !vma->vm_ops->remap_pages)
-		goto out;
-
-	if (start < vma->vm_start || start + size > vma->vm_end)
-		goto out;
-
-	/* Must set VM_NONLINEAR before any pages are populated. */
-	if (!(vma->vm_flags & VM_NONLINEAR)) {
-		/*
-		 * vm_private_data is used as a swapout cursor
-		 * in a VM_NONLINEAR vma.
-		 */
-		if (vma->vm_private_data)
-			goto out;
-
-		/* Don't need a nonlinear mapping, exit success */
-		if (pgoff == linear_page_index(vma, start)) {
-			err = 0;
-			goto out;
-		}
-
-		if (!has_write_lock) {
-get_write_lock:
-			up_read(&mm->mmap_sem);
-			down_write(&mm->mmap_sem);
-			has_write_lock = 1;
-			goto retry;
-		}
-		mapping = vma->vm_file->f_mapping;
-		/*
-		 * page_mkclean doesn't work on nonlinear vmas, so if
-		 * dirty pages need to be accounted, emulate with linear
-		 * vmas.
-		 */
-		if (mapping_cap_account_dirty(mapping)) {
-			unsigned long addr;
-			struct file *file = get_file(vma->vm_file);
-			/* mmap_region may free vma; grab the info now */
-			vm_flags = vma->vm_flags;
-
-			addr = mmap_region(file, start, size, vm_flags, pgoff);
-			fput(file);
-			if (IS_ERR_VALUE(addr)) {
-				err = addr;
-			} else {
-				BUG_ON(addr != start);
-				err = 0;
-			}
-			goto out_freed;
-		}
-		i_mmap_lock_write(mapping);
-		flush_dcache_mmap_lock(mapping);
-		vma->vm_flags |= VM_NONLINEAR;
-		vma_interval_tree_remove(vma, &mapping->i_mmap);
-		vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
-		flush_dcache_mmap_unlock(mapping);
-		i_mmap_unlock_write(mapping);
-	}
-
-	if (vma->vm_flags & VM_LOCKED) {
-		/*
-		 * drop PG_Mlocked flag for over-mapped range
-		 */
-		if (!has_write_lock)
-			goto get_write_lock;
-		vm_flags = vma->vm_flags;
-		munlock_vma_pages_range(vma, start, start + size);
-		vma->vm_flags = vm_flags;
-	}
-
-	mmu_notifier_invalidate_range_start(mm, start, start + size);
-	err = vma->vm_ops->remap_pages(vma, start, size, pgoff);
-	mmu_notifier_invalidate_range_end(mm, start, start + size);
-
-	/*
-	 * We can't clear VM_NONLINEAR because we'd have to do
-	 * it after ->populate completes, and that would prevent
-	 * downgrading the lock. (Locks can't be upgraded).
-	 */
-
-out:
-	if (vma)
-		vm_flags = vma->vm_flags;
-out_freed:
-	if (likely(!has_write_lock))
-		up_read(&mm->mmap_sem);
-	else
-		up_write(&mm->mmap_sem);
-	if (!err && ((vm_flags & VM_LOCKED) || !(flags & MAP_NONBLOCK)))
-		mm_populate(start, size);
-
-	return err;
-}
diff --git a/mm/gup.c b/mm/gup.c
index 8dd50ce6326f..12bc2bc33da7 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -55,7 +55,7 @@ retry:
 	 */
 	if (likely(!(flags & FOLL_MIGRATION)))
 		goto no_page;
-	if (pte_none(pte) || pte_file(pte))
+	if (pte_none(pte))
 		goto no_page;
 	entry = pte_to_swp_entry(pte);
 	if (!is_migration_entry(entry))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 85032de5e20f..be0e5d0db5ec 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,7 +35,7 @@
 #include <linux/node.h>
 #include "internal.h"

-unsigned long hugepages_treat_as_movable;
+int hugepages_treat_as_movable;

 int hugetlb_max_hstate __read_mostly;
 unsigned int default_hstate_idx;
diff --git a/mm/interval_tree.c b/mm/interval_tree.c
index 8da581fa9060..f2c2492681bf 100644
--- a/mm/interval_tree.c
+++ b/mm/interval_tree.c
@@ -21,8 +21,8 @@ static inline unsigned long vma_last_pgoff(struct vm_area_struct *v)
 	return v->vm_pgoff + ((v->vm_end - v->vm_start) >> PAGE_SHIFT) - 1;
 }

-INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.linear.rb,
-		     unsigned long, shared.linear.rb_subtree_last,
+INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.rb,
+		     unsigned long, shared.rb_subtree_last,
		     vma_start_pgoff, vma_last_pgoff,, vma_interval_tree)

 /* Insert node immediately after prev in the interval tree */
@@ -36,26 +36,26 @@ void vma_interval_tree_insert_after(struct vm_area_struct *node,

 	VM_BUG_ON_VMA(vma_start_pgoff(node) != vma_start_pgoff(prev), node);

-	if (!prev->shared.linear.rb.rb_right) {
+	if (!prev->shared.rb.rb_right) {
 		parent = prev;
-		link = &prev->shared.linear.rb.rb_right;
+		link = &prev->shared.rb.rb_right;
 	} else {
-		parent = rb_entry(prev->shared.linear.rb.rb_right,
-				  struct vm_area_struct, shared.linear.rb);
-		if (parent->shared.linear.rb_subtree_last < last)
-			parent->shared.linear.rb_subtree_last = last;
-		while (parent->shared.linear.rb.rb_left) {
-			parent = rb_entry(parent->shared.linear.rb.rb_left,
-				struct vm_area_struct, shared.linear.rb);
-			if (parent->shared.linear.rb_subtree_last < last)
-				parent->shared.linear.rb_subtree_last = last;
+		parent = rb_entry(prev->shared.rb.rb_right,
+				  struct vm_area_struct, shared.rb);
+		if (parent->shared.rb_subtree_last < last)
+			parent->shared.rb_subtree_last = last;
+		while (parent->shared.rb.rb_left) {
+			parent = rb_entry(parent->shared.rb.rb_left,
+				struct vm_area_struct, shared.rb);
+			if (parent->shared.rb_subtree_last < last)
+				parent->shared.rb_subtree_last = last;
 		}
-		link = &parent->shared.linear.rb.rb_left;
+		link = &parent->shared.rb.rb_left;
 	}

-	node->shared.linear.rb_subtree_last = last;
-	rb_link_node(&node->shared.linear.rb, &parent->shared.linear.rb, link);
-	rb_insert_augmented(&node->shared.linear.rb, root,
+	node->shared.rb_subtree_last = last;
+	rb_link_node(&node->shared.rb, &parent->shared.rb, link);
+	rb_insert_augmented(&node->shared.rb, root,
			    &vma_interval_tree_augment);
 }

diff --git a/mm/ksm.c b/mm/ksm.c
index 15647fb0394f..4162dce2eb44 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1748,7 +1748,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		 */
 		if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
				 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
-				 VM_HUGETLB | VM_NONLINEAR | VM_MIXEDMAP))
+				 VM_HUGETLB | VM_MIXEDMAP))
 			return 0;	/* just ignore the advice */

 #ifdef VM_SAO
diff --git a/mm/madvise.c b/mm/madvise.c
index a271adc93289..d79fb5e8f80a 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -155,7 +155,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 		pte = *(orig_pte + ((index - start) / PAGE_SIZE));
 		pte_unmap_unlock(orig_pte, ptl);

-		if (pte_present(pte) || pte_none(pte) || pte_file(pte))
+		if (pte_present(pte) || pte_none(pte))
 			continue;
 		entry = pte_to_swp_entry(pte);
 		if (unlikely(non_swap_entry(entry)))
@@ -278,14 +278,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
 		return -EINVAL;

-	if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
-		struct zap_details details = {
-			.nonlinear_vma = vma,
-			.last_index = ULONG_MAX,
-		};
-		zap_page_range(vma, start, end - start, &details);
-	} else
-		zap_page_range(vma, start, end - start, NULL);
+	zap_page_range(vma, start, end - start, NULL);
 	return 0;
 }

@@ -303,7 +296,7 @@ static long madvise_remove(struct vm_area_struct *vma,

 	*prev = NULL;	/* tell sys_madvise we drop mmap_sem */

-	if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
+	if (vma->vm_flags & (VM_LOCKED | VM_HUGETLB))
 		return -EINVAL;

 	f = vma->vm_file;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2f6893c2f01b..f3f8a4f52a0c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -343,9 +343,6 @@ struct mem_cgroup {
 	struct cg_proto tcp_mem;
 #endif
 #if defined(CONFIG_MEMCG_KMEM)
-	/* analogous to slab_common's slab_caches list, but per-memcg;
-	 * protected by memcg_slab_mutex */
-	struct list_head memcg_slab_caches;
 	/* Index in the kmem_cache->memcg_params->memcg_caches array */
 	int kmemcg_id;
 #endif
@@ -2476,27 +2473,8 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
 }

 #ifdef CONFIG_MEMCG_KMEM
-/*
- * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or
- * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists.
- */
-static DEFINE_MUTEX(memcg_slab_mutex);
-
-/*
- * This is a bit cumbersome, but it is rarely used and avoids a backpointer
- * in the memcg_cache_params struct.
- */
-static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
-{
-	struct kmem_cache *cachep;
-
-	VM_BUG_ON(p->is_root_cache);
-	cachep = p->root_cache;
-	return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg));
-}
-
-static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
-			     unsigned long nr_pages)
+int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
+		      unsigned long nr_pages)
 {
 	struct page_counter *counter;
 	int ret = 0;
@@ -2533,8 +2511,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
 	return ret;
 }

-static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
-				unsigned long nr_pages)
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages)
 {
 	page_counter_uncharge(&memcg->memory, nr_pages);
 	if (do_swap_account)
@@ -2579,10 +2556,7 @@ static int memcg_alloc_cache_id(void)
 	else if (size > MEMCG_CACHES_MAX_SIZE)
 		size = MEMCG_CACHES_MAX_SIZE;

-	mutex_lock(&memcg_slab_mutex);
 	err = memcg_update_all_caches(size);
-	mutex_unlock(&memcg_slab_mutex);
-
 	if (err) {
 		ida_simple_remove(&kmem_limited_groups, id);
 		return err;
@@ -2605,123 +2579,20 @@ void memcg_update_array_size(int num)
 	memcg_limited_groups_array_size = num;
 }

-static void memcg_register_cache(struct mem_cgroup *memcg,
-				 struct kmem_cache *root_cache)
-{
-	static char memcg_name_buf[NAME_MAX + 1]; /* protected by
-						     memcg_slab_mutex */
-	struct kmem_cache *cachep;
-	int id;
-
-	lockdep_assert_held(&memcg_slab_mutex);
-
-	id = memcg_cache_id(memcg);
-
-	/*
-	 * Since per-memcg caches are created asynchronously on first
-	 * allocation (see memcg_kmem_get_cache()), several threads can try to
-	 * create the same cache, but only one of them may succeed.
-	 */
-	if (cache_from_memcg_idx(root_cache, id))
-		return;
-
-	cgroup_name(memcg->css.cgroup, memcg_name_buf, NAME_MAX + 1);
-	cachep = memcg_create_kmem_cache(memcg, root_cache, memcg_name_buf);
-	/*
-	 * If we could not create a memcg cache, do not complain, because
-	 * that's not critical at all as we can always proceed with the root
-	 * cache.
-	 */
-	if (!cachep)
-		return;
-
-	list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
-
-	/*
-	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
-	 * barrier here to ensure nobody will see the kmem_cache partially
-	 * initialized.
-	 */
-	smp_wmb();
-
-	BUG_ON(root_cache->memcg_params->memcg_caches[id]);
-	root_cache->memcg_params->memcg_caches[id] = cachep;
-}
-
-static void memcg_unregister_cache(struct kmem_cache *cachep)
-{
-	struct kmem_cache *root_cache;
-	struct mem_cgroup *memcg;
-	int id;
-
-	lockdep_assert_held(&memcg_slab_mutex);
-
-	BUG_ON(is_root_cache(cachep));
-
-	root_cache = cachep->memcg_params->root_cache;
-	memcg = cachep->memcg_params->memcg;
-	id = memcg_cache_id(memcg);
-
-	BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep);
-	root_cache->memcg_params->memcg_caches[id] = NULL;
-
-	list_del(&cachep->memcg_params->list);
-
-	kmem_cache_destroy(cachep);
-}
-
-int __memcg_cleanup_cache_params(struct kmem_cache *s)
-{
-	struct kmem_cache *c;
-	int i, failed = 0;
-
-	mutex_lock(&memcg_slab_mutex);
-	for_each_memcg_cache_index(i) {
-		c = cache_from_memcg_idx(s, i);
-		if (!c)
-			continue;
-
-		memcg_unregister_cache(c);
-
-		if (cache_from_memcg_idx(s, i))
-			failed++;
-	}
-	mutex_unlock(&memcg_slab_mutex);
-	return failed;
-}
-
-static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
-{
-	struct kmem_cache *cachep;
-	struct memcg_cache_params *params, *tmp;
-
-	if (!memcg_kmem_is_active(memcg))
-		return;
-
-	mutex_lock(&memcg_slab_mutex);
-	list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
-		cachep = memcg_params_to_cache(params);
-		memcg_unregister_cache(cachep);
-	}
-	mutex_unlock(&memcg_slab_mutex);
-}
-
-struct memcg_register_cache_work {
+struct memcg_kmem_cache_create_work {
 	struct mem_cgroup *memcg;
 	struct kmem_cache *cachep;
 	struct work_struct work;
 };

-static void memcg_register_cache_func(struct work_struct *w)
+static void memcg_kmem_cache_create_func(struct work_struct *w)
 {
-	struct memcg_register_cache_work *cw =
-		container_of(w, struct memcg_register_cache_work, work);
+	struct memcg_kmem_cache_create_work *cw =
+		container_of(w, struct memcg_kmem_cache_create_work, work);
 	struct mem_cgroup *memcg = cw->memcg;
 	struct kmem_cache *cachep = cw->cachep;

-	mutex_lock(&memcg_slab_mutex);
-	memcg_register_cache(memcg, cachep);
-	mutex_unlock(&memcg_slab_mutex);
+	memcg_create_kmem_cache(memcg, cachep);

 	css_put(&memcg->css);
 	kfree(cw);
@@ -2730,10 +2601,10 @@ static void memcg_register_cache_func(struct work_struct *w)
 /*
  * Enqueue the creation of a per-memcg kmem_cache.
  */
-static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
+static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
					    struct kmem_cache *cachep)
 {
-	struct memcg_register_cache_work *cw;
+	struct memcg_kmem_cache_create_work *cw;

 	cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
 	if (!cw)
@@ -2743,18 +2614,18 @@ static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,

 	cw->memcg = memcg;
 	cw->cachep = cachep;
+	INIT_WORK(&cw->work, memcg_kmem_cache_create_func);

-	INIT_WORK(&cw->work, memcg_register_cache_func);
 	schedule_work(&cw->work);
 }

-static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
+static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
					  struct kmem_cache *cachep)
 {
 	/*
 	 * We need to stop accounting when we kmalloc, because if the
 	 * corresponding kmalloc cache is not yet created, the first allocation
-	 * in __memcg_schedule_register_cache will recurse.
+	 * in __memcg_schedule_kmem_cache_create will recurse.
 	 *
 	 * However, it is better to enclose the whole function. Depending on
 	 * the debugging options enabled, INIT_WORK(), for instance, can
@@ -2763,24 +2634,10 @@ static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
 	 * the safest choice is to do it like this, wrapping the whole function.
 	 */
 	current->memcg_kmem_skip_account = 1;
-	__memcg_schedule_register_cache(memcg, cachep);
+	__memcg_schedule_kmem_cache_create(memcg, cachep);
 	current->memcg_kmem_skip_account = 0;
 }

-int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
-{
-	unsigned int nr_pages = 1 << order;
-
-	return memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
-}
-
-void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
-{
-	unsigned int nr_pages = 1 << order;
-
-	memcg_uncharge_kmem(cachep->memcg_params->memcg, nr_pages);
-}
-
 /*
  * Return the kmem_cache we're supposed to use for a slab allocation.
  * We try to use the current memcg's version of the cache.
@@ -2825,7 +2682,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
	 * could happen with the slab_mutex held. So it's better to
	 * defer everything.
	 */
-	memcg_schedule_register_cache(memcg, cachep);
+	memcg_schedule_kmem_cache_create(memcg, cachep);
 out:
 	css_put(&memcg->css);
 	return cachep;
@@ -4154,7 +4011,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)

 static void memcg_destroy_kmem(struct mem_cgroup *memcg)
 {
-	memcg_unregister_all_caches(memcg);
+	memcg_destroy_kmem_caches(memcg);
 	mem_cgroup_sockets_destroy(memcg);
 }
 #else
@@ -4682,7 +4539,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	spin_lock_init(&memcg->event_list_lock);
 #ifdef CONFIG_MEMCG_KMEM
 	memcg->kmemcg_id = -1;
-	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
 #endif

 	return &memcg->css;
@@ -4926,10 +4782,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 		return NULL;

 	mapping = vma->vm_file->f_mapping;
-	if (pte_none(ptent))
-		pgoff = linear_page_index(vma, addr);
-	else /* pte_file(ptent) is true */
-		pgoff = pte_to_pgoff(ptent);
+	pgoff = linear_page_index(vma, addr);

 	/* page is moved even if it's not RSS of this task(page-faulted). */
 #ifdef CONFIG_SWAP
@@ -4961,7 +4814,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
 		page = mc_handle_present_pte(vma, addr, ptent);
 	else if (is_swap_pte(ptent))
 		page = mc_handle_swap_pte(vma, addr, ptent, &ent);
-	else if (pte_none(ptent) || pte_file(ptent))
+	else if (pte_none(ptent))
 		page = mc_handle_file_pte(vma, addr, ptent, &ent);

 	if (!page && !ent.val)
diff --git a/mm/memory.c b/mm/memory.c
index d707c4dfbbb4..d63849b5188f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -813,42 +813,40 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,

 	/* pte contains position in swap or file, so copy. */
 	if (unlikely(!pte_present(pte))) {
-		if (!pte_file(pte)) {
-			swp_entry_t entry = pte_to_swp_entry(pte);
-
-			if (likely(!non_swap_entry(entry))) {
-				if (swap_duplicate(entry) < 0)
-					return entry.val;
-
-				/* make sure dst_mm is on swapoff's mmlist. */
-				if (unlikely(list_empty(&dst_mm->mmlist))) {
-					spin_lock(&mmlist_lock);
-					if (list_empty(&dst_mm->mmlist))
-						list_add(&dst_mm->mmlist,
-							 &src_mm->mmlist);
-					spin_unlock(&mmlist_lock);
-				}
-				rss[MM_SWAPENTS]++;
-			} else if (is_migration_entry(entry)) {
-				page = migration_entry_to_page(entry);
-
-				if (PageAnon(page))
-					rss[MM_ANONPAGES]++;
-				else
-					rss[MM_FILEPAGES]++;
-
-				if (is_write_migration_entry(entry) &&
-				    is_cow_mapping(vm_flags)) {
-					/*
-					 * COW mappings require pages in both
-					 * parent and child to be set to read.
-					 */
-					make_migration_entry_read(&entry);
-					pte = swp_entry_to_pte(entry);
-					if (pte_swp_soft_dirty(*src_pte))
-						pte = pte_swp_mksoft_dirty(pte);
-					set_pte_at(src_mm, addr, src_pte, pte);
-				}
+		swp_entry_t entry = pte_to_swp_entry(pte);
+
+		if (likely(!non_swap_entry(entry))) {
+			if (swap_duplicate(entry) < 0)
+				return entry.val;
+
+			/* make sure dst_mm is on swapoff's mmlist. */
+			if (unlikely(list_empty(&dst_mm->mmlist))) {
+				spin_lock(&mmlist_lock);
+				if (list_empty(&dst_mm->mmlist))
+					list_add(&dst_mm->mmlist,
+							&src_mm->mmlist);
+				spin_unlock(&mmlist_lock);
+			}
+			rss[MM_SWAPENTS]++;
+		} else if (is_migration_entry(entry)) {
+			page = migration_entry_to_page(entry);
+
+			if (PageAnon(page))
+				rss[MM_ANONPAGES]++;
+			else
+				rss[MM_FILEPAGES]++;
+
+			if (is_write_migration_entry(entry) &&
+					is_cow_mapping(vm_flags)) {
+				/*
+				 * COW mappings require pages in both
+				 * parent and child to be set to read.
+				 */
+				make_migration_entry_read(&entry);
+				pte = swp_entry_to_pte(entry);
+				if (pte_swp_soft_dirty(*src_pte))
+					pte = pte_swp_mksoft_dirty(pte);
+				set_pte_at(src_mm, addr, src_pte, pte);
			}
 		}
 		goto out_set_pte;
@@ -1022,11 +1020,9 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
	 * readonly mappings. The tradeoff is that copy_page_range is more
	 * efficient than faulting.
	 */
-	if (!(vma->vm_flags & (VM_HUGETLB | VM_NONLINEAR |
-			       VM_PFNMAP | VM_MIXEDMAP))) {
-		if (!vma->anon_vma)
-			return 0;
-	}
+	if (!(vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) &&
+			!vma->anon_vma)
+		return 0;

 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
@@ -1084,6 +1080,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	spinlock_t *ptl;
 	pte_t *start_pte;
 	pte_t *pte;
+	swp_entry_t entry;

 again:
 	init_rss_vec(rss);
@@ -1109,28 +1106,12 @@ again:
				if (details->check_mapping &&
				    details->check_mapping != page->mapping)
					continue;
-				/*
-				 * Each page->index must be checked when
-				 * invalidating or truncating nonlinear.
-				 */
-				if (details->nonlinear_vma &&
-				    (page->index < details->first_index ||
-				     page->index > details->last_index))
-					continue;
			}
			ptent = ptep_get_and_clear_full(mm, addr, pte,
							tlb->fullmm);
			tlb_remove_tlb_entry(tlb, pte, addr);
			if (unlikely(!page))
				continue;
-			if (unlikely(details) && details->nonlinear_vma
-			    && linear_page_index(details->nonlinear_vma,
-					addr) != page->index) {
-				pte_t ptfile = pgoff_to_pte(page->index);
-				if (pte_soft_dirty(ptent))
-					ptfile = pte_file_mksoft_dirty(ptfile);
-				set_pte_at(mm, addr, pte, ptfile);
-			}
			if (PageAnon(page))
				rss[MM_ANONPAGES]--;
			else {
@@ -1153,33 +1134,25 @@ again:
			}
			continue;
		}
-		/*
-		 * If details->check_mapping, we leave swap entries;
-		 * if details->nonlinear_vma, we leave file entries.
-		 */
+		/* If details->check_mapping, we leave swap entries. */
		if (unlikely(details))
			continue;
-		if (pte_file(ptent)) {
-			if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
-				print_bad_pte(vma, addr, ptent, NULL);
-		} else {
-			swp_entry_t entry = pte_to_swp_entry(ptent);

-			if (!non_swap_entry(entry))
-				rss[MM_SWAPENTS]--;
-			else if (is_migration_entry(entry)) {
-				struct page *page;
+		entry = pte_to_swp_entry(ptent);
+		if (!non_swap_entry(entry))
+			rss[MM_SWAPENTS]--;
+		else if (is_migration_entry(entry)) {
+			struct page *page;

			page = migration_entry_to_page(entry);

			if (PageAnon(page))
				rss[MM_ANONPAGES]--;
			else
				rss[MM_FILEPAGES]--;
-			}
-			if (unlikely(!free_swap_and_cache(entry)))
-				print_bad_pte(vma, addr, ptent, NULL);
		}
+		if (unlikely(!free_swap_and_cache(entry)))
+			print_bad_pte(vma, addr, ptent, NULL);
		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
	} while (pte++, addr += PAGE_SIZE, addr != end);

@@ -1279,7 +1252,7 @@ static void unmap_page_range(struct mmu_gather *tlb,
 	pgd_t *pgd;
 	unsigned long next;

-	if (details && !details->check_mapping && !details->nonlinear_vma)
+	if (details && !details->check_mapping)
		details = NULL;

 	BUG_ON(addr >= end);
@@ -1373,7 +1346,7 @@ void unmap_vmas(struct mmu_gather *tlb,
 * @vma: vm_area_struct holding the applicable pages
 * @start: starting address of pages to zap
 * @size: number of bytes to zap
- * @details: details of nonlinear truncation or shared cache invalidation
+ * @details: details of shared cache invalidation
 *
 * Caller must protect the VMA list
 */
@@ -1399,7 +1372,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
 * @vma: vm_area_struct holding the applicable pages
 * @address: starting address of pages to zap
 * @size: number of bytes to zap
- * @details: details of nonlinear truncation or shared cache invalidation
+ * @details: details of shared cache invalidation
 *
 * The range must fit into one VMA.
 */
@@ -1924,12 +1897,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 EXPORT_SYMBOL_GPL(apply_to_page_range);

 /*
- * handle_pte_fault chooses page fault handler according to an entry
- * which was read non-atomically. Before making any commitment, on
- * those architectures or configurations (e.g. i386 with PAE) which
- * might give a mix of unmatched parts, do_swap_page and do_nonlinear_fault
- * must check under lock before unmapping the pte and proceeding
- * (but do_wp_page is only called after already making such a check;
+ * handle_pte_fault chooses page fault handler according to an entry which was
+ * read non-atomically. Before making any commitment, on those architectures
+ * or configurations (e.g. i386 with PAE) which might give a mix of unmatched
+ * parts, do_swap_page must check under lock before unmapping the pte and
+ * proceeding (but do_wp_page is only called after already making such a check;
 * and do_anonymous_page can safely check later on).
 */
 static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
@@ -2035,7 +2007,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t entry;
 	int ret = 0;
 	int page_mkwrite = 0;
-	struct page *dirty_page = NULL;
+	bool dirty_shared = false;
 	unsigned long mmun_start = 0;	/* For mmu_notifiers */
 	unsigned long mmun_end = 0;	/* For mmu_notifiers */
 	struct mem_cgroup *memcg;
@@ -2086,6 +2058,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unlock_page(old_page);
	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
					(VM_WRITE|VM_SHARED))) {
+		page_cache_get(old_page);
		/*
		 * Only catch write-faults on shared writable pages,
		 * read-only shared pages can get COWed by
@@ -2093,7 +2066,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
		 */
		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
			int tmp;
-			page_cache_get(old_page);
+
			pte_unmap_unlock(page_table, ptl);
			tmp = do_page_mkwrite(vma, old_page, address);
			if (unlikely(!tmp || (tmp &
@@ -2113,11 +2086,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
				unlock_page(old_page);
				goto unlock;
			}
-
			page_mkwrite = 1;
		}
-		dirty_page = old_page;
-		get_page(dirty_page);
+
+		dirty_shared = true;

 reuse:
		/*
@@ -2136,20 +2108,20 @@ reuse:
	pte_unmap_unlock(page_table, ptl);
	ret |= VM_FAULT_WRITE;

-	if (!dirty_page)
-		return ret;
-
-	if (!page_mkwrite) {
+	if (dirty_shared) {
		struct address_space *mapping;
		int dirtied;

-		lock_page(dirty_page);
-		dirtied = set_page_dirty(dirty_page);
-		VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page);
-		mapping = dirty_page->mapping;
-		unlock_page(dirty_page);
+		if (!page_mkwrite)
+			lock_page(old_page);

-		if (dirtied && mapping) {
+		dirtied = set_page_dirty(old_page);
+		VM_BUG_ON_PAGE(PageAnon(old_page), old_page);
+		mapping = old_page->mapping;
+		unlock_page(old_page);
+		page_cache_release(old_page);
+
+		if ((dirtied || page_mkwrite) && mapping) {
			/*
			 * Some device drivers do not set page.mapping
			 * but still dirty their pages
@@ -2157,25 +2129,9 @@ reuse:
			balance_dirty_pages_ratelimited(mapping);
		}

-		/* file_update_time outside page_lock */
-		if (vma->vm_file)
+		if (!page_mkwrite)
			file_update_time(vma->vm_file);
	}
-	put_page(dirty_page);
-	if (page_mkwrite) {
-		struct address_space *mapping = dirty_page->mapping;
-
-		set_page_dirty(dirty_page);
-		unlock_page(dirty_page);
-		page_cache_release(dirty_page);
-		if (mapping) {
-			/*
-			 * Some device drivers do not set page.mapping
-			 * but still dirty their pages
-			 */
-			balance_dirty_pages_ratelimited(mapping);
-		}
-	}

	return ret;
 }
@@ -2333,25 +2289,11 @@ static inline void unmap_mapping_range_tree(struct rb_root *root,
	}
 }

-static inline void unmap_mapping_range_list(struct list_head *head,
-					    struct zap_details *details)
-{
-	struct vm_area_struct *vma;
-
-	/*
-	 * In nonlinear VMAs there is no correspondence between virtual address
-	 * offset and file offset. So we must perform an exhaustive search
-	 * across *all* the pages in each nonlinear VMA, not just the pages
-	 * whose virtual address lies outside the file truncation point.
-	 */
-	list_for_each_entry(vma, head, shared.nonlinear) {
-		details->nonlinear_vma = vma;
-		unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
-	}
-}
-
 /**
- * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file.
+ * unmap_mapping_range - unmap the portion of all mmaps in the specified
+ * address_space corresponding to the specified page range in the underlying
+ * file.
+ *
 * @mapping: the address space containing mmaps to be unmapped.
 * @holebegin: byte in first page to unmap, relative to the start of
 * the underlying file. This will be rounded down to a PAGE_SIZE
@@ -2380,7 +2322,6 @@ void unmap_mapping_range(struct address_space *mapping,
	}

	details.check_mapping = even_cows? NULL: mapping;
-	details.nonlinear_vma = NULL;
	details.first_index = hba;
	details.last_index = hba + hlen - 1;
	if (details.last_index < details.first_index)
@@ -2390,8 +2331,6 @@ void unmap_mapping_range(struct address_space *mapping,
	i_mmap_lock_write(mapping);
	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
		unmap_mapping_range_tree(&mapping->i_mmap, &details);
-	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
-		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
	i_mmap_unlock_write(mapping);
 }
 EXPORT_SYMBOL(unmap_mapping_range);
@@ -2752,8 +2691,6 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
	entry = mk_pte(page, vma->vm_page_prot);
	if (write)
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-	else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
-		entry = pte_mksoft_dirty(entry);
	if (anon) {
		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
		page_add_new_anon_rmap(page, vma, address);
@@ -2888,8 +2825,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
	 * if page by the offset is not ready to be mapped (cold cache or
	 * something).
	 */
-	if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) &&
-	    fault_around_bytes >> PAGE_SHIFT > 1) {
+	if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
		do_fault_around(vma, address, pte, pgoff, flags);
		if (!pte_same(*pte, orig_pte))
@@ -3021,8 +2957,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
		balance_dirty_pages_ratelimited(mapping);
	}

-	/* file_update_time outside page_lock */
-	if (vma->vm_file && !vma->vm_ops->page_mkwrite)
+	if (!vma->vm_ops->page_mkwrite)
		file_update_time(vma->vm_file);

	return ret;
@@ -3034,7 +2969,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 * The mmap_sem may have been released depending on flags and our
 * return value. See filemap_fault() and __lock_page_or_retry().
 */
-static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		unsigned int flags, pte_t orig_pte)
 {
@@ -3051,46 +2986,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
	return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }

-/*
- * Fault of a previously existing named mapping. Repopulate the pte
- * from the encoded file_pte if possible. This enables swappable
- * nonlinear vmas.
- *
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
- * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with pte unmapped and unlocked.
- * The mmap_sem may have been released depending on flags and our
- * return value. See filemap_fault() and __lock_page_or_retry().
- */
-static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pte_t *page_table, pmd_t *pmd,
-		unsigned int flags, pte_t orig_pte)
-{
-	pgoff_t pgoff;
-
-	flags |= FAULT_FLAG_NONLINEAR;
-
-	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
-		return 0;
-
-	if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
-		/*
-		 * Page table corrupted: show pte and kill process.
-		 */
-		print_bad_pte(vma, address, orig_pte, NULL);
-		return VM_FAULT_SIGBUS;
-	}
-
-	pgoff = pte_to_pgoff(orig_pte);
-	if (!(flags & FAULT_FLAG_WRITE))
-		return do_read_fault(mm, vma, address, pmd, pgoff, flags,
-				orig_pte);
-	if (!(vma->vm_flags & VM_SHARED))
-		return do_cow_fault(mm, vma, address, pmd, pgoff, flags,
-				orig_pte);
-	return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
-}
-
 static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
				unsigned long addr, int page_nid,
				int *flags)
@@ -3218,15 +3113,12 @@ static int handle_pte_fault(struct mm_struct *mm,
		if (pte_none(entry)) {
			if (vma->vm_ops) {
				if (likely(vma->vm_ops->fault))
-					return do_linear_fault(mm, vma, address,
-						pte, pmd, flags, entry);
+					return do_fault(mm, vma, address, pte,
+							pmd, flags, entry);
			}
			return do_anonymous_page(mm, vma, address,
						 pte, pmd, flags);
		}
-		if (pte_file(entry))
-			return do_nonlinear_fault(mm, vma, address,
-					pte, pmd, flags, entry);
		return do_swap_page(mm, vma, address,
				    pte, pmd, flags, entry);
	}
diff --git a/mm/migrate.c b/mm/migrate.c
index 344cdf692fc8..6e284bcca8bb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -179,37 +179,6 @@ out:
 }

 /*
- * Congratulations to trinity for discovering this bug.
- * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
- * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
- * replace the specified range by file ptes throughout (maybe populated after).
- * If page migration finds a page within that range, while it's still located
- * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
- * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
- * But if the migrating page is in a part of the vma outside the range to be
- * remapped, then it will not be cleared, and remove_migration_ptes() needs to
- * deal with it. Fortunately, this part of the vma is of course still linear,
- * so we just need to use linear location on the nonlinear list.
- */
-static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
-		struct address_space *mapping, void *arg)
-{
-	struct vm_area_struct *vma;
-	/* hugetlbfs does not support remap_pages, so no huge pgoff worries */
-	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-	unsigned long addr;
-
-	list_for_each_entry(vma,
-			&mapping->i_mmap_nonlinear, shared.nonlinear) {
-
-		addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
-		if (addr >= vma->vm_start && addr < vma->vm_end)
-			remove_migration_pte(page, vma, addr, arg);
-	}
-	return SWAP_AGAIN;
-}
-
-/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */
@@ -218,7 +187,6 @@ static void remove_migration_ptes(struct page *old, struct page *new)
 	struct rmap_walk_control rwc = {
		.rmap_one = remove_migration_pte,
		.arg = old,
-		.file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
	};

 	rmap_walk(new, &rwc);
diff --git a/mm/mincore.c b/mm/mincore.c
index c8c528b36641..46527c023e0c 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -124,17 +124,13 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	do {
 		pte_t pte = *ptep;
-		pgoff_t pgoff;
 
 		next = addr + PAGE_SIZE;
 		if (pte_none(pte))
 			mincore_unmapped_range(vma, addr, next, vec);
 		else if (pte_present(pte))
 			*vec = 1;
-		else if (pte_file(pte)) {
-			pgoff = pte_to_pgoff(pte);
-			*vec = mincore_page(vma->vm_file->f_mapping, pgoff);
-		} else { /* pte is a swap entry */
+		else { /* pte is a swap entry */
 			swp_entry_t entry = pte_to_swp_entry(pte);
 
 			if (non_swap_entry(entry)) {
@@ -145,9 +141,8 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				*vec = 1;
 			} else {
 #ifdef CONFIG_SWAP
-				pgoff = entry.val;
 				*vec = mincore_page(swap_address_space(entry),
-						    pgoff);
+						    entry.val);
 #else
 				WARN_ON(1);
 				*vec = 1;
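
With pte_file() gone, a non-present pte seen by mincore_pte_range() can only be none or a swap entry, so the old file-pte branch simply disappears; the user-visible mincore(2) behaviour is unchanged. A minimal user-space sketch of probing residency (the file name and mapping size are assumptions for illustration, not part of this patch):

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/mman.h>

    int main(void)
    {
        long page = sysconf(_SC_PAGESIZE);
        size_t len = 16 * page;                 /* assumed mapping size */
        int fd = open("data.bin", O_RDONLY);    /* hypothetical file */
        if (fd < 0)
            return 1;

        void *addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
        if (addr == MAP_FAILED)
            return 1;

        unsigned char vec[16];
        /* bit 0 of vec[i] is set iff page i is resident in the page cache */
        if (mincore(addr, len, vec) == 0)
            for (size_t i = 0; i < len / page; i++)
                printf("page %zu: %s\n", i, (vec[i] & 1) ? "resident" : "absent");

        munmap(addr, len);
        close(fd);
        return 0;
    }
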
diff --git a/mm/mmap.c b/mm/mmap.c
index 7f684d5a8087..14d84666e8ba 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -243,10 +243,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 	mapping_unmap_writable(mapping);
 
 	flush_dcache_mmap_lock(mapping);
-	if (unlikely(vma->vm_flags & VM_NONLINEAR))
-		list_del_init(&vma->shared.nonlinear);
-	else
-		vma_interval_tree_remove(vma, &mapping->i_mmap);
+	vma_interval_tree_remove(vma, &mapping->i_mmap);
 	flush_dcache_mmap_unlock(mapping);
 }
 
@@ -649,10 +646,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
 		atomic_inc(&mapping->i_mmap_writable);
 
 		flush_dcache_mmap_lock(mapping);
-		if (unlikely(vma->vm_flags & VM_NONLINEAR))
-			vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
-		else
-			vma_interval_tree_insert(vma, &mapping->i_mmap);
+		vma_interval_tree_insert(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
 	}
 }
@@ -789,14 +783,11 @@ again: remove_next = 1 + (end > next->vm_end);
 
 	if (file) {
 		mapping = file->f_mapping;
-		if (!(vma->vm_flags & VM_NONLINEAR)) {
-			root = &mapping->i_mmap;
-			uprobe_munmap(vma, vma->vm_start, vma->vm_end);
+		root = &mapping->i_mmap;
+		uprobe_munmap(vma, vma->vm_start, vma->vm_end);
 
-			if (adjust_next)
-				uprobe_munmap(next, next->vm_start,
-					      next->vm_end);
-		}
+		if (adjust_next)
+			uprobe_munmap(next, next->vm_start, next->vm_end);
 
 		i_mmap_lock_write(mapping);
 		if (insert) {
@@ -2634,6 +2625,75 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 	return vm_munmap(addr, len);
 }
 
+
+/*
+ * Emulation of deprecated remap_file_pages() syscall.
+ */
+SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
+		unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
+{
+
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long populate = 0;
+	unsigned long ret = -EINVAL;
+	struct file *file;
+
+	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
+			"See Documentation/vm/remap_file_pages.txt.\n",
+			current->comm, current->pid);
+
+	if (prot)
+		return ret;
+	start = start & PAGE_MASK;
+	size = size & PAGE_MASK;
+
+	if (start + size <= start)
+		return ret;
+
+	/* Does pgoff wrap? */
+	if (pgoff + (size >> PAGE_SHIFT) < pgoff)
+		return ret;
+
+	down_write(&mm->mmap_sem);
+	vma = find_vma(mm, start);
+
+	if (!vma || !(vma->vm_flags & VM_SHARED))
+		goto out;
+
+	if (start < vma->vm_start || start + size > vma->vm_end)
+		goto out;
+
+	if (pgoff == linear_page_index(vma, start)) {
+		ret = 0;
+		goto out;
+	}
+
+	prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
+	prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
+	prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
+
+	flags &= MAP_NONBLOCK;
+	flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
+	if (vma->vm_flags & VM_LOCKED) {
+		flags |= MAP_LOCKED;
+		/* drop PG_Mlocked flag for over-mapped range */
+		munlock_vma_pages_range(vma, start, start + size);
+	}
+
+	file = get_file(vma->vm_file);
+	ret = do_mmap_pgoff(vma->vm_file, start, size,
+			prot, flags, pgoff, &populate);
+	fput(file);
+out:
+	up_write(&mm->mmap_sem);
+	if (populate)
+		mm_populate(ret, populate);
+	if (!IS_ERR_VALUE(ret))
+		ret = 0;
+	return ret;
+}
+
 static inline void verify_mm_writelocked(struct mm_struct *mm)
 {
 #ifdef CONFIG_DEBUG_VM
@@ -3108,8 +3168,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  *
  * mmap_sem in write mode is required in order to block all operations
  * that could modify pagetables and free pages without need of
- * altering the vma layout (for example populate_range() with
- * nonlinear vmas). It's also needed in write mode to avoid new
+ * altering the vma layout. It's also needed in write mode to avoid new
  * anon_vmas to be associated with existing vmas.
  *
  * A single task can't take more than one mm_take_all_locks() in a row
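
For context, the emulation added above turns a nonlinear request into an ordinary MAP_FIXED mapping of the same file at the requested offset. A hedged user-space sketch of the deprecated call and its rough mmap() equivalent (the descriptor fd and the chosen page offsets are assumptions for illustration):

    #define _GNU_SOURCE
    #include <sys/mman.h>
    #include <unistd.h>

    /* Assumes fd refers to a writable file at least four pages long. */
    static void remap_example(int fd)
    {
        long psz = sysconf(_SC_PAGESIZE);
        char *base = mmap(NULL, 4 * psz, PROT_READ | PROT_WRITE,
                          MAP_SHARED, fd, 0);
        if (base == MAP_FAILED)
            return;

        /* Deprecated: make the first window page show file page 3 ... */
        remap_file_pages(base, psz, 0, 3, 0);

        /* ... which the kernel now emulates as an ordinary fixed mapping: */
        mmap(base, psz, PROT_READ | PROT_WRITE,
             MAP_SHARED | MAP_FIXED | MAP_POPULATE, fd, 3 * psz);

        munmap(base, 4 * psz);
    }
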
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ace93454ce8e..33121662f08b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -105,7 +105,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			}
 			if (updated)
 				pages++;
-		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
+		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
 			if (is_write_migration_entry(entry)) {
diff --git a/mm/mremap.c b/mm/mremap.c
index 17fa018f5f39..57dadc025c64 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -81,8 +81,6 @@ static pte_t move_soft_dirty_pte(pte_t pte)
 		pte = pte_mksoft_dirty(pte);
 	else if (is_swap_pte(pte))
 		pte = pte_swp_mksoft_dirty(pte);
-	else if (pte_file(pte))
-		pte = pte_file_mksoft_dirty(pte);
 #endif
 	return pte;
 }
diff --git a/mm/msync.c b/mm/msync.c
index 992a1673d488..bb04d53ae852 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -86,10 +86,7 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 		    (vma->vm_flags & VM_SHARED)) {
 			get_file(file);
 			up_read(&mm->mmap_sem);
-			if (vma->vm_flags & VM_NONLINEAR)
-				error = vfs_fsync(file, 1);
-			else
-				error = vfs_fsync_range(file, fstart, fend, 1);
+			error = vfs_fsync_range(file, fstart, fend, 1);
 			fput(file);
 			if (error || start >= end)
 				goto out;
diff --git a/mm/nommu.c b/mm/nommu.c
index 28bd8c4dff6f..541bed64e348 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1984,14 +1984,6 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
 }
 EXPORT_SYMBOL(filemap_map_pages);
 
-int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
-			     unsigned long size, pgoff_t pgoff)
-{
-	BUG();
-	return 0;
-}
-EXPORT_SYMBOL(generic_file_remap_pages);
-
 static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 			      unsigned long addr, void *buf, int len, int write)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8e20f9c2fa5a..f121050e8530 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -552,17 +552,15 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 		return 0;
 
 	if (page_is_guard(buddy) && page_order(buddy) == order) {
-		VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
-
 		if (page_zone_id(page) != page_zone_id(buddy))
 			return 0;
 
+		VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+
 		return 1;
 	}
 
 	if (PageBuddy(buddy) && page_order(buddy) == order) {
-		VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
-
 		/*
 		 * zone check is done late to avoid uselessly
 		 * calculating zone/node ids for pages that could
@@ -571,6 +569,8 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 		if (page_zone_id(page) != page_zone_id(buddy))
 			return 0;
 
+		VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+
 		return 1;
 	}
 	return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index 71cd5bd0c17d..70b32498d4f2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -590,9 +590,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 		if (!vma->anon_vma || !page__anon_vma ||
 		    vma->anon_vma->root != page__anon_vma->root)
 			return -EFAULT;
-	} else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
-		if (!vma->vm_file ||
-		    vma->vm_file->f_mapping != page->mapping)
+	} else if (page->mapping) {
+		if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
 			return -EFAULT;
 	} else
 		return -EFAULT;
@@ -1274,7 +1273,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		if (pte_soft_dirty(pteval))
 			swp_pte = pte_swp_mksoft_dirty(swp_pte);
 		set_pte_at(mm, address, pte, swp_pte);
-		BUG_ON(pte_file(*pte));
 	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
 		   (flags & TTU_MIGRATION)) {
 		/* Establish migration entry for a file page */
@@ -1316,211 +1314,6 @@ out_mlock:
 	return ret;
 }
 
-/*
- * objrmap doesn't work for nonlinear VMAs because the assumption that
- * offset-into-file correlates with offset-into-virtual-addresses does not hold.
- * Consequently, given a particular page and its ->index, we cannot locate the
- * ptes which are mapping that page without an exhaustive linear search.
- *
- * So what this code does is a mini "virtual scan" of each nonlinear VMA which
- * maps the file to which the target page belongs. The ->vm_private_data field
- * holds the current cursor into that scan. Successive searches will circulate
- * around the vma's virtual address space.
- *
- * So as more replacement pressure is applied to the pages in a nonlinear VMA,
- * more scanning pressure is placed against them as well. Eventually pages
- * will become fully unmapped and are eligible for eviction.
- *
- * For very sparsely populated VMAs this is a little inefficient - chances are
- * there there won't be many ptes located within the scan cluster. In this case
- * maybe we could scan further - to the end of the pte page, perhaps.
- *
- * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can
- * acquire it without blocking. If vma locked, mlock the pages in the cluster,
- * rather than unmapping them. If we encounter the "check_page" that vmscan is
- * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
- */
-#define CLUSTER_SIZE	min(32*PAGE_SIZE, PMD_SIZE)
-#define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))
-
-static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
-		struct vm_area_struct *vma, struct page *check_page)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	pmd_t *pmd;
-	pte_t *pte;
-	pte_t pteval;
-	spinlock_t *ptl;
-	struct page *page;
-	unsigned long address;
-	unsigned long mmun_start;	/* For mmu_notifiers */
-	unsigned long mmun_end;		/* For mmu_notifiers */
-	unsigned long end;
-	int ret = SWAP_AGAIN;
-	int locked_vma = 0;
-
-	address = (vma->vm_start + cursor) & CLUSTER_MASK;
-	end = address + CLUSTER_SIZE;
-	if (address < vma->vm_start)
-		address = vma->vm_start;
-	if (end > vma->vm_end)
-		end = vma->vm_end;
-
-	pmd = mm_find_pmd(mm, address);
-	if (!pmd)
-		return ret;
-
-	mmun_start = address;
-	mmun_end   = end;
-	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-
-	/*
-	 * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
-	 * keep the sem while scanning the cluster for mlocking pages.
-	 */
-	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
-		locked_vma = (vma->vm_flags & VM_LOCKED);
-		if (!locked_vma)
-			up_read(&vma->vm_mm->mmap_sem); /* don't need it */
-	}
-
-	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-
-	/* Update high watermark before we lower rss */
-	update_hiwater_rss(mm);
-
-	for (; address < end; pte++, address += PAGE_SIZE) {
-		if (!pte_present(*pte))
-			continue;
-		page = vm_normal_page(vma, address, *pte);
-		BUG_ON(!page || PageAnon(page));
-
-		if (locked_vma) {
-			if (page == check_page) {
-				/* we know we have check_page locked */
-				mlock_vma_page(page);
-				ret = SWAP_MLOCK;
-			} else if (trylock_page(page)) {
-				/*
-				 * If we can lock the page, perform mlock.
-				 * Otherwise leave the page alone, it will be
-				 * eventually encountered again later.
-				 */
-				mlock_vma_page(page);
-				unlock_page(page);
-			}
-			continue;	/* don't unmap */
-		}
-
-		/*
-		 * No need for _notify because we're within an
-		 * mmu_notifier_invalidate_range_ {start|end} scope.
-		 */
-		if (ptep_clear_flush_young(vma, address, pte))
-			continue;
-
-		/* Nuke the page table entry. */
-		flush_cache_page(vma, address, pte_pfn(*pte));
-		pteval = ptep_clear_flush_notify(vma, address, pte);
-
-		/* If nonlinear, store the file page offset in the pte. */
-		if (page->index != linear_page_index(vma, address)) {
-			pte_t ptfile = pgoff_to_pte(page->index);
-			if (pte_soft_dirty(pteval))
-				ptfile = pte_file_mksoft_dirty(ptfile);
-			set_pte_at(mm, address, pte, ptfile);
-		}
-
-		/* Move the dirty bit to the physical page now the pte is gone. */
-		if (pte_dirty(pteval))
-			set_page_dirty(page);
-
-		page_remove_rmap(page);
-		page_cache_release(page);
-		dec_mm_counter(mm, MM_FILEPAGES);
-		(*mapcount)--;
-	}
-	pte_unmap_unlock(pte - 1, ptl);
-	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-	if (locked_vma)
-		up_read(&vma->vm_mm->mmap_sem);
-	return ret;
-}
-
-static int try_to_unmap_nonlinear(struct page *page,
-		struct address_space *mapping, void *arg)
-{
-	struct vm_area_struct *vma;
-	int ret = SWAP_AGAIN;
-	unsigned long cursor;
-	unsigned long max_nl_cursor = 0;
-	unsigned long max_nl_size = 0;
-	unsigned int mapcount;
-
-	list_for_each_entry(vma,
-		&mapping->i_mmap_nonlinear, shared.nonlinear) {
-
-		cursor = (unsigned long) vma->vm_private_data;
-		if (cursor > max_nl_cursor)
-			max_nl_cursor = cursor;
-		cursor = vma->vm_end - vma->vm_start;
-		if (cursor > max_nl_size)
-			max_nl_size = cursor;
-	}
-
-	if (max_nl_size == 0) {	/* all nonlinears locked or reserved ? */
-		return SWAP_FAIL;
-	}
-
-	/*
-	 * We don't try to search for this page in the nonlinear vmas,
-	 * and page_referenced wouldn't have found it anyway. Instead
-	 * just walk the nonlinear vmas trying to age and unmap some.
-	 * The mapcount of the page we came in with is irrelevant,
-	 * but even so use it as a guide to how hard we should try?
-	 */
-	mapcount = page_mapcount(page);
-	if (!mapcount)
-		return ret;
-
-	cond_resched();
-
-	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
-	if (max_nl_cursor == 0)
-		max_nl_cursor = CLUSTER_SIZE;
-
-	do {
-		list_for_each_entry(vma,
-			&mapping->i_mmap_nonlinear, shared.nonlinear) {
-
-			cursor = (unsigned long) vma->vm_private_data;
-			while (cursor < max_nl_cursor &&
-				cursor < vma->vm_end - vma->vm_start) {
-				if (try_to_unmap_cluster(cursor, &mapcount,
-						vma, page) == SWAP_MLOCK)
-					ret = SWAP_MLOCK;
-				cursor += CLUSTER_SIZE;
-				vma->vm_private_data = (void *) cursor;
-				if ((int)mapcount <= 0)
-					return ret;
-			}
-			vma->vm_private_data = (void *) max_nl_cursor;
-		}
-		cond_resched();
-		max_nl_cursor += CLUSTER_SIZE;
-	} while (max_nl_cursor <= max_nl_size);
-
-	/*
-	 * Don't loop forever (perhaps all the remaining pages are
-	 * in locked vmas). Reset cursor on all unreserved nonlinear
-	 * vmas, now forgetting on which ones it had fallen behind.
-	 */
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
-		vma->vm_private_data = NULL;
-
-	return ret;
-}
-
 bool is_vma_temporary_stack(struct vm_area_struct *vma)
 {
 	int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
@@ -1566,7 +1359,6 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 		.rmap_one = try_to_unmap_one,
 		.arg = (void *)flags,
 		.done = page_not_mapped,
-		.file_nonlinear = try_to_unmap_nonlinear,
 		.anon_lock = page_lock_anon_vma_read,
 	};
 
@@ -1612,12 +1404,6 @@ int try_to_munlock(struct page *page)
 		.rmap_one = try_to_unmap_one,
 		.arg = (void *)TTU_MUNLOCK,
 		.done = page_not_mapped,
-		/*
-		 * We don't bother to try to find the munlocked page in
-		 * nonlinears. It's costly. Instead, later, page reclaim logic
-		 * may call try_to_unmap() and recover PG_mlocked lazily.
-		 */
-		.file_nonlinear = NULL,
 		.anon_lock = page_lock_anon_vma_read,
 
 	};
@@ -1748,13 +1534,6 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 		goto done;
 	}
 
-	if (!rwc->file_nonlinear)
-		goto done;
-
-	if (list_empty(&mapping->i_mmap_nonlinear))
-		goto done;
-
-	ret = rwc->file_nonlinear(page, mapping, rwc->arg);
 done:
 	i_mmap_unlock_read(mapping);
 	return ret;
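
After these hunks rmap_walk_control carries only the rmap_one/done/anon_lock hooks; the file_nonlinear slot and its callers are gone. As a generic illustration of the same struct-of-callbacks walker pattern (invented names, not the kernel API):

    #include <stddef.h>

    struct walk_control {
        int  (*visit)(void *item, void *arg);   /* called for each mapping */
        int  (*done)(void *arg);                /* optional early-exit test */
        void *arg;
    };

    static int walk(void **items, size_t n, const struct walk_control *wc)
    {
        int ret = 0;

        for (size_t i = 0; i < n; i++) {
            ret = wc->visit(items[i], wc->arg);
            if (ret)
                break;
            if (wc->done && wc->done(wc->arg))
                break;
        }
        return ret;
    }
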
diff --git a/mm/shmem.c b/mm/shmem.c
index 993e6ba689cc..b3e403181981 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3201,7 +3201,6 @@ static const struct vm_operations_struct shmem_vm_ops = {
 	.set_policy     = shmem_set_policy,
 	.get_policy     = shmem_get_policy,
 #endif
-	.remap_pages	= generic_file_remap_pages,
 };
 
 static struct dentry *shmem_mount(struct file_system_type *fs_type,
diff --git a/mm/slab.h b/mm/slab.h
index 1cf4005482dd..90430d6f665e 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -235,7 +235,7 @@ static __always_inline int memcg_charge_slab(struct kmem_cache *s,
 		return 0;
 	if (is_root_cache(s))
 		return 0;
-	return __memcg_charge_slab(s, gfp, order);
+	return memcg_charge_kmem(s->memcg_params->memcg, gfp, 1 << order);
 }
 
 static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
@@ -244,7 +244,7 @@ static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
 		return;
 	if (is_root_cache(s))
 		return;
-	__memcg_uncharge_slab(s, order);
+	memcg_uncharge_kmem(s->memcg_params->memcg, 1 << order);
 }
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index e03dd6f2a272..6e1e4cf65836 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -331,7 +331,7 @@ out:
 
 out_free_cache:
 	memcg_free_cache_params(s);
-	kfree(s);
+	kmem_cache_free(kmem_cache, s);
 	goto out;
 }
 
@@ -425,21 +425,64 @@ out_unlock:
 }
 EXPORT_SYMBOL(kmem_cache_create);
 
+static int do_kmem_cache_shutdown(struct kmem_cache *s,
+		struct list_head *release, bool *need_rcu_barrier)
+{
+	if (__kmem_cache_shutdown(s) != 0) {
+		printk(KERN_ERR "kmem_cache_destroy %s: "
+		       "Slab cache still has objects\n", s->name);
+		dump_stack();
+		return -EBUSY;
+	}
+
+	if (s->flags & SLAB_DESTROY_BY_RCU)
+		*need_rcu_barrier = true;
+
+#ifdef CONFIG_MEMCG_KMEM
+	if (!is_root_cache(s)) {
+		struct kmem_cache *root_cache = s->memcg_params->root_cache;
+		int memcg_id = memcg_cache_id(s->memcg_params->memcg);
+
+		BUG_ON(root_cache->memcg_params->memcg_caches[memcg_id] != s);
+		root_cache->memcg_params->memcg_caches[memcg_id] = NULL;
+	}
+#endif
+	list_move(&s->list, release);
+	return 0;
+}
+
+static void do_kmem_cache_release(struct list_head *release,
+				  bool need_rcu_barrier)
+{
+	struct kmem_cache *s, *s2;
+
+	if (need_rcu_barrier)
+		rcu_barrier();
+
+	list_for_each_entry_safe(s, s2, release, list) {
+#ifdef SLAB_SUPPORTS_SYSFS
+		sysfs_slab_remove(s);
+#else
+		slab_kmem_cache_release(s);
+#endif
+	}
+}
+
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * memcg_create_kmem_cache - Create a cache for a memory cgroup.
  * @memcg: The memory cgroup the new cache is for.
  * @root_cache: The parent of the new cache.
- * @memcg_name: The name of the memory cgroup (used for naming the new cache).
  *
  * This function attempts to create a kmem cache that will serve allocation
  * requests going from @memcg to @root_cache. The new cache inherits properties
  * from its parent.
  */
-struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
-					   struct kmem_cache *root_cache,
-					   const char *memcg_name)
+void memcg_create_kmem_cache(struct mem_cgroup *memcg,
+			     struct kmem_cache *root_cache)
 {
+	static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
+	int memcg_id = memcg_cache_id(memcg);
 	struct kmem_cache *s = NULL;
 	char *cache_name;
 
@@ -448,8 +491,18 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 
 	mutex_lock(&slab_mutex);
 
+	/*
+	 * Since per-memcg caches are created asynchronously on first
+	 * allocation (see memcg_kmem_get_cache()), several threads can try to
+	 * create the same cache, but only one of them may succeed.
+	 */
+	if (cache_from_memcg_idx(root_cache, memcg_id))
+		goto out_unlock;
+
+	cgroup_name(mem_cgroup_css(memcg)->cgroup,
+		    memcg_name_buf, sizeof(memcg_name_buf));
 	cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
-			       memcg_cache_id(memcg), memcg_name);
+			       memcg_cache_id(memcg), memcg_name_buf);
 	if (!cache_name)
 		goto out_unlock;
 
@@ -457,49 +510,73 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 				 root_cache->size, root_cache->align,
 				 root_cache->flags, root_cache->ctor,
 				 memcg, root_cache);
+	/*
+	 * If we could not create a memcg cache, do not complain, because
+	 * that's not critical at all as we can always proceed with the root
+	 * cache.
+	 */
 	if (IS_ERR(s)) {
 		kfree(cache_name);
-		s = NULL;
+		goto out_unlock;
 	}
 
+	/*
+	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
+	 * barrier here to ensure nobody will see the kmem_cache partially
+	 * initialized.
+	 */
+	smp_wmb();
+	root_cache->memcg_params->memcg_caches[memcg_id] = s;
+
 out_unlock:
 	mutex_unlock(&slab_mutex);
 
 	put_online_mems();
 	put_online_cpus();
-
-	return s;
 }
 
-static int memcg_cleanup_cache_params(struct kmem_cache *s)
+void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
 {
-	int rc;
+	LIST_HEAD(release);
+	bool need_rcu_barrier = false;
+	struct kmem_cache *s, *s2;
 
-	if (!s->memcg_params ||
-	    !s->memcg_params->is_root_cache)
-		return 0;
+	get_online_cpus();
+	get_online_mems();
 
-	mutex_unlock(&slab_mutex);
-	rc = __memcg_cleanup_cache_params(s);
 	mutex_lock(&slab_mutex);
+	list_for_each_entry_safe(s, s2, &slab_caches, list) {
+		if (is_root_cache(s) || s->memcg_params->memcg != memcg)
+			continue;
+		/*
+		 * The cgroup is about to be freed and therefore has no charges
+		 * left. Hence, all its caches must be empty by now.
+		 */
+		BUG_ON(do_kmem_cache_shutdown(s, &release, &need_rcu_barrier));
+	}
+	mutex_unlock(&slab_mutex);
 
-	return rc;
-}
-#else
-static int memcg_cleanup_cache_params(struct kmem_cache *s)
-{
-	return 0;
+	put_online_mems();
+	put_online_cpus();
+
+	do_kmem_cache_release(&release, need_rcu_barrier);
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
 void slab_kmem_cache_release(struct kmem_cache *s)
 {
+	memcg_free_cache_params(s);
 	kfree(s->name);
 	kmem_cache_free(kmem_cache, s);
 }
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
+	int i;
+	LIST_HEAD(release);
+	bool need_rcu_barrier = false;
+	bool busy = false;
+
 	get_online_cpus();
 	get_online_mems();
 
@@ -509,35 +586,23 @@ void kmem_cache_destroy(struct kmem_cache *s)
 	if (s->refcount)
 		goto out_unlock;
 
-	if (memcg_cleanup_cache_params(s) != 0)
-		goto out_unlock;
+	for_each_memcg_cache_index(i) {
+		struct kmem_cache *c = cache_from_memcg_idx(s, i);
 
-	if (__kmem_cache_shutdown(s) != 0) {
-		printk(KERN_ERR "kmem_cache_destroy %s: "
-		       "Slab cache still has objects\n", s->name);
-		dump_stack();
-		goto out_unlock;
+		if (c && do_kmem_cache_shutdown(c, &release, &need_rcu_barrier))
+			busy = true;
 	}
 
-	list_del(&s->list);
-
-	mutex_unlock(&slab_mutex);
-	if (s->flags & SLAB_DESTROY_BY_RCU)
-		rcu_barrier();
-
-	memcg_free_cache_params(s);
-#ifdef SLAB_SUPPORTS_SYSFS
-	sysfs_slab_remove(s);
-#else
-	slab_kmem_cache_release(s);
-#endif
-	goto out;
+	if (!busy)
+		do_kmem_cache_shutdown(s, &release, &need_rcu_barrier);
 
 out_unlock:
 	mutex_unlock(&slab_mutex);
-out:
+
 	put_online_mems();
 	put_online_cpus();
+
+	do_kmem_cache_release(&release, need_rcu_barrier);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
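
The new do_kmem_cache_shutdown()/do_kmem_cache_release() pair uses a common two-phase teardown: victims are unlinked onto a private list while the registry mutex is held, and the slow work (rcu_barrier(), sysfs removal) happens only after the mutex is dropped. A stand-alone sketch of that pattern with invented names, assuming pthreads:

    #include <pthread.h>
    #include <stdlib.h>

    struct cache {
        struct cache *next;
        void (*teardown)(struct cache *);   /* slow, must run unlocked */
    };

    static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct cache *registry;

    static void destroy_matching(int (*match)(struct cache *))
    {
        struct cache *release = NULL, *c, **pp;

        pthread_mutex_lock(&registry_lock);
        for (pp = &registry; (c = *pp) != NULL; ) {
            if (match(c)) {
                *pp = c->next;          /* unlink under the lock */
                c->next = release;      /* park on the private list */
                release = c;
            } else {
                pp = &c->next;
            }
        }
        pthread_mutex_unlock(&registry_lock);

        while ((c = release) != NULL) {     /* slow teardown, lock dropped */
            release = c->next;
            c->teardown(c);
            free(c);
        }
    }
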
diff --git a/mm/slub.c b/mm/slub.c
index fe376fe1f4fe..8b8508adf9c2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2398,13 +2398,24 @@ redo:
 	 * reading from one cpu area. That does not matter as long
 	 * as we end up on the original cpu again when doing the cmpxchg.
 	 *
-	 * Preemption is disabled for the retrieval of the tid because that
-	 * must occur from the current processor. We cannot allow rescheduling
-	 * on a different processor between the determination of the pointer
-	 * and the retrieval of the tid.
+	 * We should guarantee that tid and kmem_cache are retrieved on
+	 * the same cpu. It could be different if CONFIG_PREEMPT so we need
+	 * to check if it is matched or not.
 	 */
-	preempt_disable();
-	c = this_cpu_ptr(s->cpu_slab);
+	do {
+		tid = this_cpu_read(s->cpu_slab->tid);
+		c = raw_cpu_ptr(s->cpu_slab);
+	} while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
+
+	/*
+	 * Irqless object alloc/free algorithm used here depends on sequence
+	 * of fetching cpu_slab's data. tid should be fetched before anything
+	 * on c to guarantee that object and page associated with previous tid
+	 * won't be used with current tid. If we fetch tid first, object and
+	 * page could be one associated with next tid and our alloc/free
+	 * request will be failed. In this case, we will retry. So, no problem.
+	 */
+	barrier();
 
 	/*
 	 * The transaction ids are globally unique per cpu and per operation on
@@ -2412,8 +2423,6 @@ redo:
 	 * occurs on the right processor and that there was no operation on the
 	 * linked list in between.
 	 */
-	tid = c->tid;
-	preempt_enable();
 
 	object = c->freelist;
 	page = c->page;
@@ -2512,7 +2521,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
 #endif
 
 /*
- * Slow patch handling. This may still be called frequently since objects
+ * Slow path handling. This may still be called frequently since objects
  * have a longer lifetime than the cpu slabs in most processing loads.
  *
  * So we still attempt to reduce cache line usage. Just take the slab
@@ -2659,11 +2668,13 @@ redo:
 	 * data is retrieved via this pointer. If we are on the same cpu
 	 * during the cmpxchg then the free will succedd.
 	 */
-	preempt_disable();
-	c = this_cpu_ptr(s->cpu_slab);
+	do {
+		tid = this_cpu_read(s->cpu_slab->tid);
+		c = raw_cpu_ptr(s->cpu_slab);
+	} while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
 
-	tid = c->tid;
-	preempt_enable();
+	/* Same with comment on barrier() in slab_alloc_node() */
+	barrier();
 
 	if (likely(page == c->page)) {
 		set_freepointer(s, object, c->freelist);
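
The slub hunks replace preempt_disable()/preempt_enable() around the tid and cpu_slab reads with a fetch-and-recheck loop: read the per-cpu tid, read the per-cpu pointer, and retry if a migration between the two reads left them referring to different CPUs; the cmpxchg later in the fast path still validates the snapshot. A loose user-space sketch of the idiom (invented types, fixed-size per-CPU array as an assumption):

    #define _GNU_SOURCE
    #include <sched.h>

    struct cpu_slab {
        unsigned long tid;      /* unique per cpu, bumped on every operation */
        void *freelist;
    };

    static struct cpu_slab cpu_slabs[1024];     /* assumes CPU ids < 1024 */

    static struct cpu_slab *snapshot_cpu_slab(unsigned long *tid)
    {
        struct cpu_slab *c;

        do {
            *tid = cpu_slabs[sched_getcpu()].tid;
            c = &cpu_slabs[sched_getcpu()];
        } while (*tid != c->tid);   /* disagreement => migrated, retry */

        return c;
    }
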
diff --git a/mm/swap.c b/mm/swap.c
index 8a12b33936b4..5b3087228b99 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1140,10 +1140,8 @@ void __init swap_setup(void)
 
 	if (bdi_init(swapper_spaces[0].backing_dev_info))
 		panic("Failed to init swap bdi");
-	for (i = 0; i < MAX_SWAPFILES; i++) {
+	for (i = 0; i < MAX_SWAPFILES; i++)
 		spin_lock_init(&swapper_spaces[i].tree_lock);
-		INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
-	}
 #endif
 
 	/* Use a smaller cluster for small-memory machines */
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 1284f89fca08..9943e5fd74e6 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -17,6 +17,9 @@
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/vmstat.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
 #include <linux/sched.h>
 #include <linux/math64.h>
 #include <linux/writeback.h>
@@ -670,66 +673,6 @@ int fragmentation_index(struct zone *zone, unsigned int order)
 }
 #endif
 
-#if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-
-static char * const migratetype_names[MIGRATE_TYPES] = {
-	"Unmovable",
-	"Reclaimable",
-	"Movable",
-	"Reserve",
-#ifdef CONFIG_CMA
-	"CMA",
-#endif
-#ifdef CONFIG_MEMORY_ISOLATION
-	"Isolate",
-#endif
-};
-
-static void *frag_start(struct seq_file *m, loff_t *pos)
-{
-	pg_data_t *pgdat;
-	loff_t node = *pos;
-	for (pgdat = first_online_pgdat();
-	     pgdat && node;
-	     pgdat = next_online_pgdat(pgdat))
-		--node;
-
-	return pgdat;
-}
-
-static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
-{
-	pg_data_t *pgdat = (pg_data_t *)arg;
-
-	(*pos)++;
-	return next_online_pgdat(pgdat);
-}
-
-static void frag_stop(struct seq_file *m, void *arg)
-{
-}
-
-/* Walk all the zones in a node and print using a callback */
-static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
-		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
-{
-	struct zone *zone;
-	struct zone *node_zones = pgdat->node_zones;
-	unsigned long flags;
-
-	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
-		if (!populated_zone(zone))
-			continue;
-
-		spin_lock_irqsave(&zone->lock, flags);
-		print(m, pgdat, zone);
-		spin_unlock_irqrestore(&zone->lock, flags);
-	}
-}
-#endif
-
 #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
 #ifdef CONFIG_ZONE_DMA
 #define TEXT_FOR_DMA(xx) xx "_dma",
@@ -907,7 +850,66 @@ const char * const vmstat_text[] = {
 #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
 
 
+#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
+     defined(CONFIG_PROC_FS)
+static void *frag_start(struct seq_file *m, loff_t *pos)
+{
+	pg_data_t *pgdat;
+	loff_t node = *pos;
+
+	for (pgdat = first_online_pgdat();
+	     pgdat && node;
+	     pgdat = next_online_pgdat(pgdat))
+		--node;
+
+	return pgdat;
+}
+
+static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
+{
+	pg_data_t *pgdat = (pg_data_t *)arg;
+
+	(*pos)++;
+	return next_online_pgdat(pgdat);
+}
+
+static void frag_stop(struct seq_file *m, void *arg)
+{
+}
+
+/* Walk all the zones in a node and print using a callback */
+static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
+		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
+{
+	struct zone *zone;
+	struct zone *node_zones = pgdat->node_zones;
+	unsigned long flags;
+
+	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
+		if (!populated_zone(zone))
+			continue;
+
+		spin_lock_irqsave(&zone->lock, flags);
+		print(m, pgdat, zone);
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+}
+#endif
+
 #ifdef CONFIG_PROC_FS
+static char * const migratetype_names[MIGRATE_TYPES] = {
+	"Unmovable",
+	"Reclaimable",
+	"Movable",
+	"Reserve",
+#ifdef CONFIG_CMA
+	"CMA",
+#endif
+#ifdef CONFIG_MEMORY_ISOLATION
+	"Isolate",
+#endif
+};
+
 static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
 						struct zone *zone)
 {
@@ -1536,8 +1538,6 @@ static int __init setup_vmstat(void)
 module_init(setup_vmstat)
 
 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
-#include <linux/debugfs.h>
-
 
 /*
  * Return an index indicating how much of the available free memory is