Merge branch 'akpm' (patches from Andrew)

Merge updates from Andrew Morton:

 - misc fixes

 - ocfs2 updates

 - most of MM

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (118 commits)
  mm: remove PG_highmem description
  tools, vm: new option to specify kpageflags file
  mm/swap.c: make functions and their kernel-doc agree
  mm, memory_hotplug: fix memmap initialization
  mm: correct comments regarding do_fault_around()
  mm: numa: do not trap faults on shared data section pages.
  hugetlb, mbind: fall back to default policy if vma is NULL
  hugetlb, mempolicy: fix the mbind hugetlb migration
  mm, hugetlb: further simplify hugetlb allocation API
  mm, hugetlb: get rid of surplus page accounting tricks
  mm, hugetlb: do not rely on overcommit limit during migration
  mm, hugetlb: integrate giga hugetlb more naturally to the allocation path
  mm, hugetlb: unify core page allocation accounting and initialization
  mm/memcontrol.c: try harder to decrease [memory,memsw].limit_in_bytes
  mm/memcontrol.c: make local symbol static
  mm/hmm: fix uninitialized use of 'entry' in hmm_vma_walk_pmd()
  include/linux/mmzone.h: fix explanation of lower bits in the SPARSEMEM mem_map pointer
  mm/compaction.c: fix comment for try_to_compact_pages()
  mm/page_ext.c: make page_ext_init a noop when CONFIG_PAGE_EXTENSION but nothing uses it
  zsmalloc: use U suffix for negative literals being shifted
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2018-01-31 21:46:22 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-01-31 21:46:22 -0500
commit: 73da9e1a9f310a449eeb9bf5735a9cd475fef5e2 (patch)
tree: 82cd78255b0a480340a8427e7ba5586df8280ac4 /include/linux
parent: b2fe5fa68642860e7de76167c3111623aa0d5de1 (diff)
parent: 3f56a2f8030071cf86520ef4fc3045ba6856e610 (diff)
13 files changed, 254 insertions, 216 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 82a25880714a..36fa6a2a82e3 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -119,6 +119,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
                                                long freed);
 bool isolate_huge_page(struct page *page, struct list_head *list);
 void putback_active_hugepage(struct page *page);
+void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
 void free_huge_page(struct page *page);
 void hugetlb_fix_reserve_counts(struct inode *inode);
 extern struct mutex *hugetlb_fault_mutex_table;
@@ -129,7 +130,6 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
 pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
-extern int hugepages_treat_as_movable;
 extern int sysctl_hugetlb_shm_group;
 extern struct list_head huge_boot_pages;
@@ -158,6 +158,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                unsigned long address, unsigned long end, pgprot_t newprot);
 bool is_hugetlb_entry_migration(pte_t pte);
 #else /* !CONFIG_HUGETLB_PAGE */
 static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
@@ -198,6 +199,7 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list)
        return false;
 }
 #define putback_active_hugepage(p)      do {} while (0)
+#define move_hugetlb_state(old, new, reason)    do {} while (0)
 static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                unsigned long address, unsigned long end, pgprot_t newprot)
@@ -271,6 +273,17 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
        return sb->s_fs_info;
 }
+struct hugetlbfs_inode_info {
+        struct shared_policy policy;
+        struct inode vfs_inode;
+        unsigned int seals;
+};
+static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
+{
+        return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
+}
 extern const struct file_operations hugetlbfs_file_operations;
 extern const struct vm_operations_struct hugetlb_vm_ops;
 struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
@@ -343,10 +356,10 @@ struct huge_bootmem_page {
 struct page *alloc_huge_page(struct vm_area_struct *vma,
                                unsigned long addr, int avoid_reserve);
 struct page *alloc_huge_page_node(struct hstate *h, int nid);
-struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
-                                unsigned long addr, int avoid_reserve);
 struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
                                nodemask_t *nmask);
+struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
+                                unsigned long address);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
                        pgoff_t idx);
@@ -524,7 +537,7 @@ struct hstate {};
 #define alloc_huge_page(v, a, r) NULL
 #define alloc_huge_page_node(h, nid) NULL
 #define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL
-#define alloc_huge_page_noerr(v, a, r) NULL
+#define alloc_huge_page_vma(h, vma, address) NULL
 #define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
 #define hstate_sizelog(s) NULL
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 69966c461d1c..882046863581 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -108,7 +108,10 @@ struct lruvec_stat {
 */
 struct mem_cgroup_per_node {
        struct lruvec           lruvec;
-        struct lruvec_stat __percpu *lruvec_stat;
+        struct lruvec_stat __percpu *lruvec_stat_cpu;
+        atomic_long_t           lruvec_stat[NR_VM_NODE_STAT_ITEMS];
        unsigned long           lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
        struct mem_cgroup_reclaim_iter  iter[DEF_PRIORITY + 1];
@@ -227,10 +230,10 @@ struct mem_cgroup {
        spinlock_t              move_lock;
        struct task_struct      *move_lock_task;
        unsigned long           move_lock_flags;
-        /*
-         * percpu counter.
+        struct mem_cgroup_stat_cpu __percpu *stat_cpu;
-         */
+        atomic_long_t           stat[MEMCG_NR_STAT];
-        struct mem_cgroup_stat_cpu __percpu *stat;
+        atomic_long_t           events[MEMCG_NR_EVENTS];
        unsigned long           socket_pressure;
@@ -265,6 +268,12 @@ struct mem_cgroup {
        /* WARNING: nodeinfo must be the last member here */
 };
+/*
+ * size of first charge trial. "32" comes from vmscan.c's magic value.
+ * TODO: maybe necessary to use big numbers in big irons.
+ */
+#define MEMCG_CHARGE_BATCH 32U
 extern struct mem_cgroup *root_mem_cgroup;
 static inline bool mem_cgroup_disabled(void)
@@ -272,13 +281,6 @@ static inline bool mem_cgroup_disabled(void)
        return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-                                    enum memcg_event_item event)
-{
-        this_cpu_inc(memcg->stat->events[event]);
-        cgroup_file_notify(&memcg->events_file);
-}
 bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
@@ -492,32 +494,38 @@ void unlock_page_memcg(struct page *page);
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
                                             int idx)
 {
-        long val = 0;
+        long x = atomic_long_read(&memcg->stat[idx]);
-        int cpu;
+#ifdef CONFIG_SMP
+        if (x < 0)
-        for_each_possible_cpu(cpu)
+                x = 0;
-                val += per_cpu(memcg->stat->count[idx], cpu);
+#endif
+        return x;
-        if (val < 0)
-                val = 0;
-        return val;
 }
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void __mod_memcg_state(struct mem_cgroup *memcg,
                                     int idx, int val)
 {
-        if (!mem_cgroup_disabled())
+        long x;
-                __this_cpu_add(memcg->stat->count[idx], val);
+        if (mem_cgroup_disabled())
+                return;
+        x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
+        if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+                atomic_long_add(x, &memcg->stat[idx]);
+                x = 0;
+        }
+        __this_cpu_write(memcg->stat_cpu->count[idx], x);
 }
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void mod_memcg_state(struct mem_cgroup *memcg,
                                   int idx, int val)
 {
-        if (!mem_cgroup_disabled())
+        preempt_disable();
-                this_cpu_add(memcg->stat->count[idx], val);
+        __mod_memcg_state(memcg, idx, val);
+        preempt_enable();
 }
 /**
@@ -555,87 +563,108 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                              enum node_stat_item idx)
 {
        struct mem_cgroup_per_node *pn;
-        long val = 0;
+        long x;
-        int cpu;
        if (mem_cgroup_disabled())
                return node_page_state(lruvec_pgdat(lruvec), idx);
        pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-        for_each_possible_cpu(cpu)
+        x = atomic_long_read(&pn->lruvec_stat[idx]);
-                val += per_cpu(pn->lruvec_stat->count[idx], cpu);
+#ifdef CONFIG_SMP
+        if (x < 0)
-        if (val < 0)
+                x = 0;
-                val = 0;
+#endif
+        return x;
-        return val;
 }
 static inline void __mod_lruvec_state(struct lruvec *lruvec,
                                      enum node_stat_item idx, int val)
 {
        struct mem_cgroup_per_node *pn;
+        long x;
+        /* Update node */
        __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
        if (mem_cgroup_disabled())
                return;
        pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+        /* Update memcg */
        __mod_memcg_state(pn->memcg, idx, val);
-        __this_cpu_add(pn->lruvec_stat->count[idx], val);
+        /* Update lruvec */
+        x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
+        if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+                atomic_long_add(x, &pn->lruvec_stat[idx]);
+                x = 0;
+        }
+        __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
 }
 static inline void mod_lruvec_state(struct lruvec *lruvec,
                                    enum node_stat_item idx, int val)
 {
-        struct mem_cgroup_per_node *pn;
+        preempt_disable();
+        __mod_lruvec_state(lruvec, idx, val);
-        mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+        preempt_enable();
-        if (mem_cgroup_disabled())
-                return;
-        pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-        mod_memcg_state(pn->memcg, idx, val);
-        this_cpu_add(pn->lruvec_stat->count[idx], val);
 }
 static inline void __mod_lruvec_page_state(struct page *page,
                                           enum node_stat_item idx, int val)
 {
-        struct mem_cgroup_per_node *pn;
+        pg_data_t *pgdat = page_pgdat(page);
+        struct lruvec *lruvec;
-        __mod_node_page_state(page_pgdat(page), idx, val);
+        /* Untracked pages have no memcg, no lruvec. Update only the node */
-        if (mem_cgroup_disabled() || !page->mem_cgroup)
+        if (!page->mem_cgroup) {
+                __mod_node_page_state(pgdat, idx, val);
                return;
-        __mod_memcg_state(page->mem_cgroup, idx, val);
+        }
-        pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
-        __this_cpu_add(pn->lruvec_stat->count[idx], val);
+        lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
+        __mod_lruvec_state(lruvec, idx, val);
 }
 static inline void mod_lruvec_page_state(struct page *page,
                                         enum node_stat_item idx, int val)
 {
-        struct mem_cgroup_per_node *pn;
+        preempt_disable();
+        __mod_lruvec_page_state(page, idx, val);
-        mod_node_page_state(page_pgdat(page), idx, val);
+        preempt_enable();
-        if (mem_cgroup_disabled() || !page->mem_cgroup)
-                return;
-        mod_memcg_state(page->mem_cgroup, idx, val);
-        pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
-        this_cpu_add(pn->lruvec_stat->count[idx], val);
 }
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                                gfp_t gfp_mask,
                                                unsigned long *total_scanned);
+/* idx can be of type enum memcg_event_item or vm_event_item */
+static inline void __count_memcg_events(struct mem_cgroup *memcg,
+                                        int idx, unsigned long count)
+{
+        unsigned long x;
+        if (mem_cgroup_disabled())
+                return;
+        x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
+        if (unlikely(x > MEMCG_CHARGE_BATCH)) {
+                atomic_long_add(x, &memcg->events[idx]);
+                x = 0;
+        }
+        __this_cpu_write(memcg->stat_cpu->events[idx], x);
+}
 static inline void count_memcg_events(struct mem_cgroup *memcg,
-                                      enum vm_event_item idx,
+                                      int idx, unsigned long count)
-                                      unsigned long count)
 {
-        if (!mem_cgroup_disabled())
+        preempt_disable();
-                this_cpu_add(memcg->stat->events[idx], count);
+        __count_memcg_events(memcg, idx, count);
+        preempt_enable();
 }
-/* idx can be of type enum memcg_stat_item or node_stat_item */
+/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void count_memcg_page_event(struct page *page,
                                          int idx)
 {
@@ -654,12 +683,20 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
        rcu_read_lock();
        memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
        if (likely(memcg)) {
-                this_cpu_inc(memcg->stat->events[idx]);
+                count_memcg_events(memcg, idx, 1);
                if (idx == OOM_KILL)
                        cgroup_file_notify(&memcg->events_file);
        }
        rcu_read_unlock();
 }
+static inline void mem_cgroup_event(struct mem_cgroup *memcg,
+                                    enum memcg_event_item event)
+{
+        count_memcg_events(memcg, event, 1);
+        cgroup_file_notify(&memcg->events_file);
+}
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7fc92384977e..173d2484f6e3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1312,8 +1312,6 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
                unsigned long end, unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
                        struct vm_area_struct *vma);
-void unmap_mapping_range(struct address_space *mapping,
-                loff_t const holebegin, loff_t const holelen, int even_cows);
 int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
                             unsigned long *start, unsigned long *end,
                             pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
@@ -1324,12 +1322,6 @@ int follow_phys(struct vm_area_struct *vma, unsigned long address,
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
                        void *buf, int len, int write);
-static inline void unmap_shared_mapping_range(struct address_space *mapping,
-                loff_t const holebegin, loff_t const holelen)
-{
-        unmap_mapping_range(mapping, holebegin, holelen, 0);
-}
 extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
@@ -1344,6 +1336,10 @@ extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
                            unsigned long address, unsigned int fault_flags,
                            bool *unlocked);
+void unmap_mapping_pages(struct address_space *mapping,
+                pgoff_t start, pgoff_t nr, bool even_cows);
+void unmap_mapping_range(struct address_space *mapping,
+                loff_t const holebegin, loff_t const holelen, int even_cows);
 #else
 static inline int handle_mm_fault(struct vm_area_struct *vma,
                unsigned long address, unsigned int flags)
@@ -1360,10 +1356,20 @@ static inline int fixup_user_fault(struct task_struct *tsk,
        BUG();
        return -EFAULT;
 }
+static inline void unmap_mapping_pages(struct address_space *mapping,
+                pgoff_t start, pgoff_t nr, bool even_cows) { }
+static inline void unmap_mapping_range(struct address_space *mapping,
+                loff_t const holebegin, loff_t const holelen, int even_cows) { }
 #endif
-extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len,
+static inline void unmap_shared_mapping_range(struct address_space *mapping,
-                unsigned int gup_flags);
+                loff_t const holebegin, loff_t const holelen)
+{
+        unmap_mapping_range(mapping, holebegin, holelen, 0);
+}
+extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
+                void *buf, int len, unsigned int gup_flags);
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
                void *buf, int len, unsigned int gup_flags);
 extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index cfd0ac4e5e0e..fd1af6b9591d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -31,28 +31,56 @@ struct hmm;
 * it to keep track of whatever it is we are using the page for at the
 * moment. Note that we have no way to track which tasks are using
 * a page, though if it is a pagecache page, rmap structures can tell us
- * who is mapping it.
+ * who is mapping it. If you allocate the page using alloc_pages(), you
+ * can use some of the space in struct page for your own purposes.
 *
- * The objects in struct page are organized in double word blocks in
+ * Pages that were once in the page cache may be found under the RCU lock
- * order to allows us to use atomic double word operations on portions
+ * even after they have been recycled to a different purpose.  The page
- * of struct page. That is currently only used by slub but the arrangement
+ * cache reads and writes some of the fields in struct page to pin the
- * allows the use of atomic double word operations on the flags/mapping
+ * page before checking that it's still in the page cache.  It is vital
- * and lru list pointers also.
+ * that all users of struct page:
+ * 1. Use the first word as PageFlags.
+ * 2. Clear or preserve bit 0 of page->compound_head.  It is used as
+ *    PageTail for compound pages, and the page cache must not see false
+ *    positives.  Some users put a pointer here (guaranteed to be at least
+ *    4-byte aligned), other users avoid using the field altogether.
+ * 3. page->_refcount must either not be used, or must be used in such a
+ *    way that other CPUs temporarily incrementing and then decrementing the
+ *    refcount does not cause problems.  On receiving the page from
+ *    alloc_pages(), the refcount will be positive.
+ * 4. Either preserve page->_mapcount or restore it to -1 before freeing it.
+ *
+ * If you allocate pages of order > 0, you can use the fields in the struct
+ * page associated with each page, but bear in mind that the pages may have
+ * been inserted individually into the page cache, so you must use the above
+ * four fields in a compatible way for each struct page.
+ *
+ * SLUB uses cmpxchg_double() to atomically update its freelist and
+ * counters.  That requires that freelist & counters be adjacent and
+ * double-word aligned.  We align all struct pages to double-word
+ * boundaries, and ensure that 'freelist' is aligned within the
+ * struct.
 */
+#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
+#define _struct_page_alignment  __aligned(2 * sizeof(unsigned long))
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
+#define _slub_counter_t         unsigned long
+#else
+#define _slub_counter_t         unsigned int
+#endif
+#else /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */
+#define _struct_page_alignment
+#define _slub_counter_t         unsigned int
+#endif /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */
 struct page {
        /* First double word block */
        unsigned long flags;            /* Atomic flags, some possibly
                                         * updated asynchronously */
        union {
-                struct address_space *mapping;  /* If low bit clear, points to
+                /* See page-flags.h for the definition of PAGE_MAPPING_FLAGS */
-                                                 * inode address_space, or NULL.
+                struct address_space *mapping;
-                                                 * If page mapped as anonymous
-                                                 * memory, low bit is set, and
-                                                 * it points to anon_vma object
-                                                 * or KSM private structure. See
-                                                 * PAGE_MAPPING_ANON and
-                                                 * PAGE_MAPPING_KSM.
-                                                 */
                void *s_mem;                    /* slab first object */
                atomic_t compound_mapcount;     /* first tail page */
                /* page_deferred_list().next     -- second tail page */
@@ -66,40 +94,27 @@ struct page {
        };
        union {
-#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
+                _slub_counter_t counters;
-        defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
+                unsigned int active;            /* SLAB */
-                /* Used for cmpxchg_double in slub */
+                struct {                        /* SLUB */
-                unsigned long counters;
+                        unsigned inuse:16;
-#else
+                        unsigned objects:15;
-                /*
+                        unsigned frozen:1;
-                 * Keep _refcount separate from slub cmpxchg_double data.
+                };
-                 * As the rest of the double word is protected by slab_lock
+                int units;                      /* SLOB */
-                 * but _refcount is not.
-                 */
+                struct {                        /* Page cache */
-                unsigned counters;
+                        /*
-#endif
+                         * Count of ptes mapped in mms, to show when
-                struct {
+                         * page is mapped & limit reverse map searches.
+                         *
+                         * Extra information about page type may be
+                         * stored here for pages that are never mapped,
+                         * in which case the value MUST BE <= -2.
+                         * See page-flags.h for more details.
+                         */
+                        atomic_t _mapcount;
-                        union {
-                                /*
-                                 * Count of ptes mapped in mms, to show when
-                                 * page is mapped & limit reverse map searches.
-                                 *
-                                 * Extra information about page type may be
-                                 * stored here for pages that are never mapped,
-                                 * in which case the value MUST BE <= -2.
-                                 * See page-flags.h for more details.
-                                 */
-                                atomic_t _mapcount;
-                                unsigned int active;            /* SLAB */
-                                struct {                        /* SLUB */
-                                        unsigned inuse:16;
-                                        unsigned objects:15;
-                                        unsigned frozen:1;
-                                };
-                                int units;                      /* SLOB */
-                        };
                        /*
                         * Usage count, *USE WRAPPER FUNCTION* when manual
                         * accounting. See page_ref.h
@@ -109,8 +124,6 @@ struct page {
        };
        /*
-         * Third double word block
-         *
         * WARNING: bit 0 of the first word encode PageTail(). That means
         * the rest users of the storage space MUST NOT use the bit to
         * avoid collision and false-positive PageTail().
@@ -145,19 +158,9 @@ struct page {
                        unsigned long compound_head; /* If bit zero is set */
                        /* First tail page only */
-#ifdef CONFIG_64BIT
+                        unsigned char compound_dtor;
-                        /*
+                        unsigned char compound_order;
-                         * On 64 bit system we have enough space in struct page
+                        /* two/six bytes available here */
-                         * to encode compound_dtor and compound_order with
-                         * unsigned int. It can help compiler generate better or
-                         * smaller code on some archtectures.
-                         */
-                        unsigned int compound_dtor;
-                        unsigned int compound_order;
-#else
-                        unsigned short int compound_dtor;
-                        unsigned short int compound_order;
-#endif
                };
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
@@ -171,15 +174,14 @@ struct page {
 #endif
        };
-        /* Remainder is not double word aligned */
        union {
-                unsigned long private;          /* Mapping-private opaque data:
+                /*
-                                                 * usually used for buffer_heads
+                 * Mapping-private opaque data:
-                                                 * if PagePrivate set; used for
+                 * Usually used for buffer_heads if PagePrivate
-                                                 * swp_entry_t if PageSwapCache;
+                 * Used for swp_entry_t if PageSwapCache
-                                                 * indicates order in the buddy
+                 * Indicates order in the buddy system if PageBuddy
-                                                 * system if PG_buddy is set.
+                 */
-                                                 */
+                unsigned long private;
 #if USE_SPLIT_PTE_PTLOCKS
 #if ALLOC_SPLIT_PTLOCKS
                spinlock_t *ptl;
@@ -212,15 +214,7 @@ struct page {
 #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
        int _last_cpupid;
 #endif
-}
+} _struct_page_alignment;
-/*
- * The struct page can be forced to be double word aligned so that atomic ops
- * on double words work. The SLUB allocator can make use of such a feature.
- */
-#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
-        __aligned(2 * sizeof(unsigned long))
-#endif
-;
 #define PAGE_FRAG_CACHE_MAX_SIZE        __ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER       get_order(PAGE_FRAG_CACHE_MAX_SIZE)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index b25dc9db19fc..2d07a1ed5a31 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_MMU_NOTIFIER_H
 #define _LINUX_MMU_NOTIFIER_H
+#include <linux/types.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/mm_types.h>
@@ -10,6 +11,9 @@
 struct mmu_notifier;
 struct mmu_notifier_ops;
+/* mmu_notifier_ops flags */
+#define MMU_INVALIDATE_DOES_NOT_BLOCK   (0x01)
 #ifdef CONFIG_MMU_NOTIFIER
 /*
@@ -27,6 +31,15 @@ struct mmu_notifier_mm {
 struct mmu_notifier_ops {
        /*
+         * Flags to specify behavior of callbacks for this MMU notifier.
+         * Used to determine which context an operation may be called.
+         *
+         * MMU_INVALIDATE_DOES_NOT_BLOCK: invalidate_range_* callbacks do not
+         *      block
+         */
+        int flags;
+        /*
         * Called either by mmu_notifier_unregister or when the mm is
         * being destroyed by exit_mmap, always before all pages are
         * freed. This can run concurrently with other mmu notifier
@@ -137,6 +150,10 @@ struct mmu_notifier_ops {
         * page. Pages will no longer be referenced by the linux
         * address space but may still be referenced by sptes until
         * the last refcount is dropped.
+         *
+         * If both of these callbacks cannot block, and invalidate_range
+         * cannot block, mmu_notifier_ops.flags should have
+         * MMU_INVALIDATE_DOES_NOT_BLOCK set.
         */
        void (*invalidate_range_start)(struct mmu_notifier *mn,
                                       struct mm_struct *mm,
@@ -159,12 +176,13 @@ struct mmu_notifier_ops {
         * external TLB range needs to be flushed. For more in depth
         * discussion on this see Documentation/vm/mmu_notifier.txt
         *
-         * The invalidate_range() function is called under the ptl
-         * spin-lock and not allowed to sleep.
-         *
         * Note that this function might be called with just a sub-range
         * of what was passed to invalidate_range_start()/end(), if
         * called between those functions.
+         *
+         * If this callback cannot block, and invalidate_range_{start,end}
+         * cannot block, mmu_notifier_ops.flags should have
+         * MMU_INVALIDATE_DOES_NOT_BLOCK set.
         */
        void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
                                 unsigned long start, unsigned long end);
@@ -218,6 +236,7 @@ extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  bool only_end);
 extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);
+extern bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm);
 static inline void mmu_notifier_release(struct mm_struct *mm)
 {
@@ -457,6 +476,11 @@ static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
 {
 }
+static inline bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm)
+{
+        return false;
+}
 static inline void mmu_notifier_mm_init(struct mm_struct *mm)
 {
 }
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 67f2e3c38939..7522a6987595 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1166,8 +1166,16 @@ extern unsigned long usemap_size(void);
 /*
 * We use the lower bits of the mem_map pointer to store
- * a little bit of information.  There should be at least
+ * a little bit of information.  The pointer is calculated
- * 3 bits here due to 32-bit alignment.
+ * as mem_map - section_nr_to_pfn(pnum).  The result is
+ * aligned to the minimum alignment of the two values:
+ *   1. All mem_map arrays are page-aligned.
+ *   2. section_nr_to_pfn() always clears PFN_SECTION_SHIFT
+ *      lowest bits.  PFN_SECTION_SHIFT is arch-specific
+ *      (equal SECTION_SIZE_BITS - PAGE_SHIFT), and the
+ *      worst combination is powerpc with 256k pages,
+ *      which results in PFN_SECTION_SHIFT equal 6.
+ * To sum it up, at least 6 bits are available.
 */
 #define SECTION_MARKED_PRESENT  (1UL<<0)
 #define SECTION_HAS_MEM_MAP     (1UL<<1)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3ec44e27aa9d..50c2b8786831 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -46,11 +46,6 @@
 * guarantees that this bit is cleared for a page when it first is entered into
 * the page cache.
 *
- * PG_highmem pages are not permanently mapped into the kernel virtual address
- * space, they need to be kmapped separately for doing IO on the pages.  The
- * struct page (these bits with information) are always mapped into kernel
- * address space...
- *
 * PG_hwpoison indicates that a page got corrupted in hardware and contains
 * data with incorrect ECC bits that triggered a machine check. Accessing is
 * not safe since it may cause another machine check. Don't touch!
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 5fb6580f7f23..6dc456ac6136 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -9,14 +9,14 @@
 #ifndef _LINUX_PAGEVEC_H
 #define _LINUX_PAGEVEC_H
-/* 14 pointers + two long's align the pagevec structure to a power of two */
+/* 15 pointers + header align the pagevec structure to a power of two */
-#define PAGEVEC_SIZE    14
+#define PAGEVEC_SIZE    15
 struct page;
 struct address_space;
 struct pagevec {
-        unsigned long nr;
+        unsigned char nr;
        bool percpu_pvec_drained;
        struct page *pages[PAGEVEC_SIZE];
 };
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 3d49b91b674d..bd422561a75e 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -11,7 +11,7 @@
 /*
 * Routines for handling mm_structs
 */
-extern struct mm_struct * mm_alloc(void);
+extern struct mm_struct *mm_alloc(void);
 /**
 * mmgrab() - Pin a &struct mm_struct.
@@ -35,27 +35,7 @@ static inline void mmgrab(struct mm_struct *mm)
        atomic_inc(&mm->mm_count);
 }
-/* mmdrop drops the mm and the page tables */
+extern void mmdrop(struct mm_struct *mm);
-extern void __mmdrop(struct mm_struct *);
-static inline void mmdrop(struct mm_struct *mm)
-{
-        if (unlikely(atomic_dec_and_test(&mm->mm_count)))
-                __mmdrop(mm);
-}
-static inline void mmdrop_async_fn(struct work_struct *work)
-{
-        struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
-        __mmdrop(mm);
-}
-static inline void mmdrop_async(struct mm_struct *mm)
-{
-        if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
-                INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
-                schedule_work(&mm->async_put_work);
-        }
-}
 /**
 * mmget() - Pin the address space associated with a &struct mm_struct.
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 06b295bec00d..73b5e655a76e 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -112,13 +112,11 @@ extern void shmem_uncharge(struct inode *inode, long pages);
 #ifdef CONFIG_TMPFS
-extern int shmem_add_seals(struct file *file, unsigned int seals);
+extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
-extern int shmem_get_seals(struct file *file);
-extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
 #else
-static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a)
+static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned long a)
 {
        return -EINVAL;
 }
diff --git a/include/linux/swap.h b/include/linux/swap.h
index c2b8128799c1..7b6a59f722a3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -332,7 +332,6 @@ extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
-extern void lru_add_drain_all_cpuslocked(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_file_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
@@ -345,7 +344,6 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
 /* linux/mm/vmscan.c */
 extern unsigned long zone_reclaimable_pages(struct zone *zone);
-extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat);
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                        gfp_t gfp_mask, nodemask_t *mask);
 extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 1779c9817b39..a4c2317d8b9f 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -216,23 +216,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
        return x;
 }
-static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat,
-                                        enum node_stat_item item)
-{
-        long x = atomic_long_read(&pgdat->vm_stat[item]);
-#ifdef CONFIG_SMP
-        int cpu;
-        for_each_online_cpu(cpu)
-                x += per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->vm_node_stat_diff[item];
-        if (x < 0)
-                x = 0;
-#endif
-        return x;
-}
 #ifdef CONFIG_NUMA
 extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item);
 extern unsigned long sum_zone_node_page_state(int node,
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 004ba807df96..7238865e75b0 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -108,4 +108,6 @@ void zpool_register_driver(struct zpool_driver *driver);
 int zpool_unregister_driver(struct zpool_driver *driver);
+bool zpool_evictable(struct zpool *pool);
 #endif
author	Linus Torvalds <torvalds@linux-foundation.org>	2018-01-31 21:46:22 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-01-31 21:46:22 -0500
commit	73da9e1a9f310a449eeb9bf5735a9cd475fef5e2 (patch)
tree	82cd78255b0a480340a8427e7ba5586df8280ac4 /include/linux
parent	b2fe5fa68642860e7de76167c3111623aa0d5de1 (diff)
parent	3f56a2f8030071cf86520ef4fc3045ba6856e610 (diff)