path: root/mm
author    Thomas Gleixner <tglx@linutronix.de>    2014-02-21 15:36:40 -0500
committer Thomas Gleixner <tglx@linutronix.de>    2014-02-21 15:37:09 -0500
commit    d97a860c4f3de98ba5040a22f305b7159fe17cff (patch)
tree      90c2155ec5a1f3115a9eb7a86f25d1a4610227c6 /mm
parent    3f67d962c64d9b6de9dab81bdbe6d5c94c80d9b9 (diff)
parent    d158fc7f36a25e19791d25a55da5623399a2644f (diff)
Merge branch 'linus' into sched/core
Reason: Bring back upstream modifications to resolve conflicts.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c         4
-rw-r--r--  mm/huge_memory.c     9
-rw-r--r--  mm/memory-failure.c  6
-rw-r--r--  mm/mprotect.c       25
-rw-r--r--  mm/page-writeback.c  5
-rw-r--r--  mm/slab.c            2
-rw-r--r--  mm/slub.c           74
-rw-r--r--  mm/swap_state.c     63
-rw-r--r--  mm/swapfile.c       11
-rw-r--r--  mm/vmpressure.c      1
-rw-r--r--  mm/vmstat.c          4
11 files changed, 140 insertions(+), 64 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index d56d3c145b9f..7a13f6ac5421 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2553,8 +2553,8 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (ret > 0) {
 		ssize_t err;
 
-		err = generic_write_sync(file, pos, ret);
-		if (err < 0 && ret > 0)
+		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+		if (err < 0)
 			ret = err;
 	}
 	return ret;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 82166bf974e1..da23eb96779f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1545,6 +1545,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 				entry = pmd_mknonnuma(entry);
 			entry = pmd_modify(entry, newprot);
 			ret = HPAGE_PMD_NR;
+			set_pmd_at(mm, addr, pmd, entry);
 			BUG_ON(pmd_write(entry));
 		} else {
 			struct page *page = pmd_page(*pmd);
@@ -1557,16 +1558,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			 */
 			if (!is_huge_zero_page(page) &&
 			    !pmd_numa(*pmd)) {
-				entry = *pmd;
-				entry = pmd_mknuma(entry);
+				pmdp_set_numa(mm, addr, pmd);
 				ret = HPAGE_PMD_NR;
 			}
 		}
-
-		/* Set PMD if cleared earlier */
-		if (ret == HPAGE_PMD_NR)
-			set_pmd_at(mm, addr, pmd, entry);
-
 		spin_unlock(ptl);
 	}
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4f08a2d61487..2f2f34a4e77d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -945,8 +945,10 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * to it. Similarly, page lock is shifted.
 	 */
 	if (hpage != p) {
-		put_page(hpage);
-		get_page(p);
+		if (!(flags & MF_COUNT_INCREASED)) {
+			put_page(hpage);
+			get_page(p);
+		}
 		lock_page(p);
 		unlock_page(hpage);
 		*hpagep = p;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 7332c1785744..769a67a15803 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -58,36 +58,27 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				if (pte_numa(ptent))
 					ptent = pte_mknonnuma(ptent);
 				ptent = pte_modify(ptent, newprot);
+				/*
+				 * Avoid taking write faults for pages we
+				 * know to be dirty.
+				 */
+				if (dirty_accountable && pte_dirty(ptent))
+					ptent = pte_mkwrite(ptent);
+				ptep_modify_prot_commit(mm, addr, pte, ptent);
 				updated = true;
 			} else {
 				struct page *page;
 
-				ptent = *pte;
 				page = vm_normal_page(vma, addr, oldpte);
 				if (page && !PageKsm(page)) {
 					if (!pte_numa(oldpte)) {
-						ptent = pte_mknuma(ptent);
-						set_pte_at(mm, addr, pte, ptent);
+						ptep_set_numa(mm, addr, pte);
 						updated = true;
 					}
 				}
 			}
-
-			/*
-			 * Avoid taking write faults for pages we know to be
-			 * dirty.
-			 */
-			if (dirty_accountable && pte_dirty(ptent)) {
-				ptent = pte_mkwrite(ptent);
-				updated = true;
-			}
-
 			if (updated)
 				pages++;
-
-			/* Only !prot_numa always clears the pte */
-			if (!prot_numa)
-				ptep_modify_prot_commit(mm, addr, pte, ptent);
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2d30e2cfe804..7106cb1aca8e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2173,11 +2173,12 @@ int __set_page_dirty_nobuffers(struct page *page)
 	if (!TestSetPageDirty(page)) {
 		struct address_space *mapping = page_mapping(page);
 		struct address_space *mapping2;
+		unsigned long flags;
 
 		if (!mapping)
 			return 1;
 
-		spin_lock_irq(&mapping->tree_lock);
+		spin_lock_irqsave(&mapping->tree_lock, flags);
 		mapping2 = page_mapping(page);
 		if (mapping2) { /* Race with truncate? */
 			BUG_ON(mapping2 != mapping);
@@ -2186,7 +2187,7 @@ int __set_page_dirty_nobuffers(struct page *page)
 			radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 		}
-		spin_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 		if (mapping->host) {
 			/* !PageAnon && !swapper_space */
 			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
diff --git a/mm/slab.c b/mm/slab.c
index eb043bf05f4c..b264214c77ea 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1946,7 +1946,7 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
 /**
  * slab_destroy - destroy and release all objects in a slab
  * @cachep: cache pointer being destroyed
- * @slabp: slab pointer being destroyed
+ * @page: page pointer being destroyed
  *
  * Destroy all the objs in a slab, and release the mem back to the system.
  * Before calling the slab must have been unlinked from the cache. The
diff --git a/mm/slub.c b/mm/slub.c
index 2b1a6970e46f..25f14ad8f817 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1000,8 +1000,6 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
 
 /*
  * Tracking of fully allocated slabs for debugging purposes.
- *
- * list_lock must be held.
  */
 static void add_full(struct kmem_cache *s,
 	struct kmem_cache_node *n, struct page *page)
@@ -1009,17 +1007,16 @@ static void add_full(struct kmem_cache *s,
 	if (!(s->flags & SLAB_STORE_USER))
 		return;
 
+	lockdep_assert_held(&n->list_lock);
 	list_add(&page->lru, &n->full);
 }
 
-/*
- * list_lock must be held.
- */
-static void remove_full(struct kmem_cache *s, struct page *page)
+static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
 {
 	if (!(s->flags & SLAB_STORE_USER))
 		return;
 
+	lockdep_assert_held(&n->list_lock);
 	list_del(&page->lru);
 }
 
@@ -1265,7 +1262,8 @@ static inline int check_object(struct kmem_cache *s, struct page *page,
 					void *object, u8 val) { return 1; }
 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
 					struct page *page) {}
-static inline void remove_full(struct kmem_cache *s, struct page *page) {}
+static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
+					struct page *page) {}
 static inline unsigned long kmem_cache_flags(unsigned long object_size,
 			unsigned long flags, const char *name,
 			void (*ctor)(void *))
@@ -1519,11 +1517,9 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
 
 /*
  * Management of partially allocated slabs.
- *
- * list_lock must be held.
  */
-static inline void add_partial(struct kmem_cache_node *n,
-				struct page *page, int tail)
+static inline void
+__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
 {
 	n->nr_partial++;
 	if (tail == DEACTIVATE_TO_TAIL)
@@ -1532,23 +1528,32 @@ static inline void add_partial(struct kmem_cache_node *n,
 		list_add(&page->lru, &n->partial);
 }
 
-/*
- * list_lock must be held.
- */
-static inline void remove_partial(struct kmem_cache_node *n,
-					struct page *page)
+static inline void add_partial(struct kmem_cache_node *n,
+				struct page *page, int tail)
+{
+	lockdep_assert_held(&n->list_lock);
+	__add_partial(n, page, tail);
+}
+
+static inline void
+__remove_partial(struct kmem_cache_node *n, struct page *page)
 {
 	list_del(&page->lru);
 	n->nr_partial--;
 }
 
+static inline void remove_partial(struct kmem_cache_node *n,
+					struct page *page)
+{
+	lockdep_assert_held(&n->list_lock);
+	__remove_partial(n, page);
+}
+
 /*
  * Remove slab from the partial list, freeze it and
  * return the pointer to the freelist.
 *
 * Returns a list of objects or NULL if it fails.
- *
- * Must hold list_lock since we modify the partial list.
 */
 static inline void *acquire_slab(struct kmem_cache *s,
 		struct kmem_cache_node *n, struct page *page,
@@ -1558,6 +1563,8 @@ static inline void *acquire_slab(struct kmem_cache *s,
 	unsigned long counters;
 	struct page new;
 
+	lockdep_assert_held(&n->list_lock);
+
 	/*
 	 * Zap the freelist and set the frozen bit.
 	 * The old freelist is the list of objects for the
@@ -1902,7 +1909,7 @@ redo:
 
 		else if (l == M_FULL)
 
-			remove_full(s, page);
+			remove_full(s, n, page);
 
 		if (m == M_PARTIAL) {
 
@@ -2556,7 +2563,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		new.inuse--;
 		if ((!new.inuse || !prior) && !was_frozen) {
 
-			if (kmem_cache_has_cpu_partial(s) && !prior)
+			if (kmem_cache_has_cpu_partial(s) && !prior) {
 
 				/*
 				 * Slab was on no list before and will be
@@ -2566,7 +2573,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 				 */
 				new.frozen = 1;
 
-			else { /* Needs to be taken off a list */
+			} else { /* Needs to be taken off a list */
 
 				n = get_node(s, page_to_nid(page));
 				/*
@@ -2615,7 +2622,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	 */
 	if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
 		if (kmem_cache_debug(s))
-			remove_full(s, page);
+			remove_full(s, n, page);
 		add_partial(n, page, DEACTIVATE_TO_TAIL);
 		stat(s, FREE_ADD_PARTIAL);
 	}
@@ -2629,9 +2636,10 @@ slab_empty:
 		 */
 		remove_partial(n, page);
 		stat(s, FREE_REMOVE_PARTIAL);
-	} else
+	} else {
 		/* Slab must be on the full list */
-		remove_full(s, page);
+		remove_full(s, n, page);
+	}
 
 	spin_unlock_irqrestore(&n->list_lock, flags);
 	stat(s, FREE_SLAB);
@@ -2905,7 +2913,11 @@ static void early_kmem_cache_node_alloc(int node)
 	init_kmem_cache_node(n);
 	inc_slabs_node(kmem_cache_node, node, page->objects);
 
-	add_partial(n, page, DEACTIVATE_TO_HEAD);
+	/*
+	 * No locks need to be taken here as it has just been
+	 * initialized and there is no concurrent access.
+	 */
+	__add_partial(n, page, DEACTIVATE_TO_HEAD);
 }
 
 static void free_kmem_cache_nodes(struct kmem_cache *s)
@@ -3191,7 +3203,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 
 	list_for_each_entry_safe(page, h, &n->partial, lru) {
 		if (!page->inuse) {
-			remove_partial(n, page);
+			__remove_partial(n, page);
 			discard_slab(s, page);
 		} else {
 			list_slab_objects(s, page,
@@ -4314,7 +4326,13 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 
 			page = ACCESS_ONCE(c->partial);
 			if (page) {
-				x = page->pobjects;
+				node = page_to_nid(page);
+				if (flags & SO_TOTAL)
+					WARN_ON_ONCE(1);
+				else if (flags & SO_OBJECTS)
+					WARN_ON_ONCE(1);
+				else
+					x = page->pages;
 				total += x;
 				nodes[node] += x;
 			}
@@ -5178,7 +5196,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
 	}
 
 	s->kobj.kset = slab_kset;
-	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
+	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
 	if (err) {
 		kobject_put(&s->kobj);
 		return err;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 98e85e9c2b2d..e76ace30d436 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void)
 	return ret;
 }
 
+static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
+
 void show_swap_cache_info(void)
 {
 	printk("%lu pages in swap cache\n", total_swapcache_pages());
@@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry)
 
 	page = find_get_page(swap_address_space(entry), entry.val);
 
-	if (page)
+	if (page) {
 		INC_CACHE_INFO(find_success);
+		if (TestClearPageReadahead(page))
+			atomic_inc(&swapin_readahead_hits);
+	}
 
 	INC_CACHE_INFO(find_total);
 	return page;
@@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	return found_page;
 }
 
+static unsigned long swapin_nr_pages(unsigned long offset)
+{
+	static unsigned long prev_offset;
+	unsigned int pages, max_pages, last_ra;
+	static atomic_t last_readahead_pages;
+
+	max_pages = 1 << ACCESS_ONCE(page_cluster);
+	if (max_pages <= 1)
+		return 1;
+
+	/*
+	 * This heuristic has been found to work well on both sequential and
+	 * random loads, swapping to hard disk or to SSD: please don't ask
+	 * what the "+ 2" means, it just happens to work well, that's all.
+	 */
+	pages = atomic_xchg(&swapin_readahead_hits, 0) + 2;
+	if (pages == 2) {
+		/*
+		 * We can have no readahead hits to judge by: but must not get
+		 * stuck here forever, so check for an adjacent offset instead
+		 * (and don't even bother to check whether swap type is same).
+		 */
+		if (offset != prev_offset + 1 && offset != prev_offset - 1)
+			pages = 1;
+		prev_offset = offset;
+	} else {
+		unsigned int roundup = 4;
+		while (roundup < pages)
+			roundup <<= 1;
+		pages = roundup;
+	}
+
+	if (pages > max_pages)
+		pages = max_pages;
+
+	/* Don't shrink readahead too fast */
+	last_ra = atomic_read(&last_readahead_pages) / 2;
+	if (pages < last_ra)
+		pages = last_ra;
+	atomic_set(&last_readahead_pages, pages);
+
+	return pages;
+}
+
 /**
  * swapin_readahead - swap in pages in hope we need them soon
  * @entry: swap entry of this memory
@@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *page;
-	unsigned long offset = swp_offset(entry);
+	unsigned long entry_offset = swp_offset(entry);
+	unsigned long offset = entry_offset;
 	unsigned long start_offset, end_offset;
-	unsigned long mask = (1UL << page_cluster) - 1;
+	unsigned long mask;
 	struct blk_plug plug;
 
+	mask = swapin_nr_pages(offset) - 1;
+	if (!mask)
+		goto skip;
+
 	/* Read a page_cluster sized and aligned cluster around offset. */
 	start_offset = offset & ~mask;
 	end_offset = offset | mask;
@@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 						gfp_mask, vma, addr);
 		if (!page)
 			continue;
+		if (offset != entry_offset)
+			SetPageReadahead(page);
 		page_cache_release(page);
 	}
 	blk_finish_plug(&plug);
 
 	lru_add_drain();	/* Push any new pages onto the LRU now */
+skip:
 	return read_swap_cache_async(entry, gfp_mask, vma, addr);
 }
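The swap_state.c hunks above replace the fixed (1 << page_cluster) readahead window with one sized by the new swapin_nr_pages(). As a rough illustration only, here is a standalone userspace sketch of that sizing heuristic (not kernel code: the hit counter, page_cluster value, and function name are all simulated stand-ins):

/* Hedged sketch: mimics the window-sizing logic of swapin_nr_pages()
 * outside the kernel. "hits" stands in for swapin_readahead_hits and
 * "cluster" for page_cluster; both are plain parameters, not kernel state. */
#include <stdio.h>

static unsigned long prev_offset;
static unsigned int last_readahead_pages;

static unsigned int sketch_swapin_nr_pages(unsigned long offset,
					    unsigned int hits,
					    unsigned int cluster)
{
	unsigned int pages, max_pages, last_ra;

	max_pages = 1u << cluster;
	if (max_pages <= 1)
		return 1;

	/* "+ 2" bias as in the patch: start from the recent readahead hits */
	pages = hits + 2;
	if (pages == 2) {
		/* No hits: only read ahead if this fault is adjacent to the last one */
		if (offset != prev_offset + 1 && offset != prev_offset - 1)
			pages = 1;
		prev_offset = offset;
	} else {
		/* Round up to the next power of two, starting at 4 */
		unsigned int roundup = 4;
		while (roundup < pages)
			roundup <<= 1;
		pages = roundup;
	}

	if (pages > max_pages)
		pages = max_pages;

	/* Never shrink to less than half of the previous window */
	last_ra = last_readahead_pages / 2;
	if (pages < last_ra)
		pages = last_ra;
	last_readahead_pages = pages;

	return pages;
}

int main(void)
{
	/* Example faults with a simulated page_cluster of 3 (max window 8) */
	printf("%u\n", sketch_swapin_nr_pages(100, 0, 3)); /* 1: cold, not adjacent */
	printf("%u\n", sketch_swapin_nr_pages(101, 0, 3)); /* 2: adjacent to previous fault */
	printf("%u\n", sketch_swapin_nr_pages(102, 3, 3)); /* 8: 3 hits -> 5, rounded up to 8 */
	printf("%u\n", sketch_swapin_nr_pages(500, 0, 3)); /* 4: shrink capped at half of 8 */
	return 0;
}

Run against a few faults, the window starts at 1, grows toward 1 << page_cluster as readahead hits accumulate, and is never allowed to collapse below half of its previous size in one step.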
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c6c13b050a58..4a7f7e6992b6 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1923,7 +1923,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	p->swap_map = NULL;
 	cluster_info = p->cluster_info;
 	p->cluster_info = NULL;
-	p->flags = 0;
 	frontswap_map = frontswap_map_get(p);
 	spin_unlock(&p->lock);
 	spin_unlock(&swap_lock);
@@ -1949,6 +1948,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		mutex_unlock(&inode->i_mutex);
 	}
 	filp_close(swap_file, NULL);
+
+	/*
+	 * Clear the SWP_USED flag after all resources are freed so that swapon
+	 * can reuse this swap_info in alloc_swap_info() safely. It is ok to
+	 * not hold p->lock after we cleared its SWP_WRITEOK.
+	 */
+	spin_lock(&swap_lock);
+	p->flags = 0;
+	spin_unlock(&swap_lock);
+
 	err = 0;
 	atomic_inc(&proc_poll_event);
 	wake_up_interruptible(&proc_poll_wait);
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 196970a4541f..d4042e75f7c7 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -19,6 +19,7 @@
 #include <linux/mm.h>
 #include <linux/vmstat.h>
 #include <linux/eventfd.h>
+#include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/printk.h>
 #include <linux/vmpressure.h>
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 72496140ac08..def5dd2fbe61 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -851,12 +851,14 @@ const char * const vmstat_text[] = {
851 "thp_zero_page_alloc", 851 "thp_zero_page_alloc",
852 "thp_zero_page_alloc_failed", 852 "thp_zero_page_alloc_failed",
853#endif 853#endif
854#ifdef CONFIG_DEBUG_TLBFLUSH
854#ifdef CONFIG_SMP 855#ifdef CONFIG_SMP
855 "nr_tlb_remote_flush", 856 "nr_tlb_remote_flush",
856 "nr_tlb_remote_flush_received", 857 "nr_tlb_remote_flush_received",
857#endif 858#endif /* CONFIG_SMP */
858 "nr_tlb_local_flush_all", 859 "nr_tlb_local_flush_all",
859 "nr_tlb_local_flush_one", 860 "nr_tlb_local_flush_one",
861#endif /* CONFIG_DEBUG_TLBFLUSH */
860 862
861#endif /* CONFIG_VM_EVENTS_COUNTERS */ 863#endif /* CONFIG_VM_EVENTS_COUNTERS */
862}; 864};