| author | Dave Airlie <airlied@redhat.com> | 2014-06-05 06:28:59 -0400 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2014-06-05 06:28:59 -0400 |
| commit | 8d4ad9d4bb0a618c975a32d77087694ec6336f68 | |
| tree | d18d12688174a623e3503b11118e44ef8186c90b /mm | |
| parent | 5ea1f752ae04be403a3dc8ec876a60d7f5f6990a | |
| parent | 9e9a928eed8796a0a1aaed7e0b676db86ba84594 | |
Merge commit '9e9a928eed8796a0a1aaed7e0b676db86ba84594' into drm-next
Merge drm-fixes into drm-next.
Both i915 and radeon need this done for later patches.
Conflicts:
drivers/gpu/drm/drm_crtc_helper.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_gtt.c
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/Kconfig | 15 |
| -rw-r--r-- | mm/compaction.c | 22 |
| -rw-r--r-- | mm/filemap.c | 55 |
| -rw-r--r-- | mm/hugetlb.c | 19 |
| -rw-r--r-- | mm/kmemleak.c | 4 |
| -rw-r--r-- | mm/madvise.c | 2 |
| -rw-r--r-- | mm/memcontrol.c | 47 |
| -rw-r--r-- | mm/memory-failure.c | 17 |
| -rw-r--r-- | mm/mremap.c | 9 |
| -rw-r--r-- | mm/page-writeback.c | 6 |
| -rw-r--r-- | mm/percpu.c | 2 |
| -rw-r--r-- | mm/slab.c | 6 |
| -rw-r--r-- | mm/slab.h | 1 |
| -rw-r--r-- | mm/slab_common.c | 13 |
| -rw-r--r-- | mm/slub.c | 41 |
| -rw-r--r-- | mm/truncate.c | 8 |
| -rw-r--r-- | mm/util.c | 10 |
| -rw-r--r-- | mm/vmscan.c | 18 |
18 files changed, 182 insertions, 113 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index ebe5880c29d6..1b5a95f0fa01 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
| @@ -581,3 +581,18 @@ config PGTABLE_MAPPING | |||
| 581 | 581 | ||
| 582 | config GENERIC_EARLY_IOREMAP | 582 | config GENERIC_EARLY_IOREMAP |
| 583 | bool | 583 | bool |
| 584 | |||
| 585 | config MAX_STACK_SIZE_MB | ||
| 586 | int "Maximum user stack size for 32-bit processes (MB)" | ||
| 587 | default 80 | ||
| 588 | range 8 256 if METAG | ||
| 589 | range 8 2048 | ||
| 590 | depends on STACK_GROWSUP && (!64BIT || COMPAT) | ||
| 591 | help | ||
| 592 | This is the maximum stack size in Megabytes in the VM layout of 32-bit | ||
| 593 | user processes when the stack grows upwards (currently only on parisc | ||
| 594 | and metag arch). The stack will be located at the highest memory | ||
| 595 | address minus the given value, unless the RLIMIT_STACK hard limit is | ||
| 596 | changed to a smaller value in which case that is used. | ||
| 597 | |||
| 598 | A sane initial value is 80 MB. | ||
diff --git a/mm/compaction.c b/mm/compaction.c index 37f976287068..627dc2e4320f 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
| @@ -671,16 +671,20 @@ static void isolate_freepages(struct zone *zone, | |||
| 671 | struct compact_control *cc) | 671 | struct compact_control *cc) |
| 672 | { | 672 | { |
| 673 | struct page *page; | 673 | struct page *page; |
| 674 | unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn; | 674 | unsigned long high_pfn, low_pfn, pfn, z_end_pfn; |
| 675 | int nr_freepages = cc->nr_freepages; | 675 | int nr_freepages = cc->nr_freepages; |
| 676 | struct list_head *freelist = &cc->freepages; | 676 | struct list_head *freelist = &cc->freepages; |
| 677 | 677 | ||
| 678 | /* | 678 | /* |
| 679 | * Initialise the free scanner. The starting point is where we last | 679 | * Initialise the free scanner. The starting point is where we last |
| 680 | * scanned from (or the end of the zone if starting). The low point | 680 | * successfully isolated from, zone-cached value, or the end of the |
| 681 | * is the end of the pageblock the migration scanner is using. | 681 | * zone when isolating for the first time. We need this aligned to |
| 682 | * the pageblock boundary, because we do pfn -= pageblock_nr_pages | ||
| 683 | * in the for loop. | ||
| 684 | * The low boundary is the end of the pageblock the migration scanner | ||
| 685 | * is using. | ||
| 682 | */ | 686 | */ |
| 683 | pfn = cc->free_pfn; | 687 | pfn = cc->free_pfn & ~(pageblock_nr_pages-1); |
| 684 | low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages); | 688 | low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages); |
| 685 | 689 | ||
| 686 | /* | 690 | /* |
| @@ -700,6 +704,7 @@ static void isolate_freepages(struct zone *zone, | |||
| 700 | for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages; | 704 | for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages; |
| 701 | pfn -= pageblock_nr_pages) { | 705 | pfn -= pageblock_nr_pages) { |
| 702 | unsigned long isolated; | 706 | unsigned long isolated; |
| 707 | unsigned long end_pfn; | ||
| 703 | 708 | ||
| 704 | /* | 709 | /* |
| 705 | * This can iterate a massively long zone without finding any | 710 | * This can iterate a massively long zone without finding any |
| @@ -734,13 +739,10 @@ static void isolate_freepages(struct zone *zone, | |||
| 734 | isolated = 0; | 739 | isolated = 0; |
| 735 | 740 | ||
| 736 | /* | 741 | /* |
| 737 | * As pfn may not start aligned, pfn+pageblock_nr_page | 742 | * Take care when isolating in last pageblock of a zone which |
| 738 | * may cross a MAX_ORDER_NR_PAGES boundary and miss | 743 | * ends in the middle of a pageblock. |
| 739 | * a pfn_valid check. Ensure isolate_freepages_block() | ||
| 740 | * only scans within a pageblock | ||
| 741 | */ | 744 | */ |
| 742 | end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); | 745 | end_pfn = min(pfn + pageblock_nr_pages, z_end_pfn); |
| 743 | end_pfn = min(end_pfn, z_end_pfn); | ||
| 744 | isolated = isolate_freepages_block(cc, pfn, end_pfn, | 746 | isolated = isolate_freepages_block(cc, pfn, end_pfn, |
| 745 | freelist, false); | 747 | freelist, false); |
| 746 | nr_freepages += isolated; | 748 | nr_freepages += isolated; |
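The rewritten scanner start rounds cc->free_pfn down to a pageblock boundary so the `pfn -= pageblock_nr_pages` steps in the loop stay aligned, and the per-block end is clamped when a zone ends mid-pageblock. A minimal standalone sketch of that arithmetic (names and values below are illustrative, not taken from compaction.c; it assumes the pageblock size is a power of two, as in the kernel):

```c
#include <stdio.h>

#define PAGEBLOCK_NR_PAGES 512UL	/* e.g. 2MB pageblocks with 4KB pages */

int main(void)
{
	unsigned long free_pfn = 123456789UL;	/* arbitrary cached scanner position */
	unsigned long zone_end_pfn = 123457000UL;	/* zone ends mid-pageblock */

	/* Round down so repeated "pfn -= PAGEBLOCK_NR_PAGES" stays aligned. */
	unsigned long pfn = free_pfn & ~(PAGEBLOCK_NR_PAGES - 1);

	/* Clamp the block end instead of scanning past the zone. */
	unsigned long end_pfn = pfn + PAGEBLOCK_NR_PAGES;
	if (end_pfn > zone_end_pfn)
		end_pfn = zone_end_pfn;

	printf("scan pageblock [%lu, %lu)\n", pfn, end_pfn);
	return 0;
}
```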
diff --git a/mm/filemap.c b/mm/filemap.c index 5020b280a771..088358c8006b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -257,9 +257,11 @@ static int filemap_check_errors(struct address_space *mapping) | |||
| 257 | { | 257 | { |
| 258 | int ret = 0; | 258 | int ret = 0; |
| 259 | /* Check for outstanding write errors */ | 259 | /* Check for outstanding write errors */ |
| 260 | if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) | 260 | if (test_bit(AS_ENOSPC, &mapping->flags) && |
| 261 | test_and_clear_bit(AS_ENOSPC, &mapping->flags)) | ||
| 261 | ret = -ENOSPC; | 262 | ret = -ENOSPC; |
| 262 | if (test_and_clear_bit(AS_EIO, &mapping->flags)) | 263 | if (test_bit(AS_EIO, &mapping->flags) && |
| 264 | test_and_clear_bit(AS_EIO, &mapping->flags)) | ||
| 263 | ret = -EIO; | 265 | ret = -EIO; |
| 264 | return ret; | 266 | return ret; |
| 265 | } | 267 | } |
| @@ -906,8 +908,8 @@ EXPORT_SYMBOL(page_cache_prev_hole); | |||
| 906 | * Looks up the page cache slot at @mapping & @offset. If there is a | 908 | * Looks up the page cache slot at @mapping & @offset. If there is a |
| 907 | * page cache page, it is returned with an increased refcount. | 909 | * page cache page, it is returned with an increased refcount. |
| 908 | * | 910 | * |
| 909 | * If the slot holds a shadow entry of a previously evicted page, it | 911 | * If the slot holds a shadow entry of a previously evicted page, or a |
| 910 | * is returned. | 912 | * swap entry from shmem/tmpfs, it is returned. |
| 911 | * | 913 | * |
| 912 | * Otherwise, %NULL is returned. | 914 | * Otherwise, %NULL is returned. |
| 913 | */ | 915 | */ |
| @@ -928,9 +930,9 @@ repeat: | |||
| 928 | if (radix_tree_deref_retry(page)) | 930 | if (radix_tree_deref_retry(page)) |
| 929 | goto repeat; | 931 | goto repeat; |
| 930 | /* | 932 | /* |
| 931 | * Otherwise, shmem/tmpfs must be storing a swap entry | 933 | * A shadow entry of a recently evicted page, |
| 932 | * here as an exceptional entry: so return it without | 934 | * or a swap entry from shmem/tmpfs. Return |
| 933 | * attempting to raise page count. | 935 | * it without attempting to raise page count. |
| 934 | */ | 936 | */ |
| 935 | goto out; | 937 | goto out; |
| 936 | } | 938 | } |
| @@ -983,8 +985,8 @@ EXPORT_SYMBOL(find_get_page); | |||
| 983 | * page cache page, it is returned locked and with an increased | 985 | * page cache page, it is returned locked and with an increased |
| 984 | * refcount. | 986 | * refcount. |
| 985 | * | 987 | * |
| 986 | * If the slot holds a shadow entry of a previously evicted page, it | 988 | * If the slot holds a shadow entry of a previously evicted page, or a |
| 987 | * is returned. | 989 | * swap entry from shmem/tmpfs, it is returned. |
| 988 | * | 990 | * |
| 989 | * Otherwise, %NULL is returned. | 991 | * Otherwise, %NULL is returned. |
| 990 | * | 992 | * |
| @@ -1099,8 +1101,8 @@ EXPORT_SYMBOL(find_or_create_page); | |||
| 1099 | * with ascending indexes. There may be holes in the indices due to | 1101 | * with ascending indexes. There may be holes in the indices due to |
| 1100 | * not-present pages. | 1102 | * not-present pages. |
| 1101 | * | 1103 | * |
| 1102 | * Any shadow entries of evicted pages are included in the returned | 1104 | * Any shadow entries of evicted pages, or swap entries from |
| 1103 | * array. | 1105 | * shmem/tmpfs, are included in the returned array. |
| 1104 | * | 1106 | * |
| 1105 | * find_get_entries() returns the number of pages and shadow entries | 1107 | * find_get_entries() returns the number of pages and shadow entries |
| 1106 | * which were found. | 1108 | * which were found. |
| @@ -1128,9 +1130,9 @@ repeat: | |||
| 1128 | if (radix_tree_deref_retry(page)) | 1130 | if (radix_tree_deref_retry(page)) |
| 1129 | goto restart; | 1131 | goto restart; |
| 1130 | /* | 1132 | /* |
| 1131 | * Otherwise, we must be storing a swap entry | 1133 | * A shadow entry of a recently evicted page, |
| 1132 | * here as an exceptional entry: so return it | 1134 | * or a swap entry from shmem/tmpfs. Return |
| 1133 | * without attempting to raise page count. | 1135 | * it without attempting to raise page count. |
| 1134 | */ | 1136 | */ |
| 1135 | goto export; | 1137 | goto export; |
| 1136 | } | 1138 | } |
| @@ -1198,9 +1200,9 @@ repeat: | |||
| 1198 | goto restart; | 1200 | goto restart; |
| 1199 | } | 1201 | } |
| 1200 | /* | 1202 | /* |
| 1201 | * Otherwise, shmem/tmpfs must be storing a swap entry | 1203 | * A shadow entry of a recently evicted page, |
| 1202 | * here as an exceptional entry: so skip over it - | 1204 | * or a swap entry from shmem/tmpfs. Skip |
| 1203 | * we only reach this from invalidate_mapping_pages(). | 1205 | * over it. |
| 1204 | */ | 1206 | */ |
| 1205 | continue; | 1207 | continue; |
| 1206 | } | 1208 | } |
| @@ -1265,9 +1267,9 @@ repeat: | |||
| 1265 | goto restart; | 1267 | goto restart; |
| 1266 | } | 1268 | } |
| 1267 | /* | 1269 | /* |
| 1268 | * Otherwise, shmem/tmpfs must be storing a swap entry | 1270 | * A shadow entry of a recently evicted page, |
| 1269 | * here as an exceptional entry: so stop looking for | 1271 | * or a swap entry from shmem/tmpfs. Stop |
| 1270 | * contiguous pages. | 1272 | * looking for contiguous pages. |
| 1271 | */ | 1273 | */ |
| 1272 | break; | 1274 | break; |
| 1273 | } | 1275 | } |
| @@ -1341,10 +1343,17 @@ repeat: | |||
| 1341 | goto restart; | 1343 | goto restart; |
| 1342 | } | 1344 | } |
| 1343 | /* | 1345 | /* |
| 1344 | * This function is never used on a shmem/tmpfs | 1346 | * A shadow entry of a recently evicted page. |
| 1345 | * mapping, so a swap entry won't be found here. | 1347 | * |
| 1348 | * Those entries should never be tagged, but | ||
| 1349 | * this tree walk is lockless and the tags are | ||
| 1350 | * looked up in bulk, one radix tree node at a | ||
| 1351 | * time, so there is a sizable window for page | ||
| 1352 | * reclaim to evict a page we saw tagged. | ||
| 1353 | * | ||
| 1354 | * Skip over it. | ||
| 1346 | */ | 1355 | */ |
| 1347 | BUG(); | 1356 | continue; |
| 1348 | } | 1357 | } |
| 1349 | 1358 | ||
| 1350 | if (!page_cache_get_speculative(page)) | 1359 | if (!page_cache_get_speculative(page)) |
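With the page-cache lookup helpers now handing back shadow entries (and shmem/tmpfs swap entries) instead of hiding them, callers that only want real pages must filter exceptional entries themselves, as the madvise.c hunk further below does with radix_tree_exceptional_entry(). A hedged sketch of that caller pattern (the wrapper name is illustrative, not an existing kernel function):

```c
#include <linux/pagemap.h>
#include <linux/radix-tree.h>

/* Hypothetical wrapper: return a real page or NULL, never an exceptional entry. */
static struct page *lookup_cache_page(struct address_space *mapping, pgoff_t index)
{
	struct page *page = find_get_entry(mapping, index);

	/* Shadow/swap entries carry no refcount and must not be dereferenced. */
	if (radix_tree_exceptional_entry(page))
		return NULL;

	return page;	/* NULL, or a page with an elevated refcount */
}
```

The caller still owes a page_cache_release() on any page returned.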
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 246192929a2d..c82290b9c1fc 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -1981,11 +1981,7 @@ static int __init hugetlb_init(void) | |||
| 1981 | { | 1981 | { |
| 1982 | int i; | 1982 | int i; |
| 1983 | 1983 | ||
| 1984 | /* Some platform decide whether they support huge pages at boot | 1984 | if (!hugepages_supported()) |
| 1985 | * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when | ||
| 1986 | * there is no such support | ||
| 1987 | */ | ||
| 1988 | if (HPAGE_SHIFT == 0) | ||
| 1989 | return 0; | 1985 | return 0; |
| 1990 | 1986 | ||
| 1991 | if (!size_to_hstate(default_hstate_size)) { | 1987 | if (!size_to_hstate(default_hstate_size)) { |
| @@ -2112,6 +2108,9 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy, | |||
| 2112 | unsigned long tmp; | 2108 | unsigned long tmp; |
| 2113 | int ret; | 2109 | int ret; |
| 2114 | 2110 | ||
| 2111 | if (!hugepages_supported()) | ||
| 2112 | return -ENOTSUPP; | ||
| 2113 | |||
| 2115 | tmp = h->max_huge_pages; | 2114 | tmp = h->max_huge_pages; |
| 2116 | 2115 | ||
| 2117 | if (write && h->order >= MAX_ORDER) | 2116 | if (write && h->order >= MAX_ORDER) |
| @@ -2165,6 +2164,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, | |||
| 2165 | unsigned long tmp; | 2164 | unsigned long tmp; |
| 2166 | int ret; | 2165 | int ret; |
| 2167 | 2166 | ||
| 2167 | if (!hugepages_supported()) | ||
| 2168 | return -ENOTSUPP; | ||
| 2169 | |||
| 2168 | tmp = h->nr_overcommit_huge_pages; | 2170 | tmp = h->nr_overcommit_huge_pages; |
| 2169 | 2171 | ||
| 2170 | if (write && h->order >= MAX_ORDER) | 2172 | if (write && h->order >= MAX_ORDER) |
| @@ -2190,6 +2192,8 @@ out: | |||
| 2190 | void hugetlb_report_meminfo(struct seq_file *m) | 2192 | void hugetlb_report_meminfo(struct seq_file *m) |
| 2191 | { | 2193 | { |
| 2192 | struct hstate *h = &default_hstate; | 2194 | struct hstate *h = &default_hstate; |
| 2195 | if (!hugepages_supported()) | ||
| 2196 | return; | ||
| 2193 | seq_printf(m, | 2197 | seq_printf(m, |
| 2194 | "HugePages_Total: %5lu\n" | 2198 | "HugePages_Total: %5lu\n" |
| 2195 | "HugePages_Free: %5lu\n" | 2199 | "HugePages_Free: %5lu\n" |
| @@ -2206,6 +2210,8 @@ void hugetlb_report_meminfo(struct seq_file *m) | |||
| 2206 | int hugetlb_report_node_meminfo(int nid, char *buf) | 2210 | int hugetlb_report_node_meminfo(int nid, char *buf) |
| 2207 | { | 2211 | { |
| 2208 | struct hstate *h = &default_hstate; | 2212 | struct hstate *h = &default_hstate; |
| 2213 | if (!hugepages_supported()) | ||
| 2214 | return 0; | ||
| 2209 | return sprintf(buf, | 2215 | return sprintf(buf, |
| 2210 | "Node %d HugePages_Total: %5u\n" | 2216 | "Node %d HugePages_Total: %5u\n" |
| 2211 | "Node %d HugePages_Free: %5u\n" | 2217 | "Node %d HugePages_Free: %5u\n" |
| @@ -2220,6 +2226,9 @@ void hugetlb_show_meminfo(void) | |||
| 2220 | struct hstate *h; | 2226 | struct hstate *h; |
| 2221 | int nid; | 2227 | int nid; |
| 2222 | 2228 | ||
| 2229 | if (!hugepages_supported()) | ||
| 2230 | return; | ||
| 2231 | |||
| 2223 | for_each_node_state(nid, N_MEMORY) | 2232 | for_each_node_state(nid, N_MEMORY) |
| 2224 | for_each_hstate(h) | 2233 | for_each_hstate(h) |
| 2225 | pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n", | 2234 | pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n", |
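Every entry point above is now gated on hugepages_supported() instead of an open-coded HPAGE_SHIFT check. The helper itself lives in include/linux/hugetlb.h and is not part of this diff; judging from the comment removed in hugetlb_init(), it is presumably equivalent to something like:

```c
/*
 * Presumed shape only: some platforms (e.g. powerpc) decide at boot
 * whether huge pages are available and leave HPAGE_SHIFT at 0 if not.
 */
#define hugepages_supported() (HPAGE_SHIFT != 0)
```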
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 91d67eaee050..8d2fcdfeff7f 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c | |||
| @@ -1775,10 +1775,9 @@ void __init kmemleak_init(void) | |||
| 1775 | int i; | 1775 | int i; |
| 1776 | unsigned long flags; | 1776 | unsigned long flags; |
| 1777 | 1777 | ||
| 1778 | kmemleak_early_log = 0; | ||
| 1779 | |||
| 1780 | #ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF | 1778 | #ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF |
| 1781 | if (!kmemleak_skip_disable) { | 1779 | if (!kmemleak_skip_disable) { |
| 1780 | kmemleak_early_log = 0; | ||
| 1782 | kmemleak_disable(); | 1781 | kmemleak_disable(); |
| 1783 | return; | 1782 | return; |
| 1784 | } | 1783 | } |
| @@ -1796,6 +1795,7 @@ void __init kmemleak_init(void) | |||
| 1796 | 1795 | ||
| 1797 | /* the kernel is still in UP mode, so disabling the IRQs is enough */ | 1796 | /* the kernel is still in UP mode, so disabling the IRQs is enough */ |
| 1798 | local_irq_save(flags); | 1797 | local_irq_save(flags); |
| 1798 | kmemleak_early_log = 0; | ||
| 1799 | if (kmemleak_error) { | 1799 | if (kmemleak_error) { |
| 1800 | local_irq_restore(flags); | 1800 | local_irq_restore(flags); |
| 1801 | return; | 1801 | return; |
diff --git a/mm/madvise.c b/mm/madvise.c index 539eeb96b323..a402f8fdc68e 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
| @@ -195,7 +195,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma, | |||
| 195 | for (; start < end; start += PAGE_SIZE) { | 195 | for (; start < end; start += PAGE_SIZE) { |
| 196 | index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 196 | index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
| 197 | 197 | ||
| 198 | page = find_get_page(mapping, index); | 198 | page = find_get_entry(mapping, index); |
| 199 | if (!radix_tree_exceptional_entry(page)) { | 199 | if (!radix_tree_exceptional_entry(page)) { |
| 200 | if (page) | 200 | if (page) |
| 201 | page_cache_release(page); | 201 | page_cache_release(page); |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 29501f040568..5177c6d4a2dd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -1077,9 +1077,18 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) | |||
| 1077 | 1077 | ||
| 1078 | rcu_read_lock(); | 1078 | rcu_read_lock(); |
| 1079 | do { | 1079 | do { |
| 1080 | memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); | 1080 | /* |
| 1081 | if (unlikely(!memcg)) | 1081 | * Page cache insertions can happen withou an |
| 1082 | * actual mm context, e.g. during disk probing | ||
| 1083 | * on boot, loopback IO, acct() writes etc. | ||
| 1084 | */ | ||
| 1085 | if (unlikely(!mm)) | ||
| 1082 | memcg = root_mem_cgroup; | 1086 | memcg = root_mem_cgroup; |
| 1087 | else { | ||
| 1088 | memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); | ||
| 1089 | if (unlikely(!memcg)) | ||
| 1090 | memcg = root_mem_cgroup; | ||
| 1091 | } | ||
| 1083 | } while (!css_tryget(&memcg->css)); | 1092 | } while (!css_tryget(&memcg->css)); |
| 1084 | rcu_read_unlock(); | 1093 | rcu_read_unlock(); |
| 1085 | return memcg; | 1094 | return memcg; |
| @@ -3958,17 +3967,9 @@ int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, | |||
| 3958 | return 0; | 3967 | return 0; |
| 3959 | } | 3968 | } |
| 3960 | 3969 | ||
| 3961 | /* | 3970 | memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); |
| 3962 | * Page cache insertions can happen without an actual mm | 3971 | if (!memcg) |
| 3963 | * context, e.g. during disk probing on boot. | 3972 | return -ENOMEM; |
| 3964 | */ | ||
| 3965 | if (unlikely(!mm)) | ||
| 3966 | memcg = root_mem_cgroup; | ||
| 3967 | else { | ||
| 3968 | memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); | ||
| 3969 | if (!memcg) | ||
| 3970 | return -ENOMEM; | ||
| 3971 | } | ||
| 3972 | __mem_cgroup_commit_charge(memcg, page, 1, type, false); | 3973 | __mem_cgroup_commit_charge(memcg, page, 1, type, false); |
| 3973 | return 0; | 3974 | return 0; |
| 3974 | } | 3975 | } |
| @@ -6686,16 +6687,20 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, | |||
| 6686 | pgoff = pte_to_pgoff(ptent); | 6687 | pgoff = pte_to_pgoff(ptent); |
| 6687 | 6688 | ||
| 6688 | /* page is moved even if it's not RSS of this task(page-faulted). */ | 6689 | /* page is moved even if it's not RSS of this task(page-faulted). */ |
| 6689 | page = find_get_page(mapping, pgoff); | ||
| 6690 | |||
| 6691 | #ifdef CONFIG_SWAP | 6690 | #ifdef CONFIG_SWAP |
| 6692 | /* shmem/tmpfs may report page out on swap: account for that too. */ | 6691 | /* shmem/tmpfs may report page out on swap: account for that too. */ |
| 6693 | if (radix_tree_exceptional_entry(page)) { | 6692 | if (shmem_mapping(mapping)) { |
| 6694 | swp_entry_t swap = radix_to_swp_entry(page); | 6693 | page = find_get_entry(mapping, pgoff); |
| 6695 | if (do_swap_account) | 6694 | if (radix_tree_exceptional_entry(page)) { |
| 6696 | *entry = swap; | 6695 | swp_entry_t swp = radix_to_swp_entry(page); |
| 6697 | page = find_get_page(swap_address_space(swap), swap.val); | 6696 | if (do_swap_account) |
| 6698 | } | 6697 | *entry = swp; |
| 6698 | page = find_get_page(swap_address_space(swp), swp.val); | ||
| 6699 | } | ||
| 6700 | } else | ||
| 6701 | page = find_get_page(mapping, pgoff); | ||
| 6702 | #else | ||
| 6703 | page = find_get_page(mapping, pgoff); | ||
| 6699 | #endif | 6704 | #endif |
| 6700 | return page; | 6705 | return page; |
| 6701 | } | 6706 | } |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 35ef28acf137..9ccef39a9de2 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
| @@ -1081,15 +1081,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
| 1081 | return 0; | 1081 | return 0; |
| 1082 | } else if (PageHuge(hpage)) { | 1082 | } else if (PageHuge(hpage)) { |
| 1083 | /* | 1083 | /* |
| 1084 | * Check "just unpoisoned", "filter hit", and | 1084 | * Check "filter hit" and "race with other subpage." |
| 1085 | * "race with other subpage." | ||
| 1086 | */ | 1085 | */ |
| 1087 | lock_page(hpage); | 1086 | lock_page(hpage); |
| 1088 | if (!PageHWPoison(hpage) | 1087 | if (PageHWPoison(hpage)) { |
| 1089 | || (hwpoison_filter(p) && TestClearPageHWPoison(p)) | 1088 | if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) |
| 1090 | || (p != hpage && TestSetPageHWPoison(hpage))) { | 1089 | || (p != hpage && TestSetPageHWPoison(hpage))) { |
| 1091 | atomic_long_sub(nr_pages, &num_poisoned_pages); | 1090 | atomic_long_sub(nr_pages, &num_poisoned_pages); |
| 1092 | return 0; | 1091 | unlock_page(hpage); |
| 1092 | return 0; | ||
| 1093 | } | ||
| 1093 | } | 1094 | } |
| 1094 | set_page_hwpoison_huge_page(hpage); | 1095 | set_page_hwpoison_huge_page(hpage); |
| 1095 | res = dequeue_hwpoisoned_huge_page(hpage); | 1096 | res = dequeue_hwpoisoned_huge_page(hpage); |
| @@ -1152,6 +1153,8 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
| 1152 | */ | 1153 | */ |
| 1153 | if (!PageHWPoison(p)) { | 1154 | if (!PageHWPoison(p)) { |
| 1154 | printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn); | 1155 | printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn); |
| 1156 | atomic_long_sub(nr_pages, &num_poisoned_pages); | ||
| 1157 | put_page(hpage); | ||
| 1155 | res = 0; | 1158 | res = 0; |
| 1156 | goto out; | 1159 | goto out; |
| 1157 | } | 1160 | } |
diff --git a/mm/mremap.c b/mm/mremap.c index 0843feb66f3d..05f1180e9f21 100644 --- a/mm/mremap.c +++ b/mm/mremap.c | |||
| @@ -194,10 +194,17 @@ unsigned long move_page_tables(struct vm_area_struct *vma, | |||
| 194 | break; | 194 | break; |
| 195 | if (pmd_trans_huge(*old_pmd)) { | 195 | if (pmd_trans_huge(*old_pmd)) { |
| 196 | int err = 0; | 196 | int err = 0; |
| 197 | if (extent == HPAGE_PMD_SIZE) | 197 | if (extent == HPAGE_PMD_SIZE) { |
| 198 | VM_BUG_ON(vma->vm_file || !vma->anon_vma); | ||
| 199 | /* See comment in move_ptes() */ | ||
| 200 | if (need_rmap_locks) | ||
| 201 | anon_vma_lock_write(vma->anon_vma); | ||
| 198 | err = move_huge_pmd(vma, new_vma, old_addr, | 202 | err = move_huge_pmd(vma, new_vma, old_addr, |
| 199 | new_addr, old_end, | 203 | new_addr, old_end, |
| 200 | old_pmd, new_pmd); | 204 | old_pmd, new_pmd); |
| 205 | if (need_rmap_locks) | ||
| 206 | anon_vma_unlock_write(vma->anon_vma); | ||
| 207 | } | ||
| 201 | if (err > 0) { | 208 | if (err > 0) { |
| 202 | need_flush = true; | 209 | need_flush = true; |
| 203 | continue; | 210 | continue; |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index ef413492a149..a4317da60532 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
| @@ -593,14 +593,14 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty) | |||
| 593 | * (5) the closer to setpoint, the smaller |df/dx| (and the reverse) | 593 | * (5) the closer to setpoint, the smaller |df/dx| (and the reverse) |
| 594 | * => fast response on large errors; small oscillation near setpoint | 594 | * => fast response on large errors; small oscillation near setpoint |
| 595 | */ | 595 | */ |
| 596 | static inline long long pos_ratio_polynom(unsigned long setpoint, | 596 | static long long pos_ratio_polynom(unsigned long setpoint, |
| 597 | unsigned long dirty, | 597 | unsigned long dirty, |
| 598 | unsigned long limit) | 598 | unsigned long limit) |
| 599 | { | 599 | { |
| 600 | long long pos_ratio; | 600 | long long pos_ratio; |
| 601 | long x; | 601 | long x; |
| 602 | 602 | ||
| 603 | x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, | 603 | x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, |
| 604 | limit - setpoint + 1); | 604 | limit - setpoint + 1); |
| 605 | pos_ratio = x; | 605 | pos_ratio = x; |
| 606 | pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; | 606 | pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; |
| @@ -842,7 +842,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi, | |||
| 842 | x_intercept = bdi_setpoint + span; | 842 | x_intercept = bdi_setpoint + span; |
| 843 | 843 | ||
| 844 | if (bdi_dirty < x_intercept - span / 4) { | 844 | if (bdi_dirty < x_intercept - span / 4) { |
| 845 | pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty), | 845 | pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty), |
| 846 | x_intercept - bdi_setpoint + 1); | 846 | x_intercept - bdi_setpoint + 1); |
| 847 | } else | 847 | } else |
| 848 | pos_ratio /= 4; | 848 | pos_ratio /= 4; |
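The div_s64()/div_u64() helpers in linux/math64.h take only a 32-bit divisor, while div64_s64()/div64_u64() take a full 64-bit one; with very large dirty limits the divisors here (limit - setpoint + 1, x_intercept - bdi_setpoint + 1) can exceed 32 bits, so the narrower variants would silently truncate them, possibly all the way to zero. The relevant prototypes, as a reminder (treat this as a sketch of the math64.h API rather than verbatim declarations):

```c
/* 32-bit divisor variants: the divisor is truncated to s32/u32. */
s64 div_s64(s64 dividend, s32 divisor);
u64 div_u64(u64 dividend, u32 divisor);

/* Full 64-bit divisor variants, which the hunks above switch to. */
s64 div64_s64(s64 dividend, s64 divisor);
u64 div64_u64(u64 dividend, u64 divisor);
```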
diff --git a/mm/percpu.c b/mm/percpu.c index 63e24fb4387b..2ddf9a990dbd 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
| @@ -610,7 +610,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) | |||
| 610 | chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC * | 610 | chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC * |
| 611 | sizeof(chunk->map[0])); | 611 | sizeof(chunk->map[0])); |
| 612 | if (!chunk->map) { | 612 | if (!chunk->map) { |
| 613 | kfree(chunk); | 613 | pcpu_mem_free(chunk, pcpu_chunk_struct_size); |
| 614 | return NULL; | 614 | return NULL; |
| 615 | } | 615 | } |
| 616 | 616 | ||
diff --git a/mm/slab.c b/mm/slab.c | |||
| @@ -166,7 +166,7 @@ typedef unsigned char freelist_idx_t; | |||
| 166 | typedef unsigned short freelist_idx_t; | 166 | typedef unsigned short freelist_idx_t; |
| 167 | #endif | 167 | #endif |
| 168 | 168 | ||
| 169 | #define SLAB_OBJ_MAX_NUM (1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) | 169 | #define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1) |
| 170 | 170 | ||
| 171 | /* | 171 | /* |
| 172 | * true if a page was allocated from pfmemalloc reserves for network-based | 172 | * true if a page was allocated from pfmemalloc reserves for network-based |
| @@ -2572,13 +2572,13 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, | |||
| 2572 | return freelist; | 2572 | return freelist; |
| 2573 | } | 2573 | } |
| 2574 | 2574 | ||
| 2575 | static inline freelist_idx_t get_free_obj(struct page *page, unsigned char idx) | 2575 | static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx) |
| 2576 | { | 2576 | { |
| 2577 | return ((freelist_idx_t *)page->freelist)[idx]; | 2577 | return ((freelist_idx_t *)page->freelist)[idx]; |
| 2578 | } | 2578 | } |
| 2579 | 2579 | ||
| 2580 | static inline void set_free_obj(struct page *page, | 2580 | static inline void set_free_obj(struct page *page, |
| 2581 | unsigned char idx, freelist_idx_t val) | 2581 | unsigned int idx, freelist_idx_t val) |
| 2582 | { | 2582 | { |
| 2583 | ((freelist_idx_t *)(page->freelist))[idx] = val; | 2583 | ((freelist_idx_t *)(page->freelist))[idx] = val; |
| 2584 | } | 2584 | } |
diff --git a/mm/slab.h b/mm/slab.h | |||
| @@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, | |||
| 91 | #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) | 91 | #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) |
| 92 | 92 | ||
| 93 | int __kmem_cache_shutdown(struct kmem_cache *); | 93 | int __kmem_cache_shutdown(struct kmem_cache *); |
| 94 | void slab_kmem_cache_release(struct kmem_cache *); | ||
| 94 | 95 | ||
| 95 | struct seq_file; | 96 | struct seq_file; |
| 96 | struct file; | 97 | struct file; |
diff --git a/mm/slab_common.c b/mm/slab_common.c index f3cfccf76dda..102cc6fca3d3 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
| @@ -323,6 +323,12 @@ static int kmem_cache_destroy_memcg_children(struct kmem_cache *s) | |||
| 323 | } | 323 | } |
| 324 | #endif /* CONFIG_MEMCG_KMEM */ | 324 | #endif /* CONFIG_MEMCG_KMEM */ |
| 325 | 325 | ||
| 326 | void slab_kmem_cache_release(struct kmem_cache *s) | ||
| 327 | { | ||
| 328 | kfree(s->name); | ||
| 329 | kmem_cache_free(kmem_cache, s); | ||
| 330 | } | ||
| 331 | |||
| 326 | void kmem_cache_destroy(struct kmem_cache *s) | 332 | void kmem_cache_destroy(struct kmem_cache *s) |
| 327 | { | 333 | { |
| 328 | get_online_cpus(); | 334 | get_online_cpus(); |
| @@ -352,8 +358,11 @@ void kmem_cache_destroy(struct kmem_cache *s) | |||
| 352 | rcu_barrier(); | 358 | rcu_barrier(); |
| 353 | 359 | ||
| 354 | memcg_free_cache_params(s); | 360 | memcg_free_cache_params(s); |
| 355 | kfree(s->name); | 361 | #ifdef SLAB_SUPPORTS_SYSFS |
| 356 | kmem_cache_free(kmem_cache, s); | 362 | sysfs_slab_remove(s); |
| 363 | #else | ||
| 364 | slab_kmem_cache_release(s); | ||
| 365 | #endif | ||
| 357 | goto out_put_cpus; | 366 | goto out_put_cpus; |
| 358 | 367 | ||
| 359 | out_unlock: | 368 | out_unlock: |
diff --git a/mm/slub.c b/mm/slub.c | |||
| @@ -210,14 +210,11 @@ enum track_item { TRACK_ALLOC, TRACK_FREE }; | |||
| 210 | #ifdef CONFIG_SYSFS | 210 | #ifdef CONFIG_SYSFS |
| 211 | static int sysfs_slab_add(struct kmem_cache *); | 211 | static int sysfs_slab_add(struct kmem_cache *); |
| 212 | static int sysfs_slab_alias(struct kmem_cache *, const char *); | 212 | static int sysfs_slab_alias(struct kmem_cache *, const char *); |
| 213 | static void sysfs_slab_remove(struct kmem_cache *); | ||
| 214 | static void memcg_propagate_slab_attrs(struct kmem_cache *s); | 213 | static void memcg_propagate_slab_attrs(struct kmem_cache *s); |
| 215 | #else | 214 | #else |
| 216 | static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } | 215 | static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } |
| 217 | static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) | 216 | static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) |
| 218 | { return 0; } | 217 | { return 0; } |
| 219 | static inline void sysfs_slab_remove(struct kmem_cache *s) { } | ||
| 220 | |||
| 221 | static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { } | 218 | static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { } |
| 222 | #endif | 219 | #endif |
| 223 | 220 | ||
| @@ -3238,24 +3235,7 @@ static inline int kmem_cache_close(struct kmem_cache *s) | |||
| 3238 | 3235 | ||
| 3239 | int __kmem_cache_shutdown(struct kmem_cache *s) | 3236 | int __kmem_cache_shutdown(struct kmem_cache *s) |
| 3240 | { | 3237 | { |
| 3241 | int rc = kmem_cache_close(s); | 3238 | return kmem_cache_close(s); |
| 3242 | |||
| 3243 | if (!rc) { | ||
| 3244 | /* | ||
| 3245 | * Since slab_attr_store may take the slab_mutex, we should | ||
| 3246 | * release the lock while removing the sysfs entry in order to | ||
| 3247 | * avoid a deadlock. Because this is pretty much the last | ||
| 3248 | * operation we do and the lock will be released shortly after | ||
| 3249 | * that in slab_common.c, we could just move sysfs_slab_remove | ||
| 3250 | * to a later point in common code. We should do that when we | ||
| 3251 | * have a common sysfs framework for all allocators. | ||
| 3252 | */ | ||
| 3253 | mutex_unlock(&slab_mutex); | ||
| 3254 | sysfs_slab_remove(s); | ||
| 3255 | mutex_lock(&slab_mutex); | ||
| 3256 | } | ||
| 3257 | |||
| 3258 | return rc; | ||
| 3259 | } | 3239 | } |
| 3260 | 3240 | ||
| 3261 | /******************************************************************** | 3241 | /******************************************************************** |
| @@ -5071,15 +5051,18 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) | |||
| 5071 | #ifdef CONFIG_MEMCG_KMEM | 5051 | #ifdef CONFIG_MEMCG_KMEM |
| 5072 | int i; | 5052 | int i; |
| 5073 | char *buffer = NULL; | 5053 | char *buffer = NULL; |
| 5054 | struct kmem_cache *root_cache; | ||
| 5074 | 5055 | ||
| 5075 | if (!is_root_cache(s)) | 5056 | if (is_root_cache(s)) |
| 5076 | return; | 5057 | return; |
| 5077 | 5058 | ||
| 5059 | root_cache = s->memcg_params->root_cache; | ||
| 5060 | |||
| 5078 | /* | 5061 | /* |
| 5079 | * This mean this cache had no attribute written. Therefore, no point | 5062 | * This mean this cache had no attribute written. Therefore, no point |
| 5080 | * in copying default values around | 5063 | * in copying default values around |
| 5081 | */ | 5064 | */ |
| 5082 | if (!s->max_attr_size) | 5065 | if (!root_cache->max_attr_size) |
| 5083 | return; | 5066 | return; |
| 5084 | 5067 | ||
| 5085 | for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) { | 5068 | for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) { |
| @@ -5101,7 +5084,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) | |||
| 5101 | */ | 5084 | */ |
| 5102 | if (buffer) | 5085 | if (buffer) |
| 5103 | buf = buffer; | 5086 | buf = buffer; |
| 5104 | else if (s->max_attr_size < ARRAY_SIZE(mbuf)) | 5087 | else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf)) |
| 5105 | buf = mbuf; | 5088 | buf = mbuf; |
| 5106 | else { | 5089 | else { |
| 5107 | buffer = (char *) get_zeroed_page(GFP_KERNEL); | 5090 | buffer = (char *) get_zeroed_page(GFP_KERNEL); |
| @@ -5110,7 +5093,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) | |||
| 5110 | buf = buffer; | 5093 | buf = buffer; |
| 5111 | } | 5094 | } |
| 5112 | 5095 | ||
| 5113 | attr->show(s->memcg_params->root_cache, buf); | 5096 | attr->show(root_cache, buf); |
| 5114 | attr->store(s, buf, strlen(buf)); | 5097 | attr->store(s, buf, strlen(buf)); |
| 5115 | } | 5098 | } |
| 5116 | 5099 | ||
| @@ -5119,6 +5102,11 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) | |||
| 5119 | #endif | 5102 | #endif |
| 5120 | } | 5103 | } |
| 5121 | 5104 | ||
| 5105 | static void kmem_cache_release(struct kobject *k) | ||
| 5106 | { | ||
| 5107 | slab_kmem_cache_release(to_slab(k)); | ||
| 5108 | } | ||
| 5109 | |||
| 5122 | static const struct sysfs_ops slab_sysfs_ops = { | 5110 | static const struct sysfs_ops slab_sysfs_ops = { |
| 5123 | .show = slab_attr_show, | 5111 | .show = slab_attr_show, |
| 5124 | .store = slab_attr_store, | 5112 | .store = slab_attr_store, |
| @@ -5126,6 +5114,7 @@ static const struct sysfs_ops slab_sysfs_ops = { | |||
| 5126 | 5114 | ||
| 5127 | static struct kobj_type slab_ktype = { | 5115 | static struct kobj_type slab_ktype = { |
| 5128 | .sysfs_ops = &slab_sysfs_ops, | 5116 | .sysfs_ops = &slab_sysfs_ops, |
| 5117 | .release = kmem_cache_release, | ||
| 5129 | }; | 5118 | }; |
| 5130 | 5119 | ||
| 5131 | static int uevent_filter(struct kset *kset, struct kobject *kobj) | 5120 | static int uevent_filter(struct kset *kset, struct kobject *kobj) |
| @@ -5252,7 +5241,7 @@ out_put_kobj: | |||
| 5252 | goto out; | 5241 | goto out; |
| 5253 | } | 5242 | } |
| 5254 | 5243 | ||
| 5255 | static void sysfs_slab_remove(struct kmem_cache *s) | 5244 | void sysfs_slab_remove(struct kmem_cache *s) |
| 5256 | { | 5245 | { |
| 5257 | if (slab_state < FULL) | 5246 | if (slab_state < FULL) |
| 5258 | /* | 5247 | /* |
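Freeing the struct kmem_cache from a kobject ->release callback (rather than straight from kmem_cache_destroy()) follows the usual sysfs lifetime rule: the object has to outlive the last reference to its sysfs directory, and only the final kobject_put() can tell you when that is. A generic sketch of the pattern (names are illustrative, not taken from slub.c):

```c
#include <linux/kobject.h>
#include <linux/slab.h>

struct my_obj {
	struct kobject kobj;
	/* ... payload ... */
};

static void my_obj_release(struct kobject *kobj)
{
	/*
	 * Runs only after the last reference is dropped, so no sysfs
	 * user can still reach the object when it is freed.
	 */
	kfree(container_of(kobj, struct my_obj, kobj));
}

static struct kobj_type my_ktype = {
	.release = my_obj_release,
};
```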
diff --git a/mm/truncate.c b/mm/truncate.c index e5cc39ab0751..6a78c814bebf 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
| @@ -484,14 +484,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, | |||
| 484 | unsigned long count = 0; | 484 | unsigned long count = 0; |
| 485 | int i; | 485 | int i; |
| 486 | 486 | ||
| 487 | /* | ||
| 488 | * Note: this function may get called on a shmem/tmpfs mapping: | ||
| 489 | * pagevec_lookup() might then return 0 prematurely (because it | ||
| 490 | * got a gangful of swap entries); but it's hardly worth worrying | ||
| 491 | * about - it can rarely have anything to free from such a mapping | ||
| 492 | * (most pages are dirty), and already skips over any difficulties. | ||
| 493 | */ | ||
| 494 | |||
| 495 | pagevec_init(&pvec, 0); | 487 | pagevec_init(&pvec, 0); |
| 496 | while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, | 488 | while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, |
| 497 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, | 489 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, |
diff --git a/mm/util.c b/mm/util.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/swapops.h> | 10 | #include <linux/swapops.h> |
| 11 | #include <linux/mman.h> | 11 | #include <linux/mman.h> |
| 12 | #include <linux/hugetlb.h> | 12 | #include <linux/hugetlb.h> |
| 13 | #include <linux/vmalloc.h> | ||
| 13 | 14 | ||
| 14 | #include <asm/uaccess.h> | 15 | #include <asm/uaccess.h> |
| 15 | 16 | ||
| @@ -387,6 +388,15 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, | |||
| 387 | } | 388 | } |
| 388 | EXPORT_SYMBOL(vm_mmap); | 389 | EXPORT_SYMBOL(vm_mmap); |
| 389 | 390 | ||
| 391 | void kvfree(const void *addr) | ||
| 392 | { | ||
| 393 | if (is_vmalloc_addr(addr)) | ||
| 394 | vfree(addr); | ||
| 395 | else | ||
| 396 | kfree(addr); | ||
| 397 | } | ||
| 398 | EXPORT_SYMBOL(kvfree); | ||
| 399 | |||
| 390 | struct address_space *page_mapping(struct page *page) | 400 | struct address_space *page_mapping(struct page *page) |
| 391 | { | 401 | { |
| 392 | struct address_space *mapping = page->mapping; | 402 | struct address_space *mapping = page->mapping; |
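The new kvfree() frees either a kmalloc or a vmalloc allocation, so code that falls back to vmalloc for large buffers no longer needs to remember which allocator succeeded. A hedged usage sketch (both helpers below are hypothetical, not part of this patch):

```c
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Hypothetical helper: try kmalloc first, fall back to vmalloc for big buffers. */
static void *big_buffer_alloc(size_t size)
{
	void *p = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);

	return p ? p : vmalloc(size);
}

static void big_buffer_free(void *p)
{
	kvfree(p);	/* correct for both kmalloc and vmalloc memory */
}
```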
diff --git a/mm/vmscan.c b/mm/vmscan.c index 3f56c8deb3c0..32c661d66a45 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -1916,6 +1916,24 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, | |||
| 1916 | get_lru_size(lruvec, LRU_INACTIVE_FILE); | 1916 | get_lru_size(lruvec, LRU_INACTIVE_FILE); |
| 1917 | 1917 | ||
| 1918 | /* | 1918 | /* |
| 1919 | * Prevent the reclaimer from falling into the cache trap: as | ||
| 1920 | * cache pages start out inactive, every cache fault will tip | ||
| 1921 | * the scan balance towards the file LRU. And as the file LRU | ||
| 1922 | * shrinks, so does the window for rotation from references. | ||
| 1923 | * This means we have a runaway feedback loop where a tiny | ||
| 1924 | * thrashing file LRU becomes infinitely more attractive than | ||
| 1925 | * anon pages. Try to detect this based on file LRU size. | ||
| 1926 | */ | ||
| 1927 | if (global_reclaim(sc)) { | ||
| 1928 | unsigned long free = zone_page_state(zone, NR_FREE_PAGES); | ||
| 1929 | |||
| 1930 | if (unlikely(file + free <= high_wmark_pages(zone))) { | ||
| 1931 | scan_balance = SCAN_ANON; | ||
| 1932 | goto out; | ||
| 1933 | } | ||
| 1934 | } | ||
| 1935 | |||
| 1936 | /* | ||
| 1919 | * There is enough inactive page cache, do not reclaim | 1937 | * There is enough inactive page cache, do not reclaim |
| 1920 | * anything from the anonymous working set right now. | 1938 | * anything from the anonymous working set right now. |
| 1921 | */ | 1939 | */ |
