path: root/mm
author		Olof Johansson <olof@lixom.net>	2014-05-21 17:23:56 -0400
committer	Olof Johansson <olof@lixom.net>	2014-05-21 17:23:56 -0400
commit		486ad2ede13314346226ee52e92b8e8773221f63 (patch)
tree		84b6ed35d2701ececc244fda1163b9db58a4d07c /mm
parent		9e05f9f300828aecb84659cedc6399b7a76683ef (diff)
parent		6b2c31c71d6fa8896c5f3f2354d790a5bd3f0a1e (diff)
Merge tag 'vexpress/updates-for-3.16' of git://git.linaro.org/people/pawel.moll/linux into next/cleanup
Merge "ARM Versatile Express updates for 3.16" from Pawel Moll: This series reworks VE's platform configuration infrastructure by: - making it possible to instantiate selected devices from the Device Tree, prior to massive population, - converting custom "func" API into standard "regmap", - splitting the existing MFD driver into smaller ones and placing them into relevant directories. The common clock framework driver can now be selected individually (mostly for arm64 sake, where some of them are not used at all). It also simplifies the machine code, by: - moving the shed clock info clocksource driver, - simplifying SMP operations to base them entirely of the DT data, - moving platform ID checks into relevant driver. * tag 'vexpress/updates-for-3.16' of git://git.linaro.org/people/pawel.moll/linux: ARM: vexpress: move HBI check to sysreg driver ARM: vexpress: Simplify SMP operations for DT-powered system ARM: vexpress: remove redundant vexpress_dt_cpus_num to get cpu count clocksource: Sched clock source for Versatile Express clk: versatile: Split config options for sp810 and vexpress_osc mfd: vexpress: Define the device as MFD cells mfd: syscon: Add platform data with a regmap config name mfd: vexpress: Convert custom func API to regmap of: Keep track of populated platform devices + Linux 3.15-rc5 Signed-off-by: Olof Johansson <olof@lixom.net>
Diffstat (limited to 'mm')
-rw-r--r--	mm/compaction.c		22
-rw-r--r--	mm/filemap.c		49
-rw-r--r--	mm/hugetlb.c		19
-rw-r--r--	mm/memcontrol.c		20
-rw-r--r--	mm/page-writeback.c	6
-rw-r--r--	mm/slab.c		6
-rw-r--r--	mm/slab.h		1
-rw-r--r--	mm/slab_common.c	13
-rw-r--r--	mm/slub.c		41
-rw-r--r--	mm/truncate.c		8
-rw-r--r--	mm/util.c		10
-rw-r--r--	mm/vmacache.c		8
-rw-r--r--	mm/vmscan.c		18
13 files changed, 132 insertions, 89 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 37f976287068..627dc2e4320f 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -671,16 +671,20 @@ static void isolate_freepages(struct zone *zone,
 				struct compact_control *cc)
 {
 	struct page *page;
-	unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn;
+	unsigned long high_pfn, low_pfn, pfn, z_end_pfn;
 	int nr_freepages = cc->nr_freepages;
 	struct list_head *freelist = &cc->freepages;
 
 	/*
 	 * Initialise the free scanner. The starting point is where we last
-	 * scanned from (or the end of the zone if starting). The low point
-	 * is the end of the pageblock the migration scanner is using.
+	 * successfully isolated from, zone-cached value, or the end of the
+	 * zone when isolating for the first time. We need this aligned to
+	 * the pageblock boundary, because we do pfn -= pageblock_nr_pages
+	 * in the for loop.
+	 * The low boundary is the end of the pageblock the migration scanner
+	 * is using.
 	 */
-	pfn = cc->free_pfn;
+	pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
 	low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);
 
 	/*
@@ -700,6 +704,7 @@ static void isolate_freepages(struct zone *zone,
 	for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
 					pfn -= pageblock_nr_pages) {
 		unsigned long isolated;
+		unsigned long end_pfn;
 
 		/*
 		 * This can iterate a massively long zone without finding any
@@ -734,13 +739,10 @@ static void isolate_freepages(struct zone *zone,
 		isolated = 0;
 
 		/*
-		 * As pfn may not start aligned, pfn+pageblock_nr_page
-		 * may cross a MAX_ORDER_NR_PAGES boundary and miss
-		 * a pfn_valid check. Ensure isolate_freepages_block()
-		 * only scans within a pageblock
+		 * Take care when isolating in last pageblock of a zone which
+		 * ends in the middle of a pageblock.
 		 */
-		end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
-		end_pfn = min(end_pfn, z_end_pfn);
+		end_pfn = min(pfn + pageblock_nr_pages, z_end_pfn);
 		isolated = isolate_freepages_block(cc, pfn, end_pfn,
 						freelist, false);
 		nr_freepages += isolated;
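For reference, the new starting point pfn = cc->free_pfn & ~(pageblock_nr_pages-1) simply rounds the cached free-scanner position down to a pageblock boundary (pageblock_nr_pages is a power of two), so the pfn -= pageblock_nr_pages step always moves between aligned pageblocks and end_pfn = min(pfn + pageblock_nr_pages, z_end_pfn) only has to guard the zone's last, possibly partial, pageblock. A tiny standalone sketch of the masking arithmetic, with made-up numbers:

#include <assert.h>

int main(void)
{
	unsigned long pageblock_nr_pages = 512;	/* hypothetical order-9 pageblock */
	unsigned long free_pfn = 262747;	/* arbitrary unaligned pfn */

	/* Round down to the start of the containing pageblock. */
	unsigned long pfn = free_pfn & ~(pageblock_nr_pages - 1);

	assert(pfn == 262656);			/* 513 * 512 */
	assert(pfn % pageblock_nr_pages == 0);
	return 0;
}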
diff --git a/mm/filemap.c b/mm/filemap.c
index 5020b280a771..000a220e2a41 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -906,8 +906,8 @@ EXPORT_SYMBOL(page_cache_prev_hole);
  * Looks up the page cache slot at @mapping & @offset. If there is a
  * page cache page, it is returned with an increased refcount.
  *
- * If the slot holds a shadow entry of a previously evicted page, it
- * is returned.
+ * If the slot holds a shadow entry of a previously evicted page, or a
+ * swap entry from shmem/tmpfs, it is returned.
  *
  * Otherwise, %NULL is returned.
  */
@@ -928,9 +928,9 @@ repeat:
 		if (radix_tree_deref_retry(page))
 			goto repeat;
 		/*
-		 * Otherwise, shmem/tmpfs must be storing a swap entry
-		 * here as an exceptional entry: so return it without
-		 * attempting to raise page count.
+		 * A shadow entry of a recently evicted page,
+		 * or a swap entry from shmem/tmpfs.  Return
+		 * it without attempting to raise page count.
 		 */
 		goto out;
 	}
@@ -983,8 +983,8 @@ EXPORT_SYMBOL(find_get_page);
  * page cache page, it is returned locked and with an increased
  * refcount.
  *
- * If the slot holds a shadow entry of a previously evicted page, it
- * is returned.
+ * If the slot holds a shadow entry of a previously evicted page, or a
+ * swap entry from shmem/tmpfs, it is returned.
  *
  * Otherwise, %NULL is returned.
 *
@@ -1099,8 +1099,8 @@ EXPORT_SYMBOL(find_or_create_page);
  * with ascending indexes. There may be holes in the indices due to
  * not-present pages.
  *
- * Any shadow entries of evicted pages are included in the returned
- * array.
+ * Any shadow entries of evicted pages, or swap entries from
+ * shmem/tmpfs, are included in the returned array.
  *
  * find_get_entries() returns the number of pages and shadow entries
  * which were found.
@@ -1128,9 +1128,9 @@ repeat:
 		if (radix_tree_deref_retry(page))
 			goto restart;
 		/*
-		 * Otherwise, we must be storing a swap entry
-		 * here as an exceptional entry: so return it
-		 * without attempting to raise page count.
+		 * A shadow entry of a recently evicted page,
+		 * or a swap entry from shmem/tmpfs.  Return
+		 * it without attempting to raise page count.
 		 */
 		goto export;
 	}
@@ -1198,9 +1198,9 @@ repeat:
 			goto restart;
 		}
 		/*
-		 * Otherwise, shmem/tmpfs must be storing a swap entry
-		 * here as an exceptional entry: so skip over it -
-		 * we only reach this from invalidate_mapping_pages().
+		 * A shadow entry of a recently evicted page,
+		 * or a swap entry from shmem/tmpfs.  Skip
+		 * over it.
 		 */
 		continue;
 	}
@@ -1265,9 +1265,9 @@ repeat:
 			goto restart;
 		}
 		/*
-		 * Otherwise, shmem/tmpfs must be storing a swap entry
-		 * here as an exceptional entry: so stop looking for
-		 * contiguous pages.
+		 * A shadow entry of a recently evicted page,
+		 * or a swap entry from shmem/tmpfs.  Stop
+		 * looking for contiguous pages.
 		 */
 		break;
 	}
@@ -1341,10 +1341,17 @@ repeat:
 			goto restart;
 		}
 		/*
-		 * This function is never used on a shmem/tmpfs
-		 * mapping, so a swap entry won't be found here.
+		 * A shadow entry of a recently evicted page.
+		 *
+		 * Those entries should never be tagged, but
+		 * this tree walk is lockless and the tags are
+		 * looked up in bulk, one radix tree node at a
+		 * time, so there is a sizable window for page
+		 * reclaim to evict a page we saw tagged.
+		 *
+		 * Skip over it.
 		 */
-		BUG();
+		continue;
 	}
 
 	if (!page_cache_get_speculative(page))
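The rewritten comments above all describe the same contract: a page cache lookup that walks slots directly (find_get_entry() and friends) may now return a radix tree exceptional entry, either a shadow entry of an evicted page or, on shmem/tmpfs, a swap entry, and callers are expected to recognise it. A hedged sketch of the caller-side pattern, modelled on the memcontrol.c hunk later in this diff; the helper name is hypothetical:

#include <linux/pagemap.h>
#include <linux/radix-tree.h>

/* Illustrative helper (not from this patch set): return only real pages,
 * skipping the shadow/swap entries that find_get_entry() may hand back. */
static struct page *example_lookup_page(struct address_space *mapping,
					pgoff_t index)
{
	struct page *page = find_get_entry(mapping, index);

	if (radix_tree_exceptional_entry(page)) {
		/* A shadow entry, or a swap entry on shmem/tmpfs;
		 * no refcount was taken, nothing to put. */
		return NULL;
	}
	return page;	/* NULL, or a page with an elevated refcount */
}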
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 246192929a2d..c82290b9c1fc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1981,11 +1981,7 @@ static int __init hugetlb_init(void)
 {
 	int i;
 
-	/* Some platform decide whether they support huge pages at boot
-	 * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when
-	 * there is no such support
-	 */
-	if (HPAGE_SHIFT == 0)
+	if (!hugepages_supported())
 		return 0;
 
 	if (!size_to_hstate(default_hstate_size)) {
@@ -2112,6 +2108,9 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
 	unsigned long tmp;
 	int ret;
 
+	if (!hugepages_supported())
+		return -ENOTSUPP;
+
 	tmp = h->max_huge_pages;
 
 	if (write && h->order >= MAX_ORDER)
@@ -2165,6 +2164,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 	unsigned long tmp;
 	int ret;
 
+	if (!hugepages_supported())
+		return -ENOTSUPP;
+
 	tmp = h->nr_overcommit_huge_pages;
 
 	if (write && h->order >= MAX_ORDER)
@@ -2190,6 +2192,8 @@ out:
 void hugetlb_report_meminfo(struct seq_file *m)
 {
 	struct hstate *h = &default_hstate;
+	if (!hugepages_supported())
+		return;
 	seq_printf(m,
 			"HugePages_Total:   %5lu\n"
 			"HugePages_Free:    %5lu\n"
@@ -2206,6 +2210,8 @@ void hugetlb_report_meminfo(struct seq_file *m)
 int hugetlb_report_node_meminfo(int nid, char *buf)
 {
 	struct hstate *h = &default_hstate;
+	if (!hugepages_supported())
+		return 0;
 	return sprintf(buf,
 		"Node %d HugePages_Total: %5u\n"
 		"Node %d HugePages_Free:  %5u\n"
@@ -2220,6 +2226,9 @@ void hugetlb_show_meminfo(void)
 	struct hstate *h;
 	int nid;
 
+	if (!hugepages_supported())
+		return;
+
 	for_each_node_state(nid, N_MEMORY)
 		for_each_hstate(h)
 			pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n",
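hugepages_supported() itself is not part of this diff. Judging only from the open-coded test it replaces in hugetlb_init() (HPAGE_SHIFT == 0 on platforms such as powerpc that decide huge page support at boot time), a minimal sketch of the helper would be along these lines; the real definition lives in include/linux/hugetlb.h and may be arch-specific:

/* Assumed shape of the helper, inferred from the removed check above. */
static inline int hugepages_supported(void)
{
	return HPAGE_SHIFT != 0;
}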
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 29501f040568..c47dffdcb246 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6686,16 +6686,20 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 		pgoff = pte_to_pgoff(ptent);
 
 	/* page is moved even if it's not RSS of this task(page-faulted). */
-	page = find_get_page(mapping, pgoff);
-
 #ifdef CONFIG_SWAP
 	/* shmem/tmpfs may report page out on swap: account for that too. */
-	if (radix_tree_exceptional_entry(page)) {
-		swp_entry_t swap = radix_to_swp_entry(page);
-		if (do_swap_account)
-			*entry = swap;
-		page = find_get_page(swap_address_space(swap), swap.val);
-	}
+	if (shmem_mapping(mapping)) {
+		page = find_get_entry(mapping, pgoff);
+		if (radix_tree_exceptional_entry(page)) {
+			swp_entry_t swp = radix_to_swp_entry(page);
+			if (do_swap_account)
+				*entry = swp;
+			page = find_get_page(swap_address_space(swp), swp.val);
+		}
+	} else
+		page = find_get_page(mapping, pgoff);
+#else
+	page = find_get_page(mapping, pgoff);
 #endif
 	return page;
 }
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ef413492a149..a4317da60532 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -593,14 +593,14 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
  * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
  *     => fast response on large errors; small oscillation near setpoint
  */
-static inline long long pos_ratio_polynom(unsigned long setpoint,
+static long long pos_ratio_polynom(unsigned long setpoint,
 					  unsigned long dirty,
 					  unsigned long limit)
 {
 	long long pos_ratio;
 	long x;
 
-	x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
+	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
 		    limit - setpoint + 1);
 	pos_ratio = x;
 	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
@@ -842,7 +842,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
 		x_intercept = bdi_setpoint + span;
 
 	if (bdi_dirty < x_intercept - span / 4) {
-		pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty),
+		pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
 				    x_intercept - bdi_setpoint + 1);
 	} else
 		pos_ratio /= 4;
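The point of switching from div_s64()/div_u64() to div64_s64()/div64_u64() is the divisor width: the former pair takes a 32-bit divisor, so a page-count expression such as limit - setpoint + 1 is truncated to its low 32 bits, which loses precision and can even truncate to zero and trap on the division. Roughly, the prototypes from include/linux/math64.h (simplified; several are static inlines) are:

	/* 32-bit divisor variants: the divisor is narrowed to 32 bits. */
	u64 div_u64(u64 dividend, u32 divisor);
	s64 div_s64(s64 dividend, s32 divisor);

	/* Full 64-bit divisor variants, now used by pos_ratio_polynom()
	 * and bdi_position_ratio(). */
	u64 div64_u64(u64 dividend, u64 divisor);
	s64 div64_s64(s64 dividend, s64 divisor);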
diff --git a/mm/slab.c b/mm/slab.c
index 388cb1ae6fbc..19d92181ce24 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -166,7 +166,7 @@ typedef unsigned char freelist_idx_t;
 typedef unsigned short freelist_idx_t;
 #endif
 
-#define SLAB_OBJ_MAX_NUM (1 << sizeof(freelist_idx_t) * BITS_PER_BYTE)
+#define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)
 
 /*
  * true if a page was allocated from pfmemalloc reserves for network-based
@@ -2572,13 +2572,13 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
 	return freelist;
 }
 
-static inline freelist_idx_t get_free_obj(struct page *page, unsigned char idx)
+static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
{
 	return ((freelist_idx_t *)page->freelist)[idx];
 }
 
 static inline void set_free_obj(struct page *page,
-					unsigned char idx, freelist_idx_t val)
+					unsigned int idx, freelist_idx_t val)
 {
 	((freelist_idx_t *)(page->freelist))[idx] = val;
 }
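For the record, the macro arithmetic after this change: with the one-byte freelist_idx_t, 1 << (1 * 8) is 256, so SLAB_OBJ_MAX_NUM now evaluates to 255 (UCHAR_MAX), the largest value the index type can actually store; with the two-byte type it becomes 65535 instead of 65536. A trivial userspace check of the one-byte case:

#include <assert.h>
#include <limits.h>

typedef unsigned char freelist_idx_t;	/* the CONFIG-dependent one-byte case */
#define BITS_PER_BYTE 8
#define SLAB_OBJ_MAX_NUM \
	((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)

int main(void)
{
	assert(SLAB_OBJ_MAX_NUM == 255);
	assert(SLAB_OBJ_MAX_NUM == UCHAR_MAX);
	return 0;
}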
diff --git a/mm/slab.h b/mm/slab.h
index 3045316b7c9d..6bd4c353704f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align,
 #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
 
 int __kmem_cache_shutdown(struct kmem_cache *);
+void slab_kmem_cache_release(struct kmem_cache *);
 
 struct seq_file;
 struct file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f3cfccf76dda..102cc6fca3d3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -323,6 +323,12 @@ static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
+void slab_kmem_cache_release(struct kmem_cache *s)
+{
+	kfree(s->name);
+	kmem_cache_free(kmem_cache, s);
+}
+
 void kmem_cache_destroy(struct kmem_cache *s)
 {
 	get_online_cpus();
@@ -352,8 +358,11 @@ void kmem_cache_destroy(struct kmem_cache *s)
 	rcu_barrier();
 
 	memcg_free_cache_params(s);
-	kfree(s->name);
-	kmem_cache_free(kmem_cache, s);
+#ifdef SLAB_SUPPORTS_SYSFS
+	sysfs_slab_remove(s);
+#else
+	slab_kmem_cache_release(s);
+#endif
 	goto out_put_cpus;
 
 out_unlock:
diff --git a/mm/slub.c b/mm/slub.c
index 5e234f1f8853..2b1ce697fc4b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -210,14 +210,11 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
 #ifdef CONFIG_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
-static void sysfs_slab_remove(struct kmem_cache *);
 static void memcg_propagate_slab_attrs(struct kmem_cache *s);
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
 							{ return 0; }
-static inline void sysfs_slab_remove(struct kmem_cache *s) { }
-
 static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
 #endif
 
@@ -3238,24 +3235,7 @@ static inline int kmem_cache_close(struct kmem_cache *s)
 
 int __kmem_cache_shutdown(struct kmem_cache *s)
 {
-	int rc = kmem_cache_close(s);
-
-	if (!rc) {
-		/*
-		 * Since slab_attr_store may take the slab_mutex, we should
-		 * release the lock while removing the sysfs entry in order to
-		 * avoid a deadlock. Because this is pretty much the last
-		 * operation we do and the lock will be released shortly after
-		 * that in slab_common.c, we could just move sysfs_slab_remove
-		 * to a later point in common code. We should do that when we
-		 * have a common sysfs framework for all allocators.
-		 */
-		mutex_unlock(&slab_mutex);
-		sysfs_slab_remove(s);
-		mutex_lock(&slab_mutex);
-	}
-
-	return rc;
+	return kmem_cache_close(s);
 }
 
 /********************************************************************
@@ -5071,15 +5051,18 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 #ifdef CONFIG_MEMCG_KMEM
 	int i;
 	char *buffer = NULL;
+	struct kmem_cache *root_cache;
 
-	if (!is_root_cache(s))
+	if (is_root_cache(s))
 		return;
 
+	root_cache = s->memcg_params->root_cache;
+
 	/*
 	 * This mean this cache had no attribute written. Therefore, no point
 	 * in copying default values around
 	 */
-	if (!s->max_attr_size)
+	if (!root_cache->max_attr_size)
 		return;
 
 	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
@@ -5101,7 +5084,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 		 */
 		if (buffer)
 			buf = buffer;
-		else if (s->max_attr_size < ARRAY_SIZE(mbuf))
+		else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
 			buf = mbuf;
 		else {
 			buffer = (char *) get_zeroed_page(GFP_KERNEL);
@@ -5110,7 +5093,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 			buf = buffer;
 		}
 
-		attr->show(s->memcg_params->root_cache, buf);
+		attr->show(root_cache, buf);
 		attr->store(s, buf, strlen(buf));
 	}
 
@@ -5119,6 +5102,11 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 #endif
 }
 
+static void kmem_cache_release(struct kobject *k)
+{
+	slab_kmem_cache_release(to_slab(k));
+}
+
 static const struct sysfs_ops slab_sysfs_ops = {
 	.show = slab_attr_show,
 	.store = slab_attr_store,
@@ -5126,6 +5114,7 @@ static const struct sysfs_ops slab_sysfs_ops = {
 
 static struct kobj_type slab_ktype = {
 	.sysfs_ops = &slab_sysfs_ops,
+	.release = kmem_cache_release,
 };
 
 static int uevent_filter(struct kset *kset, struct kobject *kobj)
@@ -5252,7 +5241,7 @@ out_put_kobj:
 	goto out;
 }
 
-static void sysfs_slab_remove(struct kmem_cache *s)
+void sysfs_slab_remove(struct kmem_cache *s)
 {
 	if (slab_state < FULL)
 		/*
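Taken together, the slub.c and slab_common.c hunks move the final freeing of a kmem_cache out of kmem_cache_destroy() and into the kobject ->release callback (kmem_cache_release() calling slab_kmem_cache_release()), so the structure cannot go away while sysfs files still hold a reference. A generic, hedged sketch of that kobject release pattern, using hypothetical names rather than the slab code itself:

#include <linux/kobject.h>
#include <linux/slab.h>

/* Hypothetical object embedding a kobject; not the kernel's kmem_cache. */
struct foo {
	struct kobject kobj;
	int value;
};

/* Called only when the last reference is dropped (final kobject_put()). */
static void foo_release(struct kobject *kobj)
{
	kfree(container_of(kobj, struct foo, kobj));
}

static struct kobj_type foo_ktype = {
	.release = foo_release,
	/* .sysfs_ops / attributes omitted for brevity */
};

static struct foo *foo_create(struct kobject *parent)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;
	if (kobject_init_and_add(&f->kobj, &foo_ktype, parent, "foo")) {
		kobject_put(&f->kobj);	/* release callback frees f */
		return NULL;
	}
	return f;
}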
diff --git a/mm/truncate.c b/mm/truncate.c
index e5cc39ab0751..6a78c814bebf 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -484,14 +484,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 	unsigned long count = 0;
 	int i;
 
-	/*
-	 * Note: this function may get called on a shmem/tmpfs mapping:
-	 * pagevec_lookup() might then return 0 prematurely (because it
-	 * got a gangful of swap entries); but it's hardly worth worrying
-	 * about - it can rarely have anything to free from such a mapping
-	 * (most pages are dirty), and already skips over any difficulties.
-	 */
-
 	pagevec_init(&pvec, 0);
 	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
diff --git a/mm/util.c b/mm/util.c
index f380af7ea779..d5ea733c5082 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -10,6 +10,7 @@
 #include <linux/swapops.h>
 #include <linux/mman.h>
 #include <linux/hugetlb.h>
+#include <linux/vmalloc.h>
 
 #include <asm/uaccess.h>
 
@@ -387,6 +388,15 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_mmap);
 
+void kvfree(const void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+EXPORT_SYMBOL(kvfree);
+
 struct address_space *page_mapping(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
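The new kvfree() pairs naturally with the common "try kmalloc(), fall back to vmalloc()" allocation pattern, since the caller no longer needs to remember which allocator succeeded. A short hedged usage sketch (the fallback allocation itself is the caller's idiom, not part of this patch, and kvfree() is assumed to be visible via linux/mm.h):

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Allocate a buffer that may be too large for kmalloc(); free it with
 * kvfree(), which picks vfree() or kfree() via is_vmalloc_addr(). */
static void *example_alloc(size_t size)
{
	void *buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);

	if (!buf)
		buf = vmalloc(size);
	return buf;
}

static void example_use(size_t size)
{
	void *buf = example_alloc(size);

	if (!buf)
		return;
	/* ... use buf ... */
	kvfree(buf);	/* correct for both kmalloc()ed and vmalloc()ed memory */
}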
diff --git a/mm/vmacache.c b/mm/vmacache.c
index d4224b397c0e..1037a3bab505 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -81,10 +81,12 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
 	for (i = 0; i < VMACACHE_SIZE; i++) {
 		struct vm_area_struct *vma = current->vmacache[i];
 
-		if (vma && vma->vm_start <= addr && vma->vm_end > addr) {
-			BUG_ON(vma->vm_mm != mm);
+		if (!vma)
+			continue;
+		if (WARN_ON_ONCE(vma->vm_mm != mm))
+			break;
+		if (vma->vm_start <= addr && vma->vm_end > addr)
 			return vma;
-		}
 	}
 
 	return NULL;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3f56c8deb3c0..32c661d66a45 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1916,6 +1916,24 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 		get_lru_size(lruvec, LRU_INACTIVE_FILE);
 
 	/*
+	 * Prevent the reclaimer from falling into the cache trap: as
+	 * cache pages start out inactive, every cache fault will tip
+	 * the scan balance towards the file LRU.  And as the file LRU
+	 * shrinks, so does the window for rotation from references.
+	 * This means we have a runaway feedback loop where a tiny
+	 * thrashing file LRU becomes infinitely more attractive than
+	 * anon pages.  Try to detect this based on file LRU size.
+	 */
+	if (global_reclaim(sc)) {
+		unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
+
+		if (unlikely(file + free <= high_wmark_pages(zone))) {
+			scan_balance = SCAN_ANON;
+			goto out;
+		}
+	}
+
+	/*
 	 * There is enough inactive page cache, do not reclaim
 	 * anything from the anonymous working set right now.
 	 */
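To make the new guard concrete with hypothetical numbers: on a zone whose high watermark is 16384 pages, with 9000 pages free and only 4000 pages left on the file LRUs,

	file + free = 4000 + 9000 = 13000 <= high_wmark_pages(zone) = 16384

so global reclaim sets scan_balance = SCAN_ANON and jumps to out, instead of letting the feedback loop keep shrinking an already tiny, thrashing file LRU.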