Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig            6
-rw-r--r--  mm/filemap.c          4
-rw-r--r--  mm/highmem.c          3
-rw-r--r--  mm/memory.c           9
-rw-r--r--  mm/mempolicy.c        2
-rw-r--r--  mm/mempool.c          6
-rw-r--r--  mm/page-writeback.c  17
-rw-r--r--  mm/page_alloc.c      97
-rw-r--r--  mm/readahead.c        8
-rw-r--r--  mm/shmem.c            2
-rw-r--r--  mm/slab.c           246
-rw-r--r--  mm/truncate.c        11
-rw-r--r--  mm/vmalloc.c          2
-rw-r--r--  mm/vmscan.c          51
-rw-r--r--  mm/vmstat.c          70
15 files changed, 252 insertions, 282 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index db7c55de92cd..7942b333e46c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -157,3 +157,9 @@ config RESOURCES_64BIT
157 default 64BIT 157 default 64BIT
158 help 158 help
159 This option allows memory and IO resources to be 64 bit. 159 This option allows memory and IO resources to be 64 bit.
160
161config ZONE_DMA_FLAG
162 int
163 default "0" if !ZONE_DMA
164 default "1"
165
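
The new ZONE_DMA_FLAG symbol gives C code a compile-time 0/1 constant mirroring CONFIG_ZONE_DMA, so DMA-zone handling can be written as an ordinary conditional that the compiler discards when ZONE_DMA is not configured (the mm/slab.c hunks further down use it exactly this way). A minimal sketch of the pattern follows; the helper name wants_dma_pages() is made up for illustration and is not part of the patch.

/* Sketch only: relies on CONFIG_ZONE_DMA_FLAG being 0 or 1, as defined
 * by the Kconfig hunk above.  Not part of the patch. */
#include <linux/gfp.h>

static inline int wants_dma_pages(gfp_t flags)
{
	/* Whole branch is dead code when ZONE_DMA is not configured. */
	return CONFIG_ZONE_DMA_FLAG && (flags & GFP_DMA);
}
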
diff --git a/mm/filemap.c b/mm/filemap.c
index f30ef28405d3..00414849a867 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -327,7 +327,7 @@ EXPORT_SYMBOL(sync_page_range);
327 * @pos: beginning offset in pages to write 327 * @pos: beginning offset in pages to write
328 * @count: number of bytes to write 328 * @count: number of bytes to write
329 * 329 *
330 * Note: Holding i_mutex across sync_page_range_nolock is not a good idea 330 * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea
331 * as it forces O_SYNC writers to different parts of the same file 331 * as it forces O_SYNC writers to different parts of the same file
332 * to be serialised right until io completion. 332 * to be serialised right until io completion.
333 */ 333 */
@@ -784,7 +784,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
784 * @mapping: target address_space 784 * @mapping: target address_space
785 * @index: the page index 785 * @index: the page index
786 * 786 *
787 * Same as grab_cache_page, but do not wait if the page is unavailable. 787 * Same as grab_cache_page(), but do not wait if the page is unavailable.
788 * This is intended for speculative data generators, where the data can 788 * This is intended for speculative data generators, where the data can
789 * be regenerated if the page couldn't be grabbed. This routine should 789 * be regenerated if the page couldn't be grabbed. This routine should
790 * be safe to call while holding the lock for another page. 790 * be safe to call while holding the lock for another page.
diff --git a/mm/highmem.c b/mm/highmem.c
index 0206e7e5018c..51e1c1995fec 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -47,7 +47,8 @@ unsigned int nr_free_highpages (void)
47 unsigned int pages = 0; 47 unsigned int pages = 0;
48 48
49 for_each_online_pgdat(pgdat) 49 for_each_online_pgdat(pgdat)
50 pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; 50 pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
51 NR_FREE_PAGES);
51 52
52 return pages; 53 return pages;
53} 54}
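
The pattern above recurs throughout the rest of the series: instead of reading zone->free_pages directly, callers go through the zoned VM counter accessors. A minimal sketch of the three read scopes used in this diff (zone_page_state(), node_page_state(), global_page_state()); the wrapper names are illustrative, not from the patch.

/* Illustrative only: the same NR_FREE_PAGES counter read at three scopes. */
#include <linux/mmzone.h>
#include <linux/vmstat.h>

static unsigned long free_in_zone(struct zone *zone)
{
	return zone_page_state(zone, NR_FREE_PAGES);	/* one zone */
}

static unsigned long free_on_node(int nid)
{
	return node_page_state(nid, NR_FREE_PAGES);	/* one NUMA node */
}

static unsigned long free_in_system(void)
{
	return global_page_state(NR_FREE_PAGES);	/* whole machine */
}
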
diff --git a/mm/memory.c b/mm/memory.c
index ef09f0acb1d8..072c1135ad37 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -678,7 +678,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
678 if (pte_dirty(ptent)) 678 if (pte_dirty(ptent))
679 set_page_dirty(page); 679 set_page_dirty(page);
680 if (pte_young(ptent)) 680 if (pte_young(ptent))
681 mark_page_accessed(page); 681 SetPageReferenced(page);
682 file_rss--; 682 file_rss--;
683 } 683 }
684 page_remove_rmap(page, vma); 684 page_remove_rmap(page, vma);
@@ -1531,8 +1531,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1531 if (vma->vm_ops->page_mkwrite(vma, old_page) < 0) 1531 if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
1532 goto unwritable_page; 1532 goto unwritable_page;
1533 1533
1534 page_cache_release(old_page);
1535
1536 /* 1534 /*
1537 * Since we dropped the lock we need to revalidate 1535 * Since we dropped the lock we need to revalidate
1538 * the PTE as someone else may have changed it. If 1536 * the PTE as someone else may have changed it. If
@@ -1541,6 +1539,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1541 */ 1539 */
1542 page_table = pte_offset_map_lock(mm, pmd, address, 1540 page_table = pte_offset_map_lock(mm, pmd, address,
1543 &ptl); 1541 &ptl);
1542 page_cache_release(old_page);
1544 if (!pte_same(*page_table, orig_pte)) 1543 if (!pte_same(*page_table, orig_pte))
1545 goto unlock; 1544 goto unlock;
1546 } 1545 }
@@ -1776,9 +1775,7 @@ restart:
1776} 1775}
1777 1776
1778/** 1777/**
1779 * unmap_mapping_range - unmap the portion of all mmaps 1778 * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file.
1780 * in the specified address_space corresponding to the specified
1781 * page range in the underlying file.
1782 * @mapping: the address space containing mmaps to be unmapped. 1779 * @mapping: the address space containing mmaps to be unmapped.
1783 * @holebegin: byte in first page to unmap, relative to the start of 1780 * @holebegin: byte in first page to unmap, relative to the start of
1784 * the underlying file. This will be rounded down to a PAGE_SIZE 1781 * the underlying file. This will be rounded down to a PAGE_SIZE
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c2aec0e1090d..259a706bd83e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache;
105 105
106/* Highest zone. An specific allocation for a zone below that is not 106/* Highest zone. An specific allocation for a zone below that is not
107 policied. */ 107 policied. */
108enum zone_type policy_zone = ZONE_DMA; 108enum zone_type policy_zone = 0;
109 109
110struct mempolicy default_policy = { 110struct mempolicy default_policy = {
111 .refcnt = ATOMIC_INIT(1), /* never free it */ 111 .refcnt = ATOMIC_INIT(1), /* never free it */
diff --git a/mm/mempool.c b/mm/mempool.c
index ccd8cb8cd41f..cc1ca86dfc24 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -46,9 +46,9 @@ static void free_pool(mempool_t *pool)
46 * @pool_data: optional private data available to the user-defined functions. 46 * @pool_data: optional private data available to the user-defined functions.
47 * 47 *
48 * this function creates and allocates a guaranteed size, preallocated 48 * this function creates and allocates a guaranteed size, preallocated
49 * memory pool. The pool can be used from the mempool_alloc and mempool_free 49 * memory pool. The pool can be used from the mempool_alloc() and mempool_free()
50 * functions. This function might sleep. Both the alloc_fn() and the free_fn() 50 * functions. This function might sleep. Both the alloc_fn() and the free_fn()
51 * functions might sleep - as long as the mempool_alloc function is not called 51 * functions might sleep - as long as the mempool_alloc() function is not called
52 * from IRQ contexts. 52 * from IRQ contexts.
53 */ 53 */
54mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, 54mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
@@ -195,7 +195,7 @@ EXPORT_SYMBOL(mempool_destroy);
195 * mempool_create(). 195 * mempool_create().
196 * @gfp_mask: the usual allocation bitmask. 196 * @gfp_mask: the usual allocation bitmask.
197 * 197 *
198 * this function only sleeps if the alloc_fn function sleeps or 198 * this function only sleeps if the alloc_fn() function sleeps or
199 * returns NULL. Note that due to preallocation, this function 199 * returns NULL. Note that due to preallocation, this function
200 * *never* fails when called from process contexts. (it might 200 * *never* fails when called from process contexts. (it might
201 * fail if called from an IRQ context.) 201 * fail if called from an IRQ context.)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index be0efbde4994..f7e088f5a309 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -515,7 +515,7 @@ static int __cpuinit
515ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) 515ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
516{ 516{
517 writeback_set_ratelimit(); 517 writeback_set_ratelimit();
518 return 0; 518 return NOTIFY_DONE;
519} 519}
520 520
521static struct notifier_block __cpuinitdata ratelimit_nb = { 521static struct notifier_block __cpuinitdata ratelimit_nb = {
@@ -549,9 +549,7 @@ void __init page_writeback_init(void)
549} 549}
550 550
551/** 551/**
552 * generic_writepages - walk the list of dirty pages of the given 552 * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
553 * address space and writepage() all of them.
554 *
555 * @mapping: address space structure to write 553 * @mapping: address space structure to write
556 * @wbc: subtract the number of written pages from *@wbc->nr_to_write 554 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
557 * 555 *
@@ -698,7 +696,6 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
698 696
699/** 697/**
700 * write_one_page - write out a single page and optionally wait on I/O 698 * write_one_page - write out a single page and optionally wait on I/O
701 *
702 * @page: the page to write 699 * @page: the page to write
703 * @wait: if true, wait on writeout 700 * @wait: if true, wait on writeout
704 * 701 *
@@ -737,6 +734,16 @@ int write_one_page(struct page *page, int wait)
737EXPORT_SYMBOL(write_one_page); 734EXPORT_SYMBOL(write_one_page);
738 735
739/* 736/*
737 * For address_spaces which do not use buffers nor write back.
738 */
739int __set_page_dirty_no_writeback(struct page *page)
740{
741 if (!PageDirty(page))
742 SetPageDirty(page);
743 return 0;
744}
745
746/*
740 * For address_spaces which do not use buffers. Just tag the page as dirty in 747 * For address_spaces which do not use buffers. Just tag the page as dirty in
741 * its radix tree. 748 * its radix tree.
742 * 749 *
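
The new __set_page_dirty_no_writeback() only flips PG_dirty and never tags the radix tree or touches dirty accounting, which is all an address_space needs when its pages are never written back. Below is a sketch of how a RAM-backed filesystem would wire it up, mirroring the mm/shmem.c hunk later in this diff; the aops name is hypothetical, and the helper's declaration lives in a header outside this mm/ diffstat.

/* Sketch, not part of the patch: pages of this mapping are never written
 * back, so plain PG_dirty bookkeeping is sufficient. */
#include <linux/fs.h>
#include <linux/mm.h>

static const struct address_space_operations example_ram_aops = {
	.set_page_dirty	= __set_page_dirty_no_writeback,
};
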
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f12052dc23ff..d461b23a27a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -73,7 +73,9 @@ static void __free_pages_ok(struct page *page, unsigned int order);
73 * don't need any ZONE_NORMAL reservation 73 * don't need any ZONE_NORMAL reservation
74 */ 74 */
75int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 75int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
76#ifdef CONFIG_ZONE_DMA
76 256, 77 256,
78#endif
77#ifdef CONFIG_ZONE_DMA32 79#ifdef CONFIG_ZONE_DMA32
78 256, 80 256,
79#endif 81#endif
@@ -85,7 +87,9 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
85EXPORT_SYMBOL(totalram_pages); 87EXPORT_SYMBOL(totalram_pages);
86 88
87static char * const zone_names[MAX_NR_ZONES] = { 89static char * const zone_names[MAX_NR_ZONES] = {
90#ifdef CONFIG_ZONE_DMA
88 "DMA", 91 "DMA",
92#endif
89#ifdef CONFIG_ZONE_DMA32 93#ifdef CONFIG_ZONE_DMA32
90 "DMA32", 94 "DMA32",
91#endif 95#endif
@@ -395,7 +399,7 @@ static inline void __free_one_page(struct page *page,
395 VM_BUG_ON(page_idx & (order_size - 1)); 399 VM_BUG_ON(page_idx & (order_size - 1));
396 VM_BUG_ON(bad_range(zone, page)); 400 VM_BUG_ON(bad_range(zone, page));
397 401
398 zone->free_pages += order_size; 402 __mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
399 while (order < MAX_ORDER-1) { 403 while (order < MAX_ORDER-1) {
400 unsigned long combined_idx; 404 unsigned long combined_idx;
401 struct free_area *area; 405 struct free_area *area;
@@ -631,7 +635,7 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
631 list_del(&page->lru); 635 list_del(&page->lru);
632 rmv_page_order(page); 636 rmv_page_order(page);
633 area->nr_free--; 637 area->nr_free--;
634 zone->free_pages -= 1UL << order; 638 __mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order));
635 expand(zone, page, order, current_order, area); 639 expand(zone, page, order, current_order, area);
636 return page; 640 return page;
637 } 641 }
@@ -989,7 +993,8 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
989 int classzone_idx, int alloc_flags) 993 int classzone_idx, int alloc_flags)
990{ 994{
991 /* free_pages my go negative - that's OK */ 995 /* free_pages my go negative - that's OK */
992 long min = mark, free_pages = z->free_pages - (1 << order) + 1; 996 long min = mark;
997 long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
993 int o; 998 int o;
994 999
995 if (alloc_flags & ALLOC_HIGH) 1000 if (alloc_flags & ALLOC_HIGH)
@@ -1439,35 +1444,6 @@ fastcall void free_pages(unsigned long addr, unsigned int order)
1439 1444
1440EXPORT_SYMBOL(free_pages); 1445EXPORT_SYMBOL(free_pages);
1441 1446
1442/*
1443 * Total amount of free (allocatable) RAM:
1444 */
1445unsigned int nr_free_pages(void)
1446{
1447 unsigned int sum = 0;
1448 struct zone *zone;
1449
1450 for_each_zone(zone)
1451 sum += zone->free_pages;
1452
1453 return sum;
1454}
1455
1456EXPORT_SYMBOL(nr_free_pages);
1457
1458#ifdef CONFIG_NUMA
1459unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
1460{
1461 unsigned int sum = 0;
1462 enum zone_type i;
1463
1464 for (i = 0; i < MAX_NR_ZONES; i++)
1465 sum += pgdat->node_zones[i].free_pages;
1466
1467 return sum;
1468}
1469#endif
1470
1471static unsigned int nr_free_zone_pages(int offset) 1447static unsigned int nr_free_zone_pages(int offset)
1472{ 1448{
1473 /* Just pick one node, since fallback list is circular */ 1449 /* Just pick one node, since fallback list is circular */
@@ -1514,7 +1490,7 @@ void si_meminfo(struct sysinfo *val)
1514{ 1490{
1515 val->totalram = totalram_pages; 1491 val->totalram = totalram_pages;
1516 val->sharedram = 0; 1492 val->sharedram = 0;
1517 val->freeram = nr_free_pages(); 1493 val->freeram = global_page_state(NR_FREE_PAGES);
1518 val->bufferram = nr_blockdev_pages(); 1494 val->bufferram = nr_blockdev_pages();
1519 val->totalhigh = totalhigh_pages; 1495 val->totalhigh = totalhigh_pages;
1520 val->freehigh = nr_free_highpages(); 1496 val->freehigh = nr_free_highpages();
@@ -1529,10 +1505,11 @@ void si_meminfo_node(struct sysinfo *val, int nid)
1529 pg_data_t *pgdat = NODE_DATA(nid); 1505 pg_data_t *pgdat = NODE_DATA(nid);
1530 1506
1531 val->totalram = pgdat->node_present_pages; 1507 val->totalram = pgdat->node_present_pages;
1532 val->freeram = nr_free_pages_pgdat(pgdat); 1508 val->freeram = node_page_state(nid, NR_FREE_PAGES);
1533#ifdef CONFIG_HIGHMEM 1509#ifdef CONFIG_HIGHMEM
1534 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; 1510 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
1535 val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; 1511 val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
1512 NR_FREE_PAGES);
1536#else 1513#else
1537 val->totalhigh = 0; 1514 val->totalhigh = 0;
1538 val->freehigh = 0; 1515 val->freehigh = 0;
@@ -1551,9 +1528,6 @@ void si_meminfo_node(struct sysinfo *val, int nid)
1551void show_free_areas(void) 1528void show_free_areas(void)
1552{ 1529{
1553 int cpu; 1530 int cpu;
1554 unsigned long active;
1555 unsigned long inactive;
1556 unsigned long free;
1557 struct zone *zone; 1531 struct zone *zone;
1558 1532
1559 for_each_zone(zone) { 1533 for_each_zone(zone) {
@@ -1577,16 +1551,14 @@ void show_free_areas(void)
1577 } 1551 }
1578 } 1552 }
1579 1553
1580 get_zone_counts(&active, &inactive, &free);
1581
1582 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n" 1554 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
1583 " free:%u slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", 1555 " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
1584 active, 1556 global_page_state(NR_ACTIVE),
1585 inactive, 1557 global_page_state(NR_INACTIVE),
1586 global_page_state(NR_FILE_DIRTY), 1558 global_page_state(NR_FILE_DIRTY),
1587 global_page_state(NR_WRITEBACK), 1559 global_page_state(NR_WRITEBACK),
1588 global_page_state(NR_UNSTABLE_NFS), 1560 global_page_state(NR_UNSTABLE_NFS),
1589 nr_free_pages(), 1561 global_page_state(NR_FREE_PAGES),
1590 global_page_state(NR_SLAB_RECLAIMABLE) + 1562 global_page_state(NR_SLAB_RECLAIMABLE) +
1591 global_page_state(NR_SLAB_UNRECLAIMABLE), 1563 global_page_state(NR_SLAB_UNRECLAIMABLE),
1592 global_page_state(NR_FILE_MAPPED), 1564 global_page_state(NR_FILE_MAPPED),
@@ -1612,12 +1584,12 @@ void show_free_areas(void)
1612 " all_unreclaimable? %s" 1584 " all_unreclaimable? %s"
1613 "\n", 1585 "\n",
1614 zone->name, 1586 zone->name,
1615 K(zone->free_pages), 1587 K(zone_page_state(zone, NR_FREE_PAGES)),
1616 K(zone->pages_min), 1588 K(zone->pages_min),
1617 K(zone->pages_low), 1589 K(zone->pages_low),
1618 K(zone->pages_high), 1590 K(zone->pages_high),
1619 K(zone->nr_active), 1591 K(zone_page_state(zone, NR_ACTIVE)),
1620 K(zone->nr_inactive), 1592 K(zone_page_state(zone, NR_INACTIVE)),
1621 K(zone->present_pages), 1593 K(zone->present_pages),
1622 zone->pages_scanned, 1594 zone->pages_scanned,
1623 (zone->all_unreclaimable ? "yes" : "no") 1595 (zone->all_unreclaimable ? "yes" : "no")
@@ -2651,11 +2623,11 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2651 " %s zone: %lu pages exceeds realsize %lu\n", 2623 " %s zone: %lu pages exceeds realsize %lu\n",
2652 zone_names[j], memmap_pages, realsize); 2624 zone_names[j], memmap_pages, realsize);
2653 2625
2654 /* Account for reserved DMA pages */ 2626 /* Account for reserved pages */
2655 if (j == ZONE_DMA && realsize > dma_reserve) { 2627 if (j == 0 && realsize > dma_reserve) {
2656 realsize -= dma_reserve; 2628 realsize -= dma_reserve;
2657 printk(KERN_DEBUG " DMA zone: %lu pages reserved\n", 2629 printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
2658 dma_reserve); 2630 zone_names[0], dma_reserve);
2659 } 2631 }
2660 2632
2661 if (!is_highmem_idx(j)) 2633 if (!is_highmem_idx(j))
@@ -2675,7 +2647,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2675 spin_lock_init(&zone->lru_lock); 2647 spin_lock_init(&zone->lru_lock);
2676 zone_seqlock_init(zone); 2648 zone_seqlock_init(zone);
2677 zone->zone_pgdat = pgdat; 2649 zone->zone_pgdat = pgdat;
2678 zone->free_pages = 0;
2679 2650
2680 zone->prev_priority = DEF_PRIORITY; 2651 zone->prev_priority = DEF_PRIORITY;
2681 2652
@@ -2684,8 +2655,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2684 INIT_LIST_HEAD(&zone->inactive_list); 2655 INIT_LIST_HEAD(&zone->inactive_list);
2685 zone->nr_scan_active = 0; 2656 zone->nr_scan_active = 0;
2686 zone->nr_scan_inactive = 0; 2657 zone->nr_scan_inactive = 0;
2687 zone->nr_active = 0;
2688 zone->nr_inactive = 0;
2689 zap_zone_vm_stats(zone); 2658 zap_zone_vm_stats(zone);
2690 atomic_set(&zone->reclaim_in_progress, 0); 2659 atomic_set(&zone->reclaim_in_progress, 0);
2691 if (!size) 2660 if (!size)
@@ -2877,20 +2846,23 @@ static void __init sort_node_map(void)
2877 cmp_node_active_region, NULL); 2846 cmp_node_active_region, NULL);
2878} 2847}
2879 2848
2880/* Find the lowest pfn for a node. This depends on a sorted early_node_map */ 2849/* Find the lowest pfn for a node */
2881unsigned long __init find_min_pfn_for_node(unsigned long nid) 2850unsigned long __init find_min_pfn_for_node(unsigned long nid)
2882{ 2851{
2883 int i; 2852 int i;
2884 2853 unsigned long min_pfn = ULONG_MAX;
2885 /* Regions in the early_node_map can be in any order */
2886 sort_node_map();
2887 2854
2888 /* Assuming a sorted map, the first range found has the starting pfn */ 2855 /* Assuming a sorted map, the first range found has the starting pfn */
2889 for_each_active_range_index_in_nid(i, nid) 2856 for_each_active_range_index_in_nid(i, nid)
2890 return early_node_map[i].start_pfn; 2857 min_pfn = min(min_pfn, early_node_map[i].start_pfn);
2891 2858
2892 printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid); 2859 if (min_pfn == ULONG_MAX) {
2893 return 0; 2860 printk(KERN_WARNING
2861 "Could not find start_pfn for node %lu\n", nid);
2862 return 0;
2863 }
2864
2865 return min_pfn;
2894} 2866}
2895 2867
2896/** 2868/**
@@ -2939,6 +2911,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
2939 unsigned long nid; 2911 unsigned long nid;
2940 enum zone_type i; 2912 enum zone_type i;
2941 2913
2914 /* Sort early_node_map as initialisation assumes it is sorted */
2915 sort_node_map();
2916
2942 /* Record where the zone boundaries are */ 2917 /* Record where the zone boundaries are */
2943 memset(arch_zone_lowest_possible_pfn, 0, 2918 memset(arch_zone_lowest_possible_pfn, 0,
2944 sizeof(arch_zone_lowest_possible_pfn)); 2919 sizeof(arch_zone_lowest_possible_pfn));
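
On the update side, the buddy-allocator hunks above replace direct arithmetic on zone->free_pages with __mod_zone_page_state(), the variant that assumes interrupts are already disabled (both call sites run under zone->lock). A minimal sketch of that accounting step; the helper name account_buddy_block() is invented for illustration.

/* Sketch only: caller holds zone->lock with interrupts disabled, as in
 * __free_one_page() and __rmqueue() above. */
#include <linux/mmzone.h>
#include <linux/vmstat.h>

static void account_buddy_block(struct zone *zone, unsigned int order,
				int freeing)
{
	int pages = 1 << order;

	__mod_zone_page_state(zone, NR_FREE_PAGES, freeing ? pages : -pages);
}
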
diff --git a/mm/readahead.c b/mm/readahead.c
index 0f539e8e827a..93d9ee692fd8 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -575,10 +575,6 @@ void handle_ra_miss(struct address_space *mapping,
575 */ 575 */
576unsigned long max_sane_readahead(unsigned long nr) 576unsigned long max_sane_readahead(unsigned long nr)
577{ 577{
578 unsigned long active; 578 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
579 unsigned long inactive; 579 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
580 unsigned long free;
581
582 __get_zone_counts(&active, &inactive, &free, NODE_DATA(numa_node_id()));
583 return min(nr, (inactive + free) / 2);
584} 580}
diff --git a/mm/shmem.c b/mm/shmem.c
index 70da7a0981bf..b38e17169271 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2316,7 +2316,7 @@ static void destroy_inodecache(void)
2316 2316
2317static const struct address_space_operations shmem_aops = { 2317static const struct address_space_operations shmem_aops = {
2318 .writepage = shmem_writepage, 2318 .writepage = shmem_writepage,
2319 .set_page_dirty = __set_page_dirty_nobuffers, 2319 .set_page_dirty = __set_page_dirty_no_writeback,
2320#ifdef CONFIG_TMPFS 2320#ifdef CONFIG_TMPFS
2321 .prepare_write = shmem_prepare_write, 2321 .prepare_write = shmem_prepare_write,
2322 .commit_write = simple_commit_write, 2322 .commit_write = simple_commit_write,
diff --git a/mm/slab.c b/mm/slab.c
index c6100628a6ef..70784b848b69 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -793,8 +793,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size,
793 * has cs_{dma,}cachep==NULL. Thus no special case 793 * has cs_{dma,}cachep==NULL. Thus no special case
794 * for large kmalloc calls required. 794 * for large kmalloc calls required.
795 */ 795 */
796#ifdef CONFIG_ZONE_DMA
796 if (unlikely(gfpflags & GFP_DMA)) 797 if (unlikely(gfpflags & GFP_DMA))
797 return csizep->cs_dmacachep; 798 return csizep->cs_dmacachep;
799#endif
798 return csizep->cs_cachep; 800 return csizep->cs_cachep;
799} 801}
800 802
@@ -1493,13 +1495,15 @@ void __init kmem_cache_init(void)
1493 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1495 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1494 NULL, NULL); 1496 NULL, NULL);
1495 } 1497 }
1496 1498#ifdef CONFIG_ZONE_DMA
1497 sizes->cs_dmacachep = kmem_cache_create(names->name_dma, 1499 sizes->cs_dmacachep = kmem_cache_create(
1500 names->name_dma,
1498 sizes->cs_size, 1501 sizes->cs_size,
1499 ARCH_KMALLOC_MINALIGN, 1502 ARCH_KMALLOC_MINALIGN,
1500 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| 1503 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1501 SLAB_PANIC, 1504 SLAB_PANIC,
1502 NULL, NULL); 1505 NULL, NULL);
1506#endif
1503 sizes++; 1507 sizes++;
1504 names++; 1508 names++;
1505 } 1509 }
@@ -2321,7 +2325,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2321 cachep->slab_size = slab_size; 2325 cachep->slab_size = slab_size;
2322 cachep->flags = flags; 2326 cachep->flags = flags;
2323 cachep->gfpflags = 0; 2327 cachep->gfpflags = 0;
2324 if (flags & SLAB_CACHE_DMA) 2328 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2325 cachep->gfpflags |= GFP_DMA; 2329 cachep->gfpflags |= GFP_DMA;
2326 cachep->buffer_size = size; 2330 cachep->buffer_size = size;
2327 cachep->reciprocal_buffer_size = reciprocal_value(size); 2331 cachep->reciprocal_buffer_size = reciprocal_value(size);
@@ -2516,7 +2520,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
2516 * kmem_cache_destroy - delete a cache 2520 * kmem_cache_destroy - delete a cache
2517 * @cachep: the cache to destroy 2521 * @cachep: the cache to destroy
2518 * 2522 *
2519 * Remove a struct kmem_cache object from the slab cache. 2523 * Remove a &struct kmem_cache object from the slab cache.
2520 * 2524 *
2521 * It is expected this function will be called by a module when it is 2525 * It is expected this function will be called by a module when it is
2522 * unloaded. This will remove the cache completely, and avoid a duplicate 2526 * unloaded. This will remove the cache completely, and avoid a duplicate
@@ -2643,10 +2647,12 @@ static void cache_init_objs(struct kmem_cache *cachep,
2643 2647
2644static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) 2648static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2645{ 2649{
2646 if (flags & GFP_DMA) 2650 if (CONFIG_ZONE_DMA_FLAG) {
2647 BUG_ON(!(cachep->gfpflags & GFP_DMA)); 2651 if (flags & GFP_DMA)
2648 else 2652 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2649 BUG_ON(cachep->gfpflags & GFP_DMA); 2653 else
2654 BUG_ON(cachep->gfpflags & GFP_DMA);
2655 }
2650} 2656}
2651 2657
2652static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, 2658static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
@@ -2814,19 +2820,11 @@ failed:
2814 */ 2820 */
2815static void kfree_debugcheck(const void *objp) 2821static void kfree_debugcheck(const void *objp)
2816{ 2822{
2817 struct page *page;
2818
2819 if (!virt_addr_valid(objp)) { 2823 if (!virt_addr_valid(objp)) {
2820 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", 2824 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2821 (unsigned long)objp); 2825 (unsigned long)objp);
2822 BUG(); 2826 BUG();
2823 } 2827 }
2824 page = virt_to_page(objp);
2825 if (!PageSlab(page)) {
2826 printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n",
2827 (unsigned long)objp);
2828 BUG();
2829 }
2830} 2828}
2831 2829
2832static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) 2830static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
@@ -3197,35 +3195,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3197 return objp; 3195 return objp;
3198} 3196}
3199 3197
3200static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
3201 gfp_t flags, void *caller)
3202{
3203 unsigned long save_flags;
3204 void *objp = NULL;
3205
3206 cache_alloc_debugcheck_before(cachep, flags);
3207
3208 local_irq_save(save_flags);
3209
3210 if (unlikely(NUMA_BUILD &&
3211 current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
3212 objp = alternate_node_alloc(cachep, flags);
3213
3214 if (!objp)
3215 objp = ____cache_alloc(cachep, flags);
3216 /*
3217 * We may just have run out of memory on the local node.
3218 * ____cache_alloc_node() knows how to locate memory on other nodes
3219 */
3220 if (NUMA_BUILD && !objp)
3221 objp = ____cache_alloc_node(cachep, flags, numa_node_id());
3222 local_irq_restore(save_flags);
3223 objp = cache_alloc_debugcheck_after(cachep, flags, objp,
3224 caller);
3225 prefetchw(objp);
3226 return objp;
3227}
3228
3229#ifdef CONFIG_NUMA 3198#ifdef CONFIG_NUMA
3230/* 3199/*
3231 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY. 3200 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
@@ -3257,14 +3226,20 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3257 * allocator to do its reclaim / fallback magic. We then insert the 3226 * allocator to do its reclaim / fallback magic. We then insert the
3258 * slab into the proper nodelist and then allocate from it. 3227 * slab into the proper nodelist and then allocate from it.
3259 */ 3228 */
3260void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) 3229static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3261{ 3230{
3262 struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy)) 3231 struct zonelist *zonelist;
3263 ->node_zonelists[gfp_zone(flags)]; 3232 gfp_t local_flags;
3264 struct zone **z; 3233 struct zone **z;
3265 void *obj = NULL; 3234 void *obj = NULL;
3266 int nid; 3235 int nid;
3267 gfp_t local_flags = (flags & GFP_LEVEL_MASK); 3236
3237 if (flags & __GFP_THISNODE)
3238 return NULL;
3239
3240 zonelist = &NODE_DATA(slab_node(current->mempolicy))
3241 ->node_zonelists[gfp_zone(flags)];
3242 local_flags = (flags & GFP_LEVEL_MASK);
3268 3243
3269retry: 3244retry:
3270 /* 3245 /*
@@ -3374,16 +3349,110 @@ must_grow:
3374 if (x) 3349 if (x)
3375 goto retry; 3350 goto retry;
3376 3351
3377 if (!(flags & __GFP_THISNODE)) 3352 return fallback_alloc(cachep, flags);
3378 /* Unable to grow the cache. Fall back to other nodes. */
3379 return fallback_alloc(cachep, flags);
3380
3381 return NULL;
3382 3353
3383done: 3354done:
3384 return obj; 3355 return obj;
3385} 3356}
3386#endif 3357
3358/**
3359 * kmem_cache_alloc_node - Allocate an object on the specified node
3360 * @cachep: The cache to allocate from.
3361 * @flags: See kmalloc().
3362 * @nodeid: node number of the target node.
3363 * @caller: return address of caller, used for debug information
3364 *
3365 * Identical to kmem_cache_alloc but it will allocate memory on the given
3366 * node, which can improve the performance for cpu bound structures.
3367 *
3368 * Fallback to other node is possible if __GFP_THISNODE is not set.
3369 */
3370static __always_inline void *
3371__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3372 void *caller)
3373{
3374 unsigned long save_flags;
3375 void *ptr;
3376
3377 cache_alloc_debugcheck_before(cachep, flags);
3378 local_irq_save(save_flags);
3379
3380 if (unlikely(nodeid == -1))
3381 nodeid = numa_node_id();
3382
3383 if (unlikely(!cachep->nodelists[nodeid])) {
3384 /* Node not bootstrapped yet */
3385 ptr = fallback_alloc(cachep, flags);
3386 goto out;
3387 }
3388
3389 if (nodeid == numa_node_id()) {
3390 /*
3391 * Use the locally cached objects if possible.
3392 * However ____cache_alloc does not allow fallback
3393 * to other nodes. It may fail while we still have
3394 * objects on other nodes available.
3395 */
3396 ptr = ____cache_alloc(cachep, flags);
3397 if (ptr)
3398 goto out;
3399 }
3400 /* ___cache_alloc_node can fall back to other nodes */
3401 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3402 out:
3403 local_irq_restore(save_flags);
3404 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3405
3406 return ptr;
3407}
3408
3409static __always_inline void *
3410__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3411{
3412 void *objp;
3413
3414 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3415 objp = alternate_node_alloc(cache, flags);
3416 if (objp)
3417 goto out;
3418 }
3419 objp = ____cache_alloc(cache, flags);
3420
3421 /*
3422 * We may just have run out of memory on the local node.
3423 * ____cache_alloc_node() knows how to locate memory on other nodes
3424 */
3425 if (!objp)
3426 objp = ____cache_alloc_node(cache, flags, numa_node_id());
3427
3428 out:
3429 return objp;
3430}
3431#else
3432
3433static __always_inline void *
3434__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3435{
3436 return ____cache_alloc(cachep, flags);
3437}
3438
3439#endif /* CONFIG_NUMA */
3440
3441static __always_inline void *
3442__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3443{
3444 unsigned long save_flags;
3445 void *objp;
3446
3447 cache_alloc_debugcheck_before(cachep, flags);
3448 local_irq_save(save_flags);
3449 objp = __do_cache_alloc(cachep, flags);
3450 local_irq_restore(save_flags);
3451 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3452 prefetchw(objp);
3453
3454 return objp;
3455}
3387 3456
3388/* 3457/*
3389 * Caller needs to acquire correct kmem_list's list_lock 3458 * Caller needs to acquire correct kmem_list's list_lock
@@ -3582,57 +3651,6 @@ out:
3582} 3651}
3583 3652
3584#ifdef CONFIG_NUMA 3653#ifdef CONFIG_NUMA
3585/**
3586 * kmem_cache_alloc_node - Allocate an object on the specified node
3587 * @cachep: The cache to allocate from.
3588 * @flags: See kmalloc().
3589 * @nodeid: node number of the target node.
3590 * @caller: return address of caller, used for debug information
3591 *
3592 * Identical to kmem_cache_alloc but it will allocate memory on the given
3593 * node, which can improve the performance for cpu bound structures.
3594 *
3595 * Fallback to other node is possible if __GFP_THISNODE is not set.
3596 */
3597static __always_inline void *
3598__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3599 int nodeid, void *caller)
3600{
3601 unsigned long save_flags;
3602 void *ptr = NULL;
3603
3604 cache_alloc_debugcheck_before(cachep, flags);
3605 local_irq_save(save_flags);
3606
3607 if (unlikely(nodeid == -1))
3608 nodeid = numa_node_id();
3609
3610 if (likely(cachep->nodelists[nodeid])) {
3611 if (nodeid == numa_node_id()) {
3612 /*
3613 * Use the locally cached objects if possible.
3614 * However ____cache_alloc does not allow fallback
3615 * to other nodes. It may fail while we still have
3616 * objects on other nodes available.
3617 */
3618 ptr = ____cache_alloc(cachep, flags);
3619 }
3620 if (!ptr) {
3621 /* ___cache_alloc_node can fall back to other nodes */
3622 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3623 }
3624 } else {
3625 /* Node not bootstrapped yet */
3626 if (!(flags & __GFP_THISNODE))
3627 ptr = fallback_alloc(cachep, flags);
3628 }
3629
3630 local_irq_restore(save_flags);
3631 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3632
3633 return ptr;
3634}
3635
3636void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 3654void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3637{ 3655{
3638 return __cache_alloc_node(cachep, flags, nodeid, 3656 return __cache_alloc_node(cachep, flags, nodeid,
@@ -3733,6 +3751,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3733 BUG_ON(virt_to_cache(objp) != cachep); 3751 BUG_ON(virt_to_cache(objp) != cachep);
3734 3752
3735 local_irq_save(flags); 3753 local_irq_save(flags);
3754 debug_check_no_locks_freed(objp, obj_size(cachep));
3736 __cache_free(cachep, objp); 3755 __cache_free(cachep, objp);
3737 local_irq_restore(flags); 3756 local_irq_restore(flags);
3738} 3757}
@@ -4017,18 +4036,17 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4017 * If we cannot acquire the cache chain mutex then just give up - we'll try 4036 * If we cannot acquire the cache chain mutex then just give up - we'll try
4018 * again on the next iteration. 4037 * again on the next iteration.
4019 */ 4038 */
4020static void cache_reap(struct work_struct *unused) 4039static void cache_reap(struct work_struct *w)
4021{ 4040{
4022 struct kmem_cache *searchp; 4041 struct kmem_cache *searchp;
4023 struct kmem_list3 *l3; 4042 struct kmem_list3 *l3;
4024 int node = numa_node_id(); 4043 int node = numa_node_id();
4044 struct delayed_work *work =
4045 container_of(w, struct delayed_work, work);
4025 4046
4026 if (!mutex_trylock(&cache_chain_mutex)) { 4047 if (!mutex_trylock(&cache_chain_mutex))
4027 /* Give up. Setup the next iteration. */ 4048 /* Give up. Setup the next iteration. */
4028 schedule_delayed_work(&__get_cpu_var(reap_work), 4049 goto out;
4029 round_jiffies_relative(REAPTIMEOUT_CPUC));
4030 return;
4031 }
4032 4050
4033 list_for_each_entry(searchp, &cache_chain, next) { 4051 list_for_each_entry(searchp, &cache_chain, next) {
4034 check_irq_on(); 4052 check_irq_on();
@@ -4071,9 +4089,9 @@ next:
4071 mutex_unlock(&cache_chain_mutex); 4089 mutex_unlock(&cache_chain_mutex);
4072 next_reap_node(); 4090 next_reap_node();
4073 refresh_cpu_vm_stats(smp_processor_id()); 4091 refresh_cpu_vm_stats(smp_processor_id());
4092out:
4074 /* Set up the next iteration */ 4093 /* Set up the next iteration */
4075 schedule_delayed_work(&__get_cpu_var(reap_work), 4094 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4076 round_jiffies_relative(REAPTIMEOUT_CPUC));
4077} 4095}
4078 4096
4079#ifdef CONFIG_PROC_FS 4097#ifdef CONFIG_PROC_FS
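
The cache_reap() change above is the standard self-rearming pattern for the post-2.6.20 workqueue API: the handler recovers its struct delayed_work from the embedded work_struct with container_of() and requeues itself instead of referencing a global per-CPU work item. A minimal sketch under that assumption; every identifier prefixed with my_ is made up. The initial queuing is unchanged: an init path calls schedule_delayed_work(&my_periodic_work, ...) once, and the handler keeps itself alive from then on.

/* Sketch only, not from the patch: periodic work that reschedules itself. */
#include <linux/timer.h>
#include <linux/workqueue.h>

#define MY_PERIOD	(2 * HZ)

static void my_periodic_fn(struct work_struct *w)
{
	struct delayed_work *dwork =
		container_of(w, struct delayed_work, work);

	/* ... periodic maintenance would go here ... */

	schedule_delayed_work(dwork, round_jiffies_relative(MY_PERIOD));
}

static DECLARE_DELAYED_WORK(my_periodic_work, my_periodic_fn);
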
diff --git a/mm/truncate.c b/mm/truncate.c
index 5df947de7654..ebf3fcb4115b 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(cancel_dirty_page);
85 * 85 *
86 * We need to bale out if page->mapping is no longer equal to the original 86 * We need to bale out if page->mapping is no longer equal to the original
87 * mapping. This happens a) when the VM reclaimed the page while we waited on 87 * mapping. This happens a) when the VM reclaimed the page while we waited on
88 * its lock, b) when a concurrent invalidate_inode_pages got there first and 88 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
89 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 89 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
90 */ 90 */
91static void 91static void
@@ -106,7 +106,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
106} 106}
107 107
108/* 108/*
109 * This is for invalidate_inode_pages(). That function can be called at 109 * This is for invalidate_mapping_pages(). That function can be called at
110 * any time, and is not supposed to throw away dirty pages. But pages can 110 * any time, and is not supposed to throw away dirty pages. But pages can
111 * be marked dirty at any time too, so use remove_mapping which safely 111 * be marked dirty at any time too, so use remove_mapping which safely
112 * discards clean, unused pages. 112 * discards clean, unused pages.
@@ -310,12 +310,7 @@ unlock:
310 } 310 }
311 return ret; 311 return ret;
312} 312}
313 313EXPORT_SYMBOL(invalidate_mapping_pages);
314unsigned long invalidate_inode_pages(struct address_space *mapping)
315{
316 return invalidate_mapping_pages(mapping, 0, ~0UL);
317}
318EXPORT_SYMBOL(invalidate_inode_pages);
319 314
320/* 315/*
321 * This is like invalidate_complete_page(), except it ignores the page's 316 * This is like invalidate_complete_page(), except it ignores the page's
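
With the invalidate_inode_pages() wrapper gone, callers pass the full page range to invalidate_mapping_pages() themselves, exactly as the deleted wrapper did. A one-line sketch of the replacement call; the surrounding helper name is made up.

/* Sketch: direct replacement for the removed invalidate_inode_pages(). */
#include <linux/fs.h>

static unsigned long drop_clean_cached_pages(struct address_space *mapping)
{
	return invalidate_mapping_pages(mapping, 0, ~0UL);
}
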
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 86897ee792d6..9eef486da909 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -699,7 +699,7 @@ finished:
699 * that it is big enough to cover the vma. Will return failure if 699 * that it is big enough to cover the vma. Will return failure if
700 * that criteria isn't met. 700 * that criteria isn't met.
701 * 701 *
702 * Similar to remap_pfn_range (see mm/memory.c) 702 * Similar to remap_pfn_range() (see mm/memory.c)
703 */ 703 */
704int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 704int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
705 unsigned long pgoff) 705 unsigned long pgoff)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7430df68cb64..0655d5fe73e8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -679,7 +679,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
679 nr_taken = isolate_lru_pages(sc->swap_cluster_max, 679 nr_taken = isolate_lru_pages(sc->swap_cluster_max,
680 &zone->inactive_list, 680 &zone->inactive_list,
681 &page_list, &nr_scan); 681 &page_list, &nr_scan);
682 zone->nr_inactive -= nr_taken; 682 __mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
683 zone->pages_scanned += nr_scan; 683 zone->pages_scanned += nr_scan;
684 spin_unlock_irq(&zone->lru_lock); 684 spin_unlock_irq(&zone->lru_lock);
685 685
@@ -740,7 +740,8 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
740 740
741static inline int zone_is_near_oom(struct zone *zone) 741static inline int zone_is_near_oom(struct zone *zone)
742{ 742{
743 return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; 743 return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE)
744 + zone_page_state(zone, NR_INACTIVE))*3;
744} 745}
745 746
746/* 747/*
@@ -825,7 +826,7 @@ force_reclaim_mapped:
825 pgmoved = isolate_lru_pages(nr_pages, &zone->active_list, 826 pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
826 &l_hold, &pgscanned); 827 &l_hold, &pgscanned);
827 zone->pages_scanned += pgscanned; 828 zone->pages_scanned += pgscanned;
828 zone->nr_active -= pgmoved; 829 __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
829 spin_unlock_irq(&zone->lru_lock); 830 spin_unlock_irq(&zone->lru_lock);
830 831
831 while (!list_empty(&l_hold)) { 832 while (!list_empty(&l_hold)) {
@@ -857,7 +858,7 @@ force_reclaim_mapped:
857 list_move(&page->lru, &zone->inactive_list); 858 list_move(&page->lru, &zone->inactive_list);
858 pgmoved++; 859 pgmoved++;
859 if (!pagevec_add(&pvec, page)) { 860 if (!pagevec_add(&pvec, page)) {
860 zone->nr_inactive += pgmoved; 861 __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
861 spin_unlock_irq(&zone->lru_lock); 862 spin_unlock_irq(&zone->lru_lock);
862 pgdeactivate += pgmoved; 863 pgdeactivate += pgmoved;
863 pgmoved = 0; 864 pgmoved = 0;
@@ -867,7 +868,7 @@ force_reclaim_mapped:
867 spin_lock_irq(&zone->lru_lock); 868 spin_lock_irq(&zone->lru_lock);
868 } 869 }
869 } 870 }
870 zone->nr_inactive += pgmoved; 871 __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
871 pgdeactivate += pgmoved; 872 pgdeactivate += pgmoved;
872 if (buffer_heads_over_limit) { 873 if (buffer_heads_over_limit) {
873 spin_unlock_irq(&zone->lru_lock); 874 spin_unlock_irq(&zone->lru_lock);
@@ -885,14 +886,14 @@ force_reclaim_mapped:
885 list_move(&page->lru, &zone->active_list); 886 list_move(&page->lru, &zone->active_list);
886 pgmoved++; 887 pgmoved++;
887 if (!pagevec_add(&pvec, page)) { 888 if (!pagevec_add(&pvec, page)) {
888 zone->nr_active += pgmoved; 889 __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
889 pgmoved = 0; 890 pgmoved = 0;
890 spin_unlock_irq(&zone->lru_lock); 891 spin_unlock_irq(&zone->lru_lock);
891 __pagevec_release(&pvec); 892 __pagevec_release(&pvec);
892 spin_lock_irq(&zone->lru_lock); 893 spin_lock_irq(&zone->lru_lock);
893 } 894 }
894 } 895 }
895 zone->nr_active += pgmoved; 896 __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
896 897
897 __count_zone_vm_events(PGREFILL, zone, pgscanned); 898 __count_zone_vm_events(PGREFILL, zone, pgscanned);
898 __count_vm_events(PGDEACTIVATE, pgdeactivate); 899 __count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -918,14 +919,16 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
918 * Add one to `nr_to_scan' just to make sure that the kernel will 919 * Add one to `nr_to_scan' just to make sure that the kernel will
919 * slowly sift through the active list. 920 * slowly sift through the active list.
920 */ 921 */
921 zone->nr_scan_active += (zone->nr_active >> priority) + 1; 922 zone->nr_scan_active +=
923 (zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
922 nr_active = zone->nr_scan_active; 924 nr_active = zone->nr_scan_active;
923 if (nr_active >= sc->swap_cluster_max) 925 if (nr_active >= sc->swap_cluster_max)
924 zone->nr_scan_active = 0; 926 zone->nr_scan_active = 0;
925 else 927 else
926 nr_active = 0; 928 nr_active = 0;
927 929
928 zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1; 930 zone->nr_scan_inactive +=
931 (zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
929 nr_inactive = zone->nr_scan_inactive; 932 nr_inactive = zone->nr_scan_inactive;
930 if (nr_inactive >= sc->swap_cluster_max) 933 if (nr_inactive >= sc->swap_cluster_max)
931 zone->nr_scan_inactive = 0; 934 zone->nr_scan_inactive = 0;
@@ -1037,7 +1040,8 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
1037 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 1040 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1038 continue; 1041 continue;
1039 1042
1040 lru_pages += zone->nr_active + zone->nr_inactive; 1043 lru_pages += zone_page_state(zone, NR_ACTIVE)
1044 + zone_page_state(zone, NR_INACTIVE);
1041 } 1045 }
1042 1046
1043 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 1047 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
@@ -1182,7 +1186,8 @@ loop_again:
1182 for (i = 0; i <= end_zone; i++) { 1186 for (i = 0; i <= end_zone; i++) {
1183 struct zone *zone = pgdat->node_zones + i; 1187 struct zone *zone = pgdat->node_zones + i;
1184 1188
1185 lru_pages += zone->nr_active + zone->nr_inactive; 1189 lru_pages += zone_page_state(zone, NR_ACTIVE)
1190 + zone_page_state(zone, NR_INACTIVE);
1186 } 1191 }
1187 1192
1188 /* 1193 /*
@@ -1219,8 +1224,9 @@ loop_again:
1219 if (zone->all_unreclaimable) 1224 if (zone->all_unreclaimable)
1220 continue; 1225 continue;
1221 if (nr_slab == 0 && zone->pages_scanned >= 1226 if (nr_slab == 0 && zone->pages_scanned >=
1222 (zone->nr_active + zone->nr_inactive) * 6) 1227 (zone_page_state(zone, NR_ACTIVE)
1223 zone->all_unreclaimable = 1; 1228 + zone_page_state(zone, NR_INACTIVE)) * 6)
1229 zone->all_unreclaimable = 1;
1224 /* 1230 /*
1225 * If we've done a decent amount of scanning and 1231 * If we've done a decent amount of scanning and
1226 * the reclaim ratio is low, start doing writepage 1232 * the reclaim ratio is low, start doing writepage
@@ -1385,18 +1391,22 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
1385 1391
1386 /* For pass = 0 we don't shrink the active list */ 1392 /* For pass = 0 we don't shrink the active list */
1387 if (pass > 0) { 1393 if (pass > 0) {
1388 zone->nr_scan_active += (zone->nr_active >> prio) + 1; 1394 zone->nr_scan_active +=
1395 (zone_page_state(zone, NR_ACTIVE) >> prio) + 1;
1389 if (zone->nr_scan_active >= nr_pages || pass > 3) { 1396 if (zone->nr_scan_active >= nr_pages || pass > 3) {
1390 zone->nr_scan_active = 0; 1397 zone->nr_scan_active = 0;
1391 nr_to_scan = min(nr_pages, zone->nr_active); 1398 nr_to_scan = min(nr_pages,
1399 zone_page_state(zone, NR_ACTIVE));
1392 shrink_active_list(nr_to_scan, zone, sc, prio); 1400 shrink_active_list(nr_to_scan, zone, sc, prio);
1393 } 1401 }
1394 } 1402 }
1395 1403
1396 zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1; 1404 zone->nr_scan_inactive +=
1405 (zone_page_state(zone, NR_INACTIVE) >> prio) + 1;
1397 if (zone->nr_scan_inactive >= nr_pages || pass > 3) { 1406 if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
1398 zone->nr_scan_inactive = 0; 1407 zone->nr_scan_inactive = 0;
1399 nr_to_scan = min(nr_pages, zone->nr_inactive); 1408 nr_to_scan = min(nr_pages,
1409 zone_page_state(zone, NR_INACTIVE));
1400 ret += shrink_inactive_list(nr_to_scan, zone, sc); 1410 ret += shrink_inactive_list(nr_to_scan, zone, sc);
1401 if (ret >= nr_pages) 1411 if (ret >= nr_pages)
1402 return ret; 1412 return ret;
@@ -1408,12 +1418,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
1408 1418
1409static unsigned long count_lru_pages(void) 1419static unsigned long count_lru_pages(void)
1410{ 1420{
1411 struct zone *zone; 1421 return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
1412 unsigned long ret = 0;
1413
1414 for_each_zone(zone)
1415 ret += zone->nr_active + zone->nr_inactive;
1416 return ret;
1417} 1422}
1418 1423
1419/* 1424/*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index dc005a0c96ae..6c488d6ac425 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -13,39 +13,6 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/cpu.h> 14#include <linux/cpu.h>
15 15
16void __get_zone_counts(unsigned long *active, unsigned long *inactive,
17 unsigned long *free, struct pglist_data *pgdat)
18{
19 struct zone *zones = pgdat->node_zones;
20 int i;
21
22 *active = 0;
23 *inactive = 0;
24 *free = 0;
25 for (i = 0; i < MAX_NR_ZONES; i++) {
26 *active += zones[i].nr_active;
27 *inactive += zones[i].nr_inactive;
28 *free += zones[i].free_pages;
29 }
30}
31
32void get_zone_counts(unsigned long *active,
33 unsigned long *inactive, unsigned long *free)
34{
35 struct pglist_data *pgdat;
36
37 *active = 0;
38 *inactive = 0;
39 *free = 0;
40 for_each_online_pgdat(pgdat) {
41 unsigned long l, m, n;
42 __get_zone_counts(&l, &m, &n, pgdat);
43 *active += l;
44 *inactive += m;
45 *free += n;
46 }
47}
48
49#ifdef CONFIG_VM_EVENT_COUNTERS 16#ifdef CONFIG_VM_EVENT_COUNTERS
50DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; 17DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
51EXPORT_PER_CPU_SYMBOL(vm_event_states); 18EXPORT_PER_CPU_SYMBOL(vm_event_states);
@@ -239,7 +206,7 @@ EXPORT_SYMBOL(mod_zone_page_state);
239 * in between and therefore the atomicity vs. interrupt cannot be exploited 206 * in between and therefore the atomicity vs. interrupt cannot be exploited
240 * in a useful way here. 207 * in a useful way here.
241 */ 208 */
242static void __inc_zone_state(struct zone *zone, enum zone_stat_item item) 209void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
243{ 210{
244 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); 211 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
245 s8 *p = pcp->vm_stat_diff + item; 212 s8 *p = pcp->vm_stat_diff + item;
@@ -260,9 +227,8 @@ void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
260} 227}
261EXPORT_SYMBOL(__inc_zone_page_state); 228EXPORT_SYMBOL(__inc_zone_page_state);
262 229
263void __dec_zone_page_state(struct page *page, enum zone_stat_item item) 230void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
264{ 231{
265 struct zone *zone = page_zone(page);
266 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); 232 struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
267 s8 *p = pcp->vm_stat_diff + item; 233 s8 *p = pcp->vm_stat_diff + item;
268 234
@@ -275,6 +241,11 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
275 *p = overstep; 241 *p = overstep;
276 } 242 }
277} 243}
244
245void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
246{
247 __dec_zone_state(page_zone(page), item);
248}
278EXPORT_SYMBOL(__dec_zone_page_state); 249EXPORT_SYMBOL(__dec_zone_page_state);
279 250
280void inc_zone_state(struct zone *zone, enum zone_stat_item item) 251void inc_zone_state(struct zone *zone, enum zone_stat_item item)
@@ -437,6 +408,12 @@ const struct seq_operations fragmentation_op = {
437 .show = frag_show, 408 .show = frag_show,
438}; 409};
439 410
411#ifdef CONFIG_ZONE_DMA
412#define TEXT_FOR_DMA(xx) xx "_dma",
413#else
414#define TEXT_FOR_DMA(xx)
415#endif
416
440#ifdef CONFIG_ZONE_DMA32 417#ifdef CONFIG_ZONE_DMA32
441#define TEXT_FOR_DMA32(xx) xx "_dma32", 418#define TEXT_FOR_DMA32(xx) xx "_dma32",
442#else 419#else
@@ -449,19 +426,22 @@ const struct seq_operations fragmentation_op = {
449#define TEXT_FOR_HIGHMEM(xx) 426#define TEXT_FOR_HIGHMEM(xx)
450#endif 427#endif
451 428
452#define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \ 429#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
453 TEXT_FOR_HIGHMEM(xx) 430 TEXT_FOR_HIGHMEM(xx)
454 431
455static const char * const vmstat_text[] = { 432static const char * const vmstat_text[] = {
456 /* Zoned VM counters */ 433 /* Zoned VM counters */
434 "nr_free_pages",
435 "nr_active",
436 "nr_inactive",
457 "nr_anon_pages", 437 "nr_anon_pages",
458 "nr_mapped", 438 "nr_mapped",
459 "nr_file_pages", 439 "nr_file_pages",
440 "nr_dirty",
441 "nr_writeback",
460 "nr_slab_reclaimable", 442 "nr_slab_reclaimable",
461 "nr_slab_unreclaimable", 443 "nr_slab_unreclaimable",
462 "nr_page_table_pages", 444 "nr_page_table_pages",
463 "nr_dirty",
464 "nr_writeback",
465 "nr_unstable", 445 "nr_unstable",
466 "nr_bounce", 446 "nr_bounce",
467 "nr_vmscan_write", 447 "nr_vmscan_write",
@@ -529,17 +509,13 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
529 "\n min %lu" 509 "\n min %lu"
530 "\n low %lu" 510 "\n low %lu"
531 "\n high %lu" 511 "\n high %lu"
532 "\n active %lu"
533 "\n inactive %lu"
534 "\n scanned %lu (a: %lu i: %lu)" 512 "\n scanned %lu (a: %lu i: %lu)"
535 "\n spanned %lu" 513 "\n spanned %lu"
536 "\n present %lu", 514 "\n present %lu",
537 zone->free_pages, 515 zone_page_state(zone, NR_FREE_PAGES),
538 zone->pages_min, 516 zone->pages_min,
539 zone->pages_low, 517 zone->pages_low,
540 zone->pages_high, 518 zone->pages_high,
541 zone->nr_active,
542 zone->nr_inactive,
543 zone->pages_scanned, 519 zone->pages_scanned,
544 zone->nr_scan_active, zone->nr_scan_inactive, 520 zone->nr_scan_active, zone->nr_scan_inactive,
545 zone->spanned_pages, 521 zone->spanned_pages,
@@ -563,12 +539,6 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
563 539
564 pageset = zone_pcp(zone, i); 540 pageset = zone_pcp(zone, i);
565 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { 541 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
566 if (pageset->pcp[j].count)
567 break;
568 }
569 if (j == ARRAY_SIZE(pageset->pcp))
570 continue;
571 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
572 seq_printf(m, 542 seq_printf(m,
573 "\n cpu: %i pcp: %i" 543 "\n cpu: %i pcp: %i"
574 "\n count: %i" 544 "\n count: %i"
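
The deleted __get_zone_counts()/get_zone_counts() pair has no direct replacement; readers simply fetch the zoned VM counters, as the show_free_areas() and count_lru_pages() hunks do. A minimal sketch of an equivalent global read; the helper name is illustrative only.

/* Sketch only: the totals the removed get_zone_counts() used to return. */
#include <linux/mmzone.h>
#include <linux/vmstat.h>

static void global_lru_counts(unsigned long *active, unsigned long *inactive,
			      unsigned long *free)
{
	*active   = global_page_state(NR_ACTIVE);
	*inactive = global_page_state(NR_INACTIVE);
	*free     = global_page_state(NR_FREE_PAGES);
}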