15 files changed, 115 insertions, 225 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 379ff0bcbf6e..1b60f30cebfa 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -121,7 +121,6 @@ void __remove_from_page_cache(struct page *page)
        mapping->nrpages--;
        __dec_zone_page_state(page, NR_FILE_PAGES);
        BUG_ON(page_mapped(page));
-        mem_cgroup_uncharge_cache_page(page);
        /*
         * Some filesystems seem to re-dirty the page even after
@@ -145,6 +144,7 @@ void remove_from_page_cache(struct page *page)
        spin_lock_irq(&mapping->tree_lock);
        __remove_from_page_cache(page);
        spin_unlock_irq(&mapping->tree_lock);
+        mem_cgroup_uncharge_cache_page(page);
 }
 static int sync_page(void *word)
@@ -476,13 +476,13 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
                if (likely(!error)) {
                        mapping->nrpages++;
                        __inc_zone_page_state(page, NR_FILE_PAGES);
+                        spin_unlock_irq(&mapping->tree_lock);
                } else {
                        page->mapping = NULL;
+                        spin_unlock_irq(&mapping->tree_lock);
                        mem_cgroup_uncharge_cache_page(page);
                        page_cache_release(page);
                }
-                spin_unlock_irq(&mapping->tree_lock);
                radix_tree_preload_end();
        } else
                mem_cgroup_uncharge_cache_page(page);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 28c655ba9353..e83ad2c9228c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -316,7 +316,7 @@ static void resv_map_release(struct kref *ref)
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 {
        VM_BUG_ON(!is_vm_hugetlb_page(vma));
-        if (!(vma->vm_flags & VM_SHARED))
+        if (!(vma->vm_flags & VM_MAYSHARE))
                return (struct resv_map *)(get_vma_private_data(vma) &
                                                        ~HPAGE_RESV_MASK);
        return NULL;
@@ -325,7 +325,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
 {
        VM_BUG_ON(!is_vm_hugetlb_page(vma));
-        VM_BUG_ON(vma->vm_flags & VM_SHARED);
+        VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);
        set_vma_private_data(vma, (get_vma_private_data(vma) &
                                HPAGE_RESV_MASK) | (unsigned long)map);
@@ -334,7 +334,7 @@ static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
 static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
 {
        VM_BUG_ON(!is_vm_hugetlb_page(vma));
-        VM_BUG_ON(vma->vm_flags & VM_SHARED);
+        VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);
        set_vma_private_data(vma, get_vma_private_data(vma) | flags);
 }
@@ -353,7 +353,7 @@ static void decrement_hugepage_resv_vma(struct hstate *h,
        if (vma->vm_flags & VM_NORESERVE)
                return;
-        if (vma->vm_flags & VM_SHARED) {
+        if (vma->vm_flags & VM_MAYSHARE) {
                /* Shared mappings always use reserves */
                h->resv_huge_pages--;
        } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
@@ -369,14 +369,14 @@ static void decrement_hugepage_resv_vma(struct hstate *h,
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
        VM_BUG_ON(!is_vm_hugetlb_page(vma));
-        if (!(vma->vm_flags & VM_SHARED))
+        if (!(vma->vm_flags & VM_MAYSHARE))
                vma->vm_private_data = (void *)0;
 }
 /* Returns true if the VMA has associated reserve pages */
 static int vma_has_reserves(struct vm_area_struct *vma)
 {
-        if (vma->vm_flags & VM_SHARED)
+        if (vma->vm_flags & VM_MAYSHARE)
                return 1;
        if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
                return 1;
@@ -924,7 +924,7 @@ static long vma_needs_reservation(struct hstate *h,
        struct address_space *mapping = vma->vm_file->f_mapping;
        struct inode *inode = mapping->host;
-        if (vma->vm_flags & VM_SHARED) {
+        if (vma->vm_flags & VM_MAYSHARE) {
                pgoff_t idx = vma_hugecache_offset(h, vma, addr);
                return region_chg(&inode->i_mapping->private_list,
                                                        idx, idx + 1);
@@ -949,7 +949,7 @@ static void vma_commit_reservation(struct hstate *h,
        struct address_space *mapping = vma->vm_file->f_mapping;
        struct inode *inode = mapping->host;
-        if (vma->vm_flags & VM_SHARED) {
+        if (vma->vm_flags & VM_MAYSHARE) {
                pgoff_t idx = vma_hugecache_offset(h, vma, addr);
                region_add(&inode->i_mapping->private_list, idx, idx + 1);
@@ -1893,7 +1893,7 @@ retry_avoidcopy:
         * at the time of fork() could consume its reserves on COW instead
         * of the full address range.
         */
-        if (!(vma->vm_flags & VM_SHARED) &&
+        if (!(vma->vm_flags & VM_MAYSHARE) &&
                        is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
                        old_page != pagecache_page)
                outside_reserve = 1;
@@ -2000,7 +2000,7 @@ retry:
                clear_huge_page(page, address, huge_page_size(h));
                __SetPageUptodate(page);
-                if (vma->vm_flags & VM_SHARED) {
+                if (vma->vm_flags & VM_MAYSHARE) {
                        int err;
                        struct inode *inode = mapping->host;
@@ -2104,7 +2104,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                        goto out_mutex;
                }
-                if (!(vma->vm_flags & VM_SHARED))
+                if (!(vma->vm_flags & VM_MAYSHARE))
                        pagecache_page = hugetlbfs_pagecache_page(h,
                                                                vma, address);
        }
@@ -2289,7 +2289,7 @@ int hugetlb_reserve_pages(struct inode *inode,
         * to reserve the full area even if read-only as mprotect() may be
         * called to make the mapping read-write. Assume !vma is a shm mapping
         */
-        if (!vma || vma->vm_flags & VM_SHARED)
+        if (!vma || vma->vm_flags & VM_MAYSHARE)
                chg = region_chg(&inode->i_mapping->private_list, from, to);
        else {
                struct resv_map *resv_map = resv_map_alloc();
@@ -2330,7 +2330,7 @@ int hugetlb_reserve_pages(struct inode *inode,
         * consumed reservations are stored in the map. Hence, nothing
         * else has to be done for private mappings here
         */
-        if (!vma || vma->vm_flags & VM_SHARED)
+        if (!vma || vma->vm_flags & VM_MAYSHARE)
                region_add(&inode->i_mapping->private_list, from, to);
        return 0;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 01c2d8f14685..78eb8552818b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -314,14 +314,6 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
        return mem;
 }
-static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem)
-{
-        if (!mem)
-                return true;
-        return css_is_removed(&mem->css);
-}
 /*
 * Call callback function against all cgroup under hierarchy tree.
 */
@@ -932,7 +924,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
        if (unlikely(!mem))
                return 0;
-        VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem));
+        VM_BUG_ON(css_is_removed(&mem->css));
        while (1) {
                int ret;
@@ -1488,8 +1480,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
        __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
 }
+#ifdef CONFIG_SWAP
 /*
- * called from __delete_from_swap_cache() and drop "page" account.
+ * Called after __delete_from_swap_cache() to drop the "page" account.
 * memcg information is recorded to swap_cgroup of "ent"
 */
 void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
@@ -1506,6 +1499,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
        if (memcg)
                css_put(&memcg->css);
 }
+#endif
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 /*
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 16ce8b955dcf..f5b7d1760213 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -6,6 +6,7 @@
 #include <linux/stddef.h>
+#include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/module.h>
@@ -72,3 +73,17 @@ struct zoneref *next_zones_zonelist(struct zoneref *z,
        *zone = zonelist_zone(z);
        return z;
 }
+#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
+/*
+ * memmap_valid_within - check that a struct page agrees with its pfn and zone
+ * @pfn: page frame number that @page is expected to correspond to
+ * @page: struct page to validate
+ * @zone: zone that @page is expected to belong to
+ *
+ * Returns 1 only when page_to_pfn(@page) maps back to @pfn and
+ * page_zone(@page) is @zone; returns 0 as soon as either check fails.
+ */
+int memmap_valid_within(unsigned long pfn,
+                                        struct page *page, struct zone *zone)
+{
+        if (page_to_pfn(page) != pfn)
+                return 0;
+        if (page_zone(page) != zone)
+                return 0;
+        return 1;
+}
+#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 92bcf1db16b2..a7b2460e922b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -284,22 +284,28 @@ static void dump_tasks(const struct mem_cgroup *mem)
        printk(KERN_INFO "[ pid ]   uid  tgid total_vm      rss cpu oom_adj "
               "name\n");
        do_each_thread(g, p) {
-                /*
+                struct mm_struct *mm;
-                 * total_vm and rss sizes do not exist for tasks with a
-                 * detached mm so there's no need to report them.
-                 */
-                if (!p->mm)
-                        continue;
                if (mem && !task_in_mem_cgroup(p, mem))
                        continue;
                if (!thread_group_leader(p))
                        continue;
                task_lock(p);
+                mm = p->mm;
+                if (!mm) {
+                        /*
+                         * total_vm and rss sizes do not exist for tasks with no
+                         * mm so there's no need to report them; they can't be
+                         * oom killed anyway.
+                         */
+                        task_unlock(p);
+                        continue;
+                }
                printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d     %3d %s\n",
-                       p->pid, __task_cred(p)->uid, p->tgid,
+                       p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
-                       p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p),
+                       get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj,
-                       p->oomkilladj, p->comm);
+                       p->comm);
                task_unlock(p);
        } while_each_thread(g, p);
 }
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 30351f0063ac..bb553c3e955d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -94,12 +94,12 @@ unsigned long vm_dirty_bytes;
 /*
 * The interval between `kupdate'-style writebacks
 */
-unsigned int dirty_writeback_interval = 5 * 100; /* sentiseconds */
+unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */
 /*
 * The longest time for which data is allowed to remain dirty
 */
-unsigned int dirty_expire_interval = 30 * 100; /* sentiseconds */
+unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */
 /*
 * Flag that makes the machine dump writes/reads and block dirtyings.
@@ -770,7 +770,7 @@ static void wb_kupdate(unsigned long arg)
        sync_supers();
-        oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval);
+        oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
        start_jif = jiffies;
        next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10);
        nr_to_write = global_page_state(NR_FILE_DIRTY) +
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fe753ecf2aa5..474c7e9dd51a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -149,10 +149,6 @@ static unsigned long __meminitdata dma_reserve;
  static int __meminitdata nr_nodemap_entries;
  static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
  static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
-  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
  static unsigned long __initdata required_kernelcore;
  static unsigned long __initdata required_movablecore;
  static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -3103,64 +3099,6 @@ void __init sparse_memory_present_with_active_regions(int nid)
 }
 /**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
-                unsigned long start_pfn, unsigned long end_pfn)
-{
-        mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-                        "Entering push_node_boundaries(%u, %lu, %lu)\n",
-                        nid, start_pfn, end_pfn);
-        /* Initialise the boundary for this node if necessary */
-        if (node_boundary_end_pfn[nid] == 0)
-                node_boundary_start_pfn[nid] = -1UL;
-        /* Update the boundaries */
-        if (node_boundary_start_pfn[nid] > start_pfn)
-                node_boundary_start_pfn[nid] = start_pfn;
-        if (node_boundary_end_pfn[nid] < end_pfn)
-                node_boundary_end_pfn[nid] = end_pfn;
-}
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
-                unsigned long *start_pfn, unsigned long *end_pfn)
-{
-        mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-                        "Entering account_node_boundary(%u, %lu, %lu)\n",
-                        nid, *start_pfn, *end_pfn);
-        /* Return if boundary information has not been provided */
-        if (node_boundary_end_pfn[nid] == 0)
-                return;
-        /* Check the boundaries and update if necessary */
-        if (node_boundary_start_pfn[nid] < *start_pfn)
-                *start_pfn = node_boundary_start_pfn[nid];
-        if (node_boundary_end_pfn[nid] > *end_pfn)
-                *end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
-                unsigned long start_pfn, unsigned long end_pfn) {}
-static void __meminit account_node_boundary(unsigned int nid,
-                unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-/**
 * get_pfn_range_for_nid - Return the start and end page frames for a node
 * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
 * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
@@ -3185,9 +3123,6 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
        if (*start_pfn == -1UL)
                *start_pfn = 0;
-        /* Push the node boundaries out if requested */
-        account_node_boundary(nid, start_pfn, end_pfn);
 }
 /*
@@ -3793,10 +3728,6 @@ void __init remove_all_active_ranges(void)
 {
        memset(early_node_map, 0, sizeof(early_node_map));
        nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-        memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
-        memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 /* Compare two active node_active_regions */
diff --git a/mm/percpu.c b/mm/percpu.c
index 1aa5d8fbca12..c0b2c1a76e81 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -23,7 +23,7 @@
 * Allocation is done in offset-size areas of single unit space.  Ie,
 * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0,
 * c1:u1, c1:u2 and c1:u3.  Percpu access can be done by configuring
- * percpu base registers UNIT_SIZE apart.
+ * percpu base registers pcpu_unit_size apart.
 *
 * There are usually many small percpu allocations many of them as
 * small as 4 bytes.  The allocator organizes chunks into lists
@@ -38,8 +38,8 @@
 * region and negative allocated.  Allocation inside a chunk is done
 * by scanning this map sequentially and serving the first matching
 * entry.  This is mostly copied from the percpu_modalloc() allocator.
- * Chunks are also linked into a rb tree to ease address to chunk
+ * Chunks can be determined from the address using the index field
- * mapping during free.
+ * in the page struct. The index field contains a pointer to the chunk.
 *
 * To use this allocator, arch code should do the followings.
 *
@@ -61,7 +61,6 @@
 #include <linux/mutex.h>
 #include <linux/percpu.h>
 #include <linux/pfn.h>
-#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
@@ -88,7 +87,6 @@
 struct pcpu_chunk {
        struct list_head        list;           /* linked to pcpu_slot lists */
-        struct rb_node          rb_node;        /* key is chunk->vm->addr */
        int                     free_size;      /* free bytes in the chunk */
        int                     contig_hint;    /* max contiguous size hint */
        struct vm_struct        *vm;            /* mapped vmalloc region */
@@ -110,9 +108,21 @@ static size_t pcpu_chunk_struct_size __read_mostly;
 void *pcpu_base_addr __read_mostly;
 EXPORT_SYMBOL_GPL(pcpu_base_addr);
-/* optional reserved chunk, only accessible for reserved allocations */
+/*
+ * The first chunk which always exists.  Note that unlike other
+ * chunks, this one can be allocated and mapped in several different
+ * ways and thus often doesn't live in the vmalloc area.
+ */
+static struct pcpu_chunk *pcpu_first_chunk;
+/*
+ * Optional reserved chunk.  This chunk reserves part of the first
+ * chunk and serves it for reserved allocations.  The offset at which
+ * the reserved area ends is kept in pcpu_reserved_chunk_limit.  When
+ * the reserved area doesn't exist, the following variables contain
+ * NULL and 0 respectively.
+ */
 static struct pcpu_chunk *pcpu_reserved_chunk;
-/* offset limit of the reserved chunk */
 static int pcpu_reserved_chunk_limit;
 /*
@@ -121,7 +131,7 @@ static int pcpu_reserved_chunk_limit;
 * There are two locks - pcpu_alloc_mutex and pcpu_lock.  The former
 * protects allocation/reclaim paths, chunks and chunk->page arrays.
 * The latter is a spinlock and protects the index data structures -
- * chunk slots, rbtree, chunks and area maps in chunks.
+ * chunk slots, chunks and area maps in chunks.
 *
 * During allocation, pcpu_alloc_mutex is kept locked all the time and
 * pcpu_lock is grabbed and released as necessary.  All actual memory
@@ -140,7 +150,6 @@ static DEFINE_MUTEX(pcpu_alloc_mutex);	/* protects whole alloc and reclaim */
 static DEFINE_SPINLOCK(pcpu_lock);      /* protects index data structures */
 static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
-static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */
 /* reclaim work to release fully free chunks, scheduled from free path */
 static void pcpu_reclaim(struct work_struct *work);
@@ -191,6 +200,18 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk,
        return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL;
 }
+/*
+ * Set the pointer to a chunk in a page struct.  The chunk pointer is
+ * stored in page->index, cast to unsigned long, so that
+ * pcpu_get_page_chunk() can read it back from the page.
+ */
+static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu)
+{
+        page->index = (unsigned long)pcpu;
+}
+/*
+ * Obtain the pointer to a chunk from a page struct.  This is the
+ * inverse of pcpu_set_page_chunk(): it reads page->index and casts it
+ * back to a struct pcpu_chunk pointer.
+ */
+static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page)
+{
+        return (struct pcpu_chunk *)page->index;
+}
 /**
 * pcpu_mem_alloc - allocate memory
 * @size: bytes to allocate
@@ -257,93 +278,26 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
        }
 }
-static struct rb_node **pcpu_chunk_rb_search(void *addr,
-                                             struct rb_node **parentp)
-{
-        struct rb_node **p = &pcpu_addr_root.rb_node;
-        struct rb_node *parent = NULL;
-        struct pcpu_chunk *chunk;
-        while (*p) {
-                parent = *p;
-                chunk = rb_entry(parent, struct pcpu_chunk, rb_node);
-                if (addr < chunk->vm->addr)
-                        p = &(*p)->rb_left;
-                else if (addr > chunk->vm->addr)
-                        p = &(*p)->rb_right;
-                else
-                        break;
-        }
-        if (parentp)
-                *parentp = parent;
-        return p;
-}
 /**
- * pcpu_chunk_addr_search - search for chunk containing specified address
+ * pcpu_chunk_addr_search - determine chunk containing specified address
- * @addr: address to search for
+ * @addr: address for which the chunk needs to be determined.
- *
- * Look for chunk which might contain @addr.  More specifically, it
- * searchs for the chunk with the highest start address which isn't
- * beyond @addr.
- *
- * CONTEXT:
- * pcpu_lock.
 *
 * RETURNS:
 * The address of the found chunk.
 */
 static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 {
-        struct rb_node *n, *parent;
+        void *first_start = pcpu_first_chunk->vm->addr;
-        struct pcpu_chunk *chunk;
-        /* is it in the reserved chunk? */
+        /* is it in the first chunk? */
-        if (pcpu_reserved_chunk) {
+        if (addr >= first_start && addr < first_start + pcpu_chunk_size) {
-                void *start = pcpu_reserved_chunk->vm->addr;
+                /* is it in the reserved area? */
+                if (addr < first_start + pcpu_reserved_chunk_limit)
-                if (addr >= start && addr < start + pcpu_reserved_chunk_limit)
                        return pcpu_reserved_chunk;
+                return pcpu_first_chunk;
        }
-        /* nah... search the regular ones */
+        return pcpu_get_page_chunk(vmalloc_to_page(addr));
-        n = *pcpu_chunk_rb_search(addr, &parent);
-        if (!n) {
-                /* no exactly matching chunk, the parent is the closest */
-                n = parent;
-                BUG_ON(!n);
-        }
-        chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-        if (addr < chunk->vm->addr) {
-                /* the parent was the next one, look for the previous one */
-                n = rb_prev(n);
-                BUG_ON(!n);
-                chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-        }
-        return chunk;
-}
-/**
- * pcpu_chunk_addr_insert - insert chunk into address rb tree
- * @new: chunk to insert
- *
- * Insert @new into address rb tree.
- *
- * CONTEXT:
- * pcpu_lock.
- */
-static void pcpu_chunk_addr_insert(struct pcpu_chunk *new)
-{
-        struct rb_node **p, *parent;
-        p = pcpu_chunk_rb_search(new->vm->addr, &parent);
-        BUG_ON(*p);
-        rb_link_node(&new->rb_node, parent, p);
-        rb_insert_color(&new->rb_node, &pcpu_addr_root);
 }
 /**
@@ -755,6 +709,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
                                                  alloc_mask, 0);
                        if (!*pagep)
                                goto err;
+                        pcpu_set_page_chunk(*pagep, chunk);
                }
        }
@@ -879,7 +834,6 @@ restart:
        spin_lock_irq(&pcpu_lock);
        pcpu_chunk_relocate(chunk, -1);
-        pcpu_chunk_addr_insert(chunk);
        goto restart;
 area_found:
@@ -968,7 +922,6 @@ static void pcpu_reclaim(struct work_struct *work)
                if (chunk == list_first_entry(head, struct pcpu_chunk, list))
                        continue;
-                rb_erase(&chunk->rb_node, &pcpu_addr_root);
                list_move(&chunk->list, &todo);
        }
@@ -1147,7 +1100,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
        if (reserved_size) {
                schunk->free_size = reserved_size;
-                pcpu_reserved_chunk = schunk;   /* not for dynamic alloc */
+                pcpu_reserved_chunk = schunk;
+                pcpu_reserved_chunk_limit = static_size + reserved_size;
        } else {
                schunk->free_size = dyn_size;
                dyn_size = 0;                   /* dynamic area covered */
@@ -1158,8 +1112,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
        if (schunk->free_size)
                schunk->map[schunk->map_used++] = schunk->free_size;
-        pcpu_reserved_chunk_limit = static_size + schunk->free_size;
        /* init dynamic chunk if necessary */
        if (dyn_size) {
                dchunk = alloc_bootmem(sizeof(struct pcpu_chunk));
@@ -1226,13 +1178,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
        }
        /* link the first chunk in */
-        if (!dchunk) {
+        pcpu_first_chunk = dchunk ?: schunk;
-                pcpu_chunk_relocate(schunk, -1);
+        pcpu_chunk_relocate(pcpu_first_chunk, -1);
-                pcpu_chunk_addr_insert(schunk);
-        } else {
-                pcpu_chunk_relocate(dchunk, -1);
-                pcpu_chunk_addr_insert(dchunk);
-        }
        /* we're done */
        pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0);
diff --git a/mm/rmap.c b/mm/rmap.c
index 16521664010d..23122af32611 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -14,7 +14,7 @@
 * Original design by Rik van Riel <riel@conectiva.com.br> 2001
 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
- * Contributions by Hugh Dickins <hugh@veritas.com> 2003, 2004
+ * Contributions by Hugh Dickins 2003, 2004
 */
 /*
diff --git a/mm/slob.c b/mm/slob.c
index 494f05f19417..9b1737b0787b 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -60,6 +60,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/swap.h> /* struct reclaim_state */
 #include <linux/cache.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -255,6 +256,8 @@ static void *slob_new_pages(gfp_t gfp, int order, int node)
 static void slob_free_pages(void *b, int order)
 {
+        if (current->reclaim_state)
+                current->reclaim_state->reclaimed_slab += 1 << order;
        free_pages((unsigned long)b, order);
 }
@@ -407,7 +410,7 @@ static void slob_free(void *block, int size)
                spin_unlock_irqrestore(&slob_lock, flags);
                clear_slob_page(sp);
                free_slob_page(sp);
-                free_page((unsigned long)b);
+                slob_free_pages(b, 0);
                return;
        }
diff --git a/mm/slub.c b/mm/slub.c
index ea9e7160e2e7..5e805a6fe36c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -9,6 +9,7 @@
 */
 #include <linux/mm.h>
+#include <linux/swap.h> /* struct reclaim_state */
 #include <linux/module.h>
 #include <linux/bit_spinlock.h>
 #include <linux/interrupt.h>
@@ -1170,6 +1171,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
        __ClearPageSlab(page);
        reset_page_mapcount(page);
+        if (current->reclaim_state)
+                current->reclaim_state->reclaimed_slab += pages;
        __free_pages(page, order);
 }
@@ -1909,7 +1912,7 @@ static inline int calculate_order(int size)
         * Doh this slab cannot be placed using slub_max_order.
         */
        order = slab_order(size, 1, MAX_ORDER, 1);
-        if (order <= MAX_ORDER)
+        if (order < MAX_ORDER)
                return order;
        return -ENOSYS;
 }
@@ -2522,6 +2525,7 @@ __setup("slub_min_order=", setup_slub_min_order);
 static int __init setup_slub_max_order(char *str)
 {
        get_option(&str, &slub_max_order);
+        slub_max_order = min(slub_max_order, MAX_ORDER - 1);
        return 1;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3ecea98ecb45..1416e7e9e02d 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -109,8 +109,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 */
 void __delete_from_swap_cache(struct page *page)
 {
-        swp_entry_t ent = {.val = page_private(page)};
        VM_BUG_ON(!PageLocked(page));
        VM_BUG_ON(!PageSwapCache(page));
        VM_BUG_ON(PageWriteback(page));
@@ -121,7 +119,6 @@ void __delete_from_swap_cache(struct page *page)
        total_swapcache_pages--;
        __dec_zone_page_state(page, NR_FILE_PAGES);
        INC_CACHE_INFO(del_total);
-        mem_cgroup_uncharge_swapcache(page, ent);
 }
 /**
@@ -191,6 +188,7 @@ void delete_from_swap_cache(struct page *page)
        __delete_from_swap_cache(page);
        spin_unlock_irq(&swapper_space.tree_lock);
+        mem_cgroup_uncharge_swapcache(page, entry);
        swap_free(entry);
        page_cache_release(page);
 }
diff --git a/mm/truncate.c b/mm/truncate.c
index 55206fab7b99..12e1579f9165 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -359,6 +359,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
        BUG_ON(page_has_private(page));
        __remove_from_page_cache(page);
        spin_unlock_irq(&mapping->tree_lock);
+        mem_cgroup_uncharge_cache_page(page);
        page_cache_release(page);       /* pagecache ref */
        return 1;
 failed:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5fa3eda1f03f..d254306562cd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,10 +470,12 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
                swp_entry_t swap = { .val = page_private(page) };
                __delete_from_swap_cache(page);
                spin_unlock_irq(&mapping->tree_lock);
+                mem_cgroup_uncharge_swapcache(page, swap);
                swap_free(swap);
        } else {
                __remove_from_page_cache(page);
                spin_unlock_irq(&mapping->tree_lock);
+                mem_cgroup_uncharge_cache_page(page);
        }
        return 1;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 66f6130976cb..74d66dba0cbe 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -509,22 +509,11 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m,
                        continue;
                page = pfn_to_page(pfn);
-#ifdef CONFIG_ARCH_FLATMEM_HAS_HOLES
-                /*
+                /* Watch for unexpected holes punched in the memmap */
-                 * Ordinarily, memory holes in flatmem still have a valid
+                if (!memmap_valid_within(pfn, page, zone))
-                 * memmap for the PFN range. However, an architecture for
-                 * embedded systems (e.g. ARM) can free up the memmap backing
-                 * holes to save memory on the assumption the memmap is
-                 * never used. The page_zone linkages are then broken even
-                 * though pfn_valid() returns true. Skip the page if the
-                 * linkages are broken. Even if this test passed, the impact
-                 * is that the counters for the movable type are off but
-                 * fragmentation monitoring is likely meaningless on small
-                 * systems.
-                 */
-                if (page_zone(page) != zone)
                        continue;
-#endif
                mtype = get_pageblock_migratetype(page);
                if (mtype < MIGRATE_TYPES)