Diffstat (limited to 'mm')

 -rw-r--r--  mm/bootmem.c        |  9
 -rw-r--r--  mm/madvise.c        |  3
 -rw-r--r--  mm/mempolicy.c      |  1
 -rw-r--r--  mm/migrate.c        |  8
 -rw-r--r--  mm/mmap.c           | 34
 -rw-r--r--  mm/nommu.c          | 18
 -rw-r--r--  mm/oom_kill.c       | 71
 -rw-r--r--  mm/page-writeback.c |  5
 -rw-r--r--  mm/page_alloc.c     | 80
 -rw-r--r--  mm/slab.c           | 30
 -rw-r--r--  mm/slob.c           | 10

 11 files changed, 186 insertions(+), 83 deletions(-)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d3e3bd2ffcea..d213feded10d 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -401,7 +401,7 @@ unsigned long __init free_all_bootmem (void)
 	return(free_all_bootmem_core(NODE_DATA(0)));
 }
 
-void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
 {
 	bootmem_data_t *bdata;
 	void *ptr;
@@ -409,7 +409,14 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned
 	list_for_each_entry(bdata, &bdata_list, list)
 		if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
 			return(ptr);
+	return NULL;
+}
 
+void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+{
+	void *mem = __alloc_bootmem_nopanic(size,align,goal);
+	if (mem)
+		return mem;
 	/*
 	 * Whoops, we cannot satisfy the allocation request.
 	 */
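The hunk above splits the old always-panic bootmem allocator in two: __alloc_bootmem_nopanic() walks the node list and returns NULL on failure, while __alloc_bootmem() keeps the historical panic-on-failure contract by wrapping it. A minimal userspace sketch of the same wrapper pattern; alloc_nopanic and alloc_or_die are illustrative names, with malloc standing in for the bdata_list walk:

#include <stdio.h>
#include <stdlib.h>

/* Fallible variant: hands back NULL when the request cannot be satisfied. */
static void *alloc_nopanic(size_t size)
{
	return malloc(size);		/* stand-in for the bdata_list walk */
}

/* Wrapper preserving the old contract: callers never see NULL. */
static void *alloc_or_die(size_t size)
{
	void *mem = alloc_nopanic(size);

	if (mem)
		return mem;
	fprintf(stderr, "alloc of %zu bytes failed!\n", size);
	abort();			/* the kernel would call panic() here */
}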
diff --git a/mm/madvise.c b/mm/madvise.c
index af3d573b0141..4e196155a0c3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -168,6 +168,9 @@ static long madvise_remove(struct vm_area_struct *vma,
 		return -EINVAL;
 	}
 
+	if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
+		return -EACCES;
+
 	mapping = vma->vm_file->f_mapping;
 
 	offset = (loff_t)(start - vma->vm_start)
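The new check in madvise_remove() demands that both VM_SHARED and VM_WRITE are set before punching a hole in the mapping: masking with the pair and comparing against the pair rejects any vma where either bit is missing. A small userspace check of that bit test; the flag values mirror the 2.6 vm_flags definitions:

#include <assert.h>

#define VM_WRITE	0x00000002UL
#define VM_SHARED	0x00000008UL

static int remove_allowed(unsigned long vm_flags)
{
	/* both bits must be present, not just one of them */
	return (vm_flags & (VM_SHARED|VM_WRITE)) == (VM_SHARED|VM_WRITE);
}

int main(void)
{
	assert(remove_allowed(VM_SHARED|VM_WRITE));
	assert(!remove_allowed(VM_SHARED));	/* shared but read-only */
	assert(!remove_allowed(VM_WRITE));	/* writable but private */
	return 0;
}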
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index dec8249e972d..8778f58880c4 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1761,7 +1761,6 @@ static void gather_stats(struct page *page, void *private, int pte_dirty)
 		md->mapcount_max = count;
 
 	md->node[page_to_nid(page)]++;
-	cond_resched();
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/migrate.c b/mm/migrate.c
index 09f6e4aa87fc..d444229f2599 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -16,8 +16,7 @@
 #include <linux/module.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
-#include <linux/buffer_head.h>	/* for try_to_release_page(),
-					buffer_heads_over_limit */
+#include <linux/buffer_head.h>
 #include <linux/mm_inline.h>
 #include <linux/pagevec.h>
 #include <linux/rmap.h>
@@ -28,8 +27,6 @@
 
 #include "internal.h"
 
-#include "internal.h"
-
 /* The maximum number of pages to take off the LRU for migration */
 #define MIGRATE_CHUNK_SIZE 256
 
@@ -176,7 +173,6 @@ unlock_retry:
 retry:
 	return -EAGAIN;
 }
-EXPORT_SYMBOL(swap_page);
 
 /*
  * Remove references for a page and establish the new page with the correct
@@ -234,7 +230,7 @@ int migrate_page_remove_references(struct page *newpage,
 	if (!page_mapping(page) || page_count(page) != nr_refs ||
 	    *radix_pointer != page) {
 		write_unlock_irq(&mapping->tree_lock);
-		return 1;
+		return -EAGAIN;
 	}
 
 	/*
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -121,14 +121,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 		 * only call if we're about to fail.
 		 */
 		n = nr_free_pages();
+
+		/*
+		 * Leave reserved pages. The pages are not for anonymous pages.
+		 */
+		if (n <= totalreserve_pages)
+			goto error;
+		else
+			n -= totalreserve_pages;
+
+		/*
+		 * Leave the last 3% for root
+		 */
 		if (!cap_sys_admin)
 			n -= n / 32;
 		free += n;
 
 		if (free > pages)
 			return 0;
-		vm_unacct_memory(pages);
-		return -ENOMEM;
+
+		goto error;
 	}
 
 	allowed = (totalram_pages - hugetlb_total_pages())
@@ -150,7 +162,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	 */
 	if (atomic_read(&vm_committed_space) < (long)allowed)
 		return 0;
-
+error:
 	vm_unacct_memory(pages);
 
 	return -ENOMEM;
@@ -220,6 +232,17 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 
 	if (brk < mm->end_code)
 		goto out;
+
+	/*
+	 * Check against rlimit here. If this check is done later after the test
+	 * of oldbrk with newbrk then it can escape the test and let the data
+	 * segment grow beyond its set limit the in case where the limit is
+	 * not page aligned -Ram Gupta
+	 */
+	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+		goto out;
+
 	newbrk = PAGE_ALIGN(brk);
 	oldbrk = PAGE_ALIGN(mm->brk);
 	if (oldbrk == newbrk)
@@ -232,11 +255,6 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 		goto out;
 	}
 
-	/* Check against rlimit.. */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
-	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
-		goto out;
-
 	/* Check against existing mmap mappings. */
 	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
 		goto out;
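Ram Gupta's comment describes an ordering bug worth spelling out: when RLIMIT_DATA is not page aligned, an over-limit brk request can round to the same page as the current break, so the old code took the oldbrk == newbrk fast path before ever reaching the rlimit test. A userspace model of that escape; the addresses and the 100-byte limit are invented for illustration:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long start_data = 0x10000;		/* page aligned */
	unsigned long rlim = 100;			/* not page aligned */
	unsigned long brk = start_data + 50;		/* within the limit */
	unsigned long request = start_data + 200;	/* beyond the limit */

	/* Old order: both breaks round up to the same page, so the
	 * oldbrk == newbrk shortcut fired before the rlimit test ran. */
	if (PAGE_ALIGN(request) == PAGE_ALIGN(brk))
		printf("old order: %lu-byte segment allowed past a %lu-byte limit\n",
		       request - start_data, rlim);

	/* New order: the rlimit test comes first and catches the overrun. */
	if (request - start_data > rlim)
		printf("new order: request rejected\n");
	return 0;
}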
diff --git a/mm/nommu.c b/mm/nommu.c
index db45efac17cc..029fadac0fb5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1147,14 +1147,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 		 * only call if we're about to fail.
 		 */
 		n = nr_free_pages();
+
+		/*
+		 * Leave reserved pages. The pages are not for anonymous pages.
+		 */
+		if (n <= totalreserve_pages)
+			goto error;
+		else
+			n -= totalreserve_pages;
+
+		/*
+		 * Leave the last 3% for root
+		 */
 		if (!cap_sys_admin)
 			n -= n / 32;
 		free += n;
 
 		if (free > pages)
 			return 0;
-		vm_unacct_memory(pages);
-		return -ENOMEM;
+
+		goto error;
 	}
 
 	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
@@ -1175,7 +1187,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	 */
 	if (atomic_read(&vm_committed_space) < (long)allowed)
 		return 0;
-
+error:
 	vm_unacct_memory(pages);
 
 	return -ENOMEM;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 78747afad6b0..042e6436c3ee 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -46,15 +46,25 @@
 unsigned long badness(struct task_struct *p, unsigned long uptime)
 {
 	unsigned long points, cpu_time, run_time, s;
-	struct list_head *tsk;
+	struct mm_struct *mm;
+	struct task_struct *child;
 
-	if (!p->mm)
+	task_lock(p);
+	mm = p->mm;
+	if (!mm) {
+		task_unlock(p);
 		return 0;
+	}
 
 	/*
 	 * The memory size of the process is the basis for the badness.
 	 */
-	points = p->mm->total_vm;
+	points = mm->total_vm;
+
+	/*
+	 * After this unlock we can no longer dereference local variable `mm'
+	 */
+	task_unlock(p);
 
 	/*
 	 * Processes which fork a lot of child processes are likely
@@ -64,11 +74,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	 * child is eating the vast majority of memory, adding only half
 	 * to the parents will make the child our kill candidate of choice.
 	 */
-	list_for_each(tsk, &p->children) {
-		struct task_struct *chld;
-		chld = list_entry(tsk, struct task_struct, sibling);
-		if (chld->mm != p->mm && chld->mm)
-			points += chld->mm->total_vm/2 + 1;
+	list_for_each_entry(child, &p->children, sibling) {
+		task_lock(child);
+		if (child->mm != mm && child->mm)
+			points += child->mm->total_vm/2 + 1;
+		task_unlock(child);
 	}
 
 	/*
@@ -244,17 +254,24 @@ static void __oom_kill_task(task_t *p, const char *message)
 	force_sig(SIGKILL, p);
 }
 
-static struct mm_struct *oom_kill_task(task_t *p, const char *message)
+static int oom_kill_task(task_t *p, const char *message)
 {
-	struct mm_struct *mm = get_task_mm(p);
+	struct mm_struct *mm;
 	task_t * g, * q;
 
-	if (!mm)
-		return NULL;
-	if (mm == &init_mm) {
-		mmput(mm);
-		return NULL;
-	}
+	mm = p->mm;
+
+	/* WARNING: mm may not be dereferenced since we did not obtain its
+	 * value from get_task_mm(p). This is OK since all we need to do is
+	 * compare mm to q->mm below.
+	 *
+	 * Furthermore, even if mm contains a non-NULL value, p->mm may
+	 * change to NULL at any time since we do not hold task_lock(p).
+	 * However, this is of no concern to us.
+	 */
+
+	if (mm == NULL || mm == &init_mm)
+		return 1;
 
 	__oom_kill_task(p, message);
 	/*
@@ -266,13 +283,12 @@ static struct mm_struct *oom_kill_task(task_t *p, const char *message)
 		__oom_kill_task(q, message);
 	while_each_thread(g, q);
 
-	return mm;
+	return 0;
 }
 
-static struct mm_struct *oom_kill_process(struct task_struct *p,
-				unsigned long points, const char *message)
+static int oom_kill_process(struct task_struct *p, unsigned long points,
+		const char *message)
 {
-	struct mm_struct *mm;
 	struct task_struct *c;
 	struct list_head *tsk;
 
@@ -283,9 +299,8 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
 		c = list_entry(tsk, struct task_struct, sibling);
 		if (c->mm == p->mm)
 			continue;
-		mm = oom_kill_task(c, message);
-		if (mm)
-			return mm;
+		if (!oom_kill_task(c, message))
+			return 0;
 	}
 	return oom_kill_task(p, message);
 }
@@ -300,7 +315,6 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
  */
 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
-	struct mm_struct *mm = NULL;
 	task_t *p;
 	unsigned long points = 0;
 
@@ -320,12 +334,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 	 */
 	switch (constrained_alloc(zonelist, gfp_mask)) {
 	case CONSTRAINT_MEMORY_POLICY:
-		mm = oom_kill_process(current, points,
+		oom_kill_process(current, points,
 				"No available memory (MPOL_BIND)");
 		break;
 
 	case CONSTRAINT_CPUSET:
-		mm = oom_kill_process(current, points,
+		oom_kill_process(current, points,
 				"No available memory in cpuset");
 		break;
 
@@ -347,8 +361,7 @@ retry:
 		panic("Out of memory and no killable processes...\n");
 	}
 
-	mm = oom_kill_process(p, points, "Out of memory");
-	if (!mm)
+	if (oom_kill_process(p, points, "Out of memory"))
 		goto retry;
 
 	break;
@@ -357,8 +370,6 @@ retry:
 out:
 	read_unlock(&tasklist_lock);
 	cpuset_unlock();
-	if (mm)
-		mmput(mm);
 
 	/*
 	 * Give "p" a good chance of killing itself before we
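The badness() changes keep the same scoring rule while fixing the locking: the parent contributes its whole VM size, and every child owning its own mm contributes half of its size plus one, so a forking parent whose child hogs memory still outscores other candidates and the child becomes the preferred victim. A toy model of the arithmetic; the struct is a simplified stand-in for task_struct, and children sharing the parent's mm are skipped just as in the hunk:

struct toy_task {
	unsigned long total_vm;			/* 0 when the task has no mm */
	int shares_parent_mm;			/* models child->mm == parent's mm */
	const struct toy_task *children;
	int nr_children;
};

static unsigned long toy_badness(const struct toy_task *p)
{
	unsigned long points = p->total_vm;
	int i;

	for (i = 0; i < p->nr_children; i++) {
		const struct toy_task *c = &p->children[i];

		if (c->total_vm && !c->shares_parent_mm)
			points += c->total_vm / 2 + 1;
	}
	return points;
}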
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6dcce3a4bbdc..75d7f48b79bb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -72,13 +72,12 @@ int dirty_background_ratio = 10;
 int vm_dirty_ratio = 40;
 
 /*
- * The interval between `kupdate'-style writebacks, in centiseconds
- * (hundredths of a second)
+ * The interval between `kupdate'-style writebacks, in jiffies
 */
 int dirty_writeback_interval = 5 * HZ;
 
 /*
- * The longest number of centiseconds for which data is allowed to remain dirty
+ * The longest number of jiffies for which data is allowed to remain dirty
 */
 int dirty_expire_interval = 30 * HZ;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc523a1f270d..123c60586740 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
@@ -151,7 +152,8 @@ static void bad_page(struct page *page)
 			1 << PG_reclaim |
 			1 << PG_slab    |
 			1 << PG_swapcache |
-			1 << PG_writeback );
+			1 << PG_writeback |
+			1 << PG_buddy );
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
 	page->mapping = NULL;
@@ -230,18 +232,20 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
 * zone->lock is already acquired when we use these.
 * So, we don't need atomic page->flags operations here.
 */
-static inline unsigned long page_order(struct page *page) {
+static inline unsigned long page_order(struct page *page)
+{
 	return page_private(page);
 }
 
-static inline void set_page_order(struct page *page, int order) {
+static inline void set_page_order(struct page *page, int order)
+{
 	set_page_private(page, order);
-	__SetPagePrivate(page);
+	__SetPageBuddy(page);
 }
 
 static inline void rmv_page_order(struct page *page)
 {
-	__ClearPagePrivate(page);
+	__ClearPageBuddy(page);
 	set_page_private(page, 0);
 }
 
@@ -280,11 +284,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
 * This function checks whether a page is free && is the buddy
 * we can do coalesce a page and its buddy if
 * (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
 *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ *
+ * For recording page's order, we use page_private(page).
 */
 static inline int page_is_buddy(struct page *page, int order)
 {
@@ -293,11 +299,11 @@ static inline int page_is_buddy(struct page *page, int order)
 		return 0;
 #endif
 
-	if (PagePrivate(page) &&
-	    (page_order(page) == order) &&
-	    page_count(page) == 0)
-		return 1;
+	if (PageBuddy(page) && page_order(page) == order) {
+		BUG_ON(page_count(page) != 0);
+		return 1;
+	}
 	return 0;
 }
 
@@ -313,7 +319,7 @@ static inline int page_is_buddy(struct page *page, int order)
 * as necessary, plus some accounting needed to play nicely with other
 * parts of the VM system.
 * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
 * order is recorded in page_private(page) field.
 * So when we are allocating or freeing one, we can derive the state of the
 * other. That is, if we allocate a small block, and both were
@@ -376,7 +382,8 @@ static inline int free_pages_check(struct page *page)
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
@@ -524,7 +531,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 
 	/*
@@ -2472,6 +2480,38 @@ void __init page_alloc_init(void)
 }
 
 /*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ *	or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+	struct pglist_data *pgdat;
+	unsigned long reserve_pages = 0;
+	int i, j;
+
+	for_each_online_pgdat(pgdat) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			struct zone *zone = pgdat->node_zones + i;
+			unsigned long max = 0;
+
+			/* Find valid and maximum lowmem_reserve in the zone */
+			for (j = i; j < MAX_NR_ZONES; j++) {
+				if (zone->lowmem_reserve[j] > max)
+					max = zone->lowmem_reserve[j];
+			}
+
+			/* we treat pages_high as reserved pages. */
+			max += zone->pages_high;
+
+			if (max > zone->present_pages)
+				max = zone->present_pages;
+			reserve_pages += max;
+		}
+	}
+	totalreserve_pages = reserve_pages;
+}
+
+/*
 * setup_per_zone_lowmem_reserve - called whenever
 *	sysctl_lower_zone_reserve_ratio changes.  Ensures that each zone
 *	has a correct pages reserved value, so an adequate number of
@@ -2502,6 +2542,9 @@ static void setup_per_zone_lowmem_reserve(void)
 			}
 		}
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
@@ -2556,6 +2599,9 @@ void setup_per_zone_pages_min(void)
 		zone->pages_high = zone->pages_min + tmp / 2;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
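calculate_totalreserve_pages() above sums, over every zone, the largest lowmem_reserve entry plus the pages_high watermark, clamped to the zone's present pages; __vm_enough_memory() then subtracts that total from the free-page count so reserved pages are never promised to anonymous mappings. A userspace model of the same loop, with invented zone sizes:

#include <stdio.h>

#define MAX_NR_ZONES 3

struct toy_zone {
	unsigned long lowmem_reserve[MAX_NR_ZONES];
	unsigned long pages_high;
	unsigned long present_pages;
};

static unsigned long total_reserve(const struct toy_zone *zone, int nr)
{
	unsigned long reserve_pages = 0;
	int i, j;

	for (i = 0; i < nr; i++) {
		unsigned long max = 0;

		/* find the largest lowmem_reserve entry for this zone */
		for (j = i; j < MAX_NR_ZONES; j++)
			if (zone[i].lowmem_reserve[j] > max)
				max = zone[i].lowmem_reserve[j];

		max += zone[i].pages_high;	/* pages_high counts as reserved */
		if (max > zone[i].present_pages)
			max = zone[i].present_pages;
		reserve_pages += max;
	}
	return reserve_pages;
}

int main(void)
{
	const struct toy_zone zones[MAX_NR_ZONES] = {
		{ { 0, 256, 512 }, 128,  4096 },	/* "DMA"     */
		{ { 0,   0, 768 }, 384, 65536 },	/* "Normal"  */
		{ { 0,   0,   0 }, 512, 32768 },	/* "HighMem" */
	};

	/* (512+128) + (768+384) + (0+512) = 2304 */
	printf("totalreserve_pages = %lu\n",
	       total_reserve(zones, MAX_NR_ZONES));
	return 0;
}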
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -420,6 +420,7 @@ struct kmem_cache {
 	unsigned long max_freeable;
 	unsigned long node_allocs;
 	unsigned long node_frees;
+	unsigned long node_overflow;
 	atomic_t allochit;
 	atomic_t allocmiss;
 	atomic_t freehit;
@@ -465,6 +466,7 @@ struct kmem_cache {
 #define STATS_INC_ERR(x)	((x)->errors++)
 #define STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
 #define STATS_INC_NODEFREES(x)	((x)->node_frees++)
+#define STATS_INC_ACOVERFLOW(x)	((x)->node_overflow++)
 #define STATS_SET_FREEABLE(x, i)				\
 	do {							\
 		if ((x)->max_freeable < i)			\
@@ -484,6 +486,7 @@ struct kmem_cache {
 #define STATS_INC_ERR(x)	do { } while (0)
 #define STATS_INC_NODEALLOCS(x)	do { } while (0)
 #define STATS_INC_NODEFREES(x)	do { } while (0)
+#define STATS_INC_ACOVERFLOW(x)	do { } while (0)
 #define STATS_SET_FREEABLE(x, i) do { } while (0)
 #define STATS_INC_ALLOCHIT(x)	do { } while (0)
 #define STATS_INC_ALLOCMISS(x)	do { } while (0)
@@ -1453,7 +1456,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	int i;
 
 	flags |= cachep->gfpflags;
+#ifndef CONFIG_MMU
+	/* nommu uses slab's for process anonymous memory allocations, so
+	 * requires __GFP_COMP to properly refcount higher order allocations"
+	 */
+	page = alloc_pages_node(nodeid, (flags | __GFP_COMP), cachep->gfporder);
+#else
 	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+#endif
 	if (!page)
 		return NULL;
 	addr = page_address(page);
@@ -2318,13 +2328,15 @@ EXPORT_SYMBOL(kmem_cache_destroy);
 
 /* Get the memory for a slab management obj. */
 static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
-				   int colour_off, gfp_t local_flags)
+				   int colour_off, gfp_t local_flags,
+				   int nodeid)
 {
 	struct slab *slabp;
 
 	if (OFF_SLAB(cachep)) {
 		/* Slab management obj is off-slab. */
-		slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+					      local_flags, nodeid);
 		if (!slabp)
 			return NULL;
 	} else {
@@ -2334,6 +2346,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 	slabp->inuse = 0;
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp + colour_off;
+	slabp->nodeid = nodeid;
 	return slabp;
 }
 
@@ -2519,7 +2532,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		goto failed;
 
 	/* Get slab management. */
-	slabp = alloc_slabmgmt(cachep, objp, offset, local_flags);
+	slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid);
 	if (!slabp)
 		goto opps1;
 
@@ -3080,9 +3093,11 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
 		if (l3->alien && l3->alien[nodeid]) {
 			alien = l3->alien[nodeid];
 			spin_lock(&alien->lock);
-			if (unlikely(alien->avail == alien->limit))
+			if (unlikely(alien->avail == alien->limit)) {
+				STATS_INC_ACOVERFLOW(cachep);
 				__drain_alien_cache(cachep,
 						    alien, nodeid);
+			}
 			alien->entry[alien->avail++] = objp;
 			spin_unlock(&alien->lock);
 		} else {
@@ -3760,7 +3775,7 @@ static void print_slabinfo_header(struct seq_file *m)
 	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
 #if STATS
 	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
-		 "<error> <maxfreeable> <nodeallocs> <remotefrees>");
+		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
 	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
 #endif
 	seq_putc(m, '\n');
@@ -3874,11 +3889,12 @@ static int s_show(struct seq_file *m, void *p)
 		unsigned long max_freeable = cachep->max_freeable;
 		unsigned long node_allocs = cachep->node_allocs;
 		unsigned long node_frees = cachep->node_frees;
+		unsigned long overflows = cachep->node_overflow;
 
 		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
-				%4lu %4lu %4lu %4lu", allocs, high, grown,
+				%4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
 				reaped, errors, max_freeable, node_allocs,
-				node_frees);
+				node_frees, overflows);
 	}
 	/* cpu stats */
 	{
diff --git a/mm/slob.c b/mm/slob.c
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -354,9 +354,7 @@ void *__alloc_percpu(size_t size)
 	if (!pdata)
 		return NULL;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_possible_cpu(i) {
 		pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
 		if (!pdata->ptrs[i])
 			goto unwind_oom;
@@ -383,11 +381,9 @@ free_percpu(const void *objp)
 	int i;
 	struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_possible_cpu(i)
 		kfree(p->ptrs[i]);
-	}
+
 	kfree(p);
 }
 EXPORT_SYMBOL(free_percpu);
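The slob.c hunks replace the open-coded NR_CPUS scan with its cpu_possible() filter by the for_each_possible_cpu() iterator. A rough userspace approximation of what that macro does; the bitmap below is a made-up stand-in for the kernel's cpu_possible_map:

#include <stdio.h>

#define NR_CPUS 8

static const unsigned long cpu_possible_bits = 0x0fUL;	/* CPUs 0-3 possible */

#define cpu_possible(c)	((cpu_possible_bits >> (c)) & 1UL)

/* visits only the possible CPUs; the trailing if guards the loop body */
#define for_each_possible_cpu(c) \
	for ((c) = 0; (c) < NR_CPUS; (c)++) \
		if (cpu_possible(c))

int main(void)
{
	int i;

	for_each_possible_cpu(i)
		printf("cpu %d\n", i);	/* prints 0, 1, 2, 3 */
	return 0;
}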