Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig       |   2
-rw-r--r--  mm/hugetlb.c     |   4
-rw-r--r--  mm/memory.c      |  14
-rw-r--r--  mm/migrate.c     |  38
-rw-r--r--  mm/mmu_context.c |   3
-rw-r--r--  mm/oom_kill.c    |   2
-rw-r--r--  mm/page_alloc.c  | 202
-rw-r--r--  mm/percpu.c      |  36
-rw-r--r--  mm/vmstat.c      |  15
9 files changed, 129 insertions, 187 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index e4a33b9479b2..9c61158308dc 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -199,7 +199,7 @@ config BOUNCE
 config NR_QUICK
 	int
 	depends on QUICKLIST
-	default "2" if SUPERH || AVR32
+	default "2" if AVR32
 	default "1"
 
 config VIRT_TO_BUS
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2d16fa6b8c2d..3a5aeb37c110 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2087,7 +2087,7 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
 
 	entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep)));
 	if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) {
-		update_mmu_cache(vma, address, entry);
+		update_mmu_cache(vma, address, ptep);
 	}
 }
 
@@ -2558,7 +2558,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	entry = pte_mkyoung(entry);
 	if (huge_ptep_set_access_flags(vma, address, ptep, entry,
 						flags & FAULT_FLAG_WRITE))
-		update_mmu_cache(vma, address, entry);
+		update_mmu_cache(vma, address, ptep);
 
 out_page_table_lock:
 	spin_unlock(&mm->page_table_lock);
diff --git a/mm/memory.c b/mm/memory.c
index 09e4b1be7b67..72fb5f39bccc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1593,7 +1593,7 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 	/* Ok, finally just insert the thing.. */
 	entry = pte_mkspecial(pfn_pte(pfn, prot));
 	set_pte_at(mm, addr, pte, entry);
-	update_mmu_cache(vma, addr, entry); /* XXX: why not for insert_page? */
+	update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
 
 	retval = 0;
 out_unlock:
@@ -2116,7 +2116,7 @@ reuse:
 		entry = pte_mkyoung(orig_pte);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		if (ptep_set_access_flags(vma, address, page_table, entry,1))
-			update_mmu_cache(vma, address, entry);
+			update_mmu_cache(vma, address, page_table);
 		ret |= VM_FAULT_WRITE;
 		goto unlock;
 	}
@@ -2185,7 +2185,7 @@ gotten:
 		 * new page to be mapped directly into the secondary page table.
 		 */
 		set_pte_at_notify(mm, address, page_table, entry);
-		update_mmu_cache(vma, address, entry);
+		update_mmu_cache(vma, address, page_table);
 		if (old_page) {
 			/*
 			 * Only after switching the pte to the new page may
@@ -2629,7 +2629,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, address, pte);
+	update_mmu_cache(vma, address, page_table);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 out:
@@ -2694,7 +2694,7 @@ setpte:
 	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, address, entry);
+	update_mmu_cache(vma, address, page_table);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return 0;
@@ -2855,7 +2855,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		set_pte_at(mm, address, page_table, entry);
 
 		/* no need to invalidate: a not-present page won't be cached */
-		update_mmu_cache(vma, address, entry);
+		update_mmu_cache(vma, address, page_table);
 	} else {
 		if (charged)
 			mem_cgroup_uncharge_page(page);
@@ -2992,7 +2992,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	}
 	entry = pte_mkyoung(entry);
 	if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) {
-		update_mmu_cache(vma, address, entry);
+		update_mmu_cache(vma, address, pte);
 	} else {
 		/*
 		 * This is needed only for protection faults but the arch code
diff --git a/mm/migrate.c b/mm/migrate.c
index 9a0db5bbabe4..edb6101ed774 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -134,7 +134,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		page_add_file_rmap(new);
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, addr, pte);
+	update_mmu_cache(vma, addr, ptep);
 unlock:
 	pte_unmap_unlock(ptep, ptl);
 out:
@@ -1002,33 +1002,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
 #define DO_PAGES_STAT_CHUNK_NR 16
 	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
 	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
-	unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
-	int err;
 
-	for (i = 0; i < nr_pages; i += chunk_nr) {
-		if (chunk_nr > nr_pages - i)
-			chunk_nr = nr_pages - i;
+	while (nr_pages) {
+		unsigned long chunk_nr;
 
-		err = copy_from_user(chunk_pages, &pages[i],
-				     chunk_nr * sizeof(*chunk_pages));
-		if (err) {
-			err = -EFAULT;
-			goto out;
-		}
+		chunk_nr = nr_pages;
+		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
+			chunk_nr = DO_PAGES_STAT_CHUNK_NR;
+
+		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
+			break;
 
 		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
 
-		err = copy_to_user(&status[i], chunk_status,
-				chunk_nr * sizeof(*chunk_status));
-		if (err) {
-			err = -EFAULT;
-			goto out;
-		}
-	}
-	err = 0;
 
-out:
-	return err;
+		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
+			break;
+
+		pages += chunk_nr;
+		status += chunk_nr;
+		nr_pages -= chunk_nr;
+	}
+	return nr_pages ? -EFAULT : 0;
 }
 
 /*
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index ded9081f4021..0777654147c9 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -5,6 +5,7 @@
 
 #include <linux/mm.h>
 #include <linux/mmu_context.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 
 #include <asm/mmu_context.h>
@@ -37,6 +38,7 @@ void use_mm(struct mm_struct *mm)
 	if (active_mm != mm)
 		mmdrop(active_mm);
 }
+EXPORT_SYMBOL_GPL(use_mm);
 
 /*
  * unuse_mm
@@ -56,3 +58,4 @@ void unuse_mm(struct mm_struct *mm)
 	enter_lazy_tlb(mm, tsk);
 	task_unlock(tsk);
 }
+EXPORT_SYMBOL_GPL(unuse_mm);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f52481b1c1e5..237050478f28 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -459,6 +459,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	list_for_each_entry(c, &p->children, sibling) {
 		if (c->mm == p->mm)
 			continue;
+		if (mem && !task_in_mem_cgroup(c, mem))
+			continue;
 		if (!oom_kill_task(c))
 			return 0;
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1fa93bd2bb9f..a6b17aa4740b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1009,10 +1009,10 @@ static void drain_pages(unsigned int cpu)
 		struct per_cpu_pageset *pset;
 		struct per_cpu_pages *pcp;
 
-		pset = zone_pcp(zone, cpu);
+		local_irq_save(flags);
+		pset = per_cpu_ptr(zone->pageset, cpu);
 
 		pcp = &pset->pcp;
-		local_irq_save(flags);
 		free_pcppages_bulk(zone, pcp->count, pcp);
 		pcp->count = 0;
 		local_irq_restore(flags);
@@ -1096,7 +1096,6 @@ static void free_hot_cold_page(struct page *page, int cold)
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
-	pcp = &zone_pcp(zone, get_cpu())->pcp;
 	migratetype = get_pageblock_migratetype(page);
 	set_page_private(page, migratetype);
 	local_irq_save(flags);
@@ -1119,6 +1118,7 @@ static void free_hot_cold_page(struct page *page, int cold)
 		migratetype = MIGRATE_MOVABLE;
 	}
 
+	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	if (cold)
 		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	else
@@ -1131,7 +1131,6 @@ static void free_hot_cold_page(struct page *page, int cold)
 
 out:
 	local_irq_restore(flags);
-	put_cpu();
 }
 
 void free_hot_page(struct page *page)
@@ -1181,17 +1180,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 	unsigned long flags;
 	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
-	int cpu;
 
 again:
-	cpu  = get_cpu();
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
 		struct list_head *list;
 
-		pcp = &zone_pcp(zone, cpu)->pcp;
-		list = &pcp->lists[migratetype];
 		local_irq_save(flags);
+		pcp = &this_cpu_ptr(zone->pageset)->pcp;
+		list = &pcp->lists[migratetype];
 		if (list_empty(list)) {
 			pcp->count += rmqueue_bulk(zone, 0,
 					pcp->batch, list,
@@ -1232,7 +1229,6 @@ again:
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone);
 	local_irq_restore(flags);
-	put_cpu();
 
 	VM_BUG_ON(bad_range(zone, page));
 	if (prep_new_page(page, order, gfp_flags))
@@ -1241,7 +1237,6 @@ again:
 
 failed:
 	local_irq_restore(flags);
-	put_cpu();
 	return NULL;
 }
 
@@ -2180,7 +2175,7 @@ void show_free_areas(void)
 		for_each_online_cpu(cpu) {
 			struct per_cpu_pageset *pageset;
 
-			pageset = zone_pcp(zone, cpu);
+			pageset = per_cpu_ptr(zone->pageset, cpu);
 
 			printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
 			       cpu, pageset->pcp.high,
@@ -2745,10 +2740,29 @@ static void build_zonelist_cache(pg_data_t *pgdat)
 
 #endif	/* CONFIG_NUMA */
 
+/*
+ * Boot pageset table. One per cpu which is going to be used for all
+ * zones and all nodes. The parameters will be set in such a way
+ * that an item put on a list will immediately be handed over to
+ * the buddy list. This is safe since pageset manipulation is done
+ * with interrupts disabled.
+ *
+ * The boot_pagesets must be kept even after bootup is complete for
+ * unused processors and/or zones. They do play a role for bootstrapping
+ * hotplugged processors.
+ *
+ * zoneinfo_show() and maybe other functions do
+ * not check if the processor is online before following the pageset pointer.
+ * Other parts of the kernel may not check if the zone is available.
+ */
+static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
+static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
+
 /* return values int ....just for stop_machine() */
 static int __build_all_zonelists(void *dummy)
 {
 	int nid;
+	int cpu;
 
 #ifdef CONFIG_NUMA
 	memset(node_load, 0, sizeof(node_load));
@@ -2759,6 +2773,23 @@ static int __build_all_zonelists(void *dummy)
 		build_zonelists(pgdat);
 		build_zonelist_cache(pgdat);
 	}
+
+	/*
+	 * Initialize the boot_pagesets that are going to be used
+	 * for bootstrapping processors. The real pagesets for
+	 * each zone will be allocated later when the per cpu
+	 * allocator is available.
+	 *
+	 * boot_pagesets are used also for bootstrapping offline
+	 * cpus if the system is already booted because the pagesets
+	 * are needed to initialize allocators on a specific cpu too.
+	 * F.e. the percpu allocator needs the page allocator which
+	 * needs the percpu allocator in order to allocate its pagesets
+	 * (a chicken-egg dilemma).
+	 */
+	for_each_possible_cpu(cpu)
+		setup_pageset(&per_cpu(boot_pageset, cpu), 0);
+
 	return 0;
 }
 
@@ -3096,121 +3127,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
 	pcp->batch = PAGE_SHIFT * 8;
 }
 
-
-#ifdef CONFIG_NUMA
-/*
- * Boot pageset table. One per cpu which is going to be used for all
- * zones and all nodes. The parameters will be set in such a way
- * that an item put on a list will immediately be handed over to
- * the buddy list. This is safe since pageset manipulation is done
- * with interrupts disabled.
- *
- * Some NUMA counter updates may also be caught by the boot pagesets.
- *
- * The boot_pagesets must be kept even after bootup is complete for
- * unused processors and/or zones. They do play a role for bootstrapping
- * hotplugged processors.
- *
- * zoneinfo_show() and maybe other functions do
- * not check if the processor is online before following the pageset pointer.
- * Other parts of the kernel may not check if the zone is available.
- */
-static struct per_cpu_pageset boot_pageset[NR_CPUS];
-
 /*
- * Dynamically allocate memory for the
- * per cpu pageset array in struct zone.
+ * Allocate per cpu pagesets and initialize them.
+ * Before this call only boot pagesets were available.
+ * Boot pagesets will no longer be used by this processorr
+ * after setup_per_cpu_pageset().
  */
-static int __cpuinit process_zones(int cpu)
+void __init setup_per_cpu_pageset(void)
 {
-	struct zone *zone, *dzone;
-	int node = cpu_to_node(cpu);
-
-	node_set_state(node, N_CPU);	/* this node has a cpu */
+	struct zone *zone;
+	int cpu;
 
 	for_each_populated_zone(zone) {
-		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
-					 GFP_KERNEL, node);
-		if (!zone_pcp(zone, cpu))
-			goto bad;
-
-		setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
-
-		if (percpu_pagelist_fraction)
-			setup_pagelist_highmark(zone_pcp(zone, cpu),
-				(zone->present_pages / percpu_pagelist_fraction));
-	}
-
-	return 0;
-bad:
-	for_each_zone(dzone) {
-		if (!populated_zone(dzone))
-			continue;
-		if (dzone == zone)
-			break;
-		kfree(zone_pcp(dzone, cpu));
-		zone_pcp(dzone, cpu) = &boot_pageset[cpu];
-	}
-	return -ENOMEM;
-}
+		zone->pageset = alloc_percpu(struct per_cpu_pageset);
 
-static inline void free_zone_pagesets(int cpu)
-{
-	struct zone *zone;
-
-	for_each_zone(zone) {
-		struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
+		for_each_possible_cpu(cpu) {
+			struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
 
-		/* Free per_cpu_pageset if it is slab allocated */
-		if (pset != &boot_pageset[cpu])
-			kfree(pset);
-		zone_pcp(zone, cpu) = &boot_pageset[cpu];
-	}
-}
+			setup_pageset(pcp, zone_batchsize(zone));
 
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
-		unsigned long action,
-		void *hcpu)
-{
-	int cpu = (long)hcpu;
-	int ret = NOTIFY_OK;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		if (process_zones(cpu))
-			ret = NOTIFY_BAD;
-		break;
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		free_zone_pagesets(cpu);
-		break;
-	default:
-		break;
+			if (percpu_pagelist_fraction)
+				setup_pagelist_highmark(pcp,
+					(zone->present_pages /
+						percpu_pagelist_fraction));
+		}
 	}
-	return ret;
 }
 
-static struct notifier_block __cpuinitdata pageset_notifier =
-	{ &pageset_cpuup_callback, NULL, 0 };
-
-void __init setup_per_cpu_pageset(void)
-{
-	int err;
-
-	/* Initialize per_cpu_pageset for cpu 0.
-	 * A cpuup callback will do this for every cpu
-	 * as it comes online
-	 */
-	err = process_zones(smp_processor_id());
-	BUG_ON(err);
-	register_cpu_notifier(&pageset_notifier);
-}
-
-#endif
-
 static noinline __init_refok
 int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
@@ -3264,7 +3207,7 @@ static int __zone_pcp_update(void *data)
 		struct per_cpu_pageset *pset;
 		struct per_cpu_pages *pcp;
 
-		pset = zone_pcp(zone, cpu);
+		pset = per_cpu_ptr(zone->pageset, cpu);
 		pcp = &pset->pcp;
 
 		local_irq_save(flags);
@@ -3282,21 +3225,17 @@ void zone_pcp_update(struct zone *zone)
 
 static __meminit void zone_pcp_init(struct zone *zone)
 {
-	int cpu;
-	unsigned long batch = zone_batchsize(zone);
+	/*
+	 * per cpu subsystem is not up at this point. The following code
+	 * relies on the ability of the linker to provide the
+	 * offset of a (static) per cpu variable into the per cpu area.
+	 */
+	zone->pageset = &boot_pageset;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-#ifdef CONFIG_NUMA
-		/* Early boot. Slab allocator not functional yet */
-		zone_pcp(zone, cpu) = &boot_pageset[cpu];
-		setup_pageset(&boot_pageset[cpu],0);
-#else
-		setup_pageset(zone_pcp(zone,cpu), batch);
-#endif
-	}
 	if (zone->present_pages)
-		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
-			zone->name, zone->present_pages, batch);
+		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%u\n",
+			zone->name, zone->present_pages,
+					zone_batchsize(zone));
 }
 
 __meminit int init_currently_empty_zone(struct zone *zone,
@@ -4869,10 +4808,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
 	if (!write || (ret == -EINVAL))
 		return ret;
 	for_each_populated_zone(zone) {
-		for_each_online_cpu(cpu) {
+		for_each_possible_cpu(cpu) {
 			unsigned long high;
 			high = zone->present_pages / percpu_pagelist_fraction;
-			setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+			setup_pagelist_highmark(
+				per_cpu_ptr(zone->pageset, cpu), high);
 		}
 	}
 	return 0;
diff --git a/mm/percpu.c b/mm/percpu.c
index 083e7c91e5f6..768419d44ad7 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,13 +80,15 @@
 /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
 #ifndef __addr_to_pcpu_ptr
 #define __addr_to_pcpu_ptr(addr)					\
-	(void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr	\
-		 + (unsigned long)__per_cpu_start)
+	(void __percpu *)((unsigned long)(addr) -			\
+			  (unsigned long)pcpu_base_addr +		\
+			  (unsigned long)__per_cpu_start)
 #endif
 #ifndef __pcpu_ptr_to_addr
 #define __pcpu_ptr_to_addr(ptr)						\
-	(void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr	\
-		 - (unsigned long)__per_cpu_start)
+	(void __force *)((unsigned long)(ptr) +				\
+			 (unsigned long)pcpu_base_addr -		\
+			 (unsigned long)__per_cpu_start)
 #endif
 
 struct pcpu_chunk {
@@ -913,11 +915,10 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size)
 	int rs, re;
 
 	/* quick path, check whether it's empty already */
-	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
-		if (rs == page_start && re == page_end)
-			return;
-		break;
-	}
+	rs = page_start;
+	pcpu_next_unpop(chunk, &rs, &re, page_end);
+	if (rs == page_start && re == page_end)
+		return;
 
 	/* immutable chunks can't be depopulated */
 	WARN_ON(chunk->immutable);
@@ -968,11 +969,10 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
 	int rs, re, rc;
 
 	/* quick path, check whether all pages are already there */
-	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) {
-		if (rs == page_start && re == page_end)
-			goto clear;
-		break;
-	}
+	rs = page_start;
+	pcpu_next_pop(chunk, &rs, &re, page_end);
+	if (rs == page_start && re == page_end)
+		goto clear;
 
 	/* need to allocate and map pages, this chunk can't be immutable */
 	WARN_ON(chunk->immutable);
@@ -1067,7 +1067,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
  * RETURNS:
  * Percpu pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_alloc(size_t size, size_t align, bool reserved)
+static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
 {
 	static int warn_limit = 10;
 	struct pcpu_chunk *chunk;
@@ -1196,7 +1196,7 @@ fail_unlock_mutex:
  * RETURNS:
  * Percpu pointer to the allocated area on success, NULL on failure.
  */
-void *__alloc_percpu(size_t size, size_t align)
+void __percpu *__alloc_percpu(size_t size, size_t align)
 {
 	return pcpu_alloc(size, align, false);
 }
@@ -1217,7 +1217,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);
  * RETURNS:
  * Percpu pointer to the allocated area on success, NULL on failure.
  */
-void *__alloc_reserved_percpu(size_t size, size_t align)
+void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
 {
 	return pcpu_alloc(size, align, true);
 }
@@ -1269,7 +1269,7 @@ static void pcpu_reclaim(struct work_struct *work)
  * CONTEXT:
  * Can be called from atomic context.
  */
-void free_percpu(void *ptr)
+void free_percpu(void __percpu *ptr)
 {
 	void *addr;
 	struct pcpu_chunk *chunk;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6051fbab67ba..fc5aa183bc45 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -139,7 +139,8 @@ static void refresh_zone_stat_thresholds(void)
 		threshold = calculate_threshold(zone);
 
 		for_each_online_cpu(cpu)
-			zone_pcp(zone, cpu)->stat_threshold = threshold;
+			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+							= threshold;
 	}
 }
 
@@ -149,7 +150,8 @@ static void refresh_zone_stat_thresholds(void)
 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 				int delta)
 {
-	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
+
 	s8 *p = pcp->vm_stat_diff + item;
 	long x;
 
@@ -202,7 +204,7 @@ EXPORT_SYMBOL(mod_zone_page_state);
  */
 void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
 	s8 *p = pcp->vm_stat_diff + item;
 
 	(*p)++;
@@ -223,7 +225,7 @@ EXPORT_SYMBOL(__inc_zone_page_state);
 
 void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
 	s8 *p = pcp->vm_stat_diff + item;
 
 	(*p)--;
@@ -300,7 +302,7 @@ void refresh_cpu_vm_stats(int cpu)
 	for_each_populated_zone(zone) {
 		struct per_cpu_pageset *p;
 
-		p = zone_pcp(zone, cpu);
+		p = per_cpu_ptr(zone->pageset, cpu);
 
 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 			if (p->vm_stat_diff[i]) {
@@ -741,7 +743,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 	for_each_online_cpu(i) {
 		struct per_cpu_pageset *pageset;
 
-		pageset = zone_pcp(zone, i);
+		pageset = per_cpu_ptr(zone->pageset, i);
 		seq_printf(m,
 			   "\n  cpu: %i"
 			   "\n              count: %i"
@@ -906,6 +908,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		start_cpu_timer(cpu);
+		node_set_state(cpu_to_node(cpu), N_CPU);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN: