-rw-r--r--   include/linux/mm.h         4
-rw-r--r--   include/linux/mmzone.h    12
-rw-r--r--   mm/page_alloc.c          202
-rw-r--r--   mm/vmstat.c               14
4 files changed, 81 insertions, 151 deletions
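
In short, this patch drops the per-zone NR_CPUS pageset arrays and the zone_pcp() accessor macro in favour of a single alloc_percpu() allocation per zone, reached through per_cpu_ptr() and this_cpu_ptr(). A minimal sketch of the before/after access idiom, using only identifiers that appear in the hunks below (illustrative only, not part of the patch):

	/* old scheme: per-zone array indexed by CPU number */
	pset = zone_pcp(zone, cpu);		/* macro hides the NUMA vs !NUMA layout */

	/* new scheme: zone->pageset is a percpu pointer */
	zone->pageset = alloc_percpu(struct per_cpu_pageset);
	pset = per_cpu_ptr(zone->pageset, cpu);	/* a specific CPU's instance */
	pset = this_cpu_ptr(zone->pageset);	/* the executing CPU's instance */

One visible consequence in the hot paths is that the get_cpu()/put_cpu() pairs disappear: the this_cpu_ptr() dereferences are now done with interrupts already disabled, which keeps the per-CPU access stable.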
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2265f28eb47a..554fa395aac9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1079,11 +1079,7 @@ extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern int after_bootmem;
 
-#ifdef CONFIG_NUMA
 extern void setup_per_cpu_pageset(void);
-#else
-static inline void setup_per_cpu_pageset(void) {}
-#endif
 
 extern void zone_pcp_update(struct zone *zone);
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 30fe668c2542..7874201a3556 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -184,13 +184,7 @@ struct per_cpu_pageset {
 	s8 stat_threshold;
 	s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
 #endif
-} ____cacheline_aligned_in_smp;
-
-#ifdef CONFIG_NUMA
-#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)])
-#else
-#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
-#endif
+};
 
 #endif /* !__GENERATING_BOUNDS.H */
 
@@ -306,10 +300,8 @@ struct zone {
 	 */
 	unsigned long		min_unmapped_pages;
 	unsigned long		min_slab_pages;
-	struct per_cpu_pageset	*pageset[NR_CPUS];
-#else
-	struct per_cpu_pageset	pageset[NR_CPUS];
 #endif
+	struct per_cpu_pageset	*pageset;
 	/*
 	 * free areas of different sizes
 	 */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e9f5cc5fb59..6849e870de54 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1008,10 +1008,10 @@ static void drain_pages(unsigned int cpu)
 		struct per_cpu_pageset *pset;
 		struct per_cpu_pages *pcp;
 
-		pset = zone_pcp(zone, cpu);
+		local_irq_save(flags);
+		pset = per_cpu_ptr(zone->pageset, cpu);
 
 		pcp = &pset->pcp;
-		local_irq_save(flags);
 		free_pcppages_bulk(zone, pcp->count, pcp);
 		pcp->count = 0;
 		local_irq_restore(flags);
@@ -1095,7 +1095,6 @@ static void free_hot_cold_page(struct page *page, int cold)
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
-	pcp = &zone_pcp(zone, get_cpu())->pcp;
 	migratetype = get_pageblock_migratetype(page);
 	set_page_private(page, migratetype);
 	local_irq_save(flags);
@@ -1118,6 +1117,7 @@ static void free_hot_cold_page(struct page *page, int cold)
 		migratetype = MIGRATE_MOVABLE;
 	}
 
+	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	if (cold)
 		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	else
@@ -1130,7 +1130,6 @@ static void free_hot_cold_page(struct page *page, int cold)
 
 out:
 	local_irq_restore(flags);
-	put_cpu();
 }
 
 void free_hot_page(struct page *page)
@@ -1180,17 +1179,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 	unsigned long flags;
 	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
-	int cpu;
 
 again:
-	cpu  = get_cpu();
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
 		struct list_head *list;
 
-		pcp = &zone_pcp(zone, cpu)->pcp;
-		list = &pcp->lists[migratetype];
 		local_irq_save(flags);
+		pcp = &this_cpu_ptr(zone->pageset)->pcp;
+		list = &pcp->lists[migratetype];
 		if (list_empty(list)) {
 			pcp->count += rmqueue_bulk(zone, 0,
 					pcp->batch, list,
@@ -1231,7 +1228,6 @@ again:
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone);
 	local_irq_restore(flags);
-	put_cpu();
 
 	VM_BUG_ON(bad_range(zone, page));
 	if (prep_new_page(page, order, gfp_flags))
@@ -1240,7 +1236,6 @@ again:
 
 failed:
 	local_irq_restore(flags);
-	put_cpu();
 	return NULL;
 }
 
@@ -2179,7 +2174,7 @@ void show_free_areas(void)
 		for_each_online_cpu(cpu) {
 			struct per_cpu_pageset *pageset;
 
-			pageset = zone_pcp(zone, cpu);
+			pageset = per_cpu_ptr(zone->pageset, cpu);
 
 			printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
 			       cpu, pageset->pcp.high,
@@ -2744,10 +2739,29 @@ static void build_zonelist_cache(pg_data_t *pgdat)
 
 #endif	/* CONFIG_NUMA */
 
+/*
+ * Boot pageset table. One per cpu which is going to be used for all
+ * zones and all nodes. The parameters will be set in such a way
+ * that an item put on a list will immediately be handed over to
+ * the buddy list. This is safe since pageset manipulation is done
+ * with interrupts disabled.
+ *
+ * The boot_pagesets must be kept even after bootup is complete for
+ * unused processors and/or zones. They do play a role for bootstrapping
+ * hotplugged processors.
+ *
+ * zoneinfo_show() and maybe other functions do
+ * not check if the processor is online before following the pageset pointer.
+ * Other parts of the kernel may not check if the zone is available.
+ */
+static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
+static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
+
 /* return values int ....just for stop_machine() */
 static int __build_all_zonelists(void *dummy)
 {
 	int nid;
+	int cpu;
 
 #ifdef CONFIG_NUMA
 	memset(node_load, 0, sizeof(node_load));
@@ -2758,6 +2772,23 @@ static int __build_all_zonelists(void *dummy)
 		build_zonelists(pgdat);
 		build_zonelist_cache(pgdat);
 	}
+
+	/*
+	 * Initialize the boot_pagesets that are going to be used
+	 * for bootstrapping processors. The real pagesets for
+	 * each zone will be allocated later when the per cpu
+	 * allocator is available.
+	 *
+	 * boot_pagesets are used also for bootstrapping offline
+	 * cpus if the system is already booted because the pagesets
+	 * are needed to initialize allocators on a specific cpu too.
+	 * F.e. the percpu allocator needs the page allocator which
+	 * needs the percpu allocator in order to allocate its pagesets
+	 * (a chicken-egg dilemma).
+	 */
+	for_each_possible_cpu(cpu)
+		setup_pageset(&per_cpu(boot_pageset, cpu), 0);
+
 	return 0;
 }
 
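
The comment added above captures the bootstrap ordering problem: the dynamic percpu allocator needs the page allocator, and the page allocator's real pagesets need the percpu allocator. The rough sequence, pieced together from the hunks in this patch (a sketch for orientation, not additional patch content):

	/* 1. zone_pcp_init(): percpu allocator not up yet, point at the static boot pageset */
	zone->pageset = &boot_pageset;		/* DEFINE_PER_CPU variable */

	/* 2. __build_all_zonelists(): make every possible CPU's boot pageset usable;
	 *    batch 0 means freed pages are handed straight to the buddy lists */
	for_each_possible_cpu(cpu)
		setup_pageset(&per_cpu(boot_pageset, cpu), 0);

	/* 3. setup_per_cpu_pageset(): percpu allocator works now, allocate the real pagesets */
	for_each_populated_zone(zone)
		zone->pageset = alloc_percpu(struct per_cpu_pageset);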
@@ -3095,121 +3126,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
 	pcp->batch = PAGE_SHIFT * 8;
 }
 
-
-#ifdef CONFIG_NUMA
-/*
- * Boot pageset table. One per cpu which is going to be used for all
- * zones and all nodes. The parameters will be set in such a way
- * that an item put on a list will immediately be handed over to
- * the buddy list. This is safe since pageset manipulation is done
- * with interrupts disabled.
- *
- * Some NUMA counter updates may also be caught by the boot pagesets.
- *
- * The boot_pagesets must be kept even after bootup is complete for
- * unused processors and/or zones. They do play a role for bootstrapping
- * hotplugged processors.
- *
- * zoneinfo_show() and maybe other functions do
- * not check if the processor is online before following the pageset pointer.
- * Other parts of the kernel may not check if the zone is available.
- */
-static struct per_cpu_pageset boot_pageset[NR_CPUS];
-
 /*
- * Dynamically allocate memory for the
- * per cpu pageset array in struct zone.
+ * Allocate per cpu pagesets and initialize them.
+ * Before this call only boot pagesets were available.
+ * Boot pagesets will no longer be used by this processorr
+ * after setup_per_cpu_pageset().
  */
-static int __cpuinit process_zones(int cpu)
+void __init setup_per_cpu_pageset(void)
 {
-	struct zone *zone, *dzone;
-	int node = cpu_to_node(cpu);
-
-	node_set_state(node, N_CPU);	/* this node has a cpu */
+	struct zone *zone;
+	int cpu;
 
 	for_each_populated_zone(zone) {
-		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
-					 GFP_KERNEL, node);
-		if (!zone_pcp(zone, cpu))
-			goto bad;
-
-		setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
-
-		if (percpu_pagelist_fraction)
-			setup_pagelist_highmark(zone_pcp(zone, cpu),
-			    (zone->present_pages / percpu_pagelist_fraction));
-	}
-
-	return 0;
-bad:
-	for_each_zone(dzone) {
-		if (!populated_zone(dzone))
-			continue;
-		if (dzone == zone)
-			break;
-		kfree(zone_pcp(dzone, cpu));
-		zone_pcp(dzone, cpu) = &boot_pageset[cpu];
-	}
-	return -ENOMEM;
-}
-
-static inline void free_zone_pagesets(int cpu)
-{
-	struct zone *zone;
-
-	for_each_zone(zone) {
-		struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
-
-		/* Free per_cpu_pageset if it is slab allocated */
-		if (pset != &boot_pageset[cpu])
-			kfree(pset);
-		zone_pcp(zone, cpu) = &boot_pageset[cpu];
-	}
-}
-
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
-		unsigned long action,
-		void *hcpu)
-{
-	int cpu = (long)hcpu;
-	int ret = NOTIFY_OK;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		if (process_zones(cpu))
-			ret = NOTIFY_BAD;
-		break;
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		free_zone_pagesets(cpu);
-		break;
-	default:
-		break;
-	}
-	return ret;
-}
-
-static struct notifier_block __cpuinitdata pageset_notifier =
-	{ &pageset_cpuup_callback, NULL, 0 };
-
-void __init setup_per_cpu_pageset(void)
-{
-	int err;
-
-	/* Initialize per_cpu_pageset for cpu 0.
-	 * A cpuup callback will do this for every cpu
-	 * as it comes online
-	 */
-	err = process_zones(smp_processor_id());
-	BUG_ON(err);
-	register_cpu_notifier(&pageset_notifier);
-}
-
-#endif
+		zone->pageset = alloc_percpu(struct per_cpu_pageset);
+
+		for_each_possible_cpu(cpu) {
+			struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
+
+			setup_pageset(pcp, zone_batchsize(zone));
+
+			if (percpu_pagelist_fraction)
+				setup_pagelist_highmark(pcp,
+					(zone->present_pages /
+						percpu_pagelist_fraction));
+		}
+	}
+}
 
 static noinline __init_refok
 int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
@@ -3263,7 +3206,7 @@ static int __zone_pcp_update(void *data)
 		struct per_cpu_pageset *pset;
 		struct per_cpu_pages *pcp;
 
-		pset = zone_pcp(zone, cpu);
+		pset = per_cpu_ptr(zone->pageset, cpu);
 		pcp = &pset->pcp;
 
 		local_irq_save(flags);
@@ -3281,21 +3224,17 @@ void zone_pcp_update(struct zone *zone)
 
 static __meminit void zone_pcp_init(struct zone *zone)
 {
-	int cpu;
-	unsigned long batch = zone_batchsize(zone);
-
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-#ifdef CONFIG_NUMA
-		/* Early boot. Slab allocator not functional yet */
-		zone_pcp(zone, cpu) = &boot_pageset[cpu];
-		setup_pageset(&boot_pageset[cpu],0);
-#else
-		setup_pageset(zone_pcp(zone,cpu), batch);
-#endif
-	}
+	/*
+	 * per cpu subsystem is not up at this point. The following code
+	 * relies on the ability of the linker to provide the
+	 * offset of a (static) per cpu variable into the per cpu area.
+	 */
+	zone->pageset = &boot_pageset;
+
 	if (zone->present_pages)
-		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
-			zone->name, zone->present_pages, batch);
+		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%u\n",
+			zone->name, zone->present_pages,
+			zone_batchsize(zone));
 }
 
 __meminit int init_currently_empty_zone(struct zone *zone,
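
The new zone_pcp_init() comment leans on a property of static per-cpu variables: the address the linker assigns to a DEFINE_PER_CPU variable is itself an offset into each CPU's per-cpu area, so it can be stored and dereferenced exactly like a pointer returned by alloc_percpu(). A small illustration under that assumption (not part of the patch):

	static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);

	zone->pageset = &boot_pageset;			/* valid before alloc_percpu() works */
	pset = per_cpu_ptr(zone->pageset, cpu);		/* same accessor as for dynamic pagesets */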
@@ -4809,10 +4748,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
 	if (!write || (ret == -EINVAL))
 		return ret;
 	for_each_populated_zone(zone) {
-		for_each_online_cpu(cpu) {
+		for_each_possible_cpu(cpu) {
 			unsigned long high;
 			high = zone->present_pages / percpu_pagelist_fraction;
-			setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+			setup_pagelist_highmark(
+				per_cpu_ptr(zone->pageset, cpu), high);
 		}
 	}
 	return 0;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6051fbab67ba..1ba0bb7ad043 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -139,7 +139,8 @@ static void refresh_zone_stat_thresholds(void)
 		threshold = calculate_threshold(zone);
 
 		for_each_online_cpu(cpu)
-			zone_pcp(zone, cpu)->stat_threshold = threshold;
+			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+							= threshold;
 	}
 }
 
@@ -149,7 +150,8 @@ static void refresh_zone_stat_thresholds(void)
 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 				int delta)
 {
-	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
+
 	s8 *p = pcp->vm_stat_diff + item;
 	long x;
 
@@ -202,7 +204,7 @@ EXPORT_SYMBOL(mod_zone_page_state);
  */
 void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
 	s8 *p = pcp->vm_stat_diff + item;
 
 	(*p)++;
@@ -223,7 +225,7 @@ EXPORT_SYMBOL(__inc_zone_page_state);
 
 void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
 	s8 *p = pcp->vm_stat_diff + item;
 
 	(*p)--;
@@ -300,7 +302,7 @@ void refresh_cpu_vm_stats(int cpu)
 	for_each_populated_zone(zone) {
 		struct per_cpu_pageset *p;
 
-		p = zone_pcp(zone, cpu);
+		p = per_cpu_ptr(zone->pageset, cpu);
 
 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 			if (p->vm_stat_diff[i]) {
@@ -741,7 +743,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 	for_each_online_cpu(i) {
 		struct per_cpu_pageset *pageset;
 
-		pageset = zone_pcp(zone, i);
+		pageset = per_cpu_ptr(zone->pageset, i);
 		seq_printf(m,
 			   "\n    cpu: %i"
 			   "\n              count: %i"