about summary refs log tree commit diff stats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- mm/page_alloc.c 202
1 files changed, 71 insertions, 131 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1fa93bd2bb9f..a6b17aa4740b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1009,10 +1009,10 @@ static void drain_pages(unsigned int cpu)
1009 struct per_cpu_pageset *pset; 1009 struct per_cpu_pageset *pset;
1010 struct per_cpu_pages *pcp; 1010 struct per_cpu_pages *pcp;
1011 1011
1012 pset = zone_pcp(zone, cpu); 1012 local_irq_save(flags);
1013 pset = per_cpu_ptr(zone->pageset, cpu);
1013 1014
1014 pcp = &pset->pcp; 1015 pcp = &pset->pcp;
1015 local_irq_save(flags);
1016 free_pcppages_bulk(zone, pcp->count, pcp); 1016 free_pcppages_bulk(zone, pcp->count, pcp);
1017 pcp->count = 0; 1017 pcp->count = 0;
1018 local_irq_restore(flags); 1018 local_irq_restore(flags);
@@ -1096,7 +1096,6 @@ static void free_hot_cold_page(struct page *page, int cold)
1096 arch_free_page(page, 0); 1096 arch_free_page(page, 0);
1097 kernel_map_pages(page, 1, 0); 1097 kernel_map_pages(page, 1, 0);
1098 1098
1099 pcp = &zone_pcp(zone, get_cpu())->pcp;
1100 migratetype = get_pageblock_migratetype(page); 1099 migratetype = get_pageblock_migratetype(page);
1101 set_page_private(page, migratetype); 1100 set_page_private(page, migratetype);
1102 local_irq_save(flags); 1101 local_irq_save(flags);
@@ -1119,6 +1118,7 @@ static void free_hot_cold_page(struct page *page, int cold)
1119 migratetype = MIGRATE_MOVABLE; 1118 migratetype = MIGRATE_MOVABLE;
1120 } 1119 }
1121 1120
1121 pcp = &this_cpu_ptr(zone->pageset)->pcp;
1122 if (cold) 1122 if (cold)
1123 list_add_tail(&page->lru, &pcp->lists[migratetype]); 1123 list_add_tail(&page->lru, &pcp->lists[migratetype]);
1124 else 1124 else
@@ -1131,7 +1131,6 @@ static void free_hot_cold_page(struct page *page, int cold)
1131 1131
1132out: 1132out:
1133 local_irq_restore(flags); 1133 local_irq_restore(flags);
1134 put_cpu();
1135} 1134}
1136 1135
1137void free_hot_page(struct page *page) 1136void free_hot_page(struct page *page)
@@ -1181,17 +1180,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
1181 unsigned long flags; 1180 unsigned long flags;
1182 struct page *page; 1181 struct page *page;
1183 int cold = !!(gfp_flags & __GFP_COLD); 1182 int cold = !!(gfp_flags & __GFP_COLD);
1184 int cpu;
1185 1183
1186again: 1184again:
1187 cpu = get_cpu();
1188 if (likely(order == 0)) { 1185 if (likely(order == 0)) {
1189 struct per_cpu_pages *pcp; 1186 struct per_cpu_pages *pcp;
1190 struct list_head *list; 1187 struct list_head *list;
1191 1188
1192 pcp = &zone_pcp(zone, cpu)->pcp;
1193 list = &pcp->lists[migratetype];
1194 local_irq_save(flags); 1189 local_irq_save(flags);
1190 pcp = &this_cpu_ptr(zone->pageset)->pcp;
1191 list = &pcp->lists[migratetype];
1195 if (list_empty(list)) { 1192 if (list_empty(list)) {
1196 pcp->count += rmqueue_bulk(zone, 0, 1193 pcp->count += rmqueue_bulk(zone, 0,
1197 pcp->batch, list, 1194 pcp->batch, list,
@@ -1232,7 +1229,6 @@ again:
1232 __count_zone_vm_events(PGALLOC, zone, 1 << order); 1229 __count_zone_vm_events(PGALLOC, zone, 1 << order);
1233 zone_statistics(preferred_zone, zone); 1230 zone_statistics(preferred_zone, zone);
1234 local_irq_restore(flags); 1231 local_irq_restore(flags);
1235 put_cpu();
1236 1232
1237 VM_BUG_ON(bad_range(zone, page)); 1233 VM_BUG_ON(bad_range(zone, page));
1238 if (prep_new_page(page, order, gfp_flags)) 1234 if (prep_new_page(page, order, gfp_flags))
@@ -1241,7 +1237,6 @@ again:
1241 1237
1242failed: 1238failed:
1243 local_irq_restore(flags); 1239 local_irq_restore(flags);
1244 put_cpu();
1245 return NULL; 1240 return NULL;
1246} 1241}
1247 1242
@@ -2180,7 +2175,7 @@ void show_free_areas(void)
2180 for_each_online_cpu(cpu) { 2175 for_each_online_cpu(cpu) {
2181 struct per_cpu_pageset *pageset; 2176 struct per_cpu_pageset *pageset;
2182 2177
2183 pageset = zone_pcp(zone, cpu); 2178 pageset = per_cpu_ptr(zone->pageset, cpu);
2184 2179
2185 printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", 2180 printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
2186 cpu, pageset->pcp.high, 2181 cpu, pageset->pcp.high,
@@ -2745,10 +2740,29 @@ static void build_zonelist_cache(pg_data_t *pgdat)
2745 2740
2746#endif /* CONFIG_NUMA */ 2741#endif /* CONFIG_NUMA */
2747 2742
2743/*
2744 * Boot pageset table. One per cpu which is going to be used for all
2745 * zones and all nodes. The parameters will be set in such a way
2746 * that an item put on a list will immediately be handed over to
2747 * the buddy list. This is safe since pageset manipulation is done
2748 * with interrupts disabled.
2749 *
2750 * The boot_pagesets must be kept even after bootup is complete for
2751 * unused processors and/or zones. They do play a role for bootstrapping
2752 * hotplugged processors.
2753 *
2754 * zoneinfo_show() and maybe other functions do
2755 * not check if the processor is online before following the pageset pointer.
2756 * Other parts of the kernel may not check if the zone is available.
2757 */
2758static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
2759static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
2760
2748/* return values int ....just for stop_machine() */ 2761/* return values int ....just for stop_machine() */
2749static int __build_all_zonelists(void *dummy) 2762static int __build_all_zonelists(void *dummy)
2750{ 2763{
2751 int nid; 2764 int nid;
2765 int cpu;
2752 2766
2753#ifdef CONFIG_NUMA 2767#ifdef CONFIG_NUMA
2754 memset(node_load, 0, sizeof(node_load)); 2768 memset(node_load, 0, sizeof(node_load));
@@ -2759,6 +2773,23 @@ static int __build_all_zonelists(void *dummy)
2759 build_zonelists(pgdat); 2773 build_zonelists(pgdat);
2760 build_zonelist_cache(pgdat); 2774 build_zonelist_cache(pgdat);
2761 } 2775 }
2776
2777 /*
2778 * Initialize the boot_pagesets that are going to be used
2779 * for bootstrapping processors. The real pagesets for
2780 * each zone will be allocated later when the per cpu
2781 * allocator is available.
2782 *
2783 * boot_pagesets are used also for bootstrapping offline
2784 * cpus if the system is already booted because the pagesets
2785 * are needed to initialize allocators on a specific cpu too.
2786 * E.g. the percpu allocator needs the page allocator which
2787 * needs the percpu allocator in order to allocate its pagesets
2788 * (a chicken-egg dilemma).
2789 */
2790 for_each_possible_cpu(cpu)
2791 setup_pageset(&per_cpu(boot_pageset, cpu), 0);
2792
2762 return 0; 2793 return 0;
2763} 2794}
2764 2795
@@ -3096,121 +3127,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
3096 pcp->batch = PAGE_SHIFT * 8; 3127 pcp->batch = PAGE_SHIFT * 8;
3097} 3128}
3098 3129
3099
3100#ifdef CONFIG_NUMA
3101/*
3102 * Boot pageset table. One per cpu which is going to be used for all
3103 * zones and all nodes. The parameters will be set in such a way
3104 * that an item put on a list will immediately be handed over to
3105 * the buddy list. This is safe since pageset manipulation is done
3106 * with interrupts disabled.
3107 *
3108 * Some NUMA counter updates may also be caught by the boot pagesets.
3109 *
3110 * The boot_pagesets must be kept even after bootup is complete for
3111 * unused processors and/or zones. They do play a role for bootstrapping
3112 * hotplugged processors.
3113 *
3114 * zoneinfo_show() and maybe other functions do
3115 * not check if the processor is online before following the pageset pointer.
3116 * Other parts of the kernel may not check if the zone is available.
3117 */
3118static struct per_cpu_pageset boot_pageset[NR_CPUS];
3119
3120/* 3130/*
3121 * Dynamically allocate memory for the 3131 * Allocate per cpu pagesets and initialize them.
3122 * per cpu pageset array in struct zone. 3132 * Before this call only boot pagesets were available.
3133 * Boot pagesets will no longer be used by this processor
3134 * after setup_per_cpu_pageset().
3123 */ 3135 */
3124static int __cpuinit process_zones(int cpu) 3136void __init setup_per_cpu_pageset(void)
3125{ 3137{
3126 struct zone *zone, *dzone; 3138 struct zone *zone;
3127 int node = cpu_to_node(cpu); 3139 int cpu;
3128
3129 node_set_state(node, N_CPU); /* this node has a cpu */
3130 3140
3131 for_each_populated_zone(zone) { 3141 for_each_populated_zone(zone) {
3132 zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), 3142 zone->pageset = alloc_percpu(struct per_cpu_pageset);
3133 GFP_KERNEL, node);
3134 if (!zone_pcp(zone, cpu))
3135 goto bad;
3136
3137 setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
3138
3139 if (percpu_pagelist_fraction)
3140 setup_pagelist_highmark(zone_pcp(zone, cpu),
3141 (zone->present_pages / percpu_pagelist_fraction));
3142 }
3143
3144 return 0;
3145bad:
3146 for_each_zone(dzone) {
3147 if (!populated_zone(dzone))
3148 continue;
3149 if (dzone == zone)
3150 break;
3151 kfree(zone_pcp(dzone, cpu));
3152 zone_pcp(dzone, cpu) = &boot_pageset[cpu];
3153 }
3154 return -ENOMEM;
3155}
3156 3143
3157static inline void free_zone_pagesets(int cpu) 3144 for_each_possible_cpu(cpu) {
3158{ 3145 struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
3159 struct zone *zone;
3160
3161 for_each_zone(zone) {
3162 struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
3163 3146
3164 /* Free per_cpu_pageset if it is slab allocated */ 3147 setup_pageset(pcp, zone_batchsize(zone));
3165 if (pset != &boot_pageset[cpu])
3166 kfree(pset);
3167 zone_pcp(zone, cpu) = &boot_pageset[cpu];
3168 }
3169}
3170 3148
3171static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, 3149 if (percpu_pagelist_fraction)
3172 unsigned long action, 3150 setup_pagelist_highmark(pcp,
3173 void *hcpu) 3151 (zone->present_pages /
3174{ 3152 percpu_pagelist_fraction));
3175 int cpu = (long)hcpu; 3153 }
3176 int ret = NOTIFY_OK;
3177
3178 switch (action) {
3179 case CPU_UP_PREPARE:
3180 case CPU_UP_PREPARE_FROZEN:
3181 if (process_zones(cpu))
3182 ret = NOTIFY_BAD;
3183 break;
3184 case CPU_UP_CANCELED:
3185 case CPU_UP_CANCELED_FROZEN:
3186 case CPU_DEAD:
3187 case CPU_DEAD_FROZEN:
3188 free_zone_pagesets(cpu);
3189 break;
3190 default:
3191 break;
3192 } 3154 }
3193 return ret;
3194} 3155}
3195 3156
3196static struct notifier_block __cpuinitdata pageset_notifier =
3197 { &pageset_cpuup_callback, NULL, 0 };
3198
3199void __init setup_per_cpu_pageset(void)
3200{
3201 int err;
3202
3203 /* Initialize per_cpu_pageset for cpu 0.
3204 * A cpuup callback will do this for every cpu
3205 * as it comes online
3206 */
3207 err = process_zones(smp_processor_id());
3208 BUG_ON(err);
3209 register_cpu_notifier(&pageset_notifier);
3210}
3211
3212#endif
3213
3214static noinline __init_refok 3157static noinline __init_refok
3215int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) 3158int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
3216{ 3159{
@@ -3264,7 +3207,7 @@ static int __zone_pcp_update(void *data)
3264 struct per_cpu_pageset *pset; 3207 struct per_cpu_pageset *pset;
3265 struct per_cpu_pages *pcp; 3208 struct per_cpu_pages *pcp;
3266 3209
3267 pset = zone_pcp(zone, cpu); 3210 pset = per_cpu_ptr(zone->pageset, cpu);
3268 pcp = &pset->pcp; 3211 pcp = &pset->pcp;
3269 3212
3270 local_irq_save(flags); 3213 local_irq_save(flags);
@@ -3282,21 +3225,17 @@ void zone_pcp_update(struct zone *zone)
3282 3225
3283static __meminit void zone_pcp_init(struct zone *zone) 3226static __meminit void zone_pcp_init(struct zone *zone)
3284{ 3227{
3285 int cpu; 3228 /*
3286 unsigned long batch = zone_batchsize(zone); 3229 * per cpu subsystem is not up at this point. The following code
3230 * relies on the ability of the linker to provide the
3231 * offset of a (static) per cpu variable into the per cpu area.
3232 */
3233 zone->pageset = &boot_pageset;
3287 3234
3288 for (cpu = 0; cpu < NR_CPUS; cpu++) {
3289#ifdef CONFIG_NUMA
3290 /* Early boot. Slab allocator not functional yet */
3291 zone_pcp(zone, cpu) = &boot_pageset[cpu];
3292 setup_pageset(&boot_pageset[cpu],0);
3293#else
3294 setup_pageset(zone_pcp(zone,cpu), batch);
3295#endif
3296 }
3297 if (zone->present_pages) 3235 if (zone->present_pages)
3298 printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", 3236 printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n",
3299 zone->name, zone->present_pages, batch); 3237 zone->name, zone->present_pages,
3238 zone_batchsize(zone));
3300} 3239}
3301 3240
3302__meminit int init_currently_empty_zone(struct zone *zone, 3241__meminit int init_currently_empty_zone(struct zone *zone,
@@ -4869,10 +4808,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
4869 if (!write || (ret == -EINVAL)) 4808 if (!write || (ret == -EINVAL))
4870 return ret; 4809 return ret;
4871 for_each_populated_zone(zone) { 4810 for_each_populated_zone(zone) {
4872 for_each_online_cpu(cpu) { 4811 for_each_possible_cpu(cpu) {
4873 unsigned long high; 4812 unsigned long high;
4874 high = zone->present_pages / percpu_pagelist_fraction; 4813 high = zone->present_pages / percpu_pagelist_fraction;
4875 setup_pagelist_highmark(zone_pcp(zone, cpu), high); 4814 setup_pagelist_highmark(
4815 per_cpu_ptr(zone->pageset, cpu), high);
4876 } 4816 }
4877 } 4817 }
4878 return 0; 4818 return 0;