aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c263
1 files changed, 131 insertions, 132 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8deb9d0fd5b1..a6b17aa4740b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1009,10 +1009,10 @@ static void drain_pages(unsigned int cpu)
1009 struct per_cpu_pageset *pset; 1009 struct per_cpu_pageset *pset;
1010 struct per_cpu_pages *pcp; 1010 struct per_cpu_pages *pcp;
1011 1011
1012 pset = zone_pcp(zone, cpu); 1012 local_irq_save(flags);
1013 pset = per_cpu_ptr(zone->pageset, cpu);
1013 1014
1014 pcp = &pset->pcp; 1015 pcp = &pset->pcp;
1015 local_irq_save(flags);
1016 free_pcppages_bulk(zone, pcp->count, pcp); 1016 free_pcppages_bulk(zone, pcp->count, pcp);
1017 pcp->count = 0; 1017 pcp->count = 0;
1018 local_irq_restore(flags); 1018 local_irq_restore(flags);
@@ -1096,7 +1096,6 @@ static void free_hot_cold_page(struct page *page, int cold)
1096 arch_free_page(page, 0); 1096 arch_free_page(page, 0);
1097 kernel_map_pages(page, 1, 0); 1097 kernel_map_pages(page, 1, 0);
1098 1098
1099 pcp = &zone_pcp(zone, get_cpu())->pcp;
1100 migratetype = get_pageblock_migratetype(page); 1099 migratetype = get_pageblock_migratetype(page);
1101 set_page_private(page, migratetype); 1100 set_page_private(page, migratetype);
1102 local_irq_save(flags); 1101 local_irq_save(flags);
@@ -1119,6 +1118,7 @@ static void free_hot_cold_page(struct page *page, int cold)
1119 migratetype = MIGRATE_MOVABLE; 1118 migratetype = MIGRATE_MOVABLE;
1120 } 1119 }
1121 1120
1121 pcp = &this_cpu_ptr(zone->pageset)->pcp;
1122 if (cold) 1122 if (cold)
1123 list_add_tail(&page->lru, &pcp->lists[migratetype]); 1123 list_add_tail(&page->lru, &pcp->lists[migratetype]);
1124 else 1124 else
@@ -1131,7 +1131,6 @@ static void free_hot_cold_page(struct page *page, int cold)
1131 1131
1132out: 1132out:
1133 local_irq_restore(flags); 1133 local_irq_restore(flags);
1134 put_cpu();
1135} 1134}
1136 1135
1137void free_hot_page(struct page *page) 1136void free_hot_page(struct page *page)
@@ -1181,17 +1180,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
1181 unsigned long flags; 1180 unsigned long flags;
1182 struct page *page; 1181 struct page *page;
1183 int cold = !!(gfp_flags & __GFP_COLD); 1182 int cold = !!(gfp_flags & __GFP_COLD);
1184 int cpu;
1185 1183
1186again: 1184again:
1187 cpu = get_cpu();
1188 if (likely(order == 0)) { 1185 if (likely(order == 0)) {
1189 struct per_cpu_pages *pcp; 1186 struct per_cpu_pages *pcp;
1190 struct list_head *list; 1187 struct list_head *list;
1191 1188
1192 pcp = &zone_pcp(zone, cpu)->pcp;
1193 list = &pcp->lists[migratetype];
1194 local_irq_save(flags); 1189 local_irq_save(flags);
1190 pcp = &this_cpu_ptr(zone->pageset)->pcp;
1191 list = &pcp->lists[migratetype];
1195 if (list_empty(list)) { 1192 if (list_empty(list)) {
1196 pcp->count += rmqueue_bulk(zone, 0, 1193 pcp->count += rmqueue_bulk(zone, 0,
1197 pcp->batch, list, 1194 pcp->batch, list,
@@ -1232,7 +1229,6 @@ again:
1232 __count_zone_vm_events(PGALLOC, zone, 1 << order); 1229 __count_zone_vm_events(PGALLOC, zone, 1 << order);
1233 zone_statistics(preferred_zone, zone); 1230 zone_statistics(preferred_zone, zone);
1234 local_irq_restore(flags); 1231 local_irq_restore(flags);
1235 put_cpu();
1236 1232
1237 VM_BUG_ON(bad_range(zone, page)); 1233 VM_BUG_ON(bad_range(zone, page));
1238 if (prep_new_page(page, order, gfp_flags)) 1234 if (prep_new_page(page, order, gfp_flags))
@@ -1241,7 +1237,6 @@ again:
1241 1237
1242failed: 1238failed:
1243 local_irq_restore(flags); 1239 local_irq_restore(flags);
1244 put_cpu();
1245 return NULL; 1240 return NULL;
1246} 1241}
1247 1242
@@ -2180,7 +2175,7 @@ void show_free_areas(void)
2180 for_each_online_cpu(cpu) { 2175 for_each_online_cpu(cpu) {
2181 struct per_cpu_pageset *pageset; 2176 struct per_cpu_pageset *pageset;
2182 2177
2183 pageset = zone_pcp(zone, cpu); 2178 pageset = per_cpu_ptr(zone->pageset, cpu);
2184 2179
2185 printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", 2180 printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
2186 cpu, pageset->pcp.high, 2181 cpu, pageset->pcp.high,
@@ -2745,10 +2740,29 @@ static void build_zonelist_cache(pg_data_t *pgdat)
2745 2740
2746#endif /* CONFIG_NUMA */ 2741#endif /* CONFIG_NUMA */
2747 2742
2743/*
2744 * Boot pageset table. One per cpu which is going to be used for all
2745 * zones and all nodes. The parameters will be set in such a way
2746 * that an item put on a list will immediately be handed over to
2747 * the buddy list. This is safe since pageset manipulation is done
2748 * with interrupts disabled.
2749 *
2750 * The boot_pagesets must be kept even after bootup is complete for
2751 * unused processors and/or zones. They do play a role for bootstrapping
2752 * hotplugged processors.
2753 *
2754 * zoneinfo_show() and maybe other functions do
2755 * not check if the processor is online before following the pageset pointer.
2756 * Other parts of the kernel may not check if the zone is available.
2757 */
2758static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
2759static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
2760
2748/* return values int ....just for stop_machine() */ 2761/* return values int ....just for stop_machine() */
2749static int __build_all_zonelists(void *dummy) 2762static int __build_all_zonelists(void *dummy)
2750{ 2763{
2751 int nid; 2764 int nid;
2765 int cpu;
2752 2766
2753#ifdef CONFIG_NUMA 2767#ifdef CONFIG_NUMA
2754 memset(node_load, 0, sizeof(node_load)); 2768 memset(node_load, 0, sizeof(node_load));
@@ -2759,6 +2773,23 @@ static int __build_all_zonelists(void *dummy)
2759 build_zonelists(pgdat); 2773 build_zonelists(pgdat);
2760 build_zonelist_cache(pgdat); 2774 build_zonelist_cache(pgdat);
2761 } 2775 }
2776
2777 /*
2778 * Initialize the boot_pagesets that are going to be used
2779 * for bootstrapping processors. The real pagesets for
2780 * each zone will be allocated later when the per cpu
2781 * allocator is available.
2782 *
2783 * boot_pagesets are used also for bootstrapping offline
2784 * cpus if the system is already booted because the pagesets
2785 * are needed to initialize allocators on a specific cpu too.
2786 * F.e. the percpu allocator needs the page allocator which
2787 * needs the percpu allocator in order to allocate its pagesets
2788 * (a chicken-egg dilemma).
2789 */
2790 for_each_possible_cpu(cpu)
2791 setup_pageset(&per_cpu(boot_pageset, cpu), 0);
2792
2762 return 0; 2793 return 0;
2763} 2794}
2764 2795
@@ -3096,121 +3127,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
3096 pcp->batch = PAGE_SHIFT * 8; 3127 pcp->batch = PAGE_SHIFT * 8;
3097} 3128}
3098 3129
3099
3100#ifdef CONFIG_NUMA
3101/*
3102 * Boot pageset table. One per cpu which is going to be used for all
3103 * zones and all nodes. The parameters will be set in such a way
3104 * that an item put on a list will immediately be handed over to
3105 * the buddy list. This is safe since pageset manipulation is done
3106 * with interrupts disabled.
3107 *
3108 * Some NUMA counter updates may also be caught by the boot pagesets.
3109 *
3110 * The boot_pagesets must be kept even after bootup is complete for
3111 * unused processors and/or zones. They do play a role for bootstrapping
3112 * hotplugged processors.
3113 *
3114 * zoneinfo_show() and maybe other functions do
3115 * not check if the processor is online before following the pageset pointer.
3116 * Other parts of the kernel may not check if the zone is available.
3117 */
3118static struct per_cpu_pageset boot_pageset[NR_CPUS];
3119
3120/* 3130/*
3121 * Dynamically allocate memory for the 3131 * Allocate per cpu pagesets and initialize them.
3122 * per cpu pageset array in struct zone. 3132 * Before this call only boot pagesets were available.
3133 * Boot pagesets will no longer be used by this processorr
3134 * after setup_per_cpu_pageset().
3123 */ 3135 */
3124static int __cpuinit process_zones(int cpu) 3136void __init setup_per_cpu_pageset(void)
3125{ 3137{
3126 struct zone *zone, *dzone; 3138 struct zone *zone;
3127 int node = cpu_to_node(cpu); 3139 int cpu;
3128
3129 node_set_state(node, N_CPU); /* this node has a cpu */
3130 3140
3131 for_each_populated_zone(zone) { 3141 for_each_populated_zone(zone) {
3132 zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), 3142 zone->pageset = alloc_percpu(struct per_cpu_pageset);
3133 GFP_KERNEL, node);
3134 if (!zone_pcp(zone, cpu))
3135 goto bad;
3136 3143
3137 setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); 3144 for_each_possible_cpu(cpu) {
3145 struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
3138 3146
3139 if (percpu_pagelist_fraction) 3147 setup_pageset(pcp, zone_batchsize(zone));
3140 setup_pagelist_highmark(zone_pcp(zone, cpu),
3141 (zone->present_pages / percpu_pagelist_fraction));
3142 }
3143 3148
3144 return 0; 3149 if (percpu_pagelist_fraction)
3145bad: 3150 setup_pagelist_highmark(pcp,
3146 for_each_zone(dzone) { 3151 (zone->present_pages /
3147 if (!populated_zone(dzone)) 3152 percpu_pagelist_fraction));
3148 continue; 3153 }
3149 if (dzone == zone)
3150 break;
3151 kfree(zone_pcp(dzone, cpu));
3152 zone_pcp(dzone, cpu) = &boot_pageset[cpu];
3153 }
3154 return -ENOMEM;
3155}
3156
3157static inline void free_zone_pagesets(int cpu)
3158{
3159 struct zone *zone;
3160
3161 for_each_zone(zone) {
3162 struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
3163
3164 /* Free per_cpu_pageset if it is slab allocated */
3165 if (pset != &boot_pageset[cpu])
3166 kfree(pset);
3167 zone_pcp(zone, cpu) = &boot_pageset[cpu];
3168 }
3169}
3170
3171static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
3172 unsigned long action,
3173 void *hcpu)
3174{
3175 int cpu = (long)hcpu;
3176 int ret = NOTIFY_OK;
3177
3178 switch (action) {
3179 case CPU_UP_PREPARE:
3180 case CPU_UP_PREPARE_FROZEN:
3181 if (process_zones(cpu))
3182 ret = NOTIFY_BAD;
3183 break;
3184 case CPU_UP_CANCELED:
3185 case CPU_UP_CANCELED_FROZEN:
3186 case CPU_DEAD:
3187 case CPU_DEAD_FROZEN:
3188 free_zone_pagesets(cpu);
3189 break;
3190 default:
3191 break;
3192 } 3154 }
3193 return ret;
3194}
3195
3196static struct notifier_block __cpuinitdata pageset_notifier =
3197 { &pageset_cpuup_callback, NULL, 0 };
3198
3199void __init setup_per_cpu_pageset(void)
3200{
3201 int err;
3202
3203 /* Initialize per_cpu_pageset for cpu 0.
3204 * A cpuup callback will do this for every cpu
3205 * as it comes online
3206 */
3207 err = process_zones(smp_processor_id());
3208 BUG_ON(err);
3209 register_cpu_notifier(&pageset_notifier);
3210} 3155}
3211 3156
3212#endif
3213
3214static noinline __init_refok 3157static noinline __init_refok
3215int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) 3158int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
3216{ 3159{
@@ -3264,7 +3207,7 @@ static int __zone_pcp_update(void *data)
3264 struct per_cpu_pageset *pset; 3207 struct per_cpu_pageset *pset;
3265 struct per_cpu_pages *pcp; 3208 struct per_cpu_pages *pcp;
3266 3209
3267 pset = zone_pcp(zone, cpu); 3210 pset = per_cpu_ptr(zone->pageset, cpu);
3268 pcp = &pset->pcp; 3211 pcp = &pset->pcp;
3269 3212
3270 local_irq_save(flags); 3213 local_irq_save(flags);
@@ -3282,21 +3225,17 @@ void zone_pcp_update(struct zone *zone)
3282 3225
3283static __meminit void zone_pcp_init(struct zone *zone) 3226static __meminit void zone_pcp_init(struct zone *zone)
3284{ 3227{
3285 int cpu; 3228 /*
3286 unsigned long batch = zone_batchsize(zone); 3229 * per cpu subsystem is not up at this point. The following code
3230 * relies on the ability of the linker to provide the
3231 * offset of a (static) per cpu variable into the per cpu area.
3232 */
3233 zone->pageset = &boot_pageset;
3287 3234
3288 for (cpu = 0; cpu < NR_CPUS; cpu++) {
3289#ifdef CONFIG_NUMA
3290 /* Early boot. Slab allocator not functional yet */
3291 zone_pcp(zone, cpu) = &boot_pageset[cpu];
3292 setup_pageset(&boot_pageset[cpu],0);
3293#else
3294 setup_pageset(zone_pcp(zone,cpu), batch);
3295#endif
3296 }
3297 if (zone->present_pages) 3235 if (zone->present_pages)
3298 printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", 3236 printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n",
3299 zone->name, zone->present_pages, batch); 3237 zone->name, zone->present_pages,
3238 zone_batchsize(zone));
3300} 3239}
3301 3240
3302__meminit int init_currently_empty_zone(struct zone *zone, 3241__meminit int init_currently_empty_zone(struct zone *zone,
@@ -3435,6 +3374,61 @@ void __init free_bootmem_with_active_regions(int nid,
3435 } 3374 }
3436} 3375}
3437 3376
3377int __init add_from_early_node_map(struct range *range, int az,
3378 int nr_range, int nid)
3379{
3380 int i;
3381 u64 start, end;
3382
3383 /* need to go over early_node_map to find out good range for node */
3384 for_each_active_range_index_in_nid(i, nid) {
3385 start = early_node_map[i].start_pfn;
3386 end = early_node_map[i].end_pfn;
3387 nr_range = add_range(range, az, nr_range, start, end);
3388 }
3389 return nr_range;
3390}
3391
3392#ifdef CONFIG_NO_BOOTMEM
3393void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
3394 u64 goal, u64 limit)
3395{
3396 int i;
3397 void *ptr;
3398
3399 /* need to go over early_node_map to find out good range for node */
3400 for_each_active_range_index_in_nid(i, nid) {
3401 u64 addr;
3402 u64 ei_start, ei_last;
3403
3404 ei_last = early_node_map[i].end_pfn;
3405 ei_last <<= PAGE_SHIFT;
3406 ei_start = early_node_map[i].start_pfn;
3407 ei_start <<= PAGE_SHIFT;
3408 addr = find_early_area(ei_start, ei_last,
3409 goal, limit, size, align);
3410
3411 if (addr == -1ULL)
3412 continue;
3413
3414#if 0
3415 printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
3416 nid,
3417 ei_start, ei_last, goal, limit, size,
3418 align, addr);
3419#endif
3420
3421 ptr = phys_to_virt(addr);
3422 memset(ptr, 0, size);
3423 reserve_early_without_check(addr, addr + size, "BOOTMEM");
3424 return ptr;
3425 }
3426
3427 return NULL;
3428}
3429#endif
3430
3431
3438void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) 3432void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3439{ 3433{
3440 int i; 3434 int i;
@@ -4467,7 +4461,11 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
4467} 4461}
4468 4462
4469#ifndef CONFIG_NEED_MULTIPLE_NODES 4463#ifndef CONFIG_NEED_MULTIPLE_NODES
4470struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] }; 4464struct pglist_data __refdata contig_page_data = {
4465#ifndef CONFIG_NO_BOOTMEM
4466 .bdata = &bootmem_node_data[0]
4467#endif
4468 };
4471EXPORT_SYMBOL(contig_page_data); 4469EXPORT_SYMBOL(contig_page_data);
4472#endif 4470#endif
4473 4471
@@ -4810,10 +4808,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
4810 if (!write || (ret == -EINVAL)) 4808 if (!write || (ret == -EINVAL))
4811 return ret; 4809 return ret;
4812 for_each_populated_zone(zone) { 4810 for_each_populated_zone(zone) {
4813 for_each_online_cpu(cpu) { 4811 for_each_possible_cpu(cpu) {
4814 unsigned long high; 4812 unsigned long high;
4815 high = zone->present_pages / percpu_pagelist_fraction; 4813 high = zone->present_pages / percpu_pagelist_fraction;
4816 setup_pagelist_highmark(zone_pcp(zone, cpu), high); 4814 setup_pagelist_highmark(
4815 per_cpu_ptr(zone->pageset, cpu), high);
4817 } 4816 }
4818 } 4817 }
4819 return 0; 4818 return 0;