diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 202 |
1 files changed, 71 insertions, 131 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1fa93bd2bb9f..a6b17aa4740b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -1009,10 +1009,10 @@ static void drain_pages(unsigned int cpu) | |||
1009 | struct per_cpu_pageset *pset; | 1009 | struct per_cpu_pageset *pset; |
1010 | struct per_cpu_pages *pcp; | 1010 | struct per_cpu_pages *pcp; |
1011 | 1011 | ||
1012 | pset = zone_pcp(zone, cpu); | 1012 | local_irq_save(flags); |
1013 | pset = per_cpu_ptr(zone->pageset, cpu); | ||
1013 | 1014 | ||
1014 | pcp = &pset->pcp; | 1015 | pcp = &pset->pcp; |
1015 | local_irq_save(flags); | ||
1016 | free_pcppages_bulk(zone, pcp->count, pcp); | 1016 | free_pcppages_bulk(zone, pcp->count, pcp); |
1017 | pcp->count = 0; | 1017 | pcp->count = 0; |
1018 | local_irq_restore(flags); | 1018 | local_irq_restore(flags); |
@@ -1096,7 +1096,6 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
1096 | arch_free_page(page, 0); | 1096 | arch_free_page(page, 0); |
1097 | kernel_map_pages(page, 1, 0); | 1097 | kernel_map_pages(page, 1, 0); |
1098 | 1098 | ||
1099 | pcp = &zone_pcp(zone, get_cpu())->pcp; | ||
1100 | migratetype = get_pageblock_migratetype(page); | 1099 | migratetype = get_pageblock_migratetype(page); |
1101 | set_page_private(page, migratetype); | 1100 | set_page_private(page, migratetype); |
1102 | local_irq_save(flags); | 1101 | local_irq_save(flags); |
@@ -1119,6 +1118,7 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
1119 | migratetype = MIGRATE_MOVABLE; | 1118 | migratetype = MIGRATE_MOVABLE; |
1120 | } | 1119 | } |
1121 | 1120 | ||
1121 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | ||
1122 | if (cold) | 1122 | if (cold) |
1123 | list_add_tail(&page->lru, &pcp->lists[migratetype]); | 1123 | list_add_tail(&page->lru, &pcp->lists[migratetype]); |
1124 | else | 1124 | else |
@@ -1131,7 +1131,6 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
1131 | 1131 | ||
1132 | out: | 1132 | out: |
1133 | local_irq_restore(flags); | 1133 | local_irq_restore(flags); |
1134 | put_cpu(); | ||
1135 | } | 1134 | } |
1136 | 1135 | ||
1137 | void free_hot_page(struct page *page) | 1136 | void free_hot_page(struct page *page) |
@@ -1181,17 +1180,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |||
1181 | unsigned long flags; | 1180 | unsigned long flags; |
1182 | struct page *page; | 1181 | struct page *page; |
1183 | int cold = !!(gfp_flags & __GFP_COLD); | 1182 | int cold = !!(gfp_flags & __GFP_COLD); |
1184 | int cpu; | ||
1185 | 1183 | ||
1186 | again: | 1184 | again: |
1187 | cpu = get_cpu(); | ||
1188 | if (likely(order == 0)) { | 1185 | if (likely(order == 0)) { |
1189 | struct per_cpu_pages *pcp; | 1186 | struct per_cpu_pages *pcp; |
1190 | struct list_head *list; | 1187 | struct list_head *list; |
1191 | 1188 | ||
1192 | pcp = &zone_pcp(zone, cpu)->pcp; | ||
1193 | list = &pcp->lists[migratetype]; | ||
1194 | local_irq_save(flags); | 1189 | local_irq_save(flags); |
1190 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | ||
1191 | list = &pcp->lists[migratetype]; | ||
1195 | if (list_empty(list)) { | 1192 | if (list_empty(list)) { |
1196 | pcp->count += rmqueue_bulk(zone, 0, | 1193 | pcp->count += rmqueue_bulk(zone, 0, |
1197 | pcp->batch, list, | 1194 | pcp->batch, list, |
@@ -1232,7 +1229,6 @@ again: | |||
1232 | __count_zone_vm_events(PGALLOC, zone, 1 << order); | 1229 | __count_zone_vm_events(PGALLOC, zone, 1 << order); |
1233 | zone_statistics(preferred_zone, zone); | 1230 | zone_statistics(preferred_zone, zone); |
1234 | local_irq_restore(flags); | 1231 | local_irq_restore(flags); |
1235 | put_cpu(); | ||
1236 | 1232 | ||
1237 | VM_BUG_ON(bad_range(zone, page)); | 1233 | VM_BUG_ON(bad_range(zone, page)); |
1238 | if (prep_new_page(page, order, gfp_flags)) | 1234 | if (prep_new_page(page, order, gfp_flags)) |
@@ -1241,7 +1237,6 @@ again: | |||
1241 | 1237 | ||
1242 | failed: | 1238 | failed: |
1243 | local_irq_restore(flags); | 1239 | local_irq_restore(flags); |
1244 | put_cpu(); | ||
1245 | return NULL; | 1240 | return NULL; |
1246 | } | 1241 | } |
1247 | 1242 | ||
@@ -2180,7 +2175,7 @@ void show_free_areas(void) | |||
2180 | for_each_online_cpu(cpu) { | 2175 | for_each_online_cpu(cpu) { |
2181 | struct per_cpu_pageset *pageset; | 2176 | struct per_cpu_pageset *pageset; |
2182 | 2177 | ||
2183 | pageset = zone_pcp(zone, cpu); | 2178 | pageset = per_cpu_ptr(zone->pageset, cpu); |
2184 | 2179 | ||
2185 | printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", | 2180 | printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", |
2186 | cpu, pageset->pcp.high, | 2181 | cpu, pageset->pcp.high, |
@@ -2745,10 +2740,29 @@ static void build_zonelist_cache(pg_data_t *pgdat) | |||
2745 | 2740 | ||
2746 | #endif /* CONFIG_NUMA */ | 2741 | #endif /* CONFIG_NUMA */ |
2747 | 2742 | ||
2743 | /* | ||
2744 | * Boot pageset table. One per cpu which is going to be used for all | ||
2745 | * zones and all nodes. The parameters will be set in such a way | ||
2746 | * that an item put on a list will immediately be handed over to | ||
2747 | * the buddy list. This is safe since pageset manipulation is done | ||
2748 | * with interrupts disabled. | ||
2749 | * | ||
2750 | * The boot_pagesets must be kept even after bootup is complete for | ||
2751 | * unused processors and/or zones. They do play a role for bootstrapping | ||
2752 | * hotplugged processors. | ||
2753 | * | ||
2754 | * zoneinfo_show() and maybe other functions do | ||
2755 | * not check if the processor is online before following the pageset pointer. | ||
2756 | * Other parts of the kernel may not check if the zone is available. | ||
2757 | */ | ||
2758 | static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch); | ||
2759 | static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); | ||
2760 | |||
2748 | /* return values int ....just for stop_machine() */ | 2761 | /* return values int ....just for stop_machine() */ |
2749 | static int __build_all_zonelists(void *dummy) | 2762 | static int __build_all_zonelists(void *dummy) |
2750 | { | 2763 | { |
2751 | int nid; | 2764 | int nid; |
2765 | int cpu; | ||
2752 | 2766 | ||
2753 | #ifdef CONFIG_NUMA | 2767 | #ifdef CONFIG_NUMA |
2754 | memset(node_load, 0, sizeof(node_load)); | 2768 | memset(node_load, 0, sizeof(node_load)); |
@@ -2759,6 +2773,23 @@ static int __build_all_zonelists(void *dummy) | |||
2759 | build_zonelists(pgdat); | 2773 | build_zonelists(pgdat); |
2760 | build_zonelist_cache(pgdat); | 2774 | build_zonelist_cache(pgdat); |
2761 | } | 2775 | } |
2776 | |||
2777 | /* | ||
2778 | * Initialize the boot_pagesets that are going to be used | ||
2779 | * for bootstrapping processors. The real pagesets for | ||
2780 | * each zone will be allocated later when the per cpu | ||
2781 | * allocator is available. | ||
2782 | * | ||
2783 | * boot_pagesets are used also for bootstrapping offline | ||
2784 | * cpus if the system is already booted because the pagesets | ||
2785 | * are needed to initialize allocators on a specific cpu too. | ||
2786 | * E.g. the percpu allocator needs the page allocator which | ||
2787 | * needs the percpu allocator in order to allocate its pagesets | ||
2788 | * (a chicken-egg dilemma). | ||
2789 | */ | ||
2790 | for_each_possible_cpu(cpu) | ||
2791 | setup_pageset(&per_cpu(boot_pageset, cpu), 0); | ||
2792 | |||
2762 | return 0; | 2793 | return 0; |
2763 | } | 2794 | } |
2764 | 2795 | ||
@@ -3096,121 +3127,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p, | |||
3096 | pcp->batch = PAGE_SHIFT * 8; | 3127 | pcp->batch = PAGE_SHIFT * 8; |
3097 | } | 3128 | } |
3098 | 3129 | ||
3099 | |||
3100 | #ifdef CONFIG_NUMA | ||
3101 | /* | ||
3102 | * Boot pageset table. One per cpu which is going to be used for all | ||
3103 | * zones and all nodes. The parameters will be set in such a way | ||
3104 | * that an item put on a list will immediately be handed over to | ||
3105 | * the buddy list. This is safe since pageset manipulation is done | ||
3106 | * with interrupts disabled. | ||
3107 | * | ||
3108 | * Some NUMA counter updates may also be caught by the boot pagesets. | ||
3109 | * | ||
3110 | * The boot_pagesets must be kept even after bootup is complete for | ||
3111 | * unused processors and/or zones. They do play a role for bootstrapping | ||
3112 | * hotplugged processors. | ||
3113 | * | ||
3114 | * zoneinfo_show() and maybe other functions do | ||
3115 | * not check if the processor is online before following the pageset pointer. | ||
3116 | * Other parts of the kernel may not check if the zone is available. | ||
3117 | */ | ||
3118 | static struct per_cpu_pageset boot_pageset[NR_CPUS]; | ||
3119 | |||
3120 | /* | 3130 | /* |
3121 | * Dynamically allocate memory for the | 3131 | * Allocate per cpu pagesets and initialize them. |
3122 | * per cpu pageset array in struct zone. | 3132 | * Before this call only boot pagesets were available. |
3133 | * Boot pagesets will no longer be used by this processor | ||
3134 | * after setup_per_cpu_pageset(). | ||
3123 | */ | 3135 | */ |
3124 | static int __cpuinit process_zones(int cpu) | 3136 | void __init setup_per_cpu_pageset(void) |
3125 | { | 3137 | { |
3126 | struct zone *zone, *dzone; | 3138 | struct zone *zone; |
3127 | int node = cpu_to_node(cpu); | 3139 | int cpu; |
3128 | |||
3129 | node_set_state(node, N_CPU); /* this node has a cpu */ | ||
3130 | 3140 | ||
3131 | for_each_populated_zone(zone) { | 3141 | for_each_populated_zone(zone) { |
3132 | zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), | 3142 | zone->pageset = alloc_percpu(struct per_cpu_pageset); |
3133 | GFP_KERNEL, node); | ||
3134 | if (!zone_pcp(zone, cpu)) | ||
3135 | goto bad; | ||
3136 | |||
3137 | setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); | ||
3138 | |||
3139 | if (percpu_pagelist_fraction) | ||
3140 | setup_pagelist_highmark(zone_pcp(zone, cpu), | ||
3141 | (zone->present_pages / percpu_pagelist_fraction)); | ||
3142 | } | ||
3143 | |||
3144 | return 0; | ||
3145 | bad: | ||
3146 | for_each_zone(dzone) { | ||
3147 | if (!populated_zone(dzone)) | ||
3148 | continue; | ||
3149 | if (dzone == zone) | ||
3150 | break; | ||
3151 | kfree(zone_pcp(dzone, cpu)); | ||
3152 | zone_pcp(dzone, cpu) = &boot_pageset[cpu]; | ||
3153 | } | ||
3154 | return -ENOMEM; | ||
3155 | } | ||
3156 | 3143 | ||
3157 | static inline void free_zone_pagesets(int cpu) | 3144 | for_each_possible_cpu(cpu) { |
3158 | { | 3145 | struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu); |
3159 | struct zone *zone; | ||
3160 | |||
3161 | for_each_zone(zone) { | ||
3162 | struct per_cpu_pageset *pset = zone_pcp(zone, cpu); | ||
3163 | 3146 | ||
3164 | /* Free per_cpu_pageset if it is slab allocated */ | 3147 | setup_pageset(pcp, zone_batchsize(zone)); |
3165 | if (pset != &boot_pageset[cpu]) | ||
3166 | kfree(pset); | ||
3167 | zone_pcp(zone, cpu) = &boot_pageset[cpu]; | ||
3168 | } | ||
3169 | } | ||
3170 | 3148 | ||
3171 | static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, | 3149 | if (percpu_pagelist_fraction) |
3172 | unsigned long action, | 3150 | setup_pagelist_highmark(pcp, |
3173 | void *hcpu) | 3151 | (zone->present_pages / |
3174 | { | 3152 | percpu_pagelist_fraction)); |
3175 | int cpu = (long)hcpu; | 3153 | } |
3176 | int ret = NOTIFY_OK; | ||
3177 | |||
3178 | switch (action) { | ||
3179 | case CPU_UP_PREPARE: | ||
3180 | case CPU_UP_PREPARE_FROZEN: | ||
3181 | if (process_zones(cpu)) | ||
3182 | ret = NOTIFY_BAD; | ||
3183 | break; | ||
3184 | case CPU_UP_CANCELED: | ||
3185 | case CPU_UP_CANCELED_FROZEN: | ||
3186 | case CPU_DEAD: | ||
3187 | case CPU_DEAD_FROZEN: | ||
3188 | free_zone_pagesets(cpu); | ||
3189 | break; | ||
3190 | default: | ||
3191 | break; | ||
3192 | } | 3154 | } |
3193 | return ret; | ||
3194 | } | 3155 | } |
3195 | 3156 | ||
3196 | static struct notifier_block __cpuinitdata pageset_notifier = | ||
3197 | { &pageset_cpuup_callback, NULL, 0 }; | ||
3198 | |||
3199 | void __init setup_per_cpu_pageset(void) | ||
3200 | { | ||
3201 | int err; | ||
3202 | |||
3203 | /* Initialize per_cpu_pageset for cpu 0. | ||
3204 | * A cpuup callback will do this for every cpu | ||
3205 | * as it comes online | ||
3206 | */ | ||
3207 | err = process_zones(smp_processor_id()); | ||
3208 | BUG_ON(err); | ||
3209 | register_cpu_notifier(&pageset_notifier); | ||
3210 | } | ||
3211 | |||
3212 | #endif | ||
3213 | |||
3214 | static noinline __init_refok | 3157 | static noinline __init_refok |
3215 | int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) | 3158 | int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) |
3216 | { | 3159 | { |
@@ -3264,7 +3207,7 @@ static int __zone_pcp_update(void *data) | |||
3264 | struct per_cpu_pageset *pset; | 3207 | struct per_cpu_pageset *pset; |
3265 | struct per_cpu_pages *pcp; | 3208 | struct per_cpu_pages *pcp; |
3266 | 3209 | ||
3267 | pset = zone_pcp(zone, cpu); | 3210 | pset = per_cpu_ptr(zone->pageset, cpu); |
3268 | pcp = &pset->pcp; | 3211 | pcp = &pset->pcp; |
3269 | 3212 | ||
3270 | local_irq_save(flags); | 3213 | local_irq_save(flags); |
@@ -3282,21 +3225,17 @@ void zone_pcp_update(struct zone *zone) | |||
3282 | 3225 | ||
3283 | static __meminit void zone_pcp_init(struct zone *zone) | 3226 | static __meminit void zone_pcp_init(struct zone *zone) |
3284 | { | 3227 | { |
3285 | int cpu; | 3228 | /* |
3286 | unsigned long batch = zone_batchsize(zone); | 3229 | * per cpu subsystem is not up at this point. The following code |
3230 | * relies on the ability of the linker to provide the | ||
3231 | * offset of a (static) per cpu variable into the per cpu area. | ||
3232 | */ | ||
3233 | zone->pageset = &boot_pageset; | ||
3287 | 3234 | ||
3288 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
3289 | #ifdef CONFIG_NUMA | ||
3290 | /* Early boot. Slab allocator not functional yet */ | ||
3291 | zone_pcp(zone, cpu) = &boot_pageset[cpu]; | ||
3292 | setup_pageset(&boot_pageset[cpu],0); | ||
3293 | #else | ||
3294 | setup_pageset(zone_pcp(zone,cpu), batch); | ||
3295 | #endif | ||
3296 | } | ||
3297 | if (zone->present_pages) | 3235 | if (zone->present_pages) |
3298 | printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", | 3236 | printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n", |
3299 | zone->name, zone->present_pages, batch); | 3237 | zone->name, zone->present_pages, |
3238 | zone_batchsize(zone)); | ||
3300 | } | 3239 | } |
3301 | 3240 | ||
3302 | __meminit int init_currently_empty_zone(struct zone *zone, | 3241 | __meminit int init_currently_empty_zone(struct zone *zone, |
@@ -4869,10 +4808,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, | |||
4869 | if (!write || (ret == -EINVAL)) | 4808 | if (!write || (ret == -EINVAL)) |
4870 | return ret; | 4809 | return ret; |
4871 | for_each_populated_zone(zone) { | 4810 | for_each_populated_zone(zone) { |
4872 | for_each_online_cpu(cpu) { | 4811 | for_each_possible_cpu(cpu) { |
4873 | unsigned long high; | 4812 | unsigned long high; |
4874 | high = zone->present_pages / percpu_pagelist_fraction; | 4813 | high = zone->present_pages / percpu_pagelist_fraction; |
4875 | setup_pagelist_highmark(zone_pcp(zone, cpu), high); | 4814 | setup_pagelist_highmark( |
4815 | per_cpu_ptr(zone->pageset, cpu), high); | ||
4876 | } | 4816 | } |
4877 | } | 4817 | } |
4878 | return 0; | 4818 | return 0; |