Diffstat (limited to 'mm/page_alloc.c')

 mm/page_alloc.c | 294 ++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 202 insertions(+), 92 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c3edb624fccf..327516b7aee9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -61,10 +61,14 @@
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 
+#include <asm/sections.h>
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
 #include "internal.h"
 
+/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
+static DEFINE_MUTEX(pcp_batch_high_lock);
+
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
 EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -100,6 +104,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
 };
 EXPORT_SYMBOL(node_states);
 
+/* Protect totalram_pages and zone->managed_pages */
+static DEFINE_SPINLOCK(managed_page_count_lock);
+
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 /*
@@ -739,14 +746,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
-/*
- * Read access to zone->managed_pages is safe because it's unsigned long,
- * but we still need to serialize writers. Currently all callers of
- * __free_pages_bootmem() except put_page_bootmem() should only be used
- * at boot time. So for shorter boot time, we shift the burden to
- * put_page_bootmem() to serialize writers.
- */
-void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
+void __init __free_pages_bootmem(struct page *page, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
 	unsigned int loop;
@@ -781,11 +781,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
 	set_page_refcounted(page);
 	set_pageblock_migratetype(page, MIGRATE_CMA);
 	__free_pages(page, pageblock_order);
-	totalram_pages += pageblock_nr_pages;
-#ifdef CONFIG_HIGHMEM
-	if (PageHighMem(page))
-		totalhigh_pages += pageblock_nr_pages;
-#endif
+	adjust_managed_page_count(page, pageblock_nr_pages);
 }
 #endif
 
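Note on the hunk above: the open-coded totalram_pages/totalhigh_pages updates are folded into adjust_managed_page_count(), which this patch introduces further down (see the @@ -5150 hunk) and which also keeps zone->managed_pages in sync under managed_page_count_lock. A hedged caller sketch (everything except adjust_managed_page_count() itself is illustrative, not patch code):

	/* returning a pageblock to the buddy allocator after this patch */
	static void example_return_pageblock(struct page *page)
	{
		set_page_refcounted(page);
		__free_pages(page, pageblock_order);
		/* one call now updates zone->managed_pages, totalram_pages,
		 * and (for highmem pages) totalhigh_pages consistently */
		adjust_managed_page_count(page, pageblock_nr_pages);
	}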
@@ -1179,10 +1175,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
 	unsigned long flags;
 	int to_drain;
+	unsigned long batch;
 
 	local_irq_save(flags);
-	if (pcp->count >= pcp->batch)
-		to_drain = pcp->batch;
+	batch = ACCESS_ONCE(pcp->batch);
+	if (pcp->count >= batch)
+		to_drain = batch;
 	else
 		to_drain = pcp->count;
 	if (to_drain > 0) {
@@ -1350,8 +1348,9 @@ void free_hot_cold_page(struct page *page, int cold)
 	list_add(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
-		free_pcppages_bulk(zone, pcp->batch, pcp);
-		pcp->count -= pcp->batch;
+		unsigned long batch = ACCESS_ONCE(pcp->batch);
+		free_pcppages_bulk(zone, batch, pcp);
+		pcp->count -= batch;
 	}
 
 out:
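Note on the two ACCESS_ONCE() hunks above: pcp->batch can now change asynchronously via pageset_update() (added below), so each path must read it exactly once. An illustrative version of the race being closed, not code from the patch:

	if (pcp->count >= pcp->high) {
		/* without a single snapshot the compiler may reload pcp->batch: */
		free_pcppages_bulk(zone, pcp->batch, pcp);	/* sees value A */
		pcp->count -= pcp->batch;			/* may see value B */
		/* pcp->count now drifts by (A - B) relative to the list */
	}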
@@ -2839,7 +2838,7 @@ EXPORT_SYMBOL(free_pages_exact);
  * nr_free_zone_pages() counts the number of pages which are beyond the
  * high watermark within all zones at or below a given zone index. For each
  * zone, the number of pages is calculated as:
- *     present_pages - high_pages
+ *     managed_pages - high_pages
  */
 static unsigned long nr_free_zone_pages(int offset)
 {
@@ -2906,9 +2905,13 @@ EXPORT_SYMBOL(si_meminfo);
 #ifdef CONFIG_NUMA
 void si_meminfo_node(struct sysinfo *val, int nid)
 {
+	int zone_type;		/* needs to be signed */
+	unsigned long managed_pages = 0;
 	pg_data_t *pgdat = NODE_DATA(nid);
 
-	val->totalram = pgdat->node_present_pages;
+	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+		managed_pages += pgdat->node_zones[zone_type].managed_pages;
+	val->totalram = managed_pages;
 	val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
 	val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
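Several hunks in this patch (si_meminfo_node(), nr_free_zone_pages(), default_zonelist_order()) switch their accounting from present_pages to managed_pages. The distinction, paraphrased from the zone counter comment in include/linux/mmzone.h:

	/*
	 * spanned_pages = zone_end_pfn - zone_start_pfn
	 * present_pages = spanned_pages - absent_pages(pages in holes)
	 * managed_pages = present_pages - reserved_pages (memmap, bootmem, ...)
	 */

managed_pages is therefore the right basis for any calculation about pages the buddy allocator can actually hand out.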
@@ -3250,18 +3253,25 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 	static DEFINE_MUTEX(zl_order_mutex);
 
 	mutex_lock(&zl_order_mutex);
-	if (write)
-		strcpy(saved_string, (char*)table->data);
+	if (write) {
+		if (strlen((char *)table->data) >= NUMA_ZONELIST_ORDER_LEN) {
+			ret = -EINVAL;
+			goto out;
+		}
+		strcpy(saved_string, (char *)table->data);
+	}
 	ret = proc_dostring(table, write, buffer, length, ppos);
 	if (ret)
 		goto out;
 	if (write) {
 		int oldval = user_zonelist_order;
-		if (__parse_numa_zonelist_order((char*)table->data)) {
+
+		ret = __parse_numa_zonelist_order((char *)table->data);
+		if (ret) {
 			/*
 			 * bogus value.  restore saved string
 			 */
-			strncpy((char*)table->data, saved_string,
+			strncpy((char *)table->data, saved_string,
 				NUMA_ZONELIST_ORDER_LEN);
 			user_zonelist_order = oldval;
 		} else if (oldval != user_zonelist_order) {
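The new length check above closes a write into a fixed-size buffer: saved_string is a local array declared earlier in this function, and table->data is userspace-controlled via /proc/sys/vm/numa_zonelist_order, so an unchecked strcpy() could overflow it. Overlong input is now rejected with -EINVAL before the copy, and the result of __parse_numa_zonelist_order() is propagated to the caller instead of being dropped. What the guard prevents, schematically (NUMA_ZONELIST_ORDER_LEN is 16 in this era's mmzone.h):

	char saved_string[NUMA_ZONELIST_ORDER_LEN];	/* 16-byte stack buffer */
	strcpy(saved_string, (char *)table->data);	/* data may be longer  */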
@@ -3425,8 +3435,8 @@ static int default_zonelist_order(void)
 			z = &NODE_DATA(nid)->node_zones[zone_type];
 			if (populated_zone(z)) {
 				if (zone_type < ZONE_NORMAL)
-					low_kmem_size += z->present_pages;
-				total_size += z->present_pages;
+					low_kmem_size += z->managed_pages;
+				total_size += z->managed_pages;
 			} else if (zone_type == ZONE_NORMAL) {
 				/*
 				 * If any node has only lowmem, then node order
@@ -3705,12 +3715,12 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 		mminit_verify_zonelist();
 		cpuset_init_current_mems_allowed();
 	} else {
-		/* we have to stop all cpus to guarantee there is no user
-		   of zonelist */
 #ifdef CONFIG_MEMORY_HOTPLUG
 		if (zone)
 			setup_zone_pageset(zone);
 #endif
+		/* we have to stop all cpus to guarantee there is no user
+		   of zonelist */
 		stop_machine(__build_all_zonelists, pgdat, NULL);
 		/* cpuset refresh routine should be here */
 	}
@@ -4032,7 +4042,40 @@ static int __meminit zone_batchsize(struct zone *zone)
 #endif
 }
 
-static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+/*
+ * pcp->high and pcp->batch values are related and dependent on one another:
+ * ->batch must never be higher than ->high.
+ * The following function updates them in a safe manner without read side
+ * locking.
+ *
+ * Any new users of pcp->batch and pcp->high should ensure they can cope with
+ * those fields changing asynchronously (according to the above rule).
+ *
+ * mutex_is_locked(&pcp_batch_high_lock) required when calling this function
+ * outside of boot time (or some other assurance that no concurrent updaters
+ * exist).
+ */
+static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
+		unsigned long batch)
+{
+	/* start with a fail safe value for batch */
+	pcp->batch = 1;
+	smp_wmb();
+
+	/* Update high, then batch, in order */
+	pcp->high = high;
+	smp_wmb();
+
+	pcp->batch = batch;
+}
+
+/* a companion to pageset_set_high() */
+static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch)
+{
+	pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch));
+}
+
+static void pageset_init(struct per_cpu_pageset *p)
 {
 	struct per_cpu_pages *pcp;
 	int migratetype;
@@ -4041,45 +4084,55 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 
 	pcp = &p->pcp;
 	pcp->count = 0;
-	pcp->high = 6 * batch;
-	pcp->batch = max(1UL, 1 * batch);
 	for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
 		INIT_LIST_HEAD(&pcp->lists[migratetype]);
 }
 
+static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+{
+	pageset_init(p);
+	pageset_set_batch(p, batch);
+}
+
 /*
- * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
+ * pageset_set_high() sets the high water mark for hot per_cpu_pagelist
  * to the value high for the pageset p.
  */
-
-static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+static void pageset_set_high(struct per_cpu_pageset *p,
 				unsigned long high)
 {
-	struct per_cpu_pages *pcp;
+	unsigned long batch = max(1UL, high / 4);
+	if ((high / 4) > (PAGE_SHIFT * 8))
+		batch = PAGE_SHIFT * 8;
 
-	pcp = &p->pcp;
-	pcp->high = high;
-	pcp->batch = max(1UL, high/4);
-	if ((high/4) > (PAGE_SHIFT * 8))
-		pcp->batch = PAGE_SHIFT * 8;
+	pageset_update(&p->pcp, high, batch);
 }
 
-static void __meminit setup_zone_pageset(struct zone *zone)
+static void __meminit pageset_set_high_and_batch(struct zone *zone,
+		struct per_cpu_pageset *pcp)
 {
-	int cpu;
-
-	zone->pageset = alloc_percpu(struct per_cpu_pageset);
+	if (percpu_pagelist_fraction)
+		pageset_set_high(pcp,
+			(zone->managed_pages /
+				percpu_pagelist_fraction));
+	else
+		pageset_set_batch(pcp, zone_batchsize(zone));
+}
 
-	for_each_possible_cpu(cpu) {
-		struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
+static void __meminit zone_pageset_init(struct zone *zone, int cpu)
+{
+	struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
 
-		setup_pageset(pcp, zone_batchsize(zone));
+	pageset_init(pcp);
+	pageset_set_high_and_batch(zone, pcp);
+}
 
-	if (percpu_pagelist_fraction)
-		setup_pagelist_highmark(pcp,
-			(zone->managed_pages /
-				percpu_pagelist_fraction));
-	}
+static void __meminit setup_zone_pageset(struct zone *zone)
+{
+	int cpu;
+	zone->pageset = alloc_percpu(struct per_cpu_pageset);
+	for_each_possible_cpu(cpu)
+		zone_pageset_init(zone, cpu);
 }
 
 /*
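Why pageset_update() writes in that exact order (informal reasoning; the lockless readers are the drain and free paths patched earlier):

	/*
	 *	pcp->batch = 1;		(A) fail-safe value
	 *	smp_wmb();
	 *	pcp->high  = high;	(B)
	 *	smp_wmb();
	 *	pcp->batch = batch;	(C)
	 *
	 * A reader that still observes the old ->high can see ->batch only
	 * as the old value or as 1 (A is ordered before B); a reader that
	 * observes the new ->high sees ->batch as 1 or the new value
	 * (B is ordered before C). In neither window does ->batch exceed
	 * the ->high it is paired with, which is the "->batch must never
	 * be higher than ->high" rule the comment states.
	 */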
@@ -5150,35 +5203,101 @@ early_param("movablecore", cmdline_parse_movablecore);
 
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-unsigned long free_reserved_area(unsigned long start, unsigned long end,
-				 int poison, char *s)
+void adjust_managed_page_count(struct page *page, long count)
+{
+	spin_lock(&managed_page_count_lock);
+	page_zone(page)->managed_pages += count;
+	totalram_pages += count;
+#ifdef CONFIG_HIGHMEM
+	if (PageHighMem(page))
+		totalhigh_pages += count;
+#endif
+	spin_unlock(&managed_page_count_lock);
+}
+EXPORT_SYMBOL(adjust_managed_page_count);
+
+unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
 {
-	unsigned long pages, pos;
+	void *pos;
+	unsigned long pages = 0;
 
-	pos = start = PAGE_ALIGN(start);
-	end &= PAGE_MASK;
-	for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) {
-		if (poison)
-			memset((void *)pos, poison, PAGE_SIZE);
-		free_reserved_page(virt_to_page((void *)pos));
+	start = (void *)PAGE_ALIGN((unsigned long)start);
+	end = (void *)((unsigned long)end & PAGE_MASK);
+	for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
+		if ((unsigned int)poison <= 0xFF)
+			memset(pos, poison, PAGE_SIZE);
+		free_reserved_page(virt_to_page(pos));
 	}
 
 	if (pages && s)
-		pr_info("Freeing %s memory: %ldK (%lx - %lx)\n",
+		pr_info("Freeing %s memory: %ldK (%p - %p)\n",
 			s, pages << (PAGE_SHIFT - 10), start, end);
 
 	return pages;
 }
+EXPORT_SYMBOL(free_reserved_area);
 
 #ifdef CONFIG_HIGHMEM
 void free_highmem_page(struct page *page)
 {
 	__free_reserved_page(page);
 	totalram_pages++;
+	page_zone(page)->managed_pages++;
 	totalhigh_pages++;
 }
 #endif
 
+
+void __init mem_init_print_info(const char *str)
+{
+	unsigned long physpages, codesize, datasize, rosize, bss_size;
+	unsigned long init_code_size, init_data_size;
+
+	physpages = get_num_physpages();
+	codesize = _etext - _stext;
+	datasize = _edata - _sdata;
+	rosize = __end_rodata - __start_rodata;
+	bss_size = __bss_stop - __bss_start;
+	init_data_size = __init_end - __init_begin;
+	init_code_size = _einittext - _sinittext;
+
+	/*
+	 * Detect special cases and adjust section sizes accordingly:
+	 * 1) .init.* may be embedded into .data sections
+	 * 2) .init.text.* may be out of [__init_begin, __init_end],
+	 *    please refer to arch/tile/kernel/vmlinux.lds.S.
+	 * 3) .rodata.* may be embedded into .text or .data sections.
+	 */
+#define adj_init_size(start, end, size, pos, adj) \
+	if (start <= pos && pos < end && size > adj) \
+		size -= adj;
+
+	adj_init_size(__init_begin, __init_end, init_data_size,
+		     _sinittext, init_code_size);
+	adj_init_size(_stext, _etext, codesize, _sinittext, init_code_size);
+	adj_init_size(_sdata, _edata, datasize, __init_begin, init_data_size);
+	adj_init_size(_stext, _etext, codesize, __start_rodata, rosize);
+	adj_init_size(_sdata, _edata, datasize, __start_rodata, rosize);
+
+#undef adj_init_size
+
+	printk("Memory: %luK/%luK available "
+	       "(%luK kernel code, %luK rwdata, %luK rodata, "
+	       "%luK init, %luK bss, %luK reserved"
+#ifdef CONFIG_HIGHMEM
+	       ", %luK highmem"
+#endif
+	       "%s%s)\n",
+	       nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10),
+	       codesize >> 10, datasize >> 10, rosize >> 10,
+	       (init_data_size + init_code_size) >> 10, bss_size >> 10,
+	       (physpages - totalram_pages) << (PAGE_SHIFT-10),
+#ifdef CONFIG_HIGHMEM
+	       totalhigh_pages << (PAGE_SHIFT-10),
+#endif
+	       str ? ", " : "", str ? str : "");
+}
+
 /**
  * set_dma_reserve - set the specified number of pages reserved in the first zone
  * @new_dma_reserve: The number of pages to mark reserved
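Two behavioural notes on the hunk above. First, free_reserved_area() now takes void * endpoints and treats poison as a fill byte only when it fits in one: the (unsigned int)poison <= 0xFF test makes 0 a valid fill value, while a negative poison (e.g. -1) skips the memset() entirely, something the old if (poison) test could not express. Second, adjust_managed_page_count() and free_reserved_area() are exported so memory hotplug and balloon-style drivers can keep the managed-page counters consistent. A hedged usage sketch (the linker-script markers are the standard ones; -1 means "no poisoning"):

	extern char __init_begin[], __init_end[];

	unsigned long freed = free_reserved_area(__init_begin, __init_end,
						 -1, "unused kernel");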
@@ -5540,7 +5659,6 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
  * cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist
  * can have before it gets flushed back to buddy allocator.
  */
-
 int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
@@ -5551,14 +5669,16 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (!write || (ret < 0))
 		return ret;
+
+	mutex_lock(&pcp_batch_high_lock);
 	for_each_populated_zone(zone) {
-		for_each_possible_cpu(cpu) {
-			unsigned long high;
-			high = zone->managed_pages / percpu_pagelist_fraction;
-			setup_pagelist_highmark(
-				per_cpu_ptr(zone->pageset, cpu), high);
-		}
+		unsigned long high;
+		high = zone->managed_pages / percpu_pagelist_fraction;
+		for_each_possible_cpu(cpu)
+			pageset_set_high(per_cpu_ptr(zone->pageset, cpu),
+					 high);
 	}
+	mutex_unlock(&pcp_batch_high_lock);
 	return 0;
 }
 
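Note that high depends only on the zone, so its computation is hoisted out of the per-CPU loop, and the whole update now runs under pcp_batch_high_lock, satisfying the single-updater rule pageset_update() documents. Any future runtime tuner of the pcp limits should follow the same pattern (hypothetical sketch; the function name is not from the patch):

	static void example_retune_zone(struct zone *zone, unsigned long high)
	{
		int cpu;

		mutex_lock(&pcp_batch_high_lock);	/* serialize vs. other updaters */
		for_each_possible_cpu(cpu)
			pageset_set_high(per_cpu_ptr(zone->pageset, cpu), high);
		mutex_unlock(&pcp_batch_high_lock);
	}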
@@ -6047,32 +6167,18 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static int __meminit __zone_pcp_update(void *data)
-{
-	struct zone *zone = data;
-	int cpu;
-	unsigned long batch = zone_batchsize(zone), flags;
-
-	for_each_possible_cpu(cpu) {
-		struct per_cpu_pageset *pset;
-		struct per_cpu_pages *pcp;
-
-		pset = per_cpu_ptr(zone->pageset, cpu);
-		pcp = &pset->pcp;
-
-		local_irq_save(flags);
-		if (pcp->count > 0)
-			free_pcppages_bulk(zone, pcp->count, pcp);
-		drain_zonestat(zone, pset);
-		setup_pageset(pset, batch);
-		local_irq_restore(flags);
-	}
-	return 0;
-}
-
+/*
+ * The zone indicated has a new number of managed_pages; batch sizes and percpu
+ * page high values need to be recalculated.
+ */
 void __meminit zone_pcp_update(struct zone *zone)
 {
-	stop_machine(__zone_pcp_update, zone, NULL);
+	unsigned cpu;
+	mutex_lock(&pcp_batch_high_lock);
+	for_each_possible_cpu(cpu)
+		pageset_set_high_and_batch(zone,
+				per_cpu_ptr(zone->pageset, cpu));
+	mutex_unlock(&pcp_batch_high_lock);
 }
 #endif
 
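The stop_machine() call could be dropped because the new zone_pcp_update() no longer rebuilds or drains the pagesets; it only rewrites ->high/->batch through pageset_update(), whose write ordering the lockless readers already tolerate, so excluding other updaters with pcp_batch_high_lock suffices. Informal view of the hotplug path this serves (call names from mm/memory_hotplug.c of this era, simplified):

	/*
	 *	online_pages()
	 *	  -> zone->managed_pages grows as pages come online
	 *	  -> zone_pcp_update(zone)   retunes pcp limits, no stop_machine()
	 */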
@@ -6142,6 +6248,10 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 		list_del(&page->lru);
 		rmv_page_order(page);
 		zone->free_area[order].nr_free--;
+#ifdef CONFIG_HIGHMEM
+		if (PageHighMem(page))
+			totalhigh_pages -= 1 << order;
+#endif
 		for (i = 0; i < (1 << order); i++)
 			SetPageReserved((page+i));
 		pfn += (1 << order);