author	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-13 16:11:15 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-13 16:11:15 -0500
commit	f6e858a00af788bab0fd4c0b7f5cd788000edc18 (patch)
tree	f9403ca3671be9821dbf83e726e61dbe75fbca6b /mm/page_alloc.c
parent	193c0d682525987db59ac3a24531a77e4947aa95 (diff)
parent	98870901cce098bbe94d90d2c41d8d1fa8d94392 (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge misc VM changes from Andrew Morton:
 "The rest of most-of-MM.  The other MM bits await a slab merge.

  This patch includes the addition of a huge zero_page.  Not a
  performance boost, but it can save large amounts of physical memory
  in some situations.

  Also, a bunch of Fujitsu engineers are working on memory hotplug,
  which, as it turns out, was badly broken.  About half of their
  patches are included here; the remainder are 3.8 material."

However, this merge disables CONFIG_MOVABLE_NODE, which was totally
broken.  We don't add new features with "default y", nor do we add
Kconfig questions that are incomprehensible to most people without any
help text.  Does the feature even make sense without compaction or
memory hotplug?

* akpm: (54 commits)
  mm/bootmem.c: remove unused wrapper function reserve_bootmem_generic()
  mm/memory.c: remove unused code from do_wp_page()
  asm-generic, mm: pgtable: consolidate zero page helpers
  mm/hugetlb.c: fix warning on freeing hwpoisoned hugepage
  hwpoison, hugetlbfs: fix RSS-counter warning
  hwpoison, hugetlbfs: fix "bad pmd" warning in unmapping hwpoisoned hugepage
  mm: protect against concurrent vma expansion
  memcg: do not check for mm in __mem_cgroup_count_vm_event
  tmpfs: support SEEK_DATA and SEEK_HOLE (reprise)
  mm: provide more accurate estimation of pages occupied by memmap
  fs/buffer.c: remove redundant initialization in alloc_page_buffers()
  fs/buffer.c: do not inline exported function
  writeback: fix a typo in comment
  mm: introduce new field "managed_pages" to struct zone
  mm, oom: remove statically defined arch functions of same name
  mm, oom: remove redundant sleep in pagefault oom handler
  mm, oom: cleanup pagefault oom handler
  memory_hotplug: allow online/offline memory to result movable node
  numa: add CONFIG_MOVABLE_NODE for movable-dedicated node
  mm, memcg: avoid unnecessary function call when memcg is disabled
  ...
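Editorial note: the "mm: provide more accurate estimation of pages occupied by memmap" change in this series (visible below as the new calc_memmap_size() helper) comes down to a small piece of arithmetic. The userspace sketch below mirrors that calculation; the 4 KiB page size and 64-byte struct page are assumed, illustrative values, not something taken from this patch.

    /* Userspace sketch of the calc_memmap_size() heuristic added below.
     * PAGE_SZ and SIZEOF_STRUCT_PAGE are assumed values for illustration. */
    #include <stdio.h>

    #define PAGE_SZ            4096UL  /* assumed 4 KiB pages */
    #define SIZEOF_STRUCT_PAGE 64UL    /* assumed sizeof(struct page) */

    static unsigned long memmap_pages(unsigned long spanned, unsigned long present,
                                      int sparsemem)
    {
            unsigned long pages = spanned;

            /* With SPARSEMEM and a zone that is mostly holes, base the estimate
             * on present pages; the >> 4 term leaves ~1/16 slack for alignment. */
            if (sparsemem && spanned > present + (present >> 4))
                    pages = present;

            /* Bytes of memmap, rounded up to whole pages. */
            return (pages * SIZEOF_STRUCT_PAGE + PAGE_SZ - 1) / PAGE_SZ;
    }

    int main(void)
    {
            /* A zone spanning 1M page frames (4 GiB) with 256K frames present. */
            unsigned long spanned = 1UL << 20, present = 1UL << 18;

            printf("flat estimate:   %lu memmap pages\n",
                   memmap_pages(spanned, present, 0));
            printf("sparse estimate: %lu memmap pages\n",
                   memmap_pages(spanned, present, 1));
            return 0;
    }

With those assumed sizes, the flat spanned-pages estimate charges 16384 memmap pages to the zone, while the SPARSEMEM-aware estimate based on present pages charges only 4096.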
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c | 115
1 file changed, 80 insertions(+), 35 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eee3efa58c91..83637dfba110 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -90,6 +90,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
 #ifdef CONFIG_HIGHMEM
 	[N_HIGH_MEMORY] = { { [0] = 1UL } },
 #endif
+#ifdef CONFIG_MOVABLE_NODE
+	[N_MEMORY] = { { [0] = 1UL } },
+#endif
 	[N_CPU] = { { [0] = 1UL } },
 #endif	/* NUMA */
 };
@@ -732,6 +735,13 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
+/*
+ * Read access to zone->managed_pages is safe because it's unsigned long,
+ * but we still need to serialize writers. Currently all callers of
+ * __free_pages_bootmem() except put_page_bootmem() should only be used
+ * at boot time. So for shorter boot time, we shift the burden to
+ * put_page_bootmem() to serialize writers.
+ */
 void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
@@ -747,6 +757,7 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 		set_page_count(p, 0);
 	}
 
+	page_zone(page)->managed_pages += 1 << order;
 	set_page_refcounted(page);
 	__free_pages(page, order);
 }
@@ -1695,7 +1706,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
  *
  * If the zonelist cache is present in the passed in zonelist, then
  * returns a pointer to the allowed node mask (either the current
- * tasks mems_allowed, or node_states[N_HIGH_MEMORY].)
+ * tasks mems_allowed, or node_states[N_MEMORY].)
  *
  * If the zonelist cache is not available for this zonelist, does
  * nothing and returns NULL.
@@ -1724,7 +1735,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
 
 	allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
 					&cpuset_current_mems_allowed :
-					&node_states[N_HIGH_MEMORY];
+					&node_states[N_MEMORY];
 	return allowednodes;
 }
 
@@ -2981,6 +2992,7 @@ void show_free_areas(unsigned int filter)
2981 " isolated(anon):%lukB" 2992 " isolated(anon):%lukB"
2982 " isolated(file):%lukB" 2993 " isolated(file):%lukB"
2983 " present:%lukB" 2994 " present:%lukB"
2995 " managed:%lukB"
2984 " mlocked:%lukB" 2996 " mlocked:%lukB"
2985 " dirty:%lukB" 2997 " dirty:%lukB"
2986 " writeback:%lukB" 2998 " writeback:%lukB"
@@ -3010,6 +3022,7 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_ISOLATED_ANON)),
 			K(zone_page_state(zone, NR_ISOLATED_FILE)),
 			K(zone->present_pages),
+			K(zone->managed_pages),
 			K(zone_page_state(zone, NR_MLOCK)),
 			K(zone_page_state(zone, NR_FILE_DIRTY)),
 			K(zone_page_state(zone, NR_WRITEBACK)),
@@ -3238,7 +3251,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 		return node;
 	}
 
-	for_each_node_state(n, N_HIGH_MEMORY) {
+	for_each_node_state(n, N_MEMORY) {
 
 		/* Don't want a node to appear more than once */
 		if (node_isset(n, *used_node_mask))
@@ -3380,7 +3393,7 @@ static int default_zonelist_order(void)
 	 * local memory, NODE_ORDER may be suitable.
 	 */
 	average_size = total_size /
-			(nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
+			(nodes_weight(node_states[N_MEMORY]) + 1);
 	for_each_online_node(nid) {
 		low_kmem_size = 0;
 		total_size = 0;
@@ -4476,6 +4489,26 @@ void __init set_pageblock_order(void)
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
+static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
+						unsigned long present_pages)
+{
+	unsigned long pages = spanned_pages;
+
+	/*
+	 * Provide a more accurate estimation if there are holes within
+	 * the zone and SPARSEMEM is in use. If there are holes within the
+	 * zone, each populated memory region may cost us one or two extra
+	 * memmap pages due to alignment because memmap pages for each
+	 * populated region may not be naturally aligned on page boundary.
+	 * So the (present_pages >> 4) heuristic is a tradeoff for that.
+	 */
+	if (spanned_pages > present_pages + (present_pages >> 4) &&
+	    IS_ENABLED(CONFIG_SPARSEMEM))
+		pages = present_pages;
+
+	return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
+}
+
 /*
  * Set up the zone data structures:
  *   - mark all pages reserved
@@ -4499,48 +4532,56 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
-		unsigned long size, realsize, memmap_pages;
+		unsigned long size, realsize, freesize, memmap_pages;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
-		realsize = size - zone_absent_pages_in_node(nid, j,
+		realsize = freesize = size - zone_absent_pages_in_node(nid, j,
 								zholes_size);
 
 		/*
-		 * Adjust realsize so that it accounts for how much memory
+		 * Adjust freesize so that it accounts for how much memory
 		 * is used by this zone for memmap. This affects the watermark
 		 * and per-cpu initialisations
 		 */
-		memmap_pages =
-			PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
-		if (realsize >= memmap_pages) {
-			realsize -= memmap_pages;
+		memmap_pages = calc_memmap_size(size, realsize);
+		if (freesize >= memmap_pages) {
+			freesize -= memmap_pages;
 			if (memmap_pages)
 				printk(KERN_DEBUG
 				       " %s zone: %lu pages used for memmap\n",
 				       zone_names[j], memmap_pages);
 		} else
 			printk(KERN_WARNING
-				" %s zone: %lu pages exceeds realsize %lu\n",
-				zone_names[j], memmap_pages, realsize);
+				" %s zone: %lu pages exceeds freesize %lu\n",
+				zone_names[j], memmap_pages, freesize);
 
 		/* Account for reserved pages */
-		if (j == 0 && realsize > dma_reserve) {
-			realsize -= dma_reserve;
+		if (j == 0 && freesize > dma_reserve) {
+			freesize -= dma_reserve;
 			printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
 					zone_names[0], dma_reserve);
 		}
 
 		if (!is_highmem_idx(j))
-			nr_kernel_pages += realsize;
-		nr_all_pages += realsize;
+			nr_kernel_pages += freesize;
+		/* Charge for highmem memmap if there are enough kernel pages */
+		else if (nr_kernel_pages > memmap_pages * 2)
+			nr_kernel_pages -= memmap_pages;
+		nr_all_pages += freesize;
 
 		zone->spanned_pages = size;
-		zone->present_pages = realsize;
+		zone->present_pages = freesize;
+		/*
+		 * Set an approximate value for lowmem here, it will be adjusted
+		 * when the bootmem allocator frees pages into the buddy system.
+		 * And all highmem pages will be managed by the buddy system.
+		 */
+		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
 #ifdef CONFIG_NUMA
 		zone->node = nid;
-		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
+		zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)
 						/ 100;
-		zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
+		zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
 #endif
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
@@ -4731,7 +4772,7 @@ unsigned long __init find_min_pfn_with_active_regions(void)
 /*
  * early_calculate_totalpages()
  * Sum pages in active regions for movable zone.
- * Populate N_HIGH_MEMORY for calculating usable_nodes.
+ * Populate N_MEMORY for calculating usable_nodes.
  */
 static unsigned long __init early_calculate_totalpages(void)
 {
@@ -4744,7 +4785,7 @@ static unsigned long __init early_calculate_totalpages(void)
 
 		totalpages += pages;
 		if (pages)
-			node_set_state(nid, N_HIGH_MEMORY);
+			node_set_state(nid, N_MEMORY);
 	}
 	return totalpages;
 }
@@ -4761,9 +4802,9 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 	unsigned long usable_startpfn;
 	unsigned long kernelcore_node, kernelcore_remaining;
 	/* save the state before borrow the nodemask */
-	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
+	nodemask_t saved_node_state = node_states[N_MEMORY];
 	unsigned long totalpages = early_calculate_totalpages();
-	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
+	int usable_nodes = nodes_weight(node_states[N_MEMORY]);
 
 	/*
 	 * If movablecore was specified, calculate what size of
@@ -4798,7 +4839,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 restart:
 	/* Spread kernelcore memory as evenly as possible throughout nodes */
 	kernelcore_node = required_kernelcore / usable_nodes;
-	for_each_node_state(nid, N_HIGH_MEMORY) {
+	for_each_node_state(nid, N_MEMORY) {
 		unsigned long start_pfn, end_pfn;
 
 		/*
@@ -4890,23 +4931,27 @@ restart:
 
 out:
 	/* restore the node_state */
-	node_states[N_HIGH_MEMORY] = saved_node_state;
+	node_states[N_MEMORY] = saved_node_state;
 }
 
-/* Any regular memory on that node ? */
-static void __init check_for_regular_memory(pg_data_t *pgdat)
+/* Any regular or high memory on that node ? */
+static void check_for_memory(pg_data_t *pgdat, int nid)
 {
-#ifdef CONFIG_HIGHMEM
 	enum zone_type zone_type;
 
-	for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
+	if (N_MEMORY == N_NORMAL_MEMORY)
+		return;
+
+	for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
 		struct zone *zone = &pgdat->node_zones[zone_type];
 		if (zone->present_pages) {
-			node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
+			node_set_state(nid, N_HIGH_MEMORY);
+			if (N_NORMAL_MEMORY != N_HIGH_MEMORY &&
+			    zone_type <= ZONE_NORMAL)
+				node_set_state(nid, N_NORMAL_MEMORY);
 			break;
 		}
 	}
-#endif
 }
 
 /**
@@ -4989,8 +5034,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
 		/* Any memory on that node */
 		if (pgdat->node_present_pages)
-			node_set_state(nid, N_HIGH_MEMORY);
-		check_for_regular_memory(pgdat);
+			node_set_state(nid, N_MEMORY);
+		check_for_memory(pgdat, nid);
 	}
 }
 
@@ -5727,7 +5772,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 	unsigned int tries = 0;
 	int ret = 0;
 
-	migrate_prep_local();
+	migrate_prep();
 
 	while (pfn < end || !list_empty(&cc->migratepages)) {
 		if (fatal_signal_pending(current)) {
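Editorial note: the managed_pages bookkeeping threaded through the hunks above can be summed up by one invariant. The sketch below is userspace C with invented numbers, not kernel code; only the ordering of the three per-zone counters is the point: spanned_pages covers the zone's whole PFN range, present_pages subtracts holes, and managed_pages further subtracts pages the buddy allocator never manages, such as the memmap itself.

    /* Userspace sketch of the per-zone size counters touched by this merge.
     * Field names mirror struct zone; the values are made up for illustration. */
    #include <assert.h>
    #include <stdio.h>

    struct zone_counters {
            unsigned long spanned_pages;  /* whole PFN range covered by the zone */
            unsigned long present_pages;  /* spanned minus holes */
            unsigned long managed_pages;  /* present minus memmap and other boot reservations */
    };

    int main(void)
    {
            struct zone_counters z = {
                    .spanned_pages = 1UL << 20,                   /* 1M frames spanned */
                    .present_pages = (1UL << 20) - 4096,          /* minus a 16 MiB hole */
                    .managed_pages = (1UL << 20) - 4096 - 16384,  /* minus ~64 MiB of memmap etc. */
            };

            /* The relationship the new field is meant to preserve. */
            assert(z.spanned_pages >= z.present_pages);
            assert(z.present_pages >= z.managed_pages);

            printf("spanned=%lu present=%lu managed=%lu\n",
                   z.spanned_pages, z.present_pages, z.managed_pages);
            return 0;
    }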