author     Taku Izumi <izumi.taku@jp.fujitsu.com>          2016-03-15 17:55:18 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-03-15 19:55:16 -0400
commit     d91749c1dda71a7030c054a5ab8dc5419bc6730b (patch)
tree       c0721698321d38d72d10f1e4661bcdb654b9b000 /mm/page_alloc.c
parent     02c43638ec46fffd0c54b0d10819e36e0bc622f7 (diff)
mm/page_alloc.c: calculate zone_start_pfn at zone_spanned_pages_in_node()
Xeon E7 v3 based systems support Address Range Mirroring, and a UEFI BIOS
compliant with UEFI spec 2.5 can report which ranges are mirrored
(reliable) via the EFI memory map.  The Linux kernel now uses this
information and allocates boot time memory from the reliable region.

My requirement is:
 - allocate kernel memory from the mirrored region
 - allocate user memory from the non-mirrored region

In order to meet this requirement, ZONE_MOVABLE is useful.  By arranging
the non-mirrored ranges into ZONE_MOVABLE, mirrored memory is used for
kernel allocations.

My idea is to extend the existing "kernelcore" option and introduce a
kernelcore=mirror option.  By specifying "mirror" instead of an amount of
memory, the non-mirrored regions will be arranged into ZONE_MOVABLE.

Earlier discussions are at:
 https://lkml.org/lkml/2015/10/9/24
 https://lkml.org/lkml/2015/10/15/9
 https://lkml.org/lkml/2015/11/27/18
 https://lkml.org/lkml/2015/12/8/836

For example, suppose a 2-node system with the following memory ranges:

  node 0 [mem 0x0000000000001000-0x000000109fffffff]
  node 1 [mem 0x00000010a0000000-0x000000209fffffff]

and the following ranges are marked as reliable (mirrored):

  [0x0000000000000000-0x0000000100000000]
  [0x0000000100000000-0x0000000180000000]
  [0x0000000800000000-0x0000000880000000]
  [0x00000010a0000000-0x0000001120000000]
  [0x00000017a0000000-0x0000001820000000]

If you specify kernelcore=mirror, ZONE_NORMAL and ZONE_MOVABLE are
arranged as below:

 - node 0:
    ZONE_NORMAL : [0x0000000100000000-0x00000010a0000000]
    ZONE_MOVABLE: [0x0000000180000000-0x00000010a0000000]
 - node 1:
    ZONE_NORMAL : [0x00000010a0000000-0x00000020a0000000]
    ZONE_MOVABLE: [0x0000001120000000-0x00000020a0000000]

In the overlapped ranges, pages that are to be ZONE_MOVABLE are treated
as absent in ZONE_NORMAL, and vice versa.

This patch (of 2):

Currently each zone's zone_start_pfn is calculated in
free_area_init_core().  However, a zone's range is already fixed by the
time zone_spanned_pages_in_node() is invoked.  This patch moves the
calculation of each zone->zone_start_pfn into
zone_spanned_pages_in_node().

Signed-off-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
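As a side note on the example above: the ZONE_MOVABLE start chosen for each
node is simply the first address at or above the node's start that is not
covered by a mirrored range.  The small standalone sketch below reproduces
the two starts from the listed ranges; it only illustrates the arithmetic,
not the kernel implementation (the actual kernelcore=mirror handling is in
patch 2 of this series), and the function and variable names are made up:

	#include <stdio.h>

	struct range { unsigned long long start, end; };	/* [start, end) */

	/* Mirrored (reliable) ranges from the example above, sorted by start. */
	static const struct range mirror[] = {
		{ 0x0000000000000000ULL, 0x0000000100000000ULL },
		{ 0x0000000100000000ULL, 0x0000000180000000ULL },
		{ 0x0000000800000000ULL, 0x0000000880000000ULL },
		{ 0x00000010a0000000ULL, 0x0000001120000000ULL },
		{ 0x00000017a0000000ULL, 0x0000001820000000ULL },
	};

	/* First address >= node_start that no mirrored range covers. */
	static unsigned long long movable_start(unsigned long long node_start)
	{
		unsigned long long addr = node_start;
		unsigned int i;

		for (i = 0; i < sizeof(mirror) / sizeof(mirror[0]); i++)
			if (mirror[i].start <= addr && addr < mirror[i].end)
				addr = mirror[i].end;
		return addr;
	}

	int main(void)
	{
		/* Node start addresses from the example. */
		printf("node 0 ZONE_MOVABLE: %#llx\n", movable_start(0x0000000000001000ULL));
		printf("node 1 ZONE_MOVABLE: %#llx\n", movable_start(0x00000010a0000000ULL));
		return 0;	/* prints 0x180000000 and 0x1120000000, matching the layout above */
	}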
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  40
1 file changed, 29 insertions(+), 11 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 838ca8bb64f7..0d20a19151a4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4953,31 +4953,31 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
 					unsigned long zone_type,
 					unsigned long node_start_pfn,
 					unsigned long node_end_pfn,
+					unsigned long *zone_start_pfn,
+					unsigned long *zone_end_pfn,
 					unsigned long *ignored)
 {
-	unsigned long zone_start_pfn, zone_end_pfn;
-
 	/* When hotadd a new node from cpu_up(), the node should be empty */
 	if (!node_start_pfn && !node_end_pfn)
 		return 0;
 
 	/* Get the start and end of the zone */
-	zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
-	zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
+	*zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
+	*zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
 	adjust_zone_range_for_zone_movable(nid, zone_type,
 				node_start_pfn, node_end_pfn,
-				&zone_start_pfn, &zone_end_pfn);
+				zone_start_pfn, zone_end_pfn);
 
 	/* Check that this node has pages within the zone's required range */
-	if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
+	if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn)
 		return 0;
 
 	/* Move the zone boundaries inside the node if necessary */
-	zone_end_pfn = min(zone_end_pfn, node_end_pfn);
-	zone_start_pfn = max(zone_start_pfn, node_start_pfn);
+	*zone_end_pfn = min(*zone_end_pfn, node_end_pfn);
+	*zone_start_pfn = max(*zone_start_pfn, node_start_pfn);
 
 	/* Return the spanned pages */
-	return zone_end_pfn - zone_start_pfn;
+	return *zone_end_pfn - *zone_start_pfn;
 }
 
 /*
@@ -5042,8 +5042,18 @@ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
 					unsigned long zone_type,
 					unsigned long node_start_pfn,
 					unsigned long node_end_pfn,
+					unsigned long *zone_start_pfn,
+					unsigned long *zone_end_pfn,
 					unsigned long *zones_size)
 {
+	unsigned int zone;
+
+	*zone_start_pfn = node_start_pfn;
+	for (zone = 0; zone < zone_type; zone++)
+		*zone_start_pfn += zones_size[zone];
+
+	*zone_end_pfn = *zone_start_pfn + zones_size[zone_type];
+
 	return zones_size[zone_type];
 }
 
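For the CONFIG_HAVE_MEMBLOCK_NODE_MAP=n stub above, the new
zone_start_pfn/zone_end_pfn outputs are plain cumulative sums over
zones_size[].  A minimal standalone sketch of that arithmetic, with
hypothetical sizes and node start (only the loop mirrors the stub; none of
the numbers come from the patch):

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical per-node zone sizes in pages, lowest zone first. */
		unsigned long zones_size[] = { 4096, 1044480, 2097152 };
		unsigned long node_start_pfn = 0x1000, zone_start_pfn, zone_end_pfn;
		unsigned int zone_type = 2, zone;

		/* Same arithmetic as the stub: skip past all lower zones... */
		zone_start_pfn = node_start_pfn;
		for (zone = 0; zone < zone_type; zone++)
			zone_start_pfn += zones_size[zone];
		/* ...then the zone ends after its own span. */
		zone_end_pfn = zone_start_pfn + zones_size[zone_type];

		printf("zone %u: [%#lx-%#lx)\n", zone_type, zone_start_pfn, zone_end_pfn);
		return 0;	/* prints zone 2: [0x101000-0x301000) */
	}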
@@ -5072,15 +5082,22 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
 
 	for (i = 0; i < MAX_NR_ZONES; i++) {
 		struct zone *zone = pgdat->node_zones + i;
+		unsigned long zone_start_pfn, zone_end_pfn;
 		unsigned long size, real_size;
 
 		size = zone_spanned_pages_in_node(pgdat->node_id, i,
 						  node_start_pfn,
 						  node_end_pfn,
+						  &zone_start_pfn,
+						  &zone_end_pfn,
 						  zones_size);
 		real_size = size - zone_absent_pages_in_node(pgdat->node_id, i,
 						  node_start_pfn, node_end_pfn,
 						  zholes_size);
+		if (size)
+			zone->zone_start_pfn = zone_start_pfn;
+		else
+			zone->zone_start_pfn = 0;
 		zone->spanned_pages = size;
 		zone->present_pages = real_size;
 
@@ -5201,7 +5218,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 {
 	enum zone_type j;
 	int nid = pgdat->node_id;
-	unsigned long zone_start_pfn = pgdat->node_start_pfn;
 	int ret;
 
 	pgdat_resize_init(pgdat);
@@ -5222,6 +5238,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, freesize, memmap_pages;
+		unsigned long zone_start_pfn = zone->zone_start_pfn;
 
 		size = zone->spanned_pages;
 		realsize = freesize = zone->present_pages;
@@ -5290,7 +5307,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		ret = init_currently_empty_zone(zone, zone_start_pfn, size);
 		BUG_ON(ret);
 		memmap_init(size, nid, j, zone_start_pfn);
-		zone_start_pfn += size;
 	}
 }
 
@@ -5358,6 +5374,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
 		(u64)start_pfn << PAGE_SHIFT,
 		end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
+#else
+	start_pfn = node_start_pfn;
 #endif
 	calculate_node_totalpages(pgdat, start_pfn, end_pfn,
 				  zones_size, zholes_size);
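
Taken together, the division of labour after this patch is:
calculate_node_totalpages() records each zone's start pfn in
zone->zone_start_pfn (0 for an empty zone), and free_area_init_core()
reads it back instead of keeping its own running zone_start_pfn.  The toy
model below illustrates that flow for the simple contiguous zones_size[]
case; the struct, the toy_-prefixed functions and the sizes are all
invented for illustration, not kernel code:

	#include <stdio.h>

	/* Toy model of the new flow; names mirror the kernel's but nothing
	 * here is taken from the kernel sources. */
	struct toy_zone { unsigned long zone_start_pfn, spanned_pages; };

	static void toy_calculate_node_totalpages(struct toy_zone *zones, int nr,
						  unsigned long node_start_pfn,
						  const unsigned long *zones_size)
	{
		unsigned long pfn = node_start_pfn;
		int i;

		for (i = 0; i < nr; i++) {
			/* zone_spanned_pages_in_node() now reports the start pfn,
			 * and empty zones get a start pfn of 0... */
			zones[i].zone_start_pfn = zones_size[i] ? pfn : 0;
			zones[i].spanned_pages = zones_size[i];
			pfn += zones_size[i];
		}
	}

	static void toy_free_area_init_core(struct toy_zone *zones, int nr)
	{
		int j;

		for (j = 0; j < nr; j++) {
			/* ...and free_area_init_core() just reads it back instead of
			 * accumulating a running zone_start_pfn of its own. */
			unsigned long zone_start_pfn = zones[j].zone_start_pfn;

			printf("zone %d: start pfn %#lx, %lu pages\n",
			       j, zone_start_pfn, zones[j].spanned_pages);
		}
	}

	int main(void)
	{
		unsigned long zones_size[3] = { 4096, 0, 2097152 };	/* hypothetical */
		struct toy_zone zones[3];

		toy_calculate_node_totalpages(zones, 3, 0x1000, zones_size);
		toy_free_area_init_core(zones, 3);
		return 0;
	}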