author		Oscar Salvador <osalvador@suse.de>	2018-08-22 00:53:43 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-08-22 13:52:45 -0400
commit		03e85f9d5f1f8c74f127c5f7a87575d74a78d248 (patch)
tree		385ea9d3415d2a2633ea278904050ef8eb7d4fec /mm/page_alloc.c
parent		0188dc98ad5c7c361d46175623471d4be0fb8610 (diff)
mm/page_alloc: Introduce free_area_init_core_hotplug
Currently, whenever a new node is created/re-used from the memhotplug
path, we call free_area_init_node()->free_area_init_core(). But there is
some code that we do not really need to run when coming from that path.
free_area_init_core() performs the following actions:
1) Initializes pgdat internals, such as the spinlock, waitqueues and more.
2) Accounts nr_all_pages and nr_kernel_pages. These values are used later on
when creating hash tables.
3) Accounts the number of managed_pages per zone, subtracting dma_reserved and
memmap pages.
4) Initializes some fields of the zone structure.
5) Calls init_currently_empty_zone() to initialize all the freelists.
6) Calls memmap_init() to initialize all pages belonging to a certain zone.
When called from the memhotplug path, free_area_init_core() only needs to
perform actions #1 and #4.
Action #2 is pointless, as the zones do not have any pages: either the
node was freed, or we are re-using it, and either way all zones belonging
to this node should have 0 pages. For the same reason, action #3 always
results in managed_pages being 0.
Actions #5 and #6 are performed later on when onlining the pages:
online_pages()->move_pfn_range_to_zone()->init_currently_empty_zone()
online_pages()->move_pfn_range_to_zone()->memmap_init_zone()
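For reference, a simplified sketch of how move_pfn_range_to_zone() covers
these two steps at online time (locking, the zone_is_empty() check and
contiguity handling are elided here, so treat the exact shape as
illustrative rather than the verbatim function):

void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
		unsigned long nr_pages, struct vmem_altmap *altmap)
{
	struct pglist_data *pgdat = zone->zone_pgdat;
	int nid = pgdat->node_id;

	/* action #5: set up the (so far empty) zone and grow its span */
	init_currently_empty_zone(zone, start_pfn, nr_pages);
	resize_zone_range(zone, start_pfn, nr_pages);
	resize_pgdat_range(pgdat, start_pfn, nr_pages);

	/* action #6: initialize the struct pages of the onlined range */
	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
			 MEMMAP_HOTPLUG, altmap);
}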
This patch does two things:
First, it moves the node/zone initialization into their own functions,
which allows us to create a small version of free_area_init_core() that
only performs:
1) Initialization of pgdat internals, such as the spinlock, waitqueues and more.
4) Initialization of some fields of the zone structure.
These two functions are pgdat_init_internals() and zone_init_internals().
The second thing this patch does is introduce
free_area_init_core_hotplug(), the memhotplug version of
free_area_init_core():
Currently, we call free_area_init_node() from the memhotplug path. There,
we set some of pgdat's fields and call calculate_node_totalpages(), which
calculates the number of pages the node has.
Since the node is either new, or we are re-using it, the zones belonging
to this node should not have any pages, so there is no point in
calculating this now.
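For reference, the gist of what calculate_node_totalpages() computes
(heavily condensed; size and real_size come from
zone_spanned_pages_in_node() and zone_absent_pages_in_node(), whose
arguments are elided here):

	for (i = 0; i < MAX_NR_ZONES; i++) {
		struct zone *zone = pgdat->node_zones + i;

		zone->spanned_pages = size;		/* PFN span, incl. holes */
		zone->present_pages = real_size;	/* span minus holes */
		totalpages += size;
		realtotalpages += real_size;
	}
	pgdat->node_spanned_pages = totalpages;
	pgdat->node_present_pages = realtotalpages;

On a hot-added node all of these end up being 0 anyway, which is why
skipping the call is safe.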
Actually, we reset these values to 0 later on with calls to:
reset_node_managed_pages()
reset_node_present_pages()
The number of pages per node and per zone will be calculated when
onlining the pages:
online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_zone_range()
online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_pgdat_range()
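To see how these pieces fit together on the hotplug side, here is a
condensed sketch of the caller, hotadd_new_pgdat() in mm/memory_hotplug.c
(not visible in the diffstat below, which is limited to mm/page_alloc.c;
error handling and per-cpu/zonelist setup are trimmed):

static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
{
	pg_data_t *pgdat = NODE_DATA(nid);

	if (!pgdat) {
		/* brand-new node: allocate and register its pg_data_t */
		pgdat = arch_alloc_nodedata(nid);
		if (!pgdat)
			return NULL;
		arch_refresh_nodedata(nid, pgdat);
	}

	pgdat->node_id = nid;
	pgdat->node_start_pfn = PFN_DOWN(start);

	/* init the node's zones as empty; no pages are present yet */
	free_area_init_core_hotplug(nid);

	/* pages will only be accounted once they are onlined */
	reset_node_managed_pages(pgdat);
	reset_node_present_pages(pgdat);

	return pgdat;
}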
Also, since free_area_init_core()/free_area_init_node() will now only get
called during early init, let us replace __meminit (formerly
__paginginit) with __init, so their code gets freed up after boot.
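The memory saving comes from the init-section machinery in
include/linux/init.h: __init code lands in a section that is discarded
once boot completes, whereas __meminit code has to be kept around
whenever CONFIG_MEMORY_HOTPLUG is enabled, since hotplug may call it at
any time. Simplified:

/* include/linux/init.h (simplified, attributes trimmed) */
#define __init		__section(.init.text)	/* freed after boot */

#ifdef CONFIG_MEMORY_HOTPLUG
#define __meminit				/* kept: hotplug may need it */
#else
#define __meminit	__init			/* no hotplug: freed too */
#endif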
[osalvador@techadventures.net: fix section usage]
Link: http://lkml.kernel.org/r/20180731101752.GA473@techadventures.net
[osalvador@suse.de: v6]
Link: http://lkml.kernel.org/r/20180801122348.21588-6-osalvador@techadventures.net
Link: http://lkml.kernel.org/r/20180730101757.28058-5-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	78
1 file changed, 54 insertions, 24 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5b939bd1bff9..c677c1506d73 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6140,7 +6140,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 
 /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __meminit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 	unsigned int order;
 
@@ -6168,13 +6168,13 @@ void __meminit set_pageblock_order(void)
  * include/linux/pageblock-flags.h for the values of pageblock_order based on
  * the kernel config
  */
-void __meminit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 }
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
-static unsigned long __meminit calc_memmap_size(unsigned long spanned_pages,
+static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
 						unsigned long present_pages)
 {
 	unsigned long pages = spanned_pages;
@@ -6225,19 +6225,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 #endif
 
-/*
- * Set up the zone data structures:
- * - mark all pages reserved
- * - mark all memory queues empty
- * - clear the memory bitmaps
- *
- * NOTE: pgdat should get zeroed by caller.
- */
-static void __meminit free_area_init_core(struct pglist_data *pgdat)
+static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 {
-	enum zone_type j;
-	int nid = pgdat->node_id;
-
 	pgdat_resize_init(pgdat);
 
 	pgdat_init_numabalancing(pgdat);
@@ -6250,7 +6239,54 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
 	pgdat_page_ext_init(pgdat);
 	spin_lock_init(&pgdat->lru_lock);
 	lruvec_init(node_lruvec(pgdat));
+}
+
+static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
+							unsigned long remaining_pages)
+{
+	zone->managed_pages = remaining_pages;
+	zone_set_nid(zone, nid);
+	zone->name = zone_names[idx];
+	zone->zone_pgdat = NODE_DATA(nid);
+	spin_lock_init(&zone->lock);
+	zone_seqlock_init(zone);
+	zone_pcp_init(zone);
+}
+
+/*
+ * Set up the zone data structures
+ * - init pgdat internals
+ * - init all zones belonging to this node
+ *
+ * NOTE: this function is only called during memory hotplug
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __ref free_area_init_core_hotplug(int nid)
+{
+	enum zone_type z;
+	pg_data_t *pgdat = NODE_DATA(nid);
+
+	pgdat_init_internals(pgdat);
+	for (z = 0; z < MAX_NR_ZONES; z++)
+		zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+}
+#endif
+
+/*
+ * Set up the zone data structures:
+ * - mark all pages reserved
+ * - mark all memory queues empty
+ * - clear the memory bitmaps
+ *
+ * NOTE: pgdat should get zeroed by caller.
+ * NOTE: this function is only called during early init.
+ */
+static void __init free_area_init_core(struct pglist_data *pgdat)
+{
+	enum zone_type j;
+	int nid = pgdat->node_id;
 
+	pgdat_init_internals(pgdat);
 	pgdat->per_cpu_nodestats = &boot_nodestats;
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6298,13 +6334,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
 	 * when the bootmem allocator frees pages into the buddy system.
 	 * And all highmem pages will be managed by the buddy system.
 	 */
-	zone->managed_pages = freesize;
-	zone_set_nid(zone, nid);
-	zone->name = zone_names[j];
-	zone->zone_pgdat = pgdat;
-	spin_lock_init(&zone->lock);
-	zone_seqlock_init(zone);
-	zone_pcp_init(zone);
+	zone_init_internals(zone, j, nid, freesize);
 
 	if (!size)
 		continue;
@@ -6379,7 +6409,7 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
 static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
 #endif
 
-void __meminit free_area_init_node(int nid, unsigned long *zones_size,
+void __init free_area_init_node(int nid, unsigned long *zones_size,
 				   unsigned long node_start_pfn,
 				   unsigned long *zholes_size)
 {
@@ -6418,7 +6448,7 @@ void __meminit free_area_init_node(int nid, unsigned long *zones_size,
  * may be accessed (for example page_to_pfn() on some configuration accesses
  * flags). We must explicitly zero those struct pages.
  */
-void __meminit zero_resv_unavail(void)
+void __init zero_resv_unavail(void)
 {
 	phys_addr_t start, end;
 	unsigned long pfn;