aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorOscar Salvador <osalvador@suse.de>2018-08-22 00:53:43 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-22 13:52:45 -0400
commit03e85f9d5f1f8c74f127c5f7a87575d74a78d248 (patch)
tree385ea9d3415d2a2633ea278904050ef8eb7d4fec /mm/page_alloc.c
parent0188dc98ad5c7c361d46175623471d4be0fb8610 (diff)
mm/page_alloc: Introduce free_area_init_core_hotplug
Currently, whenever a new node is created/re-used from the memhotplug path, we call free_area_init_node()->free_area_init_core(). But there is some code that we do not really need to run when we are coming from such a path. free_area_init_core() performs the following actions: 1) Initializes pgdat internals, such as spinlock, waitqueues and more. 2) Account # nr_all_pages and # nr_kernel_pages. These values are used later on when creating hash tables. 3) Account number of managed_pages per zone, subtracting dma_reserved and memmap pages. 4) Initializes some fields of the zone structure data 5) Calls init_currently_empty_zone to initialize all the freelists 6) Calls memmap_init to initialize all pages belonging to a certain zone When called from the memhotplug path, free_area_init_core() only performs actions #1 and #4. Action #2 is pointless as the zones do not have any pages since either the node was freed, or we are re-using it; either way all zones belonging to this node should have 0 pages. For the same reason, action #3 always results in managed_pages being 0. Actions #5 and #6 are performed later on when onlining the pages: online_pages()->move_pfn_range_to_zone()->init_currently_empty_zone() online_pages()->move_pfn_range_to_zone()->memmap_init_zone() This patch does two things: First, it moves the node/zone initialization to their own functions, which allows us to create a small version of free_area_init_core, where we only perform: 1) Initialization of pgdat internals, such as spinlock, waitqueues and more 4) Initialization of some fields of the zone structure data These two functions are: pgdat_init_internals() and zone_init_internals(). The second thing this patch does is introduce free_area_init_core_hotplug(), the memhotplug version of free_area_init_core(): Currently, we call free_area_init_node() from the memhotplug path. In there, we set some pgdat's fields, and call calculate_node_totalpages(). 
calculate_node_totalpages() calculates the # of pages the node has. Since the node is either new, or we are re-using it, the zones belonging to this node should not have any pages, so there is no point to calculate this now. Actually, we re-set these values to 0 later on with the calls to: reset_node_managed_pages() reset_node_present_pages() The # of pages per node and the # of pages per zone will be calculated when onlining the pages: online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_zone_range() online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_pgdat_range() Also, since free_area_init_core/free_area_init_node will now only get called during early init, let us replace __paginginit with __init, so their code gets freed up. [osalvador@techadventures.net: fix section usage] Link: http://lkml.kernel.org/r/20180731101752.GA473@techadventures.net [osalvador@suse.de: v6] Link: http://lkml.kernel.org/r/20180801122348.21588-6-osalvador@techadventures.net Link: http://lkml.kernel.org/r/20180730101757.28058-5-osalvador@techadventures.net Signed-off-by: Oscar Salvador <osalvador@suse.de> Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com> Cc: Aaron Lu <aaron.lu@intel.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c78
1 files changed, 54 insertions, 24 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5b939bd1bff9..c677c1506d73 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6140,7 +6140,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
6140#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE 6140#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
6141 6141
6142/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ 6142/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
6143void __meminit set_pageblock_order(void) 6143void __init set_pageblock_order(void)
6144{ 6144{
6145 unsigned int order; 6145 unsigned int order;
6146 6146
@@ -6168,13 +6168,13 @@ void __meminit set_pageblock_order(void)
6168 * include/linux/pageblock-flags.h for the values of pageblock_order based on 6168 * include/linux/pageblock-flags.h for the values of pageblock_order based on
6169 * the kernel config 6169 * the kernel config
6170 */ 6170 */
6171void __meminit set_pageblock_order(void) 6171void __init set_pageblock_order(void)
6172{ 6172{
6173} 6173}
6174 6174
6175#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ 6175#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
6176 6176
6177static unsigned long __meminit calc_memmap_size(unsigned long spanned_pages, 6177static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
6178 unsigned long present_pages) 6178 unsigned long present_pages)
6179{ 6179{
6180 unsigned long pages = spanned_pages; 6180 unsigned long pages = spanned_pages;
@@ -6225,19 +6225,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat)
6225static void pgdat_init_kcompactd(struct pglist_data *pgdat) {} 6225static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
6226#endif 6226#endif
6227 6227
6228/* 6228static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
6229 * Set up the zone data structures:
6230 * - mark all pages reserved
6231 * - mark all memory queues empty
6232 * - clear the memory bitmaps
6233 *
6234 * NOTE: pgdat should get zeroed by caller.
6235 */
6236static void __meminit free_area_init_core(struct pglist_data *pgdat)
6237{ 6229{
6238 enum zone_type j;
6239 int nid = pgdat->node_id;
6240
6241 pgdat_resize_init(pgdat); 6230 pgdat_resize_init(pgdat);
6242 6231
6243 pgdat_init_numabalancing(pgdat); 6232 pgdat_init_numabalancing(pgdat);
@@ -6250,7 +6239,54 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
6250 pgdat_page_ext_init(pgdat); 6239 pgdat_page_ext_init(pgdat);
6251 spin_lock_init(&pgdat->lru_lock); 6240 spin_lock_init(&pgdat->lru_lock);
6252 lruvec_init(node_lruvec(pgdat)); 6241 lruvec_init(node_lruvec(pgdat));
6242}
6243
6244static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
6245 unsigned long remaining_pages)
6246{
6247 zone->managed_pages = remaining_pages;
6248 zone_set_nid(zone, nid);
6249 zone->name = zone_names[idx];
6250 zone->zone_pgdat = NODE_DATA(nid);
6251 spin_lock_init(&zone->lock);
6252 zone_seqlock_init(zone);
6253 zone_pcp_init(zone);
6254}
6255
6256/*
6257 * Set up the zone data structures
6258 * - init pgdat internals
6259 * - init all zones belonging to this node
6260 *
6261 * NOTE: this function is only called during memory hotplug
6262 */
6263#ifdef CONFIG_MEMORY_HOTPLUG
6264void __ref free_area_init_core_hotplug(int nid)
6265{
6266 enum zone_type z;
6267 pg_data_t *pgdat = NODE_DATA(nid);
6268
6269 pgdat_init_internals(pgdat);
6270 for (z = 0; z < MAX_NR_ZONES; z++)
6271 zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
6272}
6273#endif
6274
6275/*
6276 * Set up the zone data structures:
6277 * - mark all pages reserved
6278 * - mark all memory queues empty
6279 * - clear the memory bitmaps
6280 *
6281 * NOTE: pgdat should get zeroed by caller.
6282 * NOTE: this function is only called during early init.
6283 */
6284static void __init free_area_init_core(struct pglist_data *pgdat)
6285{
6286 enum zone_type j;
6287 int nid = pgdat->node_id;
6253 6288
6289 pgdat_init_internals(pgdat);
6254 pgdat->per_cpu_nodestats = &boot_nodestats; 6290 pgdat->per_cpu_nodestats = &boot_nodestats;
6255 6291
6256 for (j = 0; j < MAX_NR_ZONES; j++) { 6292 for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6298,13 +6334,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
6298 * when the bootmem allocator frees pages into the buddy system. 6334 * when the bootmem allocator frees pages into the buddy system.
6299 * And all highmem pages will be managed by the buddy system. 6335 * And all highmem pages will be managed by the buddy system.
6300 */ 6336 */
6301 zone->managed_pages = freesize; 6337 zone_init_internals(zone, j, nid, freesize);
6302 zone_set_nid(zone, nid);
6303 zone->name = zone_names[j];
6304 zone->zone_pgdat = pgdat;
6305 spin_lock_init(&zone->lock);
6306 zone_seqlock_init(zone);
6307 zone_pcp_init(zone);
6308 6338
6309 if (!size) 6339 if (!size)
6310 continue; 6340 continue;
@@ -6379,7 +6409,7 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
6379static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {} 6409static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
6380#endif 6410#endif
6381 6411
6382void __meminit free_area_init_node(int nid, unsigned long *zones_size, 6412void __init free_area_init_node(int nid, unsigned long *zones_size,
6383 unsigned long node_start_pfn, 6413 unsigned long node_start_pfn,
6384 unsigned long *zholes_size) 6414 unsigned long *zholes_size)
6385{ 6415{
@@ -6418,7 +6448,7 @@ void __meminit free_area_init_node(int nid, unsigned long *zones_size,
6418 * may be accessed (for example page_to_pfn() on some configuration accesses 6448 * may be accessed (for example page_to_pfn() on some configuration accesses
6419 * flags). We must explicitly zero those struct pages. 6449 * flags). We must explicitly zero those struct pages.
6420 */ 6450 */
6421void __meminit zero_resv_unavail(void) 6451void __init zero_resv_unavail(void)
6422{ 6452{
6423 phys_addr_t start, end; 6453 phys_addr_t start, end;
6424 unsigned long pfn; 6454 unsigned long pfn;