author	Arun KS <arunks@codeaurora.org>	2018-12-28 03:34:24 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-12-28 15:11:47 -0500
commit	9705bea5f833f4fc21d5bef5fce7348427f76ea4 (patch)
tree	37604aa5b680736eb176283f970e1c1d7d27bbf3
parent	3d6357de8aa09e1966770dc1171c72679946464f (diff)
mm: convert zone->managed_pages to atomic variable
Updates to totalram_pages, zone->managed_pages and totalhigh_pages are protected by managed_page_count_lock, but readers never take that lock.  Convert these variables to atomic so that readers cannot see a torn store.

This patch converts zone->managed_pages.  Subsequent patches will convert totalram_pages and totalhigh_pages, and eventually managed_page_count_lock will be removed.

The main motivation is that handling of managed_page_count_lock was complicating things.  It was discussed at length here, https://lore.kernel.org/patchwork/patch/995739/#1181785  It seems better to remove the lock and convert the variables to atomic, with the prevention of potential store-to-read tearing as a bonus.

Link: http://lkml.kernel.org/r/1542090790-21750-3-git-send-email-arunks@codeaurora.org
Signed-off-by: Arun KS <arunks@codeaurora.org>
Suggested-by: Michal Hocko <mhocko@suse.com>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
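[Editor's note: a minimal userspace analogue of the pattern this series moves to, written with C11 stdatomic rather than the kernel's atomic_long_t API; the struct and function names (counter, counter_add, counter_read) are purely illustrative and not taken from the kernel tree. Writers still serialize among themselves, but readers simply load the counter and can never observe a torn store.]

	#include <stdatomic.h>
	#include <pthread.h>

	struct counter {
		pthread_mutex_t lock;	/* serializes writers only */
		atomic_long managed;	/* read locklessly by readers */
	};

	/* writer side: still serialized, like adjust_managed_page_count() */
	static void counter_add(struct counter *c, long delta)
	{
		pthread_mutex_lock(&c->lock);
		atomic_fetch_add_explicit(&c->managed, delta,
					  memory_order_relaxed);
		pthread_mutex_unlock(&c->lock);
	}

	/* reader side: lockless, analogous to zone_managed_pages() */
	static long counter_read(struct counter *c)
	{
		return atomic_load_explicit(&c->managed,
					    memory_order_relaxed);
	}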
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_crat.c	2
-rw-r--r--	include/linux/mmzone.h	9
-rw-r--r--	lib/show_mem.c	2
-rw-r--r--	mm/memblock.c	2
-rw-r--r--	mm/page_alloc.c	44
-rw-r--r--	mm/vmstat.c	4
6 files changed, 34 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index c02adbbeef2a..b7bc7d7d048f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -853,7 +853,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
 	 */
 	pgdat = NODE_DATA(numa_node_id);
 	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
-		mem_in_bytes += pgdat->node_zones[zone_type].managed_pages;
+		mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
 	mem_in_bytes <<= PAGE_SHIFT;
 
 	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 077d797d1f60..a23e34e21178 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -435,7 +435,7 @@ struct zone {
 	 * adjust_managed_page_count() should be used instead of directly
 	 * touching zone->managed_pages and totalram_pages.
 	 */
-	unsigned long		managed_pages;
+	atomic_long_t		managed_pages;
 	unsigned long		spanned_pages;
 	unsigned long		present_pages;
 
@@ -524,6 +524,11 @@ enum pgdat_flags {
 	PGDAT_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
 };
 
+static inline unsigned long zone_managed_pages(struct zone *zone)
+{
+	return (unsigned long)atomic_long_read(&zone->managed_pages);
+}
+
 static inline unsigned long zone_end_pfn(const struct zone *zone)
 {
 	return zone->zone_start_pfn + zone->spanned_pages;
@@ -820,7 +825,7 @@ static inline bool is_dev_zone(const struct zone *zone)
  */
 static inline bool managed_zone(struct zone *zone)
 {
-	return zone->managed_pages;
+	return zone_managed_pages(zone);
 }
 
 /* Returns true if a zone has memory */
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 0beaa1d899aa..eefe67d50e84 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -28,7 +28,7 @@ void show_mem(unsigned int filter, nodemask_t *nodemask)
 				continue;
 
 			total += zone->present_pages;
-			reserved += zone->present_pages - zone->managed_pages;
+			reserved += zone->present_pages - zone_managed_pages(zone);
 
 			if (is_highmem_idx(zoneid))
 				highmem += zone->present_pages;
diff --git a/mm/memblock.c b/mm/memblock.c
index 81ae63ca78d0..0068f87af1e8 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1950,7 +1950,7 @@ void reset_node_managed_pages(pg_data_t *pgdat)
 	struct zone *z;
 
 	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
-		z->managed_pages = 0;
+		atomic_long_set(&z->managed_pages, 0);
 }
 
 void __init reset_all_zones_managed_pages(void)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b79e79caea99..4b5c4ff68f18 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1280,7 +1280,7 @@ static void __init __free_pages_boot_core(struct page *page, unsigned int order)
 	__ClearPageReserved(p);
 	set_page_count(p, 0);
 
-	page_zone(page)->managed_pages += nr_pages;
+	atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
 	set_page_refcounted(page);
 	__free_pages(page, order);
 }
@@ -2259,7 +2259,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
 	 * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
 	 * Check is race-prone but harmless.
 	 */
-	max_managed = (zone->managed_pages / 100) + pageblock_nr_pages;
+	max_managed = (zone_managed_pages(zone) / 100) + pageblock_nr_pages;
 	if (zone->nr_reserved_highatomic >= max_managed)
 		return;
 
@@ -4661,7 +4661,7 @@ static unsigned long nr_free_zone_pages(int offset)
 	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
 
 	for_each_zone_zonelist(zone, z, zonelist, offset) {
-		unsigned long size = zone->managed_pages;
+		unsigned long size = zone_managed_pages(zone);
 		unsigned long high = high_wmark_pages(zone);
 		if (size > high)
 			sum += size - high;
@@ -4768,7 +4768,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 	pg_data_t *pgdat = NODE_DATA(nid);
 
 	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
-		managed_pages += pgdat->node_zones[zone_type].managed_pages;
+		managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
 	val->totalram = managed_pages;
 	val->sharedram = node_page_state(pgdat, NR_SHMEM);
 	val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
@@ -4777,7 +4777,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 		struct zone *zone = &pgdat->node_zones[zone_type];
 
 		if (is_highmem(zone)) {
-			managed_highpages += zone->managed_pages;
+			managed_highpages += zone_managed_pages(zone);
 			free_highpages += zone_page_state(zone, NR_FREE_PAGES);
 		}
 	}
@@ -4984,7 +4984,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
 			K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
 			K(zone->present_pages),
-			K(zone->managed_pages),
+			K(zone_managed_pages(zone)),
 			K(zone_page_state(zone, NR_MLOCK)),
 			zone_page_state(zone, NR_KERNEL_STACK_KB),
 			K(zone_page_state(zone, NR_PAGETABLE)),
@@ -5656,7 +5656,7 @@ static int zone_batchsize(struct zone *zone)
 	 * The per-cpu-pages pools are set to around 1000th of the
 	 * size of the zone.
 	 */
-	batch = zone->managed_pages / 1024;
+	batch = zone_managed_pages(zone) / 1024;
 	/* But no more than a meg. */
 	if (batch * PAGE_SIZE > 1024 * 1024)
 		batch = (1024 * 1024) / PAGE_SIZE;
@@ -5766,7 +5766,7 @@ static void pageset_set_high_and_batch(struct zone *zone,
 {
 	if (percpu_pagelist_fraction)
 		pageset_set_high(pcp,
-			(zone->managed_pages /
+			(zone_managed_pages(zone) /
 				percpu_pagelist_fraction));
 	else
 		pageset_set_batch(pcp, zone_batchsize(zone));
@@ -6323,7 +6323,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
 							unsigned long remaining_pages)
 {
-	zone->managed_pages = remaining_pages;
+	atomic_long_set(&zone->managed_pages, remaining_pages);
 	zone_set_nid(zone, nid);
 	zone->name = zone_names[idx];
 	zone->zone_pgdat = NODE_DATA(nid);
@@ -7076,7 +7076,7 @@ early_param("movablecore", cmdline_parse_movablecore);
 void adjust_managed_page_count(struct page *page, long count)
 {
 	spin_lock(&managed_page_count_lock);
-	page_zone(page)->managed_pages += count;
+	atomic_long_add(count, &page_zone(page)->managed_pages);
 	totalram_pages += count;
 #ifdef CONFIG_HIGHMEM
 	if (PageHighMem(page))
@@ -7124,7 +7124,7 @@ void free_highmem_page(struct page *page)
 {
 	__free_reserved_page(page);
 	totalram_pages++;
-	page_zone(page)->managed_pages++;
+	atomic_long_inc(&page_zone(page)->managed_pages);
 	totalhigh_pages++;
 }
 #endif
@@ -7257,7 +7257,7 @@ static void calculate_totalreserve_pages(void)
 		for (i = 0; i < MAX_NR_ZONES; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 			long max = 0;
-			unsigned long managed_pages = zone->managed_pages;
+			unsigned long managed_pages = zone_managed_pages(zone);
 
 			/* Find valid and maximum lowmem_reserve in the zone */
 			for (j = i; j < MAX_NR_ZONES; j++) {
@@ -7293,7 +7293,7 @@ static void setup_per_zone_lowmem_reserve(void)
 	for_each_online_pgdat(pgdat) {
 		for (j = 0; j < MAX_NR_ZONES; j++) {
 			struct zone *zone = pgdat->node_zones + j;
-			unsigned long managed_pages = zone->managed_pages;
+			unsigned long managed_pages = zone_managed_pages(zone);
 
 			zone->lowmem_reserve[j] = 0;
 
@@ -7311,7 +7311,7 @@ static void setup_per_zone_lowmem_reserve(void)
 					lower_zone->lowmem_reserve[j] =
 						managed_pages / sysctl_lowmem_reserve_ratio[idx];
 				}
-				managed_pages += lower_zone->managed_pages;
+				managed_pages += zone_managed_pages(lower_zone);
 			}
 		}
 	}
@@ -7330,14 +7330,14 @@ static void __setup_per_zone_wmarks(void)
 	/* Calculate total number of !ZONE_HIGHMEM pages */
 	for_each_zone(zone) {
 		if (!is_highmem(zone))
-			lowmem_pages += zone->managed_pages;
+			lowmem_pages += zone_managed_pages(zone);
 	}
 
 	for_each_zone(zone) {
 		u64 tmp;
 
 		spin_lock_irqsave(&zone->lock, flags);
-		tmp = (u64)pages_min * zone->managed_pages;
+		tmp = (u64)pages_min * zone_managed_pages(zone);
 		do_div(tmp, lowmem_pages);
 		if (is_highmem(zone)) {
 			/*
@@ -7351,7 +7351,7 @@ static void __setup_per_zone_wmarks(void)
 			 */
 			unsigned long min_pages;
 
-			min_pages = zone->managed_pages / 1024;
+			min_pages = zone_managed_pages(zone) / 1024;
 			min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
 			zone->watermark[WMARK_MIN] = min_pages;
 		} else {
@@ -7368,7 +7368,7 @@ static void __setup_per_zone_wmarks(void)
 		 * ensure a minimum size on small systems.
 		 */
 		tmp = max_t(u64, tmp >> 2,
-			    mult_frac(zone->managed_pages,
+			    mult_frac(zone_managed_pages(zone),
 				      watermark_scale_factor, 10000));
 
 		zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
@@ -7498,8 +7498,8 @@ static void setup_min_unmapped_ratio(void)
 		pgdat->min_unmapped_pages = 0;
 
 	for_each_zone(zone)
-		zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
-				sysctl_min_unmapped_ratio) / 100;
+		zone->zone_pgdat->min_unmapped_pages += (zone_managed_pages(zone) *
+				sysctl_min_unmapped_ratio) / 100;
 }
 
 
@@ -7526,8 +7526,8 @@ static void setup_min_slab_ratio(void)
 		pgdat->min_slab_pages = 0;
 
 	for_each_zone(zone)
-		zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
-				sysctl_min_slab_ratio) / 100;
+		zone->zone_pgdat->min_slab_pages += (zone_managed_pages(zone) *
+				sysctl_min_slab_ratio) / 100;
 }
 
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 9c624595e904..83b30edc2f7f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -227,7 +227,7 @@ int calculate_normal_threshold(struct zone *zone)
 	 * 125		1024		10	16-32 GB	9
 	 */
 
-	mem = zone->managed_pages >> (27 - PAGE_SHIFT);
+	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
 
 	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
 
@@ -1569,7 +1569,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   high_wmark_pages(zone),
 		   zone->spanned_pages,
 		   zone->present_pages,
-		   zone->managed_pages);
+		   zone_managed_pages(zone));
 
 	seq_printf(m,
 		   "\n protection: (%ld",