summary | refs | log | tree | commit | diff | stats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
author: Nicholas Piggin <npiggin@gmail.com> 2019-07-11 23:59:12 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2019-07-12 14:05:46 -0400
commit: e03a5125ec7bd1d4e8b062816a8813436876dc7c (patch)
tree: 6fac3b0f329ee4d87c0fbb176607c81935f5760b /mm/page_alloc.c
parent: ec11408a1630eed2cb03db55b8b372267f5f1032 (diff)
mm/large system hash: clear hashdist when only one node with memory is booted
CONFIG_NUMA on 64-bit CPUs currently enables hashdist unconditionally even when booting on single node machines. This causes the large system hashes to be allocated with vmalloc, and mapped with small pages.

This change clears hashdist if only one node has come up with memory.

This results in the important large inode and dentry hashes using memblock allocations. All others are within 4MB size up to about 128GB of RAM, which allows them to be allocated from the linear map on most non-NUMA images.

Other big hashes like futex and TCP should eventually be moved over to the same style of allocation as those vfs caches that use HASH_EARLY if !hashdist, so they don't exceed MAX_ORDER on very large non-NUMA images.

This brings dTLB misses for linux kernel tree `git diff` from ~45,000 to ~8,000 on a Kaby Lake KVM guest with 8MB dentry hash and mitigations=off (performance is in the noise, under 1% difference, page tables are likely to be well cached for this workload).

Link: http://lkml.kernel.org/r/20190605144814.29319-2-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- mm/page_alloc.c 31
1 files changed, 18 insertions, 13 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 05143e0f821f..3a555ce69006 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7534,10 +7534,28 @@ static int page_alloc_cpu_dead(unsigned int cpu)
7534 return 0; 7534 return 0;
7535} 7535}
7536 7536
7537#ifdef CONFIG_NUMA
7538int hashdist = HASHDIST_DEFAULT;
7539
7540static int __init set_hashdist(char *str)
7541{
7542 if (!str)
7543 return 0;
7544 hashdist = simple_strtoul(str, &str, 0);
7545 return 1;
7546}
7547__setup("hashdist=", set_hashdist);
7548#endif
7549
7537void __init page_alloc_init(void) 7550void __init page_alloc_init(void)
7538{ 7551{
7539 int ret; 7552 int ret;
7540 7553
7554#ifdef CONFIG_NUMA
7555 if (num_node_state(N_MEMORY) == 1)
7556 hashdist = 0;
7557#endif
7558
7541 ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD, 7559 ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD,
7542 "mm/page_alloc:dead", NULL, 7560 "mm/page_alloc:dead", NULL,
7543 page_alloc_cpu_dead); 7561 page_alloc_cpu_dead);
@@ -7922,19 +7940,6 @@ out:
7922 return ret; 7940 return ret;
7923} 7941}
7924 7942
7925#ifdef CONFIG_NUMA
7926int hashdist = HASHDIST_DEFAULT;
7927
7928static int __init set_hashdist(char *str)
7929{
7930 if (!str)
7931 return 0;
7932 hashdist = simple_strtoul(str, &str, 0);
7933 return 1;
7934}
7935__setup("hashdist=", set_hashdist);
7936#endif
7937
7938#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES 7943#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
7939/* 7944/*
7940 * Returns the number of pages that arch has reserved but 7945 * Returns the number of pages that arch has reserved but