-rw-r--r--	Documentation/sysctl/vm.txt	16
-rw-r--r--	include/linux/vmstat.h	10
-rw-r--r--	kernel/sysctl.c	9
-rw-r--r--	mm/mempolicy.c	3
-rw-r--r--	mm/page_alloc.c	6
-rw-r--r--	mm/vmstat.c	71
6 files changed, 115 insertions(+), 0 deletions(-)
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 3e579740b49f..055c8b3e1018 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -58,6 +58,7 @@ Currently, these files are in /proc/sys/vm:
 - percpu_pagelist_fraction
 - stat_interval
 - stat_refresh
+- numa_stat
 - swappiness
 - user_reserve_kbytes
 - vfs_cache_pressure
@@ -799,6 +800,21 @@ with no ill effects: errors and warnings on these stats are suppressed.)
 
 ==============================================================
 
+numa_stat
+
+This interface allows runtime configuration of numa statistics.
+
+When page allocation performance becomes a bottleneck and you can tolerate
+some possible tool breakage and decreased numa counter precision, you can
+do:
+	echo 0 > /proc/sys/vm/numa_stat
+
+When page allocation performance is not a bottleneck and you want all
+tooling to work, you can do:
+	echo 1 > /proc/sys/vm/numa_stat
+
+==============================================================
+
 swappiness
 
 This control is used to define how aggressive the kernel will swap
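The two echo one-liners above are the whole interface. For completeness, a
minimal userspace sketch in C that performs the same toggle (not part of the
patch; assumes root privileges and a kernel carrying this knob):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Write "0" or "1" to /proc/sys/vm/numa_stat; returns 0 on success. */
    static int set_numa_stat(int enable)
    {
            int fd = open("/proc/sys/vm/numa_stat", O_WRONLY);

            if (fd < 0)
                    return -1;
            if (write(fd, enable ? "1" : "0", 1) != 1) {
                    close(fd);
                    return -1;
            }
            return close(fd);
    }

    int main(void)
    {
            /* Trade numa counter precision for allocation speed. */
            if (set_numa_stat(0))
                    perror("numa_stat");
            return 0;
    }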
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 1e0cb72e0598..1779c9817b39 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -7,9 +7,19 @@
 #include <linux/mmzone.h>
 #include <linux/vm_event_item.h>
 #include <linux/atomic.h>
+#include <linux/static_key.h>
 
 extern int sysctl_stat_interval;
 
+#ifdef CONFIG_NUMA
+#define ENABLE_NUMA_STAT	1
+#define DISABLE_NUMA_STAT	0
+extern int sysctl_vm_numa_stat;
+DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key);
+extern int sysctl_vm_numa_stat_handler(struct ctl_table *table,
+		int write, void __user *buffer, size_t *length, loff_t *ppos);
+#endif
+
 #ifdef CONFIG_VM_EVENT_COUNTERS
 /*
  * Light weight per cpu counter implementation.
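The header publishes vm_numa_stat_key via DECLARE_STATIC_KEY_TRUE, so code in
other files can test it with static_branch_likely() at near-zero cost while
stats remain enabled. A self-contained sketch of that pattern as a toy module
(hypothetical demo_* names, not from this patch):

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/static_key.h>

    /* Defaults to true, mirroring vm_numa_stat_key above. */
    static DEFINE_STATIC_KEY_TRUE(demo_key);

    static void hot_path(void)
    {
            /* While the key is true, this test is effectively a no-op. */
            if (!static_branch_likely(&demo_key))
                    return;
            pr_info("accounting runs\n");
    }

    static int __init demo_init(void)
    {
            hot_path();                       /* prints */
            static_branch_disable(&demo_key); /* rewrites the branch */
            hot_path();                       /* returns early, no print */
            return 0;
    }

    static void __exit demo_exit(void)
    {
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");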
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7638e2f7fff8..4a13a389e99b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1356,6 +1356,15 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &hugetlb_mempolicy_sysctl_handler,
 	},
+	{
+		.procname	= "numa_stat",
+		.data		= &sysctl_vm_numa_stat,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_vm_numa_stat_handler,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 #endif
 	{
 		.procname	= "hugetlb_shm_group",
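The .extra1/.extra2 pointers reference sysctl.c's shared zero and one, so
proc_dointvec_minmax() rejects any write outside 0..1 before the handler ever
flips the static key. As a hedged illustration of the same table shape
registered from a module rather than the built-in vm_table (hypothetical
demo_* names):

    #include <linux/module.h>
    #include <linux/sysctl.h>

    static int demo_val;
    static int demo_min;		/* plays the role of 'zero' */
    static int demo_max = 1;	/* plays the role of 'one'  */

    static struct ctl_table demo_table[] = {
            {
                    .procname	= "demo_stat",
                    .data		= &demo_val,
                    .maxlen		= sizeof(int),
                    .mode		= 0644,
                    .proc_handler	= proc_dointvec_minmax,
                    .extra1		= &demo_min,
                    .extra2		= &demo_max,
            },
            { }
    };

    static struct ctl_table_header *demo_hdr;

    static int __init demo_init(void)
    {
            /* Appears as /proc/sys/vm/demo_stat. */
            demo_hdr = register_sysctl("vm", demo_table);
            return demo_hdr ? 0 : -ENOMEM;
    }

    static void __exit demo_exit(void)
    {
            unregister_sysctl_table(demo_hdr);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");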
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index dad166b736ba..4ce44d3ff03d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1915,6 +1915,9 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 	struct page *page;
 
 	page = __alloc_pages(gfp, order, nid);
+	/* skip NUMA_INTERLEAVE_HIT counter update if NUMA stats are disabled */
+	if (!static_branch_likely(&vm_numa_stat_key))
+		return page;
 	if (page && page_to_nid(page) == nid) {
 		preempt_disable();
 		__inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7ca668e946e5..67f523c4711a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -82,6 +82,8 @@ DEFINE_PER_CPU(int, numa_node);
82EXPORT_PER_CPU_SYMBOL(numa_node); 82EXPORT_PER_CPU_SYMBOL(numa_node);
83#endif 83#endif
84 84
85DEFINE_STATIC_KEY_TRUE(vm_numa_stat_key);
86
85#ifdef CONFIG_HAVE_MEMORYLESS_NODES 87#ifdef CONFIG_HAVE_MEMORYLESS_NODES
86/* 88/*
87 * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. 89 * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly.
@@ -2777,6 +2779,10 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 #ifdef CONFIG_NUMA
 	enum numa_stat_item local_stat = NUMA_LOCAL;
 
+	/* skip NUMA counter updates if NUMA stats are disabled */
+	if (!static_branch_likely(&vm_numa_stat_key))
+		return;
+
 	if (z->node != numa_node_id())
 		local_stat = NUMA_OTHER;
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7d11554861e4..40b2db6db6b1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -32,6 +32,77 @@
 
 #define NUMA_STATS_THRESHOLD (U16_MAX - 2)
 
+#ifdef CONFIG_NUMA
+int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
+
+/* zero numa counters within a zone */
+static void zero_zone_numa_counters(struct zone *zone)
+{
+	int item, cpu;
+
+	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
+		atomic_long_set(&zone->vm_numa_stat[item], 0);
+		for_each_online_cpu(cpu)
+			per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
+						= 0;
+	}
+}
+
+/* zero numa counters of all the populated zones */
+static void zero_zones_numa_counters(void)
+{
+	struct zone *zone;
+
+	for_each_populated_zone(zone)
+		zero_zone_numa_counters(zone);
+}
+
+/* zero global numa counters */
+static void zero_global_numa_counters(void)
+{
+	int item;
+
+	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
+		atomic_long_set(&vm_numa_stat[item], 0);
+}
+
+static void invalid_numa_statistics(void)
+{
+	zero_zones_numa_counters();
+	zero_global_numa_counters();
+}
+
+static DEFINE_MUTEX(vm_numa_stat_lock);
+
+int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int ret, oldval;
+
+	mutex_lock(&vm_numa_stat_lock);
+	if (write)
+		oldval = sysctl_vm_numa_stat;
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (ret || !write)
+		goto out;
+
+	if (oldval == sysctl_vm_numa_stat)
+		goto out;
+	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
+		static_branch_enable(&vm_numa_stat_key);
+		pr_info("enable numa statistics\n");
+	} else {
+		static_branch_disable(&vm_numa_stat_key);
+		invalid_numa_statistics();
+		pr_info("disable numa statistics, and clear numa counters\n");
+	}
+
+out:
+	mutex_unlock(&vm_numa_stat_lock);
+	return ret;
+}
+#endif
+
 #ifdef CONFIG_VM_EVENT_COUNTERS
 DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
 EXPORT_PER_CPU_SYMBOL(vm_event_states);
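To see the handler's disable path in action (static key off, then
invalid_numa_statistics() zeroing the per-zone and global counters), a counter
such as numa_hit can be sampled before and after the write. An illustrative
userspace check, assuming the standard per-node numastat layout in sysfs and
root privileges; a sketch, not part of the patch:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Return node0's numa_hit counter, or -1 on error. */
    static long read_numa_hit(void)
    {
            char name[64];
            long val;
            FILE *f = fopen("/sys/devices/system/node/node0/numastat", "r");

            if (!f)
                    return -1;
            while (fscanf(f, "%63s %ld", name, &val) == 2) {
                    if (!strcmp(name, "numa_hit")) {
                            fclose(f);
                            return val;
                    }
            }
            fclose(f);
            return -1;
    }

    int main(void)
    {
            printf("numa_hit before: %ld\n", read_numa_hit());
            if (system("echo 0 > /proc/sys/vm/numa_stat") != 0)
                    return 1;
            printf("numa_hit after disable: %ld\n", read_numa_hit());
            return 0;
    }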