aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmstat.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmstat.c')
-rw-r--r--mm/vmstat.c161
1 files changed, 155 insertions, 6 deletions
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c7e4b8458023..daea02833e2e 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -87,8 +87,10 @@ void vm_events_fold_cpu(int cpu)
87 * vm_stat contains the global counters 87 * vm_stat contains the global counters
88 */ 88 */
89atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; 89atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
90atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
90atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp; 91atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
91EXPORT_SYMBOL(vm_zone_stat); 92EXPORT_SYMBOL(vm_zone_stat);
93EXPORT_SYMBOL(vm_numa_stat);
92EXPORT_SYMBOL(vm_node_stat); 94EXPORT_SYMBOL(vm_node_stat);
93 95
94#ifdef CONFIG_SMP 96#ifdef CONFIG_SMP
@@ -192,7 +194,10 @@ void refresh_zone_stat_thresholds(void)
192 194
193 per_cpu_ptr(zone->pageset, cpu)->stat_threshold 195 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
194 = threshold; 196 = threshold;
195 197#ifdef CONFIG_NUMA
198 per_cpu_ptr(zone->pageset, cpu)->numa_stat_threshold
199 = threshold;
200#endif
196 /* Base nodestat threshold on the largest populated zone. */ 201 /* Base nodestat threshold on the largest populated zone. */
197 pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold; 202 pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
198 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold 203 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
@@ -226,9 +231,14 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
226 continue; 231 continue;
227 232
228 threshold = (*calculate_pressure)(zone); 233 threshold = (*calculate_pressure)(zone);
229 for_each_online_cpu(cpu) 234 for_each_online_cpu(cpu) {
230 per_cpu_ptr(zone->pageset, cpu)->stat_threshold 235 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
231 = threshold; 236 = threshold;
237#ifdef CONFIG_NUMA
238 per_cpu_ptr(zone->pageset, cpu)->numa_stat_threshold
239 = threshold;
240#endif
241 }
232 } 242 }
233} 243}
234 244
@@ -604,6 +614,32 @@ EXPORT_SYMBOL(dec_node_page_state);
604 * Fold a differential into the global counters. 614 * Fold a differential into the global counters.
605 * Returns the number of counters updated. 615 * Returns the number of counters updated.
606 */ 616 */
617#ifdef CONFIG_NUMA
/*
 * CONFIG_NUMA variant of fold_diff(): takes an extra @numa_diff array.
 *
 * @zone_diff: NR_VM_ZONE_STAT_ITEMS deltas, added to vm_zone_stat[]
 * @numa_diff: NR_VM_NUMA_STAT_ITEMS deltas, added to vm_numa_stat[]
 * @node_diff: NR_VM_NODE_STAT_ITEMS deltas, added to vm_node_stat[]
 *
 * Every non-zero delta is applied to the matching global counter with
 * atomic_long_add(); zero entries are skipped, so no atomic operation is
 * issued for them.
 *
 * Returns the number of non-zero deltas that were applied, summed over
 * all three arrays.
 */
618static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
619{
620 int i;
621 int changes = 0;
622
/* Zone counters. */
623 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
624 if (zone_diff[i]) {
625 atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
626 changes++;
627 }
628
/* NUMA counters (the part that the non-NUMA variant does not have). */
629 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
630 if (numa_diff[i]) {
631 atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
632 changes++;
633 }
634
/* Node counters. */
635 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
636 if (node_diff[i]) {
637 atomic_long_add(node_diff[i], &vm_node_stat[i]);
638 changes++;
639 }
640 return changes;
641}
642#else
607static int fold_diff(int *zone_diff, int *node_diff) 643static int fold_diff(int *zone_diff, int *node_diff)
608{ 644{
609 int i; 645 int i;
@@ -622,6 +658,7 @@ static int fold_diff(int *zone_diff, int *node_diff)
622 } 658 }
623 return changes; 659 return changes;
624} 660}
661#endif /* CONFIG_NUMA */
625 662
626/* 663/*
627 * Update the zone counters for the current cpu. 664 * Update the zone counters for the current cpu.
@@ -645,6 +682,9 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
645 struct zone *zone; 682 struct zone *zone;
646 int i; 683 int i;
647 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; 684 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
685#ifdef CONFIG_NUMA
686 int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
687#endif
648 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; 688 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
649 int changes = 0; 689 int changes = 0;
650 690
@@ -666,6 +706,18 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
666 } 706 }
667 } 707 }
668#ifdef CONFIG_NUMA 708#ifdef CONFIG_NUMA
709 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
710 int v;
711
712 v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
713 if (v) {
714
715 atomic_long_add(v, &zone->vm_numa_stat[i]);
716 global_numa_diff[i] += v;
717 __this_cpu_write(p->expire, 3);
718 }
719 }
720
669 if (do_pagesets) { 721 if (do_pagesets) {
670 cond_resched(); 722 cond_resched();
671 /* 723 /*
@@ -712,7 +764,12 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
712 } 764 }
713 } 765 }
714 766
767#ifdef CONFIG_NUMA
768 changes += fold_diff(global_zone_diff, global_numa_diff,
769 global_node_diff);
770#else
715 changes += fold_diff(global_zone_diff, global_node_diff); 771 changes += fold_diff(global_zone_diff, global_node_diff);
772#endif
716 return changes; 773 return changes;
717} 774}
718 775
@@ -727,6 +784,9 @@ void cpu_vm_stats_fold(int cpu)
727 struct zone *zone; 784 struct zone *zone;
728 int i; 785 int i;
729 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; 786 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
787#ifdef CONFIG_NUMA
788 int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
789#endif
730 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; 790 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
731 791
732 for_each_populated_zone(zone) { 792 for_each_populated_zone(zone) {
@@ -743,6 +803,18 @@ void cpu_vm_stats_fold(int cpu)
743 atomic_long_add(v, &zone->vm_stat[i]); 803 atomic_long_add(v, &zone->vm_stat[i]);
744 global_zone_diff[i] += v; 804 global_zone_diff[i] += v;
745 } 805 }
806
807#ifdef CONFIG_NUMA
808 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
809 if (p->vm_numa_stat_diff[i]) {
810 int v;
811
812 v = p->vm_numa_stat_diff[i];
813 p->vm_numa_stat_diff[i] = 0;
814 atomic_long_add(v, &zone->vm_numa_stat[i]);
815 global_numa_diff[i] += v;
816 }
817#endif
746 } 818 }
747 819
748 for_each_online_pgdat(pgdat) { 820 for_each_online_pgdat(pgdat) {
@@ -761,7 +833,11 @@ void cpu_vm_stats_fold(int cpu)
761 } 833 }
762 } 834 }
763 835
836#ifdef CONFIG_NUMA
837 fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
838#else
764 fold_diff(global_zone_diff, global_node_diff); 839 fold_diff(global_zone_diff, global_node_diff);
840#endif
765} 841}
766 842
767/* 843/*
@@ -779,10 +855,38 @@ void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
779 atomic_long_add(v, &zone->vm_stat[i]); 855 atomic_long_add(v, &zone->vm_stat[i]);
780 atomic_long_add(v, &vm_zone_stat[i]); 856 atomic_long_add(v, &vm_zone_stat[i]);
781 } 857 }
858
859#ifdef CONFIG_NUMA
860 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
861 if (pset->vm_numa_stat_diff[i]) {
862 int v = pset->vm_numa_stat_diff[i];
863
864 pset->vm_numa_stat_diff[i] = 0;
865 atomic_long_add(v, &zone->vm_numa_stat[i]);
866 atomic_long_add(v, &vm_numa_stat[i]);
867 }
868#endif
782} 869}
783#endif 870#endif
784 871
785#ifdef CONFIG_NUMA 872#ifdef CONFIG_NUMA
/*
 * Increment the calling CPU's pending delta for NUMA counter @item of
 * @zone by one.
 *
 * The delta lives in the zone's per-cpu pageset (vm_numa_stat_diff[item],
 * an s8). Once the incremented delta exceeds the per-cpu
 * numa_stat_threshold, the delta plus an extra "overstep" of half the
 * threshold is handed to zone_numa_state_add() and the per-cpu delta is
 * reset to -overstep.
 *
 * NOTE(review): only the non-atomic __this_cpu_*() accessors are used here;
 * presumably callers must prevent preemption/interrupt races themselves -
 * confirm against the call sites.
 * NOTE(review): zone_numa_state_add() is defined outside this view;
 * presumably it updates the zone (and global) NUMA counters - confirm.
 */
873void __inc_numa_state(struct zone *zone,
874 enum numa_stat_item item)
875{
876 struct per_cpu_pageset __percpu *pcp = zone->pageset;
877 s8 __percpu *p = pcp->vm_numa_stat_diff + item;
878 s8 v, t;
879
/* v is the per-cpu delta after the increment, t the current threshold. */
880 v = __this_cpu_inc_return(*p);
881 t = __this_cpu_read(pcp->numa_stat_threshold);
882 if (unlikely(v > t)) {
/* Flush half a threshold more than was accumulated ... */
883 s8 overstep = t >> 1;
884
885 zone_numa_state_add(v + overstep, zone, item);
/* ... and start the per-cpu delta from -overstep to compensate. */
886 __this_cpu_write(*p, -overstep);
887 }
888}
889
786/* 890/*
787 * Determine the per node value of a stat item. This function 891 * Determine the per node value of a stat item. This function
788 * is called frequently in a NUMA machine, so try to be as 892 * is called frequently in a NUMA machine, so try to be as
@@ -801,6 +905,19 @@ unsigned long sum_zone_node_page_state(int node,
801 return count; 905 return count;
802} 906}
803 907
/*
 * Sum NUMA counter @item over the zones of node @node.
 *
 * Walks all MAX_NR_ZONES entries of NODE_DATA(node)->node_zones and adds up
 * zone_numa_state() for each of them; no populated-zone check is made here.
 *
 * Returns the accumulated value.
 */
908unsigned long sum_zone_numa_state(int node,
909 enum numa_stat_item item)
910{
911 struct zone *zones = NODE_DATA(node)->node_zones;
912 int i;
913 unsigned long count = 0;
914
915 for (i = 0; i < MAX_NR_ZONES; i++)
916 count += zone_numa_state(zones + i, item);
917
918 return count;
919}
920
804/* 921/*
805 * Determine the per node value of a stat item. 922 * Determine the per node value of a stat item.
806 */ 923 */
@@ -937,6 +1054,9 @@ const char * const vmstat_text[] = {
937#if IS_ENABLED(CONFIG_ZSMALLOC) 1054#if IS_ENABLED(CONFIG_ZSMALLOC)
938 "nr_zspages", 1055 "nr_zspages",
939#endif 1056#endif
1057 "nr_free_cma",
1058
1059 /* enum numa_stat_item counters */
940#ifdef CONFIG_NUMA 1060#ifdef CONFIG_NUMA
941 "numa_hit", 1061 "numa_hit",
942 "numa_miss", 1062 "numa_miss",
@@ -945,7 +1065,6 @@ const char * const vmstat_text[] = {
945 "numa_local", 1065 "numa_local",
946 "numa_other", 1066 "numa_other",
947#endif 1067#endif
948 "nr_free_cma",
949 1068
950 /* Node-based counters */ 1069 /* Node-based counters */
951 "nr_inactive_anon", 1070 "nr_inactive_anon",
@@ -1106,7 +1225,6 @@ const char * const vmstat_text[] = {
1106}; 1225};
1107#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ 1226#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
1108 1227
1109
1110#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \ 1228#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
1111 defined(CONFIG_PROC_FS) 1229 defined(CONFIG_PROC_FS)
1112static void *frag_start(struct seq_file *m, loff_t *pos) 1230static void *frag_start(struct seq_file *m, loff_t *pos)
@@ -1384,7 +1502,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1384 seq_printf(m, "\n per-node stats"); 1502 seq_printf(m, "\n per-node stats");
1385 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 1503 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1386 seq_printf(m, "\n %-12s %lu", 1504 seq_printf(m, "\n %-12s %lu",
1387 vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], 1505 vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
1506 NR_VM_NUMA_STAT_ITEMS],
1388 node_page_state(pgdat, i)); 1507 node_page_state(pgdat, i));
1389 } 1508 }
1390 } 1509 }
@@ -1421,6 +1540,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1421 seq_printf(m, "\n %-12s %lu", vmstat_text[i], 1540 seq_printf(m, "\n %-12s %lu", vmstat_text[i],
1422 zone_page_state(zone, i)); 1541 zone_page_state(zone, i));
1423 1542
1543#ifdef CONFIG_NUMA
1544 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
1545 seq_printf(m, "\n %-12s %lu",
1546 vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
1547 zone_numa_state(zone, i));
1548#endif
1549
1424 seq_printf(m, "\n pagesets"); 1550 seq_printf(m, "\n pagesets");
1425 for_each_online_cpu(i) { 1551 for_each_online_cpu(i) {
1426 struct per_cpu_pageset *pageset; 1552 struct per_cpu_pageset *pageset;
@@ -1497,6 +1623,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
1497 if (*pos >= ARRAY_SIZE(vmstat_text)) 1623 if (*pos >= ARRAY_SIZE(vmstat_text))
1498 return NULL; 1624 return NULL;
1499 stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) + 1625 stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
1626 NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
1500 NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) + 1627 NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
1501 NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long); 1628 NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
1502 1629
@@ -1512,6 +1639,12 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
1512 v[i] = global_zone_page_state(i); 1639 v[i] = global_zone_page_state(i);
1513 v += NR_VM_ZONE_STAT_ITEMS; 1640 v += NR_VM_ZONE_STAT_ITEMS;
1514 1641
1642#ifdef CONFIG_NUMA
1643 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
1644 v[i] = global_numa_state(i);
1645 v += NR_VM_NUMA_STAT_ITEMS;
1646#endif
1647
1515 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 1648 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
1516 v[i] = global_node_page_state(i); 1649 v[i] = global_node_page_state(i);
1517 v += NR_VM_NODE_STAT_ITEMS; 1650 v += NR_VM_NODE_STAT_ITEMS;
@@ -1613,6 +1746,16 @@ int vmstat_refresh(struct ctl_table *table, int write,
1613 err = -EINVAL; 1746 err = -EINVAL;
1614 } 1747 }
1615 } 1748 }
1749#ifdef CONFIG_NUMA
1750 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
1751 val = atomic_long_read(&vm_numa_stat[i]);
1752 if (val < 0) {
1753 pr_warn("%s: %s %ld\n",
1754 __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
1755 err = -EINVAL;
1756 }
1757 }
1758#endif
1616 if (err) 1759 if (err)
1617 return err; 1760 return err;
1618 if (write) 1761 if (write)
@@ -1654,13 +1797,19 @@ static bool need_update(int cpu)
1654 struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu); 1797 struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
1655 1798
1656 BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1); 1799 BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
1800#ifdef CONFIG_NUMA
1801 BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 1);
1802#endif
1657 /* 1803 /*
1658 * The fast way of checking if there are any vmstat diffs. 1804 * The fast way of checking if there are any vmstat diffs.
1659 * This works because the diffs are byte sized items. 1805 * This works because the diffs are byte sized items.
1660 */ 1806 */
1661 if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS)) 1807 if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
1662 return true; 1808 return true;
1663 1809#ifdef CONFIG_NUMA
1810 if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS))
1811 return true;
1812#endif
1664 } 1813 }
1665 return false; 1814 return false;
1666} 1815}