diff options
Diffstat (limited to 'mm/vmstat.c')
-rw-r--r-- | mm/vmstat.c | 161 |
1 files changed, 155 insertions, 6 deletions
diff --git a/mm/vmstat.c b/mm/vmstat.c index c7e4b8458023..daea02833e2e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -87,8 +87,10 @@ void vm_events_fold_cpu(int cpu) | |||
87 | * vm_stat contains the global counters | 87 | * vm_stat contains the global counters |
88 | */ | 88 | */ |
89 | atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; | 89 | atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; |
90 | atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp; | ||
90 | atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp; | 91 | atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp; |
91 | EXPORT_SYMBOL(vm_zone_stat); | 92 | EXPORT_SYMBOL(vm_zone_stat); |
93 | EXPORT_SYMBOL(vm_numa_stat); | ||
92 | EXPORT_SYMBOL(vm_node_stat); | 94 | EXPORT_SYMBOL(vm_node_stat); |
93 | 95 | ||
94 | #ifdef CONFIG_SMP | 96 | #ifdef CONFIG_SMP |
@@ -192,7 +194,10 @@ void refresh_zone_stat_thresholds(void) | |||
192 | 194 | ||
193 | per_cpu_ptr(zone->pageset, cpu)->stat_threshold | 195 | per_cpu_ptr(zone->pageset, cpu)->stat_threshold |
194 | = threshold; | 196 | = threshold; |
195 | 197 | #ifdef CONFIG_NUMA | |
198 | per_cpu_ptr(zone->pageset, cpu)->numa_stat_threshold | ||
199 | = threshold; | ||
200 | #endif | ||
196 | /* Base nodestat threshold on the largest populated zone. */ | 201 | /* Base nodestat threshold on the largest populated zone. */ |
197 | pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold; | 202 | pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold; |
198 | per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold | 203 | per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold |
@@ -226,9 +231,14 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat, | |||
226 | continue; | 231 | continue; |
227 | 232 | ||
228 | threshold = (*calculate_pressure)(zone); | 233 | threshold = (*calculate_pressure)(zone); |
229 | for_each_online_cpu(cpu) | 234 | for_each_online_cpu(cpu) { |
230 | per_cpu_ptr(zone->pageset, cpu)->stat_threshold | 235 | per_cpu_ptr(zone->pageset, cpu)->stat_threshold |
231 | = threshold; | 236 | = threshold; |
237 | #ifdef CONFIG_NUMA | ||
238 | per_cpu_ptr(zone->pageset, cpu)->numa_stat_threshold | ||
239 | = threshold; | ||
240 | #endif | ||
241 | } | ||
232 | } | 242 | } |
233 | } | 243 | } |
234 | 244 | ||
@@ -604,6 +614,32 @@ EXPORT_SYMBOL(dec_node_page_state); | |||
604 | * Fold a differential into the global counters. | 614 | * Fold a differential into the global counters. |
605 | * Returns the number of counters updated. | 615 | * Returns the number of counters updated. |
606 | */ | 616 | */ |
617 | #ifdef CONFIG_NUMA | ||
618 | static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff) | ||
619 | { | ||
620 | int i; | ||
621 | int changes = 0; | ||
622 | |||
623 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | ||
624 | if (zone_diff[i]) { | ||
625 | atomic_long_add(zone_diff[i], &vm_zone_stat[i]); | ||
626 | changes++; | ||
627 | } | ||
628 | |||
629 | for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) | ||
630 | if (numa_diff[i]) { | ||
631 | atomic_long_add(numa_diff[i], &vm_numa_stat[i]); | ||
632 | changes++; | ||
633 | } | ||
634 | |||
635 | for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) | ||
636 | if (node_diff[i]) { | ||
637 | atomic_long_add(node_diff[i], &vm_node_stat[i]); | ||
638 | changes++; | ||
639 | } | ||
640 | return changes; | ||
641 | } | ||
642 | #else | ||
607 | static int fold_diff(int *zone_diff, int *node_diff) | 643 | static int fold_diff(int *zone_diff, int *node_diff) |
608 | { | 644 | { |
609 | int i; | 645 | int i; |
@@ -622,6 +658,7 @@ static int fold_diff(int *zone_diff, int *node_diff) | |||
622 | } | 658 | } |
623 | return changes; | 659 | return changes; |
624 | } | 660 | } |
661 | #endif /* CONFIG_NUMA */ | ||
625 | 662 | ||
626 | /* | 663 | /* |
627 | * Update the zone counters for the current cpu. | 664 | * Update the zone counters for the current cpu. |
@@ -645,6 +682,9 @@ static int refresh_cpu_vm_stats(bool do_pagesets) | |||
645 | struct zone *zone; | 682 | struct zone *zone; |
646 | int i; | 683 | int i; |
647 | int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; | 684 | int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; |
685 | #ifdef CONFIG_NUMA | ||
686 | int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, }; | ||
687 | #endif | ||
648 | int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; | 688 | int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; |
649 | int changes = 0; | 689 | int changes = 0; |
650 | 690 | ||
@@ -666,6 +706,18 @@ static int refresh_cpu_vm_stats(bool do_pagesets) | |||
666 | } | 706 | } |
667 | } | 707 | } |
668 | #ifdef CONFIG_NUMA | 708 | #ifdef CONFIG_NUMA |
709 | for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) { | ||
710 | int v; | ||
711 | |||
712 | v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0); | ||
713 | if (v) { | ||
714 | |||
715 | atomic_long_add(v, &zone->vm_numa_stat[i]); | ||
716 | global_numa_diff[i] += v; | ||
717 | __this_cpu_write(p->expire, 3); | ||
718 | } | ||
719 | } | ||
720 | |||
669 | if (do_pagesets) { | 721 | if (do_pagesets) { |
670 | cond_resched(); | 722 | cond_resched(); |
671 | /* | 723 | /* |
@@ -712,7 +764,12 @@ static int refresh_cpu_vm_stats(bool do_pagesets) | |||
712 | } | 764 | } |
713 | } | 765 | } |
714 | 766 | ||
767 | #ifdef CONFIG_NUMA | ||
768 | changes += fold_diff(global_zone_diff, global_numa_diff, | ||
769 | global_node_diff); | ||
770 | #else | ||
715 | changes += fold_diff(global_zone_diff, global_node_diff); | 771 | changes += fold_diff(global_zone_diff, global_node_diff); |
772 | #endif | ||
716 | return changes; | 773 | return changes; |
717 | } | 774 | } |
718 | 775 | ||
@@ -727,6 +784,9 @@ void cpu_vm_stats_fold(int cpu) | |||
727 | struct zone *zone; | 784 | struct zone *zone; |
728 | int i; | 785 | int i; |
729 | int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; | 786 | int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; |
787 | #ifdef CONFIG_NUMA | ||
788 | int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, }; | ||
789 | #endif | ||
730 | int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; | 790 | int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; |
731 | 791 | ||
732 | for_each_populated_zone(zone) { | 792 | for_each_populated_zone(zone) { |
@@ -743,6 +803,18 @@ void cpu_vm_stats_fold(int cpu) | |||
743 | atomic_long_add(v, &zone->vm_stat[i]); | 803 | atomic_long_add(v, &zone->vm_stat[i]); |
744 | global_zone_diff[i] += v; | 804 | global_zone_diff[i] += v; |
745 | } | 805 | } |
806 | |||
807 | #ifdef CONFIG_NUMA | ||
808 | for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) | ||
809 | if (p->vm_numa_stat_diff[i]) { | ||
810 | int v; | ||
811 | |||
812 | v = p->vm_numa_stat_diff[i]; | ||
813 | p->vm_numa_stat_diff[i] = 0; | ||
814 | atomic_long_add(v, &zone->vm_numa_stat[i]); | ||
815 | global_numa_diff[i] += v; | ||
816 | } | ||
817 | #endif | ||
746 | } | 818 | } |
747 | 819 | ||
748 | for_each_online_pgdat(pgdat) { | 820 | for_each_online_pgdat(pgdat) { |
@@ -761,7 +833,11 @@ void cpu_vm_stats_fold(int cpu) | |||
761 | } | 833 | } |
762 | } | 834 | } |
763 | 835 | ||
836 | #ifdef CONFIG_NUMA | ||
837 | fold_diff(global_zone_diff, global_numa_diff, global_node_diff); | ||
838 | #else | ||
764 | fold_diff(global_zone_diff, global_node_diff); | 839 | fold_diff(global_zone_diff, global_node_diff); |
840 | #endif | ||
765 | } | 841 | } |
766 | 842 | ||
767 | /* | 843 | /* |
@@ -779,10 +855,38 @@ void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset) | |||
779 | atomic_long_add(v, &zone->vm_stat[i]); | 855 | atomic_long_add(v, &zone->vm_stat[i]); |
780 | atomic_long_add(v, &vm_zone_stat[i]); | 856 | atomic_long_add(v, &vm_zone_stat[i]); |
781 | } | 857 | } |
858 | |||
859 | #ifdef CONFIG_NUMA | ||
860 | for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) | ||
861 | if (pset->vm_numa_stat_diff[i]) { | ||
862 | int v = pset->vm_numa_stat_diff[i]; | ||
863 | |||
864 | pset->vm_numa_stat_diff[i] = 0; | ||
865 | atomic_long_add(v, &zone->vm_numa_stat[i]); | ||
866 | atomic_long_add(v, &vm_numa_stat[i]); | ||
867 | } | ||
868 | #endif | ||
782 | } | 869 | } |
783 | #endif | 870 | #endif |
784 | 871 | ||
785 | #ifdef CONFIG_NUMA | 872 | #ifdef CONFIG_NUMA |
873 | void __inc_numa_state(struct zone *zone, | ||
874 | enum numa_stat_item item) | ||
875 | { | ||
876 | struct per_cpu_pageset __percpu *pcp = zone->pageset; | ||
877 | s8 __percpu *p = pcp->vm_numa_stat_diff + item; | ||
878 | s8 v, t; | ||
879 | |||
880 | v = __this_cpu_inc_return(*p); | ||
881 | t = __this_cpu_read(pcp->numa_stat_threshold); | ||
882 | if (unlikely(v > t)) { | ||
883 | s8 overstep = t >> 1; | ||
884 | |||
885 | zone_numa_state_add(v + overstep, zone, item); | ||
886 | __this_cpu_write(*p, -overstep); | ||
887 | } | ||
888 | } | ||
889 | |||
786 | /* | 890 | /* |
787 | * Determine the per node value of a stat item. This function | 891 | * Determine the per node value of a stat item. This function |
788 | * is called frequently in a NUMA machine, so try to be as | 892 | * is called frequently in a NUMA machine, so try to be as |
@@ -801,6 +905,19 @@ unsigned long sum_zone_node_page_state(int node, | |||
801 | return count; | 905 | return count; |
802 | } | 906 | } |
803 | 907 | ||
908 | unsigned long sum_zone_numa_state(int node, | ||
909 | enum numa_stat_item item) | ||
910 | { | ||
911 | struct zone *zones = NODE_DATA(node)->node_zones; | ||
912 | int i; | ||
913 | unsigned long count = 0; | ||
914 | |||
915 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
916 | count += zone_numa_state(zones + i, item); | ||
917 | |||
918 | return count; | ||
919 | } | ||
920 | |||
804 | /* | 921 | /* |
805 | * Determine the per node value of a stat item. | 922 | * Determine the per node value of a stat item. |
806 | */ | 923 | */ |
@@ -937,6 +1054,9 @@ const char * const vmstat_text[] = { | |||
937 | #if IS_ENABLED(CONFIG_ZSMALLOC) | 1054 | #if IS_ENABLED(CONFIG_ZSMALLOC) |
938 | "nr_zspages", | 1055 | "nr_zspages", |
939 | #endif | 1056 | #endif |
1057 | "nr_free_cma", | ||
1058 | |||
1059 | /* enum numa_stat_item counters */ | ||
940 | #ifdef CONFIG_NUMA | 1060 | #ifdef CONFIG_NUMA |
941 | "numa_hit", | 1061 | "numa_hit", |
942 | "numa_miss", | 1062 | "numa_miss", |
@@ -945,7 +1065,6 @@ const char * const vmstat_text[] = { | |||
945 | "numa_local", | 1065 | "numa_local", |
946 | "numa_other", | 1066 | "numa_other", |
947 | #endif | 1067 | #endif |
948 | "nr_free_cma", | ||
949 | 1068 | ||
950 | /* Node-based counters */ | 1069 | /* Node-based counters */ |
951 | "nr_inactive_anon", | 1070 | "nr_inactive_anon", |
@@ -1106,7 +1225,6 @@ const char * const vmstat_text[] = { | |||
1106 | }; | 1225 | }; |
1107 | #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ | 1226 | #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ |
1108 | 1227 | ||
1109 | |||
1110 | #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \ | 1228 | #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \ |
1111 | defined(CONFIG_PROC_FS) | 1229 | defined(CONFIG_PROC_FS) |
1112 | static void *frag_start(struct seq_file *m, loff_t *pos) | 1230 | static void *frag_start(struct seq_file *m, loff_t *pos) |
@@ -1384,7 +1502,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, | |||
1384 | seq_printf(m, "\n per-node stats"); | 1502 | seq_printf(m, "\n per-node stats"); |
1385 | for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { | 1503 | for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { |
1386 | seq_printf(m, "\n %-12s %lu", | 1504 | seq_printf(m, "\n %-12s %lu", |
1387 | vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], | 1505 | vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + |
1506 | NR_VM_NUMA_STAT_ITEMS], | ||
1388 | node_page_state(pgdat, i)); | 1507 | node_page_state(pgdat, i)); |
1389 | } | 1508 | } |
1390 | } | 1509 | } |
@@ -1421,6 +1540,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, | |||
1421 | seq_printf(m, "\n %-12s %lu", vmstat_text[i], | 1540 | seq_printf(m, "\n %-12s %lu", vmstat_text[i], |
1422 | zone_page_state(zone, i)); | 1541 | zone_page_state(zone, i)); |
1423 | 1542 | ||
1543 | #ifdef CONFIG_NUMA | ||
1544 | for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) | ||
1545 | seq_printf(m, "\n %-12s %lu", | ||
1546 | vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], | ||
1547 | zone_numa_state(zone, i)); | ||
1548 | #endif | ||
1549 | |||
1424 | seq_printf(m, "\n pagesets"); | 1550 | seq_printf(m, "\n pagesets"); |
1425 | for_each_online_cpu(i) { | 1551 | for_each_online_cpu(i) { |
1426 | struct per_cpu_pageset *pageset; | 1552 | struct per_cpu_pageset *pageset; |
@@ -1497,6 +1623,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) | |||
1497 | if (*pos >= ARRAY_SIZE(vmstat_text)) | 1623 | if (*pos >= ARRAY_SIZE(vmstat_text)) |
1498 | return NULL; | 1624 | return NULL; |
1499 | stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) + | 1625 | stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) + |
1626 | NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) + | ||
1500 | NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) + | 1627 | NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) + |
1501 | NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long); | 1628 | NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long); |
1502 | 1629 | ||
@@ -1512,6 +1639,12 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) | |||
1512 | v[i] = global_zone_page_state(i); | 1639 | v[i] = global_zone_page_state(i); |
1513 | v += NR_VM_ZONE_STAT_ITEMS; | 1640 | v += NR_VM_ZONE_STAT_ITEMS; |
1514 | 1641 | ||
1642 | #ifdef CONFIG_NUMA | ||
1643 | for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) | ||
1644 | v[i] = global_numa_state(i); | ||
1645 | v += NR_VM_NUMA_STAT_ITEMS; | ||
1646 | #endif | ||
1647 | |||
1515 | for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) | 1648 | for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) |
1516 | v[i] = global_node_page_state(i); | 1649 | v[i] = global_node_page_state(i); |
1517 | v += NR_VM_NODE_STAT_ITEMS; | 1650 | v += NR_VM_NODE_STAT_ITEMS; |
@@ -1613,6 +1746,16 @@ int vmstat_refresh(struct ctl_table *table, int write, | |||
1613 | err = -EINVAL; | 1746 | err = -EINVAL; |
1614 | } | 1747 | } |
1615 | } | 1748 | } |
1749 | #ifdef CONFIG_NUMA | ||
1750 | for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) { | ||
1751 | val = atomic_long_read(&vm_numa_stat[i]); | ||
1752 | if (val < 0) { | ||
1753 | pr_warn("%s: %s %ld\n", | ||
1754 | __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val); | ||
1755 | err = -EINVAL; | ||
1756 | } | ||
1757 | } | ||
1758 | #endif | ||
1616 | if (err) | 1759 | if (err) |
1617 | return err; | 1760 | return err; |
1618 | if (write) | 1761 | if (write) |
@@ -1654,13 +1797,19 @@ static bool need_update(int cpu) | |||
1654 | struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu); | 1797 | struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu); |
1655 | 1798 | ||
1656 | BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1); | 1799 | BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1); |
1800 | #ifdef CONFIG_NUMA | ||
1801 | BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 1); | ||
1802 | #endif | ||
1657 | /* | 1803 | /* |
1658 | * The fast way of checking if there are any vmstat diffs. | 1804 | * The fast way of checking if there are any vmstat diffs. |
1659 | * This works because the diffs are byte sized items. | 1805 | * This works because the diffs are byte sized items. |
1660 | */ | 1806 | */ |
1661 | if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS)) | 1807 | if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS)) |
1662 | return true; | 1808 | return true; |
1663 | 1809 | #ifdef CONFIG_NUMA | |
1810 | if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS)) | ||
1811 | return true; | ||
1812 | #endif | ||
1664 | } | 1813 | } |
1665 | return false; | 1814 | return false; |
1666 | } | 1815 | } |