aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2012-11-02 10:52:48 -0400
committerMel Gorman <mgorman@suse.de>2012-12-11 09:42:48 -0500
commit03c5a6e16322c997bf8f264851bfa3f532ad515f (patch)
treedf5b09acdcd6d171286afa3f77a7ff56336c8ca6
parent4b96a29ba891dd59734cb7be80a900fe93aa2d9f (diff)
mm: numa: Add pte updates, hinting and migration stats
It is tricky to quantify the basic cost of automatic NUMA placement in a
meaningful manner. This patch adds some vmstats that can be used as part
of a basic costing model.

u         = basic unit = sizeof(void *)
Ca        = cost of struct page access = sizeof(struct page) / u
Cpte      = Cost PTE access = Ca
Cupdate   = Cost PTE update = (2 * Cpte) + (2 * Wlock)
            where Cpte is incurred twice for a read and a write and Wlock
            is a constant representing the cost of taking or releasing a
            lock
Cnumahint = Cost of a minor page fault = some high constant e.g. 1000
Cpagerw   = Cost to read or write a full page = Ca + PAGE_SIZE/u
Ci        = Cost of page isolation = Ca + Wi
            where Wi is a constant that should reflect the approximate
            cost of the locking operation
Cpagecopy = Cpagerw + (Cpagerw * Wnuma) + Ci + (Ci * Wnuma)
            where Wnuma is the approximate NUMA factor. 1 is local. 1.2
            would imply that remote accesses are 20% more expensive

Balancing cost = Cpte * numa_pte_updates +
                 Cnumahint * numa_hint_faults +
                 Ci * numa_pages_migrated +
                 Cpagecopy * numa_pages_migrated

Note that numa_pages_migrated is used as a measure of how many pages
were isolated even though it would miss pages that failed to migrate. A
vmstat counter could have been added for it but the isolation cost is
pretty marginal in comparison to the overall cost so it seemed overkill.

The ideal way to measure automatic placement benefit would be to count
the number of remote accesses versus local accesses and do something like

        benefit = (remote_accesses_before - remote_accesses_after) * Wnuma

but the information is not readily available. As a workload converges,
the expectation would be that the number of remote numa hints would
reduce to 0.

        convergence = numa_hint_faults_local / numa_hint_faults

where this is measured for the last N number of numa hints recorded.
When the workload is fully converged the value is 1.

This can measure if the placement policy is converging and how fast it
is doing it.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
-rw-r--r--include/linux/vm_event_item.h6
-rw-r--r--include/linux/vmstat.h8
-rw-r--r--mm/huge_memory.c5
-rw-r--r--mm/memory.c12
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/migrate.c3
-rw-r--r--mm/vmstat.c6
7 files changed, 41 insertions, 1 deletions
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index a1f750b8e72a..55600049e794 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -38,6 +38,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
38 KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY, 38 KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
39 KSWAPD_SKIP_CONGESTION_WAIT, 39 KSWAPD_SKIP_CONGESTION_WAIT,
40 PAGEOUTRUN, ALLOCSTALL, PGROTATED, 40 PAGEOUTRUN, ALLOCSTALL, PGROTATED,
41#ifdef CONFIG_NUMA_BALANCING
42 NUMA_PTE_UPDATES,
43 NUMA_HINT_FAULTS,
44 NUMA_HINT_FAULTS_LOCAL,
45 NUMA_PAGE_MIGRATE,
46#endif
41#ifdef CONFIG_MIGRATION 47#ifdef CONFIG_MIGRATION
42 PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, 48 PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
43#endif 49#endif
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 92a86b2cce33..a13291f7da88 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -80,6 +80,14 @@ static inline void vm_events_fold_cpu(int cpu)
80 80
81#endif /* CONFIG_VM_EVENT_COUNTERS */ 81#endif /* CONFIG_VM_EVENT_COUNTERS */
82 82
83#ifdef CONFIG_NUMA_BALANCING
84#define count_vm_numa_event(x) count_vm_event(x)
85#define count_vm_numa_events(x, y) count_vm_events(x, y)
86#else
87#define count_vm_numa_event(x) do {} while (0)
88#define count_vm_numa_events(x, y) do {} while (0)
89#endif /* CONFIG_NUMA_BALANCING */
90
83#define __count_zone_vm_events(item, zone, delta) \ 91#define __count_zone_vm_events(item, zone, delta) \
84 __count_vm_events(item##_NORMAL - ZONE_NORMAL + \ 92 __count_vm_events(item##_NORMAL - ZONE_NORMAL + \
85 zone_idx(zone), delta) 93 zone_idx(zone), delta)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ee8133794a56..f3a477fffd09 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1026,6 +1026,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
1026 struct page *page = NULL; 1026 struct page *page = NULL;
1027 unsigned long haddr = addr & HPAGE_PMD_MASK; 1027 unsigned long haddr = addr & HPAGE_PMD_MASK;
1028 int target_nid; 1028 int target_nid;
1029 int current_nid = -1;
1029 1030
1030 spin_lock(&mm->page_table_lock); 1031 spin_lock(&mm->page_table_lock);
1031 if (unlikely(!pmd_same(pmd, *pmdp))) 1032 if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1034,6 +1035,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
1034 page = pmd_page(pmd); 1035 page = pmd_page(pmd);
1035 get_page(page); 1036 get_page(page);
1036 spin_unlock(&mm->page_table_lock); 1037 spin_unlock(&mm->page_table_lock);
1038 current_nid = page_to_nid(page);
1039 count_vm_numa_event(NUMA_HINT_FAULTS);
1040 if (current_nid == numa_node_id())
1041 count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
1037 1042
1038 target_nid = mpol_misplaced(page, vma, haddr); 1043 target_nid = mpol_misplaced(page, vma, haddr);
1039 if (target_nid == -1) 1044 if (target_nid == -1)
diff --git a/mm/memory.c b/mm/memory.c
index 8012c1907895..8a7b4ccbe136 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3477,6 +3477,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3477 set_pte_at(mm, addr, ptep, pte); 3477 set_pte_at(mm, addr, ptep, pte);
3478 update_mmu_cache(vma, addr, ptep); 3478 update_mmu_cache(vma, addr, ptep);
3479 3479
3480 count_vm_numa_event(NUMA_HINT_FAULTS);
3480 page = vm_normal_page(vma, addr, pte); 3481 page = vm_normal_page(vma, addr, pte);
3481 if (!page) { 3482 if (!page) {
3482 pte_unmap_unlock(ptep, ptl); 3483 pte_unmap_unlock(ptep, ptl);
@@ -3485,6 +3486,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3485 3486
3486 get_page(page); 3487 get_page(page);
3487 current_nid = page_to_nid(page); 3488 current_nid = page_to_nid(page);
3489 if (current_nid == numa_node_id())
3490 count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
3488 target_nid = mpol_misplaced(page, vma, addr); 3491 target_nid = mpol_misplaced(page, vma, addr);
3489 pte_unmap_unlock(ptep, ptl); 3492 pte_unmap_unlock(ptep, ptl);
3490 if (target_nid == -1) { 3493 if (target_nid == -1) {
@@ -3517,6 +3520,9 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3517 unsigned long offset; 3520 unsigned long offset;
3518 spinlock_t *ptl; 3521 spinlock_t *ptl;
3519 bool numa = false; 3522 bool numa = false;
3523 int local_nid = numa_node_id();
3524 unsigned long nr_faults = 0;
3525 unsigned long nr_faults_local = 0;
3520 3526
3521 spin_lock(&mm->page_table_lock); 3527 spin_lock(&mm->page_table_lock);
3522 pmd = *pmdp; 3528 pmd = *pmdp;
@@ -3565,10 +3571,16 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3565 curr_nid = page_to_nid(page); 3571 curr_nid = page_to_nid(page);
3566 task_numa_fault(curr_nid, 1); 3572 task_numa_fault(curr_nid, 1);
3567 3573
3574 nr_faults++;
3575 if (curr_nid == local_nid)
3576 nr_faults_local++;
3577
3568 pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); 3578 pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
3569 } 3579 }
3570 pte_unmap_unlock(orig_pte, ptl); 3580 pte_unmap_unlock(orig_pte, ptl);
3571 3581
3582 count_vm_numa_events(NUMA_HINT_FAULTS, nr_faults);
3583 count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_faults_local);
3572 return 0; 3584 return 0;
3573} 3585}
3574#else 3586#else
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a7a62fe7c280..516491fbfaa8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -583,6 +583,8 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
583 BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE); 583 BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
584 584
585 nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1); 585 nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
586 if (nr_updated)
587 count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
586 588
587 return nr_updated; 589 return nr_updated;
588} 590}
diff --git a/mm/migrate.c b/mm/migrate.c
index c7d550011a64..23bba5d6edff 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1514,7 +1514,8 @@ int migrate_misplaced_page(struct page *page, int node)
1514 if (nr_remaining) { 1514 if (nr_remaining) {
1515 putback_lru_pages(&migratepages); 1515 putback_lru_pages(&migratepages);
1516 isolated = 0; 1516 isolated = 0;
1517 } 1517 } else
1518 count_vm_numa_event(NUMA_PAGE_MIGRATE);
1518 } 1519 }
1519 BUG_ON(!list_empty(&migratepages)); 1520 BUG_ON(!list_empty(&migratepages));
1520out: 1521out:
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 3a067fabe190..c0f1f6db5182 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -774,6 +774,12 @@ const char * const vmstat_text[] = {
774 774
775 "pgrotated", 775 "pgrotated",
776 776
777#ifdef CONFIG_NUMA_BALANCING
778 "numa_pte_updates",
779 "numa_hint_faults",
780 "numa_hint_faults_local",
781 "numa_pages_migrated",
782#endif
777#ifdef CONFIG_MIGRATION 783#ifdef CONFIG_MIGRATION
778 "pgmigrate_success", 784 "pgmigrate_success",
779 "pgmigrate_fail", 785 "pgmigrate_fail",