aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memory.c
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2012-11-02 10:52:48 -0400
committerMel Gorman <mgorman@suse.de>2012-12-11 09:42:48 -0500
commit03c5a6e16322c997bf8f264851bfa3f532ad515f (patch)
treedf5b09acdcd6d171286afa3f77a7ff56336c8ca6 /mm/memory.c
parent4b96a29ba891dd59734cb7be80a900fe93aa2d9f (diff)
mm: numa: Add pte updates, hinting and migration stats
It is tricky to quantify the basic cost of automatic NUMA placement in a
meaningful manner. This patch adds some vmstats that can be used as part
of a basic costing model.

u         = basic unit = sizeof(void *)
Ca        = cost of struct page access = sizeof(struct page) / u
Cpte      = cost of PTE access = Ca
Cupdate   = cost of PTE update = (2 * Cpte) + (2 * Wlock)
            where Cpte is incurred twice, for a read and a write, and Wlock
            is a constant representing the cost of taking or releasing a
            lock
Cnumahint = cost of a minor page fault = some high constant, e.g. 1000
Cpagerw   = cost to read or write a full page = Ca + PAGE_SIZE/u
Ci        = cost of page isolation = Ca + Wi
            where Wi is a constant that should reflect the approximate cost
            of the locking operation
Cpagecopy = Cpagerw + (Cpagerw * Wnuma) + Ci + (Ci * Wnuma)
            where Wnuma is the approximate NUMA factor. 1 is local. 1.2
            would imply that remote accesses are 20% more expensive

Balancing cost = Cpte * numa_pte_updates +
                 Cnumahint * numa_hint_faults +
                 Ci * numa_pages_migrated +
                 Cpagecopy * numa_pages_migrated

Note that numa_pages_migrated is used as a measure of how many pages
were isolated even though it would miss pages that failed to migrate. A
vmstat counter could have been added for it but the isolation cost is
pretty marginal in comparison to the overall cost so it seemed overkill.

The ideal way to measure automatic placement benefit would be to count
the number of remote accesses versus local accesses and do something like

    benefit = (remote_accesses_before - remote_accesses_after) * Wnuma

but the information is not readily available. As a workload converges, the
expectation would be that the number of remote numa hints would reduce to 0.

    convergence = numa_hint_faults_local / numa_hint_faults
                  where this is measured for the last N number of
                  numa hints recorded. When the workload is fully
                  converged the value is 1.

This can measure if the placement policy is converging and how fast it is
doing it.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c12
1 files changed, 12 insertions, 0 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 8012c1907895..8a7b4ccbe136 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3477,6 +3477,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3477 set_pte_at(mm, addr, ptep, pte); 3477 set_pte_at(mm, addr, ptep, pte);
3478 update_mmu_cache(vma, addr, ptep); 3478 update_mmu_cache(vma, addr, ptep);
3479 3479
3480 count_vm_numa_event(NUMA_HINT_FAULTS);
3480 page = vm_normal_page(vma, addr, pte); 3481 page = vm_normal_page(vma, addr, pte);
3481 if (!page) { 3482 if (!page) {
3482 pte_unmap_unlock(ptep, ptl); 3483 pte_unmap_unlock(ptep, ptl);
@@ -3485,6 +3486,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3485 3486
3486 get_page(page); 3487 get_page(page);
3487 current_nid = page_to_nid(page); 3488 current_nid = page_to_nid(page);
3489 if (current_nid == numa_node_id())
3490 count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
3488 target_nid = mpol_misplaced(page, vma, addr); 3491 target_nid = mpol_misplaced(page, vma, addr);
3489 pte_unmap_unlock(ptep, ptl); 3492 pte_unmap_unlock(ptep, ptl);
3490 if (target_nid == -1) { 3493 if (target_nid == -1) {
@@ -3517,6 +3520,9 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3517 unsigned long offset; 3520 unsigned long offset;
3518 spinlock_t *ptl; 3521 spinlock_t *ptl;
3519 bool numa = false; 3522 bool numa = false;
3523 int local_nid = numa_node_id();
3524 unsigned long nr_faults = 0;
3525 unsigned long nr_faults_local = 0;
3520 3526
3521 spin_lock(&mm->page_table_lock); 3527 spin_lock(&mm->page_table_lock);
3522 pmd = *pmdp; 3528 pmd = *pmdp;
@@ -3565,10 +3571,16 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3565 curr_nid = page_to_nid(page); 3571 curr_nid = page_to_nid(page);
3566 task_numa_fault(curr_nid, 1); 3572 task_numa_fault(curr_nid, 1);
3567 3573
3574 nr_faults++;
3575 if (curr_nid == local_nid)
3576 nr_faults_local++;
3577
3568 pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); 3578 pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
3569 } 3579 }
3570 pte_unmap_unlock(orig_pte, ptl); 3580 pte_unmap_unlock(orig_pte, ptl);
3571 3581
3582 count_vm_numa_events(NUMA_HINT_FAULTS, nr_faults);
3583 count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_faults_local);
3572 return 0; 3584 return 0;
3573} 3585}
3574#else 3586#else