author    Mel Gorman <mgorman@suse.de>  2012-11-14 20:24:32 -0500
committer Mel Gorman <mgorman@suse.de>  2012-12-11 09:42:49 -0500
commit    9532fec118d485ea37ab6e3ea372d68cd8b4cd0d (patch)
tree      5076f3da1ff244df554e99b8701749423a6b92ad
parent    5606e3877ad8baea42f3a71ebde0a03622bbb551 (diff)
mm: numa: Migrate pages handled during a pmd_numa hinting fault
To say that the PMD handling code was incorrectly transferred from autonuma is an understatement. The intention was to handle a PMD's worth of pages in the same fault and effectively batch the taking of the PTL and page migration. The copied version instead has the impact of clearing a number of pte_numa PTE entries, and whether any page migration takes place depends on racing. This just happens to work in some cases.

This patch handles pte_numa faults in batch when a pmd_numa fault is handled. The pages are migrated if they are currently misplaced. Essentially this is making an assumption that NUMA locality is on a PMD boundary, but that could be addressed by only setting pmd_numa if all the pages within that PMD are on the same node if necessary.

Signed-off-by: Mel Gorman <mgorman@suse.de>
-rw-r--r--  mm/memory.c    | 51
-rw-r--r--  mm/mprotect.c  | 25
2 files changed, 54 insertions(+), 22 deletions(-)
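For readers following the change, the core of the patch is the per-PTE loop it installs in do_pmd_numa_page(). The condensed sketch below restates that loop outside the diff; it assumes the surrounding declarations (mm, vma, pmdp, pte, ptl, local_nid, _addr, offset) from the function itself and merges a couple of checks for brevity, so it is illustrative rather than a drop-in replacement.

	/* Walk one PMD's worth of PTEs and migrate any misplaced pages. */
	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
		pte_t pteval = *pte;
		struct page *page;
		int curr_nid = local_nid;	/* fault accounted locally by default */
		int target_nid;

		if (!pte_present(pteval) || !pte_numa(pteval))
			continue;
		page = vm_normal_page(vma, addr, pteval);
		if (!page || page_mapcount(page) != 1)
			continue;		/* only consider non-shared pages */

		/* Takes a reference on the page and asks the policy where it belongs. */
		target_nid = numa_migrate_prep(page, vma, addr, page_to_nid(page));
		if (target_nid == -1) {
			put_page(page);		/* correctly placed, nothing to do */
			continue;
		}

		/* Drop the PTL, migrate, then account the hinting fault. */
		pte_unmap_unlock(pte, ptl);
		if (migrate_misplaced_page(page, target_nid))
			curr_nid = target_nid;
		task_numa_fault(curr_nid, 1);
		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	}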
diff --git a/mm/memory.c b/mm/memory.c
index 8a7b4ccbe136..84c6d9eab182 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3449,6 +3449,18 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
+int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
+				unsigned long addr, int current_nid)
+{
+	get_page(page);
+
+	count_vm_numa_event(NUMA_HINT_FAULTS);
+	if (current_nid == numa_node_id())
+		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+
+	return mpol_misplaced(page, vma, addr);
+}
+
 int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
 {
@@ -3477,18 +3489,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	set_pte_at(mm, addr, ptep, pte);
 	update_mmu_cache(vma, addr, ptep);
 
-	count_vm_numa_event(NUMA_HINT_FAULTS);
 	page = vm_normal_page(vma, addr, pte);
 	if (!page) {
 		pte_unmap_unlock(ptep, ptl);
 		return 0;
 	}
 
-	get_page(page);
 	current_nid = page_to_nid(page);
-	if (current_nid == numa_node_id())
-		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
-	target_nid = mpol_misplaced(page, vma, addr);
+	target_nid = numa_migrate_prep(page, vma, addr, current_nid);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
 		/*
@@ -3505,7 +3513,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	current_nid = target_nid;
 
 out:
-	task_numa_fault(current_nid, 1);
+	if (current_nid != -1)
+		task_numa_fault(current_nid, 1);
 	return 0;
 }
 
@@ -3521,8 +3530,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	bool numa = false;
 	int local_nid = numa_node_id();
-	unsigned long nr_faults = 0;
-	unsigned long nr_faults_local = 0;
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3545,7 +3552,8 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
-		int curr_nid;
+		int curr_nid = local_nid;
+		int target_nid;
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3566,21 +3574,30 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* only check non-shared pages */
 		if (unlikely(page_mapcount(page) != 1))
 			continue;
-		pte_unmap_unlock(pte, ptl);
 
-		curr_nid = page_to_nid(page);
-		task_numa_fault(curr_nid, 1);
+		/*
+		 * Note that the NUMA fault is later accounted to either
+		 * the node that is currently running or where the page is
+		 * migrated to.
+		 */
+		curr_nid = local_nid;
+		target_nid = numa_migrate_prep(page, vma, addr,
+					       page_to_nid(page));
+		if (target_nid == -1) {
+			put_page(page);
+			continue;
+		}
 
-		nr_faults++;
-		if (curr_nid == local_nid)
-			nr_faults_local++;
+		/* Migrate to the requested node */
+		pte_unmap_unlock(pte, ptl);
+		if (migrate_misplaced_page(page, target_nid))
+			curr_nid = target_nid;
+		task_numa_fault(curr_nid, 1);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
 	pte_unmap_unlock(orig_pte, ptl);
 
-	count_vm_numa_events(NUMA_HINT_FAULTS, nr_faults);
-	count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_faults_local);
 	return 0;
 }
 #else
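For context, do_pmd_numa_page() is reached from the generic fault path when the PMD itself carries the NUMA hinting bit, which is exactly the case that change_pmd_protnuma() sets up in mm/mprotect.c below. The dispatch shown here is not part of this patch and is quoted from memory of the fault-handling code of this kernel generation, so treat it as an assumption:

	/* Approximate dispatch in handle_mm_fault() (assumption, not part of this diff). */
	if (pmd_numa(*pmd))
		return do_pmd_numa_page(mm, vma, address, pmd);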
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 7ef6ae964e8f..dce6fb48edc4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -37,12 +37,14 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable, int prot_numa)
+		int dirty_accountable, int prot_numa, bool *ret_all_same_node)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
+	bool all_same_node = true;
+	int last_nid = -1;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -61,6 +63,12 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 			page = vm_normal_page(vma, addr, oldpte);
 			if (page) {
+				int this_nid = page_to_nid(page);
+				if (last_nid == -1)
+					last_nid = this_nid;
+				if (last_nid != this_nid)
+					all_same_node = false;
+
 				/* only check non-shared pages */
 				if (!pte_numa(oldpte) &&
 						page_mapcount(page) == 1) {
@@ -81,7 +89,6 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 			if (updated)
 				pages++;
-
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -101,6 +108,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
+	*ret_all_same_node = all_same_node;
 	return pages;
 }
 
@@ -127,6 +135,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 	pmd_t *pmd;
 	unsigned long next;
 	unsigned long pages = 0;
+	bool all_same_node;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -143,9 +152,15 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
 		pages += change_pte_range(vma, pmd, addr, next, newprot,
-				 dirty_accountable, prot_numa);
+				 dirty_accountable, prot_numa, &all_same_node);
 
-		if (prot_numa)
+		/*
+		 * If we are changing protections for NUMA hinting faults then
+		 * set pmd_numa if the examined pages were all on the same
+		 * node. This allows a regular PMD to be handled as one fault
+		 * and effectively batches the taking of the PTL
+		 */
+		if (prot_numa && all_same_node)
 			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
 
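The mprotect.c side decides whether the whole PMD may be marked pmd_numa by tracking whether every examined page sits on one node. Below is a standalone sketch of that same-node test; the helper name and the array of node ids are hypothetical and exist purely for illustration, but the logic mirrors the last_nid/all_same_node tracking added above.

	/*
	 * Standalone illustration of the all_same_node logic (hypothetical
	 * helper, not kernel code): remember the first node seen and report
	 * false as soon as any page in the range is on a different node.
	 */
	static bool pages_all_on_one_node(const int *page_nids, int nr)
	{
		int last_nid = -1;
		int i;

		for (i = 0; i < nr; i++) {
			if (last_nid == -1)
				last_nid = page_nids[i];
			if (page_nids[i] != last_nid)
				return false;
		}
		return true;
	}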