diff options
author | Mel Gorman <mgorman@suse.de> | 2012-11-14 20:24:32 -0500 |
---|---|---|
committer | Mel Gorman <mgorman@suse.de> | 2012-12-11 09:42:49 -0500 |
commit | 9532fec118d485ea37ab6e3ea372d68cd8b4cd0d (patch) | |
tree | 5076f3da1ff244df554e99b8701749423a6b92ad | |
parent | 5606e3877ad8baea42f3a71ebde0a03622bbb551 (diff) |
mm: numa: Migrate pages handled during a pmd_numa hinting fault
To say that the PMD handling code was incorrectly transferred from autonuma
is an understatement. The intention was to handle a PMD's worth of pages
in the same fault and effectively batch the taking of the PTL and page
migration. The copied version instead has the effect of clearing a number
of pte_numa PTE entries, and whether any page migration takes place depends
on racing. This just happens to work in some cases.
This patch handles pte_numa faults in batch when a pmd_numa fault is
handled. The pages are migrated if they are currently misplaced.
Essentially, this assumes that NUMA locality falls on a PMD boundary.
If necessary, that could be addressed by only setting pmd_numa when all
the pages within that PMD are on the same node.
Signed-off-by: Mel Gorman <mgorman@suse.de>
-rw-r--r-- | mm/memory.c | 51 | ||||
-rw-r--r-- | mm/mprotect.c | 25 |
2 files changed, 54 insertions, 22 deletions
diff --git a/mm/memory.c b/mm/memory.c index 8a7b4ccbe136..84c6d9eab182 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -3449,6 +3449,18 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3449 | return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); | 3449 | return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); |
3450 | } | 3450 | } |
3451 | 3451 | ||
3452 | int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, | ||
3453 | unsigned long addr, int current_nid) | ||
3454 | { | ||
3455 | get_page(page); | ||
3456 | |||
3457 | count_vm_numa_event(NUMA_HINT_FAULTS); | ||
3458 | if (current_nid == numa_node_id()) | ||
3459 | count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); | ||
3460 | |||
3461 | return mpol_misplaced(page, vma, addr); | ||
3462 | } | ||
3463 | |||
3452 | int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | 3464 | int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, |
3453 | unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd) | 3465 | unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd) |
3454 | { | 3466 | { |
@@ -3477,18 +3489,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3477 | set_pte_at(mm, addr, ptep, pte); | 3489 | set_pte_at(mm, addr, ptep, pte); |
3478 | update_mmu_cache(vma, addr, ptep); | 3490 | update_mmu_cache(vma, addr, ptep); |
3479 | 3491 | ||
3480 | count_vm_numa_event(NUMA_HINT_FAULTS); | ||
3481 | page = vm_normal_page(vma, addr, pte); | 3492 | page = vm_normal_page(vma, addr, pte); |
3482 | if (!page) { | 3493 | if (!page) { |
3483 | pte_unmap_unlock(ptep, ptl); | 3494 | pte_unmap_unlock(ptep, ptl); |
3484 | return 0; | 3495 | return 0; |
3485 | } | 3496 | } |
3486 | 3497 | ||
3487 | get_page(page); | ||
3488 | current_nid = page_to_nid(page); | 3498 | current_nid = page_to_nid(page); |
3489 | if (current_nid == numa_node_id()) | 3499 | target_nid = numa_migrate_prep(page, vma, addr, current_nid); |
3490 | count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); | ||
3491 | target_nid = mpol_misplaced(page, vma, addr); | ||
3492 | pte_unmap_unlock(ptep, ptl); | 3500 | pte_unmap_unlock(ptep, ptl); |
3493 | if (target_nid == -1) { | 3501 | if (target_nid == -1) { |
3494 | /* | 3502 | /* |
@@ -3505,7 +3513,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3505 | current_nid = target_nid; | 3513 | current_nid = target_nid; |
3506 | 3514 | ||
3507 | out: | 3515 | out: |
3508 | task_numa_fault(current_nid, 1); | 3516 | if (current_nid != -1) |
3517 | task_numa_fault(current_nid, 1); | ||
3509 | return 0; | 3518 | return 0; |
3510 | } | 3519 | } |
3511 | 3520 | ||
@@ -3521,8 +3530,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3521 | spinlock_t *ptl; | 3530 | spinlock_t *ptl; |
3522 | bool numa = false; | 3531 | bool numa = false; |
3523 | int local_nid = numa_node_id(); | 3532 | int local_nid = numa_node_id(); |
3524 | unsigned long nr_faults = 0; | ||
3525 | unsigned long nr_faults_local = 0; | ||
3526 | 3533 | ||
3527 | spin_lock(&mm->page_table_lock); | 3534 | spin_lock(&mm->page_table_lock); |
3528 | pmd = *pmdp; | 3535 | pmd = *pmdp; |
@@ -3545,7 +3552,8 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3545 | for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) { | 3552 | for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) { |
3546 | pte_t pteval = *pte; | 3553 | pte_t pteval = *pte; |
3547 | struct page *page; | 3554 | struct page *page; |
3548 | int curr_nid; | 3555 | int curr_nid = local_nid; |
3556 | int target_nid; | ||
3549 | if (!pte_present(pteval)) | 3557 | if (!pte_present(pteval)) |
3550 | continue; | 3558 | continue; |
3551 | if (!pte_numa(pteval)) | 3559 | if (!pte_numa(pteval)) |
@@ -3566,21 +3574,30 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3566 | /* only check non-shared pages */ | 3574 | /* only check non-shared pages */ |
3567 | if (unlikely(page_mapcount(page) != 1)) | 3575 | if (unlikely(page_mapcount(page) != 1)) |
3568 | continue; | 3576 | continue; |
3569 | pte_unmap_unlock(pte, ptl); | ||
3570 | 3577 | ||
3571 | curr_nid = page_to_nid(page); | 3578 | /* |
3572 | task_numa_fault(curr_nid, 1); | 3579 | * Note that the NUMA fault is later accounted to either |
3580 | * the node that is currently running or where the page is | ||
3581 | * migrated to. | ||
3582 | */ | ||
3583 | curr_nid = local_nid; | ||
3584 | target_nid = numa_migrate_prep(page, vma, addr, | ||
3585 | page_to_nid(page)); | ||
3586 | if (target_nid == -1) { | ||
3587 | put_page(page); | ||
3588 | continue; | ||
3589 | } | ||
3573 | 3590 | ||
3574 | nr_faults++; | 3591 | /* Migrate to the requested node */ |
3575 | if (curr_nid == local_nid) | 3592 | pte_unmap_unlock(pte, ptl); |
3576 | nr_faults_local++; | 3593 | if (migrate_misplaced_page(page, target_nid)) |
3594 | curr_nid = target_nid; | ||
3595 | task_numa_fault(curr_nid, 1); | ||
3577 | 3596 | ||
3578 | pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); | 3597 | pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); |
3579 | } | 3598 | } |
3580 | pte_unmap_unlock(orig_pte, ptl); | 3599 | pte_unmap_unlock(orig_pte, ptl); |
3581 | 3600 | ||
3582 | count_vm_numa_events(NUMA_HINT_FAULTS, nr_faults); | ||
3583 | count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_faults_local); | ||
3584 | return 0; | 3601 | return 0; |
3585 | } | 3602 | } |
3586 | #else | 3603 | #else |
diff --git a/mm/mprotect.c b/mm/mprotect.c index 7ef6ae964e8f..dce6fb48edc4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
@@ -37,12 +37,14 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) | |||
37 | 37 | ||
38 | static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | 38 | static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, |
39 | unsigned long addr, unsigned long end, pgprot_t newprot, | 39 | unsigned long addr, unsigned long end, pgprot_t newprot, |
40 | int dirty_accountable, int prot_numa) | 40 | int dirty_accountable, int prot_numa, bool *ret_all_same_node) |
41 | { | 41 | { |
42 | struct mm_struct *mm = vma->vm_mm; | 42 | struct mm_struct *mm = vma->vm_mm; |
43 | pte_t *pte, oldpte; | 43 | pte_t *pte, oldpte; |
44 | spinlock_t *ptl; | 44 | spinlock_t *ptl; |
45 | unsigned long pages = 0; | 45 | unsigned long pages = 0; |
46 | bool all_same_node = true; | ||
47 | int last_nid = -1; | ||
46 | 48 | ||
47 | pte = pte_offset_map_lock(mm, pmd, addr, &ptl); | 49 | pte = pte_offset_map_lock(mm, pmd, addr, &ptl); |
48 | arch_enter_lazy_mmu_mode(); | 50 | arch_enter_lazy_mmu_mode(); |
@@ -61,6 +63,12 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
61 | 63 | ||
62 | page = vm_normal_page(vma, addr, oldpte); | 64 | page = vm_normal_page(vma, addr, oldpte); |
63 | if (page) { | 65 | if (page) { |
66 | int this_nid = page_to_nid(page); | ||
67 | if (last_nid == -1) | ||
68 | last_nid = this_nid; | ||
69 | if (last_nid != this_nid) | ||
70 | all_same_node = false; | ||
71 | |||
64 | /* only check non-shared pages */ | 72 | /* only check non-shared pages */ |
65 | if (!pte_numa(oldpte) && | 73 | if (!pte_numa(oldpte) && |
66 | page_mapcount(page) == 1) { | 74 | page_mapcount(page) == 1) { |
@@ -81,7 +89,6 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
81 | 89 | ||
82 | if (updated) | 90 | if (updated) |
83 | pages++; | 91 | pages++; |
84 | |||
85 | ptep_modify_prot_commit(mm, addr, pte, ptent); | 92 | ptep_modify_prot_commit(mm, addr, pte, ptent); |
86 | } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) { | 93 | } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) { |
87 | swp_entry_t entry = pte_to_swp_entry(oldpte); | 94 | swp_entry_t entry = pte_to_swp_entry(oldpte); |
@@ -101,6 +108,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
101 | arch_leave_lazy_mmu_mode(); | 108 | arch_leave_lazy_mmu_mode(); |
102 | pte_unmap_unlock(pte - 1, ptl); | 109 | pte_unmap_unlock(pte - 1, ptl); |
103 | 110 | ||
111 | *ret_all_same_node = all_same_node; | ||
104 | return pages; | 112 | return pages; |
105 | } | 113 | } |
106 | 114 | ||
@@ -127,6 +135,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t * | |||
127 | pmd_t *pmd; | 135 | pmd_t *pmd; |
128 | unsigned long next; | 136 | unsigned long next; |
129 | unsigned long pages = 0; | 137 | unsigned long pages = 0; |
138 | bool all_same_node; | ||
130 | 139 | ||
131 | pmd = pmd_offset(pud, addr); | 140 | pmd = pmd_offset(pud, addr); |
132 | do { | 141 | do { |
@@ -143,9 +152,15 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t * | |||
143 | if (pmd_none_or_clear_bad(pmd)) | 152 | if (pmd_none_or_clear_bad(pmd)) |
144 | continue; | 153 | continue; |
145 | pages += change_pte_range(vma, pmd, addr, next, newprot, | 154 | pages += change_pte_range(vma, pmd, addr, next, newprot, |
146 | dirty_accountable, prot_numa); | 155 | dirty_accountable, prot_numa, &all_same_node); |
147 | 156 | ||
148 | if (prot_numa) | 157 | /* |
158 | * If we are changing protections for NUMA hinting faults then | ||
159 | * set pmd_numa if the examined pages were all on the same | ||
160 | * node. This allows a regular PMD to be handled as one fault | ||
161 | * and effectively batches the taking of the PTL | ||
162 | */ | ||
163 | if (prot_numa && all_same_node) | ||
149 | change_pmd_protnuma(vma->vm_mm, addr, pmd); | 164 | change_pmd_protnuma(vma->vm_mm, addr, pmd); |
150 | } while (pmd++, addr = next, addr != end); | 165 | } while (pmd++, addr = next, addr != end); |
151 | 166 | ||