author    | Mel Gorman <mgorman@suse.de> | 2012-11-14 20:24:32 -0500
committer | Mel Gorman <mgorman@suse.de> | 2012-12-11 09:42:49 -0500
commit    | 9532fec118d485ea37ab6e3ea372d68cd8b4cd0d (patch)
tree      | 5076f3da1ff244df554e99b8701749423a6b92ad /mm/mprotect.c
parent    | 5606e3877ad8baea42f3a71ebde0a03622bbb551 (diff)
mm: numa: Migrate pages handled during a pmd_numa hinting fault
To say that the PMD handling code was incorrectly transferred from autonuma
is an understatement. The intention was to handle a PMD's worth of pages
in the same fault and effectively batch the taking of the PTL and the page
migrations. The copied version instead just clears a number of pte_numa
PTE entries, and whether any page migration takes place at all depends on
racing; this happens to work in some cases.
This patch handles pte_numa faults in batch when a pmd_numa fault is
handled. The pages are migrated if they are currently misplaced.
Essentially this assumes that NUMA locality falls on a PMD boundary; if
necessary, that could be addressed by only setting pmd_numa when all the
pages within that PMD are on the same node.
Signed-off-by: Mel Gorman <mgorman@suse.de>
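The mprotect-side change in the diff below is small: while change_pte_range() walks the PTEs it records whether every page it examines sits on the same node, and change_pmd_range() only marks the PMD pmd_numa when that holds. As a quick illustration of that scan, here is a minimal userspace model in plain C (not kernel code; the nid array stands in for calling page_to_nid() on the page behind each PTE, with -1 marking PTEs that have no normal page):

#include <stdbool.h>
#include <stddef.h>

/*
 * Model of the all_same_node scan added to change_pte_range(): walk the
 * node id of every mapped page in a PMD-sized range and report whether
 * they all match.  Entries of -1 model PTEs without a normal page and
 * are skipped, just as the kernel code only samples pages returned by
 * vm_normal_page().
 */
static bool all_pages_same_node(const int *nid, size_t nr_ptes)
{
	int last_nid = -1;
	bool all_same_node = true;

	for (size_t i = 0; i < nr_ptes; i++) {
		if (nid[i] < 0)
			continue;	/* no page behind this PTE */
		if (last_nid == -1)
			last_nid = nid[i];
		if (last_nid != nid[i])
			all_same_node = false;
	}
	return all_same_node;
}

Like this model, the kernel loop keeps going after a mismatch rather than breaking out early; it cannot stop anyway, because the same pass still has to update the protection bits on every PTE in the range.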
Diffstat (limited to 'mm/mprotect.c')
-rw-r--r-- | mm/mprotect.c | 25
1 file changed, 20 insertions, 5 deletions
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 7ef6ae964e8f..dce6fb48edc4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -37,12 +37,14 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable, int prot_numa)
+		int dirty_accountable, int prot_numa, bool *ret_all_same_node)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
+	bool all_same_node = true;
+	int last_nid = -1;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -61,6 +63,12 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 			page = vm_normal_page(vma, addr, oldpte);
 			if (page) {
+				int this_nid = page_to_nid(page);
+				if (last_nid == -1)
+					last_nid = this_nid;
+				if (last_nid != this_nid)
+					all_same_node = false;
+
 				/* only check non-shared pages */
 				if (!pte_numa(oldpte) &&
 				    page_mapcount(page) == 1) {
@@ -81,7 +89,6 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 			if (updated)
 				pages++;
-
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -101,6 +108,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
+	*ret_all_same_node = all_same_node;
 	return pages;
 }
 
@@ -127,6 +135,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 	pmd_t *pmd;
 	unsigned long next;
 	unsigned long pages = 0;
+	bool all_same_node;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -143,9 +152,15 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
 		pages += change_pte_range(vma, pmd, addr, next, newprot,
				 dirty_accountable, prot_numa);
+				 dirty_accountable, prot_numa, &all_same_node);
 
-		if (prot_numa)
+		/*
+		 * If we are changing protections for NUMA hinting faults then
+		 * set pmd_numa if the examined pages were all on the same
+		 * node. This allows a regular PMD to be handled as one fault
+		 * and effectively batches the taking of the PTL
+		 */
+		if (prot_numa && all_same_node)
 			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
 
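The fault-side half of the change, handling a PMD's worth of pte_numa entries in one fault and migrating whichever pages are misplaced, lives on the fault path in mm/memory.c and is outside this diff, which is limited to mm/mprotect.c. Purely as an illustration of the batching idea, and not the kernel code, a userspace model might look like the following; struct pte_model, migrate_if_misplaced() and the target-node argument are all hypothetical stand-ins for the real pte_numa()/pte_mknonnuma() handling and migration logic:

#include <stdbool.h>

#define NR_PTES_PER_PMD 512	/* with 4K pages one PMD covers 512 PTEs on x86-64 */

struct pte_model {
	bool numa_stub;		/* models pte_numa() being set on the PTE */
	int  page_nid;		/* node the backing page currently lives on */
};

/* Hypothetical stand-in for migrating a misplaced page to target_nid. */
static void migrate_if_misplaced(struct pte_model *pte, int target_nid)
{
	if (pte->page_nid != target_nid)
		pte->page_nid = target_nid;
}

/*
 * Model of a batched pmd_numa hinting fault: under a single "PTL"
 * acquisition, clear every pte_numa entry below the PMD and migrate its
 * page if misplaced, instead of taking one hinting fault per page.
 */
static void handle_pmd_numa_fault_model(struct pte_model *ptes, int target_nid)
{
	for (int i = 0; i < NR_PTES_PER_PMD; i++) {
		if (!ptes[i].numa_stub)
			continue;
		ptes[i].numa_stub = false;	/* pte_mknonnuma() equivalent */
		migrate_if_misplaced(&ptes[i], target_nid);
	}
}

Batching on the PMD boundary only makes sense because the hunk above sets pmd_numa solely when all examined pages under the PMD were on the same node, which is exactly the locality assumption the changelog calls out.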