author		Mel Gorman <mgorman@suse.de>	2012-10-25 08:16:32 -0400
committer	Mel Gorman <mgorman@suse.de>	2012-12-11 09:42:44 -0500
commit		4b10e7d562c90d0a72f324832c26653947a07381
tree		733e0fc3ac3fdfe27a312bc72e4ffb07bbf0aa56
parent		b24f53a0bea38b266d219ee651b22dba727c44ae
mm: mempolicy: Implement change_prot_numa() in terms of change_protection()
This patch converts change_prot_numa() to use change_protection(). As pte_numa and friends check the PTE bits directly, it is necessary for change_protection() to use pmd_mknuma(). Hence the required modifications to change_protection() are a little clumsy, but the end result is that most of the NUMA page table helpers are just one or two instructions.

Signed-off-by: Mel Gorman <mgorman@suse.de>
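[ Note: the extract below is lifted from the mm/mempolicy.c hunk further down and is shown here only to illustrate the new calling convention, not as additional code. With this patch, change_protection() grows a prot_numa flag and change_prot_numa() becomes a thin wrapper that passes dirty_accountable=0 and prot_numa=1; the page table walk and the pte_mknuma()/pmd_mknuma() updates happen inside mm/mprotect.c. ]

	unsigned long change_prot_numa(struct vm_area_struct *vma,
			unsigned long addr, unsigned long end)
	{
		int nr_updated;

		/* NUMA hinting faults are implemented on top of PROT_NONE */
		BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);

		/*
		 * vma->vm_page_prot is left unchanged; prot_numa=1 asks
		 * change_protection() to mark entries pte_numa/pmd_numa
		 * instead of applying a new protection.
		 */
		nr_updated = change_protection(vma, addr, end,
					       vma->vm_page_prot, 0, 1);

		return nr_updated;
	}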
-rw-r--r--	include/linux/huge_mm.h		  3
-rw-r--r--	include/linux/mm.h		  4
-rw-r--r--	mm/huge_memory.c		 14
-rw-r--r--	mm/mempolicy.c			137
-rw-r--r--	mm/mprotect.c			 72
5 files changed, 85 insertions(+), 145 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index dabb5108d6c0..027ad04ef3a8 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -27,7 +27,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma,
 			 unsigned long new_addr, unsigned long old_end,
 			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-			unsigned long addr, pgprot_t newprot);
+			unsigned long addr, pgprot_t newprot,
+			int prot_numa);
 
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 471185e29bab..d04c2f0aab36 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1080,7 +1080,7 @@ extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long flags, unsigned long new_addr);
 extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 			      unsigned long end, pgprot_t newprot,
-			      int dirty_accountable);
+			      int dirty_accountable, int prot_numa);
 extern int mprotect_fixup(struct vm_area_struct *vma,
 			  struct vm_area_struct **pprev, unsigned long start,
 			  unsigned long end, unsigned long newflags);
@@ -1552,7 +1552,7 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
 #endif
 
 #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
-void change_prot_numa(struct vm_area_struct *vma,
+unsigned long change_prot_numa(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end);
 #endif
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5723b551c023..d79f7a55bf6f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1147,7 +1147,7 @@ out:
 }
 
 int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long addr, pgprot_t newprot)
+		unsigned long addr, pgprot_t newprot, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	int ret = 0;
@@ -1155,7 +1155,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
 		pmd_t entry;
 		entry = pmdp_get_and_clear(mm, addr, pmd);
-		entry = pmd_modify(entry, newprot);
+		if (!prot_numa)
+			entry = pmd_modify(entry, newprot);
+		else {
+			struct page *page = pmd_page(*pmd);
+
+			/* only check non-shared pages */
+			if (page_mapcount(page) == 1 &&
+			    !pmd_numa(*pmd)) {
+				entry = pmd_mknuma(entry);
+			}
+		}
 		set_pmd_at(mm, addr, pmd, entry);
 		spin_unlock(&vma->vm_mm->page_table_lock);
 		ret = 1;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 51d3ebd8561e..75d4600a5e92 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -568,134 +568,23 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
 
 #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
 /*
- * Here we search for not shared page mappings (mapcount == 1) and we
- * set up the pmd/pte_numa on those mappings so the very next access
- * will fire a NUMA hinting page fault.
+ * This is used to mark a range of virtual addresses to be inaccessible.
+ * These are later cleared by a NUMA hinting fault. Depending on these
+ * faults, pages may be migrated for better NUMA placement.
+ *
+ * This is assuming that NUMA faults are handled using PROT_NONE. If
+ * an architecture makes a different choice, it will need further
+ * changes to the core.
  */
-static int
-change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte, *_pte;
-	struct page *page;
-	unsigned long _address, end;
-	spinlock_t *ptl;
-	int ret = 0;
-
-	VM_BUG_ON(address & ~PAGE_MASK);
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		goto out;
-
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		goto out;
-
-	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd))
-		goto out;
-
-	if (pmd_trans_huge_lock(pmd, vma) == 1) {
-		int page_nid;
-		ret = HPAGE_PMD_NR;
-
-		VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-
-		if (pmd_numa(*pmd)) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		page = pmd_page(*pmd);
-
-		/* only check non-shared pages */
-		if (page_mapcount(page) != 1) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		page_nid = page_to_nid(page);
-
-		if (pmd_numa(*pmd)) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
-		ret += HPAGE_PMD_NR;
-		/* defer TLB flush to lower the overhead */
-		spin_unlock(&mm->page_table_lock);
-		goto out;
-	}
-
-	if (pmd_trans_unstable(pmd))
-		goto out;
-	VM_BUG_ON(!pmd_present(*pmd));
-
-	end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK);
-	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-	for (_address = address, _pte = pte; _address < end;
-	     _pte++, _address += PAGE_SIZE) {
-		pte_t pteval = *_pte;
-		if (!pte_present(pteval))
-			continue;
-		if (pte_numa(pteval))
-			continue;
-		page = vm_normal_page(vma, _address, pteval);
-		if (unlikely(!page))
-			continue;
-		/* only check non-shared pages */
-		if (page_mapcount(page) != 1)
-			continue;
-
-		set_pte_at(mm, _address, _pte, pte_mknuma(pteval));
-
-		/* defer TLB flush to lower the overhead */
-		ret++;
-	}
-	pte_unmap_unlock(pte, ptl);
-
-	if (ret && !pmd_numa(*pmd)) {
-		spin_lock(&mm->page_table_lock);
-		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
-		spin_unlock(&mm->page_table_lock);
-		/* defer TLB flush to lower the overhead */
-	}
-
-out:
-	return ret;
-}
-
-/* Assumes mmap_sem is held */
-void
-change_prot_numa(struct vm_area_struct *vma,
-		unsigned long address, unsigned long end)
+unsigned long change_prot_numa(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end)
 {
-	struct mm_struct *mm = vma->vm_mm;
-	int progress = 0;
-
-	while (address < end) {
-		VM_BUG_ON(address < vma->vm_start ||
-			  address + PAGE_SIZE > vma->vm_end);
+	int nr_updated;
+	BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
 
-		progress += change_prot_numa_range(mm, vma, address);
-		address = (address + PMD_SIZE) & PMD_MASK;
-	}
+	nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
 
-	/*
-	 * Flush the TLB for the mm to start the NUMA hinting
-	 * page faults after we finish scanning this vma part
-	 * if there were any PTE updates
-	 */
-	if (progress) {
-		mmu_notifier_invalidate_range_start(vma->vm_mm, address, end);
-		flush_tlb_range(vma, address, end);
-		mmu_notifier_invalidate_range_end(vma->vm_mm, address, end);
-	}
+	return nr_updated;
 }
 #else
 static unsigned long change_prot_numa(struct vm_area_struct *vma,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 7c3628a8b486..7ef6ae964e8f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -35,10 +35,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 }
 #endif
 
-static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
@@ -49,19 +50,39 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		oldpte = *pte;
 		if (pte_present(oldpte)) {
 			pte_t ptent;
+			bool updated = false;
 
 			ptent = ptep_modify_prot_start(mm, addr, pte);
-			ptent = pte_modify(ptent, newprot);
+			if (!prot_numa) {
+				ptent = pte_modify(ptent, newprot);
+				updated = true;
+			} else {
+				struct page *page;
+
+				page = vm_normal_page(vma, addr, oldpte);
+				if (page) {
+					/* only check non-shared pages */
+					if (!pte_numa(oldpte) &&
+					    page_mapcount(page) == 1) {
+						ptent = pte_mknuma(ptent);
+						updated = true;
+					}
+				}
+			}
 
 			/*
 			 * Avoid taking write faults for pages we know to be
 			 * dirty.
 			 */
-			if (dirty_accountable && pte_dirty(ptent))
+			if (dirty_accountable && pte_dirty(ptent)) {
 				ptent = pte_mkwrite(ptent);
+				updated = true;
+			}
+
+			if (updated)
+				pages++;
 
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
-			pages++;
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
@@ -83,9 +104,25 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	return pages;
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
+				       pmd_t *pmd)
+{
+	spin_lock(&mm->page_table_lock);
+	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
+	spin_unlock(&mm->page_table_lock);
+}
+#else
+static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
+				       pmd_t *pmd)
+{
+	BUG();
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -97,7 +134,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 		if (pmd_trans_huge(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE)
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			else if (change_huge_pmd(vma, pmd, addr, newprot)) {
+			else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) {
 				pages += HPAGE_PMD_NR;
 				continue;
 			}
@@ -105,8 +142,11 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 		}
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		pages += change_pte_range(vma->vm_mm, pmd, addr, next, newprot,
-				 dirty_accountable);
+		pages += change_pte_range(vma, pmd, addr, next, newprot,
+				 dirty_accountable, prot_numa);
+
+		if (prot_numa)
+			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
 
 	return pages;
@@ -114,7 +154,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 
 static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -126,7 +166,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *
 		if (pud_none_or_clear_bad(pud))
 			continue;
 		pages += change_pmd_range(vma, pud, addr, next, newprot,
-				 dirty_accountable);
+				 dirty_accountable, prot_numa);
 	} while (pud++, addr = next, addr != end);
 
 	return pages;
@@ -134,7 +174,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *
 
 static unsigned long change_protection_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
@@ -150,7 +190,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
 		pages += change_pud_range(vma, pgd, addr, next, newprot,
-				 dirty_accountable);
+				 dirty_accountable, prot_numa);
 	} while (pgd++, addr = next, addr != end);
 
 	/* Only flush the TLB if we actually modified any entries: */
@@ -162,7 +202,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 
 unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 		       unsigned long end, pgprot_t newprot,
-		       int dirty_accountable)
+		       int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long pages;
@@ -171,7 +211,7 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 	if (is_vm_hugetlb_page(vma))
 		pages = hugetlb_change_protection(vma, start, end, newprot);
 	else
-		pages = change_protection_range(vma, start, end, newprot, dirty_accountable);
+		pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
 	mmu_notifier_invalidate_range_end(mm, start, end);
 
 	return pages;
@@ -249,7 +289,7 @@ success:
 		dirty_accountable = 1;
 	}
 
-	change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
+	change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable, 0);
 
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);