summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 2014-02-11 22:43:38 -0500
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2014-02-16 19:19:36 -0500
commit: 56eecdb912b536a4fa97fb5bfe5a940a54d79be6 (patch)
tree: b42119ab8f9e2a49298f08ba29f78f348a3b579c
parent: 9d85d5863fa4818eb7fa306563bf830c0210c3a6 (diff)
mm: Use ptep/pmdp_set_numa() for updating _PAGE_NUMA bit
Archs like ppc64 don't do a TLB flush in the set_pte/pmd functions when using a hash table MMU, for various reasons (the flush is handled as part of the PTE modification when necessary). ppc64 thus doesn't implement flush_tlb_range for hash-based MMUs.

Additionally, ppc64 requires the TLB flushing to be batched within ptl locks. The reason for that is to ensure that the hash page table is in sync with the Linux page table. We track the hpte index in the Linux PTE, and if we clear it without flushing the hash and then drop the ptl lock, another CPU can update the PTE and we can end up with a duplicate entry in the hash table, which is fatal.

We also want to keep set_pte_at() simple by not requiring it to do a hash flush, for performance reasons. We do that by assuming that set_pte_at() is never *ever* called on a PTE that is already valid.

This was the case until the NUMA code went in, which broke that assumption.

Fix that by introducing a new pair of helpers to set _PAGE_NUMA in a way similar to ptep/pmdp_set_wrprotect(), with a generic implementation using set_pte_at() and a powerpc-specific one using the appropriate mechanism needed to keep the hash table in sync.

Acked-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- arch/powerpc/include/asm/pgtable.h 22
-rw-r--r-- include/asm-generic/pgtable.h 39
-rw-r--r-- mm/huge_memory.c 9
-rw-r--r-- mm/mprotect.c 4
4 files changed, 64 insertions, 10 deletions
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index f83b6f3e1b39..3ebb188c3ff5 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -75,12 +75,34 @@ static inline pte_t pte_mknuma(pte_t pte)
75 return pte; 75 return pte;
76} 76}
77 77
78#define ptep_set_numa ptep_set_numa
79static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
80 pte_t *ptep)
81{
82 if ((pte_val(*ptep) & _PAGE_PRESENT) == 0)
83 VM_BUG_ON(1);
84
85 pte_update(mm, addr, ptep, _PAGE_PRESENT, _PAGE_NUMA, 0);
86 return;
87}
88
78#define pmd_numa pmd_numa 89#define pmd_numa pmd_numa
79static inline int pmd_numa(pmd_t pmd) 90static inline int pmd_numa(pmd_t pmd)
80{ 91{
81 return pte_numa(pmd_pte(pmd)); 92 return pte_numa(pmd_pte(pmd));
82} 93}
83 94
95#define pmdp_set_numa pmdp_set_numa
96static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
97 pmd_t *pmdp)
98{
99 if ((pmd_val(*pmdp) & _PAGE_PRESENT) == 0)
100 VM_BUG_ON(1);
101
102 pmd_hugepage_update(mm, addr, pmdp, _PAGE_PRESENT, _PAGE_NUMA);
103 return;
104}
105
84#define pmd_mknonnuma pmd_mknonnuma 106#define pmd_mknonnuma pmd_mknonnuma
85static inline pmd_t pmd_mknonnuma(pmd_t pmd) 107static inline pmd_t pmd_mknonnuma(pmd_t pmd)
86{ 108{
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 8e4f41d9af4d..34c7bdc06014 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -701,6 +701,18 @@ static inline pte_t pte_mknuma(pte_t pte)
701} 701}
702#endif 702#endif
703 703
704#ifndef ptep_set_numa
705static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
706 pte_t *ptep)
707{
708 pte_t ptent = *ptep;
709
710 ptent = pte_mknuma(ptent);
711 set_pte_at(mm, addr, ptep, ptent);
712 return;
713}
714#endif
715
704#ifndef pmd_mknuma 716#ifndef pmd_mknuma
705static inline pmd_t pmd_mknuma(pmd_t pmd) 717static inline pmd_t pmd_mknuma(pmd_t pmd)
706{ 718{
@@ -708,6 +720,18 @@ static inline pmd_t pmd_mknuma(pmd_t pmd)
708 return pmd_clear_flags(pmd, _PAGE_PRESENT); 720 return pmd_clear_flags(pmd, _PAGE_PRESENT);
709} 721}
710#endif 722#endif
723
724#ifndef pmdp_set_numa
725static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
726 pmd_t *pmdp)
727{
728 pmd_t pmd = *pmdp;
729
730 pmd = pmd_mknuma(pmd);
731 set_pmd_at(mm, addr, pmdp, pmd);
732 return;
733}
734#endif
711#else 735#else
712extern int pte_numa(pte_t pte); 736extern int pte_numa(pte_t pte);
713extern int pmd_numa(pmd_t pmd); 737extern int pmd_numa(pmd_t pmd);
@@ -715,6 +739,8 @@ extern pte_t pte_mknonnuma(pte_t pte);
715extern pmd_t pmd_mknonnuma(pmd_t pmd); 739extern pmd_t pmd_mknonnuma(pmd_t pmd);
716extern pte_t pte_mknuma(pte_t pte); 740extern pte_t pte_mknuma(pte_t pte);
717extern pmd_t pmd_mknuma(pmd_t pmd); 741extern pmd_t pmd_mknuma(pmd_t pmd);
742extern void ptep_set_numa(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
743extern void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp);
718#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ 744#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
719#else 745#else
720static inline int pmd_numa(pmd_t pmd) 746static inline int pmd_numa(pmd_t pmd)
@@ -742,10 +768,23 @@ static inline pte_t pte_mknuma(pte_t pte)
742 return pte; 768 return pte;
743} 769}
744 770
771static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
772 pte_t *ptep)
773{
774 return;
775}
776
777
745static inline pmd_t pmd_mknuma(pmd_t pmd) 778static inline pmd_t pmd_mknuma(pmd_t pmd)
746{ 779{
747 return pmd; 780 return pmd;
748} 781}
782
783static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
784 pmd_t *pmdp)
785{
786 return ;
787}
749#endif /* CONFIG_NUMA_BALANCING */ 788#endif /* CONFIG_NUMA_BALANCING */
750 789
751#endif /* CONFIG_MMU */ 790#endif /* CONFIG_MMU */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 82166bf974e1..da23eb96779f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1545,6 +1545,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
1545 entry = pmd_mknonnuma(entry); 1545 entry = pmd_mknonnuma(entry);
1546 entry = pmd_modify(entry, newprot); 1546 entry = pmd_modify(entry, newprot);
1547 ret = HPAGE_PMD_NR; 1547 ret = HPAGE_PMD_NR;
1548 set_pmd_at(mm, addr, pmd, entry);
1548 BUG_ON(pmd_write(entry)); 1549 BUG_ON(pmd_write(entry));
1549 } else { 1550 } else {
1550 struct page *page = pmd_page(*pmd); 1551 struct page *page = pmd_page(*pmd);
@@ -1557,16 +1558,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
1557 */ 1558 */
1558 if (!is_huge_zero_page(page) && 1559 if (!is_huge_zero_page(page) &&
1559 !pmd_numa(*pmd)) { 1560 !pmd_numa(*pmd)) {
1560 entry = *pmd; 1561 pmdp_set_numa(mm, addr, pmd);
1561 entry = pmd_mknuma(entry);
1562 ret = HPAGE_PMD_NR; 1562 ret = HPAGE_PMD_NR;
1563 } 1563 }
1564 } 1564 }
1565
1566 /* Set PMD if cleared earlier */
1567 if (ret == HPAGE_PMD_NR)
1568 set_pmd_at(mm, addr, pmd, entry);
1569
1570 spin_unlock(ptl); 1565 spin_unlock(ptl);
1571 } 1566 }
1572 1567
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 33eab902f10e..769a67a15803 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -69,12 +69,10 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
69 } else { 69 } else {
70 struct page *page; 70 struct page *page;
71 71
72 ptent = *pte;
73 page = vm_normal_page(vma, addr, oldpte); 72 page = vm_normal_page(vma, addr, oldpte);
74 if (page && !PageKsm(page)) { 73 if (page && !PageKsm(page)) {
75 if (!pte_numa(oldpte)) { 74 if (!pte_numa(oldpte)) {
76 ptent = pte_mknuma(ptent); 75 ptep_set_numa(mm, addr, pte);
77 set_pte_at(mm, addr, pte, ptent);
78 updated = true; 76 updated = true;
79 } 77 }
80 } 78 }