diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2014-02-11 22:43:38 -0500 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2014-02-16 19:19:36 -0500 |
commit | 56eecdb912b536a4fa97fb5bfe5a940a54d79be6 (patch) | |
tree | b42119ab8f9e2a49298f08ba29f78f348a3b579c | |
parent | 9d85d5863fa4818eb7fa306563bf830c0210c3a6 (diff) |
mm: Use ptep/pmdp_set_numa() for updating _PAGE_NUMA bit
Archs like ppc64 doesn't do tlb flush in set_pte/pmd functions when using
a hash table MMU for various reasons (the flush is handled as part of
the PTE modification when necessary).
ppc64 thus doesn't implement flush_tlb_range for hash based MMUs.
Additionally ppc64 require the tlb flushing to be batched within ptl locks.
The reason to do that is to ensure that the hash page table is in sync with
linux page table.
We track the hpte index in linux pte and if we clear them without flushing
hash and drop the ptl lock, we can have another cpu update the pte and can
end up with duplicate entry in the hash table, which is fatal.
We also want to keep set_pte_at simpler by not requiring them to do hash
flush for performance reason. We do that by assuming that set_pte_at() is
never *ever* called on a PTE that is already valid.
This was the case until the NUMA code went in which broke that assumption.
Fix that by introducing a new pair of helpers to set _PAGE_NUMA in a
way similar to ptep/pmdp_set_wrprotect(), with a generic implementation
using set_pte_at() and a powerpc specific one using the appropriate
mechanism needed to keep the hash table in sync.
Acked-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- | arch/powerpc/include/asm/pgtable.h | 22 | ||||
-rw-r--r-- | include/asm-generic/pgtable.h | 39 | ||||
-rw-r--r-- | mm/huge_memory.c | 9 | ||||
-rw-r--r-- | mm/mprotect.c | 4 |
4 files changed, 64 insertions, 10 deletions
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index f83b6f3e1b39..3ebb188c3ff5 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h | |||
@@ -75,12 +75,34 @@ static inline pte_t pte_mknuma(pte_t pte) | |||
75 | return pte; | 75 | return pte; |
76 | } | 76 | } |
77 | 77 | ||
78 | #define ptep_set_numa ptep_set_numa | ||
79 | static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, | ||
80 | pte_t *ptep) | ||
81 | { | ||
82 | if ((pte_val(*ptep) & _PAGE_PRESENT) == 0) | ||
83 | VM_BUG_ON(1); | ||
84 | |||
85 | pte_update(mm, addr, ptep, _PAGE_PRESENT, _PAGE_NUMA, 0); | ||
86 | return; | ||
87 | } | ||
88 | |||
78 | #define pmd_numa pmd_numa | 89 | #define pmd_numa pmd_numa |
79 | static inline int pmd_numa(pmd_t pmd) | 90 | static inline int pmd_numa(pmd_t pmd) |
80 | { | 91 | { |
81 | return pte_numa(pmd_pte(pmd)); | 92 | return pte_numa(pmd_pte(pmd)); |
82 | } | 93 | } |
83 | 94 | ||
95 | #define pmdp_set_numa pmdp_set_numa | ||
96 | static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, | ||
97 | pmd_t *pmdp) | ||
98 | { | ||
99 | if ((pmd_val(*pmdp) & _PAGE_PRESENT) == 0) | ||
100 | VM_BUG_ON(1); | ||
101 | |||
102 | pmd_hugepage_update(mm, addr, pmdp, _PAGE_PRESENT, _PAGE_NUMA); | ||
103 | return; | ||
104 | } | ||
105 | |||
84 | #define pmd_mknonnuma pmd_mknonnuma | 106 | #define pmd_mknonnuma pmd_mknonnuma |
85 | static inline pmd_t pmd_mknonnuma(pmd_t pmd) | 107 | static inline pmd_t pmd_mknonnuma(pmd_t pmd) |
86 | { | 108 | { |
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 8e4f41d9af4d..34c7bdc06014 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -701,6 +701,18 @@ static inline pte_t pte_mknuma(pte_t pte) | |||
701 | } | 701 | } |
702 | #endif | 702 | #endif |
703 | 703 | ||
704 | #ifndef ptep_set_numa | ||
705 | static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, | ||
706 | pte_t *ptep) | ||
707 | { | ||
708 | pte_t ptent = *ptep; | ||
709 | |||
710 | ptent = pte_mknuma(ptent); | ||
711 | set_pte_at(mm, addr, ptep, ptent); | ||
712 | return; | ||
713 | } | ||
714 | #endif | ||
715 | |||
704 | #ifndef pmd_mknuma | 716 | #ifndef pmd_mknuma |
705 | static inline pmd_t pmd_mknuma(pmd_t pmd) | 717 | static inline pmd_t pmd_mknuma(pmd_t pmd) |
706 | { | 718 | { |
@@ -708,6 +720,18 @@ static inline pmd_t pmd_mknuma(pmd_t pmd) | |||
708 | return pmd_clear_flags(pmd, _PAGE_PRESENT); | 720 | return pmd_clear_flags(pmd, _PAGE_PRESENT); |
709 | } | 721 | } |
710 | #endif | 722 | #endif |
723 | |||
724 | #ifndef pmdp_set_numa | ||
725 | static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, | ||
726 | pmd_t *pmdp) | ||
727 | { | ||
728 | pmd_t pmd = *pmdp; | ||
729 | |||
730 | pmd = pmd_mknuma(pmd); | ||
731 | set_pmd_at(mm, addr, pmdp, pmd); | ||
732 | return; | ||
733 | } | ||
734 | #endif | ||
711 | #else | 735 | #else |
712 | extern int pte_numa(pte_t pte); | 736 | extern int pte_numa(pte_t pte); |
713 | extern int pmd_numa(pmd_t pmd); | 737 | extern int pmd_numa(pmd_t pmd); |
@@ -715,6 +739,8 @@ extern pte_t pte_mknonnuma(pte_t pte); | |||
715 | extern pmd_t pmd_mknonnuma(pmd_t pmd); | 739 | extern pmd_t pmd_mknonnuma(pmd_t pmd); |
716 | extern pte_t pte_mknuma(pte_t pte); | 740 | extern pte_t pte_mknuma(pte_t pte); |
717 | extern pmd_t pmd_mknuma(pmd_t pmd); | 741 | extern pmd_t pmd_mknuma(pmd_t pmd); |
742 | extern void ptep_set_numa(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | ||
743 | extern void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); | ||
718 | #endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ | 744 | #endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ |
719 | #else | 745 | #else |
720 | static inline int pmd_numa(pmd_t pmd) | 746 | static inline int pmd_numa(pmd_t pmd) |
@@ -742,10 +768,23 @@ static inline pte_t pte_mknuma(pte_t pte) | |||
742 | return pte; | 768 | return pte; |
743 | } | 769 | } |
744 | 770 | ||
771 | static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, | ||
772 | pte_t *ptep) | ||
773 | { | ||
774 | return; | ||
775 | } | ||
776 | |||
777 | |||
745 | static inline pmd_t pmd_mknuma(pmd_t pmd) | 778 | static inline pmd_t pmd_mknuma(pmd_t pmd) |
746 | { | 779 | { |
747 | return pmd; | 780 | return pmd; |
748 | } | 781 | } |
782 | |||
783 | static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, | ||
784 | pmd_t *pmdp) | ||
785 | { | ||
786 | return ; | ||
787 | } | ||
749 | #endif /* CONFIG_NUMA_BALANCING */ | 788 | #endif /* CONFIG_NUMA_BALANCING */ |
750 | 789 | ||
751 | #endif /* CONFIG_MMU */ | 790 | #endif /* CONFIG_MMU */ |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 82166bf974e1..da23eb96779f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1545,6 +1545,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
1545 | entry = pmd_mknonnuma(entry); | 1545 | entry = pmd_mknonnuma(entry); |
1546 | entry = pmd_modify(entry, newprot); | 1546 | entry = pmd_modify(entry, newprot); |
1547 | ret = HPAGE_PMD_NR; | 1547 | ret = HPAGE_PMD_NR; |
1548 | set_pmd_at(mm, addr, pmd, entry); | ||
1548 | BUG_ON(pmd_write(entry)); | 1549 | BUG_ON(pmd_write(entry)); |
1549 | } else { | 1550 | } else { |
1550 | struct page *page = pmd_page(*pmd); | 1551 | struct page *page = pmd_page(*pmd); |
@@ -1557,16 +1558,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
1557 | */ | 1558 | */ |
1558 | if (!is_huge_zero_page(page) && | 1559 | if (!is_huge_zero_page(page) && |
1559 | !pmd_numa(*pmd)) { | 1560 | !pmd_numa(*pmd)) { |
1560 | entry = *pmd; | 1561 | pmdp_set_numa(mm, addr, pmd); |
1561 | entry = pmd_mknuma(entry); | ||
1562 | ret = HPAGE_PMD_NR; | 1562 | ret = HPAGE_PMD_NR; |
1563 | } | 1563 | } |
1564 | } | 1564 | } |
1565 | |||
1566 | /* Set PMD if cleared earlier */ | ||
1567 | if (ret == HPAGE_PMD_NR) | ||
1568 | set_pmd_at(mm, addr, pmd, entry); | ||
1569 | |||
1570 | spin_unlock(ptl); | 1565 | spin_unlock(ptl); |
1571 | } | 1566 | } |
1572 | 1567 | ||
diff --git a/mm/mprotect.c b/mm/mprotect.c index 33eab902f10e..769a67a15803 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
@@ -69,12 +69,10 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
69 | } else { | 69 | } else { |
70 | struct page *page; | 70 | struct page *page; |
71 | 71 | ||
72 | ptent = *pte; | ||
73 | page = vm_normal_page(vma, addr, oldpte); | 72 | page = vm_normal_page(vma, addr, oldpte); |
74 | if (page && !PageKsm(page)) { | 73 | if (page && !PageKsm(page)) { |
75 | if (!pte_numa(oldpte)) { | 74 | if (!pte_numa(oldpte)) { |
76 | ptent = pte_mknuma(ptent); | 75 | ptep_set_numa(mm, addr, pte); |
77 | set_pte_at(mm, addr, pte, ptent); | ||
78 | updated = true; | 76 | updated = true; |
79 | } | 77 | } |
80 | } | 78 | } |