aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2014-04-18 18:07:21 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-18 19:40:09 -0400
commit29c7787075c92ca8af353acd5301481e6f37082f (patch)
tree023f53747a1eb7934dd6c954e8764fbe9d91c819 /include
parent8229f1a044894f84324292608c149f0b4563532b (diff)
mm: use paravirt friendly ops for NUMA hinting ptes
David Vrabel identified a regression when using automatic NUMA balancing under Xen whereby page table entries were getting corrupted due to the use of native PTE operations. Quoting him Xen PV guest page tables require that their entries use machine addresses if the preset bit (_PAGE_PRESENT) is set, and (for successful migration) non-present PTEs must use pseudo-physical addresses. This is because on migration MFNs in present PTEs are translated to PFNs (canonicalised) so they may be translated back to the new MFN in the destination domain (uncanonicalised). pte_mknonnuma(), pmd_mknonnuma(), pte_mknuma() and pmd_mknuma() set and clear the _PAGE_PRESENT bit using pte_set_flags(), pte_clear_flags(), etc. In a Xen PV guest, these functions must translate MFNs to PFNs when clearing _PAGE_PRESENT and translate PFNs to MFNs when setting _PAGE_PRESENT. His suggested fix converted p[te|md]_[set|clear]_flags to using paravirt-friendly ops but this is overkill. He suggested an alternative of using p[te|md]_modify in the NUMA page table operations but this is does more work than necessary and would require looking up a VMA for protections. This patch modifies the NUMA page table operations to use paravirt friendly operations to set/clear the flags of interest. Unfortunately this will take a performance hit when updating the PTEs on CONFIG_PARAVIRT but I do not see a way around it that does not break Xen. 
Signed-off-by: Mel Gorman <mgorman@suse.de> Acked-by: David Vrabel <david.vrabel@citrix.com> Tested-by: David Vrabel <david.vrabel@citrix.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Peter Anvin <hpa@zytor.com> Cc: Fengguang Wu <fengguang.wu@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Steven Noonan <steven@uplinklabs.net> Cc: Rik van Riel <riel@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/pgtable.h31
1 file changed, 23 insertions, 8 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 1ec08c198b66..a8015a7a55bb 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -693,24 +693,35 @@ static inline int pmd_numa(pmd_t pmd)
693#ifndef pte_mknonnuma 693#ifndef pte_mknonnuma
694static inline pte_t pte_mknonnuma(pte_t pte) 694static inline pte_t pte_mknonnuma(pte_t pte)
695{ 695{
696 pte = pte_clear_flags(pte, _PAGE_NUMA); 696 pteval_t val = pte_val(pte);
697 return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED); 697
698 val &= ~_PAGE_NUMA;
699 val |= (_PAGE_PRESENT|_PAGE_ACCESSED);
700 return __pte(val);
698} 701}
699#endif 702#endif
700 703
701#ifndef pmd_mknonnuma 704#ifndef pmd_mknonnuma
702static inline pmd_t pmd_mknonnuma(pmd_t pmd) 705static inline pmd_t pmd_mknonnuma(pmd_t pmd)
703{ 706{
704 pmd = pmd_clear_flags(pmd, _PAGE_NUMA); 707 pmdval_t val = pmd_val(pmd);
705 return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED); 708
709 val &= ~_PAGE_NUMA;
710 val |= (_PAGE_PRESENT|_PAGE_ACCESSED);
711
712 return __pmd(val);
706} 713}
707#endif 714#endif
708 715
709#ifndef pte_mknuma 716#ifndef pte_mknuma
710static inline pte_t pte_mknuma(pte_t pte) 717static inline pte_t pte_mknuma(pte_t pte)
711{ 718{
712 pte = pte_set_flags(pte, _PAGE_NUMA); 719 pteval_t val = pte_val(pte);
713 return pte_clear_flags(pte, _PAGE_PRESENT); 720
721 val &= ~_PAGE_PRESENT;
722 val |= _PAGE_NUMA;
723
724 return __pte(val);
714} 725}
715#endif 726#endif
716 727
@@ -729,8 +740,12 @@ static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
729#ifndef pmd_mknuma 740#ifndef pmd_mknuma
730static inline pmd_t pmd_mknuma(pmd_t pmd) 741static inline pmd_t pmd_mknuma(pmd_t pmd)
731{ 742{
732 pmd = pmd_set_flags(pmd, _PAGE_NUMA); 743 pmdval_t val = pmd_val(pmd);
733 return pmd_clear_flags(pmd, _PAGE_PRESENT); 744
745 val &= ~_PAGE_PRESENT;
746 val |= _PAGE_NUMA;
747
748 return __pmd(val);
734} 749}
735#endif 750#endif
736 751