diff options
author | Mel Gorman <mgorman@suse.de> | 2014-04-18 18:07:21 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-18 19:40:09 -0400 |
commit | 29c7787075c92ca8af353acd5301481e6f37082f (patch) | |
tree | 023f53747a1eb7934dd6c954e8764fbe9d91c819 /include | |
parent | 8229f1a044894f84324292608c149f0b4563532b (diff) |
mm: use paravirt friendly ops for NUMA hinting ptes
David Vrabel identified a regression when using automatic NUMA balancing
under Xen whereby page table entries were getting corrupted due to the
use of native PTE operations. Quoting him
Xen PV guest page tables require that their entries use machine
addresses if the preset bit (_PAGE_PRESENT) is set, and (for
successful migration) non-present PTEs must use pseudo-physical
addresses. This is because on migration MFNs in present PTEs are
translated to PFNs (canonicalised) so they may be translated back
to the new MFN in the destination domain (uncanonicalised).
pte_mknonnuma(), pmd_mknonnuma(), pte_mknuma() and pmd_mknuma()
set and clear the _PAGE_PRESENT bit using pte_set_flags(),
pte_clear_flags(), etc.
In a Xen PV guest, these functions must translate MFNs to PFNs
when clearing _PAGE_PRESENT and translate PFNs to MFNs when setting
_PAGE_PRESENT.
His suggested fix converted p[te|md]_[set|clear]_flags to using
paravirt-friendly ops but this is overkill. He suggested an alternative
of using p[te|md]_modify in the NUMA page table operations but this is
does more work than necessary and would require looking up a VMA for
protections.
This patch modifies the NUMA page table operations to use paravirt
friendly operations to set/clear the flags of interest. Unfortunately
this will take a performance hit when updating the PTEs on
CONFIG_PARAVIRT but I do not see a way around it that does not break
Xen.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: David Vrabel <david.vrabel@citrix.com>
Tested-by: David Vrabel <david.vrabel@citrix.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Anvin <hpa@zytor.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Noonan <steven@uplinklabs.net>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/pgtable.h | 31 |
1 files changed, 23 insertions, 8 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 1ec08c198b66..a8015a7a55bb 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -693,24 +693,35 @@ static inline int pmd_numa(pmd_t pmd) | |||
693 | #ifndef pte_mknonnuma | 693 | #ifndef pte_mknonnuma |
694 | static inline pte_t pte_mknonnuma(pte_t pte) | 694 | static inline pte_t pte_mknonnuma(pte_t pte) |
695 | { | 695 | { |
696 | pte = pte_clear_flags(pte, _PAGE_NUMA); | 696 | pteval_t val = pte_val(pte); |
697 | return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED); | 697 | |
698 | val &= ~_PAGE_NUMA; | ||
699 | val |= (_PAGE_PRESENT|_PAGE_ACCESSED); | ||
700 | return __pte(val); | ||
698 | } | 701 | } |
699 | #endif | 702 | #endif |
700 | 703 | ||
701 | #ifndef pmd_mknonnuma | 704 | #ifndef pmd_mknonnuma |
702 | static inline pmd_t pmd_mknonnuma(pmd_t pmd) | 705 | static inline pmd_t pmd_mknonnuma(pmd_t pmd) |
703 | { | 706 | { |
704 | pmd = pmd_clear_flags(pmd, _PAGE_NUMA); | 707 | pmdval_t val = pmd_val(pmd); |
705 | return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED); | 708 | |
709 | val &= ~_PAGE_NUMA; | ||
710 | val |= (_PAGE_PRESENT|_PAGE_ACCESSED); | ||
711 | |||
712 | return __pmd(val); | ||
706 | } | 713 | } |
707 | #endif | 714 | #endif |
708 | 715 | ||
709 | #ifndef pte_mknuma | 716 | #ifndef pte_mknuma |
710 | static inline pte_t pte_mknuma(pte_t pte) | 717 | static inline pte_t pte_mknuma(pte_t pte) |
711 | { | 718 | { |
712 | pte = pte_set_flags(pte, _PAGE_NUMA); | 719 | pteval_t val = pte_val(pte); |
713 | return pte_clear_flags(pte, _PAGE_PRESENT); | 720 | |
721 | val &= ~_PAGE_PRESENT; | ||
722 | val |= _PAGE_NUMA; | ||
723 | |||
724 | return __pte(val); | ||
714 | } | 725 | } |
715 | #endif | 726 | #endif |
716 | 727 | ||
@@ -729,8 +740,12 @@ static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, | |||
729 | #ifndef pmd_mknuma | 740 | #ifndef pmd_mknuma |
730 | static inline pmd_t pmd_mknuma(pmd_t pmd) | 741 | static inline pmd_t pmd_mknuma(pmd_t pmd) |
731 | { | 742 | { |
732 | pmd = pmd_set_flags(pmd, _PAGE_NUMA); | 743 | pmdval_t val = pmd_val(pmd); |
733 | return pmd_clear_flags(pmd, _PAGE_PRESENT); | 744 | |
745 | val &= ~_PAGE_PRESENT; | ||
746 | val |= _PAGE_NUMA; | ||
747 | |||
748 | return __pmd(val); | ||
734 | } | 749 | } |
735 | #endif | 750 | #endif |
736 | 751 | ||