aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author Mel Gorman <mgorman@suse.de> 2014-04-18 18:07:21 -0400
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2014-05-31 00:52:12 -0400
commit 6fe8c0a06b858e6225fcfb80e3be0a319ff84ea7 (patch)
tree 5f1c9550a25ae313b9b495c192881c23d036e689 /include
parent b934932aa9b3e8e2361122cd0560b63c478081da (diff)
mm: use paravirt friendly ops for NUMA hinting ptes
commit 29c7787075c92ca8af353acd5301481e6f37082f upstream. David Vrabel identified a regression when using automatic NUMA balancing under Xen whereby page table entries were getting corrupted due to the use of native PTE operations. Quoting him: "Xen PV guest page tables require that their entries use machine addresses if the present bit (_PAGE_PRESENT) is set, and (for successful migration) non-present PTEs must use pseudo-physical addresses. This is because on migration MFNs in present PTEs are translated to PFNs (canonicalised) so they may be translated back to the new MFN in the destination domain (uncanonicalised). pte_mknonnuma(), pmd_mknonnuma(), pte_mknuma() and pmd_mknuma() set and clear the _PAGE_PRESENT bit using pte_set_flags(), pte_clear_flags(), etc. In a Xen PV guest, these functions must translate MFNs to PFNs when clearing _PAGE_PRESENT and translate PFNs to MFNs when setting _PAGE_PRESENT." His suggested fix converted p[te|md]_[set|clear]_flags to use paravirt-friendly ops but this is overkill. He suggested an alternative of using p[te|md]_modify in the NUMA page table operations but this does more work than necessary and would require looking up a VMA for protections. This patch modifies the NUMA page table operations to use paravirt-friendly operations to set/clear the flags of interest. Unfortunately this will take a performance hit when updating the PTEs on CONFIG_PARAVIRT but I do not see a way around it that does not break Xen.
Signed-off-by: Mel Gorman <mgorman@suse.de> Acked-by: David Vrabel <david.vrabel@citrix.com> Tested-by: David Vrabel <david.vrabel@citrix.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Peter Anvin <hpa@zytor.com> Cc: Fengguang Wu <fengguang.wu@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Steven Noonan <steven@uplinklabs.net> Cc: Rik van Riel <riel@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'include')
-rw-r--r-- include/asm-generic/pgtable.h 31
1 files changed, 23 insertions, 8 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b58268a5ddd4..17bccd3a4b03 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -620,32 +620,47 @@ static inline int pmd_numa(pmd_t pmd)
620#ifndef pte_mknonnuma 620#ifndef pte_mknonnuma
621static inline pte_t pte_mknonnuma(pte_t pte) 621static inline pte_t pte_mknonnuma(pte_t pte)
622{ 622{
623 pte = pte_clear_flags(pte, _PAGE_NUMA); 623 pteval_t val = pte_val(pte);
624 return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED); 624
625 val &= ~_PAGE_NUMA;
626 val |= (_PAGE_PRESENT|_PAGE_ACCESSED);
627 return __pte(val);
625} 628}
626#endif 629#endif
627 630
628#ifndef pmd_mknonnuma 631#ifndef pmd_mknonnuma
629static inline pmd_t pmd_mknonnuma(pmd_t pmd) 632static inline pmd_t pmd_mknonnuma(pmd_t pmd)
630{ 633{
631 pmd = pmd_clear_flags(pmd, _PAGE_NUMA); 634 pmdval_t val = pmd_val(pmd);
632 return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED); 635
636 val &= ~_PAGE_NUMA;
637 val |= (_PAGE_PRESENT|_PAGE_ACCESSED);
638
639 return __pmd(val);
633} 640}
634#endif 641#endif
635 642
636#ifndef pte_mknuma 643#ifndef pte_mknuma
637static inline pte_t pte_mknuma(pte_t pte) 644static inline pte_t pte_mknuma(pte_t pte)
638{ 645{
639 pte = pte_set_flags(pte, _PAGE_NUMA); 646 pteval_t val = pte_val(pte);
640 return pte_clear_flags(pte, _PAGE_PRESENT); 647
648 val &= ~_PAGE_PRESENT;
649 val |= _PAGE_NUMA;
650
651 return __pte(val);
641} 652}
642#endif 653#endif
643 654
644#ifndef pmd_mknuma 655#ifndef pmd_mknuma
645static inline pmd_t pmd_mknuma(pmd_t pmd) 656static inline pmd_t pmd_mknuma(pmd_t pmd)
646{ 657{
647 pmd = pmd_set_flags(pmd, _PAGE_NUMA); 658 pmdval_t val = pmd_val(pmd);
648 return pmd_clear_flags(pmd, _PAGE_PRESENT); 659
660 val &= ~_PAGE_PRESENT;
661 val |= _PAGE_NUMA;
662
663 return __pmd(val);
649} 664}
650#endif 665#endif
651#else 666#else