author     Mel Gorman <mgorman@suse.de>                    2015-02-12 17:58:32 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-02-12 21:54:08 -0500
commit     21d9ee3eda7792c45880b2f11bff8e95c9a061fb (patch)
tree       2e20932b8f1526e6d1f48add9e818ed43d7be8ee /arch/x86/include/asm/pgtable.h
parent     4d9424669946532be754a6e116618dcb58430cb4 (diff)
mm: remove remaining references to NUMA hinting bits and helpers
This patch removes the NUMA PTE bits and associated helpers. As a
side-effect it increases the maximum possible swap space on x86-64.
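For context, the helpers removed here (pte_numa() and friends) were replaced
earlier in this series by protnone-based checks. A minimal sketch of the x86
variants as they look after the series (shown for reference; see the
CONFIG_NUMA_BALANCING block in the diff context below):

#ifdef CONFIG_NUMA_BALANCING
/*
 * Sketch of the protnone helpers that take over from pte_numa()/pmd_numa():
 * a PROT_NONE entry in an otherwise accessible VMA is what now signals a
 * NUMA hinting fault.
 */
static inline int pte_protnone(pte_t pte)
{
	return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT))
		== _PAGE_PROTNONE;
}

static inline int pmd_protnone(pmd_t pmd)
{
	return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT))
		== _PAGE_PROTNONE;
}
#endif /* CONFIG_NUMA_BALANCING */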
One potential source of problems is races between the marking of PTEs
PROT_NONE, NUMA hinting faults and migration. It must be guaranteed that
a PTE being protected is not faulted in parallel, seen as pte_none and
used to corrupt memory. The base case is safe, but transhuge had problems
in the past due to a different migration mechanism and a dependence on the
page lock to serialise migrations, so it warrants a closer look.
task_work hinting update                parallel fault
------------------------               --------------
change_pmd_range
  change_huge_pmd
    __pmd_trans_huge_lock
      pmdp_get_and_clear
                                        __handle_mm_fault
                                        pmd_none
                                        do_huge_pmd_anonymous_page
                                read?  pmd_lock blocks until hinting complete,
                                       fail !pmd_none test
                                write? __do_huge_pmd_anonymous_page acquires
                                       pmd_lock, checks pmd_none
      pmd_modify
      set_pmd_at
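Schematically, the fault side resolves that race roughly as follows (a
sketch of the pattern, not the verbatim code from huge_memory.c):

	/*
	 * A new huge page is only installed under the pmd lock and only
	 * after rechecking pmd_none(), so a fault racing with the hinting
	 * update either blocks on the lock held by change_huge_pmd() or
	 * fails the pmd_none() check and backs out.
	 */
	ptl = pmd_lock(vma->vm_mm, pmd);
	if (!pmd_none(*pmd)) {
		spin_unlock(ptl);
		/* lost the race; another entry is already installed */
	} else {
		set_pmd_at(vma->vm_mm, haddr, pmd, entry);
		spin_unlock(ptl);
	}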
task_work hinting update                parallel migration
------------------------               ------------------
change_pmd_range
  change_huge_pmd
    __pmd_trans_huge_lock
      pmdp_get_and_clear
                                        __handle_mm_fault
                                        do_huge_pmd_numa_page
                                migrate_misplaced_transhuge_page
                                pmd_lock waits for updates to complete,
                                recheck pmd_same
      pmd_modify
      set_pmd_at
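The migration side relies on the pmd_same() recheck instead; again a
sketch of the pattern rather than the verbatim code:

	/*
	 * Taking the pmd lock waits for any in-flight protection update
	 * to finish; pmd_same() then detects whether the pmd changed
	 * while the fault was in flight, in which case the migration is
	 * abandoned and the fault retried.
	 */
	ptl = pmd_lock(mm, pmd);
	if (unlikely(!pmd_same(*pmd, entry))) {
		spin_unlock(ptl);
		/* raced with a parallel update; retry the fault */
		goto out_fail;
	}
	/* pmd is stable under ptl; safe to proceed with the migration */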
Both of those are safe and the case where a transhuge page is inserted
during a protection update is unchanged. The case where two processes try
migrating at the same time is unchanged by this series, so it should still
be ok. I could not find a case where we are accidentally depending on the
PTE not being cleared and flushed. If one was missed, it will manifest as
corruption problems that start triggering shortly after this series is
merged and only happen when NUMA balancing is enabled.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/x86/include/asm/pgtable.h')
 arch/x86/include/asm/pgtable.h | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index f519b0b529dd..34d42a7d5595 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -300,7 +300,7 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
 
 static inline pmd_t pmd_mknotpresent(pmd_t pmd)
 {
-	return pmd_clear_flags(pmd, _PAGE_PRESENT);
+	return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -443,13 +443,6 @@ static inline int pte_same(pte_t a, pte_t b)
 
 static inline int pte_present(pte_t a)
 {
-	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
-			       _PAGE_NUMA);
-}
-
-#define pte_present_nonuma pte_present_nonuma
-static inline int pte_present_nonuma(pte_t a)
-{
 	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
@@ -459,7 +452,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
 	if (pte_flags(a) & _PAGE_PRESENT)
 		return true;
 
-	if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
+	if ((pte_flags(a) & _PAGE_PROTNONE) &&
 			mm_tlb_flush_pending(mm))
 		return true;
 
@@ -479,8 +472,7 @@ static inline int pmd_present(pmd_t pmd)
 	 * the _PAGE_PSE flag will remain set at all times while the
 	 * _PAGE_PRESENT bit is clear).
 	 */
-	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE |
-				 _PAGE_NUMA);
+	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
 }
 
 #ifdef CONFIG_NUMA_BALANCING
@@ -555,11 +547,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
 
 static inline int pmd_bad(pmd_t pmd)
 {
-#ifdef CONFIG_NUMA_BALANCING
-	/* pmd_numa check */
-	if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA)
-		return 0;
-#endif
 	return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
 }
 
@@ -878,19 +865,16 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
 {
-	VM_BUG_ON(pte_present_nonuma(pte));
 	return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
 }
 
 static inline int pte_swp_soft_dirty(pte_t pte)
 {
-	VM_BUG_ON(pte_present_nonuma(pte));
 	return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
 }
 
 static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 {
-	VM_BUG_ON(pte_present_nonuma(pte));
 	return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
 }
 #endif