diff options
author | Mel Gorman <mgorman@suse.de> | 2014-06-04 19:06:30 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-04 19:53:55 -0400 |
commit | c46a7c817e662a820373bb76b88d0ad67d6abe5d (patch) | |
tree | 41a7d24c818c9cfe71f335f7fdae162896aa3b23 | |
parent | 4468dd76f51f8be75d4f04f1d721e379596e7262 (diff) |
x86: define _PAGE_NUMA by reusing software bits on the PMD and PTE levels
_PAGE_NUMA is currently an alias of _PAGE_PROTNONE to trap NUMA hinting
faults on x86. Care is taken such that _PAGE_NUMA is used only in
situations where the VMA flags distinguish between NUMA hinting faults
and prot_none faults. This decision was x86-specific, and conceptually
it is difficult because it requires special casing to distinguish between
PROTNONE and NUMA ptes based on context.
Fundamentally, we only need the _PAGE_NUMA bit to tell the difference
between an entry that is really unmapped and a page that is protected
for NUMA hinting faults, because if the PTE is not present then a fault
will be trapped.
Swap PTEs on x86-64 use the bits after _PAGE_GLOBAL for the offset.
This patch shrinks the maximum possible swap size and uses the bit to
uniquely distinguish between NUMA hinting ptes and swap ptes.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Anvin <hpa@zytor.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Noonan <steven@uplinklabs.net>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | arch/powerpc/include/asm/pgtable.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable.h | 15 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_64.h | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_types.h | 66 | ||||
-rw-r--r-- | arch/x86/mm/pageattr-test.c | 2 | ||||
-rw-r--r-- | include/asm-generic/pgtable.h | 8 | ||||
-rw-r--r-- | include/linux/swapops.h | 2 | ||||
-rw-r--r-- | mm/memory.c | 17 |
8 files changed, 75 insertions, 49 deletions
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 3ebb188c3ff5..d98c1ecc3266 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h | |||
@@ -44,6 +44,12 @@ static inline int pte_present(pte_t pte) | |||
44 | return pte_val(pte) & (_PAGE_PRESENT | _PAGE_NUMA); | 44 | return pte_val(pte) & (_PAGE_PRESENT | _PAGE_NUMA); |
45 | } | 45 | } |
46 | 46 | ||
47 | #define pte_present_nonuma pte_present_nonuma | ||
48 | static inline int pte_present_nonuma(pte_t pte) | ||
49 | { | ||
50 | return pte_val(pte) & (_PAGE_PRESENT); | ||
51 | } | ||
52 | |||
47 | #define pte_numa pte_numa | 53 | #define pte_numa pte_numa |
48 | static inline int pte_numa(pte_t pte) | 54 | static inline int pte_numa(pte_t pte) |
49 | { | 55 | { |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index b459ddf27d64..66276c1d23bb 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -131,7 +131,8 @@ static inline int pte_exec(pte_t pte) | |||
131 | 131 | ||
132 | static inline int pte_special(pte_t pte) | 132 | static inline int pte_special(pte_t pte) |
133 | { | 133 | { |
134 | return pte_flags(pte) & _PAGE_SPECIAL; | 134 | return (pte_flags(pte) & (_PAGE_PRESENT|_PAGE_SPECIAL)) == |
135 | (_PAGE_PRESENT|_PAGE_SPECIAL); | ||
135 | } | 136 | } |
136 | 137 | ||
137 | static inline unsigned long pte_pfn(pte_t pte) | 138 | static inline unsigned long pte_pfn(pte_t pte) |
@@ -452,6 +453,12 @@ static inline int pte_present(pte_t a) | |||
452 | _PAGE_NUMA); | 453 | _PAGE_NUMA); |
453 | } | 454 | } |
454 | 455 | ||
456 | #define pte_present_nonuma pte_present_nonuma | ||
457 | static inline int pte_present_nonuma(pte_t a) | ||
458 | { | ||
459 | return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); | ||
460 | } | ||
461 | |||
455 | #define pte_accessible pte_accessible | 462 | #define pte_accessible pte_accessible |
456 | static inline bool pte_accessible(struct mm_struct *mm, pte_t a) | 463 | static inline bool pte_accessible(struct mm_struct *mm, pte_t a) |
457 | { | 464 | { |
@@ -860,19 +867,19 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, | |||
860 | 867 | ||
861 | static inline pte_t pte_swp_mksoft_dirty(pte_t pte) | 868 | static inline pte_t pte_swp_mksoft_dirty(pte_t pte) |
862 | { | 869 | { |
863 | VM_BUG_ON(pte_present(pte)); | 870 | VM_BUG_ON(pte_present_nonuma(pte)); |
864 | return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); | 871 | return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); |
865 | } | 872 | } |
866 | 873 | ||
867 | static inline int pte_swp_soft_dirty(pte_t pte) | 874 | static inline int pte_swp_soft_dirty(pte_t pte) |
868 | { | 875 | { |
869 | VM_BUG_ON(pte_present(pte)); | 876 | VM_BUG_ON(pte_present_nonuma(pte)); |
870 | return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; | 877 | return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; |
871 | } | 878 | } |
872 | 879 | ||
873 | static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) | 880 | static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) |
874 | { | 881 | { |
875 | VM_BUG_ON(pte_present(pte)); | 882 | VM_BUG_ON(pte_present_nonuma(pte)); |
876 | return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); | 883 | return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); |
877 | } | 884 | } |
878 | 885 | ||
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index e22c1dbf7feb..6d6ecd09883c 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -145,8 +145,16 @@ static inline int pgd_large(pgd_t pgd) { return 0; } | |||
145 | /* Encode and de-code a swap entry */ | 145 | /* Encode and de-code a swap entry */ |
146 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE | 146 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE |
147 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) | 147 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) |
148 | #ifdef CONFIG_NUMA_BALANCING | ||
149 | /* Automatic NUMA balancing needs to be distinguishable from swap entries */ | ||
150 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2) | ||
151 | #else | ||
148 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) | 152 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) |
153 | #endif | ||
149 | #else | 154 | #else |
155 | #ifdef CONFIG_NUMA_BALANCING | ||
156 | #error Incompatible format for automatic NUMA balancing | ||
157 | #endif | ||
150 | #define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) | 158 | #define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) |
151 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) | 159 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) |
152 | #endif | 160 | #endif |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index eb3d44945133..f216963760e5 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -16,15 +16,26 @@ | |||
16 | #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ | 16 | #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ |
17 | #define _PAGE_BIT_PAT 7 /* on 4KB pages */ | 17 | #define _PAGE_BIT_PAT 7 /* on 4KB pages */ |
18 | #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ | 18 | #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ |
19 | #define _PAGE_BIT_UNUSED1 9 /* available for programmer */ | 19 | #define _PAGE_BIT_SOFTW1 9 /* available for programmer */ |
20 | #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ | 20 | #define _PAGE_BIT_SOFTW2 10 /* " */ |
21 | #define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */ | 21 | #define _PAGE_BIT_SOFTW3 11 /* " */ |
22 | #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ | 22 | #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ |
23 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 | 23 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 |
24 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 | 24 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 |
25 | #define _PAGE_BIT_SPLITTING _PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */ | 25 | #define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */ |
26 | #define _PAGE_BIT_IOMAP _PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */ | ||
27 | #define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */ | ||
28 | #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ | ||
26 | #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ | 29 | #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ |
27 | 30 | ||
31 | /* | ||
32 | * Swap offsets on configurations that allow automatic NUMA balancing use the | ||
33 | * bits after _PAGE_BIT_GLOBAL. To uniquely distinguish NUMA hinting PTEs from | ||
34 | * swap entries, we use the first bit after _PAGE_BIT_GLOBAL and shrink the | ||
35 | * maximum possible swap space from 16TB to 8TB. | ||
36 | */ | ||
37 | #define _PAGE_BIT_NUMA (_PAGE_BIT_GLOBAL+1) | ||
38 | |||
28 | /* If _PAGE_BIT_PRESENT is clear, we use these: */ | 39 | /* If _PAGE_BIT_PRESENT is clear, we use these: */ |
29 | /* - if the user mapped it with PROT_NONE; pte_present gives true */ | 40 | /* - if the user mapped it with PROT_NONE; pte_present gives true */ |
30 | #define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL | 41 | #define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL |
@@ -40,7 +51,7 @@ | |||
40 | #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) | 51 | #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) |
41 | #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) | 52 | #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) |
42 | #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) | 53 | #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) |
43 | #define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) | 54 | #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) |
44 | #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) | 55 | #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) |
45 | #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) | 56 | #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) |
46 | #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) | 57 | #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) |
@@ -61,8 +72,6 @@ | |||
61 | * they do not conflict with each other. | 72 | * they do not conflict with each other. |
62 | */ | 73 | */ |
63 | 74 | ||
64 | #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_HIDDEN | ||
65 | |||
66 | #ifdef CONFIG_MEM_SOFT_DIRTY | 75 | #ifdef CONFIG_MEM_SOFT_DIRTY |
67 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY) | 76 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY) |
68 | #else | 77 | #else |
@@ -70,6 +79,21 @@ | |||
70 | #endif | 79 | #endif |
71 | 80 | ||
72 | /* | 81 | /* |
82 | * _PAGE_NUMA distinguishes between a numa hinting minor fault and a page | ||
83 | * that is not present. The hinting fault gathers numa placement statistics | ||
84 | * (see pte_numa()). The bit is always zero when the PTE is not present. | ||
85 | * | ||
86 | * The bit picked must be always zero when the pmd is present and not | ||
87 | * present, so that we don't lose information when we set it while | ||
88 | * atomically clearing the present bit. | ||
89 | */ | ||
90 | #ifdef CONFIG_NUMA_BALANCING | ||
91 | #define _PAGE_NUMA (_AT(pteval_t, 1) << _PAGE_BIT_NUMA) | ||
92 | #else | ||
93 | #define _PAGE_NUMA (_AT(pteval_t, 0)) | ||
94 | #endif | ||
95 | |||
96 | /* | ||
73 | * Tracking soft dirty bit when a page goes to a swap is tricky. | 97 | * Tracking soft dirty bit when a page goes to a swap is tricky. |
74 | * We need a bit which can be stored in pte _and_ not conflict | 98 | * We need a bit which can be stored in pte _and_ not conflict |
75 | * with swap entry format. On x86 bits 6 and 7 are *not* involved | 99 | * with swap entry format. On x86 bits 6 and 7 are *not* involved |
@@ -94,26 +118,6 @@ | |||
94 | #define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) | 118 | #define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) |
95 | #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) | 119 | #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) |
96 | 120 | ||
97 | /* | ||
98 | * _PAGE_NUMA indicates that this page will trigger a numa hinting | ||
99 | * minor page fault to gather numa placement statistics (see | ||
100 | * pte_numa()). The bit picked (8) is within the range between | ||
101 | * _PAGE_FILE (6) and _PAGE_PROTNONE (8) bits. Therefore, it doesn't | ||
102 | * require changes to the swp entry format because that bit is always | ||
103 | * zero when the pte is not present. | ||
104 | * | ||
105 | * The bit picked must be always zero when the pmd is present and not | ||
106 | * present, so that we don't lose information when we set it while | ||
107 | * atomically clearing the present bit. | ||
108 | * | ||
109 | * Because we shared the same bit (8) with _PAGE_PROTNONE this can be | ||
110 | * interpreted as _PAGE_NUMA only in places that _PAGE_PROTNONE | ||
111 | * couldn't reach, like handle_mm_fault() (see access_error in | ||
112 | * arch/x86/mm/fault.c, the vma protection must not be PROT_NONE for | ||
113 | * handle_mm_fault() to be invoked). | ||
114 | */ | ||
115 | #define _PAGE_NUMA _PAGE_PROTNONE | ||
116 | |||
117 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ | 121 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ |
118 | _PAGE_ACCESSED | _PAGE_DIRTY) | 122 | _PAGE_ACCESSED | _PAGE_DIRTY) |
119 | #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ | 123 | #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ |
@@ -122,8 +126,8 @@ | |||
122 | /* Set of bits not changed in pte_modify */ | 126 | /* Set of bits not changed in pte_modify */ |
123 | #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ | 127 | #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ |
124 | _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ | 128 | _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ |
125 | _PAGE_SOFT_DIRTY) | 129 | _PAGE_SOFT_DIRTY | _PAGE_NUMA) |
126 | #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) | 130 | #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA) |
127 | 131 | ||
128 | #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) | 132 | #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) |
129 | #define _PAGE_CACHE_WB (0) | 133 | #define _PAGE_CACHE_WB (0) |
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 461bc8289024..6629f397b467 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c | |||
@@ -35,7 +35,7 @@ enum { | |||
35 | 35 | ||
36 | static int pte_testbit(pte_t pte) | 36 | static int pte_testbit(pte_t pte) |
37 | { | 37 | { |
38 | return pte_flags(pte) & _PAGE_UNUSED1; | 38 | return pte_flags(pte) & _PAGE_SOFTW1; |
39 | } | 39 | } |
40 | 40 | ||
41 | struct split_state { | 41 | struct split_state { |
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index a8015a7a55bb..53b2acc38213 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -233,6 +233,10 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) | |||
233 | # define pte_accessible(mm, pte) ((void)(pte), 1) | 233 | # define pte_accessible(mm, pte) ((void)(pte), 1) |
234 | #endif | 234 | #endif |
235 | 235 | ||
236 | #ifndef pte_present_nonuma | ||
237 | #define pte_present_nonuma(pte) pte_present(pte) | ||
238 | #endif | ||
239 | |||
236 | #ifndef flush_tlb_fix_spurious_fault | 240 | #ifndef flush_tlb_fix_spurious_fault |
237 | #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) | 241 | #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) |
238 | #endif | 242 | #endif |
@@ -670,7 +674,7 @@ static inline int pmd_trans_unstable(pmd_t *pmd) | |||
670 | static inline int pte_numa(pte_t pte) | 674 | static inline int pte_numa(pte_t pte) |
671 | { | 675 | { |
672 | return (pte_flags(pte) & | 676 | return (pte_flags(pte) & |
673 | (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; | 677 | (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA; |
674 | } | 678 | } |
675 | #endif | 679 | #endif |
676 | 680 | ||
@@ -678,7 +682,7 @@ static inline int pte_numa(pte_t pte) | |||
678 | static inline int pmd_numa(pmd_t pmd) | 682 | static inline int pmd_numa(pmd_t pmd) |
679 | { | 683 | { |
680 | return (pmd_flags(pmd) & | 684 | return (pmd_flags(pmd) & |
681 | (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; | 685 | (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA; |
682 | } | 686 | } |
683 | #endif | 687 | #endif |
684 | 688 | ||
diff --git a/include/linux/swapops.h b/include/linux/swapops.h index c0f75261a728..6adfb7bfbf44 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h | |||
@@ -54,7 +54,7 @@ static inline pgoff_t swp_offset(swp_entry_t entry) | |||
54 | /* check whether a pte points to a swap entry */ | 54 | /* check whether a pte points to a swap entry */ |
55 | static inline int is_swap_pte(pte_t pte) | 55 | static inline int is_swap_pte(pte_t pte) |
56 | { | 56 | { |
57 | return !pte_none(pte) && !pte_present(pte) && !pte_file(pte); | 57 | return !pte_none(pte) && !pte_present_nonuma(pte) && !pte_file(pte); |
58 | } | 58 | } |
59 | #endif | 59 | #endif |
60 | 60 | ||
diff --git a/mm/memory.c b/mm/memory.c index e302ae1dcce0..0897830011f3 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -756,7 +756,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, | |||
756 | unsigned long pfn = pte_pfn(pte); | 756 | unsigned long pfn = pte_pfn(pte); |
757 | 757 | ||
758 | if (HAVE_PTE_SPECIAL) { | 758 | if (HAVE_PTE_SPECIAL) { |
759 | if (likely(!pte_special(pte))) | 759 | if (likely(!pte_special(pte) || pte_numa(pte))) |
760 | goto check_pfn; | 760 | goto check_pfn; |
761 | if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) | 761 | if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) |
762 | return NULL; | 762 | return NULL; |
@@ -782,14 +782,15 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, | |||
782 | } | 782 | } |
783 | } | 783 | } |
784 | 784 | ||
785 | if (is_zero_pfn(pfn)) | ||
786 | return NULL; | ||
787 | check_pfn: | 785 | check_pfn: |
788 | if (unlikely(pfn > highest_memmap_pfn)) { | 786 | if (unlikely(pfn > highest_memmap_pfn)) { |
789 | print_bad_pte(vma, addr, pte, NULL); | 787 | print_bad_pte(vma, addr, pte, NULL); |
790 | return NULL; | 788 | return NULL; |
791 | } | 789 | } |
792 | 790 | ||
791 | if (is_zero_pfn(pfn)) | ||
792 | return NULL; | ||
793 | |||
793 | /* | 794 | /* |
794 | * NOTE! We still have PageReserved() pages in the page tables. | 795 | * NOTE! We still have PageReserved() pages in the page tables. |
795 | * eg. VDSO mappings can cause them to exist. | 796 | * eg. VDSO mappings can cause them to exist. |
@@ -1722,13 +1723,9 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1722 | VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET)); | 1723 | VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET)); |
1723 | 1724 | ||
1724 | /* | 1725 | /* |
1725 | * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault | 1726 | * If FOLL_FORCE is set then do not force a full fault as the hinting |
1726 | * would be called on PROT_NONE ranges. We must never invoke | 1727 | * fault information is unrelated to the reference behaviour of a task |
1727 | * handle_mm_fault on PROT_NONE ranges or the NUMA hinting | 1728 | * using the address space |
1728 | * page faults would unprotect the PROT_NONE ranges if | ||
1729 | * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd | ||
1730 | * bitflag. So to avoid that, don't set FOLL_NUMA if | ||
1731 | * FOLL_FORCE is set. | ||
1732 | */ | 1729 | */ |
1733 | if (!(gup_flags & FOLL_FORCE)) | 1730 | if (!(gup_flags & FOLL_FORCE)) |
1734 | gup_flags |= FOLL_NUMA; | 1731 | gup_flags |= FOLL_NUMA; |