aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/powerpc/include/asm/pgtable.h54
-rw-r--r--arch/powerpc/include/asm/pte-common.h5
-rw-r--r--arch/powerpc/include/asm/pte-hash64.h6
-rw-r--r--arch/x86/include/asm/pgtable.h22
-rw-r--r--arch/x86/include/asm/pgtable_64.h5
-rw-r--r--arch/x86/include/asm/pgtable_types.h41
-rw-r--r--include/asm-generic/pgtable.h155
-rw-r--r--include/linux/swapops.h2
8 files changed, 7 insertions, 283 deletions
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 1146006d3477..79fee2eb8d56 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -55,64 +55,12 @@ static inline int pmd_protnone(pmd_t pmd)
55{ 55{
56 return pte_protnone(pmd_pte(pmd)); 56 return pte_protnone(pmd_pte(pmd));
57} 57}
58 58#endif /* CONFIG_NUMA_BALANCING */
59static inline int pte_present(pte_t pte)
60{
61 return pte_val(pte) & _PAGE_NUMA_MASK;
62}
63
64#define pte_present_nonuma pte_present_nonuma
65static inline int pte_present_nonuma(pte_t pte)
66{
67 return pte_val(pte) & (_PAGE_PRESENT);
68}
69
70#define ptep_set_numa ptep_set_numa
71static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
72 pte_t *ptep)
73{
74 if ((pte_val(*ptep) & _PAGE_PRESENT) == 0)
75 VM_BUG_ON(1);
76
77 pte_update(mm, addr, ptep, _PAGE_PRESENT, _PAGE_NUMA, 0);
78 return;
79}
80
81#define pmdp_set_numa pmdp_set_numa
82static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
83 pmd_t *pmdp)
84{
85 if ((pmd_val(*pmdp) & _PAGE_PRESENT) == 0)
86 VM_BUG_ON(1);
87
88 pmd_hugepage_update(mm, addr, pmdp, _PAGE_PRESENT, _PAGE_NUMA);
89 return;
90}
91
92/*
93 * Generic NUMA pte helpers expect pteval_t and pmdval_t types to exist
94 * which was inherited from x86. For the purposes of powerpc pte_basic_t and
95 * pmd_t are equivalent
96 */
97#define pteval_t pte_basic_t
98#define pmdval_t pmd_t
99static inline pteval_t ptenuma_flags(pte_t pte)
100{
101 return pte_val(pte) & _PAGE_NUMA_MASK;
102}
103
104static inline pmdval_t pmdnuma_flags(pmd_t pmd)
105{
106 return pmd_val(pmd) & _PAGE_NUMA_MASK;
107}
108
109# else
110 59
111static inline int pte_present(pte_t pte) 60static inline int pte_present(pte_t pte)
112{ 61{
113 return pte_val(pte) & _PAGE_PRESENT; 62 return pte_val(pte) & _PAGE_PRESENT;
114} 63}
115#endif /* CONFIG_NUMA_BALANCING */
116 64
117/* Conversion functions: convert a page and protection to a page entry, 65/* Conversion functions: convert a page and protection to a page entry,
118 * and a page entry and page directory to the page they refer to. 66 * and a page entry and page directory to the page they refer to.
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index 2aef9b7a0eb2..c5a755ef7011 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -104,11 +104,6 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
104 _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \ 104 _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \
105 _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) 105 _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC)
106 106
107#ifdef CONFIG_NUMA_BALANCING
108/* Mask of bits that distinguish present and numa ptes */
109#define _PAGE_NUMA_MASK (_PAGE_NUMA|_PAGE_PRESENT)
110#endif
111
112/* 107/*
113 * We define 2 sets of base prot bits, one for basic pages (ie, 108 * We define 2 sets of base prot bits, one for basic pages (ie,
114 * cacheable kernel and user pages) and one for non cacheable 109 * cacheable kernel and user pages) and one for non cacheable
diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h
index 2505d8eab15c..55aea0caf95e 100644
--- a/arch/powerpc/include/asm/pte-hash64.h
+++ b/arch/powerpc/include/asm/pte-hash64.h
@@ -27,12 +27,6 @@
27#define _PAGE_RW 0x0200 /* software: user write access allowed */ 27#define _PAGE_RW 0x0200 /* software: user write access allowed */
28#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */ 28#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
29 29
30/*
31 * Used for tracking numa faults
32 */
33#define _PAGE_NUMA 0x00000010 /* Gather numa placement stats */
34
35
36/* No separate kernel read-only */ 30/* No separate kernel read-only */
37#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */ 31#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
38#define _PAGE_KERNEL_RO _PAGE_KERNEL_RW 32#define _PAGE_KERNEL_RO _PAGE_KERNEL_RW
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index f519b0b529dd..34d42a7d5595 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -300,7 +300,7 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
300 300
301static inline pmd_t pmd_mknotpresent(pmd_t pmd) 301static inline pmd_t pmd_mknotpresent(pmd_t pmd)
302{ 302{
303 return pmd_clear_flags(pmd, _PAGE_PRESENT); 303 return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
304} 304}
305 305
306#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY 306#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -443,13 +443,6 @@ static inline int pte_same(pte_t a, pte_t b)
443 443
444static inline int pte_present(pte_t a) 444static inline int pte_present(pte_t a)
445{ 445{
446 return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
447 _PAGE_NUMA);
448}
449
450#define pte_present_nonuma pte_present_nonuma
451static inline int pte_present_nonuma(pte_t a)
452{
453 return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); 446 return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
454} 447}
455 448
@@ -459,7 +452,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
459 if (pte_flags(a) & _PAGE_PRESENT) 452 if (pte_flags(a) & _PAGE_PRESENT)
460 return true; 453 return true;
461 454
462 if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) && 455 if ((pte_flags(a) & _PAGE_PROTNONE) &&
463 mm_tlb_flush_pending(mm)) 456 mm_tlb_flush_pending(mm))
464 return true; 457 return true;
465 458
@@ -479,8 +472,7 @@ static inline int pmd_present(pmd_t pmd)
479 * the _PAGE_PSE flag will remain set at all times while the 472 * the _PAGE_PSE flag will remain set at all times while the
480 * _PAGE_PRESENT bit is clear). 473 * _PAGE_PRESENT bit is clear).
481 */ 474 */
482 return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE | 475 return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
483 _PAGE_NUMA);
484} 476}
485 477
486#ifdef CONFIG_NUMA_BALANCING 478#ifdef CONFIG_NUMA_BALANCING
@@ -555,11 +547,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
555 547
556static inline int pmd_bad(pmd_t pmd) 548static inline int pmd_bad(pmd_t pmd)
557{ 549{
558#ifdef CONFIG_NUMA_BALANCING
559 /* pmd_numa check */
560 if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA)
561 return 0;
562#endif
563 return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; 550 return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
564} 551}
565 552
@@ -878,19 +865,16 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
878#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY 865#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
879static inline pte_t pte_swp_mksoft_dirty(pte_t pte) 866static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
880{ 867{
881 VM_BUG_ON(pte_present_nonuma(pte));
882 return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); 868 return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
883} 869}
884 870
885static inline int pte_swp_soft_dirty(pte_t pte) 871static inline int pte_swp_soft_dirty(pte_t pte)
886{ 872{
887 VM_BUG_ON(pte_present_nonuma(pte));
888 return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; 873 return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
889} 874}
890 875
891static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) 876static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
892{ 877{
893 VM_BUG_ON(pte_present_nonuma(pte));
894 return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); 878 return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
895} 879}
896#endif 880#endif
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index e227970f983e..2ee781114d34 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -142,12 +142,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
142 142
143/* Encode and de-code a swap entry */ 143/* Encode and de-code a swap entry */
144#define SWP_TYPE_BITS 5 144#define SWP_TYPE_BITS 5
145#ifdef CONFIG_NUMA_BALANCING
146/* Automatic NUMA balancing needs to be distinguishable from swap entries */
147#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2)
148#else
149#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) 145#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
150#endif
151 146
152#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) 147#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
153 148
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 3e0230c94cff..8c7c10802e9c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -27,14 +27,6 @@
27#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ 27#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
28#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ 28#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
29 29
30/*
31 * Swap offsets on configurations that allow automatic NUMA balancing use the
32 * bits after _PAGE_BIT_GLOBAL. To uniquely distinguish NUMA hinting PTEs from
33 * swap entries, we use the first bit after _PAGE_BIT_GLOBAL and shrink the
34 * maximum possible swap space from 16TB to 8TB.
35 */
36#define _PAGE_BIT_NUMA (_PAGE_BIT_GLOBAL+1)
37
38/* If _PAGE_BIT_PRESENT is clear, we use these: */ 30/* If _PAGE_BIT_PRESENT is clear, we use these: */
39/* - if the user mapped it with PROT_NONE; pte_present gives true */ 31/* - if the user mapped it with PROT_NONE; pte_present gives true */
40#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL 32#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
@@ -76,21 +68,6 @@
76#endif 68#endif
77 69
78/* 70/*
79 * _PAGE_NUMA distinguishes between a numa hinting minor fault and a page
80 * that is not present. The hinting fault gathers numa placement statistics
81 * (see pte_numa()). The bit is always zero when the PTE is not present.
82 *
83 * The bit picked must be always zero when the pmd is present and not
84 * present, so that we don't lose information when we set it while
85 * atomically clearing the present bit.
86 */
87#ifdef CONFIG_NUMA_BALANCING
88#define _PAGE_NUMA (_AT(pteval_t, 1) << _PAGE_BIT_NUMA)
89#else
90#define _PAGE_NUMA (_AT(pteval_t, 0))
91#endif
92
93/*
94 * Tracking soft dirty bit when a page goes to a swap is tricky. 71 * Tracking soft dirty bit when a page goes to a swap is tricky.
95 * We need a bit which can be stored in pte _and_ not conflict 72 * We need a bit which can be stored in pte _and_ not conflict
96 * with swap entry format. On x86 bits 6 and 7 are *not* involved 73 * with swap entry format. On x86 bits 6 and 7 are *not* involved
@@ -122,8 +99,8 @@
122/* Set of bits not changed in pte_modify */ 99/* Set of bits not changed in pte_modify */
123#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ 100#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
124 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ 101 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
125 _PAGE_SOFT_DIRTY | _PAGE_NUMA) 102 _PAGE_SOFT_DIRTY)
126#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA) 103#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
127 104
128/* 105/*
129 * The cache modes defined here are used to translate between pure SW usage 106 * The cache modes defined here are used to translate between pure SW usage
@@ -324,20 +301,6 @@ static inline pteval_t pte_flags(pte_t pte)
324 return native_pte_val(pte) & PTE_FLAGS_MASK; 301 return native_pte_val(pte) & PTE_FLAGS_MASK;
325} 302}
326 303
327#ifdef CONFIG_NUMA_BALANCING
328/* Set of bits that distinguishes present, prot_none and numa ptes */
329#define _PAGE_NUMA_MASK (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)
330static inline pteval_t ptenuma_flags(pte_t pte)
331{
332 return pte_flags(pte) & _PAGE_NUMA_MASK;
333}
334
335static inline pmdval_t pmdnuma_flags(pmd_t pmd)
336{
337 return pmd_flags(pmd) & _PAGE_NUMA_MASK;
338}
339#endif /* CONFIG_NUMA_BALANCING */
340
341#define pgprot_val(x) ((x).pgprot) 304#define pgprot_val(x) ((x).pgprot)
342#define __pgprot(x) ((pgprot_t) { (x) } ) 305#define __pgprot(x) ((pgprot_t) { (x) } )
343 306
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 067922c06c29..4d46085c1b90 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -244,10 +244,6 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
244# define pte_accessible(mm, pte) ((void)(pte), 1) 244# define pte_accessible(mm, pte) ((void)(pte), 1)
245#endif 245#endif
246 246
247#ifndef pte_present_nonuma
248#define pte_present_nonuma(pte) pte_present(pte)
249#endif
250
251#ifndef flush_tlb_fix_spurious_fault 247#ifndef flush_tlb_fix_spurious_fault
252#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) 248#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
253#endif 249#endif
@@ -693,157 +689,6 @@ static inline int pmd_protnone(pmd_t pmd)
693} 689}
694#endif /* CONFIG_NUMA_BALANCING */ 690#endif /* CONFIG_NUMA_BALANCING */
695 691
696#ifdef CONFIG_NUMA_BALANCING
697/*
698 * _PAGE_NUMA distinguishes between an unmapped page table entry, an entry that
699 * is protected for PROT_NONE and a NUMA hinting fault entry. If the
700 * architecture defines __PAGE_PROTNONE then it should take that into account
701 * but those that do not can rely on the fact that the NUMA hinting scanner
702 * skips inaccessible VMAs.
703 *
704 * pte/pmd_present() returns true if pte/pmd_numa returns true. Page
705 * fault triggers on those regions if pte/pmd_numa returns true
706 * (because _PAGE_PRESENT is not set).
707 */
708#ifndef pte_numa
709static inline int pte_numa(pte_t pte)
710{
711 return ptenuma_flags(pte) == _PAGE_NUMA;
712}
713#endif
714
715#ifndef pmd_numa
716static inline int pmd_numa(pmd_t pmd)
717{
718 return pmdnuma_flags(pmd) == _PAGE_NUMA;
719}
720#endif
721
722/*
723 * pte/pmd_mknuma sets the _PAGE_ACCESSED bitflag automatically
724 * because they're called by the NUMA hinting minor page fault. If we
725 * wouldn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler
726 * would be forced to set it later while filling the TLB after we
727 * return to userland. That would trigger a second write to memory
728 * that we optimize away by setting _PAGE_ACCESSED here.
729 */
730#ifndef pte_mknonnuma
731static inline pte_t pte_mknonnuma(pte_t pte)
732{
733 pteval_t val = pte_val(pte);
734
735 val &= ~_PAGE_NUMA;
736 val |= (_PAGE_PRESENT|_PAGE_ACCESSED);
737 return __pte(val);
738}
739#endif
740
741#ifndef pmd_mknonnuma
742static inline pmd_t pmd_mknonnuma(pmd_t pmd)
743{
744 pmdval_t val = pmd_val(pmd);
745
746 val &= ~_PAGE_NUMA;
747 val |= (_PAGE_PRESENT|_PAGE_ACCESSED);
748
749 return __pmd(val);
750}
751#endif
752
753#ifndef pte_mknuma
754static inline pte_t pte_mknuma(pte_t pte)
755{
756 pteval_t val = pte_val(pte);
757
758 VM_BUG_ON(!(val & _PAGE_PRESENT));
759
760 val &= ~_PAGE_PRESENT;
761 val |= _PAGE_NUMA;
762
763 return __pte(val);
764}
765#endif
766
767#ifndef ptep_set_numa
768static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
769 pte_t *ptep)
770{
771 pte_t ptent = *ptep;
772
773 ptent = pte_mknuma(ptent);
774 set_pte_at(mm, addr, ptep, ptent);
775 return;
776}
777#endif
778
779#ifndef pmd_mknuma
780static inline pmd_t pmd_mknuma(pmd_t pmd)
781{
782 pmdval_t val = pmd_val(pmd);
783
784 val &= ~_PAGE_PRESENT;
785 val |= _PAGE_NUMA;
786
787 return __pmd(val);
788}
789#endif
790
791#ifndef pmdp_set_numa
792static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
793 pmd_t *pmdp)
794{
795 pmd_t pmd = *pmdp;
796
797 pmd = pmd_mknuma(pmd);
798 set_pmd_at(mm, addr, pmdp, pmd);
799 return;
800}
801#endif
802#else
803static inline int pmd_numa(pmd_t pmd)
804{
805 return 0;
806}
807
808static inline int pte_numa(pte_t pte)
809{
810 return 0;
811}
812
813static inline pte_t pte_mknonnuma(pte_t pte)
814{
815 return pte;
816}
817
818static inline pmd_t pmd_mknonnuma(pmd_t pmd)
819{
820 return pmd;
821}
822
823static inline pte_t pte_mknuma(pte_t pte)
824{
825 return pte;
826}
827
828static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
829 pte_t *ptep)
830{
831 return;
832}
833
834
835static inline pmd_t pmd_mknuma(pmd_t pmd)
836{
837 return pmd;
838}
839
840static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
841 pmd_t *pmdp)
842{
843 return ;
844}
845#endif /* CONFIG_NUMA_BALANCING */
846
847#endif /* CONFIG_MMU */ 692#endif /* CONFIG_MMU */
848 693
849#endif /* !__ASSEMBLY__ */ 694#endif /* !__ASSEMBLY__ */
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 831a3168ab35..cedf3d3c373f 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -54,7 +54,7 @@ static inline pgoff_t swp_offset(swp_entry_t entry)
54/* check whether a pte points to a swap entry */ 54/* check whether a pte points to a swap entry */
55static inline int is_swap_pte(pte_t pte) 55static inline int is_swap_pte(pte_t pte)
56{ 56{
57 return !pte_none(pte) && !pte_present_nonuma(pte); 57 return !pte_none(pte) && !pte_present(pte);
58} 58}
59#endif 59#endif
60 60