diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-26 21:08:18 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-26 21:08:18 -0500 |
| commit | e57d9f638af9673f38d9f09de66fa0a28303127d (patch) | |
| tree | 1948825bba57b4563ef0a5e7dd7e90634441b66e | |
| parent | d6e867a6ae13bc02cd01c535764e5b051d26cf28 (diff) | |
| parent | 6848ac7ca39a226ede5df7af0efcc4ef0611e99c (diff) | |
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar:
"The main changes in this cycle were:
- Update and clean up x86 fault handling, by Andy Lutomirski.
- Drop usage of __flush_tlb_all() in kernel_physical_mapping_init()
and related fallout, by Dan Williams.
- CPA cleanups and reorganization by Peter Zijlstra: simplify the
flow and remove a few warts.
- Other misc cleanups"
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (29 commits)
x86/mm/dump_pagetables: Use DEFINE_SHOW_ATTRIBUTE()
x86/mm/cpa: Rename @addrinarray to @numpages
x86/mm/cpa: Better use CLFLUSHOPT
x86/mm/cpa: Fold cpa_flush_range() and cpa_flush_array() into a single cpa_flush() function
x86/mm/cpa: Make cpa_data::numpages invariant
x86/mm/cpa: Optimize cpa_flush_array() TLB invalidation
x86/mm/cpa: Simplify the code after making cpa->vaddr invariant
x86/mm/cpa: Make cpa_data::vaddr invariant
x86/mm/cpa: Add __cpa_addr() helper
x86/mm/cpa: Add ARRAY and PAGES_ARRAY selftests
x86/mm: Drop usage of __flush_tlb_all() in kernel_physical_mapping_init()
x86/mm: Validate kernel_physical_mapping_init() PTE population
generic/pgtable: Introduce set_pte_safe()
generic/pgtable: Introduce {p4d,pgd}_same()
generic/pgtable: Make {pmd, pud}_same() unconditionally available
x86/fault: Clean up the page fault oops decoder a bit
x86/fault: Decode page fault OOPSes better
x86/vsyscall/64: Use X86_PF constants in the simulated #PF error code
x86/oops: Show the correct CS value in show_regs()
x86/fault: Don't try to recover from an implicit supervisor access
...
| -rw-r--r-- | arch/x86/entry/vsyscall/vsyscall_64.c | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/disabled-features.h | 8 | ||||
| -rw-r--r-- | arch/x86/include/asm/pgalloc.h | 27 | ||||
| -rw-r--r-- | arch/x86/kernel/process_64.c | 5 | ||||
| -rw-r--r-- | arch/x86/mm/debug_pagetables.c | 58 | ||||
| -rw-r--r-- | arch/x86/mm/fault.c | 244 | ||||
| -rw-r--r-- | arch/x86/mm/init_64.c | 30 | ||||
| -rw-r--r-- | arch/x86/mm/mm_internal.h | 2 | ||||
| -rw-r--r-- | arch/x86/mm/pageattr-test.c | 31 | ||||
| -rw-r--r-- | arch/x86/mm/pageattr.c | 271 | ||||
| -rw-r--r-- | arch/x86/mm/tlb.c | 4 | ||||
| -rw-r--r-- | include/asm-generic/5level-fixup.h | 1 | ||||
| -rw-r--r-- | include/asm-generic/pgtable-nop4d-hack.h | 1 | ||||
| -rw-r--r-- | include/asm-generic/pgtable-nop4d.h | 1 | ||||
| -rw-r--r-- | include/asm-generic/pgtable-nopud.h | 1 | ||||
| -rw-r--r-- | include/asm-generic/pgtable.h | 56 |
16 files changed, 396 insertions, 346 deletions
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 85fd85d52ffd..d78bcc03e60e 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c | |||
| @@ -102,7 +102,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) | |||
| 102 | if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { | 102 | if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { |
| 103 | struct thread_struct *thread = &current->thread; | 103 | struct thread_struct *thread = &current->thread; |
| 104 | 104 | ||
| 105 | thread->error_code = 6; /* user fault, no page, write */ | 105 | thread->error_code = X86_PF_USER | X86_PF_WRITE; |
| 106 | thread->cr2 = ptr; | 106 | thread->cr2 = ptr; |
| 107 | thread->trap_nr = X86_TRAP_PF; | 107 | thread->trap_nr = X86_TRAP_PF; |
| 108 | 108 | ||
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 33833d1909af..a5ea841cc6d2 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h | |||
| @@ -16,6 +16,12 @@ | |||
| 16 | # define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) | 16 | # define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) |
| 17 | #endif | 17 | #endif |
| 18 | 18 | ||
| 19 | #ifdef CONFIG_X86_SMAP | ||
| 20 | # define DISABLE_SMAP 0 | ||
| 21 | #else | ||
| 22 | # define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31)) | ||
| 23 | #endif | ||
| 24 | |||
| 19 | #ifdef CONFIG_X86_INTEL_UMIP | 25 | #ifdef CONFIG_X86_INTEL_UMIP |
| 20 | # define DISABLE_UMIP 0 | 26 | # define DISABLE_UMIP 0 |
| 21 | #else | 27 | #else |
| @@ -68,7 +74,7 @@ | |||
| 68 | #define DISABLED_MASK6 0 | 74 | #define DISABLED_MASK6 0 |
| 69 | #define DISABLED_MASK7 (DISABLE_PTI) | 75 | #define DISABLED_MASK7 (DISABLE_PTI) |
| 70 | #define DISABLED_MASK8 0 | 76 | #define DISABLED_MASK8 0 |
| 71 | #define DISABLED_MASK9 (DISABLE_MPX) | 77 | #define DISABLED_MASK9 (DISABLE_MPX|DISABLE_SMAP) |
| 72 | #define DISABLED_MASK10 0 | 78 | #define DISABLED_MASK10 0 |
| 73 | #define DISABLED_MASK11 0 | 79 | #define DISABLED_MASK11 0 |
| 74 | #define DISABLED_MASK12 0 | 80 | #define DISABLED_MASK12 0 |
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index ec7f43327033..1ea41aaef68b 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h | |||
| @@ -80,6 +80,13 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, | |||
| 80 | set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); | 80 | set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); |
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | static inline void pmd_populate_kernel_safe(struct mm_struct *mm, | ||
| 84 | pmd_t *pmd, pte_t *pte) | ||
| 85 | { | ||
| 86 | paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); | ||
| 87 | set_pmd_safe(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); | ||
| 88 | } | ||
| 89 | |||
| 83 | static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, | 90 | static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, |
| 84 | struct page *pte) | 91 | struct page *pte) |
| 85 | { | 92 | { |
| @@ -132,6 +139,12 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | |||
| 132 | paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); | 139 | paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); |
| 133 | set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd))); | 140 | set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd))); |
| 134 | } | 141 | } |
| 142 | |||
| 143 | static inline void pud_populate_safe(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | ||
| 144 | { | ||
| 145 | paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); | ||
| 146 | set_pud_safe(pud, __pud(_PAGE_TABLE | __pa(pmd))); | ||
| 147 | } | ||
| 135 | #endif /* CONFIG_X86_PAE */ | 148 | #endif /* CONFIG_X86_PAE */ |
| 136 | 149 | ||
| 137 | #if CONFIG_PGTABLE_LEVELS > 3 | 150 | #if CONFIG_PGTABLE_LEVELS > 3 |
| @@ -141,6 +154,12 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) | |||
| 141 | set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud))); | 154 | set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud))); |
| 142 | } | 155 | } |
| 143 | 156 | ||
| 157 | static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) | ||
| 158 | { | ||
| 159 | paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); | ||
| 160 | set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud))); | ||
| 161 | } | ||
| 162 | |||
| 144 | static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) | 163 | static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) |
| 145 | { | 164 | { |
| 146 | gfp_t gfp = GFP_KERNEL_ACCOUNT; | 165 | gfp_t gfp = GFP_KERNEL_ACCOUNT; |
| @@ -173,6 +192,14 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) | |||
| 173 | set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); | 192 | set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); |
| 174 | } | 193 | } |
| 175 | 194 | ||
| 195 | static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) | ||
| 196 | { | ||
| 197 | if (!pgtable_l5_enabled()) | ||
| 198 | return; | ||
| 199 | paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); | ||
| 200 | set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); | ||
| 201 | } | ||
| 202 | |||
| 176 | static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) | 203 | static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) |
| 177 | { | 204 | { |
| 178 | gfp_t gfp = GFP_KERNEL_ACCOUNT; | 205 | gfp_t gfp = GFP_KERNEL_ACCOUNT; |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 721d02bd2d0d..6a62f4af9fcf 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -68,7 +68,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) | |||
| 68 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; | 68 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; |
| 69 | unsigned long d0, d1, d2, d3, d6, d7; | 69 | unsigned long d0, d1, d2, d3, d6, d7; |
| 70 | unsigned int fsindex, gsindex; | 70 | unsigned int fsindex, gsindex; |
| 71 | unsigned int ds, cs, es; | 71 | unsigned int ds, es; |
| 72 | 72 | ||
| 73 | show_iret_regs(regs); | 73 | show_iret_regs(regs); |
| 74 | 74 | ||
| @@ -100,7 +100,6 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) | |||
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | asm("movl %%ds,%0" : "=r" (ds)); | 102 | asm("movl %%ds,%0" : "=r" (ds)); |
| 103 | asm("movl %%cs,%0" : "=r" (cs)); | ||
| 104 | asm("movl %%es,%0" : "=r" (es)); | 103 | asm("movl %%es,%0" : "=r" (es)); |
| 105 | asm("movl %%fs,%0" : "=r" (fsindex)); | 104 | asm("movl %%fs,%0" : "=r" (fsindex)); |
| 106 | asm("movl %%gs,%0" : "=r" (gsindex)); | 105 | asm("movl %%gs,%0" : "=r" (gsindex)); |
| @@ -116,7 +115,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) | |||
| 116 | 115 | ||
| 117 | printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", | 116 | printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
| 118 | fs, fsindex, gs, gsindex, shadowgs); | 117 | fs, fsindex, gs, gsindex, shadowgs); |
| 119 | printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, | 118 | printk(KERN_DEFAULT "CS: %04lx DS: %04x ES: %04x CR0: %016lx\n", regs->cs, ds, |
| 120 | es, cr0); | 119 | es, cr0); |
| 121 | printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, | 120 | printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, |
| 122 | cr4); | 121 | cr4); |
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index 225fe2f0bfec..cd84f067e41d 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c | |||
| @@ -10,20 +10,9 @@ static int ptdump_show(struct seq_file *m, void *v) | |||
| 10 | return 0; | 10 | return 0; |
| 11 | } | 11 | } |
| 12 | 12 | ||
| 13 | static int ptdump_open(struct inode *inode, struct file *filp) | 13 | DEFINE_SHOW_ATTRIBUTE(ptdump); |
| 14 | { | ||
| 15 | return single_open(filp, ptdump_show, NULL); | ||
| 16 | } | ||
| 17 | |||
| 18 | static const struct file_operations ptdump_fops = { | ||
| 19 | .owner = THIS_MODULE, | ||
| 20 | .open = ptdump_open, | ||
| 21 | .read = seq_read, | ||
| 22 | .llseek = seq_lseek, | ||
| 23 | .release = single_release, | ||
| 24 | }; | ||
| 25 | 14 | ||
| 26 | static int ptdump_show_curknl(struct seq_file *m, void *v) | 15 | static int ptdump_curknl_show(struct seq_file *m, void *v) |
| 27 | { | 16 | { |
| 28 | if (current->mm->pgd) { | 17 | if (current->mm->pgd) { |
| 29 | down_read(&current->mm->mmap_sem); | 18 | down_read(&current->mm->mmap_sem); |
| @@ -33,23 +22,12 @@ static int ptdump_show_curknl(struct seq_file *m, void *v) | |||
| 33 | return 0; | 22 | return 0; |
| 34 | } | 23 | } |
| 35 | 24 | ||
| 36 | static int ptdump_open_curknl(struct inode *inode, struct file *filp) | 25 | DEFINE_SHOW_ATTRIBUTE(ptdump_curknl); |
| 37 | { | ||
| 38 | return single_open(filp, ptdump_show_curknl, NULL); | ||
| 39 | } | ||
| 40 | |||
| 41 | static const struct file_operations ptdump_curknl_fops = { | ||
| 42 | .owner = THIS_MODULE, | ||
| 43 | .open = ptdump_open_curknl, | ||
| 44 | .read = seq_read, | ||
| 45 | .llseek = seq_lseek, | ||
| 46 | .release = single_release, | ||
| 47 | }; | ||
| 48 | 26 | ||
| 49 | #ifdef CONFIG_PAGE_TABLE_ISOLATION | 27 | #ifdef CONFIG_PAGE_TABLE_ISOLATION |
| 50 | static struct dentry *pe_curusr; | 28 | static struct dentry *pe_curusr; |
| 51 | 29 | ||
| 52 | static int ptdump_show_curusr(struct seq_file *m, void *v) | 30 | static int ptdump_curusr_show(struct seq_file *m, void *v) |
| 53 | { | 31 | { |
| 54 | if (current->mm->pgd) { | 32 | if (current->mm->pgd) { |
| 55 | down_read(¤t->mm->mmap_sem); | 33 | down_read(¤t->mm->mmap_sem); |
| @@ -59,42 +37,20 @@ static int ptdump_show_curusr(struct seq_file *m, void *v) | |||
| 59 | return 0; | 37 | return 0; |
| 60 | } | 38 | } |
| 61 | 39 | ||
| 62 | static int ptdump_open_curusr(struct inode *inode, struct file *filp) | 40 | DEFINE_SHOW_ATTRIBUTE(ptdump_curusr); |
| 63 | { | ||
| 64 | return single_open(filp, ptdump_show_curusr, NULL); | ||
| 65 | } | ||
| 66 | |||
| 67 | static const struct file_operations ptdump_curusr_fops = { | ||
| 68 | .owner = THIS_MODULE, | ||
| 69 | .open = ptdump_open_curusr, | ||
| 70 | .read = seq_read, | ||
| 71 | .llseek = seq_lseek, | ||
| 72 | .release = single_release, | ||
| 73 | }; | ||
| 74 | #endif | 41 | #endif |
| 75 | 42 | ||
| 76 | #if defined(CONFIG_EFI) && defined(CONFIG_X86_64) | 43 | #if defined(CONFIG_EFI) && defined(CONFIG_X86_64) |
| 77 | static struct dentry *pe_efi; | 44 | static struct dentry *pe_efi; |
| 78 | 45 | ||
| 79 | static int ptdump_show_efi(struct seq_file *m, void *v) | 46 | static int ptdump_efi_show(struct seq_file *m, void *v) |
| 80 | { | 47 | { |
| 81 | if (efi_mm.pgd) | 48 | if (efi_mm.pgd) |
| 82 | ptdump_walk_pgd_level_debugfs(m, efi_mm.pgd, false); | 49 | ptdump_walk_pgd_level_debugfs(m, efi_mm.pgd, false); |
| 83 | return 0; | 50 | return 0; |
| 84 | } | 51 | } |
| 85 | 52 | ||
| 86 | static int ptdump_open_efi(struct inode *inode, struct file *filp) | 53 | DEFINE_SHOW_ATTRIBUTE(ptdump_efi); |
| 87 | { | ||
| 88 | return single_open(filp, ptdump_show_efi, NULL); | ||
| 89 | } | ||
| 90 | |||
| 91 | static const struct file_operations ptdump_efi_fops = { | ||
| 92 | .owner = THIS_MODULE, | ||
| 93 | .open = ptdump_open_efi, | ||
| 94 | .read = seq_read, | ||
| 95 | .llseek = seq_lseek, | ||
| 96 | .release = single_release, | ||
| 97 | }; | ||
| 98 | #endif | 54 | #endif |
| 99 | 55 | ||
| 100 | static struct dentry *dir, *pe_knl, *pe_curknl; | 56 | static struct dentry *dir, *pe_knl, *pe_curknl; |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 71d4b9d4d43f..2ff25ad33233 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <asm/vm86.h> /* struct vm86 */ | 27 | #include <asm/vm86.h> /* struct vm86 */ |
| 28 | #include <asm/mmu_context.h> /* vma_pkey() */ | 28 | #include <asm/mmu_context.h> /* vma_pkey() */ |
| 29 | #include <asm/efi.h> /* efi_recover_from_page_fault()*/ | 29 | #include <asm/efi.h> /* efi_recover_from_page_fault()*/ |
| 30 | #include <asm/desc.h> /* store_idt(), ... */ | ||
| 30 | 31 | ||
| 31 | #define CREATE_TRACE_POINTS | 32 | #define CREATE_TRACE_POINTS |
| 32 | #include <asm/trace/exceptions.h> | 33 | #include <asm/trace/exceptions.h> |
| @@ -571,10 +572,55 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) | |||
| 571 | return 0; | 572 | return 0; |
| 572 | } | 573 | } |
| 573 | 574 | ||
| 575 | static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) | ||
| 576 | { | ||
| 577 | u32 offset = (index >> 3) * sizeof(struct desc_struct); | ||
| 578 | unsigned long addr; | ||
| 579 | struct ldttss_desc desc; | ||
| 580 | |||
| 581 | if (index == 0) { | ||
| 582 | pr_alert("%s: NULL\n", name); | ||
| 583 | return; | ||
| 584 | } | ||
| 585 | |||
| 586 | if (offset + sizeof(struct ldttss_desc) >= gdt->size) { | ||
| 587 | pr_alert("%s: 0x%hx -- out of bounds\n", name, index); | ||
| 588 | return; | ||
| 589 | } | ||
| 590 | |||
| 591 | if (probe_kernel_read(&desc, (void *)(gdt->address + offset), | ||
| 592 | sizeof(struct ldttss_desc))) { | ||
| 593 | pr_alert("%s: 0x%hx -- GDT entry is not readable\n", | ||
| 594 | name, index); | ||
| 595 | return; | ||
| 596 | } | ||
| 597 | |||
| 598 | addr = desc.base0 | (desc.base1 << 16) | (desc.base2 << 24); | ||
| 599 | #ifdef CONFIG_X86_64 | ||
| 600 | addr |= ((u64)desc.base3 << 32); | ||
| 601 | #endif | ||
| 602 | pr_alert("%s: 0x%hx -- base=0x%lx limit=0x%x\n", | ||
| 603 | name, index, addr, (desc.limit0 | (desc.limit1 << 16))); | ||
| 604 | } | ||
| 605 | |||
| 606 | /* | ||
| 607 | * This helper function transforms the #PF error_code bits into | ||
| 608 | * "[PROT] [USER]" type of descriptive, almost human-readable error strings: | ||
| 609 | */ | ||
| 610 | static void err_str_append(unsigned long error_code, char *buf, unsigned long mask, const char *txt) | ||
| 611 | { | ||
| 612 | if (error_code & mask) { | ||
| 613 | if (buf[0]) | ||
| 614 | strcat(buf, " "); | ||
| 615 | strcat(buf, txt); | ||
| 616 | } | ||
| 617 | } | ||
| 618 | |||
| 574 | static void | 619 | static void |
| 575 | show_fault_oops(struct pt_regs *regs, unsigned long error_code, | 620 | show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address) |
| 576 | unsigned long address) | ||
| 577 | { | 621 | { |
| 622 | char err_txt[64]; | ||
| 623 | |||
| 578 | if (!oops_may_print()) | 624 | if (!oops_may_print()) |
| 579 | return; | 625 | return; |
| 580 | 626 | ||
| @@ -602,6 +648,52 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, | |||
| 602 | address < PAGE_SIZE ? "NULL pointer dereference" : "paging request", | 648 | address < PAGE_SIZE ? "NULL pointer dereference" : "paging request", |
| 603 | (void *)address); | 649 | (void *)address); |
| 604 | 650 | ||
| 651 | err_txt[0] = 0; | ||
| 652 | |||
| 653 | /* | ||
| 654 | * Note: length of these appended strings including the separation space and the | ||
| 655 | * zero delimiter must fit into err_txt[]. | ||
| 656 | */ | ||
| 657 | err_str_append(error_code, err_txt, X86_PF_PROT, "[PROT]" ); | ||
| 658 | err_str_append(error_code, err_txt, X86_PF_WRITE, "[WRITE]"); | ||
| 659 | err_str_append(error_code, err_txt, X86_PF_USER, "[USER]" ); | ||
| 660 | err_str_append(error_code, err_txt, X86_PF_RSVD, "[RSVD]" ); | ||
| 661 | err_str_append(error_code, err_txt, X86_PF_INSTR, "[INSTR]"); | ||
| 662 | err_str_append(error_code, err_txt, X86_PF_PK, "[PK]" ); | ||
| 663 | |||
| 664 | pr_alert("#PF error: %s\n", error_code ? err_txt : "[normal kernel read fault]"); | ||
| 665 | |||
| 666 | if (!(error_code & X86_PF_USER) && user_mode(regs)) { | ||
| 667 | struct desc_ptr idt, gdt; | ||
| 668 | u16 ldtr, tr; | ||
| 669 | |||
| 670 | pr_alert("This was a system access from user code\n"); | ||
| 671 | |||
| 672 | /* | ||
| 673 | * This can happen for quite a few reasons. The more obvious | ||
| 674 | * ones are faults accessing the GDT, or LDT. Perhaps | ||
| 675 | * surprisingly, if the CPU tries to deliver a benign or | ||
| 676 | * contributory exception from user code and gets a page fault | ||
| 677 | * during delivery, the page fault can be delivered as though | ||
| 678 | * it originated directly from user code. This could happen | ||
| 679 | * due to wrong permissions on the IDT, GDT, LDT, TSS, or | ||
| 680 | * kernel or IST stack. | ||
| 681 | */ | ||
| 682 | store_idt(&idt); | ||
| 683 | |||
| 684 | /* Usable even on Xen PV -- it's just slow. */ | ||
| 685 | native_store_gdt(&gdt); | ||
| 686 | |||
| 687 | pr_alert("IDT: 0x%lx (limit=0x%hx) GDT: 0x%lx (limit=0x%hx)\n", | ||
| 688 | idt.address, idt.size, gdt.address, gdt.size); | ||
| 689 | |||
| 690 | store_ldt(ldtr); | ||
| 691 | show_ldttss(&gdt, "LDTR", ldtr); | ||
| 692 | |||
| 693 | store_tr(tr); | ||
| 694 | show_ldttss(&gdt, "TR", tr); | ||
| 695 | } | ||
| 696 | |||
| 605 | dump_pagetable(address); | 697 | dump_pagetable(address); |
| 606 | } | 698 | } |
| 607 | 699 | ||
| @@ -621,16 +713,30 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code, | |||
| 621 | tsk->comm, address); | 713 | tsk->comm, address); |
| 622 | dump_pagetable(address); | 714 | dump_pagetable(address); |
| 623 | 715 | ||
| 624 | tsk->thread.cr2 = address; | ||
| 625 | tsk->thread.trap_nr = X86_TRAP_PF; | ||
| 626 | tsk->thread.error_code = error_code; | ||
| 627 | |||
| 628 | if (__die("Bad pagetable", regs, error_code)) | 716 | if (__die("Bad pagetable", regs, error_code)) |
| 629 | sig = 0; | 717 | sig = 0; |
| 630 | 718 | ||
| 631 | oops_end(flags, regs, sig); | 719 | oops_end(flags, regs, sig); |
| 632 | } | 720 | } |
| 633 | 721 | ||
| 722 | static void set_signal_archinfo(unsigned long address, | ||
| 723 | unsigned long error_code) | ||
| 724 | { | ||
| 725 | struct task_struct *tsk = current; | ||
| 726 | |||
| 727 | /* | ||
| 728 | * To avoid leaking information about the kernel page | ||
| 729 | * table layout, pretend that user-mode accesses to | ||
| 730 | * kernel addresses are always protection faults. | ||
| 731 | */ | ||
| 732 | if (address >= TASK_SIZE_MAX) | ||
| 733 | error_code |= X86_PF_PROT; | ||
| 734 | |||
| 735 | tsk->thread.trap_nr = X86_TRAP_PF; | ||
| 736 | tsk->thread.error_code = error_code | X86_PF_USER; | ||
| 737 | tsk->thread.cr2 = address; | ||
| 738 | } | ||
| 739 | |||
| 634 | static noinline void | 740 | static noinline void |
| 635 | no_context(struct pt_regs *regs, unsigned long error_code, | 741 | no_context(struct pt_regs *regs, unsigned long error_code, |
| 636 | unsigned long address, int signal, int si_code) | 742 | unsigned long address, int signal, int si_code) |
| @@ -639,6 +745,15 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
| 639 | unsigned long flags; | 745 | unsigned long flags; |
| 640 | int sig; | 746 | int sig; |
| 641 | 747 | ||
| 748 | if (user_mode(regs)) { | ||
| 749 | /* | ||
| 750 | * This is an implicit supervisor-mode access from user | ||
| 751 | * mode. Bypass all the kernel-mode recovery code and just | ||
| 752 | * OOPS. | ||
| 753 | */ | ||
| 754 | goto oops; | ||
| 755 | } | ||
| 756 | |||
| 642 | /* Are we prepared to handle this kernel fault? */ | 757 | /* Are we prepared to handle this kernel fault? */ |
| 643 | if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { | 758 | if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { |
| 644 | /* | 759 | /* |
| @@ -656,9 +771,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
| 656 | * faulting through the emulate_vsyscall() logic. | 771 | * faulting through the emulate_vsyscall() logic. |
| 657 | */ | 772 | */ |
| 658 | if (current->thread.sig_on_uaccess_err && signal) { | 773 | if (current->thread.sig_on_uaccess_err && signal) { |
| 659 | tsk->thread.trap_nr = X86_TRAP_PF; | 774 | set_signal_archinfo(address, error_code); |
| 660 | tsk->thread.error_code = error_code | X86_PF_USER; | ||
| 661 | tsk->thread.cr2 = address; | ||
| 662 | 775 | ||
| 663 | /* XXX: hwpoison faults will set the wrong code. */ | 776 | /* XXX: hwpoison faults will set the wrong code. */ |
| 664 | force_sig_fault(signal, si_code, (void __user *)address, | 777 | force_sig_fault(signal, si_code, (void __user *)address, |
| @@ -726,6 +839,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
| 726 | if (IS_ENABLED(CONFIG_EFI)) | 839 | if (IS_ENABLED(CONFIG_EFI)) |
| 727 | efi_recover_from_page_fault(address); | 840 | efi_recover_from_page_fault(address); |
| 728 | 841 | ||
| 842 | oops: | ||
| 729 | /* | 843 | /* |
| 730 | * Oops. The kernel tried to access some bad page. We'll have to | 844 | * Oops. The kernel tried to access some bad page. We'll have to |
| 731 | * terminate things with extreme prejudice: | 845 | * terminate things with extreme prejudice: |
| @@ -737,10 +851,6 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
| 737 | if (task_stack_end_corrupted(tsk)) | 851 | if (task_stack_end_corrupted(tsk)) |
| 738 | printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); | 852 | printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); |
| 739 | 853 | ||
| 740 | tsk->thread.cr2 = address; | ||
| 741 | tsk->thread.trap_nr = X86_TRAP_PF; | ||
| 742 | tsk->thread.error_code = error_code; | ||
| 743 | |||
| 744 | sig = SIGKILL; | 854 | sig = SIGKILL; |
| 745 | if (__die("Oops", regs, error_code)) | 855 | if (__die("Oops", regs, error_code)) |
| 746 | sig = 0; | 856 | sig = 0; |
| @@ -794,7 +904,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | |||
| 794 | struct task_struct *tsk = current; | 904 | struct task_struct *tsk = current; |
| 795 | 905 | ||
| 796 | /* User mode accesses just cause a SIGSEGV */ | 906 | /* User mode accesses just cause a SIGSEGV */ |
| 797 | if (error_code & X86_PF_USER) { | 907 | if (user_mode(regs) && (error_code & X86_PF_USER)) { |
| 798 | /* | 908 | /* |
| 799 | * It's possible to have interrupts off here: | 909 | * It's possible to have interrupts off here: |
| 800 | */ | 910 | */ |
| @@ -821,9 +931,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | |||
| 821 | if (likely(show_unhandled_signals)) | 931 | if (likely(show_unhandled_signals)) |
| 822 | show_signal_msg(regs, error_code, address, tsk); | 932 | show_signal_msg(regs, error_code, address, tsk); |
| 823 | 933 | ||
| 824 | tsk->thread.cr2 = address; | 934 | set_signal_archinfo(address, error_code); |
| 825 | tsk->thread.error_code = error_code; | ||
| 826 | tsk->thread.trap_nr = X86_TRAP_PF; | ||
| 827 | 935 | ||
| 828 | if (si_code == SEGV_PKUERR) | 936 | if (si_code == SEGV_PKUERR) |
| 829 | force_sig_pkuerr((void __user *)address, pkey); | 937 | force_sig_pkuerr((void __user *)address, pkey); |
| @@ -937,9 +1045,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, | |||
| 937 | if (is_prefetch(regs, error_code, address)) | 1045 | if (is_prefetch(regs, error_code, address)) |
| 938 | return; | 1046 | return; |
| 939 | 1047 | ||
| 940 | tsk->thread.cr2 = address; | 1048 | set_signal_archinfo(address, error_code); |
| 941 | tsk->thread.error_code = error_code; | ||
| 942 | tsk->thread.trap_nr = X86_TRAP_PF; | ||
| 943 | 1049 | ||
| 944 | #ifdef CONFIG_MEMORY_FAILURE | 1050 | #ifdef CONFIG_MEMORY_FAILURE |
| 945 | if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { | 1051 | if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { |
| @@ -1148,23 +1254,6 @@ static int fault_in_kernel_space(unsigned long address) | |||
| 1148 | return address >= TASK_SIZE_MAX; | 1254 | return address >= TASK_SIZE_MAX; |
| 1149 | } | 1255 | } |
| 1150 | 1256 | ||
| 1151 | static inline bool smap_violation(int error_code, struct pt_regs *regs) | ||
| 1152 | { | ||
| 1153 | if (!IS_ENABLED(CONFIG_X86_SMAP)) | ||
| 1154 | return false; | ||
| 1155 | |||
| 1156 | if (!static_cpu_has(X86_FEATURE_SMAP)) | ||
| 1157 | return false; | ||
| 1158 | |||
| 1159 | if (error_code & X86_PF_USER) | ||
| 1160 | return false; | ||
| 1161 | |||
| 1162 | if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) | ||
| 1163 | return false; | ||
| 1164 | |||
| 1165 | return true; | ||
| 1166 | } | ||
| 1167 | |||
| 1168 | /* | 1257 | /* |
| 1169 | * Called for all faults where 'address' is part of the kernel address | 1258 | * Called for all faults where 'address' is part of the kernel address |
| 1170 | * space. Might get called for faults that originate from *code* that | 1259 | * space. Might get called for faults that originate from *code* that |
| @@ -1230,7 +1319,6 @@ void do_user_addr_fault(struct pt_regs *regs, | |||
| 1230 | unsigned long hw_error_code, | 1319 | unsigned long hw_error_code, |
| 1231 | unsigned long address) | 1320 | unsigned long address) |
| 1232 | { | 1321 | { |
| 1233 | unsigned long sw_error_code; | ||
| 1234 | struct vm_area_struct *vma; | 1322 | struct vm_area_struct *vma; |
| 1235 | struct task_struct *tsk; | 1323 | struct task_struct *tsk; |
| 1236 | struct mm_struct *mm; | 1324 | struct mm_struct *mm; |
| @@ -1252,10 +1340,16 @@ void do_user_addr_fault(struct pt_regs *regs, | |||
| 1252 | pgtable_bad(regs, hw_error_code, address); | 1340 | pgtable_bad(regs, hw_error_code, address); |
| 1253 | 1341 | ||
| 1254 | /* | 1342 | /* |
| 1255 | * Check for invalid kernel (supervisor) access to user | 1343 | * If SMAP is on, check for invalid kernel (supervisor) access to user |
| 1256 | * pages in the user address space. | 1344 | * pages in the user address space. The odd case here is WRUSS, |
| 1345 | * which, according to the preliminary documentation, does not respect | ||
| 1346 | * SMAP and will have the USER bit set so, in all cases, SMAP | ||
| 1347 | * enforcement appears to be consistent with the USER bit. | ||
| 1257 | */ | 1348 | */ |
| 1258 | if (unlikely(smap_violation(hw_error_code, regs))) { | 1349 | if (unlikely(cpu_feature_enabled(X86_FEATURE_SMAP) && |
| 1350 | !(hw_error_code & X86_PF_USER) && | ||
| 1351 | !(regs->flags & X86_EFLAGS_AC))) | ||
| 1352 | { | ||
| 1259 | bad_area_nosemaphore(regs, hw_error_code, address); | 1353 | bad_area_nosemaphore(regs, hw_error_code, address); |
| 1260 | return; | 1354 | return; |
| 1261 | } | 1355 | } |
| @@ -1270,13 +1364,6 @@ void do_user_addr_fault(struct pt_regs *regs, | |||
| 1270 | } | 1364 | } |
| 1271 | 1365 | ||
| 1272 | /* | 1366 | /* |
| 1273 | * hw_error_code is literally the "page fault error code" passed to | ||
| 1274 | * the kernel directly from the hardware. But, we will shortly be | ||
| 1275 | * modifying it in software, so give it a new name. | ||
| 1276 | */ | ||
| 1277 | sw_error_code = hw_error_code; | ||
| 1278 | |||
| 1279 | /* | ||
| 1280 | * It's safe to allow irq's after cr2 has been saved and the | 1367 | * It's safe to allow irq's after cr2 has been saved and the |
| 1281 | * vmalloc fault has been handled. | 1368 | * vmalloc fault has been handled. |
| 1282 | * | 1369 | * |
| @@ -1285,26 +1372,6 @@ void do_user_addr_fault(struct pt_regs *regs, | |||
| 1285 | */ | 1372 | */ |
| 1286 | if (user_mode(regs)) { | 1373 | if (user_mode(regs)) { |
| 1287 | local_irq_enable(); | 1374 | local_irq_enable(); |
| 1288 | /* | ||
| 1289 | * Up to this point, X86_PF_USER set in hw_error_code | ||
| 1290 | * indicated a user-mode access. But, after this, | ||
| 1291 | * X86_PF_USER in sw_error_code will indicate either | ||
| 1292 | * that, *or* an implicit kernel(supervisor)-mode access | ||
| 1293 | * which originated from user mode. | ||
| 1294 | */ | ||
| 1295 | if (!(hw_error_code & X86_PF_USER)) { | ||
| 1296 | /* | ||
| 1297 | * The CPU was in user mode, but the CPU says | ||
| 1298 | * the fault was not a user-mode access. | ||
| 1299 | * Must be an implicit kernel-mode access, | ||
| 1300 | * which we do not expect to happen in the | ||
| 1301 | * user address space. | ||
| 1302 | */ | ||
| 1303 | pr_warn_once("kernel-mode error from user-mode: %lx\n", | ||
| 1304 | hw_error_code); | ||
| 1305 | |||
| 1306 | sw_error_code |= X86_PF_USER; | ||
| 1307 | } | ||
| 1308 | flags |= FAULT_FLAG_USER; | 1375 | flags |= FAULT_FLAG_USER; |
| 1309 | } else { | 1376 | } else { |
| 1310 | if (regs->flags & X86_EFLAGS_IF) | 1377 | if (regs->flags & X86_EFLAGS_IF) |
| @@ -1313,9 +1380,9 @@ void do_user_addr_fault(struct pt_regs *regs, | |||
| 1313 | 1380 | ||
| 1314 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); | 1381 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); |
| 1315 | 1382 | ||
| 1316 | if (sw_error_code & X86_PF_WRITE) | 1383 | if (hw_error_code & X86_PF_WRITE) |
| 1317 | flags |= FAULT_FLAG_WRITE; | 1384 | flags |= FAULT_FLAG_WRITE; |
| 1318 | if (sw_error_code & X86_PF_INSTR) | 1385 | if (hw_error_code & X86_PF_INSTR) |
| 1319 | flags |= FAULT_FLAG_INSTRUCTION; | 1386 | flags |= FAULT_FLAG_INSTRUCTION; |
| 1320 | 1387 | ||
| 1321 | #ifdef CONFIG_X86_64 | 1388 | #ifdef CONFIG_X86_64 |
| @@ -1328,7 +1395,7 @@ void do_user_addr_fault(struct pt_regs *regs, | |||
| 1328 | * The vsyscall page does not have a "real" VMA, so do this | 1395 | * The vsyscall page does not have a "real" VMA, so do this |
| 1329 | * emulation before we go searching for VMAs. | 1396 | * emulation before we go searching for VMAs. |
| 1330 | */ | 1397 | */ |
| 1331 | if ((sw_error_code & X86_PF_INSTR) && is_vsyscall_vaddr(address)) { | 1398 | if ((hw_error_code & X86_PF_INSTR) && is_vsyscall_vaddr(address)) { |
| 1332 | if (emulate_vsyscall(regs, address)) | 1399 | if (emulate_vsyscall(regs, address)) |
| 1333 | return; | 1400 | return; |
| 1334 | } | 1401 | } |
| @@ -1344,18 +1411,15 @@ void do_user_addr_fault(struct pt_regs *regs, | |||
| 1344 | * Only do the expensive exception table search when we might be at | 1411 | * Only do the expensive exception table search when we might be at |
| 1345 | * risk of a deadlock. This happens if we | 1412 | * risk of a deadlock. This happens if we |
| 1346 | * 1. Failed to acquire mmap_sem, and | 1413 | * 1. Failed to acquire mmap_sem, and |
| 1347 | * 2. The access did not originate in userspace. Note: either the | 1414 | * 2. The access did not originate in userspace. |
| 1348 | * hardware or earlier page fault code may set X86_PF_USER | ||
| 1349 | * in sw_error_code. | ||
| 1350 | */ | 1415 | */ |
| 1351 | if (unlikely(!down_read_trylock(&mm->mmap_sem))) { | 1416 | if (unlikely(!down_read_trylock(&mm->mmap_sem))) { |
| 1352 | if (!(sw_error_code & X86_PF_USER) && | 1417 | if (!user_mode(regs) && !search_exception_tables(regs->ip)) { |
| 1353 | !search_exception_tables(regs->ip)) { | ||
| 1354 | /* | 1418 | /* |
| 1355 | * Fault from code in kernel from | 1419 | * Fault from code in kernel from |
| 1356 | * which we do not expect faults. | 1420 | * which we do not expect faults. |
| 1357 | */ | 1421 | */ |
| 1358 | bad_area_nosemaphore(regs, sw_error_code, address); | 1422 | bad_area_nosemaphore(regs, hw_error_code, address); |
| 1359 | return; | 1423 | return; |
| 1360 | } | 1424 | } |
| 1361 | retry: | 1425 | retry: |
| @@ -1371,29 +1435,17 @@ retry: | |||
| 1371 | 1435 | ||
| 1372 | vma = find_vma(mm, address); | 1436 | vma = find_vma(mm, address); |
| 1373 | if (unlikely(!vma)) { | 1437 | if (unlikely(!vma)) { |
| 1374 | bad_area(regs, sw_error_code, address); | 1438 | bad_area(regs, hw_error_code, address); |
| 1375 | return; | 1439 | return; |
| 1376 | } | 1440 | } |
| 1377 | if (likely(vma->vm_start <= address)) | 1441 | if (likely(vma->vm_start <= address)) |
| 1378 | goto good_area; | 1442 | goto good_area; |
| 1379 | if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { | 1443 | if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { |
| 1380 | bad_area(regs, sw_error_code, address); | 1444 | bad_area(regs, hw_error_code, address); |
| 1381 | return; | 1445 | return; |
| 1382 | } | 1446 | } |
| 1383 | if (sw_error_code & X86_PF_USER) { | ||
| 1384 | /* | ||
| 1385 | * Accessing the stack below %sp is always a bug. | ||
| 1386 | * The large cushion allows instructions like enter | ||
| 1387 | * and pusha to work. ("enter $65535, $31" pushes | ||
| 1388 | * 32 pointers and then decrements %sp by 65535.) | ||
| 1389 | */ | ||
| 1390 | if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { | ||
| 1391 | bad_area(regs, sw_error_code, address); | ||
| 1392 | return; | ||
| 1393 | } | ||
| 1394 | } | ||
| 1395 | if (unlikely(expand_stack(vma, address))) { | 1447 | if (unlikely(expand_stack(vma, address))) { |
| 1396 | bad_area(regs, sw_error_code, address); | 1448 | bad_area(regs, hw_error_code, address); |
| 1397 | return; | 1449 | return; |
| 1398 | } | 1450 | } |
| 1399 | 1451 | ||
| @@ -1402,8 +1454,8 @@ retry: | |||
| 1402 | * we can handle it.. | 1454 | * we can handle it.. |
| 1403 | */ | 1455 | */ |
| 1404 | good_area: | 1456 | good_area: |
| 1405 | if (unlikely(access_error(sw_error_code, vma))) { | 1457 | if (unlikely(access_error(hw_error_code, vma))) { |
| 1406 | bad_area_access_error(regs, sw_error_code, address, vma); | 1458 | bad_area_access_error(regs, hw_error_code, address, vma); |
| 1407 | return; | 1459 | return; |
| 1408 | } | 1460 | } |
| 1409 | 1461 | ||
| @@ -1442,13 +1494,13 @@ good_area: | |||
| 1442 | return; | 1494 | return; |
| 1443 | 1495 | ||
| 1444 | /* Not returning to user mode? Handle exceptions or die: */ | 1496 | /* Not returning to user mode? Handle exceptions or die: */ |
| 1445 | no_context(regs, sw_error_code, address, SIGBUS, BUS_ADRERR); | 1497 | no_context(regs, hw_error_code, address, SIGBUS, BUS_ADRERR); |
| 1446 | return; | 1498 | return; |
| 1447 | } | 1499 | } |
| 1448 | 1500 | ||
| 1449 | up_read(&mm->mmap_sem); | 1501 | up_read(&mm->mmap_sem); |
| 1450 | if (unlikely(fault & VM_FAULT_ERROR)) { | 1502 | if (unlikely(fault & VM_FAULT_ERROR)) { |
| 1451 | mm_fault_error(regs, sw_error_code, address, fault); | 1503 | mm_fault_error(regs, hw_error_code, address, fault); |
| 1452 | return; | 1504 | return; |
| 1453 | } | 1505 | } |
| 1454 | 1506 | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5fab264948c2..484c1b92f078 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
| @@ -432,7 +432,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, | |||
| 432 | E820_TYPE_RAM) && | 432 | E820_TYPE_RAM) && |
| 433 | !e820__mapped_any(paddr & PAGE_MASK, paddr_next, | 433 | !e820__mapped_any(paddr & PAGE_MASK, paddr_next, |
| 434 | E820_TYPE_RESERVED_KERN)) | 434 | E820_TYPE_RESERVED_KERN)) |
| 435 | set_pte(pte, __pte(0)); | 435 | set_pte_safe(pte, __pte(0)); |
| 436 | continue; | 436 | continue; |
| 437 | } | 437 | } |
| 438 | 438 | ||
| @@ -452,7 +452,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, | |||
| 452 | pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr, | 452 | pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr, |
| 453 | pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte); | 453 | pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte); |
| 454 | pages++; | 454 | pages++; |
| 455 | set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); | 455 | set_pte_safe(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); |
| 456 | paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE; | 456 | paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE; |
| 457 | } | 457 | } |
| 458 | 458 | ||
| @@ -487,7 +487,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, | |||
| 487 | E820_TYPE_RAM) && | 487 | E820_TYPE_RAM) && |
| 488 | !e820__mapped_any(paddr & PMD_MASK, paddr_next, | 488 | !e820__mapped_any(paddr & PMD_MASK, paddr_next, |
| 489 | E820_TYPE_RESERVED_KERN)) | 489 | E820_TYPE_RESERVED_KERN)) |
| 490 | set_pmd(pmd, __pmd(0)); | 490 | set_pmd_safe(pmd, __pmd(0)); |
| 491 | continue; | 491 | continue; |
| 492 | } | 492 | } |
| 493 | 493 | ||
| @@ -524,7 +524,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, | |||
| 524 | if (page_size_mask & (1<<PG_LEVEL_2M)) { | 524 | if (page_size_mask & (1<<PG_LEVEL_2M)) { |
| 525 | pages++; | 525 | pages++; |
| 526 | spin_lock(&init_mm.page_table_lock); | 526 | spin_lock(&init_mm.page_table_lock); |
| 527 | set_pte((pte_t *)pmd, | 527 | set_pte_safe((pte_t *)pmd, |
| 528 | pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT, | 528 | pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT, |
| 529 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | 529 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); |
| 530 | spin_unlock(&init_mm.page_table_lock); | 530 | spin_unlock(&init_mm.page_table_lock); |
| @@ -536,7 +536,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, | |||
| 536 | paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot); | 536 | paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot); |
| 537 | 537 | ||
| 538 | spin_lock(&init_mm.page_table_lock); | 538 | spin_lock(&init_mm.page_table_lock); |
| 539 | pmd_populate_kernel(&init_mm, pmd, pte); | 539 | pmd_populate_kernel_safe(&init_mm, pmd, pte); |
| 540 | spin_unlock(&init_mm.page_table_lock); | 540 | spin_unlock(&init_mm.page_table_lock); |
| 541 | } | 541 | } |
| 542 | update_page_count(PG_LEVEL_2M, pages); | 542 | update_page_count(PG_LEVEL_2M, pages); |
| @@ -573,7 +573,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, | |||
| 573 | E820_TYPE_RAM) && | 573 | E820_TYPE_RAM) && |
| 574 | !e820__mapped_any(paddr & PUD_MASK, paddr_next, | 574 | !e820__mapped_any(paddr & PUD_MASK, paddr_next, |
| 575 | E820_TYPE_RESERVED_KERN)) | 575 | E820_TYPE_RESERVED_KERN)) |
| 576 | set_pud(pud, __pud(0)); | 576 | set_pud_safe(pud, __pud(0)); |
| 577 | continue; | 577 | continue; |
| 578 | } | 578 | } |
| 579 | 579 | ||
| @@ -584,7 +584,6 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, | |||
| 584 | paddr_end, | 584 | paddr_end, |
| 585 | page_size_mask, | 585 | page_size_mask, |
| 586 | prot); | 586 | prot); |
| 587 | __flush_tlb_all(); | ||
| 588 | continue; | 587 | continue; |
| 589 | } | 588 | } |
| 590 | /* | 589 | /* |
| @@ -611,7 +610,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, | |||
| 611 | if (page_size_mask & (1<<PG_LEVEL_1G)) { | 610 | if (page_size_mask & (1<<PG_LEVEL_1G)) { |
| 612 | pages++; | 611 | pages++; |
| 613 | spin_lock(&init_mm.page_table_lock); | 612 | spin_lock(&init_mm.page_table_lock); |
| 614 | set_pte((pte_t *)pud, | 613 | set_pte_safe((pte_t *)pud, |
| 615 | pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, | 614 | pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, |
| 616 | PAGE_KERNEL_LARGE)); | 615 | PAGE_KERNEL_LARGE)); |
| 617 | spin_unlock(&init_mm.page_table_lock); | 616 | spin_unlock(&init_mm.page_table_lock); |
| @@ -624,10 +623,9 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, | |||
| 624 | page_size_mask, prot); | 623 | page_size_mask, prot); |
| 625 | 624 | ||
| 626 | spin_lock(&init_mm.page_table_lock); | 625 | spin_lock(&init_mm.page_table_lock); |
| 627 | pud_populate(&init_mm, pud, pmd); | 626 | pud_populate_safe(&init_mm, pud, pmd); |
| 628 | spin_unlock(&init_mm.page_table_lock); | 627 | spin_unlock(&init_mm.page_table_lock); |
| 629 | } | 628 | } |
| 630 | __flush_tlb_all(); | ||
| 631 | 629 | ||
| 632 | update_page_count(PG_LEVEL_1G, pages); | 630 | update_page_count(PG_LEVEL_1G, pages); |
| 633 | 631 | ||
| @@ -659,7 +657,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, | |||
| 659 | E820_TYPE_RAM) && | 657 | E820_TYPE_RAM) && |
| 660 | !e820__mapped_any(paddr & P4D_MASK, paddr_next, | 658 | !e820__mapped_any(paddr & P4D_MASK, paddr_next, |
| 661 | E820_TYPE_RESERVED_KERN)) | 659 | E820_TYPE_RESERVED_KERN)) |
| 662 | set_p4d(p4d, __p4d(0)); | 660 | set_p4d_safe(p4d, __p4d(0)); |
| 663 | continue; | 661 | continue; |
| 664 | } | 662 | } |
| 665 | 663 | ||
| @@ -668,7 +666,6 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, | |||
| 668 | paddr_last = phys_pud_init(pud, paddr, | 666 | paddr_last = phys_pud_init(pud, paddr, |
| 669 | paddr_end, | 667 | paddr_end, |
| 670 | page_size_mask); | 668 | page_size_mask); |
| 671 | __flush_tlb_all(); | ||
| 672 | continue; | 669 | continue; |
| 673 | } | 670 | } |
| 674 | 671 | ||
| @@ -677,10 +674,9 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, | |||
| 677 | page_size_mask); | 674 | page_size_mask); |
| 678 | 675 | ||
| 679 | spin_lock(&init_mm.page_table_lock); | 676 | spin_lock(&init_mm.page_table_lock); |
| 680 | p4d_populate(&init_mm, p4d, pud); | 677 | p4d_populate_safe(&init_mm, p4d, pud); |
| 681 | spin_unlock(&init_mm.page_table_lock); | 678 | spin_unlock(&init_mm.page_table_lock); |
| 682 | } | 679 | } |
| 683 | __flush_tlb_all(); | ||
| 684 | 680 | ||
| 685 | return paddr_last; | 681 | return paddr_last; |
| 686 | } | 682 | } |
| @@ -723,9 +719,9 @@ kernel_physical_mapping_init(unsigned long paddr_start, | |||
| 723 | 719 | ||
| 724 | spin_lock(&init_mm.page_table_lock); | 720 | spin_lock(&init_mm.page_table_lock); |
| 725 | if (pgtable_l5_enabled()) | 721 | if (pgtable_l5_enabled()) |
| 726 | pgd_populate(&init_mm, pgd, p4d); | 722 | pgd_populate_safe(&init_mm, pgd, p4d); |
| 727 | else | 723 | else |
| 728 | p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d); | 724 | p4d_populate_safe(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d); |
| 729 | spin_unlock(&init_mm.page_table_lock); | 725 | spin_unlock(&init_mm.page_table_lock); |
| 730 | pgd_changed = true; | 726 | pgd_changed = true; |
| 731 | } | 727 | } |
| @@ -733,8 +729,6 @@ kernel_physical_mapping_init(unsigned long paddr_start, | |||
| 733 | if (pgd_changed) | 729 | if (pgd_changed) |
| 734 | sync_global_pgds(vaddr_start, vaddr_end - 1); | 730 | sync_global_pgds(vaddr_start, vaddr_end - 1); |
| 735 | 731 | ||
| 736 | __flush_tlb_all(); | ||
| 737 | |||
| 738 | return paddr_last; | 732 | return paddr_last; |
| 739 | } | 733 | } |
| 740 | 734 | ||
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index 4e1f6e1b8159..319bde386d5f 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h | |||
| @@ -19,4 +19,6 @@ extern int after_bootmem; | |||
| 19 | 19 | ||
| 20 | void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache); | 20 | void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache); |
| 21 | 21 | ||
| 22 | extern unsigned long tlb_single_page_flush_ceiling; | ||
| 23 | |||
| 22 | #endif /* __X86_MM_INTERNAL_H */ | 24 | #endif /* __X86_MM_INTERNAL_H */ |
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 08f8f76a4852..facce271e8b9 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c | |||
| @@ -23,7 +23,8 @@ | |||
| 23 | static __read_mostly int print = 1; | 23 | static __read_mostly int print = 1; |
| 24 | 24 | ||
| 25 | enum { | 25 | enum { |
| 26 | NTEST = 400, | 26 | NTEST = 3 * 100, |
| 27 | NPAGES = 100, | ||
| 27 | #ifdef CONFIG_X86_64 | 28 | #ifdef CONFIG_X86_64 |
| 28 | LPS = (1 << PMD_SHIFT), | 29 | LPS = (1 << PMD_SHIFT), |
| 29 | #elif defined(CONFIG_X86_PAE) | 30 | #elif defined(CONFIG_X86_PAE) |
| @@ -110,6 +111,9 @@ static int print_split(struct split_state *s) | |||
| 110 | static unsigned long addr[NTEST]; | 111 | static unsigned long addr[NTEST]; |
| 111 | static unsigned int len[NTEST]; | 112 | static unsigned int len[NTEST]; |
| 112 | 113 | ||
| 114 | static struct page *pages[NPAGES]; | ||
| 115 | static unsigned long addrs[NPAGES]; | ||
| 116 | |||
| 113 | /* Change the global bit on random pages in the direct mapping */ | 117 | /* Change the global bit on random pages in the direct mapping */ |
| 114 | static int pageattr_test(void) | 118 | static int pageattr_test(void) |
| 115 | { | 119 | { |
| @@ -120,7 +124,6 @@ static int pageattr_test(void) | |||
| 120 | unsigned int level; | 124 | unsigned int level; |
| 121 | int i, k; | 125 | int i, k; |
| 122 | int err; | 126 | int err; |
| 123 | unsigned long test_addr; | ||
| 124 | 127 | ||
| 125 | if (print) | 128 | if (print) |
| 126 | printk(KERN_INFO "CPA self-test:\n"); | 129 | printk(KERN_INFO "CPA self-test:\n"); |
| @@ -137,7 +140,7 @@ static int pageattr_test(void) | |||
| 137 | unsigned long pfn = prandom_u32() % max_pfn_mapped; | 140 | unsigned long pfn = prandom_u32() % max_pfn_mapped; |
| 138 | 141 | ||
| 139 | addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT); | 142 | addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT); |
| 140 | len[i] = prandom_u32() % 100; | 143 | len[i] = prandom_u32() % NPAGES; |
| 141 | len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1); | 144 | len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1); |
| 142 | 145 | ||
| 143 | if (len[i] == 0) | 146 | if (len[i] == 0) |
| @@ -167,14 +170,29 @@ static int pageattr_test(void) | |||
| 167 | break; | 170 | break; |
| 168 | } | 171 | } |
| 169 | __set_bit(pfn + k, bm); | 172 | __set_bit(pfn + k, bm); |
| 173 | addrs[k] = addr[i] + k*PAGE_SIZE; | ||
| 174 | pages[k] = pfn_to_page(pfn + k); | ||
| 170 | } | 175 | } |
| 171 | if (!addr[i] || !pte || !k) { | 176 | if (!addr[i] || !pte || !k) { |
| 172 | addr[i] = 0; | 177 | addr[i] = 0; |
| 173 | continue; | 178 | continue; |
| 174 | } | 179 | } |
| 175 | 180 | ||
| 176 | test_addr = addr[i]; | 181 | switch (i % 3) { |
| 177 | err = change_page_attr_set(&test_addr, len[i], PAGE_CPA_TEST, 0); | 182 | case 0: |
| 183 | err = change_page_attr_set(&addr[i], len[i], PAGE_CPA_TEST, 0); | ||
| 184 | break; | ||
| 185 | |||
| 186 | case 1: | ||
| 187 | err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1); | ||
| 188 | break; | ||
| 189 | |||
| 190 | case 2: | ||
| 191 | err = cpa_set_pages_array(pages, len[i], PAGE_CPA_TEST); | ||
| 192 | break; | ||
| 193 | } | ||
| 194 | |||
| 195 | |||
| 178 | if (err < 0) { | 196 | if (err < 0) { |
| 179 | printk(KERN_ERR "CPA %d failed %d\n", i, err); | 197 | printk(KERN_ERR "CPA %d failed %d\n", i, err); |
| 180 | failed++; | 198 | failed++; |
| @@ -206,8 +224,7 @@ static int pageattr_test(void) | |||
| 206 | failed++; | 224 | failed++; |
| 207 | continue; | 225 | continue; |
| 208 | } | 226 | } |
| 209 | test_addr = addr[i]; | 227 | err = change_page_attr_clear(&addr[i], len[i], PAGE_CPA_TEST, 0); |
| 210 | err = change_page_attr_clear(&test_addr, len[i], PAGE_CPA_TEST, 0); | ||
| 211 | if (err < 0) { | 228 | if (err < 0) { |
| 212 | printk(KERN_ERR "CPA reverting failed: %d\n", err); | 229 | printk(KERN_ERR "CPA reverting failed: %d\n", err); |
| 213 | failed++; | 230 | failed++; |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e44fe1a63f72..4f8972311a77 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
| @@ -26,6 +26,8 @@ | |||
| 26 | #include <asm/pat.h> | 26 | #include <asm/pat.h> |
| 27 | #include <asm/set_memory.h> | 27 | #include <asm/set_memory.h> |
| 28 | 28 | ||
| 29 | #include "mm_internal.h" | ||
| 30 | |||
| 29 | /* | 31 | /* |
| 30 | * The current flushing context - we pass it instead of 5 arguments: | 32 | * The current flushing context - we pass it instead of 5 arguments: |
| 31 | */ | 33 | */ |
| @@ -35,11 +37,11 @@ struct cpa_data { | |||
| 35 | pgprot_t mask_set; | 37 | pgprot_t mask_set; |
| 36 | pgprot_t mask_clr; | 38 | pgprot_t mask_clr; |
| 37 | unsigned long numpages; | 39 | unsigned long numpages; |
| 38 | int flags; | 40 | unsigned long curpage; |
| 39 | unsigned long pfn; | 41 | unsigned long pfn; |
| 40 | unsigned force_split : 1, | 42 | unsigned int flags; |
| 43 | unsigned int force_split : 1, | ||
| 41 | force_static_prot : 1; | 44 | force_static_prot : 1; |
| 42 | int curpage; | ||
| 43 | struct page **pages; | 45 | struct page **pages; |
| 44 | }; | 46 | }; |
| 45 | 47 | ||
| @@ -228,19 +230,28 @@ static bool __cpa_pfn_in_highmap(unsigned long pfn) | |||
| 228 | 230 | ||
| 229 | #endif | 231 | #endif |
| 230 | 232 | ||
| 233 | static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx) | ||
| 234 | { | ||
| 235 | if (cpa->flags & CPA_PAGES_ARRAY) { | ||
| 236 | struct page *page = cpa->pages[idx]; | ||
| 237 | |||
| 238 | if (unlikely(PageHighMem(page))) | ||
| 239 | return 0; | ||
| 240 | |||
| 241 | return (unsigned long)page_address(page); | ||
| 242 | } | ||
| 243 | |||
| 244 | if (cpa->flags & CPA_ARRAY) | ||
| 245 | return cpa->vaddr[idx]; | ||
| 246 | |||
| 247 | return *cpa->vaddr + idx * PAGE_SIZE; | ||
| 248 | } | ||
| 249 | |||
| 231 | /* | 250 | /* |
| 232 | * Flushing functions | 251 | * Flushing functions |
| 233 | */ | 252 | */ |
| 234 | 253 | ||
| 235 | /** | 254 | static void clflush_cache_range_opt(void *vaddr, unsigned int size) |
| 236 | * clflush_cache_range - flush a cache range with clflush | ||
| 237 | * @vaddr: virtual start address | ||
| 238 | * @size: number of bytes to flush | ||
| 239 | * | ||
| 240 | * clflushopt is an unordered instruction which needs fencing with mfence or | ||
| 241 | * sfence to avoid ordering issues. | ||
| 242 | */ | ||
| 243 | void clflush_cache_range(void *vaddr, unsigned int size) | ||
| 244 | { | 255 | { |
| 245 | const unsigned long clflush_size = boot_cpu_data.x86_clflush_size; | 256 | const unsigned long clflush_size = boot_cpu_data.x86_clflush_size; |
| 246 | void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1)); | 257 | void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1)); |
| @@ -249,11 +260,22 @@ void clflush_cache_range(void *vaddr, unsigned int size) | |||
| 249 | if (p >= vend) | 260 | if (p >= vend) |
| 250 | return; | 261 | return; |
| 251 | 262 | ||
| 252 | mb(); | ||
| 253 | |||
| 254 | for (; p < vend; p += clflush_size) | 263 | for (; p < vend; p += clflush_size) |
| 255 | clflushopt(p); | 264 | clflushopt(p); |
| 265 | } | ||
| 256 | 266 | ||
| 267 | /** | ||
| 268 | * clflush_cache_range - flush a cache range with clflush | ||
| 269 | * @vaddr: virtual start address | ||
| 270 | * @size: number of bytes to flush | ||
| 271 | * | ||
| 272 | * CLFLUSHOPT is an unordered instruction which needs fencing with MFENCE or | ||
| 273 | * SFENCE to avoid ordering issues. | ||
| 274 | */ | ||
| 275 | void clflush_cache_range(void *vaddr, unsigned int size) | ||
| 276 | { | ||
| 277 | mb(); | ||
| 278 | clflush_cache_range_opt(vaddr, size); | ||
| 257 | mb(); | 279 | mb(); |
| 258 | } | 280 | } |
| 259 | EXPORT_SYMBOL_GPL(clflush_cache_range); | 281 | EXPORT_SYMBOL_GPL(clflush_cache_range); |
| @@ -285,87 +307,49 @@ static void cpa_flush_all(unsigned long cache) | |||
| 285 | on_each_cpu(__cpa_flush_all, (void *) cache, 1); | 307 | on_each_cpu(__cpa_flush_all, (void *) cache, 1); |
| 286 | } | 308 | } |
| 287 | 309 | ||
| 288 | static bool __inv_flush_all(int cache) | 310 | void __cpa_flush_tlb(void *data) |
| 289 | { | 311 | { |
| 290 | BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); | 312 | struct cpa_data *cpa = data; |
| 313 | unsigned int i; | ||
| 291 | 314 | ||
| 292 | if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) { | 315 | for (i = 0; i < cpa->numpages; i++) |
| 293 | cpa_flush_all(cache); | 316 | __flush_tlb_one_kernel(__cpa_addr(cpa, i)); |
| 294 | return true; | ||
| 295 | } | ||
| 296 | |||
| 297 | return false; | ||
| 298 | } | 317 | } |
| 299 | 318 | ||
| 300 | static void cpa_flush_range(unsigned long start, int numpages, int cache) | 319 | static void cpa_flush(struct cpa_data *data, int cache) |
| 301 | { | 320 | { |
| 302 | unsigned int i, level; | 321 | struct cpa_data *cpa = data; |
| 303 | unsigned long addr; | 322 | unsigned int i; |
| 304 | 323 | ||
| 305 | WARN_ON(PAGE_ALIGN(start) != start); | 324 | BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); |
| 306 | |||
| 307 | if (__inv_flush_all(cache)) | ||
| 308 | return; | ||
| 309 | |||
| 310 | flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages); | ||
| 311 | 325 | ||
| 312 | if (!cache) | 326 | if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) { |
| 327 | cpa_flush_all(cache); | ||
| 313 | return; | 328 | return; |
| 314 | |||
| 315 | /* | ||
| 316 | * We only need to flush on one CPU, | ||
| 317 | * clflush is a MESI-coherent instruction that | ||
| 318 | * will cause all other CPUs to flush the same | ||
| 319 | * cachelines: | ||
| 320 | */ | ||
| 321 | for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) { | ||
| 322 | pte_t *pte = lookup_address(addr, &level); | ||
| 323 | |||
| 324 | /* | ||
| 325 | * Only flush present addresses: | ||
| 326 | */ | ||
| 327 | if (pte && (pte_val(*pte) & _PAGE_PRESENT)) | ||
| 328 | clflush_cache_range((void *) addr, PAGE_SIZE); | ||
| 329 | } | 329 | } |
| 330 | } | ||
| 331 | 330 | ||
| 332 | static void cpa_flush_array(unsigned long baddr, unsigned long *start, | 331 | if (cpa->numpages <= tlb_single_page_flush_ceiling) |
| 333 | int numpages, int cache, | 332 | on_each_cpu(__cpa_flush_tlb, cpa, 1); |
| 334 | int in_flags, struct page **pages) | 333 | else |
| 335 | { | 334 | flush_tlb_all(); |
| 336 | unsigned int i, level; | ||
| 337 | |||
| 338 | if (__inv_flush_all(cache)) | ||
| 339 | return; | ||
| 340 | |||
| 341 | flush_tlb_all(); | ||
| 342 | 335 | ||
| 343 | if (!cache) | 336 | if (!cache) |
| 344 | return; | 337 | return; |
| 345 | 338 | ||
| 346 | /* | 339 | mb(); |
| 347 | * We only need to flush on one CPU, | 340 | for (i = 0; i < cpa->numpages; i++) { |
| 348 | * clflush is a MESI-coherent instruction that | 341 | unsigned long addr = __cpa_addr(cpa, i); |
| 349 | * will cause all other CPUs to flush the same | 342 | unsigned int level; |
| 350 | * cachelines: | ||
| 351 | */ | ||
| 352 | for (i = 0; i < numpages; i++) { | ||
| 353 | unsigned long addr; | ||
| 354 | pte_t *pte; | ||
| 355 | |||
| 356 | if (in_flags & CPA_PAGES_ARRAY) | ||
| 357 | addr = (unsigned long)page_address(pages[i]); | ||
| 358 | else | ||
| 359 | addr = start[i]; | ||
| 360 | 343 | ||
| 361 | pte = lookup_address(addr, &level); | 344 | pte_t *pte = lookup_address(addr, &level); |
| 362 | 345 | ||
| 363 | /* | 346 | /* |
| 364 | * Only flush present addresses: | 347 | * Only flush present addresses: |
| 365 | */ | 348 | */ |
| 366 | if (pte && (pte_val(*pte) & _PAGE_PRESENT)) | 349 | if (pte && (pte_val(*pte) & _PAGE_PRESENT)) |
| 367 | clflush_cache_range((void *)addr, PAGE_SIZE); | 350 | clflush_cache_range_opt((void *)addr, PAGE_SIZE); |
| 368 | } | 351 | } |
| 352 | mb(); | ||
| 369 | } | 353 | } |
| 370 | 354 | ||
| 371 | static bool overlaps(unsigned long r1_start, unsigned long r1_end, | 355 | static bool overlaps(unsigned long r1_start, unsigned long r1_end, |
| @@ -1476,15 +1460,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) | |||
| 1476 | unsigned int level; | 1460 | unsigned int level; |
| 1477 | pte_t *kpte, old_pte; | 1461 | pte_t *kpte, old_pte; |
| 1478 | 1462 | ||
| 1479 | if (cpa->flags & CPA_PAGES_ARRAY) { | 1463 | address = __cpa_addr(cpa, cpa->curpage); |
| 1480 | struct page *page = cpa->pages[cpa->curpage]; | ||
| 1481 | if (unlikely(PageHighMem(page))) | ||
| 1482 | return 0; | ||
| 1483 | address = (unsigned long)page_address(page); | ||
| 1484 | } else if (cpa->flags & CPA_ARRAY) | ||
| 1485 | address = cpa->vaddr[cpa->curpage]; | ||
| 1486 | else | ||
| 1487 | address = *cpa->vaddr; | ||
| 1488 | repeat: | 1464 | repeat: |
| 1489 | kpte = _lookup_address_cpa(cpa, address, &level); | 1465 | kpte = _lookup_address_cpa(cpa, address, &level); |
| 1490 | if (!kpte) | 1466 | if (!kpte) |
| @@ -1565,22 +1541,14 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
| 1565 | * No need to redo, when the primary call touched the direct | 1541 | * No need to redo, when the primary call touched the direct |
| 1566 | * mapping already: | 1542 | * mapping already: |
| 1567 | */ | 1543 | */ |
| 1568 | if (cpa->flags & CPA_PAGES_ARRAY) { | 1544 | vaddr = __cpa_addr(cpa, cpa->curpage); |
| 1569 | struct page *page = cpa->pages[cpa->curpage]; | ||
| 1570 | if (unlikely(PageHighMem(page))) | ||
| 1571 | return 0; | ||
| 1572 | vaddr = (unsigned long)page_address(page); | ||
| 1573 | } else if (cpa->flags & CPA_ARRAY) | ||
| 1574 | vaddr = cpa->vaddr[cpa->curpage]; | ||
| 1575 | else | ||
| 1576 | vaddr = *cpa->vaddr; | ||
| 1577 | |||
| 1578 | if (!(within(vaddr, PAGE_OFFSET, | 1545 | if (!(within(vaddr, PAGE_OFFSET, |
| 1579 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { | 1546 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { |
| 1580 | 1547 | ||
| 1581 | alias_cpa = *cpa; | 1548 | alias_cpa = *cpa; |
| 1582 | alias_cpa.vaddr = &laddr; | 1549 | alias_cpa.vaddr = &laddr; |
| 1583 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | 1550 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); |
| 1551 | alias_cpa.curpage = 0; | ||
| 1584 | 1552 | ||
| 1585 | ret = __change_page_attr_set_clr(&alias_cpa, 0); | 1553 | ret = __change_page_attr_set_clr(&alias_cpa, 0); |
| 1586 | if (ret) | 1554 | if (ret) |
| @@ -1600,6 +1568,7 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
| 1600 | alias_cpa = *cpa; | 1568 | alias_cpa = *cpa; |
| 1601 | alias_cpa.vaddr = &temp_cpa_vaddr; | 1569 | alias_cpa.vaddr = &temp_cpa_vaddr; |
| 1602 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | 1570 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); |
| 1571 | alias_cpa.curpage = 0; | ||
| 1603 | 1572 | ||
| 1604 | /* | 1573 | /* |
| 1605 | * The high mapping range is imprecise, so ignore the | 1574 | * The high mapping range is imprecise, so ignore the |
| @@ -1615,14 +1584,15 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
| 1615 | static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | 1584 | static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) |
| 1616 | { | 1585 | { |
| 1617 | unsigned long numpages = cpa->numpages; | 1586 | unsigned long numpages = cpa->numpages; |
| 1618 | int ret; | 1587 | unsigned long rempages = numpages; |
| 1588 | int ret = 0; | ||
| 1619 | 1589 | ||
| 1620 | while (numpages) { | 1590 | while (rempages) { |
| 1621 | /* | 1591 | /* |
| 1622 | * Store the remaining nr of pages for the large page | 1592 | * Store the remaining nr of pages for the large page |
| 1623 | * preservation check. | 1593 | * preservation check. |
| 1624 | */ | 1594 | */ |
| 1625 | cpa->numpages = numpages; | 1595 | cpa->numpages = rempages; |
| 1626 | /* for array changes, we can't use large page */ | 1596 | /* for array changes, we can't use large page */ |
| 1627 | if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) | 1597 | if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) |
| 1628 | cpa->numpages = 1; | 1598 | cpa->numpages = 1; |
| @@ -1633,12 +1603,12 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | |||
| 1633 | if (!debug_pagealloc_enabled()) | 1603 | if (!debug_pagealloc_enabled()) |
| 1634 | spin_unlock(&cpa_lock); | 1604 | spin_unlock(&cpa_lock); |
| 1635 | if (ret) | 1605 | if (ret) |
| 1636 | return ret; | 1606 | goto out; |
| 1637 | 1607 | ||
| 1638 | if (checkalias) { | 1608 | if (checkalias) { |
| 1639 | ret = cpa_process_alias(cpa); | 1609 | ret = cpa_process_alias(cpa); |
| 1640 | if (ret) | 1610 | if (ret) |
| 1641 | return ret; | 1611 | goto out; |
| 1642 | } | 1612 | } |
| 1643 | 1613 | ||
| 1644 | /* | 1614 | /* |
| @@ -1646,15 +1616,15 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | |||
| 1646 | * CPA operation. Either a large page has been | 1616 | * CPA operation. Either a large page has been |
| 1647 | * preserved or a single page update happened. | 1617 | * preserved or a single page update happened. |
| 1648 | */ | 1618 | */ |
| 1649 | BUG_ON(cpa->numpages > numpages || !cpa->numpages); | 1619 | BUG_ON(cpa->numpages > rempages || !cpa->numpages); |
| 1650 | numpages -= cpa->numpages; | 1620 | rempages -= cpa->numpages; |
| 1651 | if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) | 1621 | cpa->curpage += cpa->numpages; |
| 1652 | cpa->curpage++; | ||
| 1653 | else | ||
| 1654 | *cpa->vaddr += cpa->numpages * PAGE_SIZE; | ||
| 1655 | |||
| 1656 | } | 1622 | } |
| 1657 | return 0; | 1623 | |
| 1624 | out: | ||
| 1625 | /* Restore the original numpages */ | ||
| 1626 | cpa->numpages = numpages; | ||
| 1627 | return ret; | ||
| 1658 | } | 1628 | } |
| 1659 | 1629 | ||
| 1660 | /* | 1630 | /* |
| @@ -1687,7 +1657,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
| 1687 | { | 1657 | { |
| 1688 | struct cpa_data cpa; | 1658 | struct cpa_data cpa; |
| 1689 | int ret, cache, checkalias; | 1659 | int ret, cache, checkalias; |
| 1690 | unsigned long baddr = 0; | ||
| 1691 | 1660 | ||
| 1692 | memset(&cpa, 0, sizeof(cpa)); | 1661 | memset(&cpa, 0, sizeof(cpa)); |
| 1693 | 1662 | ||
| @@ -1721,11 +1690,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
| 1721 | */ | 1690 | */ |
| 1722 | WARN_ON_ONCE(1); | 1691 | WARN_ON_ONCE(1); |
| 1723 | } | 1692 | } |
| 1724 | /* | ||
| 1725 | * Save address for cache flush. *addr is modified in the call | ||
| 1726 | * to __change_page_attr_set_clr() below. | ||
| 1727 | */ | ||
| 1728 | baddr = make_addr_canonical_again(*addr); | ||
| 1729 | } | 1693 | } |
| 1730 | 1694 | ||
| 1731 | /* Must avoid aliasing mappings in the highmem code */ | 1695 | /* Must avoid aliasing mappings in the highmem code */ |
| @@ -1773,13 +1737,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
| 1773 | goto out; | 1737 | goto out; |
| 1774 | } | 1738 | } |
| 1775 | 1739 | ||
| 1776 | if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { | 1740 | cpa_flush(&cpa, cache); |
| 1777 | cpa_flush_array(baddr, addr, numpages, cache, | ||
| 1778 | cpa.flags, pages); | ||
| 1779 | } else { | ||
| 1780 | cpa_flush_range(baddr, numpages, cache); | ||
| 1781 | } | ||
| 1782 | |||
| 1783 | out: | 1741 | out: |
| 1784 | return ret; | 1742 | return ret; |
| 1785 | } | 1743 | } |
| @@ -1850,14 +1808,14 @@ out_err: | |||
| 1850 | } | 1808 | } |
| 1851 | EXPORT_SYMBOL(set_memory_uc); | 1809 | EXPORT_SYMBOL(set_memory_uc); |
| 1852 | 1810 | ||
| 1853 | static int _set_memory_array(unsigned long *addr, int addrinarray, | 1811 | static int _set_memory_array(unsigned long *addr, int numpages, |
| 1854 | enum page_cache_mode new_type) | 1812 | enum page_cache_mode new_type) |
| 1855 | { | 1813 | { |
| 1856 | enum page_cache_mode set_type; | 1814 | enum page_cache_mode set_type; |
| 1857 | int i, j; | 1815 | int i, j; |
| 1858 | int ret; | 1816 | int ret; |
| 1859 | 1817 | ||
| 1860 | for (i = 0; i < addrinarray; i++) { | 1818 | for (i = 0; i < numpages; i++) { |
| 1861 | ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, | 1819 | ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, |
| 1862 | new_type, NULL); | 1820 | new_type, NULL); |
| 1863 | if (ret) | 1821 | if (ret) |
| @@ -1868,11 +1826,11 @@ static int _set_memory_array(unsigned long *addr, int addrinarray, | |||
| 1868 | set_type = (new_type == _PAGE_CACHE_MODE_WC) ? | 1826 | set_type = (new_type == _PAGE_CACHE_MODE_WC) ? |
| 1869 | _PAGE_CACHE_MODE_UC_MINUS : new_type; | 1827 | _PAGE_CACHE_MODE_UC_MINUS : new_type; |
| 1870 | 1828 | ||
| 1871 | ret = change_page_attr_set(addr, addrinarray, | 1829 | ret = change_page_attr_set(addr, numpages, |
| 1872 | cachemode2pgprot(set_type), 1); | 1830 | cachemode2pgprot(set_type), 1); |
| 1873 | 1831 | ||
| 1874 | if (!ret && new_type == _PAGE_CACHE_MODE_WC) | 1832 | if (!ret && new_type == _PAGE_CACHE_MODE_WC) |
| 1875 | ret = change_page_attr_set_clr(addr, addrinarray, | 1833 | ret = change_page_attr_set_clr(addr, numpages, |
| 1876 | cachemode2pgprot( | 1834 | cachemode2pgprot( |
| 1877 | _PAGE_CACHE_MODE_WC), | 1835 | _PAGE_CACHE_MODE_WC), |
| 1878 | __pgprot(_PAGE_CACHE_MASK), | 1836 | __pgprot(_PAGE_CACHE_MASK), |
| @@ -1889,36 +1847,34 @@ out_free: | |||
| 1889 | return ret; | 1847 | return ret; |
| 1890 | } | 1848 | } |
| 1891 | 1849 | ||
| 1892 | int set_memory_array_uc(unsigned long *addr, int addrinarray) | 1850 | int set_memory_array_uc(unsigned long *addr, int numpages) |
| 1893 | { | 1851 | { |
| 1894 | return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_UC_MINUS); | 1852 | return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_UC_MINUS); |
| 1895 | } | 1853 | } |
| 1896 | EXPORT_SYMBOL(set_memory_array_uc); | 1854 | EXPORT_SYMBOL(set_memory_array_uc); |
| 1897 | 1855 | ||
| 1898 | int set_memory_array_wc(unsigned long *addr, int addrinarray) | 1856 | int set_memory_array_wc(unsigned long *addr, int numpages) |
| 1899 | { | 1857 | { |
| 1900 | return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WC); | 1858 | return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_WC); |
| 1901 | } | 1859 | } |
| 1902 | EXPORT_SYMBOL(set_memory_array_wc); | 1860 | EXPORT_SYMBOL(set_memory_array_wc); |
| 1903 | 1861 | ||
| 1904 | int set_memory_array_wt(unsigned long *addr, int addrinarray) | 1862 | int set_memory_array_wt(unsigned long *addr, int numpages) |
| 1905 | { | 1863 | { |
| 1906 | return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WT); | 1864 | return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_WT); |
| 1907 | } | 1865 | } |
| 1908 | EXPORT_SYMBOL_GPL(set_memory_array_wt); | 1866 | EXPORT_SYMBOL_GPL(set_memory_array_wt); |
| 1909 | 1867 | ||
| 1910 | int _set_memory_wc(unsigned long addr, int numpages) | 1868 | int _set_memory_wc(unsigned long addr, int numpages) |
| 1911 | { | 1869 | { |
| 1912 | int ret; | 1870 | int ret; |
| 1913 | unsigned long addr_copy = addr; | ||
| 1914 | 1871 | ||
| 1915 | ret = change_page_attr_set(&addr, numpages, | 1872 | ret = change_page_attr_set(&addr, numpages, |
| 1916 | cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), | 1873 | cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), |
| 1917 | 0); | 1874 | 0); |
| 1918 | if (!ret) { | 1875 | if (!ret) { |
| 1919 | ret = change_page_attr_set_clr(&addr_copy, numpages, | 1876 | ret = change_page_attr_set_clr(&addr, numpages, |
| 1920 | cachemode2pgprot( | 1877 | cachemode2pgprot(_PAGE_CACHE_MODE_WC), |
| 1921 | _PAGE_CACHE_MODE_WC), | ||
| 1922 | __pgprot(_PAGE_CACHE_MASK), | 1878 | __pgprot(_PAGE_CACHE_MASK), |
| 1923 | 0, 0, NULL); | 1879 | 0, 0, NULL); |
| 1924 | } | 1880 | } |
| @@ -1985,18 +1941,18 @@ int set_memory_wb(unsigned long addr, int numpages) | |||
| 1985 | } | 1941 | } |
| 1986 | EXPORT_SYMBOL(set_memory_wb); | 1942 | EXPORT_SYMBOL(set_memory_wb); |
| 1987 | 1943 | ||
| 1988 | int set_memory_array_wb(unsigned long *addr, int addrinarray) | 1944 | int set_memory_array_wb(unsigned long *addr, int numpages) |
| 1989 | { | 1945 | { |
| 1990 | int i; | 1946 | int i; |
| 1991 | int ret; | 1947 | int ret; |
| 1992 | 1948 | ||
| 1993 | /* WB cache mode is hard wired to all cache attribute bits being 0 */ | 1949 | /* WB cache mode is hard wired to all cache attribute bits being 0 */ |
| 1994 | ret = change_page_attr_clear(addr, addrinarray, | 1950 | ret = change_page_attr_clear(addr, numpages, |
| 1995 | __pgprot(_PAGE_CACHE_MASK), 1); | 1951 | __pgprot(_PAGE_CACHE_MASK), 1); |
| 1996 | if (ret) | 1952 | if (ret) |
| 1997 | return ret; | 1953 | return ret; |
| 1998 | 1954 | ||
| 1999 | for (i = 0; i < addrinarray; i++) | 1955 | for (i = 0; i < numpages; i++) |
| 2000 | free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE); | 1956 | free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE); |
| 2001 | 1957 | ||
| 2002 | return 0; | 1958 | return 0; |
| @@ -2066,7 +2022,6 @@ int set_memory_global(unsigned long addr, int numpages) | |||
| 2066 | static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) | 2022 | static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) |
| 2067 | { | 2023 | { |
| 2068 | struct cpa_data cpa; | 2024 | struct cpa_data cpa; |
| 2069 | unsigned long start; | ||
| 2070 | int ret; | 2025 | int ret; |
| 2071 | 2026 | ||
| 2072 | /* Nothing to do if memory encryption is not active */ | 2027 | /* Nothing to do if memory encryption is not active */ |
| @@ -2077,8 +2032,6 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) | |||
| 2077 | if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr)) | 2032 | if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr)) |
| 2078 | addr &= PAGE_MASK; | 2033 | addr &= PAGE_MASK; |
| 2079 | 2034 | ||
| 2080 | start = addr; | ||
| 2081 | |||
| 2082 | memset(&cpa, 0, sizeof(cpa)); | 2035 | memset(&cpa, 0, sizeof(cpa)); |
| 2083 | cpa.vaddr = &addr; | 2036 | cpa.vaddr = &addr; |
| 2084 | cpa.numpages = numpages; | 2037 | cpa.numpages = numpages; |
| @@ -2093,18 +2046,18 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) | |||
| 2093 | /* | 2046 | /* |
| 2094 | * Before changing the encryption attribute, we need to flush caches. | 2047 | * Before changing the encryption attribute, we need to flush caches. |
| 2095 | */ | 2048 | */ |
| 2096 | cpa_flush_range(start, numpages, 1); | 2049 | cpa_flush(&cpa, 1); |
| 2097 | 2050 | ||
| 2098 | ret = __change_page_attr_set_clr(&cpa, 1); | 2051 | ret = __change_page_attr_set_clr(&cpa, 1); |
| 2099 | 2052 | ||
| 2100 | /* | 2053 | /* |
| 2101 | * After changing the encryption attribute, we need to flush TLBs | 2054 | * After changing the encryption attribute, we need to flush TLBs again |
| 2102 | * again in case any speculative TLB caching occurred (but no need | 2055 | * in case any speculative TLB caching occurred (but no need to flush |
| 2103 | * to flush caches again). We could just use cpa_flush_all(), but | 2056 | * caches again). We could just use cpa_flush_all(), but in case TLB |
| 2104 | * in case TLB flushing gets optimized in the cpa_flush_range() | 2057 | * flushing gets optimized in the cpa_flush() path use the same logic |
| 2105 | * path use the same logic as above. | 2058 | * as above. |
| 2106 | */ | 2059 | */ |
| 2107 | cpa_flush_range(start, numpages, 0); | 2060 | cpa_flush(&cpa, 0); |
| 2108 | 2061 | ||
| 2109 | return ret; | 2062 | return ret; |
| 2110 | } | 2063 | } |
| @@ -2129,7 +2082,7 @@ int set_pages_uc(struct page *page, int numpages) | |||
| 2129 | } | 2082 | } |
| 2130 | EXPORT_SYMBOL(set_pages_uc); | 2083 | EXPORT_SYMBOL(set_pages_uc); |
| 2131 | 2084 | ||
| 2132 | static int _set_pages_array(struct page **pages, int addrinarray, | 2085 | static int _set_pages_array(struct page **pages, int numpages, |
| 2133 | enum page_cache_mode new_type) | 2086 | enum page_cache_mode new_type) |
| 2134 | { | 2087 | { |
| 2135 | unsigned long start; | 2088 | unsigned long start; |
| @@ -2139,7 +2092,7 @@ static int _set_pages_array(struct page **pages, int addrinarray, | |||
| 2139 | int free_idx; | 2092 | int free_idx; |
| 2140 | int ret; | 2093 | int ret; |
| 2141 | 2094 | ||
| 2142 | for (i = 0; i < addrinarray; i++) { | 2095 | for (i = 0; i < numpages; i++) { |
| 2143 | if (PageHighMem(pages[i])) | 2096 | if (PageHighMem(pages[i])) |
| 2144 | continue; | 2097 | continue; |
| 2145 | start = page_to_pfn(pages[i]) << PAGE_SHIFT; | 2098 | start = page_to_pfn(pages[i]) << PAGE_SHIFT; |
| @@ -2152,10 +2105,10 @@ static int _set_pages_array(struct page **pages, int addrinarray, | |||
| 2152 | set_type = (new_type == _PAGE_CACHE_MODE_WC) ? | 2105 | set_type = (new_type == _PAGE_CACHE_MODE_WC) ? |
| 2153 | _PAGE_CACHE_MODE_UC_MINUS : new_type; | 2106 | _PAGE_CACHE_MODE_UC_MINUS : new_type; |
| 2154 | 2107 | ||
| 2155 | ret = cpa_set_pages_array(pages, addrinarray, | 2108 | ret = cpa_set_pages_array(pages, numpages, |
| 2156 | cachemode2pgprot(set_type)); | 2109 | cachemode2pgprot(set_type)); |
| 2157 | if (!ret && new_type == _PAGE_CACHE_MODE_WC) | 2110 | if (!ret && new_type == _PAGE_CACHE_MODE_WC) |
| 2158 | ret = change_page_attr_set_clr(NULL, addrinarray, | 2111 | ret = change_page_attr_set_clr(NULL, numpages, |
| 2159 | cachemode2pgprot( | 2112 | cachemode2pgprot( |
| 2160 | _PAGE_CACHE_MODE_WC), | 2113 | _PAGE_CACHE_MODE_WC), |
| 2161 | __pgprot(_PAGE_CACHE_MASK), | 2114 | __pgprot(_PAGE_CACHE_MASK), |
| @@ -2175,21 +2128,21 @@ err_out: | |||
| 2175 | return -EINVAL; | 2128 | return -EINVAL; |
| 2176 | } | 2129 | } |
| 2177 | 2130 | ||
| 2178 | int set_pages_array_uc(struct page **pages, int addrinarray) | 2131 | int set_pages_array_uc(struct page **pages, int numpages) |
| 2179 | { | 2132 | { |
| 2180 | return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_UC_MINUS); | 2133 | return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_UC_MINUS); |
| 2181 | } | 2134 | } |
| 2182 | EXPORT_SYMBOL(set_pages_array_uc); | 2135 | EXPORT_SYMBOL(set_pages_array_uc); |
| 2183 | 2136 | ||
| 2184 | int set_pages_array_wc(struct page **pages, int addrinarray) | 2137 | int set_pages_array_wc(struct page **pages, int numpages) |
| 2185 | { | 2138 | { |
| 2186 | return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WC); | 2139 | return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WC); |
| 2187 | } | 2140 | } |
| 2188 | EXPORT_SYMBOL(set_pages_array_wc); | 2141 | EXPORT_SYMBOL(set_pages_array_wc); |
| 2189 | 2142 | ||
| 2190 | int set_pages_array_wt(struct page **pages, int addrinarray) | 2143 | int set_pages_array_wt(struct page **pages, int numpages) |
| 2191 | { | 2144 | { |
| 2192 | return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WT); | 2145 | return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT); |
| 2193 | } | 2146 | } |
| 2194 | EXPORT_SYMBOL_GPL(set_pages_array_wt); | 2147 | EXPORT_SYMBOL_GPL(set_pages_array_wt); |
| 2195 | 2148 | ||
| @@ -2201,7 +2154,7 @@ int set_pages_wb(struct page *page, int numpages) | |||
| 2201 | } | 2154 | } |
| 2202 | EXPORT_SYMBOL(set_pages_wb); | 2155 | EXPORT_SYMBOL(set_pages_wb); |
| 2203 | 2156 | ||
| 2204 | int set_pages_array_wb(struct page **pages, int addrinarray) | 2157 | int set_pages_array_wb(struct page **pages, int numpages) |
| 2205 | { | 2158 | { |
| 2206 | int retval; | 2159 | int retval; |
| 2207 | unsigned long start; | 2160 | unsigned long start; |
| @@ -2209,12 +2162,12 @@ int set_pages_array_wb(struct page **pages, int addrinarray) | |||
| 2209 | int i; | 2162 | int i; |
| 2210 | 2163 | ||
| 2211 | /* WB cache mode is hard wired to all cache attribute bits being 0 */ | 2164 | /* WB cache mode is hard wired to all cache attribute bits being 0 */ |
| 2212 | retval = cpa_clear_pages_array(pages, addrinarray, | 2165 | retval = cpa_clear_pages_array(pages, numpages, |
| 2213 | __pgprot(_PAGE_CACHE_MASK)); | 2166 | __pgprot(_PAGE_CACHE_MASK)); |
| 2214 | if (retval) | 2167 | if (retval) |
| 2215 | return retval; | 2168 | return retval; |
| 2216 | 2169 | ||
| 2217 | for (i = 0; i < addrinarray; i++) { | 2170 | for (i = 0; i < numpages; i++) { |
| 2218 | if (PageHighMem(pages[i])) | 2171 | if (PageHighMem(pages[i])) |
| 2219 | continue; | 2172 | continue; |
| 2220 | start = page_to_pfn(pages[i]) << PAGE_SHIFT; | 2173 | start = page_to_pfn(pages[i]) << PAGE_SHIFT; |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 03b6b4c2238d..999d6d8f0bef 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
| @@ -15,6 +15,8 @@ | |||
| 15 | #include <asm/apic.h> | 15 | #include <asm/apic.h> |
| 16 | #include <asm/uv/uv.h> | 16 | #include <asm/uv/uv.h> |
| 17 | 17 | ||
| 18 | #include "mm_internal.h" | ||
| 19 | |||
| 18 | /* | 20 | /* |
| 19 | * TLB flushing, formerly SMP-only | 21 | * TLB flushing, formerly SMP-only |
| 20 | * c/o Linus Torvalds. | 22 | * c/o Linus Torvalds. |
| @@ -721,7 +723,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, | |||
| 721 | * | 723 | * |
| 722 | * This is in units of pages. | 724 | * This is in units of pages. |
| 723 | */ | 725 | */ |
| 724 | static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; | 726 | unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; |
| 725 | 727 | ||
| 726 | void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | 728 | void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, |
| 727 | unsigned long end, unsigned int stride_shift, | 729 | unsigned long end, unsigned int stride_shift, |
diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h index 73474bb52344..bb6cb347018c 100644 --- a/include/asm-generic/5level-fixup.h +++ b/include/asm-generic/5level-fixup.h | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #define p4d_clear(p4d) pgd_clear(p4d) | 26 | #define p4d_clear(p4d) pgd_clear(p4d) |
| 27 | #define p4d_val(p4d) pgd_val(p4d) | 27 | #define p4d_val(p4d) pgd_val(p4d) |
| 28 | #define p4d_populate(mm, p4d, pud) pgd_populate(mm, p4d, pud) | 28 | #define p4d_populate(mm, p4d, pud) pgd_populate(mm, p4d, pud) |
| 29 | #define p4d_populate_safe(mm, p4d, pud) pgd_populate(mm, p4d, pud) | ||
| 29 | #define p4d_page(p4d) pgd_page(p4d) | 30 | #define p4d_page(p4d) pgd_page(p4d) |
| 30 | #define p4d_page_vaddr(p4d) pgd_page_vaddr(p4d) | 31 | #define p4d_page_vaddr(p4d) pgd_page_vaddr(p4d) |
| 31 | 32 | ||
diff --git a/include/asm-generic/pgtable-nop4d-hack.h b/include/asm-generic/pgtable-nop4d-hack.h index 1d6dd38c0e5e..829bdb0d6327 100644 --- a/include/asm-generic/pgtable-nop4d-hack.h +++ b/include/asm-generic/pgtable-nop4d-hack.h | |||
| @@ -31,6 +31,7 @@ static inline void pgd_clear(pgd_t *pgd) { } | |||
| 31 | #define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) | 31 | #define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) |
| 32 | 32 | ||
| 33 | #define pgd_populate(mm, pgd, pud) do { } while (0) | 33 | #define pgd_populate(mm, pgd, pud) do { } while (0) |
| 34 | #define pgd_populate_safe(mm, pgd, pud) do { } while (0) | ||
| 34 | /* | 35 | /* |
| 35 | * (puds are folded into pgds so this doesn't get actually called, | 36 | * (puds are folded into pgds so this doesn't get actually called, |
| 36 | * but the define is needed for a generic inline function.) | 37 | * but the define is needed for a generic inline function.) |
diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h index 04cb913797bc..aebab905e6cd 100644 --- a/include/asm-generic/pgtable-nop4d.h +++ b/include/asm-generic/pgtable-nop4d.h | |||
| @@ -26,6 +26,7 @@ static inline void pgd_clear(pgd_t *pgd) { } | |||
| 26 | #define p4d_ERROR(p4d) (pgd_ERROR((p4d).pgd)) | 26 | #define p4d_ERROR(p4d) (pgd_ERROR((p4d).pgd)) |
| 27 | 27 | ||
| 28 | #define pgd_populate(mm, pgd, p4d) do { } while (0) | 28 | #define pgd_populate(mm, pgd, p4d) do { } while (0) |
| 29 | #define pgd_populate_safe(mm, pgd, p4d) do { } while (0) | ||
| 29 | /* | 30 | /* |
| 30 | * (p4ds are folded into pgds so this doesn't get actually called, | 31 | * (p4ds are folded into pgds so this doesn't get actually called, |
| 31 | * but the define is needed for a generic inline function.) | 32 | * but the define is needed for a generic inline function.) |
diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 9bef475db6fe..c77a1d301155 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h | |||
| @@ -35,6 +35,7 @@ static inline void p4d_clear(p4d_t *p4d) { } | |||
| 35 | #define pud_ERROR(pud) (p4d_ERROR((pud).p4d)) | 35 | #define pud_ERROR(pud) (p4d_ERROR((pud).p4d)) |
| 36 | 36 | ||
| 37 | #define p4d_populate(mm, p4d, pud) do { } while (0) | 37 | #define p4d_populate(mm, p4d, pud) do { } while (0) |
| 38 | #define p4d_populate_safe(mm, p4d, pud) do { } while (0) | ||
| 38 | /* | 39 | /* |
| 39 | * (puds are folded into p4ds so this doesn't get actually called, | 40 | * (puds are folded into p4ds so this doesn't get actually called, |
| 40 | * but the define is needed for a generic inline function.) | 41 | * but the define is needed for a generic inline function.) |
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 359fb935ded6..a9cac82e9a7a 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
| @@ -375,7 +375,6 @@ static inline int pte_unused(pte_t pte) | |||
| 375 | #endif | 375 | #endif |
| 376 | 376 | ||
| 377 | #ifndef __HAVE_ARCH_PMD_SAME | 377 | #ifndef __HAVE_ARCH_PMD_SAME |
| 378 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
| 379 | static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) | 378 | static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) |
| 380 | { | 379 | { |
| 381 | return pmd_val(pmd_a) == pmd_val(pmd_b); | 380 | return pmd_val(pmd_a) == pmd_val(pmd_b); |
| @@ -385,21 +384,60 @@ static inline int pud_same(pud_t pud_a, pud_t pud_b) | |||
| 385 | { | 384 | { |
| 386 | return pud_val(pud_a) == pud_val(pud_b); | 385 | return pud_val(pud_a) == pud_val(pud_b); |
| 387 | } | 386 | } |
| 388 | #else /* CONFIG_TRANSPARENT_HUGEPAGE */ | 387 | #endif |
| 389 | static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) | 388 | |
| 389 | #ifndef __HAVE_ARCH_P4D_SAME | ||
| 390 | static inline int p4d_same(p4d_t p4d_a, p4d_t p4d_b) | ||
| 390 | { | 391 | { |
| 391 | BUILD_BUG(); | 392 | return p4d_val(p4d_a) == p4d_val(p4d_b); |
| 392 | return 0; | ||
| 393 | } | 393 | } |
| 394 | #endif | ||
| 394 | 395 | ||
| 395 | static inline int pud_same(pud_t pud_a, pud_t pud_b) | 396 | #ifndef __HAVE_ARCH_PGD_SAME |
| 397 | static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b) | ||
| 396 | { | 398 | { |
| 397 | BUILD_BUG(); | 399 | return pgd_val(pgd_a) == pgd_val(pgd_b); |
| 398 | return 0; | ||
| 399 | } | 400 | } |
| 400 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
| 401 | #endif | 401 | #endif |
| 402 | 402 | ||
| 403 | /* | ||
| 404 | * Use set_p*_safe(), and elide TLB flushing, when confident that *no* | ||
| 405 | * TLB flush will be required as a result of the "set". For example, use | ||
| 406 | * in scenarios where it is known ahead of time that the routine is | ||
| 407 | * setting non-present entries, or re-setting an existing entry to the | ||
| 408 | * same value. Otherwise, use the typical "set" helpers and flush the | ||
| 409 | * TLB. | ||
| 410 | */ | ||
| 411 | #define set_pte_safe(ptep, pte) \ | ||
| 412 | ({ \ | ||
| 413 | WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \ | ||
| 414 | set_pte(ptep, pte); \ | ||
| 415 | }) | ||
| 416 | |||
| 417 | #define set_pmd_safe(pmdp, pmd) \ | ||
| 418 | ({ \ | ||
| 419 | WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \ | ||
| 420 | set_pmd(pmdp, pmd); \ | ||
| 421 | }) | ||
| 422 | |||
| 423 | #define set_pud_safe(pudp, pud) \ | ||
| 424 | ({ \ | ||
| 425 | WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \ | ||
| 426 | set_pud(pudp, pud); \ | ||
| 427 | }) | ||
| 428 | |||
| 429 | #define set_p4d_safe(p4dp, p4d) \ | ||
| 430 | ({ \ | ||
| 431 | WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \ | ||
| 432 | set_p4d(p4dp, p4d); \ | ||
| 433 | }) | ||
| 434 | |||
| 435 | #define set_pgd_safe(pgdp, pgd) \ | ||
| 436 | ({ \ | ||
| 437 | WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ | ||
| 438 | set_pgd(pgdp, pgd); \ | ||
| 439 | }) | ||
| 440 | |||
| 403 | #ifndef __HAVE_ARCH_DO_SWAP_PAGE | 441 | #ifndef __HAVE_ARCH_DO_SWAP_PAGE |
| 404 | /* | 442 | /* |
| 405 | * Some architectures support metadata associated with a page. When a | 443 | * Some architectures support metadata associated with a page. When a |
