path: root/arch/arm64/mm
author		Linus Torvalds <torvalds@linux-foundation.org>	2016-07-27 14:16:05 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-07-27 14:16:05 -0400
commit		e831101a73fbc8339ef1d1909dad3ef64f089e70 (patch)
tree		c764ca5cb72cdf24ff26357dd12e16f9c7235627 /arch/arm64/mm
parent		f9abf53af4c78b08da44d841d23308c4f4d74c83 (diff)
parent		fd6380b75065fd2ff51b5f7cbbe6be77d71ea9c7 (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:

 - Kexec support for arm64

 - Kprobes support

 - Expose MIDR_EL1 and REVIDR_EL1 CPU identification registers to sysfs

 - Trapping of user space cache maintenance operations and emulation in
   the kernel (CPU errata workaround)

 - Clean-up of the early page tables creation (kernel linear mapping,
   EFI run-time maps) to avoid splitting larger blocks (e.g. pmds) into
   smaller ones (e.g. ptes)

 - VDSO support for CLOCK_MONOTONIC_RAW in clock_gettime()

 - ARCH_HAS_KCOV enabled for arm64

 - Optimise IP checksum helpers

 - SWIOTLB optimisation to only allocate/initialise the buffer if the
   available RAM is beyond the 32-bit mask

 - Properly handle the "nosmp" command line argument

 - Fix for the initialisation of the CPU debug state during early boot

 - vdso-offsets.h build dependency workaround

 - Build fix when RANDOMIZE_BASE is enabled with MODULES off

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (64 commits)
  arm64: arm: Fix-up the removal of the arm64 regs_query_register_name() prototype
  arm64: Only select ARM64_MODULE_PLTS if MODULES=y
  arm64: mm: run pgtable_page_ctor() on non-swapper translation table pages
  arm64: mm: make create_mapping_late() non-allocating
  arm64: Honor nosmp kernel command line option
  arm64: Fix incorrect per-cpu usage for boot CPU
  arm64: kprobes: Add KASAN instrumentation around stack accesses
  arm64: kprobes: Cleanup jprobe_return
  arm64: kprobes: Fix overflow when saving stack
  arm64: kprobes: WARN if attempting to step with PSTATE.D=1
  arm64: debug: remove unused local_dbg_{enable, disable} macros
  arm64: debug: remove redundant spsr manipulation
  arm64: debug: unmask PSTATE.D earlier
  arm64: localise Image objcopy flags
  arm64: ptrace: remove extra define for CPSR's E bit
  kprobes: Add arm64 case in kprobe example module
  arm64: Add kernel return probes support (kretprobes)
  arm64: Add trampoline code for kretprobes
  arm64: kprobes instruction simulation support
  arm64: Treat all entry code as non-kprobe-able
  ...
Diffstat (limited to 'arch/arm64/mm')
-rw-r--r--  arch/arm64/mm/cache.S         2
-rw-r--r--  arch/arm64/mm/dma-mapping.c  37
-rw-r--r--  arch/arm64/mm/dump.c         32
-rw-r--r--  arch/arm64/mm/fault.c        41
-rw-r--r--  arch/arm64/mm/init.c         13
-rw-r--r--  arch/arm64/mm/mmu.c         154
-rw-r--r--  arch/arm64/mm/proc.S          2
7 files changed, 129 insertions, 152 deletions
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 50ff9ba3a236..07d7352d7c38 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -52,7 +52,7 @@ ENTRY(__flush_cache_user_range)
 	sub	x3, x2, #1
 	bic	x4, x0, x3
 1:
-USER(9f, dc	cvau, x4	)		// clean D line to PoU
+user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
 	add	x4, x4, x2
 	cmp	x4, x1
 	b.lo	1b
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index c566ec83719f..f6c55afab3e2 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -19,6 +19,7 @@
 
 #include <linux/gfp.h>
 #include <linux/acpi.h>
+#include <linux/bootmem.h>
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/genalloc.h>
@@ -29,6 +30,8 @@
 
 #include <asm/cacheflush.h>
 
+static int swiotlb __read_mostly;
+
 static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
 				 bool coherent)
 {
@@ -341,6 +344,13 @@ static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
 	return ret;
 }
 
+static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
+{
+	if (swiotlb)
+		return swiotlb_dma_supported(hwdev, mask);
+	return 1;
+}
+
 static struct dma_map_ops swiotlb_dma_ops = {
 	.alloc = __dma_alloc,
 	.free = __dma_free,
@@ -354,7 +364,7 @@ static struct dma_map_ops swiotlb_dma_ops = {
 	.sync_single_for_device = __swiotlb_sync_single_for_device,
 	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
 	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
-	.dma_supported = swiotlb_dma_supported,
+	.dma_supported = __swiotlb_dma_supported,
 	.mapping_error = swiotlb_dma_mapping_error,
 };
 
@@ -513,6 +523,9 @@ EXPORT_SYMBOL(dummy_dma_ops);
 
 static int __init arm64_dma_init(void)
 {
+	if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+		swiotlb = 1;
+
 	return atomic_pool_init();
 }
 arch_initcall(arm64_dma_init);
@@ -848,15 +861,16 @@ static int __iommu_attach_notifier(struct notifier_block *nb,
 {
 	struct iommu_dma_notifier_data *master, *tmp;
 
-	if (action != BUS_NOTIFY_ADD_DEVICE)
+	if (action != BUS_NOTIFY_BIND_DRIVER)
 		return 0;
 
 	mutex_lock(&iommu_dma_notifier_lock);
 	list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
-		if (do_iommu_attach(master->dev, master->ops,
-				master->dma_base, master->size)) {
+		if (data == master->dev && do_iommu_attach(master->dev,
+				master->ops, master->dma_base, master->size)) {
 			list_del(&master->list);
 			kfree(master);
+			break;
 		}
 	}
 	mutex_unlock(&iommu_dma_notifier_lock);
@@ -870,17 +884,8 @@ static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
 
 	if (!nb)
 		return -ENOMEM;
-	/*
-	 * The device must be attached to a domain before the driver probe
-	 * routine gets a chance to start allocating DMA buffers. However,
-	 * the IOMMU driver also needs a chance to configure the iommu_group
-	 * via its add_device callback first, so we need to make the attach
-	 * happen between those two points. Since the IOMMU core uses a bus
-	 * notifier with default priority for add_device, do the same but
-	 * with a lower priority to ensure the appropriate ordering.
-	 */
+
 	nb->notifier_call = __iommu_attach_notifier;
-	nb->priority = -100;
 
 	ret = bus_register_notifier(bus, nb);
 	if (ret) {
@@ -904,10 +909,6 @@ static int __init __iommu_dma_init(void)
 	if (!ret)
 		ret = register_iommu_dma_ops_notifier(&pci_bus_type);
 #endif
-
-	/* handle devices queued before this arch_initcall */
-	if (!ret)
-		__iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL);
 	return ret;
 }
 arch_initcall(__iommu_dma_init);
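
Illustrative sketch (not part of this diff): with the hunks above, the SWIOTLB bounce buffer is only brought up when RAM extends past the 32-bit boundary, and the dma_map_ops ->dma_supported() callback is routed through __swiotlb_dma_supported(). The hypothetical probe routine below shows how a driver's DMA-mask request reaches that hook; the function name is made up and the exact call chain depends on the kernel version.

#include <linux/device.h>
#include <linux/dma-mapping.h>

/*
 * dma_set_mask_and_coherent() ends up in the bus dma_map_ops
 * ->dma_supported() callback, i.e. __swiotlb_dma_supported() on arm64
 * after this change: if the bounce buffer was never initialised (all
 * RAM below 4GB) any mask is accepted, otherwise the request is passed
 * on to swiotlb_dma_supported().
 */
static int example_probe(struct device *dev)
{
	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)))
		return -EIO;	/* 32-bit DMA not usable on this device */
	return 0;
}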
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index ccfde237d6e6..f94b80eb295d 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -27,11 +27,7 @@
 #include <asm/memory.h>
 #include <asm/pgtable.h>
 #include <asm/pgtable-hwdef.h>
-
-struct addr_marker {
-	unsigned long start_address;
-	const char *name;
-};
+#include <asm/ptdump.h>
 
 static const struct addr_marker address_markers[] = {
 #ifdef CONFIG_KASAN
@@ -290,7 +286,8 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 	}
 }
 
-static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start)
+static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
+		     unsigned long start)
 {
 	pgd_t *pgd = pgd_offset(mm, 0UL);
 	unsigned i;
@@ -309,12 +306,13 @@ static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long st
 
 static int ptdump_show(struct seq_file *m, void *v)
 {
+	struct ptdump_info *info = m->private;
 	struct pg_state st = {
 		.seq = m,
-		.marker = address_markers,
+		.marker = info->markers,
 	};
 
-	walk_pgd(&st, &init_mm, VA_START);
+	walk_pgd(&st, info->mm, info->base_addr);
 
 	note_page(&st, 0, 0, 0);
 	return 0;
@@ -322,7 +320,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 
 static int ptdump_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, ptdump_show, NULL);
+	return single_open(file, ptdump_show, inode->i_private);
 }
 
 static const struct file_operations ptdump_fops = {
@@ -332,7 +330,7 @@ static const struct file_operations ptdump_fops = {
 	.release = single_release,
 };
 
-static int ptdump_init(void)
+int ptdump_register(struct ptdump_info *info, const char *name)
 {
 	struct dentry *pe;
 	unsigned i, j;
@@ -342,8 +340,18 @@ static int ptdump_init(void)
 		for (j = 0; j < pg_level[i].num; j++)
 			pg_level[i].mask |= pg_level[i].bits[j].mask;
 
-	pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
-				 &ptdump_fops);
+	pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
 	return pe ? 0 : -ENOMEM;
 }
+
+static struct ptdump_info kernel_ptdump_info = {
+	.mm		= &init_mm,
+	.markers	= address_markers,
+	.base_addr	= VA_START,
+};
+
+static int ptdump_init(void)
+{
+	return ptdump_register(&kernel_ptdump_info, "kernel_page_tables");
+}
 device_initcall(ptdump_init);
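
Illustrative sketch (not part of this diff): the dump.c rework above turns the debugfs page-table dumper into a small library; ptdump_register() takes a struct ptdump_info naming the mm, the address markers and the base address to walk. A minimal, hypothetical second user could look like the following, where my_mm, my_markers and MY_BASE_ADDR are placeholders rather than real kernel symbols.

#include <asm/ptdump.h>

static const struct addr_marker my_markers[] = {
	{ MY_BASE_ADDR,	"example region start" },	/* placeholder */
	{ -1,		NULL },				/* end marker */
};

static struct ptdump_info my_ptdump_info = {
	.mm		= &my_mm,		/* placeholder mm_struct */
	.markers	= my_markers,
	.base_addr	= MY_BASE_ADDR,
};

static int __init my_ptdump_init(void)
{
	/* creates /sys/kernel/debug/example_page_tables */
	return ptdump_register(&my_ptdump_info, "example_page_tables");
}
device_initcall(my_ptdump_init);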
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 031820d989a8..c8beaa0da7df 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -41,6 +41,28 @@
 
 static const char *fault_name(unsigned int esr);
 
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
+{
+	int ret = 0;
+
+	/* kprobe_running() needs smp_processor_id() */
+	if (!user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, esr))
+			ret = 1;
+		preempt_enable();
+	}
+
+	return ret;
+}
+#else
+static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
+{
+	return 0;
+}
+#endif
+
 /*
  * Dump out the page tables associated with 'addr' in mm 'mm'.
  */
@@ -202,8 +224,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
 #define VM_FAULT_BADMAP		0x010000
 #define VM_FAULT_BADACCESS	0x020000
 
-#define ESR_LNX_EXEC		(1 << 24)
-
 static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
 			   unsigned int mm_flags, unsigned long vm_flags,
 			   struct task_struct *tsk)
@@ -242,14 +262,19 @@ out:
 	return fault;
 }
 
-static inline int permission_fault(unsigned int esr)
+static inline bool is_permission_fault(unsigned int esr)
 {
-	unsigned int ec       = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT;
+	unsigned int ec       = ESR_ELx_EC(esr);
 	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
 
 	return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
 }
 
+static bool is_el0_instruction_abort(unsigned int esr)
+{
+	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
+}
+
 static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 				   struct pt_regs *regs)
 {
@@ -259,6 +284,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
 	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
+	if (notify_page_fault(regs, esr))
+		return 0;
+
 	tsk = current;
 	mm  = tsk->mm;
 
@@ -272,14 +300,14 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	if (user_mode(regs))
 		mm_flags |= FAULT_FLAG_USER;
 
-	if (esr & ESR_LNX_EXEC) {
+	if (is_el0_instruction_abort(esr)) {
 		vm_flags = VM_EXEC;
 	} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
 		vm_flags = VM_WRITE;
 		mm_flags |= FAULT_FLAG_WRITE;
 	}
 
-	if (permission_fault(esr) && (addr < USER_DS)) {
+	if (is_permission_fault(esr) && (addr < USER_DS)) {
 		/* regs->orig_addr_limit may be 0 if we entered from EL0 */
 		if (regs->orig_addr_limit == KERNEL_DS)
 			die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
@@ -630,6 +658,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
 
 	return rv;
 }
+NOKPROBE_SYMBOL(do_debug_exception);
 
 #ifdef CONFIG_ARM64_PAN
 void cpu_enable_pan(void *__unused)
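
Illustrative sketch (not part of this diff): the fault.c hunks above are part of the kprobes enablement listed in the merge description; faults taken while a probe handler runs are diverted to kprobe_fault_handler(), and do_debug_exception() is excluded from probing via NOKPROBE_SYMBOL(). A minimal probe module in the style of samples/kprobes is sketched below; the probed symbol name is only an example and must exist (and not be blacklisted) on the running kernel.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

static struct kprobe kp = {
	.symbol_name = "_do_fork",	/* example symbol, adjust for your kernel */
};

/* Called just before the probed instruction is executed. */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("kprobe hit at %pS\n", (void *)instruction_pointer(regs));
	return 0;
}

static int __init kprobe_example_init(void)
{
	kp.pre_handler = handler_pre;
	return register_kprobe(&kp);
}

static void __exit kprobe_example_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(kprobe_example_init);
module_exit(kprobe_example_exit);
MODULE_LICENSE("GPL");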
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index d45f8627012c..2ade7a6a10a7 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -160,12 +160,10 @@ static void __init arm64_memory_present(void)
 static void __init arm64_memory_present(void)
 {
 	struct memblock_region *reg;
-	int nid = 0;
 
 	for_each_memblock(memory, reg) {
-#ifdef CONFIG_NUMA
-		nid = reg->nid;
-#endif
+		int nid = memblock_get_region_node(reg);
+
 		memory_present(nid, memblock_region_memory_base_pfn(reg),
 			       memblock_region_memory_end_pfn(reg));
 	}
@@ -403,7 +401,8 @@ static void __init free_unused_memmap(void)
  */
 void __init mem_init(void)
 {
-	swiotlb_init(1);
+	if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+		swiotlb_init(1);
 
 	set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
 
@@ -430,9 +429,9 @@ void __init mem_init(void)
 	pr_cont("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n",
 		MLG(VMALLOC_START, VMALLOC_END));
 	pr_cont("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n",
-		MLK_ROUNDUP(_text, __start_rodata));
+		MLK_ROUNDUP(_text, _etext));
 	pr_cont("    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n",
-		MLK_ROUNDUP(__start_rodata, _etext));
+		MLK_ROUNDUP(__start_rodata, __init_begin));
 	pr_cont("      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 		MLK_ROUNDUP(__init_begin, __init_end));
 	pr_cont("      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0f85a46c3e18..51a558195bb9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -77,7 +77,6 @@ static phys_addr_t __init early_pgtable_alloc(void)
 	void *ptr;
 
 	phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-	BUG_ON(!phys);
 
 	/*
 	 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
@@ -97,24 +96,6 @@ static phys_addr_t __init early_pgtable_alloc(void)
 	return phys;
 }
 
-/*
- * remap a PMD into pages
- */
-static void split_pmd(pmd_t *pmd, pte_t *pte)
-{
-	unsigned long pfn = pmd_pfn(*pmd);
-	int i = 0;
-
-	do {
-		/*
-		 * Need to have the least restrictive permissions available
-		 * permissions will be fixed up later
-		 */
-		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
-		pfn++;
-	} while (pte++, i++, i < PTRS_PER_PTE);
-}
-
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 			   unsigned long end, unsigned long pfn,
 			   pgprot_t prot,
@@ -122,15 +103,13 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 {
 	pte_t *pte;
 
-	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
+	BUG_ON(pmd_sect(*pmd));
+	if (pmd_none(*pmd)) {
 		phys_addr_t pte_phys;
 		BUG_ON(!pgtable_alloc);
 		pte_phys = pgtable_alloc();
 		pte = pte_set_fixmap(pte_phys);
-		if (pmd_sect(*pmd))
-			split_pmd(pmd, pte);
 		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
-		flush_tlb_all();
 		pte_clear_fixmap();
 	}
 	BUG_ON(pmd_bad(*pmd));
@@ -144,41 +123,10 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 	pte_clear_fixmap();
 }
 
-static void split_pud(pud_t *old_pud, pmd_t *pmd)
-{
-	unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT;
-	pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);
-	int i = 0;
-
-	do {
-		set_pmd(pmd, __pmd(addr | pgprot_val(prot)));
-		addr += PMD_SIZE;
-	} while (pmd++, i++, i < PTRS_PER_PMD);
-}
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
-static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
-{
-
-	/*
-	 * If debug_page_alloc is enabled we must map the linear map
-	 * using pages. However, other mappings created by
-	 * create_mapping_noalloc must use sections in some cases. Allow
-	 * sections to be used in those cases, where no pgtable_alloc
-	 * function is provided.
-	 */
-	return !pgtable_alloc || !debug_pagealloc_enabled();
-}
-#else
-static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
-{
-	return true;
-}
-#endif
-
 static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 			   phys_addr_t phys, pgprot_t prot,
-			   phys_addr_t (*pgtable_alloc)(void))
+			   phys_addr_t (*pgtable_alloc)(void),
+			   bool allow_block_mappings)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -186,20 +134,13 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 	/*
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
-	if (pud_none(*pud) || pud_sect(*pud)) {
+	BUG_ON(pud_sect(*pud));
+	if (pud_none(*pud)) {
 		phys_addr_t pmd_phys;
 		BUG_ON(!pgtable_alloc);
 		pmd_phys = pgtable_alloc();
 		pmd = pmd_set_fixmap(pmd_phys);
-		if (pud_sect(*pud)) {
-			/*
-			 * need to have the 1G of mappings continue to be
-			 * present
-			 */
-			split_pud(pud, pmd);
-		}
 		__pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
-		flush_tlb_all();
 		pmd_clear_fixmap();
 	}
 	BUG_ON(pud_bad(*pud));
@@ -209,7 +150,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 		next = pmd_addr_end(addr, end);
 		/* try section mapping first */
 		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
-		      block_mappings_allowed(pgtable_alloc)) {
+		      allow_block_mappings) {
 			pmd_t old_pmd =*pmd;
 			pmd_set_huge(pmd, phys, prot);
 			/*
@@ -248,7 +189,8 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
 
 static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 			   phys_addr_t phys, pgprot_t prot,
-			   phys_addr_t (*pgtable_alloc)(void))
+			   phys_addr_t (*pgtable_alloc)(void),
+			   bool allow_block_mappings)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -268,8 +210,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		/*
 		 * For 4K granule only, attempt to put down a 1GB block
 		 */
-		if (use_1G_block(addr, next, phys) &&
-		    block_mappings_allowed(pgtable_alloc)) {
+		if (use_1G_block(addr, next, phys) && allow_block_mappings) {
 			pud_t old_pud = *pud;
 			pud_set_huge(pud, phys, prot);
 
@@ -290,7 +231,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 			}
 		} else {
 			alloc_init_pmd(pud, addr, next, phys, prot,
-				       pgtable_alloc);
+				       pgtable_alloc, allow_block_mappings);
 		}
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
@@ -298,15 +239,14 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 	pud_clear_fixmap();
 }
 
-/*
- * Create the page directory entries and any necessary page tables for the
- * mapping specified by 'md'.
- */
-static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
-		    phys_addr_t size, pgprot_t prot,
-		    phys_addr_t (*pgtable_alloc)(void))
+static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+				 unsigned long virt, phys_addr_t size,
+				 pgprot_t prot,
+				 phys_addr_t (*pgtable_alloc)(void),
+				 bool allow_block_mappings)
 {
 	unsigned long addr, length, end, next;
+	pgd_t *pgd = pgd_offset_raw(pgdir, virt);
 
 	/*
 	 * If the virtual and physical address don't have the same offset
@@ -322,29 +262,23 @@ static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
 	end = addr + length;
 	do {
 		next = pgd_addr_end(addr, end);
-		alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
+		alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
+			       allow_block_mappings);
 		phys += next - addr;
 	} while (pgd++, addr = next, addr != end);
 }
 
-static phys_addr_t late_pgtable_alloc(void)
+static phys_addr_t pgd_pgtable_alloc(void)
 {
 	void *ptr = (void *)__get_free_page(PGALLOC_GFP);
-	BUG_ON(!ptr);
+	if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+		BUG();
 
 	/* Ensure the zeroed page is visible to the page table walker */
 	dsb(ishst);
 	return __pa(ptr);
 }
 
-static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
-				 unsigned long virt, phys_addr_t size,
-				 pgprot_t prot,
-				 phys_addr_t (*alloc)(void))
-{
-	init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
-}
-
 /*
  * This function can only be used to modify existing table entries,
  * without allocating new levels of table. Note that this permits the
@@ -358,16 +292,17 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
 			&phys, virt);
 		return;
 	}
-	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
-			     NULL);
+	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true);
 }
 
 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			       unsigned long virt, phys_addr_t size,
-			       pgprot_t prot)
+			       pgprot_t prot, bool allow_block_mappings)
 {
+	BUG_ON(mm == &init_mm);
+
 	__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
-			     late_pgtable_alloc);
+			     pgd_pgtable_alloc, allow_block_mappings);
 }
 
 static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -380,51 +315,54 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 	}
 
 	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
-			     late_pgtable_alloc);
+			     NULL, !debug_pagealloc_enabled());
 }
 
 static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
 	unsigned long kernel_start = __pa(_text);
-	unsigned long kernel_end = __pa(_etext);
+	unsigned long kernel_end = __pa(__init_begin);
 
 	/*
 	 * Take care not to create a writable alias for the
 	 * read-only text and rodata sections of the kernel image.
 	 */
 
-	/* No overlap with the kernel text */
+	/* No overlap with the kernel text/rodata */
 	if (end < kernel_start || start >= kernel_end) {
 		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
 				     end - start, PAGE_KERNEL,
-				     early_pgtable_alloc);
+				     early_pgtable_alloc,
+				     !debug_pagealloc_enabled());
 		return;
 	}
 
 	/*
-	 * This block overlaps the kernel text mapping.
+	 * This block overlaps the kernel text/rodata mappings.
 	 * Map the portion(s) which don't overlap.
 	 */
 	if (start < kernel_start)
 		__create_pgd_mapping(pgd, start,
 				     __phys_to_virt(start),
 				     kernel_start - start, PAGE_KERNEL,
-				     early_pgtable_alloc);
+				     early_pgtable_alloc,
+				     !debug_pagealloc_enabled());
 	if (kernel_end < end)
 		__create_pgd_mapping(pgd, kernel_end,
 				     __phys_to_virt(kernel_end),
 				     end - kernel_end, PAGE_KERNEL,
-				     early_pgtable_alloc);
+				     early_pgtable_alloc,
+				     !debug_pagealloc_enabled());
 
 	/*
-	 * Map the linear alias of the [_text, _etext) interval as
+	 * Map the linear alias of the [_text, __init_begin) interval as
 	 * read-only/non-executable. This makes the contents of the
 	 * region accessible to subsystems such as hibernate, but
 	 * protects it from inadvertent modification or execution.
 	 */
 	__create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
 			     kernel_end - kernel_start, PAGE_KERNEL_RO,
-			     early_pgtable_alloc);
+			     early_pgtable_alloc, !debug_pagealloc_enabled());
 }
 
 static void __init map_mem(pgd_t *pgd)
@@ -449,14 +387,14 @@ void mark_rodata_ro(void)
 {
 	unsigned long section_size;
 
-	section_size = (unsigned long)__start_rodata - (unsigned long)_text;
+	section_size = (unsigned long)_etext - (unsigned long)_text;
 	create_mapping_late(__pa(_text), (unsigned long)_text,
 			    section_size, PAGE_KERNEL_ROX);
 	/*
-	 * mark .rodata as read only. Use _etext rather than __end_rodata to
-	 * cover NOTES and EXCEPTION_TABLE.
+	 * mark .rodata as read only. Use __init_begin rather than __end_rodata
+	 * to cover NOTES and EXCEPTION_TABLE.
 	 */
-	section_size = (unsigned long)_etext - (unsigned long)__start_rodata;
+	section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
 	create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
 			    section_size, PAGE_KERNEL_RO);
 }
@@ -481,7 +419,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
 	BUG_ON(!PAGE_ALIGNED(size));
 
 	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
-			     early_pgtable_alloc);
+			     early_pgtable_alloc, !debug_pagealloc_enabled());
 
 	vma->addr	= va_start;
 	vma->phys_addr	= pa_start;
@@ -499,8 +437,8 @@ static void __init map_kernel(pgd_t *pgd)
 {
 	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
 
-	map_kernel_segment(pgd, _text, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text);
-	map_kernel_segment(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata);
+	map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
+	map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata);
 	map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
 			   &vmlinux_init);
 	map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
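
Illustrative sketch (not part of this diff): after the mmu.c rework above, every caller of create_pgd_mapping()/__create_pgd_mapping() states explicitly whether block (section) mappings may be used, instead of relying on the old block_mappings_allowed()/split_pmd()/split_pud() logic. A hypothetical caller, with made-up names, might look like this.

/*
 * Map one page-aligned physical region into a private mm (for example
 * an EFI-style runtime mapping), letting the mapper use section
 * mappings wherever alignment and size permit.
 */
static int __init example_map_region(struct mm_struct *mm, phys_addr_t pa,
				     unsigned long va, phys_addr_t size)
{
	if (!PAGE_ALIGNED(pa) || !PAGE_ALIGNED(va) || !PAGE_ALIGNED(size))
		return -EINVAL;

	create_pgd_mapping(mm, pa, va, size, PAGE_KERNEL,
			   true /* allow_block_mappings */);
	return 0;
}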
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c4317879b938..5bb61de23201 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -180,6 +180,8 @@ ENTRY(__cpu_setup)
 	msr	cpacr_el1, x0			// Enable FP/ASIMD
 	mov	x0, #1 << 12			// Reset mdscr_el1 and disable
 	msr	mdscr_el1, x0			// access to the DCC from EL0
+	isb					// Unmask debug exceptions now,
+	enable_dbg				// since this is per-cpu
 	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
 	/*
 	 * Memory region attributes for LPAE: