diff options
| -rw-r--r-- | arch/arm64/Makefile | 3 | ||||
| -rw-r--r-- | arch/arm64/include/asm/cacheflush.h | 2 | ||||
| -rw-r--r-- | arch/arm64/include/asm/module.h | 46 | ||||
| -rw-r--r-- | arch/arm64/kernel/Makefile | 3 | ||||
| -rw-r--r-- | arch/arm64/kernel/cpu_ops.c | 6 | ||||
| -rw-r--r-- | arch/arm64/kernel/fpsimd.c | 6 | ||||
| -rw-r--r-- | arch/arm64/kernel/ftrace-mod.S | 18 | ||||
| -rw-r--r-- | arch/arm64/kernel/ftrace.c | 14 | ||||
| -rw-r--r-- | arch/arm64/kernel/module-plts.c | 50 | ||||
| -rw-r--r-- | arch/arm64/kernel/module.lds | 1 | ||||
| -rw-r--r-- | arch/arm64/kernel/perf_event.c | 6 | ||||
| -rw-r--r-- | arch/arm64/mm/context.c | 28 | ||||
| -rw-r--r-- | arch/arm64/mm/pgd.c | 2 |
13 files changed, 92 insertions, 93 deletions
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b35788c909f1..b481b4a7c011 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile | |||
| @@ -83,9 +83,6 @@ endif | |||
| 83 | 83 | ||
| 84 | ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) | 84 | ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) |
| 85 | KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds | 85 | KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds |
| 86 | ifeq ($(CONFIG_DYNAMIC_FTRACE),y) | ||
| 87 | KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o | ||
| 88 | endif | ||
| 89 | endif | 86 | endif |
| 90 | 87 | ||
| 91 | # Default value | 88 | # Default value |
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 76d1cc85d5b1..955130762a3c 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h | |||
| @@ -38,7 +38,7 @@ | |||
| 38 | * | 38 | * |
| 39 | * See Documentation/cachetlb.txt for more information. Please note that | 39 | * See Documentation/cachetlb.txt for more information. Please note that |
| 40 | * the implementation assumes non-aliasing VIPT D-cache and (aliasing) | 40 | * the implementation assumes non-aliasing VIPT D-cache and (aliasing) |
| 41 | * VIPT or ASID-tagged VIVT I-cache. | 41 | * VIPT I-cache. |
| 42 | * | 42 | * |
| 43 | * flush_cache_mm(mm) | 43 | * flush_cache_mm(mm) |
| 44 | * | 44 | * |
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 19bd97671bb8..4f766178fa6f 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h | |||
| @@ -32,7 +32,7 @@ struct mod_arch_specific { | |||
| 32 | struct mod_plt_sec init; | 32 | struct mod_plt_sec init; |
| 33 | 33 | ||
| 34 | /* for CONFIG_DYNAMIC_FTRACE */ | 34 | /* for CONFIG_DYNAMIC_FTRACE */ |
| 35 | void *ftrace_trampoline; | 35 | struct plt_entry *ftrace_trampoline; |
| 36 | }; | 36 | }; |
| 37 | #endif | 37 | #endif |
| 38 | 38 | ||
| @@ -45,4 +45,48 @@ extern u64 module_alloc_base; | |||
| 45 | #define module_alloc_base ((u64)_etext - MODULES_VSIZE) | 45 | #define module_alloc_base ((u64)_etext - MODULES_VSIZE) |
| 46 | #endif | 46 | #endif |
| 47 | 47 | ||
| 48 | struct plt_entry { | ||
| 49 | /* | ||
| 50 | * A program that conforms to the AArch64 Procedure Call Standard | ||
| 51 | * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or | ||
| 52 | * IP1 (x17) may be inserted at any branch instruction that is | ||
| 53 | * exposed to a relocation that supports long branches. Since that | ||
| 54 | * is exactly what we are dealing with here, we are free to use x16 | ||
| 55 | * as a scratch register in the PLT veneers. | ||
| 56 | */ | ||
| 57 | __le32 mov0; /* movn x16, #0x.... */ | ||
| 58 | __le32 mov1; /* movk x16, #0x...., lsl #16 */ | ||
| 59 | __le32 mov2; /* movk x16, #0x...., lsl #32 */ | ||
| 60 | __le32 br; /* br x16 */ | ||
| 61 | }; | ||
| 62 | |||
| 63 | static inline struct plt_entry get_plt_entry(u64 val) | ||
| 64 | { | ||
| 65 | /* | ||
| 66 | * MOVK/MOVN/MOVZ opcode: | ||
| 67 | * +--------+------------+--------+-----------+-------------+---------+ | ||
| 68 | * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | | ||
| 69 | * +--------+------------+--------+-----------+-------------+---------+ | ||
| 70 | * | ||
| 71 | * Rd := 0x10 (x16) | ||
| 72 | * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) | ||
| 73 | * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) | ||
| 74 | * sf := 1 (64-bit variant) | ||
| 75 | */ | ||
| 76 | return (struct plt_entry){ | ||
| 77 | cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), | ||
| 78 | cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), | ||
| 79 | cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), | ||
| 80 | cpu_to_le32(0xd61f0200) | ||
| 81 | }; | ||
| 82 | } | ||
| 83 | |||
| 84 | static inline bool plt_entries_equal(const struct plt_entry *a, | ||
| 85 | const struct plt_entry *b) | ||
| 86 | { | ||
| 87 | return a->mov0 == b->mov0 && | ||
| 88 | a->mov1 == b->mov1 && | ||
| 89 | a->mov2 == b->mov2; | ||
| 90 | } | ||
| 91 | |||
| 48 | #endif /* __ASM_MODULE_H */ | 92 | #endif /* __ASM_MODULE_H */ |
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 8265dd790895..067baace74a0 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile | |||
| @@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds | |||
| 61 | ifeq ($(CONFIG_DEBUG_EFI),y) | 61 | ifeq ($(CONFIG_DEBUG_EFI),y) |
| 62 | AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" | 62 | AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" |
| 63 | endif | 63 | endif |
| 64 | |||
| 65 | # will be included by each individual module but not by the core kernel itself | ||
| 66 | extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o | ||
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index d16978213c5b..ea001241bdd4 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c | |||
| @@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops; | |||
| 31 | 31 | ||
| 32 | const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; | 32 | const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; |
| 33 | 33 | ||
| 34 | static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { | 34 | static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = { |
| 35 | &smp_spin_table_ops, | 35 | &smp_spin_table_ops, |
| 36 | &cpu_psci_ops, | 36 | &cpu_psci_ops, |
| 37 | NULL, | 37 | NULL, |
| 38 | }; | 38 | }; |
| 39 | 39 | ||
| 40 | static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { | 40 | static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = { |
| 41 | #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL | 41 | #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL |
| 42 | &acpi_parking_protocol_ops, | 42 | &acpi_parking_protocol_ops, |
| 43 | #endif | 43 | #endif |
| @@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { | |||
| 47 | 47 | ||
| 48 | static const struct cpu_operations * __init cpu_get_ops(const char *name) | 48 | static const struct cpu_operations * __init cpu_get_ops(const char *name) |
| 49 | { | 49 | { |
| 50 | const struct cpu_operations **ops; | 50 | const struct cpu_operations *const *ops; |
| 51 | 51 | ||
| 52 | ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; | 52 | ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; |
| 53 | 53 | ||
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 143b3e72c25e..5084e699447a 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c | |||
| @@ -1026,10 +1026,10 @@ void fpsimd_update_current_state(struct fpsimd_state *state) | |||
| 1026 | 1026 | ||
| 1027 | local_bh_disable(); | 1027 | local_bh_disable(); |
| 1028 | 1028 | ||
| 1029 | if (system_supports_sve() && test_thread_flag(TIF_SVE)) { | 1029 | current->thread.fpsimd_state = *state; |
| 1030 | current->thread.fpsimd_state = *state; | 1030 | if (system_supports_sve() && test_thread_flag(TIF_SVE)) |
| 1031 | fpsimd_to_sve(current); | 1031 | fpsimd_to_sve(current); |
| 1032 | } | 1032 | |
| 1033 | task_fpsimd_load(); | 1033 | task_fpsimd_load(); |
| 1034 | 1034 | ||
| 1035 | if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { | 1035 | if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { |
diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S deleted file mode 100644 index 00c4025be4ff..000000000000 --- a/arch/arm64/kernel/ftrace-mod.S +++ /dev/null | |||
| @@ -1,18 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License version 2 as | ||
| 6 | * published by the Free Software Foundation. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/linkage.h> | ||
| 10 | #include <asm/assembler.h> | ||
| 11 | |||
| 12 | .section ".text.ftrace_trampoline", "ax" | ||
| 13 | .align 3 | ||
| 14 | 0: .quad 0 | ||
| 15 | __ftrace_trampoline: | ||
| 16 | ldr x16, 0b | ||
| 17 | br x16 | ||
| 18 | ENDPROC(__ftrace_trampoline) | ||
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c13b1fca0e5b..50986e388d2b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c | |||
| @@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | |||
| 76 | 76 | ||
| 77 | if (offset < -SZ_128M || offset >= SZ_128M) { | 77 | if (offset < -SZ_128M || offset >= SZ_128M) { |
| 78 | #ifdef CONFIG_ARM64_MODULE_PLTS | 78 | #ifdef CONFIG_ARM64_MODULE_PLTS |
| 79 | unsigned long *trampoline; | 79 | struct plt_entry trampoline; |
| 80 | struct module *mod; | 80 | struct module *mod; |
| 81 | 81 | ||
| 82 | /* | 82 | /* |
| @@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | |||
| 104 | * is added in the future, but for now, the pr_err() below | 104 | * is added in the future, but for now, the pr_err() below |
| 105 | * deals with a theoretical issue only. | 105 | * deals with a theoretical issue only. |
| 106 | */ | 106 | */ |
| 107 | trampoline = (unsigned long *)mod->arch.ftrace_trampoline; | 107 | trampoline = get_plt_entry(addr); |
| 108 | if (trampoline[0] != addr) { | 108 | if (!plt_entries_equal(mod->arch.ftrace_trampoline, |
| 109 | if (trampoline[0] != 0) { | 109 | &trampoline)) { |
| 110 | if (!plt_entries_equal(mod->arch.ftrace_trampoline, | ||
| 111 | &(struct plt_entry){})) { | ||
| 110 | pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); | 112 | pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); |
| 111 | return -EINVAL; | 113 | return -EINVAL; |
| 112 | } | 114 | } |
| 113 | 115 | ||
| 114 | /* point the trampoline to our ftrace entry point */ | 116 | /* point the trampoline to our ftrace entry point */ |
| 115 | module_disable_ro(mod); | 117 | module_disable_ro(mod); |
| 116 | trampoline[0] = addr; | 118 | *mod->arch.ftrace_trampoline = trampoline; |
| 117 | module_enable_ro(mod, true); | 119 | module_enable_ro(mod, true); |
| 118 | 120 | ||
| 119 | /* update trampoline before patching in the branch */ | 121 | /* update trampoline before patching in the branch */ |
| 120 | smp_wmb(); | 122 | smp_wmb(); |
| 121 | } | 123 | } |
| 122 | addr = (unsigned long)&trampoline[1]; | 124 | addr = (unsigned long)(void *)mod->arch.ftrace_trampoline; |
| 123 | #else /* CONFIG_ARM64_MODULE_PLTS */ | 125 | #else /* CONFIG_ARM64_MODULE_PLTS */ |
| 124 | return -EINVAL; | 126 | return -EINVAL; |
| 125 | #endif /* CONFIG_ARM64_MODULE_PLTS */ | 127 | #endif /* CONFIG_ARM64_MODULE_PLTS */ |
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index d05dbe658409..ea640f92fe5a 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c | |||
| @@ -11,21 +11,6 @@ | |||
| 11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| 12 | #include <linux/sort.h> | 12 | #include <linux/sort.h> |
| 13 | 13 | ||
| 14 | struct plt_entry { | ||
| 15 | /* | ||
| 16 | * A program that conforms to the AArch64 Procedure Call Standard | ||
| 17 | * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or | ||
| 18 | * IP1 (x17) may be inserted at any branch instruction that is | ||
| 19 | * exposed to a relocation that supports long branches. Since that | ||
| 20 | * is exactly what we are dealing with here, we are free to use x16 | ||
| 21 | * as a scratch register in the PLT veneers. | ||
| 22 | */ | ||
| 23 | __le32 mov0; /* movn x16, #0x.... */ | ||
| 24 | __le32 mov1; /* movk x16, #0x...., lsl #16 */ | ||
| 25 | __le32 mov2; /* movk x16, #0x...., lsl #32 */ | ||
| 26 | __le32 br; /* br x16 */ | ||
| 27 | }; | ||
| 28 | |||
| 29 | static bool in_init(const struct module *mod, void *loc) | 14 | static bool in_init(const struct module *mod, void *loc) |
| 30 | { | 15 | { |
| 31 | return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; | 16 | return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; |
| @@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, | |||
| 40 | int i = pltsec->plt_num_entries; | 25 | int i = pltsec->plt_num_entries; |
| 41 | u64 val = sym->st_value + rela->r_addend; | 26 | u64 val = sym->st_value + rela->r_addend; |
| 42 | 27 | ||
| 43 | /* | 28 | plt[i] = get_plt_entry(val); |
| 44 | * MOVK/MOVN/MOVZ opcode: | ||
| 45 | * +--------+------------+--------+-----------+-------------+---------+ | ||
| 46 | * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | | ||
| 47 | * +--------+------------+--------+-----------+-------------+---------+ | ||
| 48 | * | ||
| 49 | * Rd := 0x10 (x16) | ||
| 50 | * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) | ||
| 51 | * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) | ||
| 52 | * sf := 1 (64-bit variant) | ||
| 53 | */ | ||
| 54 | plt[i] = (struct plt_entry){ | ||
| 55 | cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), | ||
| 56 | cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), | ||
| 57 | cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), | ||
| 58 | cpu_to_le32(0xd61f0200) | ||
| 59 | }; | ||
| 60 | 29 | ||
| 61 | /* | 30 | /* |
| 62 | * Check if the entry we just created is a duplicate. Given that the | 31 | * Check if the entry we just created is a duplicate. Given that the |
| 63 | * relocations are sorted, this will be the last entry we allocated. | 32 | * relocations are sorted, this will be the last entry we allocated. |
| 64 | * (if one exists). | 33 | * (if one exists). |
| 65 | */ | 34 | */ |
| 66 | if (i > 0 && | 35 | if (i > 0 && plt_entries_equal(plt + i, plt + i - 1)) |
| 67 | plt[i].mov0 == plt[i - 1].mov0 && | ||
| 68 | plt[i].mov1 == plt[i - 1].mov1 && | ||
| 69 | plt[i].mov2 == plt[i - 1].mov2) | ||
| 70 | return (u64)&plt[i - 1]; | 36 | return (u64)&plt[i - 1]; |
| 71 | 37 | ||
| 72 | pltsec->plt_num_entries++; | 38 | pltsec->plt_num_entries++; |
| @@ -154,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | |||
| 154 | unsigned long core_plts = 0; | 120 | unsigned long core_plts = 0; |
| 155 | unsigned long init_plts = 0; | 121 | unsigned long init_plts = 0; |
| 156 | Elf64_Sym *syms = NULL; | 122 | Elf64_Sym *syms = NULL; |
| 123 | Elf_Shdr *tramp = NULL; | ||
| 157 | int i; | 124 | int i; |
| 158 | 125 | ||
| 159 | /* | 126 | /* |
| @@ -165,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | |||
| 165 | mod->arch.core.plt = sechdrs + i; | 132 | mod->arch.core.plt = sechdrs + i; |
| 166 | else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) | 133 | else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) |
| 167 | mod->arch.init.plt = sechdrs + i; | 134 | mod->arch.init.plt = sechdrs + i; |
| 135 | else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && | ||
| 136 | !strcmp(secstrings + sechdrs[i].sh_name, | ||
| 137 | ".text.ftrace_trampoline")) | ||
| 138 | tramp = sechdrs + i; | ||
| 168 | else if (sechdrs[i].sh_type == SHT_SYMTAB) | 139 | else if (sechdrs[i].sh_type == SHT_SYMTAB) |
| 169 | syms = (Elf64_Sym *)sechdrs[i].sh_addr; | 140 | syms = (Elf64_Sym *)sechdrs[i].sh_addr; |
| 170 | } | 141 | } |
| @@ -215,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | |||
| 215 | mod->arch.init.plt_num_entries = 0; | 186 | mod->arch.init.plt_num_entries = 0; |
| 216 | mod->arch.init.plt_max_entries = init_plts; | 187 | mod->arch.init.plt_max_entries = init_plts; |
| 217 | 188 | ||
| 189 | if (tramp) { | ||
| 190 | tramp->sh_type = SHT_NOBITS; | ||
| 191 | tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; | ||
| 192 | tramp->sh_addralign = __alignof__(struct plt_entry); | ||
| 193 | tramp->sh_size = sizeof(struct plt_entry); | ||
| 194 | } | ||
| 195 | |||
| 218 | return 0; | 196 | return 0; |
| 219 | } | 197 | } |
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index f7c9781a9d48..22e36a21c113 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | SECTIONS { | 1 | SECTIONS { |
| 2 | .plt (NOLOAD) : { BYTE(0) } | 2 | .plt (NOLOAD) : { BYTE(0) } |
| 3 | .init.plt (NOLOAD) : { BYTE(0) } | 3 | .init.plt (NOLOAD) : { BYTE(0) } |
| 4 | .text.ftrace_trampoline (NOLOAD) : { BYTE(0) } | ||
| 4 | } | 5 | } |
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9eaef51f83ff..3affca3dd96a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c | |||
| @@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | |||
| 262 | 262 | ||
| 263 | [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, | 263 | [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, |
| 264 | [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, | 264 | [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, |
| 265 | |||
| 266 | [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, | ||
| 267 | [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, | ||
| 268 | |||
| 269 | [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, | ||
| 270 | [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, | ||
| 271 | }; | 265 | }; |
| 272 | 266 | ||
| 273 | static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | 267 | static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] |
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index ab9f5f0fb2c7..6f4017046323 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c | |||
| @@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu) | |||
| 96 | 96 | ||
| 97 | set_reserved_asid_bits(); | 97 | set_reserved_asid_bits(); |
| 98 | 98 | ||
| 99 | /* | ||
| 100 | * Ensure the generation bump is observed before we xchg the | ||
| 101 | * active_asids. | ||
| 102 | */ | ||
| 103 | smp_wmb(); | ||
| 104 | |||
| 105 | for_each_possible_cpu(i) { | 99 | for_each_possible_cpu(i) { |
| 106 | asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); | 100 | asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); |
| 107 | /* | 101 | /* |
| @@ -117,7 +111,10 @@ static void flush_context(unsigned int cpu) | |||
| 117 | per_cpu(reserved_asids, i) = asid; | 111 | per_cpu(reserved_asids, i) = asid; |
| 118 | } | 112 | } |
| 119 | 113 | ||
| 120 | /* Queue a TLB invalidate and flush the I-cache if necessary. */ | 114 | /* |
| 115 | * Queue a TLB invalidation for each CPU to perform on next | ||
| 116 | * context-switch | ||
| 117 | */ | ||
| 121 | cpumask_setall(&tlb_flush_pending); | 118 | cpumask_setall(&tlb_flush_pending); |
| 122 | } | 119 | } |
| 123 | 120 | ||
| @@ -202,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) | |||
| 202 | asid = atomic64_read(&mm->context.id); | 199 | asid = atomic64_read(&mm->context.id); |
| 203 | 200 | ||
| 204 | /* | 201 | /* |
| 205 | * The memory ordering here is subtle. We rely on the control | 202 | * The memory ordering here is subtle. |
| 206 | * dependency between the generation read and the update of | 203 | * If our ASID matches the current generation, then we update |
| 207 | * active_asids to ensure that we are synchronised with a | 204 | * our active_asids entry with a relaxed xchg. Racing with a |
| 208 | * parallel rollover (i.e. this pairs with the smp_wmb() in | 205 | * concurrent rollover means that either: |
| 209 | * flush_context). | 206 | * |
| 207 | * - We get a zero back from the xchg and end up waiting on the | ||
| 208 | * lock. Taking the lock synchronises with the rollover and so | ||
| 209 | * we are forced to see the updated generation. | ||
| 210 | * | ||
| 211 | * - We get a valid ASID back from the xchg, which means the | ||
| 212 | * relaxed xchg in flush_context will treat us as reserved | ||
| 213 | * because atomic RmWs are totally ordered for a given location. | ||
| 210 | */ | 214 | */ |
| 211 | if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) | 215 | if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) |
| 212 | && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) | 216 | && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) |
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 371c5f03a170..051e71ec3335 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c | |||
| @@ -26,7 +26,7 @@ | |||
| 26 | #include <asm/page.h> | 26 | #include <asm/page.h> |
| 27 | #include <asm/tlbflush.h> | 27 | #include <asm/tlbflush.h> |
| 28 | 28 | ||
| 29 | static struct kmem_cache *pgd_cache; | 29 | static struct kmem_cache *pgd_cache __ro_after_init; |
| 30 | 30 | ||
| 31 | pgd_t *pgd_alloc(struct mm_struct *mm) | 31 | pgd_t *pgd_alloc(struct mm_struct *mm) |
| 32 | { | 32 | { |
