summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArd Biesheuvel <ard.biesheuvel@linaro.org>2014-11-24 10:54:35 -0500
committerRussell King <rmk+kernel@arm.linux.org.uk>2015-05-08 05:42:34 -0400
commit7d485f647c1f4a6976264c90447fb0dbf07b111d (patch)
tree1ddeb6618cebf5b4a48ca225de772120b734e701
parente748994f5cc59e82ef28e31bae680f15fdadb26f (diff)
ARM: 8220/1: allow modules outside of bl range
Loading modules far away from the kernel in memory is problematic because the 'bl' instruction only has limited reach, and modules are not built with PLTs. Instead of using the -mlong-calls option (which affects all compiler emitted bl instructions, but not the ones in assembler), this patch allocates some additional space at module load time, and populates it with PLT like veneers when encountering relocations that are out of range. This should work with all relocations against symbols exported by the kernel, including those resulting from GCC generated implicit function calls for ftrace etc. The module memory size increases by about 5% on average, regardless of whether any PLT entries were actually needed. However, due to the page based rounding that occurs when allocating module memory, the average memory footprint increase is negligible. Reviewed-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r--arch/arm/Kconfig17
-rw-r--r--arch/arm/Makefile4
-rw-r--r--arch/arm/include/asm/module.h12
-rw-r--r--arch/arm/kernel/Makefile1
-rw-r--r--arch/arm/kernel/module-plts.c181
-rw-r--r--arch/arm/kernel/module.c32
-rw-r--r--arch/arm/kernel/module.lds4
7 files changed, 248 insertions, 3 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 45df48ba0b12..d0950ce75f3e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -60,7 +60,7 @@ config ARM
60 select HAVE_KPROBES if !XIP_KERNEL 60 select HAVE_KPROBES if !XIP_KERNEL
61 select HAVE_KRETPROBES if (HAVE_KPROBES) 61 select HAVE_KRETPROBES if (HAVE_KPROBES)
62 select HAVE_MEMBLOCK 62 select HAVE_MEMBLOCK
63 select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND 63 select HAVE_MOD_ARCH_SPECIFIC
64 select HAVE_OPROFILE if (HAVE_PERF_EVENTS) 64 select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
65 select HAVE_OPTPROBES if !THUMB2_KERNEL 65 select HAVE_OPTPROBES if !THUMB2_KERNEL
66 select HAVE_PERF_EVENTS 66 select HAVE_PERF_EVENTS
@@ -1681,6 +1681,21 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
1681config ARCH_WANT_GENERAL_HUGETLB 1681config ARCH_WANT_GENERAL_HUGETLB
1682 def_bool y 1682 def_bool y
1683 1683
1684config ARM_MODULE_PLTS
1685 bool "Use PLTs to allow module memory to spill over into vmalloc area"
1686 depends on MODULES
1687 help
1688 Allocate PLTs when loading modules so that jumps and calls whose
1689 targets are too far away for their relative offsets to be encoded
1690 in the instructions themselves can be bounced via veneers in the
1691 module's PLT. This allows modules to be allocated in the generic
1692 vmalloc area after the dedicated module memory area has been
1693 exhausted. The modules will use slightly more memory, but after
1694 rounding up to page size, the actual memory footprint is usually
1695 the same.
1696
1697 Say y if you are getting out of memory errors while loading modules
1698
1684source "mm/Kconfig" 1699source "mm/Kconfig"
1685 1700
1686config FORCE_MAX_ZONEORDER 1701config FORCE_MAX_ZONEORDER
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 985227cbbd1b..ffb53e86599e 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -19,6 +19,10 @@ LDFLAGS_vmlinux += --be8
19LDFLAGS_MODULE += --be8 19LDFLAGS_MODULE += --be8
20endif 20endif
21 21
22ifeq ($(CONFIG_ARM_MODULE_PLTS),y)
23LDFLAGS_MODULE += -T $(srctree)/arch/arm/kernel/module.lds
24endif
25
22OBJCOPYFLAGS :=-O binary -R .comment -S 26OBJCOPYFLAGS :=-O binary -R .comment -S
23GZFLAGS :=-9 27GZFLAGS :=-9
24#KBUILD_CFLAGS +=-pipe 28#KBUILD_CFLAGS +=-pipe
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index ed690c49ef93..e358b7966c06 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -16,11 +16,21 @@ enum {
16 ARM_SEC_UNLIKELY, 16 ARM_SEC_UNLIKELY,
17 ARM_SEC_MAX, 17 ARM_SEC_MAX,
18}; 18};
19#endif
19 20
20struct mod_arch_specific { 21struct mod_arch_specific {
22#ifdef CONFIG_ARM_UNWIND
21 struct unwind_table *unwind[ARM_SEC_MAX]; 23 struct unwind_table *unwind[ARM_SEC_MAX];
22};
23#endif 24#endif
25#ifdef CONFIG_ARM_MODULE_PLTS
26 struct elf32_shdr *core_plt;
27 struct elf32_shdr *init_plt;
28 int core_plt_count;
29 int init_plt_count;
30#endif
31};
32
33u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val);
24 34
25/* 35/*
26 * Add the ARM architecture version to the version magic string 36 * Add the ARM architecture version to the version magic string
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 752725dcbf42..32c0990d1968 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle.o
34obj-$(CONFIG_ISA_DMA_API) += dma.o 34obj-$(CONFIG_ISA_DMA_API) += dma.o
35obj-$(CONFIG_FIQ) += fiq.o fiqasm.o 35obj-$(CONFIG_FIQ) += fiq.o fiqasm.o
36obj-$(CONFIG_MODULES) += armksyms.o module.o 36obj-$(CONFIG_MODULES) += armksyms.o module.o
37obj-$(CONFIG_ARM_MODULE_PLTS) += module-plts.o
37obj-$(CONFIG_ISA_DMA) += dma-isa.o 38obj-$(CONFIG_ISA_DMA) += dma-isa.o
38obj-$(CONFIG_PCI) += bios32.o isa.o 39obj-$(CONFIG_PCI) += bios32.o isa.o
39obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o 40obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
new file mode 100644
index 000000000000..71a65c49871d
--- /dev/null
+++ b/arch/arm/kernel/module-plts.c
@@ -0,0 +1,181 @@
1/*
2 * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/elf.h>
10#include <linux/kernel.h>
11#include <linux/module.h>
12
13#include <asm/cache.h>
14#include <asm/opcodes.h>
15
/*
 * PLT entries are laid out in groups that each occupy one cache line:
 * a row of PLT_ENT_COUNT 'ldr pc, ...' instructions followed by a row
 * of PLT_ENT_COUNT literal (target address) slots. Each ldr loads its
 * paired literal, which sits exactly PLT_ENT_STRIDE bytes further on,
 * corrected for the PC read bias of the instruction set.
 */
#define PLT_ENT_STRIDE L1_CACHE_BYTES
#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32))
#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT)

#ifdef CONFIG_THUMB2_KERNEL
/* Thumb2: ldr.w pc, [pc, #imm] -- PC reads as '.' + 4 */
#define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \
						(PLT_ENT_STRIDE - 4))
#else
/* ARM: ldr pc, [pc, #imm] -- PC reads as '.' + 8 */
#define PLT_ENT_LDR __opcode_to_mem_arm(0xe59ff000 | \
					(PLT_ENT_STRIDE - 8))
#endif

/* One cache-line sized group of PLT veneers: instructions, then literals. */
struct plt_entries {
	u32 ldr[PLT_ENT_COUNT];
	u32 lit[PLT_ENT_COUNT];
};
32
33static bool in_init(const struct module *mod, u32 addr)
34{
35 return addr - (u32)mod->module_init < mod->init_size;
36}
37
38u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
39{
40 struct plt_entries *plt, *plt_end;
41 int c, *count;
42
43 if (in_init(mod, loc)) {
44 plt = (void *)mod->arch.init_plt->sh_addr;
45 plt_end = (void *)plt + mod->arch.init_plt->sh_size;
46 count = &mod->arch.init_plt_count;
47 } else {
48 plt = (void *)mod->arch.core_plt->sh_addr;
49 plt_end = (void *)plt + mod->arch.core_plt->sh_size;
50 count = &mod->arch.core_plt_count;
51 }
52
53 /* Look for an existing entry pointing to 'val' */
54 for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) {
55 int i;
56
57 if (!c) {
58 /* Populate a new set of entries */
59 *plt = (struct plt_entries){
60 { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
61 { val, }
62 };
63 ++*count;
64 return (u32)plt->ldr;
65 }
66 for (i = 0; i < PLT_ENT_COUNT; i++) {
67 if (!plt->lit[i]) {
68 plt->lit[i] = val;
69 ++*count;
70 }
71 if (plt->lit[i] == val)
72 return (u32)&plt->ldr[i];
73 }
74 }
75 BUG();
76}
77
78static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num,
79 u32 mask)
80{
81 u32 *loc1, *loc2;
82 int i;
83
84 for (i = 0; i < num; i++) {
85 if (rel[i].r_info != rel[num].r_info)
86 continue;
87
88 /*
89 * Identical relocation types against identical symbols can
90 * still result in different PLT entries if the addend in the
91 * place is different. So resolve the target of the relocation
92 * to compare the values.
93 */
94 loc1 = (u32 *)(base + rel[i].r_offset);
95 loc2 = (u32 *)(base + rel[num].r_offset);
96 if (((*loc1 ^ *loc2) & mask) == 0)
97 return 1;
98 }
99 return 0;
100}
101
102/* Count how many PLT entries we may need */
103static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num)
104{
105 unsigned int ret = 0;
106 int i;
107
108 /*
109 * Sure, this is order(n^2), but it's usually short, and not
110 * time critical
111 */
112 for (i = 0; i < num; i++)
113 switch (ELF32_R_TYPE(rel[i].r_info)) {
114 case R_ARM_CALL:
115 case R_ARM_PC24:
116 case R_ARM_JUMP24:
117 if (!duplicate_rel(base, rel, i,
118 __opcode_to_mem_arm(0x00ffffff)))
119 ret++;
120 break;
121 case R_ARM_THM_CALL:
122 case R_ARM_THM_JUMP24:
123 if (!duplicate_rel(base, rel, i,
124 __opcode_to_mem_thumb32(0x07ff2fff)))
125 ret++;
126 }
127 return ret;
128}
129
130int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
131 char *secstrings, struct module *mod)
132{
133 unsigned long core_plts = 0, init_plts = 0;
134 Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
135
136 /*
137 * To store the PLTs, we expand the .text section for core module code
138 * and the .init.text section for initialization code.
139 */
140 for (s = sechdrs; s < sechdrs_end; ++s)
141 if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
142 mod->arch.core_plt = s;
143 else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
144 mod->arch.init_plt = s;
145
146 if (!mod->arch.core_plt || !mod->arch.init_plt) {
147 pr_err("%s: sections missing\n", mod->name);
148 return -ENOEXEC;
149 }
150
151 for (s = sechdrs + 1; s < sechdrs_end; ++s) {
152 const Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
153 int numrels = s->sh_size / sizeof(Elf32_Rel);
154 Elf32_Shdr *dstsec = sechdrs + s->sh_info;
155
156 if (s->sh_type != SHT_REL)
157 continue;
158
159 if (strstr(secstrings + s->sh_name, ".init"))
160 init_plts += count_plts(dstsec->sh_addr, rels, numrels);
161 else
162 core_plts += count_plts(dstsec->sh_addr, rels, numrels);
163 }
164
165 mod->arch.core_plt->sh_type = SHT_NOBITS;
166 mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
167 mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES;
168 mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
169 sizeof(struct plt_entries));
170 mod->arch.core_plt_count = 0;
171
172 mod->arch.init_plt->sh_type = SHT_NOBITS;
173 mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
174 mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES;
175 mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
176 sizeof(struct plt_entries));
177 mod->arch.init_plt_count = 0;
178 pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__,
179 mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size);
180 return 0;
181}
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index af791f4a6205..efdddcb97dd1 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -40,7 +40,12 @@
40#ifdef CONFIG_MMU 40#ifdef CONFIG_MMU
41void *module_alloc(unsigned long size) 41void *module_alloc(unsigned long size)
42{ 42{
43 return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, 43 void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
44 GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
45 __builtin_return_address(0));
46 if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
47 return p;
48 return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
44 GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, 49 GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
45 __builtin_return_address(0)); 50 __builtin_return_address(0));
46} 51}
@@ -110,6 +115,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
110 offset -= 0x04000000; 115 offset -= 0x04000000;
111 116
112 offset += sym->st_value - loc; 117 offset += sym->st_value - loc;
118
119 /*
120 * Route through a PLT entry if 'offset' exceeds the
121 * supported range. Note that 'offset + loc + 8'
122 * contains the absolute jump target, i.e.,
123 * @sym + addend, corrected for the +8 PC bias.
124 */
125 if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) &&
126 (offset <= (s32)0xfe000000 ||
127 offset >= (s32)0x02000000))
128 offset = get_module_plt(module, loc,
129 offset + loc + 8)
130 - loc - 8;
131
113 if (offset <= (s32)0xfe000000 || 132 if (offset <= (s32)0xfe000000 ||
114 offset >= (s32)0x02000000) { 133 offset >= (s32)0x02000000) {
115 pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", 134 pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
@@ -203,6 +222,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
203 offset -= 0x02000000; 222 offset -= 0x02000000;
204 offset += sym->st_value - loc; 223 offset += sym->st_value - loc;
205 224
225 /*
226 * Route through a PLT entry if 'offset' exceeds the
227 * supported range.
228 */
229 if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) &&
230 (offset <= (s32)0xff000000 ||
231 offset >= (s32)0x01000000))
232 offset = get_module_plt(module, loc,
233 offset + loc + 4)
234 - loc - 4;
235
206 if (offset <= (s32)0xff000000 || 236 if (offset <= (s32)0xff000000 ||
207 offset >= (s32)0x01000000) { 237 offset >= (s32)0x01000000) {
208 pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", 238 pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds
new file mode 100644
index 000000000000..3682fa107918
--- /dev/null
+++ b/arch/arm/kernel/module.lds
@@ -0,0 +1,4 @@
/*
 * Module linker script: create the PLT placeholder sections that
 * module_frob_arch_sections() looks up and resizes at load time.
 * The single BYTE(0) keeps each section from being discarded as empty.
 */
SECTIONS {
	.core.plt : { BYTE(0) }
	.init.plt : { BYTE(0) }
}