diff options
-rw-r--r-- | arch/arm/include/asm/module.h | 6 |
-rw-r--r-- | arch/arm/kernel/module-plts.c | 243 |
-rw-r--r-- | arch/arm/kernel/module.lds | 3 |
3 files changed, 147 insertions, 105 deletions
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index e358b7966c06..464748b9fd7d 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h
@@ -23,10 +23,8 @@ struct mod_arch_specific {
23 | struct unwind_table *unwind[ARM_SEC_MAX]; | 23 | struct unwind_table *unwind[ARM_SEC_MAX]; |
24 | #endif | 24 | #endif |
25 | #ifdef CONFIG_ARM_MODULE_PLTS | 25 | #ifdef CONFIG_ARM_MODULE_PLTS |
26 | struct elf32_shdr *core_plt; | 26 | struct elf32_shdr *plt; |
27 | struct elf32_shdr *init_plt; | 27 | int plt_count; |
28 | int core_plt_count; | ||
29 | int init_plt_count; | ||
30 | #endif | 28 | #endif |
31 | }; | 29 | }; |
32 | 30 | ||
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index 0c7efc3446c0..3a5cba90c971 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c
@@ -9,6 +9,7 @@
9 | #include <linux/elf.h> | 9 | #include <linux/elf.h> |
10 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/sort.h> | ||
12 | 13 | ||
13 | #include <asm/cache.h> | 14 | #include <asm/cache.h> |
14 | #include <asm/opcodes.h> | 15 | #include <asm/opcodes.h> |
@@ -30,154 +31,198 @@ struct plt_entries {
30 | u32 lit[PLT_ENT_COUNT]; | 31 | u32 lit[PLT_ENT_COUNT]; |
31 | }; | 32 | }; |
32 | 33 | ||
33 | static bool in_init(const struct module *mod, u32 addr) | 34 | u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) |
34 | { | 35 | { |
35 | return addr - (u32)mod->init_layout.base < mod->init_layout.size; | 36 | struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr; |
37 | int idx = 0; | ||
38 | |||
39 | /* | ||
40 | * Look for an existing entry pointing to 'val'. Given that the | ||
41 | * relocations are sorted, this will be the last entry we allocated. | ||
42 | * (if one exists). | ||
43 | */ | ||
44 | if (mod->arch.plt_count > 0) { | ||
45 | plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT; | ||
46 | idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT; | ||
47 | |||
48 | if (plt->lit[idx] == val) | ||
49 | return (u32)&plt->ldr[idx]; | ||
50 | |||
51 | idx = (idx + 1) % PLT_ENT_COUNT; | ||
52 | if (!idx) | ||
53 | plt++; | ||
54 | } | ||
55 | |||
56 | mod->arch.plt_count++; | ||
57 | BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size); | ||
58 | |||
59 | if (!idx) | ||
60 | /* Populate a new set of entries */ | ||
61 | *plt = (struct plt_entries){ | ||
62 | { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, }, | ||
63 | { val, } | ||
64 | }; | ||
65 | else | ||
66 | plt->lit[idx] = val; | ||
67 | |||
68 | return (u32)&plt->ldr[idx]; | ||
36 | } | 69 | } |
37 | 70 | ||
38 | u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) | 71 | #define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b)) |
72 | |||
73 | static int cmp_rel(const void *a, const void *b) | ||
39 | { | 74 | { |
40 | struct plt_entries *plt, *plt_end; | 75 | const Elf32_Rel *x = a, *y = b; |
41 | int c, *count; | 76 | int i; |
42 | |||
43 | if (in_init(mod, loc)) { | ||
44 | plt = (void *)mod->arch.init_plt->sh_addr; | ||
45 | plt_end = (void *)plt + mod->arch.init_plt->sh_size; | ||
46 | count = &mod->arch.init_plt_count; | ||
47 | } else { | ||
48 | plt = (void *)mod->arch.core_plt->sh_addr; | ||
49 | plt_end = (void *)plt + mod->arch.core_plt->sh_size; | ||
50 | count = &mod->arch.core_plt_count; | ||
51 | } | ||
52 | 77 | ||
53 | /* Look for an existing entry pointing to 'val' */ | 78 | /* sort by type and symbol index */ |
54 | for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) { | 79 | i = cmp_3way(ELF32_R_TYPE(x->r_info), ELF32_R_TYPE(y->r_info)); |
55 | int i; | 80 | if (i == 0) |
56 | 81 | i = cmp_3way(ELF32_R_SYM(x->r_info), ELF32_R_SYM(y->r_info)); | |
57 | if (!c) { | 82 | return i; |
58 | /* Populate a new set of entries */ | 83 | } |
59 | *plt = (struct plt_entries){ | 84 | |
60 | { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, }, | 85 | static bool is_zero_addend_relocation(Elf32_Addr base, const Elf32_Rel *rel) |
61 | { val, } | 86 | { |
62 | }; | 87 | u32 *tval = (u32 *)(base + rel->r_offset); |
63 | ++*count; | 88 | |
64 | return (u32)plt->ldr; | 89 | /* |
65 | } | 90 | * Do a bitwise compare on the raw addend rather than fully decoding |
66 | for (i = 0; i < PLT_ENT_COUNT; i++) { | 91 | * the offset and doing an arithmetic comparison. |
67 | if (!plt->lit[i]) { | 92 | * Note that a zero-addend jump/call relocation is encoded taking the |
68 | plt->lit[i] = val; | 93 | * PC bias into account, i.e., -8 for ARM and -4 for Thumb2. |
69 | ++*count; | 94 | */ |
70 | } | 95 | switch (ELF32_R_TYPE(rel->r_info)) { |
71 | if (plt->lit[i] == val) | 96 | u16 upper, lower; |
72 | return (u32)&plt->ldr[i]; | 97 | |
73 | } | 98 | case R_ARM_THM_CALL: |
99 | case R_ARM_THM_JUMP24: | ||
100 | upper = __mem_to_opcode_thumb16(((u16 *)tval)[0]); | ||
101 | lower = __mem_to_opcode_thumb16(((u16 *)tval)[1]); | ||
102 | |||
103 | return (upper & 0x7ff) == 0x7ff && (lower & 0x2fff) == 0x2ffe; | ||
104 | |||
105 | case R_ARM_CALL: | ||
106 | case R_ARM_PC24: | ||
107 | case R_ARM_JUMP24: | ||
108 | return (__mem_to_opcode_arm(*tval) & 0xffffff) == 0xfffffe; | ||
74 | } | 109 | } |
75 | BUG(); | 110 | BUG(); |
76 | } | 111 | } |
77 | 112 | ||
78 | static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num, | 113 | static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num) |
79 | u32 mask) | ||
80 | { | 114 | { |
81 | u32 *loc1, *loc2; | 115 | const Elf32_Rel *prev; |
82 | int i; | ||
83 | 116 | ||
84 | for (i = 0; i < num; i++) { | 117 | /* |
85 | if (rel[i].r_info != rel[num].r_info) | 118 | * Entries are sorted by type and symbol index. That means that, |
86 | continue; | 119 | * if a duplicate entry exists, it must be in the preceding |
120 | * slot. | ||
121 | */ | ||
122 | if (!num) | ||
123 | return false; | ||
87 | 124 | ||
88 | /* | 125 | prev = rel + num - 1; |
89 | * Identical relocation types against identical symbols can | 126 | return cmp_rel(rel + num, prev) == 0 && |
90 | * still result in different PLT entries if the addend in the | 127 | is_zero_addend_relocation(base, prev); |
91 | * place is different. So resolve the target of the relocation | ||
92 | * to compare the values. | ||
93 | */ | ||
94 | loc1 = (u32 *)(base + rel[i].r_offset); | ||
95 | loc2 = (u32 *)(base + rel[num].r_offset); | ||
96 | if (((*loc1 ^ *loc2) & mask) == 0) | ||
97 | return 1; | ||
98 | } | ||
99 | return 0; | ||
100 | } | 128 | } |
101 | 129 | ||
102 | /* Count how many PLT entries we may need */ | 130 | /* Count how many PLT entries we may need */ |
103 | static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num) | 131 | static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base, |
132 | const Elf32_Rel *rel, int num) | ||
104 | { | 133 | { |
105 | unsigned int ret = 0; | 134 | unsigned int ret = 0; |
135 | const Elf32_Sym *s; | ||
106 | int i; | 136 | int i; |
107 | 137 | ||
108 | /* | 138 | for (i = 0; i < num; i++) { |
109 | * Sure, this is order(n^2), but it's usually short, and not | ||
110 | * time critical | ||
111 | */ | ||
112 | for (i = 0; i < num; i++) | ||
113 | switch (ELF32_R_TYPE(rel[i].r_info)) { | 139 | switch (ELF32_R_TYPE(rel[i].r_info)) { |
114 | case R_ARM_CALL: | 140 | case R_ARM_CALL: |
115 | case R_ARM_PC24: | 141 | case R_ARM_PC24: |
116 | case R_ARM_JUMP24: | 142 | case R_ARM_JUMP24: |
117 | if (!duplicate_rel(base, rel, i, | ||
118 | __opcode_to_mem_arm(0x00ffffff))) | ||
119 | ret++; | ||
120 | break; | ||
121 | #ifdef CONFIG_THUMB2_KERNEL | ||
122 | case R_ARM_THM_CALL: | 143 | case R_ARM_THM_CALL: |
123 | case R_ARM_THM_JUMP24: | 144 | case R_ARM_THM_JUMP24: |
124 | if (!duplicate_rel(base, rel, i, | 145 | /* |
125 | __opcode_to_mem_thumb32(0x07ff2fff))) | 146 | * We only have to consider branch targets that resolve |
147 | * to undefined symbols. This is not simply a heuristic, | ||
148 | * it is a fundamental limitation, since the PLT itself | ||
149 | * is part of the module, and needs to be within range | ||
150 | * as well, so modules can never grow beyond that limit. | ||
151 | */ | ||
152 | s = syms + ELF32_R_SYM(rel[i].r_info); | ||
153 | if (s->st_shndx != SHN_UNDEF) | ||
154 | break; | ||
155 | |||
156 | /* | ||
157 | * Jump relocations with non-zero addends against | ||
158 | * undefined symbols are supported by the ELF spec, but | ||
159 | * do not occur in practice (e.g., 'jump n bytes past | ||
160 | * the entry point of undefined function symbol f'). | ||
161 | * So we need to support them, but there is no need to | ||
162 | * take them into consideration when trying to optimize | ||
163 | * this code. So let's only check for duplicates when | ||
164 | * the addend is zero. | ||
165 | */ | ||
166 | if (!is_zero_addend_relocation(base, rel + i) || | ||
167 | !duplicate_rel(base, rel, i)) | ||
126 | ret++; | 168 | ret++; |
127 | #endif | ||
128 | } | 169 | } |
170 | } | ||
129 | return ret; | 171 | return ret; |
130 | } | 172 | } |
131 | 173 | ||
132 | int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | 174 | int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, |
133 | char *secstrings, struct module *mod) | 175 | char *secstrings, struct module *mod) |
134 | { | 176 | { |
135 | unsigned long core_plts = 0, init_plts = 0; | 177 | unsigned long plts = 0; |
136 | Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; | 178 | Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; |
179 | Elf32_Sym *syms = NULL; | ||
137 | 180 | ||
138 | /* | 181 | /* |
139 | * To store the PLTs, we expand the .text section for core module code | 182 | * To store the PLTs, we expand the .text section for core module code |
140 | * and the .init.text section for initialization code. | 183 | * and for initialization code. |
141 | */ | 184 | */ |
142 | for (s = sechdrs; s < sechdrs_end; ++s) | 185 | for (s = sechdrs; s < sechdrs_end; ++s) { |
143 | if (strcmp(".core.plt", secstrings + s->sh_name) == 0) | 186 | if (strcmp(".plt", secstrings + s->sh_name) == 0) |
144 | mod->arch.core_plt = s; | 187 | mod->arch.plt = s; |
145 | else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) | 188 | else if (s->sh_type == SHT_SYMTAB) |
146 | mod->arch.init_plt = s; | 189 | syms = (Elf32_Sym *)s->sh_addr; |
147 | 190 | } | |
148 | if (!mod->arch.core_plt || !mod->arch.init_plt) { | 191 | |
149 | pr_err("%s: sections missing\n", mod->name); | 192 | if (!mod->arch.plt) { |
193 | pr_err("%s: module PLT section missing\n", mod->name); | ||
194 | return -ENOEXEC; | ||
195 | } | ||
196 | if (!syms) { | ||
197 | pr_err("%s: module symtab section missing\n", mod->name); | ||
150 | return -ENOEXEC; | 198 | return -ENOEXEC; |
151 | } | 199 | } |
152 | 200 | ||
153 | for (s = sechdrs + 1; s < sechdrs_end; ++s) { | 201 | for (s = sechdrs + 1; s < sechdrs_end; ++s) { |
154 | const Elf32_Rel *rels = (void *)ehdr + s->sh_offset; | 202 | Elf32_Rel *rels = (void *)ehdr + s->sh_offset; |
155 | int numrels = s->sh_size / sizeof(Elf32_Rel); | 203 | int numrels = s->sh_size / sizeof(Elf32_Rel); |
156 | Elf32_Shdr *dstsec = sechdrs + s->sh_info; | 204 | Elf32_Shdr *dstsec = sechdrs + s->sh_info; |
157 | 205 | ||
158 | if (s->sh_type != SHT_REL) | 206 | if (s->sh_type != SHT_REL) |
159 | continue; | 207 | continue; |
160 | 208 | ||
161 | if (strstr(secstrings + s->sh_name, ".init")) | 209 | /* ignore relocations that operate on non-exec sections */ |
162 | init_plts += count_plts(dstsec->sh_addr, rels, numrels); | 210 | if (!(dstsec->sh_flags & SHF_EXECINSTR)) |
163 | else | 211 | continue; |
164 | core_plts += count_plts(dstsec->sh_addr, rels, numrels); | 212 | |
213 | /* sort by type and symbol index */ | ||
214 | sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL); | ||
215 | |||
216 | plts += count_plts(syms, dstsec->sh_addr, rels, numrels); | ||
165 | } | 217 | } |
166 | 218 | ||
167 | mod->arch.core_plt->sh_type = SHT_NOBITS; | 219 | mod->arch.plt->sh_type = SHT_NOBITS; |
168 | mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; | 220 | mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; |
169 | mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES; | 221 | mod->arch.plt->sh_addralign = L1_CACHE_BYTES; |
170 | mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, | 222 | mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE, |
171 | sizeof(struct plt_entries)); | 223 | sizeof(struct plt_entries)); |
172 | mod->arch.core_plt_count = 0; | 224 | mod->arch.plt_count = 0; |
173 | 225 | ||
174 | mod->arch.init_plt->sh_type = SHT_NOBITS; | 226 | pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size); |
175 | mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; | ||
176 | mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES; | ||
177 | mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, | ||
178 | sizeof(struct plt_entries)); | ||
179 | mod->arch.init_plt_count = 0; | ||
180 | pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__, | ||
181 | mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size); | ||
182 | return 0; | 227 | return 0; |
183 | } | 228 | } |
diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds index 3682fa107918..05881e2b414c 100644 --- a/arch/arm/kernel/module.lds +++ b/arch/arm/kernel/module.lds
@@ -1,4 +1,3 @@
1 | SECTIONS { | 1 | SECTIONS { |
2 | .core.plt : { BYTE(0) } | 2 | .plt : { BYTE(0) } |
3 | .init.plt : { BYTE(0) } | ||
4 | } | 3 | } |