author     Ard Biesheuvel <ard.biesheuvel@linaro.org>	2016-08-17 07:45:21 -0400
committer  Ard Biesheuvel <ard.biesheuvel@linaro.org>	2016-08-30 12:45:34 -0400
commit     1031a7e674d1de481d641c3723d5f53b776f621f
tree       f3b8a43b577bc7faed23d04221ef1e52403936e7
parent     05123fef098220323e60834d5520b15d277e0415
ARM: kernel: sort relocation sections before allocating PLTs
The PLT allocation routines try to establish an upper bound on the number
of PLT entries that will be required at relocation time, and optimize this
by disregarding duplicates (i.e., PLT entries that will end up pointing to
the same function). This is currently an O(n^2) algorithm, but we can
greatly simplify this by

- sorting the relocation section so that relocations that can use the same
  PLT entry will be listed adjacently, and
- disregarding jump/call relocations with addends; these are highly unusual
  for relocations against SHN_UNDEF symbols, so we can simply allocate a
  PLT entry for each one we encounter, without trying to optimize away
  duplicates.

Tested-by: Jongsung Kim <neidhard.kim@lge.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
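The scheme described above, in miniature: sort the relocations by (type,
symbol index) so that candidates for the same PLT entry become adjacent,
then count an entry only when it differs from its predecessor. The sketch
below is a standalone userspace illustration of that idea, not the kernel
code: the toy relocation table, the glibc <elf.h> macros and qsort() stand
in for the module loader's data and the kernel's sort().

/*
 * Minimal userspace sketch of sort-then-dedup PLT counting.
 * Illustrative only; not the code from module-plts.c.
 */
#include <elf.h>
#include <stdio.h>
#include <stdlib.h>

#define cmp_3way(a, b)	((a) < (b) ? -1 : (a) > (b))

/* Order relocations by type, then by symbol index. */
static int cmp_rel(const void *a, const void *b)
{
	const Elf32_Rel *x = a, *y = b;
	int i;

	i = cmp_3way(ELF32_R_TYPE(x->r_info), ELF32_R_TYPE(y->r_info));
	if (i == 0)
		i = cmp_3way(ELF32_R_SYM(x->r_info), ELF32_R_SYM(y->r_info));
	return i;
}

int main(void)
{
	/* Toy relocations: (symbol index, type) pairs packed into r_info. */
	Elf32_Rel rels[] = {
		{ .r_offset = 0x00, .r_info = ELF32_R_INFO(3, R_ARM_CALL)   },
		{ .r_offset = 0x10, .r_info = ELF32_R_INFO(1, R_ARM_JUMP24) },
		{ .r_offset = 0x20, .r_info = ELF32_R_INFO(3, R_ARM_CALL)   },
		{ .r_offset = 0x30, .r_info = ELF32_R_INFO(1, R_ARM_JUMP24) },
	};
	int num = sizeof(rels) / sizeof(rels[0]);
	int plts = 0;

	/* After sorting, duplicates are adjacent: one comparison per entry. */
	qsort(rels, num, sizeof(rels[0]), cmp_rel);

	for (int i = 0; i < num; i++)
		if (i == 0 || cmp_rel(&rels[i], &rels[i - 1]) != 0)
			plts++;

	printf("%d PLT entries needed for %d relocations\n", plts, num);
	return 0;
}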
-rw-r--r--	arch/arm/kernel/module-plts.c	98
1 file changed, 69 insertions(+), 29 deletions(-)
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 6f93a905eeee..ad1b98fbcd98 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -9,6 +9,7 @@
 #include <linux/elf.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/sort.h>
 
 #include <asm/cache.h>
 #include <asm/opcodes.h>
@@ -63,28 +64,63 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 	BUG();
 }
 
-static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num,
-			 u32 mask)
+#define cmp_3way(a,b)	((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rel(const void *a, const void *b)
 {
-	u32 *loc1, *loc2;
+	const Elf32_Rel *x = a, *y = b;
 	int i;
 
-	for (i = 0; i < num; i++) {
-		if (rel[i].r_info != rel[num].r_info)
-			continue;
+	/* sort by type and symbol index */
+	i = cmp_3way(ELF32_R_TYPE(x->r_info), ELF32_R_TYPE(y->r_info));
+	if (i == 0)
+		i = cmp_3way(ELF32_R_SYM(x->r_info), ELF32_R_SYM(y->r_info));
+	return i;
+}
 
-		/*
-		 * Identical relocation types against identical symbols can
-		 * still result in different PLT entries if the addend in the
-		 * place is different. So resolve the target of the relocation
-		 * to compare the values.
-		 */
-		loc1 = (u32 *)(base + rel[i].r_offset);
-		loc2 = (u32 *)(base + rel[num].r_offset);
-		if (((*loc1 ^ *loc2) & mask) == 0)
-			return 1;
+static bool is_zero_addend_relocation(Elf32_Addr base, const Elf32_Rel *rel)
+{
+	u32 *tval = (u32 *)(base + rel->r_offset);
+
+	/*
+	 * Do a bitwise compare on the raw addend rather than fully decoding
+	 * the offset and doing an arithmetic comparison.
+	 * Note that a zero-addend jump/call relocation is encoded taking the
+	 * PC bias into account, i.e., -8 for ARM and -4 for Thumb2.
+	 */
+	switch (ELF32_R_TYPE(rel->r_info)) {
+		u16 upper, lower;
+
+	case R_ARM_THM_CALL:
+	case R_ARM_THM_JUMP24:
+		upper = __mem_to_opcode_thumb16(((u16 *)tval)[0]);
+		lower = __mem_to_opcode_thumb16(((u16 *)tval)[1]);
+
+		return (upper & 0x7ff) == 0x7ff && (lower & 0x2fff) == 0x2ffe;
+
+	case R_ARM_CALL:
+	case R_ARM_PC24:
+	case R_ARM_JUMP24:
+		return (__mem_to_opcode_arm(*tval) & 0xffffff) == 0xfffffe;
 	}
-	return 0;
+	BUG();
+}
+
+static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num)
+{
+	const Elf32_Rel *prev;
+
+	/*
+	 * Entries are sorted by type and symbol index. That means that,
+	 * if a duplicate entry exists, it must be in the preceding
+	 * slot.
+	 */
+	if (!num)
+		return false;
+
+	prev = rel + num - 1;
+	return cmp_rel(rel + num, prev) == 0 &&
+	       is_zero_addend_relocation(base, prev);
 }
 
 /* Count how many PLT entries we may need */
@@ -93,18 +129,8 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
 {
 	unsigned int ret = 0;
 	const Elf32_Sym *s;
-	u32 mask;
 	int i;
 
-	if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
-		mask = __opcode_to_mem_thumb32(0x07ff2fff);
-	else
-		mask = __opcode_to_mem_arm(0x00ffffff);
-
-	/*
-	 * Sure, this is order(n^2), but it's usually short, and not
-	 * time critical
-	 */
 	for (i = 0; i < num; i++) {
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
 		case R_ARM_CALL:
@@ -123,7 +149,18 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
 			if (s->st_shndx != SHN_UNDEF)
 				break;
 
-			if (!duplicate_rel(base, rel, i, mask))
+			/*
+			 * Jump relocations with non-zero addends against
+			 * undefined symbols are supported by the ELF spec, but
+			 * do not occur in practice (e.g., 'jump n bytes past
+			 * the entry point of undefined function symbol f').
+			 * So we need to support them, but there is no need to
+			 * take them into consideration when trying to optimize
+			 * this code. So let's only check for duplicates when
+			 * the addend is zero.
+			 */
+			if (!is_zero_addend_relocation(base, rel + i) ||
+			    !duplicate_rel(base, rel, i))
 				ret++;
 		}
 	}
@@ -158,7 +195,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	}
 
 	for (s = sechdrs + 1; s < sechdrs_end; ++s) {
-		const Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
+		Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
 		int numrels = s->sh_size / sizeof(Elf32_Rel);
 		Elf32_Shdr *dstsec = sechdrs + s->sh_info;
 
@@ -169,6 +206,9 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		if (!(dstsec->sh_flags & SHF_EXECINSTR))
 			continue;
 
+		/* sort by type and symbol index */
+		sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL);
+
 		plts += count_plts(syms, dstsec->sh_addr, rels, numrels);
 	}
 
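On the constants checked in is_zero_addend_relocation(): as the comment in
the patch notes, a zero-addend ARM-mode jump/call relocation is encoded
with the -8 PC bias already applied, so the 24-bit branch immediate holds
-8 scaled down by 4, i.e. -2, which is 0xfffffe in 24-bit two's complement.
A quick standalone check of that arithmetic (illustrative only, not kernel
code):

/* Verify that a zero addend appears as 0xfffffe in the ARM imm24 field. */
#include <stdio.h>

int main(void)
{
	int addend = 0;
	unsigned int imm24 = ((unsigned int)(addend - 8) >> 2) & 0x00ffffff;

	printf("0x%06x\n", imm24);	/* prints 0xfffffe */
	return 0;
}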