diff options
author | Jon Medhurst <tixy@yxit.co.uk> | 2011-06-14 10:54:28 -0400 |
---|---|---|
committer | Tixy <tixy@medhuaa1.miniserver.com> | 2011-07-13 13:32:45 -0400 |
commit | 3d4a99785abee0687f8fad9a055d2f0c61a9dd57 (patch) | |
tree | 3a7688c8b37935dcab093b86d6afb6564e52dab2 /arch/arm/kernel/kprobes-common.c | |
parent | 235a4ce79feb8d5351f9164981bc57d5e29f974b (diff) |
ARM: kprobes: Optimise emulation of LDM and STM
This patch improves the performance of LDM and STM instruction
emulation. This is desirable because:
- jprobes and kretprobes probe the first instruction in a function and,
when the frame pointer is omitted, this instruction is often an STM
used to push registers onto the stack.
- The STM and LDM instructions are common in the body and tail of
functions.
- At the same time as being a common instruction form, they also have
one of the slowest and most complicated simulation routines.
The approach taken to optimisation is to use emulation rather than
simulation, that is, a modified form of the instruction is run with
an appropriate register context.
Benchmarking on an OMAP3530 shows the optimised emulation is between 2
and 3 times faster than the simulation routines. On a Kirkwood based
device the relative performance was very significantly better than this.
Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Diffstat (limited to 'arch/arm/kernel/kprobes-common.c')
-rw-r--r-- | arch/arm/kernel/kprobes-common.c | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/arch/arm/kernel/kprobes-common.c b/arch/arm/kernel/kprobes-common.c index 43d663cafdd1..32bb0f236684 100644 --- a/arch/arm/kernel/kprobes-common.c +++ b/arch/arm/kernel/kprobes-common.c | |||
@@ -220,13 +220,81 @@ static void __kprobes simulate_ldm1_pc(struct kprobe *p, struct pt_regs *regs) | |||
220 | load_write_pc(regs->ARM_pc, regs); | 220 | load_write_pc(regs->ARM_pc, regs); |
221 | } | 221 | } |
222 | 222 | ||
/*
 * Call the function at p->ainsn.insn_fn with r0-r12 loaded from the
 * thirteen consecutive words starting at 'regs', then write the resulting
 * r0-r12 back to those same words.
 *
 * 'regs' is pinned to r1 and the function pointer to lr. The ldmia below
 * overwrites r1, so the pointer is pushed first and popped into lr after
 * the call. r11 is pushed and popped alongside it rather than being named
 * in the clobber list (presumably because it may serve as the frame
 * pointer - TODO confirm).
 */
223 | static void __kprobes | |
224 | emulate_generic_r0_12_noflags(struct kprobe *p, struct pt_regs *regs) | |
225 | { | |
226 | register void *rregs asm("r1") = regs; /* pinned to r1 */ | |
227 | register void *rfn asm("lr") = p->ainsn.insn_fn; /* pinned to lr */ | |
228 | ||
229 | __asm__ __volatile__ ( | |
230 | "stmdb sp!, {%[regs], r11} \n\t" /* push regs pointer and r11 */ | |
231 | "ldmia %[regs], {r0-r12} \n\t" /* load r0-r12 from *regs (overwrites r1) */ | |
232 | #if __LINUX_ARM_ARCH__ >= 6 | |
233 | "blx %[fn] \n\t" /* call fn */ | |
234 | #else | |
235 | "str %[fn], [sp, #-4]! \n\t" /* push fn so lr can be reused */ | |
236 | "adr lr, 1f \n\t" /* lr = address of label 1 below */ | |
237 | "ldr pc, [sp], #4 \n\t" /* pop fn into pc, i.e. jump to it */ | |
238 | "1: \n\t" /* execution resumes here when fn returns via lr */ | |
239 | #endif | |
240 | "ldr lr, [sp], #4 \n\t" /* lr = regs */ | |
241 | "stmia lr, {r0-r12} \n\t" /* store r0-r12 back to *regs */ | |
242 | "ldr r11, [sp], #4 \n\t" /* pop the r11 value pushed at the start */ | |
243 | : [regs] "=r" (rregs), [fn] "=r" (rfn) /* outputs: both registers are modified */ | |
244 | : "0" (rregs), "1" (rfn) /* inputs tied to the same registers as the outputs */ | |
245 | : "r0", "r2", "r3", "r4", "r5", "r6", "r7", /* r1 and lr are operands, r11 is saved by hand */ | |
246 | "r8", "r9", "r10", "r12", "memory", "cc" | |
247 | ); | |
248 | } | |
249 | |||
/*
 * Run emulate_generic_r0_12_noflags() on a register window that starts two
 * entries into regs->uregs, so the thirteen words it loads and stores are
 * uregs[2]..uregs[14] rather than uregs[0]..uregs[12].
 *
 * kprobe_decode_ldmstm() selects this handler after subtracting 2 from rn
 * and shifting reglist right by 2, so the register numbers in the rewritten
 * instruction line up with the shifted window.
 */
250 | static void __kprobes | |
251 | emulate_generic_r2_14_noflags(struct kprobe *p, struct pt_regs *regs) | |
252 | { | |
253 | emulate_generic_r0_12_noflags(p, (struct pt_regs *)(regs->uregs+2)); /* cast only re-types the offset pointer */ | |
254 | } | |
255 | |||
/*
 * Run emulate_generic_r0_12_noflags() on a register window that starts
 * three entries into regs->uregs (uregs[3]..uregs[15]), then pass the
 * resulting regs->ARM_pc to load_write_pc().
 *
 * kprobe_decode_ldmstm() only selects this handler for an LDM whose
 * register list includes bit 15, after subtracting 3 from rn and shifting
 * reglist right by 3.
 */
256 | static void __kprobes | |
257 | emulate_ldm_r3_15(struct kprobe *p, struct pt_regs *regs) | |
258 | { | |
259 | emulate_generic_r0_12_noflags(p, (struct pt_regs *)(regs->uregs+3)); | |
260 | load_write_pc(regs->ARM_pc, regs); /* NOTE(review): presumably ARM_pc is uregs[15], the last word of the window - confirm */ | |
261 | } | |
262 | |||
223 | enum kprobe_insn __kprobes | 263 | enum kprobe_insn __kprobes |
224 | kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi) | 264 | kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi) |
225 | { | 265 | { |
226 | kprobe_insn_handler_t *handler = 0; | 266 | kprobe_insn_handler_t *handler = 0; |
227 | unsigned reglist = insn & 0xffff; | 267 | unsigned reglist = insn & 0xffff; |
228 | int is_ldm = insn & 0x100000; | 268 | int is_ldm = insn & 0x100000; |
269 | int rn = (insn >> 16) & 0xf; | ||
270 | |||
271 | if (rn <= 12 && (reglist & 0xe000) == 0) { | ||
272 | /* Instruction only uses registers in the range R0..R12 */ | ||
273 | handler = emulate_generic_r0_12_noflags; | ||
274 | |||
275 | } else if (rn >= 2 && (reglist & 0x8003) == 0) { | ||
276 | /* Instruction only uses registers in the range R2..R14 */ | ||
277 | rn -= 2; | ||
278 | reglist >>= 2; | ||
279 | handler = emulate_generic_r2_14_noflags; | ||
280 | |||
281 | } else if (rn >= 3 && (reglist & 0x0007) == 0) { | ||
282 | /* Instruction only uses registers in the range R3..R15 */ | ||
283 | if (is_ldm && (reglist & 0x8000)) { | ||
284 | rn -= 3; | ||
285 | reglist >>= 3; | ||
286 | handler = emulate_ldm_r3_15; | ||
287 | } | ||
288 | } | ||
289 | |||
290 | if (handler) { | ||
291 | /* We can emulate the instruction in (possibly) modified form */ | ||
292 | asi->insn[0] = (insn & 0xfff00000) | (rn << 16) | reglist; | ||
293 | asi->insn_handler = handler; | ||
294 | return INSN_GOOD; | ||
295 | } | ||
229 | 296 | ||
297 | /* Fallback to slower simulation... */ | ||
230 | if (reglist & 0x8000) | 298 | if (reglist & 0x8000) |
231 | handler = is_ldm ? simulate_ldm1_pc : simulate_stm1_pc; | 299 | handler = is_ldm ? simulate_ldm1_pc : simulate_stm1_pc; |
232 | else | 300 | else |