diff options
author | Yang Shi <yang.shi@linaro.org> | 2016-05-16 19:36:26 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-05-17 14:03:33 -0400 |
commit | 4c1cd4fdfd14ecd417962f8c2166506132697f7c (patch) | |
tree | 14fba1a53e6426a3e4c12773dacd5fe81780b3b3 | |
parent | cd9e2e5d3ff148be9ea210f622ce3e8e8292fcd6 (diff) |
bpf: arm64: remove callee-save registers use for tmp registers
In the current implementation of the ARM64 eBPF JIT, R23 and R24 are used as
tmp registers, which are callee-saved registers. This leads to a variable-size
JIT prologue and epilogue. The latest blinding-constant change prefers a
constant-size prologue and epilogue. The AAPCS reserves R9 ~ R15 as temp
registers, which need not be saved/restored during a function call. So, replace
R23 and R24 with R10 and R11, and remove the tmp_used flag, saving 2 instructions in
some JITed BPF programs.
CC: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: Yang Shi <yang.shi@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 34 |
1 file changed, 5 insertions, 29 deletions
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index d0d51903c7e0..49ba37e4bfc0 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c | |||
@@ -51,9 +51,9 @@ static const int bpf2a64[] = { | |||
51 | [BPF_REG_9] = A64_R(22), | 51 | [BPF_REG_9] = A64_R(22), |
52 | /* read-only frame pointer to access stack */ | 52 | /* read-only frame pointer to access stack */ |
53 | [BPF_REG_FP] = A64_R(25), | 53 | [BPF_REG_FP] = A64_R(25), |
54 | /* temporary register for internal BPF JIT */ | 54 | /* temporary registers for internal BPF JIT */ |
55 | [TMP_REG_1] = A64_R(23), | 55 | [TMP_REG_1] = A64_R(10), |
56 | [TMP_REG_2] = A64_R(24), | 56 | [TMP_REG_2] = A64_R(11), |
57 | /* temporary register for blinding constants */ | 57 | /* temporary register for blinding constants */ |
58 | [BPF_REG_AX] = A64_R(9), | 58 | [BPF_REG_AX] = A64_R(9), |
59 | }; | 59 | }; |
@@ -61,7 +61,6 @@ static const int bpf2a64[] = { | |||
61 | struct jit_ctx { | 61 | struct jit_ctx { |
62 | const struct bpf_prog *prog; | 62 | const struct bpf_prog *prog; |
63 | int idx; | 63 | int idx; |
64 | int tmp_used; | ||
65 | int epilogue_offset; | 64 | int epilogue_offset; |
66 | int *offset; | 65 | int *offset; |
67 | u32 *image; | 66 | u32 *image; |
@@ -154,8 +153,6 @@ static void build_prologue(struct jit_ctx *ctx) | |||
154 | const u8 r8 = bpf2a64[BPF_REG_8]; | 153 | const u8 r8 = bpf2a64[BPF_REG_8]; |
155 | const u8 r9 = bpf2a64[BPF_REG_9]; | 154 | const u8 r9 = bpf2a64[BPF_REG_9]; |
156 | const u8 fp = bpf2a64[BPF_REG_FP]; | 155 | const u8 fp = bpf2a64[BPF_REG_FP]; |
157 | const u8 tmp1 = bpf2a64[TMP_REG_1]; | ||
158 | const u8 tmp2 = bpf2a64[TMP_REG_2]; | ||
159 | 156 | ||
160 | /* | 157 | /* |
161 | * BPF prog stack layout | 158 | * BPF prog stack layout |
@@ -167,7 +164,7 @@ static void build_prologue(struct jit_ctx *ctx) | |||
167 | * | ... | callee saved registers | 164 | * | ... | callee saved registers |
168 | * +-----+ | 165 | * +-----+ |
169 | * | | x25/x26 | 166 | * | | x25/x26 |
170 | * BPF fp register => -80:+-----+ <= (BPF_FP) | 167 | * BPF fp register => -64:+-----+ <= (BPF_FP) |
171 | * | | | 168 | * | | |
172 | * | ... | BPF prog stack | 169 | * | ... | BPF prog stack |
173 | * | | | 170 | * | | |
@@ -189,8 +186,6 @@ static void build_prologue(struct jit_ctx *ctx) | |||
189 | /* Save callee-saved register */ | 186 | /* Save callee-saved register */ |
190 | emit(A64_PUSH(r6, r7, A64_SP), ctx); | 187 | emit(A64_PUSH(r6, r7, A64_SP), ctx); |
191 | emit(A64_PUSH(r8, r9, A64_SP), ctx); | 188 | emit(A64_PUSH(r8, r9, A64_SP), ctx); |
192 | if (ctx->tmp_used) | ||
193 | emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx); | ||
194 | 189 | ||
195 | /* Save fp (x25) and x26. SP requires 16 bytes alignment */ | 190 | /* Save fp (x25) and x26. SP requires 16 bytes alignment */ |
196 | emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx); | 191 | emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx); |
@@ -210,8 +205,6 @@ static void build_epilogue(struct jit_ctx *ctx) | |||
210 | const u8 r8 = bpf2a64[BPF_REG_8]; | 205 | const u8 r8 = bpf2a64[BPF_REG_8]; |
211 | const u8 r9 = bpf2a64[BPF_REG_9]; | 206 | const u8 r9 = bpf2a64[BPF_REG_9]; |
212 | const u8 fp = bpf2a64[BPF_REG_FP]; | 207 | const u8 fp = bpf2a64[BPF_REG_FP]; |
213 | const u8 tmp1 = bpf2a64[TMP_REG_1]; | ||
214 | const u8 tmp2 = bpf2a64[TMP_REG_2]; | ||
215 | 208 | ||
216 | /* We're done with BPF stack */ | 209 | /* We're done with BPF stack */ |
217 | emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); | 210 | emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); |
@@ -220,8 +213,6 @@ static void build_epilogue(struct jit_ctx *ctx) | |||
220 | emit(A64_POP(fp, A64_R(26), A64_SP), ctx); | 213 | emit(A64_POP(fp, A64_R(26), A64_SP), ctx); |
221 | 214 | ||
222 | /* Restore callee-saved register */ | 215 | /* Restore callee-saved register */ |
223 | if (ctx->tmp_used) | ||
224 | emit(A64_POP(tmp1, tmp2, A64_SP), ctx); | ||
225 | emit(A64_POP(r8, r9, A64_SP), ctx); | 216 | emit(A64_POP(r8, r9, A64_SP), ctx); |
226 | emit(A64_POP(r6, r7, A64_SP), ctx); | 217 | emit(A64_POP(r6, r7, A64_SP), ctx); |
227 | 218 | ||
@@ -317,7 +308,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) | |||
317 | emit(A64_UDIV(is64, dst, dst, src), ctx); | 308 | emit(A64_UDIV(is64, dst, dst, src), ctx); |
318 | break; | 309 | break; |
319 | case BPF_MOD: | 310 | case BPF_MOD: |
320 | ctx->tmp_used = 1; | ||
321 | emit(A64_UDIV(is64, tmp, dst, src), ctx); | 311 | emit(A64_UDIV(is64, tmp, dst, src), ctx); |
322 | emit(A64_MUL(is64, tmp, tmp, src), ctx); | 312 | emit(A64_MUL(is64, tmp, tmp, src), ctx); |
323 | emit(A64_SUB(is64, dst, dst, tmp), ctx); | 313 | emit(A64_SUB(is64, dst, dst, tmp), ctx); |
@@ -390,49 +380,41 @@ emit_bswap_uxt: | |||
390 | /* dst = dst OP imm */ | 380 | /* dst = dst OP imm */ |
391 | case BPF_ALU | BPF_ADD | BPF_K: | 381 | case BPF_ALU | BPF_ADD | BPF_K: |
392 | case BPF_ALU64 | BPF_ADD | BPF_K: | 382 | case BPF_ALU64 | BPF_ADD | BPF_K: |
393 | ctx->tmp_used = 1; | ||
394 | emit_a64_mov_i(is64, tmp, imm, ctx); | 383 | emit_a64_mov_i(is64, tmp, imm, ctx); |
395 | emit(A64_ADD(is64, dst, dst, tmp), ctx); | 384 | emit(A64_ADD(is64, dst, dst, tmp), ctx); |
396 | break; | 385 | break; |
397 | case BPF_ALU | BPF_SUB | BPF_K: | 386 | case BPF_ALU | BPF_SUB | BPF_K: |
398 | case BPF_ALU64 | BPF_SUB | BPF_K: | 387 | case BPF_ALU64 | BPF_SUB | BPF_K: |
399 | ctx->tmp_used = 1; | ||
400 | emit_a64_mov_i(is64, tmp, imm, ctx); | 388 | emit_a64_mov_i(is64, tmp, imm, ctx); |
401 | emit(A64_SUB(is64, dst, dst, tmp), ctx); | 389 | emit(A64_SUB(is64, dst, dst, tmp), ctx); |
402 | break; | 390 | break; |
403 | case BPF_ALU | BPF_AND | BPF_K: | 391 | case BPF_ALU | BPF_AND | BPF_K: |
404 | case BPF_ALU64 | BPF_AND | BPF_K: | 392 | case BPF_ALU64 | BPF_AND | BPF_K: |
405 | ctx->tmp_used = 1; | ||
406 | emit_a64_mov_i(is64, tmp, imm, ctx); | 393 | emit_a64_mov_i(is64, tmp, imm, ctx); |
407 | emit(A64_AND(is64, dst, dst, tmp), ctx); | 394 | emit(A64_AND(is64, dst, dst, tmp), ctx); |
408 | break; | 395 | break; |
409 | case BPF_ALU | BPF_OR | BPF_K: | 396 | case BPF_ALU | BPF_OR | BPF_K: |
410 | case BPF_ALU64 | BPF_OR | BPF_K: | 397 | case BPF_ALU64 | BPF_OR | BPF_K: |
411 | ctx->tmp_used = 1; | ||
412 | emit_a64_mov_i(is64, tmp, imm, ctx); | 398 | emit_a64_mov_i(is64, tmp, imm, ctx); |
413 | emit(A64_ORR(is64, dst, dst, tmp), ctx); | 399 | emit(A64_ORR(is64, dst, dst, tmp), ctx); |
414 | break; | 400 | break; |
415 | case BPF_ALU | BPF_XOR | BPF_K: | 401 | case BPF_ALU | BPF_XOR | BPF_K: |
416 | case BPF_ALU64 | BPF_XOR | BPF_K: | 402 | case BPF_ALU64 | BPF_XOR | BPF_K: |
417 | ctx->tmp_used = 1; | ||
418 | emit_a64_mov_i(is64, tmp, imm, ctx); | 403 | emit_a64_mov_i(is64, tmp, imm, ctx); |
419 | emit(A64_EOR(is64, dst, dst, tmp), ctx); | 404 | emit(A64_EOR(is64, dst, dst, tmp), ctx); |
420 | break; | 405 | break; |
421 | case BPF_ALU | BPF_MUL | BPF_K: | 406 | case BPF_ALU | BPF_MUL | BPF_K: |
422 | case BPF_ALU64 | BPF_MUL | BPF_K: | 407 | case BPF_ALU64 | BPF_MUL | BPF_K: |
423 | ctx->tmp_used = 1; | ||
424 | emit_a64_mov_i(is64, tmp, imm, ctx); | 408 | emit_a64_mov_i(is64, tmp, imm, ctx); |
425 | emit(A64_MUL(is64, dst, dst, tmp), ctx); | 409 | emit(A64_MUL(is64, dst, dst, tmp), ctx); |
426 | break; | 410 | break; |
427 | case BPF_ALU | BPF_DIV | BPF_K: | 411 | case BPF_ALU | BPF_DIV | BPF_K: |
428 | case BPF_ALU64 | BPF_DIV | BPF_K: | 412 | case BPF_ALU64 | BPF_DIV | BPF_K: |
429 | ctx->tmp_used = 1; | ||
430 | emit_a64_mov_i(is64, tmp, imm, ctx); | 413 | emit_a64_mov_i(is64, tmp, imm, ctx); |
431 | emit(A64_UDIV(is64, dst, dst, tmp), ctx); | 414 | emit(A64_UDIV(is64, dst, dst, tmp), ctx); |
432 | break; | 415 | break; |
433 | case BPF_ALU | BPF_MOD | BPF_K: | 416 | case BPF_ALU | BPF_MOD | BPF_K: |
434 | case BPF_ALU64 | BPF_MOD | BPF_K: | 417 | case BPF_ALU64 | BPF_MOD | BPF_K: |
435 | ctx->tmp_used = 1; | ||
436 | emit_a64_mov_i(is64, tmp2, imm, ctx); | 418 | emit_a64_mov_i(is64, tmp2, imm, ctx); |
437 | emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); | 419 | emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); |
438 | emit(A64_MUL(is64, tmp, tmp, tmp2), ctx); | 420 | emit(A64_MUL(is64, tmp, tmp, tmp2), ctx); |
@@ -503,12 +485,10 @@ emit_cond_jmp: | |||
503 | case BPF_JMP | BPF_JNE | BPF_K: | 485 | case BPF_JMP | BPF_JNE | BPF_K: |
504 | case BPF_JMP | BPF_JSGT | BPF_K: | 486 | case BPF_JMP | BPF_JSGT | BPF_K: |
505 | case BPF_JMP | BPF_JSGE | BPF_K: | 487 | case BPF_JMP | BPF_JSGE | BPF_K: |
506 | ctx->tmp_used = 1; | ||
507 | emit_a64_mov_i(1, tmp, imm, ctx); | 488 | emit_a64_mov_i(1, tmp, imm, ctx); |
508 | emit(A64_CMP(1, dst, tmp), ctx); | 489 | emit(A64_CMP(1, dst, tmp), ctx); |
509 | goto emit_cond_jmp; | 490 | goto emit_cond_jmp; |
510 | case BPF_JMP | BPF_JSET | BPF_K: | 491 | case BPF_JMP | BPF_JSET | BPF_K: |
511 | ctx->tmp_used = 1; | ||
512 | emit_a64_mov_i(1, tmp, imm, ctx); | 492 | emit_a64_mov_i(1, tmp, imm, ctx); |
513 | emit(A64_TST(1, dst, tmp), ctx); | 493 | emit(A64_TST(1, dst, tmp), ctx); |
514 | goto emit_cond_jmp; | 494 | goto emit_cond_jmp; |
@@ -518,7 +498,6 @@ emit_cond_jmp: | |||
518 | const u8 r0 = bpf2a64[BPF_REG_0]; | 498 | const u8 r0 = bpf2a64[BPF_REG_0]; |
519 | const u64 func = (u64)__bpf_call_base + imm; | 499 | const u64 func = (u64)__bpf_call_base + imm; |
520 | 500 | ||
521 | ctx->tmp_used = 1; | ||
522 | emit_a64_mov_i64(tmp, func, ctx); | 501 | emit_a64_mov_i64(tmp, func, ctx); |
523 | emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); | 502 | emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); |
524 | emit(A64_MOV(1, A64_FP, A64_SP), ctx); | 503 | emit(A64_MOV(1, A64_FP, A64_SP), ctx); |
@@ -564,7 +543,6 @@ emit_cond_jmp: | |||
564 | case BPF_LDX | BPF_MEM | BPF_H: | 543 | case BPF_LDX | BPF_MEM | BPF_H: |
565 | case BPF_LDX | BPF_MEM | BPF_B: | 544 | case BPF_LDX | BPF_MEM | BPF_B: |
566 | case BPF_LDX | BPF_MEM | BPF_DW: | 545 | case BPF_LDX | BPF_MEM | BPF_DW: |
567 | ctx->tmp_used = 1; | ||
568 | emit_a64_mov_i(1, tmp, off, ctx); | 546 | emit_a64_mov_i(1, tmp, off, ctx); |
569 | switch (BPF_SIZE(code)) { | 547 | switch (BPF_SIZE(code)) { |
570 | case BPF_W: | 548 | case BPF_W: |
@@ -588,7 +566,6 @@ emit_cond_jmp: | |||
588 | case BPF_ST | BPF_MEM | BPF_B: | 566 | case BPF_ST | BPF_MEM | BPF_B: |
589 | case BPF_ST | BPF_MEM | BPF_DW: | 567 | case BPF_ST | BPF_MEM | BPF_DW: |
590 | /* Load imm to a register then store it */ | 568 | /* Load imm to a register then store it */ |
591 | ctx->tmp_used = 1; | ||
592 | emit_a64_mov_i(1, tmp2, off, ctx); | 569 | emit_a64_mov_i(1, tmp2, off, ctx); |
593 | emit_a64_mov_i(1, tmp, imm, ctx); | 570 | emit_a64_mov_i(1, tmp, imm, ctx); |
594 | switch (BPF_SIZE(code)) { | 571 | switch (BPF_SIZE(code)) { |
@@ -612,7 +589,6 @@ emit_cond_jmp: | |||
612 | case BPF_STX | BPF_MEM | BPF_H: | 589 | case BPF_STX | BPF_MEM | BPF_H: |
613 | case BPF_STX | BPF_MEM | BPF_B: | 590 | case BPF_STX | BPF_MEM | BPF_B: |
614 | case BPF_STX | BPF_MEM | BPF_DW: | 591 | case BPF_STX | BPF_MEM | BPF_DW: |
615 | ctx->tmp_used = 1; | ||
616 | emit_a64_mov_i(1, tmp, off, ctx); | 592 | emit_a64_mov_i(1, tmp, off, ctx); |
617 | switch (BPF_SIZE(code)) { | 593 | switch (BPF_SIZE(code)) { |
618 | case BPF_W: | 594 | case BPF_W: |
@@ -798,7 +774,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
798 | 774 | ||
799 | /* 1. Initial fake pass to compute ctx->idx. */ | 775 | /* 1. Initial fake pass to compute ctx->idx. */ |
800 | 776 | ||
801 | /* Fake pass to fill in ctx->offset and ctx->tmp_used. */ | 777 | /* Fake pass to fill in ctx->offset. */ |
802 | if (build_body(&ctx)) { | 778 | if (build_body(&ctx)) { |
803 | prog = orig_prog; | 779 | prog = orig_prog; |
804 | goto out_off; | 780 | goto out_off; |