aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYang Shi <yang.shi@linaro.org>2016-05-16 19:36:26 -0400
committerDavid S. Miller <davem@davemloft.net>2016-05-17 14:03:33 -0400
commit4c1cd4fdfd14ecd417962f8c2166506132697f7c (patch)
tree14fba1a53e6426a3e4c12773dacd5fe81780b3b3
parentcd9e2e5d3ff148be9ea210f622ce3e8e8292fcd6 (diff)
bpf: arm64: remove callee-save registers use for tmp registers
In the current implementation of the ARM64 eBPF JIT, R23 and R24 are used as temporary registers, which are callee-saved registers. This leads to a variable-size JIT prologue and epilogue. The latest blinding-constant change prefers a constant-size prologue and epilogue. The AAPCS reserves R9 ~ R15 as temporary registers, which need not be saved/restored during a function call. So, replace R23 and R24 with R10 and R11, and remove the tmp_used flag to save 2 instructions for some JITed BPF programs. CC: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Zi Shen Lim <zlim.lnx@gmail.com> Signed-off-by: Yang Shi <yang.shi@linaro.org> Acked-by: Catalin Marinas <catalin.marinas@arm.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/arm64/net/bpf_jit_comp.c34
1 file changed, 5 insertions(+), 29 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index d0d51903c7e0..49ba37e4bfc0 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -51,9 +51,9 @@ static const int bpf2a64[] = {
51 [BPF_REG_9] = A64_R(22), 51 [BPF_REG_9] = A64_R(22),
52 /* read-only frame pointer to access stack */ 52 /* read-only frame pointer to access stack */
53 [BPF_REG_FP] = A64_R(25), 53 [BPF_REG_FP] = A64_R(25),
54 /* temporary register for internal BPF JIT */ 54 /* temporary registers for internal BPF JIT */
55 [TMP_REG_1] = A64_R(23), 55 [TMP_REG_1] = A64_R(10),
56 [TMP_REG_2] = A64_R(24), 56 [TMP_REG_2] = A64_R(11),
57 /* temporary register for blinding constants */ 57 /* temporary register for blinding constants */
58 [BPF_REG_AX] = A64_R(9), 58 [BPF_REG_AX] = A64_R(9),
59}; 59};
@@ -61,7 +61,6 @@ static const int bpf2a64[] = {
61struct jit_ctx { 61struct jit_ctx {
62 const struct bpf_prog *prog; 62 const struct bpf_prog *prog;
63 int idx; 63 int idx;
64 int tmp_used;
65 int epilogue_offset; 64 int epilogue_offset;
66 int *offset; 65 int *offset;
67 u32 *image; 66 u32 *image;
@@ -154,8 +153,6 @@ static void build_prologue(struct jit_ctx *ctx)
154 const u8 r8 = bpf2a64[BPF_REG_8]; 153 const u8 r8 = bpf2a64[BPF_REG_8];
155 const u8 r9 = bpf2a64[BPF_REG_9]; 154 const u8 r9 = bpf2a64[BPF_REG_9];
156 const u8 fp = bpf2a64[BPF_REG_FP]; 155 const u8 fp = bpf2a64[BPF_REG_FP];
157 const u8 tmp1 = bpf2a64[TMP_REG_1];
158 const u8 tmp2 = bpf2a64[TMP_REG_2];
159 156
160 /* 157 /*
161 * BPF prog stack layout 158 * BPF prog stack layout
@@ -167,7 +164,7 @@ static void build_prologue(struct jit_ctx *ctx)
167 * | ... | callee saved registers 164 * | ... | callee saved registers
168 * +-----+ 165 * +-----+
169 * | | x25/x26 166 * | | x25/x26
170 * BPF fp register => -80:+-----+ <= (BPF_FP) 167 * BPF fp register => -64:+-----+ <= (BPF_FP)
171 * | | 168 * | |
172 * | ... | BPF prog stack 169 * | ... | BPF prog stack
173 * | | 170 * | |
@@ -189,8 +186,6 @@ static void build_prologue(struct jit_ctx *ctx)
189 /* Save callee-saved register */ 186 /* Save callee-saved register */
190 emit(A64_PUSH(r6, r7, A64_SP), ctx); 187 emit(A64_PUSH(r6, r7, A64_SP), ctx);
191 emit(A64_PUSH(r8, r9, A64_SP), ctx); 188 emit(A64_PUSH(r8, r9, A64_SP), ctx);
192 if (ctx->tmp_used)
193 emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx);
194 189
195 /* Save fp (x25) and x26. SP requires 16 bytes alignment */ 190 /* Save fp (x25) and x26. SP requires 16 bytes alignment */
196 emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx); 191 emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx);
@@ -210,8 +205,6 @@ static void build_epilogue(struct jit_ctx *ctx)
210 const u8 r8 = bpf2a64[BPF_REG_8]; 205 const u8 r8 = bpf2a64[BPF_REG_8];
211 const u8 r9 = bpf2a64[BPF_REG_9]; 206 const u8 r9 = bpf2a64[BPF_REG_9];
212 const u8 fp = bpf2a64[BPF_REG_FP]; 207 const u8 fp = bpf2a64[BPF_REG_FP];
213 const u8 tmp1 = bpf2a64[TMP_REG_1];
214 const u8 tmp2 = bpf2a64[TMP_REG_2];
215 208
216 /* We're done with BPF stack */ 209 /* We're done with BPF stack */
217 emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); 210 emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
@@ -220,8 +213,6 @@ static void build_epilogue(struct jit_ctx *ctx)
220 emit(A64_POP(fp, A64_R(26), A64_SP), ctx); 213 emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
221 214
222 /* Restore callee-saved register */ 215 /* Restore callee-saved register */
223 if (ctx->tmp_used)
224 emit(A64_POP(tmp1, tmp2, A64_SP), ctx);
225 emit(A64_POP(r8, r9, A64_SP), ctx); 216 emit(A64_POP(r8, r9, A64_SP), ctx);
226 emit(A64_POP(r6, r7, A64_SP), ctx); 217 emit(A64_POP(r6, r7, A64_SP), ctx);
227 218
@@ -317,7 +308,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
317 emit(A64_UDIV(is64, dst, dst, src), ctx); 308 emit(A64_UDIV(is64, dst, dst, src), ctx);
318 break; 309 break;
319 case BPF_MOD: 310 case BPF_MOD:
320 ctx->tmp_used = 1;
321 emit(A64_UDIV(is64, tmp, dst, src), ctx); 311 emit(A64_UDIV(is64, tmp, dst, src), ctx);
322 emit(A64_MUL(is64, tmp, tmp, src), ctx); 312 emit(A64_MUL(is64, tmp, tmp, src), ctx);
323 emit(A64_SUB(is64, dst, dst, tmp), ctx); 313 emit(A64_SUB(is64, dst, dst, tmp), ctx);
@@ -390,49 +380,41 @@ emit_bswap_uxt:
390 /* dst = dst OP imm */ 380 /* dst = dst OP imm */
391 case BPF_ALU | BPF_ADD | BPF_K: 381 case BPF_ALU | BPF_ADD | BPF_K:
392 case BPF_ALU64 | BPF_ADD | BPF_K: 382 case BPF_ALU64 | BPF_ADD | BPF_K:
393 ctx->tmp_used = 1;
394 emit_a64_mov_i(is64, tmp, imm, ctx); 383 emit_a64_mov_i(is64, tmp, imm, ctx);
395 emit(A64_ADD(is64, dst, dst, tmp), ctx); 384 emit(A64_ADD(is64, dst, dst, tmp), ctx);
396 break; 385 break;
397 case BPF_ALU | BPF_SUB | BPF_K: 386 case BPF_ALU | BPF_SUB | BPF_K:
398 case BPF_ALU64 | BPF_SUB | BPF_K: 387 case BPF_ALU64 | BPF_SUB | BPF_K:
399 ctx->tmp_used = 1;
400 emit_a64_mov_i(is64, tmp, imm, ctx); 388 emit_a64_mov_i(is64, tmp, imm, ctx);
401 emit(A64_SUB(is64, dst, dst, tmp), ctx); 389 emit(A64_SUB(is64, dst, dst, tmp), ctx);
402 break; 390 break;
403 case BPF_ALU | BPF_AND | BPF_K: 391 case BPF_ALU | BPF_AND | BPF_K:
404 case BPF_ALU64 | BPF_AND | BPF_K: 392 case BPF_ALU64 | BPF_AND | BPF_K:
405 ctx->tmp_used = 1;
406 emit_a64_mov_i(is64, tmp, imm, ctx); 393 emit_a64_mov_i(is64, tmp, imm, ctx);
407 emit(A64_AND(is64, dst, dst, tmp), ctx); 394 emit(A64_AND(is64, dst, dst, tmp), ctx);
408 break; 395 break;
409 case BPF_ALU | BPF_OR | BPF_K: 396 case BPF_ALU | BPF_OR | BPF_K:
410 case BPF_ALU64 | BPF_OR | BPF_K: 397 case BPF_ALU64 | BPF_OR | BPF_K:
411 ctx->tmp_used = 1;
412 emit_a64_mov_i(is64, tmp, imm, ctx); 398 emit_a64_mov_i(is64, tmp, imm, ctx);
413 emit(A64_ORR(is64, dst, dst, tmp), ctx); 399 emit(A64_ORR(is64, dst, dst, tmp), ctx);
414 break; 400 break;
415 case BPF_ALU | BPF_XOR | BPF_K: 401 case BPF_ALU | BPF_XOR | BPF_K:
416 case BPF_ALU64 | BPF_XOR | BPF_K: 402 case BPF_ALU64 | BPF_XOR | BPF_K:
417 ctx->tmp_used = 1;
418 emit_a64_mov_i(is64, tmp, imm, ctx); 403 emit_a64_mov_i(is64, tmp, imm, ctx);
419 emit(A64_EOR(is64, dst, dst, tmp), ctx); 404 emit(A64_EOR(is64, dst, dst, tmp), ctx);
420 break; 405 break;
421 case BPF_ALU | BPF_MUL | BPF_K: 406 case BPF_ALU | BPF_MUL | BPF_K:
422 case BPF_ALU64 | BPF_MUL | BPF_K: 407 case BPF_ALU64 | BPF_MUL | BPF_K:
423 ctx->tmp_used = 1;
424 emit_a64_mov_i(is64, tmp, imm, ctx); 408 emit_a64_mov_i(is64, tmp, imm, ctx);
425 emit(A64_MUL(is64, dst, dst, tmp), ctx); 409 emit(A64_MUL(is64, dst, dst, tmp), ctx);
426 break; 410 break;
427 case BPF_ALU | BPF_DIV | BPF_K: 411 case BPF_ALU | BPF_DIV | BPF_K:
428 case BPF_ALU64 | BPF_DIV | BPF_K: 412 case BPF_ALU64 | BPF_DIV | BPF_K:
429 ctx->tmp_used = 1;
430 emit_a64_mov_i(is64, tmp, imm, ctx); 413 emit_a64_mov_i(is64, tmp, imm, ctx);
431 emit(A64_UDIV(is64, dst, dst, tmp), ctx); 414 emit(A64_UDIV(is64, dst, dst, tmp), ctx);
432 break; 415 break;
433 case BPF_ALU | BPF_MOD | BPF_K: 416 case BPF_ALU | BPF_MOD | BPF_K:
434 case BPF_ALU64 | BPF_MOD | BPF_K: 417 case BPF_ALU64 | BPF_MOD | BPF_K:
435 ctx->tmp_used = 1;
436 emit_a64_mov_i(is64, tmp2, imm, ctx); 418 emit_a64_mov_i(is64, tmp2, imm, ctx);
437 emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); 419 emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
438 emit(A64_MUL(is64, tmp, tmp, tmp2), ctx); 420 emit(A64_MUL(is64, tmp, tmp, tmp2), ctx);
@@ -503,12 +485,10 @@ emit_cond_jmp:
503 case BPF_JMP | BPF_JNE | BPF_K: 485 case BPF_JMP | BPF_JNE | BPF_K:
504 case BPF_JMP | BPF_JSGT | BPF_K: 486 case BPF_JMP | BPF_JSGT | BPF_K:
505 case BPF_JMP | BPF_JSGE | BPF_K: 487 case BPF_JMP | BPF_JSGE | BPF_K:
506 ctx->tmp_used = 1;
507 emit_a64_mov_i(1, tmp, imm, ctx); 488 emit_a64_mov_i(1, tmp, imm, ctx);
508 emit(A64_CMP(1, dst, tmp), ctx); 489 emit(A64_CMP(1, dst, tmp), ctx);
509 goto emit_cond_jmp; 490 goto emit_cond_jmp;
510 case BPF_JMP | BPF_JSET | BPF_K: 491 case BPF_JMP | BPF_JSET | BPF_K:
511 ctx->tmp_used = 1;
512 emit_a64_mov_i(1, tmp, imm, ctx); 492 emit_a64_mov_i(1, tmp, imm, ctx);
513 emit(A64_TST(1, dst, tmp), ctx); 493 emit(A64_TST(1, dst, tmp), ctx);
514 goto emit_cond_jmp; 494 goto emit_cond_jmp;
@@ -518,7 +498,6 @@ emit_cond_jmp:
518 const u8 r0 = bpf2a64[BPF_REG_0]; 498 const u8 r0 = bpf2a64[BPF_REG_0];
519 const u64 func = (u64)__bpf_call_base + imm; 499 const u64 func = (u64)__bpf_call_base + imm;
520 500
521 ctx->tmp_used = 1;
522 emit_a64_mov_i64(tmp, func, ctx); 501 emit_a64_mov_i64(tmp, func, ctx);
523 emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); 502 emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
524 emit(A64_MOV(1, A64_FP, A64_SP), ctx); 503 emit(A64_MOV(1, A64_FP, A64_SP), ctx);
@@ -564,7 +543,6 @@ emit_cond_jmp:
564 case BPF_LDX | BPF_MEM | BPF_H: 543 case BPF_LDX | BPF_MEM | BPF_H:
565 case BPF_LDX | BPF_MEM | BPF_B: 544 case BPF_LDX | BPF_MEM | BPF_B:
566 case BPF_LDX | BPF_MEM | BPF_DW: 545 case BPF_LDX | BPF_MEM | BPF_DW:
567 ctx->tmp_used = 1;
568 emit_a64_mov_i(1, tmp, off, ctx); 546 emit_a64_mov_i(1, tmp, off, ctx);
569 switch (BPF_SIZE(code)) { 547 switch (BPF_SIZE(code)) {
570 case BPF_W: 548 case BPF_W:
@@ -588,7 +566,6 @@ emit_cond_jmp:
588 case BPF_ST | BPF_MEM | BPF_B: 566 case BPF_ST | BPF_MEM | BPF_B:
589 case BPF_ST | BPF_MEM | BPF_DW: 567 case BPF_ST | BPF_MEM | BPF_DW:
590 /* Load imm to a register then store it */ 568 /* Load imm to a register then store it */
591 ctx->tmp_used = 1;
592 emit_a64_mov_i(1, tmp2, off, ctx); 569 emit_a64_mov_i(1, tmp2, off, ctx);
593 emit_a64_mov_i(1, tmp, imm, ctx); 570 emit_a64_mov_i(1, tmp, imm, ctx);
594 switch (BPF_SIZE(code)) { 571 switch (BPF_SIZE(code)) {
@@ -612,7 +589,6 @@ emit_cond_jmp:
612 case BPF_STX | BPF_MEM | BPF_H: 589 case BPF_STX | BPF_MEM | BPF_H:
613 case BPF_STX | BPF_MEM | BPF_B: 590 case BPF_STX | BPF_MEM | BPF_B:
614 case BPF_STX | BPF_MEM | BPF_DW: 591 case BPF_STX | BPF_MEM | BPF_DW:
615 ctx->tmp_used = 1;
616 emit_a64_mov_i(1, tmp, off, ctx); 592 emit_a64_mov_i(1, tmp, off, ctx);
617 switch (BPF_SIZE(code)) { 593 switch (BPF_SIZE(code)) {
618 case BPF_W: 594 case BPF_W:
@@ -798,7 +774,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
798 774
799 /* 1. Initial fake pass to compute ctx->idx. */ 775 /* 1. Initial fake pass to compute ctx->idx. */
800 776
801 /* Fake pass to fill in ctx->offset and ctx->tmp_used. */ 777 /* Fake pass to fill in ctx->offset. */
802 if (build_body(&ctx)) { 778 if (build_body(&ctx)) {
803 prog = orig_prog; 779 prog = orig_prog;
804 goto out_off; 780 goto out_off;