aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Quentin Monnet <quentin.monnet@netronome.com> 2018-10-07 07:56:56 -0400
committer: Daniel Borkmann <daniel@iogearbox.net> 2018-10-08 04:24:13 -0400
commit: 445496231445aad46866a858a384b428cd073977 (patch)
tree: 3a53c4366d29fc7b54ead15c07fb7159c163b7a3
parent: 2178f3f0dc200557312e783aa683b87794084ae2 (diff)
nfp: bpf: optimise save/restore for R6~R9 based on register usage
When pre-processing the instructions, it is trivial to detect what subprograms are using R6, R7, R8 or R9 as destination registers. If a subprogram uses none of those, then we do not need to jump to the subroutines dedicated to saving and restoring callee-saved registers in its prologue and epilogue.

This patch introduces detection of callee-saved registers in subprograms and prevents the JIT from adding calls to those subroutines whenever we can: we save some instructions in the translated program, and some time at runtime on BPF-to-BPF calls and returns.

If no subprogram needs to save those registers, we can avoid appending the subroutines at the end of the program.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/jit.c       85
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.h       2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/verifier.c  14
3 files changed, 78 insertions, 23 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 74423d3e714d..b393f9dea584 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -3132,7 +3132,9 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3132 NFP_CSR_ACT_LM_ADDR0); 3132 NFP_CSR_ACT_LM_ADDR0);
3133 } 3133 }
3134 3134
3135 /* The following steps are performed: 3135 /* Two cases for jumping to the callee:
3136 *
3137 * - If callee uses and needs to save R6~R9 then:
3136 * 1. Put the start offset of the callee into imm_b(). This will 3138 * 1. Put the start offset of the callee into imm_b(). This will
3137 * require a fixup step, as we do not necessarily know this 3139 * require a fixup step, as we do not necessarily know this
3138 * address yet. 3140 * address yet.
@@ -3140,8 +3142,12 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3140 * register ret_reg(). 3142 * register ret_reg().
3141 * 3. (After defer slots are consumed) Jump to the subroutine that 3143 * 3. (After defer slots are consumed) Jump to the subroutine that
3142 * pushes the registers to the stack. 3144 * pushes the registers to the stack.
3143 * The subroutine acts as a trampoline, and returns to the address in 3145 * The subroutine acts as a trampoline, and returns to the address in
3144 * imm_b(), i.e. jumps to the callee. 3146 * imm_b(), i.e. jumps to the callee.
3147 *
3148 * - If callee does not need to save R6~R9 then just load return
3149 * address to the caller in ret_reg(), and jump to the callee
3150 * directly.
3145 * 3151 *
3146 * Using ret_reg() to pass the return address to the callee is set here 3152 * Using ret_reg() to pass the return address to the callee is set here
3147 * as a convention. The callee can then push this address onto its 3153 * as a convention. The callee can then push this address onto its
@@ -3157,11 +3163,21 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3157 * execution of the callee, we will not have to push the return 3163 * execution of the callee, we will not have to push the return
3158 * address to the stack for leaf functions. 3164 * address to the stack for leaf functions.
3159 */ 3165 */
3160 ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; 3166 if (!meta->jmp_dst) {
3161 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, 3167 pr_err("BUG: BPF-to-BPF call has no destination recorded\n");
3162 RELO_BR_GO_CALL_PUSH_REGS); 3168 return -ELOOP;
3163 offset_br = nfp_prog_current_offset(nfp_prog); 3169 }
3164 wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL); 3170 if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) {
3171 ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
3172 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2,
3173 RELO_BR_GO_CALL_PUSH_REGS);
3174 offset_br = nfp_prog_current_offset(nfp_prog);
3175 wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL);
3176 } else {
3177 ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
3178 emit_br(nfp_prog, BR_UNC, meta->n + 1 + meta->insn.imm, 1);
3179 offset_br = nfp_prog_current_offset(nfp_prog);
3180 }
3165 wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL); 3181 wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL);
3166 3182
3167 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) 3183 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
@@ -3227,15 +3243,24 @@ static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3227static int 3243static int
3228nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3244nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3229{ 3245{
3230 /* Pop R6~R9 to the stack via related subroutine. 3246 if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) {
3231 * Pop return address for BPF-to-BPF call from the stack and load it 3247 /* Pop R6~R9 to the stack via related subroutine.
3232 * into ret_reg() before we jump. This means that the subroutine does 3248 * We loaded the return address to the caller into ret_reg().
3233 * not come back here, we make it jump back to the subprogram caller 3249 * This means that the subroutine does not come back here, we
3234 * directly! 3250 * make it jump back to the subprogram caller directly!
3235 */ 3251 */
3236 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1, 3252 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1,
3237 RELO_BR_GO_CALL_POP_REGS); 3253 RELO_BR_GO_CALL_POP_REGS);
3238 wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); 3254 /* Pop return address from the stack. */
3255 wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
3256 } else {
3257 /* Pop return address from the stack. */
3258 wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
3259 /* Jump back to caller if no callee-saved registers were used
3260 * by the subprogram.
3261 */
3262 emit_rtn(nfp_prog, ret_reg(nfp_prog), 0);
3263 }
3239 3264
3240 return 0; 3265 return 0;
3241} 3266}
@@ -3410,7 +3435,8 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
3410 return -ELOOP; 3435 return -ELOOP;
3411 } 3436 }
3412 3437
3413 if (is_mbpf_pseudo_call(meta)) { 3438 if (is_mbpf_pseudo_call(meta) &&
3439 nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) {
3414 err = nfp_fixup_immed_relo(nfp_prog, meta, 3440 err = nfp_fixup_immed_relo(nfp_prog, meta,
3415 jmp_dst, br_idx); 3441 jmp_dst, br_idx);
3416 if (err) 3442 if (err)
@@ -3549,6 +3575,17 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
3549 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); 3575 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
3550} 3576}
3551 3577
3578static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog)
3579{
3580 unsigned int idx;
3581
3582 for (idx = 1; idx < nfp_prog->subprog_cnt; idx++)
3583 if (nfp_prog->subprog[idx].needs_reg_push)
3584 return true;
3585
3586 return false;
3587}
3588
3552static void nfp_push_callee_registers(struct nfp_prog *nfp_prog) 3589static void nfp_push_callee_registers(struct nfp_prog *nfp_prog)
3553{ 3590{
3554 u8 reg; 3591 u8 reg;
@@ -3612,7 +3649,7 @@ static void nfp_outro(struct nfp_prog *nfp_prog)
3612 WARN_ON(1); 3649 WARN_ON(1);
3613 } 3650 }
3614 3651
3615 if (nfp_prog->subprog_cnt == 1) 3652 if (!nfp_prog_needs_callee_reg_save(nfp_prog))
3616 return; 3653 return;
3617 3654
3618 nfp_push_callee_registers(nfp_prog); 3655 nfp_push_callee_registers(nfp_prog);
@@ -4354,10 +4391,20 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
4354 nfp_prog->tgt_abort + bv->start_off); 4391 nfp_prog->tgt_abort + bv->start_off);
4355 break; 4392 break;
4356 case RELO_BR_GO_CALL_PUSH_REGS: 4393 case RELO_BR_GO_CALL_PUSH_REGS:
4394 if (!nfp_prog->tgt_call_push_regs) {
4395 pr_err("BUG: failed to detect subprogram registers needs\n");
4396 err = -EINVAL;
4397 goto err_free_prog;
4398 }
4357 off = nfp_prog->tgt_call_push_regs + bv->start_off; 4399 off = nfp_prog->tgt_call_push_regs + bv->start_off;
4358 br_set_offset(&prog[i], off); 4400 br_set_offset(&prog[i], off);
4359 break; 4401 break;
4360 case RELO_BR_GO_CALL_POP_REGS: 4402 case RELO_BR_GO_CALL_POP_REGS:
4403 if (!nfp_prog->tgt_call_pop_regs) {
4404 pr_err("BUG: failed to detect subprogram registers needs\n");
4405 err = -EINVAL;
4406 goto err_free_prog;
4407 }
4361 off = nfp_prog->tgt_call_pop_regs + bv->start_off; 4408 off = nfp_prog->tgt_call_pop_regs + bv->start_off;
4362 br_set_offset(&prog[i], off); 4409 br_set_offset(&prog[i], off);
4363 break; 4410 break;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 1cef5136c198..44b787a0bd4b 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -452,9 +452,11 @@ static inline bool is_mbpf_pseudo_call(const struct nfp_insn_meta *meta)
452/** 452/**
453 * struct nfp_bpf_subprog_info - nfp BPF sub-program (a.k.a. function) info 453 * struct nfp_bpf_subprog_info - nfp BPF sub-program (a.k.a. function) info
454 * @stack_depth: maximum stack depth used by this sub-program 454 * @stack_depth: maximum stack depth used by this sub-program
455 * @needs_reg_push: whether sub-program uses callee-saved registers
455 */ 456 */
456struct nfp_bpf_subprog_info { 457struct nfp_bpf_subprog_info {
457 u16 stack_depth; 458 u16 stack_depth;
459 u8 needs_reg_push : 1;
458}; 460};
459 461
460/** 462/**
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 81a463726d55..f31721bd1fac 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -644,7 +644,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
644} 644}
645 645
646static int 646static int
647nfp_assign_subprog_idx(struct bpf_verifier_env *env, struct nfp_prog *nfp_prog) 647nfp_assign_subprog_idx_and_regs(struct bpf_verifier_env *env,
648 struct nfp_prog *nfp_prog)
648{ 649{
649 struct nfp_insn_meta *meta; 650 struct nfp_insn_meta *meta;
650 int index = 0; 651 int index = 0;
@@ -653,6 +654,10 @@ nfp_assign_subprog_idx(struct bpf_verifier_env *env, struct nfp_prog *nfp_prog)
653 if (nfp_is_subprog_start(meta)) 654 if (nfp_is_subprog_start(meta))
654 index++; 655 index++;
655 meta->subprog_idx = index; 656 meta->subprog_idx = index;
657
658 if (meta->insn.dst_reg >= BPF_REG_6 &&
659 meta->insn.dst_reg <= BPF_REG_9)
660 nfp_prog->subprog[index].needs_reg_push = 1;
656 } 661 }
657 662
658 if (index + 1 != nfp_prog->subprog_cnt) { 663 if (index + 1 != nfp_prog->subprog_cnt) {
@@ -734,7 +739,7 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env)
734 if (!nfp_prog->subprog) 739 if (!nfp_prog->subprog)
735 return -ENOMEM; 740 return -ENOMEM;
736 741
737 nfp_assign_subprog_idx(env, nfp_prog); 742 nfp_assign_subprog_idx_and_regs(env, nfp_prog);
738 743
739 info = env->subprog_info; 744 info = env->subprog_info;
740 for (i = 0; i < nfp_prog->subprog_cnt; i++) { 745 for (i = 0; i < nfp_prog->subprog_cnt; i++) {
@@ -745,8 +750,9 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env)
745 750
746 /* Account for size of return address. */ 751 /* Account for size of return address. */
747 nfp_prog->subprog[i].stack_depth += REG_WIDTH; 752 nfp_prog->subprog[i].stack_depth += REG_WIDTH;
748 /* Account for size of saved registers. */ 753 /* Account for size of saved registers, if necessary. */
749 nfp_prog->subprog[i].stack_depth += BPF_REG_SIZE * 4; 754 if (nfp_prog->subprog[i].needs_reg_push)
755 nfp_prog->subprog[i].stack_depth += BPF_REG_SIZE * 4;
750 } 756 }
751 757
752 nn = netdev_priv(env->prog->aux->offload->netdev); 758 nn = netdev_priv(env->prog->aux->offload->netdev);