diff options
author | Quentin Monnet <quentin.monnet@netronome.com> | 2018-10-07 07:56:56 -0400 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-10-08 04:24:13 -0400 |
commit | 445496231445aad46866a858a384b428cd073977 (patch) | |
tree | 3a53c4366d29fc7b54ead15c07fb7159c163b7a3 | |
parent | 2178f3f0dc200557312e783aa683b87794084ae2 (diff) |
nfp: bpf: optimise save/restore for R6~R9 based on register usage
When pre-processing the instructions, it is trivial to detect which
subprograms are using R6, R7, R8 or R9 as destination registers. If a
subprogram uses none of those, then we do not need to jump to the
subroutines dedicated to saving and restoring callee-saved registers in
its prologue and epilogue.
This patch introduces detection of callee-saved registers in subprograms
and prevents the JIT from adding calls to those subroutines whenever we
can: we save some instructions in the translated program, and some time
at runtime on BPF-to-BPF calls and returns.
If no subprogram needs to save those registers, we can avoid appending
the subroutines at the end of the program.
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/jit.c | 85 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/main.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 14 |
3 files changed, 78 insertions, 23 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 74423d3e714d..b393f9dea584 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c | |||
@@ -3132,7 +3132,9 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
3132 | NFP_CSR_ACT_LM_ADDR0); | 3132 | NFP_CSR_ACT_LM_ADDR0); |
3133 | } | 3133 | } |
3134 | 3134 | ||
3135 | /* The following steps are performed: | 3135 | /* Two cases for jumping to the callee: |
3136 | * | ||
3137 | * - If callee uses and needs to save R6~R9 then: | ||
3136 | * 1. Put the start offset of the callee into imm_b(). This will | 3138 | * 1. Put the start offset of the callee into imm_b(). This will |
3137 | * require a fixup step, as we do not necessarily know this | 3139 | * require a fixup step, as we do not necessarily know this |
3138 | * address yet. | 3140 | * address yet. |
@@ -3140,8 +3142,12 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
3140 | * register ret_reg(). | 3142 | * register ret_reg(). |
3141 | * 3. (After defer slots are consumed) Jump to the subroutine that | 3143 | * 3. (After defer slots are consumed) Jump to the subroutine that |
3142 | * pushes the registers to the stack. | 3144 | * pushes the registers to the stack. |
3143 | * The subroutine acts as a trampoline, and returns to the address in | 3145 | * The subroutine acts as a trampoline, and returns to the address in |
3144 | * imm_b(), i.e. jumps to the callee. | 3146 | * imm_b(), i.e. jumps to the callee. |
3147 | * | ||
3148 | * - If callee does not need to save R6~R9 then just load return | ||
3149 | * address to the caller in ret_reg(), and jump to the callee | ||
3150 | * directly. | ||
3145 | * | 3151 | * |
3146 | * Using ret_reg() to pass the return address to the callee is set here | 3152 | * Using ret_reg() to pass the return address to the callee is set here |
3147 | * as a convention. The callee can then push this address onto its | 3153 | * as a convention. The callee can then push this address onto its |
@@ -3157,11 +3163,21 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
3157 | * execution of the callee, we will not have to push the return | 3163 | * execution of the callee, we will not have to push the return |
3158 | * address to the stack for leaf functions. | 3164 | * address to the stack for leaf functions. |
3159 | */ | 3165 | */ |
3160 | ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; | 3166 | if (!meta->jmp_dst) { |
3161 | emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, | 3167 | pr_err("BUG: BPF-to-BPF call has no destination recorded\n"); |
3162 | RELO_BR_GO_CALL_PUSH_REGS); | 3168 | return -ELOOP; |
3163 | offset_br = nfp_prog_current_offset(nfp_prog); | 3169 | } |
3164 | wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL); | 3170 | if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) { |
3171 | ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; | ||
3172 | emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, | ||
3173 | RELO_BR_GO_CALL_PUSH_REGS); | ||
3174 | offset_br = nfp_prog_current_offset(nfp_prog); | ||
3175 | wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL); | ||
3176 | } else { | ||
3177 | ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; | ||
3178 | emit_br(nfp_prog, BR_UNC, meta->n + 1 + meta->insn.imm, 1); | ||
3179 | offset_br = nfp_prog_current_offset(nfp_prog); | ||
3180 | } | ||
3165 | wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL); | 3181 | wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL); |
3166 | 3182 | ||
3167 | if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) | 3183 | if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) |
@@ -3227,15 +3243,24 @@ static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
3227 | static int | 3243 | static int |
3228 | nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | 3244 | nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
3229 | { | 3245 | { |
3230 | /* Pop R6~R9 to the stack via related subroutine. | 3246 | if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) { |
3231 | * Pop return address for BPF-to-BPF call from the stack and load it | 3247 | /* Pop R6~R9 to the stack via related subroutine. |
3232 | * into ret_reg() before we jump. This means that the subroutine does | 3248 | * We loaded the return address to the caller into ret_reg(). |
3233 | * not come back here, we make it jump back to the subprogram caller | 3249 | * This means that the subroutine does not come back here, we |
3234 | * directly! | 3250 | * make it jump back to the subprogram caller directly! |
3235 | */ | 3251 | */ |
3236 | emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1, | 3252 | emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1, |
3237 | RELO_BR_GO_CALL_POP_REGS); | 3253 | RELO_BR_GO_CALL_POP_REGS); |
3238 | wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); | 3254 | /* Pop return address from the stack. */ |
3255 | wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); | ||
3256 | } else { | ||
3257 | /* Pop return address from the stack. */ | ||
3258 | wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); | ||
3259 | /* Jump back to caller if no callee-saved registers were used | ||
3260 | * by the subprogram. | ||
3261 | */ | ||
3262 | emit_rtn(nfp_prog, ret_reg(nfp_prog), 0); | ||
3263 | } | ||
3239 | 3264 | ||
3240 | return 0; | 3265 | return 0; |
3241 | } | 3266 | } |
@@ -3410,7 +3435,8 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) | |||
3410 | return -ELOOP; | 3435 | return -ELOOP; |
3411 | } | 3436 | } |
3412 | 3437 | ||
3413 | if (is_mbpf_pseudo_call(meta)) { | 3438 | if (is_mbpf_pseudo_call(meta) && |
3439 | nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) { | ||
3414 | err = nfp_fixup_immed_relo(nfp_prog, meta, | 3440 | err = nfp_fixup_immed_relo(nfp_prog, meta, |
3415 | jmp_dst, br_idx); | 3441 | jmp_dst, br_idx); |
3416 | if (err) | 3442 | if (err) |
@@ -3549,6 +3575,17 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) | |||
3549 | emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); | 3575 | emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); |
3550 | } | 3576 | } |
3551 | 3577 | ||
3578 | static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog) | ||
3579 | { | ||
3580 | unsigned int idx; | ||
3581 | |||
3582 | for (idx = 1; idx < nfp_prog->subprog_cnt; idx++) | ||
3583 | if (nfp_prog->subprog[idx].needs_reg_push) | ||
3584 | return true; | ||
3585 | |||
3586 | return false; | ||
3587 | } | ||
3588 | |||
3552 | static void nfp_push_callee_registers(struct nfp_prog *nfp_prog) | 3589 | static void nfp_push_callee_registers(struct nfp_prog *nfp_prog) |
3553 | { | 3590 | { |
3554 | u8 reg; | 3591 | u8 reg; |
@@ -3612,7 +3649,7 @@ static void nfp_outro(struct nfp_prog *nfp_prog) | |||
3612 | WARN_ON(1); | 3649 | WARN_ON(1); |
3613 | } | 3650 | } |
3614 | 3651 | ||
3615 | if (nfp_prog->subprog_cnt == 1) | 3652 | if (!nfp_prog_needs_callee_reg_save(nfp_prog)) |
3616 | return; | 3653 | return; |
3617 | 3654 | ||
3618 | nfp_push_callee_registers(nfp_prog); | 3655 | nfp_push_callee_registers(nfp_prog); |
@@ -4354,10 +4391,20 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) | |||
4354 | nfp_prog->tgt_abort + bv->start_off); | 4391 | nfp_prog->tgt_abort + bv->start_off); |
4355 | break; | 4392 | break; |
4356 | case RELO_BR_GO_CALL_PUSH_REGS: | 4393 | case RELO_BR_GO_CALL_PUSH_REGS: |
4394 | if (!nfp_prog->tgt_call_push_regs) { | ||
4395 | pr_err("BUG: failed to detect subprogram registers needs\n"); | ||
4396 | err = -EINVAL; | ||
4397 | goto err_free_prog; | ||
4398 | } | ||
4357 | off = nfp_prog->tgt_call_push_regs + bv->start_off; | 4399 | off = nfp_prog->tgt_call_push_regs + bv->start_off; |
4358 | br_set_offset(&prog[i], off); | 4400 | br_set_offset(&prog[i], off); |
4359 | break; | 4401 | break; |
4360 | case RELO_BR_GO_CALL_POP_REGS: | 4402 | case RELO_BR_GO_CALL_POP_REGS: |
4403 | if (!nfp_prog->tgt_call_pop_regs) { | ||
4404 | pr_err("BUG: failed to detect subprogram registers needs\n"); | ||
4405 | err = -EINVAL; | ||
4406 | goto err_free_prog; | ||
4407 | } | ||
4361 | off = nfp_prog->tgt_call_pop_regs + bv->start_off; | 4408 | off = nfp_prog->tgt_call_pop_regs + bv->start_off; |
4362 | br_set_offset(&prog[i], off); | 4409 | br_set_offset(&prog[i], off); |
4363 | break; | 4410 | break; |
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 1cef5136c198..44b787a0bd4b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h | |||
@@ -452,9 +452,11 @@ static inline bool is_mbpf_pseudo_call(const struct nfp_insn_meta *meta) | |||
452 | /** | 452 | /** |
453 | * struct nfp_bpf_subprog_info - nfp BPF sub-program (a.k.a. function) info | 453 | * struct nfp_bpf_subprog_info - nfp BPF sub-program (a.k.a. function) info |
454 | * @stack_depth: maximum stack depth used by this sub-program | 454 | * @stack_depth: maximum stack depth used by this sub-program |
455 | * @needs_reg_push: whether sub-program uses callee-saved registers | ||
455 | */ | 456 | */ |
456 | struct nfp_bpf_subprog_info { | 457 | struct nfp_bpf_subprog_info { |
457 | u16 stack_depth; | 458 | u16 stack_depth; |
459 | u8 needs_reg_push : 1; | ||
458 | }; | 460 | }; |
459 | 461 | ||
460 | /** | 462 | /** |
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 81a463726d55..f31721bd1fac 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c | |||
@@ -644,7 +644,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) | |||
644 | } | 644 | } |
645 | 645 | ||
646 | static int | 646 | static int |
647 | nfp_assign_subprog_idx(struct bpf_verifier_env *env, struct nfp_prog *nfp_prog) | 647 | nfp_assign_subprog_idx_and_regs(struct bpf_verifier_env *env, |
648 | struct nfp_prog *nfp_prog) | ||
648 | { | 649 | { |
649 | struct nfp_insn_meta *meta; | 650 | struct nfp_insn_meta *meta; |
650 | int index = 0; | 651 | int index = 0; |
@@ -653,6 +654,10 @@ nfp_assign_subprog_idx(struct bpf_verifier_env *env, struct nfp_prog *nfp_prog) | |||
653 | if (nfp_is_subprog_start(meta)) | 654 | if (nfp_is_subprog_start(meta)) |
654 | index++; | 655 | index++; |
655 | meta->subprog_idx = index; | 656 | meta->subprog_idx = index; |
657 | |||
658 | if (meta->insn.dst_reg >= BPF_REG_6 && | ||
659 | meta->insn.dst_reg <= BPF_REG_9) | ||
660 | nfp_prog->subprog[index].needs_reg_push = 1; | ||
656 | } | 661 | } |
657 | 662 | ||
658 | if (index + 1 != nfp_prog->subprog_cnt) { | 663 | if (index + 1 != nfp_prog->subprog_cnt) { |
@@ -734,7 +739,7 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env) | |||
734 | if (!nfp_prog->subprog) | 739 | if (!nfp_prog->subprog) |
735 | return -ENOMEM; | 740 | return -ENOMEM; |
736 | 741 | ||
737 | nfp_assign_subprog_idx(env, nfp_prog); | 742 | nfp_assign_subprog_idx_and_regs(env, nfp_prog); |
738 | 743 | ||
739 | info = env->subprog_info; | 744 | info = env->subprog_info; |
740 | for (i = 0; i < nfp_prog->subprog_cnt; i++) { | 745 | for (i = 0; i < nfp_prog->subprog_cnt; i++) { |
@@ -745,8 +750,9 @@ static int nfp_bpf_finalize(struct bpf_verifier_env *env) | |||
745 | 750 | ||
746 | /* Account for size of return address. */ | 751 | /* Account for size of return address. */ |
747 | nfp_prog->subprog[i].stack_depth += REG_WIDTH; | 752 | nfp_prog->subprog[i].stack_depth += REG_WIDTH; |
748 | /* Account for size of saved registers. */ | 753 | /* Account for size of saved registers, if necessary. */ |
749 | nfp_prog->subprog[i].stack_depth += BPF_REG_SIZE * 4; | 754 | if (nfp_prog->subprog[i].needs_reg_push) |
755 | nfp_prog->subprog[i].stack_depth += BPF_REG_SIZE * 4; | ||
750 | } | 756 | } |
751 | 757 | ||
752 | nn = netdev_priv(env->prog->aux->offload->netdev); | 758 | nn = netdev_priv(env->prog->aux->offload->netdev); |