aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <jakub.kicinski@netronome.com>2018-03-28 20:48:36 -0400
committerAlexei Starovoitov <ast@kernel.org>2018-03-28 22:36:14 -0400
commit41aed09cf61c00ef6c3b2648d5a193cbaf2a74d0 (patch)
tree7ba8625d79f116af1aca358f991d9b7fb1fcdfb4
parentb556ddd9c19983f3f13ab0d524f884349fead115 (diff)
nfp: bpf: add support for atomic add of unknown values
Allow atomic add to be used even when the value is not guaranteed to fit into a 16-bit immediate. This requires the value to be pulled as data, and therefore the use of a transfer register and a context swap. Track the information about the possible lengths of the value; if it is guaranteed to be larger than 16 bits, don't generate the code for the optimized case at all.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c78
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h7
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c14
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_asm.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_asm.h3
5 files changed, 88 insertions, 15 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index db73f56de59a..62431a0aa0f5 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -2125,12 +2125,49 @@ static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2125static int 2125static int
2126mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64) 2126mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
2127{ 2127{
2128 swreg addra, addrb, off, prev_alu = imm_a(nfp_prog);
2129 u8 dst_gpr = meta->insn.dst_reg * 2; 2128 u8 dst_gpr = meta->insn.dst_reg * 2;
2130 u8 src_gpr = meta->insn.src_reg * 2; 2129 u8 src_gpr = meta->insn.src_reg * 2;
2130 unsigned int full_add, out;
2131 swreg addra, addrb, off;
2131 2132
2132 off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 2133 off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2133 2134
2135 /* We can fit 16 bits into command immediate, if we know the immediate
2136 * is guaranteed to either always or never fit into 16 bit we only
2137 * generate code to handle that particular case, otherwise generate
2138 * code for both.
2139 */
2140 out = nfp_prog_current_offset(nfp_prog);
2141 full_add = nfp_prog_current_offset(nfp_prog);
2142
2143 if (meta->insn.off) {
2144 out += 2;
2145 full_add += 2;
2146 }
2147 if (meta->xadd_maybe_16bit) {
2148 out += 3;
2149 full_add += 3;
2150 }
2151 if (meta->xadd_over_16bit)
2152 out += 2 + is64;
2153 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
2154 out += 5;
2155 full_add += 5;
2156 }
2157
2158 /* Generate the branch for choosing add_imm vs add */
2159 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
2160 swreg max_imm = imm_a(nfp_prog);
2161
2162 wrp_immed(nfp_prog, max_imm, 0xffff);
2163 emit_alu(nfp_prog, reg_none(),
2164 max_imm, ALU_OP_SUB, reg_b(src_gpr));
2165 emit_alu(nfp_prog, reg_none(),
2166 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
2167 emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
2168 /* defer for add */
2169 }
2170
2134 /* If insn has an offset add to the address */ 2171 /* If insn has an offset add to the address */
2135 if (!meta->insn.off) { 2172 if (!meta->insn.off) {
2136 addra = reg_a(dst_gpr); 2173 addra = reg_a(dst_gpr);
@@ -2144,13 +2181,38 @@ mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
2144 addrb = imma_b(nfp_prog); 2181 addrb = imma_b(nfp_prog);
2145 } 2182 }
2146 2183
2147 wrp_immed(nfp_prog, prev_alu, 2184 /* Generate the add_imm if 16 bits are possible */
2148 FIELD_PREP(CMD_OVE_DATA, 2) | 2185 if (meta->xadd_maybe_16bit) {
2149 CMD_OVE_LEN | 2186 swreg prev_alu = imm_a(nfp_prog);
2150 FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2)); 2187
2151 wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2); 2188 wrp_immed(nfp_prog, prev_alu,
2152 emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0, 2189 FIELD_PREP(CMD_OVE_DATA, 2) |
2153 addra, addrb, 0, CMD_CTX_NO_SWAP); 2190 CMD_OVE_LEN |
2191 FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
2192 wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
2193 emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
2194 addra, addrb, 0, CMD_CTX_NO_SWAP);
2195
2196 if (meta->xadd_over_16bit)
2197 emit_br(nfp_prog, BR_UNC, out, 0);
2198 }
2199
2200 if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
2201 return -EINVAL;
2202
2203 /* Generate the add if 16 bits are not guaranteed */
2204 if (meta->xadd_over_16bit) {
2205 emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
2206 addra, addrb, is64 << 2,
2207 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
2208
2209 wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
2210 if (is64)
2211 wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
2212 }
2213
2214 if (!nfp_prog_confirm_current_offset(nfp_prog, out))
2215 return -EINVAL;
2154 2216
2155 return 0; 2217 return 0;
2156} 2218}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 877be7143991..a73b86c6ce52 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -229,6 +229,8 @@ struct nfp_bpf_reg_state {
229 * @pkt_cache.range_start: start offset for associated packet data cache 229 * @pkt_cache.range_start: start offset for associated packet data cache
230 * @pkt_cache.range_end: end offset for associated packet data cache 230 * @pkt_cache.range_end: end offset for associated packet data cache
231 * @pkt_cache.do_init: this read needs to initialize packet data cache 231 * @pkt_cache.do_init: this read needs to initialize packet data cache
232 * @xadd_over_16bit: 16bit immediate is not guaranteed
233 * @xadd_maybe_16bit: 16bit immediate is possible
232 * @jmp_dst: destination info for jump instructions 234 * @jmp_dst: destination info for jump instructions
233 * @func_id: function id for call instructions 235 * @func_id: function id for call instructions
234 * @arg1: arg1 for call instructions 236 * @arg1: arg1 for call instructions
@@ -243,6 +245,7 @@ struct nfp_bpf_reg_state {
243struct nfp_insn_meta { 245struct nfp_insn_meta {
244 struct bpf_insn insn; 246 struct bpf_insn insn;
245 union { 247 union {
248 /* pointer ops (ld/st/xadd) */
246 struct { 249 struct {
247 struct bpf_reg_state ptr; 250 struct bpf_reg_state ptr;
248 struct bpf_insn *paired_st; 251 struct bpf_insn *paired_st;
@@ -253,8 +256,12 @@ struct nfp_insn_meta {
253 s16 range_end; 256 s16 range_end;
254 bool do_init; 257 bool do_init;
255 } pkt_cache; 258 } pkt_cache;
259 bool xadd_over_16bit;
260 bool xadd_maybe_16bit;
256 }; 261 };
262 /* jump */
257 struct nfp_insn_meta *jmp_dst; 263 struct nfp_insn_meta *jmp_dst;
264 /* function calls */
258 struct { 265 struct {
259 u32 func_id; 266 u32 func_id;
260 struct bpf_reg_state arg1; 267 struct bpf_reg_state arg1;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 40619efea77d..486ffd1d5913 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -414,16 +414,16 @@ nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
414 dreg->type); 414 dreg->type);
415 return -EOPNOTSUPP; 415 return -EOPNOTSUPP;
416 } 416 }
417 if (sreg->type != SCALAR_VALUE || 417 if (sreg->type != SCALAR_VALUE) {
418 sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff) { 418 pr_vlog(env, "atomic add not of a scalar: %d\n", sreg->type);
419 char tn_buf[48];
420
421 tnum_strn(tn_buf, sizeof(tn_buf), sreg->var_off);
422 pr_vlog(env, "atomic add not of a small constant scalar: %s\n",
423 tn_buf);
424 return -EOPNOTSUPP; 419 return -EOPNOTSUPP;
425 } 420 }
426 421
422 meta->xadd_over_16bit |=
423 sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff;
424 meta->xadd_maybe_16bit |=
425 (sreg->var_off.value & ~sreg->var_off.mask) <= 0xffff;
426
427 return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg); 427 return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
428} 428}
429 429
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
index 3c0107ac9a2c..cc6ace2be8a9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -48,6 +48,7 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
48 [CMD_TGT_READ32_SWAP] = { 0x02, 0x5c }, 48 [CMD_TGT_READ32_SWAP] = { 0x02, 0x5c },
49 [CMD_TGT_READ_LE] = { 0x01, 0x40 }, 49 [CMD_TGT_READ_LE] = { 0x01, 0x40 },
50 [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, 50 [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 },
51 [CMD_TGT_ADD] = { 0x00, 0x47 },
51 [CMD_TGT_ADD_IMM] = { 0x02, 0x47 }, 52 [CMD_TGT_ADD_IMM] = { 0x02, 0x47 },
52}; 53};
53 54
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 185192590a17..36524dd6021b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -238,6 +238,7 @@ enum cmd_tgt_map {
238 CMD_TGT_READ32_SWAP, 238 CMD_TGT_READ32_SWAP,
239 CMD_TGT_READ_LE, 239 CMD_TGT_READ_LE,
240 CMD_TGT_READ_SWAP_LE, 240 CMD_TGT_READ_SWAP_LE,
241 CMD_TGT_ADD,
241 CMD_TGT_ADD_IMM, 242 CMD_TGT_ADD_IMM,
242 __CMD_TGT_MAP_SIZE, 243 __CMD_TGT_MAP_SIZE,
243}; 244};
@@ -252,6 +253,8 @@ enum cmd_mode {
252 253
253enum cmd_ctx_swap { 254enum cmd_ctx_swap {
254 CMD_CTX_SWAP = 0, 255 CMD_CTX_SWAP = 0,
256 CMD_CTX_SWAP_DEFER1 = 1,
257 CMD_CTX_SWAP_DEFER2 = 2,
255 CMD_CTX_NO_SWAP = 3, 258 CMD_CTX_NO_SWAP = 3,
256}; 259};
257 260