summaryrefslogtreecommitdiffstats
path: root/arch/sparc/net
diff options
context:
space:
mode:
authorDavid Miller <davem@davemloft.net>2018-11-26 16:03:46 -0500
committerAlexei Starovoitov <ast@kernel.org>2018-11-26 20:48:36 -0500
commitc44768a33da81b4a0986e79bbf0588f1a0651dec (patch)
tree2a4e4f7f9205412e7f84e2d1101fa6ca78cd7931 /arch/sparc/net
parentfdac315d705a97e9edff1a40889fefd2df594422 (diff)
sparc: Fix JIT fused branch convergence.
On T4 and later sparc64 cpus we can use the fused compare and branch instruction. However, it can only be used if the branch destination is in the range of a signed 10-bit immediate offset. This amounts to 1024 instructions forwards or backwards. After the commit referenced in the Fixes: tag, the largest possible size program seen by the JIT explodes by a significant factor. As a result, convergence takes many more passes since the expanded "BPF_LDX | BPF_MSH | BPF_B" code sequence, for example, contains several embedded branch on condition instructions. On each pass, as new fused compare and branch instances suddenly become valid, this brings thousands more in range for the next pass. And so on and so forth. This is most clearly exemplified by "BPF_MAXINSNS: exec all MSH" which takes 35 passes to converge, and shrinks the image by about 64K. To decrease the cost of this number of convergence passes, do the convergence pass before we have the program image allocated, just like other JITs (such as x86) do. Fixes: e0cea7ce988c ("bpf: implement ld_abs/ld_ind in native bpf") Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'arch/sparc/net')
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c77
1 files changed, 49 insertions, 28 deletions
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 222785af550b..7217d6359643 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1425,12 +1425,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1425 struct bpf_prog *tmp, *orig_prog = prog; 1425 struct bpf_prog *tmp, *orig_prog = prog;
1426 struct sparc64_jit_data *jit_data; 1426 struct sparc64_jit_data *jit_data;
1427 struct bpf_binary_header *header; 1427 struct bpf_binary_header *header;
1428 u32 prev_image_size, image_size;
1428 bool tmp_blinded = false; 1429 bool tmp_blinded = false;
1429 bool extra_pass = false; 1430 bool extra_pass = false;
1430 struct jit_ctx ctx; 1431 struct jit_ctx ctx;
1431 u32 image_size;
1432 u8 *image_ptr; 1432 u8 *image_ptr;
1433 int pass; 1433 int pass, i;
1434 1434
1435 if (!prog->jit_requested) 1435 if (!prog->jit_requested)
1436 return orig_prog; 1436 return orig_prog;
@@ -1461,61 +1461,82 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1461 header = jit_data->header; 1461 header = jit_data->header;
1462 extra_pass = true; 1462 extra_pass = true;
1463 image_size = sizeof(u32) * ctx.idx; 1463 image_size = sizeof(u32) * ctx.idx;
1464 prev_image_size = image_size;
1465 pass = 1;
1464 goto skip_init_ctx; 1466 goto skip_init_ctx;
1465 } 1467 }
1466 1468
1467 memset(&ctx, 0, sizeof(ctx)); 1469 memset(&ctx, 0, sizeof(ctx));
1468 ctx.prog = prog; 1470 ctx.prog = prog;
1469 1471
1470 ctx.offset = kcalloc(prog->len, sizeof(unsigned int), GFP_KERNEL); 1472 ctx.offset = kmalloc_array(prog->len, sizeof(unsigned int), GFP_KERNEL);
1471 if (ctx.offset == NULL) { 1473 if (ctx.offset == NULL) {
1472 prog = orig_prog; 1474 prog = orig_prog;
1473 goto out_off; 1475 goto out_off;
1474 } 1476 }
1475 1477
1476 /* Fake pass to detect features used, and get an accurate assessment 1478 /* Longest sequence emitted is for bswap32, 12 instructions. Pre-cook
1477 * of what the final image size will be. 1479 * the offset array so that we converge faster.
1478 */ 1480 */
1479 if (build_body(&ctx)) { 1481 for (i = 0; i < prog->len; i++)
1480 prog = orig_prog; 1482 ctx.offset[i] = i * (12 * 4);
1481 goto out_off;
1482 }
1483 build_prologue(&ctx);
1484 build_epilogue(&ctx);
1485 1483
1486 /* Now we know the actual image size. */ 1484 prev_image_size = ~0U;
1487 image_size = sizeof(u32) * ctx.idx; 1485 for (pass = 1; pass < 40; pass++) {
1488 header = bpf_jit_binary_alloc(image_size, &image_ptr,
1489 sizeof(u32), jit_fill_hole);
1490 if (header == NULL) {
1491 prog = orig_prog;
1492 goto out_off;
1493 }
1494
1495 ctx.image = (u32 *)image_ptr;
1496skip_init_ctx:
1497 for (pass = 1; pass < 3; pass++) {
1498 ctx.idx = 0; 1486 ctx.idx = 0;
1499 1487
1500 build_prologue(&ctx); 1488 build_prologue(&ctx);
1501
1502 if (build_body(&ctx)) { 1489 if (build_body(&ctx)) {
1503 bpf_jit_binary_free(header);
1504 prog = orig_prog; 1490 prog = orig_prog;
1505 goto out_off; 1491 goto out_off;
1506 } 1492 }
1507
1508 build_epilogue(&ctx); 1493 build_epilogue(&ctx);
1509 1494
1510 if (bpf_jit_enable > 1) 1495 if (bpf_jit_enable > 1)
1511 pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass, 1496 pr_info("Pass %d: size = %u, seen = [%c%c%c%c%c%c]\n", pass,
1512 image_size - (ctx.idx * 4), 1497 ctx.idx * 4,
1513 ctx.tmp_1_used ? '1' : ' ', 1498 ctx.tmp_1_used ? '1' : ' ',
1514 ctx.tmp_2_used ? '2' : ' ', 1499 ctx.tmp_2_used ? '2' : ' ',
1515 ctx.tmp_3_used ? '3' : ' ', 1500 ctx.tmp_3_used ? '3' : ' ',
1516 ctx.saw_frame_pointer ? 'F' : ' ', 1501 ctx.saw_frame_pointer ? 'F' : ' ',
1517 ctx.saw_call ? 'C' : ' ', 1502 ctx.saw_call ? 'C' : ' ',
1518 ctx.saw_tail_call ? 'T' : ' '); 1503 ctx.saw_tail_call ? 'T' : ' ');
1504
1505 if (ctx.idx * 4 == prev_image_size)
1506 break;
1507 prev_image_size = ctx.idx * 4;
1508 cond_resched();
1509 }
1510
1511 /* Now we know the actual image size. */
1512 image_size = sizeof(u32) * ctx.idx;
1513 header = bpf_jit_binary_alloc(image_size, &image_ptr,
1514 sizeof(u32), jit_fill_hole);
1515 if (header == NULL) {
1516 prog = orig_prog;
1517 goto out_off;
1518 }
1519
1520 ctx.image = (u32 *)image_ptr;
1521skip_init_ctx:
1522 ctx.idx = 0;
1523
1524 build_prologue(&ctx);
1525
1526 if (build_body(&ctx)) {
1527 bpf_jit_binary_free(header);
1528 prog = orig_prog;
1529 goto out_off;
1530 }
1531
1532 build_epilogue(&ctx);
1533
1534 if (ctx.idx * 4 != prev_image_size) {
1535 pr_err("bpf_jit: Failed to converge, prev_size=%u size=%d\n",
1536 prev_image_size, ctx.idx * 4);
1537 bpf_jit_binary_free(header);
1538 prog = orig_prog;
1539 goto out_off;
1519 } 1540 }
1520 1541
1521 if (bpf_jit_enable > 1) 1542 if (bpf_jit_enable > 1)