Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/net/bpf_jit_32.c            |   77
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c        |   65
-rw-r--r--  arch/mips/net/ebpf_jit.c             |  104
-rw-r--r--  arch/powerpc/net/Makefile            |    2
-rw-r--r--  arch/powerpc/net/bpf_jit64.h         |   37
-rw-r--r--  arch/powerpc/net/bpf_jit_asm64.S     |  180
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c    |  109
-rw-r--r--  arch/s390/net/Makefile               |    2
-rw-r--r--  arch/s390/net/bpf_jit.S              |  116
-rw-r--r--  arch/s390/net/bpf_jit.h              |   20
-rw-r--r--  arch/s390/net/bpf_jit_comp.c         |  127
-rw-r--r--  arch/sparc/net/Makefile              |    5
-rw-r--r--  arch/sparc/net/bpf_jit_64.h          |   29
-rw-r--r--  arch/sparc/net/bpf_jit_asm_64.S      |  162
-rw-r--r--  arch/sparc/net/bpf_jit_comp_64.c     |   79
-rw-r--r--  arch/x86/Kconfig                     |    2
-rw-r--r--  arch/x86/include/asm/nospec-branch.h |   30
-rw-r--r--  arch/x86/net/Makefile                |    7
-rw-r--r--  arch/x86/net/bpf_jit.S               |  154
-rw-r--r--  arch/x86/net/bpf_jit_comp.c          |  343
-rw-r--r--  arch/x86/net/bpf_jit_comp32.c        | 2419
21 files changed, 2602 insertions(+), 1467 deletions(-)
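Every per-arch hunk below deletes the same two classic-BPF packet-load instructions, BPF_LD|BPF_ABS and BPF_LD|BPF_IND. As a rough C sketch of the semantics each JIT was open-coding (hedged: SIZE stands for the 1/2/4-byte access width, and the real interpreter's error handling is more involved):

	/* Sketch only: what R0 = ntohx(*(size *)(skb->data + off)) expands to.
	 * bpf_load_pointer() returns a pointer into skb->data, or into the
	 * caller-supplied buffer after a slow-path copy, or NULL on a bad
	 * offset. A NULL return terminates the program with R0 = 0, which is
	 * why every JIT below branches to its epilogue after the call. */
	u64 ld_abs(struct sk_buff *skb, s32 off)	/* BPF_IND: off += src */
	{
		u8 buf[4];
		void *p = bpf_load_pointer(skb, off, SIZE, buf);

		if (!p)
			return 0;
		switch (SIZE) {
		case 1: return *(u8 *)p;
		case 2: return get_unaligned_be16(p);	/* ntohs */
		case 4: return get_unaligned_be32(p);	/* ntohl */
		}
	}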
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index b5030e1a41d8..82689b999257 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1452,83 +1452,6 @@ exit:
 		emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
 		emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
 		break;
-	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
-	case BPF_LD | BPF_ABS | BPF_W:
-	case BPF_LD | BPF_ABS | BPF_H:
-	case BPF_LD | BPF_ABS | BPF_B:
-	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
-	case BPF_LD | BPF_IND | BPF_W:
-	case BPF_LD | BPF_IND | BPF_H:
-	case BPF_LD | BPF_IND | BPF_B:
-	{
-		const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */
-		const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/
-						     /* rtn value */
-		const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */
-		const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */
-		const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */
-		const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */
-		int size;
-
-		/* Setting up first argument */
-		emit(ARM_MOV_R(r0, r4), ctx);
-
-		/* Setting up second argument */
-		emit_a32_mov_i(r1, imm, false, ctx);
-		if (BPF_MODE(code) == BPF_IND)
-			emit_a32_alu_r(r1, src_lo, false, sstk, ctx,
-				       false, false, BPF_ADD);
-
-		/* Setting up third argument */
-		switch (BPF_SIZE(code)) {
-		case BPF_W:
-			size = 4;
-			break;
-		case BPF_H:
-			size = 2;
-			break;
-		case BPF_B:
-			size = 1;
-			break;
-		default:
-			return -EINVAL;
-		}
-		emit_a32_mov_i(r2, size, false, ctx);
-
-		/* Setting up fourth argument */
-		emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx);
-
-		/* Setting up function pointer to call */
-		emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx);
-		emit_blx_r(r6, ctx);
-
-		emit(ARM_EOR_R(r1, r1, r1), ctx);
-		/* Check if return address is NULL or not.
-		 * if NULL then jump to epilogue
-		 * else continue to load the value from retn address
-		 */
-		emit(ARM_CMP_I(r0, 0), ctx);
-		jmp_offset = epilogue_offset(ctx);
-		check_imm24(jmp_offset);
-		_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
-
-		/* Load value from the address */
-		switch (BPF_SIZE(code)) {
-		case BPF_W:
-			emit(ARM_LDR_I(r0, r0, 0), ctx);
-			emit_rev32(r0, r0, ctx);
-			break;
-		case BPF_H:
-			emit(ARM_LDRH_I(r0, r0, 0), ctx);
-			emit_rev16(r0, r0, ctx);
-			break;
-		case BPF_B:
-			emit(ARM_LDRB_I(r0, r0, 0), ctx);
-			/* No need to reverse */
-			break;
-		}
-		break;
-	}
 	/* ST: *(size *)(dst + off) = imm */
 	case BPF_ST | BPF_MEM | BPF_W:
 	case BPF_ST | BPF_MEM | BPF_H:
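The removed ARM case marshals four arguments (r0-r3) before the indirect call; the contract it relies on can be sketched as below. This matches the old bpf_load_pointer() wrapper from filter.h, but treat the exact signature as an assumption rather than a quote from this tree:

	/* Assumed helper contract: r0 = skb, r1 = offset k, r2 = size,
	 * r3 = scratch buffer on the JIT stack (SKB_BUFFER). Returns a
	 * readable pointer, or NULL, which routes to the epilogue above. */
	void *bpf_load_pointer(const struct sk_buff *skb, int k,
			       unsigned int size, void *buffer);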
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index a93350451e8e..0b40c8fb0706 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -723,71 +723,6 @@ emit_cond_jmp:
 		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
 		break;

-	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
-	case BPF_LD | BPF_ABS | BPF_W:
-	case BPF_LD | BPF_ABS | BPF_H:
-	case BPF_LD | BPF_ABS | BPF_B:
-	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
-	case BPF_LD | BPF_IND | BPF_W:
-	case BPF_LD | BPF_IND | BPF_H:
-	case BPF_LD | BPF_IND | BPF_B:
-	{
-		const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
-		const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
-		const u8 fp = bpf2a64[BPF_REG_FP];
-		const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
-		const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
-		const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
-		const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
-		const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
-		int size;
-
-		emit(A64_MOV(1, r1, r6), ctx);
-		emit_a64_mov_i(0, r2, imm, ctx);
-		if (BPF_MODE(code) == BPF_IND)
-			emit(A64_ADD(0, r2, r2, src), ctx);
-		switch (BPF_SIZE(code)) {
-		case BPF_W:
-			size = 4;
-			break;
-		case BPF_H:
-			size = 2;
-			break;
-		case BPF_B:
-			size = 1;
-			break;
-		default:
-			return -EINVAL;
-		}
-		emit_a64_mov_i64(r3, size, ctx);
-		emit(A64_SUB_I(1, r4, fp, ctx->stack_size), ctx);
-		emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
-		emit(A64_BLR(r5), ctx);
-		emit(A64_MOV(1, r0, A64_R(0)), ctx);
-
-		jmp_offset = epilogue_offset(ctx);
-		check_imm19(jmp_offset);
-		emit(A64_CBZ(1, r0, jmp_offset), ctx);
-		emit(A64_MOV(1, r5, r0), ctx);
-		switch (BPF_SIZE(code)) {
-		case BPF_W:
-			emit(A64_LDR32(r0, r5, A64_ZR), ctx);
-#ifndef CONFIG_CPU_BIG_ENDIAN
-			emit(A64_REV32(0, r0, r0), ctx);
-#endif
-			break;
-		case BPF_H:
-			emit(A64_LDRH(r0, r5, A64_ZR), ctx);
-#ifndef CONFIG_CPU_BIG_ENDIAN
-			emit(A64_REV16(0, r0, r0), ctx);
-#endif
-			break;
-		case BPF_B:
-			emit(A64_LDRB(r0, r5, A64_ZR), ctx);
-			break;
-		}
-		break;
-	}
 	default:
 		pr_err_once("unknown opcode %02x\n", code);
 		return -EINVAL;
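The #ifndef CONFIG_CPU_BIG_ENDIAN reversals above are plain network-to-host conversion: the loaded bytes are in network (big-endian) order, so only little-endian kernels need the swap. Equivalent C (a sketch):

	u32 v = *(u32 *)p;	/* bytes in network byte order */
	#ifndef CONFIG_CPU_BIG_ENDIAN
	v = swab32(v);		/* what the emitted A64_REV32 does */
	#endif
	/* v == ntohl(*(__be32 *)p) on either endianness */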
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 3e2798bfea4f..7ba7df9c28fc 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -1267,110 +1267,6 @@ jeq_common:
 			return -EINVAL;
 		break;

-	case BPF_LD | BPF_B | BPF_ABS:
-	case BPF_LD | BPF_H | BPF_ABS:
-	case BPF_LD | BPF_W | BPF_ABS:
-	case BPF_LD | BPF_DW | BPF_ABS:
-		ctx->flags |= EBPF_SAVE_RA;
-
-		gen_imm_to_reg(insn, MIPS_R_A1, ctx);
-		emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
-
-		if (insn->imm < 0) {
-			emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper);
-		} else {
-			emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
-			emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
-		}
-		goto ld_skb_common;
-
-	case BPF_LD | BPF_B | BPF_IND:
-	case BPF_LD | BPF_H | BPF_IND:
-	case BPF_LD | BPF_W | BPF_IND:
-	case BPF_LD | BPF_DW | BPF_IND:
-		ctx->flags |= EBPF_SAVE_RA;
-		src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
-		if (src < 0)
-			return src;
-		ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
-		if (ts == REG_32BIT_ZERO_EX) {
-			/* sign extend */
-			emit_instr(ctx, sll, MIPS_R_A1, src, 0);
-			src = MIPS_R_A1;
-		}
-		if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
-			emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm);
-		} else {
-			gen_imm_to_reg(insn, MIPS_R_AT, ctx);
-			emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src);
-		}
-		/* truncate to 32-bit int */
-		emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0);
-		emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
-		emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO);
-
-		emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper);
-		emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
-		emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
-		emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT);
-
-ld_skb_common:
-		emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
-		/* delay slot move */
-		emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO);
-
-		/* Check the error value */
-		b_off = b_imm(exit_idx, ctx);
-		if (is_bad_offset(b_off)) {
-			target = j_target(ctx, exit_idx);
-			if (target == (unsigned int)-1)
-				return -E2BIG;
-
-			if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
-				ctx->offsets[this_idx] |= OFFSETS_B_CONV;
-				ctx->long_b_conversion = 1;
-			}
-			emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3);
-			emit_instr(ctx, nop);
-			emit_instr(ctx, j, target);
-			emit_instr(ctx, nop);
-		} else {
-			emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off);
-			emit_instr(ctx, nop);
-		}
-
-#ifdef __BIG_ENDIAN
-		need_swap = false;
-#else
-		need_swap = true;
-#endif
-		dst = MIPS_R_V0;
-		switch (BPF_SIZE(insn->code)) {
-		case BPF_B:
-			emit_instr(ctx, lbu, dst, 0, MIPS_R_V0);
-			break;
-		case BPF_H:
-			emit_instr(ctx, lhu, dst, 0, MIPS_R_V0);
-			if (need_swap)
-				emit_instr(ctx, wsbh, dst, dst);
-			break;
-		case BPF_W:
-			emit_instr(ctx, lw, dst, 0, MIPS_R_V0);
-			if (need_swap) {
-				emit_instr(ctx, wsbh, dst, dst);
-				emit_instr(ctx, rotr, dst, dst, 16);
-			}
-			break;
-		case BPF_DW:
-			emit_instr(ctx, ld, dst, 0, MIPS_R_V0);
-			if (need_swap) {
-				emit_instr(ctx, dsbh, dst, dst);
-				emit_instr(ctx, dshd, dst, dst);
-			}
-			break;
-		}
-
-		break;
 	case BPF_ALU | BPF_END | BPF_FROM_BE:
 	case BPF_ALU | BPF_END | BPF_FROM_LE:
 		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
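Note the asymmetry in the removed MIPS code: for BPF_ABS the sign of insn->imm is known at JIT time, so the helper is chosen while compiling; for BPF_IND the offset is only known at run time, and the slt/movn pair selects it branchlessly. A hedged C sketch of that selection (helper signatures are deliberately loose here, not quoted from the tree):

	/* t9 holds the function to call; AT = (offset < 0) from the slt. */
	void *helper = ool_skb_header_pointer;		/* offset >= 0 path */
	if (offset < 0)					/* movn t9, t8, AT  */
		helper = bpf_internal_load_pointer_neg_helper;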
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index 02d369ca6a53..809f019d3cba 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -3,7 +3,7 @@
 # Arch-specific network modules
 #
 ifeq ($(CONFIG_PPC64),y)
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
 else
 obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
 endif
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 8bdef7ed28a8..3609be4692b3 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -20,7 +20,7 @@
 * with our redzone usage.
 *
 *		[	prev sp		] <-------------
- *		[   nv gpr save area	] 8*8		|
+ *		[   nv gpr save area	] 6*8		|
 *		[    tail_call_cnt	] 8		|
 *		[    local_tmp_var	] 8		|
 * fp (r31) -->	[   ebpf stack space	] upto 512	|
@@ -28,8 +28,8 @@
 * sp (r1) --->	[    stack pointer	] --------------
 */

-/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */
-#define BPF_PPC_STACK_SAVE	(8*8)
+/* for gpr non volatile registers BPG_REG_6 to 10 */
+#define BPF_PPC_STACK_SAVE	(6*8)
 /* for bpf JIT code internal usage */
 #define BPF_PPC_STACK_LOCALS	16
 /* stack frame excluding BPF stack, ensure this is quadword aligned */
@@ -39,10 +39,8 @@
 #ifndef __ASSEMBLY__

 /* BPF register usage */
-#define SKB_HLEN_REG	(MAX_BPF_JIT_REG + 0)
-#define SKB_DATA_REG	(MAX_BPF_JIT_REG + 1)
-#define TMP_REG_1	(MAX_BPF_JIT_REG + 2)
-#define TMP_REG_2	(MAX_BPF_JIT_REG + 3)
+#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)

 /* BPF to ppc register mappings */
 static const int b2p[] = {
@@ -63,40 +61,23 @@ static const int b2p[] = {
 	[BPF_REG_FP] = 31,
 	/* eBPF jit internal registers */
 	[BPF_REG_AX] = 2,
-	[SKB_HLEN_REG] = 25,
-	[SKB_DATA_REG] = 26,
 	[TMP_REG_1] = 9,
 	[TMP_REG_2] = 10
 };

-/* PPC NVR range -- update this if we ever use NVRs below r24 */
-#define BPF_PPC_NVR_MIN		24
-
-/* Assembly helpers */
-#define DECLARE_LOAD_FUNC(func)	u64 func(u64 r3, u64 r4);			\
-				u64 func##_negative_offset(u64 r3, u64 r4);	\
-				u64 func##_positive_offset(u64 r3, u64 r4);
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-
-#define CHOOSE_LOAD_FUNC(imm, func)						\
-			(imm < 0 ?						\
-			(imm >= SKF_LL_OFF ? func##_negative_offset : func) :	\
-			func##_positive_offset)
+/* PPC NVR range -- update this if we ever use NVRs below r27 */
+#define BPF_PPC_NVR_MIN		27

 #define SEEN_FUNC	0x1000 /* might call external helpers */
 #define SEEN_STACK	0x2000 /* uses BPF stack */
-#define SEEN_SKB	0x4000 /* uses sk_buff */
-#define SEEN_TAILCALL	0x8000 /* uses tail calls */
+#define SEEN_TAILCALL	0x4000 /* uses tail calls */

 struct codegen_context {
 	/*
 	 * This is used to track register usage as well
 	 * as calls to external helpers.
 	 * - register usage is tracked with corresponding
-	 * bits (r3-r10 and r25-r31)
+	 * bits (r3-r10 and r27-r31)
 	 * - rest of the bits can be used to track other
 	 * things -- for now, we use bits 16 to 23
 	 * encoded in SEEN_* macros above
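Spelled out, the deleted CHOOSE_LOAD_FUNC macro is a three-way split on the immediate (worked examples; SKF_LL_OFF is -0x200000):

	/* imm >= 0              -> func##_positive_offset (fast header path)
	 * SKF_LL_OFF <= imm < 0 -> func##_negative_offset (LL/NET offsets)
	 * imm < SKF_LL_OFF      -> func (generic entry; re-checks and errors)
	 * e.g. CHOOSE_LOAD_FUNC(14, sk_load_word)      == sk_load_word_positive_offset
	 *      CHOOSE_LOAD_FUNC(-0x1000, sk_load_word) == sk_load_word_negative_offset
	 */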
diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S
deleted file mode 100644
index 7e4c51430b84..000000000000
--- a/arch/powerpc/net/bpf_jit_asm64.S
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * bpf_jit_asm64.S: Packet/header access helper functions
- * for PPC64 BPF compiler.
- *
- * Copyright 2016, Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
- *		   IBM Corporation
- *
- * Based on bpf_jit_asm.S by Matt Evans
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/ptrace.h>
-#include "bpf_jit64.h"
-
-/*
- * All of these routines are called directly from generated code,
- * with the below register usage:
- * r27		skb pointer (ctx)
- * r25		skb header length
- * r26		skb->data pointer
- * r4		offset
- *
- * Result is passed back in:
- * r8		data read in host endian format (accumulator)
- *
- * r9 is used as a temporary register
- */
-
-#define r_skb	r27
-#define r_hlen	r25
-#define r_data	r26
-#define r_off	r4
-#define r_val	r8
-#define r_tmp	r9
-
-_GLOBAL_TOC(sk_load_word)
-	cmpdi	r_off, 0
-	blt	bpf_slow_path_word_neg
-	b	sk_load_word_positive_offset
-
-_GLOBAL_TOC(sk_load_word_positive_offset)
-	/* Are we accessing past headlen? */
-	subi	r_tmp, r_hlen, 4
-	cmpd	r_tmp, r_off
-	blt	bpf_slow_path_word
-	/* Nope, just hitting the header.  cr0 here is eq or gt! */
-	LWZX_BE	r_val, r_data, r_off
-	blr	/* Return success, cr0 != LT */
-
-_GLOBAL_TOC(sk_load_half)
-	cmpdi	r_off, 0
-	blt	bpf_slow_path_half_neg
-	b	sk_load_half_positive_offset
-
-_GLOBAL_TOC(sk_load_half_positive_offset)
-	subi	r_tmp, r_hlen, 2
-	cmpd	r_tmp, r_off
-	blt	bpf_slow_path_half
-	LHZX_BE	r_val, r_data, r_off
-	blr
-
-_GLOBAL_TOC(sk_load_byte)
-	cmpdi	r_off, 0
-	blt	bpf_slow_path_byte_neg
-	b	sk_load_byte_positive_offset
-
-_GLOBAL_TOC(sk_load_byte_positive_offset)
-	cmpd	r_hlen, r_off
-	ble	bpf_slow_path_byte
-	lbzx	r_val, r_data, r_off
-	blr
-
-/*
- * Call out to skb_copy_bits:
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define bpf_slow_path_common(SIZE)					\
-	mflr	r0;							\
-	std	r0, PPC_LR_STKOFF(r1);					\
-	stdu	r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1);	\
-	mr	r3, r_skb;						\
-	/* r4 = r_off as passed */					\
-	addi	r5, r1, STACK_FRAME_MIN_SIZE;				\
-	li	r6, SIZE;						\
-	bl	skb_copy_bits;						\
-	nop;								\
-	/* save r5 */							\
-	addi	r5, r1, STACK_FRAME_MIN_SIZE;				\
-	/* r3 = 0 on success */						\
-	addi	r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS;	\
-	ld	r0, PPC_LR_STKOFF(r1);					\
-	mtlr	r0;							\
-	cmpdi	r3, 0;							\
-	blt	bpf_error;	/* cr0 = LT */
-
-bpf_slow_path_word:
-	bpf_slow_path_common(4)
-	/* Data value is on stack, and cr0 != LT */
-	LWZX_BE	r_val, 0, r5
-	blr
-
-bpf_slow_path_half:
-	bpf_slow_path_common(2)
-	LHZX_BE	r_val, 0, r5
-	blr
-
-bpf_slow_path_byte:
-	bpf_slow_path_common(1)
-	lbzx	r_val, 0, r5
-	blr
-
-/*
- * Call out to bpf_internal_load_pointer_neg_helper
- */
-#define sk_negative_common(SIZE)				\
-	mflr	r0;						\
-	std	r0, PPC_LR_STKOFF(r1);				\
-	stdu	r1, -STACK_FRAME_MIN_SIZE(r1);			\
-	mr	r3, r_skb;					\
-	/* r4 = r_off, as passed */				\
-	li	r5, SIZE;					\
-	bl	bpf_internal_load_pointer_neg_helper;		\
-	nop;							\
-	addi	r1, r1, STACK_FRAME_MIN_SIZE;			\
-	ld	r0, PPC_LR_STKOFF(r1);				\
-	mtlr	r0;						\
-	/* R3 != 0 on success */				\
-	cmpldi	r3, 0;						\
-	beq	bpf_error_slow;	/* cr0 = EQ */
-
-bpf_slow_path_word_neg:
-	lis	r_tmp, -32	/* SKF_LL_OFF */
-	cmpd	r_off, r_tmp	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	b	sk_load_word_negative_offset
-
-_GLOBAL_TOC(sk_load_word_negative_offset)
-	sk_negative_common(4)
-	LWZX_BE	r_val, 0, r3
-	blr
-
-bpf_slow_path_half_neg:
-	lis	r_tmp, -32	/* SKF_LL_OFF */
-	cmpd	r_off, r_tmp	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	b	sk_load_half_negative_offset
-
-_GLOBAL_TOC(sk_load_half_negative_offset)
-	sk_negative_common(2)
-	LHZX_BE	r_val, 0, r3
-	blr
-
-bpf_slow_path_byte_neg:
-	lis	r_tmp, -32	/* SKF_LL_OFF */
-	cmpd	r_off, r_tmp	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	b	sk_load_byte_negative_offset
-
-_GLOBAL_TOC(sk_load_byte_negative_offset)
-	sk_negative_common(1)
-	lbzx	r_val, 0, r3
-	blr
-
-bpf_error_slow:
-	/* fabricate a cr0 = lt */
-	li	r_tmp, -1
-	cmpdi	r_tmp, 0
-bpf_error:
-	/*
-	 * Entered with cr0 = lt
-	 * Generated code will 'blt epilogue', returning 0.
-	 */
-	li	r_val, 0
-	blr
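The deleted file's fast paths read straight from the linear skb header; everything else funnels into skb_copy_bits() with a small stack buffer. The word slow path in C (a minimal sketch, error handling condensed to the cr0 convention described in the file's own comments):

	u32 slow_path_word(const struct sk_buff *skb, int off)
	{
		__be32 tmp;	/* lives in the freshly allocated stack frame */

		if (skb_copy_bits(skb, off, &tmp, 4) < 0)
			return 0;		/* cr0 = LT: generated code exits */
		return be32_to_cpu(tmp);	/* LWZX_BE: big-endian load */
	}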
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0ef3d9580e98..1bdb1aff0619 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
 *		[	prev sp		] <-------------
 *		[	  ...		]		|
 * sp (r1) --->	[    stack pointer	] --------------
- *		[   nv gpr save area	] 8*8
+ *		[   nv gpr save area	] 6*8
 *		[    tail_call_cnt	] 8
 *		[    local_tmp_var	] 8
 *		[   unused red zone	] 208 bytes protected
@@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 	BUG();
 }

-static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
-{
-	/*
-	 * Load skb->len and skb->data_len
-	 * r3 points to skb
-	 */
-	PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len));
-	PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len));
-	/* header_len = len - data_len */
-	PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]);
-
-	/* skb->data pointer */
-	PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
-}
-
 static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
 	int i;
@@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 		if (bpf_is_seen_register(ctx, i))
 			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

-	/*
-	 * Save additional non-volatile regs if we cache skb
-	 * Also, setup skb data
-	 */
-	if (ctx->seen & SEEN_SKB) {
-		PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
-		PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
-		bpf_jit_emit_skb_loads(image, ctx);
-	}
-
 	/* Setup frame pointer to point to the bpf stack area */
 	if (bpf_is_seen_register(ctx, BPF_REG_FP))
 		PPC_ADDI(b2p[BPF_REG_FP], 1,
@@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
 		if (bpf_is_seen_register(ctx, i))
 			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

-	/* Restore non-volatile registers used for skb cache */
-	if (ctx->seen & SEEN_SKB) {
-		PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
-		PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
-				bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
-	}
-
 	/* Tear down our stack frame */
 	if (bpf_has_stack_frame(ctx)) {
 		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
@@ -753,23 +718,10 @@ emit_clear:
 			ctx->seen |= SEEN_FUNC;
 			func = (u8 *) __bpf_call_base + imm;

-			/* Save skb pointer if we need to re-cache skb data */
-			if ((ctx->seen & SEEN_SKB) &&
-			    bpf_helper_changes_pkt_data(func))
-				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
-
 			bpf_jit_emit_func_call(image, ctx, (u64)func);

 			/* move return value from r3 to BPF_REG_0 */
 			PPC_MR(b2p[BPF_REG_0], 3);
-
-			/* refresh skb cache */
-			if ((ctx->seen & SEEN_SKB) &&
-			    bpf_helper_changes_pkt_data(func)) {
-				/* reload skb pointer to r3 */
-				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
-				bpf_jit_emit_skb_loads(image, ctx);
-			}
 			break;

 		/*
@@ -887,65 +839,6 @@ cond_branch:
 			break;

 		/*
-		 * Loads from packet header/data
-		 * Assume 32-bit input value in imm and X (src_reg)
-		 */
-
-		/* Absolute loads */
-		case BPF_LD | BPF_W | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
-			goto common_load_abs;
-		case BPF_LD | BPF_H | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
-			goto common_load_abs;
-		case BPF_LD | BPF_B | BPF_ABS:
-			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
-		common_load_abs:
-			/*
-			 * Load from [imm]
-			 * Load into r4, which can just be passed onto
-			 * skb load helpers as the second parameter
-			 */
-			PPC_LI32(4, imm);
-			goto common_load;
-
-		/* Indirect loads */
-		case BPF_LD | BPF_W | BPF_IND:
-			func = (u8 *)sk_load_word;
-			goto common_load_ind;
-		case BPF_LD | BPF_H | BPF_IND:
-			func = (u8 *)sk_load_half;
-			goto common_load_ind;
-		case BPF_LD | BPF_B | BPF_IND:
-			func = (u8 *)sk_load_byte;
-		common_load_ind:
-			/*
-			 * Load from [src_reg + imm]
-			 * Treat src_reg as a 32-bit value
-			 */
-			PPC_EXTSW(4, src_reg);
-			if (imm) {
-				if (imm >= -32768 && imm < 32768)
-					PPC_ADDI(4, 4, IMM_L(imm));
-				else {
-					PPC_LI32(b2p[TMP_REG_1], imm);
-					PPC_ADD(4, 4, b2p[TMP_REG_1]);
-				}
-			}
-
-		common_load:
-			ctx->seen |= SEEN_SKB;
-			ctx->seen |= SEEN_FUNC;
-			bpf_jit_emit_func_call(image, ctx, (u64)func);
-
-			/*
-			 * Helper returns 'lt' condition on error, and an
-			 * appropriate return value in BPF_REG_0
-			 */
-			PPC_BCC(COND_LT, exit_addr);
-			break;
-
-		/*
 		 * Tail call
 		 */
 		case BPF_JMP | BPF_TAIL_CALL:
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
index e0d5f245e42b..d4663b4bf509 100644
--- a/arch/s390/net/Makefile
+++ b/arch/s390/net/Makefile
@@ -2,4 +2,4 @@
 #
 # Arch-specific network modules
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
deleted file mode 100644
index 25bb4643c4f4..000000000000
--- a/arch/s390/net/bpf_jit.S
+++ /dev/null
@@ -1,116 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * BPF Jit compiler for s390, help functions.
- *
- * Copyright IBM Corp. 2012,2015
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include "bpf_jit.h"
-
-/*
- * Calling convention:
- * registers %r7-%r10, %r11,%r13, and %r15 are call saved
- *
- * Input (64 bit):
- *   %r3 (%b2) = offset into skb data
- *   %r6 (%b5) = return address
- *   %r7 (%b6) = skb pointer
- *   %r12      = skb data pointer
- *
- * Output:
- *   %r14= %b0 = return value (read skb value)
- *
- * Work registers: %r2,%r4,%r5,%r14
- *
- * skb_copy_bits takes 4 parameters:
- *   %r2 = skb pointer
- *   %r3 = offset into skb data
- *   %r4 = pointer to temp buffer
- *   %r5 = length to copy
- *   Return value in %r2: 0 = ok
- *
- * bpf_internal_load_pointer_neg_helper takes 3 parameters:
- *   %r2 = skb pointer
- *   %r3 = offset into data
- *   %r4 = length to copy
- *   Return value in %r2: Pointer to data
- */
-
-#define SKF_MAX_NEG_OFF	-0x200000	/* SKF_LL_OFF from filter.h */
-
-/*
- * Load SIZE bytes from SKB
- */
-#define sk_load_common(NAME, SIZE, LOAD)				\
-ENTRY(sk_load_##NAME);							\
-	ltgr	%r3,%r3;		/* Is offset negative? */	\
-	jl	sk_load_##NAME##_slow_neg;				\
-ENTRY(sk_load_##NAME##_pos);						\
-	aghi	%r3,SIZE;		/* Offset + SIZE */		\
-	clg	%r3,STK_OFF_HLEN(%r15);	/* Offset + SIZE > hlen? */	\
-	jh	sk_load_##NAME##_slow;					\
-	LOAD	%r14,-SIZE(%r3,%r12);	/* Get data from skb */		\
-	b	OFF_OK(%r6);		/* Return */			\
-									\
-sk_load_##NAME##_slow:;							\
-	lgr	%r2,%r7;		/* Arg1 = skb pointer */	\
-	aghi	%r3,-SIZE;		/* Arg2 = offset */		\
-	la	%r4,STK_OFF_TMP(%r15);	/* Arg3 = temp bufffer */	\
-	lghi	%r5,SIZE;		/* Arg4 = size */		\
-	brasl	%r14,skb_copy_bits;	/* Get data from skb */		\
-	LOAD	%r14,STK_OFF_TMP(%r15);	/* Load from temp bufffer */	\
-	ltgr	%r2,%r2;		/* Set cc to (%r2 != 0) */	\
-	br	%r6;			/* Return */
-
-sk_load_common(word, 4, llgf)	/* r14 = *(u32 *) (skb->data+offset) */
-sk_load_common(half, 2, llgh)	/* r14 = *(u16 *) (skb->data+offset) */
-
-/*
- * Load 1 byte from SKB (optimized version)
- */
-/* r14 = *(u8 *) (skb->data+offset) */
-ENTRY(sk_load_byte)
-	ltgr	%r3,%r3			# Is offset negative?
-	jl	sk_load_byte_slow_neg
-ENTRY(sk_load_byte_pos)
-	clg	%r3,STK_OFF_HLEN(%r15)	# Offset >= hlen?
-	jnl	sk_load_byte_slow
-	llgc	%r14,0(%r3,%r12)	# Get byte from skb
-	b	OFF_OK(%r6)		# Return OK
-
-sk_load_byte_slow:
-	lgr	%r2,%r7			# Arg1 = skb pointer
-					# Arg2 = offset
-	la	%r4,STK_OFF_TMP(%r15)	# Arg3 = pointer to temp buffer
-	lghi	%r5,1			# Arg4 = size (1 byte)
-	brasl	%r14,skb_copy_bits	# Get data from skb
-	llgc	%r14,STK_OFF_TMP(%r15)	# Load result from temp buffer
-	ltgr	%r2,%r2			# Set cc to (%r2 != 0)
-	br	%r6			# Return cc
-
-#define sk_negative_common(NAME, SIZE, LOAD)				\
-sk_load_##NAME##_slow_neg:;						\
-	cgfi	%r3,SKF_MAX_NEG_OFF;					\
-	jl	bpf_error;						\
-	lgr	%r2,%r7;		/* Arg1 = skb pointer */	\
-					/* Arg2 = offset */		\
-	lghi	%r4,SIZE;		/* Arg3 = size */		\
-	brasl	%r14,bpf_internal_load_pointer_neg_helper;		\
-	ltgr	%r2,%r2;						\
-	jz	bpf_error;						\
-	LOAD	%r14,0(%r2);		/* Get data from pointer */	\
-	xr	%r3,%r3;		/* Set cc to zero */		\
-	br	%r6;			/* Return cc */
-
-sk_negative_common(word, 4, llgf)
-sk_negative_common(half, 2, llgh)
-sk_negative_common(byte, 1, llgc)
-
-bpf_error:
-# force a return 0 from jit handler
-	ltgr	%r15,%r15	# Set condition code
-	br	%r6
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
index 5e1e5133132d..7822ea92e54a 100644
--- a/arch/s390/net/bpf_jit.h
+++ b/arch/s390/net/bpf_jit.h
@@ -16,9 +16,6 @@
 #include <linux/filter.h>
 #include <linux/types.h>

-extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[];
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
-
 #endif /* __ASSEMBLY__ */

 /*
@@ -36,15 +33,6 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
 *	      |		      |     |
 *	      |   BPF stack   |     |
 *	      |		      |     |
- *	      +---------------+     |
- *	      | 8 byte skbp   |     |
- * R15+176 -> +---------------+     |
- *	      | 8 byte hlen   |     |
- * R15+168 -> +---------------+     |
- *	      | 4 byte align  |     |
- *	      +---------------+     |
- *	      | 4 byte temp   |     |
- *	      | for bpf_jit.S |     |
 * R15+160 -> +---------------+     |
 *	      | new backchain |     |
 * R15+152 -> +---------------+     |
@@ -57,17 +45,11 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
 * The stack size used by the BPF program ("BPF stack" above) is passed
 * via "aux->stack_depth".
 */
-#define STK_SPACE_ADD	(8 + 8 + 4 + 4 + 160)
+#define STK_SPACE_ADD	(160)
 #define STK_160_UNUSED	(160 - 12 * 8)
 #define STK_OFF		(STK_SPACE_ADD - STK_160_UNUSED)
-#define STK_OFF_TMP	160 /* Offset of tmp buffer on stack */
-#define STK_OFF_HLEN	168 /* Offset of SKB header length on stack */
-#define STK_OFF_SKBP	176 /* Offset of SKB pointer on stack */

 #define STK_OFF_R6	(160 - 11 * 8)	/* Offset of r6 on stack */
 #define STK_OFF_TCCNT	(160 - 12 * 8)	/* Offset of tail_call_cnt on stack */

-/* Offset to skip condition code check */
-#define OFF_OK		4
-
 #endif /* __ARCH_S390_NET_BPF_JIT_H */
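Plugging the numbers into the defines shows what the deleted skb scratch area cost per frame (a sketch of the arithmetic only):

	/* STK_160_UNUSED = 160 - 12*8         = 64
	 * STK_OFF, old   = (8+8+4+4+160) - 64 = 120
	 * STK_OFF, new   = 160 - 64           = 96   -> 24 bytes saved
	 */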
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 78a19c93b380..b020bea040b7 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -47,23 +47,21 @@ struct bpf_jit {

 #define BPF_SIZE_MAX	0xffff	/* Max size for program (16 bit branches) */

-#define SEEN_SKB	1	/* skb access */
-#define SEEN_MEM	2	/* use mem[] for temporary storage */
-#define SEEN_RET0	4	/* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL	8	/* code uses literals */
-#define SEEN_FUNC	16	/* calls C functions */
-#define SEEN_TAIL_CALL	32	/* code uses tail calls */
-#define SEEN_REG_AX	64	/* code uses constant blinding */
-#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)
+#define SEEN_MEM	(1 << 0)	/* use mem[] for temporary storage */
+#define SEEN_RET0	(1 << 1)	/* ret0_ip points to a valid return 0 */
+#define SEEN_LITERAL	(1 << 2)	/* code uses literals */
+#define SEEN_FUNC	(1 << 3)	/* calls C functions */
+#define SEEN_TAIL_CALL	(1 << 4)	/* code uses tail calls */
+#define SEEN_REG_AX	(1 << 5)	/* code uses constant blinding */
+#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)

 /*
  * s390 registers
  */
 #define REG_W0		(MAX_BPF_JIT_REG + 0)	/* Work register 1 (even) */
 #define REG_W1		(MAX_BPF_JIT_REG + 1)	/* Work register 2 (odd) */
-#define REG_SKB_DATA	(MAX_BPF_JIT_REG + 2)	/* SKB data register */
-#define REG_L		(MAX_BPF_JIT_REG + 3)	/* Literal pool register */
-#define REG_15		(MAX_BPF_JIT_REG + 4)	/* Register 15 */
+#define REG_L		(MAX_BPF_JIT_REG + 2)	/* Literal pool register */
+#define REG_15		(MAX_BPF_JIT_REG + 3)	/* Register 15 */
 #define REG_0		REG_W0			/* Register 0 */
 #define REG_1		REG_W1			/* Register 1 */
 #define REG_2		BPF_REG_1		/* Register 2 */
@@ -88,10 +86,8 @@ static const int reg2hex[] = {
 	[BPF_REG_9]	= 10,
 	/* BPF stack pointer */
 	[BPF_REG_FP]	= 13,
-	/* Register for blinding (shared with REG_SKB_DATA) */
+	/* Register for blinding */
 	[BPF_REG_AX]	= 12,
-	/* SKB data pointer */
-	[REG_SKB_DATA]	= 12,
 	/* Work registers for s390x backend */
 	[REG_W0]	= 0,
 	[REG_W1]	= 1,
@@ -385,27 +381,6 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
 }

 /*
- * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
- * we store the SKB header length on the stack and the SKB data
- * pointer in REG_SKB_DATA if BPF_REG_AX is not used.
- */
-static void emit_load_skb_data_hlen(struct bpf_jit *jit)
-{
-	/* Header length: llgf %w1,<len>(%b1) */
-	EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
-		      offsetof(struct sk_buff, len));
-	/* s %w1,<data_len>(%b1) */
-	EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
-		   offsetof(struct sk_buff, data_len));
-	/* stg %w1,ST_OFF_HLEN(%r0,%r15) */
-	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN);
-	if (!(jit->seen & SEEN_REG_AX))
-		/* lg %skb_data,data_off(%b1) */
-		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
-			      BPF_REG_1, offsetof(struct sk_buff, data));
-}
-
-/*
  * Emit function prologue
  *
  * Save registers and create stack frame if necessary.
@@ -445,12 +420,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
 			      REG_15, 152);
 	}
-	if (jit->seen & SEEN_SKB) {
-		emit_load_skb_data_hlen(jit);
-		/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
-		EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
-			      STK_OFF_SKBP);
-	}
 }

 /*
@@ -483,12 +452,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 {
 	struct bpf_insn *insn = &fp->insnsi[i];
 	int jmp_off, last, insn_count = 1;
-	unsigned int func_addr, mask;
 	u32 dst_reg = insn->dst_reg;
 	u32 src_reg = insn->src_reg;
 	u32 *addrs = jit->addrs;
 	s32 imm = insn->imm;
 	s16 off = insn->off;
+	unsigned int mask;

 	if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
 		jit->seen |= SEEN_REG_AX;
@@ -970,13 +939,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 		EMIT2(0x0d00, REG_14, REG_W1);
 		/* lgr %b0,%r2: load return value into %b0 */
 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
-		if ((jit->seen & SEEN_SKB) &&
-		    bpf_helper_changes_pkt_data((void *)func)) {
-			/* lg %b1,ST_OFF_SKBP(%r15) */
-			EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
-				      REG_15, STK_OFF_SKBP);
-			emit_load_skb_data_hlen(jit);
-		}
 		break;
 	}
 	case BPF_JMP | BPF_TAIL_CALL:
@@ -1176,73 +1138,6 @@ branch_oc:
 		jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
 		EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
 		break;
-	/*
-	 * BPF_LD
-	 */
-	case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */
-	case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */
-		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
-			func_addr = __pa(sk_load_byte_pos);
-		else
-			func_addr = __pa(sk_load_byte);
-		goto call_fn;
-	case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */
-	case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */
-		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
-			func_addr = __pa(sk_load_half_pos);
-		else
-			func_addr = __pa(sk_load_half);
-		goto call_fn;
-	case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */
-	case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */
-		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
-			func_addr = __pa(sk_load_word_pos);
-		else
-			func_addr = __pa(sk_load_word);
-		goto call_fn;
-call_fn:
-		jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC;
-		REG_SET_SEEN(REG_14); /* Return address of possible func call */
-
-		/*
-		 * Implicit input:
-		 *  BPF_REG_6	 (R7) : skb pointer
-		 *  REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX)
-		 *
-		 * Calculated input:
-		 *  BPF_REG_2	 (R3) : offset of byte(s) to fetch in skb
-		 *  BPF_REG_5	 (R6) : return address
-		 *
-		 * Output:
-		 *  BPF_REG_0	 (R14): data read from skb
-		 *
-		 * Scratch registers (BPF_REG_1-5)
-		 */
-
-		/* Call function: llilf %w1,func_addr  */
-		EMIT6_IMM(0xc00f0000, REG_W1, func_addr);
-
-		/* Offset: lgfi %b2,imm */
-		EMIT6_IMM(0xc0010000, BPF_REG_2, imm);
-		if (BPF_MODE(insn->code) == BPF_IND)
-			/* agfr %b2,%src (%src is s32 here) */
-			EMIT4(0xb9180000, BPF_REG_2, src_reg);
-
-		/* Reload REG_SKB_DATA if BPF_REG_AX is used */
-		if (jit->seen & SEEN_REG_AX)
-			/* lg %skb_data,data_off(%b6) */
-			EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
-				      BPF_REG_6, offsetof(struct sk_buff, data));
-		/* basr %b5,%w1 (%b5 is call saved) */
-		EMIT2(0x0d00, BPF_REG_5, REG_W1);
-
-		/*
-		 * Note: For fast access we jump directly after the
-		 * jnz instruction from bpf_jit.S
-		 */
-		/* jnz <ret0> */
-		EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg);
-		break;
 	default: /* too complex, give up */
 		pr_err("Unknown opcode %02x\n", insn->code);
 		return -1;
diff --git a/arch/sparc/net/Makefile b/arch/sparc/net/Makefile
index 76fa8e95b721..d32aac3a25b8 100644
--- a/arch/sparc/net/Makefile
+++ b/arch/sparc/net/Makefile
@@ -1,4 +1,7 @@
 #
 # Arch-specific network modules
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_$(BITS).o bpf_jit_comp_$(BITS).o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp_$(BITS).o
+ifeq ($(BITS),32)
+obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_32.o
+endif
diff --git a/arch/sparc/net/bpf_jit_64.h b/arch/sparc/net/bpf_jit_64.h
index 428f7fd19175..fbc836f1c51c 100644
--- a/arch/sparc/net/bpf_jit_64.h
+++ b/arch/sparc/net/bpf_jit_64.h
@@ -33,35 +33,6 @@
 #define I5	0x1d
 #define FP	0x1e
 #define I7	0x1f
-
-#define r_SKB		L0
-#define r_HEADLEN	L4
-#define r_SKB_DATA	L5
-#define r_TMP		G1
-#define r_TMP2		G3
-
-/* assembly code in arch/sparc/net/bpf_jit_asm_64.S */
-extern u32 bpf_jit_load_word[];
-extern u32 bpf_jit_load_half[];
-extern u32 bpf_jit_load_byte[];
-extern u32 bpf_jit_load_byte_msh[];
-extern u32 bpf_jit_load_word_positive_offset[];
-extern u32 bpf_jit_load_half_positive_offset[];
-extern u32 bpf_jit_load_byte_positive_offset[];
-extern u32 bpf_jit_load_byte_msh_positive_offset[];
-extern u32 bpf_jit_load_word_negative_offset[];
-extern u32 bpf_jit_load_half_negative_offset[];
-extern u32 bpf_jit_load_byte_negative_offset[];
-extern u32 bpf_jit_load_byte_msh_negative_offset[];
-
-#else
-#define r_RESULT	%o0
-#define r_SKB		%o0
-#define r_OFF		%o1
-#define r_HEADLEN	%l4
-#define r_SKB_DATA	%l5
-#define r_TMP		%g1
-#define r_TMP2		%g3
 #endif

 #endif /* _BPF_JIT_H */
diff --git a/arch/sparc/net/bpf_jit_asm_64.S b/arch/sparc/net/bpf_jit_asm_64.S deleted file mode 100644 index 7177867052a1..000000000000 --- a/arch/sparc/net/bpf_jit_asm_64.S +++ /dev/null | |||
| @@ -1,162 +0,0 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #include <asm/ptrace.h> | ||
| 3 | |||
| 4 | #include "bpf_jit_64.h" | ||
| 5 | |||
| 6 | #define SAVE_SZ 176 | ||
| 7 | #define SCRATCH_OFF STACK_BIAS + 128 | ||
| 8 | #define BE_PTR(label) be,pn %xcc, label | ||
| 9 | #define SIGN_EXTEND(reg) sra reg, 0, reg | ||
| 10 | |||
| 11 | #define SKF_MAX_NEG_OFF (-0x200000) /* SKF_LL_OFF from filter.h */ | ||
| 12 | |||
| 13 | .text | ||
| 14 | .globl bpf_jit_load_word | ||
| 15 | bpf_jit_load_word: | ||
| 16 | cmp r_OFF, 0 | ||
| 17 | bl bpf_slow_path_word_neg | ||
| 18 | nop | ||
| 19 | .globl bpf_jit_load_word_positive_offset | ||
| 20 | bpf_jit_load_word_positive_offset: | ||
| 21 | sub r_HEADLEN, r_OFF, r_TMP | ||
| 22 | cmp r_TMP, 3 | ||
| 23 | ble bpf_slow_path_word | ||
| 24 | add r_SKB_DATA, r_OFF, r_TMP | ||
| 25 | andcc r_TMP, 3, %g0 | ||
| 26 | bne load_word_unaligned | ||
| 27 | nop | ||
| 28 | retl | ||
| 29 | ld [r_TMP], r_RESULT | ||
| 30 | load_word_unaligned: | ||
| 31 | ldub [r_TMP + 0x0], r_OFF | ||
| 32 | ldub [r_TMP + 0x1], r_TMP2 | ||
| 33 | sll r_OFF, 8, r_OFF | ||
| 34 | or r_OFF, r_TMP2, r_OFF | ||
| 35 | ldub [r_TMP + 0x2], r_TMP2 | ||
| 36 | sll r_OFF, 8, r_OFF | ||
| 37 | or r_OFF, r_TMP2, r_OFF | ||
| 38 | ldub [r_TMP + 0x3], r_TMP2 | ||
| 39 | sll r_OFF, 8, r_OFF | ||
| 40 | retl | ||
| 41 | or r_OFF, r_TMP2, r_RESULT | ||
| 42 | |||
| 43 | .globl bpf_jit_load_half | ||
| 44 | bpf_jit_load_half: | ||
| 45 | cmp r_OFF, 0 | ||
| 46 | bl bpf_slow_path_half_neg | ||
| 47 | nop | ||
| 48 | .globl bpf_jit_load_half_positive_offset | ||
| 49 | bpf_jit_load_half_positive_offset: | ||
| 50 | sub r_HEADLEN, r_OFF, r_TMP | ||
| 51 | cmp r_TMP, 1 | ||
| 52 | ble bpf_slow_path_half | ||
| 53 | add r_SKB_DATA, r_OFF, r_TMP | ||
| 54 | andcc r_TMP, 1, %g0 | ||
| 55 | bne load_half_unaligned | ||
| 56 | nop | ||
| 57 | retl | ||
| 58 | lduh [r_TMP], r_RESULT | ||
| 59 | load_half_unaligned: | ||
| 60 | ldub [r_TMP + 0x0], r_OFF | ||
| 61 | ldub [r_TMP + 0x1], r_TMP2 | ||
| 62 | sll r_OFF, 8, r_OFF | ||
| 63 | retl | ||
| 64 | or r_OFF, r_TMP2, r_RESULT | ||
| 65 | |||
| 66 | .globl bpf_jit_load_byte | ||
| 67 | bpf_jit_load_byte: | ||
| 68 | cmp r_OFF, 0 | ||
| 69 | bl bpf_slow_path_byte_neg | ||
| 70 | nop | ||
| 71 | .globl bpf_jit_load_byte_positive_offset | ||
| 72 | bpf_jit_load_byte_positive_offset: | ||
| 73 | cmp r_OFF, r_HEADLEN | ||
| 74 | bge bpf_slow_path_byte | ||
| 75 | nop | ||
| 76 | retl | ||
| 77 | ldub [r_SKB_DATA + r_OFF], r_RESULT | ||
| 78 | |||
| 79 | #define bpf_slow_path_common(LEN) \ | ||
| 80 | save %sp, -SAVE_SZ, %sp; \ | ||
| 81 | mov %i0, %o0; \ | ||
| 82 | mov %i1, %o1; \ | ||
| 83 | add %fp, SCRATCH_OFF, %o2; \ | ||
| 84 | call skb_copy_bits; \ | ||
| 85 | mov (LEN), %o3; \ | ||
| 86 | cmp %o0, 0; \ | ||
| 87 | restore; | ||
| 88 | |||
| 89 | bpf_slow_path_word: | ||
| 90 | bpf_slow_path_common(4) | ||
| 91 | bl bpf_error | ||
| 92 | ld [%sp + SCRATCH_OFF], r_RESULT | ||
| 93 | retl | ||
| 94 | nop | ||
| 95 | bpf_slow_path_half: | ||
| 96 | bpf_slow_path_common(2) | ||
| 97 | bl bpf_error | ||
| 98 | lduh [%sp + SCRATCH_OFF], r_RESULT | ||
| 99 | retl | ||
| 100 | nop | ||
| 101 | bpf_slow_path_byte: | ||
| 102 | bpf_slow_path_common(1) | ||
| 103 | bl bpf_error | ||
| 104 | ldub [%sp + SCRATCH_OFF], r_RESULT | ||
| 105 | retl | ||
| 106 | nop | ||
| 107 | |||
| 108 | #define bpf_negative_common(LEN) \ | ||
| 109 | save %sp, -SAVE_SZ, %sp; \ | ||
| 110 | mov %i0, %o0; \ | ||
| 111 | mov %i1, %o1; \ | ||
| 112 | SIGN_EXTEND(%o1); \ | ||
| 113 | call bpf_internal_load_pointer_neg_helper; \ | ||
| 114 | mov (LEN), %o2; \ | ||
| 115 | mov %o0, r_TMP; \ | ||
| 116 | cmp %o0, 0; \ | ||
| 117 | BE_PTR(bpf_error); \ | ||
| 118 | restore; | ||
| 119 | |||
| 120 | bpf_slow_path_word_neg: | ||
| 121 | sethi %hi(SKF_MAX_NEG_OFF), r_TMP | ||
| 122 | cmp r_OFF, r_TMP | ||
| 123 | bl bpf_error | ||
| 124 | nop | ||
| 125 | .globl bpf_jit_load_word_negative_offset | ||
| 126 | bpf_jit_load_word_negative_offset: | ||
| 127 | bpf_negative_common(4) | ||
| 128 | andcc r_TMP, 3, %g0 | ||
| 129 | bne load_word_unaligned | ||
| 130 | nop | ||
| 131 | retl | ||
| 132 | ld [r_TMP], r_RESULT | ||
| 133 | |||
| 134 | bpf_slow_path_half_neg: | ||
| 135 | sethi %hi(SKF_MAX_NEG_OFF), r_TMP | ||
| 136 | cmp r_OFF, r_TMP | ||
| 137 | bl bpf_error | ||
| 138 | nop | ||
| 139 | .globl bpf_jit_load_half_negative_offset | ||
| 140 | bpf_jit_load_half_negative_offset: | ||
| 141 | bpf_negative_common(2) | ||
| 142 | andcc r_TMP, 1, %g0 | ||
| 143 | bne load_half_unaligned | ||
| 144 | nop | ||
| 145 | retl | ||
| 146 | lduh [r_TMP], r_RESULT | ||
| 147 | |||
| 148 | bpf_slow_path_byte_neg: | ||
| 149 | sethi %hi(SKF_MAX_NEG_OFF), r_TMP | ||
| 150 | cmp r_OFF, r_TMP | ||
| 151 | bl bpf_error | ||
| 152 | nop | ||
| 153 | .globl bpf_jit_load_byte_negative_offset | ||
| 154 | bpf_jit_load_byte_negative_offset: | ||
| 155 | bpf_negative_common(1) | ||
| 156 | retl | ||
| 157 | ldub [r_TMP], r_RESULT | ||
| 158 | |||
| 159 | bpf_error: | ||
| 160 | /* Make the JIT program itself return zero. */ | ||
| 161 | ret | ||
| 162 | restore %g0, %g0, %o0 | ||
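Note on the removed sparc helpers: the load_word_unaligned path above assembles a big-endian 32-bit word one byte at a time, shifting the accumulator left by eight bits before OR-ing in each new byte; since sparc64 is big-endian, the assembled word is already in network order, which is also why the aligned fast path's plain ld needs no byte swap. A C restatement of the shift/or sequence (a sketch only; p stands in for the packet address held in r_TMP):

    #include <linux/types.h>

    /* Sketch of load_word_unaligned: four byte loads merged MSB-first. */
    static u32 load_word_unaligned(const u8 *p)
    {
        u32 v;

        v = p[0];               /* ldub [r_TMP + 0x0] */
        v = (v << 8) | p[1];    /* sll 8; or; ldub [r_TMP + 0x1] */
        v = (v << 8) | p[2];
        v = (v << 8) | p[3];
        return v;               /* big-endian word == ntohl() result */
    }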
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 48a25869349b..9f5918e0693a 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c | |||
| @@ -48,10 +48,6 @@ static void bpf_flush_icache(void *start_, void *end_) | |||
| 48 | } | 48 | } |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | #define SEEN_DATAREF 1 /* might call external helpers */ | ||
| 52 | #define SEEN_XREG 2 /* ebx is used */ | ||
| 53 | #define SEEN_MEM 4 /* use mem[] for temporary storage */ | ||
| 54 | |||
| 55 | #define S13(X) ((X) & 0x1fff) | 51 | #define S13(X) ((X) & 0x1fff) |
| 56 | #define S5(X) ((X) & 0x1f) | 52 | #define S5(X) ((X) & 0x1f) |
| 57 | #define IMMED 0x00002000 | 53 | #define IMMED 0x00002000 |
| @@ -198,7 +194,6 @@ struct jit_ctx { | |||
| 198 | bool tmp_1_used; | 194 | bool tmp_1_used; |
| 199 | bool tmp_2_used; | 195 | bool tmp_2_used; |
| 200 | bool tmp_3_used; | 196 | bool tmp_3_used; |
| 201 | bool saw_ld_abs_ind; | ||
| 202 | bool saw_frame_pointer; | 197 | bool saw_frame_pointer; |
| 203 | bool saw_call; | 198 | bool saw_call; |
| 204 | bool saw_tail_call; | 199 | bool saw_tail_call; |
| @@ -207,9 +202,7 @@ struct jit_ctx { | |||
| 207 | 202 | ||
| 208 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) | 203 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) |
| 209 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) | 204 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) |
| 210 | #define SKB_HLEN_REG (MAX_BPF_JIT_REG + 2) | 205 | #define TMP_REG_3 (MAX_BPF_JIT_REG + 2) |
| 211 | #define SKB_DATA_REG (MAX_BPF_JIT_REG + 3) | ||
| 212 | #define TMP_REG_3 (MAX_BPF_JIT_REG + 4) | ||
| 213 | 206 | ||
| 214 | /* Map BPF registers to SPARC registers */ | 207 | /* Map BPF registers to SPARC registers */ |
| 215 | static const int bpf2sparc[] = { | 208 | static const int bpf2sparc[] = { |
| @@ -238,9 +231,6 @@ static const int bpf2sparc[] = { | |||
| 238 | [TMP_REG_1] = G1, | 231 | [TMP_REG_1] = G1, |
| 239 | [TMP_REG_2] = G2, | 232 | [TMP_REG_2] = G2, |
| 240 | [TMP_REG_3] = G3, | 233 | [TMP_REG_3] = G3, |
| 241 | |||
| 242 | [SKB_HLEN_REG] = L4, | ||
| 243 | [SKB_DATA_REG] = L5, | ||
| 244 | }; | 234 | }; |
| 245 | 235 | ||
| 246 | static void emit(const u32 insn, struct jit_ctx *ctx) | 236 | static void emit(const u32 insn, struct jit_ctx *ctx) |
| @@ -800,25 +790,6 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src, | |||
| 800 | return 0; | 790 | return 0; |
| 801 | } | 791 | } |
| 802 | 792 | ||
| 803 | static void load_skb_regs(struct jit_ctx *ctx, u8 r_skb) | ||
| 804 | { | ||
| 805 | const u8 r_headlen = bpf2sparc[SKB_HLEN_REG]; | ||
| 806 | const u8 r_data = bpf2sparc[SKB_DATA_REG]; | ||
| 807 | const u8 r_tmp = bpf2sparc[TMP_REG_1]; | ||
| 808 | unsigned int off; | ||
| 809 | |||
| 810 | off = offsetof(struct sk_buff, len); | ||
| 811 | emit(LD32I | RS1(r_skb) | S13(off) | RD(r_headlen), ctx); | ||
| 812 | |||
| 813 | off = offsetof(struct sk_buff, data_len); | ||
| 814 | emit(LD32I | RS1(r_skb) | S13(off) | RD(r_tmp), ctx); | ||
| 815 | |||
| 816 | emit(SUB | RS1(r_headlen) | RS2(r_tmp) | RD(r_headlen), ctx); | ||
| 817 | |||
| 818 | off = offsetof(struct sk_buff, data); | ||
| 819 | emit(LDPTRI | RS1(r_skb) | S13(off) | RD(r_data), ctx); | ||
| 820 | } | ||
| 821 | |||
| 822 | /* Just skip the save instruction and the ctx register move. */ | 793 | /* Just skip the save instruction and the ctx register move. */ |
| 823 | #define BPF_TAILCALL_PROLOGUE_SKIP 16 | 794 | #define BPF_TAILCALL_PROLOGUE_SKIP 16 |
| 824 | #define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128) | 795 | #define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128) |
| @@ -857,9 +828,6 @@ static void build_prologue(struct jit_ctx *ctx) | |||
| 857 | 828 | ||
| 858 | emit_reg_move(I0, O0, ctx); | 829 | emit_reg_move(I0, O0, ctx); |
| 859 | /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */ | 830 | /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */ |
| 860 | |||
| 861 | if (ctx->saw_ld_abs_ind) | ||
| 862 | load_skb_regs(ctx, bpf2sparc[BPF_REG_1]); | ||
| 863 | } | 831 | } |
| 864 | 832 | ||
| 865 | static void build_epilogue(struct jit_ctx *ctx) | 833 | static void build_epilogue(struct jit_ctx *ctx) |
| @@ -1225,16 +1193,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) | |||
| 1225 | u8 *func = ((u8 *)__bpf_call_base) + imm; | 1193 | u8 *func = ((u8 *)__bpf_call_base) + imm; |
| 1226 | 1194 | ||
| 1227 | ctx->saw_call = true; | 1195 | ctx->saw_call = true; |
| 1228 | if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) | ||
| 1229 | emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx); | ||
| 1230 | 1196 | ||
| 1231 | emit_call((u32 *)func, ctx); | 1197 | emit_call((u32 *)func, ctx); |
| 1232 | emit_nop(ctx); | 1198 | emit_nop(ctx); |
| 1233 | 1199 | ||
| 1234 | emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); | 1200 | emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); |
| 1235 | |||
| 1236 | if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) | ||
| 1237 | load_skb_regs(ctx, L7); | ||
| 1238 | break; | 1201 | break; |
| 1239 | } | 1202 | } |
| 1240 | 1203 | ||
| @@ -1412,43 +1375,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) | |||
| 1412 | emit_nop(ctx); | 1375 | emit_nop(ctx); |
| 1413 | break; | 1376 | break; |
| 1414 | } | 1377 | } |
| 1415 | #define CHOOSE_LOAD_FUNC(K, func) \ | ||
| 1416 | ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) | ||
| 1417 | |||
| 1418 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ | ||
| 1419 | case BPF_LD | BPF_ABS | BPF_W: | ||
| 1420 | func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_word); | ||
| 1421 | goto common_load; | ||
| 1422 | case BPF_LD | BPF_ABS | BPF_H: | ||
| 1423 | func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_half); | ||
| 1424 | goto common_load; | ||
| 1425 | case BPF_LD | BPF_ABS | BPF_B: | ||
| 1426 | func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_byte); | ||
| 1427 | goto common_load; | ||
| 1428 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */ | ||
| 1429 | case BPF_LD | BPF_IND | BPF_W: | ||
| 1430 | func = bpf_jit_load_word; | ||
| 1431 | goto common_load; | ||
| 1432 | case BPF_LD | BPF_IND | BPF_H: | ||
| 1433 | func = bpf_jit_load_half; | ||
| 1434 | goto common_load; | ||
| 1435 | |||
| 1436 | case BPF_LD | BPF_IND | BPF_B: | ||
| 1437 | func = bpf_jit_load_byte; | ||
| 1438 | common_load: | ||
| 1439 | ctx->saw_ld_abs_ind = true; | ||
| 1440 | |||
| 1441 | emit_reg_move(bpf2sparc[BPF_REG_6], O0, ctx); | ||
| 1442 | emit_loadimm(imm, O1, ctx); | ||
| 1443 | |||
| 1444 | if (BPF_MODE(code) == BPF_IND) | ||
| 1445 | emit_alu(ADD, src, O1, ctx); | ||
| 1446 | |||
| 1447 | emit_call(func, ctx); | ||
| 1448 | emit_alu_K(SRA, O1, 0, ctx); | ||
| 1449 | |||
| 1450 | emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); | ||
| 1451 | break; | ||
| 1452 | 1378 | ||
| 1453 | default: | 1379 | default: |
| 1454 | pr_err_once("unknown opcode %02x\n", code); | 1380 | pr_err_once("unknown opcode %02x\n", code); |
| @@ -1583,12 +1509,11 @@ skip_init_ctx: | |||
| 1583 | build_epilogue(&ctx); | 1509 | build_epilogue(&ctx); |
| 1584 | 1510 | ||
| 1585 | if (bpf_jit_enable > 1) | 1511 | if (bpf_jit_enable > 1) |
| 1586 | pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c%c]\n", pass, | 1512 | pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass, |
| 1587 | image_size - (ctx.idx * 4), | 1513 | image_size - (ctx.idx * 4), |
| 1588 | ctx.tmp_1_used ? '1' : ' ', | 1514 | ctx.tmp_1_used ? '1' : ' ', |
| 1589 | ctx.tmp_2_used ? '2' : ' ', | 1515 | ctx.tmp_2_used ? '2' : ' ', |
| 1590 | ctx.tmp_3_used ? '3' : ' ', | 1516 | ctx.tmp_3_used ? '3' : ' ', |
| 1591 | ctx.saw_ld_abs_ind ? 'L' : ' ', | ||
| 1592 | ctx.saw_frame_pointer ? 'F' : ' ', | 1517 | ctx.saw_frame_pointer ? 'F' : ' ', |
| 1593 | ctx.saw_call ? 'C' : ' ', | 1518 | ctx.saw_call ? 'C' : ' ', |
| 1594 | ctx.saw_tail_call ? 'T' : ' '); | 1519 | ctx.saw_tail_call ? 'T' : ' '); |
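Worth restating what the deleted CHOOSE_LOAD_FUNC encoded, since the same macro also disappears from the x86 JIT below: for a constant offset the sign is known at JIT time, so a non-negative offset can jump straight to the *_positive_offset helper, a negative offset at or above SKF_LL_OFF (-0x200000 in include/uapi/linux/filter.h) takes the *_negative_offset variant, and anything lower keeps the generic entry, whose slow path then rejects it at run time. The same dispatch in C (a sketch; the helper signature is illustrative):

    #include <linux/filter.h>       /* SKF_LL_OFF */

    typedef u32 (*load_fn)(void);   /* illustrative, not the real type */

    static load_fn choose_load_func(int k, load_fn neg, load_fn generic,
                                    load_fn pos)
    {
        if (k < 0)
            return k >= SKF_LL_OFF ? neg : generic;
        return pos;     /* sign known at JIT time: skip the run-time test */
    }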
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f492b871a..d51a71dcbac2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -138,7 +138,7 @@ config X86 | |||
| 138 | select HAVE_DMA_CONTIGUOUS | 138 | select HAVE_DMA_CONTIGUOUS |
| 139 | select HAVE_DYNAMIC_FTRACE | 139 | select HAVE_DYNAMIC_FTRACE |
| 140 | select HAVE_DYNAMIC_FTRACE_WITH_REGS | 140 | select HAVE_DYNAMIC_FTRACE_WITH_REGS |
| 141 | select HAVE_EBPF_JIT if X86_64 | 141 | select HAVE_EBPF_JIT |
| 142 | select HAVE_EFFICIENT_UNALIGNED_ACCESS | 142 | select HAVE_EFFICIENT_UNALIGNED_ACCESS |
| 143 | select HAVE_EXIT_THREAD | 143 | select HAVE_EXIT_THREAD |
| 144 | select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE | 144 | select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE |
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f928ad9b143f..2cd344d1a6e5 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h | |||
| @@ -291,16 +291,20 @@ do { \ | |||
| 291 | * lfence | 291 | * lfence |
| 292 | * jmp spec_trap | 292 | * jmp spec_trap |
| 293 | * do_rop: | 293 | * do_rop: |
| 294 | * mov %rax,(%rsp) | 294 | * mov %rax,(%rsp) for x86_64 |
| 295 | * mov %edx,(%esp) for x86_32 | ||
| 295 | * retq | 296 | * retq |
| 296 | * | 297 | * |
| 297 | * Without retpolines configured: | 298 | * Without retpolines configured: |
| 298 | * | 299 | * |
| 299 | * jmp *%rax | 300 | * jmp *%rax for x86_64 |
| 301 | * jmp *%edx for x86_32 | ||
| 300 | */ | 302 | */ |
| 301 | #ifdef CONFIG_RETPOLINE | 303 | #ifdef CONFIG_RETPOLINE |
| 304 | #ifdef CONFIG_X86_64 | ||
| 302 | # define RETPOLINE_RAX_BPF_JIT_SIZE 17 | 305 | # define RETPOLINE_RAX_BPF_JIT_SIZE 17 |
| 303 | # define RETPOLINE_RAX_BPF_JIT() \ | 306 | # define RETPOLINE_RAX_BPF_JIT() \ |
| 307 | do { \ | ||
| 304 | EMIT1_off32(0xE8, 7); /* callq do_rop */ \ | 308 | EMIT1_off32(0xE8, 7); /* callq do_rop */ \ |
| 305 | /* spec_trap: */ \ | 309 | /* spec_trap: */ \ |
| 306 | EMIT2(0xF3, 0x90); /* pause */ \ | 310 | EMIT2(0xF3, 0x90); /* pause */ \ |
| @@ -308,11 +312,31 @@ do { \ | |||
| 308 | EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ | 312 | EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ |
| 309 | /* do_rop: */ \ | 313 | /* do_rop: */ \ |
| 310 | EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ | 314 | EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ |
| 311 | EMIT1(0xC3); /* retq */ | 315 | EMIT1(0xC3); /* retq */ \ |
| 316 | } while (0) | ||
| 312 | #else | 317 | #else |
| 318 | # define RETPOLINE_EDX_BPF_JIT() \ | ||
| 319 | do { \ | ||
| 320 | EMIT1_off32(0xE8, 7); /* call do_rop */ \ | ||
| 321 | /* spec_trap: */ \ | ||
| 322 | EMIT2(0xF3, 0x90); /* pause */ \ | ||
| 323 | EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ | ||
| 324 | EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ | ||
| 325 | /* do_rop: */ \ | ||
| 326 | EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \ | ||
| 327 | EMIT1(0xC3); /* ret */ \ | ||
| 328 | } while (0) | ||
| 329 | #endif | ||
| 330 | #else /* !CONFIG_RETPOLINE */ | ||
| 331 | |||
| 332 | #ifdef CONFIG_X86_64 | ||
| 313 | # define RETPOLINE_RAX_BPF_JIT_SIZE 2 | 333 | # define RETPOLINE_RAX_BPF_JIT_SIZE 2 |
| 314 | # define RETPOLINE_RAX_BPF_JIT() \ | 334 | # define RETPOLINE_RAX_BPF_JIT() \ |
| 315 | EMIT2(0xFF, 0xE0); /* jmp *%rax */ | 335 | EMIT2(0xFF, 0xE0); /* jmp *%rax */ |
| 336 | #else | ||
| 337 | # define RETPOLINE_EDX_BPF_JIT() \ | ||
| 338 | EMIT2(0xFF, 0xE2) /* jmp *%edx */ | ||
| 339 | #endif | ||
| 316 | #endif | 340 | #endif |
| 317 | 341 | ||
| 318 | #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ | 342 | #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ |
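The constants in these macros can be cross-checked by byte count. The call emitted by EMIT1_off32(0xE8, 7) jumps forward exactly over the speculation trap that follows it: pause (F3 90) is 2 bytes, lfence (0F AE E8) is 3, and jmp spec_trap (EB F9) is 2, so the displacement is 2 + 3 + 2 = 7. The same accounting gives RETPOLINE_RAX_BPF_JIT_SIZE: call rel32 (5) + trap (7) + mov %rax,(%rsp) (4) + retq (1) = 17, and jmp *%rax (FF E0) = 2 in the non-retpoline case. The new x86_32 sequence works out to 16 bytes, since mov %edx,(%esp) (89 14 24) needs no REX prefix; no RETPOLINE_EDX_BPF_JIT_SIZE is defined, presumably because the 32-bit JIT does not fold the retpoline length into precomputed jump offsets the way the 64-bit tail-call code does with OFFSET1/OFFSET3.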
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile index fefb4b619598..59e123da580c 100644 --- a/arch/x86/net/Makefile +++ b/arch/x86/net/Makefile | |||
| @@ -1,6 +1,9 @@ | |||
| 1 | # | 1 | # |
| 2 | # Arch-specific network modules | 2 | # Arch-specific network modules |
| 3 | # | 3 | # |
| 4 | OBJECT_FILES_NON_STANDARD_bpf_jit.o += y | ||
| 5 | 4 | ||
| 6 | obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o | 5 | ifeq ($(CONFIG_X86_32),y) |
| 6 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o | ||
| 7 | else | ||
| 8 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o | ||
| 9 | endif | ||
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S deleted file mode 100644 index b33093f84528..000000000000 --- a/arch/x86/net/bpf_jit.S +++ /dev/null | |||
| @@ -1,154 +0,0 @@ | |||
| 1 | /* bpf_jit.S : BPF JIT helper functions | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or | ||
| 6 | * modify it under the terms of the GNU General Public License | ||
| 7 | * as published by the Free Software Foundation; version 2 | ||
| 8 | * of the License. | ||
| 9 | */ | ||
| 10 | #include <linux/linkage.h> | ||
| 11 | #include <asm/frame.h> | ||
| 12 | |||
| 13 | /* | ||
| 14 | * Calling convention : | ||
| 15 | * rbx : skb pointer (callee saved) | ||
| 16 | * esi : offset of byte(s) to fetch in skb (can be scratched) | ||
| 17 | * r10 : copy of skb->data | ||
| 18 | * r9d : hlen = skb->len - skb->data_len | ||
| 19 | */ | ||
| 20 | #define SKBDATA %r10 | ||
| 21 | #define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */ | ||
| 22 | |||
| 23 | #define FUNC(name) \ | ||
| 24 | .globl name; \ | ||
| 25 | .type name, @function; \ | ||
| 26 | name: | ||
| 27 | |||
| 28 | FUNC(sk_load_word) | ||
| 29 | test %esi,%esi | ||
| 30 | js bpf_slow_path_word_neg | ||
| 31 | |||
| 32 | FUNC(sk_load_word_positive_offset) | ||
| 33 | mov %r9d,%eax # hlen | ||
| 34 | sub %esi,%eax # hlen - offset | ||
| 35 | cmp $3,%eax | ||
| 36 | jle bpf_slow_path_word | ||
| 37 | mov (SKBDATA,%rsi),%eax | ||
| 38 | bswap %eax /* ntohl() */ | ||
| 39 | ret | ||
| 40 | |||
| 41 | FUNC(sk_load_half) | ||
| 42 | test %esi,%esi | ||
| 43 | js bpf_slow_path_half_neg | ||
| 44 | |||
| 45 | FUNC(sk_load_half_positive_offset) | ||
| 46 | mov %r9d,%eax | ||
| 47 | sub %esi,%eax # hlen - offset | ||
| 48 | cmp $1,%eax | ||
| 49 | jle bpf_slow_path_half | ||
| 50 | movzwl (SKBDATA,%rsi),%eax | ||
| 51 | rol $8,%ax # ntohs() | ||
| 52 | ret | ||
| 53 | |||
| 54 | FUNC(sk_load_byte) | ||
| 55 | test %esi,%esi | ||
| 56 | js bpf_slow_path_byte_neg | ||
| 57 | |||
| 58 | FUNC(sk_load_byte_positive_offset) | ||
| 59 | cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ | ||
| 60 | jle bpf_slow_path_byte | ||
| 61 | movzbl (SKBDATA,%rsi),%eax | ||
| 62 | ret | ||
| 63 | |||
| 64 | /* rsi contains offset and can be scratched */ | ||
| 65 | #define bpf_slow_path_common(LEN) \ | ||
| 66 | lea 32(%rbp), %rdx;\ | ||
| 67 | FRAME_BEGIN; \ | ||
| 68 | mov %rbx, %rdi; /* arg1 == skb */ \ | ||
| 69 | push %r9; \ | ||
| 70 | push SKBDATA; \ | ||
| 71 | /* rsi already has offset */ \ | ||
| 72 | mov $LEN,%ecx; /* len */ \ | ||
| 73 | call skb_copy_bits; \ | ||
| 74 | test %eax,%eax; \ | ||
| 75 | pop SKBDATA; \ | ||
| 76 | pop %r9; \ | ||
| 77 | FRAME_END | ||
| 78 | |||
| 79 | |||
| 80 | bpf_slow_path_word: | ||
| 81 | bpf_slow_path_common(4) | ||
| 82 | js bpf_error | ||
| 83 | mov 32(%rbp),%eax | ||
| 84 | bswap %eax | ||
| 85 | ret | ||
| 86 | |||
| 87 | bpf_slow_path_half: | ||
| 88 | bpf_slow_path_common(2) | ||
| 89 | js bpf_error | ||
| 90 | mov 32(%rbp),%ax | ||
| 91 | rol $8,%ax | ||
| 92 | movzwl %ax,%eax | ||
| 93 | ret | ||
| 94 | |||
| 95 | bpf_slow_path_byte: | ||
| 96 | bpf_slow_path_common(1) | ||
| 97 | js bpf_error | ||
| 98 | movzbl 32(%rbp),%eax | ||
| 99 | ret | ||
| 100 | |||
| 101 | #define sk_negative_common(SIZE) \ | ||
| 102 | FRAME_BEGIN; \ | ||
| 103 | mov %rbx, %rdi; /* arg1 == skb */ \ | ||
| 104 | push %r9; \ | ||
| 105 | push SKBDATA; \ | ||
| 106 | /* rsi already has offset */ \ | ||
| 107 | mov $SIZE,%edx; /* size */ \ | ||
| 108 | call bpf_internal_load_pointer_neg_helper; \ | ||
| 109 | test %rax,%rax; \ | ||
| 110 | pop SKBDATA; \ | ||
| 111 | pop %r9; \ | ||
| 112 | FRAME_END; \ | ||
| 113 | jz bpf_error | ||
| 114 | |||
| 115 | bpf_slow_path_word_neg: | ||
| 116 | cmp SKF_MAX_NEG_OFF, %esi /* test range */ | ||
| 117 | jl bpf_error /* offset lower -> error */ | ||
| 118 | |||
| 119 | FUNC(sk_load_word_negative_offset) | ||
| 120 | sk_negative_common(4) | ||
| 121 | mov (%rax), %eax | ||
| 122 | bswap %eax | ||
| 123 | ret | ||
| 124 | |||
| 125 | bpf_slow_path_half_neg: | ||
| 126 | cmp SKF_MAX_NEG_OFF, %esi | ||
| 127 | jl bpf_error | ||
| 128 | |||
| 129 | FUNC(sk_load_half_negative_offset) | ||
| 130 | sk_negative_common(2) | ||
| 131 | mov (%rax),%ax | ||
| 132 | rol $8,%ax | ||
| 133 | movzwl %ax,%eax | ||
| 134 | ret | ||
| 135 | |||
| 136 | bpf_slow_path_byte_neg: | ||
| 137 | cmp SKF_MAX_NEG_OFF, %esi | ||
| 138 | jl bpf_error | ||
| 139 | |||
| 140 | FUNC(sk_load_byte_negative_offset) | ||
| 141 | sk_negative_common(1) | ||
| 142 | movzbl (%rax), %eax | ||
| 143 | ret | ||
| 144 | |||
| 145 | bpf_error: | ||
| 146 | # force a return 0 from jit handler | ||
| 147 | xor %eax,%eax | ||
| 148 | mov (%rbp),%rbx | ||
| 149 | mov 8(%rbp),%r13 | ||
| 150 | mov 16(%rbp),%r14 | ||
| 151 | mov 24(%rbp),%r15 | ||
| 152 | add $40, %rbp | ||
| 153 | leaveq | ||
| 154 | ret | ||
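All three slow paths in the file deleted above reduce to the same C-level operation: copy LEN bytes out of the (possibly non-linear) skb into a scratch slot in the JIT stack frame with skb_copy_bits(), then swap into host order. A sketch of the word case, using the real skb_copy_bits() signature; the local scratch stands in for the 8-byte buffer at 32(%rbp):

    #include <linux/skbuff.h>
    #include <linux/errno.h>

    /* Sketch of bpf_slow_path_word: a negative skb_copy_bits() return
     * took the bpf_error exit, making the whole program return 0.
     */
    static int slow_path_word(const struct sk_buff *skb, int offset, u32 *res)
    {
        __be32 scratch;

        if (skb_copy_bits(skb, offset, &scratch, sizeof(scratch)) < 0)
            return -EFAULT;
        *res = be32_to_cpu(scratch);    /* the bswap %eax step */
        return 0;
    }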
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 263c8453815e..8fca446aaef6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | /* bpf_jit_comp.c : BPF JIT compiler | 1 | /* |
| 2 | * bpf_jit_comp.c: BPF JIT compiler | ||
| 2 | * | 3 | * |
| 3 | * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) | 4 | * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) |
| 4 | * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | 5 | * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com |
| @@ -16,15 +17,6 @@ | |||
| 16 | #include <asm/set_memory.h> | 17 | #include <asm/set_memory.h> |
| 17 | #include <asm/nospec-branch.h> | 18 | #include <asm/nospec-branch.h> |
| 18 | 19 | ||
| 19 | /* | ||
| 20 | * assembly code in arch/x86/net/bpf_jit.S | ||
| 21 | */ | ||
| 22 | extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; | ||
| 23 | extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; | ||
| 24 | extern u8 sk_load_byte_positive_offset[]; | ||
| 25 | extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[]; | ||
| 26 | extern u8 sk_load_byte_negative_offset[]; | ||
| 27 | |||
| 28 | static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | 20 | static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) |
| 29 | { | 21 | { |
| 30 | if (len == 1) | 22 | if (len == 1) |
| @@ -45,14 +37,15 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | |||
| 45 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) | 37 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) |
| 46 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) | 38 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) |
| 47 | #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) | 39 | #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) |
| 40 | |||
| 48 | #define EMIT1_off32(b1, off) \ | 41 | #define EMIT1_off32(b1, off) \ |
| 49 | do {EMIT1(b1); EMIT(off, 4); } while (0) | 42 | do { EMIT1(b1); EMIT(off, 4); } while (0) |
| 50 | #define EMIT2_off32(b1, b2, off) \ | 43 | #define EMIT2_off32(b1, b2, off) \ |
| 51 | do {EMIT2(b1, b2); EMIT(off, 4); } while (0) | 44 | do { EMIT2(b1, b2); EMIT(off, 4); } while (0) |
| 52 | #define EMIT3_off32(b1, b2, b3, off) \ | 45 | #define EMIT3_off32(b1, b2, b3, off) \ |
| 53 | do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) | 46 | do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) |
| 54 | #define EMIT4_off32(b1, b2, b3, b4, off) \ | 47 | #define EMIT4_off32(b1, b2, b3, b4, off) \ |
| 55 | do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) | 48 | do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) |
| 56 | 49 | ||
| 57 | static bool is_imm8(int value) | 50 | static bool is_imm8(int value) |
| 58 | { | 51 | { |
| @@ -70,9 +63,10 @@ static bool is_uimm32(u64 value) | |||
| 70 | } | 63 | } |
| 71 | 64 | ||
| 72 | /* mov dst, src */ | 65 | /* mov dst, src */ |
| 73 | #define EMIT_mov(DST, SRC) \ | 66 | #define EMIT_mov(DST, SRC) \ |
| 74 | do {if (DST != SRC) \ | 67 | do { \ |
| 75 | EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ | 68 | if (DST != SRC) \ |
| 69 | EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ | ||
| 76 | } while (0) | 70 | } while (0) |
| 77 | 71 | ||
| 78 | static int bpf_size_to_x86_bytes(int bpf_size) | 72 | static int bpf_size_to_x86_bytes(int bpf_size) |
| @@ -89,7 +83,8 @@ static int bpf_size_to_x86_bytes(int bpf_size) | |||
| 89 | return 0; | 83 | return 0; |
| 90 | } | 84 | } |
| 91 | 85 | ||
| 92 | /* list of x86 cond jumps opcodes (. + s8) | 86 | /* |
| 87 | * List of x86 conditional jump opcodes (. + s8) | ||
| 93 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) | 88 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) |
| 94 | */ | 89 | */ |
| 95 | #define X86_JB 0x72 | 90 | #define X86_JB 0x72 |
| @@ -103,38 +98,37 @@ static int bpf_size_to_x86_bytes(int bpf_size) | |||
| 103 | #define X86_JLE 0x7E | 98 | #define X86_JLE 0x7E |
| 104 | #define X86_JG 0x7F | 99 | #define X86_JG 0x7F |
| 105 | 100 | ||
| 106 | #define CHOOSE_LOAD_FUNC(K, func) \ | 101 | /* Pick a register outside of BPF range for JIT internal work */ |
| 107 | ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) | ||
| 108 | |||
| 109 | /* pick a register outside of BPF range for JIT internal work */ | ||
| 110 | #define AUX_REG (MAX_BPF_JIT_REG + 1) | 102 | #define AUX_REG (MAX_BPF_JIT_REG + 1) |
| 111 | 103 | ||
| 112 | /* The following table maps BPF registers to x64 registers. | 104 | /* |
| 105 | * The following table maps BPF registers to x86-64 registers. | ||
| 113 | * | 106 | * |
| 114 | * x64 register r12 is unused, since if used as base address | 107 | * x86-64 register R12 is unused, since if used as base address |
| 115 | * register in load/store instructions, it always needs an | 108 | * register in load/store instructions, it always needs an |
| 116 | * extra byte of encoding and is callee saved. | 109 | * extra byte of encoding and is callee saved. |
| 117 | * | 110 | * |
| 118 | * r9 caches skb->len - skb->data_len | 111 | * Also x86-64 register R9 is unused. x86-64 register R10 is |
| 119 | * r10 caches skb->data, and used for blinding (if enabled) | 112 | * used for blinding (if enabled). |
| 120 | */ | 113 | */ |
| 121 | static const int reg2hex[] = { | 114 | static const int reg2hex[] = { |
| 122 | [BPF_REG_0] = 0, /* rax */ | 115 | [BPF_REG_0] = 0, /* RAX */ |
| 123 | [BPF_REG_1] = 7, /* rdi */ | 116 | [BPF_REG_1] = 7, /* RDI */ |
| 124 | [BPF_REG_2] = 6, /* rsi */ | 117 | [BPF_REG_2] = 6, /* RSI */ |
| 125 | [BPF_REG_3] = 2, /* rdx */ | 118 | [BPF_REG_3] = 2, /* RDX */ |
| 126 | [BPF_REG_4] = 1, /* rcx */ | 119 | [BPF_REG_4] = 1, /* RCX */ |
| 127 | [BPF_REG_5] = 0, /* r8 */ | 120 | [BPF_REG_5] = 0, /* R8 */ |
| 128 | [BPF_REG_6] = 3, /* rbx callee saved */ | 121 | [BPF_REG_6] = 3, /* RBX callee saved */ |
| 129 | [BPF_REG_7] = 5, /* r13 callee saved */ | 122 | [BPF_REG_7] = 5, /* R13 callee saved */ |
| 130 | [BPF_REG_8] = 6, /* r14 callee saved */ | 123 | [BPF_REG_8] = 6, /* R14 callee saved */ |
| 131 | [BPF_REG_9] = 7, /* r15 callee saved */ | 124 | [BPF_REG_9] = 7, /* R15 callee saved */ |
| 132 | [BPF_REG_FP] = 5, /* rbp readonly */ | 125 | [BPF_REG_FP] = 5, /* RBP readonly */ |
| 133 | [BPF_REG_AX] = 2, /* r10 temp register */ | 126 | [BPF_REG_AX] = 2, /* R10 temp register */ |
| 134 | [AUX_REG] = 3, /* r11 temp register */ | 127 | [AUX_REG] = 3, /* R11 temp register */ |
| 135 | }; | 128 | }; |
| 136 | 129 | ||
| 137 | /* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 | 130 | /* |
| 131 | * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15 | ||
| 138 | * which need extra byte of encoding. | 132 | * which need extra byte of encoding. |
| 139 | * rax,rcx,...,rbp have simpler encoding | 133 | * rax,rcx,...,rbp have simpler encoding |
| 140 | */ | 134 | */ |
| @@ -153,7 +147,7 @@ static bool is_axreg(u32 reg) | |||
| 153 | return reg == BPF_REG_0; | 147 | return reg == BPF_REG_0; |
| 154 | } | 148 | } |
| 155 | 149 | ||
| 156 | /* add modifiers if 'reg' maps to x64 registers r8..r15 */ | 150 | /* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */ |
| 157 | static u8 add_1mod(u8 byte, u32 reg) | 151 | static u8 add_1mod(u8 byte, u32 reg) |
| 158 | { | 152 | { |
| 159 | if (is_ereg(reg)) | 153 | if (is_ereg(reg)) |
| @@ -170,13 +164,13 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2) | |||
| 170 | return byte; | 164 | return byte; |
| 171 | } | 165 | } |
| 172 | 166 | ||
| 173 | /* encode 'dst_reg' register into x64 opcode 'byte' */ | 167 | /* Encode 'dst_reg' register into x86-64 opcode 'byte' */ |
| 174 | static u8 add_1reg(u8 byte, u32 dst_reg) | 168 | static u8 add_1reg(u8 byte, u32 dst_reg) |
| 175 | { | 169 | { |
| 176 | return byte + reg2hex[dst_reg]; | 170 | return byte + reg2hex[dst_reg]; |
| 177 | } | 171 | } |
| 178 | 172 | ||
| 179 | /* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ | 173 | /* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */ |
| 180 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | 174 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) |
| 181 | { | 175 | { |
| 182 | return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); | 176 | return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); |
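A worked instance of these encoders: EMIT_mov(BPF_REG_6, BPF_REG_7), i.e. mov rbx, r13. reg2hex[] gives 3 for BPF_REG_6 (RBX, not an ereg) and 5 for BPF_REG_7 (R13, an ereg), so add_2mod(0x48, ...) sets the REX.R bit for the extended source register, 0x48 | 4 = 0x4C, while add_2reg(0xC0, ...) builds the ModRM byte 0xC0 + 3 + (5 << 3) = 0xEB. The emitted bytes 4C 89 EB disassemble to mov %r13,%rbx, as intended.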
| @@ -184,27 +178,24 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | |||
| 184 | 178 | ||
| 185 | static void jit_fill_hole(void *area, unsigned int size) | 179 | static void jit_fill_hole(void *area, unsigned int size) |
| 186 | { | 180 | { |
| 187 | /* fill whole space with int3 instructions */ | 181 | /* Fill whole space with INT3 instructions */ |
| 188 | memset(area, 0xcc, size); | 182 | memset(area, 0xcc, size); |
| 189 | } | 183 | } |
| 190 | 184 | ||
| 191 | struct jit_context { | 185 | struct jit_context { |
| 192 | int cleanup_addr; /* epilogue code offset */ | 186 | int cleanup_addr; /* Epilogue code offset */ |
| 193 | bool seen_ld_abs; | ||
| 194 | bool seen_ax_reg; | ||
| 195 | }; | 187 | }; |
| 196 | 188 | ||
| 197 | /* maximum number of bytes emitted while JITing one eBPF insn */ | 189 | /* Maximum number of bytes emitted while JITing one eBPF insn */ |
| 198 | #define BPF_MAX_INSN_SIZE 128 | 190 | #define BPF_MAX_INSN_SIZE 128 |
| 199 | #define BPF_INSN_SAFETY 64 | 191 | #define BPF_INSN_SAFETY 64 |
| 200 | 192 | ||
| 201 | #define AUX_STACK_SPACE \ | 193 | #define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */ |
| 202 | (32 /* space for rbx, r13, r14, r15 */ + \ | ||
| 203 | 8 /* space for skb_copy_bits() buffer */) | ||
| 204 | 194 | ||
| 205 | #define PROLOGUE_SIZE 37 | 195 | #define PROLOGUE_SIZE 37 |
| 206 | 196 | ||
| 207 | /* emit x64 prologue code for BPF program and check it's size. | 197 | /* |
| 198 | * Emit x86-64 prologue code for BPF program and check its size. | ||
| 208 | * bpf_tail_call helper will skip it while jumping into another program | 199 | * bpf_tail_call helper will skip it while jumping into another program |
| 209 | */ | 200 | */ |
| 210 | static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | 201 | static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) |
| @@ -212,8 +203,11 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 212 | u8 *prog = *pprog; | 203 | u8 *prog = *pprog; |
| 213 | int cnt = 0; | 204 | int cnt = 0; |
| 214 | 205 | ||
| 215 | EMIT1(0x55); /* push rbp */ | 206 | /* push rbp */ |
| 216 | EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ | 207 | EMIT1(0x55); |
| 208 | |||
| 209 | /* mov rbp,rsp */ | ||
| 210 | EMIT3(0x48, 0x89, 0xE5); | ||
| 217 | 211 | ||
| 218 | /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ | 212 | /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ |
| 219 | EMIT3_off32(0x48, 0x81, 0xEC, | 213 | EMIT3_off32(0x48, 0x81, 0xEC, |
| @@ -222,19 +216,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 222 | /* sub rbp, AUX_STACK_SPACE */ | 216 | /* sub rbp, AUX_STACK_SPACE */ |
| 223 | EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); | 217 | EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); |
| 224 | 218 | ||
| 225 | /* all classic BPF filters use R6(rbx) save it */ | ||
| 226 | |||
| 227 | /* mov qword ptr [rbp+0],rbx */ | 219 | /* mov qword ptr [rbp+0],rbx */ |
| 228 | EMIT4(0x48, 0x89, 0x5D, 0); | 220 | EMIT4(0x48, 0x89, 0x5D, 0); |
| 229 | |||
| 230 | /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8 | ||
| 231 | * as temporary, so all tcpdump filters need to spill/fill R7(r13) and | ||
| 232 | * R8(r14). R9(r15) spill could be made conditional, but there is only | ||
| 233 | * one 'bpf_error' return path out of helper functions inside bpf_jit.S | ||
| 234 | * The overhead of extra spill is negligible for any filter other | ||
| 235 | * than synthetic ones. Therefore not worth adding complexity. | ||
| 236 | */ | ||
| 237 | |||
| 238 | /* mov qword ptr [rbp+8],r13 */ | 221 | /* mov qword ptr [rbp+8],r13 */ |
| 239 | EMIT4(0x4C, 0x89, 0x6D, 8); | 222 | EMIT4(0x4C, 0x89, 0x6D, 8); |
| 240 | /* mov qword ptr [rbp+16],r14 */ | 223 | /* mov qword ptr [rbp+16],r14 */ |
| @@ -243,9 +226,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 243 | EMIT4(0x4C, 0x89, 0x7D, 24); | 226 | EMIT4(0x4C, 0x89, 0x7D, 24); |
| 244 | 227 | ||
| 245 | if (!ebpf_from_cbpf) { | 228 | if (!ebpf_from_cbpf) { |
| 246 | /* Clear the tail call counter (tail_call_cnt): for eBPF tail | 229 | /* |
| 230 | * Clear the tail call counter (tail_call_cnt): for eBPF tail | ||
| 247 | * calls we need to reset the counter to 0. It's done in two | 231 | * calls we need to reset the counter to 0. It's done in two |
| 248 | * instructions, resetting rax register to 0, and moving it | 232 | * instructions, resetting RAX register to 0, and moving it |
| 249 | * to the counter location. | 233 | * to the counter location. |
| 250 | */ | 234 | */ |
| 251 | 235 | ||
| @@ -260,7 +244,9 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 260 | *pprog = prog; | 244 | *pprog = prog; |
| 261 | } | 245 | } |
| 262 | 246 | ||
| 263 | /* generate the following code: | 247 | /* |
| 248 | * Generate the following code: | ||
| 249 | * | ||
| 264 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... | 250 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... |
| 265 | * if (index >= array->map.max_entries) | 251 | * if (index >= array->map.max_entries) |
| 266 | * goto out; | 252 | * goto out; |
| @@ -278,23 +264,26 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
| 278 | int label1, label2, label3; | 264 | int label1, label2, label3; |
| 279 | int cnt = 0; | 265 | int cnt = 0; |
| 280 | 266 | ||
| 281 | /* rdi - pointer to ctx | 267 | /* |
| 268 | * rdi - pointer to ctx | ||
| 282 | * rsi - pointer to bpf_array | 269 | * rsi - pointer to bpf_array |
| 283 | * rdx - index in bpf_array | 270 | * rdx - index in bpf_array |
| 284 | */ | 271 | */ |
| 285 | 272 | ||
| 286 | /* if (index >= array->map.max_entries) | 273 | /* |
| 287 | * goto out; | 274 | * if (index >= array->map.max_entries) |
| 275 | * goto out; | ||
| 288 | */ | 276 | */ |
| 289 | EMIT2(0x89, 0xD2); /* mov edx, edx */ | 277 | EMIT2(0x89, 0xD2); /* mov edx, edx */ |
| 290 | EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ | 278 | EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ |
| 291 | offsetof(struct bpf_array, map.max_entries)); | 279 | offsetof(struct bpf_array, map.max_entries)); |
| 292 | #define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ | 280 | #define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */ |
| 293 | EMIT2(X86_JBE, OFFSET1); /* jbe out */ | 281 | EMIT2(X86_JBE, OFFSET1); /* jbe out */ |
| 294 | label1 = cnt; | 282 | label1 = cnt; |
| 295 | 283 | ||
| 296 | /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) | 284 | /* |
| 297 | * goto out; | 285 | * if (tail_call_cnt > MAX_TAIL_CALL_CNT) |
| 286 | * goto out; | ||
| 298 | */ | 287 | */ |
| 299 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ | 288 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ |
| 300 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ | 289 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ |
| @@ -308,8 +297,9 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
| 308 | EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ | 297 | EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ |
| 309 | offsetof(struct bpf_array, ptrs)); | 298 | offsetof(struct bpf_array, ptrs)); |
| 310 | 299 | ||
| 311 | /* if (prog == NULL) | 300 | /* |
| 312 | * goto out; | 301 | * if (prog == NULL) |
| 302 | * goto out; | ||
| 313 | */ | 303 | */ |
| 314 | EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ | 304 | EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ |
| 315 | #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) | 305 | #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) |
| @@ -321,7 +311,8 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
| 321 | offsetof(struct bpf_prog, bpf_func)); | 311 | offsetof(struct bpf_prog, bpf_func)); |
| 322 | EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ | 312 | EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ |
| 323 | 313 | ||
| 324 | /* now we're ready to jump into next BPF program | 314 | /* |
| 315 | * Now we're ready to jump into next BPF program | ||
| 325 | * rdi == ctx (1st arg) | 316 | * rdi == ctx (1st arg) |
| 326 | * rax == prog->bpf_func + prologue_size | 317 | * rax == prog->bpf_func + prologue_size |
| 327 | */ | 318 | */ |
| @@ -334,26 +325,6 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
| 334 | *pprog = prog; | 325 | *pprog = prog; |
| 335 | } | 326 | } |
| 336 | 327 | ||
| 337 | |||
| 338 | static void emit_load_skb_data_hlen(u8 **pprog) | ||
| 339 | { | ||
| 340 | u8 *prog = *pprog; | ||
| 341 | int cnt = 0; | ||
| 342 | |||
| 343 | /* r9d = skb->len - skb->data_len (headlen) | ||
| 344 | * r10 = skb->data | ||
| 345 | */ | ||
| 346 | /* mov %r9d, off32(%rdi) */ | ||
| 347 | EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len)); | ||
| 348 | |||
| 349 | /* sub %r9d, off32(%rdi) */ | ||
| 350 | EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len)); | ||
| 351 | |||
| 352 | /* mov %r10, off32(%rdi) */ | ||
| 353 | EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data)); | ||
| 354 | *pprog = prog; | ||
| 355 | } | ||
| 356 | |||
| 357 | static void emit_mov_imm32(u8 **pprog, bool sign_propagate, | 328 | static void emit_mov_imm32(u8 **pprog, bool sign_propagate, |
| 358 | u32 dst_reg, const u32 imm32) | 329 | u32 dst_reg, const u32 imm32) |
| 359 | { | 330 | { |
| @@ -361,7 +332,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, | |||
| 361 | u8 b1, b2, b3; | 332 | u8 b1, b2, b3; |
| 362 | int cnt = 0; | 333 | int cnt = 0; |
| 363 | 334 | ||
| 364 | /* optimization: if imm32 is positive, use 'mov %eax, imm32' | 335 | /* |
| 336 | * Optimization: if imm32 is positive, use 'mov %eax, imm32' | ||
| 365 | * (which zero-extends imm32) to save 2 bytes. | 337 | * (which zero-extends imm32) to save 2 bytes. |
| 366 | */ | 338 | */ |
| 367 | if (sign_propagate && (s32)imm32 < 0) { | 339 | if (sign_propagate && (s32)imm32 < 0) { |
| @@ -373,7 +345,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, | |||
| 373 | goto done; | 345 | goto done; |
| 374 | } | 346 | } |
| 375 | 347 | ||
| 376 | /* optimization: if imm32 is zero, use 'xor %eax, %eax' | 348 | /* |
| 349 | * Optimization: if imm32 is zero, use 'xor %eax, %eax' | ||
| 377 | * to save 3 bytes. | 350 | * to save 3 bytes. |
| 378 | */ | 351 | */ |
| 379 | if (imm32 == 0) { | 352 | if (imm32 == 0) { |
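Both byte counts quoted in these two optimization comments check out: the sign-extending mov %rax, imm32 (48 C7 C0 imm32) is 7 bytes while the zero-extending mov %eax, imm32 (B8 imm32) is 5, saving 2; and xor %eax,%eax (31 C0) is 2 bytes against that 5-byte mov, saving 3.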
| @@ -400,7 +373,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg, | |||
| 400 | int cnt = 0; | 373 | int cnt = 0; |
| 401 | 374 | ||
| 402 | if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { | 375 | if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { |
| 403 | /* For emitting plain u32, where sign bit must not be | 376 | /* |
| 377 | * For emitting plain u32, where the sign bit must not be | ||
| 404 | * propagated LLVM tends to load imm64 over mov32 | 378 | * propagated, LLVM tends to load imm64 over mov32 |
| 405 | * directly, so save couple of bytes by just doing | 379 | * directly, so save a couple of bytes by just doing |
| 406 | * 'mov %eax, imm32' instead. | 380 | * 'mov %eax, imm32' instead. |
| @@ -439,8 +413,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 439 | { | 413 | { |
| 440 | struct bpf_insn *insn = bpf_prog->insnsi; | 414 | struct bpf_insn *insn = bpf_prog->insnsi; |
| 441 | int insn_cnt = bpf_prog->len; | 415 | int insn_cnt = bpf_prog->len; |
| 442 | bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0); | ||
| 443 | bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0); | ||
| 444 | bool seen_exit = false; | 416 | bool seen_exit = false; |
| 445 | u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; | 417 | u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; |
| 446 | int i, cnt = 0; | 418 | int i, cnt = 0; |
| @@ -450,9 +422,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 450 | emit_prologue(&prog, bpf_prog->aux->stack_depth, | 422 | emit_prologue(&prog, bpf_prog->aux->stack_depth, |
| 451 | bpf_prog_was_classic(bpf_prog)); | 423 | bpf_prog_was_classic(bpf_prog)); |
| 452 | 424 | ||
| 453 | if (seen_ld_abs) | ||
| 454 | emit_load_skb_data_hlen(&prog); | ||
| 455 | |||
| 456 | for (i = 0; i < insn_cnt; i++, insn++) { | 425 | for (i = 0; i < insn_cnt; i++, insn++) { |
| 457 | const s32 imm32 = insn->imm; | 426 | const s32 imm32 = insn->imm; |
| 458 | u32 dst_reg = insn->dst_reg; | 427 | u32 dst_reg = insn->dst_reg; |
| @@ -460,13 +429,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 460 | u8 b2 = 0, b3 = 0; | 429 | u8 b2 = 0, b3 = 0; |
| 461 | s64 jmp_offset; | 430 | s64 jmp_offset; |
| 462 | u8 jmp_cond; | 431 | u8 jmp_cond; |
| 463 | bool reload_skb_data; | ||
| 464 | int ilen; | 432 | int ilen; |
| 465 | u8 *func; | 433 | u8 *func; |
| 466 | 434 | ||
| 467 | if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) | ||
| 468 | ctx->seen_ax_reg = seen_ax_reg = true; | ||
| 469 | |||
| 470 | switch (insn->code) { | 435 | switch (insn->code) { |
| 471 | /* ALU */ | 436 | /* ALU */ |
| 472 | case BPF_ALU | BPF_ADD | BPF_X: | 437 | case BPF_ALU | BPF_ADD | BPF_X: |
| @@ -525,7 +490,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 525 | else if (is_ereg(dst_reg)) | 490 | else if (is_ereg(dst_reg)) |
| 526 | EMIT1(add_1mod(0x40, dst_reg)); | 491 | EMIT1(add_1mod(0x40, dst_reg)); |
| 527 | 492 | ||
| 528 | /* b3 holds 'normal' opcode, b2 short form only valid | 493 | /* |
| 494 | * b3 holds 'normal' opcode, b2 short form only valid | ||
| 529 | * in case dst is eax/rax. | 495 | * in case dst is eax/rax. |
| 530 | */ | 496 | */ |
| 531 | switch (BPF_OP(insn->code)) { | 497 | switch (BPF_OP(insn->code)) { |
| @@ -593,7 +559,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 593 | /* mov rax, dst_reg */ | 559 | /* mov rax, dst_reg */ |
| 594 | EMIT_mov(BPF_REG_0, dst_reg); | 560 | EMIT_mov(BPF_REG_0, dst_reg); |
| 595 | 561 | ||
| 596 | /* xor edx, edx | 562 | /* |
| 563 | * xor edx, edx | ||
| 597 | * equivalent to 'xor rdx, rdx', but one byte less | 564 | * equivalent to 'xor rdx, rdx', but one byte less |
| 598 | */ | 565 | */ |
| 599 | EMIT2(0x31, 0xd2); | 566 | EMIT2(0x31, 0xd2); |
| @@ -655,7 +622,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 655 | } | 622 | } |
| 656 | break; | 623 | break; |
| 657 | } | 624 | } |
| 658 | /* shifts */ | 625 | /* Shifts */ |
| 659 | case BPF_ALU | BPF_LSH | BPF_K: | 626 | case BPF_ALU | BPF_LSH | BPF_K: |
| 660 | case BPF_ALU | BPF_RSH | BPF_K: | 627 | case BPF_ALU | BPF_RSH | BPF_K: |
| 661 | case BPF_ALU | BPF_ARSH | BPF_K: | 628 | case BPF_ALU | BPF_ARSH | BPF_K: |
| @@ -686,7 +653,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 686 | case BPF_ALU64 | BPF_RSH | BPF_X: | 653 | case BPF_ALU64 | BPF_RSH | BPF_X: |
| 687 | case BPF_ALU64 | BPF_ARSH | BPF_X: | 654 | case BPF_ALU64 | BPF_ARSH | BPF_X: |
| 688 | 655 | ||
| 689 | /* check for bad case when dst_reg == rcx */ | 656 | /* Check for bad case when dst_reg == rcx */ |
| 690 | if (dst_reg == BPF_REG_4) { | 657 | if (dst_reg == BPF_REG_4) { |
| 691 | /* mov r11, dst_reg */ | 658 | /* mov r11, dst_reg */ |
| 692 | EMIT_mov(AUX_REG, dst_reg); | 659 | EMIT_mov(AUX_REG, dst_reg); |
| @@ -724,13 +691,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 724 | case BPF_ALU | BPF_END | BPF_FROM_BE: | 691 | case BPF_ALU | BPF_END | BPF_FROM_BE: |
| 725 | switch (imm32) { | 692 | switch (imm32) { |
| 726 | case 16: | 693 | case 16: |
| 727 | /* emit 'ror %ax, 8' to swap lower 2 bytes */ | 694 | /* Emit 'ror %ax, 8' to swap lower 2 bytes */ |
| 728 | EMIT1(0x66); | 695 | EMIT1(0x66); |
| 729 | if (is_ereg(dst_reg)) | 696 | if (is_ereg(dst_reg)) |
| 730 | EMIT1(0x41); | 697 | EMIT1(0x41); |
| 731 | EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); | 698 | EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); |
| 732 | 699 | ||
| 733 | /* emit 'movzwl eax, ax' */ | 700 | /* Emit 'movzwl eax, ax' */ |
| 734 | if (is_ereg(dst_reg)) | 701 | if (is_ereg(dst_reg)) |
| 735 | EMIT3(0x45, 0x0F, 0xB7); | 702 | EMIT3(0x45, 0x0F, 0xB7); |
| 736 | else | 703 | else |
| @@ -738,7 +705,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 738 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); | 705 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); |
| 739 | break; | 706 | break; |
| 740 | case 32: | 707 | case 32: |
| 741 | /* emit 'bswap eax' to swap lower 4 bytes */ | 708 | /* Emit 'bswap eax' to swap lower 4 bytes */ |
| 742 | if (is_ereg(dst_reg)) | 709 | if (is_ereg(dst_reg)) |
| 743 | EMIT2(0x41, 0x0F); | 710 | EMIT2(0x41, 0x0F); |
| 744 | else | 711 | else |
| @@ -746,7 +713,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 746 | EMIT1(add_1reg(0xC8, dst_reg)); | 713 | EMIT1(add_1reg(0xC8, dst_reg)); |
| 747 | break; | 714 | break; |
| 748 | case 64: | 715 | case 64: |
| 749 | /* emit 'bswap rax' to swap 8 bytes */ | 716 | /* Emit 'bswap rax' to swap 8 bytes */ |
| 750 | EMIT3(add_1mod(0x48, dst_reg), 0x0F, | 717 | EMIT3(add_1mod(0x48, dst_reg), 0x0F, |
| 751 | add_1reg(0xC8, dst_reg)); | 718 | add_1reg(0xC8, dst_reg)); |
| 752 | break; | 719 | break; |
| @@ -756,7 +723,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 756 | case BPF_ALU | BPF_END | BPF_FROM_LE: | 723 | case BPF_ALU | BPF_END | BPF_FROM_LE: |
| 757 | switch (imm32) { | 724 | switch (imm32) { |
| 758 | case 16: | 725 | case 16: |
| 759 | /* emit 'movzwl eax, ax' to zero extend 16-bit | 726 | /* |
| 727 | * Emit 'movzwl eax, ax' to zero extend 16-bit | ||
| 760 | * into 64 bit | 728 | * into 64 bit |
| 761 | */ | 729 | */ |
| 762 | if (is_ereg(dst_reg)) | 730 | if (is_ereg(dst_reg)) |
| @@ -766,7 +734,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 766 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); | 734 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); |
| 767 | break; | 735 | break; |
| 768 | case 32: | 736 | case 32: |
| 769 | /* emit 'mov eax, eax' to clear upper 32-bits */ | 737 | /* Emit 'mov eax, eax' to clear upper 32-bits */ |
| 770 | if (is_ereg(dst_reg)) | 738 | if (is_ereg(dst_reg)) |
| 771 | EMIT1(0x45); | 739 | EMIT1(0x45); |
| 772 | EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); | 740 | EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); |
| @@ -809,9 +777,9 @@ st: if (is_imm8(insn->off)) | |||
| 809 | 777 | ||
| 810 | /* STX: *(u8*)(dst_reg + off) = src_reg */ | 778 | /* STX: *(u8*)(dst_reg + off) = src_reg */ |
| 811 | case BPF_STX | BPF_MEM | BPF_B: | 779 | case BPF_STX | BPF_MEM | BPF_B: |
| 812 | /* emit 'mov byte ptr [rax + off], al' */ | 780 | /* Emit 'mov byte ptr [rax + off], al' */ |
| 813 | if (is_ereg(dst_reg) || is_ereg(src_reg) || | 781 | if (is_ereg(dst_reg) || is_ereg(src_reg) || |
| 814 | /* have to add extra byte for x86 SIL, DIL regs */ | 782 | /* We have to add extra byte for x86 SIL, DIL regs */ |
| 815 | src_reg == BPF_REG_1 || src_reg == BPF_REG_2) | 783 | src_reg == BPF_REG_1 || src_reg == BPF_REG_2) |
| 816 | EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); | 784 | EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); |
| 817 | else | 785 | else |
| @@ -840,25 +808,26 @@ stx: if (is_imm8(insn->off)) | |||
| 840 | 808 | ||
| 841 | /* LDX: dst_reg = *(u8*)(src_reg + off) */ | 809 | /* LDX: dst_reg = *(u8*)(src_reg + off) */ |
| 842 | case BPF_LDX | BPF_MEM | BPF_B: | 810 | case BPF_LDX | BPF_MEM | BPF_B: |
| 843 | /* emit 'movzx rax, byte ptr [rax + off]' */ | 811 | /* Emit 'movzx rax, byte ptr [rax + off]' */ |
| 844 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); | 812 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); |
| 845 | goto ldx; | 813 | goto ldx; |
| 846 | case BPF_LDX | BPF_MEM | BPF_H: | 814 | case BPF_LDX | BPF_MEM | BPF_H: |
| 847 | /* emit 'movzx rax, word ptr [rax + off]' */ | 815 | /* Emit 'movzx rax, word ptr [rax + off]' */ |
| 848 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); | 816 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); |
| 849 | goto ldx; | 817 | goto ldx; |
| 850 | case BPF_LDX | BPF_MEM | BPF_W: | 818 | case BPF_LDX | BPF_MEM | BPF_W: |
| 851 | /* emit 'mov eax, dword ptr [rax+0x14]' */ | 819 | /* Emit 'mov eax, dword ptr [rax+0x14]' */ |
| 852 | if (is_ereg(dst_reg) || is_ereg(src_reg)) | 820 | if (is_ereg(dst_reg) || is_ereg(src_reg)) |
| 853 | EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); | 821 | EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); |
| 854 | else | 822 | else |
| 855 | EMIT1(0x8B); | 823 | EMIT1(0x8B); |
| 856 | goto ldx; | 824 | goto ldx; |
| 857 | case BPF_LDX | BPF_MEM | BPF_DW: | 825 | case BPF_LDX | BPF_MEM | BPF_DW: |
| 858 | /* emit 'mov rax, qword ptr [rax+0x14]' */ | 826 | /* Emit 'mov rax, qword ptr [rax+0x14]' */ |
| 859 | EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); | 827 | EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); |
| 860 | ldx: /* if insn->off == 0 we can save one extra byte, but | 828 | ldx: /* |
| 861 | * special case of x86 r13 which always needs an offset | 829 | * If insn->off == 0 we can save one extra byte, but |
| 830 | * special case of x86 R13 which always needs an offset | ||
| 862 | * is not worth the hassle | 831 | * is not worth the hassle |
| 863 | */ | 832 | */ |
| 864 | if (is_imm8(insn->off)) | 833 | if (is_imm8(insn->off)) |
| @@ -870,7 +839,7 @@ ldx: /* if insn->off == 0 we can save one extra byte, but | |||
| 870 | 839 | ||
| 871 | /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ | 840 | /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ |
| 872 | case BPF_STX | BPF_XADD | BPF_W: | 841 | case BPF_STX | BPF_XADD | BPF_W: |
| 873 | /* emit 'lock add dword ptr [rax + off], eax' */ | 842 | /* Emit 'lock add dword ptr [rax + off], eax' */ |
| 874 | if (is_ereg(dst_reg) || is_ereg(src_reg)) | 843 | if (is_ereg(dst_reg) || is_ereg(src_reg)) |
| 875 | EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); | 844 | EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); |
| 876 | else | 845 | else |
| @@ -889,35 +858,12 @@ xadd: if (is_imm8(insn->off)) | |||
| 889 | case BPF_JMP | BPF_CALL: | 858 | case BPF_JMP | BPF_CALL: |
| 890 | func = (u8 *) __bpf_call_base + imm32; | 859 | func = (u8 *) __bpf_call_base + imm32; |
| 891 | jmp_offset = func - (image + addrs[i]); | 860 | jmp_offset = func - (image + addrs[i]); |
| 892 | if (seen_ld_abs) { | ||
| 893 | reload_skb_data = bpf_helper_changes_pkt_data(func); | ||
| 894 | if (reload_skb_data) { | ||
| 895 | EMIT1(0x57); /* push %rdi */ | ||
| 896 | jmp_offset += 22; /* pop, mov, sub, mov */ | ||
| 897 | } else { | ||
| 898 | EMIT2(0x41, 0x52); /* push %r10 */ | ||
| 899 | EMIT2(0x41, 0x51); /* push %r9 */ | ||
| 900 | /* need to adjust jmp offset, since | ||
| 901 | * pop %r9, pop %r10 take 4 bytes after call insn | ||
| 902 | */ | ||
| 903 | jmp_offset += 4; | ||
| 904 | } | ||
| 905 | } | ||
| 906 | if (!imm32 || !is_simm32(jmp_offset)) { | 861 | if (!imm32 || !is_simm32(jmp_offset)) { |
| 907 | pr_err("unsupported bpf func %d addr %p image %p\n", | 862 | pr_err("unsupported BPF func %d addr %p image %p\n", |
| 908 | imm32, func, image); | 863 | imm32, func, image); |
| 909 | return -EINVAL; | 864 | return -EINVAL; |
| 910 | } | 865 | } |
| 911 | EMIT1_off32(0xE8, jmp_offset); | 866 | EMIT1_off32(0xE8, jmp_offset); |
| 912 | if (seen_ld_abs) { | ||
| 913 | if (reload_skb_data) { | ||
| 914 | EMIT1(0x5F); /* pop %rdi */ | ||
| 915 | emit_load_skb_data_hlen(&prog); | ||
| 916 | } else { | ||
| 917 | EMIT2(0x41, 0x59); /* pop %r9 */ | ||
| 918 | EMIT2(0x41, 0x5A); /* pop %r10 */ | ||
| 919 | } | ||
| 920 | } | ||
| 921 | break; | 867 | break; |
| 922 | 868 | ||
| 923 | case BPF_JMP | BPF_TAIL_CALL: | 869 | case BPF_JMP | BPF_TAIL_CALL: |
| @@ -970,7 +916,7 @@ xadd: if (is_imm8(insn->off)) | |||
| 970 | else | 916 | else |
| 971 | EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); | 917 | EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); |
| 972 | 918 | ||
| 973 | emit_cond_jmp: /* convert BPF opcode to x86 */ | 919 | emit_cond_jmp: /* Convert BPF opcode to x86 */ |
| 974 | switch (BPF_OP(insn->code)) { | 920 | switch (BPF_OP(insn->code)) { |
| 975 | case BPF_JEQ: | 921 | case BPF_JEQ: |
| 976 | jmp_cond = X86_JE; | 922 | jmp_cond = X86_JE; |
| @@ -996,22 +942,22 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ | |||
| 996 | jmp_cond = X86_JBE; | 942 | jmp_cond = X86_JBE; |
| 997 | break; | 943 | break; |
| 998 | case BPF_JSGT: | 944 | case BPF_JSGT: |
| 999 | /* signed '>', GT in x86 */ | 945 | /* Signed '>', GT in x86 */ |
| 1000 | jmp_cond = X86_JG; | 946 | jmp_cond = X86_JG; |
| 1001 | break; | 947 | break; |
| 1002 | case BPF_JSLT: | 948 | case BPF_JSLT: |
| 1003 | /* signed '<', LT in x86 */ | 949 | /* Signed '<', LT in x86 */ |
| 1004 | jmp_cond = X86_JL; | 950 | jmp_cond = X86_JL; |
| 1005 | break; | 951 | break; |
| 1006 | case BPF_JSGE: | 952 | case BPF_JSGE: |
| 1007 | /* signed '>=', GE in x86 */ | 953 | /* Signed '>=', GE in x86 */ |
| 1008 | jmp_cond = X86_JGE; | 954 | jmp_cond = X86_JGE; |
| 1009 | break; | 955 | break; |
| 1010 | case BPF_JSLE: | 956 | case BPF_JSLE: |
| 1011 | /* signed '<=', LE in x86 */ | 957 | /* Signed '<=', LE in x86 */ |
| 1012 | jmp_cond = X86_JLE; | 958 | jmp_cond = X86_JLE; |
| 1013 | break; | 959 | break; |
| 1014 | default: /* to silence gcc warning */ | 960 | default: /* To silence GCC warning */ |
| 1015 | return -EFAULT; | 961 | return -EFAULT; |
| 1016 | } | 962 | } |
| 1017 | jmp_offset = addrs[i + insn->off] - addrs[i]; | 963 | jmp_offset = addrs[i + insn->off] - addrs[i]; |
| @@ -1039,7 +985,7 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ | |||
| 1039 | jmp_offset = addrs[i + insn->off] - addrs[i]; | 985 | jmp_offset = addrs[i + insn->off] - addrs[i]; |
| 1040 | 986 | ||
| 1041 | if (!jmp_offset) | 987 | if (!jmp_offset) |
| 1042 | /* optimize out nop jumps */ | 988 | /* Optimize out nop jumps */ |
| 1043 | break; | 989 | break; |
| 1044 | emit_jmp: | 990 | emit_jmp: |
| 1045 | if (is_imm8(jmp_offset)) { | 991 | if (is_imm8(jmp_offset)) { |
| @@ -1052,66 +998,13 @@ emit_jmp: | |||
| 1052 | } | 998 | } |
| 1053 | break; | 999 | break; |
| 1054 | 1000 | ||
| 1055 | case BPF_LD | BPF_IND | BPF_W: | ||
| 1056 | func = sk_load_word; | ||
| 1057 | goto common_load; | ||
| 1058 | case BPF_LD | BPF_ABS | BPF_W: | ||
| 1059 | func = CHOOSE_LOAD_FUNC(imm32, sk_load_word); | ||
| 1060 | common_load: | ||
| 1061 | ctx->seen_ld_abs = seen_ld_abs = true; | ||
| 1062 | jmp_offset = func - (image + addrs[i]); | ||
| 1063 | if (!func || !is_simm32(jmp_offset)) { | ||
| 1064 | pr_err("unsupported bpf func %d addr %p image %p\n", | ||
| 1065 | imm32, func, image); | ||
| 1066 | return -EINVAL; | ||
| 1067 | } | ||
| 1068 | if (BPF_MODE(insn->code) == BPF_ABS) { | ||
| 1069 | /* mov %esi, imm32 */ | ||
| 1070 | EMIT1_off32(0xBE, imm32); | ||
| 1071 | } else { | ||
| 1072 | /* mov %rsi, src_reg */ | ||
| 1073 | EMIT_mov(BPF_REG_2, src_reg); | ||
| 1074 | if (imm32) { | ||
| 1075 | if (is_imm8(imm32)) | ||
| 1076 | /* add %esi, imm8 */ | ||
| 1077 | EMIT3(0x83, 0xC6, imm32); | ||
| 1078 | else | ||
| 1079 | /* add %esi, imm32 */ | ||
| 1080 | EMIT2_off32(0x81, 0xC6, imm32); | ||
| 1081 | } | ||
| 1082 | } | ||
| 1083 | /* skb pointer is in R6 (%rbx), it will be copied into | ||
| 1084 | * %rdi if skb_copy_bits() call is necessary. | ||
| 1085 | * sk_load_* helpers also use %r10 and %r9d. | ||
| 1086 | * See bpf_jit.S | ||
| 1087 | */ | ||
| 1088 | if (seen_ax_reg) | ||
| 1089 | /* r10 = skb->data, mov %r10, off32(%rbx) */ | ||
| 1090 | EMIT3_off32(0x4c, 0x8b, 0x93, | ||
| 1091 | offsetof(struct sk_buff, data)); | ||
| 1092 | EMIT1_off32(0xE8, jmp_offset); /* call */ | ||
| 1093 | break; | ||
| 1094 | |||
| 1095 | case BPF_LD | BPF_IND | BPF_H: | ||
| 1096 | func = sk_load_half; | ||
| 1097 | goto common_load; | ||
| 1098 | case BPF_LD | BPF_ABS | BPF_H: | ||
| 1099 | func = CHOOSE_LOAD_FUNC(imm32, sk_load_half); | ||
| 1100 | goto common_load; | ||
| 1101 | case BPF_LD | BPF_IND | BPF_B: | ||
| 1102 | func = sk_load_byte; | ||
| 1103 | goto common_load; | ||
| 1104 | case BPF_LD | BPF_ABS | BPF_B: | ||
| 1105 | func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte); | ||
| 1106 | goto common_load; | ||
| 1107 | |||
| 1108 | case BPF_JMP | BPF_EXIT: | 1001 | case BPF_JMP | BPF_EXIT: |
| 1109 | if (seen_exit) { | 1002 | if (seen_exit) { |
| 1110 | jmp_offset = ctx->cleanup_addr - addrs[i]; | 1003 | jmp_offset = ctx->cleanup_addr - addrs[i]; |
| 1111 | goto emit_jmp; | 1004 | goto emit_jmp; |
| 1112 | } | 1005 | } |
| 1113 | seen_exit = true; | 1006 | seen_exit = true; |
| 1114 | /* update cleanup_addr */ | 1007 | /* Update cleanup_addr */ |
| 1115 | ctx->cleanup_addr = proglen; | 1008 | ctx->cleanup_addr = proglen; |
| 1116 | /* mov rbx, qword ptr [rbp+0] */ | 1009 | /* mov rbx, qword ptr [rbp+0] */ |
| 1117 | EMIT4(0x48, 0x8B, 0x5D, 0); | 1010 | EMIT4(0x48, 0x8B, 0x5D, 0); |
| @@ -1129,10 +1022,11 @@ common_load: | |||
| 1129 | break; | 1022 | break; |
| 1130 | 1023 | ||
| 1131 | default: | 1024 | default: |
| 1132 | /* By design x64 JIT should support all BPF instructions | 1025 | /* |
| 1026 | * By design x86-64 JIT should support all BPF instructions. | ||
| 1133 | * This error will be seen if new instruction was added | 1027 | * This error will be seen if a new instruction was added |
| 1134 | * to interpreter, but not to JIT | 1028 | * to the interpreter, but not to the JIT, or if there is |
| 1135 | * or if there is junk in bpf_prog | 1029 | * junk in bpf_prog. |
| 1136 | */ | 1030 | */ |
| 1137 | pr_err("bpf_jit: unknown opcode %02x\n", insn->code); | 1031 | pr_err("bpf_jit: unknown opcode %02x\n", insn->code); |
| 1138 | return -EINVAL; | 1032 | return -EINVAL; |
| @@ -1184,7 +1078,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
| 1184 | return orig_prog; | 1078 | return orig_prog; |
| 1185 | 1079 | ||
| 1186 | tmp = bpf_jit_blind_constants(prog); | 1080 | tmp = bpf_jit_blind_constants(prog); |
| 1187 | /* If blinding was requested and we failed during blinding, | 1081 | /* |
| 1082 | * If blinding was requested and we failed during blinding, | ||
| 1188 | * we must fall back to the interpreter. | 1083 | * we must fall back to the interpreter. |
| 1189 | */ | 1084 | */ |
| 1190 | if (IS_ERR(tmp)) | 1085 | if (IS_ERR(tmp)) |
| @@ -1218,8 +1113,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
| 1218 | goto out_addrs; | 1113 | goto out_addrs; |
| 1219 | } | 1114 | } |
| 1220 | 1115 | ||
| 1221 | /* Before first pass, make a rough estimation of addrs[] | 1116 | /* |
| 1222 | * each bpf instruction is translated to less than 64 bytes | 1117 | * Before first pass, make a rough estimation of addrs[] |
| 1118 | * each BPF instruction is translated to less than 64 bytes | ||
| 1223 | */ | 1119 | */ |
| 1224 | for (proglen = 0, i = 0; i < prog->len; i++) { | 1120 | for (proglen = 0, i = 0; i < prog->len; i++) { |
| 1225 | proglen += 64; | 1121 | proglen += 64; |
| @@ -1228,10 +1124,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
| 1228 | ctx.cleanup_addr = proglen; | 1124 | ctx.cleanup_addr = proglen; |
| 1229 | skip_init_addrs: | 1125 | skip_init_addrs: |
| 1230 | 1126 | ||
| 1231 | /* JITed image shrinks with every pass and the loop iterates | 1127 | /* |
| 1232 | * until the image stops shrinking. Very large bpf programs | 1128 | * JITed image shrinks with every pass and the loop iterates |
| 1129 | * until the image stops shrinking. Very large BPF programs | ||
| 1233 | * may converge on the last pass. In such case do one more | 1130 | * may converge on the last pass. In such case do one more |
| 1234 | * pass to emit the final image | 1131 | * pass to emit the final image. |
| 1235 | */ | 1132 | */ |
| 1236 | for (pass = 0; pass < 20 || image; pass++) { | 1133 | for (pass = 0; pass < 20 || image; pass++) { |
| 1237 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); | 1134 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); |
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..0cc04e30adc1 --- /dev/null +++ b/arch/x86/net/bpf_jit_comp32.c | |||
| @@ -0,0 +1,2419 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | /* | ||
| 3 | * Just-In-Time compiler for eBPF filters on IA32 (32bit x86) | ||
| 4 | * | ||
| 5 | * Author: Wang YanQing (udknight@gmail.com) | ||
| 6 | * The code is based on code and ideas from: | ||
| 7 | * Eric Dumazet (eric.dumazet@gmail.com) | ||
| 8 | * and from: | ||
| 9 | * Shubham Bansal <illusionist.neo@gmail.com> | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/netdevice.h> | ||
| 13 | #include <linux/filter.h> | ||
| 14 | #include <linux/if_vlan.h> | ||
| 15 | #include <asm/cacheflush.h> | ||
| 16 | #include <asm/set_memory.h> | ||
| 17 | #include <asm/nospec-branch.h> | ||
| 18 | #include <linux/bpf.h> | ||
| 19 | |||
| 20 | /* | ||
| 21 | * eBPF prog stack layout: | ||
| 22 | * | ||
| 23 | * high | ||
| 24 | * original ESP => +-----+ | ||
| 25 | * | | callee saved registers | ||
| 26 | * +-----+ | ||
| 27 | * | ... | eBPF JIT scratch space | ||
| 28 | * BPF_FP,IA32_EBP => +-----+ | ||
| 29 | * | ... | eBPF prog stack | ||
| 30 | * +-----+ | ||
| 31 | * |RSVD | JIT scratchpad | ||
| 32 | * current ESP => +-----+ | ||
| 33 | * | | | ||
| 34 | * | ... | Function call stack | ||
| 35 | * | | | ||
| 36 | * +-----+ | ||
| 37 | * low | ||
| 38 | * | ||
| 39 | * The callee saved registers: | ||
| 40 | * | ||
| 41 | * high | ||
| 42 | * original ESP => +------------------+ \ | ||
| 43 | * | ebp | | | ||
| 44 | * current EBP => +------------------+ } callee saved registers | ||
| 45 | * | ebx,esi,edi | | | ||
| 46 | * +------------------+ / | ||
| 47 | * low | ||
| 48 | */ | ||
| 49 | |||
| 50 | static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | ||
| 51 | { | ||
| 52 | if (len == 1) | ||
| 53 | *ptr = bytes; | ||
| 54 | else if (len == 2) | ||
| 55 | *(u16 *)ptr = bytes; | ||
| 56 | else { | ||
| 57 | *(u32 *)ptr = bytes; | ||
| 58 | barrier(); | ||
| 59 | } | ||
| 60 | return ptr + len; | ||
| 61 | } | ||
| 62 | |||
| 63 | #define EMIT(bytes, len) \ | ||
| 64 | do { prog = emit_code(prog, bytes, len); cnt += len; } while (0) | ||
| 65 | |||
| 66 | #define EMIT1(b1) EMIT(b1, 1) | ||
| 67 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) | ||
| 68 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) | ||
| 69 | #define EMIT4(b1, b2, b3, b4) \ | ||
| 70 | EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) | ||
| 71 | |||
| 72 | #define EMIT1_off32(b1, off) \ | ||
| 73 | do { EMIT1(b1); EMIT(off, 4); } while (0) | ||
| 74 | #define EMIT2_off32(b1, b2, off) \ | ||
| 75 | do { EMIT2(b1, b2); EMIT(off, 4); } while (0) | ||
| 76 | #define EMIT3_off32(b1, b2, b3, off) \ | ||
| 77 | do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) | ||
| 78 | #define EMIT4_off32(b1, b2, b3, b4, off) \ | ||
| 79 | do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) | ||
| 80 | |||
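For illustration: EMIT() packs up to four opcode bytes into a u32 with the first byte in the low bits, and emit_code() stores that little-endian, so the bytes land in the image in instruction order. A minimal standalone sketch of the len == 2 path:

    u8 buf[2];
    u32 bytes = 0x89 + (0xE5 << 8);  /* what EMIT2(0x89, 0xE5) builds */

    *(u16 *)buf = bytes;             /* little-endian store, as in emit_code() */
    /* buf[0] == 0x89, buf[1] == 0xE5 -- the encoding of "mov ebp,esp" */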
| 81 | #define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len) | ||
| 82 | |||
| 83 | static bool is_imm8(int value) | ||
| 84 | { | ||
| 85 | return value <= 127 && value >= -128; | ||
| 86 | } | ||
| 87 | |||
| 88 | static bool is_simm32(s64 value) | ||
| 89 | { | ||
| 90 | return value == (s64) (s32) value; | ||
| 91 | } | ||
| 92 | |||
| 93 | #define STACK_OFFSET(k) (k) | ||
| 94 | #define TCALL_CNT (MAX_BPF_JIT_REG + 0) /* Tail Call Count */ | ||
| 95 | |||
| 96 | #define IA32_EAX (0x0) | ||
| 97 | #define IA32_EBX (0x3) | ||
| 98 | #define IA32_ECX (0x1) | ||
| 99 | #define IA32_EDX (0x2) | ||
| 100 | #define IA32_ESI (0x6) | ||
| 101 | #define IA32_EDI (0x7) | ||
| 102 | #define IA32_EBP (0x5) | ||
| 103 | #define IA32_ESP (0x4) | ||
| 104 | |||
| 105 | /* | ||
| 106 | * List of x86 conditional jump opcodes (. + s8) | ||
| 107 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) | ||
| 108 | */ | ||
| 109 | #define IA32_JB 0x72 | ||
| 110 | #define IA32_JAE 0x73 | ||
| 111 | #define IA32_JE 0x74 | ||
| 112 | #define IA32_JNE 0x75 | ||
| 113 | #define IA32_JBE 0x76 | ||
| 114 | #define IA32_JA 0x77 | ||
| 115 | #define IA32_JL 0x7C | ||
| 116 | #define IA32_JGE 0x7D | ||
| 117 | #define IA32_JLE 0x7E | ||
| 118 | #define IA32_JG 0x7F | ||
| 119 | |||
| 120 | /* | ||
| 121 | * Map eBPF registers to IA32 32bit registers or stack scratch space. | ||
| 122 | * | ||
| 123 | * 1. All the registers, R0-R10, are mapped to scratch space on stack. | ||
| 124 | * 2. We need two 64 bit temp registers to do complex operations on eBPF | ||
| 125 | * registers. | ||
| 126 | * 3. For performance reasons, BPF_REG_AX, used for blinding constants, is | ||
| 127 | * mapped to a real hardware register pair, IA32_ESI and IA32_EDI. | ||
| 128 | * | ||
| 129 | * As the eBPF registers are all 64 bit registers and IA32 has only 32 bit | ||
| 130 | * registers, we have to map each eBPF register to two IA32 32 bit registers | ||
| 131 | * or scratch memory slots, and build each 64 bit eBPF register from those. | ||
| 132 | * | ||
| 133 | * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers. | ||
| 134 | */ | ||
| 135 | static const u8 bpf2ia32[][2] = { | ||
| 136 | /* Return value from in-kernel function, and exit value from eBPF */ | ||
| 137 | [BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)}, | ||
| 138 | |||
| 139 | /* The arguments from eBPF program to in-kernel function */ | ||
| 140 | /* Stored on stack scratch space */ | ||
| 141 | [BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)}, | ||
| 142 | [BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)}, | ||
| 143 | [BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)}, | ||
| 144 | [BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)}, | ||
| 145 | [BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)}, | ||
| 146 | |||
| 147 | /* Callee saved registers that in-kernel function will preserve */ | ||
| 148 | /* Stored on stack scratch space */ | ||
| 149 | [BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)}, | ||
| 150 | [BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)}, | ||
| 151 | [BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)}, | ||
| 152 | [BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)}, | ||
| 153 | |||
| 154 | /* Read only Frame Pointer to access Stack */ | ||
| 155 | [BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)}, | ||
| 156 | |||
| 157 | /* Temporary register for blinding constants. */ | ||
| 158 | [BPF_REG_AX] = {IA32_ESI, IA32_EDI}, | ||
| 159 | |||
| 160 | /* Tail call count. Stored on stack scratch space. */ | ||
| 161 | [TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)}, | ||
| 162 | }; | ||
| 163 | |||
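As a worked example of the mapping table, the two halves of R2 resolve to fixed scratch-slot offsets (taken directly from the initializer above):

    const u8 *r2 = bpf2ia32[BPF_REG_2];

    /* r2[0] == STACK_OFFSET(16): low 32 bits of R2, at [ebp + 16] */
    /* r2[1] == STACK_OFFSET(20): high 32 bits of R2, at [ebp + 20] */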
| 164 | #define dst_lo dst[0] | ||
| 165 | #define dst_hi dst[1] | ||
| 166 | #define src_lo src[0] | ||
| 167 | #define src_hi src[1] | ||
| 168 | |||
| 169 | #define STACK_ALIGNMENT 8 | ||
| 170 | /* | ||
| 171 | * Stack space for BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, | ||
| 172 | * BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9, | ||
| 173 | * BPF_REG_FP, BPF_REG_AX and Tail call counts. | ||
| 174 | */ | ||
| 175 | #define SCRATCH_SIZE 96 | ||
| 176 | |||
| 177 | /* Total stack size used in JITed code */ | ||
| 178 | #define _STACK_SIZE (stack_depth + SCRATCH_SIZE) | ||
| 179 | |||
| 180 | #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) | ||
| 181 | |||
| 182 | /* Get the offset of eBPF REGISTERs stored on scratch space. */ | ||
| 183 | #define STACK_VAR(off) (off) | ||
| 184 | |||
| 185 | /* Encode 'dst_reg' register into IA32 opcode 'byte' */ | ||
| 186 | static u8 add_1reg(u8 byte, u32 dst_reg) | ||
| 187 | { | ||
| 188 | return byte + dst_reg; | ||
| 189 | } | ||
| 190 | |||
| 191 | /* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */ | ||
| 192 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | ||
| 193 | { | ||
| 194 | return byte + dst_reg + (src_reg << 3); | ||
| 195 | } | ||
| 196 | |||
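A worked example of the two helpers: with the register-direct base byte 0xC0, add_2reg() puts dst in the ModRM r/m field and src in the reg field. The fragment below (a sketch; it assumes 'prog' and 'cnt' are in scope, as inside the emitters) encodes 'add ecx,eax':

    /* opcode 0x01 is "add r/m32,r32"; ModRM = 0xC0 + ECX + (EAX << 3) = 0xC1 */
    EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
    /* base byte 0x40 instead selects [reg + disp8] addressing, used for
     * the [ebp+off] scratch accesses throughout this file */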
| 197 | static void jit_fill_hole(void *area, unsigned int size) | ||
| 198 | { | ||
| 199 | /* Fill whole space with int3 instructions */ | ||
| 200 | memset(area, 0xcc, size); | ||
| 201 | } | ||
| 202 | |||
| 203 | static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk, | ||
| 204 | u8 **pprog) | ||
| 205 | { | ||
| 206 | u8 *prog = *pprog; | ||
| 207 | int cnt = 0; | ||
| 208 | |||
| 209 | if (dstk) { | ||
| 210 | if (val == 0) { | ||
| 211 | /* xor eax,eax */ | ||
| 212 | EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX)); | ||
| 213 | /* mov dword ptr [ebp+off],eax */ | ||
| 214 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 215 | STACK_VAR(dst)); | ||
| 216 | } else { | ||
| 217 | EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP), | ||
| 218 | STACK_VAR(dst), val); | ||
| 219 | } | ||
| 220 | } else { | ||
| 221 | if (val == 0) | ||
| 222 | EMIT2(0x33, add_2reg(0xC0, dst, dst)); | ||
| 223 | else | ||
| 224 | EMIT2_off32(0xC7, add_1reg(0xC0, dst), | ||
| 225 | val); | ||
| 226 | } | ||
| 227 | *pprog = prog; | ||
| 228 | } | ||
| 229 | |||
| 230 | /* dst = src (4 bytes) */ | ||
| 231 | static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk, | ||
| 232 | bool sstk, u8 **pprog) | ||
| 233 | { | ||
| 234 | u8 *prog = *pprog; | ||
| 235 | int cnt = 0; | ||
| 236 | u8 sreg = sstk ? IA32_EAX : src; | ||
| 237 | |||
| 238 | if (sstk) | ||
| 239 | /* mov eax,dword ptr [ebp+off] */ | ||
| 240 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src)); | ||
| 241 | if (dstk) | ||
| 242 | /* mov dword ptr [ebp+off],eax */ | ||
| 243 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst)); | ||
| 244 | else | ||
| 245 | /* mov dst,sreg */ | ||
| 246 | EMIT2(0x89, add_2reg(0xC0, dst, sreg)); | ||
| 247 | |||
| 248 | *pprog = prog; | ||
| 249 | } | ||
| 250 | |||
| 251 | /* dst = src */ | ||
| 252 | static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[], | ||
| 253 | const u8 src[], bool dstk, | ||
| 254 | bool sstk, u8 **pprog) | ||
| 255 | { | ||
| 256 | emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog); | ||
| 257 | if (is64) | ||
| 258 | /* complete 8 byte move */ | ||
| 259 | emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog); | ||
| 260 | else | ||
| 261 | /* zero out high 4 bytes */ | ||
| 262 | emit_ia32_mov_i(dst_hi, 0, dstk, pprog); | ||
| 263 | } | ||
| 264 | |||
| 265 | /* Sign extended move */ | ||
| 266 | static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[], | ||
| 267 | const u32 val, bool dstk, u8 **pprog) | ||
| 268 | { | ||
| 269 | u32 hi = 0; | ||
| 270 | |||
| 271 | if (is64 && (val & (1<<31))) | ||
| 272 | hi = (u32)~0; | ||
| 273 | emit_ia32_mov_i(dst_lo, val, dstk, pprog); | ||
| 274 | emit_ia32_mov_i(dst_hi, hi, dstk, pprog); | ||
| 275 | } | ||
| 276 | |||
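The sign extension rule in one helper; a standalone sketch of the value the pair of emit_ia32_mov_i() calls above materializes (u32/u64 as in linux/types.h):

    static u64 sext_imm32_ref(bool is64, u32 val)
    {
        /* a negative 32-bit immediate gets its high word filled with ones */
        u32 hi = (is64 && (val & (1U << 31))) ? (u32)~0 : 0;

        return ((u64)hi << 32) | val;  /* e.g. 0xFFFFFFFE -> 0xFFFFFFFFFFFFFFFE */
    }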
| 277 | /* | ||
| 278 | * ALU operation (32 bit) | ||
| 279 | * dst = dst * src | ||
| 280 | */ | ||
| 281 | static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk, | ||
| 282 | bool sstk, u8 **pprog) | ||
| 283 | { | ||
| 284 | u8 *prog = *pprog; | ||
| 285 | int cnt = 0; | ||
| 286 | u8 sreg = sstk ? IA32_ECX : src; | ||
| 287 | |||
| 288 | if (sstk) | ||
| 289 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 290 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src)); | ||
| 291 | |||
| 292 | if (dstk) | ||
| 293 | /* mov eax,dword ptr [ebp+off] */ | ||
| 294 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst)); | ||
| 295 | else | ||
| 296 | /* mov eax,dst */ | ||
| 297 | EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX)); | ||
| 298 | |||
| 299 | /* mul sreg */ | ||
| 300 | EMIT2(0xF7, add_1reg(0xE0, sreg)); | ||
| 301 | |||
| 302 | if (dstk) | ||
| 303 | /* mov dword ptr [ebp+off],eax */ | ||
| 304 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 305 | STACK_VAR(dst)); | ||
| 306 | else | ||
| 307 | /* mov dst,eax */ | ||
| 308 | EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX)); | ||
| 309 | |||
| 310 | *pprog = prog; | ||
| 311 | } | ||
| 312 | |||
| 313 | static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val, | ||
| 314 | bool dstk, u8 **pprog) | ||
| 315 | { | ||
| 316 | u8 *prog = *pprog; | ||
| 317 | int cnt = 0; | ||
| 318 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 319 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 320 | |||
| 321 | if (dstk && val != 64) { | ||
| 322 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 323 | STACK_VAR(dst_lo)); | ||
| 324 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 325 | STACK_VAR(dst_hi)); | ||
| 326 | } | ||
| 327 | switch (val) { | ||
| 328 | case 16: | ||
| 329 | /* | ||
| 330 | * Emit 'movzwl dreg_lo,dreg_lo' to zero-extend | ||
| 331 | * the low 16 bits into 64 bits | ||
| 332 | */ | ||
| 333 | EMIT2(0x0F, 0xB7); | ||
| 334 | EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 335 | /* xor dreg_hi,dreg_hi */ | ||
| 336 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 337 | break; | ||
| 338 | case 32: | ||
| 339 | /* xor dreg_hi,dreg_hi */ | ||
| 340 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 341 | break; | ||
| 342 | case 64: | ||
| 343 | /* nop */ | ||
| 344 | break; | ||
| 345 | } | ||
| 346 | |||
| 347 | if (dstk && val != 64) { | ||
| 348 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 349 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 350 | STACK_VAR(dst_lo)); | ||
| 351 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 352 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 353 | STACK_VAR(dst_hi)); | ||
| 354 | } | ||
| 355 | *pprog = prog; | ||
| 356 | } | ||
| 357 | |||
| 358 | static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val, | ||
| 359 | bool dstk, u8 **pprog) | ||
| 360 | { | ||
| 361 | u8 *prog = *pprog; | ||
| 362 | int cnt = 0; | ||
| 363 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 364 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 365 | |||
| 366 | if (dstk) { | ||
| 367 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 368 | STACK_VAR(dst_lo)); | ||
| 369 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 370 | STACK_VAR(dst_hi)); | ||
| 371 | } | ||
| 372 | switch (val) { | ||
| 373 | case 16: | ||
| 374 | /* Emit a 16-bit 'ror dreg_lo,8' to swap the lower 2 bytes */ | ||
| 375 | EMIT1(0x66); | ||
| 376 | EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8); | ||
| 377 | |||
| 378 | EMIT2(0x0F, 0xB7); | ||
| 379 | EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 380 | |||
| 381 | /* xor dreg_hi,dreg_hi */ | ||
| 382 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 383 | break; | ||
| 384 | case 32: | ||
| 385 | /* Emit 'bswap dreg_lo' to swap the lower 4 bytes */ | ||
| 386 | EMIT1(0x0F); | ||
| 387 | EMIT1(add_1reg(0xC8, dreg_lo)); | ||
| 388 | |||
| 389 | /* xor dreg_hi,dreg_hi */ | ||
| 390 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 391 | break; | ||
| 392 | case 64: | ||
| 393 | /* Emit 'bswap dreg_lo' to swap the lower 4 bytes */ | ||
| 394 | EMIT1(0x0F); | ||
| 395 | EMIT1(add_1reg(0xC8, dreg_lo)); | ||
| 396 | |||
| 397 | /* Emit 'bswap dreg_hi' to swap the upper 4 bytes */ | ||
| 398 | EMIT1(0x0F); | ||
| 399 | EMIT1(add_1reg(0xC8, dreg_hi)); | ||
| 400 | |||
| 401 | /* mov ecx,dreg_hi */ | ||
| 402 | EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi)); | ||
| 403 | /* mov dreg_hi,dreg_lo */ | ||
| 404 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); | ||
| 405 | /* mov dreg_lo,ecx */ | ||
| 406 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX)); | ||
| 407 | |||
| 408 | break; | ||
| 409 | } | ||
| 410 | if (dstk) { | ||
| 411 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 412 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 413 | STACK_VAR(dst_lo)); | ||
| 414 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 415 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 416 | STACK_VAR(dst_hi)); | ||
| 417 | } | ||
| 418 | *pprog = prog; | ||
| 419 | } | ||
| 420 | |||
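For the val == 64 case above, the net effect is: byte-swap each 32-bit half, then exchange the halves. A reference model using the GCC builtin (a sketch, not the emitted sequence):

    static u64 be64_ref(u32 lo, u32 hi)
    {
        /* bswap both halves, then swap them */
        return ((u64)__builtin_bswap32(lo) << 32) | __builtin_bswap32(hi);
    }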
| 421 | /* | ||
| 422 | * ALU operation (32 bit) | ||
| 423 | * dst = dst (div|mod) src | ||
| 424 | */ | ||
| 425 | static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src, | ||
| 426 | bool dstk, bool sstk, u8 **pprog) | ||
| 427 | { | ||
| 428 | u8 *prog = *pprog; | ||
| 429 | int cnt = 0; | ||
| 430 | |||
| 431 | if (sstk) | ||
| 432 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 433 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 434 | STACK_VAR(src)); | ||
| 435 | else if (src != IA32_ECX) | ||
| 436 | /* mov ecx,src */ | ||
| 437 | EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX)); | ||
| 438 | |||
| 439 | if (dstk) | ||
| 440 | /* mov eax,dword ptr [ebp+off] */ | ||
| 441 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 442 | STACK_VAR(dst)); | ||
| 443 | else | ||
| 444 | /* mov eax,dst */ | ||
| 445 | EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX)); | ||
| 446 | |||
| 447 | /* xor edx,edx */ | ||
| 448 | EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX)); | ||
| 449 | /* div ecx */ | ||
| 450 | EMIT2(0xF7, add_1reg(0xF0, IA32_ECX)); | ||
| 451 | |||
| 452 | if (op == BPF_MOD) { | ||
| 453 | if (dstk) | ||
| 454 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 455 | STACK_VAR(dst)); | ||
| 456 | else | ||
| 457 | EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX)); | ||
| 458 | } else { | ||
| 459 | if (dstk) | ||
| 460 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 461 | STACK_VAR(dst)); | ||
| 462 | else | ||
| 463 | EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX)); | ||
| 464 | } | ||
| 465 | *pprog = prog; | ||
| 466 | } | ||
| 467 | |||
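Because edx is zeroed before 'div ecx', the edx:eax / ecx divide degenerates to a plain unsigned 32-bit division, so BPF_DIV and BPF_MOD are unsigned here. Reference semantics (a sketch, assuming a non-zero divisor):

    static u32 div_mod_ref(u8 op, u32 dst, u32 src)
    {
        /* div leaves the quotient in eax and the remainder in edx */
        return op == BPF_MOD ? dst % src : dst / src;
    }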
| 468 | /* | ||
| 469 | * ALU operation (32 bit) | ||
| 470 | * dst = dst (shift) src | ||
| 471 | */ | ||
| 472 | static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src, | ||
| 473 | bool dstk, bool sstk, u8 **pprog) | ||
| 474 | { | ||
| 475 | u8 *prog = *pprog; | ||
| 476 | int cnt = 0; | ||
| 477 | u8 dreg = dstk ? IA32_EAX : dst; | ||
| 478 | u8 b2; | ||
| 479 | |||
| 480 | if (dstk) | ||
| 481 | /* mov eax,dword ptr [ebp+off] */ | ||
| 482 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst)); | ||
| 483 | |||
| 484 | if (sstk) | ||
| 485 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 486 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src)); | ||
| 487 | else if (src != IA32_ECX) | ||
| 488 | /* mov ecx,src */ | ||
| 489 | EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX)); | ||
| 490 | |||
| 491 | switch (op) { | ||
| 492 | case BPF_LSH: | ||
| 493 | b2 = 0xE0; break; | ||
| 494 | case BPF_RSH: | ||
| 495 | b2 = 0xE8; break; | ||
| 496 | case BPF_ARSH: | ||
| 497 | b2 = 0xF8; break; | ||
| 498 | default: | ||
| 499 | return; | ||
| 500 | } | ||
| 501 | EMIT2(0xD3, add_1reg(b2, dreg)); | ||
| 502 | |||
| 503 | if (dstk) | ||
| 504 | /* mov dword ptr [ebp+off],dreg */ | ||
| 505 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst)); | ||
| 506 | *pprog = prog; | ||
| 507 | } | ||
| 508 | |||
| 509 | /* | ||
| 510 | * ALU operation (32 bit) | ||
| 511 | * dst = dst (op) src | ||
| 512 | */ | ||
| 513 | static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op, | ||
| 514 | const u8 dst, const u8 src, bool dstk, | ||
| 515 | bool sstk, u8 **pprog) | ||
| 516 | { | ||
| 517 | u8 *prog = *pprog; | ||
| 518 | int cnt = 0; | ||
| 519 | u8 sreg = sstk ? IA32_EAX : src; | ||
| 520 | u8 dreg = dstk ? IA32_EDX : dst; | ||
| 521 | |||
| 522 | if (sstk) | ||
| 523 | /* mov eax,dword ptr [ebp+off] */ | ||
| 524 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src)); | ||
| 525 | |||
| 526 | if (dstk) | ||
| 527 | /* mov eax,dword ptr [ebp+off] */ | ||
| 528 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst)); | ||
| 529 | |||
| 530 | switch (BPF_OP(op)) { | ||
| 531 | /* dst = dst + src */ | ||
| 532 | case BPF_ADD: | ||
| 533 | if (hi && is64) | ||
| 534 | EMIT2(0x11, add_2reg(0xC0, dreg, sreg)); | ||
| 535 | else | ||
| 536 | EMIT2(0x01, add_2reg(0xC0, dreg, sreg)); | ||
| 537 | break; | ||
| 538 | /* dst = dst - src */ | ||
| 539 | case BPF_SUB: | ||
| 540 | if (hi && is64) | ||
| 541 | EMIT2(0x19, add_2reg(0xC0, dreg, sreg)); | ||
| 542 | else | ||
| 543 | EMIT2(0x29, add_2reg(0xC0, dreg, sreg)); | ||
| 544 | break; | ||
| 545 | /* dst = dst | src */ | ||
| 546 | case BPF_OR: | ||
| 547 | EMIT2(0x09, add_2reg(0xC0, dreg, sreg)); | ||
| 548 | break; | ||
| 549 | /* dst = dst & src */ | ||
| 550 | case BPF_AND: | ||
| 551 | EMIT2(0x21, add_2reg(0xC0, dreg, sreg)); | ||
| 552 | break; | ||
| 553 | /* dst = dst ^ src */ | ||
| 554 | case BPF_XOR: | ||
| 555 | EMIT2(0x31, add_2reg(0xC0, dreg, sreg)); | ||
| 556 | break; | ||
| 557 | } | ||
| 558 | |||
| 559 | if (dstk) | ||
| 560 | /* mov dword ptr [ebp+off],dreg */ | ||
| 561 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), | ||
| 562 | STACK_VAR(dst)); | ||
| 563 | *pprog = prog; | ||
| 564 | } | ||
| 565 | |||
| 566 | /* ALU operation (64 bit) */ | ||
| 567 | static inline void emit_ia32_alu_r64(const bool is64, const u8 op, | ||
| 568 | const u8 dst[], const u8 src[], | ||
| 569 | bool dstk, bool sstk, | ||
| 570 | u8 **pprog) | ||
| 571 | { | ||
| 572 | u8 *prog = *pprog; | ||
| 573 | |||
| 574 | emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog); | ||
| 575 | if (is64) | ||
| 576 | emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk, | ||
| 577 | &prog); | ||
| 578 | else | ||
| 579 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 580 | *pprog = prog; | ||
| 581 | } | ||
| 582 | |||
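In the 64-bit case the low halves are combined with the plain opcode and the high halves with the carry-propagating variant (add/adc, sub/sbb). A reference model of the 64-bit add (a sketch):

    static void add64_ref(u32 *dst_lo, u32 *dst_hi, u32 src_lo, u32 src_hi)
    {
        u32 lo = *dst_lo + src_lo;

        /* the adc on the high half consumes the carry out of the low add */
        *dst_hi += src_hi + (lo < src_lo);
        *dst_lo = lo;
    }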
| 583 | /* | ||
| 584 | * ALU operation (32 bit) | ||
| 585 | * dst = dst (op) val | ||
| 586 | */ | ||
| 587 | static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op, | ||
| 588 | const u8 dst, const s32 val, bool dstk, | ||
| 589 | u8 **pprog) | ||
| 590 | { | ||
| 591 | u8 *prog = *pprog; | ||
| 592 | int cnt = 0; | ||
| 593 | u8 dreg = dstk ? IA32_EAX : dst; | ||
| 594 | u8 sreg = IA32_EDX; | ||
| 595 | |||
| 596 | if (dstk) | ||
| 597 | /* mov eax,dword ptr [ebp+off] */ | ||
| 598 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst)); | ||
| 599 | |||
| 600 | if (!is_imm8(val)) | ||
| 601 | /* mov edx,imm32 */ | ||
| 602 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val); | ||
| 603 | |||
| 604 | switch (op) { | ||
| 605 | /* dst = dst + val */ | ||
| 606 | case BPF_ADD: | ||
| 607 | if (hi && is64) { | ||
| 608 | if (is_imm8(val)) | ||
| 609 | EMIT3(0x83, add_1reg(0xD0, dreg), val); | ||
| 610 | else | ||
| 611 | EMIT2(0x11, add_2reg(0xC0, dreg, sreg)); | ||
| 612 | } else { | ||
| 613 | if (is_imm8(val)) | ||
| 614 | EMIT3(0x83, add_1reg(0xC0, dreg), val); | ||
| 615 | else | ||
| 616 | EMIT2(0x01, add_2reg(0xC0, dreg, sreg)); | ||
| 617 | } | ||
| 618 | break; | ||
| 619 | /* dst = dst - val */ | ||
| 620 | case BPF_SUB: | ||
| 621 | if (hi && is64) { | ||
| 622 | if (is_imm8(val)) | ||
| 623 | EMIT3(0x83, add_1reg(0xD8, dreg), val); | ||
| 624 | else | ||
| 625 | EMIT2(0x19, add_2reg(0xC0, dreg, sreg)); | ||
| 626 | } else { | ||
| 627 | if (is_imm8(val)) | ||
| 628 | EMIT3(0x83, add_1reg(0xE8, dreg), val); | ||
| 629 | else | ||
| 630 | EMIT2(0x29, add_2reg(0xC0, dreg, sreg)); | ||
| 631 | } | ||
| 632 | break; | ||
| 633 | /* dst = dst | val */ | ||
| 634 | case BPF_OR: | ||
| 635 | if (is_imm8(val)) | ||
| 636 | EMIT3(0x83, add_1reg(0xC8, dreg), val); | ||
| 637 | else | ||
| 638 | EMIT2(0x09, add_2reg(0xC0, dreg, sreg)); | ||
| 639 | break; | ||
| 640 | /* dst = dst & val */ | ||
| 641 | case BPF_AND: | ||
| 642 | if (is_imm8(val)) | ||
| 643 | EMIT3(0x83, add_1reg(0xE0, dreg), val); | ||
| 644 | else | ||
| 645 | EMIT2(0x21, add_2reg(0xC0, dreg, sreg)); | ||
| 646 | break; | ||
| 647 | /* dst = dst ^ val */ | ||
| 648 | case BPF_XOR: | ||
| 649 | if (is_imm8(val)) | ||
| 650 | EMIT3(0x83, add_1reg(0xF0, dreg), val); | ||
| 651 | else | ||
| 652 | EMIT2(0x31, add_2reg(0xC0, dreg, sreg)); | ||
| 653 | break; | ||
| 654 | case BPF_NEG: | ||
| 655 | EMIT2(0xF7, add_1reg(0xD8, dreg)); | ||
| 656 | break; | ||
| 657 | } | ||
| 658 | |||
| 659 | if (dstk) | ||
| 660 | /* mov dword ptr [ebp+off],dreg */ | ||
| 661 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), | ||
| 662 | STACK_VAR(dst)); | ||
| 663 | *pprog = prog; | ||
| 664 | } | ||
| 665 | |||
| 666 | /* ALU operation (64 bit) */ | ||
| 667 | static inline void emit_ia32_alu_i64(const bool is64, const u8 op, | ||
| 668 | const u8 dst[], const u32 val, | ||
| 669 | bool dstk, u8 **pprog) | ||
| 670 | { | ||
| 671 | u8 *prog = *pprog; | ||
| 672 | u32 hi = 0; | ||
| 673 | |||
| 674 | if (is64 && (val & (1<<31))) | ||
| 675 | hi = (u32)~0; | ||
| 676 | |||
| 677 | emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog); | ||
| 678 | if (is64) | ||
| 679 | emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog); | ||
| 680 | else | ||
| 681 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 682 | |||
| 683 | *pprog = prog; | ||
| 684 | } | ||
| 685 | |||
| 686 | /* dst = ~dst (64 bit) */ | ||
| 687 | static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog) | ||
| 688 | { | ||
| 689 | u8 *prog = *pprog; | ||
| 690 | int cnt = 0; | ||
| 691 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 692 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 693 | |||
| 694 | if (dstk) { | ||
| 695 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 696 | STACK_VAR(dst_lo)); | ||
| 697 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 698 | STACK_VAR(dst_hi)); | ||
| 699 | } | ||
| 700 | |||
| 701 | /* xor ecx,ecx */ | ||
| 702 | EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
| 703 | /* sub ecx,dreg_lo */ | ||
| 704 | EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX)); | ||
| 705 | /* mov dreg_lo,ecx */ | ||
| 706 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX)); | ||
| 707 | |||
| 708 | /* xor ecx,ecx */ | ||
| 709 | EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
| 710 | /* sbb dreg_hi,ecx */ | ||
| 711 | EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX)); | ||
| 712 | /* mov dreg_hi,ecx */ | ||
| 713 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX)); | ||
| 714 | |||
| 715 | if (dstk) { | ||
| 716 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 717 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 718 | STACK_VAR(dst_lo)); | ||
| 719 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 720 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 721 | STACK_VAR(dst_hi)); | ||
| 722 | } | ||
| 723 | *pprog = prog; | ||
| 724 | } | ||
| 725 | |||
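What 64-bit negation must compute, with the cross-half borrow made explicit; a reference model (a sketch, independent of the exact instruction sequence above):

    static void neg64_ref(u32 *lo, u32 *hi)
    {
        u32 borrow = (*lo != 0);  /* borrow out of 0 - lo */

        *lo = 0 - *lo;
        *hi = 0 - *hi - borrow;
    }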
| 726 | /* dst = dst << src */ | ||
| 727 | static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[], | ||
| 728 | bool dstk, bool sstk, u8 **pprog) | ||
| 729 | { | ||
| 730 | u8 *prog = *pprog; | ||
| 731 | int cnt = 0; | ||
| 732 | static int jmp_label1 = -1; | ||
| 733 | static int jmp_label2 = -1; | ||
| 734 | static int jmp_label3 = -1; | ||
| 735 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 736 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 737 | |||
| 738 | if (dstk) { | ||
| 739 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 740 | STACK_VAR(dst_lo)); | ||
| 741 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 742 | STACK_VAR(dst_hi)); | ||
| 743 | } | ||
| 744 | |||
| 745 | if (sstk) | ||
| 746 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 747 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 748 | STACK_VAR(src_lo)); | ||
| 749 | else | ||
| 750 | /* mov ecx,src_lo */ | ||
| 751 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
| 752 | |||
| 753 | /* cmp ecx,32 */ | ||
| 754 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
| 755 | /* Jumps when >= 32 */ | ||
| 756 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
| 757 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
| 758 | else | ||
| 759 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
| 760 | |||
| 761 | /* < 32 */ | ||
| 762 | /* shl dreg_hi,cl */ | ||
| 763 | EMIT2(0xD3, add_1reg(0xE0, dreg_hi)); | ||
| 764 | /* mov ebx,dreg_lo */ | ||
| 765 | EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 766 | /* shl dreg_lo,cl */ | ||
| 767 | EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); | ||
| 768 | |||
| 769 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
| 770 | /* neg ecx */ | ||
| 771 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 772 | /* add ecx,32 */ | ||
| 773 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 774 | |||
| 775 | /* shr ebx,cl */ | ||
| 776 | EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); | ||
| 777 | /* or dreg_hi,ebx */ | ||
| 778 | EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 779 | |||
| 780 | /* goto out; */ | ||
| 781 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 782 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 783 | else | ||
| 784 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 785 | |||
| 786 | /* >= 32 */ | ||
| 787 | if (jmp_label1 == -1) | ||
| 788 | jmp_label1 = cnt; | ||
| 789 | |||
| 790 | /* cmp ecx,64 */ | ||
| 791 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
| 792 | /* Jumps when >= 64 */ | ||
| 793 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
| 794 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
| 795 | else | ||
| 796 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
| 797 | |||
| 798 | /* >= 32 && < 64 */ | ||
| 799 | /* sub ecx,32 */ | ||
| 800 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
| 801 | /* shl dreg_lo,cl */ | ||
| 802 | EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); | ||
| 803 | /* mov dreg_hi,dreg_lo */ | ||
| 804 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); | ||
| 805 | |||
| 806 | /* xor dreg_lo,dreg_lo */ | ||
| 807 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 808 | |||
| 809 | /* goto out; */ | ||
| 810 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 811 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 812 | else | ||
| 813 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 814 | |||
| 815 | /* >= 64 */ | ||
| 816 | if (jmp_label2 == -1) | ||
| 817 | jmp_label2 = cnt; | ||
| 818 | /* xor dreg_lo,dreg_lo */ | ||
| 819 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 820 | /* xor dreg_hi,dreg_hi */ | ||
| 821 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 822 | |||
| 823 | if (jmp_label3 == -1) | ||
| 824 | jmp_label3 = cnt; | ||
| 825 | |||
| 826 | if (dstk) { | ||
| 827 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 828 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 829 | STACK_VAR(dst_lo)); | ||
| 830 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 831 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 832 | STACK_VAR(dst_hi)); | ||
| 833 | } | ||
| 834 | /* out: */ | ||
| 835 | *pprog = prog; | ||
| 836 | } | ||
| 837 | |||
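The three branches above mirror this reference model of a 64-bit left shift built from 32-bit pieces (a sketch; the n == 0 guard avoids a shift by 32, which is undefined in C):

    static u64 lsh64_ref(u32 lo, u32 hi, u32 n)
    {
        if (n < 32) {
            hi = (hi << n) | (n ? lo >> (32 - n) : 0);
            lo <<= n;
        } else if (n < 64) {
            hi = lo << (n - 32);  /* low half migrates into the high half */
            lo = 0;
        } else {
            lo = hi = 0;
        }
        return ((u64)hi << 32) | lo;
    }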
| 838 | /* dst = dst >> src (signed) */ | ||
| 839 | static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[], | ||
| 840 | bool dstk, bool sstk, u8 **pprog) | ||
| 841 | { | ||
| 842 | u8 *prog = *pprog; | ||
| 843 | int cnt = 0; | ||
| 844 | static int jmp_label1 = -1; | ||
| 845 | static int jmp_label2 = -1; | ||
| 846 | static int jmp_label3 = -1; | ||
| 847 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 848 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 849 | |||
| 850 | if (dstk) { | ||
| 851 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 852 | STACK_VAR(dst_lo)); | ||
| 853 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 854 | STACK_VAR(dst_hi)); | ||
| 855 | } | ||
| 856 | |||
| 857 | if (sstk) | ||
| 858 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 859 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 860 | STACK_VAR(src_lo)); | ||
| 861 | else | ||
| 862 | /* mov ecx,src_lo */ | ||
| 863 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
| 864 | |||
| 865 | /* cmp ecx,32 */ | ||
| 866 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
| 867 | /* Jumps when >= 32 */ | ||
| 868 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
| 869 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
| 870 | else | ||
| 871 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
| 872 | |||
| 873 | /* < 32 */ | ||
| 874 | /* shr dreg_lo,cl */ | ||
| 875 | EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); | ||
| 876 | /* mov ebx,dreg_hi */ | ||
| 877 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 878 | /* ashr dreg_hi,cl */ | ||
| 879 | EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); | ||
| 880 | |||
| 881 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
| 882 | /* neg ecx */ | ||
| 883 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 884 | /* add ecx,32 */ | ||
| 885 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 886 | |||
| 887 | /* shl ebx,cl */ | ||
| 888 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
| 889 | /* or dreg_lo,ebx */ | ||
| 890 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 891 | |||
| 892 | /* goto out; */ | ||
| 893 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 894 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 895 | else | ||
| 896 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 897 | |||
| 898 | /* >= 32 */ | ||
| 899 | if (jmp_label1 == -1) | ||
| 900 | jmp_label1 = cnt; | ||
| 901 | |||
| 902 | /* cmp ecx,64 */ | ||
| 903 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
| 904 | /* Jumps when >= 64 */ | ||
| 905 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
| 906 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
| 907 | else | ||
| 908 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
| 909 | |||
| 910 | /* >= 32 && < 64 */ | ||
| 911 | /* sub ecx,32 */ | ||
| 912 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
| 913 | /* ashr dreg_hi,cl */ | ||
| 914 | EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); | ||
| 915 | /* mov dreg_lo,dreg_hi */ | ||
| 916 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 917 | |||
| 918 | /* ashr dreg_hi,imm8 */ | ||
| 919 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
| 920 | |||
| 921 | /* goto out; */ | ||
| 922 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 923 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 924 | else | ||
| 925 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 926 | |||
| 927 | /* >= 64 */ | ||
| 928 | if (jmp_label2 == -1) | ||
| 929 | jmp_label2 = cnt; | ||
| 930 | /* ashr dreg_hi,imm8 */ | ||
| 931 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
| 932 | /* mov dreg_lo,dreg_hi */ | ||
| 933 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 934 | |||
| 935 | if (jmp_label3 == -1) | ||
| 936 | jmp_label3 = cnt; | ||
| 937 | |||
| 938 | if (dstk) { | ||
| 939 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 940 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 941 | STACK_VAR(dst_lo)); | ||
| 942 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 943 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 944 | STACK_VAR(dst_hi)); | ||
| 945 | } | ||
| 946 | /* out: */ | ||
| 947 | *pprog = prog; | ||
| 948 | } | ||
| 949 | |||
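The arithmetic variant is the same three-way split with the vacated bits filled from the sign of the high half; the logical right shift (emit_ia32_rsh_r64, next) is identical but zero-fills. A reference model (a sketch; it relies on arithmetic >> for signed values, as kernel code does):

    static u64 arsh64_ref(u32 lo, u32 hi, u32 n)
    {
        s32 shi = (s32)hi;  /* the sign lives in the high half */

        if (n < 32) {
            lo = (lo >> n) | (n ? hi << (32 - n) : 0);
            hi = (u32)(shi >> n);
        } else if (n < 64) {
            lo = (u32)(shi >> (n - 32));
            hi = (u32)(shi >> 31);  /* all copies of the sign bit */
        } else {
            lo = hi = (u32)(shi >> 31);
        }
        return ((u64)hi << 32) | lo;
    }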
| 950 | /* dst = dst >> src */ | ||
| 951 | static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, | ||
| 952 | bool sstk, u8 **pprog) | ||
| 953 | { | ||
| 954 | u8 *prog = *pprog; | ||
| 955 | int cnt = 0; | ||
| 956 | static int jmp_label1 = -1; | ||
| 957 | static int jmp_label2 = -1; | ||
| 958 | static int jmp_label3 = -1; | ||
| 959 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 960 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 961 | |||
| 962 | if (dstk) { | ||
| 963 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 964 | STACK_VAR(dst_lo)); | ||
| 965 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 966 | STACK_VAR(dst_hi)); | ||
| 967 | } | ||
| 968 | |||
| 969 | if (sstk) | ||
| 970 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 971 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 972 | STACK_VAR(src_lo)); | ||
| 973 | else | ||
| 974 | /* mov ecx,src_lo */ | ||
| 975 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
| 976 | |||
| 977 | /* cmp ecx,32 */ | ||
| 978 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
| 979 | /* Jumps when >= 32 */ | ||
| 980 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
| 981 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
| 982 | else | ||
| 983 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
| 984 | |||
| 985 | /* < 32 */ | ||
| 986 | /* shr dreg_lo,cl */ | ||
| 987 | EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); | ||
| 988 | /* mov ebx,dreg_hi */ | ||
| 989 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 990 | /* shr dreg_hi,cl */ | ||
| 991 | EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); | ||
| 992 | |||
| 993 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
| 994 | /* neg ecx */ | ||
| 995 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 996 | /* add ecx,32 */ | ||
| 997 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 998 | |||
| 999 | /* shl ebx,cl */ | ||
| 1000 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
| 1001 | /* or dreg_lo,ebx */ | ||
| 1002 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 1003 | |||
| 1004 | /* goto out; */ | ||
| 1005 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 1006 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 1007 | else | ||
| 1008 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 1009 | |||
| 1010 | /* >= 32 */ | ||
| 1011 | if (jmp_label1 == -1) | ||
| 1012 | jmp_label1 = cnt; | ||
| 1013 | /* cmp ecx,64 */ | ||
| 1014 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
| 1015 | /* Jumps when >= 64 */ | ||
| 1016 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
| 1017 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
| 1018 | else | ||
| 1019 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
| 1020 | |||
| 1021 | /* >= 32 && < 64 */ | ||
| 1022 | /* sub ecx,32 */ | ||
| 1023 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
| 1024 | /* shr dreg_hi,cl */ | ||
| 1025 | EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); | ||
| 1026 | /* mov dreg_lo,dreg_hi */ | ||
| 1027 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 1028 | /* xor dreg_hi,dreg_hi */ | ||
| 1029 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 1030 | |||
| 1031 | /* goto out; */ | ||
| 1032 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 1033 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 1034 | else | ||
| 1035 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 1036 | |||
| 1037 | /* >= 64 */ | ||
| 1038 | if (jmp_label2 == -1) | ||
| 1039 | jmp_label2 = cnt; | ||
| 1040 | /* xor dreg_lo,dreg_lo */ | ||
| 1041 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 1042 | /* xor dreg_hi,dreg_hi */ | ||
| 1043 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 1044 | |||
| 1045 | if (jmp_label3 == -1) | ||
| 1046 | jmp_label3 = cnt; | ||
| 1047 | |||
| 1048 | if (dstk) { | ||
| 1049 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 1050 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 1051 | STACK_VAR(dst_lo)); | ||
| 1052 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 1053 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 1054 | STACK_VAR(dst_hi)); | ||
| 1055 | } | ||
| 1056 | /* out: */ | ||
| 1057 | *pprog = prog; | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | /* dst = dst << val */ | ||
| 1061 | static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val, | ||
| 1062 | bool dstk, u8 **pprog) | ||
| 1063 | { | ||
| 1064 | u8 *prog = *pprog; | ||
| 1065 | int cnt = 0; | ||
| 1066 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 1067 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 1068 | |||
| 1069 | if (dstk) { | ||
| 1070 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1071 | STACK_VAR(dst_lo)); | ||
| 1072 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 1073 | STACK_VAR(dst_hi)); | ||
| 1074 | } | ||
| 1075 | /* Do LSH operation */ | ||
| 1076 | if (val < 32) { | ||
| 1077 | /* shl dreg_hi,imm8 */ | ||
| 1078 | EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val); | ||
| 1079 | /* mov ebx,dreg_lo */ | ||
| 1080 | EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 1081 | /* shl dreg_lo,imm8 */ | ||
| 1082 | EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val); | ||
| 1083 | |||
| 1084 | /* IA32_ECX = 32 - val */ | ||
| 1085 | /* mov cl,val */ | ||
| 1086 | EMIT2(0xB1, val); | ||
| 1087 | /* movzx ecx,cl */ | ||
| 1088 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
| 1089 | /* neg ecx */ | ||
| 1090 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 1091 | /* add ecx,32 */ | ||
| 1092 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 1093 | |||
| 1094 | /* shr ebx,cl */ | ||
| 1095 | EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); | ||
| 1096 | /* or dreg_hi,ebx */ | ||
| 1097 | EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 1098 | } else if (val >= 32 && val < 64) { | ||
| 1099 | u32 value = val - 32; | ||
| 1100 | |||
| 1101 | /* shl dreg_lo,imm8 */ | ||
| 1102 | EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value); | ||
| 1103 | /* mov dreg_hi,dreg_lo */ | ||
| 1104 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); | ||
| 1105 | /* xor dreg_lo,dreg_lo */ | ||
| 1106 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 1107 | } else { | ||
| 1108 | /* xor dreg_lo,dreg_lo */ | ||
| 1109 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 1110 | /* xor dreg_hi,dreg_hi */ | ||
| 1111 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 1112 | } | ||
| 1113 | |||
| 1114 | if (dstk) { | ||
| 1115 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 1116 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 1117 | STACK_VAR(dst_lo)); | ||
| 1118 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 1119 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 1120 | STACK_VAR(dst_hi)); | ||
| 1121 | } | ||
| 1122 | *pprog = prog; | ||
| 1123 | } | ||
| 1124 | |||
| 1125 | /* dst = dst >> val */ | ||
| 1126 | static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val, | ||
| 1127 | bool dstk, u8 **pprog) | ||
| 1128 | { | ||
| 1129 | u8 *prog = *pprog; | ||
| 1130 | int cnt = 0; | ||
| 1131 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 1132 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 1133 | |||
| 1134 | if (dstk) { | ||
| 1135 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1136 | STACK_VAR(dst_lo)); | ||
| 1137 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 1138 | STACK_VAR(dst_hi)); | ||
| 1139 | } | ||
| 1140 | |||
| 1141 | /* Do RSH operation */ | ||
| 1142 | if (val < 32) { | ||
| 1143 | /* shr dreg_lo,imm8 */ | ||
| 1144 | EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); | ||
| 1145 | /* mov ebx,dreg_hi */ | ||
| 1146 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 1147 | /* shr dreg_hi,imm8 */ | ||
| 1148 | EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val); | ||
| 1149 | |||
| 1150 | /* IA32_ECX = 32 - val */ | ||
| 1151 | /* mov cl,val */ | ||
| 1152 | EMIT2(0xB1, val); | ||
| 1153 | /* movzx ecx,cl */ | ||
| 1154 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
| 1155 | /* neg ecx */ | ||
| 1156 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 1157 | /* add ecx,32 */ | ||
| 1158 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 1159 | |||
| 1160 | /* shl ebx,cl */ | ||
| 1161 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
| 1162 | /* or dreg_lo,ebx */ | ||
| 1163 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 1164 | } else if (val >= 32 && val < 64) { | ||
| 1165 | u32 value = val - 32; | ||
| 1166 | |||
| 1167 | /* shr dreg_hi,imm8 */ | ||
| 1168 | EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value); | ||
| 1169 | /* mov dreg_lo,dreg_hi */ | ||
| 1170 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 1171 | /* xor dreg_hi,dreg_hi */ | ||
| 1172 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 1173 | } else { | ||
| 1174 | /* xor dreg_lo,dreg_lo */ | ||
| 1175 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 1176 | /* xor dreg_hi,dreg_hi */ | ||
| 1177 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 1178 | } | ||
| 1179 | |||
| 1180 | if (dstk) { | ||
| 1181 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 1182 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 1183 | STACK_VAR(dst_lo)); | ||
| 1184 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 1185 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 1186 | STACK_VAR(dst_hi)); | ||
| 1187 | } | ||
| 1188 | *pprog = prog; | ||
| 1189 | } | ||
| 1190 | |||
| 1191 | /* dst = dst >> val (signed) */ | ||
| 1192 | static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val, | ||
| 1193 | bool dstk, u8 **pprog) | ||
| 1194 | { | ||
| 1195 | u8 *prog = *pprog; | ||
| 1196 | int cnt = 0; | ||
| 1197 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 1198 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 1199 | |||
| 1200 | if (dstk) { | ||
| 1201 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1202 | STACK_VAR(dst_lo)); | ||
| 1203 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 1204 | STACK_VAR(dst_hi)); | ||
| 1205 | } | ||
| 1206 | /* Do ARSH operation */ | ||
| 1207 | if (val < 32) { | ||
| 1208 | /* shr dreg_lo,imm8 */ | ||
| 1209 | EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); | ||
| 1210 | /* mov ebx,dreg_hi */ | ||
| 1211 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 1212 | /* ashr dreg_hi,imm8 */ | ||
| 1213 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val); | ||
| 1214 | |||
| 1215 | /* IA32_ECX = 32 - val */ | ||
| 1216 | /* mov cl,val */ | ||
| 1217 | EMIT2(0xB1, val); | ||
| 1218 | /* movzx ecx,cl */ | ||
| 1219 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
| 1220 | /* neg ecx */ | ||
| 1221 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 1222 | /* add ecx,32 */ | ||
| 1223 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 1224 | |||
| 1225 | /* shl ebx,cl */ | ||
| 1226 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
| 1227 | /* or dreg_lo,ebx */ | ||
| 1228 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 1229 | } else if (val >= 32 && val < 64) { | ||
| 1230 | u32 value = val - 32; | ||
| 1231 | |||
| 1232 | /* ashr dreg_hi,imm8 */ | ||
| 1233 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value); | ||
| 1234 | /* mov dreg_lo,dreg_hi */ | ||
| 1235 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 1236 | |||
| 1237 | /* ashr dreg_hi,imm8 */ | ||
| 1238 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
| 1239 | } else { | ||
| 1240 | /* ashr dreg_hi,imm8 */ | ||
| 1241 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
| 1242 | /* mov dreg_lo,dreg_hi */ | ||
| 1243 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 1244 | } | ||
| 1245 | |||
| 1246 | if (dstk) { | ||
| 1247 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 1248 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 1249 | STACK_VAR(dst_lo)); | ||
| 1250 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 1251 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 1252 | STACK_VAR(dst_hi)); | ||
| 1253 | } | ||
| 1254 | *pprog = prog; | ||
| 1255 | } | ||
| 1256 | |||
| 1257 | static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk, | ||
| 1258 | bool sstk, u8 **pprog) | ||
| 1259 | { | ||
| 1260 | u8 *prog = *pprog; | ||
| 1261 | int cnt = 0; | ||
| 1262 | |||
| 1263 | if (dstk) | ||
| 1264 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1265 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1266 | STACK_VAR(dst_hi)); | ||
| 1267 | else | ||
| 1268 | /* mov eax,dst_hi */ | ||
| 1269 | EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX)); | ||
| 1270 | |||
| 1271 | if (sstk) | ||
| 1272 | /* mul dword ptr [ebp+off] */ | ||
| 1273 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo)); | ||
| 1274 | else | ||
| 1275 | /* mul src_lo */ | ||
| 1276 | EMIT2(0xF7, add_1reg(0xE0, src_lo)); | ||
| 1277 | |||
| 1278 | /* mov ecx,eax */ | ||
| 1279 | EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
| 1280 | |||
| 1281 | if (dstk) | ||
| 1282 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1283 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1284 | STACK_VAR(dst_lo)); | ||
| 1285 | else | ||
| 1286 | /* mov eax,dst_lo */ | ||
| 1287 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1288 | |||
| 1289 | if (sstk) | ||
| 1290 | /* mul dword ptr [ebp+off] */ | ||
| 1291 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi)); | ||
| 1292 | else | ||
| 1293 | /* mul src_hi */ | ||
| 1294 | EMIT2(0xF7, add_1reg(0xE0, src_hi)); | ||
| 1295 | |||
| 1296 | /* add ecx,eax */ | ||
| 1297 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
| 1298 | |||
| 1299 | if (dstk) | ||
| 1300 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1301 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1302 | STACK_VAR(dst_lo)); | ||
| 1303 | else | ||
| 1304 | /* mov eax,dst_lo */ | ||
| 1305 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1306 | |||
| 1307 | if (sstk) | ||
| 1308 | /* mul dword ptr [ebp+off] */ | ||
| 1309 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo)); | ||
| 1310 | else | ||
| 1311 | /* mul src_lo */ | ||
| 1312 | EMIT2(0xF7, add_1reg(0xE0, src_lo)); | ||
| 1313 | |||
| 1314 | /* add ecx,edx */ | ||
| 1315 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX)); | ||
| 1316 | |||
| 1317 | if (dstk) { | ||
| 1318 | /* mov dword ptr [ebp+off],eax */ | ||
| 1319 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1320 | STACK_VAR(dst_lo)); | ||
| 1321 | /* mov dword ptr [ebp+off],ecx */ | ||
| 1322 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 1323 | STACK_VAR(dst_hi)); | ||
| 1324 | } else { | ||
| 1325 | /* mov dst_lo,eax */ | ||
| 1326 | EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1327 | /* mov dst_hi,ecx */ | ||
| 1328 | EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX)); | ||
| 1329 | } | ||
| 1330 | |||
| 1331 | *pprog = prog; | ||
| 1332 | } | ||
| 1333 | |||
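The interleaved mov/mul sequence above is a schoolbook 64x64->64 multiply built from three 32-bit multiplies: one full-width low product plus two cross products whose own high halves fall outside the 64-bit result. A reference model (a sketch):

    static u64 mul64_ref(u32 dst_lo, u32 dst_hi, u32 src_lo, u32 src_hi)
    {
        u64 ll = (u64)dst_lo * src_lo;                  /* full 64-bit product */
        u32 cross = dst_hi * src_lo + dst_lo * src_hi;  /* only mod 2^32 needed */

        return ll + ((u64)cross << 32);
    }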
| 1334 | static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val, | ||
| 1335 | bool dstk, u8 **pprog) | ||
| 1336 | { | ||
| 1337 | u8 *prog = *pprog; | ||
| 1338 | int cnt = 0; | ||
| 1339 | u32 hi; | ||
| 1340 | |||
| 1341 | hi = val & (1<<31) ? (u32)~0 : 0; | ||
| 1342 | /* movl eax,imm32 */ | ||
| 1343 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val); | ||
| 1344 | if (dstk) | ||
| 1345 | /* mul dword ptr [ebp+off] */ | ||
| 1346 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi)); | ||
| 1347 | else | ||
| 1348 | /* mul dst_hi */ | ||
| 1349 | EMIT2(0xF7, add_1reg(0xE0, dst_hi)); | ||
| 1350 | |||
| 1351 | /* mov ecx,eax */ | ||
| 1352 | EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
| 1353 | |||
| 1354 | /* movl eax,imm32 */ | ||
| 1355 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi); | ||
| 1356 | if (dstk) | ||
| 1357 | /* mul dword ptr [ebp+off] */ | ||
| 1358 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo)); | ||
| 1359 | else | ||
| 1360 | /* mul dst_lo */ | ||
| 1361 | EMIT2(0xF7, add_1reg(0xE0, dst_lo)); | ||
| 1362 | /* add ecx,eax */ | ||
| 1363 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
| 1364 | |||
| 1365 | /* movl eax,imm32 */ | ||
| 1366 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val); | ||
| 1367 | if (dstk) | ||
| 1368 | /* mul dword ptr [ebp+off] */ | ||
| 1369 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo)); | ||
| 1370 | else | ||
| 1371 | /* mul dst_lo */ | ||
| 1372 | EMIT2(0xF7, add_1reg(0xE0, dst_lo)); | ||
| 1373 | |||
| 1374 | /* add ecx,edx */ | ||
| 1375 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX)); | ||
| 1376 | |||
| 1377 | if (dstk) { | ||
| 1378 | /* mov dword ptr [ebp+off],eax */ | ||
| 1379 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1380 | STACK_VAR(dst_lo)); | ||
| 1381 | /* mov dword ptr [ebp+off],ecx */ | ||
| 1382 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 1383 | STACK_VAR(dst_hi)); | ||
| 1384 | } else { | ||
| 1385 | /* mov dst_lo,eax */ | ||
| 1386 | EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1387 | /* mov dst_hi,ecx */ | ||
| 1388 | EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX)); | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | *pprog = prog; | ||
| 1392 | } | ||
| 1393 | |||
| 1394 | static int bpf_size_to_x86_bytes(int bpf_size) | ||
| 1395 | { | ||
| 1396 | if (bpf_size == BPF_W) | ||
| 1397 | return 4; | ||
| 1398 | else if (bpf_size == BPF_H) | ||
| 1399 | return 2; | ||
| 1400 | else if (bpf_size == BPF_B) | ||
| 1401 | return 1; | ||
| 1402 | else if (bpf_size == BPF_DW) | ||
| 1403 | return 4; /* imm32 */ | ||
| 1404 | else | ||
| 1405 | return 0; | ||
| 1406 | } | ||
| 1407 | |||
| 1408 | struct jit_context { | ||
| 1409 | int cleanup_addr; /* Epilogue code offset */ | ||
| 1410 | }; | ||
| 1411 | |||
| 1412 | /* Maximum number of bytes emitted while JITing one eBPF insn */ | ||
| 1413 | #define BPF_MAX_INSN_SIZE 128 | ||
| 1414 | #define BPF_INSN_SAFETY 64 | ||
| 1415 | |||
| 1416 | #define PROLOGUE_SIZE 35 | ||
| 1417 | |||
| 1418 | /* | ||
| 1419 | * Emit prologue code for BPF program and check its size. | ||
| 1420 | * bpf_tail_call helper will skip it while jumping into another program. | ||
| 1421 | */ | ||
| 1422 | static void emit_prologue(u8 **pprog, u32 stack_depth) | ||
| 1423 | { | ||
| 1424 | u8 *prog = *pprog; | ||
| 1425 | int cnt = 0; | ||
| 1426 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
| 1427 | const u8 fplo = bpf2ia32[BPF_REG_FP][0]; | ||
| 1428 | const u8 fphi = bpf2ia32[BPF_REG_FP][1]; | ||
| 1429 | const u8 *tcc = bpf2ia32[TCALL_CNT]; | ||
| 1430 | |||
| 1431 | /* push ebp */ | ||
| 1432 | EMIT1(0x55); | ||
| 1433 | /* mov ebp,esp */ | ||
| 1434 | EMIT2(0x89, 0xE5); | ||
| 1435 | /* push edi */ | ||
| 1436 | EMIT1(0x57); | ||
| 1437 | /* push esi */ | ||
| 1438 | EMIT1(0x56); | ||
| 1439 | /* push ebx */ | ||
| 1440 | EMIT1(0x53); | ||
| 1441 | |||
| 1442 | /* sub esp,STACK_SIZE */ | ||
| 1443 | EMIT2_off32(0x81, 0xEC, STACK_SIZE); | ||
| 1444 | /* sub ebp,SCRATCH_SIZE+4+12 */ | ||
| 1445 | EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16); | ||
| 1446 | /* xor ebx,ebx */ | ||
| 1447 | EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX)); | ||
| 1448 | |||
| 1449 | /* Set up BPF prog stack base register */ | ||
| 1450 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo)); | ||
| 1451 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi)); | ||
| 1452 | |||
| 1453 | /* Move BPF_CTX (EAX) to BPF_REG_R1 */ | ||
| 1454 | /* mov dword ptr [ebp+off],eax */ | ||
| 1455 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0])); | ||
| 1456 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1])); | ||
| 1457 | |||
| 1458 | /* Initialize Tail Call Count */ | ||
| 1459 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0])); | ||
| 1460 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
| 1461 | |||
| 1462 | BUILD_BUG_ON(cnt != PROLOGUE_SIZE); | ||
| 1463 | *pprog = prog; | ||
| 1464 | } | ||
| 1465 | |||
| 1466 | /* Emit epilogue code for BPF program */ | ||
| 1467 | static void emit_epilogue(u8 **pprog, u32 stack_depth) | ||
| 1468 | { | ||
| 1469 | u8 *prog = *pprog; | ||
| 1470 | const u8 *r0 = bpf2ia32[BPF_REG_0]; | ||
| 1471 | int cnt = 0; | ||
| 1472 | |||
| 1473 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1474 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0])); | ||
| 1475 | /* mov edx,dword ptr [ebp+off] */ | ||
| 1476 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1])); | ||
| 1477 | |||
| 1478 | /* add ebp,SCRATCH_SIZE+4+12 */ | ||
| 1479 | EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16); | ||
| 1480 | |||
| 1481 | /* mov ebx,dword ptr [ebp-12] */ | ||
| 1482 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12); | ||
| 1483 | /* mov esi,dword ptr [ebp-8] */ | ||
| 1484 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8); | ||
| 1485 | /* mov edi,dword ptr [ebp-4] */ | ||
| 1486 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4); | ||
| 1487 | |||
| 1488 | EMIT1(0xC9); /* leave */ | ||
| 1489 | EMIT1(0xC3); /* ret */ | ||
| 1490 | *pprog = prog; | ||
| 1491 | } | ||
| 1492 | |||
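Reading the prologue and epilogue together gives roughly this frame layout (a sketch; ebp0 denotes ebp right after "mov ebp,esp", and STACK_SIZE/SCRATCH_SIZE are defined earlier in the file):

    ebp0        -> saved caller ebp
    ebp0 -  4   -> saved edi
    ebp0 -  8   -> saved esi
    ebp0 - 12   -> saved ebx
    ebp0 - 12 - STACK_SIZE ...  -> BPF stack and scratch slots
    working ebp = ebp0 - (SCRATCH_SIZE + 16)

Lowering ebp by SCRATCH_SIZE + 16 lets every STACK_VAR() slot be addressed with a small positive displacement (the 3-byte [ebp+disp8] form). The epilogue first adds SCRATCH_SIZE + 16 back, which is why it can restore ebx/esi/edi from [ebp-12], [ebp-8] and [ebp-4] before leave/ret.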
| 1493 | /* | ||
| 1494 | * Generate the following code: | ||
| 1495 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... | ||
| 1496 | * if (index >= array->map.max_entries) | ||
| 1497 | * goto out; | ||
| 1498 | * if (++tail_call_cnt > MAX_TAIL_CALL_CNT) | ||
| 1499 | * goto out; | ||
| 1500 | * prog = array->ptrs[index]; | ||
| 1501 | * if (prog == NULL) | ||
| 1502 | * goto out; | ||
| 1503 | * goto *(prog->bpf_func + prologue_size); | ||
| 1504 | * out: | ||
| 1505 | */ | ||
| 1506 | static void emit_bpf_tail_call(u8 **pprog) | ||
| 1507 | { | ||
| 1508 | u8 *prog = *pprog; | ||
| 1509 | int cnt = 0; | ||
| 1510 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
| 1511 | const u8 *r2 = bpf2ia32[BPF_REG_2]; | ||
| 1512 | const u8 *r3 = bpf2ia32[BPF_REG_3]; | ||
| 1513 | const u8 *tcc = bpf2ia32[TCALL_CNT]; | ||
| 1514 | u32 lo, hi; | ||
| 1515 | static int jmp_label1 = -1; | ||
| 1516 | |||
| 1517 | /* | ||
| 1518 | * if (index >= array->map.max_entries) | ||
| 1519 | * goto out; | ||
| 1520 | */ | ||
| 1521 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1522 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0])); | ||
| 1523 | /* mov edx,dword ptr [ebp+off] */ | ||
| 1524 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0])); | ||
| 1525 | |||
| 1526 | /* cmp dword ptr [eax+off],edx */ | ||
| 1527 | EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
| 1528 | offsetof(struct bpf_array, map.max_entries)); | ||
| 1529 | /* jbe out */ | ||
| 1530 | EMIT2(IA32_JBE, jmp_label(jmp_label1, 2)); | ||
| 1531 | |||
| 1532 | /* | ||
| 1533 | * if (tail_call_cnt > MAX_TAIL_CALL_CNT) | ||
| 1534 | * goto out; | ||
| 1535 | */ | ||
| 1536 | lo = (u32)MAX_TAIL_CALL_CNT; | ||
| 1537 | hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); | ||
| 1538 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0])); | ||
| 1539 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
| 1540 | |||
| 1541 | /* cmp ebx,hi */ | ||
| 1542 | EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi); | ||
| 1543 | EMIT2(IA32_JNE, 3); | ||
| 1544 | /* cmp ecx,lo */ | ||
| 1545 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo); | ||
| 1546 | |||
| 1547 | /* jae out */ | ||
| 1548 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
| 1549 | |||
| 1550 | /* add ecx,0x1 */ | ||
| 1551 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01); | ||
| 1552 | /* adc ebx,0x0 */ | ||
| 1553 | EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00); | ||
| 1554 | |||
| 1555 | /* mov dword ptr [ebp+off],ecx */ | ||
| 1556 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0])); | ||
| 1557 | /* mov dword ptr [ebp+off],ebx */ | ||
| 1558 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
| 1559 | |||
| 1560 | /* prog = array->ptrs[index]; */ | ||
| 1561 | /* mov edx, [eax + edx * 4 + offsetof(...)] */ | ||
| 1562 | EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs)); | ||
| 1563 | |||
| 1564 | /* | ||
| 1565 | * if (prog == NULL) | ||
| 1566 | * goto out; | ||
| 1567 | */ | ||
| 1568 | /* test edx,edx */ | ||
| 1569 | EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX)); | ||
| 1570 | /* je out */ | ||
| 1571 | EMIT2(IA32_JE, jmp_label(jmp_label1, 2)); | ||
| 1572 | |||
| 1573 | /* goto *(prog->bpf_func + prologue_size); */ | ||
| 1574 | /* mov edx, dword ptr [edx + offsetof(struct bpf_prog, bpf_func)] */ | ||
| 1575 | EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX), | ||
| 1576 | offsetof(struct bpf_prog, bpf_func)); | ||
| 1577 | /* add edx,prologue_size */ | ||
| 1578 | EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE); | ||
| 1579 | |||
| 1580 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1581 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0])); | ||
| 1582 | |||
| 1583 | /* | ||
| 1584 | * Now we're ready to jump into next BPF program: | ||
| 1585 | * eax == ctx (1st arg) | ||
| 1586 | * edx == prog->bpf_func + prologue_size | ||
| 1587 | */ | ||
| 1588 | RETPOLINE_EDX_BPF_JIT(); | ||
| 1589 | |||
| 1590 | if (jmp_label1 == -1) | ||
| 1591 | jmp_label1 = cnt; | ||
| 1592 | |||
| 1593 | /* out: */ | ||
| 1594 | *pprog = prog; | ||
| 1595 | } | ||
| 1596 | |||
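All three "goto out" branches above encode their displacement with jmp_label(jmp_label1, size), yet jmp_label1 is only assigned, to `cnt` at the out: label, after they have been emitted. This relies on two things: the tail-call sequence has a fixed length (every branch here is a 2-byte rel8 form), so the static jmp_label1 is stable across programs and passes, and the JIT runs converging passes, so the bogus displacements of the very first pass are rewritten once the label is known. A helper of roughly this shape is assumed (the real macro lives earlier in this file):

    /*
     * Displacement from the end of a jump instruction of the given
     * length, at offset cnt, to the recorded label offset.
     */
    #define jmp_label(label, jmp_insn_len) ((label) - cnt - (jmp_insn_len))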
| 1597 | /* Push a 64-bit BPF register (from its scratch stack slot) onto the native stack. */ | ||
| 1598 | static inline void emit_push_r64(const u8 src[], u8 **pprog) | ||
| 1599 | { | ||
| 1600 | u8 *prog = *pprog; | ||
| 1601 | int cnt = 0; | ||
| 1602 | |||
| 1603 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 1604 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi)); | ||
| 1605 | /* push ecx */ | ||
| 1606 | EMIT1(0x51); | ||
| 1607 | |||
| 1608 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 1609 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo)); | ||
| 1610 | /* push ecx */ | ||
| 1611 | EMIT1(0x51); | ||
| 1612 | |||
| 1613 | *pprog = prog; | ||
| 1614 | } | ||
| 1615 | |||
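emit_push_r64() pushes the high half first, so the register ends up as one little-endian u64 on the native stack:

    push [ebp + off(src_hi)]   ; high word
    push [ebp + off(src_lo)]   ; low word lands at the lower address
    ; => *(u64 *)esp == ((u64)hi << 32) | lo

The BPF_CALL case below uses it for R5 down to R2, leaving R2 lowest on the stack, i.e. the first stack argument, and cleans up with "add esp,32" (4 registers x 8 bytes). R1 travels in the eax:edx pair, which matches a u64 first argument under the ia32 kernel's register-argument convention (assuming the usual -mregparm=3 build).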
| 1616 | static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | ||
| 1617 | int oldproglen, struct jit_context *ctx) | ||
| 1618 | { | ||
| 1619 | struct bpf_insn *insn = bpf_prog->insnsi; | ||
| 1620 | int insn_cnt = bpf_prog->len; | ||
| 1621 | bool seen_exit = false; | ||
| 1622 | u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; | ||
| 1623 | int i, cnt = 0; | ||
| 1624 | int proglen = 0; | ||
| 1625 | u8 *prog = temp; | ||
| 1626 | |||
| 1627 | emit_prologue(&prog, bpf_prog->aux->stack_depth); | ||
| 1628 | |||
| 1629 | for (i = 0; i < insn_cnt; i++, insn++) { | ||
| 1630 | const s32 imm32 = insn->imm; | ||
| 1631 | const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; | ||
| 1632 | const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true; | ||
| 1633 | const bool sstk = insn->src_reg == BPF_REG_AX ? false : true; | ||
| 1634 | const u8 code = insn->code; | ||
| 1635 | const u8 *dst = bpf2ia32[insn->dst_reg]; | ||
| 1636 | const u8 *src = bpf2ia32[insn->src_reg]; | ||
| 1637 | const u8 *r0 = bpf2ia32[BPF_REG_0]; | ||
| 1638 | s64 jmp_offset; | ||
| 1639 | u8 jmp_cond; | ||
| 1640 | int ilen; | ||
| 1641 | u8 *func; | ||
| 1642 | |||
| 1643 | switch (code) { | ||
| 1644 | /* ALU operations */ | ||
| 1645 | /* dst = src */ | ||
| 1646 | case BPF_ALU | BPF_MOV | BPF_K: | ||
| 1647 | case BPF_ALU | BPF_MOV | BPF_X: | ||
| 1648 | case BPF_ALU64 | BPF_MOV | BPF_K: | ||
| 1649 | case BPF_ALU64 | BPF_MOV | BPF_X: | ||
| 1650 | switch (BPF_SRC(code)) { | ||
| 1651 | case BPF_X: | ||
| 1652 | emit_ia32_mov_r64(is64, dst, src, dstk, | ||
| 1653 | sstk, &prog); | ||
| 1654 | break; | ||
| 1655 | case BPF_K: | ||
| 1656 | /* Sign-extend immediate value to dst reg */ | ||
| 1657 | emit_ia32_mov_i64(is64, dst, imm32, | ||
| 1658 | dstk, &prog); | ||
| 1659 | break; | ||
| 1660 | } | ||
| 1661 | break; | ||
| 1662 | /* dst = dst + src/imm */ | ||
| 1663 | /* dst = dst - src/imm */ | ||
| 1664 | /* dst = dst | src/imm */ | ||
| 1665 | /* dst = dst & src/imm */ | ||
| 1666 | /* dst = dst ^ src/imm */ | ||
| 1667 | /* dst = dst * src/imm */ | ||
| 1668 | /* dst = dst << src */ | ||
| 1669 | /* dst = dst >> src */ | ||
| 1670 | case BPF_ALU | BPF_ADD | BPF_K: | ||
| 1671 | case BPF_ALU | BPF_ADD | BPF_X: | ||
| 1672 | case BPF_ALU | BPF_SUB | BPF_K: | ||
| 1673 | case BPF_ALU | BPF_SUB | BPF_X: | ||
| 1674 | case BPF_ALU | BPF_OR | BPF_K: | ||
| 1675 | case BPF_ALU | BPF_OR | BPF_X: | ||
| 1676 | case BPF_ALU | BPF_AND | BPF_K: | ||
| 1677 | case BPF_ALU | BPF_AND | BPF_X: | ||
| 1678 | case BPF_ALU | BPF_XOR | BPF_K: | ||
| 1679 | case BPF_ALU | BPF_XOR | BPF_X: | ||
| 1680 | case BPF_ALU64 | BPF_ADD | BPF_K: | ||
| 1681 | case BPF_ALU64 | BPF_ADD | BPF_X: | ||
| 1682 | case BPF_ALU64 | BPF_SUB | BPF_K: | ||
| 1683 | case BPF_ALU64 | BPF_SUB | BPF_X: | ||
| 1684 | case BPF_ALU64 | BPF_OR | BPF_K: | ||
| 1685 | case BPF_ALU64 | BPF_OR | BPF_X: | ||
| 1686 | case BPF_ALU64 | BPF_AND | BPF_K: | ||
| 1687 | case BPF_ALU64 | BPF_AND | BPF_X: | ||
| 1688 | case BPF_ALU64 | BPF_XOR | BPF_K: | ||
| 1689 | case BPF_ALU64 | BPF_XOR | BPF_X: | ||
| 1690 | switch (BPF_SRC(code)) { | ||
| 1691 | case BPF_X: | ||
| 1692 | emit_ia32_alu_r64(is64, BPF_OP(code), dst, | ||
| 1693 | src, dstk, sstk, &prog); | ||
| 1694 | break; | ||
| 1695 | case BPF_K: | ||
| 1696 | emit_ia32_alu_i64(is64, BPF_OP(code), dst, | ||
| 1697 | imm32, dstk, &prog); | ||
| 1698 | break; | ||
| 1699 | } | ||
| 1700 | break; | ||
| 1701 | case BPF_ALU | BPF_MUL | BPF_K: | ||
| 1702 | case BPF_ALU | BPF_MUL | BPF_X: | ||
| 1703 | switch (BPF_SRC(code)) { | ||
| 1704 | case BPF_X: | ||
| 1705 | emit_ia32_mul_r(dst_lo, src_lo, dstk, | ||
| 1706 | sstk, &prog); | ||
| 1707 | break; | ||
| 1708 | case BPF_K: | ||
| 1709 | /* mov ecx,imm32 */ | ||
| 1710 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
| 1711 | imm32); | ||
| 1712 | emit_ia32_mul_r(dst_lo, IA32_ECX, dstk, | ||
| 1713 | false, &prog); | ||
| 1714 | break; | ||
| 1715 | } | ||
| 1716 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 1717 | break; | ||
| 1718 | case BPF_ALU | BPF_LSH | BPF_X: | ||
| 1719 | case BPF_ALU | BPF_RSH | BPF_X: | ||
| 1720 | case BPF_ALU | BPF_ARSH | BPF_K: | ||
| 1721 | case BPF_ALU | BPF_ARSH | BPF_X: | ||
| 1722 | switch (BPF_SRC(code)) { | ||
| 1723 | case BPF_X: | ||
| 1724 | emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo, | ||
| 1725 | dstk, sstk, &prog); | ||
| 1726 | break; | ||
| 1727 | case BPF_K: | ||
| 1728 | /* mov ecx,imm32 */ | ||
| 1729 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
| 1730 | imm32); | ||
| 1731 | emit_ia32_shift_r(BPF_OP(code), dst_lo, | ||
| 1732 | IA32_ECX, dstk, false, | ||
| 1733 | &prog); | ||
| 1734 | break; | ||
| 1735 | } | ||
| 1736 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 1737 | break; | ||
| 1738 | /* dst = dst / src(imm) */ | ||
| 1739 | /* dst = dst % src(imm) */ | ||
| 1740 | case BPF_ALU | BPF_DIV | BPF_K: | ||
| 1741 | case BPF_ALU | BPF_DIV | BPF_X: | ||
| 1742 | case BPF_ALU | BPF_MOD | BPF_K: | ||
| 1743 | case BPF_ALU | BPF_MOD | BPF_X: | ||
| 1744 | switch (BPF_SRC(code)) { | ||
| 1745 | case BPF_X: | ||
| 1746 | emit_ia32_div_mod_r(BPF_OP(code), dst_lo, | ||
| 1747 | src_lo, dstk, sstk, &prog); | ||
| 1748 | break; | ||
| 1749 | case BPF_K: | ||
| 1750 | /* mov ecx,imm32 */ | ||
| 1751 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
| 1752 | imm32); | ||
| 1753 | emit_ia32_div_mod_r(BPF_OP(code), dst_lo, | ||
| 1754 | IA32_ECX, dstk, false, | ||
| 1755 | &prog); | ||
| 1756 | break; | ||
| 1757 | } | ||
| 1758 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 1759 | break; | ||
| 1760 | case BPF_ALU64 | BPF_DIV | BPF_K: | ||
| 1761 | case BPF_ALU64 | BPF_DIV | BPF_X: | ||
| 1762 | case BPF_ALU64 | BPF_MOD | BPF_K: | ||
| 1763 | case BPF_ALU64 | BPF_MOD | BPF_X: | ||
| 1764 | goto notyet; | ||
| 1765 | /* dst = dst >> imm */ | ||
| 1766 | /* dst = dst << imm */ | ||
| 1767 | case BPF_ALU | BPF_RSH | BPF_K: | ||
| 1768 | case BPF_ALU | BPF_LSH | BPF_K: | ||
| 1769 | if (unlikely(imm32 > 31)) | ||
| 1770 | return -EINVAL; | ||
| 1771 | /* mov ecx,imm32 */ | ||
| 1772 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
| 1773 | emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk, | ||
| 1774 | false, &prog); | ||
| 1775 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 1776 | break; | ||
| 1777 | /* dst = dst << imm */ | ||
| 1778 | case BPF_ALU64 | BPF_LSH | BPF_K: | ||
| 1779 | if (unlikely(imm32 > 63)) | ||
| 1780 | return -EINVAL; | ||
| 1781 | emit_ia32_lsh_i64(dst, imm32, dstk, &prog); | ||
| 1782 | break; | ||
| 1783 | /* dst = dst >> imm */ | ||
| 1784 | case BPF_ALU64 | BPF_RSH | BPF_K: | ||
| 1785 | if (unlikely(imm32 > 63)) | ||
| 1786 | return -EINVAL; | ||
| 1787 | emit_ia32_rsh_i64(dst, imm32, dstk, &prog); | ||
| 1788 | break; | ||
| 1789 | /* dst = dst << src */ | ||
| 1790 | case BPF_ALU64 | BPF_LSH | BPF_X: | ||
| 1791 | emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog); | ||
| 1792 | break; | ||
| 1793 | /* dst = dst >> src */ | ||
| 1794 | case BPF_ALU64 | BPF_RSH | BPF_X: | ||
| 1795 | emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog); | ||
| 1796 | break; | ||
| 1797 | /* dst = dst >> src (signed) */ | ||
| 1798 | case BPF_ALU64 | BPF_ARSH | BPF_X: | ||
| 1799 | emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog); | ||
| 1800 | break; | ||
| 1801 | /* dst = dst >> imm (signed) */ | ||
| 1802 | case BPF_ALU64 | BPF_ARSH | BPF_K: | ||
| 1803 | if (unlikely(imm32 > 63)) | ||
| 1804 | return -EINVAL; | ||
| 1805 | emit_ia32_arsh_i64(dst, imm32, dstk, &prog); | ||
| 1806 | break; | ||
| 1807 | /* dst = -dst */ | ||
| 1808 | case BPF_ALU | BPF_NEG: | ||
| 1809 | emit_ia32_alu_i(is64, false, BPF_OP(code), | ||
| 1810 | dst_lo, 0, dstk, &prog); | ||
| 1811 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 1812 | break; | ||
| 1813 | /* dst = -dst (64 bit) */ | ||
| 1814 | case BPF_ALU64 | BPF_NEG: | ||
| 1815 | emit_ia32_neg64(dst, dstk, &prog); | ||
| 1816 | break; | ||
| 1817 | /* dst = dst * src/imm */ | ||
| 1818 | case BPF_ALU64 | BPF_MUL | BPF_X: | ||
| 1819 | case BPF_ALU64 | BPF_MUL | BPF_K: | ||
| 1820 | switch (BPF_SRC(code)) { | ||
| 1821 | case BPF_X: | ||
| 1822 | emit_ia32_mul_r64(dst, src, dstk, sstk, &prog); | ||
| 1823 | break; | ||
| 1824 | case BPF_K: | ||
| 1825 | emit_ia32_mul_i64(dst, imm32, dstk, &prog); | ||
| 1826 | break; | ||
| 1827 | } | ||
| 1828 | break; | ||
| 1829 | /* dst = htole(dst) */ | ||
| 1830 | case BPF_ALU | BPF_END | BPF_FROM_LE: | ||
| 1831 | emit_ia32_to_le_r64(dst, imm32, dstk, &prog); | ||
| 1832 | break; | ||
| 1833 | /* dst = htobe(dst) */ | ||
| 1834 | case BPF_ALU | BPF_END | BPF_FROM_BE: | ||
| 1835 | emit_ia32_to_be_r64(dst, imm32, dstk, &prog); | ||
| 1836 | break; | ||
| 1837 | /* dst = imm64 */ | ||
| 1838 | case BPF_LD | BPF_IMM | BPF_DW: { | ||
| 1839 | s32 hi, lo = imm32; | ||
| 1840 | |||
| 1841 | hi = insn[1].imm; | ||
| 1842 | emit_ia32_mov_i(dst_lo, lo, dstk, &prog); | ||
| 1843 | emit_ia32_mov_i(dst_hi, hi, dstk, &prog); | ||
| 1844 | insn++; | ||
| 1845 | i++; | ||
| 1846 | break; | ||
| 1847 | } | ||
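		/*
		 * BPF_LD | BPF_IMM | BPF_DW is the only 16-byte eBPF
		 * instruction: the low 32 bits ride in insn[0].imm and the
		 * high 32 bits in insn[1].imm, hence the insn++/i++ above.
		 * For example, loading 0x1122334455667788 into R2 arrives
		 * as two slots, roughly:
		 *
		 *   { .code = BPF_LD | BPF_DW | BPF_IMM,
		 *     .dst_reg = BPF_REG_2, .imm = 0x55667788 }  /- low half
		 *   { .code = 0, .imm = 0x11223344 }             /- high half
		 */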
| 1848 | /* ST: *(size *)(dst_reg + off) = imm */ | ||
| 1849 | case BPF_ST | BPF_MEM | BPF_H: | ||
| 1850 | case BPF_ST | BPF_MEM | BPF_B: | ||
| 1851 | case BPF_ST | BPF_MEM | BPF_W: | ||
| 1852 | case BPF_ST | BPF_MEM | BPF_DW: | ||
| 1853 | if (dstk) | ||
| 1854 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1855 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1856 | STACK_VAR(dst_lo)); | ||
| 1857 | else | ||
| 1858 | /* mov eax,dst_lo */ | ||
| 1859 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1860 | |||
| 1861 | switch (BPF_SIZE(code)) { | ||
| 1862 | case BPF_B: | ||
| 1863 | EMIT(0xC6, 1); break; | ||
| 1864 | case BPF_H: | ||
| 1865 | EMIT2(0x66, 0xC7); break; | ||
| 1866 | case BPF_W: | ||
| 1867 | case BPF_DW: | ||
| 1868 | EMIT(0xC7, 1); break; | ||
| 1869 | } | ||
| 1870 | |||
| 1871 | if (is_imm8(insn->off)) | ||
| 1872 | EMIT2(add_1reg(0x40, IA32_EAX), insn->off); | ||
| 1873 | else | ||
| 1874 | EMIT1_off32(add_1reg(0x80, IA32_EAX), | ||
| 1875 | insn->off); | ||
| 1876 | EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code))); | ||
| 1877 | |||
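			/*
			 * A BPF_DW store sign-extends the 32-bit immediate
			 * to 64 bits below: e.g. imm32 == -2 (0xfffffffe)
			 * yields hi == 0xffffffff, so the stored value is
			 * 0xfffffffffffffffe; a non-negative imm32 yields
			 * hi == 0.
			 */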
| 1878 | if (BPF_SIZE(code) == BPF_DW) { | ||
| 1879 | u32 hi; | ||
| 1880 | |||
| 1881 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
| 1882 | EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX), | ||
| 1883 | insn->off + 4); | ||
| 1884 | EMIT(hi, 4); | ||
| 1885 | } | ||
| 1886 | break; | ||
| 1887 | |||
| 1888 | /* STX: *(size *)(dst_reg + off) = src_reg */ | ||
| 1889 | case BPF_STX | BPF_MEM | BPF_B: | ||
| 1890 | case BPF_STX | BPF_MEM | BPF_H: | ||
| 1891 | case BPF_STX | BPF_MEM | BPF_W: | ||
| 1892 | case BPF_STX | BPF_MEM | BPF_DW: | ||
| 1893 | if (dstk) | ||
| 1894 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1895 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1896 | STACK_VAR(dst_lo)); | ||
| 1897 | else | ||
| 1898 | /* mov eax,dst_lo */ | ||
| 1899 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1900 | |||
| 1901 | if (sstk) | ||
| 1902 | /* mov edx,dword ptr [ebp+off] */ | ||
| 1903 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 1904 | STACK_VAR(src_lo)); | ||
| 1905 | else | ||
| 1906 | /* mov edx,src_lo */ | ||
| 1907 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX)); | ||
| 1908 | |||
| 1909 | switch (BPF_SIZE(code)) { | ||
| 1910 | case BPF_B: | ||
| 1911 | EMIT(0x88, 1); break; | ||
| 1912 | case BPF_H: | ||
| 1913 | EMIT2(0x66, 0x89); break; | ||
| 1914 | case BPF_W: | ||
| 1915 | case BPF_DW: | ||
| 1916 | EMIT(0x89, 1); break; | ||
| 1917 | } | ||
| 1918 | |||
| 1919 | if (is_imm8(insn->off)) | ||
| 1920 | EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
| 1921 | insn->off); | ||
| 1922 | else | ||
| 1923 | EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
| 1924 | insn->off); | ||
| 1925 | |||
| 1926 | if (BPF_SIZE(code) == BPF_DW) { | ||
| 1927 | if (sstk) | ||
| 1928 | /* mov edx,dword ptr [ebp+off] */ | ||
| 1929 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, | ||
| 1930 | IA32_EDX), | ||
| 1931 | STACK_VAR(src_hi)); | ||
| 1932 | else | ||
| 1933 | /* mov edx,src_hi */ | ||
| 1934 | EMIT2(0x8B, add_2reg(0xC0, src_hi, | ||
| 1935 | IA32_EDX)); | ||
| 1936 | EMIT1(0x89); | ||
| 1937 | if (is_imm8(insn->off + 4)) { | ||
| 1938 | EMIT2(add_2reg(0x40, IA32_EAX, | ||
| 1939 | IA32_EDX), | ||
| 1940 | insn->off + 4); | ||
| 1941 | } else { | ||
| 1942 | EMIT1(add_2reg(0x80, IA32_EAX, | ||
| 1943 | IA32_EDX)); | ||
| 1944 | EMIT(insn->off + 4, 4); | ||
| 1945 | } | ||
| 1946 | } | ||
| 1947 | break; | ||
| 1948 | |||
| 1949 | /* LDX: dst_reg = *(size *)(src_reg + off) */ | ||
| 1950 | case BPF_LDX | BPF_MEM | BPF_B: | ||
| 1951 | case BPF_LDX | BPF_MEM | BPF_H: | ||
| 1952 | case BPF_LDX | BPF_MEM | BPF_W: | ||
| 1953 | case BPF_LDX | BPF_MEM | BPF_DW: | ||
| 1954 | if (sstk) | ||
| 1955 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1956 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1957 | STACK_VAR(src_lo)); | ||
| 1958 | else | ||
| 1959 | /* mov eax,src_lo */ | ||
| 1960 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX)); | ||
| 1961 | |||
| 1962 | switch (BPF_SIZE(code)) { | ||
| 1963 | case BPF_B: | ||
| 1964 | EMIT2(0x0F, 0xB6); break; | ||
| 1965 | case BPF_H: | ||
| 1966 | EMIT2(0x0F, 0xB7); break; | ||
| 1967 | case BPF_W: | ||
| 1968 | case BPF_DW: | ||
| 1969 | EMIT(0x8B, 1); break; | ||
| 1970 | } | ||
| 1971 | |||
| 1972 | if (is_imm8(insn->off)) | ||
| 1973 | EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
| 1974 | insn->off); | ||
| 1975 | else | ||
| 1976 | EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
| 1977 | insn->off); | ||
| 1978 | |||
| 1979 | if (dstk) | ||
| 1980 | /* mov dword ptr [ebp+off],edx */ | ||
| 1981 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 1982 | STACK_VAR(dst_lo)); | ||
| 1983 | else | ||
| 1984 | /* mov dst_lo,edx */ | ||
| 1985 | EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX)); | ||
| 1986 | switch (BPF_SIZE(code)) { | ||
| 1987 | case BPF_B: | ||
| 1988 | case BPF_H: | ||
| 1989 | case BPF_W: | ||
| 1990 | if (dstk) { | ||
| 1991 | EMIT3(0xC7, add_1reg(0x40, IA32_EBP), | ||
| 1992 | STACK_VAR(dst_hi)); | ||
| 1993 | EMIT(0x0, 4); | ||
| 1994 | } else { | ||
| 1995 | EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0); | ||
| 1996 | } | ||
| 1997 | break; | ||
| 1998 | case BPF_DW: | ||
| 1999 | EMIT2_off32(0x8B, | ||
| 2000 | add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
| 2001 | insn->off + 4); | ||
| 2002 | if (dstk) | ||
| 2003 | EMIT3(0x89, | ||
| 2004 | add_2reg(0x40, IA32_EBP, | ||
| 2005 | IA32_EDX), | ||
| 2006 | STACK_VAR(dst_hi)); | ||
| 2007 | else | ||
| 2008 | EMIT2(0x89, | ||
| 2009 | add_2reg(0xC0, dst_hi, IA32_EDX)); | ||
| 2010 | break; | ||
| 2011 | default: | ||
| 2012 | break; | ||
| 2013 | } | ||
| 2014 | break; | ||
| 2015 | /* call */ | ||
| 2016 | case BPF_JMP | BPF_CALL: | ||
| 2017 | { | ||
| 2018 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
| 2019 | const u8 *r2 = bpf2ia32[BPF_REG_2]; | ||
| 2020 | const u8 *r3 = bpf2ia32[BPF_REG_3]; | ||
| 2021 | const u8 *r4 = bpf2ia32[BPF_REG_4]; | ||
| 2022 | const u8 *r5 = bpf2ia32[BPF_REG_5]; | ||
| 2023 | |||
| 2024 | if (insn->src_reg == BPF_PSEUDO_CALL) | ||
| 2025 | goto notyet; | ||
| 2026 | |||
| 2027 | func = (u8 *) __bpf_call_base + imm32; | ||
| 2028 | jmp_offset = func - (image + addrs[i]); | ||
| 2029 | |||
| 2030 | if (!imm32 || !is_simm32(jmp_offset)) { | ||
| 2031 | pr_err("unsupported BPF func %d addr %p image %p\n", | ||
| 2032 | imm32, func, image); | ||
| 2033 | return -EINVAL; | ||
| 2034 | } | ||
| 2035 | |||
| 2036 | /* mov eax,dword ptr [ebp+off] */ | ||
| 2037 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2038 | STACK_VAR(r1[0])); | ||
| 2039 | /* mov edx,dword ptr [ebp+off] */ | ||
| 2040 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2041 | STACK_VAR(r1[1])); | ||
| 2042 | |||
| 2043 | emit_push_r64(r5, &prog); | ||
| 2044 | emit_push_r64(r4, &prog); | ||
| 2045 | emit_push_r64(r3, &prog); | ||
| 2046 | emit_push_r64(r2, &prog); | ||
| 2047 | |||
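			/*
			 * addrs[i] marks the end of this eBPF insn's
			 * generated code, 9 bytes past the call itself
			 * (two 3-byte movs of the return value plus the
			 * 3-byte "add esp,32"). A call displacement is
			 * relative to the end of the call instruction,
			 * hence the "jmp_offset + 9" below.
			 */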
| 2048 | EMIT1_off32(0xE8, jmp_offset + 9); | ||
| 2049 | |||
| 2050 | /* mov dword ptr [ebp+off],eax */ | ||
| 2051 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2052 | STACK_VAR(r0[0])); | ||
| 2053 | /* mov dword ptr [ebp+off],edx */ | ||
| 2054 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2055 | STACK_VAR(r0[1])); | ||
| 2056 | |||
| 2057 | /* add esp,32 */ | ||
| 2058 | EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32); | ||
| 2059 | break; | ||
| 2060 | } | ||
| 2061 | case BPF_JMP | BPF_TAIL_CALL: | ||
| 2062 | emit_bpf_tail_call(&prog); | ||
| 2063 | break; | ||
| 2064 | |||
| 2065 | /* cond jump */ | ||
| 2066 | case BPF_JMP | BPF_JEQ | BPF_X: | ||
| 2067 | case BPF_JMP | BPF_JNE | BPF_X: | ||
| 2068 | case BPF_JMP | BPF_JGT | BPF_X: | ||
| 2069 | case BPF_JMP | BPF_JLT | BPF_X: | ||
| 2070 | case BPF_JMP | BPF_JGE | BPF_X: | ||
| 2071 | case BPF_JMP | BPF_JLE | BPF_X: | ||
| 2072 | case BPF_JMP | BPF_JSGT | BPF_X: | ||
| 2073 | case BPF_JMP | BPF_JSLE | BPF_X: | ||
| 2074 | case BPF_JMP | BPF_JSLT | BPF_X: | ||
| 2075 | case BPF_JMP | BPF_JSGE | BPF_X: { | ||
| 2076 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 2077 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 2078 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; | ||
| 2079 | u8 sreg_hi = sstk ? IA32_EBX : src_hi; | ||
| 2080 | |||
| 2081 | if (dstk) { | ||
| 2082 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2083 | STACK_VAR(dst_lo)); | ||
| 2084 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2085 | STACK_VAR(dst_hi)); | ||
| 2086 | } | ||
| 2087 | |||
| 2088 | if (sstk) { | ||
| 2089 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 2090 | STACK_VAR(src_lo)); | ||
| 2091 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), | ||
| 2092 | STACK_VAR(src_hi)); | ||
| 2093 | } | ||
| 2094 | |||
| 2095 | /* cmp dreg_hi,sreg_hi */ | ||
| 2096 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); | ||
| 2097 | EMIT2(IA32_JNE, 2); | ||
| 2098 | /* cmp dreg_lo,sreg_lo */ | ||
| 2099 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); | ||
| 2100 | goto emit_cond_jmp; | ||
| 2101 | } | ||
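		/*
		 * The cmp pair above synthesizes a 64-bit compare from
		 * 32-bit halves: if the high words differ, "jne 2" skips
		 * the 2-byte low-word cmp, so the jcc emitted at
		 * emit_cond_jmp tests the high-word flags; if they are
		 * equal, it tests the low-word flags instead.
		 */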
| 2102 | case BPF_JMP | BPF_JSET | BPF_X: { | ||
| 2103 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 2104 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 2105 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; | ||
| 2106 | u8 sreg_hi = sstk ? IA32_EBX : src_hi; | ||
| 2107 | |||
| 2108 | if (dstk) { | ||
| 2109 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2110 | STACK_VAR(dst_lo)); | ||
| 2111 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2112 | STACK_VAR(dst_hi)); | ||
| 2113 | } | ||
| 2114 | |||
| 2115 | if (sstk) { | ||
| 2116 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 2117 | STACK_VAR(src_lo)); | ||
| 2118 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), | ||
| 2119 | STACK_VAR(src_hi)); | ||
| 2120 | } | ||
| 2121 | /* and dreg_lo,sreg_lo */ | ||
| 2122 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); | ||
| 2123 | /* and dreg_hi,sreg_hi */ | ||
| 2124 | EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); | ||
| 2125 | /* or dreg_lo,dreg_hi */ | ||
| 2126 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 2127 | goto emit_cond_jmp; | ||
| 2128 | } | ||
| 2129 | case BPF_JMP | BPF_JSET | BPF_K: { | ||
| 2130 | u32 hi; | ||
| 2131 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 2132 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 2133 | u8 sreg_lo = IA32_ECX; | ||
| 2134 | u8 sreg_hi = IA32_EBX; | ||
| 2135 | |||
| 2136 | if (dstk) { | ||
| 2137 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2138 | STACK_VAR(dst_lo)); | ||
| 2139 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2140 | STACK_VAR(dst_hi)); | ||
| 2141 | } | ||
| 2142 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
| 2143 | |||
| 2144 | /* mov ecx,imm32 */ | ||
| 2145 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
| 2146 | /* mov ebx,hi */ | ||
| 2147 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); | ||
| 2148 | |||
| 2149 | /* and dreg_lo,sreg_lo */ | ||
| 2150 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); | ||
| 2151 | /* and dreg_hi,sreg_hi */ | ||
| 2152 | EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); | ||
| 2153 | /* or dreg_lo,dreg_hi */ | ||
| 2154 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 2155 | goto emit_cond_jmp; | ||
| 2156 | } | ||
| 2157 | case BPF_JMP | BPF_JEQ | BPF_K: | ||
| 2158 | case BPF_JMP | BPF_JNE | BPF_K: | ||
| 2159 | case BPF_JMP | BPF_JGT | BPF_K: | ||
| 2160 | case BPF_JMP | BPF_JLT | BPF_K: | ||
| 2161 | case BPF_JMP | BPF_JGE | BPF_K: | ||
| 2162 | case BPF_JMP | BPF_JLE | BPF_K: | ||
| 2163 | case BPF_JMP | BPF_JSGT | BPF_K: | ||
| 2164 | case BPF_JMP | BPF_JSLE | BPF_K: | ||
| 2165 | case BPF_JMP | BPF_JSLT | BPF_K: | ||
| 2166 | case BPF_JMP | BPF_JSGE | BPF_K: { | ||
| 2167 | u32 hi; | ||
| 2168 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 2169 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 2170 | u8 sreg_lo = IA32_ECX; | ||
| 2171 | u8 sreg_hi = IA32_EBX; | ||
| 2172 | |||
| 2173 | if (dstk) { | ||
| 2174 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2175 | STACK_VAR(dst_lo)); | ||
| 2176 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2177 | STACK_VAR(dst_hi)); | ||
| 2178 | } | ||
| 2179 | |||
| 2180 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
| 2181 | /* mov ecx,imm32 */ | ||
| 2182 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
| 2183 | /* mov ebx,hi */ | ||
| 2184 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); | ||
| 2185 | |||
| 2186 | /* cmp dreg_hi,sreg_hi */ | ||
| 2187 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); | ||
| 2188 | EMIT2(IA32_JNE, 2); | ||
| 2189 | /* cmp dreg_lo,sreg_lo */ | ||
| 2190 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); | ||
| 2191 | |||
| 2192 | emit_cond_jmp: /* Convert BPF opcode to x86 */ | ||
| 2193 | switch (BPF_OP(code)) { | ||
| 2194 | case BPF_JEQ: | ||
| 2195 | jmp_cond = IA32_JE; | ||
| 2196 | break; | ||
| 2197 | case BPF_JSET: | ||
| 2198 | case BPF_JNE: | ||
| 2199 | jmp_cond = IA32_JNE; | ||
| 2200 | break; | ||
| 2201 | case BPF_JGT: | ||
| 2202 | /* GT is unsigned '>', JA in x86 */ | ||
| 2203 | jmp_cond = IA32_JA; | ||
| 2204 | break; | ||
| 2205 | case BPF_JLT: | ||
| 2206 | /* LT is unsigned '<', JB in x86 */ | ||
| 2207 | jmp_cond = IA32_JB; | ||
| 2208 | break; | ||
| 2209 | case BPF_JGE: | ||
| 2210 | /* GE is unsigned '>=', JAE in x86 */ | ||
| 2211 | jmp_cond = IA32_JAE; | ||
| 2212 | break; | ||
| 2213 | case BPF_JLE: | ||
| 2214 | /* LE is unsigned '<=', JBE in x86 */ | ||
| 2215 | jmp_cond = IA32_JBE; | ||
| 2216 | break; | ||
| 2217 | case BPF_JSGT: | ||
| 2218 | /* Signed '>', GT in x86 */ | ||
| 2219 | jmp_cond = IA32_JG; | ||
| 2220 | break; | ||
| 2221 | case BPF_JSLT: | ||
| 2222 | /* Signed '<', LT in x86 */ | ||
| 2223 | jmp_cond = IA32_JL; | ||
| 2224 | break; | ||
| 2225 | case BPF_JSGE: | ||
| 2226 | /* Signed '>=', GE in x86 */ | ||
| 2227 | jmp_cond = IA32_JGE; | ||
| 2228 | break; | ||
| 2229 | case BPF_JSLE: | ||
| 2230 | /* Signed '<=', LE in x86 */ | ||
| 2231 | jmp_cond = IA32_JLE; | ||
| 2232 | break; | ||
| 2233 | default: /* to silence GCC warning */ | ||
| 2234 | return -EFAULT; | ||
| 2235 | } | ||
| 2236 | jmp_offset = addrs[i + insn->off] - addrs[i]; | ||
| 2237 | if (is_imm8(jmp_offset)) { | ||
| 2238 | EMIT2(jmp_cond, jmp_offset); | ||
| 2239 | } else if (is_simm32(jmp_offset)) { | ||
| 2240 | EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); | ||
| 2241 | } else { | ||
| 2242 | pr_err("cond_jmp gen bug %llx\n", jmp_offset); | ||
| 2243 | return -EFAULT; | ||
| 2244 | } | ||
| 2245 | |||
| 2246 | break; | ||
| 2247 | } | ||
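		/*
		 * "jmp_cond + 0x10" above maps a short Jcc opcode
		 * (0x70..0x7F, rel8) to its near form: 0x0F 0x80..0x8F
		 * takes a rel32 displacement, and the condition code in
		 * the low nibble is identical in both encodings.
		 */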
| 2248 | case BPF_JMP | BPF_JA: | ||
| 2249 | if (insn->off == -1) | ||
| 2250 | /* -1 jmp instructions will always jump | ||
| 2251 | * backwards two bytes. Explicitly handling | ||
| 2252 | * this case avoids wasting too many passes | ||
| 2253 | * when there are long sequences of replaced | ||
| 2254 | * dead code. | ||
| 2255 | */ | ||
| 2256 | jmp_offset = -2; | ||
| 2257 | else | ||
| 2258 | jmp_offset = addrs[i + insn->off] - addrs[i]; | ||
| 2259 | |||
| 2260 | if (!jmp_offset) | ||
| 2261 | /* Optimize out nop jumps */ | ||
| 2262 | break; | ||
| 2263 | emit_jmp: | ||
| 2264 | if (is_imm8(jmp_offset)) { | ||
| 2265 | EMIT2(0xEB, jmp_offset); | ||
| 2266 | } else if (is_simm32(jmp_offset)) { | ||
| 2267 | EMIT1_off32(0xE9, jmp_offset); | ||
| 2268 | } else { | ||
| 2269 | pr_err("jmp gen bug %llx\n", jmp_offset); | ||
| 2270 | return -EFAULT; | ||
| 2271 | } | ||
| 2272 | break; | ||
| 2273 | /* STX XADD: lock *(u32 *)(dst + off) += src */ | ||
| 2274 | case BPF_STX | BPF_XADD | BPF_W: | ||
| 2275 | /* STX XADD: lock *(u64 *)(dst + off) += src */ | ||
| 2276 | case BPF_STX | BPF_XADD | BPF_DW: | ||
| 2277 | goto notyet; | ||
| 2278 | case BPF_JMP | BPF_EXIT: | ||
| 2279 | if (seen_exit) { | ||
| 2280 | jmp_offset = ctx->cleanup_addr - addrs[i]; | ||
| 2281 | goto emit_jmp; | ||
| 2282 | } | ||
| 2283 | seen_exit = true; | ||
| 2284 | /* Update cleanup_addr */ | ||
| 2285 | ctx->cleanup_addr = proglen; | ||
| 2286 | emit_epilogue(&prog, bpf_prog->aux->stack_depth); | ||
| 2287 | break; | ||
| 2288 | notyet: | ||
| 2289 | pr_info_once("*** NOT YET: opcode %02x ***\n", code); | ||
| 2290 | return -EFAULT; | ||
| 2291 | default: | ||
| 2292 | /* | ||
| 2293 | * This error will be seen if a new instruction was added | ||
| 2294 | * to the interpreter but not to the JIT, or if there is | ||
| 2295 | * junk in bpf_prog. | ||
| 2296 | */ | ||
| 2297 | pr_err("bpf_jit: unknown opcode %02x\n", code); | ||
| 2298 | return -EINVAL; | ||
| 2299 | } | ||
| 2300 | |||
| 2301 | ilen = prog - temp; | ||
| 2302 | if (ilen > BPF_MAX_INSN_SIZE) { | ||
| 2303 | pr_err("bpf_jit: fatal insn size error\n"); | ||
| 2304 | return -EFAULT; | ||
| 2305 | } | ||
| 2306 | |||
| 2307 | if (image) { | ||
| 2308 | if (unlikely(proglen + ilen > oldproglen)) { | ||
| 2309 | pr_err("bpf_jit: fatal error\n"); | ||
| 2310 | return -EFAULT; | ||
| 2311 | } | ||
| 2312 | memcpy(image + proglen, temp, ilen); | ||
| 2313 | } | ||
| 2314 | proglen += ilen; | ||
| 2315 | addrs[i] = proglen; | ||
| 2316 | prog = temp; | ||
| 2317 | } | ||
| 2318 | return proglen; | ||
| 2319 | } | ||
| 2320 | |||
| 2321 | struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | ||
| 2322 | { | ||
| 2323 | struct bpf_binary_header *header = NULL; | ||
| 2324 | struct bpf_prog *tmp, *orig_prog = prog; | ||
| 2325 | int proglen, oldproglen = 0; | ||
| 2326 | struct jit_context ctx = {}; | ||
| 2327 | bool tmp_blinded = false; | ||
| 2328 | u8 *image = NULL; | ||
| 2329 | int *addrs; | ||
| 2330 | int pass; | ||
| 2331 | int i; | ||
| 2332 | |||
| 2333 | if (!prog->jit_requested) | ||
| 2334 | return orig_prog; | ||
| 2335 | |||
| 2336 | tmp = bpf_jit_blind_constants(prog); | ||
| 2337 | /* | ||
| 2338 | * If blinding was requested and we failed during blinding, | ||
| 2339 | * we must fall back to the interpreter. | ||
| 2340 | */ | ||
| 2341 | if (IS_ERR(tmp)) | ||
| 2342 | return orig_prog; | ||
| 2343 | if (tmp != prog) { | ||
| 2344 | tmp_blinded = true; | ||
| 2345 | prog = tmp; | ||
| 2346 | } | ||
| 2347 | |||
| 2348 | addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL); | ||
| 2349 | if (!addrs) { | ||
| 2350 | prog = orig_prog; | ||
| 2351 | goto out; | ||
| 2352 | } | ||
| 2353 | |||
| 2354 | /* | ||
| 2355 | * Before the first pass, make a rough estimate of addrs[]: | ||
| 2356 | * each BPF instruction is translated to less than 64 bytes. | ||
| 2357 | */ | ||
| 2358 | for (proglen = 0, i = 0; i < prog->len; i++) { | ||
| 2359 | proglen += 64; | ||
| 2360 | addrs[i] = proglen; | ||
| 2361 | } | ||
| 2362 | ctx.cleanup_addr = proglen; | ||
| 2363 | |||
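	/*
	 * Starting from a 64-byte-per-insn over-estimate means branch
	 * displacements computed from addrs[] can only shrink in later
	 * passes, which is what lets the loop below converge.
	 */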
| 2364 | /* | ||
| 2365 | * The JITed image shrinks with every pass and the loop | ||
| 2366 | * iterates until the image stops shrinking. Very large BPF | ||
| 2367 | * programs may converge only on the last pass; in such a | ||
| 2368 | * case, do one more pass to emit the final image. | ||
| 2369 | */ | ||
| 2370 | for (pass = 0; pass < 20 || image; pass++) { | ||
| 2371 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); | ||
| 2372 | if (proglen <= 0) { | ||
| 2373 | out_image: | ||
| 2374 | image = NULL; | ||
| 2375 | if (header) | ||
| 2376 | bpf_jit_binary_free(header); | ||
| 2377 | prog = orig_prog; | ||
| 2378 | goto out_addrs; | ||
| 2379 | } | ||
| 2380 | if (image) { | ||
| 2381 | if (proglen != oldproglen) { | ||
| 2382 | pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", | ||
| 2383 | proglen, oldproglen); | ||
| 2384 | goto out_image; | ||
| 2385 | } | ||
| 2386 | break; | ||
| 2387 | } | ||
| 2388 | if (proglen == oldproglen) { | ||
| 2389 | header = bpf_jit_binary_alloc(proglen, &image, | ||
| 2390 | 1, jit_fill_hole); | ||
| 2391 | if (!header) { | ||
| 2392 | prog = orig_prog; | ||
| 2393 | goto out_addrs; | ||
| 2394 | } | ||
| 2395 | } | ||
| 2396 | oldproglen = proglen; | ||
| 2397 | cond_resched(); | ||
| 2398 | } | ||
| 2399 | |||
| 2400 | if (bpf_jit_enable > 1) | ||
| 2401 | bpf_jit_dump(prog->len, proglen, pass + 1, image); | ||
| 2402 | |||
| 2403 | if (image) { | ||
| 2404 | bpf_jit_binary_lock_ro(header); | ||
| 2405 | prog->bpf_func = (void *)image; | ||
| 2406 | prog->jited = 1; | ||
| 2407 | prog->jited_len = proglen; | ||
| 2408 | } else { | ||
| 2409 | prog = orig_prog; | ||
| 2410 | } | ||
| 2411 | |||
| 2412 | out_addrs: | ||
| 2413 | kfree(addrs); | ||
| 2414 | out: | ||
| 2415 | if (tmp_blinded) | ||
| 2416 | bpf_jit_prog_release_other(prog, prog == orig_prog ? | ||
| 2417 | tmp : orig_prog); | ||
| 2418 | return prog; | ||
| 2419 | } | ||
