Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/net/bpf_jit_32.c             |   77
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c         |   65
-rw-r--r--  arch/mips/net/ebpf_jit.c              |  104
-rw-r--r--  arch/powerpc/net/Makefile             |    2
-rw-r--r--  arch/powerpc/net/bpf_jit64.h          |   37
-rw-r--r--  arch/powerpc/net/bpf_jit_asm64.S      |  180
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c     |  109
-rw-r--r--  arch/s390/net/Makefile                |    2
-rw-r--r--  arch/s390/net/bpf_jit.S               |  116
-rw-r--r--  arch/s390/net/bpf_jit.h               |   20
-rw-r--r--  arch/s390/net/bpf_jit_comp.c          |  127
-rw-r--r--  arch/sparc/net/Makefile               |    5
-rw-r--r--  arch/sparc/net/bpf_jit_64.h           |   29
-rw-r--r--  arch/sparc/net/bpf_jit_asm_64.S       |  162
-rw-r--r--  arch/sparc/net/bpf_jit_comp_64.c      |   79
-rw-r--r--  arch/x86/Kconfig                      |    2
-rw-r--r--  arch/x86/include/asm/nospec-branch.h  |   30
-rw-r--r--  arch/x86/net/Makefile                 |    7
-rw-r--r--  arch/x86/net/bpf_jit.S                |  154
-rw-r--r--  arch/x86/net/bpf_jit_comp.c           |  343
-rw-r--r--  arch/x86/net/bpf_jit_comp32.c         | 2419
21 files changed, 2602 insertions(+), 1467 deletions(-)
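Every per-architecture change below removes the JIT back ends' special handling of the classic BPF_LD | BPF_ABS and BPF_LD | BPF_IND packet loads; these instructions are now rewritten into generic, native eBPF sequences before JITing, so each back end's special cases and assembly helpers can go. As a rough, hedged C sketch of the semantics every deleted block had to reproduce (illustrative only, not the kernel helper itself):

#include <linux/skbuff.h>
#include <asm/unaligned.h>

/* R0 = ntohx(*(size *)(skb->data + imm [+ src])), or exit with 0 on failure. */
static u64 ld_abs_ind_semantics(const struct sk_buff *skb, s32 imm, u64 src,
				unsigned int size, bool indirect)
{
	s32 off = imm + (indirect ? (s32)src : 0);
	u8 buf[4];
	const void *p = skb_header_pointer(skb, off, size, buf);

	if (!p)
		return 0;	/* JITed code branches to the epilogue here */

	switch (size) {
	case 1: return *(const u8 *)p;
	case 2: return get_unaligned_be16(p);	/* results are network order */
	default: return get_unaligned_be32(p);
	}
}

(Negative offsets, which reach back into the link-layer header, went through bpf_internal_load_pointer_neg_helper() rather than skb_header_pointer(); that split is visible in several of the deleted blocks below.)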
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index b5030e1a41d8..82689b999257 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1452,83 +1452,6 @@ exit:
1452 emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
1453 emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
1454 break;
1455 /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
1456 case BPF_LD | BPF_ABS | BPF_W:
1457 case BPF_LD | BPF_ABS | BPF_H:
1458 case BPF_LD | BPF_ABS | BPF_B:
1459 /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
1460 case BPF_LD | BPF_IND | BPF_W:
1461 case BPF_LD | BPF_IND | BPF_H:
1462 case BPF_LD | BPF_IND | BPF_B:
1463 {
1464 const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */
1465 const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/
1466 /* rtn value */
1467 const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */
1468 const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */
1469 const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */
1470 const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */
1471 int size;
1472
1473 /* Setting up first argument */
1474 emit(ARM_MOV_R(r0, r4), ctx);
1475
1476 /* Setting up second argument */
1477 emit_a32_mov_i(r1, imm, false, ctx);
1478 if (BPF_MODE(code) == BPF_IND)
1479 emit_a32_alu_r(r1, src_lo, false, sstk, ctx,
1480 false, false, BPF_ADD);
1481
1482 /* Setting up third argument */
1483 switch (BPF_SIZE(code)) {
1484 case BPF_W:
1485 size = 4;
1486 break;
1487 case BPF_H:
1488 size = 2;
1489 break;
1490 case BPF_B:
1491 size = 1;
1492 break;
1493 default:
1494 return -EINVAL;
1495 }
1496 emit_a32_mov_i(r2, size, false, ctx);
1497
1498 /* Setting up fourth argument */
1499 emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx);
1500
1501 /* Setting up function pointer to call */
1502 emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx);
1503 emit_blx_r(r6, ctx);
1504
1505 emit(ARM_EOR_R(r1, r1, r1), ctx);
1506 /* Check if return address is NULL or not.
1507 * if NULL then jump to epilogue
1508 * else continue to load the value from retn address
1509 */
1510 emit(ARM_CMP_I(r0, 0), ctx);
1511 jmp_offset = epilogue_offset(ctx);
1512 check_imm24(jmp_offset);
1513 _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
1514
1515 /* Load value from the address */
1516 switch (BPF_SIZE(code)) {
1517 case BPF_W:
1518 emit(ARM_LDR_I(r0, r0, 0), ctx);
1519 emit_rev32(r0, r0, ctx);
1520 break;
1521 case BPF_H:
1522 emit(ARM_LDRH_I(r0, r0, 0), ctx);
1523 emit_rev16(r0, r0, ctx);
1524 break;
1525 case BPF_B:
1526 emit(ARM_LDRB_I(r0, r0, 0), ctx);
1527 /* No need to reverse */
1528 break;
1529 }
1530 break;
1531 }
1532 /* ST: *(size *)(dst + off) = imm */
1533 case BPF_ST | BPF_MEM | BPF_W:
1534 case BPF_ST | BPF_MEM | BPF_H:
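The block deleted above marshalled four arguments (skb, offset, size, on-stack buffer) into r0-r3, called the old helper through a scratch register, and byte-swapped the result. In C terms the emitted sequence behaved roughly like this sketch (bpf_load_pointer() was the inline helper include/linux/filter.h still provided at this point; the function name here is illustrative):

#include <linux/filter.h>
#include <asm/unaligned.h>

/* Sketch of the call sequence the removed ARM JIT block emitted. */
static u32 arm_ld_abs_sketch(struct sk_buff *skb, int k, unsigned int size,
			     void *buffer)
{
	void *ptr = bpf_load_pointer(skb, k, size, buffer);

	if (!ptr)
		return 0;				/* branch to epilogue, return 0 */

	switch (size) {
	case 1: return *(u8 *)ptr;			/* no byte swap needed */
	case 2: return get_unaligned_be16(ptr);		/* emit_rev16() equivalent */
	default: return get_unaligned_be32(ptr);	/* emit_rev32() equivalent */
	}
}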
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index a93350451e8e..0b40c8fb0706 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -723,71 +723,6 @@ emit_cond_jmp:
723 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
724 break;
725
726 /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
727 case BPF_LD | BPF_ABS | BPF_W:
728 case BPF_LD | BPF_ABS | BPF_H:
729 case BPF_LD | BPF_ABS | BPF_B:
730 /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
731 case BPF_LD | BPF_IND | BPF_W:
732 case BPF_LD | BPF_IND | BPF_H:
733 case BPF_LD | BPF_IND | BPF_B:
734 {
735 const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
736 const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
737 const u8 fp = bpf2a64[BPF_REG_FP];
738 const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
739 const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
740 const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
741 const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
742 const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
743 int size;
744
745 emit(A64_MOV(1, r1, r6), ctx);
746 emit_a64_mov_i(0, r2, imm, ctx);
747 if (BPF_MODE(code) == BPF_IND)
748 emit(A64_ADD(0, r2, r2, src), ctx);
749 switch (BPF_SIZE(code)) {
750 case BPF_W:
751 size = 4;
752 break;
753 case BPF_H:
754 size = 2;
755 break;
756 case BPF_B:
757 size = 1;
758 break;
759 default:
760 return -EINVAL;
761 }
762 emit_a64_mov_i64(r3, size, ctx);
763 emit(A64_SUB_I(1, r4, fp, ctx->stack_size), ctx);
764 emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
765 emit(A64_BLR(r5), ctx);
766 emit(A64_MOV(1, r0, A64_R(0)), ctx);
767
768 jmp_offset = epilogue_offset(ctx);
769 check_imm19(jmp_offset);
770 emit(A64_CBZ(1, r0, jmp_offset), ctx);
771 emit(A64_MOV(1, r5, r0), ctx);
772 switch (BPF_SIZE(code)) {
773 case BPF_W:
774 emit(A64_LDR32(r0, r5, A64_ZR), ctx);
775#ifndef CONFIG_CPU_BIG_ENDIAN
776 emit(A64_REV32(0, r0, r0), ctx);
777#endif
778 break;
779 case BPF_H:
780 emit(A64_LDRH(r0, r5, A64_ZR), ctx);
781#ifndef CONFIG_CPU_BIG_ENDIAN
782 emit(A64_REV16(0, r0, r0), ctx);
783#endif
784 break;
785 case BPF_B:
786 emit(A64_LDRB(r0, r5, A64_ZR), ctx);
787 break;
788 }
789 break;
790 }
791 default:
792 pr_err_once("unknown opcode %02x\n", code);
793 return -EINVAL;
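As on ARM, the deleted arm64 block loaded through a helper and then fixed up byte order; the A64_REV32/A64_REV16 swaps were only emitted for little-endian kernels, because LD_ABS/LD_IND results are defined in network (big-endian) order. A minimal sketch of that conditional, assuming the usual swab helpers:

#include <linux/swab.h>

/* Network-order fixup is a no-op when the kernel itself is big-endian. */
static inline u32 ld_abs_word_fixup(u32 raw)
{
#ifndef CONFIG_CPU_BIG_ENDIAN
	raw = __swab32(raw);	/* what the emitted A64_REV32 achieved */
#endif
	return raw;
}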
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 3e2798bfea4f..7ba7df9c28fc 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -1267,110 +1267,6 @@ jeq_common:
1267 return -EINVAL;
1268 break;
1269
1270 case BPF_LD | BPF_B | BPF_ABS:
1271 case BPF_LD | BPF_H | BPF_ABS:
1272 case BPF_LD | BPF_W | BPF_ABS:
1273 case BPF_LD | BPF_DW | BPF_ABS:
1274 ctx->flags |= EBPF_SAVE_RA;
1275
1276 gen_imm_to_reg(insn, MIPS_R_A1, ctx);
1277 emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
1278
1279 if (insn->imm < 0) {
1280 emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper);
1281 } else {
1282 emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
1283 emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
1284 }
1285 goto ld_skb_common;
1286
1287 case BPF_LD | BPF_B | BPF_IND:
1288 case BPF_LD | BPF_H | BPF_IND:
1289 case BPF_LD | BPF_W | BPF_IND:
1290 case BPF_LD | BPF_DW | BPF_IND:
1291 ctx->flags |= EBPF_SAVE_RA;
1292 src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
1293 if (src < 0)
1294 return src;
1295 ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
1296 if (ts == REG_32BIT_ZERO_EX) {
1297 /* sign extend */
1298 emit_instr(ctx, sll, MIPS_R_A1, src, 0);
1299 src = MIPS_R_A1;
1300 }
1301 if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
1302 emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm);
1303 } else {
1304 gen_imm_to_reg(insn, MIPS_R_AT, ctx);
1305 emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src);
1306 }
1307 /* truncate to 32-bit int */
1308 emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0);
1309 emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
1310 emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO);
1311
1312 emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper);
1313 emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
1314 emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
1315 emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT);
1316
1317ld_skb_common:
1318 emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
1319 /* delay slot move */
1320 emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO);
1321
1322 /* Check the error value */
1323 b_off = b_imm(exit_idx, ctx);
1324 if (is_bad_offset(b_off)) {
1325 target = j_target(ctx, exit_idx);
1326 if (target == (unsigned int)-1)
1327 return -E2BIG;
1328
1329 if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
1330 ctx->offsets[this_idx] |= OFFSETS_B_CONV;
1331 ctx->long_b_conversion = 1;
1332 }
1333 emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3);
1334 emit_instr(ctx, nop);
1335 emit_instr(ctx, j, target);
1336 emit_instr(ctx, nop);
1337 } else {
1338 emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off);
1339 emit_instr(ctx, nop);
1340 }
1341
1342#ifdef __BIG_ENDIAN
1343 need_swap = false;
1344#else
1345 need_swap = true;
1346#endif
1347 dst = MIPS_R_V0;
1348 switch (BPF_SIZE(insn->code)) {
1349 case BPF_B:
1350 emit_instr(ctx, lbu, dst, 0, MIPS_R_V0);
1351 break;
1352 case BPF_H:
1353 emit_instr(ctx, lhu, dst, 0, MIPS_R_V0);
1354 if (need_swap)
1355 emit_instr(ctx, wsbh, dst, dst);
1356 break;
1357 case BPF_W:
1358 emit_instr(ctx, lw, dst, 0, MIPS_R_V0);
1359 if (need_swap) {
1360 emit_instr(ctx, wsbh, dst, dst);
1361 emit_instr(ctx, rotr, dst, dst, 16);
1362 }
1363 break;
1364 case BPF_DW:
1365 emit_instr(ctx, ld, dst, 0, MIPS_R_V0);
1366 if (need_swap) {
1367 emit_instr(ctx, dsbh, dst, dst);
1368 emit_instr(ctx, dshd, dst, dst);
1369 }
1370 break;
1371 }
1372
1373 break;
1374 case BPF_ALU | BPF_END | BPF_FROM_BE:
1375 case BPF_ALU | BPF_END | BPF_FROM_LE:
1376 dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
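The deleted MIPS code picked its slow-path helper by offset sign: BPF_ABS with a negative immediate went straight to bpf_internal_load_pointer_neg_helper(), while non-negative offsets used the out-of-line ool_skb_header_pointer() wrapper; for BPF_IND the sign is only known at run time, so both addresses were loaded and a movn selected between them. Roughly, in C (ool_skb_header_pointer was a local wrapper around skb_header_pointer() in ebpf_jit.c):

#include <linux/skbuff.h>

/* Sketch of the run-time helper selection for BPF_IND in the removed code. */
static void *mips_ld_ind_helper(struct sk_buff *skb, int off, unsigned int len,
				void *tmp_buf)
{
	if (off < 0)	/* the slt + movn pair in the emitted code */
		return bpf_internal_load_pointer_neg_helper(skb, off, len);
	return skb_header_pointer(skb, off, len, tmp_buf);
}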
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index 02d369ca6a53..809f019d3cba 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -3,7 +3,7 @@
 # Arch-specific network modules
 #
 ifeq ($(CONFIG_PPC64),y)
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
 else
 obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
 endif
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 8bdef7ed28a8..3609be4692b3 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -20,7 +20,7 @@
  * with our redzone usage.
  *
  *		[	prev sp		] <-------------
- *		[   nv gpr save area	] 8*8		|
+ *		[   nv gpr save area	] 6*8		|
  *		[    tail_call_cnt	] 8		|
  *		[    local_tmp_var	] 8		|
  * fp (r31) -->	[   ebpf stack space	] upto 512	|
@@ -28,8 +28,8 @@
  * sp (r1) --->	[   stack pointer	] --------------
  */

-/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */
-#define BPF_PPC_STACK_SAVE	(8*8)
+/* for gpr non volatile registers BPG_REG_6 to 10 */
+#define BPF_PPC_STACK_SAVE	(6*8)
 /* for bpf JIT code internal usage */
 #define BPF_PPC_STACK_LOCALS	16
 /* stack frame excluding BPF stack, ensure this is quadword aligned */
@@ -39,10 +39,8 @@
 #ifndef __ASSEMBLY__

 /* BPF register usage */
-#define SKB_HLEN_REG	(MAX_BPF_JIT_REG + 0)
-#define SKB_DATA_REG	(MAX_BPF_JIT_REG + 1)
-#define TMP_REG_1	(MAX_BPF_JIT_REG + 2)
-#define TMP_REG_2	(MAX_BPF_JIT_REG + 3)
+#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)

 /* BPF to ppc register mappings */
 static const int b2p[] = {
@@ -63,40 +61,23 @@ static const int b2p[] = {
 	[BPF_REG_FP] = 31,
 	/* eBPF jit internal registers */
 	[BPF_REG_AX] = 2,
-	[SKB_HLEN_REG] = 25,
-	[SKB_DATA_REG] = 26,
 	[TMP_REG_1] = 9,
 	[TMP_REG_2] = 10
 };

-/* PPC NVR range -- update this if we ever use NVRs below r24 */
-#define BPF_PPC_NVR_MIN		24
-
-/* Assembly helpers */
-#define DECLARE_LOAD_FUNC(func)	u64 func(u64 r3, u64 r4);			\
-				u64 func##_negative_offset(u64 r3, u64 r4);	\
-				u64 func##_positive_offset(u64 r3, u64 r4);
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-
-#define CHOOSE_LOAD_FUNC(imm, func)						\
-			(imm < 0 ?						\
-			 (imm >= SKF_LL_OFF ? func##_negative_offset : func) :	\
-			 func##_positive_offset)
+/* PPC NVR range -- update this if we ever use NVRs below r27 */
+#define BPF_PPC_NVR_MIN		27

 #define SEEN_FUNC	0x1000 /* might call external helpers */
 #define SEEN_STACK	0x2000 /* uses BPF stack */
-#define SEEN_SKB	0x4000 /* uses sk_buff */
-#define SEEN_TAILCALL	0x8000 /* uses tail calls */
+#define SEEN_TAILCALL	0x4000 /* uses tail calls */

 struct codegen_context {
 	/*
 	 * This is used to track register usage as well
 	 * as calls to external helpers.
 	 * - register usage is tracked with corresponding
-	 * bits (r3-r10 and r25-r31)
+	 * bits (r3-r10 and r27-r31)
 	 * - rest of the bits can be used to track other
 	 * things -- for now, we use bits 16 to 23
 	 * encoded in SEEN_* macros above
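The deleted CHOOSE_LOAD_FUNC() macro above selected among three assembly entry points by the sign of the classic-BPF offset. A plain-C rendering of that selection, as a sketch (the real sk_load_* symbols lived in the bpf_jit_asm64.S file deleted below):

#include <linux/filter.h>	/* SKF_LL_OFF */

typedef u64 (*skb_load_fn)(u64 r3, u64 r4);

/* Equivalent of CHOOSE_LOAD_FUNC(imm, func): negative offsets within the valid
 * range (>= SKF_LL_OFF) go straight to the _negative_offset entry; anything
 * more negative falls back to the generic entry, which rejects it at run time;
 * known non-negative offsets take the faster _positive_offset entry. */
static skb_load_fn choose_load_func(s32 imm, skb_load_fn generic,
				    skb_load_fn negative, skb_load_fn positive)
{
	if (imm < 0)
		return imm >= SKF_LL_OFF ? negative : generic;
	return positive;
}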
diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S
deleted file mode 100644
index 7e4c51430b84..000000000000
--- a/arch/powerpc/net/bpf_jit_asm64.S
+++ /dev/null
@@ -1,180 +0,0 @@
1/*
2 * bpf_jit_asm64.S: Packet/header access helper functions
3 * for PPC64 BPF compiler.
4 *
5 * Copyright 2016, Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
6 * IBM Corporation
7 *
8 * Based on bpf_jit_asm.S by Matt Evans
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; version 2
13 * of the License.
14 */
15
16#include <asm/ppc_asm.h>
17#include <asm/ptrace.h>
18#include "bpf_jit64.h"
19
20/*
21 * All of these routines are called directly from generated code,
22 * with the below register usage:
23 * r27 skb pointer (ctx)
24 * r25 skb header length
25 * r26 skb->data pointer
26 * r4 offset
27 *
28 * Result is passed back in:
29 * r8 data read in host endian format (accumulator)
30 *
31 * r9 is used as a temporary register
32 */
33
34#define r_skb r27
35#define r_hlen r25
36#define r_data r26
37#define r_off r4
38#define r_val r8
39#define r_tmp r9
40
41_GLOBAL_TOC(sk_load_word)
42 cmpdi r_off, 0
43 blt bpf_slow_path_word_neg
44 b sk_load_word_positive_offset
45
46_GLOBAL_TOC(sk_load_word_positive_offset)
47 /* Are we accessing past headlen? */
48 subi r_tmp, r_hlen, 4
49 cmpd r_tmp, r_off
50 blt bpf_slow_path_word
51 /* Nope, just hitting the header. cr0 here is eq or gt! */
52 LWZX_BE r_val, r_data, r_off
53 blr /* Return success, cr0 != LT */
54
55_GLOBAL_TOC(sk_load_half)
56 cmpdi r_off, 0
57 blt bpf_slow_path_half_neg
58 b sk_load_half_positive_offset
59
60_GLOBAL_TOC(sk_load_half_positive_offset)
61 subi r_tmp, r_hlen, 2
62 cmpd r_tmp, r_off
63 blt bpf_slow_path_half
64 LHZX_BE r_val, r_data, r_off
65 blr
66
67_GLOBAL_TOC(sk_load_byte)
68 cmpdi r_off, 0
69 blt bpf_slow_path_byte_neg
70 b sk_load_byte_positive_offset
71
72_GLOBAL_TOC(sk_load_byte_positive_offset)
73 cmpd r_hlen, r_off
74 ble bpf_slow_path_byte
75 lbzx r_val, r_data, r_off
76 blr
77
78/*
79 * Call out to skb_copy_bits:
80 * Allocate a new stack frame here to remain ABI-compliant in
81 * stashing LR.
82 */
83#define bpf_slow_path_common(SIZE) \
84 mflr r0; \
85 std r0, PPC_LR_STKOFF(r1); \
86 stdu r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \
87 mr r3, r_skb; \
88 /* r4 = r_off as passed */ \
89 addi r5, r1, STACK_FRAME_MIN_SIZE; \
90 li r6, SIZE; \
91 bl skb_copy_bits; \
92 nop; \
93 /* save r5 */ \
94 addi r5, r1, STACK_FRAME_MIN_SIZE; \
95 /* r3 = 0 on success */ \
96 addi r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS; \
97 ld r0, PPC_LR_STKOFF(r1); \
98 mtlr r0; \
99 cmpdi r3, 0; \
100 blt bpf_error; /* cr0 = LT */
101
102bpf_slow_path_word:
103 bpf_slow_path_common(4)
104 /* Data value is on stack, and cr0 != LT */
105 LWZX_BE r_val, 0, r5
106 blr
107
108bpf_slow_path_half:
109 bpf_slow_path_common(2)
110 LHZX_BE r_val, 0, r5
111 blr
112
113bpf_slow_path_byte:
114 bpf_slow_path_common(1)
115 lbzx r_val, 0, r5
116 blr
117
118/*
119 * Call out to bpf_internal_load_pointer_neg_helper
120 */
121#define sk_negative_common(SIZE) \
122 mflr r0; \
123 std r0, PPC_LR_STKOFF(r1); \
124 stdu r1, -STACK_FRAME_MIN_SIZE(r1); \
125 mr r3, r_skb; \
126 /* r4 = r_off, as passed */ \
127 li r5, SIZE; \
128 bl bpf_internal_load_pointer_neg_helper; \
129 nop; \
130 addi r1, r1, STACK_FRAME_MIN_SIZE; \
131 ld r0, PPC_LR_STKOFF(r1); \
132 mtlr r0; \
133 /* R3 != 0 on success */ \
134 cmpldi r3, 0; \
135 beq bpf_error_slow; /* cr0 = EQ */
136
137bpf_slow_path_word_neg:
138 lis r_tmp, -32 /* SKF_LL_OFF */
139 cmpd r_off, r_tmp /* addr < SKF_* */
140 blt bpf_error /* cr0 = LT */
141 b sk_load_word_negative_offset
142
143_GLOBAL_TOC(sk_load_word_negative_offset)
144 sk_negative_common(4)
145 LWZX_BE r_val, 0, r3
146 blr
147
148bpf_slow_path_half_neg:
149 lis r_tmp, -32 /* SKF_LL_OFF */
150 cmpd r_off, r_tmp /* addr < SKF_* */
151 blt bpf_error /* cr0 = LT */
152 b sk_load_half_negative_offset
153
154_GLOBAL_TOC(sk_load_half_negative_offset)
155 sk_negative_common(2)
156 LHZX_BE r_val, 0, r3
157 blr
158
159bpf_slow_path_byte_neg:
160 lis r_tmp, -32 /* SKF_LL_OFF */
161 cmpd r_off, r_tmp /* addr < SKF_* */
162 blt bpf_error /* cr0 = LT */
163 b sk_load_byte_negative_offset
164
165_GLOBAL_TOC(sk_load_byte_negative_offset)
166 sk_negative_common(1)
167 lbzx r_val, 0, r3
168 blr
169
170bpf_error_slow:
171 /* fabricate a cr0 = lt */
172 li r_tmp, -1
173 cmpdi r_tmp, 0
174bpf_error:
175 /*
176 * Entered with cr0 = lt
177 * Generated code will 'blt epilogue', returning 0.
178 */
179 li r_val, 0
180 blr
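The deleted bpf_jit_asm64.S fast paths read straight out of the linear skb header when hlen allowed it, and otherwise trampolined into skb_copy_bits() (positive offsets) or bpf_internal_load_pointer_neg_helper() (negative offsets). A hedged C sketch of that control flow for the word-sized case, with the cr0-based error signalling condensed into a return code (hlen and data stand for the values the JIT kept cached in r25/r26):

#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <asm/unaligned.h>

/* Sketch of the word-sized path from the deleted helpers; not kernel code. */
static int ppc64_sk_load_word_sketch(const struct sk_buff *skb, s64 off,
				     u32 hlen, const u8 *data, u32 *val)
{
	u8 tmp[4];
	const void *p;

	if (off >= 0) {
		if (off + 4 <= (s64)hlen) {			/* fast path: header hit */
			*val = get_unaligned_be32(data + off);
			return 0;
		}
		if (skb_copy_bits(skb, off, tmp, 4) < 0)	/* bpf_slow_path_word */
			return -EFAULT;
		*val = get_unaligned_be32(tmp);
		return 0;
	}
	if (off < SKF_LL_OFF)					/* too far negative: error */
		return -EFAULT;
	p = bpf_internal_load_pointer_neg_helper(skb, off, 4);	/* sk_negative_common */
	if (!p)
		return -EFAULT;
	*val = get_unaligned_be32(p);
	return 0;
}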
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0ef3d9580e98..1bdb1aff0619 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
  *		[	prev sp		] <-------------
  *		[	  ...		]		|
  * sp (r1) --->	[   stack pointer	] --------------
- *		[   nv gpr save area	] 8*8
+ *		[   nv gpr save area	] 6*8
  *		[    tail_call_cnt	] 8
  *		[    local_tmp_var	] 8
  *		[   unused red zone	] 208 bytes protected
@@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
88 BUG(); 88 BUG();
89} 89}
90 90
91static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
92{
93 /*
94 * Load skb->len and skb->data_len
95 * r3 points to skb
96 */
97 PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len));
98 PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len));
99 /* header_len = len - data_len */
100 PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]);
101
102 /* skb->data pointer */
103 PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
104}
105
106static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) 91static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
107{ 92{
108 int i; 93 int i;
@@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
145 if (bpf_is_seen_register(ctx, i)) 130 if (bpf_is_seen_register(ctx, i))
146 PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); 131 PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
147 132
148 /*
149 * Save additional non-volatile regs if we cache skb
150 * Also, setup skb data
151 */
152 if (ctx->seen & SEEN_SKB) {
153 PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
154 bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
155 PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
156 bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
157 bpf_jit_emit_skb_loads(image, ctx);
158 }
159
160 /* Setup frame pointer to point to the bpf stack area */ 133 /* Setup frame pointer to point to the bpf stack area */
161 if (bpf_is_seen_register(ctx, BPF_REG_FP)) 134 if (bpf_is_seen_register(ctx, BPF_REG_FP))
162 PPC_ADDI(b2p[BPF_REG_FP], 1, 135 PPC_ADDI(b2p[BPF_REG_FP], 1,
@@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
172 if (bpf_is_seen_register(ctx, i)) 145 if (bpf_is_seen_register(ctx, i))
173 PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); 146 PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
174 147
175 /* Restore non-volatile registers used for skb cache */
176 if (ctx->seen & SEEN_SKB) {
177 PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
178 bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
179 PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
180 bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
181 }
182
183 /* Tear down our stack frame */ 148 /* Tear down our stack frame */
184 if (bpf_has_stack_frame(ctx)) { 149 if (bpf_has_stack_frame(ctx)) {
185 PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size); 150 PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
@@ -753,23 +718,10 @@ emit_clear:
753 ctx->seen |= SEEN_FUNC; 718 ctx->seen |= SEEN_FUNC;
754 func = (u8 *) __bpf_call_base + imm; 719 func = (u8 *) __bpf_call_base + imm;
755 720
756 /* Save skb pointer if we need to re-cache skb data */
757 if ((ctx->seen & SEEN_SKB) &&
758 bpf_helper_changes_pkt_data(func))
759 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
760
761 bpf_jit_emit_func_call(image, ctx, (u64)func); 721 bpf_jit_emit_func_call(image, ctx, (u64)func);
762 722
763 /* move return value from r3 to BPF_REG_0 */ 723 /* move return value from r3 to BPF_REG_0 */
764 PPC_MR(b2p[BPF_REG_0], 3); 724 PPC_MR(b2p[BPF_REG_0], 3);
765
766 /* refresh skb cache */
767 if ((ctx->seen & SEEN_SKB) &&
768 bpf_helper_changes_pkt_data(func)) {
769 /* reload skb pointer to r3 */
770 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
771 bpf_jit_emit_skb_loads(image, ctx);
772 }
773 break; 725 break;
774 726
775 /* 727 /*
@@ -887,65 +839,6 @@ cond_branch:
887 break; 839 break;
888 840
889 /* 841 /*
890 * Loads from packet header/data
891 * Assume 32-bit input value in imm and X (src_reg)
892 */
893
894 /* Absolute loads */
895 case BPF_LD | BPF_W | BPF_ABS:
896 func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
897 goto common_load_abs;
898 case BPF_LD | BPF_H | BPF_ABS:
899 func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
900 goto common_load_abs;
901 case BPF_LD | BPF_B | BPF_ABS:
902 func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
903common_load_abs:
904 /*
905 * Load from [imm]
906 * Load into r4, which can just be passed onto
907 * skb load helpers as the second parameter
908 */
909 PPC_LI32(4, imm);
910 goto common_load;
911
912 /* Indirect loads */
913 case BPF_LD | BPF_W | BPF_IND:
914 func = (u8 *)sk_load_word;
915 goto common_load_ind;
916 case BPF_LD | BPF_H | BPF_IND:
917 func = (u8 *)sk_load_half;
918 goto common_load_ind;
919 case BPF_LD | BPF_B | BPF_IND:
920 func = (u8 *)sk_load_byte;
921common_load_ind:
922 /*
923 * Load from [src_reg + imm]
924 * Treat src_reg as a 32-bit value
925 */
926 PPC_EXTSW(4, src_reg);
927 if (imm) {
928 if (imm >= -32768 && imm < 32768)
929 PPC_ADDI(4, 4, IMM_L(imm));
930 else {
931 PPC_LI32(b2p[TMP_REG_1], imm);
932 PPC_ADD(4, 4, b2p[TMP_REG_1]);
933 }
934 }
935
936common_load:
937 ctx->seen |= SEEN_SKB;
938 ctx->seen |= SEEN_FUNC;
939 bpf_jit_emit_func_call(image, ctx, (u64)func);
940
941 /*
942 * Helper returns 'lt' condition on error, and an
943 * appropriate return value in BPF_REG_0
944 */
945 PPC_BCC(COND_LT, exit_addr);
946 break;
947
948 /*
949 * Tail call
950 */
951 case BPF_JMP | BPF_TAIL_CALL:
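The call-site code removed just above re-derived the cached skb data pointer and header length after any helper that can modify packet data. The rule it applied, expressed as a hedged C predicate (the flag name follows the SEEN_SKB bit deleted from bpf_jit64.h earlier in this patch):

#include <linux/filter.h>

/* Sketch: helpers such as bpf_skb_pull_data() may reallocate skb->data, so a
 * JIT that caches skb->data/headlen must reload that cache after calling them. */
static bool skb_cache_needs_reload(struct codegen_context *ctx, void *helper)
{
	return (ctx->seen & SEEN_SKB) && bpf_helper_changes_pkt_data(helper);
}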
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
index e0d5f245e42b..d4663b4bf509 100644
--- a/arch/s390/net/Makefile
+++ b/arch/s390/net/Makefile
@@ -2,4 +2,4 @@
 #
 # Arch-specific network modules
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
deleted file mode 100644
index 25bb4643c4f4..000000000000
--- a/arch/s390/net/bpf_jit.S
+++ /dev/null
@@ -1,116 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * BPF Jit compiler for s390, help functions.
4 *
5 * Copyright IBM Corp. 2012,2015
6 *
7 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
8 * Michael Holzheu <holzheu@linux.vnet.ibm.com>
9 */
10
11#include <linux/linkage.h>
12#include "bpf_jit.h"
13
14/*
15 * Calling convention:
16 * registers %r7-%r10, %r11,%r13, and %r15 are call saved
17 *
18 * Input (64 bit):
19 * %r3 (%b2) = offset into skb data
20 * %r6 (%b5) = return address
21 * %r7 (%b6) = skb pointer
22 * %r12 = skb data pointer
23 *
24 * Output:
25 * %r14= %b0 = return value (read skb value)
26 *
27 * Work registers: %r2,%r4,%r5,%r14
28 *
29 * skb_copy_bits takes 4 parameters:
30 * %r2 = skb pointer
31 * %r3 = offset into skb data
32 * %r4 = pointer to temp buffer
33 * %r5 = length to copy
34 * Return value in %r2: 0 = ok
35 *
36 * bpf_internal_load_pointer_neg_helper takes 3 parameters:
37 * %r2 = skb pointer
38 * %r3 = offset into data
39 * %r4 = length to copy
40 * Return value in %r2: Pointer to data
41 */
42
43#define SKF_MAX_NEG_OFF -0x200000 /* SKF_LL_OFF from filter.h */
44
45/*
46 * Load SIZE bytes from SKB
47 */
48#define sk_load_common(NAME, SIZE, LOAD) \
49ENTRY(sk_load_##NAME); \
50 ltgr %r3,%r3; /* Is offset negative? */ \
51 jl sk_load_##NAME##_slow_neg; \
52ENTRY(sk_load_##NAME##_pos); \
53 aghi %r3,SIZE; /* Offset + SIZE */ \
54 clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \
55 jh sk_load_##NAME##_slow; \
56 LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \
57 b OFF_OK(%r6); /* Return */ \
58 \
59sk_load_##NAME##_slow:; \
60 lgr %r2,%r7; /* Arg1 = skb pointer */ \
61 aghi %r3,-SIZE; /* Arg2 = offset */ \
62 la %r4,STK_OFF_TMP(%r15); /* Arg3 = temp bufffer */ \
63 lghi %r5,SIZE; /* Arg4 = size */ \
64 brasl %r14,skb_copy_bits; /* Get data from skb */ \
65 LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */ \
66 ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \
67 br %r6; /* Return */
68
69sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */
70sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */
71
72/*
73 * Load 1 byte from SKB (optimized version)
74 */
75 /* r14 = *(u8 *) (skb->data+offset) */
76ENTRY(sk_load_byte)
77 ltgr %r3,%r3 # Is offset negative?
78 jl sk_load_byte_slow_neg
79ENTRY(sk_load_byte_pos)
80 clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen?
81 jnl sk_load_byte_slow
82 llgc %r14,0(%r3,%r12) # Get byte from skb
83 b OFF_OK(%r6) # Return OK
84
85sk_load_byte_slow:
86 lgr %r2,%r7 # Arg1 = skb pointer
87 # Arg2 = offset
88 la %r4,STK_OFF_TMP(%r15) # Arg3 = pointer to temp buffer
89 lghi %r5,1 # Arg4 = size (1 byte)
90 brasl %r14,skb_copy_bits # Get data from skb
91 llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer
92 ltgr %r2,%r2 # Set cc to (%r2 != 0)
93 br %r6 # Return cc
94
95#define sk_negative_common(NAME, SIZE, LOAD) \
96sk_load_##NAME##_slow_neg:; \
97 cgfi %r3,SKF_MAX_NEG_OFF; \
98 jl bpf_error; \
99 lgr %r2,%r7; /* Arg1 = skb pointer */ \
100 /* Arg2 = offset */ \
101 lghi %r4,SIZE; /* Arg3 = size */ \
102 brasl %r14,bpf_internal_load_pointer_neg_helper; \
103 ltgr %r2,%r2; \
104 jz bpf_error; \
105 LOAD %r14,0(%r2); /* Get data from pointer */ \
106 xr %r3,%r3; /* Set cc to zero */ \
107 br %r6; /* Return cc */
108
109sk_negative_common(word, 4, llgf)
110sk_negative_common(half, 2, llgh)
111sk_negative_common(byte, 1, llgc)
112
113bpf_error:
114# force a return 0 from jit handler
115 ltgr %r15,%r15 # Set condition code
116 br %r6
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
index 5e1e5133132d..7822ea92e54a 100644
--- a/arch/s390/net/bpf_jit.h
+++ b/arch/s390/net/bpf_jit.h
@@ -16,9 +16,6 @@
 #include <linux/filter.h>
 #include <linux/types.h>

-extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[];
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
-
 #endif /* __ASSEMBLY__ */

 /*
@@ -36,15 +33,6 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
  *	      |		      |     |
  *	      |   BPF stack   |     |
  *	      |		      |     |
- *	      +---------------+     |
- *	      | 8 byte skbp   |     |
- * R15+176 -> +---------------+     |
- *	      | 8 byte hlen   |     |
- * R15+168 -> +---------------+     |
- *	      | 4 byte align  |     |
- *	      +---------------+     |
- *	      | 4 byte temp   |     |
- *	      | for bpf_jit.S |     |
  * R15+160 -> +---------------+     |
  *	      | new backchain |     |
  * R15+152 -> +---------------+     |
@@ -57,17 +45,11 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
  * The stack size used by the BPF program ("BPF stack" above) is passed
  * via "aux->stack_depth".
  */
-#define STK_SPACE_ADD	(8 + 8 + 4 + 4 + 160)
+#define STK_SPACE_ADD	(160)
 #define STK_160_UNUSED	(160 - 12 * 8)
 #define STK_OFF		(STK_SPACE_ADD - STK_160_UNUSED)
-#define STK_OFF_TMP	160	/* Offset of tmp buffer on stack */
-#define STK_OFF_HLEN	168	/* Offset of SKB header length on stack */
-#define STK_OFF_SKBP	176	/* Offset of SKB pointer on stack */

 #define STK_OFF_R6	(160 - 11 * 8)	/* Offset of r6 on stack */
 #define STK_OFF_TCCNT	(160 - 12 * 8)	/* Offset of tail_call_cnt on stack */

-/* Offset to skip condition code check */
-#define OFF_OK		4
-
 #endif /* __ARCH_S390_NET_BPF_JIT_H */
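With the skb pointer (8 bytes), header length (8 bytes), alignment pad (4 bytes) and the bpf_jit.S temp word (4 bytes) gone from the frame, STK_SPACE_ADD shrinks from 8 + 8 + 4 + 4 + 160 = 184 bytes to just the 160-byte register save area, and the STK_OFF_TMP/HLEN/SKBP offsets that pointed into those slots disappear with it.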
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 78a19c93b380..b020bea040b7 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -47,23 +47,21 @@ struct bpf_jit {
 
 #define BPF_SIZE_MAX	0xffff	/* Max size for program (16 bit branches) */
 
-#define SEEN_SKB	1	/* skb access */
-#define SEEN_MEM	2	/* use mem[] for temporary storage */
-#define SEEN_RET0	4	/* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL	8	/* code uses literals */
-#define SEEN_FUNC	16	/* calls C functions */
-#define SEEN_TAIL_CALL	32	/* code uses tail calls */
-#define SEEN_REG_AX	64	/* code uses constant blinding */
-#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)
+#define SEEN_MEM	(1 << 0)	/* use mem[] for temporary storage */
+#define SEEN_RET0	(1 << 1)	/* ret0_ip points to a valid return 0 */
+#define SEEN_LITERAL	(1 << 2)	/* code uses literals */
+#define SEEN_FUNC	(1 << 3)	/* calls C functions */
+#define SEEN_TAIL_CALL	(1 << 4)	/* code uses tail calls */
+#define SEEN_REG_AX	(1 << 5)	/* code uses constant blinding */
+#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)
 
 /*
  * s390 registers
  */
 #define REG_W0		(MAX_BPF_JIT_REG + 0)	/* Work register 1 (even) */
 #define REG_W1		(MAX_BPF_JIT_REG + 1)	/* Work register 2 (odd) */
-#define REG_SKB_DATA	(MAX_BPF_JIT_REG + 2)	/* SKB data register */
-#define REG_L		(MAX_BPF_JIT_REG + 3)	/* Literal pool register */
-#define REG_15		(MAX_BPF_JIT_REG + 4)	/* Register 15 */
+#define REG_L		(MAX_BPF_JIT_REG + 2)	/* Literal pool register */
+#define REG_15		(MAX_BPF_JIT_REG + 3)	/* Register 15 */
 #define REG_0		REG_W0			/* Register 0 */
 #define REG_1		REG_W1			/* Register 1 */
 #define REG_2		BPF_REG_1		/* Register 2 */
@@ -88,10 +86,8 @@ static const int reg2hex[] = {
 	[BPF_REG_9]	= 10,
 	/* BPF stack pointer */
 	[BPF_REG_FP]	= 13,
-	/* Register for blinding (shared with REG_SKB_DATA) */
+	/* Register for blinding */
 	[BPF_REG_AX]	= 12,
-	/* SKB data pointer */
-	[REG_SKB_DATA]	= 12,
 	/* Work registers for s390x backend */
 	[REG_W0]	= 0,
 	[REG_W1]	= 1,
@@ -385,27 +381,6 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
385} 381}
386 382
387/* 383/*
388 * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
389 * we store the SKB header length on the stack and the SKB data
390 * pointer in REG_SKB_DATA if BPF_REG_AX is not used.
391 */
392static void emit_load_skb_data_hlen(struct bpf_jit *jit)
393{
394 /* Header length: llgf %w1,<len>(%b1) */
395 EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
396 offsetof(struct sk_buff, len));
397 /* s %w1,<data_len>(%b1) */
398 EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
399 offsetof(struct sk_buff, data_len));
400 /* stg %w1,ST_OFF_HLEN(%r0,%r15) */
401 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN);
402 if (!(jit->seen & SEEN_REG_AX))
403 /* lg %skb_data,data_off(%b1) */
404 EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
405 BPF_REG_1, offsetof(struct sk_buff, data));
406}
407
408/*
409 * Emit function prologue 384 * Emit function prologue
410 * 385 *
411 * Save registers and create stack frame if necessary. 386 * Save registers and create stack frame if necessary.
@@ -445,12 +420,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
445 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, 420 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
446 REG_15, 152); 421 REG_15, 152);
447 } 422 }
448 if (jit->seen & SEEN_SKB) {
449 emit_load_skb_data_hlen(jit);
450 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
451 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
452 STK_OFF_SKBP);
453 }
454} 423}
455 424
456/* 425/*
@@ -483,12 +452,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 {
 	struct bpf_insn *insn = &fp->insnsi[i];
 	int jmp_off, last, insn_count = 1;
-	unsigned int func_addr, mask;
 	u32 dst_reg = insn->dst_reg;
 	u32 src_reg = insn->src_reg;
 	u32 *addrs = jit->addrs;
 	s32 imm = insn->imm;
 	s16 off = insn->off;
+	unsigned int mask;
 
 	if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
 		jit->seen |= SEEN_REG_AX;
@@ -970,13 +939,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
970 EMIT2(0x0d00, REG_14, REG_W1); 939 EMIT2(0x0d00, REG_14, REG_W1);
971 /* lgr %b0,%r2: load return value into %b0 */ 940 /* lgr %b0,%r2: load return value into %b0 */
972 EMIT4(0xb9040000, BPF_REG_0, REG_2); 941 EMIT4(0xb9040000, BPF_REG_0, REG_2);
973 if ((jit->seen & SEEN_SKB) &&
974 bpf_helper_changes_pkt_data((void *)func)) {
975 /* lg %b1,ST_OFF_SKBP(%r15) */
976 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
977 REG_15, STK_OFF_SKBP);
978 emit_load_skb_data_hlen(jit);
979 }
980 break; 942 break;
981 } 943 }
982 case BPF_JMP | BPF_TAIL_CALL: 944 case BPF_JMP | BPF_TAIL_CALL:
@@ -1176,73 +1138,6 @@ branch_oc:
1176 jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4); 1138 jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
1177 EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off); 1139 EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
1178 break; 1140 break;
1179 /*
1180 * BPF_LD
1181 */
1182 case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */
1183 case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */
1184 if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
1185 func_addr = __pa(sk_load_byte_pos);
1186 else
1187 func_addr = __pa(sk_load_byte);
1188 goto call_fn;
1189 case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */
1190 case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */
1191 if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
1192 func_addr = __pa(sk_load_half_pos);
1193 else
1194 func_addr = __pa(sk_load_half);
1195 goto call_fn;
1196 case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */
1197 case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */
1198 if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
1199 func_addr = __pa(sk_load_word_pos);
1200 else
1201 func_addr = __pa(sk_load_word);
1202 goto call_fn;
1203call_fn:
1204 jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC;
1205 REG_SET_SEEN(REG_14); /* Return address of possible func call */
1206
1207 /*
1208 * Implicit input:
1209 * BPF_REG_6 (R7) : skb pointer
1210 * REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX)
1211 *
1212 * Calculated input:
1213 * BPF_REG_2 (R3) : offset of byte(s) to fetch in skb
1214 * BPF_REG_5 (R6) : return address
1215 *
1216 * Output:
1217 * BPF_REG_0 (R14): data read from skb
1218 *
1219 * Scratch registers (BPF_REG_1-5)
1220 */
1221
1222 /* Call function: llilf %w1,func_addr */
1223 EMIT6_IMM(0xc00f0000, REG_W1, func_addr);
1224
1225 /* Offset: lgfi %b2,imm */
1226 EMIT6_IMM(0xc0010000, BPF_REG_2, imm);
1227 if (BPF_MODE(insn->code) == BPF_IND)
1228 /* agfr %b2,%src (%src is s32 here) */
1229 EMIT4(0xb9180000, BPF_REG_2, src_reg);
1230
1231 /* Reload REG_SKB_DATA if BPF_REG_AX is used */
1232 if (jit->seen & SEEN_REG_AX)
1233 /* lg %skb_data,data_off(%b6) */
1234 EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
1235 BPF_REG_6, offsetof(struct sk_buff, data));
1236 /* basr %b5,%w1 (%b5 is call saved) */
1237 EMIT2(0x0d00, BPF_REG_5, REG_W1);
1238
1239 /*
1240 * Note: For fast access we jump directly after the
1241 * jnz instruction from bpf_jit.S
1242 */
1243 /* jnz <ret0> */
1244 EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg);
1245 break;
1246 default: /* too complex, give up */ 1141 default: /* too complex, give up */
1247 pr_err("Unknown opcode %02x\n", insn->code); 1142 pr_err("Unknown opcode %02x\n", insn->code);
1248 return -1; 1143 return -1;
diff --git a/arch/sparc/net/Makefile b/arch/sparc/net/Makefile
index 76fa8e95b721..d32aac3a25b8 100644
--- a/arch/sparc/net/Makefile
+++ b/arch/sparc/net/Makefile
@@ -1,4 +1,7 @@
 #
 # Arch-specific network modules
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_$(BITS).o bpf_jit_comp_$(BITS).o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp_$(BITS).o
+ifeq ($(BITS),32)
+obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_32.o
+endif
diff --git a/arch/sparc/net/bpf_jit_64.h b/arch/sparc/net/bpf_jit_64.h
index 428f7fd19175..fbc836f1c51c 100644
--- a/arch/sparc/net/bpf_jit_64.h
+++ b/arch/sparc/net/bpf_jit_64.h
@@ -33,35 +33,6 @@
33#define I5 0x1d 33#define I5 0x1d
34#define FP 0x1e 34#define FP 0x1e
35#define I7 0x1f 35#define I7 0x1f
36
37#define r_SKB L0
38#define r_HEADLEN L4
39#define r_SKB_DATA L5
40#define r_TMP G1
41#define r_TMP2 G3
42
43/* assembly code in arch/sparc/net/bpf_jit_asm_64.S */
44extern u32 bpf_jit_load_word[];
45extern u32 bpf_jit_load_half[];
46extern u32 bpf_jit_load_byte[];
47extern u32 bpf_jit_load_byte_msh[];
48extern u32 bpf_jit_load_word_positive_offset[];
49extern u32 bpf_jit_load_half_positive_offset[];
50extern u32 bpf_jit_load_byte_positive_offset[];
51extern u32 bpf_jit_load_byte_msh_positive_offset[];
52extern u32 bpf_jit_load_word_negative_offset[];
53extern u32 bpf_jit_load_half_negative_offset[];
54extern u32 bpf_jit_load_byte_negative_offset[];
55extern u32 bpf_jit_load_byte_msh_negative_offset[];
56
57#else
58#define r_RESULT %o0
59#define r_SKB %o0
60#define r_OFF %o1
61#define r_HEADLEN %l4
62#define r_SKB_DATA %l5
63#define r_TMP %g1
64#define r_TMP2 %g3
65#endif 36#endif
66 37
67#endif /* _BPF_JIT_H */ 38#endif /* _BPF_JIT_H */
diff --git a/arch/sparc/net/bpf_jit_asm_64.S b/arch/sparc/net/bpf_jit_asm_64.S
deleted file mode 100644
index 7177867052a1..000000000000
--- a/arch/sparc/net/bpf_jit_asm_64.S
+++ /dev/null
@@ -1,162 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <asm/ptrace.h>
3
4#include "bpf_jit_64.h"
5
6#define SAVE_SZ 176
7#define SCRATCH_OFF STACK_BIAS + 128
8#define BE_PTR(label) be,pn %xcc, label
9#define SIGN_EXTEND(reg) sra reg, 0, reg
10
11#define SKF_MAX_NEG_OFF (-0x200000) /* SKF_LL_OFF from filter.h */
12
13 .text
14 .globl bpf_jit_load_word
15bpf_jit_load_word:
16 cmp r_OFF, 0
17 bl bpf_slow_path_word_neg
18 nop
19 .globl bpf_jit_load_word_positive_offset
20bpf_jit_load_word_positive_offset:
21 sub r_HEADLEN, r_OFF, r_TMP
22 cmp r_TMP, 3
23 ble bpf_slow_path_word
24 add r_SKB_DATA, r_OFF, r_TMP
25 andcc r_TMP, 3, %g0
26 bne load_word_unaligned
27 nop
28 retl
29 ld [r_TMP], r_RESULT
30load_word_unaligned:
31 ldub [r_TMP + 0x0], r_OFF
32 ldub [r_TMP + 0x1], r_TMP2
33 sll r_OFF, 8, r_OFF
34 or r_OFF, r_TMP2, r_OFF
35 ldub [r_TMP + 0x2], r_TMP2
36 sll r_OFF, 8, r_OFF
37 or r_OFF, r_TMP2, r_OFF
38 ldub [r_TMP + 0x3], r_TMP2
39 sll r_OFF, 8, r_OFF
40 retl
41 or r_OFF, r_TMP2, r_RESULT
42
43 .globl bpf_jit_load_half
44bpf_jit_load_half:
45 cmp r_OFF, 0
46 bl bpf_slow_path_half_neg
47 nop
48 .globl bpf_jit_load_half_positive_offset
49bpf_jit_load_half_positive_offset:
50 sub r_HEADLEN, r_OFF, r_TMP
51 cmp r_TMP, 1
52 ble bpf_slow_path_half
53 add r_SKB_DATA, r_OFF, r_TMP
54 andcc r_TMP, 1, %g0
55 bne load_half_unaligned
56 nop
57 retl
58 lduh [r_TMP], r_RESULT
59load_half_unaligned:
60 ldub [r_TMP + 0x0], r_OFF
61 ldub [r_TMP + 0x1], r_TMP2
62 sll r_OFF, 8, r_OFF
63 retl
64 or r_OFF, r_TMP2, r_RESULT
65
66 .globl bpf_jit_load_byte
67bpf_jit_load_byte:
68 cmp r_OFF, 0
69 bl bpf_slow_path_byte_neg
70 nop
71 .globl bpf_jit_load_byte_positive_offset
72bpf_jit_load_byte_positive_offset:
73 cmp r_OFF, r_HEADLEN
74 bge bpf_slow_path_byte
75 nop
76 retl
77 ldub [r_SKB_DATA + r_OFF], r_RESULT
78
79#define bpf_slow_path_common(LEN) \
80 save %sp, -SAVE_SZ, %sp; \
81 mov %i0, %o0; \
82 mov %i1, %o1; \
83 add %fp, SCRATCH_OFF, %o2; \
84 call skb_copy_bits; \
85 mov (LEN), %o3; \
86 cmp %o0, 0; \
87 restore;
88
89bpf_slow_path_word:
90 bpf_slow_path_common(4)
91 bl bpf_error
92 ld [%sp + SCRATCH_OFF], r_RESULT
93 retl
94 nop
95bpf_slow_path_half:
96 bpf_slow_path_common(2)
97 bl bpf_error
98 lduh [%sp + SCRATCH_OFF], r_RESULT
99 retl
100 nop
101bpf_slow_path_byte:
102 bpf_slow_path_common(1)
103 bl bpf_error
104 ldub [%sp + SCRATCH_OFF], r_RESULT
105 retl
106 nop
107
108#define bpf_negative_common(LEN) \
109 save %sp, -SAVE_SZ, %sp; \
110 mov %i0, %o0; \
111 mov %i1, %o1; \
112 SIGN_EXTEND(%o1); \
113 call bpf_internal_load_pointer_neg_helper; \
114 mov (LEN), %o2; \
115 mov %o0, r_TMP; \
116 cmp %o0, 0; \
117 BE_PTR(bpf_error); \
118 restore;
119
120bpf_slow_path_word_neg:
121 sethi %hi(SKF_MAX_NEG_OFF), r_TMP
122 cmp r_OFF, r_TMP
123 bl bpf_error
124 nop
125 .globl bpf_jit_load_word_negative_offset
126bpf_jit_load_word_negative_offset:
127 bpf_negative_common(4)
128 andcc r_TMP, 3, %g0
129 bne load_word_unaligned
130 nop
131 retl
132 ld [r_TMP], r_RESULT
133
134bpf_slow_path_half_neg:
135 sethi %hi(SKF_MAX_NEG_OFF), r_TMP
136 cmp r_OFF, r_TMP
137 bl bpf_error
138 nop
139 .globl bpf_jit_load_half_negative_offset
140bpf_jit_load_half_negative_offset:
141 bpf_negative_common(2)
142 andcc r_TMP, 1, %g0
143 bne load_half_unaligned
144 nop
145 retl
146 lduh [r_TMP], r_RESULT
147
148bpf_slow_path_byte_neg:
149 sethi %hi(SKF_MAX_NEG_OFF), r_TMP
150 cmp r_OFF, r_TMP
151 bl bpf_error
152 nop
153 .globl bpf_jit_load_byte_negative_offset
154bpf_jit_load_byte_negative_offset:
155 bpf_negative_common(1)
156 retl
157 ldub [r_TMP], r_RESULT
158
159bpf_error:
160 /* Make the JIT program itself return zero. */
161 ret
162 restore %g0, %g0, %o0
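Much of the deleted SPARC assembly deals with alignment: sparc64 traps on misaligned loads, so load_word_unaligned/load_half_unaligned assembled the big-endian result one byte at a time. The equivalent C, as a sketch:

/* C equivalent of the deleted load_word_unaligned path (big-endian result). */
static u32 sparc_load_word_unaligned(const u8 *p)
{
	return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
	       ((u32)p[2] << 8)  | (u32)p[3];
}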
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 48a25869349b..9f5918e0693a 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -48,10 +48,6 @@ static void bpf_flush_icache(void *start_, void *end_)
 	}
 }
 
-#define SEEN_DATAREF 1 /* might call external helpers */
-#define SEEN_XREG    2 /* ebx is used */
-#define SEEN_MEM     4 /* use mem[] for temporary storage */
-
 #define S13(X)		((X) & 0x1fff)
 #define S5(X)		((X) & 0x1f)
 #define IMMED		0x00002000
@@ -198,7 +194,6 @@ struct jit_ctx {
 	bool			tmp_1_used;
 	bool			tmp_2_used;
 	bool			tmp_3_used;
-	bool			saw_ld_abs_ind;
 	bool			saw_frame_pointer;
 	bool			saw_call;
 	bool			saw_tail_call;
@@ -207,9 +202,7 @@ struct jit_ctx {
 
 #define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
 #define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
-#define SKB_HLEN_REG	(MAX_BPF_JIT_REG + 2)
-#define SKB_DATA_REG	(MAX_BPF_JIT_REG + 3)
-#define TMP_REG_3	(MAX_BPF_JIT_REG + 4)
+#define TMP_REG_3	(MAX_BPF_JIT_REG + 2)
 
 /* Map BPF registers to SPARC registers */
 static const int bpf2sparc[] = {
@@ -238,9 +231,6 @@ static const int bpf2sparc[] = {
 	[TMP_REG_1] = G1,
 	[TMP_REG_2] = G2,
 	[TMP_REG_3] = G3,
-
-	[SKB_HLEN_REG] = L4,
-	[SKB_DATA_REG] = L5,
 };
 
 static void emit(const u32 insn, struct jit_ctx *ctx)
@@ -800,25 +790,6 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
800 return 0; 790 return 0;
801} 791}
802 792
803static void load_skb_regs(struct jit_ctx *ctx, u8 r_skb)
804{
805 const u8 r_headlen = bpf2sparc[SKB_HLEN_REG];
806 const u8 r_data = bpf2sparc[SKB_DATA_REG];
807 const u8 r_tmp = bpf2sparc[TMP_REG_1];
808 unsigned int off;
809
810 off = offsetof(struct sk_buff, len);
811 emit(LD32I | RS1(r_skb) | S13(off) | RD(r_headlen), ctx);
812
813 off = offsetof(struct sk_buff, data_len);
814 emit(LD32I | RS1(r_skb) | S13(off) | RD(r_tmp), ctx);
815
816 emit(SUB | RS1(r_headlen) | RS2(r_tmp) | RD(r_headlen), ctx);
817
818 off = offsetof(struct sk_buff, data);
819 emit(LDPTRI | RS1(r_skb) | S13(off) | RD(r_data), ctx);
820}
821
822/* Just skip the save instruction and the ctx register move. */ 793/* Just skip the save instruction and the ctx register move. */
823#define BPF_TAILCALL_PROLOGUE_SKIP 16 794#define BPF_TAILCALL_PROLOGUE_SKIP 16
824#define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128) 795#define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128)
@@ -857,9 +828,6 @@ static void build_prologue(struct jit_ctx *ctx)
857 828
858 emit_reg_move(I0, O0, ctx); 829 emit_reg_move(I0, O0, ctx);
859 /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */ 830 /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
860
861 if (ctx->saw_ld_abs_ind)
862 load_skb_regs(ctx, bpf2sparc[BPF_REG_1]);
863} 831}
864 832
865static void build_epilogue(struct jit_ctx *ctx) 833static void build_epilogue(struct jit_ctx *ctx)
@@ -1225,16 +1193,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1225 u8 *func = ((u8 *)__bpf_call_base) + imm; 1193 u8 *func = ((u8 *)__bpf_call_base) + imm;
1226 1194
1227 ctx->saw_call = true; 1195 ctx->saw_call = true;
1228 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1229 emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
1230 1196
1231 emit_call((u32 *)func, ctx); 1197 emit_call((u32 *)func, ctx);
1232 emit_nop(ctx); 1198 emit_nop(ctx);
1233 1199
1234 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); 1200 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
1235
1236 if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
1237 load_skb_regs(ctx, L7);
1238 break; 1201 break;
1239 } 1202 }
1240 1203
@@ -1412,43 +1375,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1412 emit_nop(ctx); 1375 emit_nop(ctx);
1413 break; 1376 break;
1414 } 1377 }
1415#define CHOOSE_LOAD_FUNC(K, func) \
1416 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
1417
1418 /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
1419 case BPF_LD | BPF_ABS | BPF_W:
1420 func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_word);
1421 goto common_load;
1422 case BPF_LD | BPF_ABS | BPF_H:
1423 func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_half);
1424 goto common_load;
1425 case BPF_LD | BPF_ABS | BPF_B:
1426 func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_byte);
1427 goto common_load;
1428 /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
1429 case BPF_LD | BPF_IND | BPF_W:
1430 func = bpf_jit_load_word;
1431 goto common_load;
1432 case BPF_LD | BPF_IND | BPF_H:
1433 func = bpf_jit_load_half;
1434 goto common_load;
1435
1436 case BPF_LD | BPF_IND | BPF_B:
1437 func = bpf_jit_load_byte;
1438 common_load:
1439 ctx->saw_ld_abs_ind = true;
1440
1441 emit_reg_move(bpf2sparc[BPF_REG_6], O0, ctx);
1442 emit_loadimm(imm, O1, ctx);
1443
1444 if (BPF_MODE(code) == BPF_IND)
1445 emit_alu(ADD, src, O1, ctx);
1446
1447 emit_call(func, ctx);
1448 emit_alu_K(SRA, O1, 0, ctx);
1449
1450 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
1451 break;
1452 1378
1453 default: 1379 default:
1454 pr_err_once("unknown opcode %02x\n", code); 1380 pr_err_once("unknown opcode %02x\n", code);
@@ -1583,12 +1509,11 @@ skip_init_ctx:
 	build_epilogue(&ctx);
 
 	if (bpf_jit_enable > 1)
-		pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c%c]\n", pass,
+		pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass,
 			image_size - (ctx.idx * 4),
 			ctx.tmp_1_used ? '1' : ' ',
 			ctx.tmp_2_used ? '2' : ' ',
 			ctx.tmp_3_used ? '3' : ' ',
-			ctx.saw_ld_abs_ind ? 'L' : ' ',
 			ctx.saw_frame_pointer ? 'F' : ' ',
 			ctx.saw_call ? 'C' : ' ',
 			ctx.saw_tail_call ? 'T' : ' ');
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c07f492b871a..d51a71dcbac2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -138,7 +138,7 @@ config X86
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
-	select HAVE_EBPF_JIT			if X86_64
+	select HAVE_EBPF_JIT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_EXIT_THREAD
 	select HAVE_FENTRY			if X86_64 || DYNAMIC_FTRACE
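HAVE_EBPF_JIT no longer needs the X86_64 guard because the new arch/x86/net/bpf_jit_comp32.c (visible in the diffstat above, and wired up in the x86 Makefile below) provides an eBPF JIT for 32-bit x86 as well.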
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index f928ad9b143f..2cd344d1a6e5 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -291,16 +291,20 @@ do { \
  *    lfence
  *    jmp spec_trap
  * do_rop:
- *    mov %rax,(%rsp)
+ *    mov %rax,(%rsp) for x86_64
+ *    mov %edx,(%esp) for x86_32
  *    retq
  *
  * Without retpolines configured:
  *
- *    jmp *%rax
+ *    jmp *%rax for x86_64
+ *    jmp *%edx for x86_32
  */
 #ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_64
 # define RETPOLINE_RAX_BPF_JIT_SIZE	17
 # define RETPOLINE_RAX_BPF_JIT()				\
+do {								\
 	EMIT1_off32(0xE8, 7);	 /* callq do_rop */		\
 	/* spec_trap: */					\
 	EMIT2(0xF3, 0x90);       /* pause */			\
@@ -308,11 +312,31 @@ do { \
 	EMIT2(0xEB, 0xF9);       /* jmp spec_trap */		\
 	/* do_rop: */						\
 	EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */	\
-	EMIT1(0xC3);             /* retq */
+	EMIT1(0xC3);             /* retq */			\
+} while (0)
 #else
+# define RETPOLINE_EDX_BPF_JIT()				\
+do {								\
+	EMIT1_off32(0xE8, 7);	 /* call do_rop */		\
+	/* spec_trap: */					\
+	EMIT2(0xF3, 0x90);       /* pause */			\
+	EMIT3(0x0F, 0xAE, 0xE8); /* lfence */			\
+	EMIT2(0xEB, 0xF9);       /* jmp spec_trap */		\
+	/* do_rop: */						\
+	EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */		\
+	EMIT1(0xC3);             /* ret */			\
+} while (0)
+#endif
+#else /* !CONFIG_RETPOLINE */
+
+#ifdef CONFIG_X86_64
 # define RETPOLINE_RAX_BPF_JIT_SIZE	2
 # define RETPOLINE_RAX_BPF_JIT()				\
 	EMIT2(0xFF, 0xE0);       /* jmp *%rax */
+#else
+# define RETPOLINE_EDX_BPF_JIT()				\
+	EMIT2(0xFF, 0xE2)        /* jmp *%edx */
+#endif
 #endif
 
 #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
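The new RETPOLINE_EDX_BPF_JIT() is used like its RAX counterpart, from inside the JIT where the local prog/cnt emit state the EMIT*() helpers rely on is in scope. A hedged sketch of such a call site (the function name is illustrative, not a quote of bpf_jit_comp32.c):

/* Inside arch/x86/net/bpf_jit_comp32.c, where the EMIT*() helpers and the
 * local "prog"/"cnt" variables they expand to are defined. */
static void emit_indirect_jump_edx(u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/* Expands to the retpoline thunk with CONFIG_RETPOLINE,
	 * or to a plain "jmp *%edx" (0xFF 0xE2) without it. */
	RETPOLINE_EDX_BPF_JIT();

	*pprog = prog;
}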
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index fefb4b619598..59e123da580c 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -1,6 +1,9 @@
 #
 # Arch-specific network modules
 #
-OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
 
-obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+ifeq ($(CONFIG_X86_32),y)
+        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
+else
+        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+endif
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
deleted file mode 100644
index b33093f84528..000000000000
--- a/arch/x86/net/bpf_jit.S
+++ /dev/null
@@ -1,154 +0,0 @@
1/* bpf_jit.S : BPF JIT helper functions
2 *
3 * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; version 2
8 * of the License.
9 */
10#include <linux/linkage.h>
11#include <asm/frame.h>
12
13/*
14 * Calling convention :
15 * rbx : skb pointer (callee saved)
16 * esi : offset of byte(s) to fetch in skb (can be scratched)
17 * r10 : copy of skb->data
18 * r9d : hlen = skb->len - skb->data_len
19 */
20#define SKBDATA %r10
21#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
22
23#define FUNC(name) \
24 .globl name; \
25 .type name, @function; \
26 name:
27
28FUNC(sk_load_word)
29 test %esi,%esi
30 js bpf_slow_path_word_neg
31
32FUNC(sk_load_word_positive_offset)
33 mov %r9d,%eax # hlen
34 sub %esi,%eax # hlen - offset
35 cmp $3,%eax
36 jle bpf_slow_path_word
37 mov (SKBDATA,%rsi),%eax
38 bswap %eax /* ntohl() */
39 ret
40
41FUNC(sk_load_half)
42 test %esi,%esi
43 js bpf_slow_path_half_neg
44
45FUNC(sk_load_half_positive_offset)
46 mov %r9d,%eax
47 sub %esi,%eax # hlen - offset
48 cmp $1,%eax
49 jle bpf_slow_path_half
50 movzwl (SKBDATA,%rsi),%eax
51 rol $8,%ax # ntohs()
52 ret
53
54FUNC(sk_load_byte)
55 test %esi,%esi
56 js bpf_slow_path_byte_neg
57
58FUNC(sk_load_byte_positive_offset)
59 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
60 jle bpf_slow_path_byte
61 movzbl (SKBDATA,%rsi),%eax
62 ret
63
64/* rsi contains offset and can be scratched */
65#define bpf_slow_path_common(LEN) \
66 lea 32(%rbp), %rdx;\
67 FRAME_BEGIN; \
68 mov %rbx, %rdi; /* arg1 == skb */ \
69 push %r9; \
70 push SKBDATA; \
71/* rsi already has offset */ \
72 mov $LEN,%ecx; /* len */ \
73 call skb_copy_bits; \
74 test %eax,%eax; \
75 pop SKBDATA; \
76 pop %r9; \
77 FRAME_END
78
79
80bpf_slow_path_word:
81 bpf_slow_path_common(4)
82 js bpf_error
83 mov 32(%rbp),%eax
84 bswap %eax
85 ret
86
87bpf_slow_path_half:
88 bpf_slow_path_common(2)
89 js bpf_error
90 mov 32(%rbp),%ax
91 rol $8,%ax
92 movzwl %ax,%eax
93 ret
94
95bpf_slow_path_byte:
96 bpf_slow_path_common(1)
97 js bpf_error
98 movzbl 32(%rbp),%eax
99 ret
100
101#define sk_negative_common(SIZE) \
102 FRAME_BEGIN; \
103 mov %rbx, %rdi; /* arg1 == skb */ \
104 push %r9; \
105 push SKBDATA; \
106/* rsi already has offset */ \
107 mov $SIZE,%edx; /* size */ \
108 call bpf_internal_load_pointer_neg_helper; \
109 test %rax,%rax; \
110 pop SKBDATA; \
111 pop %r9; \
112 FRAME_END; \
113 jz bpf_error
114
115bpf_slow_path_word_neg:
116 cmp SKF_MAX_NEG_OFF, %esi /* test range */
117 jl bpf_error /* offset lower -> error */
118
119FUNC(sk_load_word_negative_offset)
120 sk_negative_common(4)
121 mov (%rax), %eax
122 bswap %eax
123 ret
124
125bpf_slow_path_half_neg:
126 cmp SKF_MAX_NEG_OFF, %esi
127 jl bpf_error
128
129FUNC(sk_load_half_negative_offset)
130 sk_negative_common(2)
131 mov (%rax),%ax
132 rol $8,%ax
133 movzwl %ax,%eax
134 ret
135
136bpf_slow_path_byte_neg:
137 cmp SKF_MAX_NEG_OFF, %esi
138 jl bpf_error
139
140FUNC(sk_load_byte_negative_offset)
141 sk_negative_common(1)
142 movzbl (%rax), %eax
143 ret
144
145bpf_error:
146# force a return 0 from jit handler
147 xor %eax,%eax
148 mov (%rbp),%rbx
149 mov 8(%rbp),%r13
150 mov 16(%rbp),%r14
151 mov 24(%rbp),%r15
152 add $40, %rbp
153 leaveq
154 ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 263c8453815e..8fca446aaef6 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1,4 +1,5 @@
1/* bpf_jit_comp.c : BPF JIT compiler 1/*
2 * bpf_jit_comp.c: BPF JIT compiler
2 * 3 *
3 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) 4 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
4 * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 5 * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
@@ -16,15 +17,6 @@
16#include <asm/set_memory.h> 17#include <asm/set_memory.h>
17#include <asm/nospec-branch.h> 18#include <asm/nospec-branch.h>
18 19
19/*
20 * assembly code in arch/x86/net/bpf_jit.S
21 */
22extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
23extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
24extern u8 sk_load_byte_positive_offset[];
25extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
26extern u8 sk_load_byte_negative_offset[];
27
28static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) 20static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
29{ 21{
30 if (len == 1) 22 if (len == 1)
@@ -45,14 +37,15 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
45#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) 37#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
46#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) 38#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
47#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) 39#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
40
48#define EMIT1_off32(b1, off) \ 41#define EMIT1_off32(b1, off) \
49 do {EMIT1(b1); EMIT(off, 4); } while (0) 42 do { EMIT1(b1); EMIT(off, 4); } while (0)
50#define EMIT2_off32(b1, b2, off) \ 43#define EMIT2_off32(b1, b2, off) \
51 do {EMIT2(b1, b2); EMIT(off, 4); } while (0) 44 do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
52#define EMIT3_off32(b1, b2, b3, off) \ 45#define EMIT3_off32(b1, b2, b3, off) \
53 do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) 46 do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
54#define EMIT4_off32(b1, b2, b3, b4, off) \ 47#define EMIT4_off32(b1, b2, b3, b4, off) \
55 do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) 48 do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
56 49
57static bool is_imm8(int value) 50static bool is_imm8(int value)
58{ 51{
@@ -70,9 +63,10 @@ static bool is_uimm32(u64 value)
70} 63}
71 64
72/* mov dst, src */ 65/* mov dst, src */
73#define EMIT_mov(DST, SRC) \ 66#define EMIT_mov(DST, SRC) \
74 do {if (DST != SRC) \ 67 do { \
75 EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ 68 if (DST != SRC) \
69 EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
76 } while (0) 70 } while (0)
77 71
78static int bpf_size_to_x86_bytes(int bpf_size) 72static int bpf_size_to_x86_bytes(int bpf_size)
@@ -89,7 +83,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
89 return 0; 83 return 0;
90} 84}
91 85
92/* list of x86 cond jumps opcodes (. + s8) 86/*
87 * List of x86 cond jumps opcodes (. + s8)
93 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) 88 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
94 */ 89 */
95#define X86_JB 0x72 90#define X86_JB 0x72
@@ -103,38 +98,37 @@ static int bpf_size_to_x86_bytes(int bpf_size)
103#define X86_JLE 0x7E 98#define X86_JLE 0x7E
104#define X86_JG 0x7F 99#define X86_JG 0x7F
105 100
106#define CHOOSE_LOAD_FUNC(K, func) \ 101/* Pick a register outside of BPF range for JIT internal work */
107 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
108
109/* pick a register outside of BPF range for JIT internal work */
110#define AUX_REG (MAX_BPF_JIT_REG + 1) 102#define AUX_REG (MAX_BPF_JIT_REG + 1)
111 103
112/* The following table maps BPF registers to x64 registers. 104/*
105 * The following table maps BPF registers to x86-64 registers.
113 * 106 *
114 * x64 register r12 is unused, since if used as base address 107 * x86-64 register R12 is unused, since if used as base address
115 * register in load/store instructions, it always needs an 108 * register in load/store instructions, it always needs an
116 * extra byte of encoding and is callee saved. 109 * extra byte of encoding and is callee saved.
117 * 110 *
118 * r9 caches skb->len - skb->data_len 111 * Also x86-64 register R9 is unused. x86-64 register R10 is
119 * r10 caches skb->data, and used for blinding (if enabled) 112 * used for blinding (if enabled).
120 */ 113 */
121static const int reg2hex[] = { 114static const int reg2hex[] = {
122 [BPF_REG_0] = 0, /* rax */ 115 [BPF_REG_0] = 0, /* RAX */
123 [BPF_REG_1] = 7, /* rdi */ 116 [BPF_REG_1] = 7, /* RDI */
124 [BPF_REG_2] = 6, /* rsi */ 117 [BPF_REG_2] = 6, /* RSI */
125 [BPF_REG_3] = 2, /* rdx */ 118 [BPF_REG_3] = 2, /* RDX */
126 [BPF_REG_4] = 1, /* rcx */ 119 [BPF_REG_4] = 1, /* RCX */
127 [BPF_REG_5] = 0, /* r8 */ 120 [BPF_REG_5] = 0, /* R8 */
128 [BPF_REG_6] = 3, /* rbx callee saved */ 121 [BPF_REG_6] = 3, /* RBX callee saved */
129 [BPF_REG_7] = 5, /* r13 callee saved */ 122 [BPF_REG_7] = 5, /* R13 callee saved */
130 [BPF_REG_8] = 6, /* r14 callee saved */ 123 [BPF_REG_8] = 6, /* R14 callee saved */
131 [BPF_REG_9] = 7, /* r15 callee saved */ 124 [BPF_REG_9] = 7, /* R15 callee saved */
132 [BPF_REG_FP] = 5, /* rbp readonly */ 125 [BPF_REG_FP] = 5, /* RBP readonly */
133 [BPF_REG_AX] = 2, /* r10 temp register */ 126 [BPF_REG_AX] = 2, /* R10 temp register */
134 [AUX_REG] = 3, /* r11 temp register */ 127 [AUX_REG] = 3, /* R11 temp register */
135}; 128};
136 129
137/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 130/*
131 * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
138 * which need extra byte of encoding. 132 * which need extra byte of encoding.
139 * rax,rcx,...,rbp have simpler encoding 133 * rax,rcx,...,rbp have simpler encoding
140 */ 134 */
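
As an illustration (not part of the patch), the reg2hex[] table together with the add_2mod()/add_2reg() helpers is enough to compute an encoding by hand. The small standalone program below recomputes what EMIT_mov(BPF_REG_6, BPF_REG_7), i.e. 'mov rbx,r13', produces; it assumes add_2mod() ORs in REX bit 0 for an extended destination and bit 2 for an extended source, consistent with the is_ereg() description above (its body is not shown in this hunk):

#include <stdio.h>

int main(void)
{
	unsigned char rbx = 3;	/* reg2hex[BPF_REG_6]                 */
	unsigned char r13 = 5;	/* reg2hex[BPF_REG_7], lives in r8..r15 */

	unsigned char rex   = 0x48 | 0x04;		/* REX.W + REX.R (src is r13)    */
	unsigned char modrm = 0xC0 + rbx + (r13 << 3);	/* add_2reg(): mod=11 reg=5 rm=3 */

	printf("%02X 89 %02X\n", rex, modrm);		/* prints: 4C 89 EB */
	return 0;
}
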
@@ -153,7 +147,7 @@ static bool is_axreg(u32 reg)
153 return reg == BPF_REG_0; 147 return reg == BPF_REG_0;
154} 148}
155 149
156/* add modifiers if 'reg' maps to x64 registers r8..r15 */ 150/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
157static u8 add_1mod(u8 byte, u32 reg) 151static u8 add_1mod(u8 byte, u32 reg)
158{ 152{
159 if (is_ereg(reg)) 153 if (is_ereg(reg))
@@ -170,13 +164,13 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
170 return byte; 164 return byte;
171} 165}
172 166
173/* encode 'dst_reg' register into x64 opcode 'byte' */ 167/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
174static u8 add_1reg(u8 byte, u32 dst_reg) 168static u8 add_1reg(u8 byte, u32 dst_reg)
175{ 169{
176 return byte + reg2hex[dst_reg]; 170 return byte + reg2hex[dst_reg];
177} 171}
178 172
179/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ 173/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
180static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) 174static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
181{ 175{
182 return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); 176 return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
@@ -184,27 +178,24 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
184 178
185static void jit_fill_hole(void *area, unsigned int size) 179static void jit_fill_hole(void *area, unsigned int size)
186{ 180{
187 /* fill whole space with int3 instructions */ 181 /* Fill whole space with INT3 instructions */
188 memset(area, 0xcc, size); 182 memset(area, 0xcc, size);
189} 183}
190 184
191struct jit_context { 185struct jit_context {
192 int cleanup_addr; /* epilogue code offset */ 186 int cleanup_addr; /* Epilogue code offset */
193 bool seen_ld_abs;
194 bool seen_ax_reg;
195}; 187};
196 188
197/* maximum number of bytes emitted while JITing one eBPF insn */ 189/* Maximum number of bytes emitted while JITing one eBPF insn */
198#define BPF_MAX_INSN_SIZE 128 190#define BPF_MAX_INSN_SIZE 128
199#define BPF_INSN_SAFETY 64 191#define BPF_INSN_SAFETY 64
200 192
201#define AUX_STACK_SPACE \ 193#define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */
202 (32 /* space for rbx, r13, r14, r15 */ + \
203 8 /* space for skb_copy_bits() buffer */)
204 194
205#define PROLOGUE_SIZE 37 195#define PROLOGUE_SIZE 37
206 196
207/* emit x64 prologue code for BPF program and check it's size. 197/*
198 * Emit x86-64 prologue code for BPF program and check its size.
208 * bpf_tail_call helper will skip it while jumping into another program 199 * bpf_tail_call helper will skip it while jumping into another program
209 */ 200 */
210static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) 201static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
@@ -212,8 +203,11 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
212 u8 *prog = *pprog; 203 u8 *prog = *pprog;
213 int cnt = 0; 204 int cnt = 0;
214 205
215 EMIT1(0x55); /* push rbp */ 206 /* push rbp */
216 EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ 207 EMIT1(0x55);
208
209 /* mov rbp,rsp */
210 EMIT3(0x48, 0x89, 0xE5);
217 211
218 /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ 212 /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
219 EMIT3_off32(0x48, 0x81, 0xEC, 213 EMIT3_off32(0x48, 0x81, 0xEC,
@@ -222,19 +216,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
222 /* sub rbp, AUX_STACK_SPACE */ 216 /* sub rbp, AUX_STACK_SPACE */
223 EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); 217 EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
224 218
225 /* all classic BPF filters use R6(rbx) save it */
226
227 /* mov qword ptr [rbp+0],rbx */ 219 /* mov qword ptr [rbp+0],rbx */
228 EMIT4(0x48, 0x89, 0x5D, 0); 220 EMIT4(0x48, 0x89, 0x5D, 0);
229
230 /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
231 * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
232 * R8(r14). R9(r15) spill could be made conditional, but there is only
233 * one 'bpf_error' return path out of helper functions inside bpf_jit.S
234 * The overhead of extra spill is negligible for any filter other
235 * than synthetic ones. Therefore not worth adding complexity.
236 */
237
238 /* mov qword ptr [rbp+8],r13 */ 221 /* mov qword ptr [rbp+8],r13 */
239 EMIT4(0x4C, 0x89, 0x6D, 8); 222 EMIT4(0x4C, 0x89, 0x6D, 8);
240 /* mov qword ptr [rbp+16],r14 */ 223 /* mov qword ptr [rbp+16],r14 */
@@ -243,9 +226,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
243 EMIT4(0x4C, 0x89, 0x7D, 24); 226 EMIT4(0x4C, 0x89, 0x7D, 24);
244 227
245 if (!ebpf_from_cbpf) { 228 if (!ebpf_from_cbpf) {
246 /* Clear the tail call counter (tail_call_cnt): for eBPF tail 229 /*
230 * Clear the tail call counter (tail_call_cnt): for eBPF tail
247 * calls we need to reset the counter to 0. It's done in two 231 * calls we need to reset the counter to 0. It's done in two
248 * instructions, resetting rax register to 0, and moving it 232 * instructions, resetting RAX register to 0, and moving it
249 * to the counter location. 233 * to the counter location.
250 */ 234 */
251 235
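
A sketch of the resulting AUX_STACK_SPACE layout (illustration only): the four register spills are the stores shown above, and the remaining 8 bytes hold the tail call counter per the new define's comment ("Space for RBX, R13, R14, R15, tailcnt"); the exact width and offset used for the counter within those 8 bytes is not visible in these hunks.

struct aux_stack_area {				/* RBP points at its base */
	unsigned long long rbx;			/* [rbp +  0], callee saved */
	unsigned long long r13;			/* [rbp +  8], callee saved */
	unsigned long long r14;			/* [rbp + 16], callee saved */
	unsigned long long r15;			/* [rbp + 24], callee saved */
	unsigned long long tail_call_cnt;	/* [rbp + 32], remaining 8 bytes */
};
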
@@ -260,7 +244,9 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
260 *pprog = prog; 244 *pprog = prog;
261} 245}
262 246
263/* generate the following code: 247/*
248 * Generate the following code:
249 *
264 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... 250 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
265 * if (index >= array->map.max_entries) 251 * if (index >= array->map.max_entries)
266 * goto out; 252 * goto out;
@@ -278,23 +264,26 @@ static void emit_bpf_tail_call(u8 **pprog)
278 int label1, label2, label3; 264 int label1, label2, label3;
279 int cnt = 0; 265 int cnt = 0;
280 266
281 /* rdi - pointer to ctx 267 /*
268 * rdi - pointer to ctx
282 * rsi - pointer to bpf_array 269 * rsi - pointer to bpf_array
283 * rdx - index in bpf_array 270 * rdx - index in bpf_array
284 */ 271 */
285 272
286 /* if (index >= array->map.max_entries) 273 /*
287 * goto out; 274 * if (index >= array->map.max_entries)
275 * goto out;
288 */ 276 */
289 EMIT2(0x89, 0xD2); /* mov edx, edx */ 277 EMIT2(0x89, 0xD2); /* mov edx, edx */
290 EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ 278 EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
291 offsetof(struct bpf_array, map.max_entries)); 279 offsetof(struct bpf_array, map.max_entries));
292#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ 280#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
293 EMIT2(X86_JBE, OFFSET1); /* jbe out */ 281 EMIT2(X86_JBE, OFFSET1); /* jbe out */
294 label1 = cnt; 282 label1 = cnt;
295 283
296 /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) 284 /*
297 * goto out; 285 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
286 * goto out;
298 */ 287 */
299 EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ 288 EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
300 EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ 289 EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
@@ -308,8 +297,9 @@ static void emit_bpf_tail_call(u8 **pprog)
308 EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ 297 EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
309 offsetof(struct bpf_array, ptrs)); 298 offsetof(struct bpf_array, ptrs));
310 299
311 /* if (prog == NULL) 300 /*
312 * goto out; 301 * if (prog == NULL)
302 * goto out;
313 */ 303 */
314 EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ 304 EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
315#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) 305#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
@@ -321,7 +311,8 @@ static void emit_bpf_tail_call(u8 **pprog)
321 offsetof(struct bpf_prog, bpf_func)); 311 offsetof(struct bpf_prog, bpf_func));
322 EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ 312 EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */
323 313
324 /* now we're ready to jump into next BPF program 314 /*
 315 * Now we're ready to jump into next BPF program
325 * rdi == ctx (1st arg) 316 * rdi == ctx (1st arg)
326 * rax == prog->bpf_func + prologue_size 317 * rax == prog->bpf_func + prologue_size
327 */ 318 */
@@ -334,26 +325,6 @@ static void emit_bpf_tail_call(u8 **pprog)
334 *pprog = prog; 325 *pprog = prog;
335} 326}
336 327
337
338static void emit_load_skb_data_hlen(u8 **pprog)
339{
340 u8 *prog = *pprog;
341 int cnt = 0;
342
343 /* r9d = skb->len - skb->data_len (headlen)
344 * r10 = skb->data
345 */
346 /* mov %r9d, off32(%rdi) */
347 EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));
348
349 /* sub %r9d, off32(%rdi) */
350 EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));
351
352 /* mov %r10, off32(%rdi) */
353 EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
354 *pprog = prog;
355}
356
357static void emit_mov_imm32(u8 **pprog, bool sign_propagate, 328static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
358 u32 dst_reg, const u32 imm32) 329 u32 dst_reg, const u32 imm32)
359{ 330{
@@ -361,7 +332,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
361 u8 b1, b2, b3; 332 u8 b1, b2, b3;
362 int cnt = 0; 333 int cnt = 0;
363 334
364 /* optimization: if imm32 is positive, use 'mov %eax, imm32' 335 /*
336 * Optimization: if imm32 is positive, use 'mov %eax, imm32'
365 * (which zero-extends imm32) to save 2 bytes. 337 * (which zero-extends imm32) to save 2 bytes.
366 */ 338 */
367 if (sign_propagate && (s32)imm32 < 0) { 339 if (sign_propagate && (s32)imm32 < 0) {
@@ -373,7 +345,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
373 goto done; 345 goto done;
374 } 346 }
375 347
376 /* optimization: if imm32 is zero, use 'xor %eax, %eax' 348 /*
349 * Optimization: if imm32 is zero, use 'xor %eax, %eax'
377 * to save 3 bytes. 350 * to save 3 bytes.
378 */ 351 */
379 if (imm32 == 0) { 352 if (imm32 == 0) {
@@ -400,7 +373,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
400 int cnt = 0; 373 int cnt = 0;
401 374
402 if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { 375 if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
403 /* For emitting plain u32, where sign bit must not be 376 /*
377 * For emitting plain u32, where sign bit must not be
404 * propagated LLVM tends to load imm64 over mov32 378 * propagated LLVM tends to load imm64 over mov32
405 * directly, so save couple of bytes by just doing 379 * directly, so save couple of bytes by just doing
406 * 'mov %eax, imm32' instead. 380 * 'mov %eax, imm32' instead.
@@ -439,8 +413,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
439{ 413{
440 struct bpf_insn *insn = bpf_prog->insnsi; 414 struct bpf_insn *insn = bpf_prog->insnsi;
441 int insn_cnt = bpf_prog->len; 415 int insn_cnt = bpf_prog->len;
442 bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
443 bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
444 bool seen_exit = false; 416 bool seen_exit = false;
445 u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; 417 u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
446 int i, cnt = 0; 418 int i, cnt = 0;
@@ -450,9 +422,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
450 emit_prologue(&prog, bpf_prog->aux->stack_depth, 422 emit_prologue(&prog, bpf_prog->aux->stack_depth,
451 bpf_prog_was_classic(bpf_prog)); 423 bpf_prog_was_classic(bpf_prog));
452 424
453 if (seen_ld_abs)
454 emit_load_skb_data_hlen(&prog);
455
456 for (i = 0; i < insn_cnt; i++, insn++) { 425 for (i = 0; i < insn_cnt; i++, insn++) {
457 const s32 imm32 = insn->imm; 426 const s32 imm32 = insn->imm;
458 u32 dst_reg = insn->dst_reg; 427 u32 dst_reg = insn->dst_reg;
@@ -460,13 +429,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
460 u8 b2 = 0, b3 = 0; 429 u8 b2 = 0, b3 = 0;
461 s64 jmp_offset; 430 s64 jmp_offset;
462 u8 jmp_cond; 431 u8 jmp_cond;
463 bool reload_skb_data;
464 int ilen; 432 int ilen;
465 u8 *func; 433 u8 *func;
466 434
467 if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
468 ctx->seen_ax_reg = seen_ax_reg = true;
469
470 switch (insn->code) { 435 switch (insn->code) {
471 /* ALU */ 436 /* ALU */
472 case BPF_ALU | BPF_ADD | BPF_X: 437 case BPF_ALU | BPF_ADD | BPF_X:
@@ -525,7 +490,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
525 else if (is_ereg(dst_reg)) 490 else if (is_ereg(dst_reg))
526 EMIT1(add_1mod(0x40, dst_reg)); 491 EMIT1(add_1mod(0x40, dst_reg));
527 492
528 /* b3 holds 'normal' opcode, b2 short form only valid 493 /*
494 * b3 holds 'normal' opcode, b2 short form only valid
529 * in case dst is eax/rax. 495 * in case dst is eax/rax.
530 */ 496 */
531 switch (BPF_OP(insn->code)) { 497 switch (BPF_OP(insn->code)) {
@@ -593,7 +559,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
593 /* mov rax, dst_reg */ 559 /* mov rax, dst_reg */
594 EMIT_mov(BPF_REG_0, dst_reg); 560 EMIT_mov(BPF_REG_0, dst_reg);
595 561
596 /* xor edx, edx 562 /*
563 * xor edx, edx
597 * equivalent to 'xor rdx, rdx', but one byte less 564 * equivalent to 'xor rdx, rdx', but one byte less
598 */ 565 */
599 EMIT2(0x31, 0xd2); 566 EMIT2(0x31, 0xd2);
@@ -655,7 +622,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
655 } 622 }
656 break; 623 break;
657 } 624 }
658 /* shifts */ 625 /* Shifts */
659 case BPF_ALU | BPF_LSH | BPF_K: 626 case BPF_ALU | BPF_LSH | BPF_K:
660 case BPF_ALU | BPF_RSH | BPF_K: 627 case BPF_ALU | BPF_RSH | BPF_K:
661 case BPF_ALU | BPF_ARSH | BPF_K: 628 case BPF_ALU | BPF_ARSH | BPF_K:
@@ -686,7 +653,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
686 case BPF_ALU64 | BPF_RSH | BPF_X: 653 case BPF_ALU64 | BPF_RSH | BPF_X:
687 case BPF_ALU64 | BPF_ARSH | BPF_X: 654 case BPF_ALU64 | BPF_ARSH | BPF_X:
688 655
689 /* check for bad case when dst_reg == rcx */ 656 /* Check for bad case when dst_reg == rcx */
690 if (dst_reg == BPF_REG_4) { 657 if (dst_reg == BPF_REG_4) {
691 /* mov r11, dst_reg */ 658 /* mov r11, dst_reg */
692 EMIT_mov(AUX_REG, dst_reg); 659 EMIT_mov(AUX_REG, dst_reg);
@@ -724,13 +691,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
724 case BPF_ALU | BPF_END | BPF_FROM_BE: 691 case BPF_ALU | BPF_END | BPF_FROM_BE:
725 switch (imm32) { 692 switch (imm32) {
726 case 16: 693 case 16:
727 /* emit 'ror %ax, 8' to swap lower 2 bytes */ 694 /* Emit 'ror %ax, 8' to swap lower 2 bytes */
728 EMIT1(0x66); 695 EMIT1(0x66);
729 if (is_ereg(dst_reg)) 696 if (is_ereg(dst_reg))
730 EMIT1(0x41); 697 EMIT1(0x41);
731 EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); 698 EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
732 699
733 /* emit 'movzwl eax, ax' */ 700 /* Emit 'movzwl eax, ax' */
734 if (is_ereg(dst_reg)) 701 if (is_ereg(dst_reg))
735 EMIT3(0x45, 0x0F, 0xB7); 702 EMIT3(0x45, 0x0F, 0xB7);
736 else 703 else
@@ -738,7 +705,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
738 EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); 705 EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
739 break; 706 break;
740 case 32: 707 case 32:
741 /* emit 'bswap eax' to swap lower 4 bytes */ 708 /* Emit 'bswap eax' to swap lower 4 bytes */
742 if (is_ereg(dst_reg)) 709 if (is_ereg(dst_reg))
743 EMIT2(0x41, 0x0F); 710 EMIT2(0x41, 0x0F);
744 else 711 else
@@ -746,7 +713,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
746 EMIT1(add_1reg(0xC8, dst_reg)); 713 EMIT1(add_1reg(0xC8, dst_reg));
747 break; 714 break;
748 case 64: 715 case 64:
749 /* emit 'bswap rax' to swap 8 bytes */ 716 /* Emit 'bswap rax' to swap 8 bytes */
750 EMIT3(add_1mod(0x48, dst_reg), 0x0F, 717 EMIT3(add_1mod(0x48, dst_reg), 0x0F,
751 add_1reg(0xC8, dst_reg)); 718 add_1reg(0xC8, dst_reg));
752 break; 719 break;
@@ -756,7 +723,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
756 case BPF_ALU | BPF_END | BPF_FROM_LE: 723 case BPF_ALU | BPF_END | BPF_FROM_LE:
757 switch (imm32) { 724 switch (imm32) {
758 case 16: 725 case 16:
759 /* emit 'movzwl eax, ax' to zero extend 16-bit 726 /*
727 * Emit 'movzwl eax, ax' to zero extend 16-bit
760 * into 64 bit 728 * into 64 bit
761 */ 729 */
762 if (is_ereg(dst_reg)) 730 if (is_ereg(dst_reg))
@@ -766,7 +734,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
766 EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); 734 EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
767 break; 735 break;
768 case 32: 736 case 32:
769 /* emit 'mov eax, eax' to clear upper 32-bits */ 737 /* Emit 'mov eax, eax' to clear upper 32-bits */
770 if (is_ereg(dst_reg)) 738 if (is_ereg(dst_reg))
771 EMIT1(0x45); 739 EMIT1(0x45);
772 EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); 740 EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
@@ -809,9 +777,9 @@ st: if (is_imm8(insn->off))
809 777
810 /* STX: *(u8*)(dst_reg + off) = src_reg */ 778 /* STX: *(u8*)(dst_reg + off) = src_reg */
811 case BPF_STX | BPF_MEM | BPF_B: 779 case BPF_STX | BPF_MEM | BPF_B:
812 /* emit 'mov byte ptr [rax + off], al' */ 780 /* Emit 'mov byte ptr [rax + off], al' */
813 if (is_ereg(dst_reg) || is_ereg(src_reg) || 781 if (is_ereg(dst_reg) || is_ereg(src_reg) ||
814 /* have to add extra byte for x86 SIL, DIL regs */ 782 /* We have to add extra byte for x86 SIL, DIL regs */
815 src_reg == BPF_REG_1 || src_reg == BPF_REG_2) 783 src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
816 EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); 784 EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
817 else 785 else
@@ -840,25 +808,26 @@ stx: if (is_imm8(insn->off))
840 808
841 /* LDX: dst_reg = *(u8*)(src_reg + off) */ 809 /* LDX: dst_reg = *(u8*)(src_reg + off) */
842 case BPF_LDX | BPF_MEM | BPF_B: 810 case BPF_LDX | BPF_MEM | BPF_B:
843 /* emit 'movzx rax, byte ptr [rax + off]' */ 811 /* Emit 'movzx rax, byte ptr [rax + off]' */
844 EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); 812 EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
845 goto ldx; 813 goto ldx;
846 case BPF_LDX | BPF_MEM | BPF_H: 814 case BPF_LDX | BPF_MEM | BPF_H:
847 /* emit 'movzx rax, word ptr [rax + off]' */ 815 /* Emit 'movzx rax, word ptr [rax + off]' */
848 EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); 816 EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
849 goto ldx; 817 goto ldx;
850 case BPF_LDX | BPF_MEM | BPF_W: 818 case BPF_LDX | BPF_MEM | BPF_W:
851 /* emit 'mov eax, dword ptr [rax+0x14]' */ 819 /* Emit 'mov eax, dword ptr [rax+0x14]' */
852 if (is_ereg(dst_reg) || is_ereg(src_reg)) 820 if (is_ereg(dst_reg) || is_ereg(src_reg))
853 EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); 821 EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
854 else 822 else
855 EMIT1(0x8B); 823 EMIT1(0x8B);
856 goto ldx; 824 goto ldx;
857 case BPF_LDX | BPF_MEM | BPF_DW: 825 case BPF_LDX | BPF_MEM | BPF_DW:
858 /* emit 'mov rax, qword ptr [rax+0x14]' */ 826 /* Emit 'mov rax, qword ptr [rax+0x14]' */
859 EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); 827 EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
860ldx: /* if insn->off == 0 we can save one extra byte, but 828ldx: /*
861 * special case of x86 r13 which always needs an offset 829 * If insn->off == 0 we can save one extra byte, but
830 * special case of x86 R13 which always needs an offset
862 * is not worth the hassle 831 * is not worth the hassle
863 */ 832 */
864 if (is_imm8(insn->off)) 833 if (is_imm8(insn->off))
@@ -870,7 +839,7 @@ ldx: /* if insn->off == 0 we can save one extra byte, but
870 839
871 /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ 840 /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
872 case BPF_STX | BPF_XADD | BPF_W: 841 case BPF_STX | BPF_XADD | BPF_W:
873 /* emit 'lock add dword ptr [rax + off], eax' */ 842 /* Emit 'lock add dword ptr [rax + off], eax' */
874 if (is_ereg(dst_reg) || is_ereg(src_reg)) 843 if (is_ereg(dst_reg) || is_ereg(src_reg))
875 EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); 844 EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
876 else 845 else
@@ -889,35 +858,12 @@ xadd: if (is_imm8(insn->off))
889 case BPF_JMP | BPF_CALL: 858 case BPF_JMP | BPF_CALL:
890 func = (u8 *) __bpf_call_base + imm32; 859 func = (u8 *) __bpf_call_base + imm32;
891 jmp_offset = func - (image + addrs[i]); 860 jmp_offset = func - (image + addrs[i]);
892 if (seen_ld_abs) {
893 reload_skb_data = bpf_helper_changes_pkt_data(func);
894 if (reload_skb_data) {
895 EMIT1(0x57); /* push %rdi */
896 jmp_offset += 22; /* pop, mov, sub, mov */
897 } else {
898 EMIT2(0x41, 0x52); /* push %r10 */
899 EMIT2(0x41, 0x51); /* push %r9 */
900 /* need to adjust jmp offset, since
901 * pop %r9, pop %r10 take 4 bytes after call insn
902 */
903 jmp_offset += 4;
904 }
905 }
906 if (!imm32 || !is_simm32(jmp_offset)) { 861 if (!imm32 || !is_simm32(jmp_offset)) {
907 pr_err("unsupported bpf func %d addr %p image %p\n", 862 pr_err("unsupported BPF func %d addr %p image %p\n",
908 imm32, func, image); 863 imm32, func, image);
909 return -EINVAL; 864 return -EINVAL;
910 } 865 }
911 EMIT1_off32(0xE8, jmp_offset); 866 EMIT1_off32(0xE8, jmp_offset);
912 if (seen_ld_abs) {
913 if (reload_skb_data) {
914 EMIT1(0x5F); /* pop %rdi */
915 emit_load_skb_data_hlen(&prog);
916 } else {
917 EMIT2(0x41, 0x59); /* pop %r9 */
918 EMIT2(0x41, 0x5A); /* pop %r10 */
919 }
920 }
921 break; 867 break;
922 868
923 case BPF_JMP | BPF_TAIL_CALL: 869 case BPF_JMP | BPF_TAIL_CALL:
@@ -970,7 +916,7 @@ xadd: if (is_imm8(insn->off))
970 else 916 else
971 EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); 917 EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);
972 918
973emit_cond_jmp: /* convert BPF opcode to x86 */ 919emit_cond_jmp: /* Convert BPF opcode to x86 */
974 switch (BPF_OP(insn->code)) { 920 switch (BPF_OP(insn->code)) {
975 case BPF_JEQ: 921 case BPF_JEQ:
976 jmp_cond = X86_JE; 922 jmp_cond = X86_JE;
@@ -996,22 +942,22 @@ emit_cond_jmp: /* convert BPF opcode to x86 */
996 jmp_cond = X86_JBE; 942 jmp_cond = X86_JBE;
997 break; 943 break;
998 case BPF_JSGT: 944 case BPF_JSGT:
999 /* signed '>', GT in x86 */ 945 /* Signed '>', GT in x86 */
1000 jmp_cond = X86_JG; 946 jmp_cond = X86_JG;
1001 break; 947 break;
1002 case BPF_JSLT: 948 case BPF_JSLT:
1003 /* signed '<', LT in x86 */ 949 /* Signed '<', LT in x86 */
1004 jmp_cond = X86_JL; 950 jmp_cond = X86_JL;
1005 break; 951 break;
1006 case BPF_JSGE: 952 case BPF_JSGE:
1007 /* signed '>=', GE in x86 */ 953 /* Signed '>=', GE in x86 */
1008 jmp_cond = X86_JGE; 954 jmp_cond = X86_JGE;
1009 break; 955 break;
1010 case BPF_JSLE: 956 case BPF_JSLE:
1011 /* signed '<=', LE in x86 */ 957 /* Signed '<=', LE in x86 */
1012 jmp_cond = X86_JLE; 958 jmp_cond = X86_JLE;
1013 break; 959 break;
1014 default: /* to silence gcc warning */ 960 default: /* to silence GCC warning */
1015 return -EFAULT; 961 return -EFAULT;
1016 } 962 }
1017 jmp_offset = addrs[i + insn->off] - addrs[i]; 963 jmp_offset = addrs[i + insn->off] - addrs[i];
@@ -1039,7 +985,7 @@ emit_cond_jmp: /* convert BPF opcode to x86 */
1039 jmp_offset = addrs[i + insn->off] - addrs[i]; 985 jmp_offset = addrs[i + insn->off] - addrs[i];
1040 986
1041 if (!jmp_offset) 987 if (!jmp_offset)
1042 /* optimize out nop jumps */ 988 /* Optimize out nop jumps */
1043 break; 989 break;
1044emit_jmp: 990emit_jmp:
1045 if (is_imm8(jmp_offset)) { 991 if (is_imm8(jmp_offset)) {
@@ -1052,66 +998,13 @@ emit_jmp:
1052 } 998 }
1053 break; 999 break;
1054 1000
1055 case BPF_LD | BPF_IND | BPF_W:
1056 func = sk_load_word;
1057 goto common_load;
1058 case BPF_LD | BPF_ABS | BPF_W:
1059 func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
1060common_load:
1061 ctx->seen_ld_abs = seen_ld_abs = true;
1062 jmp_offset = func - (image + addrs[i]);
1063 if (!func || !is_simm32(jmp_offset)) {
1064 pr_err("unsupported bpf func %d addr %p image %p\n",
1065 imm32, func, image);
1066 return -EINVAL;
1067 }
1068 if (BPF_MODE(insn->code) == BPF_ABS) {
1069 /* mov %esi, imm32 */
1070 EMIT1_off32(0xBE, imm32);
1071 } else {
1072 /* mov %rsi, src_reg */
1073 EMIT_mov(BPF_REG_2, src_reg);
1074 if (imm32) {
1075 if (is_imm8(imm32))
1076 /* add %esi, imm8 */
1077 EMIT3(0x83, 0xC6, imm32);
1078 else
1079 /* add %esi, imm32 */
1080 EMIT2_off32(0x81, 0xC6, imm32);
1081 }
1082 }
1083 /* skb pointer is in R6 (%rbx), it will be copied into
1084 * %rdi if skb_copy_bits() call is necessary.
1085 * sk_load_* helpers also use %r10 and %r9d.
1086 * See bpf_jit.S
1087 */
1088 if (seen_ax_reg)
1089 /* r10 = skb->data, mov %r10, off32(%rbx) */
1090 EMIT3_off32(0x4c, 0x8b, 0x93,
1091 offsetof(struct sk_buff, data));
1092 EMIT1_off32(0xE8, jmp_offset); /* call */
1093 break;
1094
1095 case BPF_LD | BPF_IND | BPF_H:
1096 func = sk_load_half;
1097 goto common_load;
1098 case BPF_LD | BPF_ABS | BPF_H:
1099 func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
1100 goto common_load;
1101 case BPF_LD | BPF_IND | BPF_B:
1102 func = sk_load_byte;
1103 goto common_load;
1104 case BPF_LD | BPF_ABS | BPF_B:
1105 func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
1106 goto common_load;
1107
1108 case BPF_JMP | BPF_EXIT: 1001 case BPF_JMP | BPF_EXIT:
1109 if (seen_exit) { 1002 if (seen_exit) {
1110 jmp_offset = ctx->cleanup_addr - addrs[i]; 1003 jmp_offset = ctx->cleanup_addr - addrs[i];
1111 goto emit_jmp; 1004 goto emit_jmp;
1112 } 1005 }
1113 seen_exit = true; 1006 seen_exit = true;
1114 /* update cleanup_addr */ 1007 /* Update cleanup_addr */
1115 ctx->cleanup_addr = proglen; 1008 ctx->cleanup_addr = proglen;
1116 /* mov rbx, qword ptr [rbp+0] */ 1009 /* mov rbx, qword ptr [rbp+0] */
1117 EMIT4(0x48, 0x8B, 0x5D, 0); 1010 EMIT4(0x48, 0x8B, 0x5D, 0);
@@ -1129,10 +1022,11 @@ common_load:
1129 break; 1022 break;
1130 1023
1131 default: 1024 default:
1132 /* By design x64 JIT should support all BPF instructions 1025 /*
1026 * By design x86-64 JIT should support all BPF instructions.
1133 * This error will be seen if new instruction was added 1027 * This error will be seen if new instruction was added
1134 * to interpreter, but not to JIT 1028 * to the interpreter, but not to the JIT, or if there is
1135 * or if there is junk in bpf_prog 1029 * junk in bpf_prog.
1136 */ 1030 */
1137 pr_err("bpf_jit: unknown opcode %02x\n", insn->code); 1031 pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
1138 return -EINVAL; 1032 return -EINVAL;
@@ -1184,7 +1078,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1184 return orig_prog; 1078 return orig_prog;
1185 1079
1186 tmp = bpf_jit_blind_constants(prog); 1080 tmp = bpf_jit_blind_constants(prog);
1187 /* If blinding was requested and we failed during blinding, 1081 /*
1082 * If blinding was requested and we failed during blinding,
1188 * we must fall back to the interpreter. 1083 * we must fall back to the interpreter.
1189 */ 1084 */
1190 if (IS_ERR(tmp)) 1085 if (IS_ERR(tmp))
@@ -1218,8 +1113,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1218 goto out_addrs; 1113 goto out_addrs;
1219 } 1114 }
1220 1115
1221 /* Before first pass, make a rough estimation of addrs[] 1116 /*
1222 * each bpf instruction is translated to less than 64 bytes 1117 * Before first pass, make a rough estimation of addrs[]
1118 * each BPF instruction is translated to less than 64 bytes
1223 */ 1119 */
1224 for (proglen = 0, i = 0; i < prog->len; i++) { 1120 for (proglen = 0, i = 0; i < prog->len; i++) {
1225 proglen += 64; 1121 proglen += 64;
@@ -1228,10 +1124,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1228 ctx.cleanup_addr = proglen; 1124 ctx.cleanup_addr = proglen;
1229skip_init_addrs: 1125skip_init_addrs:
1230 1126
1231 /* JITed image shrinks with every pass and the loop iterates 1127 /*
1232 * until the image stops shrinking. Very large bpf programs 1128 * JITed image shrinks with every pass and the loop iterates
1129 * until the image stops shrinking. Very large BPF programs
1233 * may converge on the last pass. In such case do one more 1130 * may converge on the last pass. In such case do one more
1234 * pass to emit the final image 1131 * pass to emit the final image.
1235 */ 1132 */
1236 for (pass = 0; pass < 20 || image; pass++) { 1133 for (pass = 0; pass < 20 || image; pass++) {
1237 proglen = do_jit(prog, addrs, image, oldproglen, &ctx); 1134 proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
new file mode 100644
index 000000000000..0cc04e30adc1
--- /dev/null
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -0,0 +1,2419 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
4 *
5 * Author: Wang YanQing (udknight@gmail.com)
6 * The code is based on code and ideas from:
7 * Eric Dumazet (eric.dumazet@gmail.com)
8 * and from:
9 * Shubham Bansal <illusionist.neo@gmail.com>
10 */
11
12#include <linux/netdevice.h>
13#include <linux/filter.h>
14#include <linux/if_vlan.h>
15#include <asm/cacheflush.h>
16#include <asm/set_memory.h>
17#include <asm/nospec-branch.h>
18#include <linux/bpf.h>
19
20/*
21 * eBPF prog stack layout:
22 *
23 * high
24 * original ESP => +-----+
25 * | | callee saved registers
26 * +-----+
27 * | ... | eBPF JIT scratch space
28 * BPF_FP,IA32_EBP => +-----+
29 * | ... | eBPF prog stack
30 * +-----+
31 * |RSVD | JIT scratchpad
32 * current ESP => +-----+
33 * | |
34 * | ... | Function call stack
35 * | |
36 * +-----+
37 * low
38 *
39 * The callee saved registers:
40 *
41 * high
42 * original ESP => +------------------+ \
43 * | ebp | |
44 * current EBP => +------------------+ } callee saved registers
45 * | ebx,esi,edi | |
46 * +------------------+ /
47 * low
48 */
49
50static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
51{
52 if (len == 1)
53 *ptr = bytes;
54 else if (len == 2)
55 *(u16 *)ptr = bytes;
56 else {
57 *(u32 *)ptr = bytes;
58 barrier();
59 }
60 return ptr + len;
61}
62
63#define EMIT(bytes, len) \
64 do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
65
66#define EMIT1(b1) EMIT(b1, 1)
67#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
68#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
69#define EMIT4(b1, b2, b3, b4) \
70 EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
71
72#define EMIT1_off32(b1, off) \
73 do { EMIT1(b1); EMIT(off, 4); } while (0)
74#define EMIT2_off32(b1, b2, off) \
75 do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
76#define EMIT3_off32(b1, b2, b3, off) \
77 do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
78#define EMIT4_off32(b1, b2, b3, b4, off) \
79 do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
80
81#define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)
82
83static bool is_imm8(int value)
84{
85 return value <= 127 && value >= -128;
86}
87
88static bool is_simm32(s64 value)
89{
90 return value == (s64) (s32) value;
91}
92
93#define STACK_OFFSET(k) (k)
94#define TCALL_CNT (MAX_BPF_JIT_REG + 0) /* Tail Call Count */
95
96#define IA32_EAX (0x0)
97#define IA32_EBX (0x3)
98#define IA32_ECX (0x1)
99#define IA32_EDX (0x2)
100#define IA32_ESI (0x6)
101#define IA32_EDI (0x7)
102#define IA32_EBP (0x5)
103#define IA32_ESP (0x4)
104
105/*
106 * List of x86 cond jumps opcodes (. + s8)
107 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
108 */
109#define IA32_JB 0x72
110#define IA32_JAE 0x73
111#define IA32_JE 0x74
112#define IA32_JNE 0x75
113#define IA32_JBE 0x76
114#define IA32_JA 0x77
115#define IA32_JL 0x7C
116#define IA32_JGE 0x7D
117#define IA32_JLE 0x7E
118#define IA32_JG 0x7F
119
120/*
121 * Map eBPF registers to IA32 32bit registers or stack scratch space.
122 *
123 * 1. All the registers, R0-R10, are mapped to scratch space on stack.
124 * 2. We need two 64 bit temp registers to do complex operations on eBPF
125 * registers.
126 * 3. For performance reasons, BPF_REG_AX, which is used for blinding
127 * constants, is mapped to the real hardware register pair IA32_ESI/IA32_EDI.
128 *
129 * Since eBPF registers are 64 bit wide and IA32 has only 32 bit registers,
130 * each eBPF register is mapped to a pair of IA32 32 bit registers or to two
131 * 32 bit slots in the stack scratch space, and 64 bit values are built from those halves.
132 *
133 * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
134 */
135static const u8 bpf2ia32[][2] = {
136 /* Return value from in-kernel function, and exit value from eBPF */
137 [BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},
138
139 /* The arguments from eBPF program to in-kernel function */
140 /* Stored on stack scratch space */
141 [BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
142 [BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
143 [BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
144 [BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
145 [BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},
146
147 /* Callee saved registers that in-kernel function will preserve */
148 /* Stored on stack scratch space */
149 [BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
150 [BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
151 [BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
152 [BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},
153
154 /* Read only Frame Pointer to access Stack */
155 [BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},
156
157 /* Temporary register for blinding constants. */
158 [BPF_REG_AX] = {IA32_ESI, IA32_EDI},
159
160 /* Tail call count. Stored on stack scratch space. */
161 [TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
162};
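
A minimal sketch (not part of the patch) of how a 64-bit eBPF register is reassembled from its two 32-bit scratch slots, using BPF_REG_1 at STACK_OFFSET(8)/STACK_OFFSET(12) as mapped in the table above; the helper name and 'scratch' pointer (the base of the STACK_VAR() area) are illustrative only:

#include <stdint.h>
#include <string.h>

static uint64_t read_bpf_reg1(const uint8_t *scratch)
{
	uint32_t lo, hi;

	memcpy(&lo, scratch + 8,  sizeof(lo));	/* STACK_OFFSET(8):  low  32 bits */
	memcpy(&hi, scratch + 12, sizeof(hi));	/* STACK_OFFSET(12): high 32 bits */

	return (uint64_t)hi << 32 | lo;
}
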
163
164#define dst_lo dst[0]
165#define dst_hi dst[1]
166#define src_lo src[0]
167#define src_hi src[1]
168
169#define STACK_ALIGNMENT 8
170/*
171 * Stack space for BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4,
172 * BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9,
173 * BPF_REG_FP, BPF_REG_AX and Tail call counts.
174 */
175#define SCRATCH_SIZE 96
176
177/* Total stack size used in JITed code */
178#define _STACK_SIZE (stack_depth + SCRATCH_SIZE)
179
180#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
181
182/* Get the offset of eBPF REGISTERs stored on scratch space. */
183#define STACK_VAR(off) (off)
184
185/* Encode 'dst_reg' register into IA32 opcode 'byte' */
186static u8 add_1reg(u8 byte, u32 dst_reg)
187{
188 return byte + dst_reg;
189}
190
191/* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
192static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
193{
194 return byte + dst_reg + (src_reg << 3);
195}
196
197static void jit_fill_hole(void *area, unsigned int size)
198{
199 /* Fill whole space with int3 instructions */
200 memset(area, 0xcc, size);
201}
202
203static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
204 u8 **pprog)
205{
206 u8 *prog = *pprog;
207 int cnt = 0;
208
209 if (dstk) {
210 if (val == 0) {
211 /* xor eax,eax */
212 EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
213 /* mov dword ptr [ebp+off],eax */
214 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
215 STACK_VAR(dst));
216 } else {
217 EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
218 STACK_VAR(dst), val);
219 }
220 } else {
221 if (val == 0)
222 EMIT2(0x33, add_2reg(0xC0, dst, dst));
223 else
224 EMIT2_off32(0xC7, add_1reg(0xC0, dst),
225 val);
226 }
227 *pprog = prog;
228}
229
230/* dst = src (4 bytes) */
231static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
232 bool sstk, u8 **pprog)
233{
234 u8 *prog = *pprog;
235 int cnt = 0;
236 u8 sreg = sstk ? IA32_EAX : src;
237
238 if (sstk)
239 /* mov eax,dword ptr [ebp+off] */
240 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
241 if (dstk)
242 /* mov dword ptr [ebp+off],eax */
243 EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
244 else
245 /* mov dst,sreg */
246 EMIT2(0x89, add_2reg(0xC0, dst, sreg));
247
248 *pprog = prog;
249}
250
251/* dst = src */
252static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
253 const u8 src[], bool dstk,
254 bool sstk, u8 **pprog)
255{
256 emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
257 if (is64)
258 /* complete 8 byte move */
259 emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
260 else
261 /* zero out high 4 bytes */
262 emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
263}
264
265/* Sign extended move */
266static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
267 const u32 val, bool dstk, u8 **pprog)
268{
269 u32 hi = 0;
270
271 if (is64 && (val & (1<<31)))
272 hi = (u32)~0;
273 emit_ia32_mov_i(dst_lo, val, dstk, pprog);
274 emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
275}
276
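
A C-level sketch of the split done above (illustration only): the low half is the immediate itself, and for 64-bit operations the high half becomes all ones whenever bit 31 of the immediate is set, mirroring the '(val & (1 << 31))' test:

#include <stdint.h>

static void split_imm64_sketch(int32_t imm, int is64, uint32_t *lo, uint32_t *hi)
{
	*lo = (uint32_t)imm;
	*hi = (is64 && imm < 0) ? 0xffffffffU : 0;	/* sign extension of imm32 */
}
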
277/*
278 * ALU operation (32 bit)
279 * dst = dst * src
280 */
281static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
282 bool sstk, u8 **pprog)
283{
284 u8 *prog = *pprog;
285 int cnt = 0;
286 u8 sreg = sstk ? IA32_ECX : src;
287
288 if (sstk)
289 /* mov ecx,dword ptr [ebp+off] */
290 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
291
292 if (dstk)
293 /* mov eax,dword ptr [ebp+off] */
294 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
295 else
296 /* mov eax,dst */
297 EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
298
299
300 EMIT2(0xF7, add_1reg(0xE0, sreg));
301
302 if (dstk)
303 /* mov dword ptr [ebp+off],eax */
304 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
305 STACK_VAR(dst));
306 else
307 /* mov dst,eax */
308 EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
309
310 *pprog = prog;
311}
312
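
The one-operand 'mul sreg' emitted above multiplies EAX by sreg into EDX:EAX and only the low 32 bits are written back, so the C-level result is simply (illustration only):

#include <stdint.h>

static uint32_t mul32_sketch(uint32_t dst, uint32_t src)
{
	/* 'mul' produces a 64-bit product in EDX:EAX; EDX is discarded here */
	return (uint32_t)((uint64_t)dst * src);
}
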
313static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
314 bool dstk, u8 **pprog)
315{
316 u8 *prog = *pprog;
317 int cnt = 0;
318 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
319 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
320
321 if (dstk && val != 64) {
322 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
323 STACK_VAR(dst_lo));
324 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
325 STACK_VAR(dst_hi));
326 }
327 switch (val) {
328 case 16:
329 /*
330 * Emit 'movzwl eax,ax' to zero extend 16-bit
331 * into 64 bit
332 */
333 EMIT2(0x0F, 0xB7);
334 EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
335 /* xor dreg_hi,dreg_hi */
336 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
337 break;
338 case 32:
339 /* xor dreg_hi,dreg_hi */
340 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
341 break;
342 case 64:
343 /* nop */
344 break;
345 }
346
347 if (dstk && val != 64) {
348 /* mov dword ptr [ebp+off],dreg_lo */
349 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
350 STACK_VAR(dst_lo));
351 /* mov dword ptr [ebp+off],dreg_hi */
352 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
353 STACK_VAR(dst_hi));
354 }
355 *pprog = prog;
356}
357
358static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
359 bool dstk, u8 **pprog)
360{
361 u8 *prog = *pprog;
362 int cnt = 0;
363 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
364 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
365
366 if (dstk) {
367 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
368 STACK_VAR(dst_lo));
369 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
370 STACK_VAR(dst_hi));
371 }
372 switch (val) {
373 case 16:
374 /* Emit 'ror %ax, 8' to swap lower 2 bytes */
375 EMIT1(0x66);
376 EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);
377
378 EMIT2(0x0F, 0xB7);
379 EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
380
381 /* xor dreg_hi,dreg_hi */
382 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
383 break;
384 case 32:
385 /* Emit 'bswap eax' to swap lower 4 bytes */
386 EMIT1(0x0F);
387 EMIT1(add_1reg(0xC8, dreg_lo));
388
389 /* xor dreg_hi,dreg_hi */
390 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
391 break;
392 case 64:
393 /* Emit 'bswap eax' to swap lower 4 bytes */
394 EMIT1(0x0F);
395 EMIT1(add_1reg(0xC8, dreg_lo));
396
397 /* Emit 'bswap edx' to swap lower 4 bytes */
398 EMIT1(0x0F);
399 EMIT1(add_1reg(0xC8, dreg_hi));
400
401 /* mov ecx,dreg_hi */
402 EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
403 /* mov dreg_hi,dreg_lo */
404 EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
405 /* mov dreg_lo,ecx */
406 EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
407
408 break;
409 }
410 if (dstk) {
411 /* mov dword ptr [ebp+off],dreg_lo */
412 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
413 STACK_VAR(dst_lo));
414 /* mov dword ptr [ebp+off],dreg_hi */
415 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
416 STACK_VAR(dst_hi));
417 }
418 *pprog = prog;
419}
420
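
The 64-bit BPF_FROM_BE case above byte swaps each 32-bit half and then exchanges the halves; a C-level sketch of the intended result (illustration only, using the GCC/Clang __builtin_bswap32 builtin):

#include <stdint.h>

static uint64_t bswap64_sketch(uint64_t v)
{
	uint32_t lo = __builtin_bswap32((uint32_t)v);		/* bswap dreg_lo */
	uint32_t hi = __builtin_bswap32((uint32_t)(v >> 32));	/* bswap dreg_hi */

	/* the swapped low half becomes the new high half and vice versa */
	return (uint64_t)lo << 32 | hi;
}
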
421/*
422 * ALU operation (32 bit)
423 * dst = dst (div|mod) src
424 */
425static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
426 bool dstk, bool sstk, u8 **pprog)
427{
428 u8 *prog = *pprog;
429 int cnt = 0;
430
431 if (sstk)
432 /* mov ecx,dword ptr [ebp+off] */
433 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
434 STACK_VAR(src));
435 else if (src != IA32_ECX)
436 /* mov ecx,src */
437 EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
438
439 if (dstk)
440 /* mov eax,dword ptr [ebp+off] */
441 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
442 STACK_VAR(dst));
443 else
444 /* mov eax,dst */
445 EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
446
447 /* xor edx,edx */
448 EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
449 /* div ecx */
450 EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));
451
452 if (op == BPF_MOD) {
453 if (dstk)
454 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
455 STACK_VAR(dst));
456 else
457 EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
458 } else {
459 if (dstk)
460 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
461 STACK_VAR(dst));
462 else
463 EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
464 }
465 *pprog = prog;
466}
467
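
The sequence above zeroes EDX and runs a one-operand 'div ecx', which leaves the quotient in EAX and the remainder in EDX; a C-level sketch of the resulting unsigned 32-bit operation (illustration only, src assumed non-zero here):

#include <stdint.h>
#include <stdbool.h>

static uint32_t div_mod32_sketch(uint32_t dst, uint32_t src, bool is_mod)
{
	/* BPF_MOD keeps the remainder (EDX), BPF_DIV keeps the quotient (EAX) */
	return is_mod ? dst % src : dst / src;
}
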
468/*
469 * ALU operation (32 bit)
470 * dst = dst (shift) src
471 */
472static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
473 bool dstk, bool sstk, u8 **pprog)
474{
475 u8 *prog = *pprog;
476 int cnt = 0;
477 u8 dreg = dstk ? IA32_EAX : dst;
478 u8 b2;
479
480 if (dstk)
481 /* mov eax,dword ptr [ebp+off] */
482 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
483
484 if (sstk)
485 /* mov ecx,dword ptr [ebp+off] */
486 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
487 else if (src != IA32_ECX)
488 /* mov ecx,src */
489 EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
490
491 switch (op) {
492 case BPF_LSH:
493 b2 = 0xE0; break;
494 case BPF_RSH:
495 b2 = 0xE8; break;
496 case BPF_ARSH:
497 b2 = 0xF8; break;
498 default:
499 return;
500 }
501 EMIT2(0xD3, add_1reg(b2, dreg));
502
503 if (dstk)
504 /* mov dword ptr [ebp+off],dreg */
505 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
506 *pprog = prog;
507}
508
509/*
510 * ALU operation (32 bit)
511 * dst = dst (op) src
512 */
513static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
514 const u8 dst, const u8 src, bool dstk,
515 bool sstk, u8 **pprog)
516{
517 u8 *prog = *pprog;
518 int cnt = 0;
519 u8 sreg = sstk ? IA32_EAX : src;
520 u8 dreg = dstk ? IA32_EDX : dst;
521
522 if (sstk)
523 /* mov eax,dword ptr [ebp+off] */
524 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
525
526 if (dstk)
527 /* mov eax,dword ptr [ebp+off] */
528 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));
529
530 switch (BPF_OP(op)) {
531 /* dst = dst + src */
532 case BPF_ADD:
533 if (hi && is64)
534 EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
535 else
536 EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
537 break;
538 /* dst = dst - src */
539 case BPF_SUB:
540 if (hi && is64)
541 EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
542 else
543 EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
544 break;
545 /* dst = dst | src */
546 case BPF_OR:
547 EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
548 break;
549 /* dst = dst & src */
550 case BPF_AND:
551 EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
552 break;
553 /* dst = dst ^ src */
554 case BPF_XOR:
555 EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
556 break;
557 }
558
559 if (dstk)
560 /* mov dword ptr [ebp+off],dreg */
561 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
562 STACK_VAR(dst));
563 *pprog = prog;
564}
565
566/* ALU operation (64 bit) */
567static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
568 const u8 dst[], const u8 src[],
569 bool dstk, bool sstk,
570 u8 **pprog)
571{
572 u8 *prog = *pprog;
573
574 emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
575 if (is64)
576 emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
577 &prog);
578 else
579 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
580 *pprog = prog;
581}
582
583/*
584 * ALU operation (32 bit)
585 * dst = dst (op) val
586 */
587static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
588 const u8 dst, const s32 val, bool dstk,
589 u8 **pprog)
590{
591 u8 *prog = *pprog;
592 int cnt = 0;
593 u8 dreg = dstk ? IA32_EAX : dst;
594 u8 sreg = IA32_EDX;
595
596 if (dstk)
597 /* mov eax,dword ptr [ebp+off] */
598 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
599
600 if (!is_imm8(val))
601 /* mov edx,imm32*/
602 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);
603
604 switch (op) {
605 /* dst = dst + val */
606 case BPF_ADD:
607 if (hi && is64) {
608 if (is_imm8(val))
609 EMIT3(0x83, add_1reg(0xD0, dreg), val);
610 else
611 EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
612 } else {
613 if (is_imm8(val))
614 EMIT3(0x83, add_1reg(0xC0, dreg), val);
615 else
616 EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
617 }
618 break;
619 /* dst = dst - val */
620 case BPF_SUB:
621 if (hi && is64) {
622 if (is_imm8(val))
623 EMIT3(0x83, add_1reg(0xD8, dreg), val);
624 else
625 EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
626 } else {
627 if (is_imm8(val))
628 EMIT3(0x83, add_1reg(0xE8, dreg), val);
629 else
630 EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
631 }
632 break;
633 /* dst = dst | val */
634 case BPF_OR:
635 if (is_imm8(val))
636 EMIT3(0x83, add_1reg(0xC8, dreg), val);
637 else
638 EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
639 break;
640 /* dst = dst & val */
641 case BPF_AND:
642 if (is_imm8(val))
643 EMIT3(0x83, add_1reg(0xE0, dreg), val);
644 else
645 EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
646 break;
647 /* dst = dst ^ val */
648 case BPF_XOR:
649 if (is_imm8(val))
650 EMIT3(0x83, add_1reg(0xF0, dreg), val);
651 else
652 EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
653 break;
654 case BPF_NEG:
655 EMIT2(0xF7, add_1reg(0xD8, dreg));
656 break;
657 }
658
659 if (dstk)
660 /* mov dword ptr [ebp+off],dreg */
661 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
662 STACK_VAR(dst));
663 *pprog = prog;
664}
665
666/* ALU operation (64 bit) */
667static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
668 const u8 dst[], const u32 val,
669 bool dstk, u8 **pprog)
670{
671 u8 *prog = *pprog;
672 u32 hi = 0;
673
674 if (is64 && (val & (1<<31)))
675 hi = (u32)~0;
676
677 emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
678 if (is64)
679 emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
680 else
681 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
682
683 *pprog = prog;
684}
685
686/* dst = ~dst (64 bit) */
687static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
688{
689 u8 *prog = *pprog;
690 int cnt = 0;
691 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
692 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
693
694 if (dstk) {
695 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
696 STACK_VAR(dst_lo));
697 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
698 STACK_VAR(dst_hi));
699 }
700
701 /* xor ecx,ecx */
702 EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
703 /* sub dreg_lo,ecx */
704 EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX));
705 /* mov dreg_lo,ecx */
706 EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
707
708 /* xor ecx,ecx */
709 EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
710 /* sbb dreg_hi,ecx */
711 EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX));
712 /* mov dreg_hi,ecx */
713 EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX));
714
715 if (dstk) {
716 /* mov dword ptr [ebp+off],dreg_lo */
717 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
718 STACK_VAR(dst_lo));
719 /* mov dword ptr [ebp+off],dreg_hi */
720 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
721 STACK_VAR(dst_hi));
722 }
723 *pprog = prog;
724}
725
726/* dst = dst << src */
727static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
728 bool dstk, bool sstk, u8 **pprog)
729{
730 u8 *prog = *pprog;
731 int cnt = 0;
732 static int jmp_label1 = -1;
733 static int jmp_label2 = -1;
734 static int jmp_label3 = -1;
735 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
736 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
737
738 if (dstk) {
739 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
740 STACK_VAR(dst_lo));
741 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
742 STACK_VAR(dst_hi));
743 }
744
745 if (sstk)
746 /* mov ecx,dword ptr [ebp+off] */
747 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
748 STACK_VAR(src_lo));
749 else
750 /* mov ecx,src_lo */
751 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
752
753 /* cmp ecx,32 */
754 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
755 /* Jumps when >= 32 */
756 if (is_imm8(jmp_label(jmp_label1, 2)))
757 EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
758 else
759 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
760
761 /* < 32 */
762 /* shl dreg_hi,cl */
763 EMIT2(0xD3, add_1reg(0xE0, dreg_hi));
764 /* mov ebx,dreg_lo */
765 EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
766 /* shl dreg_lo,cl */
767 EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
768
769 /* IA32_ECX = -IA32_ECX + 32 */
770 /* neg ecx */
771 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
772 /* add ecx,32 */
773 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
774
775 /* shr ebx,cl */
776 EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
777 /* or dreg_hi,ebx */
778 EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
779
780 /* goto out; */
781 if (is_imm8(jmp_label(jmp_label3, 2)))
782 EMIT2(0xEB, jmp_label(jmp_label3, 2));
783 else
784 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
785
786 /* >= 32 */
787 if (jmp_label1 == -1)
788 jmp_label1 = cnt;
789
790 /* cmp ecx,64 */
791 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
792 /* Jumps when >= 64 */
793 if (is_imm8(jmp_label(jmp_label2, 2)))
794 EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
795 else
796 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
797
798 /* >= 32 && < 64 */
799 /* sub ecx,32 */
800 EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
801 /* shl dreg_lo,cl */
802 EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
803 /* mov dreg_hi,dreg_lo */
804 EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
805
806 /* xor dreg_lo,dreg_lo */
807 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
808
809 /* goto out; */
810 if (is_imm8(jmp_label(jmp_label3, 2)))
811 EMIT2(0xEB, jmp_label(jmp_label3, 2));
812 else
813 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
814
815 /* >= 64 */
816 if (jmp_label2 == -1)
817 jmp_label2 = cnt;
818 /* xor dreg_lo,dreg_lo */
819 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
820 /* xor dreg_hi,dreg_hi */
821 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
822
823 if (jmp_label3 == -1)
824 jmp_label3 = cnt;
825
826 if (dstk) {
827 /* mov dword ptr [ebp+off],dreg_lo */
828 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
829 STACK_VAR(dst_lo));
830 /* mov dword ptr [ebp+off],dreg_hi */
831 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
832 STACK_VAR(dst_hi));
833 }
834 /* out: */
835 *pprog = prog;
836}
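/*
 * The register-shift helpers model a 64-bit shift on a pair of 32-bit
 * words, branching on the shift count at run time. A minimal C sketch of
 * the intended left-shift semantics (illustrative only; the helper name is
 * not part of this file):
 *
 *	static void lsh64(u32 *lo, u32 *hi, u32 n)
 *	{
 *		if (n < 32) {
 *			*hi = (*hi << n) | (n ? *lo >> (32 - n) : 0);
 *			*lo <<= n;
 *		} else if (n < 64) {
 *			*hi = *lo << (n - 32);
 *			*lo = 0;
 *		} else {
 *			*lo = 0;
 *			*hi = 0;
 *		}
 *	}
 */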
837
838/* dst = dst >> src (signed)*/
839static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
840 bool dstk, bool sstk, u8 **pprog)
841{
842 u8 *prog = *pprog;
843 int cnt = 0;
844 static int jmp_label1 = -1;
845 static int jmp_label2 = -1;
846 static int jmp_label3 = -1;
847 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
848 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
849
850 if (dstk) {
851 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
852 STACK_VAR(dst_lo));
853 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
854 STACK_VAR(dst_hi));
855 }
856
857 if (sstk)
858 /* mov ecx,dword ptr [ebp+off] */
859 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
860 STACK_VAR(src_lo));
861 else
862 /* mov ecx,src_lo */
863 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
864
865 /* cmp ecx,32 */
866 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
867 /* Jumps when >= 32 */
868 if (is_imm8(jmp_label(jmp_label1, 2)))
869 EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
870 else
871 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
872
873 /* < 32 */
874 /* lshr dreg_lo,cl */
875 EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
876 /* mov ebx,dreg_hi */
877 EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
878 /* ashr dreg_hi,cl */
879 EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
880
881 /* IA32_ECX = -IA32_ECX + 32 */
882 /* neg ecx */
883 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
884 /* add ecx,32 */
885 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
886
887 /* shl ebx,cl */
888 EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
889 /* or dreg_lo,ebx */
890 EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
891
892 /* goto out; */
893 if (is_imm8(jmp_label(jmp_label3, 2)))
894 EMIT2(0xEB, jmp_label(jmp_label3, 2));
895 else
896 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
897
898 /* >= 32 */
899 if (jmp_label1 == -1)
900 jmp_label1 = cnt;
901
902 /* cmp ecx,64 */
903 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
904 /* Jumps when >= 64 */
905 if (is_imm8(jmp_label(jmp_label2, 2)))
906 EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
907 else
908 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
909
910 /* >= 32 && < 64 */
911 /* sub ecx,32 */
912 EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
913 /* ashr dreg_hi,cl */
914 EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
915 /* mov dreg_lo,dreg_hi */
916 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
917
918 /* ashr dreg_hi,imm8 */
919 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
920
921 /* goto out; */
922 if (is_imm8(jmp_label(jmp_label3, 2)))
923 EMIT2(0xEB, jmp_label(jmp_label3, 2));
924 else
925 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
926
927 /* >= 64 */
928 if (jmp_label2 == -1)
929 jmp_label2 = cnt;
930 /* ashr dreg_hi,imm8 */
931 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
932 /* mov dreg_lo,dreg_hi */
933 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
934
935 if (jmp_label3 == -1)
936 jmp_label3 = cnt;
937
938 if (dstk) {
939 /* mov dword ptr [ebp+off],dreg_lo */
940 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
941 STACK_VAR(dst_lo));
942 /* mov dword ptr [ebp+off],dreg_hi */
943 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
944 STACK_VAR(dst_hi));
945 }
946 /* out: */
947 *pprog = prog;
948}
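/*
 * Arithmetic right shift differs from the logical variant only in what
 * fills the vacated high bits: copies of the sign bit (dreg_hi shifted
 * right by 31) instead of zeros. A minimal C sketch of the intended
 * semantics (illustrative only; the helper name is not part of this file):
 *
 *	static void arsh64(u32 *lo, u32 *hi, u32 n)
 *	{
 *		s32 shi = (s32)*hi;
 *
 *		if (n < 32) {
 *			*lo = (*lo >> n) | (n ? *hi << (32 - n) : 0);
 *			*hi = (u32)(shi >> n);
 *		} else if (n < 64) {
 *			*lo = (u32)(shi >> (n - 32));
 *			*hi = (u32)(shi >> 31);		/* sign fill */
 *		} else {
 *			*lo = (u32)(shi >> 31);
 *			*hi = (u32)(shi >> 31);
 *		}
 *	}
 */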
949
950/* dst = dst >> src */
951static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
952 bool sstk, u8 **pprog)
953{
954 u8 *prog = *pprog;
955 int cnt = 0;
956 static int jmp_label1 = -1;
957 static int jmp_label2 = -1;
958 static int jmp_label3 = -1;
959 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
960 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
961
962 if (dstk) {
963 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
964 STACK_VAR(dst_lo));
965 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
966 STACK_VAR(dst_hi));
967 }
968
969 if (sstk)
970 /* mov ecx,dword ptr [ebp+off] */
971 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
972 STACK_VAR(src_lo));
973 else
974 /* mov ecx,src_lo */
975 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
976
977 /* cmp ecx,32 */
978 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
979 /* Jumps when >= 32 */
980 if (is_imm8(jmp_label(jmp_label1, 2)))
981 EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
982 else
983 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
984
985 /* < 32 */
986 /* lshr dreg_lo,cl */
987 EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
988 /* mov ebx,dreg_hi */
989 EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
990 /* shr dreg_hi,cl */
991 EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
992
993 /* IA32_ECX = -IA32_ECX + 32 */
994 /* neg ecx */
995 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
996 /* add ecx,32 */
997 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
998
999 /* shl ebx,cl */
1000 EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1001 /* or dreg_lo,ebx */
1002 EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1003
1004 /* goto out; */
1005 if (is_imm8(jmp_label(jmp_label3, 2)))
1006 EMIT2(0xEB, jmp_label(jmp_label3, 2));
1007 else
1008 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
1009
1010 /* >= 32 */
1011 if (jmp_label1 == -1)
1012 jmp_label1 = cnt;
1013 /* cmp ecx,64 */
1014 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
1015 /* Jumps when >= 64 */
1016 if (is_imm8(jmp_label(jmp_label2, 2)))
1017 EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
1018 else
1019 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
1020
1021 /* >= 32 && < 64 */
1022 /* sub ecx,32 */
1023 EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
1024 /* shr dreg_hi,cl */
1025 EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
1026 /* mov dreg_lo,dreg_hi */
1027 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1028 /* xor dreg_hi,dreg_hi */
1029 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1030
1031 /* goto out; */
1032 if (is_imm8(jmp_label(jmp_label3, 2)))
1033 EMIT2(0xEB, jmp_label(jmp_label3, 2));
1034 else
1035 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
1036
1037 /* >= 64 */
1038 if (jmp_label2 == -1)
1039 jmp_label2 = cnt;
1040 /* xor dreg_lo,dreg_lo */
1041 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1042 /* xor dreg_hi,dreg_hi */
1043 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1044
1045 if (jmp_label3 == -1)
1046 jmp_label3 = cnt;
1047
1048 if (dstk) {
1049 /* mov dword ptr [ebp+off],dreg_lo */
1050 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1051 STACK_VAR(dst_lo));
1052 /* mov dword ptr [ebp+off],dreg_hi */
1053 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1054 STACK_VAR(dst_hi));
1055 }
1056 /* out: */
1057 *pprog = prog;
1058}
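/*
 * The logical right shift is the zero-fill counterpart of the arithmetic
 * version above; a compact sketch of the intended result (illustrative
 * only; the helper name is not part of this file):
 *
 *	static u64 rsh64(u32 lo, u32 hi, u32 n)
 *	{
 *		u64 v = ((u64)hi << 32) | lo;
 *
 *		return n < 64 ? v >> n : 0;
 *	}
 */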
1059
1060/* dst = dst << val */
1061static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
1062 bool dstk, u8 **pprog)
1063{
1064 u8 *prog = *pprog;
1065 int cnt = 0;
1066 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1067 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1068
1069 if (dstk) {
1070 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1071 STACK_VAR(dst_lo));
1072 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1073 STACK_VAR(dst_hi));
1074 }
1075 /* Do LSH operation */
1076 if (val < 32) {
1077 /* shl dreg_hi,imm8 */
1078 EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val);
1079 /* mov ebx,dreg_lo */
1080 EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
1081 /* shl dreg_lo,imm8 */
1082 EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
1083
1084 /* IA32_ECX = 32 - val */
1085 /* mov cl,val */
1086 EMIT2(0xB1, val);
1087 /* movzx ecx,cl */
1088 EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1089 /* neg ecx */
1090 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1091 /* add ecx,32 */
1092 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1093
1094 /* shr ebx,cl */
1095 EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
1096 /* or dreg_hi,ebx */
1097 EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
1098 } else if (val >= 32 && val < 64) {
1099 u32 value = val - 32;
1100
1101 /* shl dreg_lo,imm8 */
1102 EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
1103 /* mov dreg_hi,dreg_lo */
1104 EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
1105 /* xor dreg_lo,dreg_lo */
1106 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1107 } else {
1108 /* xor dreg_lo,dreg_lo */
1109 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1110 /* xor dreg_hi,dreg_hi */
1111 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1112 }
1113
1114 if (dstk) {
1115 /* mov dword ptr [ebp+off],dreg_lo */
1116 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1117 STACK_VAR(dst_lo));
1118 /* mov dword ptr [ebp+off],dreg_hi */
1119 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1120 STACK_VAR(dst_hi));
1121 }
1122 *pprog = prog;
1123}
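/*
 * With a constant shift the count classification happens at JIT time, so
 * unlike the register variants no compare/branch is emitted; only one of
 * the three sequences appears in the image. A sketch of the selection
 * (illustrative only, not emitted code):
 *
 *	if (val < 32) {
 *		hi = (hi << val) | (val ? lo >> (32 - val) : 0);
 *		lo <<= val;
 *	} else if (val < 64) {
 *		hi = lo << (val - 32);
 *		lo = 0;
 *	} else {
 *		lo = hi = 0;
 *	}
 */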
1124
1125/* dst = dst >> val */
1126static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
1127 bool dstk, u8 **pprog)
1128{
1129 u8 *prog = *pprog;
1130 int cnt = 0;
1131 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1132 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1133
1134 if (dstk) {
1135 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1136 STACK_VAR(dst_lo));
1137 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1138 STACK_VAR(dst_hi));
1139 }
1140
1141 /* Do RSH operation */
1142 if (val < 32) {
1143 /* shr dreg_lo,imm8 */
1144 EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
1145 /* mov ebx,dreg_hi */
1146 EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
1147 /* shr dreg_hi,imm8 */
1148 EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
1149
1150 /* IA32_ECX = 32 - val */
1151 /* mov cl,val */
1152 EMIT2(0xB1, val);
1153 /* movzx ecx,cl */
1154 EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1155 /* neg ecx */
1156 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1157 /* add ecx,32 */
1158 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1159
1160 /* shl ebx,cl */
1161 EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1162 /* or dreg_lo,ebx */
1163 EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1164 } else if (val >= 32 && val < 64) {
1165 u32 value = val - 32;
1166
1167 /* shr dreg_hi,imm8 */
1168 EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
1169 /* mov dreg_lo,dreg_hi */
1170 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1171 /* xor dreg_hi,dreg_hi */
1172 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1173 } else {
1174 /* xor dreg_lo,dreg_lo */
1175 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1176 /* xor dreg_hi,dreg_hi */
1177 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1178 }
1179
1180 if (dstk) {
1181 /* mov dword ptr [ebp+off],dreg_lo */
1182 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1183 STACK_VAR(dst_lo));
1184 /* mov dword ptr [ebp+off],dreg_hi */
1185 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1186 STACK_VAR(dst_hi));
1187 }
1188 *pprog = prog;
1189}
1190
1191/* dst = dst >> val (signed) */
1192static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
1193 bool dstk, u8 **pprog)
1194{
1195 u8 *prog = *pprog;
1196 int cnt = 0;
1197 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1198 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1199
1200 if (dstk) {
1201 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1202 STACK_VAR(dst_lo));
1203 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1204 STACK_VAR(dst_hi));
1205 }
1206 /* Do RSH operation */
1207 if (val < 32) {
1208 /* shr dreg_lo,imm8 */
1209 EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
1210 /* mov ebx,dreg_hi */
1211 EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
1212 /* ashr dreg_hi,imm8 */
1213 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
1214
1215 /* IA32_ECX = 32 - val */
1217 /* mov cl,val */
1217 EMIT2(0xB1, val);
1219 /* movzx ecx,cl */
1219 EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1220 /* neg ecx */
1221 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1222 /* add ecx,32 */
1223 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1224
1225 /* shl ebx,cl */
1226 EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1227 /* or dreg_lo,ebx */
1228 EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1229 } else if (val >= 32 && val < 64) {
1230 u32 value = val - 32;
1231
1232 /* ashr dreg_hi,imm8 */
1233 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
1234 /* mov dreg_lo,dreg_hi */
1235 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1236
1237 /* ashr dreg_hi,imm8 */
1238 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1239 } else {
1240 /* ashr dreg_hi,imm8 */
1241 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1242 /* mov dreg_lo,dreg_hi */
1243 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1244 }
1245
1246 if (dstk) {
1247 /* mov dword ptr [ebp+off],dreg_lo */
1248 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1249 STACK_VAR(dst_lo));
1250 /* mov dword ptr [ebp+off],dreg_hi */
1251 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1252 STACK_VAR(dst_hi));
1253 }
1254 *pprog = prog;
1255}
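/*
 * The same three count ranges apply to the constant arithmetic shift; the
 * only subtlety is the sign fill. For val >= 64 every result bit is a copy
 * of the original sign bit, e.g. (illustrative only):
 *
 *	hi = lo = (u32)((s32)dst_hi >> 31);	/* 0 or 0xffffffff */
 */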
1256
1257static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
1258 bool sstk, u8 **pprog)
1259{
1260 u8 *prog = *pprog;
1261 int cnt = 0;
1262
1263 if (dstk)
1264 /* mov eax,dword ptr [ebp+off] */
1265 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1266 STACK_VAR(dst_hi));
1267 else
1268 /* mov eax,dst_hi */
1269 EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));
1270
1271 if (sstk)
1272 /* mul dword ptr [ebp+off] */
1273 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
1274 else
1275 /* mul src_lo */
1276 EMIT2(0xF7, add_1reg(0xE0, src_lo));
1277
1278 /* mov ecx,eax */
1279 EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1280
1281 if (dstk)
1282 /* mov eax,dword ptr [ebp+off] */
1283 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1284 STACK_VAR(dst_lo));
1285 else
1286 /* mov eax,dst_lo */
1287 EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1288
1289 if (sstk)
1290 /* mul dword ptr [ebp+off] */
1291 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
1292 else
1293 /* mul src_hi */
1294 EMIT2(0xF7, add_1reg(0xE0, src_hi));
1295
1296 /* add ecx,eax */
1297 EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1298
1299 if (dstk)
1300 /* mov eax,dword ptr [ebp+off] */
1301 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1302 STACK_VAR(dst_lo));
1303 else
1304 /* mov eax,dst_lo */
1305 EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1306
1307 if (sstk)
1308 /* mul dword ptr [ebp+off] */
1309 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
1310 else
1311 /* mul src_lo */
1312 EMIT2(0xF7, add_1reg(0xE0, src_lo));
1313
1314 /* add ecx,edx */
1315 EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
1316
1317 if (dstk) {
1318 /* mov dword ptr [ebp+off],eax */
1319 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1320 STACK_VAR(dst_lo));
1321 /* mov dword ptr [ebp+off],ecx */
1322 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
1323 STACK_VAR(dst_hi));
1324 } else {
1325 /* mov dst_lo,eax */
1326 EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
1327 /* mov dst_hi,ecx */
1328 EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
1329 }
1330
1331 *pprog = prog;
1332}
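/*
 * The 64x64 multiply above is built from three 32x32 MULs using the usual
 * schoolbook identity; the dst_hi*src_hi term only affects bits above 63
 * and is dropped. ECX plays the role of the high-word accumulator. A
 * minimal C sketch (illustrative only; the helper name is not part of this
 * file):
 *
 *	static void mul64(u32 *dst_lo, u32 *dst_hi, u32 src_lo, u32 src_hi)
 *	{
 *		u64 ll = (u64)*dst_lo * src_lo;
 *
 *		*dst_hi = (u32)(ll >> 32)
 *			  + *dst_hi * src_lo	/* mul #1, low 32 bits kept */
 *			  + *dst_lo * src_hi;	/* mul #2, low 32 bits kept */
 *		*dst_lo = (u32)ll;
 *	}
 */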
1333
1334static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
1335 bool dstk, u8 **pprog)
1336{
1337 u8 *prog = *pprog;
1338 int cnt = 0;
1339 u32 hi;
1340
1341 hi = val & (1<<31) ? (u32)~0 : 0;
1342 /* movl eax,imm32 */
1343 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1344 if (dstk)
1345 /* mul dword ptr [ebp+off] */
1346 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
1347 else
1348 /* mul dst_hi */
1349 EMIT2(0xF7, add_1reg(0xE0, dst_hi));
1350
1351 /* mov ecx,eax */
1352 EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1353
1354 /* movl eax,imm32 */
1355 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
1356 if (dstk)
1357 /* mul dword ptr [ebp+off] */
1358 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1359 else
1360 /* mul dst_lo */
1361 EMIT2(0xF7, add_1reg(0xE0, dst_lo));
1362 /* add ecx,eax */
1363 EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1364
1365 /* movl eax,imm32 */
1366 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1367 if (dstk)
1368 /* mul dword ptr [ebp+off] */
1369 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1370 else
1371 /* mul dst_lo */
1372 EMIT2(0xF7, add_1reg(0xE0, dst_lo));
1373
1374 /* add ecx,edx */
1375 EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
1376
1377 if (dstk) {
1378 /* mov dword ptr [ebp+off],eax */
1379 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1380 STACK_VAR(dst_lo));
1381 /* mov dword ptr [ebp+off],ecx */
1382 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
1383 STACK_VAR(dst_hi));
1384 } else {
1385 /* mov dst_lo,eax */
1386 EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
1387 /* mov dst_hi,ecx */
1388 EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
1389 }
1390
1391 *pprog = prog;
1392}
1393
1394static int bpf_size_to_x86_bytes(int bpf_size)
1395{
1396 if (bpf_size == BPF_W)
1397 return 4;
1398 else if (bpf_size == BPF_H)
1399 return 2;
1400 else if (bpf_size == BPF_B)
1401 return 1;
1402 else if (bpf_size == BPF_DW)
1403 return 4; /* imm32 */
1404 else
1405 return 0;
1406}
1407
1408struct jit_context {
1409 int cleanup_addr; /* Epilogue code offset */
1410};
1411
1412/* Maximum number of bytes emitted while JITing one eBPF insn */
1413#define BPF_MAX_INSN_SIZE 128
1414#define BPF_INSN_SAFETY 64
1415
1416#define PROLOGUE_SIZE 35
1417
1418/*
1419 * Emit prologue code for BPF program and check its size.
1420 * bpf_tail_call helper will skip it while jumping into another program.
1421 */
1422static void emit_prologue(u8 **pprog, u32 stack_depth)
1423{
1424 u8 *prog = *pprog;
1425 int cnt = 0;
1426 const u8 *r1 = bpf2ia32[BPF_REG_1];
1427 const u8 fplo = bpf2ia32[BPF_REG_FP][0];
1428 const u8 fphi = bpf2ia32[BPF_REG_FP][1];
1429 const u8 *tcc = bpf2ia32[TCALL_CNT];
1430
1431 /* push ebp */
1432 EMIT1(0x55);
1433 /* mov ebp,esp */
1434 EMIT2(0x89, 0xE5);
1435 /* push edi */
1436 EMIT1(0x57);
1437 /* push esi */
1438 EMIT1(0x56);
1439 /* push ebx */
1440 EMIT1(0x53);
1441
1442 /* sub esp,STACK_SIZE */
1443 EMIT2_off32(0x81, 0xEC, STACK_SIZE);
1444 /* sub ebp,SCRATCH_SIZE+4+12*/
1445 EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16);
1446 /* xor ebx,ebx */
1447 EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
1448
1449 /* Set up BPF prog stack base register */
1450 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
1451 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));
1452
1453 /* Move BPF_CTX (EAX) to BPF_REG_R1 */
1454 /* mov dword ptr [ebp+off],eax */
1455 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1456 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));
1457
1458 /* Initialize Tail Count */
1459 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
1460 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1461
1462 BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
1463 *pprog = prog;
1464}
1465
1466/* Emit epilogue code for BPF program */
1467static void emit_epilogue(u8 **pprog, u32 stack_depth)
1468{
1469 u8 *prog = *pprog;
1470 const u8 *r0 = bpf2ia32[BPF_REG_0];
1471 int cnt = 0;
1472
1473 /* mov eax,dword ptr [ebp+off]*/
1474 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
1475 /* mov edx,dword ptr [ebp+off]*/
1476 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
1477
1478 /* add ebp,SCRATCH_SIZE+4+12*/
1479 EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16);
1480
1481 /* mov ebx,dword ptr [ebp-12]*/
1482 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
1483 /* mov esi,dword ptr [ebp-8]*/
1484 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
1485 /* mov edi,dword ptr [ebp-4]*/
1486 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);
1487
1488 EMIT1(0xC9); /* leave */
1489 EMIT1(0xC3); /* ret */
1490 *pprog = prog;
1491}
1492
1493/*
1494 * Generate the following code:
1495 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
1496 * if (index >= array->map.max_entries)
1497 * goto out;
1498 * if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
1499 * goto out;
1500 * prog = array->ptrs[index];
1501 * if (prog == NULL)
1502 * goto out;
1503 * goto *(prog->bpf_func + prologue_size);
1504 * out:
1505 */
1506static void emit_bpf_tail_call(u8 **pprog)
1507{
1508 u8 *prog = *pprog;
1509 int cnt = 0;
1510 const u8 *r1 = bpf2ia32[BPF_REG_1];
1511 const u8 *r2 = bpf2ia32[BPF_REG_2];
1512 const u8 *r3 = bpf2ia32[BPF_REG_3];
1513 const u8 *tcc = bpf2ia32[TCALL_CNT];
1514 u32 lo, hi;
1515 static int jmp_label1 = -1;
1516
1517 /*
1518 * if (index >= array->map.max_entries)
1519 * goto out;
1520 */
1521 /* mov eax,dword ptr [ebp+off] */
1522 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
1523 /* mov edx,dword ptr [ebp+off] */
1524 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));
1525
1526 /* cmp dword ptr [eax+off],edx */
1527 EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
1528 offsetof(struct bpf_array, map.max_entries));
1529 /* jbe out */
1530 EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));
1531
1532 /*
1533 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
1534 * goto out;
1535 */
1536 lo = (u32)MAX_TAIL_CALL_CNT;
1537 hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
1538 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1539 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1540
1541 /* cmp ebx,hi */
1542 EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
1543 EMIT2(IA32_JNE, 3);
1544 /* cmp ecx,lo */
1545 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);
1546
1547 /* jae out */
1548 EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
1549
1550 /* add ecx,0x1 */
1551 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
1552 /* adc ebx,0x0 */
1553 EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);
1554
1555 /* mov dword ptr [ebp+off],ecx */
1556 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1557 /* mov dword ptr [ebp+off],ebx */
1558 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1559
1560 /* prog = array->ptrs[index]; */
1561 /* mov edx, [eax + edx * 4 + offsetof(...)] */
1562 EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));
1563
1564 /*
1565 * if (prog == NULL)
1566 * goto out;
1567 */
1568 /* test edx,edx */
1569 EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
1570 /* je out */
1571 EMIT2(IA32_JE, jmp_label(jmp_label1, 2));
1572
1573 /* goto *(prog->bpf_func + prologue_size); */
1574 /* mov edx, dword ptr [edx + offsetof(struct bpf_prog, bpf_func)] */
1575 EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
1576 offsetof(struct bpf_prog, bpf_func));
1577 /* add edx,prologue_size */
1578 EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);
1579
1580 /* mov eax,dword ptr [ebp+off] */
1581 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1582
1583 /*
1584 * Now we're ready to jump into next BPF program:
1585 * eax == ctx (1st arg)
1586 * edx == prog->bpf_func + prologue_size
1587 */
1588 RETPOLINE_EDX_BPF_JIT();
1589
1590 if (jmp_label1 == -1)
1591 jmp_label1 = cnt;
1592
1593 /* out: */
1594 *pprog = prog;
1595}
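/*
 * The tail-call counter is held in two 32-bit stack slots, so the bound
 * check above is a word-wise unsigned compare: the high words decide
 * unless they are equal, in which case the low words do. The 'jae out'
 * that follows therefore tests a 64-bit unsigned >=. The same idiom is
 * used by the BPF_JMP cases in do_jit(). In C terms (minimal sketch,
 * illustrative only; the helper name is not part of this file):
 *
 *	static bool ge64(u32 a_lo, u32 a_hi, u32 b_lo, u32 b_hi)
 *	{
 *		if (a_hi != b_hi)
 *			return a_hi > b_hi;
 *		return a_lo >= b_lo;
 *	}
 */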
1596
1597/* Push a 64-bit BPF register, read from its scratch stack slots, onto the stack. */
1598static inline void emit_push_r64(const u8 src[], u8 **pprog)
1599{
1600 u8 *prog = *pprog;
1601 int cnt = 0;
1602
1603 /* mov ecx,dword ptr [ebp+off] */
1604 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
1605 /* push ecx */
1606 EMIT1(0x51);
1607
1608 /* mov ecx,dword ptr [ebp+off] */
1609 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
1610 /* push ecx */
1611 EMIT1(0x51);
1612
1613 *pprog = prog;
1614}
1615
1616static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1617 int oldproglen, struct jit_context *ctx)
1618{
1619 struct bpf_insn *insn = bpf_prog->insnsi;
1620 int insn_cnt = bpf_prog->len;
1621 bool seen_exit = false;
1622 u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
1623 int i, cnt = 0;
1624 int proglen = 0;
1625 u8 *prog = temp;
1626
1627 emit_prologue(&prog, bpf_prog->aux->stack_depth);
1628
1629 for (i = 0; i < insn_cnt; i++, insn++) {
1630 const s32 imm32 = insn->imm;
1631 const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1632 const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true;
1633 const bool sstk = insn->src_reg == BPF_REG_AX ? false : true;
1634 const u8 code = insn->code;
1635 const u8 *dst = bpf2ia32[insn->dst_reg];
1636 const u8 *src = bpf2ia32[insn->src_reg];
1637 const u8 *r0 = bpf2ia32[BPF_REG_0];
1638 s64 jmp_offset;
1639 u8 jmp_cond;
1640 int ilen;
1641 u8 *func;
1642
1643 switch (code) {
1644 /* ALU operations */
1645 /* dst = src */
1646 case BPF_ALU | BPF_MOV | BPF_K:
1647 case BPF_ALU | BPF_MOV | BPF_X:
1648 case BPF_ALU64 | BPF_MOV | BPF_K:
1649 case BPF_ALU64 | BPF_MOV | BPF_X:
1650 switch (BPF_SRC(code)) {
1651 case BPF_X:
1652 emit_ia32_mov_r64(is64, dst, src, dstk,
1653 sstk, &prog);
1654 break;
1655 case BPF_K:
1656 /* Sign-extend immediate value to dst reg */
1657 emit_ia32_mov_i64(is64, dst, imm32,
1658 dstk, &prog);
1659 break;
1660 }
1661 break;
1662 /* dst = dst + src/imm */
1663 /* dst = dst - src/imm */
1664 /* dst = dst | src/imm */
1665 /* dst = dst & src/imm */
1666 /* dst = dst ^ src/imm */
1667 /* dst = dst * src/imm */
1668 /* dst = dst << src */
1669 /* dst = dst >> src */
1670 case BPF_ALU | BPF_ADD | BPF_K:
1671 case BPF_ALU | BPF_ADD | BPF_X:
1672 case BPF_ALU | BPF_SUB | BPF_K:
1673 case BPF_ALU | BPF_SUB | BPF_X:
1674 case BPF_ALU | BPF_OR | BPF_K:
1675 case BPF_ALU | BPF_OR | BPF_X:
1676 case BPF_ALU | BPF_AND | BPF_K:
1677 case BPF_ALU | BPF_AND | BPF_X:
1678 case BPF_ALU | BPF_XOR | BPF_K:
1679 case BPF_ALU | BPF_XOR | BPF_X:
1680 case BPF_ALU64 | BPF_ADD | BPF_K:
1681 case BPF_ALU64 | BPF_ADD | BPF_X:
1682 case BPF_ALU64 | BPF_SUB | BPF_K:
1683 case BPF_ALU64 | BPF_SUB | BPF_X:
1684 case BPF_ALU64 | BPF_OR | BPF_K:
1685 case BPF_ALU64 | BPF_OR | BPF_X:
1686 case BPF_ALU64 | BPF_AND | BPF_K:
1687 case BPF_ALU64 | BPF_AND | BPF_X:
1688 case BPF_ALU64 | BPF_XOR | BPF_K:
1689 case BPF_ALU64 | BPF_XOR | BPF_X:
1690 switch (BPF_SRC(code)) {
1691 case BPF_X:
1692 emit_ia32_alu_r64(is64, BPF_OP(code), dst,
1693 src, dstk, sstk, &prog);
1694 break;
1695 case BPF_K:
1696 emit_ia32_alu_i64(is64, BPF_OP(code), dst,
1697 imm32, dstk, &prog);
1698 break;
1699 }
1700 break;
1701 case BPF_ALU | BPF_MUL | BPF_K:
1702 case BPF_ALU | BPF_MUL | BPF_X:
1703 switch (BPF_SRC(code)) {
1704 case BPF_X:
1705 emit_ia32_mul_r(dst_lo, src_lo, dstk,
1706 sstk, &prog);
1707 break;
1708 case BPF_K:
1709 /* mov ecx,imm32*/
1710 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1711 imm32);
1712 emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
1713 false, &prog);
1714 break;
1715 }
1716 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1717 break;
1718 case BPF_ALU | BPF_LSH | BPF_X:
1719 case BPF_ALU | BPF_RSH | BPF_X:
1720 case BPF_ALU | BPF_ARSH | BPF_K:
1721 case BPF_ALU | BPF_ARSH | BPF_X:
1722 switch (BPF_SRC(code)) {
1723 case BPF_X:
1724 emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
1725 dstk, sstk, &prog);
1726 break;
1727 case BPF_K:
1728 /* mov ecx,imm32*/
1729 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1730 imm32);
1731 emit_ia32_shift_r(BPF_OP(code), dst_lo,
1732 IA32_ECX, dstk, false,
1733 &prog);
1734 break;
1735 }
1736 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1737 break;
1738 /* dst = dst / src(imm) */
1739 /* dst = dst % src(imm) */
1740 case BPF_ALU | BPF_DIV | BPF_K:
1741 case BPF_ALU | BPF_DIV | BPF_X:
1742 case BPF_ALU | BPF_MOD | BPF_K:
1743 case BPF_ALU | BPF_MOD | BPF_X:
1744 switch (BPF_SRC(code)) {
1745 case BPF_X:
1746 emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1747 src_lo, dstk, sstk, &prog);
1748 break;
1749 case BPF_K:
1750 /* mov ecx,imm32*/
1751 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1752 imm32);
1753 emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1754 IA32_ECX, dstk, false,
1755 &prog);
1756 break;
1757 }
1758 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1759 break;
1760 case BPF_ALU64 | BPF_DIV | BPF_K:
1761 case BPF_ALU64 | BPF_DIV | BPF_X:
1762 case BPF_ALU64 | BPF_MOD | BPF_K:
1763 case BPF_ALU64 | BPF_MOD | BPF_X:
1764 goto notyet;
1765 /* dst = dst >> imm */
1766 /* dst = dst << imm */
1767 case BPF_ALU | BPF_RSH | BPF_K:
1768 case BPF_ALU | BPF_LSH | BPF_K:
1769 if (unlikely(imm32 > 31))
1770 return -EINVAL;
1771 /* mov ecx,imm32*/
1772 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
1773 emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
1774 false, &prog);
1775 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1776 break;
1777 /* dst = dst << imm */
1778 case BPF_ALU64 | BPF_LSH | BPF_K:
1779 if (unlikely(imm32 > 63))
1780 return -EINVAL;
1781 emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
1782 break;
1783 /* dst = dst >> imm */
1784 case BPF_ALU64 | BPF_RSH | BPF_K:
1785 if (unlikely(imm32 > 63))
1786 return -EINVAL;
1787 emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
1788 break;
1789 /* dst = dst << src */
1790 case BPF_ALU64 | BPF_LSH | BPF_X:
1791 emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
1792 break;
1793 /* dst = dst >> src */
1794 case BPF_ALU64 | BPF_RSH | BPF_X:
1795 emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
1796 break;
1797 /* dst = dst >> src (signed) */
1798 case BPF_ALU64 | BPF_ARSH | BPF_X:
1799 emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
1800 break;
1801 /* dst = dst >> imm (signed) */
1802 case BPF_ALU64 | BPF_ARSH | BPF_K:
1803 if (unlikely(imm32 > 63))
1804 return -EINVAL;
1805 emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
1806 break;
1807 /* dst = -dst */
1808 case BPF_ALU | BPF_NEG:
1809 emit_ia32_alu_i(is64, false, BPF_OP(code),
1810 dst_lo, 0, dstk, &prog);
1811 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1812 break;
1813 /* dst = -dst (64 bit) */
1814 case BPF_ALU64 | BPF_NEG:
1815 emit_ia32_neg64(dst, dstk, &prog);
1816 break;
1817 /* dst = dst * src/imm */
1818 case BPF_ALU64 | BPF_MUL | BPF_X:
1819 case BPF_ALU64 | BPF_MUL | BPF_K:
1820 switch (BPF_SRC(code)) {
1821 case BPF_X:
1822 emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
1823 break;
1824 case BPF_K:
1825 emit_ia32_mul_i64(dst, imm32, dstk, &prog);
1826 break;
1827 }
1828 break;
1829 /* dst = htole(dst) */
1830 case BPF_ALU | BPF_END | BPF_FROM_LE:
1831 emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
1832 break;
1833 /* dst = htobe(dst) */
1834 case BPF_ALU | BPF_END | BPF_FROM_BE:
1835 emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
1836 break;
1837 /* dst = imm64 */
1838 case BPF_LD | BPF_IMM | BPF_DW: {
1839 s32 hi, lo = imm32;
1840
1841 hi = insn[1].imm;
1842 emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
1843 emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
1844 insn++;
1845 i++;
1846 break;
1847 }
1848 /* ST: *(u8*)(dst_reg + off) = imm */
1849 case BPF_ST | BPF_MEM | BPF_H:
1850 case BPF_ST | BPF_MEM | BPF_B:
1851 case BPF_ST | BPF_MEM | BPF_W:
1852 case BPF_ST | BPF_MEM | BPF_DW:
1853 if (dstk)
1854 /* mov eax,dword ptr [ebp+off] */
1855 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1856 STACK_VAR(dst_lo));
1857 else
1858 /* mov eax,dst_lo */
1859 EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1860
1861 switch (BPF_SIZE(code)) {
1862 case BPF_B:
1863 EMIT(0xC6, 1); break;
1864 case BPF_H:
1865 EMIT2(0x66, 0xC7); break;
1866 case BPF_W:
1867 case BPF_DW:
1868 EMIT(0xC7, 1); break;
1869 }
1870
1871 if (is_imm8(insn->off))
1872 EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
1873 else
1874 EMIT1_off32(add_1reg(0x80, IA32_EAX),
1875 insn->off);
1876 EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
1877
1878 if (BPF_SIZE(code) == BPF_DW) {
1879 u32 hi;
1880
1881 hi = imm32 & (1<<31) ? (u32)~0 : 0;
1882 EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
1883 insn->off + 4);
1884 EMIT(hi, 4);
1885 }
1886 break;
1887
1888 /* STX: *(u8*)(dst_reg + off) = src_reg */
1889 case BPF_STX | BPF_MEM | BPF_B:
1890 case BPF_STX | BPF_MEM | BPF_H:
1891 case BPF_STX | BPF_MEM | BPF_W:
1892 case BPF_STX | BPF_MEM | BPF_DW:
1893 if (dstk)
1894 /* mov eax,dword ptr [ebp+off] */
1895 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1896 STACK_VAR(dst_lo));
1897 else
1898 /* mov eax,dst_lo */
1899 EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1900
1901 if (sstk)
1902 /* mov edx,dword ptr [ebp+off] */
1903 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1904 STACK_VAR(src_lo));
1905 else
1906 /* mov edx,src_lo */
1907 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
1908
1909 switch (BPF_SIZE(code)) {
1910 case BPF_B:
1911 EMIT(0x88, 1); break;
1912 case BPF_H:
1913 EMIT2(0x66, 0x89); break;
1914 case BPF_W:
1915 case BPF_DW:
1916 EMIT(0x89, 1); break;
1917 }
1918
1919 if (is_imm8(insn->off))
1920 EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1921 insn->off);
1922 else
1923 EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1924 insn->off);
1925
1926 if (BPF_SIZE(code) == BPF_DW) {
1927 if (sstk)
1928 /* mov edx,dword ptr [ebp+off] */
1929 EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
1930 IA32_EDX),
1931 STACK_VAR(src_hi));
1932 else
1933 /* mov edx,src_hi */
1934 EMIT2(0x8B, add_2reg(0xC0, src_hi,
1935 IA32_EDX));
1936 EMIT1(0x89);
1937 if (is_imm8(insn->off + 4)) {
1938 EMIT2(add_2reg(0x40, IA32_EAX,
1939 IA32_EDX),
1940 insn->off + 4);
1941 } else {
1942 EMIT1(add_2reg(0x80, IA32_EAX,
1943 IA32_EDX));
1944 EMIT(insn->off + 4, 4);
1945 }
1946 }
1947 break;
1948
1949 /* LDX: dst_reg = *(u8*)(src_reg + off) */
1950 case BPF_LDX | BPF_MEM | BPF_B:
1951 case BPF_LDX | BPF_MEM | BPF_H:
1952 case BPF_LDX | BPF_MEM | BPF_W:
1953 case BPF_LDX | BPF_MEM | BPF_DW:
1954 if (sstk)
1955 /* mov eax,dword ptr [ebp+off] */
1956 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1957 STACK_VAR(src_lo));
1958 else
1959 /* mov eax,src_lo */
1960 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
1961
1962 switch (BPF_SIZE(code)) {
1963 case BPF_B:
1964 EMIT2(0x0F, 0xB6); break;
1965 case BPF_H:
1966 EMIT2(0x0F, 0xB7); break;
1967 case BPF_W:
1968 case BPF_DW:
1969 EMIT(0x8B, 1); break;
1970 }
1971
1972 if (is_imm8(insn->off))
1973 EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1974 insn->off);
1975 else
1976 EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1977 insn->off);
1978
1979 if (dstk)
1980 /* mov dword ptr [ebp+off],edx */
1981 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
1982 STACK_VAR(dst_lo));
1983 else
1984 /* mov dst_lo,edx */
1985 EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
1986 switch (BPF_SIZE(code)) {
1987 case BPF_B:
1988 case BPF_H:
1989 case BPF_W:
1990 if (dstk) {
1991 EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
1992 STACK_VAR(dst_hi));
1993 EMIT(0x0, 4);
1994 } else {
1995 EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0);
1996 }
1997 break;
1998 case BPF_DW:
1999 EMIT2_off32(0x8B,
2000 add_2reg(0x80, IA32_EAX, IA32_EDX),
2001 insn->off + 4);
2002 if (dstk)
2003 EMIT3(0x89,
2004 add_2reg(0x40, IA32_EBP,
2005 IA32_EDX),
2006 STACK_VAR(dst_hi));
2007 else
2008 EMIT2(0x89,
2009 add_2reg(0xC0, dst_hi, IA32_EDX));
2010 break;
2011 default:
2012 break;
2013 }
2014 break;
2015 /* call */
2016 case BPF_JMP | BPF_CALL:
2017 {
2018 const u8 *r1 = bpf2ia32[BPF_REG_1];
2019 const u8 *r2 = bpf2ia32[BPF_REG_2];
2020 const u8 *r3 = bpf2ia32[BPF_REG_3];
2021 const u8 *r4 = bpf2ia32[BPF_REG_4];
2022 const u8 *r5 = bpf2ia32[BPF_REG_5];
2023
2024 if (insn->src_reg == BPF_PSEUDO_CALL)
2025 goto notyet;
2026
2027 func = (u8 *) __bpf_call_base + imm32;
2028 jmp_offset = func - (image + addrs[i]);
2029
2030 if (!imm32 || !is_simm32(jmp_offset)) {
2031 pr_err("unsupported BPF func %d addr %p image %p\n",
2032 imm32, func, image);
2033 return -EINVAL;
2034 }
2035
2036 /* mov eax,dword ptr [ebp+off] */
2037 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2038 STACK_VAR(r1[0]));
2039 /* mov edx,dword ptr [ebp+off] */
2040 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2041 STACK_VAR(r1[1]));
2042
2043 emit_push_r64(r5, &prog);
2044 emit_push_r64(r4, &prog);
2045 emit_push_r64(r3, &prog);
2046 emit_push_r64(r2, &prog);
2047
2048 EMIT1_off32(0xE8, jmp_offset + 9);
2049
2050 /* mov dword ptr [ebp+off],eax */
2051 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
2052 STACK_VAR(r0[0]));
2053 /* mov dword ptr [ebp+off],edx */
2054 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
2055 STACK_VAR(r0[1]));
2056
2057 /* add esp,32 */
2058 EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
2059 break;
2060 }
2061 case BPF_JMP | BPF_TAIL_CALL:
2062 emit_bpf_tail_call(&prog);
2063 break;
2064
2065 /* cond jump */
2066 case BPF_JMP | BPF_JEQ | BPF_X:
2067 case BPF_JMP | BPF_JNE | BPF_X:
2068 case BPF_JMP | BPF_JGT | BPF_X:
2069 case BPF_JMP | BPF_JLT | BPF_X:
2070 case BPF_JMP | BPF_JGE | BPF_X:
2071 case BPF_JMP | BPF_JLE | BPF_X:
2072 case BPF_JMP | BPF_JSGT | BPF_X:
2073 case BPF_JMP | BPF_JSLE | BPF_X:
2074 case BPF_JMP | BPF_JSLT | BPF_X:
2075 case BPF_JMP | BPF_JSGE | BPF_X: {
2076 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2077 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2078 u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2079 u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2080
2081 if (dstk) {
2082 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2083 STACK_VAR(dst_lo));
2084 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2085 STACK_VAR(dst_hi));
2086 }
2087
2088 if (sstk) {
2089 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2090 STACK_VAR(src_lo));
2091 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
2092 STACK_VAR(src_hi));
2093 }
2094
2095 /* cmp dreg_hi,sreg_hi */
2096 EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2097 EMIT2(IA32_JNE, 2);
2098 /* cmp dreg_lo,sreg_lo */
2099 EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2100 goto emit_cond_jmp;
2101 }
2102 case BPF_JMP | BPF_JSET | BPF_X: {
2103 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2104 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2105 u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2106 u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2107
2108 if (dstk) {
2109 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2110 STACK_VAR(dst_lo));
2111 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2112 STACK_VAR(dst_hi));
2113 }
2114
2115 if (sstk) {
2116 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2117 STACK_VAR(src_lo));
2118 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
2119 STACK_VAR(src_hi));
2120 }
2121 /* and dreg_lo,sreg_lo */
2122 EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2123 /* and dreg_hi,sreg_hi */
2124 EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2125 /* or dreg_lo,dreg_hi */
2126 EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2127 goto emit_cond_jmp;
2128 }
2129 case BPF_JMP | BPF_JSET | BPF_K: {
2130 u32 hi;
2131 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2132 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2133 u8 sreg_lo = IA32_ECX;
2134 u8 sreg_hi = IA32_EBX;
2135
2136 if (dstk) {
2137 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2138 STACK_VAR(dst_lo));
2139 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2140 STACK_VAR(dst_hi));
2141 }
2142 hi = imm32 & (1<<31) ? (u32)~0 : 0;
2143
2144 /* mov ecx,imm32 */
2145 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2146 /* mov ebx,imm32 */
2147 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2148
2149 /* and dreg_lo,sreg_lo */
2150 EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2151 /* and dreg_hi,sreg_hi */
2152 EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2153 /* or dreg_lo,dreg_hi */
2154 EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2155 goto emit_cond_jmp;
2156 }
2157 case BPF_JMP | BPF_JEQ | BPF_K:
2158 case BPF_JMP | BPF_JNE | BPF_K:
2159 case BPF_JMP | BPF_JGT | BPF_K:
2160 case BPF_JMP | BPF_JLT | BPF_K:
2161 case BPF_JMP | BPF_JGE | BPF_K:
2162 case BPF_JMP | BPF_JLE | BPF_K:
2163 case BPF_JMP | BPF_JSGT | BPF_K:
2164 case BPF_JMP | BPF_JSLE | BPF_K:
2165 case BPF_JMP | BPF_JSLT | BPF_K:
2166 case BPF_JMP | BPF_JSGE | BPF_K: {
2167 u32 hi;
2168 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2169 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2170 u8 sreg_lo = IA32_ECX;
2171 u8 sreg_hi = IA32_EBX;
2172
2173 if (dstk) {
2174 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2175 STACK_VAR(dst_lo));
2176 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2177 STACK_VAR(dst_hi));
2178 }
2179
2180 hi = imm32 & (1<<31) ? (u32)~0 : 0;
2181 /* mov ecx,imm32 */
2182 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2183 /* mov ebx,imm32 */
2184 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2185
2186 /* cmp dreg_hi,sreg_hi */
2187 EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2188 EMIT2(IA32_JNE, 2);
2189 /* cmp dreg_lo,sreg_lo */
2190 EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2191
2192emit_cond_jmp: /* Convert BPF opcode to x86 */
2193 switch (BPF_OP(code)) {
2194 case BPF_JEQ:
2195 jmp_cond = IA32_JE;
2196 break;
2197 case BPF_JSET:
2198 case BPF_JNE:
2199 jmp_cond = IA32_JNE;
2200 break;
2201 case BPF_JGT:
2202 /* GT is unsigned '>', JA in x86 */
2203 jmp_cond = IA32_JA;
2204 break;
2205 case BPF_JLT:
2206 /* LT is unsigned '<', JB in x86 */
2207 jmp_cond = IA32_JB;
2208 break;
2209 case BPF_JGE:
2210 /* GE is unsigned '>=', JAE in x86 */
2211 jmp_cond = IA32_JAE;
2212 break;
2213 case BPF_JLE:
2214 /* LE is unsigned '<=', JBE in x86 */
2215 jmp_cond = IA32_JBE;
2216 break;
2217 case BPF_JSGT:
2218 /* Signed '>', GT in x86 */
2219 jmp_cond = IA32_JG;
2220 break;
2221 case BPF_JSLT:
2222 /* Signed '<', LT in x86 */
2223 jmp_cond = IA32_JL;
2224 break;
2225 case BPF_JSGE:
2226 /* Signed '>=', GE in x86 */
2227 jmp_cond = IA32_JGE;
2228 break;
2229 case BPF_JSLE:
2230 /* Signed '<=', LE in x86 */
2231 jmp_cond = IA32_JLE;
2232 break;
2233 default: /* to silence GCC warning */
2234 return -EFAULT;
2235 }
2236 jmp_offset = addrs[i + insn->off] - addrs[i];
2237 if (is_imm8(jmp_offset)) {
2238 EMIT2(jmp_cond, jmp_offset);
2239 } else if (is_simm32(jmp_offset)) {
2240 EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2241 } else {
2242 pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2243 return -EFAULT;
2244 }
2245
2246 break;
2247 }
2248 case BPF_JMP | BPF_JA:
2249 if (insn->off == -1)
2250 /* -1 jmp instructions will always jump
2251 * backwards two bytes. Explicitly handling
2252 * this case avoids wasting too many passes
2253 * when there are long sequences of replaced
2254 * dead code.
2255 */
2256 jmp_offset = -2;
2257 else
2258 jmp_offset = addrs[i + insn->off] - addrs[i];
2259
2260 if (!jmp_offset)
2261 /* Optimize out nop jumps */
2262 break;
2263emit_jmp:
2264 if (is_imm8(jmp_offset)) {
2265 EMIT2(0xEB, jmp_offset);
2266 } else if (is_simm32(jmp_offset)) {
2267 EMIT1_off32(0xE9, jmp_offset);
2268 } else {
2269 pr_err("jmp gen bug %llx\n", jmp_offset);
2270 return -EFAULT;
2271 }
2272 break;
2273 /* STX XADD: lock *(u32 *)(dst + off) += src */
2274 case BPF_STX | BPF_XADD | BPF_W:
2275 /* STX XADD: lock *(u64 *)(dst + off) += src */
2276 case BPF_STX | BPF_XADD | BPF_DW:
2277 goto notyet;
2278 case BPF_JMP | BPF_EXIT:
2279 if (seen_exit) {
2280 jmp_offset = ctx->cleanup_addr - addrs[i];
2281 goto emit_jmp;
2282 }
2283 seen_exit = true;
2284 /* Update cleanup_addr */
2285 ctx->cleanup_addr = proglen;
2286 emit_epilogue(&prog, bpf_prog->aux->stack_depth);
2287 break;
2288notyet:
2289 pr_info_once("*** NOT YET: opcode %02x ***\n", code);
2290 return -EFAULT;
2291 default:
2292 /*
2293 * This error will be seen if a new instruction was added
2294 * to the interpreter but not to the JIT, or if there is
2295 * junk in bpf_prog
2296 */
2297 pr_err("bpf_jit: unknown opcode %02x\n", code);
2298 return -EINVAL;
2299 }
2300
2301 ilen = prog - temp;
2302 if (ilen > BPF_MAX_INSN_SIZE) {
2303 pr_err("bpf_jit: fatal insn size error\n");
2304 return -EFAULT;
2305 }
2306
2307 if (image) {
2308 if (unlikely(proglen + ilen > oldproglen)) {
2309 pr_err("bpf_jit: fatal error\n");
2310 return -EFAULT;
2311 }
2312 memcpy(image + proglen, temp, ilen);
2313 }
2314 proglen += ilen;
2315 addrs[i] = proglen;
2316 prog = temp;
2317 }
2318 return proglen;
2319}
2320
2321struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
2322{
2323 struct bpf_binary_header *header = NULL;
2324 struct bpf_prog *tmp, *orig_prog = prog;
2325 int proglen, oldproglen = 0;
2326 struct jit_context ctx = {};
2327 bool tmp_blinded = false;
2328 u8 *image = NULL;
2329 int *addrs;
2330 int pass;
2331 int i;
2332
2333 if (!prog->jit_requested)
2334 return orig_prog;
2335
2336 tmp = bpf_jit_blind_constants(prog);
2337 /*
2338 * If blinding was requested and we failed during blinding,
2339 * we must fall back to the interpreter.
2340 */
2341 if (IS_ERR(tmp))
2342 return orig_prog;
2343 if (tmp != prog) {
2344 tmp_blinded = true;
2345 prog = tmp;
2346 }
2347
2348 addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
2349 if (!addrs) {
2350 prog = orig_prog;
2351 goto out;
2352 }
2353
2354 /*
2355 * Before the first pass, make a rough estimate of addrs[]:
2356 * each BPF instruction is translated to less than 64 bytes.
2357 */
2358 for (proglen = 0, i = 0; i < prog->len; i++) {
2359 proglen += 64;
2360 addrs[i] = proglen;
2361 }
2362 ctx.cleanup_addr = proglen;
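	/*
	 * addrs[i] holds the byte offset just past instruction i, so a
	 * branch is resolved inside do_jit() as
	 * addrs[i + insn->off] - addrs[i]. Starting from the 64-byte
	 * over-estimate, each pass can only shrink these offsets, e.g.
	 * (numbers are illustrative only):
	 *
	 *	pass 0: addrs = { 64, 128, 192, ... }
	 *	pass 1: addrs = {  5,  13,  20, ... }
	 *	pass 2: addrs = {  5,  11,  18, ... }	until stable
	 */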
2363
2364 /*
2365 * JITed image shrinks with every pass and the loop iterates
2366 * until the image stops shrinking. Very large BPF programs
2367 * may converge on the last pass. In such a case, do one more
2368 * pass to emit the final image.
2369 */
2370 for (pass = 0; pass < 20 || image; pass++) {
2371 proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
2372 if (proglen <= 0) {
2373out_image:
2374 image = NULL;
2375 if (header)
2376 bpf_jit_binary_free(header);
2377 prog = orig_prog;
2378 goto out_addrs;
2379 }
2380 if (image) {
2381 if (proglen != oldproglen) {
2382 pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
2383 proglen, oldproglen);
2384 goto out_image;
2385 }
2386 break;
2387 }
2388 if (proglen == oldproglen) {
2389 header = bpf_jit_binary_alloc(proglen, &image,
2390 1, jit_fill_hole);
2391 if (!header) {
2392 prog = orig_prog;
2393 goto out_addrs;
2394 }
2395 }
2396 oldproglen = proglen;
2397 cond_resched();
2398 }
2399
2400 if (bpf_jit_enable > 1)
2401 bpf_jit_dump(prog->len, proglen, pass + 1, image);
2402
2403 if (image) {
2404 bpf_jit_binary_lock_ro(header);
2405 prog->bpf_func = (void *)image;
2406 prog->jited = 1;
2407 prog->jited_len = proglen;
2408 } else {
2409 prog = orig_prog;
2410 }
2411
2412out_addrs:
2413 kfree(addrs);
2414out:
2415 if (tmp_blinded)
2416 bpf_jit_prog_release_other(prog, prog == orig_prog ?
2417 tmp : orig_prog);
2418 return prog;
2419}