-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 233
1 file changed, 133 insertions, 100 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b725154182cc..ac4df93d4105 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
| @@ -1,4 +1,5 @@ | |||
| 1 | /* bpf_jit_comp.c : BPF JIT compiler | 1 | /* |
| 2 | * bpf_jit_comp.c: BPF JIT compiler | ||
| 2 | * | 3 | * |
| 3 | * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) | 4 | * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) |
| 4 | * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | 5 | * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com |
| @@ -17,7 +18,7 @@ | |||
| 17 | #include <asm/nospec-branch.h> | 18 | #include <asm/nospec-branch.h> |
| 18 | 19 | ||
| 19 | /* | 20 | /* |
| 20 | * assembly code in arch/x86/net/bpf_jit.S | 21 | * Assembly code in arch/x86/net/bpf_jit.S |
| 21 | */ | 22 | */ |
| 22 | extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; | 23 | extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; |
| 23 | extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; | 24 | extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; |
| @@ -45,14 +46,15 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | |||
| 45 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) | 46 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) |
| 46 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) | 47 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) |
| 47 | #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) | 48 | #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) |
| 49 | |||
| 48 | #define EMIT1_off32(b1, off) \ | 50 | #define EMIT1_off32(b1, off) \ |
| 49 | do {EMIT1(b1); EMIT(off, 4); } while (0) | 51 | do { EMIT1(b1); EMIT(off, 4); } while (0) |
| 50 | #define EMIT2_off32(b1, b2, off) \ | 52 | #define EMIT2_off32(b1, b2, off) \ |
| 51 | do {EMIT2(b1, b2); EMIT(off, 4); } while (0) | 53 | do { EMIT2(b1, b2); EMIT(off, 4); } while (0) |
| 52 | #define EMIT3_off32(b1, b2, b3, off) \ | 54 | #define EMIT3_off32(b1, b2, b3, off) \ |
| 53 | do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) | 55 | do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) |
| 54 | #define EMIT4_off32(b1, b2, b3, b4, off) \ | 56 | #define EMIT4_off32(b1, b2, b3, b4, off) \ |
| 55 | do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) | 57 | do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) |
| 56 | 58 | ||
| 57 | static bool is_imm8(int value) | 59 | static bool is_imm8(int value) |
| 58 | { | 60 | { |
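The EMIT*() macros above pack up to four opcode bytes into a single u32, and emit_code() stores the low bytes into the image before advancing the write pointer. A minimal standalone sketch of that packing idea (hypothetical userspace names, little-endian host assumed, not the kernel helpers themselves):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t *emit_bytes(uint8_t *ptr, uint32_t bytes, unsigned int len)
{
	/* On a little-endian host this writes b1, b2, b3 in order,
	 * which is what the EMIT() packing relies on. */
	memcpy(ptr, &bytes, len);
	return ptr + len;
}

int main(void)
{
	uint8_t image[16];
	uint8_t *prog = image;

	/* 0x48 0x89 0xC7 is "mov rdi, rax" */
	prog = emit_bytes(prog, 0x48 + (0x89 << 8) + (0xC7 << 16), 3);

	for (uint8_t *p = image; p < prog; p++)
		printf("%02x ", *p);
	printf("\n");		/* prints: 48 89 c7 */
	return 0;
}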
| @@ -70,9 +72,10 @@ static bool is_uimm32(u64 value) | |||
| 70 | } | 72 | } |
| 71 | 73 | ||
| 72 | /* mov dst, src */ | 74 | /* mov dst, src */ |
| 73 | #define EMIT_mov(DST, SRC) \ | 75 | #define EMIT_mov(DST, SRC) \ |
| 74 | do {if (DST != SRC) \ | 76 | do { \ |
| 75 | EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ | 77 | if (DST != SRC) \ |
| 78 | EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ | ||
| 76 | } while (0) | 79 | } while (0) |
| 77 | 80 | ||
| 78 | static int bpf_size_to_x86_bytes(int bpf_size) | 81 | static int bpf_size_to_x86_bytes(int bpf_size) |
| @@ -89,7 +92,8 @@ static int bpf_size_to_x86_bytes(int bpf_size) | |||
| 89 | return 0; | 92 | return 0; |
| 90 | } | 93 | } |
| 91 | 94 | ||
| 92 | /* list of x86 cond jumps opcodes (. + s8) | 95 | /* |
| 96 | * List of x86 cond jumps opcodes (. + s8) | ||
| 93 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) | 97 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) |
| 94 | */ | 98 | */ |
| 95 | #define X86_JB 0x72 | 99 | #define X86_JB 0x72 |
| @@ -106,35 +110,37 @@ static int bpf_size_to_x86_bytes(int bpf_size) | |||
| 106 | #define CHOOSE_LOAD_FUNC(K, func) \ | 110 | #define CHOOSE_LOAD_FUNC(K, func) \ |
| 107 | ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) | 111 | ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) |
| 108 | 112 | ||
| 109 | /* pick a register outside of BPF range for JIT internal work */ | 113 | /* Pick a register outside of BPF range for JIT internal work */ |
| 110 | #define AUX_REG (MAX_BPF_JIT_REG + 1) | 114 | #define AUX_REG (MAX_BPF_JIT_REG + 1) |
| 111 | 115 | ||
| 112 | /* The following table maps BPF registers to x64 registers. | 116 | /* |
| 117 | * The following table maps BPF registers to x86-64 registers. | ||
| 113 | * | 118 | * |
| 114 | * x64 register r12 is unused, since if used as base address | 119 | * x86-64 register R12 is unused, since if used as base address |
| 115 | * register in load/store instructions, it always needs an | 120 | * register in load/store instructions, it always needs an |
| 116 | * extra byte of encoding and is callee saved. | 121 | * extra byte of encoding and is callee saved. |
| 117 | * | 122 | * |
| 118 | * r9 caches skb->len - skb->data_len | 123 | * R9 caches skb->len - skb->data_len |
| 119 | * r10 caches skb->data, and used for blinding (if enabled) | 124 | * R10 caches skb->data, and used for blinding (if enabled) |
| 120 | */ | 125 | */ |
| 121 | static const int reg2hex[] = { | 126 | static const int reg2hex[] = { |
| 122 | [BPF_REG_0] = 0, /* rax */ | 127 | [BPF_REG_0] = 0, /* RAX */ |
| 123 | [BPF_REG_1] = 7, /* rdi */ | 128 | [BPF_REG_1] = 7, /* RDI */ |
| 124 | [BPF_REG_2] = 6, /* rsi */ | 129 | [BPF_REG_2] = 6, /* RSI */ |
| 125 | [BPF_REG_3] = 2, /* rdx */ | 130 | [BPF_REG_3] = 2, /* RDX */ |
| 126 | [BPF_REG_4] = 1, /* rcx */ | 131 | [BPF_REG_4] = 1, /* RCX */ |
| 127 | [BPF_REG_5] = 0, /* r8 */ | 132 | [BPF_REG_5] = 0, /* R8 */ |
| 128 | [BPF_REG_6] = 3, /* rbx callee saved */ | 133 | [BPF_REG_6] = 3, /* RBX callee saved */ |
| 129 | [BPF_REG_7] = 5, /* r13 callee saved */ | 134 | [BPF_REG_7] = 5, /* R13 callee saved */ |
| 130 | [BPF_REG_8] = 6, /* r14 callee saved */ | 135 | [BPF_REG_8] = 6, /* R14 callee saved */ |
| 131 | [BPF_REG_9] = 7, /* r15 callee saved */ | 136 | [BPF_REG_9] = 7, /* R15 callee saved */ |
| 132 | [BPF_REG_FP] = 5, /* rbp readonly */ | 137 | [BPF_REG_FP] = 5, /* RBP readonly */ |
| 133 | [BPF_REG_AX] = 2, /* r10 temp register */ | 138 | [BPF_REG_AX] = 2, /* R10 temp register */ |
| 134 | [AUX_REG] = 3, /* r11 temp register */ | 139 | [AUX_REG] = 3, /* R11 temp register */ |
| 135 | }; | 140 | }; |
| 136 | 141 | ||
| 137 | /* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 | 142 | /* |
| 143 | * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15 | ||
| 138 | * which need extra byte of encoding. | 144 | * which need extra byte of encoding. |
| 139 | * rax,rcx,...,rbp have simpler encoding | 145 | * rax,rcx,...,rbp have simpler encoding |
| 140 | */ | 146 | */ |
| @@ -153,7 +159,7 @@ static bool is_axreg(u32 reg) | |||
| 153 | return reg == BPF_REG_0; | 159 | return reg == BPF_REG_0; |
| 154 | } | 160 | } |
| 155 | 161 | ||
| 156 | /* add modifiers if 'reg' maps to x64 registers r8..r15 */ | 162 | /* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */ |
| 157 | static u8 add_1mod(u8 byte, u32 reg) | 163 | static u8 add_1mod(u8 byte, u32 reg) |
| 158 | { | 164 | { |
| 159 | if (is_ereg(reg)) | 165 | if (is_ereg(reg)) |
| @@ -170,13 +176,13 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2) | |||
| 170 | return byte; | 176 | return byte; |
| 171 | } | 177 | } |
| 172 | 178 | ||
| 173 | /* encode 'dst_reg' register into x64 opcode 'byte' */ | 179 | /* Encode 'dst_reg' register into x86-64 opcode 'byte' */ |
| 174 | static u8 add_1reg(u8 byte, u32 dst_reg) | 180 | static u8 add_1reg(u8 byte, u32 dst_reg) |
| 175 | { | 181 | { |
| 176 | return byte + reg2hex[dst_reg]; | 182 | return byte + reg2hex[dst_reg]; |
| 177 | } | 183 | } |
| 178 | 184 | ||
| 179 | /* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ | 185 | /* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */ |
| 180 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | 186 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) |
| 181 | { | 187 | { |
| 182 | return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); | 188 | return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); |
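Taken together, add_2mod() ORs the REX.B/REX.R bits into the 0x48 (REX.W) prefix for extended registers and add_2reg() builds the ModRM byte from the reg2hex[] values, which is exactly what EMIT_mov() emits. A standalone sketch of that arithmetic for 'mov rbx, r13' (hypothetical demo; register numbers taken from the table above):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Low 3 bits of the hardware register numbers, as in reg2hex[]:
	 * RBX -> 3 (not extended), R13 -> 5 (extended: one of R8..R15). */
	uint8_t dst_hex = 3, src_hex = 5;
	bool dst_ereg = false, src_ereg = true;

	uint8_t rex = 0x48;			/* REX.W: 64-bit operand size */
	if (dst_ereg)
		rex |= 1;			/* REX.B extends ModRM.rm  */
	if (src_ereg)
		rex |= 4;			/* REX.R extends ModRM.reg */

	uint8_t modrm = 0xC0 + dst_hex + (src_hex << 3);	/* mod=11: reg-to-reg */

	printf("mov rbx, r13 -> %02x 89 %02x\n", rex, modrm);	/* 4c 89 eb */
	return 0;
}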
| @@ -184,27 +190,28 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | |||
| 184 | 190 | ||
| 185 | static void jit_fill_hole(void *area, unsigned int size) | 191 | static void jit_fill_hole(void *area, unsigned int size) |
| 186 | { | 192 | { |
| 187 | /* fill whole space with int3 instructions */ | 193 | /* Fill whole space with INT3 instructions */ |
| 188 | memset(area, 0xcc, size); | 194 | memset(area, 0xcc, size); |
| 189 | } | 195 | } |
| 190 | 196 | ||
| 191 | struct jit_context { | 197 | struct jit_context { |
| 192 | int cleanup_addr; /* epilogue code offset */ | 198 | int cleanup_addr; /* Epilogue code offset */ |
| 193 | bool seen_ld_abs; | 199 | bool seen_ld_abs; |
| 194 | bool seen_ax_reg; | 200 | bool seen_ax_reg; |
| 195 | }; | 201 | }; |
| 196 | 202 | ||
| 197 | /* maximum number of bytes emitted while JITing one eBPF insn */ | 203 | /* Maximum number of bytes emitted while JITing one eBPF insn */ |
| 198 | #define BPF_MAX_INSN_SIZE 128 | 204 | #define BPF_MAX_INSN_SIZE 128 |
| 199 | #define BPF_INSN_SAFETY 64 | 205 | #define BPF_INSN_SAFETY 64 |
| 200 | 206 | ||
| 201 | #define AUX_STACK_SPACE \ | 207 | #define AUX_STACK_SPACE \ |
| 202 | (32 /* space for rbx, r13, r14, r15 */ + \ | 208 | (32 /* Space for RBX, R13, R14, R15 */ + \ |
| 203 | 8 /* space for skb_copy_bits() buffer */) | 209 | 8 /* Space for skb_copy_bits() buffer */) |
| 204 | 210 | ||
| 205 | #define PROLOGUE_SIZE 37 | 211 | #define PROLOGUE_SIZE 37 |
| 206 | 212 | ||
| 207 | /* emit x64 prologue code for BPF program and check it's size. | 213 | /* |
| 214 | * Emit x86-64 prologue code for BPF program and check its size. | ||
| 208 | * bpf_tail_call helper will skip it while jumping into another program | 215 | * bpf_tail_call helper will skip it while jumping into another program |
| 209 | */ | 216 | */ |
| 210 | static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | 217 | static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) |
| @@ -212,8 +219,11 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 212 | u8 *prog = *pprog; | 219 | u8 *prog = *pprog; |
| 213 | int cnt = 0; | 220 | int cnt = 0; |
| 214 | 221 | ||
| 215 | EMIT1(0x55); /* push rbp */ | 222 | /* push rbp */ |
| 216 | EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ | 223 | EMIT1(0x55); |
| 224 | |||
| 225 | /* mov rbp,rsp */ | ||
| 226 | EMIT3(0x48, 0x89, 0xE5); | ||
| 217 | 227 | ||
| 218 | /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ | 228 | /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ |
| 219 | EMIT3_off32(0x48, 0x81, 0xEC, | 229 | EMIT3_off32(0x48, 0x81, 0xEC, |
| @@ -222,14 +232,15 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 222 | /* sub rbp, AUX_STACK_SPACE */ | 232 | /* sub rbp, AUX_STACK_SPACE */ |
| 223 | EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); | 233 | EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); |
| 224 | 234 | ||
| 225 | /* all classic BPF filters use R6(rbx) save it */ | 235 | /* All classic BPF filters use R6(rbx) save it */ |
| 226 | 236 | ||
| 227 | /* mov qword ptr [rbp+0],rbx */ | 237 | /* mov qword ptr [rbp+0],rbx */ |
| 228 | EMIT4(0x48, 0x89, 0x5D, 0); | 238 | EMIT4(0x48, 0x89, 0x5D, 0); |
| 229 | 239 | ||
| 230 | /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8 | 240 | /* |
| 231 | * as temporary, so all tcpdump filters need to spill/fill R7(r13) and | 241 | * bpf_convert_filter() maps classic BPF register X to R7 and uses R8 |
| 232 | * R8(r14). R9(r15) spill could be made conditional, but there is only | 242 | * as temporary, so all tcpdump filters need to spill/fill R7(R13) and |
| 243 | * R8(R14). R9(R15) spill could be made conditional, but there is only | ||
| 233 | * one 'bpf_error' return path out of helper functions inside bpf_jit.S | 244 | * one 'bpf_error' return path out of helper functions inside bpf_jit.S |
| 234 | * The overhead of extra spill is negligible for any filter other | 245 | * The overhead of extra spill is negligible for any filter other |
| 235 | * than synthetic ones. Therefore not worth adding complexity. | 246 | * than synthetic ones. Therefore not worth adding complexity. |
| @@ -243,9 +254,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 243 | EMIT4(0x4C, 0x89, 0x7D, 24); | 254 | EMIT4(0x4C, 0x89, 0x7D, 24); |
| 244 | 255 | ||
| 245 | if (!ebpf_from_cbpf) { | 256 | if (!ebpf_from_cbpf) { |
| 246 | /* Clear the tail call counter (tail_call_cnt): for eBPF tail | 257 | /* |
| 258 | * Clear the tail call counter (tail_call_cnt): for eBPF tail | ||
| 247 | * calls we need to reset the counter to 0. It's done in two | 259 | * calls we need to reset the counter to 0. It's done in two |
| 248 | * instructions, resetting rax register to 0, and moving it | 260 | * instructions, resetting RAX register to 0, and moving it |
| 249 | * to the counter location. | 261 | * to the counter location. |
| 250 | */ | 262 | */ |
| 251 | 263 | ||
| @@ -260,7 +272,9 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 260 | *pprog = prog; | 272 | *pprog = prog; |
| 261 | } | 273 | } |
| 262 | 274 | ||
| 263 | /* generate the following code: | 275 | /* |
| 276 | * Generate the following code: | ||
| 277 | * | ||
| 264 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... | 278 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... |
| 265 | * if (index >= array->map.max_entries) | 279 | * if (index >= array->map.max_entries) |
| 266 | * goto out; | 280 | * goto out; |
| @@ -278,23 +292,26 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
| 278 | int label1, label2, label3; | 292 | int label1, label2, label3; |
| 279 | int cnt = 0; | 293 | int cnt = 0; |
| 280 | 294 | ||
| 281 | /* rdi - pointer to ctx | 295 | /* |
| 296 | * rdi - pointer to ctx | ||
| 282 | * rsi - pointer to bpf_array | 297 | * rsi - pointer to bpf_array |
| 283 | * rdx - index in bpf_array | 298 | * rdx - index in bpf_array |
| 284 | */ | 299 | */ |
| 285 | 300 | ||
| 286 | /* if (index >= array->map.max_entries) | 301 | /* |
| 287 | * goto out; | 302 | * if (index >= array->map.max_entries) |
| 303 | * goto out; | ||
| 288 | */ | 304 | */ |
| 289 | EMIT2(0x89, 0xD2); /* mov edx, edx */ | 305 | EMIT2(0x89, 0xD2); /* mov edx, edx */ |
| 290 | EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ | 306 | EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ |
| 291 | offsetof(struct bpf_array, map.max_entries)); | 307 | offsetof(struct bpf_array, map.max_entries)); |
| 292 | #define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ | 308 | #define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */ |
| 293 | EMIT2(X86_JBE, OFFSET1); /* jbe out */ | 309 | EMIT2(X86_JBE, OFFSET1); /* jbe out */ |
| 294 | label1 = cnt; | 310 | label1 = cnt; |
| 295 | 311 | ||
| 296 | /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) | 312 | /* |
| 297 | * goto out; | 313 | * if (tail_call_cnt > MAX_TAIL_CALL_CNT) |
| 314 | * goto out; | ||
| 298 | */ | 315 | */ |
| 299 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ | 316 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ |
| 300 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ | 317 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ |
| @@ -308,8 +325,9 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
| 308 | EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ | 325 | EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ |
| 309 | offsetof(struct bpf_array, ptrs)); | 326 | offsetof(struct bpf_array, ptrs)); |
| 310 | 327 | ||
| 311 | /* if (prog == NULL) | 328 | /* |
| 312 | * goto out; | 329 | * if (prog == NULL) |
| 330 | * goto out; | ||
| 313 | */ | 331 | */ |
| 314 | EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ | 332 | EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ |
| 315 | #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) | 333 | #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) |
| @@ -321,7 +339,8 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
| 321 | offsetof(struct bpf_prog, bpf_func)); | 339 | offsetof(struct bpf_prog, bpf_func)); |
| 322 | EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ | 340 | EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ |
| 323 | 341 | ||
| 324 | /* now we're ready to jump into next BPF program | 342 | /* |
| 343 | * Now we're ready to jump into next BPF program | ||
| 325 | * rdi == ctx (1st arg) | 344 | * rdi == ctx (1st arg) |
| 326 | * rax == prog->bpf_func + prologue_size | 345 | * rax == prog->bpf_func + prologue_size |
| 327 | */ | 346 | */ |
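Read together with the comment block above emit_bpf_tail_call(), the emitted instructions amount to the following C-level logic. This is a hedged sketch with made-up minimal types (fake_prog, fake_array), not the kernel structures, and the counter parameter stands in for the stack slot the prologue zeroes:

#include <stddef.h>
#include <stdint.h>

struct fake_prog  { void (*bpf_func)(void *ctx); };
struct fake_array { uint32_t max_entries; struct fake_prog *ptrs[]; };

#define SKETCH_MAX_TAIL_CALL_CNT 32	/* kernel-defined limit (32 at the time of this patch) */

void tail_call_sketch(void *ctx, struct fake_array *array,
		      uint64_t index, uint32_t *tail_call_cnt)
{
	if ((uint32_t)index >= array->max_entries)	/* mov edx,edx; cmp; jbe out */
		return;
	if (*tail_call_cnt > SKETCH_MAX_TAIL_CALL_CNT)	/* cmp eax, MAX_...; ja out */
		return;
	(*tail_call_cnt)++;

	struct fake_prog *prog = array->ptrs[(uint32_t)index];
	if (prog == NULL)				/* test rax, rax; je out */
		return;

	/* The JIT jumps to prog->bpf_func + PROLOGUE_SIZE, skipping the
	 * callee's prologue; plain C can only model that as a call. */
	prog->bpf_func(ctx);
}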
| @@ -340,7 +359,8 @@ static void emit_load_skb_data_hlen(u8 **pprog) | |||
| 340 | u8 *prog = *pprog; | 359 | u8 *prog = *pprog; |
| 341 | int cnt = 0; | 360 | int cnt = 0; |
| 342 | 361 | ||
| 343 | /* r9d = skb->len - skb->data_len (headlen) | 362 | /* |
| 363 | * r9d = skb->len - skb->data_len (headlen) | ||
| 344 | * r10 = skb->data | 364 | * r10 = skb->data |
| 345 | */ | 365 | */ |
| 346 | /* mov %r9d, off32(%rdi) */ | 366 | /* mov %r9d, off32(%rdi) */ |
| @@ -361,7 +381,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, | |||
| 361 | u8 b1, b2, b3; | 381 | u8 b1, b2, b3; |
| 362 | int cnt = 0; | 382 | int cnt = 0; |
| 363 | 383 | ||
| 364 | /* optimization: if imm32 is positive, use 'mov %eax, imm32' | 384 | /* |
| 385 | * Optimization: if imm32 is positive, use 'mov %eax, imm32' | ||
| 365 | * (which zero-extends imm32) to save 2 bytes. | 386 | * (which zero-extends imm32) to save 2 bytes. |
| 366 | */ | 387 | */ |
| 367 | if (sign_propagate && (s32)imm32 < 0) { | 388 | if (sign_propagate && (s32)imm32 < 0) { |
| @@ -373,7 +394,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, | |||
| 373 | goto done; | 394 | goto done; |
| 374 | } | 395 | } |
| 375 | 396 | ||
| 376 | /* optimization: if imm32 is zero, use 'xor %eax, %eax' | 397 | /* |
| 398 | * Optimization: if imm32 is zero, use 'xor %eax, %eax' | ||
| 377 | * to save 3 bytes. | 399 | * to save 3 bytes. |
| 378 | */ | 400 | */ |
| 379 | if (imm32 == 0) { | 401 | if (imm32 == 0) { |
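emit_mov_imm32() is purely a size optimization: a negative immediate that must sign-propagate needs the 7-byte REX.W form, zero can be materialized with a 2-byte xor, and any other value fits the 5-byte 'mov r32, imm32', which zero-extends. A standalone sketch of that decision (hypothetical helper; byte counts are for the non-extended registers):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: reports which encoding the logic above would pick. */
static const char *pick_mov_imm32(bool sign_propagate, int32_t imm32)
{
	if (sign_propagate && imm32 < 0)
		return "mov rax, imm32 (REX.W + C7 /0, 7 bytes, sign-extends)";
	if (imm32 == 0)
		return "xor eax, eax   (31 C0, 2 bytes)";
	return "mov eax, imm32 (B8+rd, 5 bytes, zero-extends)";
}

int main(void)
{
	printf("%s\n", pick_mov_imm32(true, -1));
	printf("%s\n", pick_mov_imm32(false, 0));
	printf("%s\n", pick_mov_imm32(false, 4096));
	return 0;
}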
| @@ -400,7 +422,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg, | |||
| 400 | int cnt = 0; | 422 | int cnt = 0; |
| 401 | 423 | ||
| 402 | if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { | 424 | if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { |
| 403 | /* For emitting plain u32, where sign bit must not be | 425 | /* |
| 426 | * For emitting plain u32, where sign bit must not be | ||
| 404 | * propagated LLVM tends to load imm64 over mov32 | 427 | * propagated LLVM tends to load imm64 over mov32 |
| 405 | * directly, so save couple of bytes by just doing | 428 | * directly, so save a couple of bytes by just doing |
| 406 | * 'mov %eax, imm32' instead. | 429 | * 'mov %eax, imm32' instead. |
| @@ -525,7 +548,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 525 | else if (is_ereg(dst_reg)) | 548 | else if (is_ereg(dst_reg)) |
| 526 | EMIT1(add_1mod(0x40, dst_reg)); | 549 | EMIT1(add_1mod(0x40, dst_reg)); |
| 527 | 550 | ||
| 528 | /* b3 holds 'normal' opcode, b2 short form only valid | 551 | /* |
| 552 | * b3 holds 'normal' opcode, b2 short form only valid | ||
| 529 | * in case dst is eax/rax. | 553 | * in case dst is eax/rax. |
| 530 | */ | 554 | */ |
| 531 | switch (BPF_OP(insn->code)) { | 555 | switch (BPF_OP(insn->code)) { |
| @@ -593,7 +617,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 593 | /* mov rax, dst_reg */ | 617 | /* mov rax, dst_reg */ |
| 594 | EMIT_mov(BPF_REG_0, dst_reg); | 618 | EMIT_mov(BPF_REG_0, dst_reg); |
| 595 | 619 | ||
| 596 | /* xor edx, edx | 620 | /* |
| 621 | * xor edx, edx | ||
| 597 | * equivalent to 'xor rdx, rdx', but one byte less | 622 | * equivalent to 'xor rdx, rdx', but one byte less |
| 598 | */ | 623 | */ |
| 599 | EMIT2(0x31, 0xd2); | 624 | EMIT2(0x31, 0xd2); |
| @@ -655,7 +680,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 655 | } | 680 | } |
| 656 | break; | 681 | break; |
| 657 | } | 682 | } |
| 658 | /* shifts */ | 683 | /* Shifts */ |
| 659 | case BPF_ALU | BPF_LSH | BPF_K: | 684 | case BPF_ALU | BPF_LSH | BPF_K: |
| 660 | case BPF_ALU | BPF_RSH | BPF_K: | 685 | case BPF_ALU | BPF_RSH | BPF_K: |
| 661 | case BPF_ALU | BPF_ARSH | BPF_K: | 686 | case BPF_ALU | BPF_ARSH | BPF_K: |
| @@ -686,7 +711,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 686 | case BPF_ALU64 | BPF_RSH | BPF_X: | 711 | case BPF_ALU64 | BPF_RSH | BPF_X: |
| 687 | case BPF_ALU64 | BPF_ARSH | BPF_X: | 712 | case BPF_ALU64 | BPF_ARSH | BPF_X: |
| 688 | 713 | ||
| 689 | /* check for bad case when dst_reg == rcx */ | 714 | /* Check for bad case when dst_reg == rcx */ |
| 690 | if (dst_reg == BPF_REG_4) { | 715 | if (dst_reg == BPF_REG_4) { |
| 691 | /* mov r11, dst_reg */ | 716 | /* mov r11, dst_reg */ |
| 692 | EMIT_mov(AUX_REG, dst_reg); | 717 | EMIT_mov(AUX_REG, dst_reg); |
| @@ -724,13 +749,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 724 | case BPF_ALU | BPF_END | BPF_FROM_BE: | 749 | case BPF_ALU | BPF_END | BPF_FROM_BE: |
| 725 | switch (imm32) { | 750 | switch (imm32) { |
| 726 | case 16: | 751 | case 16: |
| 727 | /* emit 'ror %ax, 8' to swap lower 2 bytes */ | 752 | /* Emit 'ror %ax, 8' to swap lower 2 bytes */ |
| 728 | EMIT1(0x66); | 753 | EMIT1(0x66); |
| 729 | if (is_ereg(dst_reg)) | 754 | if (is_ereg(dst_reg)) |
| 730 | EMIT1(0x41); | 755 | EMIT1(0x41); |
| 731 | EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); | 756 | EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); |
| 732 | 757 | ||
| 733 | /* emit 'movzwl eax, ax' */ | 758 | /* Emit 'movzwl eax, ax' */ |
| 734 | if (is_ereg(dst_reg)) | 759 | if (is_ereg(dst_reg)) |
| 735 | EMIT3(0x45, 0x0F, 0xB7); | 760 | EMIT3(0x45, 0x0F, 0xB7); |
| 736 | else | 761 | else |
| @@ -738,7 +763,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 738 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); | 763 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); |
| 739 | break; | 764 | break; |
| 740 | case 32: | 765 | case 32: |
| 741 | /* emit 'bswap eax' to swap lower 4 bytes */ | 766 | /* Emit 'bswap eax' to swap lower 4 bytes */ |
| 742 | if (is_ereg(dst_reg)) | 767 | if (is_ereg(dst_reg)) |
| 743 | EMIT2(0x41, 0x0F); | 768 | EMIT2(0x41, 0x0F); |
| 744 | else | 769 | else |
| @@ -746,7 +771,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 746 | EMIT1(add_1reg(0xC8, dst_reg)); | 771 | EMIT1(add_1reg(0xC8, dst_reg)); |
| 747 | break; | 772 | break; |
| 748 | case 64: | 773 | case 64: |
| 749 | /* emit 'bswap rax' to swap 8 bytes */ | 774 | /* Emit 'bswap rax' to swap 8 bytes */ |
| 750 | EMIT3(add_1mod(0x48, dst_reg), 0x0F, | 775 | EMIT3(add_1mod(0x48, dst_reg), 0x0F, |
| 751 | add_1reg(0xC8, dst_reg)); | 776 | add_1reg(0xC8, dst_reg)); |
| 752 | break; | 777 | break; |
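All three BPF_FROM_BE cases reduce to "byte-swap the low 16/32/64 bits and zero-extend the result into the 64-bit register". A userspace sketch of the same semantics using compiler builtins (illustration only; assumes a little-endian host and GCC/Clang):

#include <stdint.h>
#include <stdio.h>

static uint64_t bpf_from_be_sketch(uint64_t val, int width)
{
	switch (width) {
	case 16: return __builtin_bswap16((uint16_t)val);	/* ror ax,8 + movzwl */
	case 32: return __builtin_bswap32((uint32_t)val);	/* bswap eax */
	case 64: return __builtin_bswap64(val);			/* bswap rax */
	}
	return val;
}

int main(void)
{
	/* Only the low 16 bits survive, byte-swapped: prints 8877 */
	printf("%llx\n",
	       (unsigned long long)bpf_from_be_sketch(0x1122334455667788ULL, 16));
	return 0;
}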
| @@ -756,7 +781,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 756 | case BPF_ALU | BPF_END | BPF_FROM_LE: | 781 | case BPF_ALU | BPF_END | BPF_FROM_LE: |
| 757 | switch (imm32) { | 782 | switch (imm32) { |
| 758 | case 16: | 783 | case 16: |
| 759 | /* emit 'movzwl eax, ax' to zero extend 16-bit | 784 | /* |
| 785 | * Emit 'movzwl eax, ax' to zero extend 16-bit | ||
| 760 | * into 64 bit | 786 | * into 64 bit |
| 761 | */ | 787 | */ |
| 762 | if (is_ereg(dst_reg)) | 788 | if (is_ereg(dst_reg)) |
| @@ -766,7 +792,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 766 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); | 792 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); |
| 767 | break; | 793 | break; |
| 768 | case 32: | 794 | case 32: |
| 769 | /* emit 'mov eax, eax' to clear upper 32-bits */ | 795 | /* Emit 'mov eax, eax' to clear upper 32-bits */ |
| 770 | if (is_ereg(dst_reg)) | 796 | if (is_ereg(dst_reg)) |
| 771 | EMIT1(0x45); | 797 | EMIT1(0x45); |
| 772 | EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); | 798 | EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); |
| @@ -809,9 +835,9 @@ st: if (is_imm8(insn->off)) | |||
| 809 | 835 | ||
| 810 | /* STX: *(u8*)(dst_reg + off) = src_reg */ | 836 | /* STX: *(u8*)(dst_reg + off) = src_reg */ |
| 811 | case BPF_STX | BPF_MEM | BPF_B: | 837 | case BPF_STX | BPF_MEM | BPF_B: |
| 812 | /* emit 'mov byte ptr [rax + off], al' */ | 838 | /* Emit 'mov byte ptr [rax + off], al' */ |
| 813 | if (is_ereg(dst_reg) || is_ereg(src_reg) || | 839 | if (is_ereg(dst_reg) || is_ereg(src_reg) || |
| 814 | /* have to add extra byte for x86 SIL, DIL regs */ | 840 | /* We have to add extra byte for x86 SIL, DIL regs */ |
| 815 | src_reg == BPF_REG_1 || src_reg == BPF_REG_2) | 841 | src_reg == BPF_REG_1 || src_reg == BPF_REG_2) |
| 816 | EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); | 842 | EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); |
| 817 | else | 843 | else |
| @@ -840,25 +866,26 @@ stx: if (is_imm8(insn->off)) | |||
| 840 | 866 | ||
| 841 | /* LDX: dst_reg = *(u8*)(src_reg + off) */ | 867 | /* LDX: dst_reg = *(u8*)(src_reg + off) */ |
| 842 | case BPF_LDX | BPF_MEM | BPF_B: | 868 | case BPF_LDX | BPF_MEM | BPF_B: |
| 843 | /* emit 'movzx rax, byte ptr [rax + off]' */ | 869 | /* Emit 'movzx rax, byte ptr [rax + off]' */ |
| 844 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); | 870 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); |
| 845 | goto ldx; | 871 | goto ldx; |
| 846 | case BPF_LDX | BPF_MEM | BPF_H: | 872 | case BPF_LDX | BPF_MEM | BPF_H: |
| 847 | /* emit 'movzx rax, word ptr [rax + off]' */ | 873 | /* Emit 'movzx rax, word ptr [rax + off]' */ |
| 848 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); | 874 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); |
| 849 | goto ldx; | 875 | goto ldx; |
| 850 | case BPF_LDX | BPF_MEM | BPF_W: | 876 | case BPF_LDX | BPF_MEM | BPF_W: |
| 851 | /* emit 'mov eax, dword ptr [rax+0x14]' */ | 877 | /* Emit 'mov eax, dword ptr [rax+0x14]' */ |
| 852 | if (is_ereg(dst_reg) || is_ereg(src_reg)) | 878 | if (is_ereg(dst_reg) || is_ereg(src_reg)) |
| 853 | EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); | 879 | EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); |
| 854 | else | 880 | else |
| 855 | EMIT1(0x8B); | 881 | EMIT1(0x8B); |
| 856 | goto ldx; | 882 | goto ldx; |
| 857 | case BPF_LDX | BPF_MEM | BPF_DW: | 883 | case BPF_LDX | BPF_MEM | BPF_DW: |
| 858 | /* emit 'mov rax, qword ptr [rax+0x14]' */ | 884 | /* Emit 'mov rax, qword ptr [rax+0x14]' */ |
| 859 | EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); | 885 | EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); |
| 860 | ldx: /* if insn->off == 0 we can save one extra byte, but | 886 | ldx: /* |
| 861 | * special case of x86 r13 which always needs an offset | 887 | * If insn->off == 0 we can save one extra byte, but |
| 888 | * special case of x86 R13 which always needs an offset | ||
| 862 | * is not worth the hassle | 889 | * is not worth the hassle |
| 863 | */ | 890 | */ |
| 864 | if (is_imm8(insn->off)) | 891 | if (is_imm8(insn->off)) |
| @@ -870,7 +897,7 @@ ldx: /* if insn->off == 0 we can save one extra byte, but | |||
| 870 | 897 | ||
| 871 | /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ | 898 | /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ |
| 872 | case BPF_STX | BPF_XADD | BPF_W: | 899 | case BPF_STX | BPF_XADD | BPF_W: |
| 873 | /* emit 'lock add dword ptr [rax + off], eax' */ | 900 | /* Emit 'lock add dword ptr [rax + off], eax' */ |
| 874 | if (is_ereg(dst_reg) || is_ereg(src_reg)) | 901 | if (is_ereg(dst_reg) || is_ereg(src_reg)) |
| 875 | EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); | 902 | EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); |
| 876 | else | 903 | else |
| @@ -897,14 +924,15 @@ xadd: if (is_imm8(insn->off)) | |||
| 897 | } else { | 924 | } else { |
| 898 | EMIT2(0x41, 0x52); /* push %r10 */ | 925 | EMIT2(0x41, 0x52); /* push %r10 */ |
| 899 | EMIT2(0x41, 0x51); /* push %r9 */ | 926 | EMIT2(0x41, 0x51); /* push %r9 */ |
| 900 | /* need to adjust jmp offset, since | 927 | /* |
| 928 | * We need to adjust jmp offset, since | ||
| 901 | * pop %r9, pop %r10 take 4 bytes after call insn | 929 | * pop %r9, pop %r10 take 4 bytes after call insn |
| 902 | */ | 930 | */ |
| 903 | jmp_offset += 4; | 931 | jmp_offset += 4; |
| 904 | } | 932 | } |
| 905 | } | 933 | } |
| 906 | if (!imm32 || !is_simm32(jmp_offset)) { | 934 | if (!imm32 || !is_simm32(jmp_offset)) { |
| 907 | pr_err("unsupported bpf func %d addr %p image %p\n", | 935 | pr_err("unsupported BPF func %d addr %p image %p\n", |
| 908 | imm32, func, image); | 936 | imm32, func, image); |
| 909 | return -EINVAL; | 937 | return -EINVAL; |
| 910 | } | 938 | } |
| @@ -970,7 +998,7 @@ xadd: if (is_imm8(insn->off)) | |||
| 970 | else | 998 | else |
| 971 | EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); | 999 | EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); |
| 972 | 1000 | ||
| 973 | emit_cond_jmp: /* convert BPF opcode to x86 */ | 1001 | emit_cond_jmp: /* Convert BPF opcode to x86 */ |
| 974 | switch (BPF_OP(insn->code)) { | 1002 | switch (BPF_OP(insn->code)) { |
| 975 | case BPF_JEQ: | 1003 | case BPF_JEQ: |
| 976 | jmp_cond = X86_JE; | 1004 | jmp_cond = X86_JE; |
| @@ -996,22 +1024,22 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ | |||
| 996 | jmp_cond = X86_JBE; | 1024 | jmp_cond = X86_JBE; |
| 997 | break; | 1025 | break; |
| 998 | case BPF_JSGT: | 1026 | case BPF_JSGT: |
| 999 | /* signed '>', GT in x86 */ | 1027 | /* Signed '>', GT in x86 */ |
| 1000 | jmp_cond = X86_JG; | 1028 | jmp_cond = X86_JG; |
| 1001 | break; | 1029 | break; |
| 1002 | case BPF_JSLT: | 1030 | case BPF_JSLT: |
| 1003 | /* signed '<', LT in x86 */ | 1031 | /* Signed '<', LT in x86 */ |
| 1004 | jmp_cond = X86_JL; | 1032 | jmp_cond = X86_JL; |
| 1005 | break; | 1033 | break; |
| 1006 | case BPF_JSGE: | 1034 | case BPF_JSGE: |
| 1007 | /* signed '>=', GE in x86 */ | 1035 | /* Signed '>=', GE in x86 */ |
| 1008 | jmp_cond = X86_JGE; | 1036 | jmp_cond = X86_JGE; |
| 1009 | break; | 1037 | break; |
| 1010 | case BPF_JSLE: | 1038 | case BPF_JSLE: |
| 1011 | /* signed '<=', LE in x86 */ | 1039 | /* Signed '<=', LE in x86 */ |
| 1012 | jmp_cond = X86_JLE; | 1040 | jmp_cond = X86_JLE; |
| 1013 | break; | 1041 | break; |
| 1014 | default: /* to silence gcc warning */ | 1042 | default: /* to silence GCC warning */ |
| 1015 | return -EFAULT; | 1043 | return -EFAULT; |
| 1016 | } | 1044 | } |
| 1017 | jmp_offset = addrs[i + insn->off] - addrs[i]; | 1045 | jmp_offset = addrs[i + insn->off] - addrs[i]; |
| @@ -1029,7 +1057,7 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ | |||
| 1029 | case BPF_JMP | BPF_JA: | 1057 | case BPF_JMP | BPF_JA: |
| 1030 | jmp_offset = addrs[i + insn->off] - addrs[i]; | 1058 | jmp_offset = addrs[i + insn->off] - addrs[i]; |
| 1031 | if (!jmp_offset) | 1059 | if (!jmp_offset) |
| 1032 | /* optimize out nop jumps */ | 1060 | /* Optimize out nop jumps */ |
| 1033 | break; | 1061 | break; |
| 1034 | emit_jmp: | 1062 | emit_jmp: |
| 1035 | if (is_imm8(jmp_offset)) { | 1063 | if (is_imm8(jmp_offset)) { |
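Jump emission follows the rule stated in the opcode table near the top of the file: a displacement that fits in a signed byte gets the 2-byte short form, otherwise a 0x0F prefix plus (opcode + 0x10) yields the 6-byte near form with a 32-bit displacement. A standalone sketch of that choice (hypothetical helper, little-endian host assumed):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SHORT_JE 0x74	/* "je rel8"; 0x0F prefix plus (0x74 + 0x10) is "je rel32" */

/* Hypothetical standalone helper mirroring the short/near decision above. */
static int emit_cond_jump(uint8_t *buf, uint8_t jmp_cond, int32_t off)
{
	if (off >= -128 && off <= 127) {	/* what is_imm8() tests */
		buf[0] = jmp_cond;
		buf[1] = (uint8_t)off;
		return 2;
	}
	buf[0] = 0x0F;				/* near form */
	buf[1] = jmp_cond + 0x10;
	memcpy(buf + 2, &off, 4);		/* little-endian rel32 */
	return 6;
}

int main(void)
{
	uint8_t buf[8];
	int n = emit_cond_jump(buf, SHORT_JE, 1000);

	for (int i = 0; i < n; i++)
		printf("%02x ", buf[i]);	/* 0f 84 e8 03 00 00 */
	printf("\n");
	return 0;
}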
| @@ -1051,7 +1079,7 @@ common_load: | |||
| 1051 | ctx->seen_ld_abs = seen_ld_abs = true; | 1079 | ctx->seen_ld_abs = seen_ld_abs = true; |
| 1052 | jmp_offset = func - (image + addrs[i]); | 1080 | jmp_offset = func - (image + addrs[i]); |
| 1053 | if (!func || !is_simm32(jmp_offset)) { | 1081 | if (!func || !is_simm32(jmp_offset)) { |
| 1054 | pr_err("unsupported bpf func %d addr %p image %p\n", | 1082 | pr_err("unsupported BPF func %d addr %p image %p\n", |
| 1055 | imm32, func, image); | 1083 | imm32, func, image); |
| 1056 | return -EINVAL; | 1084 | return -EINVAL; |
| 1057 | } | 1085 | } |
| @@ -1070,7 +1098,8 @@ common_load: | |||
| 1070 | EMIT2_off32(0x81, 0xC6, imm32); | 1098 | EMIT2_off32(0x81, 0xC6, imm32); |
| 1071 | } | 1099 | } |
| 1072 | } | 1100 | } |
| 1073 | /* skb pointer is in R6 (%rbx), it will be copied into | 1101 | /* |
| 1102 | * skb pointer is in R6 (%rbx), it will be copied into | ||
| 1074 | * %rdi if skb_copy_bits() call is necessary. | 1103 | * %rdi if skb_copy_bits() call is necessary. |
| 1075 | * sk_load_* helpers also use %r10 and %r9d. | 1104 | * sk_load_* helpers also use %r10 and %r9d. |
| 1076 | * See bpf_jit.S | 1105 | * See bpf_jit.S |
| @@ -1101,7 +1130,7 @@ common_load: | |||
| 1101 | goto emit_jmp; | 1130 | goto emit_jmp; |
| 1102 | } | 1131 | } |
| 1103 | seen_exit = true; | 1132 | seen_exit = true; |
| 1104 | /* update cleanup_addr */ | 1133 | /* Update cleanup_addr */ |
| 1105 | ctx->cleanup_addr = proglen; | 1134 | ctx->cleanup_addr = proglen; |
| 1106 | /* mov rbx, qword ptr [rbp+0] */ | 1135 | /* mov rbx, qword ptr [rbp+0] */ |
| 1107 | EMIT4(0x48, 0x8B, 0x5D, 0); | 1136 | EMIT4(0x48, 0x8B, 0x5D, 0); |
| @@ -1119,10 +1148,11 @@ common_load: | |||
| 1119 | break; | 1148 | break; |
| 1120 | 1149 | ||
| 1121 | default: | 1150 | default: |
| 1122 | /* By design x64 JIT should support all BPF instructions | 1151 | /* |
| 1152 | * By design x86-64 JIT should support all BPF instructions. | ||
| 1123 | * This error will be seen if new instruction was added | 1153 | * This error will be seen if new instruction was added |
| 1124 | * to interpreter, but not to JIT | 1154 | * to the interpreter, but not to the JIT, or if there is |
| 1125 | * or if there is junk in bpf_prog | 1155 | * junk in bpf_prog. |
| 1126 | */ | 1156 | */ |
| 1127 | pr_err("bpf_jit: unknown opcode %02x\n", insn->code); | 1157 | pr_err("bpf_jit: unknown opcode %02x\n", insn->code); |
| 1128 | return -EINVAL; | 1158 | return -EINVAL; |
| @@ -1174,7 +1204,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
| 1174 | return orig_prog; | 1204 | return orig_prog; |
| 1175 | 1205 | ||
| 1176 | tmp = bpf_jit_blind_constants(prog); | 1206 | tmp = bpf_jit_blind_constants(prog); |
| 1177 | /* If blinding was requested and we failed during blinding, | 1207 | /* |
| 1208 | * If blinding was requested and we failed during blinding, | ||
| 1178 | * we must fall back to the interpreter. | 1209 | * we must fall back to the interpreter. |
| 1179 | */ | 1210 | */ |
| 1180 | if (IS_ERR(tmp)) | 1211 | if (IS_ERR(tmp)) |
| @@ -1208,8 +1239,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
| 1208 | goto out_addrs; | 1239 | goto out_addrs; |
| 1209 | } | 1240 | } |
| 1210 | 1241 | ||
| 1211 | /* Before first pass, make a rough estimation of addrs[] | 1242 | /* |
| 1212 | * each bpf instruction is translated to less than 64 bytes | 1243 | * Before first pass, make a rough estimation of addrs[]: |
| 1244 | * each BPF instruction is translated to less than 64 bytes | ||
| 1213 | */ | 1245 | */ |
| 1214 | for (proglen = 0, i = 0; i < prog->len; i++) { | 1246 | for (proglen = 0, i = 0; i < prog->len; i++) { |
| 1215 | proglen += 64; | 1247 | proglen += 64; |
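addrs[] is what turns BPF instruction offsets into x86 byte displacements later on (jmp_offset = addrs[i + insn->off] - addrs[i] in the jump cases above), so the first pass seeds it with a deliberately generous 64-bytes-per-instruction estimate. A toy sketch of that relationship (hypothetical standalone demo):

#include <stdio.h>

int main(void)
{
	int addrs[5], proglen = 0;

	/* Rough first-pass estimate, as in the loop above: assume every
	 * BPF instruction JITs to 64 bytes. */
	for (int i = 0; i < 5; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}

	/* A BPF jump of insn->off = 2 instructions taken at instruction
	 * i = 1 becomes an x86 displacement of addrs[i + off] - addrs[i]. */
	int i = 1, off = 2;
	printf("jmp_offset = %d\n", addrs[i + off] - addrs[i]);	/* 128 */
	return 0;
}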
| @@ -1218,10 +1250,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
| 1218 | ctx.cleanup_addr = proglen; | 1250 | ctx.cleanup_addr = proglen; |
| 1219 | skip_init_addrs: | 1251 | skip_init_addrs: |
| 1220 | 1252 | ||
| 1221 | /* JITed image shrinks with every pass and the loop iterates | 1253 | /* |
| 1222 | * until the image stops shrinking. Very large bpf programs | 1254 | * JITed image shrinks with every pass and the loop iterates |
| 1255 | * until the image stops shrinking. Very large BPF programs | ||
| 1223 | * may converge on the last pass. In such case do one more | 1256 | * may converge on the last pass. In such case do one more |
| 1224 | * pass to emit the final image | 1257 | * pass to emit the final image. |
| 1225 | */ | 1258 | */ |
| 1226 | for (pass = 0; pass < 20 || image; pass++) { | 1259 | for (pass = 0; pass < 20 || image; pass++) { |
| 1227 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); | 1260 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); |
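The pass loop keeps re-running do_jit() while the image shrinks (as offsets settle, long jump encodings collapse into short ones) and only then emits into the allocated image. A toy model of the convergence idea (hypothetical standalone demo, not the kernel loop):

#include <stdio.h>

/* Stands in for do_jit(): pretend the image keeps shrinking as long jumps
 * become short ones, until the length is stable. */
static int fake_do_jit(int pass)
{
	int len = 400 - 10 * pass;

	return len < 350 ? 350 : len;
}

int main(void)
{
	int oldproglen = 0, proglen = 0;

	for (int pass = 0; pass < 20; pass++) {
		proglen = fake_do_jit(pass);
		if (proglen == oldproglen) {
			printf("converged at %d bytes after %d passes\n",
			       proglen, pass + 1);
			break;
		}
		oldproglen = proglen;
	}
	return 0;
}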
