diff options
Diffstat (limited to 'arch/x86/net/bpf_jit_comp32.c')
-rw-r--r-- | arch/x86/net/bpf_jit_comp32.c | 2419 |
1 files changed, 2419 insertions, 0 deletions
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..0cc04e30adc1 --- /dev/null +++ b/arch/x86/net/bpf_jit_comp32.c | |||
@@ -0,0 +1,2419 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Just-In-Time compiler for eBPF filters on IA32 (32bit x86) | ||
4 | * | ||
5 | * Author: Wang YanQing (udknight@gmail.com) | ||
6 | * The code based on code and ideas from: | ||
7 | * Eric Dumazet (eric.dumazet@gmail.com) | ||
8 | * and from: | ||
9 | * Shubham Bansal <illusionist.neo@gmail.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/netdevice.h> | ||
13 | #include <linux/filter.h> | ||
14 | #include <linux/if_vlan.h> | ||
15 | #include <asm/cacheflush.h> | ||
16 | #include <asm/set_memory.h> | ||
17 | #include <asm/nospec-branch.h> | ||
18 | #include <linux/bpf.h> | ||
19 | |||
20 | /* | ||
21 | * eBPF prog stack layout: | ||
22 | * | ||
23 | * high | ||
24 | * original ESP => +-----+ | ||
25 | * | | callee saved registers | ||
26 | * +-----+ | ||
27 | * | ... | eBPF JIT scratch space | ||
28 | * BPF_FP,IA32_EBP => +-----+ | ||
29 | * | ... | eBPF prog stack | ||
30 | * +-----+ | ||
31 | * |RSVD | JIT scratchpad | ||
32 | * current ESP => +-----+ | ||
33 | * | | | ||
34 | * | ... | Function call stack | ||
35 | * | | | ||
36 | * +-----+ | ||
37 | * low | ||
38 | * | ||
39 | * The callee saved registers: | ||
40 | * | ||
41 | * high | ||
42 | * original ESP => +------------------+ \ | ||
43 | * | ebp | | | ||
44 | * current EBP => +------------------+ } callee saved registers | ||
45 | * | ebx,esi,edi | | | ||
46 | * +------------------+ / | ||
47 | * low | ||
48 | */ | ||
49 | |||
50 | static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | ||
51 | { | ||
52 | if (len == 1) | ||
53 | *ptr = bytes; | ||
54 | else if (len == 2) | ||
55 | *(u16 *)ptr = bytes; | ||
56 | else { | ||
57 | *(u32 *)ptr = bytes; | ||
58 | barrier(); | ||
59 | } | ||
60 | return ptr + len; | ||
61 | } | ||
62 | |||
/*
 * Append 'len' opcode bytes packed little-endian in 'bytes' to the JIT
 * buffer and advance the running byte count.  Both 'prog' and 'cnt' are
 * locals of the function doing the emitting.
 */
#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)

#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)	\
	EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

/* Opcode bytes followed by a raw 32-bit immediate/displacement. */
#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)

/*
 * Displacement from the end of a jump instruction (emitted at byte
 * offset 'cnt', 'jmp_insn_len' bytes long) to 'label', a byte offset
 * recorded on an earlier emission pass.
 */
#define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)
82 | |||
/* True when 'value' fits in a sign-extended 8-bit immediate. */
static bool is_imm8(int value)
{
	return -128 <= value && value <= 127;
}
87 | |||
88 | static bool is_simm32(s64 value) | ||
89 | { | ||
90 | return value == (s64) (s32) value; | ||
91 | } | ||
92 | |||
/* Offset of a spilled eBPF half-register within the stack scratch area. */
#define STACK_OFFSET(k)	(k)
#define TCALL_CNT	(MAX_BPF_JIT_REG + 0)	/* Tail Call Count */

/* IA32 register numbers as encoded in ModRM reg/rm fields. */
#define IA32_EAX	(0x0)
#define IA32_EBX	(0x3)
#define IA32_ECX	(0x1)
#define IA32_EDX	(0x2)
#define IA32_ESI	(0x6)
#define IA32_EDI	(0x7)
#define IA32_EBP	(0x5)
#define IA32_ESP	(0x4)

/*
 * List of x86 cond jumps opcodes (. + s8)
 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
 */
#define IA32_JB  0x72
#define IA32_JAE 0x73
#define IA32_JE  0x74
#define IA32_JNE 0x75
#define IA32_JBE 0x76
#define IA32_JA  0x77
#define IA32_JL  0x7C
#define IA32_JGE 0x7D
#define IA32_JLE 0x7E
#define IA32_JG  0x7F
119 | |||
120 | /* | ||
121 | * Map eBPF registers to IA32 32bit registers or stack scratch space. | ||
122 | * | ||
123 | * 1. All the registers, R0-R10, are mapped to scratch space on stack. | ||
124 | * 2. We need two 64 bit temp registers to do complex operations on eBPF | ||
125 | * registers. | ||
126 | * 3. For performance reason, the BPF_REG_AX for blinding constant, is | ||
127 | * mapped to real hardware register pair, IA32_ESI and IA32_EDI. | ||
128 | * | ||
129 | * As the eBPF registers are all 64 bit registers and IA32 has only 32 bit | ||
130 | * registers, we have to map each eBPF registers with two IA32 32 bit regs | ||
131 | * or scratch memory space and we have to build eBPF 64 bit register from those. | ||
132 | * | ||
133 | * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers. | ||
134 | */ | ||
static const u8 bpf2ia32[][2] = {
	/*
	 * Each entry gives the {low dword, high dword} location of one
	 * 64-bit eBPF register: either two stack-scratch offsets (the
	 * pair occupies 8 consecutive bytes) or two hardware registers.
	 */
	/* Return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},

	/* The arguments from eBPF program to in-kernel function */
	/* Stored on stack scratch space */
	[BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
	[BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
	[BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
	[BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
	[BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},

	/* Callee saved registers that in-kernel function will preserve */
	/* Stored on stack scratch space */
	[BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
	[BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
	[BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
	[BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},

	/* Read only Frame Pointer to access Stack */
	[BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},

	/* Temporary register for blinding constants. */
	[BPF_REG_AX] = {IA32_ESI, IA32_EDI},

	/* Tail call count. Stored on stack scratch space. */
	[TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
};
163 | |||
/* Accessors for the {lo, hi} pairs taken from bpf2ia32[]. */
#define dst_lo	dst[0]
#define dst_hi	dst[1]
#define src_lo	src[0]
#define src_hi	src[1]

#define STACK_ALIGNMENT	8
/*
 * Stack space for BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4,
 * BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9,
 * BPF_REG_FP, BPF_REG_AX and Tail call counts.
 */
#define SCRATCH_SIZE 96

/* Total stack size used in JITed code */
#define _STACK_SIZE	(stack_depth + SCRATCH_SIZE)

#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)

/* Get the offset of eBPF REGISTERs stored on scratch space. */
#define STACK_VAR(off) (off)
184 | |||
185 | /* Encode 'dst_reg' register into IA32 opcode 'byte' */ | ||
186 | static u8 add_1reg(u8 byte, u32 dst_reg) | ||
187 | { | ||
188 | return byte + dst_reg; | ||
189 | } | ||
190 | |||
191 | /* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */ | ||
192 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | ||
193 | { | ||
194 | return byte + dst_reg + (src_reg << 3); | ||
195 | } | ||
196 | |||
static void jit_fill_hole(void *area, unsigned int size)
{
	/*
	 * Pad unused instruction memory with int3 (0xcc) so a stray
	 * jump into the hole traps instead of executing garbage.
	 */
	memset(area, 0xcc, size);
}
202 | |||
203 | static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk, | ||
204 | u8 **pprog) | ||
205 | { | ||
206 | u8 *prog = *pprog; | ||
207 | int cnt = 0; | ||
208 | |||
209 | if (dstk) { | ||
210 | if (val == 0) { | ||
211 | /* xor eax,eax */ | ||
212 | EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX)); | ||
213 | /* mov dword ptr [ebp+off],eax */ | ||
214 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
215 | STACK_VAR(dst)); | ||
216 | } else { | ||
217 | EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP), | ||
218 | STACK_VAR(dst), val); | ||
219 | } | ||
220 | } else { | ||
221 | if (val == 0) | ||
222 | EMIT2(0x33, add_2reg(0xC0, dst, dst)); | ||
223 | else | ||
224 | EMIT2_off32(0xC7, add_1reg(0xC0, dst), | ||
225 | val); | ||
226 | } | ||
227 | *pprog = prog; | ||
228 | } | ||
229 | |||
230 | /* dst = imm (4 bytes)*/ | ||
231 | static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk, | ||
232 | bool sstk, u8 **pprog) | ||
233 | { | ||
234 | u8 *prog = *pprog; | ||
235 | int cnt = 0; | ||
236 | u8 sreg = sstk ? IA32_EAX : src; | ||
237 | |||
238 | if (sstk) | ||
239 | /* mov eax,dword ptr [ebp+off] */ | ||
240 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src)); | ||
241 | if (dstk) | ||
242 | /* mov dword ptr [ebp+off],eax */ | ||
243 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst)); | ||
244 | else | ||
245 | /* mov dst,sreg */ | ||
246 | EMIT2(0x89, add_2reg(0xC0, dst, sreg)); | ||
247 | |||
248 | *pprog = prog; | ||
249 | } | ||
250 | |||
251 | /* dst = src */ | ||
252 | static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[], | ||
253 | const u8 src[], bool dstk, | ||
254 | bool sstk, u8 **pprog) | ||
255 | { | ||
256 | emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog); | ||
257 | if (is64) | ||
258 | /* complete 8 byte move */ | ||
259 | emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog); | ||
260 | else | ||
261 | /* zero out high 4 bytes */ | ||
262 | emit_ia32_mov_i(dst_hi, 0, dstk, pprog); | ||
263 | } | ||
264 | |||
265 | /* Sign extended move */ | ||
266 | static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[], | ||
267 | const u32 val, bool dstk, u8 **pprog) | ||
268 | { | ||
269 | u32 hi = 0; | ||
270 | |||
271 | if (is64 && (val & (1<<31))) | ||
272 | hi = (u32)~0; | ||
273 | emit_ia32_mov_i(dst_lo, val, dstk, pprog); | ||
274 | emit_ia32_mov_i(dst_hi, hi, dstk, pprog); | ||
275 | } | ||
276 | |||
/*
 * ALU operation (32 bit)
 * dst = dst * src
 *
 * Uses the one-operand 'mul', so edx:eax receives the full 64-bit
 * product and only eax (the low 32 bits) is written back; edx and
 * (when sstk) ecx are clobbered.
 */
static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_ECX : src;

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));


	/* mul sreg -- edx:eax = eax * sreg */
	EMIT2(0xF7, add_1reg(0xE0, sreg));

	if (dstk)
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov dst,eax */
		EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));

	*pprog = prog;
}
312 | |||
/*
 * Emit code for BPF_TO_LE.  The CPU already stores data little-endian,
 * so no bytes are swapped: a 16- or 32-bit conversion just zeroes the
 * bits above 'val', and a 64-bit conversion is a no-op.
 */
static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
				     bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	/* val == 64 emits nothing, so skip the load/store round trip. */
	if (dstk && val != 64) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/*
		 * Emit 'movzwl eax,ax' to zero extend 16-bit
		 * into 64 bit
		 */
		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* nop */
		break;
	}

	if (dstk && val != 64) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
357 | |||
/*
 * Emit code for BPF_TO_BE: byte-swap the low 'val' bits of dst and
 * zero everything above them (16/32-bit), or swap all eight bytes
 * (64-bit).  Clobbers ecx in the 64-bit case.
 */
static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
				       bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/* Emit 'ror %ax, 8' to swap lower 2 bytes */
		EMIT1(0x66);
		EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);

		/* movzwl: zero-extend the swapped 16 bits into dreg_lo */
		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));

		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		/* Emit 'bswap eax' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* Swap each 32-bit half, then exchange the halves. */
		/* Emit 'bswap eax' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		/* Emit 'bswap edx' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_hi));

		/* mov ecx,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
		/* mov dreg_hi,dreg_lo */
		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
		/* mov dreg_lo,ecx */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));

		break;
	}
	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
420 | |||
/*
 * ALU operation (32 bit)
 * dst = dst (div|mod) src
 *
 * Unsigned division: 'div ecx' divides edx:eax by ecx, leaving the
 * quotient in eax and the remainder in edx; BPF_DIV stores eax and
 * BPF_MOD stores edx.  Clobbers eax, ecx and edx.
 *
 * NOTE(review): no divide-by-zero guard is emitted here -- presumably
 * the BPF core guarantees a non-zero divisor before this instruction
 * runs; confirm at the call site.
 */
static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
				       bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));

	/* xor edx,edx -- zero-extend the dividend into edx:eax */
	EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
	/* div ecx */
	EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));

	if (op == BPF_MOD) {
		if (dstk)
			/* mov dword ptr [ebp+off],edx */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
			      STACK_VAR(dst));
		else
			/* mov dst,edx */
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
	} else {
		if (dstk)
			/* mov dword ptr [ebp+off],eax */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(dst));
		else
			/* mov dst,eax */
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
	}
	*pprog = prog;
}
467 | |||
/*
 * ALU operation (32 bit)
 * dst = dst (shift) src
 *
 * The shift count goes in cl; the hardware masks it to 0-31.
 * Clobbers ecx, and eax when dst is spilled.
 */
static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
				     bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;
	u8 b2;	/* ModRM opcode-extension byte selecting shl/shr/sar */

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	switch (op) {
	case BPF_LSH:
		b2 = 0xE0; break;	/* shl */
	case BPF_RSH:
		b2 = 0xE8; break;	/* shr (logical) */
	case BPF_ARSH:
		b2 = 0xF8; break;	/* sar (arithmetic) */
	default:
		/* Unknown op: bail without advancing *pprog. */
		return;
	}
	/* shl/shr/sar dreg,cl */
	EMIT2(0xD3, add_1reg(b2, dreg));

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
	*pprog = prog;
}
508 | |||
/*
 * ALU operation (32 bit)
 * dst = dst (op) src
 *
 * 'hi' marks the high half of a 64-bit operation: ADD/SUB then use
 * adc/sbb so the carry/borrow produced by the low half is consumed
 * (only flag-preserving mov instructions are emitted in between).
 * Clobbers eax/edx as temporaries when the operands are spilled.
 */
static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_EAX : src;
	u8 dreg = dstk ? IA32_EDX : dst;

	if (sstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));

	if (dstk)
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));

	switch (BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		if (hi && is64)
			/* adc dreg,sreg -- add with the low half's carry */
			EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		else
			/* add dreg,sreg */
			EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst - src */
	case BPF_SUB:
		if (hi && is64)
			/* sbb dreg,sreg -- subtract with the low half's borrow */
			EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		else
			/* sub dreg,sreg */
			EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst | src */
	case BPF_OR:
		EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & src */
	case BPF_AND:
		EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}
565 | |||
566 | /* ALU operation (64 bit) */ | ||
567 | static inline void emit_ia32_alu_r64(const bool is64, const u8 op, | ||
568 | const u8 dst[], const u8 src[], | ||
569 | bool dstk, bool sstk, | ||
570 | u8 **pprog) | ||
571 | { | ||
572 | u8 *prog = *pprog; | ||
573 | |||
574 | emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog); | ||
575 | if (is64) | ||
576 | emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk, | ||
577 | &prog); | ||
578 | else | ||
579 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
580 | *pprog = prog; | ||
581 | } | ||
582 | |||
/*
 * ALU operation (32 bit)
 * dst = dst (op) val
 *
 * 'hi' marks the high half of a 64-bit operation: ADD/SUB then use
 * adc/sbb so the low half's carry/borrow is consumed.  Immediates
 * that fit in a signed byte use the short 0x83 encoding; otherwise
 * the immediate is staged in edx and a reg-reg form is emitted.
 * BPF_NEG ignores 'val'.  Clobbers eax/edx as temporaries.
 */
static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const s32 val, bool dstk,
				   u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;
	u8 sreg = IA32_EDX;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (!is_imm8(val))
		/* mov edx,imm32*/
		EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);

	switch (op) {
	/* dst = dst + val */
	case BPF_ADD:
		if (hi && is64) {
			if (is_imm8(val))
				/* adc dreg,imm8 */
				EMIT3(0x83, add_1reg(0xD0, dreg), val);
			else
				/* adc dreg,edx */
				EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				/* add dreg,imm8 */
				EMIT3(0x83, add_1reg(0xC0, dreg), val);
			else
				/* add dreg,edx */
				EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst - val */
	case BPF_SUB:
		if (hi && is64) {
			if (is_imm8(val))
				/* sbb dreg,imm8 */
				EMIT3(0x83, add_1reg(0xD8, dreg), val);
			else
				/* sbb dreg,edx */
				EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				/* sub dreg,imm8 */
				EMIT3(0x83, add_1reg(0xE8, dreg), val);
			else
				/* sub dreg,edx */
				EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst | val */
	case BPF_OR:
		if (is_imm8(val))
			/* or dreg,imm8 */
			EMIT3(0x83, add_1reg(0xC8, dreg), val);
		else
			/* or dreg,edx */
			EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & val */
	case BPF_AND:
		if (is_imm8(val))
			/* and dreg,imm8 */
			EMIT3(0x83, add_1reg(0xE0, dreg), val);
		else
			/* and dreg,edx */
			EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ val */
	case BPF_XOR:
		if (is_imm8(val))
			/* xor dreg,imm8 */
			EMIT3(0x83, add_1reg(0xF0, dreg), val);
		else
			/* xor dreg,edx */
			EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	case BPF_NEG:
		/* neg dreg */
		EMIT2(0xF7, add_1reg(0xD8, dreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}
665 | |||
666 | /* ALU operation (64 bit) */ | ||
667 | static inline void emit_ia32_alu_i64(const bool is64, const u8 op, | ||
668 | const u8 dst[], const u32 val, | ||
669 | bool dstk, u8 **pprog) | ||
670 | { | ||
671 | u8 *prog = *pprog; | ||
672 | u32 hi = 0; | ||
673 | |||
674 | if (is64 && (val & (1<<31))) | ||
675 | hi = (u32)~0; | ||
676 | |||
677 | emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog); | ||
678 | if (is64) | ||
679 | emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog); | ||
680 | else | ||
681 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
682 | |||
683 | *pprog = prog; | ||
684 | } | ||
685 | |||
686 | /* dst = ~dst (64 bit) */ | ||
687 | static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog) | ||
688 | { | ||
689 | u8 *prog = *pprog; | ||
690 | int cnt = 0; | ||
691 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
692 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
693 | |||
694 | if (dstk) { | ||
695 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
696 | STACK_VAR(dst_lo)); | ||
697 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
698 | STACK_VAR(dst_hi)); | ||
699 | } | ||
700 | |||
701 | /* xor ecx,ecx */ | ||
702 | EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
703 | /* sub dreg_lo,ecx */ | ||
704 | EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX)); | ||
705 | /* mov dreg_lo,ecx */ | ||
706 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX)); | ||
707 | |||
708 | /* xor ecx,ecx */ | ||
709 | EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
710 | /* sbb dreg_hi,ecx */ | ||
711 | EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX)); | ||
712 | /* mov dreg_hi,ecx */ | ||
713 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX)); | ||
714 | |||
715 | if (dstk) { | ||
716 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
717 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
718 | STACK_VAR(dst_lo)); | ||
719 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
720 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
721 | STACK_VAR(dst_hi)); | ||
722 | } | ||
723 | *pprog = prog; | ||
724 | } | ||
725 | |||
726 | /* dst = dst << src */ | ||
727 | static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[], | ||
728 | bool dstk, bool sstk, u8 **pprog) | ||
729 | { | ||
730 | u8 *prog = *pprog; | ||
731 | int cnt = 0; | ||
732 | static int jmp_label1 = -1; | ||
733 | static int jmp_label2 = -1; | ||
734 | static int jmp_label3 = -1; | ||
735 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
736 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
737 | |||
738 | if (dstk) { | ||
739 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
740 | STACK_VAR(dst_lo)); | ||
741 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
742 | STACK_VAR(dst_hi)); | ||
743 | } | ||
744 | |||
745 | if (sstk) | ||
746 | /* mov ecx,dword ptr [ebp+off] */ | ||
747 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
748 | STACK_VAR(src_lo)); | ||
749 | else | ||
750 | /* mov ecx,src_lo */ | ||
751 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
752 | |||
753 | /* cmp ecx,32 */ | ||
754 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
755 | /* Jumps when >= 32 */ | ||
756 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
757 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
758 | else | ||
759 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
760 | |||
761 | /* < 32 */ | ||
762 | /* shl dreg_hi,cl */ | ||
763 | EMIT2(0xD3, add_1reg(0xE0, dreg_hi)); | ||
764 | /* mov ebx,dreg_lo */ | ||
765 | EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
766 | /* shl dreg_lo,cl */ | ||
767 | EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); | ||
768 | |||
769 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
770 | /* neg ecx */ | ||
771 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
772 | /* add ecx,32 */ | ||
773 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
774 | |||
775 | /* shr ebx,cl */ | ||
776 | EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); | ||
777 | /* or dreg_hi,ebx */ | ||
778 | EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
779 | |||
780 | /* goto out; */ | ||
781 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
782 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
783 | else | ||
784 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
785 | |||
786 | /* >= 32 */ | ||
787 | if (jmp_label1 == -1) | ||
788 | jmp_label1 = cnt; | ||
789 | |||
790 | /* cmp ecx,64 */ | ||
791 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
792 | /* Jumps when >= 64 */ | ||
793 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
794 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
795 | else | ||
796 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
797 | |||
798 | /* >= 32 && < 64 */ | ||
799 | /* sub ecx,32 */ | ||
800 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
801 | /* shl dreg_lo,cl */ | ||
802 | EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); | ||
803 | /* mov dreg_hi,dreg_lo */ | ||
804 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); | ||
805 | |||
806 | /* xor dreg_lo,dreg_lo */ | ||
807 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
808 | |||
809 | /* goto out; */ | ||
810 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
811 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
812 | else | ||
813 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
814 | |||
815 | /* >= 64 */ | ||
816 | if (jmp_label2 == -1) | ||
817 | jmp_label2 = cnt; | ||
818 | /* xor dreg_lo,dreg_lo */ | ||
819 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
820 | /* xor dreg_hi,dreg_hi */ | ||
821 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
822 | |||
823 | if (jmp_label3 == -1) | ||
824 | jmp_label3 = cnt; | ||
825 | |||
826 | if (dstk) { | ||
827 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
828 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
829 | STACK_VAR(dst_lo)); | ||
830 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
831 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
832 | STACK_VAR(dst_hi)); | ||
833 | } | ||
834 | /* out: */ | ||
835 | *pprog = prog; | ||
836 | } | ||
837 | |||
838 | /* dst = dst >> src (signed)*/ | ||
839 | static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[], | ||
840 | bool dstk, bool sstk, u8 **pprog) | ||
841 | { | ||
842 | u8 *prog = *pprog; | ||
843 | int cnt = 0; | ||
844 | static int jmp_label1 = -1; | ||
845 | static int jmp_label2 = -1; | ||
846 | static int jmp_label3 = -1; | ||
847 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
848 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
849 | |||
850 | if (dstk) { | ||
851 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
852 | STACK_VAR(dst_lo)); | ||
853 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
854 | STACK_VAR(dst_hi)); | ||
855 | } | ||
856 | |||
857 | if (sstk) | ||
858 | /* mov ecx,dword ptr [ebp+off] */ | ||
859 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
860 | STACK_VAR(src_lo)); | ||
861 | else | ||
862 | /* mov ecx,src_lo */ | ||
863 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
864 | |||
865 | /* cmp ecx,32 */ | ||
866 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
867 | /* Jumps when >= 32 */ | ||
868 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
869 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
870 | else | ||
871 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
872 | |||
873 | /* < 32 */ | ||
874 | /* lshr dreg_lo,cl */ | ||
875 | EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); | ||
876 | /* mov ebx,dreg_hi */ | ||
877 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
878 | /* ashr dreg_hi,cl */ | ||
879 | EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); | ||
880 | |||
881 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
882 | /* neg ecx */ | ||
883 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
884 | /* add ecx,32 */ | ||
885 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
886 | |||
887 | /* shl ebx,cl */ | ||
888 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
889 | /* or dreg_lo,ebx */ | ||
890 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
891 | |||
892 | /* goto out; */ | ||
893 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
894 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
895 | else | ||
896 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
897 | |||
898 | /* >= 32 */ | ||
899 | if (jmp_label1 == -1) | ||
900 | jmp_label1 = cnt; | ||
901 | |||
902 | /* cmp ecx,64 */ | ||
903 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
904 | /* Jumps when >= 64 */ | ||
905 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
906 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
907 | else | ||
908 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
909 | |||
910 | /* >= 32 && < 64 */ | ||
911 | /* sub ecx,32 */ | ||
912 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
913 | /* ashr dreg_hi,cl */ | ||
914 | EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); | ||
915 | /* mov dreg_lo,dreg_hi */ | ||
916 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
917 | |||
918 | /* ashr dreg_hi,imm8 */ | ||
919 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
920 | |||
921 | /* goto out; */ | ||
922 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
923 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
924 | else | ||
925 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
926 | |||
927 | /* >= 64 */ | ||
928 | if (jmp_label2 == -1) | ||
929 | jmp_label2 = cnt; | ||
930 | /* ashr dreg_hi,imm8 */ | ||
931 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
932 | /* mov dreg_lo,dreg_hi */ | ||
933 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
934 | |||
935 | if (jmp_label3 == -1) | ||
936 | jmp_label3 = cnt; | ||
937 | |||
938 | if (dstk) { | ||
939 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
940 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
941 | STACK_VAR(dst_lo)); | ||
942 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
943 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
944 | STACK_VAR(dst_hi)); | ||
945 | } | ||
946 | /* out: */ | ||
947 | *pprog = prog; | ||
948 | } | ||
949 | |||
950 | /* dst = dst >> src */ | ||
951 | static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, | ||
952 | bool sstk, u8 **pprog) | ||
953 | { | ||
954 | u8 *prog = *pprog; | ||
955 | int cnt = 0; | ||
956 | static int jmp_label1 = -1; | ||
957 | static int jmp_label2 = -1; | ||
958 | static int jmp_label3 = -1; | ||
959 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
960 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
961 | |||
962 | if (dstk) { | ||
963 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
964 | STACK_VAR(dst_lo)); | ||
965 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
966 | STACK_VAR(dst_hi)); | ||
967 | } | ||
968 | |||
969 | if (sstk) | ||
970 | /* mov ecx,dword ptr [ebp+off] */ | ||
971 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
972 | STACK_VAR(src_lo)); | ||
973 | else | ||
974 | /* mov ecx,src_lo */ | ||
975 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
976 | |||
977 | /* cmp ecx,32 */ | ||
978 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
979 | /* Jumps when >= 32 */ | ||
980 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
981 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
982 | else | ||
983 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
984 | |||
985 | /* < 32 */ | ||
986 | /* lshr dreg_lo,cl */ | ||
987 | EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); | ||
988 | /* mov ebx,dreg_hi */ | ||
989 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
990 | /* shr dreg_hi,cl */ | ||
991 | EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); | ||
992 | |||
993 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
994 | /* neg ecx */ | ||
995 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
996 | /* add ecx,32 */ | ||
997 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
998 | |||
999 | /* shl ebx,cl */ | ||
1000 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
1001 | /* or dreg_lo,ebx */ | ||
1002 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
1003 | |||
1004 | /* goto out; */ | ||
1005 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
1006 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
1007 | else | ||
1008 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
1009 | |||
1010 | /* >= 32 */ | ||
1011 | if (jmp_label1 == -1) | ||
1012 | jmp_label1 = cnt; | ||
1013 | /* cmp ecx,64 */ | ||
1014 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
1015 | /* Jumps when >= 64 */ | ||
1016 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
1017 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
1018 | else | ||
1019 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
1020 | |||
1021 | /* >= 32 && < 64 */ | ||
1022 | /* sub ecx,32 */ | ||
1023 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
1024 | /* shr dreg_hi,cl */ | ||
1025 | EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); | ||
1026 | /* mov dreg_lo,dreg_hi */ | ||
1027 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
1028 | /* xor dreg_hi,dreg_hi */ | ||
1029 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
1030 | |||
1031 | /* goto out; */ | ||
1032 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
1033 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
1034 | else | ||
1035 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
1036 | |||
1037 | /* >= 64 */ | ||
1038 | if (jmp_label2 == -1) | ||
1039 | jmp_label2 = cnt; | ||
1040 | /* xor dreg_lo,dreg_lo */ | ||
1041 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
1042 | /* xor dreg_hi,dreg_hi */ | ||
1043 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
1044 | |||
1045 | if (jmp_label3 == -1) | ||
1046 | jmp_label3 = cnt; | ||
1047 | |||
1048 | if (dstk) { | ||
1049 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
1050 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
1051 | STACK_VAR(dst_lo)); | ||
1052 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
1053 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
1054 | STACK_VAR(dst_hi)); | ||
1055 | } | ||
1056 | /* out: */ | ||
1057 | *pprog = prog; | ||
1058 | } | ||
1059 | |||
1060 | /* dst = dst << val */ | ||
1061 | static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val, | ||
1062 | bool dstk, u8 **pprog) | ||
1063 | { | ||
1064 | u8 *prog = *pprog; | ||
1065 | int cnt = 0; | ||
1066 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
1067 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
1068 | |||
1069 | if (dstk) { | ||
1070 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1071 | STACK_VAR(dst_lo)); | ||
1072 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
1073 | STACK_VAR(dst_hi)); | ||
1074 | } | ||
1075 | /* Do LSH operation */ | ||
1076 | if (val < 32) { | ||
1077 | /* shl dreg_hi,imm8 */ | ||
1078 | EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val); | ||
1079 | /* mov ebx,dreg_lo */ | ||
1080 | EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
1081 | /* shl dreg_lo,imm8 */ | ||
1082 | EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val); | ||
1083 | |||
1084 | /* IA32_ECX = 32 - val */ | ||
1085 | /* mov ecx,val */ | ||
1086 | EMIT2(0xB1, val); | ||
1087 | /* movzx ecx,ecx */ | ||
1088 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
1089 | /* neg ecx */ | ||
1090 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
1091 | /* add ecx,32 */ | ||
1092 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
1093 | |||
1094 | /* shr ebx,cl */ | ||
1095 | EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); | ||
1096 | /* or dreg_hi,ebx */ | ||
1097 | EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
1098 | } else if (val >= 32 && val < 64) { | ||
1099 | u32 value = val - 32; | ||
1100 | |||
1101 | /* shl dreg_lo,imm8 */ | ||
1102 | EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value); | ||
1103 | /* mov dreg_hi,dreg_lo */ | ||
1104 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); | ||
1105 | /* xor dreg_lo,dreg_lo */ | ||
1106 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
1107 | } else { | ||
1108 | /* xor dreg_lo,dreg_lo */ | ||
1109 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
1110 | /* xor dreg_hi,dreg_hi */ | ||
1111 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
1112 | } | ||
1113 | |||
1114 | if (dstk) { | ||
1115 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
1116 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
1117 | STACK_VAR(dst_lo)); | ||
1118 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
1119 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
1120 | STACK_VAR(dst_hi)); | ||
1121 | } | ||
1122 | *pprog = prog; | ||
1123 | } | ||
1124 | |||
1125 | /* dst = dst >> val */ | ||
1126 | static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val, | ||
1127 | bool dstk, u8 **pprog) | ||
1128 | { | ||
1129 | u8 *prog = *pprog; | ||
1130 | int cnt = 0; | ||
1131 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
1132 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
1133 | |||
1134 | if (dstk) { | ||
1135 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1136 | STACK_VAR(dst_lo)); | ||
1137 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
1138 | STACK_VAR(dst_hi)); | ||
1139 | } | ||
1140 | |||
1141 | /* Do RSH operation */ | ||
1142 | if (val < 32) { | ||
1143 | /* shr dreg_lo,imm8 */ | ||
1144 | EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); | ||
1145 | /* mov ebx,dreg_hi */ | ||
1146 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
1147 | /* shr dreg_hi,imm8 */ | ||
1148 | EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val); | ||
1149 | |||
1150 | /* IA32_ECX = 32 - val */ | ||
1151 | /* mov ecx,val */ | ||
1152 | EMIT2(0xB1, val); | ||
1153 | /* movzx ecx,ecx */ | ||
1154 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
1155 | /* neg ecx */ | ||
1156 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
1157 | /* add ecx,32 */ | ||
1158 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
1159 | |||
1160 | /* shl ebx,cl */ | ||
1161 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
1162 | /* or dreg_lo,ebx */ | ||
1163 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
1164 | } else if (val >= 32 && val < 64) { | ||
1165 | u32 value = val - 32; | ||
1166 | |||
1167 | /* shr dreg_hi,imm8 */ | ||
1168 | EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value); | ||
1169 | /* mov dreg_lo,dreg_hi */ | ||
1170 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
1171 | /* xor dreg_hi,dreg_hi */ | ||
1172 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
1173 | } else { | ||
1174 | /* xor dreg_lo,dreg_lo */ | ||
1175 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
1176 | /* xor dreg_hi,dreg_hi */ | ||
1177 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
1178 | } | ||
1179 | |||
1180 | if (dstk) { | ||
1181 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
1182 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
1183 | STACK_VAR(dst_lo)); | ||
1184 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
1185 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
1186 | STACK_VAR(dst_hi)); | ||
1187 | } | ||
1188 | *pprog = prog; | ||
1189 | } | ||
1190 | |||
1191 | /* dst = dst >> val (signed) */ | ||
1192 | static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val, | ||
1193 | bool dstk, u8 **pprog) | ||
1194 | { | ||
1195 | u8 *prog = *pprog; | ||
1196 | int cnt = 0; | ||
1197 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
1198 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
1199 | |||
1200 | if (dstk) { | ||
1201 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1202 | STACK_VAR(dst_lo)); | ||
1203 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
1204 | STACK_VAR(dst_hi)); | ||
1205 | } | ||
1206 | /* Do RSH operation */ | ||
1207 | if (val < 32) { | ||
1208 | /* shr dreg_lo,imm8 */ | ||
1209 | EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); | ||
1210 | /* mov ebx,dreg_hi */ | ||
1211 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
1212 | /* ashr dreg_hi,imm8 */ | ||
1213 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val); | ||
1214 | |||
1215 | /* IA32_ECX = 32 - val */ | ||
1216 | /* mov ecx,val */ | ||
1217 | EMIT2(0xB1, val); | ||
1218 | /* movzx ecx,ecx */ | ||
1219 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
1220 | /* neg ecx */ | ||
1221 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
1222 | /* add ecx,32 */ | ||
1223 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
1224 | |||
1225 | /* shl ebx,cl */ | ||
1226 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
1227 | /* or dreg_lo,ebx */ | ||
1228 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
1229 | } else if (val >= 32 && val < 64) { | ||
1230 | u32 value = val - 32; | ||
1231 | |||
1232 | /* ashr dreg_hi,imm8 */ | ||
1233 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value); | ||
1234 | /* mov dreg_lo,dreg_hi */ | ||
1235 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
1236 | |||
1237 | /* ashr dreg_hi,imm8 */ | ||
1238 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
1239 | } else { | ||
1240 | /* ashr dreg_hi,imm8 */ | ||
1241 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
1242 | /* mov dreg_lo,dreg_hi */ | ||
1243 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
1244 | } | ||
1245 | |||
1246 | if (dstk) { | ||
1247 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
1248 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
1249 | STACK_VAR(dst_lo)); | ||
1250 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
1251 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
1252 | STACK_VAR(dst_hi)); | ||
1253 | } | ||
1254 | *pprog = prog; | ||
1255 | } | ||
1256 | |||
/*
 * dst = dst * src, 64-bit multiply of two register pairs.
 *
 * Assembled from three one-operand MULs (each writes edx:eax):
 *   ecx  = lo32(dst_hi * src_lo)
 *   ecx += lo32(dst_lo * src_hi)
 *   ecx += hi32(dst_lo * src_lo)   - cross terms form the high word
 *   eax  = lo32(dst_lo * src_lo)   - low word of the product
 * Bits of the cross products above 2^64 are discarded.
 * Clobbers EAX, ECX and EDX.
 */
static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
				     bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_hi));
	else
		/* mov eax,dst_hi */
		EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
	else
		/* mul src_lo */
		EMIT2(0xF7, add_1reg(0xE0, src_lo));

	/* mov ecx,eax: ecx = lo32(dst_hi * src_lo) */
	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
	else
		/* mov eax,dst_lo */
		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
	else
		/* mul src_hi */
		EMIT2(0xF7, add_1reg(0xE0, src_hi));

	/* add ecx,eax: ecx += lo32(dst_lo * src_hi) */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
	else
		/* mov eax,dst_lo */
		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
	else
		/* mul src_lo */
		EMIT2(0xF7, add_1reg(0xE0, src_lo));

	/* add ecx,edx: ecx += hi32(dst_lo * src_lo) */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));

	if (dstk) {
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],ecx */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(dst_hi));
	} else {
		/* mov dst_lo,eax */
		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
		/* mov dst_hi,ecx */
		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
	}

	*pprog = prog;
}
1333 | |||
1334 | static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val, | ||
1335 | bool dstk, u8 **pprog) | ||
1336 | { | ||
1337 | u8 *prog = *pprog; | ||
1338 | int cnt = 0; | ||
1339 | u32 hi; | ||
1340 | |||
1341 | hi = val & (1<<31) ? (u32)~0 : 0; | ||
1342 | /* movl eax,imm32 */ | ||
1343 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val); | ||
1344 | if (dstk) | ||
1345 | /* mul dword ptr [ebp+off] */ | ||
1346 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi)); | ||
1347 | else | ||
1348 | /* mul dst_hi */ | ||
1349 | EMIT2(0xF7, add_1reg(0xE0, dst_hi)); | ||
1350 | |||
1351 | /* mov ecx,eax */ | ||
1352 | EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
1353 | |||
1354 | /* movl eax,imm32 */ | ||
1355 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi); | ||
1356 | if (dstk) | ||
1357 | /* mul dword ptr [ebp+off] */ | ||
1358 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo)); | ||
1359 | else | ||
1360 | /* mul dst_lo */ | ||
1361 | EMIT2(0xF7, add_1reg(0xE0, dst_lo)); | ||
1362 | /* add ecx,eax */ | ||
1363 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
1364 | |||
1365 | /* movl eax,imm32 */ | ||
1366 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val); | ||
1367 | if (dstk) | ||
1368 | /* mul dword ptr [ebp+off] */ | ||
1369 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo)); | ||
1370 | else | ||
1371 | /* mul dst_lo */ | ||
1372 | EMIT2(0xF7, add_1reg(0xE0, dst_lo)); | ||
1373 | |||
1374 | /* add ecx,edx */ | ||
1375 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX)); | ||
1376 | |||
1377 | if (dstk) { | ||
1378 | /* mov dword ptr [ebp+off],eax */ | ||
1379 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1380 | STACK_VAR(dst_lo)); | ||
1381 | /* mov dword ptr [ebp+off],ecx */ | ||
1382 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
1383 | STACK_VAR(dst_hi)); | ||
1384 | } else { | ||
1385 | /* mov dword ptr [ebp+off],eax */ | ||
1386 | EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
1387 | /* mov dword ptr [ebp+off],ecx */ | ||
1388 | EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX)); | ||
1389 | } | ||
1390 | |||
1391 | *pprog = prog; | ||
1392 | } | ||
1393 | |||
1394 | static int bpf_size_to_x86_bytes(int bpf_size) | ||
1395 | { | ||
1396 | if (bpf_size == BPF_W) | ||
1397 | return 4; | ||
1398 | else if (bpf_size == BPF_H) | ||
1399 | return 2; | ||
1400 | else if (bpf_size == BPF_B) | ||
1401 | return 1; | ||
1402 | else if (bpf_size == BPF_DW) | ||
1403 | return 4; /* imm32 */ | ||
1404 | else | ||
1405 | return 0; | ||
1406 | } | ||
1407 | |||
/* Per-program state carried between JIT compilation passes. */
struct jit_context {
	int cleanup_addr; /* Epilogue code offset */
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64

/* Byte size of emit_prologue()'s output; tail calls jump past it */
#define PROLOGUE_SIZE 35
1417 | |||
1418 | /* | ||
1419 | * Emit prologue code for BPF program and check it's size. | ||
1420 | * bpf_tail_call helper will skip it while jumping into another program. | ||
1421 | */ | ||
1422 | static void emit_prologue(u8 **pprog, u32 stack_depth) | ||
1423 | { | ||
1424 | u8 *prog = *pprog; | ||
1425 | int cnt = 0; | ||
1426 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
1427 | const u8 fplo = bpf2ia32[BPF_REG_FP][0]; | ||
1428 | const u8 fphi = bpf2ia32[BPF_REG_FP][1]; | ||
1429 | const u8 *tcc = bpf2ia32[TCALL_CNT]; | ||
1430 | |||
1431 | /* push ebp */ | ||
1432 | EMIT1(0x55); | ||
1433 | /* mov ebp,esp */ | ||
1434 | EMIT2(0x89, 0xE5); | ||
1435 | /* push edi */ | ||
1436 | EMIT1(0x57); | ||
1437 | /* push esi */ | ||
1438 | EMIT1(0x56); | ||
1439 | /* push ebx */ | ||
1440 | EMIT1(0x53); | ||
1441 | |||
1442 | /* sub esp,STACK_SIZE */ | ||
1443 | EMIT2_off32(0x81, 0xEC, STACK_SIZE); | ||
1444 | /* sub ebp,SCRATCH_SIZE+4+12*/ | ||
1445 | EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16); | ||
1446 | /* xor ebx,ebx */ | ||
1447 | EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX)); | ||
1448 | |||
1449 | /* Set up BPF prog stack base register */ | ||
1450 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo)); | ||
1451 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi)); | ||
1452 | |||
1453 | /* Move BPF_CTX (EAX) to BPF_REG_R1 */ | ||
1454 | /* mov dword ptr [ebp+off],eax */ | ||
1455 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0])); | ||
1456 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1])); | ||
1457 | |||
1458 | /* Initialize Tail Count */ | ||
1459 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0])); | ||
1460 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
1461 | |||
1462 | BUILD_BUG_ON(cnt != PROLOGUE_SIZE); | ||
1463 | *pprog = prog; | ||
1464 | } | ||
1465 | |||
/*
 * Emit epilogue code for BPF program: load the 64-bit return value
 * (BPF_REG_0) into edx:eax, restore the callee-saved registers pushed
 * by the prologue and return to the caller.
 *
 * NOTE(review): stack_depth is unused here, mirroring emit_prologue().
 */
static void emit_epilogue(u8 **pprog, u32 stack_depth)
{
	u8 *prog = *pprog;
	const u8 *r0 = bpf2ia32[BPF_REG_0];
	int cnt = 0;

	/* Return value: edx:eax = BPF_REG_0 */
	/* mov eax,dword ptr [ebp+off]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
	/* mov edx,dword ptr [ebp+off]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));

	/* add ebp,SCRATCH_SIZE+4+12: undo the prologue's rebase so EBP
	 * points at the saved-register area again
	 */
	EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16);

	/* Restore the callee-saved registers pushed by the prologue */
	/* mov ebx,dword ptr [ebp-12]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
	/* mov esi,dword ptr [ebp-8]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
	/* mov edi,dword ptr [ebp-4]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);

	EMIT1(0xC9); /* leave */
	EMIT1(0xC3); /* ret */
	*pprog = prog;
}
1492 | |||
1493 | /* | ||
1494 | * Generate the following code: | ||
1495 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... | ||
1496 | * if (index >= array->map.max_entries) | ||
1497 | * goto out; | ||
1498 | * if (++tail_call_cnt > MAX_TAIL_CALL_CNT) | ||
1499 | * goto out; | ||
1500 | * prog = array->ptrs[index]; | ||
1501 | * if (prog == NULL) | ||
1502 | * goto out; | ||
1503 | * goto *(prog->bpf_func + prologue_size); | ||
1504 | * out: | ||
1505 | */ | ||
1506 | static void emit_bpf_tail_call(u8 **pprog) | ||
1507 | { | ||
1508 | u8 *prog = *pprog; | ||
1509 | int cnt = 0; | ||
1510 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
1511 | const u8 *r2 = bpf2ia32[BPF_REG_2]; | ||
1512 | const u8 *r3 = bpf2ia32[BPF_REG_3]; | ||
1513 | const u8 *tcc = bpf2ia32[TCALL_CNT]; | ||
1514 | u32 lo, hi; | ||
1515 | static int jmp_label1 = -1; | ||
1516 | |||
1517 | /* | ||
1518 | * if (index >= array->map.max_entries) | ||
1519 | * goto out; | ||
1520 | */ | ||
1521 | /* mov eax,dword ptr [ebp+off] */ | ||
1522 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0])); | ||
1523 | /* mov edx,dword ptr [ebp+off] */ | ||
1524 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0])); | ||
1525 | |||
1526 | /* cmp dword ptr [eax+off],edx */ | ||
1527 | EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
1528 | offsetof(struct bpf_array, map.max_entries)); | ||
1529 | /* jbe out */ | ||
1530 | EMIT2(IA32_JBE, jmp_label(jmp_label1, 2)); | ||
1531 | |||
1532 | /* | ||
1533 | * if (tail_call_cnt > MAX_TAIL_CALL_CNT) | ||
1534 | * goto out; | ||
1535 | */ | ||
1536 | lo = (u32)MAX_TAIL_CALL_CNT; | ||
1537 | hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); | ||
1538 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0])); | ||
1539 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
1540 | |||
1541 | /* cmp edx,hi */ | ||
1542 | EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi); | ||
1543 | EMIT2(IA32_JNE, 3); | ||
1544 | /* cmp ecx,lo */ | ||
1545 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo); | ||
1546 | |||
1547 | /* ja out */ | ||
1548 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
1549 | |||
1550 | /* add eax,0x1 */ | ||
1551 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01); | ||
1552 | /* adc ebx,0x0 */ | ||
1553 | EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00); | ||
1554 | |||
1555 | /* mov dword ptr [ebp+off],eax */ | ||
1556 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0])); | ||
1557 | /* mov dword ptr [ebp+off],edx */ | ||
1558 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
1559 | |||
1560 | /* prog = array->ptrs[index]; */ | ||
1561 | /* mov edx, [eax + edx * 4 + offsetof(...)] */ | ||
1562 | EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs)); | ||
1563 | |||
1564 | /* | ||
1565 | * if (prog == NULL) | ||
1566 | * goto out; | ||
1567 | */ | ||
1568 | /* test edx,edx */ | ||
1569 | EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX)); | ||
1570 | /* je out */ | ||
1571 | EMIT2(IA32_JE, jmp_label(jmp_label1, 2)); | ||
1572 | |||
1573 | /* goto *(prog->bpf_func + prologue_size); */ | ||
1574 | /* mov edx, dword ptr [edx + 32] */ | ||
1575 | EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX), | ||
1576 | offsetof(struct bpf_prog, bpf_func)); | ||
1577 | /* add edx,prologue_size */ | ||
1578 | EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE); | ||
1579 | |||
1580 | /* mov eax,dword ptr [ebp+off] */ | ||
1581 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0])); | ||
1582 | |||
1583 | /* | ||
1584 | * Now we're ready to jump into next BPF program: | ||
1585 | * eax == ctx (1st arg) | ||
1586 | * edx == prog->bpf_func + prologue_size | ||
1587 | */ | ||
1588 | RETPOLINE_EDX_BPF_JIT(); | ||
1589 | |||
1590 | if (jmp_label1 == -1) | ||
1591 | jmp_label1 = cnt; | ||
1592 | |||
1593 | /* out: */ | ||
1594 | *pprog = prog; | ||
1595 | } | ||
1596 | |||
/*
 * Push the scratch stack register on top of the stack.
 *
 * Pushes the 64-bit value src (two scratch-stack slots) onto the
 * machine stack as a call argument: high word first, then low, so the
 * low word ends up at the lower address.  Clobbers ECX.
 */
static inline void emit_push_r64(const u8 src[], u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
	/* push ecx */
	EMIT1(0x51);

	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
	/* push ecx */
	EMIT1(0x51);

	*pprog = prog;
}
1615 | |||
1616 | static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | ||
1617 | int oldproglen, struct jit_context *ctx) | ||
1618 | { | ||
1619 | struct bpf_insn *insn = bpf_prog->insnsi; | ||
1620 | int insn_cnt = bpf_prog->len; | ||
1621 | bool seen_exit = false; | ||
1622 | u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; | ||
1623 | int i, cnt = 0; | ||
1624 | int proglen = 0; | ||
1625 | u8 *prog = temp; | ||
1626 | |||
1627 | emit_prologue(&prog, bpf_prog->aux->stack_depth); | ||
1628 | |||
1629 | for (i = 0; i < insn_cnt; i++, insn++) { | ||
1630 | const s32 imm32 = insn->imm; | ||
1631 | const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; | ||
1632 | const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true; | ||
1633 | const bool sstk = insn->src_reg == BPF_REG_AX ? false : true; | ||
1634 | const u8 code = insn->code; | ||
1635 | const u8 *dst = bpf2ia32[insn->dst_reg]; | ||
1636 | const u8 *src = bpf2ia32[insn->src_reg]; | ||
1637 | const u8 *r0 = bpf2ia32[BPF_REG_0]; | ||
1638 | s64 jmp_offset; | ||
1639 | u8 jmp_cond; | ||
1640 | int ilen; | ||
1641 | u8 *func; | ||
1642 | |||
1643 | switch (code) { | ||
1644 | /* ALU operations */ | ||
1645 | /* dst = src */ | ||
1646 | case BPF_ALU | BPF_MOV | BPF_K: | ||
1647 | case BPF_ALU | BPF_MOV | BPF_X: | ||
1648 | case BPF_ALU64 | BPF_MOV | BPF_K: | ||
1649 | case BPF_ALU64 | BPF_MOV | BPF_X: | ||
1650 | switch (BPF_SRC(code)) { | ||
1651 | case BPF_X: | ||
1652 | emit_ia32_mov_r64(is64, dst, src, dstk, | ||
1653 | sstk, &prog); | ||
1654 | break; | ||
1655 | case BPF_K: | ||
1656 | /* Sign-extend immediate value to dst reg */ | ||
1657 | emit_ia32_mov_i64(is64, dst, imm32, | ||
1658 | dstk, &prog); | ||
1659 | break; | ||
1660 | } | ||
1661 | break; | ||
1662 | /* dst = dst + src/imm */ | ||
1663 | /* dst = dst - src/imm */ | ||
1664 | /* dst = dst | src/imm */ | ||
1665 | /* dst = dst & src/imm */ | ||
1666 | /* dst = dst ^ src/imm */ | ||
1667 | /* dst = dst * src/imm */ | ||
1668 | /* dst = dst << src */ | ||
1669 | /* dst = dst >> src */ | ||
1670 | case BPF_ALU | BPF_ADD | BPF_K: | ||
1671 | case BPF_ALU | BPF_ADD | BPF_X: | ||
1672 | case BPF_ALU | BPF_SUB | BPF_K: | ||
1673 | case BPF_ALU | BPF_SUB | BPF_X: | ||
1674 | case BPF_ALU | BPF_OR | BPF_K: | ||
1675 | case BPF_ALU | BPF_OR | BPF_X: | ||
1676 | case BPF_ALU | BPF_AND | BPF_K: | ||
1677 | case BPF_ALU | BPF_AND | BPF_X: | ||
1678 | case BPF_ALU | BPF_XOR | BPF_K: | ||
1679 | case BPF_ALU | BPF_XOR | BPF_X: | ||
1680 | case BPF_ALU64 | BPF_ADD | BPF_K: | ||
1681 | case BPF_ALU64 | BPF_ADD | BPF_X: | ||
1682 | case BPF_ALU64 | BPF_SUB | BPF_K: | ||
1683 | case BPF_ALU64 | BPF_SUB | BPF_X: | ||
1684 | case BPF_ALU64 | BPF_OR | BPF_K: | ||
1685 | case BPF_ALU64 | BPF_OR | BPF_X: | ||
1686 | case BPF_ALU64 | BPF_AND | BPF_K: | ||
1687 | case BPF_ALU64 | BPF_AND | BPF_X: | ||
1688 | case BPF_ALU64 | BPF_XOR | BPF_K: | ||
1689 | case BPF_ALU64 | BPF_XOR | BPF_X: | ||
1690 | switch (BPF_SRC(code)) { | ||
1691 | case BPF_X: | ||
1692 | emit_ia32_alu_r64(is64, BPF_OP(code), dst, | ||
1693 | src, dstk, sstk, &prog); | ||
1694 | break; | ||
1695 | case BPF_K: | ||
1696 | emit_ia32_alu_i64(is64, BPF_OP(code), dst, | ||
1697 | imm32, dstk, &prog); | ||
1698 | break; | ||
1699 | } | ||
1700 | break; | ||
1701 | case BPF_ALU | BPF_MUL | BPF_K: | ||
1702 | case BPF_ALU | BPF_MUL | BPF_X: | ||
1703 | switch (BPF_SRC(code)) { | ||
1704 | case BPF_X: | ||
1705 | emit_ia32_mul_r(dst_lo, src_lo, dstk, | ||
1706 | sstk, &prog); | ||
1707 | break; | ||
1708 | case BPF_K: | ||
1709 | /* mov ecx,imm32*/ | ||
1710 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
1711 | imm32); | ||
1712 | emit_ia32_mul_r(dst_lo, IA32_ECX, dstk, | ||
1713 | false, &prog); | ||
1714 | break; | ||
1715 | } | ||
1716 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
1717 | break; | ||
1718 | case BPF_ALU | BPF_LSH | BPF_X: | ||
1719 | case BPF_ALU | BPF_RSH | BPF_X: | ||
1720 | case BPF_ALU | BPF_ARSH | BPF_K: | ||
1721 | case BPF_ALU | BPF_ARSH | BPF_X: | ||
1722 | switch (BPF_SRC(code)) { | ||
1723 | case BPF_X: | ||
1724 | emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo, | ||
1725 | dstk, sstk, &prog); | ||
1726 | break; | ||
1727 | case BPF_K: | ||
1728 | /* mov ecx,imm32*/ | ||
1729 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
1730 | imm32); | ||
1731 | emit_ia32_shift_r(BPF_OP(code), dst_lo, | ||
1732 | IA32_ECX, dstk, false, | ||
1733 | &prog); | ||
1734 | break; | ||
1735 | } | ||
1736 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
1737 | break; | ||
1738 | /* dst = dst / src(imm) */ | ||
1739 | /* dst = dst % src(imm) */ | ||
1740 | case BPF_ALU | BPF_DIV | BPF_K: | ||
1741 | case BPF_ALU | BPF_DIV | BPF_X: | ||
1742 | case BPF_ALU | BPF_MOD | BPF_K: | ||
1743 | case BPF_ALU | BPF_MOD | BPF_X: | ||
1744 | switch (BPF_SRC(code)) { | ||
1745 | case BPF_X: | ||
1746 | emit_ia32_div_mod_r(BPF_OP(code), dst_lo, | ||
1747 | src_lo, dstk, sstk, &prog); | ||
1748 | break; | ||
1749 | case BPF_K: | ||
1750 | /* mov ecx,imm32*/ | ||
1751 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
1752 | imm32); | ||
1753 | emit_ia32_div_mod_r(BPF_OP(code), dst_lo, | ||
1754 | IA32_ECX, dstk, false, | ||
1755 | &prog); | ||
1756 | break; | ||
1757 | } | ||
1758 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
1759 | break; | ||
1760 | case BPF_ALU64 | BPF_DIV | BPF_K: | ||
1761 | case BPF_ALU64 | BPF_DIV | BPF_X: | ||
1762 | case BPF_ALU64 | BPF_MOD | BPF_K: | ||
1763 | case BPF_ALU64 | BPF_MOD | BPF_X: | ||
1764 | goto notyet; | ||
1765 | /* dst = dst >> imm */ | ||
1766 | /* dst = dst << imm */ | ||
1767 | case BPF_ALU | BPF_RSH | BPF_K: | ||
1768 | case BPF_ALU | BPF_LSH | BPF_K: | ||
1769 | if (unlikely(imm32 > 31)) | ||
1770 | return -EINVAL; | ||
1771 | /* mov ecx,imm32*/ | ||
1772 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
1773 | emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk, | ||
1774 | false, &prog); | ||
1775 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
1776 | break; | ||
1777 | /* dst = dst << imm */ | ||
1778 | case BPF_ALU64 | BPF_LSH | BPF_K: | ||
1779 | if (unlikely(imm32 > 63)) | ||
1780 | return -EINVAL; | ||
1781 | emit_ia32_lsh_i64(dst, imm32, dstk, &prog); | ||
1782 | break; | ||
1783 | /* dst = dst >> imm */ | ||
1784 | case BPF_ALU64 | BPF_RSH | BPF_K: | ||
1785 | if (unlikely(imm32 > 63)) | ||
1786 | return -EINVAL; | ||
1787 | emit_ia32_rsh_i64(dst, imm32, dstk, &prog); | ||
1788 | break; | ||
1789 | /* dst = dst << src */ | ||
1790 | case BPF_ALU64 | BPF_LSH | BPF_X: | ||
1791 | emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog); | ||
1792 | break; | ||
1793 | /* dst = dst >> src */ | ||
1794 | case BPF_ALU64 | BPF_RSH | BPF_X: | ||
1795 | emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog); | ||
1796 | break; | ||
1797 | /* dst = dst >> src (signed) */ | ||
1798 | case BPF_ALU64 | BPF_ARSH | BPF_X: | ||
1799 | emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog); | ||
1800 | break; | ||
1801 | /* dst = dst >> imm (signed) */ | ||
1802 | case BPF_ALU64 | BPF_ARSH | BPF_K: | ||
1803 | if (unlikely(imm32 > 63)) | ||
1804 | return -EINVAL; | ||
1805 | emit_ia32_arsh_i64(dst, imm32, dstk, &prog); | ||
1806 | break; | ||
1807 | /* dst = ~dst */ | ||
1808 | case BPF_ALU | BPF_NEG: | ||
1809 | emit_ia32_alu_i(is64, false, BPF_OP(code), | ||
1810 | dst_lo, 0, dstk, &prog); | ||
1811 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
1812 | break; | ||
1813 | /* dst = ~dst (64 bit) */ | ||
1814 | case BPF_ALU64 | BPF_NEG: | ||
1815 | emit_ia32_neg64(dst, dstk, &prog); | ||
1816 | break; | ||
1817 | /* dst = dst * src/imm */ | ||
1818 | case BPF_ALU64 | BPF_MUL | BPF_X: | ||
1819 | case BPF_ALU64 | BPF_MUL | BPF_K: | ||
1820 | switch (BPF_SRC(code)) { | ||
1821 | case BPF_X: | ||
1822 | emit_ia32_mul_r64(dst, src, dstk, sstk, &prog); | ||
1823 | break; | ||
1824 | case BPF_K: | ||
1825 | emit_ia32_mul_i64(dst, imm32, dstk, &prog); | ||
1826 | break; | ||
1827 | } | ||
1828 | break; | ||
1829 | /* dst = htole(dst) */ | ||
1830 | case BPF_ALU | BPF_END | BPF_FROM_LE: | ||
1831 | emit_ia32_to_le_r64(dst, imm32, dstk, &prog); | ||
1832 | break; | ||
1833 | /* dst = htobe(dst) */ | ||
1834 | case BPF_ALU | BPF_END | BPF_FROM_BE: | ||
1835 | emit_ia32_to_be_r64(dst, imm32, dstk, &prog); | ||
1836 | break; | ||
1837 | /* dst = imm64 */ | ||
1838 | case BPF_LD | BPF_IMM | BPF_DW: { | ||
1839 | s32 hi, lo = imm32; | ||
1840 | |||
1841 | hi = insn[1].imm; | ||
1842 | emit_ia32_mov_i(dst_lo, lo, dstk, &prog); | ||
1843 | emit_ia32_mov_i(dst_hi, hi, dstk, &prog); | ||
1844 | insn++; | ||
1845 | i++; | ||
1846 | break; | ||
1847 | } | ||
1848 | /* ST: *(u8*)(dst_reg + off) = imm */ | ||
1849 | case BPF_ST | BPF_MEM | BPF_H: | ||
1850 | case BPF_ST | BPF_MEM | BPF_B: | ||
1851 | case BPF_ST | BPF_MEM | BPF_W: | ||
1852 | case BPF_ST | BPF_MEM | BPF_DW: | ||
1853 | if (dstk) | ||
1854 | /* mov eax,dword ptr [ebp+off] */ | ||
1855 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1856 | STACK_VAR(dst_lo)); | ||
1857 | else | ||
1858 | /* mov eax,dst_lo */ | ||
1859 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
1860 | |||
1861 | switch (BPF_SIZE(code)) { | ||
1862 | case BPF_B: | ||
1863 | EMIT(0xC6, 1); break; | ||
1864 | case BPF_H: | ||
1865 | EMIT2(0x66, 0xC7); break; | ||
1866 | case BPF_W: | ||
1867 | case BPF_DW: | ||
1868 | EMIT(0xC7, 1); break; | ||
1869 | } | ||
1870 | |||
1871 | if (is_imm8(insn->off)) | ||
1872 | EMIT2(add_1reg(0x40, IA32_EAX), insn->off); | ||
1873 | else | ||
1874 | EMIT1_off32(add_1reg(0x80, IA32_EAX), | ||
1875 | insn->off); | ||
1876 | EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code))); | ||
1877 | |||
1878 | if (BPF_SIZE(code) == BPF_DW) { | ||
1879 | u32 hi; | ||
1880 | |||
1881 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
1882 | EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX), | ||
1883 | insn->off + 4); | ||
1884 | EMIT(hi, 4); | ||
1885 | } | ||
1886 | break; | ||
1887 | |||
1888 | /* STX: *(u8*)(dst_reg + off) = src_reg */ | ||
1889 | case BPF_STX | BPF_MEM | BPF_B: | ||
1890 | case BPF_STX | BPF_MEM | BPF_H: | ||
1891 | case BPF_STX | BPF_MEM | BPF_W: | ||
1892 | case BPF_STX | BPF_MEM | BPF_DW: | ||
1893 | if (dstk) | ||
1894 | /* mov eax,dword ptr [ebp+off] */ | ||
1895 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1896 | STACK_VAR(dst_lo)); | ||
1897 | else | ||
1898 | /* mov eax,dst_lo */ | ||
1899 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
1900 | |||
1901 | if (sstk) | ||
1902 | /* mov edx,dword ptr [ebp+off] */ | ||
1903 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
1904 | STACK_VAR(src_lo)); | ||
1905 | else | ||
1906 | /* mov edx,src_lo */ | ||
1907 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX)); | ||
1908 | |||
1909 | switch (BPF_SIZE(code)) { | ||
1910 | case BPF_B: | ||
1911 | EMIT(0x88, 1); break; | ||
1912 | case BPF_H: | ||
1913 | EMIT2(0x66, 0x89); break; | ||
1914 | case BPF_W: | ||
1915 | case BPF_DW: | ||
1916 | EMIT(0x89, 1); break; | ||
1917 | } | ||
1918 | |||
1919 | if (is_imm8(insn->off)) | ||
1920 | EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
1921 | insn->off); | ||
1922 | else | ||
1923 | EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
1924 | insn->off); | ||
1925 | |||
1926 | if (BPF_SIZE(code) == BPF_DW) { | ||
1927 | if (sstk) | ||
1928 | /* mov edi,dword ptr [ebp+off] */ | ||
1929 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, | ||
1930 | IA32_EDX), | ||
1931 | STACK_VAR(src_hi)); | ||
1932 | else | ||
1933 | /* mov edi,src_hi */ | ||
1934 | EMIT2(0x8B, add_2reg(0xC0, src_hi, | ||
1935 | IA32_EDX)); | ||
1936 | EMIT1(0x89); | ||
1937 | if (is_imm8(insn->off + 4)) { | ||
1938 | EMIT2(add_2reg(0x40, IA32_EAX, | ||
1939 | IA32_EDX), | ||
1940 | insn->off + 4); | ||
1941 | } else { | ||
1942 | EMIT1(add_2reg(0x80, IA32_EAX, | ||
1943 | IA32_EDX)); | ||
1944 | EMIT(insn->off + 4, 4); | ||
1945 | } | ||
1946 | } | ||
1947 | break; | ||
1948 | |||
1949 | /* LDX: dst_reg = *(u8*)(src_reg + off) */ | ||
1950 | case BPF_LDX | BPF_MEM | BPF_B: | ||
1951 | case BPF_LDX | BPF_MEM | BPF_H: | ||
1952 | case BPF_LDX | BPF_MEM | BPF_W: | ||
1953 | case BPF_LDX | BPF_MEM | BPF_DW: | ||
1954 | if (sstk) | ||
1955 | /* mov eax,dword ptr [ebp+off] */ | ||
1956 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1957 | STACK_VAR(src_lo)); | ||
1958 | else | ||
1959 | /* mov eax,dword ptr [ebp+off] */ | ||
1960 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX)); | ||
1961 | |||
1962 | switch (BPF_SIZE(code)) { | ||
1963 | case BPF_B: | ||
1964 | EMIT2(0x0F, 0xB6); break; | ||
1965 | case BPF_H: | ||
1966 | EMIT2(0x0F, 0xB7); break; | ||
1967 | case BPF_W: | ||
1968 | case BPF_DW: | ||
1969 | EMIT(0x8B, 1); break; | ||
1970 | } | ||
1971 | |||
1972 | if (is_imm8(insn->off)) | ||
1973 | EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
1974 | insn->off); | ||
1975 | else | ||
1976 | EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
1977 | insn->off); | ||
1978 | |||
1979 | if (dstk) | ||
1980 | /* mov dword ptr [ebp+off],edx */ | ||
1981 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
1982 | STACK_VAR(dst_lo)); | ||
1983 | else | ||
1984 | /* mov dst_lo,edx */ | ||
1985 | EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX)); | ||
1986 | switch (BPF_SIZE(code)) { | ||
1987 | case BPF_B: | ||
1988 | case BPF_H: | ||
1989 | case BPF_W: | ||
1990 | if (dstk) { | ||
1991 | EMIT3(0xC7, add_1reg(0x40, IA32_EBP), | ||
1992 | STACK_VAR(dst_hi)); | ||
1993 | EMIT(0x0, 4); | ||
1994 | } else { | ||
1995 | EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0); | ||
1996 | } | ||
1997 | break; | ||
1998 | case BPF_DW: | ||
1999 | EMIT2_off32(0x8B, | ||
2000 | add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
2001 | insn->off + 4); | ||
2002 | if (dstk) | ||
2003 | EMIT3(0x89, | ||
2004 | add_2reg(0x40, IA32_EBP, | ||
2005 | IA32_EDX), | ||
2006 | STACK_VAR(dst_hi)); | ||
2007 | else | ||
2008 | EMIT2(0x89, | ||
2009 | add_2reg(0xC0, dst_hi, IA32_EDX)); | ||
2010 | break; | ||
2011 | default: | ||
2012 | break; | ||
2013 | } | ||
2014 | break; | ||
2015 | /* call */ | ||
2016 | case BPF_JMP | BPF_CALL: | ||
2017 | { | ||
2018 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
2019 | const u8 *r2 = bpf2ia32[BPF_REG_2]; | ||
2020 | const u8 *r3 = bpf2ia32[BPF_REG_3]; | ||
2021 | const u8 *r4 = bpf2ia32[BPF_REG_4]; | ||
2022 | const u8 *r5 = bpf2ia32[BPF_REG_5]; | ||
2023 | |||
2024 | if (insn->src_reg == BPF_PSEUDO_CALL) | ||
2025 | goto notyet; | ||
2026 | |||
2027 | func = (u8 *) __bpf_call_base + imm32; | ||
2028 | jmp_offset = func - (image + addrs[i]); | ||
2029 | |||
2030 | if (!imm32 || !is_simm32(jmp_offset)) { | ||
2031 | pr_err("unsupported BPF func %d addr %p image %p\n", | ||
2032 | imm32, func, image); | ||
2033 | return -EINVAL; | ||
2034 | } | ||
2035 | |||
2036 | /* mov eax,dword ptr [ebp+off] */ | ||
2037 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2038 | STACK_VAR(r1[0])); | ||
2039 | /* mov edx,dword ptr [ebp+off] */ | ||
2040 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2041 | STACK_VAR(r1[1])); | ||
2042 | |||
2043 | emit_push_r64(r5, &prog); | ||
2044 | emit_push_r64(r4, &prog); | ||
2045 | emit_push_r64(r3, &prog); | ||
2046 | emit_push_r64(r2, &prog); | ||
2047 | |||
2048 | EMIT1_off32(0xE8, jmp_offset + 9); | ||
2049 | |||
2050 | /* mov dword ptr [ebp+off],eax */ | ||
2051 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2052 | STACK_VAR(r0[0])); | ||
2053 | /* mov dword ptr [ebp+off],edx */ | ||
2054 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2055 | STACK_VAR(r0[1])); | ||
2056 | |||
2057 | /* add esp,32 */ | ||
2058 | EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32); | ||
2059 | break; | ||
2060 | } | ||
2061 | case BPF_JMP | BPF_TAIL_CALL: | ||
2062 | emit_bpf_tail_call(&prog); | ||
2063 | break; | ||
2064 | |||
2065 | /* cond jump */ | ||
2066 | case BPF_JMP | BPF_JEQ | BPF_X: | ||
2067 | case BPF_JMP | BPF_JNE | BPF_X: | ||
2068 | case BPF_JMP | BPF_JGT | BPF_X: | ||
2069 | case BPF_JMP | BPF_JLT | BPF_X: | ||
2070 | case BPF_JMP | BPF_JGE | BPF_X: | ||
2071 | case BPF_JMP | BPF_JLE | BPF_X: | ||
2072 | case BPF_JMP | BPF_JSGT | BPF_X: | ||
2073 | case BPF_JMP | BPF_JSLE | BPF_X: | ||
2074 | case BPF_JMP | BPF_JSLT | BPF_X: | ||
2075 | case BPF_JMP | BPF_JSGE | BPF_X: { | ||
2076 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
2077 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
2078 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; | ||
2079 | u8 sreg_hi = sstk ? IA32_EBX : src_hi; | ||
2080 | |||
2081 | if (dstk) { | ||
2082 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2083 | STACK_VAR(dst_lo)); | ||
2084 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2085 | STACK_VAR(dst_hi)); | ||
2086 | } | ||
2087 | |||
2088 | if (sstk) { | ||
2089 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
2090 | STACK_VAR(src_lo)); | ||
2091 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), | ||
2092 | STACK_VAR(src_hi)); | ||
2093 | } | ||
2094 | |||
2095 | /* cmp dreg_hi,sreg_hi */ | ||
2096 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); | ||
2097 | EMIT2(IA32_JNE, 2); | ||
2098 | /* cmp dreg_lo,sreg_lo */ | ||
2099 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); | ||
2100 | goto emit_cond_jmp; | ||
2101 | } | ||
2102 | case BPF_JMP | BPF_JSET | BPF_X: { | ||
2103 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
2104 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
2105 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; | ||
2106 | u8 sreg_hi = sstk ? IA32_EBX : src_hi; | ||
2107 | |||
2108 | if (dstk) { | ||
2109 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2110 | STACK_VAR(dst_lo)); | ||
2111 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2112 | STACK_VAR(dst_hi)); | ||
2113 | } | ||
2114 | |||
2115 | if (sstk) { | ||
2116 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
2117 | STACK_VAR(src_lo)); | ||
2118 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), | ||
2119 | STACK_VAR(src_hi)); | ||
2120 | } | ||
2121 | /* and dreg_lo,sreg_lo */ | ||
2122 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); | ||
2123 | /* and dreg_hi,sreg_hi */ | ||
2124 | EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); | ||
2125 | /* or dreg_lo,dreg_hi */ | ||
2126 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
2127 | goto emit_cond_jmp; | ||
2128 | } | ||
2129 | case BPF_JMP | BPF_JSET | BPF_K: { | ||
2130 | u32 hi; | ||
2131 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
2132 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
2133 | u8 sreg_lo = IA32_ECX; | ||
2134 | u8 sreg_hi = IA32_EBX; | ||
2135 | |||
2136 | if (dstk) { | ||
2137 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2138 | STACK_VAR(dst_lo)); | ||
2139 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2140 | STACK_VAR(dst_hi)); | ||
2141 | } | ||
2142 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
2143 | |||
2144 | /* mov ecx,imm32 */ | ||
2145 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
2146 | /* mov ebx,imm32 */ | ||
2147 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); | ||
2148 | |||
2149 | /* and dreg_lo,sreg_lo */ | ||
2150 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); | ||
2151 | /* and dreg_hi,sreg_hi */ | ||
2152 | EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); | ||
2153 | /* or dreg_lo,dreg_hi */ | ||
2154 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
2155 | goto emit_cond_jmp; | ||
2156 | } | ||
2157 | case BPF_JMP | BPF_JEQ | BPF_K: | ||
2158 | case BPF_JMP | BPF_JNE | BPF_K: | ||
2159 | case BPF_JMP | BPF_JGT | BPF_K: | ||
2160 | case BPF_JMP | BPF_JLT | BPF_K: | ||
2161 | case BPF_JMP | BPF_JGE | BPF_K: | ||
2162 | case BPF_JMP | BPF_JLE | BPF_K: | ||
2163 | case BPF_JMP | BPF_JSGT | BPF_K: | ||
2164 | case BPF_JMP | BPF_JSLE | BPF_K: | ||
2165 | case BPF_JMP | BPF_JSLT | BPF_K: | ||
2166 | case BPF_JMP | BPF_JSGE | BPF_K: { | ||
2167 | u32 hi; | ||
2168 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
2169 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
2170 | u8 sreg_lo = IA32_ECX; | ||
2171 | u8 sreg_hi = IA32_EBX; | ||
2172 | |||
2173 | if (dstk) { | ||
2174 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2175 | STACK_VAR(dst_lo)); | ||
2176 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2177 | STACK_VAR(dst_hi)); | ||
2178 | } | ||
2179 | |||
2180 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
2181 | /* mov ecx,imm32 */ | ||
2182 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
2183 | /* mov ebx,imm32 */ | ||
2184 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); | ||
2185 | |||
2186 | /* cmp dreg_hi,sreg_hi */ | ||
2187 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); | ||
2188 | EMIT2(IA32_JNE, 2); | ||
2189 | /* cmp dreg_lo,sreg_lo */ | ||
2190 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); | ||
2191 | |||
2192 | emit_cond_jmp: /* Convert BPF opcode to x86 */ | ||
2193 | switch (BPF_OP(code)) { | ||
2194 | case BPF_JEQ: | ||
2195 | jmp_cond = IA32_JE; | ||
2196 | break; | ||
2197 | case BPF_JSET: | ||
2198 | case BPF_JNE: | ||
2199 | jmp_cond = IA32_JNE; | ||
2200 | break; | ||
2201 | case BPF_JGT: | ||
2202 | /* GT is unsigned '>', JA in x86 */ | ||
2203 | jmp_cond = IA32_JA; | ||
2204 | break; | ||
2205 | case BPF_JLT: | ||
2206 | /* LT is unsigned '<', JB in x86 */ | ||
2207 | jmp_cond = IA32_JB; | ||
2208 | break; | ||
2209 | case BPF_JGE: | ||
2210 | /* GE is unsigned '>=', JAE in x86 */ | ||
2211 | jmp_cond = IA32_JAE; | ||
2212 | break; | ||
2213 | case BPF_JLE: | ||
2214 | /* LE is unsigned '<=', JBE in x86 */ | ||
2215 | jmp_cond = IA32_JBE; | ||
2216 | break; | ||
2217 | case BPF_JSGT: | ||
2218 | /* Signed '>', GT in x86 */ | ||
2219 | jmp_cond = IA32_JG; | ||
2220 | break; | ||
2221 | case BPF_JSLT: | ||
2222 | /* Signed '<', LT in x86 */ | ||
2223 | jmp_cond = IA32_JL; | ||
2224 | break; | ||
2225 | case BPF_JSGE: | ||
2226 | /* Signed '>=', GE in x86 */ | ||
2227 | jmp_cond = IA32_JGE; | ||
2228 | break; | ||
2229 | case BPF_JSLE: | ||
2230 | /* Signed '<=', LE in x86 */ | ||
2231 | jmp_cond = IA32_JLE; | ||
2232 | break; | ||
2233 | default: /* to silence GCC warning */ | ||
2234 | return -EFAULT; | ||
2235 | } | ||
2236 | jmp_offset = addrs[i + insn->off] - addrs[i]; | ||
2237 | if (is_imm8(jmp_offset)) { | ||
2238 | EMIT2(jmp_cond, jmp_offset); | ||
2239 | } else if (is_simm32(jmp_offset)) { | ||
2240 | EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); | ||
2241 | } else { | ||
2242 | pr_err("cond_jmp gen bug %llx\n", jmp_offset); | ||
2243 | return -EFAULT; | ||
2244 | } | ||
2245 | |||
2246 | break; | ||
2247 | } | ||
2248 | case BPF_JMP | BPF_JA: | ||
2249 | if (insn->off == -1) | ||
2250 | /* -1 jmp instructions will always jump | ||
2251 | * backwards two bytes. Explicitly handling | ||
2252 | * this case avoids wasting too many passes | ||
2253 | * when there are long sequences of replaced | ||
2254 | * dead code. | ||
2255 | */ | ||
2256 | jmp_offset = -2; | ||
2257 | else | ||
2258 | jmp_offset = addrs[i + insn->off] - addrs[i]; | ||
2259 | |||
2260 | if (!jmp_offset) | ||
2261 | /* Optimize out nop jumps */ | ||
2262 | break; | ||
2263 | emit_jmp: | ||
2264 | if (is_imm8(jmp_offset)) { | ||
2265 | EMIT2(0xEB, jmp_offset); | ||
2266 | } else if (is_simm32(jmp_offset)) { | ||
2267 | EMIT1_off32(0xE9, jmp_offset); | ||
2268 | } else { | ||
2269 | pr_err("jmp gen bug %llx\n", jmp_offset); | ||
2270 | return -EFAULT; | ||
2271 | } | ||
2272 | break; | ||
2273 | /* STX XADD: lock *(u32 *)(dst + off) += src */ | ||
2274 | case BPF_STX | BPF_XADD | BPF_W: | ||
2275 | /* STX XADD: lock *(u64 *)(dst + off) += src */ | ||
2276 | case BPF_STX | BPF_XADD | BPF_DW: | ||
2277 | goto notyet; | ||
2278 | case BPF_JMP | BPF_EXIT: | ||
2279 | if (seen_exit) { | ||
2280 | jmp_offset = ctx->cleanup_addr - addrs[i]; | ||
2281 | goto emit_jmp; | ||
2282 | } | ||
2283 | seen_exit = true; | ||
2284 | /* Update cleanup_addr */ | ||
2285 | ctx->cleanup_addr = proglen; | ||
2286 | emit_epilogue(&prog, bpf_prog->aux->stack_depth); | ||
2287 | break; | ||
2288 | notyet: | ||
2289 | pr_info_once("*** NOT YET: opcode %02x ***\n", code); | ||
2290 | return -EFAULT; | ||
2291 | default: | ||
2292 | /* | ||
2293 | * This error will be seen if new instruction was added | ||
2294 | * to interpreter, but not to JIT or if there is junk in | ||
2295 | * bpf_prog | ||
2296 | */ | ||
2297 | pr_err("bpf_jit: unknown opcode %02x\n", code); | ||
2298 | return -EINVAL; | ||
2299 | } | ||
2300 | |||
2301 | ilen = prog - temp; | ||
2302 | if (ilen > BPF_MAX_INSN_SIZE) { | ||
2303 | pr_err("bpf_jit: fatal insn size error\n"); | ||
2304 | return -EFAULT; | ||
2305 | } | ||
2306 | |||
2307 | if (image) { | ||
2308 | if (unlikely(proglen + ilen > oldproglen)) { | ||
2309 | pr_err("bpf_jit: fatal error\n"); | ||
2310 | return -EFAULT; | ||
2311 | } | ||
2312 | memcpy(image + proglen, temp, ilen); | ||
2313 | } | ||
2314 | proglen += ilen; | ||
2315 | addrs[i] = proglen; | ||
2316 | prog = temp; | ||
2317 | } | ||
2318 | return proglen; | ||
2319 | } | ||
2320 | |||
2321 | struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | ||
2322 | { | ||
2323 | struct bpf_binary_header *header = NULL; | ||
2324 | struct bpf_prog *tmp, *orig_prog = prog; | ||
2325 | int proglen, oldproglen = 0; | ||
2326 | struct jit_context ctx = {}; | ||
2327 | bool tmp_blinded = false; | ||
2328 | u8 *image = NULL; | ||
2329 | int *addrs; | ||
2330 | int pass; | ||
2331 | int i; | ||
2332 | |||
2333 | if (!prog->jit_requested) | ||
2334 | return orig_prog; | ||
2335 | |||
2336 | tmp = bpf_jit_blind_constants(prog); | ||
2337 | /* | ||
2338 | * If blinding was requested and we failed during blinding, | ||
2339 | * we must fall back to the interpreter. | ||
2340 | */ | ||
2341 | if (IS_ERR(tmp)) | ||
2342 | return orig_prog; | ||
2343 | if (tmp != prog) { | ||
2344 | tmp_blinded = true; | ||
2345 | prog = tmp; | ||
2346 | } | ||
2347 | |||
2348 | addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL); | ||
2349 | if (!addrs) { | ||
2350 | prog = orig_prog; | ||
2351 | goto out; | ||
2352 | } | ||
2353 | |||
2354 | /* | ||
2355 | * Before first pass, make a rough estimation of addrs[] | ||
2356 | * each BPF instruction is translated to less than 64 bytes | ||
2357 | */ | ||
2358 | for (proglen = 0, i = 0; i < prog->len; i++) { | ||
2359 | proglen += 64; | ||
2360 | addrs[i] = proglen; | ||
2361 | } | ||
2362 | ctx.cleanup_addr = proglen; | ||
2363 | |||
2364 | /* | ||
2365 | * JITed image shrinks with every pass and the loop iterates | ||
2366 | * until the image stops shrinking. Very large BPF programs | ||
2367 | * may converge on the last pass. In such case do one more | ||
2368 | * pass to emit the final image. | ||
2369 | */ | ||
2370 | for (pass = 0; pass < 20 || image; pass++) { | ||
2371 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); | ||
2372 | if (proglen <= 0) { | ||
2373 | out_image: | ||
2374 | image = NULL; | ||
2375 | if (header) | ||
2376 | bpf_jit_binary_free(header); | ||
2377 | prog = orig_prog; | ||
2378 | goto out_addrs; | ||
2379 | } | ||
2380 | if (image) { | ||
2381 | if (proglen != oldproglen) { | ||
2382 | pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", | ||
2383 | proglen, oldproglen); | ||
2384 | goto out_image; | ||
2385 | } | ||
2386 | break; | ||
2387 | } | ||
2388 | if (proglen == oldproglen) { | ||
2389 | header = bpf_jit_binary_alloc(proglen, &image, | ||
2390 | 1, jit_fill_hole); | ||
2391 | if (!header) { | ||
2392 | prog = orig_prog; | ||
2393 | goto out_addrs; | ||
2394 | } | ||
2395 | } | ||
2396 | oldproglen = proglen; | ||
2397 | cond_resched(); | ||
2398 | } | ||
2399 | |||
2400 | if (bpf_jit_enable > 1) | ||
2401 | bpf_jit_dump(prog->len, proglen, pass + 1, image); | ||
2402 | |||
2403 | if (image) { | ||
2404 | bpf_jit_binary_lock_ro(header); | ||
2405 | prog->bpf_func = (void *)image; | ||
2406 | prog->jited = 1; | ||
2407 | prog->jited_len = proglen; | ||
2408 | } else { | ||
2409 | prog = orig_prog; | ||
2410 | } | ||
2411 | |||
2412 | out_addrs: | ||
2413 | kfree(addrs); | ||
2414 | out: | ||
2415 | if (tmp_blinded) | ||
2416 | bpf_jit_prog_release_other(prog, prog == orig_prog ? | ||
2417 | tmp : orig_prog); | ||
2418 | return prog; | ||
2419 | } | ||