author	Alexei Starovoitov <ast@plumgrid.com>	2014-05-13 22:50:46 -0400
committer	David S. Miller <davem@davemloft.net>	2014-05-15 16:31:30 -0400
commit	622582786c9e041d0bd52bde201787adeab249f8 (patch)
tree	2521b3a1d52c88b6884259626b331ec4195fc8e7 /arch/x86
parent	f3c2af7ba17a83809806880062c9ad541744fb95 (diff)
net: filter: x86: internal BPF JIT
Maps all internal BPF instructions into x86_64 instructions.
This patch replaces the original BPF x64 JIT with the internal BPF x64 JIT.
sysctl net.core.bpf_jit_enable is reused as on/off switch.

Performance:

1. old BPF JIT and internal BPF JIT generate equivalent x86_64 code.
   No performance difference is observed for filters that were JIT-able before.

Example assembler code for BPF filter "tcpdump port 22"

original BPF -> old JIT:              original BPF -> internal BPF -> new JIT:
   0:  push   %rbp                       0:  push   %rbp
   1:  mov    %rsp,%rbp                  1:  mov    %rsp,%rbp
   4:  sub    $0x60,%rsp                 4:  sub    $0x228,%rsp
   8:  mov    %rbx,-0x8(%rbp)            b:  mov    %rbx,-0x228(%rbp) // prologue
                                        12:  mov    %r13,-0x220(%rbp)
                                        19:  mov    %r14,-0x218(%rbp)
                                        20:  mov    %r15,-0x210(%rbp)
                                        27:  xor    %eax,%eax // clear A
   c:  xor    %ebx,%ebx                 29:  xor    %r13,%r13 // clear X
   e:  mov    0x68(%rdi),%r9d           2c:  mov    0x68(%rdi),%r9d
  12:  sub    0x6c(%rdi),%r9d           30:  sub    0x6c(%rdi),%r9d
  16:  mov    0xd8(%rdi),%r8            34:  mov    0xd8(%rdi),%r10
                                        3b:  mov    %rdi,%rbx
  1d:  mov    $0xc,%esi                 3e:  mov    $0xc,%esi
  22:  callq  0xffffffffe1021e15        43:  callq  0xffffffffe102bd75
  27:  cmp    $0x86dd,%eax              48:  cmp    $0x86dd,%rax
  2c:  jne    0x0000000000000069        4f:  jne    0x000000000000009a
  2e:  mov    $0x14,%esi                51:  mov    $0x14,%esi
  33:  callq  0xffffffffe1021e31        56:  callq  0xffffffffe102bd91
  38:  cmp    $0x84,%eax                5b:  cmp    $0x84,%rax
  3d:  je     0x0000000000000049        62:  je     0x0000000000000074
  3f:  cmp    $0x6,%eax                 64:  cmp    $0x6,%rax
  42:  je     0x0000000000000049        68:  je     0x0000000000000074
  44:  cmp    $0x11,%eax                6a:  cmp    $0x11,%rax
  47:  jne    0x00000000000000c6        6e:  jne    0x0000000000000117
  49:  mov    $0x36,%esi                74:  mov    $0x36,%esi
  4e:  callq  0xffffffffe1021e15        79:  callq  0xffffffffe102bd75
  53:  cmp    $0x16,%eax                7e:  cmp    $0x16,%rax
  56:  je     0x00000000000000bf        82:  je     0x0000000000000110
  58:  mov    $0x38,%esi                88:  mov    $0x38,%esi
  5d:  callq  0xffffffffe1021e15        8d:  callq  0xffffffffe102bd75
  62:  cmp    $0x16,%eax                92:  cmp    $0x16,%rax
  65:  je     0x00000000000000bf        96:  je     0x0000000000000110
  67:  jmp    0x00000000000000c6        98:  jmp    0x0000000000000117
  69:  cmp    $0x800,%eax               9a:  cmp    $0x800,%rax
  6e:  jne    0x00000000000000c6        a1:  jne    0x0000000000000117
  70:  mov    $0x17,%esi                a3:  mov    $0x17,%esi
  75:  callq  0xffffffffe1021e31        a8:  callq  0xffffffffe102bd91
  7a:  cmp    $0x84,%eax                ad:  cmp    $0x84,%rax
  7f:  je     0x000000000000008b        b4:  je     0x00000000000000c2
  81:  cmp    $0x6,%eax                 b6:  cmp    $0x6,%rax
  84:  je     0x000000000000008b        ba:  je     0x00000000000000c2
  86:  cmp    $0x11,%eax                bc:  cmp    $0x11,%rax
  89:  jne    0x00000000000000c6        c0:  jne    0x0000000000000117
  8b:  mov    $0x14,%esi                c2:  mov    $0x14,%esi
  90:  callq  0xffffffffe1021e15        c7:  callq  0xffffffffe102bd75
  95:  test   $0x1fff,%ax               cc:  test   $0x1fff,%rax
  99:  jne    0x00000000000000c6        d3:  jne    0x0000000000000117
                                        d5:  mov    %rax,%r14
  9b:  mov    $0xe,%esi                 d8:  mov    $0xe,%esi
  a0:  callq  0xffffffffe1021e44        dd:  callq  0xffffffffe102bd91 // MSH
                                        e2:  and    $0xf,%eax
                                        e5:  shl    $0x2,%eax
                                        e8:  mov    %rax,%r13
                                        eb:  mov    %r14,%rax
                                        ee:  mov    %r13,%rsi
  a5:  lea    0xe(%rbx),%esi            f1:  add    $0xe,%esi
  a8:  callq  0xffffffffe1021e0d        f4:  callq  0xffffffffe102bd6d
  ad:  cmp    $0x16,%eax                f9:  cmp    $0x16,%rax
  b0:  je     0x00000000000000bf        fd:  je     0x0000000000000110
                                        ff:  mov    %r13,%rsi
  b2:  lea    0x10(%rbx),%esi          102:  add    $0x10,%esi
  b5:  callq  0xffffffffe1021e0d       105:  callq  0xffffffffe102bd6d
  ba:  cmp    $0x16,%eax               10a:  cmp    $0x16,%rax
  bd:  jne    0x00000000000000c6       10e:  jne    0x0000000000000117
  bf:  mov    $0xffff,%eax             110:  mov    $0xffff,%eax
  c4:  jmp    0x00000000000000c8       115:  jmp    0x000000000000011c
  c6:  xor    %eax,%eax                117:  mov    $0x0,%eax
  c8:  mov    -0x8(%rbp),%rbx          11c:  mov    -0x228(%rbp),%rbx // epilogue
  cc:  leaveq                          123:  mov    -0x220(%rbp),%r13
  cd:  retq                            12a:  mov    -0x218(%rbp),%r14
                                       131:  mov    -0x210(%rbp),%r15
                                       138:  leaveq
                                       139:  retq

On fully cached SKBs both JITed functions take 12 nsec to execute.
BPF interpreter executes the program in 30 nsec.

The difference in generated assembler is due to the following:

The old BPF JIT implements the LDX_MSH instruction via the sk_load_byte_msh()
helper function inside bpf_jit.S. The new JIT removes the helper and does it
explicitly, so ldx_msh cost is the same for both JITs, but the generated code
looks longer.

The new JIT has 4 registers to save, so prologue/epilogue are larger,
but the cost is within noise on x64.

The old JIT checks whether the first insn clears A and, if not, emits
'xor %eax,%eax'. The new JIT clears %rax unconditionally.

2. old BPF JIT doesn't support ANC_NLATTR, ANC_PAY_OFFSET, ANC_RANDOM
   extensions. New JIT supports all BPF extensions.
   Performance of such filters improves 2-4 times depending on the filter.
   The longer the filter, the higher the performance gain.
   Synthetic benchmarks with many ancillary loads see 20x speedup,
   which seems to be the maximum gain from JIT.

Notes:

. net.core.bpf_jit_enable=2 + tools/net/bpf_jit_disasm is still functional
  and can be used to see generated assembler

. there are two jit_compile() functions and the code flow for classic
  filters is:

  sk_attach_filter() - load classic BPF
  bpf_jit_compile() - try to JIT from classic BPF
  sk_convert_filter() - convert classic to internal
  bpf_int_jit_compile() - JIT from internal BPF

  seccomp and tracing filters will just call bpf_int_jit_compile()

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
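[Editor's illustration, not part of the patch.] The heart of the new bpf_jit_comp.c below is a small set of encoding helpers (reg2hex, is_ereg, add_1mod/add_2mod, add_1reg/add_2reg) that build the REX prefix and ModRM byte for each pair of mapped registers. The following minimal userspace sketch copies those helpers and prints the bytes the JIT emits for one BPF_ALU64 | BPF_ADD | BPF_X instruction; the BPF_REG_* enum values are assumed to mirror filter.h of this series.

/* Standalone sketch (not kernel code): reproduces the patch's reg2hex table
 * and REX/ModRM helpers to show how one eBPF ALU64 add becomes x86-64 bytes.
 * The BPF_REG_* enum is an assumption mirroring filter.h.
 */
#include <stdio.h>

typedef unsigned char u8;
typedef unsigned int u32;

enum { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5,
       BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9, BPF_REG_FP, MAX_BPF_REG };
#define AUX_REG (MAX_BPF_REG + 1)	/* r11, JIT scratch register */

static const int reg2hex[] = {		/* low 3 bits of the x86-64 register */
	[BPF_REG_0] = 0, /* rax */	[BPF_REG_1] = 7, /* rdi */
	[BPF_REG_2] = 6, /* rsi */	[BPF_REG_3] = 2, /* rdx */
	[BPF_REG_4] = 1, /* rcx */	[BPF_REG_5] = 0, /* r8  */
	[BPF_REG_6] = 3, /* rbx */	[BPF_REG_7] = 5, /* r13 */
	[BPF_REG_8] = 6, /* r14 */	[BPF_REG_9] = 7, /* r15 */
	[BPF_REG_FP] = 5, /* rbp */	[AUX_REG] = 3,   /* r11 */
};

static int is_ereg(u32 reg)		/* maps to r8..r15 -> needs a REX bit */
{
	return reg == BPF_REG_5 || reg == AUX_REG ||
	       (reg >= BPF_REG_7 && reg <= BPF_REG_9);
}

static u8 add_2mod(u8 byte, u32 r1, u32 r2)	/* set REX.B / REX.R */
{
	if (is_ereg(r1))
		byte |= 1;
	if (is_ereg(r2))
		byte |= 4;
	return byte;
}

static u8 add_2reg(u8 byte, u32 a_reg, u32 x_reg) /* ModRM: mod=11, reg=src, rm=dst */
{
	return byte + reg2hex[a_reg] + (reg2hex[x_reg] << 3);
}

int main(void)
{
	/* BPF_ALU64 | BPF_ADD | BPF_X: the JIT emits REX, opcode 0x01
	 * (add r/m64, r64), then the ModRM byte.
	 */
	u32 dst = BPF_REG_7, src = BPF_REG_8;	/* R7 += R8, i.e. r13 += r14 */

	printf("%02x %02x %02x\n",
	       add_2mod(0x48, dst, src),	/* 4d: REX.W|R|B */
	       0x01,				/* add r/m64, r64 */
	       add_2reg(0xC0, dst, src));	/* f5: ModRM 11 110 101 */
	return 0;
}

Compiled with any C compiler, it prints "4d 01 f5", i.e. "add %r14,%r13", the same bytes do_jit() would emit for that instruction.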
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/net/bpf_jit.S	77
-rw-r--r--	arch/x86/net/bpf_jit_comp.c	1314
2 files changed, 737 insertions, 654 deletions
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 01495755701b..6440221ced0d 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -12,13 +12,16 @@
12 12
13/* 13/*
14 * Calling convention : 14 * Calling convention :
15 * rdi : skb pointer 15 * rbx : skb pointer (callee saved)
16 * esi : offset of byte(s) to fetch in skb (can be scratched) 16 * esi : offset of byte(s) to fetch in skb (can be scratched)
17 * r8 : copy of skb->data 17 * r10 : copy of skb->data
18 * r9d : hlen = skb->len - skb->data_len 18 * r9d : hlen = skb->len - skb->data_len
19 */ 19 */
20#define SKBDATA %r8 20#define SKBDATA %r10
21#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */ 21#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
22#define MAX_BPF_STACK (512 /* from filter.h */ + \
23 32 /* space for rbx,r13,r14,r15 */ + \
24 8 /* space for skb_copy_bits */)
22 25
23sk_load_word: 26sk_load_word:
24 .globl sk_load_word 27 .globl sk_load_word
@@ -68,53 +71,31 @@ sk_load_byte_positive_offset:
68 movzbl (SKBDATA,%rsi),%eax 71 movzbl (SKBDATA,%rsi),%eax
69 ret 72 ret
70 73
71/**
72 * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
73 *
74 * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
75 * Must preserve A accumulator (%eax)
76 * Inputs : %esi is the offset value
77 */
78sk_load_byte_msh:
79 .globl sk_load_byte_msh
80 test %esi,%esi
81 js bpf_slow_path_byte_msh_neg
82
83sk_load_byte_msh_positive_offset:
84 .globl sk_load_byte_msh_positive_offset
85 cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
86 jle bpf_slow_path_byte_msh
87 movzbl (SKBDATA,%rsi),%ebx
88 and $15,%bl
89 shl $2,%bl
90 ret
91
92/* rsi contains offset and can be scratched */ 74/* rsi contains offset and can be scratched */
93#define bpf_slow_path_common(LEN) \ 75#define bpf_slow_path_common(LEN) \
94 push %rdi; /* save skb */ \ 76 mov %rbx, %rdi; /* arg1 == skb */ \
95 push %r9; \ 77 push %r9; \
96 push SKBDATA; \ 78 push SKBDATA; \
97/* rsi already has offset */ \ 79/* rsi already has offset */ \
98 mov $LEN,%ecx; /* len */ \ 80 mov $LEN,%ecx; /* len */ \
99 lea -12(%rbp),%rdx; \ 81 lea - MAX_BPF_STACK + 32(%rbp),%rdx; \
100 call skb_copy_bits; \ 82 call skb_copy_bits; \
101 test %eax,%eax; \ 83 test %eax,%eax; \
102 pop SKBDATA; \ 84 pop SKBDATA; \
103 pop %r9; \ 85 pop %r9;
104 pop %rdi
105 86
106 87
107bpf_slow_path_word: 88bpf_slow_path_word:
108 bpf_slow_path_common(4) 89 bpf_slow_path_common(4)
109 js bpf_error 90 js bpf_error
110 mov -12(%rbp),%eax 91 mov - MAX_BPF_STACK + 32(%rbp),%eax
111 bswap %eax 92 bswap %eax
112 ret 93 ret
113 94
114bpf_slow_path_half: 95bpf_slow_path_half:
115 bpf_slow_path_common(2) 96 bpf_slow_path_common(2)
116 js bpf_error 97 js bpf_error
117 mov -12(%rbp),%ax 98 mov - MAX_BPF_STACK + 32(%rbp),%ax
118 rol $8,%ax 99 rol $8,%ax
119 movzwl %ax,%eax 100 movzwl %ax,%eax
120 ret 101 ret
@@ -122,21 +103,11 @@ bpf_slow_path_half:
122bpf_slow_path_byte: 103bpf_slow_path_byte:
123 bpf_slow_path_common(1) 104 bpf_slow_path_common(1)
124 js bpf_error 105 js bpf_error
125 movzbl -12(%rbp),%eax 106 movzbl - MAX_BPF_STACK + 32(%rbp),%eax
126 ret
127
128bpf_slow_path_byte_msh:
129 xchg %eax,%ebx /* dont lose A , X is about to be scratched */
130 bpf_slow_path_common(1)
131 js bpf_error
132 movzbl -12(%rbp),%eax
133 and $15,%al
134 shl $2,%al
135 xchg %eax,%ebx
136 ret 107 ret
137 108
138#define sk_negative_common(SIZE) \ 109#define sk_negative_common(SIZE) \
139 push %rdi; /* save skb */ \ 110 mov %rbx, %rdi; /* arg1 == skb */ \
140 push %r9; \ 111 push %r9; \
141 push SKBDATA; \ 112 push SKBDATA; \
142/* rsi already has offset */ \ 113/* rsi already has offset */ \
@@ -145,10 +116,8 @@ bpf_slow_path_byte_msh:
145 test %rax,%rax; \ 116 test %rax,%rax; \
146 pop SKBDATA; \ 117 pop SKBDATA; \
147 pop %r9; \ 118 pop %r9; \
148 pop %rdi; \
149 jz bpf_error 119 jz bpf_error
150 120
151
152bpf_slow_path_word_neg: 121bpf_slow_path_word_neg:
153 cmp SKF_MAX_NEG_OFF, %esi /* test range */ 122 cmp SKF_MAX_NEG_OFF, %esi /* test range */
154 jl bpf_error /* offset lower -> error */ 123 jl bpf_error /* offset lower -> error */
@@ -179,22 +148,12 @@ sk_load_byte_negative_offset:
179 movzbl (%rax), %eax 148 movzbl (%rax), %eax
180 ret 149 ret
181 150
182bpf_slow_path_byte_msh_neg:
183 cmp SKF_MAX_NEG_OFF, %esi
184 jl bpf_error
185sk_load_byte_msh_negative_offset:
186 .globl sk_load_byte_msh_negative_offset
187 xchg %eax,%ebx /* dont lose A , X is about to be scratched */
188 sk_negative_common(1)
189 movzbl (%rax),%eax
190 and $15,%al
191 shl $2,%al
192 xchg %eax,%ebx
193 ret
194
195bpf_error: 151bpf_error:
196# force a return 0 from jit handler 152# force a return 0 from jit handler
197 xor %eax,%eax 153 xor %eax,%eax
198 mov -8(%rbp),%rbx 154 mov - MAX_BPF_STACK(%rbp),%rbx
155 mov - MAX_BPF_STACK + 8(%rbp),%r13
156 mov - MAX_BPF_STACK + 16(%rbp),%r14
157 mov - MAX_BPF_STACK + 24(%rbp),%r15
199 leaveq 158 leaveq
200 ret 159 ret
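[Editor's illustration, not part of the patch.] The MAX_BPF_STACK definition added to bpf_jit.S above and the stacksize computation in bpf_jit_comp.c below must describe the same frame. A small standalone sketch of the rbp-relative layout, using only the numbers from this patch:

/* Sketch only (not kernel code): the rbp-relative frame layout implied by
 * this patch.  Both the JIT prologue/epilogue in bpf_jit_comp.c and the
 * slow-path helpers in bpf_jit.S rely on these offsets.
 */
#include <stdio.h>

#define BPF_STACK	512	/* MAX_BPF_STACK, "from filter.h" */
#define SAVED_REGS	32	/* rbx, r13, r14, r15 */
#define COPY_BITS_BUF	8	/* scratch buffer for skb_copy_bits() */
#define STACKSIZE	(BPF_STACK + SAVED_REGS + COPY_BITS_BUF) /* 552 == 0x228 */

int main(void)
{
	printf("prologue: sub rsp, %#x\n", STACKSIZE);
	printf("rbx spilled at rbp-%d\n", STACKSIZE);		/* -552 */
	printf("r13 spilled at rbp-%d\n", STACKSIZE - 8);	/* -544 */
	printf("r14 spilled at rbp-%d\n", STACKSIZE - 16);	/* -536 */
	printf("r15 spilled at rbp-%d\n", STACKSIZE - 24);	/* -528 */
	printf("skb_copy_bits() buffer at rbp-%d\n", STACKSIZE - 32); /* -520 */
	/* the 512-byte BPF stack itself is rbp-512 .. rbp-1, addressed
	 * through BPF_REG_FP, which reg2hex maps to rbp (read-only)
	 */
	return 0;
}

The 0x228 (552) byte figure is the same value visible in the new prologue ("sub $0x228,%rsp") shown in the commit message above.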
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index c5fa7c9cb665..92aef8fdac2f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1,6 +1,7 @@
1/* bpf_jit_comp.c : BPF JIT compiler 1/* bpf_jit_comp.c : BPF JIT compiler
2 * 2 *
3 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) 3 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
4 * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
@@ -14,28 +15,16 @@
14#include <linux/if_vlan.h> 15#include <linux/if_vlan.h>
15#include <linux/random.h> 16#include <linux/random.h>
16 17
17/*
18 * Conventions :
19 * EAX : BPF A accumulator
20 * EBX : BPF X accumulator
21 * RDI : pointer to skb (first argument given to JIT function)
22 * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
23 * ECX,EDX,ESI : scratch registers
24 * r9d : skb->len - skb->data_len (headlen)
25 * r8 : skb->data
26 * -8(RBP) : saved RBX value
27 * -16(RBP)..-80(RBP) : BPF_MEMWORDS values
28 */
29int bpf_jit_enable __read_mostly; 18int bpf_jit_enable __read_mostly;
30 19
31/* 20/*
32 * assembly code in arch/x86/net/bpf_jit.S 21 * assembly code in arch/x86/net/bpf_jit.S
33 */ 22 */
34extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; 23extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
35extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; 24extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
36extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[]; 25extern u8 sk_load_byte_positive_offset[];
37extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[]; 26extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
38extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[]; 27extern u8 sk_load_byte_negative_offset[];
39 28
40static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) 29static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
41{ 30{
@@ -56,30 +45,44 @@ static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
56#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) 45#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
57#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) 46#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
58#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) 47#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
59#define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0) 48#define EMIT1_off32(b1, off) \
60 49 do {EMIT1(b1); EMIT(off, 4); } while (0)
61#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */ 50#define EMIT2_off32(b1, b2, off) \
62#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */ 51 do {EMIT2(b1, b2); EMIT(off, 4); } while (0)
52#define EMIT3_off32(b1, b2, b3, off) \
53 do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
54#define EMIT4_off32(b1, b2, b3, b4, off) \
55 do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
63 56
64static inline bool is_imm8(int value) 57static inline bool is_imm8(int value)
65{ 58{
66 return value <= 127 && value >= -128; 59 return value <= 127 && value >= -128;
67} 60}
68 61
69static inline bool is_near(int offset) 62static inline bool is_simm32(s64 value)
70{ 63{
71 return offset <= 127 && offset >= -128; 64 return value == (s64) (s32) value;
72} 65}
73 66
74#define EMIT_JMP(offset) \ 67/* mov A, X */
75do { \ 68#define EMIT_mov(A, X) \
76 if (offset) { \ 69 do {if (A != X) \
77 if (is_near(offset)) \ 70 EMIT3(add_2mod(0x48, A, X), 0x89, add_2reg(0xC0, A, X)); \
78 EMIT2(0xeb, offset); /* jmp .+off8 */ \ 71 } while (0)
79 else \ 72
80 EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \ 73static int bpf_size_to_x86_bytes(int bpf_size)
81 } \ 74{
82} while (0) 75 if (bpf_size == BPF_W)
76 return 4;
77 else if (bpf_size == BPF_H)
78 return 2;
79 else if (bpf_size == BPF_B)
80 return 1;
81 else if (bpf_size == BPF_DW)
82 return 4; /* imm32 */
83 else
84 return 0;
85}
83 86
84/* list of x86 cond jumps opcodes (. + s8) 87/* list of x86 cond jumps opcodes (. + s8)
85 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) 88 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
@@ -90,27 +93,8 @@ do { \
90#define X86_JNE 0x75 93#define X86_JNE 0x75
91#define X86_JBE 0x76 94#define X86_JBE 0x76
92#define X86_JA 0x77 95#define X86_JA 0x77
93 96#define X86_JGE 0x7D
94#define EMIT_COND_JMP(op, offset) \ 97#define X86_JG 0x7F
95do { \
96 if (is_near(offset)) \
97 EMIT2(op, offset); /* jxx .+off8 */ \
98 else { \
99 EMIT2(0x0f, op + 0x10); \
100 EMIT(offset, 4); /* jxx .+off32 */ \
101 } \
102} while (0)
103
104#define COND_SEL(CODE, TOP, FOP) \
105 case CODE: \
106 t_op = TOP; \
107 f_op = FOP; \
108 goto cond_branch
109
110
111#define SEEN_DATAREF 1 /* might call external helpers */
112#define SEEN_XREG 2 /* ebx is used */
113#define SEEN_MEM 4 /* use mem[] for temporary storage */
114 98
115static inline void bpf_flush_icache(void *start, void *end) 99static inline void bpf_flush_icache(void *start, void *end)
116{ 100{
@@ -125,26 +109,6 @@ static inline void bpf_flush_icache(void *start, void *end)
125#define CHOOSE_LOAD_FUNC(K, func) \ 109#define CHOOSE_LOAD_FUNC(K, func) \
126 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) 110 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
127 111
128/* Helper to find the offset of pkt_type in sk_buff
129 * We want to make sure its still a 3bit field starting at a byte boundary.
130 */
131#define PKT_TYPE_MAX 7
132static int pkt_type_offset(void)
133{
134 struct sk_buff skb_probe = {
135 .pkt_type = ~0,
136 };
137 char *ct = (char *)&skb_probe;
138 unsigned int off;
139
140 for (off = 0; off < sizeof(struct sk_buff); off++) {
141 if (ct[off] == PKT_TYPE_MAX)
142 return off;
143 }
144 pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
145 return -1;
146}
147
148struct bpf_binary_header { 112struct bpf_binary_header {
149 unsigned int pages; 113 unsigned int pages;
150 /* Note : for security reasons, bpf code will follow a randomly 114 /* Note : for security reasons, bpf code will follow a randomly
@@ -178,546 +142,715 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen,
178 return header; 142 return header;
179} 143}
180 144
145/* pick a register outside of BPF range for JIT internal work */
146#define AUX_REG (MAX_BPF_REG + 1)
147
148/* the following table maps BPF registers to x64 registers.
149 * x64 register r12 is unused, since if used as base address register
150 * in load/store instructions, it always needs an extra byte of encoding
151 */
152static const int reg2hex[] = {
153 [BPF_REG_0] = 0, /* rax */
154 [BPF_REG_1] = 7, /* rdi */
155 [BPF_REG_2] = 6, /* rsi */
156 [BPF_REG_3] = 2, /* rdx */
157 [BPF_REG_4] = 1, /* rcx */
158 [BPF_REG_5] = 0, /* r8 */
159 [BPF_REG_6] = 3, /* rbx callee saved */
160 [BPF_REG_7] = 5, /* r13 callee saved */
161 [BPF_REG_8] = 6, /* r14 callee saved */
162 [BPF_REG_9] = 7, /* r15 callee saved */
163 [BPF_REG_FP] = 5, /* rbp readonly */
164 [AUX_REG] = 3, /* r11 temp register */
165};
166
167/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15
168 * which need extra byte of encoding.
169 * rax,rcx,...,rbp have simpler encoding
170 */
171static inline bool is_ereg(u32 reg)
172{
173 if (reg == BPF_REG_5 || reg == AUX_REG ||
174 (reg >= BPF_REG_7 && reg <= BPF_REG_9))
175 return true;
176 else
177 return false;
178}
179
180/* add modifiers if 'reg' maps to x64 registers r8..r15 */
181static inline u8 add_1mod(u8 byte, u32 reg)
182{
183 if (is_ereg(reg))
184 byte |= 1;
185 return byte;
186}
187
188static inline u8 add_2mod(u8 byte, u32 r1, u32 r2)
189{
190 if (is_ereg(r1))
191 byte |= 1;
192 if (is_ereg(r2))
193 byte |= 4;
194 return byte;
195}
196
197/* encode dest register 'a_reg' into x64 opcode 'byte' */
198static inline u8 add_1reg(u8 byte, u32 a_reg)
199{
200 return byte + reg2hex[a_reg];
201}
202
203/* encode dest 'a_reg' and src 'x_reg' registers into x64 opcode 'byte' */
204static inline u8 add_2reg(u8 byte, u32 a_reg, u32 x_reg)
205{
206 return byte + reg2hex[a_reg] + (reg2hex[x_reg] << 3);
207}
208
181struct jit_context { 209struct jit_context {
182 unsigned int cleanup_addr; /* epilogue code offset */ 210 unsigned int cleanup_addr; /* epilogue code offset */
183 int pc_ret0; /* bpf index of first RET #0 instruction (if any) */ 211 bool seen_ld_abs;
184 u8 seen;
185}; 212};
186 213
187static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, 214static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
188 int oldproglen, struct jit_context *ctx) 215 int oldproglen, struct jit_context *ctx)
189{ 216{
190 const struct sock_filter *filter = bpf_prog->insns; 217 struct sock_filter_int *insn = bpf_prog->insnsi;
191 int flen = bpf_prog->len; 218 int insn_cnt = bpf_prog->len;
192 u8 temp[64]; 219 u8 temp[64];
193 u8 *prog; 220 int i;
194 int ilen, i, proglen; 221 int proglen = 0;
195 int t_offset, f_offset; 222 u8 *prog = temp;
196 u8 t_op, f_op, seen = 0; 223 int stacksize = MAX_BPF_STACK +
197 u8 *func; 224 32 /* space for rbx, r13, r14, r15 */ +
198 unsigned int cleanup_addr = ctx->cleanup_addr; 225 8 /* space for skb_copy_bits() buffer */;
199 u8 seen_or_pass0 = ctx->seen;
200
201 /* no prologue/epilogue for trivial filters (RET something) */
202 proglen = 0;
203 prog = temp;
204 226
205 if (seen_or_pass0) { 227 EMIT1(0x55); /* push rbp */
206 EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ 228 EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
207 EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */
208 /* note : must save %rbx in case bpf_error is hit */
209 if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF))
210 EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
211 if (seen_or_pass0 & SEEN_XREG)
212 CLEAR_X(); /* make sure we dont leek kernel memory */
213
214 /*
215 * If this filter needs to access skb data,
216 * loads r9 and r8 with :
217 * r9 = skb->len - skb->data_len
218 * r8 = skb->data
219 */
220 if (seen_or_pass0 & SEEN_DATAREF) {
221 if (offsetof(struct sk_buff, len) <= 127)
222 /* mov off8(%rdi),%r9d */
223 EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
224 else {
225 /* mov off32(%rdi),%r9d */
226 EMIT3(0x44, 0x8b, 0x8f);
227 EMIT(offsetof(struct sk_buff, len), 4);
228 }
229 if (is_imm8(offsetof(struct sk_buff, data_len)))
230 /* sub off8(%rdi),%r9d */
231 EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len));
232 else {
233 EMIT3(0x44, 0x2b, 0x8f);
234 EMIT(offsetof(struct sk_buff, data_len), 4);
235 }
236 229
237 if (is_imm8(offsetof(struct sk_buff, data))) 230 /* sub rsp, stacksize */
238 /* mov off8(%rdi),%r8 */ 231 EMIT3_off32(0x48, 0x81, 0xEC, stacksize);
239 EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data)); 232
240 else { 233 /* all classic BPF filters use R6(rbx) save it */
241 /* mov off32(%rdi),%r8 */ 234
242 EMIT3(0x4c, 0x8b, 0x87); 235 /* mov qword ptr [rbp-X],rbx */
243 EMIT(offsetof(struct sk_buff, data), 4); 236 EMIT3_off32(0x48, 0x89, 0x9D, -stacksize);
244 } 237
238 /* sk_convert_filter() maps classic BPF register X to R7 and uses R8
239 * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
240 * R8(r14). R9(r15) spill could be made conditional, but there is only
241 * one 'bpf_error' return path out of helper functions inside bpf_jit.S
242 * The overhead of extra spill is negligible for any filter other
243 * than synthetic ones. Therefore not worth adding complexity.
244 */
245
246 /* mov qword ptr [rbp-X],r13 */
247 EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8);
248 /* mov qword ptr [rbp-X],r14 */
249 EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16);
250 /* mov qword ptr [rbp-X],r15 */
251 EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24);
252
253 /* clear A and X registers */
254 EMIT2(0x31, 0xc0); /* xor eax, eax */
255 EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */
256
257 if (ctx->seen_ld_abs) {
258 /* r9d : skb->len - skb->data_len (headlen)
259 * r10 : skb->data
260 */
261 if (is_imm8(offsetof(struct sk_buff, len)))
262 /* mov %r9d, off8(%rdi) */
263 EMIT4(0x44, 0x8b, 0x4f,
264 offsetof(struct sk_buff, len));
265 else
266 /* mov %r9d, off32(%rdi) */
267 EMIT3_off32(0x44, 0x8b, 0x8f,
268 offsetof(struct sk_buff, len));
269
270 if (is_imm8(offsetof(struct sk_buff, data_len)))
271 /* sub %r9d, off8(%rdi) */
272 EMIT4(0x44, 0x2b, 0x4f,
273 offsetof(struct sk_buff, data_len));
274 else
275 EMIT3_off32(0x44, 0x2b, 0x8f,
276 offsetof(struct sk_buff, data_len));
277
278 if (is_imm8(offsetof(struct sk_buff, data)))
279 /* mov %r10, off8(%rdi) */
280 EMIT4(0x4c, 0x8b, 0x57,
281 offsetof(struct sk_buff, data));
282 else
283 /* mov %r10, off32(%rdi) */
284 EMIT3_off32(0x4c, 0x8b, 0x97,
285 offsetof(struct sk_buff, data));
286 }
287
288 for (i = 0; i < insn_cnt; i++, insn++) {
289 const s32 K = insn->imm;
290 u32 a_reg = insn->a_reg;
291 u32 x_reg = insn->x_reg;
292 u8 b1 = 0, b2 = 0, b3 = 0;
293 s64 jmp_offset;
294 u8 jmp_cond;
295 int ilen;
296 u8 *func;
297
298 switch (insn->code) {
299 /* ALU */
300 case BPF_ALU | BPF_ADD | BPF_X:
301 case BPF_ALU | BPF_SUB | BPF_X:
302 case BPF_ALU | BPF_AND | BPF_X:
303 case BPF_ALU | BPF_OR | BPF_X:
304 case BPF_ALU | BPF_XOR | BPF_X:
305 case BPF_ALU64 | BPF_ADD | BPF_X:
306 case BPF_ALU64 | BPF_SUB | BPF_X:
307 case BPF_ALU64 | BPF_AND | BPF_X:
308 case BPF_ALU64 | BPF_OR | BPF_X:
309 case BPF_ALU64 | BPF_XOR | BPF_X:
310 switch (BPF_OP(insn->code)) {
311 case BPF_ADD: b2 = 0x01; break;
312 case BPF_SUB: b2 = 0x29; break;
313 case BPF_AND: b2 = 0x21; break;
314 case BPF_OR: b2 = 0x09; break;
315 case BPF_XOR: b2 = 0x31; break;
245 } 316 }
246 } 317 if (BPF_CLASS(insn->code) == BPF_ALU64)
318 EMIT1(add_2mod(0x48, a_reg, x_reg));
319 else if (is_ereg(a_reg) || is_ereg(x_reg))
320 EMIT1(add_2mod(0x40, a_reg, x_reg));
321 EMIT2(b2, add_2reg(0xC0, a_reg, x_reg));
322 break;
247 323
248 switch (filter[0].code) { 324 /* mov A, X */
249 case BPF_S_RET_K: 325 case BPF_ALU64 | BPF_MOV | BPF_X:
250 case BPF_S_LD_W_LEN: 326 EMIT_mov(a_reg, x_reg);
251 case BPF_S_ANC_PROTOCOL:
252 case BPF_S_ANC_IFINDEX:
253 case BPF_S_ANC_MARK:
254 case BPF_S_ANC_RXHASH:
255 case BPF_S_ANC_CPU:
256 case BPF_S_ANC_VLAN_TAG:
257 case BPF_S_ANC_VLAN_TAG_PRESENT:
258 case BPF_S_ANC_QUEUE:
259 case BPF_S_ANC_PKTTYPE:
260 case BPF_S_LD_W_ABS:
261 case BPF_S_LD_H_ABS:
262 case BPF_S_LD_B_ABS:
263 /* first instruction sets A register (or is RET 'constant') */
264 break; 327 break;
265 default:
266 /* make sure we dont leak kernel information to user */
267 CLEAR_A(); /* A = 0 */
268 }
269 328
270 for (i = 0; i < flen; i++) { 329 /* mov32 A, X */
271 unsigned int K = filter[i].k; 330 case BPF_ALU | BPF_MOV | BPF_X:
331 if (is_ereg(a_reg) || is_ereg(x_reg))
332 EMIT1(add_2mod(0x40, a_reg, x_reg));
333 EMIT2(0x89, add_2reg(0xC0, a_reg, x_reg));
334 break;
272 335
273 switch (filter[i].code) { 336 /* neg A */
274 case BPF_S_ALU_ADD_X: /* A += X; */ 337 case BPF_ALU | BPF_NEG:
275 seen |= SEEN_XREG; 338 case BPF_ALU64 | BPF_NEG:
276 EMIT2(0x01, 0xd8); /* add %ebx,%eax */ 339 if (BPF_CLASS(insn->code) == BPF_ALU64)
277 break; 340 EMIT1(add_1mod(0x48, a_reg));
278 case BPF_S_ALU_ADD_K: /* A += K; */ 341 else if (is_ereg(a_reg))
279 if (!K) 342 EMIT1(add_1mod(0x40, a_reg));
280 break; 343 EMIT2(0xF7, add_1reg(0xD8, a_reg));
281 if (is_imm8(K)) 344 break;
282 EMIT3(0x83, 0xc0, K); /* add imm8,%eax */ 345
283 else 346 case BPF_ALU | BPF_ADD | BPF_K:
284 EMIT1_off32(0x05, K); /* add imm32,%eax */ 347 case BPF_ALU | BPF_SUB | BPF_K:
285 break; 348 case BPF_ALU | BPF_AND | BPF_K:
286 case BPF_S_ALU_SUB_X: /* A -= X; */ 349 case BPF_ALU | BPF_OR | BPF_K:
287 seen |= SEEN_XREG; 350 case BPF_ALU | BPF_XOR | BPF_K:
288 EMIT2(0x29, 0xd8); /* sub %ebx,%eax */ 351 case BPF_ALU64 | BPF_ADD | BPF_K:
289 break; 352 case BPF_ALU64 | BPF_SUB | BPF_K:
290 case BPF_S_ALU_SUB_K: /* A -= K */ 353 case BPF_ALU64 | BPF_AND | BPF_K:
291 if (!K) 354 case BPF_ALU64 | BPF_OR | BPF_K:
292 break; 355 case BPF_ALU64 | BPF_XOR | BPF_K:
293 if (is_imm8(K)) 356 if (BPF_CLASS(insn->code) == BPF_ALU64)
294 EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */ 357 EMIT1(add_1mod(0x48, a_reg));
295 else 358 else if (is_ereg(a_reg))
296 EMIT1_off32(0x2d, K); /* sub imm32,%eax */ 359 EMIT1(add_1mod(0x40, a_reg));
297 break; 360
298 case BPF_S_ALU_MUL_X: /* A *= X; */ 361 switch (BPF_OP(insn->code)) {
299 seen |= SEEN_XREG; 362 case BPF_ADD: b3 = 0xC0; break;
300 EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */ 363 case BPF_SUB: b3 = 0xE8; break;
301 break; 364 case BPF_AND: b3 = 0xE0; break;
302 case BPF_S_ALU_MUL_K: /* A *= K */ 365 case BPF_OR: b3 = 0xC8; break;
303 if (is_imm8(K)) 366 case BPF_XOR: b3 = 0xF0; break;
304 EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */ 367 }
305 else { 368
306 EMIT2(0x69, 0xc0); /* imul imm32,%eax */ 369 if (is_imm8(K))
307 EMIT(K, 4); 370 EMIT3(0x83, add_1reg(b3, a_reg), K);
308 } 371 else
309 break; 372 EMIT2_off32(0x81, add_1reg(b3, a_reg), K);
310 case BPF_S_ALU_DIV_X: /* A /= X; */ 373 break;
311 seen |= SEEN_XREG; 374
312 EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ 375 case BPF_ALU64 | BPF_MOV | BPF_K:
313 if (ctx->pc_ret0 > 0) { 376 /* optimization: if imm32 is positive,
314 /* addrs[pc_ret0 - 1] is start address of target 377 * use 'mov eax, imm32' (which zero-extends imm32)
315 * (addrs[i] - 4) is the address following this jmp 378 * to save 2 bytes
316 * ("xor %edx,%edx; div %ebx" being 4 bytes long) 379 */
317 */ 380 if (K < 0) {
318 EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] - 381 /* 'mov rax, imm32' sign extends imm32 */
319 (addrs[i] - 4)); 382 b1 = add_1mod(0x48, a_reg);
320 } else { 383 b2 = 0xC7;
321 EMIT_COND_JMP(X86_JNE, 2 + 5); 384 b3 = 0xC0;
322 CLEAR_A(); 385 EMIT3_off32(b1, b2, add_1reg(b3, a_reg), K);
323 EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
324 }
325 EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
326 break;
327 case BPF_S_ALU_MOD_X: /* A %= X; */
328 seen |= SEEN_XREG;
329 EMIT2(0x85, 0xdb); /* test %ebx,%ebx */
330 if (ctx->pc_ret0 > 0) {
331 /* addrs[pc_ret0 - 1] is start address of target
332 * (addrs[i] - 6) is the address following this jmp
333 * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long)
334 */
335 EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] -
336 (addrs[i] - 6));
337 } else {
338 EMIT_COND_JMP(X86_JNE, 2 + 5);
339 CLEAR_A();
340 EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */
341 }
342 EMIT2(0x31, 0xd2); /* xor %edx,%edx */
343 EMIT2(0xf7, 0xf3); /* div %ebx */
344 EMIT2(0x89, 0xd0); /* mov %edx,%eax */
345 break;
346 case BPF_S_ALU_MOD_K: /* A %= K; */
347 if (K == 1) {
348 CLEAR_A();
349 break;
350 }
351 EMIT2(0x31, 0xd2); /* xor %edx,%edx */
352 EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */
353 EMIT2(0xf7, 0xf1); /* div %ecx */
354 EMIT2(0x89, 0xd0); /* mov %edx,%eax */
355 break;
356 case BPF_S_ALU_DIV_K: /* A /= K */
357 if (K == 1)
358 break;
359 EMIT2(0x31, 0xd2); /* xor %edx,%edx */
360 EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */
361 EMIT2(0xf7, 0xf1); /* div %ecx */
362 break;
363 case BPF_S_ALU_AND_X:
364 seen |= SEEN_XREG;
365 EMIT2(0x21, 0xd8); /* and %ebx,%eax */
366 break;
367 case BPF_S_ALU_AND_K:
368 if (K >= 0xFFFFFF00) {
369 EMIT2(0x24, K & 0xFF); /* and imm8,%al */
370 } else if (K >= 0xFFFF0000) {
371 EMIT2(0x66, 0x25); /* and imm16,%ax */
372 EMIT(K, 2);
373 } else {
374 EMIT1_off32(0x25, K); /* and imm32,%eax */
375 }
376 break;
377 case BPF_S_ALU_OR_X:
378 seen |= SEEN_XREG;
379 EMIT2(0x09, 0xd8); /* or %ebx,%eax */
380 break;
381 case BPF_S_ALU_OR_K:
382 if (is_imm8(K))
383 EMIT3(0x83, 0xc8, K); /* or imm8,%eax */
384 else
385 EMIT1_off32(0x0d, K); /* or imm32,%eax */
386 break;
387 case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
388 case BPF_S_ALU_XOR_X:
389 seen |= SEEN_XREG;
390 EMIT2(0x31, 0xd8); /* xor %ebx,%eax */
391 break;
392 case BPF_S_ALU_XOR_K: /* A ^= K; */
393 if (K == 0)
394 break;
395 if (is_imm8(K))
396 EMIT3(0x83, 0xf0, K); /* xor imm8,%eax */
397 else
398 EMIT1_off32(0x35, K); /* xor imm32,%eax */
399 break;
400 case BPF_S_ALU_LSH_X: /* A <<= X; */
401 seen |= SEEN_XREG;
402 EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */
403 break;
404 case BPF_S_ALU_LSH_K:
405 if (K == 0)
406 break;
407 else if (K == 1)
408 EMIT2(0xd1, 0xe0); /* shl %eax */
409 else
410 EMIT3(0xc1, 0xe0, K);
411 break;
412 case BPF_S_ALU_RSH_X: /* A >>= X; */
413 seen |= SEEN_XREG;
414 EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */
415 break;
416 case BPF_S_ALU_RSH_K: /* A >>= K; */
417 if (K == 0)
418 break;
419 else if (K == 1)
420 EMIT2(0xd1, 0xe8); /* shr %eax */
421 else
422 EMIT3(0xc1, 0xe8, K);
423 break;
424 case BPF_S_ALU_NEG:
425 EMIT2(0xf7, 0xd8); /* neg %eax */
426 break;
427 case BPF_S_RET_K:
428 if (!K) {
429 if (ctx->pc_ret0 == -1)
430 ctx->pc_ret0 = i;
431 CLEAR_A();
432 } else {
433 EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
434 }
435 /* fallinto */
436 case BPF_S_RET_A:
437 if (seen_or_pass0) {
438 if (i != flen - 1) {
439 EMIT_JMP(cleanup_addr - addrs[i]);
440 break;
441 }
442 if (seen_or_pass0 & SEEN_XREG)
443 EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */
444 EMIT1(0xc9); /* leaveq */
445 }
446 EMIT1(0xc3); /* ret */
447 break;
448 case BPF_S_MISC_TAX: /* X = A */
449 seen |= SEEN_XREG;
450 EMIT2(0x89, 0xc3); /* mov %eax,%ebx */
451 break;
452 case BPF_S_MISC_TXA: /* A = X */
453 seen |= SEEN_XREG;
454 EMIT2(0x89, 0xd8); /* mov %ebx,%eax */
455 break;
456 case BPF_S_LD_IMM: /* A = K */
457 if (!K)
458 CLEAR_A();
459 else
460 EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
461 break; 386 break;
462 case BPF_S_LDX_IMM: /* X = K */ 387 }
463 seen |= SEEN_XREG; 388
464 if (!K) 389 case BPF_ALU | BPF_MOV | BPF_K:
465 CLEAR_X(); 390 /* mov %eax, imm32 */
391 if (is_ereg(a_reg))
392 EMIT1(add_1mod(0x40, a_reg));
393 EMIT1_off32(add_1reg(0xB8, a_reg), K);
394 break;
395
396 /* A %= X, A /= X, A %= K, A /= K */
397 case BPF_ALU | BPF_MOD | BPF_X:
398 case BPF_ALU | BPF_DIV | BPF_X:
399 case BPF_ALU | BPF_MOD | BPF_K:
400 case BPF_ALU | BPF_DIV | BPF_K:
401 case BPF_ALU64 | BPF_MOD | BPF_X:
402 case BPF_ALU64 | BPF_DIV | BPF_X:
403 case BPF_ALU64 | BPF_MOD | BPF_K:
404 case BPF_ALU64 | BPF_DIV | BPF_K:
405 EMIT1(0x50); /* push rax */
406 EMIT1(0x52); /* push rdx */
407
408 if (BPF_SRC(insn->code) == BPF_X)
409 /* mov r11, X */
410 EMIT_mov(AUX_REG, x_reg);
411 else
412 /* mov r11, K */
413 EMIT3_off32(0x49, 0xC7, 0xC3, K);
414
415 /* mov rax, A */
416 EMIT_mov(BPF_REG_0, a_reg);
417
418 /* xor edx, edx
419 * equivalent to 'xor rdx, rdx', but one byte less
420 */
421 EMIT2(0x31, 0xd2);
422
423 if (BPF_SRC(insn->code) == BPF_X) {
424 /* if (X == 0) return 0 */
425
426 /* cmp r11, 0 */
427 EMIT4(0x49, 0x83, 0xFB, 0x00);
428
429 /* jne .+9 (skip over pop, pop, xor and jmp) */
430 EMIT2(X86_JNE, 1 + 1 + 2 + 5);
431 EMIT1(0x5A); /* pop rdx */
432 EMIT1(0x58); /* pop rax */
433 EMIT2(0x31, 0xc0); /* xor eax, eax */
434
435 /* jmp cleanup_addr
436 * addrs[i] - 11, because there are 11 bytes
437 * after this insn: div, mov, pop, pop, mov
438 */
439 jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
440 EMIT1_off32(0xE9, jmp_offset);
441 }
442
443 if (BPF_CLASS(insn->code) == BPF_ALU64)
444 /* div r11 */
445 EMIT3(0x49, 0xF7, 0xF3);
446 else
447 /* div r11d */
448 EMIT3(0x41, 0xF7, 0xF3);
449
450 if (BPF_OP(insn->code) == BPF_MOD)
451 /* mov r11, rdx */
452 EMIT3(0x49, 0x89, 0xD3);
453 else
454 /* mov r11, rax */
455 EMIT3(0x49, 0x89, 0xC3);
456
457 EMIT1(0x5A); /* pop rdx */
458 EMIT1(0x58); /* pop rax */
459
460 /* mov A, r11 */
461 EMIT_mov(a_reg, AUX_REG);
462 break;
463
464 case BPF_ALU | BPF_MUL | BPF_K:
465 case BPF_ALU | BPF_MUL | BPF_X:
466 case BPF_ALU64 | BPF_MUL | BPF_K:
467 case BPF_ALU64 | BPF_MUL | BPF_X:
468 EMIT1(0x50); /* push rax */
469 EMIT1(0x52); /* push rdx */
470
471 /* mov r11, A */
472 EMIT_mov(AUX_REG, a_reg);
473
474 if (BPF_SRC(insn->code) == BPF_X)
475 /* mov rax, X */
476 EMIT_mov(BPF_REG_0, x_reg);
477 else
478 /* mov rax, K */
479 EMIT3_off32(0x48, 0xC7, 0xC0, K);
480
481 if (BPF_CLASS(insn->code) == BPF_ALU64)
482 EMIT1(add_1mod(0x48, AUX_REG));
483 else if (is_ereg(AUX_REG))
484 EMIT1(add_1mod(0x40, AUX_REG));
485 /* mul(q) r11 */
486 EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
487
488 /* mov r11, rax */
489 EMIT_mov(AUX_REG, BPF_REG_0);
490
491 EMIT1(0x5A); /* pop rdx */
492 EMIT1(0x58); /* pop rax */
493
494 /* mov A, r11 */
495 EMIT_mov(a_reg, AUX_REG);
496 break;
497
498 /* shifts */
499 case BPF_ALU | BPF_LSH | BPF_K:
500 case BPF_ALU | BPF_RSH | BPF_K:
501 case BPF_ALU | BPF_ARSH | BPF_K:
502 case BPF_ALU64 | BPF_LSH | BPF_K:
503 case BPF_ALU64 | BPF_RSH | BPF_K:
504 case BPF_ALU64 | BPF_ARSH | BPF_K:
505 if (BPF_CLASS(insn->code) == BPF_ALU64)
506 EMIT1(add_1mod(0x48, a_reg));
507 else if (is_ereg(a_reg))
508 EMIT1(add_1mod(0x40, a_reg));
509
510 switch (BPF_OP(insn->code)) {
511 case BPF_LSH: b3 = 0xE0; break;
512 case BPF_RSH: b3 = 0xE8; break;
513 case BPF_ARSH: b3 = 0xF8; break;
514 }
515 EMIT3(0xC1, add_1reg(b3, a_reg), K);
516 break;
517
518 case BPF_ALU | BPF_END | BPF_FROM_BE:
519 switch (K) {
520 case 16:
521 /* emit 'ror %ax, 8' to swap lower 2 bytes */
522 EMIT1(0x66);
523 if (is_ereg(a_reg))
524 EMIT1(0x41);
525 EMIT3(0xC1, add_1reg(0xC8, a_reg), 8);
526 break;
527 case 32:
528 /* emit 'bswap eax' to swap lower 4 bytes */
529 if (is_ereg(a_reg))
530 EMIT2(0x41, 0x0F);
466 else 531 else
467 EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */ 532 EMIT1(0x0F);
468 break; 533 EMIT1(add_1reg(0xC8, a_reg));
469 case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */
470 seen |= SEEN_MEM;
471 EMIT3(0x8b, 0x45, 0xf0 - K*4);
472 break; 534 break;
473 case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */ 535 case 64:
474 seen |= SEEN_XREG | SEEN_MEM; 536 /* emit 'bswap rax' to swap 8 bytes */
475 EMIT3(0x8b, 0x5d, 0xf0 - K*4); 537 EMIT3(add_1mod(0x48, a_reg), 0x0F,
476 break; 538 add_1reg(0xC8, a_reg));
477 case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */
478 seen |= SEEN_MEM;
479 EMIT3(0x89, 0x45, 0xf0 - K*4);
480 break;
481 case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
482 seen |= SEEN_XREG | SEEN_MEM;
483 EMIT3(0x89, 0x5d, 0xf0 - K*4);
484 break;
485 case BPF_S_LD_W_LEN: /* A = skb->len; */
486 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
487 if (is_imm8(offsetof(struct sk_buff, len)))
488 /* mov off8(%rdi),%eax */
489 EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len));
490 else {
491 EMIT2(0x8b, 0x87);
492 EMIT(offsetof(struct sk_buff, len), 4);
493 }
494 break;
495 case BPF_S_LDX_W_LEN: /* X = skb->len; */
496 seen |= SEEN_XREG;
497 if (is_imm8(offsetof(struct sk_buff, len)))
498 /* mov off8(%rdi),%ebx */
499 EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len));
500 else {
501 EMIT2(0x8b, 0x9f);
502 EMIT(offsetof(struct sk_buff, len), 4);
503 }
504 break;
505 case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
506 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
507 if (is_imm8(offsetof(struct sk_buff, protocol))) {
508 /* movzwl off8(%rdi),%eax */
509 EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol));
510 } else {
511 EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
512 EMIT(offsetof(struct sk_buff, protocol), 4);
513 }
514 EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */
515 break;
516 case BPF_S_ANC_IFINDEX:
517 if (is_imm8(offsetof(struct sk_buff, dev))) {
518 /* movq off8(%rdi),%rax */
519 EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev));
520 } else {
521 EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */
522 EMIT(offsetof(struct sk_buff, dev), 4);
523 }
524 EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */
525 EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6));
526 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
527 EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */
528 EMIT(offsetof(struct net_device, ifindex), 4);
529 break;
530 case BPF_S_ANC_MARK:
531 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
532 if (is_imm8(offsetof(struct sk_buff, mark))) {
533 /* mov off8(%rdi),%eax */
534 EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark));
535 } else {
536 EMIT2(0x8b, 0x87);
537 EMIT(offsetof(struct sk_buff, mark), 4);
538 }
539 break;
540 case BPF_S_ANC_RXHASH:
541 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
542 if (is_imm8(offsetof(struct sk_buff, hash))) {
543 /* mov off8(%rdi),%eax */
544 EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash));
545 } else {
546 EMIT2(0x8b, 0x87);
547 EMIT(offsetof(struct sk_buff, hash), 4);
548 }
549 break;
550 case BPF_S_ANC_QUEUE:
551 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
552 if (is_imm8(offsetof(struct sk_buff, queue_mapping))) {
553 /* movzwl off8(%rdi),%eax */
554 EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping));
555 } else {
556 EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
557 EMIT(offsetof(struct sk_buff, queue_mapping), 4);
558 }
559 break;
560 case BPF_S_ANC_CPU:
561#ifdef CONFIG_SMP
562 EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */
563 EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */
564#else
565 CLEAR_A();
566#endif
567 break;
568 case BPF_S_ANC_VLAN_TAG:
569 case BPF_S_ANC_VLAN_TAG_PRESENT:
570 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
571 if (is_imm8(offsetof(struct sk_buff, vlan_tci))) {
572 /* movzwl off8(%rdi),%eax */
573 EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci));
574 } else {
575 EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
576 EMIT(offsetof(struct sk_buff, vlan_tci), 4);
577 }
578 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
579 if (filter[i].code == BPF_S_ANC_VLAN_TAG) {
580 EMIT3(0x80, 0xe4, 0xef); /* and $0xef,%ah */
581 } else {
582 EMIT3(0xc1, 0xe8, 0x0c); /* shr $0xc,%eax */
583 EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */
584 }
585 break;
586 case BPF_S_ANC_PKTTYPE:
587 {
588 int off = pkt_type_offset();
589
590 if (off < 0)
591 return -EINVAL;
592 if (is_imm8(off)) {
593 /* movzbl off8(%rdi),%eax */
594 EMIT4(0x0f, 0xb6, 0x47, off);
595 } else {
596 /* movbl off32(%rdi),%eax */
597 EMIT3(0x0f, 0xb6, 0x87);
598 EMIT(off, 4);
599 }
600 EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and $0x7,%eax */
601 break; 539 break;
602 } 540 }
603 case BPF_S_LD_W_ABS: 541 break;
604 func = CHOOSE_LOAD_FUNC(K, sk_load_word); 542
605common_load: seen |= SEEN_DATAREF; 543 case BPF_ALU | BPF_END | BPF_FROM_LE:
606 t_offset = func - (image + addrs[i]); 544 break;
607 EMIT1_off32(0xbe, K); /* mov imm32,%esi */ 545
608 EMIT1_off32(0xe8, t_offset); /* call */ 546 /* ST: *(u8*)(a_reg + off) = imm */
609 break; 547 case BPF_ST | BPF_MEM | BPF_B:
610 case BPF_S_LD_H_ABS: 548 if (is_ereg(a_reg))
611 func = CHOOSE_LOAD_FUNC(K, sk_load_half); 549 EMIT2(0x41, 0xC6);
612 goto common_load; 550 else
613 case BPF_S_LD_B_ABS: 551 EMIT1(0xC6);
614 func = CHOOSE_LOAD_FUNC(K, sk_load_byte); 552 goto st;
615 goto common_load; 553 case BPF_ST | BPF_MEM | BPF_H:
616 case BPF_S_LDX_B_MSH: 554 if (is_ereg(a_reg))
617 func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); 555 EMIT3(0x66, 0x41, 0xC7);
618 seen |= SEEN_DATAREF | SEEN_XREG; 556 else
619 t_offset = func - (image + addrs[i]); 557 EMIT2(0x66, 0xC7);
620 EMIT1_off32(0xbe, K); /* mov imm32,%esi */ 558 goto st;
621 EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ 559 case BPF_ST | BPF_MEM | BPF_W:
622 break; 560 if (is_ereg(a_reg))
623 case BPF_S_LD_W_IND: 561 EMIT2(0x41, 0xC7);
624 func = sk_load_word; 562 else
625common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; 563 EMIT1(0xC7);
626 t_offset = func - (image + addrs[i]); 564 goto st;
627 if (K) { 565 case BPF_ST | BPF_MEM | BPF_DW:
628 if (is_imm8(K)) { 566 EMIT2(add_1mod(0x48, a_reg), 0xC7);
629 EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */ 567
630 } else { 568st: if (is_imm8(insn->off))
631 EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */ 569 EMIT2(add_1reg(0x40, a_reg), insn->off);
632 EMIT(K, 4); 570 else
633 } 571 EMIT1_off32(add_1reg(0x80, a_reg), insn->off);
634 } else { 572
635 EMIT2(0x89,0xde); /* mov %ebx,%esi */ 573 EMIT(K, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
636 } 574 break;
637 EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ 575
638 break; 576 /* STX: *(u8*)(a_reg + off) = x_reg */
639 case BPF_S_LD_H_IND: 577 case BPF_STX | BPF_MEM | BPF_B:
640 func = sk_load_half; 578 /* emit 'mov byte ptr [rax + off], al' */
641 goto common_load_ind; 579 if (is_ereg(a_reg) || is_ereg(x_reg) ||
642 case BPF_S_LD_B_IND: 580 /* have to add extra byte for x86 SIL, DIL regs */
643 func = sk_load_byte; 581 x_reg == BPF_REG_1 || x_reg == BPF_REG_2)
644 goto common_load_ind; 582 EMIT2(add_2mod(0x40, a_reg, x_reg), 0x88);
645 case BPF_S_JMP_JA: 583 else
646 t_offset = addrs[i + K] - addrs[i]; 584 EMIT1(0x88);
647 EMIT_JMP(t_offset); 585 goto stx;
648 break; 586 case BPF_STX | BPF_MEM | BPF_H:
649 COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE); 587 if (is_ereg(a_reg) || is_ereg(x_reg))
650 COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB); 588 EMIT3(0x66, add_2mod(0x40, a_reg, x_reg), 0x89);
651 COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE); 589 else
652 COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE); 590 EMIT2(0x66, 0x89);
653 COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE); 591 goto stx;
654 COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB); 592 case BPF_STX | BPF_MEM | BPF_W:
655 COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE); 593 if (is_ereg(a_reg) || is_ereg(x_reg))
656 COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE); 594 EMIT2(add_2mod(0x40, a_reg, x_reg), 0x89);
657 595 else
658cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; 596 EMIT1(0x89);
659 t_offset = addrs[i + filter[i].jt] - addrs[i]; 597 goto stx;
660 598 case BPF_STX | BPF_MEM | BPF_DW:
661 /* same targets, can avoid doing the test :) */ 599 EMIT2(add_2mod(0x48, a_reg, x_reg), 0x89);
662 if (filter[i].jt == filter[i].jf) { 600stx: if (is_imm8(insn->off))
663 EMIT_JMP(t_offset); 601 EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
664 break; 602 else
665 } 603 EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
604 insn->off);
605 break;
606
607 /* LDX: a_reg = *(u8*)(x_reg + off) */
608 case BPF_LDX | BPF_MEM | BPF_B:
609 /* emit 'movzx rax, byte ptr [rax + off]' */
610 EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB6);
611 goto ldx;
612 case BPF_LDX | BPF_MEM | BPF_H:
613 /* emit 'movzx rax, word ptr [rax + off]' */
614 EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB7);
615 goto ldx;
616 case BPF_LDX | BPF_MEM | BPF_W:
617 /* emit 'mov eax, dword ptr [rax+0x14]' */
618 if (is_ereg(a_reg) || is_ereg(x_reg))
619 EMIT2(add_2mod(0x40, x_reg, a_reg), 0x8B);
620 else
621 EMIT1(0x8B);
622 goto ldx;
623 case BPF_LDX | BPF_MEM | BPF_DW:
624 /* emit 'mov rax, qword ptr [rax+0x14]' */
625 EMIT2(add_2mod(0x48, x_reg, a_reg), 0x8B);
626ldx: /* if insn->off == 0 we can save one extra byte, but
627 * special case of x86 r13 which always needs an offset
628 * is not worth the hassle
629 */
630 if (is_imm8(insn->off))
631 EMIT2(add_2reg(0x40, x_reg, a_reg), insn->off);
632 else
633 EMIT1_off32(add_2reg(0x80, x_reg, a_reg),
634 insn->off);
635 break;
636
637 /* STX XADD: lock *(u32*)(a_reg + off) += x_reg */
638 case BPF_STX | BPF_XADD | BPF_W:
639 /* emit 'lock add dword ptr [rax + off], eax' */
640 if (is_ereg(a_reg) || is_ereg(x_reg))
641 EMIT3(0xF0, add_2mod(0x40, a_reg, x_reg), 0x01);
642 else
643 EMIT2(0xF0, 0x01);
644 goto xadd;
645 case BPF_STX | BPF_XADD | BPF_DW:
646 EMIT3(0xF0, add_2mod(0x48, a_reg, x_reg), 0x01);
647xadd: if (is_imm8(insn->off))
648 EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
649 else
650 EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
651 insn->off);
652 break;
653
654 /* call */
655 case BPF_JMP | BPF_CALL:
656 func = (u8 *) __bpf_call_base + K;
657 jmp_offset = func - (image + addrs[i]);
658 if (ctx->seen_ld_abs) {
659 EMIT2(0x41, 0x52); /* push %r10 */
660 EMIT2(0x41, 0x51); /* push %r9 */
661 /* need to adjust jmp offset, since
662 * pop %r9, pop %r10 take 4 bytes after call insn
663 */
664 jmp_offset += 4;
665 }
666 if (!K || !is_simm32(jmp_offset)) {
667 pr_err("unsupported bpf func %d addr %p image %p\n",
668 K, func, image);
669 return -EINVAL;
670 }
671 EMIT1_off32(0xE8, jmp_offset);
672 if (ctx->seen_ld_abs) {
673 EMIT2(0x41, 0x59); /* pop %r9 */
674 EMIT2(0x41, 0x5A); /* pop %r10 */
675 }
676 break;
677
678 /* cond jump */
679 case BPF_JMP | BPF_JEQ | BPF_X:
680 case BPF_JMP | BPF_JNE | BPF_X:
681 case BPF_JMP | BPF_JGT | BPF_X:
682 case BPF_JMP | BPF_JGE | BPF_X:
683 case BPF_JMP | BPF_JSGT | BPF_X:
684 case BPF_JMP | BPF_JSGE | BPF_X:
685 /* cmp a_reg, x_reg */
686 EMIT3(add_2mod(0x48, a_reg, x_reg), 0x39,
687 add_2reg(0xC0, a_reg, x_reg));
688 goto emit_cond_jmp;
689
690 case BPF_JMP | BPF_JSET | BPF_X:
691 /* test a_reg, x_reg */
692 EMIT3(add_2mod(0x48, a_reg, x_reg), 0x85,
693 add_2reg(0xC0, a_reg, x_reg));
694 goto emit_cond_jmp;
695
696 case BPF_JMP | BPF_JSET | BPF_K:
697 /* test a_reg, imm32 */
698 EMIT1(add_1mod(0x48, a_reg));
699 EMIT2_off32(0xF7, add_1reg(0xC0, a_reg), K);
700 goto emit_cond_jmp;
701
702 case BPF_JMP | BPF_JEQ | BPF_K:
703 case BPF_JMP | BPF_JNE | BPF_K:
704 case BPF_JMP | BPF_JGT | BPF_K:
705 case BPF_JMP | BPF_JGE | BPF_K:
706 case BPF_JMP | BPF_JSGT | BPF_K:
707 case BPF_JMP | BPF_JSGE | BPF_K:
708 /* cmp a_reg, imm8/32 */
709 EMIT1(add_1mod(0x48, a_reg));
710
711 if (is_imm8(K))
712 EMIT3(0x83, add_1reg(0xF8, a_reg), K);
713 else
714 EMIT2_off32(0x81, add_1reg(0xF8, a_reg), K);
715
716emit_cond_jmp: /* convert BPF opcode to x86 */
717 switch (BPF_OP(insn->code)) {
718 case BPF_JEQ:
719 jmp_cond = X86_JE;
720 break;
721 case BPF_JSET:
722 case BPF_JNE:
723 jmp_cond = X86_JNE;
724 break;
725 case BPF_JGT:
726 /* GT is unsigned '>', JA in x86 */
727 jmp_cond = X86_JA;
728 break;
729 case BPF_JGE:
730 /* GE is unsigned '>=', JAE in x86 */
731 jmp_cond = X86_JAE;
732 break;
733 case BPF_JSGT:
734 /* signed '>', GT in x86 */
735 jmp_cond = X86_JG;
736 break;
737 case BPF_JSGE:
738 /* signed '>=', GE in x86 */
739 jmp_cond = X86_JGE;
740 break;
741 default: /* to silence gcc warning */
742 return -EFAULT;
743 }
744 jmp_offset = addrs[i + insn->off] - addrs[i];
745 if (is_imm8(jmp_offset)) {
746 EMIT2(jmp_cond, jmp_offset);
747 } else if (is_simm32(jmp_offset)) {
748 EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
749 } else {
750 pr_err("cond_jmp gen bug %llx\n", jmp_offset);
751 return -EFAULT;
752 }
753
754 break;
666 755
667 switch (filter[i].code) { 756 case BPF_JMP | BPF_JA:
668 case BPF_S_JMP_JGT_X: 757 jmp_offset = addrs[i + insn->off] - addrs[i];
669 case BPF_S_JMP_JGE_X: 758 if (!jmp_offset)
670 case BPF_S_JMP_JEQ_X: 759 /* optimize out nop jumps */
671 seen |= SEEN_XREG; 760 break;
672 EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */ 761emit_jmp:
673 break; 762 if (is_imm8(jmp_offset)) {
674 case BPF_S_JMP_JSET_X: 763 EMIT2(0xEB, jmp_offset);
675 seen |= SEEN_XREG; 764 } else if (is_simm32(jmp_offset)) {
676 EMIT2(0x85, 0xd8); /* test %ebx,%eax */ 765 EMIT1_off32(0xE9, jmp_offset);
677 break; 766 } else {
678 case BPF_S_JMP_JEQ_K: 767 pr_err("jmp gen bug %llx\n", jmp_offset);
679 if (K == 0) { 768 return -EFAULT;
680 EMIT2(0x85, 0xc0); /* test %eax,%eax */ 769 }
681 break; 770 break;
682 } 771
683 case BPF_S_JMP_JGT_K: 772 case BPF_LD | BPF_IND | BPF_W:
684 case BPF_S_JMP_JGE_K: 773 func = sk_load_word;
685 if (K <= 127) 774 goto common_load;
686 EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */ 775 case BPF_LD | BPF_ABS | BPF_W:
776 func = CHOOSE_LOAD_FUNC(K, sk_load_word);
777common_load: ctx->seen_ld_abs = true;
778 jmp_offset = func - (image + addrs[i]);
779 if (!func || !is_simm32(jmp_offset)) {
780 pr_err("unsupported bpf func %d addr %p image %p\n",
781 K, func, image);
782 return -EINVAL;
783 }
784 if (BPF_MODE(insn->code) == BPF_ABS) {
785 /* mov %esi, imm32 */
786 EMIT1_off32(0xBE, K);
787 } else {
788 /* mov %rsi, x_reg */
789 EMIT_mov(BPF_REG_2, x_reg);
790 if (K) {
791 if (is_imm8(K))
792 /* add %esi, imm8 */
793 EMIT3(0x83, 0xC6, K);
687 else 794 else
688 EMIT1_off32(0x3d, K); /* cmp imm32,%eax */ 795 /* add %esi, imm32 */
689 break; 796 EMIT2_off32(0x81, 0xC6, K);
690 case BPF_S_JMP_JSET_K:
691 if (K <= 0xFF)
692 EMIT2(0xa8, K); /* test imm8,%al */
693 else if (!(K & 0xFFFF00FF))
694 EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */
695 else if (K <= 0xFFFF) {
696 EMIT2(0x66, 0xa9); /* test imm16,%ax */
697 EMIT(K, 2);
698 } else {
699 EMIT1_off32(0xa9, K); /* test imm32,%eax */
700 }
701 break;
702 } 797 }
703 if (filter[i].jt != 0) { 798 }
704 if (filter[i].jf && f_offset) 799 /* skb pointer is in R6 (%rbx), it will be copied into
705 t_offset += is_near(f_offset) ? 2 : 5; 800 * %rdi if skb_copy_bits() call is necessary.
706 EMIT_COND_JMP(t_op, t_offset); 801 * sk_load_* helpers also use %r10 and %r9d.
707 if (filter[i].jf) 802 * See bpf_jit.S
708 EMIT_JMP(f_offset); 803 */
709 break; 804 EMIT1_off32(0xE8, jmp_offset); /* call */
710 } 805 break;
711 EMIT_COND_JMP(f_op, f_offset); 806
712 break; 807 case BPF_LD | BPF_IND | BPF_H:
808 func = sk_load_half;
809 goto common_load;
810 case BPF_LD | BPF_ABS | BPF_H:
811 func = CHOOSE_LOAD_FUNC(K, sk_load_half);
812 goto common_load;
813 case BPF_LD | BPF_IND | BPF_B:
814 func = sk_load_byte;
815 goto common_load;
816 case BPF_LD | BPF_ABS | BPF_B:
817 func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
818 goto common_load;
819
820 case BPF_JMP | BPF_EXIT:
821 if (i != insn_cnt - 1) {
822 jmp_offset = ctx->cleanup_addr - addrs[i];
823 goto emit_jmp;
824 }
825 /* update cleanup_addr */
826 ctx->cleanup_addr = proglen;
827 /* mov rbx, qword ptr [rbp-X] */
828 EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize);
829 /* mov r13, qword ptr [rbp-X] */
830 EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8);
831 /* mov r14, qword ptr [rbp-X] */
832 EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16);
833 /* mov r15, qword ptr [rbp-X] */
834 EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24);
835
836 EMIT1(0xC9); /* leave */
837 EMIT1(0xC3); /* ret */
838 break;
839
713 default: 840 default:
714 /* hmm, too complex filter, give up with jit compiler */ 841 /* By design x64 JIT should support all BPF instructions
842 * This error will be seen if new instruction was added
843 * to interpreter, but not to JIT
844 * or if there is junk in sk_filter
845 */
846 pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
715 return -EINVAL; 847 return -EINVAL;
716 } 848 }
849
717 ilen = prog - temp; 850 ilen = prog - temp;
718 if (image) { 851 if (image) {
719 if (unlikely(proglen + ilen > oldproglen)) { 852 if (unlikely(proglen + ilen > oldproglen)) {
720 pr_err("bpb_jit_compile fatal error\n"); 853 pr_err("bpf_jit_compile fatal error\n");
721 return -EFAULT; 854 return -EFAULT;
722 } 855 }
723 memcpy(image + proglen, temp, ilen); 856 memcpy(image + proglen, temp, ilen);
@@ -726,22 +859,15 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
726 addrs[i] = proglen; 859 addrs[i] = proglen;
727 prog = temp; 860 prog = temp;
728 } 861 }
729 /* last bpf instruction is always a RET :
730 * use it to give the cleanup instruction(s) addr
731 */
732 ctx->cleanup_addr = proglen - 1; /* ret */
733 if (seen_or_pass0)
734 ctx->cleanup_addr -= 1; /* leaveq */
735 if (seen_or_pass0 & SEEN_XREG)
736 ctx->cleanup_addr -= 4; /* mov -8(%rbp),%rbx */
737
738 ctx->seen = seen;
739
740 return proglen; 862 return proglen;
741} 863}
742 864
743void bpf_jit_compile(struct sk_filter *prog) 865void bpf_jit_compile(struct sk_filter *prog)
744{ 866{
867}
868
869void bpf_int_jit_compile(struct sk_filter *prog)
870{
745 struct bpf_binary_header *header = NULL; 871 struct bpf_binary_header *header = NULL;
746 int proglen, oldproglen = 0; 872 int proglen, oldproglen = 0;
747 struct jit_context ctx = {}; 873 struct jit_context ctx = {};
@@ -768,8 +894,6 @@ void bpf_jit_compile(struct sk_filter *prog)
768 addrs[i] = proglen; 894 addrs[i] = proglen;
769 } 895 }
770 ctx.cleanup_addr = proglen; 896 ctx.cleanup_addr = proglen;
771 ctx.seen = SEEN_XREG | SEEN_DATAREF | SEEN_MEM;
772 ctx.pc_ret0 = -1;
773 897
774 for (pass = 0; pass < 10; pass++) { 898 for (pass = 0; pass < 10; pass++) {
775 proglen = do_jit(prog, addrs, image, oldproglen, &ctx); 899 proglen = do_jit(prog, addrs, image, oldproglen, &ctx);