author    Shubham Bansal <illusionist.neo@gmail.com>    2017-08-22 02:32:33 -0400
committer David S. Miller <davem@davemloft.net>    2017-08-22 12:26:43 -0400
commit    39c13c204bb1150d401e27d41a9d8b332be47c49 (patch)
tree      5b227810abdecca21fad1181a83b0820efc0c82f /arch/arm/net
parent    e2a7c34fb2856fd5306e307e170e3dde358d0dce (diff)
arm: eBPF JIT compiler
The JIT compiler emits ARM 32 bit instructions. Currently, it supports
eBPF only; classic BPF is supported through the conversion done by the BPF core.

This patch essentially changes the current implementation of the JIT compiler
for the Berkeley Packet Filter from classic to internal (eBPF), with almost all
instructions from the eBPF ISA supported except the following:

        BPF_ALU64 | BPF_DIV | BPF_K
        BPF_ALU64 | BPF_DIV | BPF_X
        BPF_ALU64 | BPF_MOD | BPF_K
        BPF_ALU64 | BPF_MOD | BPF_X
        BPF_STX | BPF_XADD | BPF_W
        BPF_STX | BPF_XADD | BPF_DW

The implementation uses scratch space on the stack to emulate the 64 bit eBPF
ISA on 32 bit ARM because of the shortage of general purpose registers on ARM.
Currently, only LITTLE ENDIAN machines are supported by this eBPF JIT compiler.

Tested on ARMv7 with QEMU by me (Shubham Bansal).

Testing results on ARMv7:

1) test_bpf: Summary: 341 PASSED, 0 FAILED, [312/333 JIT'ed]
2) test_tag: OK (40945 tests)
3) test_progs: Summary: 30 PASSED, 0 FAILED
4) test_lpm: OK
5) test_lru_map: OK

The above tests were all run with each of the following flag combinations
enabled separately:

1) bpf_jit_enable=1
        a) CONFIG_FRAME_POINTER enabled
        b) CONFIG_FRAME_POINTER disabled
2) bpf_jit_enable=1 and bpf_jit_harden=2
        a) CONFIG_FRAME_POINTER enabled
        b) CONFIG_FRAME_POINTER disabled

See Documentation/networking/filter.txt for more information.

Signed-off-by: Shubham Bansal <illusionist.neo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
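As a rough user-space sketch of the register-pair emulation described above (the struct and helper below are made up for illustration and are not part of the patch), a 64 bit eBPF register can be modelled as a {hi, lo} pair of 32 bit words, and a 64 bit add then decomposes into a low-word add followed by an add-with-carry on the high words, which is the ADDS/ADC pattern the JIT emits:

    #include <stdint.h>
    #include <inttypes.h>
    #include <stdio.h>

    /* Illustrative only: one 64 bit eBPF register held as two 32 bit halves,
     * mirroring how the JIT maps each eBPF register to two ARM registers or
     * two stack slots.
     */
    struct ebpf_reg64 {
            uint32_t hi;
            uint32_t lo;
    };

    static void add64(struct ebpf_reg64 *dst, const struct ebpf_reg64 *src)
    {
            uint32_t lo = dst->lo + src->lo;
            uint32_t carry = lo < dst->lo;          /* carry out of the low word */

            dst->lo = lo;                           /* adds dst_lo, dst_lo, src_lo */
            dst->hi = dst->hi + src->hi + carry;    /* adc  dst_hi, dst_hi, src_hi */
    }

    int main(void)
    {
            struct ebpf_reg64 a = { .hi = 0x0, .lo = 0xffffffff };
            struct ebpf_reg64 b = { .hi = 0x0, .lo = 0x1 };

            add64(&a, &b);
            /* prints 0x0000000100000000 */
            printf("0x%08" PRIx32 "%08" PRIx32 "\n", a.hi, a.lo);
            return 0;
    }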
Diffstat (limited to 'arch/arm/net')
-rw-r--r--   arch/arm/net/bpf_jit_32.c   2448
-rw-r--r--   arch/arm/net/bpf_jit_32.h    108
2 files changed, 1746 insertions, 810 deletions
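One detail worth noting before the diff: the imm8m() helper added below tests whether a 32 bit constant fits ARM's modified-immediate encoding, i.e. an 8 bit value rotated right by an even number of bits, and returns the 12 bit encoding (rotation in bits 11:8, value in bits 7:0) or -1 if the constant cannot be encoded. A minimal stand-alone sketch of the same check, with local ror32/rol32 helpers standing in for the kernel's:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t ror32(uint32_t x, unsigned int n)
    {
            n &= 31;
            return n ? (x >> n) | (x << (32 - n)) : x;
    }

    static uint32_t rol32(uint32_t x, unsigned int n)
    {
            return ror32(x, 32 - (n & 31));
    }

    /* Try all 16 even rotations of an 8 bit window over the constant. */
    static int imm8m(uint32_t x)
    {
            unsigned int rot;

            for (rot = 0; rot < 16; rot++)
                    if ((x & ~ror32(0xff, 2 * rot)) == 0)
                            return rol32(x, 2 * rot) | (rot << 8);
            return -1;
    }

    int main(void)
    {
            printf("%d\n", imm8m(0xff000000));      /* encodable: 0x4ff */
            printf("%d\n", imm8m(0x00ff0000));      /* encodable: 0x8ff */
            printf("%d\n", imm8m(0x00101001));      /* not encodable: -1 */
            return 0;
    }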
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index d5b9fa19b684..c199990e12b6 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Just-In-Time compiler for BPF filters on 32bit ARM 2 * Just-In-Time compiler for eBPF filters on 32bit ARM
3 * 3 *
4 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
4 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> 5 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify it 7 * This program is free software; you can redistribute it and/or modify it
@@ -8,6 +9,7 @@
8 * Free Software Foundation; version 2 of the License. 9 * Free Software Foundation; version 2 of the License.
9 */ 10 */
10 11
12#include <linux/bpf.h>
11#include <linux/bitops.h> 13#include <linux/bitops.h>
12#include <linux/compiler.h> 14#include <linux/compiler.h>
13#include <linux/errno.h> 15#include <linux/errno.h>
@@ -18,54 +20,101 @@
18#include <linux/if_vlan.h> 20#include <linux/if_vlan.h>
19 21
20#include <asm/cacheflush.h> 22#include <asm/cacheflush.h>
21#include <asm/set_memory.h>
22#include <asm/hwcap.h> 23#include <asm/hwcap.h>
23#include <asm/opcodes.h> 24#include <asm/opcodes.h>
24 25
25#include "bpf_jit_32.h" 26#include "bpf_jit_32.h"
26 27
28int bpf_jit_enable __read_mostly;
29
30#define STACK_OFFSET(k) (k)
31#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */
32#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */
33#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */
34
35/* Flags used for JIT optimization */
36#define SEEN_CALL (1 << 0)
37
38#define FLAG_IMM_OVERFLOW (1 << 0)
39
27/* 40/*
28 * ABI: 41 * Map eBPF registers to ARM 32bit registers or stack scratch space.
42 *
43 * 1. First argument is passed using the arm 32bit registers and rest of the
44 * arguments are passed on stack scratch space.
45 * 2. First callee-saved argument is mapped to arm 32 bit registers and rest
46 * arguments are mapped to scratch space on stack.
47 * 3. We need two 64 bit temp registers to do complex operations on eBPF
48 * registers.
49 *
50 * As the eBPF registers are all 64 bit registers and arm has only 32 bit
51 * registers, we have to map each eBPF registers with two arm 32 bit regs or
52 * scratch memory space and we have to build eBPF 64 bit register from those.
29 * 53 *
30 * r0 scratch register
31 * r4 BPF register A
32 * r5 BPF register X
33 * r6 pointer to the skb
34 * r7 skb->data
35 * r8 skb_headlen(skb)
36 */ 54 */
55static const u8 bpf2a32[][2] = {
56 /* return value from in-kernel function, and exit value from eBPF */
57 [BPF_REG_0] = {ARM_R1, ARM_R0},
58 /* arguments from eBPF program to in-kernel function */
59 [BPF_REG_1] = {ARM_R3, ARM_R2},
60 /* Stored on stack scratch space */
61 [BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)},
62 [BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)},
63 [BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)},
64 [BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)},
65 /* callee saved registers that in-kernel function will preserve */
66 [BPF_REG_6] = {ARM_R5, ARM_R4},
67 /* Stored on stack scratch space */
68 [BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)},
69 [BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)},
70 [BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)},
71 /* Read only Frame Pointer to access Stack */
72 [BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)},
73 /* Temporary Register for internal BPF JIT, can be used
74 * for constant blindings and others.
75 */
76 [TMP_REG_1] = {ARM_R7, ARM_R6},
77 [TMP_REG_2] = {ARM_R10, ARM_R8},
78 /* Tail call count. Stored on stack scratch space. */
79 [TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)},
80 /* temporary register for blinding constants.
81 * Stored on stack scratch space.
82 */
83 [BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)},
84};
37 85
38#define r_scratch ARM_R0 86#define dst_lo dst[1]
39/* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */ 87#define dst_hi dst[0]
40#define r_off ARM_R1 88#define src_lo src[1]
41#define r_A ARM_R4 89#define src_hi src[0]
42#define r_X ARM_R5
43#define r_skb ARM_R6
44#define r_skb_data ARM_R7
45#define r_skb_hl ARM_R8
46
47#define SCRATCH_SP_OFFSET 0
48#define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + 4 * (k))
49
50#define SEEN_MEM ((1 << BPF_MEMWORDS) - 1)
51#define SEEN_MEM_WORD(k) (1 << (k))
52#define SEEN_X (1 << BPF_MEMWORDS)
53#define SEEN_CALL (1 << (BPF_MEMWORDS + 1))
54#define SEEN_SKB (1 << (BPF_MEMWORDS + 2))
55#define SEEN_DATA (1 << (BPF_MEMWORDS + 3))
56 90
57#define FLAG_NEED_X_RESET (1 << 0) 91/*
58#define FLAG_IMM_OVERFLOW (1 << 1) 92 * JIT Context:
93 *
94 * prog : bpf_prog
95 * idx : index of current last JITed instruction.
96 * prologue_bytes : bytes used in prologue.
97 * epilogue_offset : offset of epilogue starting.
98 * seen : bit mask used for JIT optimization.
99 * offsets : array of eBPF instruction offsets in
100 * JITed code.
101 * target : final JITed code.
102 * epilogue_bytes : no of bytes used in epilogue.
103 * imm_count : no of immediate counts used for global
104 * variables.
105 * imms : array of global variable addresses.
106 */
59 107
60struct jit_ctx { 108struct jit_ctx {
61 const struct bpf_prog *skf; 109 const struct bpf_prog *prog;
62 unsigned idx; 110 unsigned int idx;
63 unsigned prologue_bytes; 111 unsigned int prologue_bytes;
64 int ret0_fp_idx; 112 unsigned int epilogue_offset;
65 u32 seen; 113 u32 seen;
66 u32 flags; 114 u32 flags;
67 u32 *offsets; 115 u32 *offsets;
68 u32 *target; 116 u32 *target;
117 u32 stack_size;
69#if __LINUX_ARM_ARCH__ < 7 118#if __LINUX_ARM_ARCH__ < 7
70 u16 epilogue_bytes; 119 u16 epilogue_bytes;
71 u16 imm_count; 120 u16 imm_count;
@@ -73,68 +122,16 @@ struct jit_ctx {
73#endif 122#endif
74}; 123};
75 124
76int bpf_jit_enable __read_mostly;
77
78static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret,
79 unsigned int size)
80{
81 void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size);
82
83 if (!ptr)
84 return -EFAULT;
85 memcpy(ret, ptr, size);
86 return 0;
87}
88
89static u64 jit_get_skb_b(struct sk_buff *skb, int offset)
90{
91 u8 ret;
92 int err;
93
94 if (offset < 0)
95 err = call_neg_helper(skb, offset, &ret, 1);
96 else
97 err = skb_copy_bits(skb, offset, &ret, 1);
98
99 return (u64)err << 32 | ret;
100}
101
102static u64 jit_get_skb_h(struct sk_buff *skb, int offset)
103{
104 u16 ret;
105 int err;
106
107 if (offset < 0)
108 err = call_neg_helper(skb, offset, &ret, 2);
109 else
110 err = skb_copy_bits(skb, offset, &ret, 2);
111
112 return (u64)err << 32 | ntohs(ret);
113}
114
115static u64 jit_get_skb_w(struct sk_buff *skb, int offset)
116{
117 u32 ret;
118 int err;
119
120 if (offset < 0)
121 err = call_neg_helper(skb, offset, &ret, 4);
122 else
123 err = skb_copy_bits(skb, offset, &ret, 4);
124
125 return (u64)err << 32 | ntohl(ret);
126}
127
128/* 125/*
129 * Wrappers which handle both OABI and EABI and assures Thumb2 interworking 126 * Wrappers which handle both OABI and EABI and assures Thumb2 interworking
130 * (where the assembly routines like __aeabi_uidiv could cause problems). 127 * (where the assembly routines like __aeabi_uidiv could cause problems).
131 */ 128 */
132static u32 jit_udiv(u32 dividend, u32 divisor) 129static u32 jit_udiv32(u32 dividend, u32 divisor)
133{ 130{
134 return dividend / divisor; 131 return dividend / divisor;
135} 132}
136 133
137static u32 jit_mod(u32 dividend, u32 divisor) 134static u32 jit_mod32(u32 dividend, u32 divisor)
138{ 135{
139 return dividend % divisor; 136 return dividend % divisor;
140} 137}
@@ -158,36 +155,22 @@ static inline void emit(u32 inst, struct jit_ctx *ctx)
158 _emit(ARM_COND_AL, inst, ctx); 155 _emit(ARM_COND_AL, inst, ctx);
159} 156}
160 157
161static u16 saved_regs(struct jit_ctx *ctx) 158/*
159 * Checks if immediate value can be converted to imm12(12 bits) value.
160 */
161static int16_t imm8m(u32 x)
162{ 162{
163 u16 ret = 0; 163 u32 rot;
164
165 if ((ctx->skf->len > 1) ||
166 (ctx->skf->insns[0].code == (BPF_RET | BPF_A)))
167 ret |= 1 << r_A;
168
169#ifdef CONFIG_FRAME_POINTER
170 ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC);
171#else
172 if (ctx->seen & SEEN_CALL)
173 ret |= 1 << ARM_LR;
174#endif
175 if (ctx->seen & (SEEN_DATA | SEEN_SKB))
176 ret |= 1 << r_skb;
177 if (ctx->seen & SEEN_DATA)
178 ret |= (1 << r_skb_data) | (1 << r_skb_hl);
179 if (ctx->seen & SEEN_X)
180 ret |= 1 << r_X;
181
182 return ret;
183}
184 164
185static inline int mem_words_used(struct jit_ctx *ctx) 165 for (rot = 0; rot < 16; rot++)
186{ 166 if ((x & ~ror32(0xff, 2 * rot)) == 0)
187 /* yes, we do waste some stack space IF there are "holes" in the set" */ 167 return rol32(x, 2 * rot) | (rot << 8);
188 return fls(ctx->seen & SEEN_MEM); 168 return -1;
189} 169}
190 170
171/*
172 * Initializes the JIT space with undefined instructions.
173 */
191static void jit_fill_hole(void *area, unsigned int size) 174static void jit_fill_hole(void *area, unsigned int size)
192{ 175{
193 u32 *ptr; 176 u32 *ptr;
@@ -196,88 +179,34 @@ static void jit_fill_hole(void *area, unsigned int size)
196 *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); 179 *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
197} 180}
198 181
199static void build_prologue(struct jit_ctx *ctx) 182/* Stack must be multiples of 16 Bytes */
200{ 183#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
201 u16 reg_set = saved_regs(ctx);
202 u16 off;
203
204#ifdef CONFIG_FRAME_POINTER
205 emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
206 emit(ARM_PUSH(reg_set), ctx);
207 emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
208#else
209 if (reg_set)
210 emit(ARM_PUSH(reg_set), ctx);
211#endif
212
213 if (ctx->seen & (SEEN_DATA | SEEN_SKB))
214 emit(ARM_MOV_R(r_skb, ARM_R0), ctx);
215
216 if (ctx->seen & SEEN_DATA) {
217 off = offsetof(struct sk_buff, data);
218 emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx);
219 /* headlen = len - data_len */
220 off = offsetof(struct sk_buff, len);
221 emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx);
222 off = offsetof(struct sk_buff, data_len);
223 emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
224 emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx);
225 }
226
227 if (ctx->flags & FLAG_NEED_X_RESET)
228 emit(ARM_MOV_I(r_X, 0), ctx);
229
230 /* do not leak kernel data to userspace */
231 if (bpf_needs_clear_a(&ctx->skf->insns[0]))
232 emit(ARM_MOV_I(r_A, 0), ctx);
233
234 /* stack space for the BPF_MEM words */
235 if (ctx->seen & SEEN_MEM)
236 emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
237}
238
239static void build_epilogue(struct jit_ctx *ctx)
240{
241 u16 reg_set = saved_regs(ctx);
242
243 if (ctx->seen & SEEN_MEM)
244 emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
245
246 reg_set &= ~(1 << ARM_LR);
247 184
248#ifdef CONFIG_FRAME_POINTER 185/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
249 /* the first instruction of the prologue was: mov ip, sp */ 186 * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
250 reg_set &= ~(1 << ARM_IP); 187 * BPF_REG_FP and Tail call counts.
251 reg_set |= (1 << ARM_SP); 188 */
252 emit(ARM_LDM(ARM_SP, reg_set), ctx); 189#define SCRATCH_SIZE 80
253#else
254 if (reg_set) {
255 if (ctx->seen & SEEN_CALL)
256 reg_set |= 1 << ARM_PC;
257 emit(ARM_POP(reg_set), ctx);
258 }
259 190
260 if (!(ctx->seen & SEEN_CALL)) 191/* total stack size used in JITed code */
261 emit(ARM_BX(ARM_LR), ctx); 192#define _STACK_SIZE \
262#endif 193 (ctx->prog->aux->stack_depth + \
263} 194 + SCRATCH_SIZE + \
195 + 4 /* extra for skb_copy_bits buffer */)
264 196
265static int16_t imm8m(u32 x) 197#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
266{
267 u32 rot;
268 198
269 for (rot = 0; rot < 16; rot++) 199/* Get the offset of eBPF REGISTERs stored on scratch space. */
270 if ((x & ~ror32(0xff, 2 * rot)) == 0) 200#define STACK_VAR(off) (STACK_SIZE-off-4)
271 return rol32(x, 2 * rot) | (rot << 8);
272 201
273 return -1; 202/* Offset of skb_copy_bits buffer */
274} 203#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
275 204
276#if __LINUX_ARM_ARCH__ < 7 205#if __LINUX_ARM_ARCH__ < 7
277 206
278static u16 imm_offset(u32 k, struct jit_ctx *ctx) 207static u16 imm_offset(u32 k, struct jit_ctx *ctx)
279{ 208{
280 unsigned i = 0, offset; 209 unsigned int i = 0, offset;
281 u16 imm; 210 u16 imm;
282 211
283 /* on the "fake" run we just count them (duplicates included) */ 212 /* on the "fake" run we just count them (duplicates included) */
@@ -296,7 +225,7 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
296 ctx->imms[i] = k; 225 ctx->imms[i] = k;
297 226
298 /* constants go just after the epilogue */ 227 /* constants go just after the epilogue */
299 offset = ctx->offsets[ctx->skf->len]; 228 offset = ctx->offsets[ctx->prog->len - 1] * 4;
300 offset += ctx->prologue_bytes; 229 offset += ctx->prologue_bytes;
301 offset += ctx->epilogue_bytes; 230 offset += ctx->epilogue_bytes;
302 offset += i * 4; 231 offset += i * 4;
@@ -320,10 +249,22 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
320 249
321#endif /* __LINUX_ARM_ARCH__ */ 250#endif /* __LINUX_ARM_ARCH__ */
322 251
252static inline int bpf2a32_offset(int bpf_to, int bpf_from,
253 const struct jit_ctx *ctx) {
254 int to, from;
255
256 if (ctx->target == NULL)
257 return 0;
258 to = ctx->offsets[bpf_to];
259 from = ctx->offsets[bpf_from];
260
261 return to - from - 1;
262}
263
323/* 264/*
324 * Move an immediate that's not an imm8m to a core register. 265 * Move an immediate that's not an imm8m to a core register.
325 */ 266 */
326static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx) 267static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx)
327{ 268{
328#if __LINUX_ARM_ARCH__ < 7 269#if __LINUX_ARM_ARCH__ < 7
329 emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx); 270 emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
@@ -334,7 +275,7 @@ static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
334#endif 275#endif
335} 276}
336 277
337static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx) 278static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
338{ 279{
339 int imm12 = imm8m(val); 280 int imm12 = imm8m(val);
340 281
@@ -344,676 +285,1594 @@ static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
344 emit_mov_i_no8m(rd, val, ctx); 285 emit_mov_i_no8m(rd, val, ctx);
345} 286}
346 287
347#if __LINUX_ARM_ARCH__ < 6 288static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
348
349static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
350{ 289{
351 _emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx); 290 ctx->seen |= SEEN_CALL;
352 _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); 291#if __LINUX_ARM_ARCH__ < 5
353 _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx); 292 emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
354 _emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx); 293
355 _emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx); 294 if (elf_hwcap & HWCAP_THUMB)
356 _emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx); 295 emit(ARM_BX(tgt_reg), ctx);
357 _emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx); 296 else
358 _emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx); 297 emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
298#else
299 emit(ARM_BLX_R(tgt_reg), ctx);
300#endif
359} 301}
360 302
361static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 303static inline int epilogue_offset(const struct jit_ctx *ctx)
362{ 304{
363 _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); 305 int to, from;
364 _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx); 306 /* No need for 1st dummy run */
365 _emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx); 307 if (ctx->target == NULL)
308 return 0;
309 to = ctx->epilogue_offset;
310 from = ctx->idx;
311
312 return to - from - 2;
366} 313}
367 314
368static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx) 315static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
369{ 316{
370 /* r_dst = (r_src << 8) | (r_src >> 8) */ 317 const u8 *tmp = bpf2a32[TMP_REG_1];
371 emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx); 318 s32 jmp_offset;
372 emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx); 319
320 /* checks if divisor is zero or not. If it is, then
321 * exit directly.
322 */
323 emit(ARM_CMP_I(rn, 0), ctx);
324 _emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx);
325 jmp_offset = epilogue_offset(ctx);
326 _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
327#if __LINUX_ARM_ARCH__ == 7
328 if (elf_hwcap & HWCAP_IDIVA) {
329 if (op == BPF_DIV)
330 emit(ARM_UDIV(rd, rm, rn), ctx);
331 else {
332 emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
333 emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
334 }
335 return;
336 }
337#endif
373 338
374 /* 339 /*
375 * we need to mask out the bits set in r_dst[23:16] due to 340 * For BPF_ALU | BPF_DIV | BPF_K instructions
376 * the first shift instruction. 341 * As ARM_R1 and ARM_R0 contains 1st argument of bpf
377 * 342 * function, we need to save it on caller side to save
378 * note that 0x8ff is the encoded immediate 0x00ff0000. 343 * it from getting destroyed within callee.
344 * After the return from the callee, we restore ARM_R0
345 * ARM_R1.
379 */ 346 */
380 emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx); 347 if (rn != ARM_R1) {
381} 348 emit(ARM_MOV_R(tmp[0], ARM_R1), ctx);
349 emit(ARM_MOV_R(ARM_R1, rn), ctx);
350 }
351 if (rm != ARM_R0) {
352 emit(ARM_MOV_R(tmp[1], ARM_R0), ctx);
353 emit(ARM_MOV_R(ARM_R0, rm), ctx);
354 }
382 355
383#else /* ARMv6+ */ 356 /* Call appropriate function */
357 ctx->seen |= SEEN_CALL;
358 emit_mov_i(ARM_IP, op == BPF_DIV ?
359 (u32)jit_udiv32 : (u32)jit_mod32, ctx);
360 emit_blx_r(ARM_IP, ctx);
384 361
385static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 362 /* Save return value */
386{ 363 if (rd != ARM_R0)
387 _emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx); 364 emit(ARM_MOV_R(rd, ARM_R0), ctx);
388#ifdef __LITTLE_ENDIAN 365
389 _emit(cond, ARM_REV(r_res, r_res), ctx); 366 /* Restore ARM_R0 and ARM_R1 */
390#endif 367 if (rn != ARM_R1)
368 emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx);
369 if (rm != ARM_R0)
370 emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
391} 371}
392 372
393static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 373/* Checks whether BPF register is on scratch stack space or not. */
374static inline bool is_on_stack(u8 bpf_reg)
394{ 375{
395 _emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx); 376 static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5,
396#ifdef __LITTLE_ENDIAN 377 BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT,
397 _emit(cond, ARM_REV16(r_res, r_res), ctx); 378 BPF_REG_2, BPF_REG_FP};
398#endif 379 int i, reg_len = sizeof(stack_regs);
380
381 for (i = 0 ; i < reg_len ; i++) {
382 if (bpf_reg == stack_regs[i])
383 return true;
384 }
385 return false;
399} 386}
400 387
401static inline void emit_swap16(u8 r_dst __maybe_unused, 388static inline void emit_a32_mov_i(const u8 dst, const u32 val,
402 u8 r_src __maybe_unused, 389 bool dstk, struct jit_ctx *ctx)
403 struct jit_ctx *ctx __maybe_unused)
404{ 390{
405#ifdef __LITTLE_ENDIAN 391 const u8 *tmp = bpf2a32[TMP_REG_1];
406 emit(ARM_REV16(r_dst, r_src), ctx); 392
407#endif 393 if (dstk) {
394 emit_mov_i(tmp[1], val, ctx);
395 emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx);
396 } else {
397 emit_mov_i(dst, val, ctx);
398 }
408} 399}
409 400
410#endif /* __LINUX_ARM_ARCH__ < 6 */ 401/* Sign extended move */
402static inline void emit_a32_mov_i64(const bool is64, const u8 dst[],
403 const u32 val, bool dstk,
404 struct jit_ctx *ctx) {
405 u32 hi = 0;
411 406
407 if (is64 && (val & (1<<31)))
408 hi = (u32)~0;
409 emit_a32_mov_i(dst_lo, val, dstk, ctx);
410 emit_a32_mov_i(dst_hi, hi, dstk, ctx);
411}
412 412
413/* Compute the immediate value for a PC-relative branch. */ 413static inline void emit_a32_add_r(const u8 dst, const u8 src,
414static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx) 414 const bool is64, const bool hi,
415{ 415 struct jit_ctx *ctx) {
416 u32 imm; 416 /* 64 bit :
417 * adds dst_lo, dst_lo, src_lo
418 * adc dst_hi, dst_hi, src_hi
419 * 32 bit :
420 * add dst_lo, dst_lo, src_lo
421 */
422 if (!hi && is64)
423 emit(ARM_ADDS_R(dst, dst, src), ctx);
424 else if (hi && is64)
425 emit(ARM_ADC_R(dst, dst, src), ctx);
426 else
427 emit(ARM_ADD_R(dst, dst, src), ctx);
428}
417 429
418 if (ctx->target == NULL) 430static inline void emit_a32_sub_r(const u8 dst, const u8 src,
419 return 0; 431 const bool is64, const bool hi,
420 /* 432 struct jit_ctx *ctx) {
421 * BPF allows only forward jumps and the offset of the target is 433 /* 64 bit :
422 * still the one computed during the first pass. 434 * subs dst_lo, dst_lo, src_lo
435 * sbc dst_hi, dst_hi, src_hi
436 * 32 bit :
437 * sub dst_lo, dst_lo, src_lo
423 */ 438 */
424 imm = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8); 439 if (!hi && is64)
440 emit(ARM_SUBS_R(dst, dst, src), ctx);
441 else if (hi && is64)
442 emit(ARM_SBC_R(dst, dst, src), ctx);
443 else
444 emit(ARM_SUB_R(dst, dst, src), ctx);
445}
425 446
426 return imm >> 2; 447static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
448 const bool hi, const u8 op, struct jit_ctx *ctx){
449 switch (BPF_OP(op)) {
450 /* dst = dst + src */
451 case BPF_ADD:
452 emit_a32_add_r(dst, src, is64, hi, ctx);
453 break;
454 /* dst = dst - src */
455 case BPF_SUB:
456 emit_a32_sub_r(dst, src, is64, hi, ctx);
457 break;
458 /* dst = dst | src */
459 case BPF_OR:
460 emit(ARM_ORR_R(dst, dst, src), ctx);
461 break;
462 /* dst = dst & src */
463 case BPF_AND:
464 emit(ARM_AND_R(dst, dst, src), ctx);
465 break;
466 /* dst = dst ^ src */
467 case BPF_XOR:
468 emit(ARM_EOR_R(dst, dst, src), ctx);
469 break;
470 /* dst = dst * src */
471 case BPF_MUL:
472 emit(ARM_MUL(dst, dst, src), ctx);
473 break;
474 /* dst = dst << src */
475 case BPF_LSH:
476 emit(ARM_LSL_R(dst, dst, src), ctx);
477 break;
478 /* dst = dst >> src */
479 case BPF_RSH:
480 emit(ARM_LSR_R(dst, dst, src), ctx);
481 break;
482 /* dst = dst >> src (signed)*/
483 case BPF_ARSH:
484 emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx);
485 break;
486 }
427} 487}
428 488
429#define OP_IMM3(op, r1, r2, imm_val, ctx) \ 489/* ALU operation (32 bit)
430 do { \ 490 * dst = dst (op) src
431 imm12 = imm8m(imm_val); \ 491 */
432 if (imm12 < 0) { \ 492static inline void emit_a32_alu_r(const u8 dst, const u8 src,
433 emit_mov_i_no8m(r_scratch, imm_val, ctx); \ 493 bool dstk, bool sstk,
434 emit(op ## _R((r1), (r2), r_scratch), ctx); \ 494 struct jit_ctx *ctx, const bool is64,
435 } else { \ 495 const bool hi, const u8 op) {
436 emit(op ## _I((r1), (r2), imm12), ctx); \ 496 const u8 *tmp = bpf2a32[TMP_REG_1];
437 } \ 497 u8 rn = sstk ? tmp[1] : src;
438 } while (0) 498
439 499 if (sstk)
440static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx) 500 emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx);
441{ 501
442 if (ctx->ret0_fp_idx >= 0) { 502 /* ALU operation */
443 _emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx); 503 if (dstk) {
444 /* NOP to keep the size constant between passes */ 504 emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
445 emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx); 505 emit_alu_r(tmp[0], rn, is64, hi, op, ctx);
506 emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
446 } else { 507 } else {
447 _emit(cond, ARM_MOV_I(ARM_R0, 0), ctx); 508 emit_alu_r(dst, rn, is64, hi, op, ctx);
448 _emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx);
449 } 509 }
450} 510}
451 511
452static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) 512/* ALU operation (64 bit) */
453{ 513static inline void emit_a32_alu_r64(const bool is64, const u8 dst[],
454#if __LINUX_ARM_ARCH__ < 5 514 const u8 src[], bool dstk,
455 emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); 515 bool sstk, struct jit_ctx *ctx,
516 const u8 op) {
517 emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op);
518 if (is64)
519 emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op);
520 else
521 emit_a32_mov_i(dst_hi, 0, dstk, ctx);
522}
456 523
457 if (elf_hwcap & HWCAP_THUMB) 524/* dst = imm (4 bytes)*/
458 emit(ARM_BX(tgt_reg), ctx); 525static inline void emit_a32_mov_r(const u8 dst, const u8 src,
526 bool dstk, bool sstk,
527 struct jit_ctx *ctx) {
528 const u8 *tmp = bpf2a32[TMP_REG_1];
529 u8 rt = sstk ? tmp[0] : src;
530
531 if (sstk)
532 emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx);
533 if (dstk)
534 emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx);
459 else 535 else
460 emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); 536 emit(ARM_MOV_R(dst, rt), ctx);
461#else
462 emit(ARM_BLX_R(tgt_reg), ctx);
463#endif
464} 537}
465 538
466static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, 539/* dst = src */
467 int bpf_op) 540static inline void emit_a32_mov_r64(const bool is64, const u8 dst[],
468{ 541 const u8 src[], bool dstk,
469#if __LINUX_ARM_ARCH__ == 7 542 bool sstk, struct jit_ctx *ctx) {
470 if (elf_hwcap & HWCAP_IDIVA) { 543 emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx);
471 if (bpf_op == BPF_DIV) 544 if (is64) {
472 emit(ARM_UDIV(rd, rm, rn), ctx); 545 /* complete 8 byte move */
473 else { 546 emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx);
474 emit(ARM_UDIV(ARM_R3, rm, rn), ctx); 547 } else {
475 emit(ARM_MLS(rd, rn, ARM_R3, rm), ctx); 548 /* Zero out high 4 bytes */
476 } 549 emit_a32_mov_i(dst_hi, 0, dstk, ctx);
477 return;
478 } 550 }
479#endif 551}
480 552
481 /* 553/* Shift operations */
482 * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4 554static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk,
483 * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into 555 struct jit_ctx *ctx, const u8 op) {
484 * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm 556 const u8 *tmp = bpf2a32[TMP_REG_1];
485 * before using it as a source for ARM_R1. 557 u8 rd = dstk ? tmp[0] : dst;
486 * 558
487 * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is 559 if (dstk)
488 * ARM_R5 (r_X) so there is no particular register overlap 560 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
489 * issues. 561
490 */ 562 /* Do shift operation */
491 if (rn != ARM_R1) 563 switch (op) {
492 emit(ARM_MOV_R(ARM_R1, rn), ctx); 564 case BPF_LSH:
493 if (rm != ARM_R0) 565 emit(ARM_LSL_I(rd, rd, val), ctx);
494 emit(ARM_MOV_R(ARM_R0, rm), ctx); 566 break;
567 case BPF_RSH:
568 emit(ARM_LSR_I(rd, rd, val), ctx);
569 break;
570 case BPF_NEG:
571 emit(ARM_RSB_I(rd, rd, val), ctx);
572 break;
573 }
495 574
575 if (dstk)
576 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
577}
578
579/* dst = ~dst (64 bit) */
580static inline void emit_a32_neg64(const u8 dst[], bool dstk,
581 struct jit_ctx *ctx){
582 const u8 *tmp = bpf2a32[TMP_REG_1];
583 u8 rd = dstk ? tmp[1] : dst[1];
584 u8 rm = dstk ? tmp[0] : dst[0];
585
586 /* Setup Operand */
587 if (dstk) {
588 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
589 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
590 }
591
592 /* Do Negate Operation */
593 emit(ARM_RSBS_I(rd, rd, 0), ctx);
594 emit(ARM_RSC_I(rm, rm, 0), ctx);
595
596 if (dstk) {
597 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
598 emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
599 }
600}
601
602/* dst = dst << src */
603static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
604 bool sstk, struct jit_ctx *ctx) {
605 const u8 *tmp = bpf2a32[TMP_REG_1];
606 const u8 *tmp2 = bpf2a32[TMP_REG_2];
607
608 /* Setup Operands */
609 u8 rt = sstk ? tmp2[1] : src_lo;
610 u8 rd = dstk ? tmp[1] : dst_lo;
611 u8 rm = dstk ? tmp[0] : dst_hi;
612
613 if (sstk)
614 emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
615 if (dstk) {
616 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
617 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
618 }
619
620 /* Do LSH operation */
621 emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
622 emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
623 /* As we are using ARM_LR */
496 ctx->seen |= SEEN_CALL; 624 ctx->seen |= SEEN_CALL;
497 emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? (u32)jit_udiv : (u32)jit_mod, 625 emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
498 ctx); 626 emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
499 emit_blx_r(ARM_R3, ctx); 627 emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
628 emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx);
629
630 if (dstk) {
631 emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
632 emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
633 } else {
634 emit(ARM_MOV_R(rd, ARM_LR), ctx);
635 emit(ARM_MOV_R(rm, ARM_IP), ctx);
636 }
637}
500 638
501 if (rd != ARM_R0) 639/* dst = dst >> src (signed)*/
502 emit(ARM_MOV_R(rd, ARM_R0), ctx); 640static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
641 bool sstk, struct jit_ctx *ctx) {
642 const u8 *tmp = bpf2a32[TMP_REG_1];
643 const u8 *tmp2 = bpf2a32[TMP_REG_2];
644 /* Setup Operands */
645 u8 rt = sstk ? tmp2[1] : src_lo;
646 u8 rd = dstk ? tmp[1] : dst_lo;
647 u8 rm = dstk ? tmp[0] : dst_hi;
648
649 if (sstk)
650 emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
651 if (dstk) {
652 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
653 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
654 }
655
656 /* Do the ARSH operation */
657 emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
658 emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
659 /* As we are using ARM_LR */
660 ctx->seen |= SEEN_CALL;
661 emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
662 emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
663 _emit(ARM_COND_MI, ARM_B(0), ctx);
664 emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx);
665 emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx);
666 if (dstk) {
667 emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
668 emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
669 } else {
670 emit(ARM_MOV_R(rd, ARM_LR), ctx);
671 emit(ARM_MOV_R(rm, ARM_IP), ctx);
672 }
673}
674
675/* dst = dst >> src */
676static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
677 bool sstk, struct jit_ctx *ctx) {
678 const u8 *tmp = bpf2a32[TMP_REG_1];
679 const u8 *tmp2 = bpf2a32[TMP_REG_2];
680 /* Setup Operands */
681 u8 rt = sstk ? tmp2[1] : src_lo;
682 u8 rd = dstk ? tmp[1] : dst_lo;
683 u8 rm = dstk ? tmp[0] : dst_hi;
684
685 if (sstk)
686 emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
687 if (dstk) {
688 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
689 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
690 }
691
692 /* Do LSH operation */
693 emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
694 emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
695 /* As we are using ARM_LR */
696 ctx->seen |= SEEN_CALL;
697 emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
698 emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
699 emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
700 emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx);
701 if (dstk) {
702 emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
703 emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
704 } else {
705 emit(ARM_MOV_R(rd, ARM_LR), ctx);
706 emit(ARM_MOV_R(rm, ARM_IP), ctx);
707 }
503} 708}
504 709
505static inline void update_on_xread(struct jit_ctx *ctx) 710/* dst = dst << val */
711static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk,
712 const u32 val, struct jit_ctx *ctx){
713 const u8 *tmp = bpf2a32[TMP_REG_1];
714 const u8 *tmp2 = bpf2a32[TMP_REG_2];
715 /* Setup operands */
716 u8 rd = dstk ? tmp[1] : dst_lo;
717 u8 rm = dstk ? tmp[0] : dst_hi;
718
719 if (dstk) {
720 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
721 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
722 }
723
724 /* Do LSH operation */
725 if (val < 32) {
726 emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx);
727 emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx);
728 emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx);
729 } else {
730 if (val == 32)
731 emit(ARM_MOV_R(rm, rd), ctx);
732 else
733 emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx);
734 emit(ARM_EOR_R(rd, rd, rd), ctx);
735 }
736
737 if (dstk) {
738 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
739 emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
740 }
741}
742
743/* dst = dst >> val */
744static inline void emit_a32_lsr_i64(const u8 dst[], bool dstk,
745 const u32 val, struct jit_ctx *ctx) {
746 const u8 *tmp = bpf2a32[TMP_REG_1];
747 const u8 *tmp2 = bpf2a32[TMP_REG_2];
748 /* Setup operands */
749 u8 rd = dstk ? tmp[1] : dst_lo;
750 u8 rm = dstk ? tmp[0] : dst_hi;
751
752 if (dstk) {
753 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
754 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
755 }
756
757 /* Do LSR operation */
758 if (val < 32) {
759 emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
760 emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
761 emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx);
762 } else if (val == 32) {
763 emit(ARM_MOV_R(rd, rm), ctx);
764 emit(ARM_MOV_I(rm, 0), ctx);
765 } else {
766 emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx);
767 emit(ARM_MOV_I(rm, 0), ctx);
768 }
769
770 if (dstk) {
771 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
772 emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
773 }
774}
775
776/* dst = dst >> val (signed) */
777static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk,
778 const u32 val, struct jit_ctx *ctx){
779 const u8 *tmp = bpf2a32[TMP_REG_1];
780 const u8 *tmp2 = bpf2a32[TMP_REG_2];
781 /* Setup operands */
782 u8 rd = dstk ? tmp[1] : dst_lo;
783 u8 rm = dstk ? tmp[0] : dst_hi;
784
785 if (dstk) {
786 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
787 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
788 }
789
790 /* Do ARSH operation */
791 if (val < 32) {
792 emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
793 emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
794 emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx);
795 } else if (val == 32) {
796 emit(ARM_MOV_R(rd, rm), ctx);
797 emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
798 } else {
799 emit(ARM_MOV_SI(rd, rm, SRTYPE_ASR, val - 32), ctx);
800 emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
801 }
802
803 if (dstk) {
804 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
805 emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
806 }
807}
808
809static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
810 bool sstk, struct jit_ctx *ctx) {
811 const u8 *tmp = bpf2a32[TMP_REG_1];
812 const u8 *tmp2 = bpf2a32[TMP_REG_2];
813 /* Setup operands for multiplication */
814 u8 rd = dstk ? tmp[1] : dst_lo;
815 u8 rm = dstk ? tmp[0] : dst_hi;
816 u8 rt = sstk ? tmp2[1] : src_lo;
817 u8 rn = sstk ? tmp2[0] : src_hi;
818
819 if (dstk) {
820 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
821 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
822 }
823 if (sstk) {
824 emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
825 emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx);
826 }
827
828 /* Do Multiplication */
829 emit(ARM_MUL(ARM_IP, rd, rn), ctx);
830 emit(ARM_MUL(ARM_LR, rm, rt), ctx);
831 /* As we are using ARM_LR */
832 ctx->seen |= SEEN_CALL;
833 emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
834
835 emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
836 emit(ARM_ADD_R(rm, ARM_LR, rm), ctx);
837 if (dstk) {
838 emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx);
839 emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
840 } else {
841 emit(ARM_MOV_R(rd, ARM_IP), ctx);
842 }
843}
844
845/* *(size *)(dst + off) = src */
846static inline void emit_str_r(const u8 dst, const u8 src, bool dstk,
847 const s32 off, struct jit_ctx *ctx, const u8 sz){
848 const u8 *tmp = bpf2a32[TMP_REG_1];
849 u8 rd = dstk ? tmp[1] : dst;
850
851 if (dstk)
852 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
853 if (off) {
854 emit_a32_mov_i(tmp[0], off, false, ctx);
855 emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx);
856 rd = tmp[0];
857 }
858 switch (sz) {
859 case BPF_W:
860 /* Store a Word */
861 emit(ARM_STR_I(src, rd, 0), ctx);
862 break;
863 case BPF_H:
864 /* Store a HalfWord */
865 emit(ARM_STRH_I(src, rd, 0), ctx);
866 break;
867 case BPF_B:
868 /* Store a Byte */
869 emit(ARM_STRB_I(src, rd, 0), ctx);
870 break;
871 }
872}
873
874/* dst = *(size*)(src + off) */
875static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk,
876 const s32 off, struct jit_ctx *ctx, const u8 sz){
877 const u8 *tmp = bpf2a32[TMP_REG_1];
878 u8 rd = dstk ? tmp[1] : dst;
879 u8 rm = src;
880
881 if (off) {
882 emit_a32_mov_i(tmp[0], off, false, ctx);
883 emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
884 rm = tmp[0];
885 }
886 switch (sz) {
887 case BPF_W:
888 /* Load a Word */
889 emit(ARM_LDR_I(rd, rm, 0), ctx);
890 break;
891 case BPF_H:
892 /* Load a HalfWord */
893 emit(ARM_LDRH_I(rd, rm, 0), ctx);
894 break;
895 case BPF_B:
896 /* Load a Byte */
897 emit(ARM_LDRB_I(rd, rm, 0), ctx);
898 break;
899 }
900 if (dstk)
901 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
902}
903
904/* Arithmetic Operation */
905static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
906 const u8 rn, struct jit_ctx *ctx, u8 op) {
907 switch (op) {
908 case BPF_JSET:
909 ctx->seen |= SEEN_CALL;
910 emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
911 emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
912 emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
913 break;
914 case BPF_JEQ:
915 case BPF_JNE:
916 case BPF_JGT:
917 case BPF_JGE:
918 case BPF_JLE:
919 case BPF_JLT:
920 emit(ARM_CMP_R(rd, rm), ctx);
921 _emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
922 break;
923 case BPF_JSLE:
924 case BPF_JSGT:
925 emit(ARM_CMP_R(rn, rt), ctx);
926 emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
927 break;
928 case BPF_JSLT:
929 case BPF_JSGE:
930 emit(ARM_CMP_R(rt, rn), ctx);
931 emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
932 break;
933 }
934}
935
936static int out_offset = -1; /* initialized on the first pass of build_body() */
937static int emit_bpf_tail_call(struct jit_ctx *ctx)
506{ 938{
507 if (!(ctx->seen & SEEN_X))
508 ctx->flags |= FLAG_NEED_X_RESET;
509 939
510 ctx->seen |= SEEN_X; 940 /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
941 const u8 *r2 = bpf2a32[BPF_REG_2];
942 const u8 *r3 = bpf2a32[BPF_REG_3];
943 const u8 *tmp = bpf2a32[TMP_REG_1];
944 const u8 *tmp2 = bpf2a32[TMP_REG_2];
945 const u8 *tcc = bpf2a32[TCALL_CNT];
946 const int idx0 = ctx->idx;
947#define cur_offset (ctx->idx - idx0)
948#define jmp_offset (out_offset - (cur_offset))
949 u32 off, lo, hi;
950
951 /* if (index >= array->map.max_entries)
952 * goto out;
953 */
954 off = offsetof(struct bpf_array, map.max_entries);
955 /* array->map.max_entries */
956 emit_a32_mov_i(tmp[1], off, false, ctx);
957 emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
958 emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
959 /* index (64 bit) */
960 emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
961 /* index >= array->map.max_entries */
962 emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
963 _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
964
965 /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
966 * goto out;
967 * tail_call_cnt++;
968 */
969 lo = (u32)MAX_TAIL_CALL_CNT;
970 hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
971 emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
972 emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
973 emit(ARM_CMP_I(tmp[0], hi), ctx);
974 _emit(ARM_COND_EQ, ARM_CMP_I(tmp[1], lo), ctx);
975 _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
976 emit(ARM_ADDS_I(tmp[1], tmp[1], 1), ctx);
977 emit(ARM_ADC_I(tmp[0], tmp[0], 0), ctx);
978 emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
979 emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
980
981 /* prog = array->ptrs[index]
982 * if (prog == NULL)
983 * goto out;
984 */
985 off = offsetof(struct bpf_array, ptrs);
986 emit_a32_mov_i(tmp[1], off, false, ctx);
987 emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
988 emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx);
989 emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
990 emit(ARM_MOV_SI(tmp[0], tmp2[1], SRTYPE_ASL, 2), ctx);
991 emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx);
992 emit(ARM_CMP_I(tmp[1], 0), ctx);
993 _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
994
995 /* goto *(prog->bpf_func + prologue_size); */
996 off = offsetof(struct bpf_prog, bpf_func);
997 emit_a32_mov_i(tmp2[1], off, false, ctx);
998 emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
999 emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
1000 emit(ARM_BX(tmp[1]), ctx);
1001
1002 /* out: */
1003 if (out_offset == -1)
1004 out_offset = cur_offset;
1005 if (cur_offset != out_offset) {
1006 pr_err_once("tail_call out_offset = %d, expected %d!\n",
1007 cur_offset, out_offset);
1008 return -1;
1009 }
1010 return 0;
1011#undef cur_offset
1012#undef jmp_offset
511} 1013}
512 1014
513static int build_body(struct jit_ctx *ctx) 1015/* 0xabcd => 0xcdab */
1016static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx)
514{ 1017{
515 void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; 1018#if __LINUX_ARM_ARCH__ < 6
516 const struct bpf_prog *prog = ctx->skf; 1019 const u8 *tmp2 = bpf2a32[TMP_REG_2];
517 const struct sock_filter *inst; 1020
518 unsigned i, load_order, off, condt; 1021 emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
519 int imm12; 1022 emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx);
520 u32 k; 1023 emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
1024 emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx);
1025#else /* ARMv6+ */
1026 emit(ARM_REV16(rd, rn), ctx);
1027#endif
1028}
521 1029
522 for (i = 0; i < prog->len; i++) { 1030/* 0xabcdefgh => 0xghefcdab */
523 u16 code; 1031static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx)
1032{
1033#if __LINUX_ARM_ARCH__ < 6
1034 const u8 *tmp2 = bpf2a32[TMP_REG_2];
1035
1036 emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
1037 emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx);
1038 emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx);
1039
1040 emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx);
1041 emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx);
1042 emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx);
1043 emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
1044 emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx);
1045 emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx);
1046 emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx);
1047
1048#else /* ARMv6+ */
1049 emit(ARM_REV(rd, rn), ctx);
1050#endif
1051}
524 1052
525 inst = &(prog->insns[i]); 1053// push the scratch stack register on top of the stack
526 /* K as an immediate value operand */ 1054static inline void emit_push_r64(const u8 src[], const u8 shift,
527 k = inst->k; 1055 struct jit_ctx *ctx)
528 code = bpf_anc_helper(inst); 1056{
1057 const u8 *tmp2 = bpf2a32[TMP_REG_2];
1058 u16 reg_set = 0;
529 1059
530 /* compute offsets only in the fake pass */ 1060 emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx);
531 if (ctx->target == NULL) 1061 emit(ARM_LDR_I(tmp2[0], ARM_SP, STACK_VAR(src[0]+shift)), ctx);
532 ctx->offsets[i] = ctx->idx * 4; 1062
1063 reg_set = (1 << tmp2[1]) | (1 << tmp2[0]);
1064 emit(ARM_PUSH(reg_set), ctx);
1065}
1066
1067static void build_prologue(struct jit_ctx *ctx)
1068{
1069 const u8 r0 = bpf2a32[BPF_REG_0][1];
1070 const u8 r2 = bpf2a32[BPF_REG_1][1];
1071 const u8 r3 = bpf2a32[BPF_REG_1][0];
1072 const u8 r4 = bpf2a32[BPF_REG_6][1];
1073 const u8 r5 = bpf2a32[BPF_REG_6][0];
1074 const u8 r6 = bpf2a32[TMP_REG_1][1];
1075 const u8 r7 = bpf2a32[TMP_REG_1][0];
1076 const u8 r8 = bpf2a32[TMP_REG_2][1];
1077 const u8 r10 = bpf2a32[TMP_REG_2][0];
1078 const u8 fplo = bpf2a32[BPF_REG_FP][1];
1079 const u8 fphi = bpf2a32[BPF_REG_FP][0];
1080 const u8 sp = ARM_SP;
1081 const u8 *tcc = bpf2a32[TCALL_CNT];
1082
1083 u16 reg_set = 0;
1084
1085 /*
1086 * eBPF prog stack layout
1087 *
1088 * high
1089 * original ARM_SP => +-----+ eBPF prologue
1090 * |FP/LR|
1091 * current ARM_FP => +-----+
1092 * | ... | callee saved registers
1093 * eBPF fp register => +-----+ <= (BPF_FP)
1094 * | ... | eBPF JIT scratch space
1095 * | | eBPF prog stack
1096 * +-----+
1097 * |RSVD | JIT scratchpad
1098 * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE)
1099 * | |
1100 * | ... | Function call stack
1101 * | |
1102 * +-----+
1103 * low
1104 */
1105
1106 /* Save callee saved registers. */
1107 reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
1108#ifdef CONFIG_FRAME_POINTER
1109 reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
1110 emit(ARM_MOV_R(ARM_IP, sp), ctx);
1111 emit(ARM_PUSH(reg_set), ctx);
1112 emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
1113#else
1114 /* Check if call instruction exists in BPF body */
1115 if (ctx->seen & SEEN_CALL)
1116 reg_set |= (1<<ARM_LR);
1117 emit(ARM_PUSH(reg_set), ctx);
1118#endif
1119 /* Save frame pointer for later */
1120 emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);
1121
1122 ctx->stack_size = imm8m(STACK_SIZE);
1123
1124 /* Set up function call stack */
1125 emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
533 1126
534 switch (code) { 1127 /* Set up BPF prog stack base register */
535 case BPF_LD | BPF_IMM: 1128 emit_a32_mov_r(fplo, ARM_IP, true, false, ctx);
536 emit_mov_i(r_A, k, ctx); 1129 emit_a32_mov_i(fphi, 0, true, ctx);
1130
1131 /* mov r4, 0 */
1132 emit(ARM_MOV_I(r4, 0), ctx);
1133
1134 /* Move BPF_CTX to BPF_R1 */
1135 emit(ARM_MOV_R(r3, r4), ctx);
1136 emit(ARM_MOV_R(r2, r0), ctx);
1137 /* Initialize Tail Count */
1138 emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[0])), ctx);
1139 emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[1])), ctx);
1140 /* end of prologue */
1141}
1142
1143static void build_epilogue(struct jit_ctx *ctx)
1144{
1145 const u8 r4 = bpf2a32[BPF_REG_6][1];
1146 const u8 r5 = bpf2a32[BPF_REG_6][0];
1147 const u8 r6 = bpf2a32[TMP_REG_1][1];
1148 const u8 r7 = bpf2a32[TMP_REG_1][0];
1149 const u8 r8 = bpf2a32[TMP_REG_2][1];
1150 const u8 r10 = bpf2a32[TMP_REG_2][0];
1151 u16 reg_set = 0;
1152
1153 /* unwind function call stack */
1154 emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
1155
1156 /* restore callee saved registers. */
1157 reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
1158#ifdef CONFIG_FRAME_POINTER
1159 /* the first instruction of the prologue was: mov ip, sp */
1160 reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
1161 emit(ARM_LDM(ARM_SP, reg_set), ctx);
1162#else
1163 if (ctx->seen & SEEN_CALL)
1164 reg_set |= (1<<ARM_PC);
1165 /* Restore callee saved registers. */
1166 emit(ARM_POP(reg_set), ctx);
1167 /* Return back to the callee function */
1168 if (!(ctx->seen & SEEN_CALL))
1169 emit(ARM_BX(ARM_LR), ctx);
1170#endif
1171}
1172
1173/*
1174 * Convert an eBPF instruction to native instruction, i.e
1175 * JITs an eBPF instruction.
1176 * Returns :
1177 * 0 - Successfully JITed an 8-byte eBPF instruction
1178 * >0 - Successfully JITed a 16-byte eBPF instruction
1179 * <0 - Failed to JIT.
1180 */
1181static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1182{
1183 const u8 code = insn->code;
1184 const u8 *dst = bpf2a32[insn->dst_reg];
1185 const u8 *src = bpf2a32[insn->src_reg];
1186 const u8 *tmp = bpf2a32[TMP_REG_1];
1187 const u8 *tmp2 = bpf2a32[TMP_REG_2];
1188 const s16 off = insn->off;
1189 const s32 imm = insn->imm;
1190 const int i = insn - ctx->prog->insnsi;
1191 const bool is64 = BPF_CLASS(code) == BPF_ALU64;
1192 const bool dstk = is_on_stack(insn->dst_reg);
1193 const bool sstk = is_on_stack(insn->src_reg);
1194 u8 rd, rt, rm, rn;
1195 s32 jmp_offset;
1196
1197#define check_imm(bits, imm) do { \
1198 if ((((imm) > 0) && ((imm) >> (bits))) || \
1199 (((imm) < 0) && (~(imm) >> (bits)))) { \
1200 pr_info("[%2d] imm=%d(0x%x) out of range\n", \
1201 i, imm, imm); \
1202 return -EINVAL; \
1203 } \
1204} while (0)
1205#define check_imm24(imm) check_imm(24, imm)
1206
1207 switch (code) {
1208 /* ALU operations */
1209
1210 /* dst = src */
1211 case BPF_ALU | BPF_MOV | BPF_K:
1212 case BPF_ALU | BPF_MOV | BPF_X:
1213 case BPF_ALU64 | BPF_MOV | BPF_K:
1214 case BPF_ALU64 | BPF_MOV | BPF_X:
1215 switch (BPF_SRC(code)) {
1216 case BPF_X:
1217 emit_a32_mov_r64(is64, dst, src, dstk, sstk, ctx);
537 break; 1218 break;
538 case BPF_LD | BPF_W | BPF_LEN: 1219 case BPF_K:
539 ctx->seen |= SEEN_SKB; 1220 /* Sign-extend immediate value to destination reg */
540 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); 1221 emit_a32_mov_i64(is64, dst, imm, dstk, ctx);
541 emit(ARM_LDR_I(r_A, r_skb,
542 offsetof(struct sk_buff, len)), ctx);
543 break; 1222 break;
544 case BPF_LD | BPF_MEM: 1223 }
545 /* A = scratch[k] */ 1224 break;
546 ctx->seen |= SEEN_MEM_WORD(k); 1225 /* dst = dst + src/imm */
547 emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); 1226 /* dst = dst - src/imm */
1227 /* dst = dst | src/imm */
1228 /* dst = dst & src/imm */
1229 /* dst = dst ^ src/imm */
1230 /* dst = dst * src/imm */
1231 /* dst = dst << src */
1232 /* dst = dst >> src */
1233 case BPF_ALU | BPF_ADD | BPF_K:
1234 case BPF_ALU | BPF_ADD | BPF_X:
1235 case BPF_ALU | BPF_SUB | BPF_K:
1236 case BPF_ALU | BPF_SUB | BPF_X:
1237 case BPF_ALU | BPF_OR | BPF_K:
1238 case BPF_ALU | BPF_OR | BPF_X:
1239 case BPF_ALU | BPF_AND | BPF_K:
1240 case BPF_ALU | BPF_AND | BPF_X:
1241 case BPF_ALU | BPF_XOR | BPF_K:
1242 case BPF_ALU | BPF_XOR | BPF_X:
1243 case BPF_ALU | BPF_MUL | BPF_K:
1244 case BPF_ALU | BPF_MUL | BPF_X:
1245 case BPF_ALU | BPF_LSH | BPF_X:
1246 case BPF_ALU | BPF_RSH | BPF_X:
1247 case BPF_ALU | BPF_ARSH | BPF_K:
1248 case BPF_ALU | BPF_ARSH | BPF_X:
1249 case BPF_ALU64 | BPF_ADD | BPF_K:
1250 case BPF_ALU64 | BPF_ADD | BPF_X:
1251 case BPF_ALU64 | BPF_SUB | BPF_K:
1252 case BPF_ALU64 | BPF_SUB | BPF_X:
1253 case BPF_ALU64 | BPF_OR | BPF_K:
1254 case BPF_ALU64 | BPF_OR | BPF_X:
1255 case BPF_ALU64 | BPF_AND | BPF_K:
1256 case BPF_ALU64 | BPF_AND | BPF_X:
1257 case BPF_ALU64 | BPF_XOR | BPF_K:
1258 case BPF_ALU64 | BPF_XOR | BPF_X:
1259 switch (BPF_SRC(code)) {
1260 case BPF_X:
1261 emit_a32_alu_r64(is64, dst, src, dstk, sstk,
1262 ctx, BPF_OP(code));
548 break; 1263 break;
549 case BPF_LD | BPF_W | BPF_ABS: 1264 case BPF_K:
550 load_order = 2; 1265 /* Move immediate value to the temporary register
551 goto load; 1266 * and then do the ALU operation on the temporary
552 case BPF_LD | BPF_H | BPF_ABS: 1267 * register as this will sign-extend the immediate
553 load_order = 1; 1268 * value into temporary reg and then it would be
554 goto load; 1269 * safe to do the operation on it.
555 case BPF_LD | BPF_B | BPF_ABS:
556 load_order = 0;
557load:
558 emit_mov_i(r_off, k, ctx);
559load_common:
560 ctx->seen |= SEEN_DATA | SEEN_CALL;
561
562 if (load_order > 0) {
563 emit(ARM_SUB_I(r_scratch, r_skb_hl,
564 1 << load_order), ctx);
565 emit(ARM_CMP_R(r_scratch, r_off), ctx);
566 condt = ARM_COND_GE;
567 } else {
568 emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
569 condt = ARM_COND_HI;
570 }
571
572 /*
573 * test for negative offset, only if we are
574 * currently scheduled to take the fast
575 * path. this will update the flags so that
576 * the slowpath instruction are ignored if the
577 * offset is negative.
578 *
579 * for loard_order == 0 the HI condition will
580 * make loads at offset 0 take the slow path too.
581 */ 1270 */
582 _emit(condt, ARM_CMP_I(r_off, 0), ctx); 1271 emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
583 1272 emit_a32_alu_r64(is64, dst, tmp2, dstk, false,
584 _emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data), 1273 ctx, BPF_OP(code));
585 ctx);
586
587 if (load_order == 0)
588 _emit(condt, ARM_LDRB_I(r_A, r_scratch, 0),
589 ctx);
590 else if (load_order == 1)
591 emit_load_be16(condt, r_A, r_scratch, ctx);
592 else if (load_order == 2)
593 emit_load_be32(condt, r_A, r_scratch, ctx);
594
595 _emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx);
596
597 /* the slowpath */
598 emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx);
599 emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
600 /* the offset is already in R1 */
601 emit_blx_r(ARM_R3, ctx);
602 /* check the result of skb_copy_bits */
603 emit(ARM_CMP_I(ARM_R1, 0), ctx);
604 emit_err_ret(ARM_COND_NE, ctx);
605 emit(ARM_MOV_R(r_A, ARM_R0), ctx);
606 break; 1274 break;
607 case BPF_LD | BPF_W | BPF_IND: 1275 }
608 load_order = 2; 1276 break;
609 goto load_ind; 1277 /* dst = dst / src(imm) */
610 case BPF_LD | BPF_H | BPF_IND: 1278 /* dst = dst % src(imm) */
611 load_order = 1; 1279 case BPF_ALU | BPF_DIV | BPF_K:
612 goto load_ind; 1280 case BPF_ALU | BPF_DIV | BPF_X:
613 case BPF_LD | BPF_B | BPF_IND: 1281 case BPF_ALU | BPF_MOD | BPF_K:
614 load_order = 0; 1282 case BPF_ALU | BPF_MOD | BPF_X:
615load_ind: 1283 rt = src_lo;
616 update_on_xread(ctx); 1284 rd = dstk ? tmp2[1] : dst_lo;
617 OP_IMM3(ARM_ADD, r_off, r_X, k, ctx); 1285 if (dstk)
618 goto load_common; 1286 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
619 case BPF_LDX | BPF_IMM: 1287 switch (BPF_SRC(code)) {
620 ctx->seen |= SEEN_X; 1288 case BPF_X:
621 emit_mov_i(r_X, k, ctx); 1289 rt = sstk ? tmp2[0] : rt;
1290 if (sstk)
1291 emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)),
1292 ctx);
622 break; 1293 break;
623 case BPF_LDX | BPF_W | BPF_LEN: 1294 case BPF_K:
624 ctx->seen |= SEEN_X | SEEN_SKB; 1295 rt = tmp2[0];
625 emit(ARM_LDR_I(r_X, r_skb, 1296 emit_a32_mov_i(rt, imm, false, ctx);
626 offsetof(struct sk_buff, len)), ctx);
627 break; 1297 break;
628 case BPF_LDX | BPF_MEM: 1298 }
629 ctx->seen |= SEEN_X | SEEN_MEM_WORD(k); 1299 emit_udivmod(rd, rd, rt, ctx, BPF_OP(code));
630 emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); 1300 if (dstk)
1301 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
1302 emit_a32_mov_i(dst_hi, 0, dstk, ctx);
1303 break;
1304 case BPF_ALU64 | BPF_DIV | BPF_K:
1305 case BPF_ALU64 | BPF_DIV | BPF_X:
1306 case BPF_ALU64 | BPF_MOD | BPF_K:
1307 case BPF_ALU64 | BPF_MOD | BPF_X:
1308 goto notyet;
1309 /* dst = dst >> imm */
1310 /* dst = dst << imm */
1311 case BPF_ALU | BPF_RSH | BPF_K:
1312 case BPF_ALU | BPF_LSH | BPF_K:
1313 if (unlikely(imm > 31))
1314 return -EINVAL;
1315 if (imm)
1316 emit_a32_alu_i(dst_lo, imm, dstk, ctx, BPF_OP(code));
1317 emit_a32_mov_i(dst_hi, 0, dstk, ctx);
1318 break;
1319 /* dst = dst << imm */
1320 case BPF_ALU64 | BPF_LSH | BPF_K:
1321 if (unlikely(imm > 63))
1322 return -EINVAL;
1323 emit_a32_lsh_i64(dst, dstk, imm, ctx);
1324 break;
1325 /* dst = dst >> imm */
1326 case BPF_ALU64 | BPF_RSH | BPF_K:
1327 if (unlikely(imm > 63))
1328 return -EINVAL;
1329 emit_a32_lsr_i64(dst, dstk, imm, ctx);
1330 break;
1331 /* dst = dst << src */
1332 case BPF_ALU64 | BPF_LSH | BPF_X:
1333 emit_a32_lsh_r64(dst, src, dstk, sstk, ctx);
1334 break;
1335 /* dst = dst >> src */
1336 case BPF_ALU64 | BPF_RSH | BPF_X:
1337 emit_a32_lsr_r64(dst, src, dstk, sstk, ctx);
1338 break;
1339 /* dst = dst >> src (signed) */
1340 case BPF_ALU64 | BPF_ARSH | BPF_X:
1341 emit_a32_arsh_r64(dst, src, dstk, sstk, ctx);
1342 break;
1343 /* dst = dst >> imm (signed) */
1344 case BPF_ALU64 | BPF_ARSH | BPF_K:
1345 if (unlikely(imm > 63))
1346 return -EINVAL;
1347 emit_a32_arsh_i64(dst, dstk, imm, ctx);
1348 break;
1349 /* dst = ~dst */
1350 case BPF_ALU | BPF_NEG:
1351 emit_a32_alu_i(dst_lo, 0, dstk, ctx, BPF_OP(code));
1352 emit_a32_mov_i(dst_hi, 0, dstk, ctx);
1353 break;
1354 /* dst = ~dst (64 bit) */
1355 case BPF_ALU64 | BPF_NEG:
1356 emit_a32_neg64(dst, dstk, ctx);
1357 break;
1358 /* dst = dst * src/imm */
1359 case BPF_ALU64 | BPF_MUL | BPF_X:
1360 case BPF_ALU64 | BPF_MUL | BPF_K:
1361 switch (BPF_SRC(code)) {
1362 case BPF_X:
1363 emit_a32_mul_r64(dst, src, dstk, sstk, ctx);
631 break; 1364 break;
632 case BPF_LDX | BPF_B | BPF_MSH: 1365 case BPF_K:
633 /* x = ((*(frame + k)) & 0xf) << 2; */ 1366 /* Move the immediate value to the temporary register
634 ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL; 1367 * and do the multiplication on that, as the 64-bit
635 /* the interpreter should deal with the negative K */ 1368 * move sign-extends the immediate into the temp
636 if ((int)k < 0) 1369 * register, which makes the operation on it
637 return -1; 1370 * safe.
638 /* offset in r1: we might have to take the slow path */
639 emit_mov_i(r_off, k, ctx);
640 emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
641
642 /* load in r0: common with the slowpath */
643 _emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data,
644 ARM_R1), ctx);
645 /*
646 * emit_mov_i() might generate one or two instructions,
647 * the same holds for emit_blx_r()
648 */ 1371 */
649 _emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx); 1372 emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
650 1373 emit_a32_mul_r64(dst, tmp2, dstk, false, ctx);
651 emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
652 /* r_off is r1 */
653 emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx);
654 emit_blx_r(ARM_R3, ctx);
655 /* check the return value of skb_copy_bits */
656 emit(ARM_CMP_I(ARM_R1, 0), ctx);
657 emit_err_ret(ARM_COND_NE, ctx);
658
659 emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx);
660 emit(ARM_LSL_I(r_X, r_X, 2), ctx);
661 break;
662 case BPF_ST:
663 ctx->seen |= SEEN_MEM_WORD(k);
664 emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
665 break;
666 case BPF_STX:
667 update_on_xread(ctx);
668 ctx->seen |= SEEN_MEM_WORD(k);
669 emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
670 break;
671 case BPF_ALU | BPF_ADD | BPF_K:
672 /* A += K */
673 OP_IMM3(ARM_ADD, r_A, r_A, k, ctx);
674 break;
675 case BPF_ALU | BPF_ADD | BPF_X:
676 update_on_xread(ctx);
677 emit(ARM_ADD_R(r_A, r_A, r_X), ctx);
678 break;
679 case BPF_ALU | BPF_SUB | BPF_K:
680 /* A -= K */
681 OP_IMM3(ARM_SUB, r_A, r_A, k, ctx);
682 break;
683 case BPF_ALU | BPF_SUB | BPF_X:
684 update_on_xread(ctx);
685 emit(ARM_SUB_R(r_A, r_A, r_X), ctx);
686 break;
687 case BPF_ALU | BPF_MUL | BPF_K:
688 /* A *= K */
689 emit_mov_i(r_scratch, k, ctx);
690 emit(ARM_MUL(r_A, r_A, r_scratch), ctx);
691 break;
692 case BPF_ALU | BPF_MUL | BPF_X:
693 update_on_xread(ctx);
694 emit(ARM_MUL(r_A, r_A, r_X), ctx);
695 break; 1374 break;
696 case BPF_ALU | BPF_DIV | BPF_K: 1375 }
697 if (k == 1) 1376 break;
698 break; 1377 /* dst = htole(dst) */
699 emit_mov_i(r_scratch, k, ctx); 1378 /* dst = htobe(dst) */
700 emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_DIV); 1379 case BPF_ALU | BPF_END | BPF_FROM_LE:
701 break; 1380 case BPF_ALU | BPF_END | BPF_FROM_BE:
702 case BPF_ALU | BPF_DIV | BPF_X: 1381 rd = dstk ? tmp[0] : dst_hi;
703 update_on_xread(ctx); 1382 rt = dstk ? tmp[1] : dst_lo;
704 emit(ARM_CMP_I(r_X, 0), ctx); 1383 if (dstk) {
705 emit_err_ret(ARM_COND_EQ, ctx); 1384 emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
706 emit_udivmod(r_A, r_A, r_X, ctx, BPF_DIV); 1385 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
1386 }
1387 if (BPF_SRC(code) == BPF_FROM_LE)
1388 goto emit_bswap_uxt;
1389 switch (imm) {
1390 case 16:
1391 emit_rev16(rt, rt, ctx);
1392 goto emit_bswap_uxt;
1393 case 32:
1394 emit_rev32(rt, rt, ctx);
1395 goto emit_bswap_uxt;
1396 case 64:
1397 /* Because of the usage of ARM_LR */
1398 ctx->seen |= SEEN_CALL;
1399 emit_rev32(ARM_LR, rt, ctx);
1400 emit_rev32(rt, rd, ctx);
1401 emit(ARM_MOV_R(rd, ARM_LR), ctx);
707 break; 1402 break;
708 case BPF_ALU | BPF_MOD | BPF_K: 1403 }
709 if (k == 1) { 1404 goto exit;
710 emit_mov_i(r_A, 0, ctx); 1405emit_bswap_uxt:
711 break; 1406 switch (imm) {
712 } 1407 case 16:
713 emit_mov_i(r_scratch, k, ctx); 1408 /* zero-extend 16 bits into 64 bits */
714 emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_MOD); 1409#if __LINUX_ARM_ARCH__ < 6
1410 emit_a32_mov_i(tmp2[1], 0xffff, false, ctx);
1411 emit(ARM_AND_R(rt, rt, tmp2[1]), ctx);
1412#else /* ARMv6+ */
1413 emit(ARM_UXTH(rt, rt), ctx);
1414#endif
1415 emit(ARM_EOR_R(rd, rd, rd), ctx);
715 break; 1416 break;
716 case BPF_ALU | BPF_MOD | BPF_X: 1417 case 32:
717 update_on_xread(ctx); 1418 /* zero-extend 32 bits into 64 bits */
718 emit(ARM_CMP_I(r_X, 0), ctx); 1419 emit(ARM_EOR_R(rd, rd, rd), ctx);
719 emit_err_ret(ARM_COND_EQ, ctx);
720 emit_udivmod(r_A, r_A, r_X, ctx, BPF_MOD);
721 break; 1420 break;
722 case BPF_ALU | BPF_OR | BPF_K: 1421 case 64:
723 /* A |= K */ 1422 /* nop */
724 OP_IMM3(ARM_ORR, r_A, r_A, k, ctx);
725 break; 1423 break;
726 case BPF_ALU | BPF_OR | BPF_X: 1424 }
727 update_on_xread(ctx); 1425exit:
728 emit(ARM_ORR_R(r_A, r_A, r_X), ctx); 1426 if (dstk) {
1427 emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
1428 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
1429 }
1430 break;
1431 /* dst = imm64 */
1432 case BPF_LD | BPF_IMM | BPF_DW:
1433 {
1434 const struct bpf_insn insn1 = insn[1];
1435 u32 hi, lo = imm;
1436
1437 hi = insn1.imm;
1438 emit_a32_mov_i(dst_lo, lo, dstk, ctx);
1439 emit_a32_mov_i(dst_hi, hi, dstk, ctx);
1440
1441 return 1;
1442 }
1443 /* LDX: dst = *(size *)(src + off) */
1444 case BPF_LDX | BPF_MEM | BPF_W:
1445 case BPF_LDX | BPF_MEM | BPF_H:
1446 case BPF_LDX | BPF_MEM | BPF_B:
1447 case BPF_LDX | BPF_MEM | BPF_DW:
1448 rn = sstk ? tmp2[1] : src_lo;
1449 if (sstk)
1450 emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
1451 switch (BPF_SIZE(code)) {
1452 case BPF_W:
1453 /* Load a Word */
1454 case BPF_H:
1455 /* Load a Half-Word */
1456 case BPF_B:
1457 /* Load a Byte */
1458 emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code));
1459 emit_a32_mov_i(dst_hi, 0, dstk, ctx);
729 break; 1460 break;
730 case BPF_ALU | BPF_XOR | BPF_K: 1461 case BPF_DW:
731 /* A ^= K; */ 1462 /* Load a double word */
732 OP_IMM3(ARM_EOR, r_A, r_A, k, ctx); 1463 emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W);
1464 emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W);
733 break; 1465 break;
734 case BPF_ANC | SKF_AD_ALU_XOR_X: 1466 }
735 case BPF_ALU | BPF_XOR | BPF_X: 1467 break;
736 /* A ^= X */ 1468 /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
737 update_on_xread(ctx); 1469 case BPF_LD | BPF_ABS | BPF_W:
738 emit(ARM_EOR_R(r_A, r_A, r_X), ctx); 1470 case BPF_LD | BPF_ABS | BPF_H:
1471 case BPF_LD | BPF_ABS | BPF_B:
1472 /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
1473 case BPF_LD | BPF_IND | BPF_W:
1474 case BPF_LD | BPF_IND | BPF_H:
1475 case BPF_LD | BPF_IND | BPF_B:
1476 {
1477 const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */
1478 const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/
1479 /* rtn value */
1480 const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */
1481 const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */
1482 const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */
1483 const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */
1484 int size;
1485
1486 /* Setting up first argument */
1487 emit(ARM_MOV_R(r0, r4), ctx);
1488
1489 /* Setting up second argument */
1490 emit_a32_mov_i(r1, imm, false, ctx);
1491 if (BPF_MODE(code) == BPF_IND)
1492 emit_a32_alu_r(r1, src_lo, false, sstk, ctx,
1493 false, false, BPF_ADD);
1494
1495 /* Setting up third argument */
1496 switch (BPF_SIZE(code)) {
1497 case BPF_W:
1498 size = 4;
739 break; 1499 break;
740 case BPF_ALU | BPF_AND | BPF_K: 1500 case BPF_H:
741 /* A &= K */ 1501 size = 2;
742 OP_IMM3(ARM_AND, r_A, r_A, k, ctx);
743 break; 1502 break;
744 case BPF_ALU | BPF_AND | BPF_X: 1503 case BPF_B:
745 update_on_xread(ctx); 1504 size = 1;
746 emit(ARM_AND_R(r_A, r_A, r_X), ctx);
747 break; 1505 break;
748 case BPF_ALU | BPF_LSH | BPF_K: 1506 default:
749 if (unlikely(k > 31)) 1507 return -EINVAL;
750 return -1; 1508 }
751 emit(ARM_LSL_I(r_A, r_A, k), ctx); 1509 emit_a32_mov_i(r2, size, false, ctx);
1510
1511 /* Setting up fourth argument */
1512 emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx);
1513
1514 /* Setting up function pointer to call */
1515 emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx);
1516 emit_blx_r(r6, ctx);
1517
1518 emit(ARM_EOR_R(r1, r1, r1), ctx);
 1519 /* Check whether the returned address is NULL.
 1520 * If it is NULL, jump to the epilogue;
 1521 * otherwise load the value from the returned address.
 1522 */
1523 emit(ARM_CMP_I(r0, 0), ctx);
1524 jmp_offset = epilogue_offset(ctx);
1525 check_imm24(jmp_offset);
1526 _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
1527
1528 /* Load value from the address */
1529 switch (BPF_SIZE(code)) {
1530 case BPF_W:
1531 emit(ARM_LDR_I(r0, r0, 0), ctx);
1532 emit_rev32(r0, r0, ctx);
752 break; 1533 break;
753 case BPF_ALU | BPF_LSH | BPF_X: 1534 case BPF_H:
754 update_on_xread(ctx); 1535 emit(ARM_LDRH_I(r0, r0, 0), ctx);
755 emit(ARM_LSL_R(r_A, r_A, r_X), ctx); 1536 emit_rev16(r0, r0, ctx);
756 break; 1537 break;
757 case BPF_ALU | BPF_RSH | BPF_K: 1538 case BPF_B:
758 if (unlikely(k > 31)) 1539 emit(ARM_LDRB_I(r0, r0, 0), ctx);
759 return -1; 1540 /* No need to reverse */
760 if (k)
761 emit(ARM_LSR_I(r_A, r_A, k), ctx);
762 break; 1541 break;
763 case BPF_ALU | BPF_RSH | BPF_X: 1542 }
764 update_on_xread(ctx); 1543 break;
765 emit(ARM_LSR_R(r_A, r_A, r_X), ctx); 1544 }
1545 /* ST: *(size *)(dst + off) = imm */
1546 case BPF_ST | BPF_MEM | BPF_W:
1547 case BPF_ST | BPF_MEM | BPF_H:
1548 case BPF_ST | BPF_MEM | BPF_B:
1549 case BPF_ST | BPF_MEM | BPF_DW:
1550 switch (BPF_SIZE(code)) {
1551 case BPF_DW:
1552 /* Sign-extend immediate value into temp reg */
1553 emit_a32_mov_i64(true, tmp2, imm, false, ctx);
1554 emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, BPF_W);
1555 emit_str_r(dst_lo, tmp2[0], dstk, off+4, ctx, BPF_W);
766 break; 1556 break;
767 case BPF_ALU | BPF_NEG: 1557 case BPF_W:
768 /* A = -A */ 1558 case BPF_H:
769 emit(ARM_RSB_I(r_A, r_A, 0), ctx); 1559 case BPF_B:
1560 emit_a32_mov_i(tmp2[1], imm, false, ctx);
1561 emit_str_r(dst_lo, tmp2[1], dstk, off, ctx,
1562 BPF_SIZE(code));
770 break; 1563 break;
771 case BPF_JMP | BPF_JA: 1564 }
772 /* pc += K */ 1565 break;
773 emit(ARM_B(b_imm(i + k + 1, ctx)), ctx); 1566 /* STX XADD: lock *(u32 *)(dst + off) += src */
1567 case BPF_STX | BPF_XADD | BPF_W:
1568 /* STX XADD: lock *(u64 *)(dst + off) += src */
1569 case BPF_STX | BPF_XADD | BPF_DW:
1570 goto notyet;
1571 /* STX: *(size *)(dst + off) = src */
1572 case BPF_STX | BPF_MEM | BPF_W:
1573 case BPF_STX | BPF_MEM | BPF_H:
1574 case BPF_STX | BPF_MEM | BPF_B:
1575 case BPF_STX | BPF_MEM | BPF_DW:
1576 {
1577 u8 sz = BPF_SIZE(code);
1578
1579 rn = sstk ? tmp2[1] : src_lo;
1580 rm = sstk ? tmp2[0] : src_hi;
1581 if (sstk) {
1582 emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
1583 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
1584 }
1585
1586 /* Store the value */
1587 if (BPF_SIZE(code) == BPF_DW) {
1588 emit_str_r(dst_lo, rn, dstk, off, ctx, BPF_W);
1589 emit_str_r(dst_lo, rm, dstk, off+4, ctx, BPF_W);
1590 } else {
1591 emit_str_r(dst_lo, rn, dstk, off, ctx, sz);
1592 }
1593 break;
1594 }
1595 /* PC += off if dst == src */
1596 /* PC += off if dst > src */
1597 /* PC += off if dst >= src */
1598 /* PC += off if dst < src */
1599 /* PC += off if dst <= src */
1600 /* PC += off if dst != src */
1601 /* PC += off if dst > src (signed) */
1602 /* PC += off if dst >= src (signed) */
1603 /* PC += off if dst < src (signed) */
1604 /* PC += off if dst <= src (signed) */
1605 /* PC += off if dst & src */
1606 case BPF_JMP | BPF_JEQ | BPF_X:
1607 case BPF_JMP | BPF_JGT | BPF_X:
1608 case BPF_JMP | BPF_JGE | BPF_X:
1609 case BPF_JMP | BPF_JNE | BPF_X:
1610 case BPF_JMP | BPF_JSGT | BPF_X:
1611 case BPF_JMP | BPF_JSGE | BPF_X:
1612 case BPF_JMP | BPF_JSET | BPF_X:
1613 case BPF_JMP | BPF_JLE | BPF_X:
1614 case BPF_JMP | BPF_JLT | BPF_X:
1615 case BPF_JMP | BPF_JSLT | BPF_X:
1616 case BPF_JMP | BPF_JSLE | BPF_X:
1617 /* Setup source registers */
1618 rm = sstk ? tmp2[0] : src_hi;
1619 rn = sstk ? tmp2[1] : src_lo;
1620 if (sstk) {
1621 emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
1622 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
1623 }
1624 goto go_jmp;
1625 /* PC += off if dst == imm */
1626 /* PC += off if dst > imm */
1627 /* PC += off if dst >= imm */
1628 /* PC += off if dst < imm */
1629 /* PC += off if dst <= imm */
1630 /* PC += off if dst != imm */
1631 /* PC += off if dst > imm (signed) */
1632 /* PC += off if dst >= imm (signed) */
1633 /* PC += off if dst < imm (signed) */
1634 /* PC += off if dst <= imm (signed) */
1635 /* PC += off if dst & imm */
1636 case BPF_JMP | BPF_JEQ | BPF_K:
1637 case BPF_JMP | BPF_JGT | BPF_K:
1638 case BPF_JMP | BPF_JGE | BPF_K:
1639 case BPF_JMP | BPF_JNE | BPF_K:
1640 case BPF_JMP | BPF_JSGT | BPF_K:
1641 case BPF_JMP | BPF_JSGE | BPF_K:
1642 case BPF_JMP | BPF_JSET | BPF_K:
1643 case BPF_JMP | BPF_JLT | BPF_K:
1644 case BPF_JMP | BPF_JLE | BPF_K:
1645 case BPF_JMP | BPF_JSLT | BPF_K:
1646 case BPF_JMP | BPF_JSLE | BPF_K:
1647 if (off == 0)
774 break; 1648 break;
775 case BPF_JMP | BPF_JEQ | BPF_K: 1649 rm = tmp2[0];
776 /* pc += (A == K) ? pc->jt : pc->jf */ 1650 rn = tmp2[1];
777 condt = ARM_COND_EQ; 1651 /* Sign-extend immediate value */
778 goto cmp_imm; 1652 emit_a32_mov_i64(true, tmp2, imm, false, ctx);
779 case BPF_JMP | BPF_JGT | BPF_K: 1653go_jmp:
780 /* pc += (A > K) ? pc->jt : pc->jf */ 1654 /* Setup destination register */
781 condt = ARM_COND_HI; 1655 rd = dstk ? tmp[0] : dst_hi;
782 goto cmp_imm; 1656 rt = dstk ? tmp[1] : dst_lo;
783 case BPF_JMP | BPF_JGE | BPF_K: 1657 if (dstk) {
784 /* pc += (A >= K) ? pc->jt : pc->jf */ 1658 emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
785 condt = ARM_COND_HS; 1659 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
786cmp_imm: 1660 }
787 imm12 = imm8m(k); 1661
788 if (imm12 < 0) { 1662 /* Check for the condition */
789 emit_mov_i_no8m(r_scratch, k, ctx); 1663 emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code));
790 emit(ARM_CMP_R(r_A, r_scratch), ctx); 1664
791 } else { 1665 /* Setup JUMP instruction */
792 emit(ARM_CMP_I(r_A, imm12), ctx); 1666 jmp_offset = bpf2a32_offset(i+off, i, ctx);
793 } 1667 switch (BPF_OP(code)) {
794cond_jump: 1668 case BPF_JNE:
795 if (inst->jt) 1669 case BPF_JSET:
796 _emit(condt, ARM_B(b_imm(i + inst->jt + 1, 1670 _emit(ARM_COND_NE, ARM_B(jmp_offset), ctx);
797 ctx)), ctx);
798 if (inst->jf)
799 _emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1,
800 ctx)), ctx);
801 break; 1671 break;
802 case BPF_JMP | BPF_JEQ | BPF_X: 1672 case BPF_JEQ:
803 /* pc += (A == X) ? pc->jt : pc->jf */ 1673 _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
804 condt = ARM_COND_EQ;
805 goto cmp_x;
806 case BPF_JMP | BPF_JGT | BPF_X:
807 /* pc += (A > X) ? pc->jt : pc->jf */
808 condt = ARM_COND_HI;
809 goto cmp_x;
810 case BPF_JMP | BPF_JGE | BPF_X:
811 /* pc += (A >= X) ? pc->jt : pc->jf */
812 condt = ARM_COND_CS;
813cmp_x:
814 update_on_xread(ctx);
815 emit(ARM_CMP_R(r_A, r_X), ctx);
816 goto cond_jump;
817 case BPF_JMP | BPF_JSET | BPF_K:
818 /* pc += (A & K) ? pc->jt : pc->jf */
819 condt = ARM_COND_NE;
820 /* not set iff all zeroes iff Z==1 iff EQ */
821
822 imm12 = imm8m(k);
823 if (imm12 < 0) {
824 emit_mov_i_no8m(r_scratch, k, ctx);
825 emit(ARM_TST_R(r_A, r_scratch), ctx);
826 } else {
827 emit(ARM_TST_I(r_A, imm12), ctx);
828 }
829 goto cond_jump;
830 case BPF_JMP | BPF_JSET | BPF_X:
831 /* pc += (A & X) ? pc->jt : pc->jf */
832 update_on_xread(ctx);
833 condt = ARM_COND_NE;
834 emit(ARM_TST_R(r_A, r_X), ctx);
835 goto cond_jump;
836 case BPF_RET | BPF_A:
837 emit(ARM_MOV_R(ARM_R0, r_A), ctx);
838 goto b_epilogue;
839 case BPF_RET | BPF_K:
840 if ((k == 0) && (ctx->ret0_fp_idx < 0))
841 ctx->ret0_fp_idx = i;
842 emit_mov_i(ARM_R0, k, ctx);
843b_epilogue:
844 if (i != ctx->skf->len - 1)
845 emit(ARM_B(b_imm(prog->len, ctx)), ctx);
846 break; 1674 break;
847 case BPF_MISC | BPF_TAX: 1675 case BPF_JGT:
848 /* X = A */ 1676 _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
849 ctx->seen |= SEEN_X;
850 emit(ARM_MOV_R(r_X, r_A), ctx);
851 break; 1677 break;
852 case BPF_MISC | BPF_TXA: 1678 case BPF_JGE:
853 /* A = X */ 1679 _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
854 update_on_xread(ctx);
855 emit(ARM_MOV_R(r_A, r_X), ctx);
856 break; 1680 break;
857 case BPF_ANC | SKF_AD_PROTOCOL: 1681 case BPF_JSGT:
858 /* A = ntohs(skb->protocol) */ 1682 _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
859 ctx->seen |= SEEN_SKB;
860 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
861 protocol) != 2);
862 off = offsetof(struct sk_buff, protocol);
863 emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx);
864 emit_swap16(r_A, r_scratch, ctx);
865 break; 1683 break;
866 case BPF_ANC | SKF_AD_CPU: 1684 case BPF_JSGE:
867 /* r_scratch = current_thread_info() */ 1685 _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
868 OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx);
869 /* A = current_thread_info()->cpu */
870 BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4);
871 off = offsetof(struct thread_info, cpu);
872 emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
873 break; 1686 break;
874 case BPF_ANC | SKF_AD_IFINDEX: 1687 case BPF_JLE:
875 case BPF_ANC | SKF_AD_HATYPE: 1688 _emit(ARM_COND_LS, ARM_B(jmp_offset), ctx);
876 /* A = skb->dev->ifindex */
877 /* A = skb->dev->type */
878 ctx->seen |= SEEN_SKB;
879 off = offsetof(struct sk_buff, dev);
880 emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
881
882 emit(ARM_CMP_I(r_scratch, 0), ctx);
883 emit_err_ret(ARM_COND_EQ, ctx);
884
885 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
886 ifindex) != 4);
887 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
888 type) != 2);
889
890 if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
891 off = offsetof(struct net_device, ifindex);
892 emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
893 } else {
894 /*
895 * offset of field "type" in "struct
896 * net_device" is above what can be
897 * used in the ldrh rd, [rn, #imm]
898 * instruction, so load the offset in
899 * a register and use ldrh rd, [rn, rm]
900 */
901 off = offsetof(struct net_device, type);
902 emit_mov_i(ARM_R3, off, ctx);
903 emit(ARM_LDRH_R(r_A, r_scratch, ARM_R3), ctx);
904 }
905 break; 1689 break;
906 case BPF_ANC | SKF_AD_MARK: 1690 case BPF_JLT:
907 ctx->seen |= SEEN_SKB; 1691 _emit(ARM_COND_CC, ARM_B(jmp_offset), ctx);
908 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
909 off = offsetof(struct sk_buff, mark);
910 emit(ARM_LDR_I(r_A, r_skb, off), ctx);
911 break; 1692 break;
912 case BPF_ANC | SKF_AD_RXHASH: 1693 case BPF_JSLT:
913 ctx->seen |= SEEN_SKB; 1694 _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
914 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
915 off = offsetof(struct sk_buff, hash);
916 emit(ARM_LDR_I(r_A, r_skb, off), ctx);
917 break; 1695 break;
918 case BPF_ANC | SKF_AD_VLAN_TAG: 1696 case BPF_JSLE:
919 case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: 1697 _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
920 ctx->seen |= SEEN_SKB;
921 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
922 off = offsetof(struct sk_buff, vlan_tci);
923 emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
924 if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
925 OP_IMM3(ARM_AND, r_A, r_A, ~VLAN_TAG_PRESENT, ctx);
926 else {
927 OP_IMM3(ARM_LSR, r_A, r_A, 12, ctx);
928 OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx);
929 }
930 break; 1698 break;
931 case BPF_ANC | SKF_AD_PKTTYPE: 1699 }
932 ctx->seen |= SEEN_SKB; 1700 break;
933 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 1701 /* JMP OFF */
934 __pkt_type_offset[0]) != 1); 1702 case BPF_JMP | BPF_JA:
935 off = PKT_TYPE_OFFSET(); 1703 {
936 emit(ARM_LDRB_I(r_A, r_skb, off), ctx); 1704 if (off == 0)
937 emit(ARM_AND_I(r_A, r_A, PKT_TYPE_MAX), ctx);
938#ifdef __BIG_ENDIAN_BITFIELD
939 emit(ARM_LSR_I(r_A, r_A, 5), ctx);
940#endif
941 break; 1705 break;
942 case BPF_ANC | SKF_AD_QUEUE: 1706 jmp_offset = bpf2a32_offset(i+off, i, ctx);
943 ctx->seen |= SEEN_SKB; 1707 check_imm24(jmp_offset);
944 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 1708 emit(ARM_B(jmp_offset), ctx);
945 queue_mapping) != 2); 1709 break;
946 BUILD_BUG_ON(offsetof(struct sk_buff, 1710 }
947 queue_mapping) > 0xff); 1711 /* tail call */
948 off = offsetof(struct sk_buff, queue_mapping); 1712 case BPF_JMP | BPF_TAIL_CALL:
949 emit(ARM_LDRH_I(r_A, r_skb, off), ctx); 1713 if (emit_bpf_tail_call(ctx))
1714 return -EFAULT;
1715 break;
1716 /* function call */
1717 case BPF_JMP | BPF_CALL:
1718 {
1719 const u8 *r0 = bpf2a32[BPF_REG_0];
1720 const u8 *r1 = bpf2a32[BPF_REG_1];
1721 const u8 *r2 = bpf2a32[BPF_REG_2];
1722 const u8 *r3 = bpf2a32[BPF_REG_3];
1723 const u8 *r4 = bpf2a32[BPF_REG_4];
1724 const u8 *r5 = bpf2a32[BPF_REG_5];
1725 const u32 func = (u32)__bpf_call_base + (u32)imm;
1726
1727 emit_a32_mov_r64(true, r0, r1, false, false, ctx);
1728 emit_a32_mov_r64(true, r1, r2, false, true, ctx);
1729 emit_push_r64(r5, 0, ctx);
1730 emit_push_r64(r4, 8, ctx);
1731 emit_push_r64(r3, 16, ctx);
1732
1733 emit_a32_mov_i(tmp[1], func, false, ctx);
1734 emit_blx_r(tmp[1], ctx);
1735
 1736 emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); /* caller cleans up the 24 bytes of stacked args */
1737 break;
1738 }
1739 /* function return */
1740 case BPF_JMP | BPF_EXIT:
 1741 /* Optimization: when the last instruction is EXIT,
 1742 * simply fall through to the epilogue.
 1743 */
1744 if (i == ctx->prog->len - 1)
950 break; 1745 break;
951 case BPF_ANC | SKF_AD_PAY_OFFSET: 1746 jmp_offset = epilogue_offset(ctx);
952 ctx->seen |= SEEN_SKB | SEEN_CALL; 1747 check_imm24(jmp_offset);
1748 emit(ARM_B(jmp_offset), ctx);
1749 break;
1750notyet:
1751 pr_info_once("*** NOT YET: opcode %02x ***\n", code);
1752 return -EFAULT;
1753 default:
1754 pr_err_once("unknown opcode %02x\n", code);
1755 return -EINVAL;
1756 }
953 1757
954 emit(ARM_MOV_R(ARM_R0, r_skb), ctx); 1758 if (ctx->flags & FLAG_IMM_OVERFLOW)
955 emit_mov_i(ARM_R3, (unsigned int)skb_get_poff, ctx); 1759 /*
956 emit_blx_r(ARM_R3, ctx); 1760 * this instruction generated an overflow when
957 emit(ARM_MOV_R(r_A, ARM_R0), ctx); 1761 * trying to access the literal pool, so
958 break; 1762 * delegate this filter to the kernel interpreter.
959 case BPF_LDX | BPF_W | BPF_ABS: 1763 */
960 /* 1764 return -1;
961 * load a 32bit word from struct seccomp_data. 1765 return 0;
962 * seccomp_check_filter() will already have checked 1766}
963 * that k is 32bit aligned and lies within the 1767
964 * struct seccomp_data. 1768static int build_body(struct jit_ctx *ctx)
965 */ 1769{
966 ctx->seen |= SEEN_SKB; 1770 const struct bpf_prog *prog = ctx->prog;
967 emit(ARM_LDR_I(r_A, r_skb, k), ctx); 1771 unsigned int i;
968 break; 1772
969 default: 1773 for (i = 0; i < prog->len; i++) {
970 return -1; 1774 const struct bpf_insn *insn = &(prog->insnsi[i]);
1775 int ret;
1776
1777 ret = build_insn(insn, ctx);
1778
 1779 /* A positive return value means the 64-bit immediate load consumed the next instruction too. */
1780 if (ret > 0) {
1781 i++;
1782 if (ctx->target == NULL)
1783 ctx->offsets[i] = ctx->idx;
1784 continue;
971 } 1785 }
972 1786
973 if (ctx->flags & FLAG_IMM_OVERFLOW) 1787 if (ctx->target == NULL)
974 /* 1788 ctx->offsets[i] = ctx->idx;
975 * this instruction generated an overflow when 1789
 976 * trying to access the literal pool, so 1790 /* If unsuccessful, return with the error code */
977 * delegate this filter to the kernel interpreter. 1791 if (ret)
978 */ 1792 return ret;
979 return -1;
980 } 1793 }
1794 return 0;
1795}
981 1796
982 /* compute offsets only during the first pass */ 1797static int validate_code(struct jit_ctx *ctx)
983 if (ctx->target == NULL) 1798{
984 ctx->offsets[i] = ctx->idx * 4; 1799 int i;
1800
1801 for (i = 0; i < ctx->idx; i++) {
1802 if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF))
1803 return -1;
1804 }
985 1805
986 return 0; 1806 return 0;
987} 1807}
988 1808
1809void bpf_jit_compile(struct bpf_prog *prog)
1810{
1811 /* Nothing to do here. We support Internal BPF. */
1812}
989 1813
990void bpf_jit_compile(struct bpf_prog *fp) 1814struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
991{ 1815{
1816 struct bpf_prog *tmp, *orig_prog = prog;
992 struct bpf_binary_header *header; 1817 struct bpf_binary_header *header;
1818 bool tmp_blinded = false;
993 struct jit_ctx ctx; 1819 struct jit_ctx ctx;
994 unsigned tmp_idx; 1820 unsigned int tmp_idx;
995 unsigned alloc_size; 1821 unsigned int image_size;
996 u8 *target_ptr; 1822 u8 *image_ptr;
997 1823
1824 /* If BPF JIT was not enabled then we must fall back to
1825 * the interpreter.
1826 */
998 if (!bpf_jit_enable) 1827 if (!bpf_jit_enable)
999 return; 1828 return orig_prog;
1000 1829
1001 memset(&ctx, 0, sizeof(ctx)); 1830 /* If constant blinding was enabled and we failed during blinding
1002 ctx.skf = fp; 1831 * then we must fall back to the interpreter. Otherwise, we save
1003 ctx.ret0_fp_idx = -1; 1832 * the new JITed code.
1833 */
1834 tmp = bpf_jit_blind_constants(prog);
1004 1835
1005 ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL); 1836 if (IS_ERR(tmp))
1006 if (ctx.offsets == NULL) 1837 return orig_prog;
1007 return; 1838 if (tmp != prog) {
1839 tmp_blinded = true;
1840 prog = tmp;
1841 }
1008 1842
1009 /* fake pass to fill in the ctx->seen */ 1843 memset(&ctx, 0, sizeof(ctx));
1010 if (unlikely(build_body(&ctx))) 1844 ctx.prog = prog;
1845
 1846 /* If we are not able to allocate memory for offsets[], then
 1847 * we must fall back to the interpreter.
 1848 */
1849 ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
1850 if (ctx.offsets == NULL) {
1851 prog = orig_prog;
1011 goto out; 1852 goto out;
1853 }
1854
 1855 /* 1) Fake pass to find the length of the JITed code,
 1856 * to compute ctx->offsets and other context variables
 1857 * needed to compute the final JITed code.
 1858 * Also, calculate the starting pointer of the JITed code,
 1859 * which is prefixed by a random number of fault instructions.
 1860 *
 1861 * If the first pass fails then there is no chance of it
 1862 * being successful in the second pass, so just fall back
 1863 * to the interpreter.
1864 */
1865 if (build_body(&ctx)) {
1866 prog = orig_prog;
1867 goto out_off;
1868 }
1012 1869
1013 tmp_idx = ctx.idx; 1870 tmp_idx = ctx.idx;
1014 build_prologue(&ctx); 1871 build_prologue(&ctx);
1015 ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; 1872 ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
1016 1873
1874 ctx.epilogue_offset = ctx.idx;
1875
1017#if __LINUX_ARM_ARCH__ < 7 1876#if __LINUX_ARM_ARCH__ < 7
1018 tmp_idx = ctx.idx; 1877 tmp_idx = ctx.idx;
1019 build_epilogue(&ctx); 1878 build_epilogue(&ctx);
@@ -1021,64 +1880,83 @@ void bpf_jit_compile(struct bpf_prog *fp)
1021 1880
1022 ctx.idx += ctx.imm_count; 1881 ctx.idx += ctx.imm_count;
1023 if (ctx.imm_count) { 1882 if (ctx.imm_count) {
1024 ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL); 1883 ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL);
1025 if (ctx.imms == NULL) 1884 if (ctx.imms == NULL) {
1026 goto out; 1885 prog = orig_prog;
1886 goto out_off;
1887 }
1027 } 1888 }
1028#else 1889#else
1029 /* there's nothing after the epilogue on ARMv7 */ 1890 /* there's nothing after the epilogue on ARMv7 */
1030 build_epilogue(&ctx); 1891 build_epilogue(&ctx);
1031#endif 1892#endif
1032 alloc_size = 4 * ctx.idx; 1893 /* Now we can get the actual image size of the JITed ARM code.
1033 header = bpf_jit_binary_alloc(alloc_size, &target_ptr, 1894 * Currently, we are not considering the THUMB-2 instructions
1034 4, jit_fill_hole); 1895 * for the JIT, although they could decrease the size of the image.
1035 if (header == NULL) 1896 *
1036 goto out; 1897 * As each ARM instruction is 32 bits long, we translate the
 1898 * number of JITed instructions into the size required to store the
 1899 * JITed code.
1900 */
1901 image_size = sizeof(u32) * ctx.idx;
1037 1902
1038 ctx.target = (u32 *) target_ptr; 1903 /* Now we know the size of the structure to make */
1904 header = bpf_jit_binary_alloc(image_size, &image_ptr,
1905 sizeof(u32), jit_fill_hole);
 1906 /* If we are not able to allocate memory for the structure, then
 1907 * we must fall back to the interpreter.
 1908 */
1909 if (header == NULL) {
1910 prog = orig_prog;
1911 goto out_imms;
1912 }
1913
 1914 /* 2) Actual pass to generate the final JIT code */
1915 ctx.target = (u32 *) image_ptr;
1039 ctx.idx = 0; 1916 ctx.idx = 0;
1040 1917
1041 build_prologue(&ctx); 1918 build_prologue(&ctx);
1919
 1920 /* If building the body of the JITed code fails somehow,
 1921 * we fall back to the interpreter.
 1922 */
1042 if (build_body(&ctx) < 0) { 1923 if (build_body(&ctx) < 0) {
1043#if __LINUX_ARM_ARCH__ < 7 1924 image_ptr = NULL;
1044 if (ctx.imm_count)
1045 kfree(ctx.imms);
1046#endif
1047 bpf_jit_binary_free(header); 1925 bpf_jit_binary_free(header);
1048 goto out; 1926 prog = orig_prog;
1927 goto out_imms;
1049 } 1928 }
1050 build_epilogue(&ctx); 1929 build_epilogue(&ctx);
1051 1930
 1931 /* 3) Extra pass to validate the JITed code */
1932 if (validate_code(&ctx)) {
1933 image_ptr = NULL;
1934 bpf_jit_binary_free(header);
1935 prog = orig_prog;
1936 goto out_imms;
1937 }
1052 flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx)); 1938 flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));
1053 1939
1054#if __LINUX_ARM_ARCH__ < 7
1055 if (ctx.imm_count)
1056 kfree(ctx.imms);
1057#endif
1058
1059 if (bpf_jit_enable > 1) 1940 if (bpf_jit_enable > 1)
1060 /* there are 2 passes here */ 1941 /* there are 2 passes here */
1061 bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); 1942 bpf_jit_dump(prog->len, image_size, 2, ctx.target);
1062 1943
1063 set_memory_ro((unsigned long)header, header->pages); 1944 set_memory_ro((unsigned long)header, header->pages);
1064 fp->bpf_func = (void *)ctx.target; 1945 prog->bpf_func = (void *)ctx.target;
1065 fp->jited = 1; 1946 prog->jited = 1;
1066out: 1947 prog->jited_len = image_size;
1948
1949out_imms:
1950#if __LINUX_ARM_ARCH__ < 7
1951 if (ctx.imm_count)
1952 kfree(ctx.imms);
1953#endif
1954out_off:
1067 kfree(ctx.offsets); 1955 kfree(ctx.offsets);
1068 return; 1956out:
1957 if (tmp_blinded)
1958 bpf_jit_prog_release_other(prog, prog == orig_prog ?
1959 tmp : orig_prog);
1960 return prog;
1069} 1961}
1070 1962
1071void bpf_jit_free(struct bpf_prog *fp)
1072{
1073 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
1074 struct bpf_binary_header *header = (void *)addr;
1075
1076 if (!fp->jited)
1077 goto free_filter;
1078
1079 set_memory_rw(addr, header->pages);
1080 bpf_jit_binary_free(header);
1081
1082free_filter:
1083 bpf_prog_unlock_free(fp);
1084}
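
The JIT above models each 64-bit eBPF register as a pair of 32-bit halves; when a register is stacked (the dstk/sstk flags), its halves are filled from and spilled back to per-register stack slots via STACK_VAR() around each operation, and 64-bit arithmetic is built from 32-bit halves with carry (ADDS/ADC-style sequences). The stand-alone C sketch below is illustrative only, not part of the patch, and all names in it are hypothetical; it mimics that spill/fill pattern and the carry handling in plain C to show the data flow.

/* Minimal sketch of the hi/lo pair + stack-slot spill/fill pattern. */
#include <stdint.h>
#include <stdio.h>

struct reg64 {
	uint32_t lo;
	uint32_t hi;
};

/* "Stack" slots standing in for STACK_VAR(dst_lo)/STACK_VAR(dst_hi). */
static uint32_t stack_slots[16];

static void spill(const struct reg64 *r, int slot)
{
	stack_slots[slot]     = r->lo;	/* like ARM_STR_I(rd_lo, ARM_SP, off)   */
	stack_slots[slot + 1] = r->hi;	/* like ARM_STR_I(rd_hi, ARM_SP, off+4) */
}

static void fill(struct reg64 *r, int slot)
{
	r->lo = stack_slots[slot];	/* like ARM_LDR_I(rd_lo, ARM_SP, off)   */
	r->hi = stack_slots[slot + 1];	/* like ARM_LDR_I(rd_hi, ARM_SP, off+4) */
}

/* 64-bit add built from two 32-bit halves, carrying into the high word. */
static void add64(struct reg64 *dst, const struct reg64 *src)
{
	uint32_t old_lo = dst->lo;

	dst->lo += src->lo;
	dst->hi += src->hi + (dst->lo < old_lo);	/* carry out of the low half */
}

int main(void)
{
	struct reg64 a = { .lo = 0xffffffff, .hi = 0 };
	struct reg64 b = { .lo = 1, .hi = 0 };

	spill(&a, 0);		/* register lives on the stack ...        */
	fill(&a, 0);		/* ... and is filled before the operation */
	add64(&a, &b);
	printf("0x%08x%08x\n", a.hi, a.lo);	/* prints 0x0000000100000000 */
	return 0;
}
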
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index c46fca2972f7..d5cf5f6208aa 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -11,6 +11,7 @@
11#ifndef PFILTER_OPCODES_ARM_H 11#ifndef PFILTER_OPCODES_ARM_H
12#define PFILTER_OPCODES_ARM_H 12#define PFILTER_OPCODES_ARM_H
13 13
14/* ARM 32bit Registers */
14#define ARM_R0 0 15#define ARM_R0 0
15#define ARM_R1 1 16#define ARM_R1 1
16#define ARM_R2 2 17#define ARM_R2 2
@@ -22,38 +23,43 @@
22#define ARM_R8 8 23#define ARM_R8 8
23#define ARM_R9 9 24#define ARM_R9 9
24#define ARM_R10 10 25#define ARM_R10 10
25#define ARM_FP 11 26#define ARM_FP 11 /* Frame Pointer */
26#define ARM_IP 12 27#define ARM_IP 12 /* Intra-procedure scratch register */
27#define ARM_SP 13 28#define ARM_SP 13 /* Stack pointer: as load/store base reg */
28#define ARM_LR 14 29#define ARM_LR 14 /* Link Register */
29#define ARM_PC 15 30#define ARM_PC 15 /* Program counter */
30 31
31#define ARM_COND_EQ 0x0 32#define ARM_COND_EQ 0x0 /* == */
32#define ARM_COND_NE 0x1 33#define ARM_COND_NE 0x1 /* != */
33#define ARM_COND_CS 0x2 34#define ARM_COND_CS 0x2 /* unsigned >= */
34#define ARM_COND_HS ARM_COND_CS 35#define ARM_COND_HS ARM_COND_CS
35#define ARM_COND_CC 0x3 36#define ARM_COND_CC 0x3 /* unsigned < */
36#define ARM_COND_LO ARM_COND_CC 37#define ARM_COND_LO ARM_COND_CC
37#define ARM_COND_MI 0x4 38#define ARM_COND_MI 0x4 /* < 0 */
38#define ARM_COND_PL 0x5 39#define ARM_COND_PL 0x5 /* >= 0 */
39#define ARM_COND_VS 0x6 40#define ARM_COND_VS 0x6 /* Signed Overflow */
40#define ARM_COND_VC 0x7 41#define ARM_COND_VC 0x7 /* No Signed Overflow */
41#define ARM_COND_HI 0x8 42#define ARM_COND_HI 0x8 /* unsigned > */
42#define ARM_COND_LS 0x9 43#define ARM_COND_LS 0x9 /* unsigned <= */
43#define ARM_COND_GE 0xa 44#define ARM_COND_GE 0xa /* Signed >= */
44#define ARM_COND_LT 0xb 45#define ARM_COND_LT 0xb /* Signed < */
45#define ARM_COND_GT 0xc 46#define ARM_COND_GT 0xc /* Signed > */
46#define ARM_COND_LE 0xd 47#define ARM_COND_LE 0xd /* Signed <= */
47#define ARM_COND_AL 0xe 48#define ARM_COND_AL 0xe /* None */
48 49
49/* register shift types */ 50/* register shift types */
50#define SRTYPE_LSL 0 51#define SRTYPE_LSL 0
51#define SRTYPE_LSR 1 52#define SRTYPE_LSR 1
52#define SRTYPE_ASR 2 53#define SRTYPE_ASR 2
53#define SRTYPE_ROR 3 54#define SRTYPE_ROR 3
55#define SRTYPE_ASL (SRTYPE_LSL)
54 56
55#define ARM_INST_ADD_R 0x00800000 57#define ARM_INST_ADD_R 0x00800000
58#define ARM_INST_ADDS_R 0x00900000
59#define ARM_INST_ADC_R 0x00a00000
60#define ARM_INST_ADC_I 0x02a00000
56#define ARM_INST_ADD_I 0x02800000 61#define ARM_INST_ADD_I 0x02800000
62#define ARM_INST_ADDS_I 0x02900000
57 63
58#define ARM_INST_AND_R 0x00000000 64#define ARM_INST_AND_R 0x00000000
59#define ARM_INST_AND_I 0x02000000 65#define ARM_INST_AND_I 0x02000000
@@ -76,8 +82,10 @@
76#define ARM_INST_LDRH_I 0x01d000b0 82#define ARM_INST_LDRH_I 0x01d000b0
77#define ARM_INST_LDRH_R 0x019000b0 83#define ARM_INST_LDRH_R 0x019000b0
78#define ARM_INST_LDR_I 0x05900000 84#define ARM_INST_LDR_I 0x05900000
85#define ARM_INST_LDR_R 0x07900000
79 86
80#define ARM_INST_LDM 0x08900000 87#define ARM_INST_LDM 0x08900000
88#define ARM_INST_LDM_IA 0x08b00000
81 89
82#define ARM_INST_LSL_I 0x01a00000 90#define ARM_INST_LSL_I 0x01a00000
83#define ARM_INST_LSL_R 0x01a00010 91#define ARM_INST_LSL_R 0x01a00010
@@ -86,6 +94,7 @@
86#define ARM_INST_LSR_R 0x01a00030 94#define ARM_INST_LSR_R 0x01a00030
87 95
88#define ARM_INST_MOV_R 0x01a00000 96#define ARM_INST_MOV_R 0x01a00000
97#define ARM_INST_MOVS_R 0x01b00000
89#define ARM_INST_MOV_I 0x03a00000 98#define ARM_INST_MOV_I 0x03a00000
90#define ARM_INST_MOVW 0x03000000 99#define ARM_INST_MOVW 0x03000000
91#define ARM_INST_MOVT 0x03400000 100#define ARM_INST_MOVT 0x03400000
@@ -96,17 +105,28 @@
96#define ARM_INST_PUSH 0x092d0000 105#define ARM_INST_PUSH 0x092d0000
97 106
98#define ARM_INST_ORR_R 0x01800000 107#define ARM_INST_ORR_R 0x01800000
108#define ARM_INST_ORRS_R 0x01900000
99#define ARM_INST_ORR_I 0x03800000 109#define ARM_INST_ORR_I 0x03800000
100 110
101#define ARM_INST_REV 0x06bf0f30 111#define ARM_INST_REV 0x06bf0f30
102#define ARM_INST_REV16 0x06bf0fb0 112#define ARM_INST_REV16 0x06bf0fb0
103 113
104#define ARM_INST_RSB_I 0x02600000 114#define ARM_INST_RSB_I 0x02600000
115#define ARM_INST_RSBS_I 0x02700000
116#define ARM_INST_RSC_I 0x02e00000
105 117
106#define ARM_INST_SUB_R 0x00400000 118#define ARM_INST_SUB_R 0x00400000
119#define ARM_INST_SUBS_R 0x00500000
120#define ARM_INST_RSB_R 0x00600000
107#define ARM_INST_SUB_I 0x02400000 121#define ARM_INST_SUB_I 0x02400000
122#define ARM_INST_SUBS_I 0x02500000
123#define ARM_INST_SBC_I 0x02c00000
124#define ARM_INST_SBC_R 0x00c00000
125#define ARM_INST_SBCS_R 0x00d00000
108 126
109#define ARM_INST_STR_I 0x05800000 127#define ARM_INST_STR_I 0x05800000
128#define ARM_INST_STRB_I 0x05c00000
129#define ARM_INST_STRH_I 0x01c000b0
110 130
111#define ARM_INST_TST_R 0x01100000 131#define ARM_INST_TST_R 0x01100000
112#define ARM_INST_TST_I 0x03100000 132#define ARM_INST_TST_I 0x03100000
@@ -117,6 +137,8 @@
117 137
118#define ARM_INST_MLS 0x00600090 138#define ARM_INST_MLS 0x00600090
119 139
140#define ARM_INST_UXTH 0x06ff0070
141
120/* 142/*
121 * Use a suitable undefined instruction to use for ARM/Thumb2 faulting. 143 * Use a suitable undefined instruction to use for ARM/Thumb2 faulting.
122 * We need to be careful not to conflict with those used by other modules 144 * We need to be careful not to conflict with those used by other modules
@@ -135,9 +157,15 @@
135#define _AL3_R(op, rd, rn, rm) ((op ## _R) | (rd) << 12 | (rn) << 16 | (rm)) 157#define _AL3_R(op, rd, rn, rm) ((op ## _R) | (rd) << 12 | (rn) << 16 | (rm))
136/* immediate */ 158/* immediate */
137#define _AL3_I(op, rd, rn, imm) ((op ## _I) | (rd) << 12 | (rn) << 16 | (imm)) 159#define _AL3_I(op, rd, rn, imm) ((op ## _I) | (rd) << 12 | (rn) << 16 | (imm))
160/* register with register-shift */
161#define _AL3_SR(inst) (inst | (1 << 4))
138 162
139#define ARM_ADD_R(rd, rn, rm) _AL3_R(ARM_INST_ADD, rd, rn, rm) 163#define ARM_ADD_R(rd, rn, rm) _AL3_R(ARM_INST_ADD, rd, rn, rm)
164#define ARM_ADDS_R(rd, rn, rm) _AL3_R(ARM_INST_ADDS, rd, rn, rm)
140#define ARM_ADD_I(rd, rn, imm) _AL3_I(ARM_INST_ADD, rd, rn, imm) 165#define ARM_ADD_I(rd, rn, imm) _AL3_I(ARM_INST_ADD, rd, rn, imm)
166#define ARM_ADDS_I(rd, rn, imm) _AL3_I(ARM_INST_ADDS, rd, rn, imm)
167#define ARM_ADC_R(rd, rn, rm) _AL3_R(ARM_INST_ADC, rd, rn, rm)
168#define ARM_ADC_I(rd, rn, imm) _AL3_I(ARM_INST_ADC, rd, rn, imm)
141 169
142#define ARM_AND_R(rd, rn, rm) _AL3_R(ARM_INST_AND, rd, rn, rm) 170#define ARM_AND_R(rd, rn, rm) _AL3_R(ARM_INST_AND, rd, rn, rm)
143#define ARM_AND_I(rd, rn, imm) _AL3_I(ARM_INST_AND, rd, rn, imm) 171#define ARM_AND_I(rd, rn, imm) _AL3_I(ARM_INST_AND, rd, rn, imm)
@@ -156,7 +184,9 @@
156#define ARM_EOR_I(rd, rn, imm) _AL3_I(ARM_INST_EOR, rd, rn, imm) 184#define ARM_EOR_I(rd, rn, imm) _AL3_I(ARM_INST_EOR, rd, rn, imm)
157 185
158#define ARM_LDR_I(rt, rn, off) (ARM_INST_LDR_I | (rt) << 12 | (rn) << 16 \ 186#define ARM_LDR_I(rt, rn, off) (ARM_INST_LDR_I | (rt) << 12 | (rn) << 16 \
159 | (off)) 187 | ((off) & 0xfff))
188#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | (rt) << 12 | (rn) << 16 \
189 | (rm))
160#define ARM_LDRB_I(rt, rn, off) (ARM_INST_LDRB_I | (rt) << 12 | (rn) << 16 \ 190#define ARM_LDRB_I(rt, rn, off) (ARM_INST_LDRB_I | (rt) << 12 | (rn) << 16 \
161 | (off)) 191 | (off))
162#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | (rt) << 12 | (rn) << 16 \ 192#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | (rt) << 12 | (rn) << 16 \
@@ -167,15 +197,23 @@
167 | (rm)) 197 | (rm))
168 198
169#define ARM_LDM(rn, regs) (ARM_INST_LDM | (rn) << 16 | (regs)) 199#define ARM_LDM(rn, regs) (ARM_INST_LDM | (rn) << 16 | (regs))
200#define ARM_LDM_IA(rn, regs) (ARM_INST_LDM_IA | (rn) << 16 | (regs))
170 201
171#define ARM_LSL_R(rd, rn, rm) (_AL3_R(ARM_INST_LSL, rd, 0, rn) | (rm) << 8) 202#define ARM_LSL_R(rd, rn, rm) (_AL3_R(ARM_INST_LSL, rd, 0, rn) | (rm) << 8)
172#define ARM_LSL_I(rd, rn, imm) (_AL3_I(ARM_INST_LSL, rd, 0, rn) | (imm) << 7) 203#define ARM_LSL_I(rd, rn, imm) (_AL3_I(ARM_INST_LSL, rd, 0, rn) | (imm) << 7)
173 204
174#define ARM_LSR_R(rd, rn, rm) (_AL3_R(ARM_INST_LSR, rd, 0, rn) | (rm) << 8) 205#define ARM_LSR_R(rd, rn, rm) (_AL3_R(ARM_INST_LSR, rd, 0, rn) | (rm) << 8)
175#define ARM_LSR_I(rd, rn, imm) (_AL3_I(ARM_INST_LSR, rd, 0, rn) | (imm) << 7) 206#define ARM_LSR_I(rd, rn, imm) (_AL3_I(ARM_INST_LSR, rd, 0, rn) | (imm) << 7)
207#define ARM_ASR_R(rd, rn, rm) (_AL3_R(ARM_INST_ASR, rd, 0, rn) | (rm) << 8)
208#define ARM_ASR_I(rd, rn, imm) (_AL3_I(ARM_INST_ASR, rd, 0, rn) | (imm) << 7)
176 209
177#define ARM_MOV_R(rd, rm) _AL3_R(ARM_INST_MOV, rd, 0, rm) 210#define ARM_MOV_R(rd, rm) _AL3_R(ARM_INST_MOV, rd, 0, rm)
211#define ARM_MOVS_R(rd, rm) _AL3_R(ARM_INST_MOVS, rd, 0, rm)
178#define ARM_MOV_I(rd, imm) _AL3_I(ARM_INST_MOV, rd, 0, imm) 212#define ARM_MOV_I(rd, imm) _AL3_I(ARM_INST_MOV, rd, 0, imm)
213#define ARM_MOV_SR(rd, rm, type, rs) \
214 (_AL3_SR(ARM_MOV_R(rd, rm)) | (type) << 5 | (rs) << 8)
215#define ARM_MOV_SI(rd, rm, type, imm6) \
216 (ARM_MOV_R(rd, rm) | (type) << 5 | (imm6) << 7)
179 217
180#define ARM_MOVW(rd, imm) \ 218#define ARM_MOVW(rd, imm) \
181 (ARM_INST_MOVW | ((imm) >> 12) << 16 | (rd) << 12 | ((imm) & 0x0fff)) 219 (ARM_INST_MOVW | ((imm) >> 12) << 16 | (rd) << 12 | ((imm) & 0x0fff))
@@ -190,19 +228,38 @@
190 228
191#define ARM_ORR_R(rd, rn, rm) _AL3_R(ARM_INST_ORR, rd, rn, rm) 229#define ARM_ORR_R(rd, rn, rm) _AL3_R(ARM_INST_ORR, rd, rn, rm)
192#define ARM_ORR_I(rd, rn, imm) _AL3_I(ARM_INST_ORR, rd, rn, imm) 230#define ARM_ORR_I(rd, rn, imm) _AL3_I(ARM_INST_ORR, rd, rn, imm)
193#define ARM_ORR_S(rd, rn, rm, type, rs) \ 231#define ARM_ORR_SR(rd, rn, rm, type, rs) \
194 (ARM_ORR_R(rd, rn, rm) | (type) << 5 | (rs) << 7) 232 (_AL3_SR(ARM_ORR_R(rd, rn, rm)) | (type) << 5 | (rs) << 8)
233#define ARM_ORRS_R(rd, rn, rm) _AL3_R(ARM_INST_ORRS, rd, rn, rm)
234#define ARM_ORRS_SR(rd, rn, rm, type, rs) \
235 (_AL3_SR(ARM_ORRS_R(rd, rn, rm)) | (type) << 5 | (rs) << 8)
236#define ARM_ORR_SI(rd, rn, rm, type, imm6) \
237 (ARM_ORR_R(rd, rn, rm) | (type) << 5 | (imm6) << 7)
238#define ARM_ORRS_SI(rd, rn, rm, type, imm6) \
239 (ARM_ORRS_R(rd, rn, rm) | (type) << 5 | (imm6) << 7)
195 240
196#define ARM_REV(rd, rm) (ARM_INST_REV | (rd) << 12 | (rm)) 241#define ARM_REV(rd, rm) (ARM_INST_REV | (rd) << 12 | (rm))
197#define ARM_REV16(rd, rm) (ARM_INST_REV16 | (rd) << 12 | (rm)) 242#define ARM_REV16(rd, rm) (ARM_INST_REV16 | (rd) << 12 | (rm))
198 243
199#define ARM_RSB_I(rd, rn, imm) _AL3_I(ARM_INST_RSB, rd, rn, imm) 244#define ARM_RSB_I(rd, rn, imm) _AL3_I(ARM_INST_RSB, rd, rn, imm)
245#define ARM_RSBS_I(rd, rn, imm) _AL3_I(ARM_INST_RSBS, rd, rn, imm)
246#define ARM_RSC_I(rd, rn, imm) _AL3_I(ARM_INST_RSC, rd, rn, imm)
200 247
201#define ARM_SUB_R(rd, rn, rm) _AL3_R(ARM_INST_SUB, rd, rn, rm) 248#define ARM_SUB_R(rd, rn, rm) _AL3_R(ARM_INST_SUB, rd, rn, rm)
249#define ARM_SUBS_R(rd, rn, rm) _AL3_R(ARM_INST_SUBS, rd, rn, rm)
250#define ARM_RSB_R(rd, rn, rm) _AL3_R(ARM_INST_RSB, rd, rn, rm)
251#define ARM_SBC_R(rd, rn, rm) _AL3_R(ARM_INST_SBC, rd, rn, rm)
252#define ARM_SBCS_R(rd, rn, rm) _AL3_R(ARM_INST_SBCS, rd, rn, rm)
202#define ARM_SUB_I(rd, rn, imm) _AL3_I(ARM_INST_SUB, rd, rn, imm) 253#define ARM_SUB_I(rd, rn, imm) _AL3_I(ARM_INST_SUB, rd, rn, imm)
254#define ARM_SUBS_I(rd, rn, imm) _AL3_I(ARM_INST_SUBS, rd, rn, imm)
255#define ARM_SBC_I(rd, rn, imm) _AL3_I(ARM_INST_SBC, rd, rn, imm)
203 256
204#define ARM_STR_I(rt, rn, off) (ARM_INST_STR_I | (rt) << 12 | (rn) << 16 \ 257#define ARM_STR_I(rt, rn, off) (ARM_INST_STR_I | (rt) << 12 | (rn) << 16 \
205 | (off)) 258 | ((off) & 0xfff))
259#define ARM_STRH_I(rt, rn, off) (ARM_INST_STRH_I | (rt) << 12 | (rn) << 16 \
260 | (((off) & 0xf0) << 4) | ((off) & 0xf))
261#define ARM_STRB_I(rt, rn, off) (ARM_INST_STRB_I | (rt) << 12 | (rn) << 16 \
262 | (((off) & 0xf0) << 4) | ((off) & 0xf))
206 263
207#define ARM_TST_R(rn, rm) _AL3_R(ARM_INST_TST, 0, rn, rm) 264#define ARM_TST_R(rn, rm) _AL3_R(ARM_INST_TST, 0, rn, rm)
208#define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm) 265#define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm)
@@ -214,5 +271,6 @@
214 271
215#define ARM_MLS(rd, rn, rm, ra) (ARM_INST_MLS | (rd) << 16 | (rn) | (rm) << 8 \ 272#define ARM_MLS(rd, rn, rm, ra) (ARM_INST_MLS | (rd) << 16 | (rn) | (rm) << 8 \
216 | (ra) << 12) 273 | (ra) << 12)
274#define ARM_UXTH(rd, rm) (ARM_INST_UXTH | (rd) << 12 | (rm))
217 275
218#endif /* PFILTER_OPCODES_ARM_H */ 276#endif /* PFILTER_OPCODES_ARM_H */
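
For reference, the opcode macros in this header assemble each ARM instruction as a plain 32-bit word by ORing register fields into a base opcode value. The short stand-alone C sketch below is illustrative only, not part of the patch; it reproduces the _AL3_R() composition for ARM_ADD_R and assumes the condition field in bits 31:28 is ORed in separately when the word is emitted.

#include <stdint.h>
#include <stdio.h>

#define ARM_INST_ADD_R	0x00800000

/* Register-operand form: op | rd<<12 | rn<<16 | rm, as in _AL3_R() above. */
#define AL3_R(op, rd, rn, rm)	((op) | (rd) << 12 | (rn) << 16 | (rm))

int main(void)
{
	/* add r0, r1, r2 -> 0x00810002 (before the condition bits are added) */
	uint32_t insn = AL3_R(ARM_INST_ADD_R, 0, 1, 2);

	printf("0x%08x\n", insn);
	return 0;
}
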