aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2012-04-15 23:06:27 -0400
committerDavid S. Miller <davem@davemloft.net>2012-04-16 23:15:14 -0400
commit2809a2087cc44b55e4377d7b9be3f7f5d2569091 (patch)
treec868c86ae407b6efc5aac3630ba0244de2eb672d /arch/sparc
parentf4f9f6e75d67ddfcfea79a2108217bc654aef3af (diff)
net: filter: Just In Time compiler for sparc
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc')
-rw-r--r--arch/sparc/Kconfig1
-rw-r--r--arch/sparc/Makefile1
-rw-r--r--arch/sparc/net/Makefile4
-rw-r--r--arch/sparc/net/bpf_jit.h52
-rw-r--r--arch/sparc/net/bpf_jit_asm.S199
-rw-r--r--arch/sparc/net/bpf_jit_comp.c785
6 files changed, 1042 insertions, 0 deletions
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 6c0683d3fcba..5f6acce45a0d 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -30,6 +30,7 @@ config SPARC
30 select USE_GENERIC_SMP_HELPERS if SMP 30 select USE_GENERIC_SMP_HELPERS if SMP
31 select GENERIC_PCI_IOMAP 31 select GENERIC_PCI_IOMAP
32 select HAVE_NMI_WATCHDOG if SPARC64 32 select HAVE_NMI_WATCHDOG if SPARC64
33 select HAVE_BPF_JIT
33 34
34config SPARC32 35config SPARC32
35 def_bool !64BIT 36 def_bool !64BIT
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index eddcfb36aafb..0e5de13b56c5 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -66,6 +66,7 @@ head-y += arch/sparc/kernel/init_task.o
66 66
67core-y += arch/sparc/kernel/ 67core-y += arch/sparc/kernel/
68core-y += arch/sparc/mm/ arch/sparc/math-emu/ 68core-y += arch/sparc/mm/ arch/sparc/math-emu/
69core-y += arch/sparc/net/
69 70
70libs-y += arch/sparc/prom/ 71libs-y += arch/sparc/prom/
71libs-y += arch/sparc/lib/ 72libs-y += arch/sparc/lib/
diff --git a/arch/sparc/net/Makefile b/arch/sparc/net/Makefile
new file mode 100644
index 000000000000..1306a58ac541
--- /dev/null
+++ b/arch/sparc/net/Makefile
@@ -0,0 +1,4 @@
1#
2# Arch-specific network modules
3#
4obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
diff --git a/arch/sparc/net/bpf_jit.h b/arch/sparc/net/bpf_jit.h
new file mode 100644
index 000000000000..74f048be739c
--- /dev/null
+++ b/arch/sparc/net/bpf_jit.h
@@ -0,0 +1,52 @@
#ifndef _BPF_JIT_H
#define _BPF_JIT_H

/* Register conventions shared by the JIT compiler and its assembly stubs:
 *  %g1 : temporary
 *  %g2 : Secondary temporary used by SKB data helper stubs.
 *  %g3 : packet offset handed to the SKB data helper stubs.
 *  %o0 : pointer to skb (first argument given to JIT function)
 *  %o1 : BPF A accumulator
 *  %o2 : BPF X accumulator
 *  %o3 : Holds saved %o7 so we can call helper functions without needing
 *        to allocate a register window.
 *  %o4 : skb->len - skb->data_len
 *  %o5 : skb->data
 */

#ifndef __ASSEMBLER__
/* Numeric register encodings used when building instruction words. */
#define G0		0x00
#define G1		0x01
#define G2		0x02	/* needed so that r_TMP2 expands to a value */
#define G3		0x03
#define G6		0x06
#define O0		0x08
#define O1		0x09
#define O2		0x0a
#define O3		0x0b
#define O4		0x0c
#define O5		0x0d
#define SP		0x0e
#define O7		0x0f
#define FP		0x1e

#define r_SKB		O0
#define r_A		O1
#define r_X		O2
#define r_saved_O7	O3
#define r_HEADLEN	O4
#define r_SKB_DATA	O5
#define r_TMP		G1
#define r_TMP2		G2
#define r_OFF		G3
#else
/* Symbolic register names for the assembly stubs. */
#define r_SKB		%o0
#define r_A		%o1
#define r_X		%o2
#define r_saved_O7	%o3
#define r_HEADLEN	%o4
#define r_SKB_DATA	%o5
#define r_TMP		%g1
#define r_TMP2		%g2
#define r_OFF		%g3
#endif

#endif /* _BPF_JIT_H */
diff --git a/arch/sparc/net/bpf_jit_asm.S b/arch/sparc/net/bpf_jit_asm.S
new file mode 100644
index 000000000000..fdc69324aabc
--- /dev/null
+++ b/arch/sparc/net/bpf_jit_asm.S
@@ -0,0 +1,199 @@
1#include <asm/ptrace.h>
2
3#include "bpf_jit.h"
4
/* SAVE_SZ     : frame size handed to 'save' by the slow-path stubs.
 * SCRATCH_OFF : offset (from the caller's %sp) of the scratch slot that
 *               skb_copy_bits() fills and the stub then reads back.
 * BE_PTR      : branch-if-equal after a pointer-sized compare.
 */
#ifndef CONFIG_SPARC64
#define SAVE_SZ		96
#define SCRATCH_OFF	72
#define BE_PTR(label)	be label
#else
#define SAVE_SZ		176
#define SCRATCH_OFF	STACK_BIAS + 128
#define BE_PTR(label)	be,pn %xcc, label
#endif

#define SKF_MAX_NEG_OFF	(-0x200000)	/* SKF_LL_OFF from filter.h */
16
17 .text
18 .globl bpf_jit_load_word
19bpf_jit_load_word:
20 cmp r_OFF, 0
21 bl bpf_slow_path_word_neg
22 nop
23 .globl bpf_jit_load_word_positive_offset
24bpf_jit_load_word_positive_offset:
25 sub r_HEADLEN, r_OFF, r_TMP
26 cmp r_TMP, 3
27 ble bpf_slow_path_word
28 add r_SKB_DATA, r_OFF, r_TMP
29 andcc r_TMP, 3, %g0
30 bne load_word_unaligned
31 nop
32 retl
33 ld [r_SKB_DATA + r_OFF], r_A
34load_word_unaligned:
35 ldub [r_TMP + 0x0], r_OFF
36 ldub [r_TMP + 0x1], r_TMP2
37 sll r_OFF, 8, r_OFF
38 or r_OFF, r_TMP2, r_OFF
39 ldub [r_TMP + 0x2], r_TMP2
40 sll r_OFF, 8, r_OFF
41 or r_OFF, r_TMP2, r_OFF
42 ldub [r_TMP + 0x3], r_TMP2
43 sll r_OFF, 8, r_OFF
44 retl
45 or r_OFF, r_TMP2, r_A
46
47 .globl bpf_jit_load_half
48bpf_jit_load_half:
49 cmp r_OFF, 0
50 bl bpf_slow_path_half_neg
51 nop
52 .globl bpf_jit_load_half_positive_offset
53bpf_jit_load_half_positive_offset:
54 sub r_HEADLEN, r_OFF, r_TMP
55 cmp r_TMP, 1
56 ble bpf_slow_path_half
57 add r_SKB_DATA, r_OFF, r_TMP
58 andcc r_TMP, 1, %g0
59 bne load_half_unaligned
60 nop
61 retl
62 lduh [r_SKB_DATA + r_OFF], r_A
63load_half_unaligned:
64 ldub [r_TMP + 0x0], r_OFF
65 ldub [r_TMP + 0x1], r_TMP2
66 sll r_OFF, 8, r_OFF
67 retl
68 or r_OFF, r_TMP2, r_A
69
70 .globl bpf_jit_load_byte
71bpf_jit_load_byte:
72 cmp r_OFF, 0
73 bl bpf_slow_path_byte_neg
74 nop
75 .globl bpf_jit_load_byte_positive_offset
76bpf_jit_load_byte_positive_offset:
77 cmp r_OFF, r_HEADLEN
78 bge bpf_slow_path_byte
79 nop
80 retl
81 ldub [r_SKB_DATA + r_OFF], r_A
82
83 .globl bpf_jit_load_byte_msh
84bpf_jit_load_byte_msh:
85 cmp r_OFF, 0
86 bl bpf_slow_path_byte_msh_neg
87 nop
88 .globl bpf_jit_load_byte_msh_positive_offset
89bpf_jit_load_byte_msh_positive_offset:
90 cmp r_OFF, r_HEADLEN
91 bge bpf_slow_path_byte_msh
92 nop
93 ldub [r_SKB_DATA + r_OFF], r_OFF
94 and r_OFF, 0xf, r_OFF
95 retl
96 sll r_OFF, 2, r_X
97
/* Shared slow path: open a register window, call
 * skb_copy_bits(skb, r_OFF, scratch, LEN) with the scratch slot at the
 * caller's %sp + SCRATCH_OFF (%fp once the window is open), then compare
 * the return value against zero and close the window again.  The code
 * following the macro branches on those condition codes.
 */
#define bpf_slow_path_common(LEN)	\
	save	%sp, -SAVE_SZ, %sp;	\
	mov	%i0, %o0;		\
	mov	r_OFF, %o1;		\
	add	%fp, SCRATCH_OFF, %o2;	\
	call	skb_copy_bits;		\
	 mov	(LEN), %o3;		\
	cmp	%o0, 0;			\
	restore;

	/* Each stub bails out through bpf_error on a negative return and
	 * otherwise returns the scratch contents in r_A.  The load sits in
	 * the branch delay slot, so it is issued on both outcomes.
	 */
bpf_slow_path_word:
	bpf_slow_path_common(4)
	bl	bpf_error
	 ld	[%sp + SCRATCH_OFF], r_A
	retl
	 nop
bpf_slow_path_half:
	bpf_slow_path_common(2)
	bl	bpf_error
	 lduh	[%sp + SCRATCH_OFF], r_A
	retl
	 nop
bpf_slow_path_byte:
	bpf_slow_path_common(1)
	bl	bpf_error
	 ldub	[%sp + SCRATCH_OFF], r_A
	retl
	 nop
126bpf_slow_path_byte_msh:
127 bpf_slow_path_common(1)
128 bl bpf_error
129 ldub [%sp + SCRATCH_OFF], r_A
130 and r_OFF, 0xf, r_OFF
131 retl
132 sll r_OFF, 2, r_X
133
/* Shared negative-offset path: open a register window and call
 * bpf_internal_load_pointer_neg_helper(skb, r_OFF, LEN).  The returned
 * pointer is left in r_TMP; a zero return leaves through bpf_error.
 * The window is closed in the branch delay slot on either outcome.
 */
#define bpf_negative_common(LEN)			\
	save	%sp, -SAVE_SZ, %sp;			\
	mov	%i0, %o0;				\
	mov	r_OFF, %o1;				\
	call	bpf_internal_load_pointer_neg_helper;	\
	 mov	(LEN), %o2;				\
	mov	%o0, r_TMP;				\
	cmp	%o0, 0;					\
	BE_PTR(bpf_error);				\
	 restore;

	/* The *_neg entry points reject offsets below SKF_MAX_NEG_OFF and
	 * then fall into the matching *_negative_offset routine, which the
	 * JIT also calls directly.
	 */
bpf_slow_path_word_neg:
	sethi	%hi(SKF_MAX_NEG_OFF), r_TMP
	cmp	r_OFF, r_TMP
	bl	bpf_error
	 nop
	.globl	bpf_jit_load_word_negative_offset
bpf_jit_load_word_negative_offset:
	bpf_negative_common(4)
	andcc	r_TMP, 3, %g0
	bne	load_word_unaligned
	 nop
	retl
	 ld	[r_TMP], r_A

bpf_slow_path_half_neg:
	sethi	%hi(SKF_MAX_NEG_OFF), r_TMP
	cmp	r_OFF, r_TMP
	bl	bpf_error
	 nop
	.globl	bpf_jit_load_half_negative_offset
bpf_jit_load_half_negative_offset:
	bpf_negative_common(2)
	andcc	r_TMP, 1, %g0
	bne	load_half_unaligned
	 nop
	retl
	 lduh	[r_TMP], r_A

bpf_slow_path_byte_neg:
	sethi	%hi(SKF_MAX_NEG_OFF), r_TMP
	cmp	r_OFF, r_TMP
	bl	bpf_error
	 nop
	.globl	bpf_jit_load_byte_negative_offset
bpf_jit_load_byte_negative_offset:
	bpf_negative_common(1)
	retl
	 ldub	[r_TMP], r_A

bpf_slow_path_byte_msh_neg:
	sethi	%hi(SKF_MAX_NEG_OFF), r_TMP
	cmp	r_OFF, r_TMP
	bl	bpf_error
	 nop
	.globl	bpf_jit_load_byte_msh_negative_offset
bpf_jit_load_byte_msh_negative_offset:
	bpf_negative_common(1)
	ldub	[r_TMP], r_OFF
	and	r_OFF, 0xf, r_OFF
	retl
	 sll	r_OFF, 2, r_X
196
197bpf_error:
198 jmpl r_saved_O7 + 8, %g0
199 clr %o0
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
new file mode 100644
index 000000000000..86349cab60c5
--- /dev/null
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -0,0 +1,785 @@
1#include <linux/moduleloader.h>
2#include <linux/workqueue.h>
3#include <linux/netdevice.h>
4#include <linux/filter.h>
5#include <linux/cache.h>
6
7#include <asm/cacheflush.h>
8#include <asm/ptrace.h>
9
10#include "bpf_jit.h"
11
12int bpf_jit_enable __read_mostly;
13
14/* assembly code in arch/sparc/net/bpf_jit_asm.S */
15extern u32 bpf_jit_load_word[];
16extern u32 bpf_jit_load_half[];
17extern u32 bpf_jit_load_byte[];
18extern u32 bpf_jit_load_byte_msh[];
19extern u32 bpf_jit_load_word_positive_offset[];
20extern u32 bpf_jit_load_half_positive_offset[];
21extern u32 bpf_jit_load_byte_positive_offset[];
22extern u32 bpf_jit_load_byte_msh_positive_offset[];
23extern u32 bpf_jit_load_word_negative_offset[];
24extern u32 bpf_jit_load_half_negative_offset[];
25extern u32 bpf_jit_load_byte_negative_offset[];
26extern u32 bpf_jit_load_byte_msh_negative_offset[];
27
/* True when @value, read as a 32-bit two's complement number, lies in
 * [-4096, 4095], i.e. fits a signed 13-bit immediate field.
 */
static inline bool is_simm13(unsigned int value)
{
	/* Adding the bias maps the accepted range onto [0, 0x2000). */
	const unsigned int biased = value + 0x1000;

	return biased < 0x2000;
}
32
/* Flush the instruction cache for the freshly written code in
 * [start_, end_).  Only does work on sparc64 Spitfire CPUs.
 */
static void bpf_flush_icache(void *start_, void *end_)
{
#ifdef CONFIG_SPARC64
	/* Cheetah's I-cache is fully coherent. */
	if (tlb_type == spitfire) {
		unsigned long start = (unsigned long) start_;
		unsigned long end = (unsigned long) end_;

		/* The loop advances 32 bytes per flush (presumably the
		 * I-cache line size -- TODO confirm).  Round the start
		 * down to that same step so the step holding the final
		 * bytes of the range is always visited.
		 */
		start &= ~31UL;
		while (start < end) {
			flushi(start);
			start += 32;
		}
	}
#endif
}
50
/* Bits collected in 'seen' while a filter is being translated. */
#define SEEN_DATAREF	0x1	/* might call external helpers */
#define SEEN_XREG	0x2	/* the BPF X register (r_X) is used */
#define SEEN_MEM	0x4	/* use mem[] for temporary storage */
54
/* Instruction-word field encoders. */
#define S13(X)		((X) & 0x1fff)
#define IMMED		0x00002000
#define RD(X)		((X) << 25)
#define RS1(X)		((X) << 14)
#define RS2(X)		((X))
/* Convert to unsigned before shifting: op values 2 and 3 set bit 31,
 * which a signed int cannot represent.
 */
#define OP(X)		((unsigned int)(X) << 30)
#define OP2(X)		((X) << 22)
#define OP3(X)		((X) << 19)
#define COND(X)		((X) << 25)
#define F1(X)		OP(X)
#define F2(X, Y)	(OP(X) | OP2(Y))
#define F3(X, Y)	(OP(X) | OP3(Y))

/* Branch condition field values. */
#define CONDN		COND(0x0)
#define CONDE		COND(0x1)
#define CONDLE		COND(0x2)
#define CONDL		COND(0x3)
#define CONDLEU		COND(0x4)
#define CONDCS		COND(0x5)
#define CONDNEG		COND(0x6)
#define CONDVC		COND(0x7)
#define CONDA		COND(0x8)
#define CONDNE		COND(0x9)
#define CONDG		COND(0xa)
#define CONDGE		COND(0xb)
#define CONDGU		COND(0xc)
#define CONDCC		COND(0xd)
#define CONDPOS		COND(0xe)
#define CONDVS		COND(0xf)

/* Unsigned >= and < are the carry-clear / carry-set conditions. */
#define CONDGEU		CONDCC
#define CONDLU		CONDCS

/* 22-bit word displacement taken from a byte displacement. */
#define WDISP22(X)	(((X) >> 2) & 0x3fffff)

#define BA		(F2(0, 2) | CONDA)
#define BGU		(F2(0, 2) | CONDGU)
#define BLEU		(F2(0, 2) | CONDLEU)
#define BGEU		(F2(0, 2) | CONDGEU)
#define BLU		(F2(0, 2) | CONDLU)
#define BE		(F2(0, 2) | CONDE)
#define BNE		(F2(0, 2) | CONDNE)

/* Branch-if-not-equal after a pointer-sized compare. */
#ifdef CONFIG_SPARC64
#define BNE_PTR		(F2(0, 1) | CONDNE | (2 << 20))
#else
#define BNE_PTR		BNE
#endif

/* sethi %hi(K), REG  and  or REG, %lo(K), REG */
#define SETHI(K, REG)	\
	(F2(0, 0x4) | RD(REG) | (((K) >> 10) & 0x3fffff))
#define OR_LO(K, REG)	\
	(F3(2, 0x02) | IMMED | RS1(REG) | ((K) & 0x3ff) | RD(REG))

/* ALU, control-transfer and %y access opcodes. */
#define ADD		F3(2, 0x00)
#define AND		F3(2, 0x01)
#define ANDCC		F3(2, 0x11)
#define OR		F3(2, 0x02)
#define SUB		F3(2, 0x04)
#define SUBCC		F3(2, 0x14)
#define MUL		F3(2, 0x0a) /* umul */
#define DIV		F3(2, 0x0e) /* udiv */
#define SLL		F3(2, 0x25)
#define SRL		F3(2, 0x26)
#define JMPL		F3(2, 0x38)
#define CALL		F1(1)
#define BR		F2(0, 0x01)
#define RD_Y		F3(2, 0x28)
#define WR_Y		F3(2, 0x30)

/* Memory access opcodes. */
#define LD32		F3(3, 0x00)
#define LD8		F3(3, 0x01)
#define LD16		F3(3, 0x02)
#define LD64		F3(3, 0x0b)
#define ST32		F3(3, 0x04)

/* Pointer-sized load and the stack frame size used by the prologue. */
#ifdef CONFIG_SPARC64
#define LDPTR		LD64
#define BASE_STACKFRAME	176
#else
#define LDPTR		LD32
#define BASE_STACKFRAME	96
#endif

/* Register + 13-bit immediate addressing forms. */
#define LD32I		(LD32 | IMMED)
#define LD8I		(LD8 | IMMED)
#define LD16I		(LD16 | IMMED)
#define LD64I		(LD64 | IMMED)
#define LDPTRI		(LDPTR | IMMED)
#define ST32I		(ST32 | IMMED)
145
/* nop, encoded as: sethi 0, %g0 */
#define emit_nop()		\
do {				\
	*prog++ = SETHI(0, G0);	\
} while (0)

/* sub %g0, r_A, r_A */
#define emit_neg()					\
do {							\
	*prog++ = SUB | RS1(G0) | RS2(r_A) | RD(r_A);	\
} while (0)

/* or %g0, FROM, TO */
#define emit_reg_move(FROM, TO)				\
do {							\
	*prog++ = OR | RS1(G0) | RS2(FROM) | RD(TO);	\
} while (0)

/* or %g0, %g0, REG -- a register move whose source is %g0 */
#define emit_clear(REG)		\
do {				\
	emit_reg_move(G0, REG);	\
} while (0)

/* sethi %hi(K), REG ; or REG, %lo(K), REG -- always two words */
#define emit_set_const(K, REG)		\
do {					\
	*prog++ = SETHI(K, REG);	\
	*prog++ = OR_LO(K, REG);	\
} while (0)

/* Emit
 *
 *	OP	r_A, r_X, r_A
 *
 * and note that the X register is in use.
 */
#define emit_alu_X(OPCODE)					\
do {								\
	seen |= SEEN_XREG;					\
	*prog++ = OPCODE | RS1(r_A) | RS2(r_X) | RD(r_A);	\
} while (0)
182
/* Emit either:
 *
 *	OP	r_A, K, r_A
 *
 * or
 *
 *	sethi	%hi(K), r_TMP
 *	or	r_TMP, %lo(K), r_TMP
 *	OP	r_A, r_TMP, r_A
 *
 * depending upon whether K fits in a signed 13-bit
 * immediate instruction field.
 *
 * A zero K is skipped only where it leaves r_A unchanged (add, sub,
 * or, shifts).  AND and MUL are always emitted: with K == 0 they must
 * force r_A to zero, and MUL additionally has to define %y for callers
 * that read it back afterwards.
 */
#define emit_alu_K(OPCODE, K)					\
do {								\
	if ((K) || (OPCODE) == AND || (OPCODE) == MUL) {	\
		unsigned int _insn = OPCODE;			\
		_insn |= RS1(r_A) | RD(r_A);			\
		if (is_simm13(K)) {				\
			*prog++ = _insn | IMMED | S13(K);	\
		} else {					\
			emit_set_const(K, r_TMP);		\
			*prog++ = _insn | RS2(r_TMP);		\
		}						\
	}							\
} while (0)
210
/* Load constant K into DEST: a single "or %g0, K, DEST" when K fits a
 * signed 13-bit immediate, otherwise the two-word sethi/or pair.
 */
#define emit_loadimm(K, DEST)						\
do {									\
	if (!is_simm13(K)) {						\
		emit_set_const(K, DEST);				\
	} else {							\
		*prog++ = OR | IMMED | RS1(G0) | S13(K) | RD(DEST);	\
	}								\
} while (0)

/* The emit_load*() family loads STRUCT.FIELD through the pointer held
 * in BASE into DEST.  The checked variants refuse to build when the
 * field width does not match the load width.
 */
#define emit_loadptr(BASE, STRUCT, FIELD, DEST)				\
do {									\
	unsigned int _off = offsetof(STRUCT, FIELD);			\
	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(void *));	\
	*prog++ = LDPTRI | RS1(BASE) | S13(_off) | RD(DEST);		\
} while (0)

#define emit_load32(BASE, STRUCT, FIELD, DEST)				\
do {									\
	unsigned int _off = offsetof(STRUCT, FIELD);			\
	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u32));	\
	*prog++ = LD32I | RS1(BASE) | S13(_off) | RD(DEST);		\
} while (0)

#define emit_load16(BASE, STRUCT, FIELD, DEST)				\
do {									\
	unsigned int _off = offsetof(STRUCT, FIELD);			\
	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u16));	\
	*prog++ = LD16I | RS1(BASE) | S13(_off) | RD(DEST);		\
} while (0)

/* Byte load without the field-size check. */
#define __emit_load8(BASE, STRUCT, FIELD, DEST)				\
do {									\
	unsigned int _off = offsetof(STRUCT, FIELD);			\
	*prog++ = LD8I | RS1(BASE) | S13(_off) | RD(DEST);		\
} while (0)

#define emit_load8(BASE, STRUCT, FIELD, DEST)				\
do {									\
	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u8));	\
	__emit_load8(BASE, STRUCT, FIELD, DEST);			\
} while (0)
248
/* Access to the BPF mem[] scratch words.
 *
 * The prologue lowers %sp by BASE_STACKFRAME + BPF_MEMWORDS * 4 without
 * opening a register window, so %fp still belongs to our caller.  The
 * scratch words are therefore addressed from %sp, directly above the
 * BASE_STACKFRAME bytes at the bottom of the new frame (plus the stack
 * bias on sparc64).  OFF is the byte offset of the word (K * 4).
 *
 * Both macros record SEEN_MEM so that the prologue keeps allocating
 * the frame on later passes.
 */
#ifdef CONFIG_SPARC64
#define MEM_SLOT_BASE	(STACK_BIAS + BASE_STACKFRAME)
#else
#define MEM_SLOT_BASE	BASE_STACKFRAME
#endif

#define emit_ldmem(OFF, DEST)						\
do {	seen |= SEEN_MEM;						\
	*prog++ = LD32I | RS1(SP) | S13(MEM_SLOT_BASE + (OFF)) | RD(DEST); \
} while (0)

/* Store variant: uses the store opcode, SRC travels in the rd field. */
#define emit_stmem(OFF, SRC)						\
do {	seen |= SEEN_MEM;						\
	*prog++ = ST32I | RS1(SP) | S13(MEM_SLOT_BASE + (OFF)) | RD(SRC); \
} while (0)
256
#define cpu_off	offsetof(struct thread_info, cpu)

/* Load the current CPU number into REG.  SMP builds read
 * thread_info->cpu through %g6 (16-bit field on sparc64, 32-bit
 * otherwise); uniprocessor builds simply clear REG.
 */
#ifndef CONFIG_SMP
#define emit_load_cpu(REG)	emit_clear(REG)
#else
#ifdef CONFIG_SPARC64
#define emit_load_cpu(REG)	\
	emit_load16(G6, struct thread_info, cpu, REG)
#else
#define emit_load_cpu(REG)	\
	emit_load32(G6, struct thread_info, cpu, REG)
#endif
#endif

/* struct sk_buff field loads through r_SKB. */
#define emit_skb_loadptr(FIELD, DEST)	\
	emit_loadptr(r_SKB, struct sk_buff, FIELD, DEST)
#define emit_skb_load32(FIELD, DEST)	\
	emit_load32(r_SKB, struct sk_buff, FIELD, DEST)
#define emit_skb_load16(FIELD, DEST)	\
	emit_load16(r_SKB, struct sk_buff, FIELD, DEST)
#define __emit_skb_load8(FIELD, DEST)	\
	__emit_load8(r_SKB, struct sk_buff, FIELD, DEST)
#define emit_skb_load8(FIELD, DEST)	\
	emit_load8(r_SKB, struct sk_buff, FIELD, DEST)
281
/* None of the single-statement emitters below carries its own trailing
 * semicolon; the caller supplies it, so each one is safe in an unbraced
 * if/else.
 */

/* jmpl BASE + IMM_OFF, LREG */
#define emit_jmpl(BASE, IMM_OFF, LREG) \
	*prog++ = (JMPL | IMMED | RS1(BASE) | S13(IMM_OFF) | RD(LREG))

/* call FUNC ; nop
 *
 * The displacement is computed as if the call sits 8 bytes before
 * addrs[i], i.e. call + delay slot are the last two words emitted for
 * BPF instruction i.
 */
#define emit_call(FUNC)					\
do {	void *_here = image + addrs[i] - 8;		\
	unsigned int _off = (void *)(FUNC) - _here;	\
	*prog++ = CALL | (((_off) >> 2) & 0x3fffffff);	\
	emit_nop();					\
} while (0)

/* Branch to byte offset DEST within the image.
 *
 * Like emit_call(), this assumes the branch word sits 8 bytes before
 * addrs[i].  When more words follow the branch, the caller must add
 * the extra distance to DEST.
 */
#define emit_branch(BR_OPC, DEST)			\
do {	unsigned int _here = addrs[i] - 8;		\
	*prog++ = BR_OPC | WDISP22((DEST) - _here);	\
} while (0)

/* Branch by OFF bytes relative to the branch instruction itself. */
#define emit_branch_off(BR_OPC, OFF)		\
do {	*prog++ = BR_OPC | WDISP22(OFF);	\
} while (0)

#define emit_jump(DEST)		emit_branch(BA, DEST)

#define emit_read_y(REG)	*prog++ = RD_Y | RD(REG)
#define emit_write_y(REG)	*prog++ = WR_Y | IMMED | RS1(REG) | S13(0)

/* cmp R1, R2  (subcc with %g0 as destination) */
#define emit_cmp(R1, R2) \
	*prog++ = (SUBCC | RS1(R1) | RS2(R2) | RD(G0))

#define emit_cmpi(R1, IMM) \
	*prog++ = (SUBCC | IMMED | RS1(R1) | S13(IMM) | RD(G0))

/* btst R2, R1  (andcc with %g0 as destination) */
#define emit_btst(R1, R2) \
	*prog++ = (ANDCC | RS1(R1) | RS2(R2) | RD(G0))

#define emit_btsti(R1, IMM) \
	*prog++ = (ANDCC | IMMED | RS1(R1) | S13(IMM) | RD(G0))

#define emit_sub(R1, R2, R3) \
	*prog++ = (SUB | RS1(R1) | RS2(R2) | RD(R3))

#define emit_subi(R1, IMM, R3) \
	*prog++ = (SUB | IMMED | RS1(R1) | S13(IMM) | RD(R3))

#define emit_add(R1, R2, R3) \
	*prog++ = (ADD | RS1(R1) | RS2(R2) | RD(R3))

#define emit_addi(R1, IMM, R3) \
	*prog++ = (ADD | IMMED | RS1(R1) | S13(IMM) | RD(R3))

/* sub %sp, SZ, %sp  and  add %sp, SZ, %sp */
#define emit_alloc_stack(SZ) \
	*prog++ = (SUB | IMMED | RS1(SP) | S13(SZ) | RD(SP))

#define emit_release_stack(SZ) \
	*prog++ = (ADD | IMMED | RS1(SP) | S13(SZ) | RD(SP))
335
336void bpf_jit_compile(struct sk_filter *fp)
337{
338 unsigned int cleanup_addr, proglen, oldproglen = 0;
339 u32 temp[8], *prog, *func, seen = 0, pass;
340 const struct sock_filter *filter = fp->insns;
341 int i, flen = fp->len, pc_ret0 = -1;
342 unsigned int *addrs;
343 void *image;
344
345 if (!bpf_jit_enable)
346 return;
347
348 addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
349 if (addrs == NULL)
350 return;
351
352 /* Before first pass, make a rough estimation of addrs[]
353 * each bpf instruction is translated to less than 64 bytes
354 */
355 for (proglen = 0, i = 0; i < flen; i++) {
356 proglen += 64;
357 addrs[i] = proglen;
358 }
359 cleanup_addr = proglen; /* epilogue address */
360 image = NULL;
361 for (pass = 0; pass < 10; pass++) {
362 u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
363
364 /* no prologue/epilogue for trivial filters (RET something) */
365 proglen = 0;
366 prog = temp;
367
368 /* Prologue */
369 if (seen_or_pass0) {
370 if (seen_or_pass0 & SEEN_MEM) {
371 unsigned int sz = BASE_STACKFRAME;
372 sz += BPF_MEMWORDS * sizeof(u32);
373 emit_alloc_stack(sz);
374 }
375
376 /* Make sure we dont leek kernel memory. */
377 if (seen_or_pass0 & SEEN_XREG)
378 emit_clear(r_X);
379
380 /* If this filter needs to access skb data,
381 * load %o4 and %o4 with:
382 * %o4 = skb->len - skb->data_len
383 * %o5 = skb->data
384 * And also back up %o7 into r_saved_O7 so we can
385 * invoke the stubs using 'call'.
386 */
387 if (seen_or_pass0 & SEEN_DATAREF) {
388 emit_load32(r_SKB, struct sk_buff, len, r_HEADLEN);
389 emit_load32(r_SKB, struct sk_buff, data_len, r_TMP);
390 emit_sub(r_HEADLEN, r_TMP, r_HEADLEN);
391 emit_loadptr(r_SKB, struct sk_buff, data, r_SKB_DATA);
392 }
393 }
394 emit_reg_move(O7, r_saved_O7);
395
396 switch (filter[0].code) {
397 case BPF_S_RET_K:
398 case BPF_S_LD_W_LEN:
399 case BPF_S_ANC_PROTOCOL:
400 case BPF_S_ANC_PKTTYPE:
401 case BPF_S_ANC_IFINDEX:
402 case BPF_S_ANC_MARK:
403 case BPF_S_ANC_RXHASH:
404 case BPF_S_ANC_CPU:
405 case BPF_S_ANC_QUEUE:
406 case BPF_S_LD_W_ABS:
407 case BPF_S_LD_H_ABS:
408 case BPF_S_LD_B_ABS:
409 /* The first instruction sets the A register (or is
410 * a "RET 'constant'")
411 */
412 break;
413 default:
414 /* Make sure we dont leak kernel information to the
415 * user.
416 */
417 emit_clear(r_A); /* A = 0 */
418 }
419
420 for (i = 0; i < flen; i++) {
421 unsigned int K = filter[i].k;
422 unsigned int t_offset;
423 unsigned int f_offset;
424 u32 t_op, f_op;
425 int ilen;
426
427 switch (filter[i].code) {
428 case BPF_S_ALU_ADD_X: /* A += X; */
429 emit_alu_X(ADD);
430 break;
431 case BPF_S_ALU_ADD_K: /* A += K; */
432 emit_alu_K(ADD, K);
433 break;
434 case BPF_S_ALU_SUB_X: /* A -= X; */
435 emit_alu_X(SUB);
436 break;
437 case BPF_S_ALU_SUB_K: /* A -= K */
438 emit_alu_K(SUB, K);
439 break;
440 case BPF_S_ALU_AND_X: /* A &= X */
441 emit_alu_X(AND);
442 break;
443 case BPF_S_ALU_AND_K: /* A &= K */
444 emit_alu_K(AND, K);
445 break;
446 case BPF_S_ALU_OR_X: /* A |= X */
447 emit_alu_X(OR);
448 break;
449 case BPF_S_ALU_OR_K: /* A |= K */
450 emit_alu_K(OR, K);
451 break;
452 case BPF_S_ALU_LSH_X: /* A <<= X */
453 emit_alu_X(SLL);
454 break;
455 case BPF_S_ALU_LSH_K: /* A <<= K */
456 emit_alu_K(SLL, K);
457 break;
458 case BPF_S_ALU_RSH_X: /* A >>= X */
459 emit_alu_X(SRL);
460 break;
461 case BPF_S_ALU_RSH_K: /* A >>= K */
462 emit_alu_K(SRL, K);
463 break;
464 case BPF_S_ALU_MUL_X: /* A *= X; */
465 emit_alu_X(MUL);
466 break;
467 case BPF_S_ALU_MUL_K: /* A *= K */
468 emit_alu_K(MUL, K);
469 break;
470 case BPF_S_ALU_DIV_K: /* A /= K */
471 emit_alu_K(MUL, K);
472 emit_read_y(r_A);
473 break;
474 case BPF_S_ALU_DIV_X: /* A /= X; */
475 emit_cmpi(r_X, 0);
476 if (pc_ret0 > 0) {
477 t_offset = addrs[pc_ret0 - 1];
478#ifdef CONFIG_SPARC32
479 emit_branch(BE, t_offset + 20);
480#else
481 emit_branch(BE, t_offset + 8);
482#endif
483 emit_nop(); /* delay slot */
484 } else {
485 emit_branch_off(BNE, 16);
486 emit_nop();
487#ifdef CONFIG_SPARC32
488 emit_jump(cleanup_addr + 20);
489#else
490 emit_jump(cleanup_addr + 8);
491#endif
492 emit_clear(r_A);
493 }
494 emit_write_y(G0);
495#ifdef CONFIG_SPARC32
496 emit_nop();
497 emit_nop();
498 emit_nop();
499#endif
500 emit_alu_X(DIV);
501 break;
502 case BPF_S_ALU_NEG:
503 emit_neg();
504 break;
505 case BPF_S_RET_K:
506 if (!K) {
507 if (pc_ret0 == -1)
508 pc_ret0 = i;
509 emit_clear(r_A);
510 } else {
511 emit_loadimm(K, r_A);
512 }
513 /* Fallthrough */
514 case BPF_S_RET_A:
515 if (seen_or_pass0) {
516 if (i != flen - 1) {
517 emit_jump(cleanup_addr);
518 emit_nop();
519 break;
520 }
521 if (seen_or_pass0 & SEEN_MEM) {
522 unsigned int sz = BASE_STACKFRAME;
523 sz += BPF_MEMWORDS * sizeof(u32);
524 emit_release_stack(sz);
525 }
526 }
527 /* jmpl %r_saved_O7 + 8, %g0 */
528 emit_jmpl(r_saved_O7, 8, G0);
529 emit_reg_move(r_A, O0); /* delay slot */
530 break;
531 case BPF_S_MISC_TAX:
532 seen |= SEEN_XREG;
533 emit_reg_move(r_A, r_X);
534 break;
535 case BPF_S_MISC_TXA:
536 seen |= SEEN_XREG;
537 emit_reg_move(r_X, r_A);
538 break;
539 case BPF_S_ANC_CPU:
540 emit_load_cpu(r_A);
541 break;
542 case BPF_S_ANC_PROTOCOL:
543 emit_skb_load16(protocol, r_A);
544 break;
545#if 0
546 /* GCC won't let us take the address of
547 * a bit field even though we very much
548 * know what we are doing here.
549 */
550 case BPF_S_ANC_PKTTYPE:
551 __emit_skb_load8(pkt_type, r_A);
552 emit_alu_K(SRL, 5);
553 break;
554#endif
555 case BPF_S_ANC_IFINDEX:
556 emit_skb_loadptr(dev, r_A);
557 emit_cmpi(r_A, 0);
558 emit_branch(BNE_PTR, cleanup_addr + 4);
559 emit_nop();
560 emit_load32(r_A, struct net_device, ifindex, r_A);
561 break;
562 case BPF_S_ANC_MARK:
563 emit_skb_load32(mark, r_A);
564 break;
565 case BPF_S_ANC_QUEUE:
566 emit_skb_load16(queue_mapping, r_A);
567 break;
568 case BPF_S_ANC_HATYPE:
569 emit_skb_loadptr(dev, r_A);
570 emit_cmpi(r_A, 0);
571 emit_branch(BNE_PTR, cleanup_addr + 4);
572 emit_nop();
573 emit_load16(r_A, struct net_device, type, r_A);
574 break;
575 case BPF_S_ANC_RXHASH:
576 emit_skb_load32(rxhash, r_A);
577 break;
578
579 case BPF_S_LD_IMM:
580 emit_loadimm(K, r_A);
581 break;
582 case BPF_S_LDX_IMM:
583 emit_loadimm(K, r_X);
584 break;
585 case BPF_S_LD_MEM:
586 emit_ldmem(K * 4, r_A);
587 break;
588 case BPF_S_LDX_MEM:
589 emit_ldmem(K * 4, r_X);
590 break;
591 case BPF_S_ST:
592 emit_stmem(K * 4, r_A);
593 break;
594 case BPF_S_STX:
595 emit_stmem(K * 4, r_X);
596 break;
597
598#define CHOOSE_LOAD_FUNC(K, func) \
599 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
600
601 case BPF_S_LD_W_ABS:
602 func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word);
603common_load: seen |= SEEN_DATAREF;
604 emit_loadimm(K, r_OFF);
605 emit_call(func);
606 break;
607 case BPF_S_LD_H_ABS:
608 func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half);
609 goto common_load;
610 case BPF_S_LD_B_ABS:
611 func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte);
612 goto common_load;
613 case BPF_S_LDX_B_MSH:
614 func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh);
615 goto common_load;
616 case BPF_S_LD_W_IND:
617 func = bpf_jit_load_word;
618common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
619 if (K) {
620 if (is_simm13(K)) {
621 emit_addi(r_X, K, r_OFF);
622 } else {
623 emit_loadimm(K, r_TMP);
624 emit_add(r_X, r_TMP, r_OFF);
625 }
626 } else {
627 emit_reg_move(r_X, r_OFF);
628 }
629 emit_call(func);
630 break;
631 case BPF_S_LD_H_IND:
632 func = bpf_jit_load_half;
633 goto common_load_ind;
634 case BPF_S_LD_B_IND:
635 func = bpf_jit_load_byte;
636 goto common_load_ind;
637 case BPF_S_JMP_JA:
638 emit_jump(addrs[i + K]);
639 emit_nop();
640 break;
641
642#define COND_SEL(CODE, TOP, FOP) \
643 case CODE: \
644 t_op = TOP; \
645 f_op = FOP; \
646 goto cond_branch
647
648 COND_SEL(BPF_S_JMP_JGT_K, BGU, BLEU);
649 COND_SEL(BPF_S_JMP_JGE_K, BGEU, BLU);
650 COND_SEL(BPF_S_JMP_JEQ_K, BE, BNE);
651 COND_SEL(BPF_S_JMP_JSET_K, BNE, BE);
652 COND_SEL(BPF_S_JMP_JGT_X, BGU, BLEU);
653 COND_SEL(BPF_S_JMP_JGE_X, BGEU, BLU);
654 COND_SEL(BPF_S_JMP_JEQ_X, BE, BNE);
655 COND_SEL(BPF_S_JMP_JSET_X, BNE, BE);
656
657cond_branch: f_offset = addrs[i + filter[i].jf];
658 t_offset = addrs[i + filter[i].jt];
659
660 /* same targets, can avoid doing the test :) */
661 if (filter[i].jt == filter[i].jf) {
662 emit_jump(t_offset);
663 emit_nop();
664 break;
665 }
666
667 switch (filter[i].code) {
668 case BPF_S_JMP_JGT_X:
669 case BPF_S_JMP_JGE_X:
670 case BPF_S_JMP_JEQ_X:
671 seen |= SEEN_XREG;
672 emit_cmp(r_A, r_X);
673 break;
674 case BPF_S_JMP_JSET_X:
675 seen |= SEEN_XREG;
676 emit_btst(r_A, r_X);
677 break;
678 case BPF_S_JMP_JEQ_K:
679 case BPF_S_JMP_JGT_K:
680 case BPF_S_JMP_JGE_K:
681 if (is_simm13(K)) {
682 emit_cmpi(r_A, K);
683 } else {
684 emit_loadimm(K, r_TMP);
685 emit_cmp(r_A, r_TMP);
686 }
687 break;
688 case BPF_S_JMP_JSET_K:
689 if (is_simm13(K)) {
690 emit_btsti(r_A, K);
691 } else {
692 emit_loadimm(K, r_TMP);
693 emit_btst(r_A, r_TMP);
694 }
695 break;
696 }
697 if (filter[i].jt != 0) {
698 if (filter[i].jf)
699 t_offset += 8;
700 emit_branch(t_op, t_offset);
701 emit_nop(); /* delay slot */
702 if (filter[i].jf) {
703 emit_jump(f_offset);
704 emit_nop();
705 }
706 break;
707 }
708 emit_branch(f_op, f_offset);
709 emit_nop(); /* delay slot */
710 break;
711
712 default:
713 /* hmm, too complex filter, give up with jit compiler */
714 goto out;
715 }
716 ilen = (void *) prog - (void *) temp;
717 if (image) {
718 if (unlikely(proglen + ilen > oldproglen)) {
719 pr_err("bpb_jit_compile fatal error\n");
720 kfree(addrs);
721 module_free(NULL, image);
722 return;
723 }
724 memcpy(image + proglen, temp, ilen);
725 }
726 proglen += ilen;
727 addrs[i] = proglen;
728 prog = temp;
729 }
730 /* last bpf instruction is always a RET :
731 * use it to give the cleanup instruction(s) addr
732 */
733 cleanup_addr = proglen - 8; /* jmpl; mov r_A,%o0; */
734 if (seen_or_pass0 & SEEN_MEM)
735 cleanup_addr -= 4; /* add %sp, X, %sp; */
736
737 if (image) {
738 if (proglen != oldproglen)
739 pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n",
740 proglen, oldproglen);
741 break;
742 }
743 if (proglen == oldproglen) {
744 image = module_alloc(max_t(unsigned int,
745 proglen,
746 sizeof(struct work_struct)));
747 if (!image)
748 goto out;
749 }
750 oldproglen = proglen;
751 }
752
753 if (bpf_jit_enable > 1)
754 pr_err("flen=%d proglen=%u pass=%d image=%p\n",
755 flen, proglen, pass, image);
756
757 if (image) {
758 if (bpf_jit_enable > 1)
759 print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS,
760 16, 1, image, proglen, false);
761 bpf_flush_icache(image, image + proglen);
762 fp->bpf_func = (void *)image;
763 }
764out:
765 kfree(addrs);
766 return;
767}
768
769static void jit_free_defer(struct work_struct *arg)
770{
771 module_free(NULL, arg);
772}
773
774/* run from softirq, we must use a work_struct to call
775 * module_free() from process context
776 */
777void bpf_jit_free(struct sk_filter *fp)
778{
779 if (fp->bpf_func != sk_run_filter) {
780 struct work_struct *work = (struct work_struct *)fp->bpf_func;
781
782 INIT_WORK(work, jit_free_defer);
783 schedule_work(work);
784 }
785}