author     Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>    2016-06-22 12:25:07 -0400
committer  Michael Ellerman <mpe@ellerman.id.au>              2016-06-24 01:17:57 -0400
commit     156d0e290e969caba25f1851c52417c14d141b24 (patch)
tree       a232dcd99242656e3e935e44d0fa6649618f58ba
parent     6ac0ba5a4f82b40b4f6b3a75e7e4f0a15a3d7b9b (diff)
powerpc/ebpf/jit: Implement JIT compiler for extended BPF
PPC64 eBPF JIT compiler.
Enable with:
echo 1 > /proc/sys/net/core/bpf_jit_enable
or
echo 2 > /proc/sys/net/core/bpf_jit_enable
... to see the generated JIT code. This can further be processed with
tools/net/bpf_jit_disasm.
With CONFIG_TEST_BPF=m and 'modprobe test_bpf':
test_bpf: Summary: 305 PASSED, 0 FAILED, [297/297 JIT'ed]
... on both ppc64 BE and LE.
The details of the approach are documented through various comments in
the code.
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
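
For reference, a minimal way to exercise and inspect this JIT, assuming a kernel built with CONFIG_TEST_BPF=m and that the bpf_jit_disasm helper has been built from tools/net (exact paths and invocation may vary by tree), is roughly:

    # enable the JIT and request a dump of the generated opcodes in the kernel log
    echo 2 > /proc/sys/net/core/bpf_jit_enable
    # run the eBPF test suite; the pass/fail summary lands in the kernel log
    modprobe test_bpf
    dmesg | grep test_bpf
    # disassemble the most recent JIT dump found in the kernel log
    ./bpf_jit_disasm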
-rw-r--r--  arch/powerpc/Kconfig                     3
-rw-r--r--  arch/powerpc/include/asm/asm-compat.h    2
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h   20
-rw-r--r--  arch/powerpc/net/Makefile                4
-rw-r--r--  arch/powerpc/net/bpf_jit.h              53
-rw-r--r--  arch/powerpc/net/bpf_jit64.h           102
-rw-r--r--  arch/powerpc/net/bpf_jit_asm64.S       180
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c      954
8 files changed, 1315 insertions, 3 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 01f7464d9fea..ee82f9a09a85 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -128,7 +128,8 @@ config PPC
128 | select IRQ_FORCED_THREADING | 128 | select IRQ_FORCED_THREADING |
129 | select HAVE_RCU_TABLE_FREE if SMP | 129 | select HAVE_RCU_TABLE_FREE if SMP |
130 | select HAVE_SYSCALL_TRACEPOINTS | 130 | select HAVE_SYSCALL_TRACEPOINTS |
131 | select HAVE_CBPF_JIT | 131 | select HAVE_CBPF_JIT if !PPC64 |
132 | select HAVE_EBPF_JIT if PPC64 | ||
132 | select HAVE_ARCH_JUMP_LABEL | 133 | select HAVE_ARCH_JUMP_LABEL |
133 | select ARCH_HAVE_NMI_SAFE_CMPXCHG | 134 | select ARCH_HAVE_NMI_SAFE_CMPXCHG |
134 | select ARCH_HAS_GCOV_PROFILE_ALL | 135 | select ARCH_HAS_GCOV_PROFILE_ALL |
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
index dc85dcb891cf..cee3aa087653 100644
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -36,11 +36,13 @@
36 | #define PPC_MIN_STKFRM 112 | 36 | #define PPC_MIN_STKFRM 112 |
37 | 37 | ||
38 | #ifdef __BIG_ENDIAN__ | 38 | #ifdef __BIG_ENDIAN__ |
39 | #define LHZX_BE stringify_in_c(lhzx) | ||
39 | #define LWZX_BE stringify_in_c(lwzx) | 40 | #define LWZX_BE stringify_in_c(lwzx) |
40 | #define LDX_BE stringify_in_c(ldx) | 41 | #define LDX_BE stringify_in_c(ldx) |
41 | #define STWX_BE stringify_in_c(stwx) | 42 | #define STWX_BE stringify_in_c(stwx) |
42 | #define STDX_BE stringify_in_c(stdx) | 43 | #define STDX_BE stringify_in_c(stdx) |
43 | #else | 44 | #else |
45 | #define LHZX_BE stringify_in_c(lhbrx) | ||
44 | #define LWZX_BE stringify_in_c(lwbrx) | 46 | #define LWZX_BE stringify_in_c(lwbrx) |
45 | #define LDX_BE stringify_in_c(ldbrx) | 47 | #define LDX_BE stringify_in_c(ldbrx) |
46 | #define STWX_BE stringify_in_c(stwbrx) | 48 | #define STWX_BE stringify_in_c(stwbrx) |
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index fd8d640a8e28..6a77d1304d4d 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -142,9 +142,11 @@
142 | #define PPC_INST_ISEL 0x7c00001e | 142 | #define PPC_INST_ISEL 0x7c00001e |
143 | #define PPC_INST_ISEL_MASK 0xfc00003e | 143 | #define PPC_INST_ISEL_MASK 0xfc00003e |
144 | #define PPC_INST_LDARX 0x7c0000a8 | 144 | #define PPC_INST_LDARX 0x7c0000a8 |
145 | #define PPC_INST_STDCX 0x7c0001ad | ||
145 | #define PPC_INST_LSWI 0x7c0004aa | 146 | #define PPC_INST_LSWI 0x7c0004aa |
146 | #define PPC_INST_LSWX 0x7c00042a | 147 | #define PPC_INST_LSWX 0x7c00042a |
147 | #define PPC_INST_LWARX 0x7c000028 | 148 | #define PPC_INST_LWARX 0x7c000028 |
149 | #define PPC_INST_STWCX 0x7c00012d | ||
148 | #define PPC_INST_LWSYNC 0x7c2004ac | 150 | #define PPC_INST_LWSYNC 0x7c2004ac |
149 | #define PPC_INST_SYNC 0x7c0004ac | 151 | #define PPC_INST_SYNC 0x7c0004ac |
150 | #define PPC_INST_SYNC_MASK 0xfc0007fe | 152 | #define PPC_INST_SYNC_MASK 0xfc0007fe |
@@ -211,8 +213,11 @@
211 | #define PPC_INST_LBZ 0x88000000 | 213 | #define PPC_INST_LBZ 0x88000000 |
212 | #define PPC_INST_LD 0xe8000000 | 214 | #define PPC_INST_LD 0xe8000000 |
213 | #define PPC_INST_LHZ 0xa0000000 | 215 | #define PPC_INST_LHZ 0xa0000000 |
214 | #define PPC_INST_LHBRX 0x7c00062c | ||
215 | #define PPC_INST_LWZ 0x80000000 | 216 | #define PPC_INST_LWZ 0x80000000 |
217 | #define PPC_INST_LHBRX 0x7c00062c | ||
218 | #define PPC_INST_LDBRX 0x7c000428 | ||
219 | #define PPC_INST_STB 0x98000000 | ||
220 | #define PPC_INST_STH 0xb0000000 | ||
216 | #define PPC_INST_STD 0xf8000000 | 221 | #define PPC_INST_STD 0xf8000000 |
217 | #define PPC_INST_STDU 0xf8000001 | 222 | #define PPC_INST_STDU 0xf8000001 |
218 | #define PPC_INST_STW 0x90000000 | 223 | #define PPC_INST_STW 0x90000000 |
@@ -221,22 +226,34 @@
221 | #define PPC_INST_MTLR 0x7c0803a6 | 226 | #define PPC_INST_MTLR 0x7c0803a6 |
222 | #define PPC_INST_CMPWI 0x2c000000 | 227 | #define PPC_INST_CMPWI 0x2c000000 |
223 | #define PPC_INST_CMPDI 0x2c200000 | 228 | #define PPC_INST_CMPDI 0x2c200000 |
229 | #define PPC_INST_CMPW 0x7c000000 | ||
230 | #define PPC_INST_CMPD 0x7c200000 | ||
224 | #define PPC_INST_CMPLW 0x7c000040 | 231 | #define PPC_INST_CMPLW 0x7c000040 |
232 | #define PPC_INST_CMPLD 0x7c200040 | ||
225 | #define PPC_INST_CMPLWI 0x28000000 | 233 | #define PPC_INST_CMPLWI 0x28000000 |
234 | #define PPC_INST_CMPLDI 0x28200000 | ||
226 | #define PPC_INST_ADDI 0x38000000 | 235 | #define PPC_INST_ADDI 0x38000000 |
227 | #define PPC_INST_ADDIS 0x3c000000 | 236 | #define PPC_INST_ADDIS 0x3c000000 |
228 | #define PPC_INST_ADD 0x7c000214 | 237 | #define PPC_INST_ADD 0x7c000214 |
229 | #define PPC_INST_SUB 0x7c000050 | 238 | #define PPC_INST_SUB 0x7c000050 |
230 | #define PPC_INST_BLR 0x4e800020 | 239 | #define PPC_INST_BLR 0x4e800020 |
231 | #define PPC_INST_BLRL 0x4e800021 | 240 | #define PPC_INST_BLRL 0x4e800021 |
241 | #define PPC_INST_MULLD 0x7c0001d2 | ||
232 | #define PPC_INST_MULLW 0x7c0001d6 | 242 | #define PPC_INST_MULLW 0x7c0001d6 |
233 | #define PPC_INST_MULHWU 0x7c000016 | 243 | #define PPC_INST_MULHWU 0x7c000016 |
234 | #define PPC_INST_MULLI 0x1c000000 | 244 | #define PPC_INST_MULLI 0x1c000000 |
235 | #define PPC_INST_DIVWU 0x7c000396 | 245 | #define PPC_INST_DIVWU 0x7c000396 |
246 | #define PPC_INST_DIVD 0x7c0003d2 | ||
236 | #define PPC_INST_RLWINM 0x54000000 | 247 | #define PPC_INST_RLWINM 0x54000000 |
248 | #define PPC_INST_RLWIMI 0x50000000 | ||
249 | #define PPC_INST_RLDICL 0x78000000 | ||
237 | #define PPC_INST_RLDICR 0x78000004 | 250 | #define PPC_INST_RLDICR 0x78000004 |
238 | #define PPC_INST_SLW 0x7c000030 | 251 | #define PPC_INST_SLW 0x7c000030 |
252 | #define PPC_INST_SLD 0x7c000036 | ||
239 | #define PPC_INST_SRW 0x7c000430 | 253 | #define PPC_INST_SRW 0x7c000430 |
254 | #define PPC_INST_SRD 0x7c000436 | ||
255 | #define PPC_INST_SRAD 0x7c000634 | ||
256 | #define PPC_INST_SRADI 0x7c000674 | ||
240 | #define PPC_INST_AND 0x7c000038 | 257 | #define PPC_INST_AND 0x7c000038 |
241 | #define PPC_INST_ANDDOT 0x7c000039 | 258 | #define PPC_INST_ANDDOT 0x7c000039 |
242 | #define PPC_INST_OR 0x7c000378 | 259 | #define PPC_INST_OR 0x7c000378 |
@@ -247,6 +264,7 @@
247 | #define PPC_INST_XORI 0x68000000 | 264 | #define PPC_INST_XORI 0x68000000 |
248 | #define PPC_INST_XORIS 0x6c000000 | 265 | #define PPC_INST_XORIS 0x6c000000 |
249 | #define PPC_INST_NEG 0x7c0000d0 | 266 | #define PPC_INST_NEG 0x7c0000d0 |
267 | #define PPC_INST_EXTSW 0x7c0007b4 | ||
250 | #define PPC_INST_BRANCH 0x48000000 | 268 | #define PPC_INST_BRANCH 0x48000000 |
251 | #define PPC_INST_BRANCH_COND 0x40800000 | 269 | #define PPC_INST_BRANCH_COND 0x40800000 |
252 | #define PPC_INST_LBZCIX 0x7c0006aa | 270 | #define PPC_INST_LBZCIX 0x7c0006aa |
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index 1306a58ac541..c1ff16a6eb51 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -1,4 +1,8 @@
1 | # | 1 | # |
2 | # Arch-specific network modules | 2 | # Arch-specific network modules |
3 | # | 3 | # |
4 | ifeq ($(CONFIG_PPC64),y) | ||
5 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o | ||
6 | else | ||
4 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o | 7 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o |
8 | endif | ||
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 313cfafde9bb..d5301b6f20d0 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -2,6 +2,7 @@
2 | * bpf_jit.h: BPF JIT compiler for PPC | 2 | * bpf_jit.h: BPF JIT compiler for PPC |
3 | * | 3 | * |
4 | * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation | 4 | * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation |
5 | * 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> | ||
5 | * | 6 | * |
6 | * This program is free software; you can redistribute it and/or | 7 | * This program is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU General Public License | 8 | * modify it under the terms of the GNU General Public License |
@@ -13,7 +14,9 @@
13 | 14 | ||
14 | #ifndef __ASSEMBLY__ | 15 | #ifndef __ASSEMBLY__ |
15 | 16 | ||
16 | #ifdef CONFIG_PPC64 | 17 | #include <asm/types.h> |
18 | |||
19 | #ifdef PPC64_ELF_ABI_v1 | ||
17 | #define FUNCTION_DESCR_SIZE 24 | 20 | #define FUNCTION_DESCR_SIZE 24 |
18 | #else | 21 | #else |
19 | #define FUNCTION_DESCR_SIZE 0 | 22 | #define FUNCTION_DESCR_SIZE 0 |
@@ -52,6 +55,10 @@
52 | ___PPC_RA(base) | IMM_L(i)) | 55 | ___PPC_RA(base) | IMM_L(i)) |
53 | #define PPC_STWU(r, base, i) EMIT(PPC_INST_STWU | ___PPC_RS(r) | \ | 56 | #define PPC_STWU(r, base, i) EMIT(PPC_INST_STWU | ___PPC_RS(r) | \ |
54 | ___PPC_RA(base) | IMM_L(i)) | 57 | ___PPC_RA(base) | IMM_L(i)) |
58 | #define PPC_STH(r, base, i) EMIT(PPC_INST_STH | ___PPC_RS(r) | \ | ||
59 | ___PPC_RA(base) | IMM_L(i)) | ||
60 | #define PPC_STB(r, base, i) EMIT(PPC_INST_STB | ___PPC_RS(r) | \ | ||
61 | ___PPC_RA(base) | IMM_L(i)) | ||
55 | 62 | ||
56 | #define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ | 63 | #define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ |
57 | ___PPC_RA(base) | IMM_L(i)) | 64 | ___PPC_RA(base) | IMM_L(i)) |
@@ -63,6 +70,19 @@
63 | ___PPC_RA(base) | IMM_L(i)) | 70 | ___PPC_RA(base) | IMM_L(i)) |
64 | #define PPC_LHBRX(r, base, b) EMIT(PPC_INST_LHBRX | ___PPC_RT(r) | \ | 71 | #define PPC_LHBRX(r, base, b) EMIT(PPC_INST_LHBRX | ___PPC_RT(r) | \ |
65 | ___PPC_RA(base) | ___PPC_RB(b)) | 72 | ___PPC_RA(base) | ___PPC_RB(b)) |
73 | #define PPC_LDBRX(r, base, b) EMIT(PPC_INST_LDBRX | ___PPC_RT(r) | \ | ||
74 | ___PPC_RA(base) | ___PPC_RB(b)) | ||
75 | |||
76 | #define PPC_BPF_LDARX(t, a, b, eh) EMIT(PPC_INST_LDARX | ___PPC_RT(t) | \ | ||
77 | ___PPC_RA(a) | ___PPC_RB(b) | \ | ||
78 | __PPC_EH(eh)) | ||
79 | #define PPC_BPF_LWARX(t, a, b, eh) EMIT(PPC_INST_LWARX | ___PPC_RT(t) | \ | ||
80 | ___PPC_RA(a) | ___PPC_RB(b) | \ | ||
81 | __PPC_EH(eh)) | ||
82 | #define PPC_BPF_STWCX(s, a, b) EMIT(PPC_INST_STWCX | ___PPC_RS(s) | \ | ||
83 | ___PPC_RA(a) | ___PPC_RB(b)) | ||
84 | #define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \ | ||
85 | ___PPC_RA(a) | ___PPC_RB(b)) | ||
66 | 86 | ||
67 | #ifdef CONFIG_PPC64 | 87 | #ifdef CONFIG_PPC64 |
68 | #define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0) | 88 | #define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0) |
@@ -76,14 +96,23 @@
76 | 96 | ||
77 | #define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) | 97 | #define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) |
78 | #define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) | 98 | #define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) |
99 | #define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \ | ||
100 | ___PPC_RB(b)) | ||
101 | #define PPC_CMPD(a, b) EMIT(PPC_INST_CMPD | ___PPC_RA(a) | \ | ||
102 | ___PPC_RB(b)) | ||
79 | #define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i)) | 103 | #define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i)) |
104 | #define PPC_CMPLDI(a, i) EMIT(PPC_INST_CMPLDI | ___PPC_RA(a) | IMM_L(i)) | ||
80 | #define PPC_CMPLW(a, b) EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | \ | 105 | #define PPC_CMPLW(a, b) EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | \ |
81 | ___PPC_RB(b)) | 106 | ___PPC_RB(b)) |
107 | #define PPC_CMPLD(a, b) EMIT(PPC_INST_CMPLD | ___PPC_RA(a) | \ | ||
108 | ___PPC_RB(b)) | ||
82 | 109 | ||
83 | #define PPC_SUB(d, a, b) EMIT(PPC_INST_SUB | ___PPC_RT(d) | \ | 110 | #define PPC_SUB(d, a, b) EMIT(PPC_INST_SUB | ___PPC_RT(d) | \ |
84 | ___PPC_RB(a) | ___PPC_RA(b)) | 111 | ___PPC_RB(a) | ___PPC_RA(b)) |
85 | #define PPC_ADD(d, a, b) EMIT(PPC_INST_ADD | ___PPC_RT(d) | \ | 112 | #define PPC_ADD(d, a, b) EMIT(PPC_INST_ADD | ___PPC_RT(d) | \ |
86 | ___PPC_RA(a) | ___PPC_RB(b)) | 113 | ___PPC_RA(a) | ___PPC_RB(b)) |
114 | #define PPC_MULD(d, a, b) EMIT(PPC_INST_MULLD | ___PPC_RT(d) | \ | ||
115 | ___PPC_RA(a) | ___PPC_RB(b)) | ||
87 | #define PPC_MULW(d, a, b) EMIT(PPC_INST_MULLW | ___PPC_RT(d) | \ | 116 | #define PPC_MULW(d, a, b) EMIT(PPC_INST_MULLW | ___PPC_RT(d) | \ |
88 | ___PPC_RA(a) | ___PPC_RB(b)) | 117 | ___PPC_RA(a) | ___PPC_RB(b)) |
89 | #define PPC_MULHWU(d, a, b) EMIT(PPC_INST_MULHWU | ___PPC_RT(d) | \ | 118 | #define PPC_MULHWU(d, a, b) EMIT(PPC_INST_MULHWU | ___PPC_RT(d) | \ |
@@ -92,6 +121,8 @@
92 | ___PPC_RA(a) | IMM_L(i)) | 121 | ___PPC_RA(a) | IMM_L(i)) |
93 | #define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \ | 122 | #define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \ |
94 | ___PPC_RA(a) | ___PPC_RB(b)) | 123 | ___PPC_RA(a) | ___PPC_RB(b)) |
124 | #define PPC_DIVD(d, a, b) EMIT(PPC_INST_DIVD | ___PPC_RT(d) | \ | ||
125 | ___PPC_RA(a) | ___PPC_RB(b)) | ||
95 | #define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \ | 126 | #define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \ |
96 | ___PPC_RS(a) | ___PPC_RB(b)) | 127 | ___PPC_RS(a) | ___PPC_RB(b)) |
97 | #define PPC_ANDI(d, a, i) EMIT(PPC_INST_ANDI | ___PPC_RA(d) | \ | 128 | #define PPC_ANDI(d, a, i) EMIT(PPC_INST_ANDI | ___PPC_RA(d) | \ |
@@ -100,6 +131,7 @@
100 | ___PPC_RS(a) | ___PPC_RB(b)) | 131 | ___PPC_RS(a) | ___PPC_RB(b)) |
101 | #define PPC_OR(d, a, b) EMIT(PPC_INST_OR | ___PPC_RA(d) | \ | 132 | #define PPC_OR(d, a, b) EMIT(PPC_INST_OR | ___PPC_RA(d) | \ |
102 | ___PPC_RS(a) | ___PPC_RB(b)) | 133 | ___PPC_RS(a) | ___PPC_RB(b)) |
134 | #define PPC_MR(d, a) PPC_OR(d, a, a) | ||
103 | #define PPC_ORI(d, a, i) EMIT(PPC_INST_ORI | ___PPC_RA(d) | \ | 135 | #define PPC_ORI(d, a, i) EMIT(PPC_INST_ORI | ___PPC_RA(d) | \ |
104 | ___PPC_RS(a) | IMM_L(i)) | 136 | ___PPC_RS(a) | IMM_L(i)) |
105 | #define PPC_ORIS(d, a, i) EMIT(PPC_INST_ORIS | ___PPC_RA(d) | \ | 137 | #define PPC_ORIS(d, a, i) EMIT(PPC_INST_ORIS | ___PPC_RA(d) | \ |
@@ -110,13 +142,30 @@
110 | ___PPC_RS(a) | IMM_L(i)) | 142 | ___PPC_RS(a) | IMM_L(i)) |
111 | #define PPC_XORIS(d, a, i) EMIT(PPC_INST_XORIS | ___PPC_RA(d) | \ | 143 | #define PPC_XORIS(d, a, i) EMIT(PPC_INST_XORIS | ___PPC_RA(d) | \ |
112 | ___PPC_RS(a) | IMM_L(i)) | 144 | ___PPC_RS(a) | IMM_L(i)) |
145 | #define PPC_EXTSW(d, a) EMIT(PPC_INST_EXTSW | ___PPC_RA(d) | \ | ||
146 | ___PPC_RS(a)) | ||
113 | #define PPC_SLW(d, a, s) EMIT(PPC_INST_SLW | ___PPC_RA(d) | \ | 147 | #define PPC_SLW(d, a, s) EMIT(PPC_INST_SLW | ___PPC_RA(d) | \ |
114 | ___PPC_RS(a) | ___PPC_RB(s)) | 148 | ___PPC_RS(a) | ___PPC_RB(s)) |
149 | #define PPC_SLD(d, a, s) EMIT(PPC_INST_SLD | ___PPC_RA(d) | \ | ||
150 | ___PPC_RS(a) | ___PPC_RB(s)) | ||
115 | #define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | ___PPC_RA(d) | \ | 151 | #define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | ___PPC_RA(d) | \ |
116 | ___PPC_RS(a) | ___PPC_RB(s)) | 152 | ___PPC_RS(a) | ___PPC_RB(s)) |
153 | #define PPC_SRD(d, a, s) EMIT(PPC_INST_SRD | ___PPC_RA(d) | \ | ||
154 | ___PPC_RS(a) | ___PPC_RB(s)) | ||
155 | #define PPC_SRAD(d, a, s) EMIT(PPC_INST_SRAD | ___PPC_RA(d) | \ | ||
156 | ___PPC_RS(a) | ___PPC_RB(s)) | ||
157 | #define PPC_SRADI(d, a, i) EMIT(PPC_INST_SRADI | ___PPC_RA(d) | \ | ||
158 | ___PPC_RS(a) | __PPC_SH(i) | \ | ||
159 | (((i) & 0x20) >> 4)) | ||
117 | #define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \ | 160 | #define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \ |
118 | ___PPC_RS(a) | __PPC_SH(i) | \ | 161 | ___PPC_RS(a) | __PPC_SH(i) | \ |
119 | __PPC_MB(mb) | __PPC_ME(me)) | 162 | __PPC_MB(mb) | __PPC_ME(me)) |
163 | #define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \ | ||
164 | ___PPC_RS(a) | __PPC_SH(i) | \ | ||
165 | __PPC_MB(mb) | __PPC_ME(me)) | ||
166 | #define PPC_RLDICL(d, a, i, mb) EMIT(PPC_INST_RLDICL | ___PPC_RA(d) | \ | ||
167 | ___PPC_RS(a) | __PPC_SH(i) | \ | ||
168 | __PPC_MB64(mb) | (((i) & 0x20) >> 4)) | ||
120 | #define PPC_RLDICR(d, a, i, me) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \ | 169 | #define PPC_RLDICR(d, a, i, me) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \ |
121 | ___PPC_RS(a) | __PPC_SH(i) | \ | 170 | ___PPC_RS(a) | __PPC_SH(i) | \ |
122 | __PPC_ME64(me) | (((i) & 0x20) >> 4)) | 171 | __PPC_ME64(me) | (((i) & 0x20) >> 4)) |
@@ -127,6 +176,8 @@
127 | #define PPC_SRWI(d, a, i) PPC_RLWINM(d, a, 32-(i), i, 31) | 176 | #define PPC_SRWI(d, a, i) PPC_RLWINM(d, a, 32-(i), i, 31) |
128 | /* sldi = rldicr Rx, Ry, n, 63-n */ | 177 | /* sldi = rldicr Rx, Ry, n, 63-n */ |
129 | #define PPC_SLDI(d, a, i) PPC_RLDICR(d, a, i, 63-(i)) | 178 | #define PPC_SLDI(d, a, i) PPC_RLDICR(d, a, i, 63-(i)) |
179 | /* srdi = rldicl Rx, Ry, 64-n, n */ | ||
180 | #define PPC_SRDI(d, a, i) PPC_RLDICL(d, a, 64-(i), i) | ||
130 | 181 | ||
131 | #define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a)) | 182 | #define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a)) |
132 | 183 | ||
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
new file mode 100644
index 000000000000..5046d6f65c02
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -0,0 +1,102 @@
1 | /* | ||
2 | * bpf_jit64.h: BPF JIT compiler for PPC64 | ||
3 | * | ||
4 | * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> | ||
5 | * IBM Corporation | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | */ | ||
12 | #ifndef _BPF_JIT64_H | ||
13 | #define _BPF_JIT64_H | ||
14 | |||
15 | #include "bpf_jit.h" | ||
16 | |||
17 | /* | ||
18 | * Stack layout: | ||
19 | * | ||
20 | * [ prev sp ] <------------- | ||
21 | * [ nv gpr save area ] 8*8 | | ||
22 | * fp (r31) --> [ ebpf stack space ] 512 | | ||
23 | * [ local/tmp var space ] 16 | | ||
24 | * [ frame header ] 32/112 | | ||
25 | * sp (r1) ---> [ stack pointer ] -------------- | ||
26 | */ | ||
27 | |||
28 | /* for bpf JIT code internal usage */ | ||
29 | #define BPF_PPC_STACK_LOCALS 16 | ||
30 | /* for gpr non volatile registers BPF_REG_6 to 10, plus skb cache registers */ | ||
31 | #define BPF_PPC_STACK_SAVE (8*8) | ||
32 | /* Ensure this is quadword aligned */ | ||
33 | #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS + \ | ||
34 | MAX_BPF_STACK + BPF_PPC_STACK_SAVE) | ||
35 | |||
36 | #ifndef __ASSEMBLY__ | ||
37 | |||
38 | /* BPF register usage */ | ||
39 | #define SKB_HLEN_REG (MAX_BPF_REG + 0) | ||
40 | #define SKB_DATA_REG (MAX_BPF_REG + 1) | ||
41 | #define TMP_REG_1 (MAX_BPF_REG + 2) | ||
42 | #define TMP_REG_2 (MAX_BPF_REG + 3) | ||
43 | |||
44 | /* BPF to ppc register mappings */ | ||
45 | static const int b2p[] = { | ||
46 | /* function return value */ | ||
47 | [BPF_REG_0] = 8, | ||
48 | /* function arguments */ | ||
49 | [BPF_REG_1] = 3, | ||
50 | [BPF_REG_2] = 4, | ||
51 | [BPF_REG_3] = 5, | ||
52 | [BPF_REG_4] = 6, | ||
53 | [BPF_REG_5] = 7, | ||
54 | /* non volatile registers */ | ||
55 | [BPF_REG_6] = 27, | ||
56 | [BPF_REG_7] = 28, | ||
57 | [BPF_REG_8] = 29, | ||
58 | [BPF_REG_9] = 30, | ||
59 | /* frame pointer aka BPF_REG_10 */ | ||
60 | [BPF_REG_FP] = 31, | ||
61 | /* eBPF jit internal registers */ | ||
62 | [SKB_HLEN_REG] = 25, | ||
63 | [SKB_DATA_REG] = 26, | ||
64 | [TMP_REG_1] = 9, | ||
65 | [TMP_REG_2] = 10 | ||
66 | }; | ||
67 | |||
68 | /* Assembly helpers */ | ||
69 | #define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \ | ||
70 | u64 func##_negative_offset(u64 r3, u64 r4); \ | ||
71 | u64 func##_positive_offset(u64 r3, u64 r4); | ||
72 | |||
73 | DECLARE_LOAD_FUNC(sk_load_word); | ||
74 | DECLARE_LOAD_FUNC(sk_load_half); | ||
75 | DECLARE_LOAD_FUNC(sk_load_byte); | ||
76 | |||
77 | #define CHOOSE_LOAD_FUNC(imm, func) \ | ||
78 | (imm < 0 ? \ | ||
79 | (imm >= SKF_LL_OFF ? func##_negative_offset : func) : \ | ||
80 | func##_positive_offset) | ||
81 | |||
82 | #define SEEN_FUNC 0x1000 /* might call external helpers */ | ||
83 | #define SEEN_STACK 0x2000 /* uses BPF stack */ | ||
84 | #define SEEN_SKB 0x4000 /* uses sk_buff */ | ||
85 | |||
86 | struct codegen_context { | ||
87 | /* | ||
88 | * This is used to track register usage as well | ||
89 | * as calls to external helpers. | ||
90 | * - register usage is tracked with corresponding | ||
91 | * bits (r3-r10 and r25-r31) | ||
92 | * - rest of the bits can be used to track other | ||
93 | * things -- for now, we use bits 16 to 23 | ||
94 | * encoded in SEEN_* macros above | ||
95 | */ | ||
96 | unsigned int seen; | ||
97 | unsigned int idx; | ||
98 | }; | ||
99 | |||
100 | #endif /* !__ASSEMBLY__ */ | ||
101 | |||
102 | #endif | ||
diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S
new file mode 100644
index 000000000000..7e4c51430b84
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_asm64.S
@@ -0,0 +1,180 @@
1 | /* | ||
2 | * bpf_jit_asm64.S: Packet/header access helper functions | ||
3 | * for PPC64 BPF compiler. | ||
4 | * | ||
5 | * Copyright 2016, Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> | ||
6 | * IBM Corporation | ||
7 | * | ||
8 | * Based on bpf_jit_asm.S by Matt Evans | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; version 2 | ||
13 | * of the License. | ||
14 | */ | ||
15 | |||
16 | #include <asm/ppc_asm.h> | ||
17 | #include <asm/ptrace.h> | ||
18 | #include "bpf_jit64.h" | ||
19 | |||
20 | /* | ||
21 | * All of these routines are called directly from generated code, | ||
22 | * with the below register usage: | ||
23 | * r27 skb pointer (ctx) | ||
24 | * r25 skb header length | ||
25 | * r26 skb->data pointer | ||
26 | * r4 offset | ||
27 | * | ||
28 | * Result is passed back in: | ||
29 | * r8 data read in host endian format (accumulator) | ||
30 | * | ||
31 | * r9 is used as a temporary register | ||
32 | */ | ||
33 | |||
34 | #define r_skb r27 | ||
35 | #define r_hlen r25 | ||
36 | #define r_data r26 | ||
37 | #define r_off r4 | ||
38 | #define r_val r8 | ||
39 | #define r_tmp r9 | ||
40 | |||
41 | _GLOBAL_TOC(sk_load_word) | ||
42 | cmpdi r_off, 0 | ||
43 | blt bpf_slow_path_word_neg | ||
44 | b sk_load_word_positive_offset | ||
45 | |||
46 | _GLOBAL_TOC(sk_load_word_positive_offset) | ||
47 | /* Are we accessing past headlen? */ | ||
48 | subi r_tmp, r_hlen, 4 | ||
49 | cmpd r_tmp, r_off | ||
50 | blt bpf_slow_path_word | ||
51 | /* Nope, just hitting the header. cr0 here is eq or gt! */ | ||
52 | LWZX_BE r_val, r_data, r_off | ||
53 | blr /* Return success, cr0 != LT */ | ||
54 | |||
55 | _GLOBAL_TOC(sk_load_half) | ||
56 | cmpdi r_off, 0 | ||
57 | blt bpf_slow_path_half_neg | ||
58 | b sk_load_half_positive_offset | ||
59 | |||
60 | _GLOBAL_TOC(sk_load_half_positive_offset) | ||
61 | subi r_tmp, r_hlen, 2 | ||
62 | cmpd r_tmp, r_off | ||
63 | blt bpf_slow_path_half | ||
64 | LHZX_BE r_val, r_data, r_off | ||
65 | blr | ||
66 | |||
67 | _GLOBAL_TOC(sk_load_byte) | ||
68 | cmpdi r_off, 0 | ||
69 | blt bpf_slow_path_byte_neg | ||
70 | b sk_load_byte_positive_offset | ||
71 | |||
72 | _GLOBAL_TOC(sk_load_byte_positive_offset) | ||
73 | cmpd r_hlen, r_off | ||
74 | ble bpf_slow_path_byte | ||
75 | lbzx r_val, r_data, r_off | ||
76 | blr | ||
77 | |||
78 | /* | ||
79 | * Call out to skb_copy_bits: | ||
80 | * Allocate a new stack frame here to remain ABI-compliant in | ||
81 | * stashing LR. | ||
82 | */ | ||
83 | #define bpf_slow_path_common(SIZE) \ | ||
84 | mflr r0; \ | ||
85 | std r0, PPC_LR_STKOFF(r1); \ | ||
86 | stdu r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \ | ||
87 | mr r3, r_skb; \ | ||
88 | /* r4 = r_off as passed */ \ | ||
89 | addi r5, r1, STACK_FRAME_MIN_SIZE; \ | ||
90 | li r6, SIZE; \ | ||
91 | bl skb_copy_bits; \ | ||
92 | nop; \ | ||
93 | /* save r5 */ \ | ||
94 | addi r5, r1, STACK_FRAME_MIN_SIZE; \ | ||
95 | /* r3 = 0 on success */ \ | ||
96 | addi r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS; \ | ||
97 | ld r0, PPC_LR_STKOFF(r1); \ | ||
98 | mtlr r0; \ | ||
99 | cmpdi r3, 0; \ | ||
100 | blt bpf_error; /* cr0 = LT */ | ||
101 | |||
102 | bpf_slow_path_word: | ||
103 | bpf_slow_path_common(4) | ||
104 | /* Data value is on stack, and cr0 != LT */ | ||
105 | LWZX_BE r_val, 0, r5 | ||
106 | blr | ||
107 | |||
108 | bpf_slow_path_half: | ||
109 | bpf_slow_path_common(2) | ||
110 | LHZX_BE r_val, 0, r5 | ||
111 | blr | ||
112 | |||
113 | bpf_slow_path_byte: | ||
114 | bpf_slow_path_common(1) | ||
115 | lbzx r_val, 0, r5 | ||
116 | blr | ||
117 | |||
118 | /* | ||
119 | * Call out to bpf_internal_load_pointer_neg_helper | ||
120 | */ | ||
121 | #define sk_negative_common(SIZE) \ | ||
122 | mflr r0; \ | ||
123 | std r0, PPC_LR_STKOFF(r1); \ | ||
124 | stdu r1, -STACK_FRAME_MIN_SIZE(r1); \ | ||
125 | mr r3, r_skb; \ | ||
126 | /* r4 = r_off, as passed */ \ | ||
127 | li r5, SIZE; \ | ||
128 | bl bpf_internal_load_pointer_neg_helper; \ | ||
129 | nop; \ | ||
130 | addi r1, r1, STACK_FRAME_MIN_SIZE; \ | ||
131 | ld r0, PPC_LR_STKOFF(r1); \ | ||
132 | mtlr r0; \ | ||
133 | /* R3 != 0 on success */ \ | ||
134 | cmpldi r3, 0; \ | ||
135 | beq bpf_error_slow; /* cr0 = EQ */ | ||
136 | |||
137 | bpf_slow_path_word_neg: | ||
138 | lis r_tmp, -32 /* SKF_LL_OFF */ | ||
139 | cmpd r_off, r_tmp /* addr < SKF_* */ | ||
140 | blt bpf_error /* cr0 = LT */ | ||
141 | b sk_load_word_negative_offset | ||
142 | |||
143 | _GLOBAL_TOC(sk_load_word_negative_offset) | ||
144 | sk_negative_common(4) | ||
145 | LWZX_BE r_val, 0, r3 | ||
146 | blr | ||
147 | |||
148 | bpf_slow_path_half_neg: | ||
149 | lis r_tmp, -32 /* SKF_LL_OFF */ | ||
150 | cmpd r_off, r_tmp /* addr < SKF_* */ | ||
151 | blt bpf_error /* cr0 = LT */ | ||
152 | b sk_load_half_negative_offset | ||
153 | |||
154 | _GLOBAL_TOC(sk_load_half_negative_offset) | ||
155 | sk_negative_common(2) | ||
156 | LHZX_BE r_val, 0, r3 | ||
157 | blr | ||
158 | |||
159 | bpf_slow_path_byte_neg: | ||
160 | lis r_tmp, -32 /* SKF_LL_OFF */ | ||
161 | cmpd r_off, r_tmp /* addr < SKF_* */ | ||
162 | blt bpf_error /* cr0 = LT */ | ||
163 | b sk_load_byte_negative_offset | ||
164 | |||
165 | _GLOBAL_TOC(sk_load_byte_negative_offset) | ||
166 | sk_negative_common(1) | ||
167 | lbzx r_val, 0, r3 | ||
168 | blr | ||
169 | |||
170 | bpf_error_slow: | ||
171 | /* fabricate a cr0 = lt */ | ||
172 | li r_tmp, -1 | ||
173 | cmpdi r_tmp, 0 | ||
174 | bpf_error: | ||
175 | /* | ||
176 | * Entered with cr0 = lt | ||
177 | * Generated code will 'blt epilogue', returning 0. | ||
178 | */ | ||
179 | li r_val, 0 | ||
180 | blr | ||
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
new file mode 100644
index 000000000000..6073b78516f6
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -0,0 +1,954 @@
1 | /* | ||
2 | * bpf_jit_comp64.c: eBPF JIT compiler | ||
3 | * | ||
4 | * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> | ||
5 | * IBM Corporation | ||
6 | * | ||
7 | * Based on the powerpc classic BPF JIT compiler by Matt Evans | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; version 2 | ||
12 | * of the License. | ||
13 | */ | ||
14 | #include <linux/moduleloader.h> | ||
15 | #include <asm/cacheflush.h> | ||
16 | #include <linux/netdevice.h> | ||
17 | #include <linux/filter.h> | ||
18 | #include <linux/if_vlan.h> | ||
19 | #include <asm/kprobes.h> | ||
20 | |||
21 | #include "bpf_jit64.h" | ||
22 | |||
23 | int bpf_jit_enable __read_mostly; | ||
24 | |||
25 | static void bpf_jit_fill_ill_insns(void *area, unsigned int size) | ||
26 | { | ||
27 | int *p = area; | ||
28 | |||
29 | /* Fill whole space with trap instructions */ | ||
30 | while (p < (int *)((char *)area + size)) | ||
31 | *p++ = BREAKPOINT_INSTRUCTION; | ||
32 | } | ||
33 | |||
34 | static inline void bpf_flush_icache(void *start, void *end) | ||
35 | { | ||
36 | smp_wmb(); | ||
37 | flush_icache_range((unsigned long)start, (unsigned long)end); | ||
38 | } | ||
39 | |||
40 | static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) | ||
41 | { | ||
42 | return (ctx->seen & (1 << (31 - b2p[i]))); | ||
43 | } | ||
44 | |||
45 | static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) | ||
46 | { | ||
47 | ctx->seen |= (1 << (31 - b2p[i])); | ||
48 | } | ||
49 | |||
50 | static inline bool bpf_has_stack_frame(struct codegen_context *ctx) | ||
51 | { | ||
52 | /* | ||
53 | * We only need a stack frame if: | ||
54 | * - we call other functions (kernel helpers), or | ||
55 | * - the bpf program uses its stack area | ||
56 | * The latter condition is deduced from the usage of BPF_REG_FP | ||
57 | */ | ||
58 | return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP); | ||
59 | } | ||
60 | |||
61 | static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx) | ||
62 | { | ||
63 | /* | ||
64 | * Load skb->len and skb->data_len | ||
65 | * r3 points to skb | ||
66 | */ | ||
67 | PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len)); | ||
68 | PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len)); | ||
69 | /* header_len = len - data_len */ | ||
70 | PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]); | ||
71 | |||
72 | /* skb->data pointer */ | ||
73 | PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data)); | ||
74 | } | ||
75 | |||
76 | static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func) | ||
77 | { | ||
78 | #ifdef PPC64_ELF_ABI_v1 | ||
79 | /* func points to the function descriptor */ | ||
80 | PPC_LI64(b2p[TMP_REG_2], func); | ||
81 | /* Load actual entry point from function descriptor */ | ||
82 | PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); | ||
83 | /* ... and move it to LR */ | ||
84 | PPC_MTLR(b2p[TMP_REG_1]); | ||
85 | /* | ||
86 | * Load TOC from function descriptor at offset 8. | ||
87 | * We can clobber r2 since we get called through a | ||
88 | * function pointer (so caller will save/restore r2) | ||
89 | * and since we don't use a TOC ourself. | ||
90 | */ | ||
91 | PPC_BPF_LL(2, b2p[TMP_REG_2], 8); | ||
92 | #else | ||
93 | /* We can clobber r12 */ | ||
94 | PPC_FUNC_ADDR(12, func); | ||
95 | PPC_MTLR(12); | ||
96 | #endif | ||
97 | PPC_BLRL(); | ||
98 | } | ||
99 | |||
100 | static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) | ||
101 | { | ||
102 | int i; | ||
103 | bool new_stack_frame = bpf_has_stack_frame(ctx); | ||
104 | |||
105 | if (new_stack_frame) { | ||
106 | /* | ||
107 | * We need a stack frame, but we don't necessarily need to | ||
108 | * save/restore LR unless we call other functions | ||
109 | */ | ||
110 | if (ctx->seen & SEEN_FUNC) { | ||
111 | EMIT(PPC_INST_MFLR | __PPC_RT(R0)); | ||
112 | PPC_BPF_STL(0, 1, PPC_LR_STKOFF); | ||
113 | } | ||
114 | |||
115 | PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME); | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * Back up non-volatile regs -- BPF registers 6-10 | ||
120 | * If we haven't created our own stack frame, we save these | ||
121 | * in the protected zone below the previous stack frame | ||
122 | */ | ||
123 | for (i = BPF_REG_6; i <= BPF_REG_10; i++) | ||
124 | if (bpf_is_seen_register(ctx, i)) | ||
125 | PPC_BPF_STL(b2p[i], 1, | ||
126 | (new_stack_frame ? BPF_PPC_STACKFRAME : 0) - | ||
127 | (8 * (32 - b2p[i]))); | ||
128 | |||
129 | /* | ||
130 | * Save additional non-volatile regs if we cache skb | ||
131 | * Also, setup skb data | ||
132 | */ | ||
133 | if (ctx->seen & SEEN_SKB) { | ||
134 | PPC_BPF_STL(b2p[SKB_HLEN_REG], 1, | ||
135 | BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG]))); | ||
136 | PPC_BPF_STL(b2p[SKB_DATA_REG], 1, | ||
137 | BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG]))); | ||
138 | bpf_jit_emit_skb_loads(image, ctx); | ||
139 | } | ||
140 | |||
141 | /* Setup frame pointer to point to the bpf stack area */ | ||
142 | if (bpf_is_seen_register(ctx, BPF_REG_FP)) | ||
143 | PPC_ADDI(b2p[BPF_REG_FP], 1, | ||
144 | BPF_PPC_STACKFRAME - BPF_PPC_STACK_SAVE); | ||
145 | } | ||
146 | |||
147 | static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) | ||
148 | { | ||
149 | int i; | ||
150 | bool new_stack_frame = bpf_has_stack_frame(ctx); | ||
151 | |||
152 | /* Move result to r3 */ | ||
153 | PPC_MR(3, b2p[BPF_REG_0]); | ||
154 | |||
155 | /* Restore NVRs */ | ||
156 | for (i = BPF_REG_6; i <= BPF_REG_10; i++) | ||
157 | if (bpf_is_seen_register(ctx, i)) | ||
158 | PPC_BPF_LL(b2p[i], 1, | ||
159 | (new_stack_frame ? BPF_PPC_STACKFRAME : 0) - | ||
160 | (8 * (32 - b2p[i]))); | ||
161 | |||
162 | /* Restore non-volatile registers used for skb cache */ | ||
163 | if (ctx->seen & SEEN_SKB) { | ||
164 | PPC_BPF_LL(b2p[SKB_HLEN_REG], 1, | ||
165 | BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG]))); | ||
166 | PPC_BPF_LL(b2p[SKB_DATA_REG], 1, | ||
167 | BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG]))); | ||
168 | } | ||
169 | |||
170 | /* Tear down our stack frame */ | ||
171 | if (new_stack_frame) { | ||
172 | PPC_ADDI(1, 1, BPF_PPC_STACKFRAME); | ||
173 | if (ctx->seen & SEEN_FUNC) { | ||
174 | PPC_BPF_LL(0, 1, PPC_LR_STKOFF); | ||
175 | PPC_MTLR(0); | ||
176 | } | ||
177 | } | ||
178 | |||
179 | PPC_BLR(); | ||
180 | } | ||
181 | |||
182 | /* Assemble the body code between the prologue & epilogue */ | ||
183 | static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, | ||
184 | struct codegen_context *ctx, | ||
185 | u32 *addrs) | ||
186 | { | ||
187 | const struct bpf_insn *insn = fp->insnsi; | ||
188 | int flen = fp->len; | ||
189 | int i; | ||
190 | |||
191 | /* Start of epilogue code - will only be valid 2nd pass onwards */ | ||
192 | u32 exit_addr = addrs[flen]; | ||
193 | |||
194 | for (i = 0; i < flen; i++) { | ||
195 | u32 code = insn[i].code; | ||
196 | u32 dst_reg = b2p[insn[i].dst_reg]; | ||
197 | u32 src_reg = b2p[insn[i].src_reg]; | ||
198 | s16 off = insn[i].off; | ||
199 | s32 imm = insn[i].imm; | ||
200 | u64 imm64; | ||
201 | u8 *func; | ||
202 | u32 true_cond; | ||
203 | int stack_local_off; | ||
204 | |||
205 | /* | ||
206 | * addrs[] maps a BPF bytecode address into a real offset from | ||
207 | * the start of the body code. | ||
208 | */ | ||
209 | addrs[i] = ctx->idx * 4; | ||
210 | |||
211 | /* | ||
212 | * As an optimization, we note down which non-volatile registers | ||
213 | * are used so that we can only save/restore those in our | ||
214 | * prologue and epilogue. We do this here regardless of whether | ||
215 | * the actual BPF instruction uses src/dst registers or not | ||
216 | * (for instance, BPF_CALL does not use them). The expectation | ||
217 | * is that those instructions will have src_reg/dst_reg set to | ||
218 | * 0. Even otherwise, we just lose some prologue/epilogue | ||
219 | * optimization but everything else should work without | ||
220 | * any issues. | ||
221 | */ | ||
222 | if (dst_reg >= 24 && dst_reg <= 31) | ||
223 | bpf_set_seen_register(ctx, insn[i].dst_reg); | ||
224 | if (src_reg >= 24 && src_reg <= 31) | ||
225 | bpf_set_seen_register(ctx, insn[i].src_reg); | ||
226 | |||
227 | switch (code) { | ||
228 | /* | ||
229 | * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG | ||
230 | */ | ||
231 | case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ | ||
232 | case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ | ||
233 | PPC_ADD(dst_reg, dst_reg, src_reg); | ||
234 | goto bpf_alu32_trunc; | ||
235 | case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ | ||
236 | case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ | ||
237 | PPC_SUB(dst_reg, dst_reg, src_reg); | ||
238 | goto bpf_alu32_trunc; | ||
239 | case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ | ||
240 | case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ | ||
241 | case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ | ||
242 | case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ | ||
243 | if (BPF_OP(code) == BPF_SUB) | ||
244 | imm = -imm; | ||
245 | if (imm) { | ||
246 | if (imm >= -32768 && imm < 32768) | ||
247 | PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); | ||
248 | else { | ||
249 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
250 | PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); | ||
251 | } | ||
252 | } | ||
253 | goto bpf_alu32_trunc; | ||
254 | case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ | ||
255 | case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ | ||
256 | if (BPF_CLASS(code) == BPF_ALU) | ||
257 | PPC_MULW(dst_reg, dst_reg, src_reg); | ||
258 | else | ||
259 | PPC_MULD(dst_reg, dst_reg, src_reg); | ||
260 | goto bpf_alu32_trunc; | ||
261 | case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ | ||
262 | case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ | ||
263 | if (imm >= -32768 && imm < 32768) | ||
264 | PPC_MULI(dst_reg, dst_reg, IMM_L(imm)); | ||
265 | else { | ||
266 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
267 | if (BPF_CLASS(code) == BPF_ALU) | ||
268 | PPC_MULW(dst_reg, dst_reg, | ||
269 | b2p[TMP_REG_1]); | ||
270 | else | ||
271 | PPC_MULD(dst_reg, dst_reg, | ||
272 | b2p[TMP_REG_1]); | ||
273 | } | ||
274 | goto bpf_alu32_trunc; | ||
275 | case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ | ||
276 | case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ | ||
277 | PPC_CMPWI(src_reg, 0); | ||
278 | PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12); | ||
279 | PPC_LI(b2p[BPF_REG_0], 0); | ||
280 | PPC_JMP(exit_addr); | ||
281 | if (BPF_OP(code) == BPF_MOD) { | ||
282 | PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg); | ||
283 | PPC_MULW(b2p[TMP_REG_1], src_reg, | ||
284 | b2p[TMP_REG_1]); | ||
285 | PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); | ||
286 | } else | ||
287 | PPC_DIVWU(dst_reg, dst_reg, src_reg); | ||
288 | goto bpf_alu32_trunc; | ||
289 | case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ | ||
290 | case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ | ||
291 | PPC_CMPDI(src_reg, 0); | ||
292 | PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12); | ||
293 | PPC_LI(b2p[BPF_REG_0], 0); | ||
294 | PPC_JMP(exit_addr); | ||
295 | if (BPF_OP(code) == BPF_MOD) { | ||
296 | PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg); | ||
297 | PPC_MULD(b2p[TMP_REG_1], src_reg, | ||
298 | b2p[TMP_REG_1]); | ||
299 | PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); | ||
300 | } else | ||
301 | PPC_DIVD(dst_reg, dst_reg, src_reg); | ||
302 | break; | ||
303 | case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ | ||
304 | case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ | ||
305 | case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */ | ||
306 | case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ | ||
307 | if (imm == 0) | ||
308 | return -EINVAL; | ||
309 | else if (imm == 1) | ||
310 | goto bpf_alu32_trunc; | ||
311 | |||
312 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
313 | switch (BPF_CLASS(code)) { | ||
314 | case BPF_ALU: | ||
315 | if (BPF_OP(code) == BPF_MOD) { | ||
316 | PPC_DIVWU(b2p[TMP_REG_2], dst_reg, | ||
317 | b2p[TMP_REG_1]); | ||
318 | PPC_MULW(b2p[TMP_REG_1], | ||
319 | b2p[TMP_REG_1], | ||
320 | b2p[TMP_REG_2]); | ||
321 | PPC_SUB(dst_reg, dst_reg, | ||
322 | b2p[TMP_REG_1]); | ||
323 | } else | ||
324 | PPC_DIVWU(dst_reg, dst_reg, | ||
325 | b2p[TMP_REG_1]); | ||
326 | break; | ||
327 | case BPF_ALU64: | ||
328 | if (BPF_OP(code) == BPF_MOD) { | ||
329 | PPC_DIVD(b2p[TMP_REG_2], dst_reg, | ||
330 | b2p[TMP_REG_1]); | ||
331 | PPC_MULD(b2p[TMP_REG_1], | ||
332 | b2p[TMP_REG_1], | ||
333 | b2p[TMP_REG_2]); | ||
334 | PPC_SUB(dst_reg, dst_reg, | ||
335 | b2p[TMP_REG_1]); | ||
336 | } else | ||
337 | PPC_DIVD(dst_reg, dst_reg, | ||
338 | b2p[TMP_REG_1]); | ||
339 | break; | ||
340 | } | ||
341 | goto bpf_alu32_trunc; | ||
342 | case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ | ||
343 | case BPF_ALU64 | BPF_NEG: /* dst = -dst */ | ||
344 | PPC_NEG(dst_reg, dst_reg); | ||
345 | goto bpf_alu32_trunc; | ||
346 | |||
347 | /* | ||
348 | * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH | ||
349 | */ | ||
350 | case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ | ||
351 | case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ | ||
352 | PPC_AND(dst_reg, dst_reg, src_reg); | ||
353 | goto bpf_alu32_trunc; | ||
354 | case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ | ||
355 | case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ | ||
356 | if (!IMM_H(imm)) | ||
357 | PPC_ANDI(dst_reg, dst_reg, IMM_L(imm)); | ||
358 | else { | ||
359 | /* Sign-extended */ | ||
360 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
361 | PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]); | ||
362 | } | ||
363 | goto bpf_alu32_trunc; | ||
364 | case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ | ||
365 | case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ | ||
366 | PPC_OR(dst_reg, dst_reg, src_reg); | ||
367 | goto bpf_alu32_trunc; | ||
368 | case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ | ||
369 | case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ | ||
370 | if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { | ||
371 | /* Sign-extended */ | ||
372 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
373 | PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]); | ||
374 | } else { | ||
375 | if (IMM_L(imm)) | ||
376 | PPC_ORI(dst_reg, dst_reg, IMM_L(imm)); | ||
377 | if (IMM_H(imm)) | ||
378 | PPC_ORIS(dst_reg, dst_reg, IMM_H(imm)); | ||
379 | } | ||
380 | goto bpf_alu32_trunc; | ||
381 | case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ | ||
382 | case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ | ||
383 | PPC_XOR(dst_reg, dst_reg, src_reg); | ||
384 | goto bpf_alu32_trunc; | ||
385 | case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ | ||
386 | case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ | ||
387 | if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { | ||
388 | /* Sign-extended */ | ||
389 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
390 | PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]); | ||
391 | } else { | ||
392 | if (IMM_L(imm)) | ||
393 | PPC_XORI(dst_reg, dst_reg, IMM_L(imm)); | ||
394 | if (IMM_H(imm)) | ||
395 | PPC_XORIS(dst_reg, dst_reg, IMM_H(imm)); | ||
396 | } | ||
397 | goto bpf_alu32_trunc; | ||
398 | case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ | ||
399 | /* slw clears top 32 bits */ | ||
400 | PPC_SLW(dst_reg, dst_reg, src_reg); | ||
401 | break; | ||
402 | case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ | ||
403 | PPC_SLD(dst_reg, dst_reg, src_reg); | ||
404 | break; | ||
405 | case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */ | ||
406 | /* with imm 0, we still need to clear top 32 bits */ | ||
407 | PPC_SLWI(dst_reg, dst_reg, imm); | ||
408 | break; | ||
409 | case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */ | ||
410 | if (imm != 0) | ||
411 | PPC_SLDI(dst_reg, dst_reg, imm); | ||
412 | break; | ||
413 | case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ | ||
414 | PPC_SRW(dst_reg, dst_reg, src_reg); | ||
415 | break; | ||
416 | case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ | ||
417 | PPC_SRD(dst_reg, dst_reg, src_reg); | ||
418 | break; | ||
419 | case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ | ||
420 | PPC_SRWI(dst_reg, dst_reg, imm); | ||
421 | break; | ||
422 | case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ | ||
423 | if (imm != 0) | ||
424 | PPC_SRDI(dst_reg, dst_reg, imm); | ||
425 | break; | ||
426 | case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ | ||
427 | PPC_SRAD(dst_reg, dst_reg, src_reg); | ||
428 | break; | ||
429 | case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ | ||
430 | if (imm != 0) | ||
431 | PPC_SRADI(dst_reg, dst_reg, imm); | ||
432 | break; | ||
433 | |||
434 | /* | ||
435 | * MOV | ||
436 | */ | ||
437 | case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ | ||
438 | case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ | ||
439 | PPC_MR(dst_reg, src_reg); | ||
440 | goto bpf_alu32_trunc; | ||
441 | case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ | ||
442 | case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ | ||
443 | PPC_LI32(dst_reg, imm); | ||
444 | if (imm < 0) | ||
445 | goto bpf_alu32_trunc; | ||
446 | break; | ||
447 | |||
448 | bpf_alu32_trunc: | ||
449 | /* Truncate to 32-bits */ | ||
450 | if (BPF_CLASS(code) == BPF_ALU) | ||
451 | PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); | ||
452 | break; | ||
453 | |||
454 | /* | ||
455 | * BPF_FROM_BE/LE | ||
456 | */ | ||
457 | case BPF_ALU | BPF_END | BPF_FROM_LE: | ||
458 | case BPF_ALU | BPF_END | BPF_FROM_BE: | ||
459 | #ifdef __BIG_ENDIAN__ | ||
460 | if (BPF_SRC(code) == BPF_FROM_BE) | ||
461 | goto emit_clear; | ||
462 | #else /* !__BIG_ENDIAN__ */ | ||
463 | if (BPF_SRC(code) == BPF_FROM_LE) | ||
464 | goto emit_clear; | ||
465 | #endif | ||
466 | switch (imm) { | ||
467 | case 16: | ||
468 | /* Rotate 8 bits left & mask with 0x0000ff00 */ | ||
469 | PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23); | ||
470 | /* Rotate 8 bits right & insert LSB to reg */ | ||
471 | PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31); | ||
472 | /* Move result back to dst_reg */ | ||
473 | PPC_MR(dst_reg, b2p[TMP_REG_1]); | ||
474 | break; | ||
475 | case 32: | ||
476 | /* | ||
477 | * Rotate word left by 8 bits: | ||
478 | * 2 bytes are already in their final position | ||
479 | * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) | ||
480 | */ | ||
481 | PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31); | ||
482 | /* Rotate 24 bits and insert byte 1 */ | ||
483 | PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7); | ||
484 | /* Rotate 24 bits and insert byte 3 */ | ||
485 | PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23); | ||
486 | PPC_MR(dst_reg, b2p[TMP_REG_1]); | ||
487 | break; | ||
488 | case 64: | ||
489 | /* | ||
490 | * Way easier and faster(?) to store the value | ||
491 | * into stack and then use ldbrx | ||
492 | * | ||
493 | * First, determine where in stack we can store | ||
494 | * this: | ||
495 | * - if we have allotted a stack frame, then we | ||
496 | * will utilize the area set aside by | ||
497 | * BPF_PPC_STACK_LOCALS | ||
498 | * - else, we use the area beneath the NV GPR | ||
499 | * save area | ||
500 | * | ||
501 | * ctx->seen will be reliable in pass2, but | ||
502 | * the instructions generated will remain the | ||
503 | * same across all passes | ||
504 | */ | ||
505 | if (bpf_has_stack_frame(ctx)) | ||
506 | stack_local_off = STACK_FRAME_MIN_SIZE; | ||
507 | else | ||
508 | stack_local_off = -(BPF_PPC_STACK_SAVE + 8); | ||
509 | |||
510 | PPC_STD(dst_reg, 1, stack_local_off); | ||
511 | PPC_ADDI(b2p[TMP_REG_1], 1, stack_local_off); | ||
512 | PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]); | ||
513 | break; | ||
514 | } | ||
515 | break; | ||
516 | |||
517 | emit_clear: | ||
518 | switch (imm) { | ||
519 | case 16: | ||
520 | /* zero-extend 16 bits into 64 bits */ | ||
521 | PPC_RLDICL(dst_reg, dst_reg, 0, 48); | ||
522 | break; | ||
523 | case 32: | ||
524 | /* zero-extend 32 bits into 64 bits */ | ||
525 | PPC_RLDICL(dst_reg, dst_reg, 0, 32); | ||
526 | break; | ||
527 | case 64: | ||
528 | /* nop */ | ||
529 | break; | ||
530 | } | ||
531 | break; | ||
532 | |||
533 | /* | ||
534 | * BPF_ST(X) | ||
535 | */ | ||
536 | case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ | ||
537 | case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ | ||
538 | if (BPF_CLASS(code) == BPF_ST) { | ||
539 | PPC_LI(b2p[TMP_REG_1], imm); | ||
540 | src_reg = b2p[TMP_REG_1]; | ||
541 | } | ||
542 | PPC_STB(src_reg, dst_reg, off); | ||
543 | break; | ||
544 | case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ | ||
545 | case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ | ||
546 | if (BPF_CLASS(code) == BPF_ST) { | ||
547 | PPC_LI(b2p[TMP_REG_1], imm); | ||
548 | src_reg = b2p[TMP_REG_1]; | ||
549 | } | ||
550 | PPC_STH(src_reg, dst_reg, off); | ||
551 | break; | ||
552 | case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ | ||
553 | case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ | ||
554 | if (BPF_CLASS(code) == BPF_ST) { | ||
555 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
556 | src_reg = b2p[TMP_REG_1]; | ||
557 | } | ||
558 | PPC_STW(src_reg, dst_reg, off); | ||
559 | break; | ||
560 | case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ | ||
561 | case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ | ||
562 | if (BPF_CLASS(code) == BPF_ST) { | ||
563 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
564 | src_reg = b2p[TMP_REG_1]; | ||
565 | } | ||
566 | PPC_STD(src_reg, dst_reg, off); | ||
567 | break; | ||
568 | |||
569 | /* | ||
570 | * BPF_STX XADD (atomic_add) | ||
571 | */ | ||
572 | /* *(u32 *)(dst + off) += src */ | ||
573 | case BPF_STX | BPF_XADD | BPF_W: | ||
574 | /* Get EA into TMP_REG_1 */ | ||
575 | PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); | ||
576 | /* error if EA is not word-aligned */ | ||
577 | PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x03); | ||
578 | PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + 12); | ||
579 | PPC_LI(b2p[BPF_REG_0], 0); | ||
580 | PPC_JMP(exit_addr); | ||
581 | /* load value from memory into TMP_REG_2 */ | ||
582 | PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); | ||
583 | /* add value from src_reg into this */ | ||
584 | PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); | ||
585 | /* store result back */ | ||
586 | PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); | ||
587 | /* we're done if this succeeded */ | ||
588 | PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); | ||
589 | /* otherwise, let's try once more */ | ||
590 | PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); | ||
591 | PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); | ||
592 | PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); | ||
593 | /* exit if the store was not successful */ | ||
594 | PPC_LI(b2p[BPF_REG_0], 0); | ||
595 | PPC_BCC(COND_NE, exit_addr); | ||
596 | break; | ||
597 | /* *(u64 *)(dst + off) += src */ | ||
598 | case BPF_STX | BPF_XADD | BPF_DW: | ||
599 | PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); | ||
600 | /* error if EA is not doubleword-aligned */ | ||
601 | PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x07); | ||
602 | PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (3*4)); | ||
603 | PPC_LI(b2p[BPF_REG_0], 0); | ||
604 | PPC_JMP(exit_addr); | ||
605 | PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); | ||
606 | PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); | ||
607 | PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); | ||
608 | PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); | ||
609 | PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); | ||
610 | PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); | ||
611 | PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); | ||
612 | PPC_LI(b2p[BPF_REG_0], 0); | ||
613 | PPC_BCC(COND_NE, exit_addr); | ||
614 | break; | ||
615 | |||
616 | /* | ||
617 | * BPF_LDX | ||
618 | */ | ||
619 | /* dst = *(u8 *)(ul) (src + off) */ | ||
620 | case BPF_LDX | BPF_MEM | BPF_B: | ||
621 | PPC_LBZ(dst_reg, src_reg, off); | ||
622 | break; | ||
623 | /* dst = *(u16 *)(ul) (src + off) */ | ||
624 | case BPF_LDX | BPF_MEM | BPF_H: | ||
625 | PPC_LHZ(dst_reg, src_reg, off); | ||
626 | break; | ||
627 | /* dst = *(u32 *)(ul) (src + off) */ | ||
628 | case BPF_LDX | BPF_MEM | BPF_W: | ||
629 | PPC_LWZ(dst_reg, src_reg, off); | ||
630 | break; | ||
631 | /* dst = *(u64 *)(ul) (src + off) */ | ||
632 | case BPF_LDX | BPF_MEM | BPF_DW: | ||
633 | PPC_LD(dst_reg, src_reg, off); | ||
634 | break; | ||
635 | |||
636 | /* | ||
637 | * Doubleword load | ||
638 | * 16 byte instruction that uses two 'struct bpf_insn' | ||
639 | */ | ||
640 | case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ | ||
641 | imm64 = ((u64)(u32) insn[i].imm) | | ||
642 | (((u64)(u32) insn[i+1].imm) << 32); | ||
643 | /* Adjust for two bpf instructions */ | ||
644 | addrs[++i] = ctx->idx * 4; | ||
645 | PPC_LI64(dst_reg, imm64); | ||
646 | break; | ||
647 | |||
648 | /* | ||
649 | * Return/Exit | ||
650 | */ | ||
651 | case BPF_JMP | BPF_EXIT: | ||
652 | /* | ||
653 | * If this isn't the very last instruction, branch to | ||
654 | * the epilogue. If we _are_ the last instruction, | ||
655 | * we'll just fall through to the epilogue. | ||
656 | */ | ||
657 | if (i != flen - 1) | ||
658 | PPC_JMP(exit_addr); | ||
659 | /* else fall through to the epilogue */ | ||
660 | break; | ||
661 | |||
662 | /* | ||
663 | * Call kernel helper | ||
664 | */ | ||
665 | case BPF_JMP | BPF_CALL: | ||
666 | ctx->seen |= SEEN_FUNC; | ||
667 | func = (u8 *) __bpf_call_base + imm; | ||
668 | |||
669 | /* Save skb pointer if we need to re-cache skb data */ | ||
670 | if (bpf_helper_changes_skb_data(func)) | ||
671 | PPC_BPF_STL(3, 1, STACK_FRAME_MIN_SIZE); | ||
672 | |||
673 | bpf_jit_emit_func_call(image, ctx, (u64)func); | ||
674 | |||
675 | /* move return value from r3 to BPF_REG_0 */ | ||
676 | PPC_MR(b2p[BPF_REG_0], 3); | ||
677 | |||
678 | /* refresh skb cache */ | ||
679 | if (bpf_helper_changes_skb_data(func)) { | ||
680 | /* reload skb pointer to r3 */ | ||
681 | PPC_BPF_LL(3, 1, STACK_FRAME_MIN_SIZE); | ||
682 | bpf_jit_emit_skb_loads(image, ctx); | ||
683 | } | ||
684 | break; | ||
685 | |||
686 | /* | ||
687 | * Jumps and branches | ||
688 | */ | ||
689 | case BPF_JMP | BPF_JA: | ||
690 | PPC_JMP(addrs[i + 1 + off]); | ||
691 | break; | ||
692 | |||
693 | case BPF_JMP | BPF_JGT | BPF_K: | ||
694 | case BPF_JMP | BPF_JGT | BPF_X: | ||
695 | case BPF_JMP | BPF_JSGT | BPF_K: | ||
696 | case BPF_JMP | BPF_JSGT | BPF_X: | ||
697 | true_cond = COND_GT; | ||
698 | goto cond_branch; | ||
699 | case BPF_JMP | BPF_JGE | BPF_K: | ||
700 | case BPF_JMP | BPF_JGE | BPF_X: | ||
701 | case BPF_JMP | BPF_JSGE | BPF_K: | ||
702 | case BPF_JMP | BPF_JSGE | BPF_X: | ||
703 | true_cond = COND_GE; | ||
704 | goto cond_branch; | ||
705 | case BPF_JMP | BPF_JEQ | BPF_K: | ||
706 | case BPF_JMP | BPF_JEQ | BPF_X: | ||
707 | true_cond = COND_EQ; | ||
708 | goto cond_branch; | ||
709 | case BPF_JMP | BPF_JNE | BPF_K: | ||
710 | case BPF_JMP | BPF_JNE | BPF_X: | ||
711 | true_cond = COND_NE; | ||
712 | goto cond_branch; | ||
713 | case BPF_JMP | BPF_JSET | BPF_K: | ||
714 | case BPF_JMP | BPF_JSET | BPF_X: | ||
715 | true_cond = COND_NE; | ||
716 | /* Fall through */ | ||
717 | |||
718 | cond_branch: | ||
719 | switch (code) { | ||
720 | case BPF_JMP | BPF_JGT | BPF_X: | ||
721 | case BPF_JMP | BPF_JGE | BPF_X: | ||
722 | case BPF_JMP | BPF_JEQ | BPF_X: | ||
723 | case BPF_JMP | BPF_JNE | BPF_X: | ||
724 | /* unsigned comparison */ | ||
725 | PPC_CMPLD(dst_reg, src_reg); | ||
726 | break; | ||
727 | case BPF_JMP | BPF_JSGT | BPF_X: | ||
728 | case BPF_JMP | BPF_JSGE | BPF_X: | ||
729 | /* signed comparison */ | ||
730 | PPC_CMPD(dst_reg, src_reg); | ||
731 | break; | ||
732 | case BPF_JMP | BPF_JSET | BPF_X: | ||
733 | PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg); | ||
734 | break; | ||
735 | case BPF_JMP | BPF_JNE | BPF_K: | ||
736 | case BPF_JMP | BPF_JEQ | BPF_K: | ||
737 | case BPF_JMP | BPF_JGT | BPF_K: | ||
738 | case BPF_JMP | BPF_JGE | BPF_K: | ||
739 | /* | ||
740 | * Need sign-extended load, so only positive | ||
741 | * values can be used as imm in cmpldi | ||
742 | */ | ||
743 | if (imm >= 0 && imm < 32768) | ||
744 | PPC_CMPLDI(dst_reg, imm); | ||
745 | else { | ||
746 | /* sign-extending load */ | ||
747 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
748 | /* ... but unsigned comparison */ | ||
749 | PPC_CMPLD(dst_reg, b2p[TMP_REG_1]); | ||
750 | } | ||
751 | break; | ||
752 | case BPF_JMP | BPF_JSGT | BPF_K: | ||
753 | case BPF_JMP | BPF_JSGE | BPF_K: | ||
754 | /* | ||
755 | * signed comparison, so any 16-bit value | ||
756 | * can be used in cmpdi | ||
757 | */ | ||
758 | if (imm >= -32768 && imm < 32768) | ||
759 | PPC_CMPDI(dst_reg, imm); | ||
760 | else { | ||
761 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
762 | PPC_CMPD(dst_reg, b2p[TMP_REG_1]); | ||
763 | } | ||
764 | break; | ||
765 | case BPF_JMP | BPF_JSET | BPF_K: | ||
766 | /* andi does not sign-extend the immediate */ | ||
767 | if (imm >= 0 && imm < 32768) | ||
768 | /* PPC_ANDI is _only/always_ dot-form */ | ||
769 | PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm); | ||
770 | else { | ||
771 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
772 | PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, | ||
773 | b2p[TMP_REG_1]); | ||
774 | } | ||
775 | break; | ||
776 | } | ||
777 | PPC_BCC(true_cond, addrs[i + 1 + off]); | ||
778 | break; | ||
779 | |||
780 | /* | ||
781 | * Loads from packet header/data | ||
782 | * Assume 32-bit input value in imm and X (src_reg) | ||
783 | */ | ||
784 | |||
785 | /* Absolute loads */ | ||
786 | case BPF_LD | BPF_W | BPF_ABS: | ||
787 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word); | ||
788 | goto common_load_abs; | ||
789 | case BPF_LD | BPF_H | BPF_ABS: | ||
790 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half); | ||
791 | goto common_load_abs; | ||
792 | case BPF_LD | BPF_B | BPF_ABS: | ||
793 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte); | ||
794 | common_load_abs: | ||
795 | /* | ||
796 | * Load from [imm] | ||
797 | * Load into r4, which can just be passed onto | ||
798 | * skb load helpers as the second parameter | ||
799 | */ | ||
800 | PPC_LI32(4, imm); | ||
801 | goto common_load; | ||
802 | |||
803 | /* Indirect loads */ | ||
804 | case BPF_LD | BPF_W | BPF_IND: | ||
805 | func = (u8 *)sk_load_word; | ||
806 | goto common_load_ind; | ||
807 | case BPF_LD | BPF_H | BPF_IND: | ||
808 | func = (u8 *)sk_load_half; | ||
809 | goto common_load_ind; | ||
810 | case BPF_LD | BPF_B | BPF_IND: | ||
811 | func = (u8 *)sk_load_byte; | ||
812 | common_load_ind: | ||
813 | /* | ||
814 | * Load from [src_reg + imm] | ||
815 | * Treat src_reg as a 32-bit value | ||
816 | */ | ||
817 | PPC_EXTSW(4, src_reg); | ||
818 | if (imm) { | ||
819 | if (imm >= -32768 && imm < 32768) | ||
820 | PPC_ADDI(4, 4, IMM_L(imm)); | ||
821 | else { | ||
822 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
823 | PPC_ADD(4, 4, b2p[TMP_REG_1]); | ||
824 | } | ||
825 | } | ||
826 | |||
827 | common_load: | ||
828 | ctx->seen |= SEEN_SKB; | ||
829 | ctx->seen |= SEEN_FUNC; | ||
830 | bpf_jit_emit_func_call(image, ctx, (u64)func); | ||
831 | |||
832 | /* | ||
833 | * Helper returns 'lt' condition on error, and an | ||
834 | * appropriate return value in BPF_REG_0 | ||
835 | */ | ||
836 | PPC_BCC(COND_LT, exit_addr); | ||
837 | break; | ||
838 | |||
839 | /* | ||
840 | * TODO: Tail call | ||
841 | */ | ||
842 | case BPF_JMP | BPF_CALL | BPF_X: | ||
843 | |||
844 | default: | ||
845 | /* | ||
846 | * The filter contains something cruel & unusual. | ||
847 | * We don't handle it, but also there shouldn't be | ||
848 | * anything missing from our list. | ||
849 | */ | ||
850 | pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", | ||
851 | code, i); | ||
852 | return -ENOTSUPP; | ||
853 | } | ||
854 | } | ||
855 | |||
856 | /* Set end-of-body-code address for exit. */ | ||
857 | addrs[i] = ctx->idx * 4; | ||
858 | |||
859 | return 0; | ||
860 | } | ||
861 | |||
862 | void bpf_jit_compile(struct bpf_prog *fp) { } | ||
863 | |||
864 | struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) | ||
865 | { | ||
866 | u32 proglen; | ||
867 | u32 alloclen; | ||
868 | u8 *image = NULL; | ||
869 | u32 *code_base; | ||
870 | u32 *addrs; | ||
871 | struct codegen_context cgctx; | ||
872 | int pass; | ||
873 | int flen; | ||
874 | struct bpf_binary_header *bpf_hdr; | ||
875 | |||
876 | if (!bpf_jit_enable) | ||
877 | return fp; | ||
878 | |||
879 | flen = fp->len; | ||
880 | addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL); | ||
881 | if (addrs == NULL) | ||
882 | return fp; | ||
883 | |||
884 | cgctx.idx = 0; | ||
885 | cgctx.seen = 0; | ||
886 | /* Scouting faux-generate pass 0 */ | ||
887 | if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) | ||
888 | /* We hit something illegal or unsupported. */ | ||
889 | goto out; | ||
890 | |||
891 | /* | ||
892 | * Pretend to build prologue, given the features we've seen. This will | ||
893 | * update ctgtx.idx as it pretends to output instructions, then we can | ||
894 | * calculate total size from idx. | ||
895 | */ | ||
896 | bpf_jit_build_prologue(0, &cgctx); | ||
897 | bpf_jit_build_epilogue(0, &cgctx); | ||
898 | |||
899 | proglen = cgctx.idx * 4; | ||
900 | alloclen = proglen + FUNCTION_DESCR_SIZE; | ||
901 | |||
902 | bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, | ||
903 | bpf_jit_fill_ill_insns); | ||
904 | if (!bpf_hdr) | ||
905 | goto out; | ||
906 | |||
907 | code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); | ||
908 | |||
909 | /* Code generation passes 1-2 */ | ||
910 | for (pass = 1; pass < 3; pass++) { | ||
911 | /* Now build the prologue, body code & epilogue for real. */ | ||
912 | cgctx.idx = 0; | ||
913 | bpf_jit_build_prologue(code_base, &cgctx); | ||
914 | bpf_jit_build_body(fp, code_base, &cgctx, addrs); | ||
915 | bpf_jit_build_epilogue(code_base, &cgctx); | ||
916 | |||
917 | if (bpf_jit_enable > 1) | ||
918 | pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, | ||
919 | proglen - (cgctx.idx * 4), cgctx.seen); | ||
920 | } | ||
921 | |||
922 | if (bpf_jit_enable > 1) | ||
923 | /* | ||
924 | * Note that we output the base address of the code_base | ||
925 | * rather than image, since opcodes are in code_base. | ||
926 | */ | ||
927 | bpf_jit_dump(flen, proglen, pass, code_base); | ||
928 | |||
929 | if (image) { | ||
930 | bpf_flush_icache(bpf_hdr, image + alloclen); | ||
931 | #ifdef PPC64_ELF_ABI_v1 | ||
932 | /* Function descriptor nastiness: Address + TOC */ | ||
933 | ((u64 *)image)[0] = (u64)code_base; | ||
934 | ((u64 *)image)[1] = local_paca->kernel_toc; | ||
935 | #endif | ||
936 | fp->bpf_func = (void *)image; | ||
937 | fp->jited = 1; | ||
938 | } | ||
939 | |||
940 | out: | ||
941 | kfree(addrs); | ||
942 | return fp; | ||
943 | } | ||
944 | |||
945 | void bpf_jit_free(struct bpf_prog *fp) | ||
946 | { | ||
947 | unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; | ||
948 | struct bpf_binary_header *bpf_hdr = (void *)addr; | ||
949 | |||
950 | if (fp->jited) | ||
951 | bpf_jit_binary_free(bpf_hdr); | ||
952 | |||
953 | bpf_prog_unlock_free(fp); | ||
954 | } | ||