aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/filter.h74
-rw-r--r--include/linux/seccomp.h1
-rw-r--r--kernel/seccomp.c119
-rw-r--r--net/core/filter.c1457
4 files changed, 1279 insertions, 372 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9bde3ed19fe6..262dcbb75ffe 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -9,13 +9,58 @@
9#include <linux/workqueue.h> 9#include <linux/workqueue.h>
10#include <uapi/linux/filter.h> 10#include <uapi/linux/filter.h>
11 11
12#ifdef CONFIG_COMPAT 12/* Internally used and optimized filter representation with extended
13/* 13 * instruction set based on top of classic BPF.
14 * A struct sock_filter is architecture independent.
15 */ 14 */
15
16/* instruction classes */
17#define BPF_ALU64 0x07 /* alu mode in double word width */
18
19/* ld/ldx fields */
20#define BPF_DW 0x18 /* double word */
21#define BPF_XADD 0xc0 /* exclusive add */
22
23/* alu/jmp fields */
24#define BPF_MOV 0xb0 /* mov reg to reg */
25#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
26
27/* change endianness of a register */
28#define BPF_END 0xd0 /* flags for endianness conversion: */
29#define BPF_TO_LE 0x00 /* convert to little-endian */
30#define BPF_TO_BE 0x08 /* convert to big-endian */
31#define BPF_FROM_LE BPF_TO_LE
32#define BPF_FROM_BE BPF_TO_BE
33
34#define BPF_JNE 0x50 /* jump != */
35#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
36#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
37#define BPF_CALL 0x80 /* function call */
38#define BPF_EXIT 0x90 /* function return */
39
40/* BPF has 10 general purpose 64-bit registers and stack frame. */
41#define MAX_BPF_REG 11
42
43/* BPF program can access up to 512 bytes of stack space. */
44#define MAX_BPF_STACK 512
45
46/* Arg1, context and stack frame pointer register positions. */
47#define ARG1_REG 1
48#define CTX_REG 6
49#define FP_REG 10
50
51struct sock_filter_int {
52 __u8 code; /* opcode */
53 __u8 a_reg:4; /* dest register */
54 __u8 x_reg:4; /* source register */
55 __s16 off; /* signed offset */
56 __s32 imm; /* signed immediate constant */
57};
58
59#ifdef CONFIG_COMPAT
60/* A struct sock_filter is architecture independent. */
16struct compat_sock_fprog { 61struct compat_sock_fprog {
17 u16 len; 62 u16 len;
18 compat_uptr_t filter; /* struct sock_filter * */ 63 compat_uptr_t filter; /* struct sock_filter * */
19}; 64};
20#endif 65#endif
21 66
@@ -26,6 +71,7 @@ struct sock_fprog_kern {
26 71
27struct sk_buff; 72struct sk_buff;
28struct sock; 73struct sock;
74struct seccomp_data;
29 75
30struct sk_filter { 76struct sk_filter {
31 atomic_t refcnt; 77 atomic_t refcnt;
@@ -34,9 +80,10 @@ struct sk_filter {
34 struct sock_fprog_kern *orig_prog; /* Original BPF program */ 80 struct sock_fprog_kern *orig_prog; /* Original BPF program */
35 struct rcu_head rcu; 81 struct rcu_head rcu;
36 unsigned int (*bpf_func)(const struct sk_buff *skb, 82 unsigned int (*bpf_func)(const struct sk_buff *skb,
37 const struct sock_filter *filter); 83 const struct sock_filter_int *filter);
38 union { 84 union {
39 struct sock_filter insns[0]; 85 struct sock_filter insns[0];
86 struct sock_filter_int insnsi[0];
40 struct work_struct work; 87 struct work_struct work;
41 }; 88 };
42}; 89};
@@ -50,9 +97,18 @@ static inline unsigned int sk_filter_size(unsigned int proglen)
50#define sk_filter_proglen(fprog) \ 97#define sk_filter_proglen(fprog) \
51 (fprog->len * sizeof(fprog->filter[0])) 98 (fprog->len * sizeof(fprog->filter[0]))
52 99
100#define SK_RUN_FILTER(filter, ctx) \
101 (*filter->bpf_func)(ctx, filter->insnsi)
102
53int sk_filter(struct sock *sk, struct sk_buff *skb); 103int sk_filter(struct sock *sk, struct sk_buff *skb);
54unsigned int sk_run_filter(const struct sk_buff *skb, 104
55 const struct sock_filter *filter); 105u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
106 const struct sock_filter_int *insni);
107u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
108 const struct sock_filter_int *insni);
109
110int sk_convert_filter(struct sock_filter *prog, int len,
111 struct sock_filter_int *new_prog, int *new_len);
56 112
57int sk_unattached_filter_create(struct sk_filter **pfp, 113int sk_unattached_filter_create(struct sk_filter **pfp,
58 struct sock_fprog *fprog); 114 struct sock_fprog *fprog);
@@ -86,7 +142,6 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
86 print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 142 print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET,
87 16, 1, image, proglen, false); 143 16, 1, image, proglen, false);
88} 144}
89#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
90#else 145#else
91#include <linux/slab.h> 146#include <linux/slab.h>
92static inline void bpf_jit_compile(struct sk_filter *fp) 147static inline void bpf_jit_compile(struct sk_filter *fp)
@@ -96,7 +151,6 @@ static inline void bpf_jit_free(struct sk_filter *fp)
96{ 151{
97 kfree(fp); 152 kfree(fp);
98} 153}
99#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
100#endif 154#endif
101 155
102static inline int bpf_tell_extensions(void) 156static inline int bpf_tell_extensions(void)
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 6f19cfd1840e..4054b0994071 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -76,7 +76,6 @@ static inline int seccomp_mode(struct seccomp *s)
76#ifdef CONFIG_SECCOMP_FILTER 76#ifdef CONFIG_SECCOMP_FILTER
77extern void put_seccomp_filter(struct task_struct *tsk); 77extern void put_seccomp_filter(struct task_struct *tsk);
78extern void get_seccomp_filter(struct task_struct *tsk); 78extern void get_seccomp_filter(struct task_struct *tsk);
79extern u32 seccomp_bpf_load(int off);
80#else /* CONFIG_SECCOMP_FILTER */ 79#else /* CONFIG_SECCOMP_FILTER */
81static inline void put_seccomp_filter(struct task_struct *tsk) 80static inline void put_seccomp_filter(struct task_struct *tsk)
82{ 81{
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b7a10048a32c..4f18e754c23e 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -55,60 +55,33 @@ struct seccomp_filter {
55 atomic_t usage; 55 atomic_t usage;
56 struct seccomp_filter *prev; 56 struct seccomp_filter *prev;
57 unsigned short len; /* Instruction count */ 57 unsigned short len; /* Instruction count */
58 struct sock_filter insns[]; 58 struct sock_filter_int insnsi[];
59}; 59};
60 60
61/* Limit any path through the tree to 256KB worth of instructions. */ 61/* Limit any path through the tree to 256KB worth of instructions. */
62#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter)) 62#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
63 63
64/** 64/*
65 * get_u32 - returns a u32 offset into data
66 * @data: a unsigned 64 bit value
67 * @index: 0 or 1 to return the first or second 32-bits
68 *
69 * This inline exists to hide the length of unsigned long. If a 32-bit
70 * unsigned long is passed in, it will be extended and the top 32-bits will be
71 * 0. If it is a 64-bit unsigned long, then whatever data is resident will be
72 * properly returned.
73 *
74 * Endianness is explicitly ignored and left for BPF program authors to manage 65 * Endianness is explicitly ignored and left for BPF program authors to manage
75 * as per the specific architecture. 66 * as per the specific architecture.
76 */ 67 */
77static inline u32 get_u32(u64 data, int index) 68static void populate_seccomp_data(struct seccomp_data *sd)
78{ 69{
79 return ((u32 *)&data)[index]; 70 struct task_struct *task = current;
80} 71 struct pt_regs *regs = task_pt_regs(task);
81 72
82/* Helper for bpf_load below. */ 73 sd->nr = syscall_get_nr(task, regs);
83#define BPF_DATA(_name) offsetof(struct seccomp_data, _name) 74 sd->arch = syscall_get_arch(task, regs);
84/** 75
85 * bpf_load: checks and returns a pointer to the requested offset 76 /* Unroll syscall_get_args to help gcc on arm. */
86 * @off: offset into struct seccomp_data to load from 77 syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]);
87 * 78 syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]);
88 * Returns the requested 32-bits of data. 79 syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]);
89 * seccomp_check_filter() should assure that @off is 32-bit aligned 80 syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]);
90 * and not out of bounds. Failure to do so is a BUG. 81 syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]);
91 */ 82 syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]);
92u32 seccomp_bpf_load(int off) 83
93{ 84 sd->instruction_pointer = KSTK_EIP(task);
94 struct pt_regs *regs = task_pt_regs(current);
95 if (off == BPF_DATA(nr))
96 return syscall_get_nr(current, regs);
97 if (off == BPF_DATA(arch))
98 return syscall_get_arch(current, regs);
99 if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) {
100 unsigned long value;
101 int arg = (off - BPF_DATA(args[0])) / sizeof(u64);
102 int index = !!(off % sizeof(u64));
103 syscall_get_arguments(current, regs, arg, 1, &value);
104 return get_u32(value, index);
105 }
106 if (off == BPF_DATA(instruction_pointer))
107 return get_u32(KSTK_EIP(current), 0);
108 if (off == BPF_DATA(instruction_pointer) + sizeof(u32))
109 return get_u32(KSTK_EIP(current), 1);
110 /* seccomp_check_filter should make this impossible. */
111 BUG();
112} 85}
113 86
114/** 87/**
@@ -133,17 +106,17 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
133 106
134 switch (code) { 107 switch (code) {
135 case BPF_S_LD_W_ABS: 108 case BPF_S_LD_W_ABS:
136 ftest->code = BPF_S_ANC_SECCOMP_LD_W; 109 ftest->code = BPF_LDX | BPF_W | BPF_ABS;
137 /* 32-bit aligned and not out of bounds. */ 110 /* 32-bit aligned and not out of bounds. */
138 if (k >= sizeof(struct seccomp_data) || k & 3) 111 if (k >= sizeof(struct seccomp_data) || k & 3)
139 return -EINVAL; 112 return -EINVAL;
140 continue; 113 continue;
141 case BPF_S_LD_W_LEN: 114 case BPF_S_LD_W_LEN:
142 ftest->code = BPF_S_LD_IMM; 115 ftest->code = BPF_LD | BPF_IMM;
143 ftest->k = sizeof(struct seccomp_data); 116 ftest->k = sizeof(struct seccomp_data);
144 continue; 117 continue;
145 case BPF_S_LDX_W_LEN: 118 case BPF_S_LDX_W_LEN:
146 ftest->code = BPF_S_LDX_IMM; 119 ftest->code = BPF_LDX | BPF_IMM;
147 ftest->k = sizeof(struct seccomp_data); 120 ftest->k = sizeof(struct seccomp_data);
148 continue; 121 continue;
149 /* Explicitly include allowed calls. */ 122 /* Explicitly include allowed calls. */
@@ -185,6 +158,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
185 case BPF_S_JMP_JGT_X: 158 case BPF_S_JMP_JGT_X:
186 case BPF_S_JMP_JSET_K: 159 case BPF_S_JMP_JSET_K:
187 case BPF_S_JMP_JSET_X: 160 case BPF_S_JMP_JSET_X:
161 sk_decode_filter(ftest, ftest);
188 continue; 162 continue;
189 default: 163 default:
190 return -EINVAL; 164 return -EINVAL;
@@ -202,18 +176,21 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
202static u32 seccomp_run_filters(int syscall) 176static u32 seccomp_run_filters(int syscall)
203{ 177{
204 struct seccomp_filter *f; 178 struct seccomp_filter *f;
179 struct seccomp_data sd;
205 u32 ret = SECCOMP_RET_ALLOW; 180 u32 ret = SECCOMP_RET_ALLOW;
206 181
207 /* Ensure unexpected behavior doesn't result in failing open. */ 182 /* Ensure unexpected behavior doesn't result in failing open. */
208 if (WARN_ON(current->seccomp.filter == NULL)) 183 if (WARN_ON(current->seccomp.filter == NULL))
209 return SECCOMP_RET_KILL; 184 return SECCOMP_RET_KILL;
210 185
186 populate_seccomp_data(&sd);
187
211 /* 188 /*
212 * All filters in the list are evaluated and the lowest BPF return 189 * All filters in the list are evaluated and the lowest BPF return
213 * value always takes priority (ignoring the DATA). 190 * value always takes priority (ignoring the DATA).
214 */ 191 */
215 for (f = current->seccomp.filter; f; f = f->prev) { 192 for (f = current->seccomp.filter; f; f = f->prev) {
216 u32 cur_ret = sk_run_filter(NULL, f->insns); 193 u32 cur_ret = sk_run_filter_int_seccomp(&sd, f->insnsi);
217 if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) 194 if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
218 ret = cur_ret; 195 ret = cur_ret;
219 } 196 }
@@ -231,6 +208,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
231 struct seccomp_filter *filter; 208 struct seccomp_filter *filter;
232 unsigned long fp_size = fprog->len * sizeof(struct sock_filter); 209 unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
233 unsigned long total_insns = fprog->len; 210 unsigned long total_insns = fprog->len;
211 struct sock_filter *fp;
212 int new_len;
234 long ret; 213 long ret;
235 214
236 if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) 215 if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
@@ -252,28 +231,43 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
252 CAP_SYS_ADMIN) != 0) 231 CAP_SYS_ADMIN) != 0)
253 return -EACCES; 232 return -EACCES;
254 233
255 /* Allocate a new seccomp_filter */ 234 fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
256 filter = kzalloc(sizeof(struct seccomp_filter) + fp_size, 235 if (!fp)
257 GFP_KERNEL|__GFP_NOWARN);
258 if (!filter)
259 return -ENOMEM; 236 return -ENOMEM;
260 atomic_set(&filter->usage, 1);
261 filter->len = fprog->len;
262 237
263 /* Copy the instructions from fprog. */ 238 /* Copy the instructions from fprog. */
264 ret = -EFAULT; 239 ret = -EFAULT;
265 if (copy_from_user(filter->insns, fprog->filter, fp_size)) 240 if (copy_from_user(fp, fprog->filter, fp_size))
266 goto fail; 241 goto free_prog;
267 242
268 /* Check and rewrite the fprog via the skb checker */ 243 /* Check and rewrite the fprog via the skb checker */
269 ret = sk_chk_filter(filter->insns, filter->len); 244 ret = sk_chk_filter(fp, fprog->len);
270 if (ret) 245 if (ret)
271 goto fail; 246 goto free_prog;
272 247
273 /* Check and rewrite the fprog for seccomp use */ 248 /* Check and rewrite the fprog for seccomp use */
274 ret = seccomp_check_filter(filter->insns, filter->len); 249 ret = seccomp_check_filter(fp, fprog->len);
250 if (ret)
251 goto free_prog;
252
253 /* Convert 'sock_filter' insns to 'sock_filter_int' insns */
254 ret = sk_convert_filter(fp, fprog->len, NULL, &new_len);
255 if (ret)
256 goto free_prog;
257
258 /* Allocate a new seccomp_filter */
259 filter = kzalloc(sizeof(struct seccomp_filter) +
260 sizeof(struct sock_filter_int) * new_len,
261 GFP_KERNEL|__GFP_NOWARN);
262 if (!filter)
263 goto free_prog;
264
265 ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
275 if (ret) 266 if (ret)
276 goto fail; 267 goto free_filter;
268
269 atomic_set(&filter->usage, 1);
270 filter->len = new_len;
277 271
278 /* 272 /*
279 * If there is an existing filter, make it the prev and don't drop its 273 * If there is an existing filter, make it the prev and don't drop its
@@ -282,8 +276,11 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
282 filter->prev = current->seccomp.filter; 276 filter->prev = current->seccomp.filter;
283 current->seccomp.filter = filter; 277 current->seccomp.filter = filter;
284 return 0; 278 return 0;
285fail: 279
280free_filter:
286 kfree(filter); 281 kfree(filter);
282free_prog:
283 kfree(fp);
287 return ret; 284 return ret;
288} 285}
289 286
diff --git a/net/core/filter.c b/net/core/filter.c
index 5b3427aaeca5..3733381190ec 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1,11 +1,16 @@
1/* 1/*
2 * Linux Socket Filter - Kernel level socket filtering 2 * Linux Socket Filter - Kernel level socket filtering
3 * 3 *
4 * Author: 4 * Based on the design of the Berkeley Packet Filter. The new
5 * Jay Schulist <jschlst@samba.org> 5 * internal format has been designed by PLUMgrid:
6 * 6 *
7 * Based on the design of: 7 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
8 * - The Berkeley Packet Filter 8 *
9 * Authors:
10 *
11 * Jay Schulist <jschlst@samba.org>
12 * Alexei Starovoitov <ast@plumgrid.com>
13 * Daniel Borkmann <dborkman@redhat.com>
9 * 14 *
10 * This program is free software; you can redistribute it and/or 15 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License 16 * modify it under the terms of the GNU General Public License
@@ -108,304 +113,1045 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
108} 113}
109EXPORT_SYMBOL(sk_filter); 114EXPORT_SYMBOL(sk_filter);
110 115
116/* Base function for offset calculation. Needs to go into .text section,
117 * therefore keeping it non-static as well; will also be used by JITs
118 * anyway later on, so do not let the compiler omit it.
119 */
120noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
121{
122 return 0;
123}
124
111/** 125/**
112 * sk_run_filter - run a filter on a socket 126 * __sk_run_filter - run a filter on a given context
113 * @skb: buffer to run the filter on 127 * @ctx: buffer to run the filter on
114 * @fentry: filter to apply 128 * @fentry: filter to apply
115 * 129 *
116 * Decode and apply filter instructions to the skb->data. 130 * Decode and apply filter instructions to the skb->data. Return length to
117 * Return length to keep, 0 for none. @skb is the data we are 131 * keep, 0 for none. @ctx is the data we are operating on, @filter is the
118 * filtering, @filter is the array of filter instructions. 132 * array of filter instructions.
119 * Because all jumps are guaranteed to be before last instruction,
120 * and last instruction guaranteed to be a RET, we dont need to check
121 * flen. (We used to pass to this function the length of filter)
122 */ 133 */
123unsigned int sk_run_filter(const struct sk_buff *skb, 134unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
124 const struct sock_filter *fentry)
125{ 135{
136 u64 stack[MAX_BPF_STACK / sizeof(u64)];
137 u64 regs[MAX_BPF_REG], tmp;
126 void *ptr; 138 void *ptr;
127 u32 A = 0; /* Accumulator */ 139 int off;
128 u32 X = 0; /* Index Register */ 140
129 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ 141#define K insn->imm
130 u32 tmp; 142#define A regs[insn->a_reg]
131 int k; 143#define X regs[insn->x_reg]
144#define R0 regs[0]
145
146#define CONT ({insn++; goto select_insn; })
147#define CONT_JMP ({insn++; goto select_insn; })
148
149 static const void *jumptable[256] = {
150 [0 ... 255] = &&default_label,
151 /* Now overwrite non-defaults ... */
152#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C
153 DL(BPF_ALU, BPF_ADD, BPF_X),
154 DL(BPF_ALU, BPF_ADD, BPF_K),
155 DL(BPF_ALU, BPF_SUB, BPF_X),
156 DL(BPF_ALU, BPF_SUB, BPF_K),
157 DL(BPF_ALU, BPF_AND, BPF_X),
158 DL(BPF_ALU, BPF_AND, BPF_K),
159 DL(BPF_ALU, BPF_OR, BPF_X),
160 DL(BPF_ALU, BPF_OR, BPF_K),
161 DL(BPF_ALU, BPF_LSH, BPF_X),
162 DL(BPF_ALU, BPF_LSH, BPF_K),
163 DL(BPF_ALU, BPF_RSH, BPF_X),
164 DL(BPF_ALU, BPF_RSH, BPF_K),
165 DL(BPF_ALU, BPF_XOR, BPF_X),
166 DL(BPF_ALU, BPF_XOR, BPF_K),
167 DL(BPF_ALU, BPF_MUL, BPF_X),
168 DL(BPF_ALU, BPF_MUL, BPF_K),
169 DL(BPF_ALU, BPF_MOV, BPF_X),
170 DL(BPF_ALU, BPF_MOV, BPF_K),
171 DL(BPF_ALU, BPF_DIV, BPF_X),
172 DL(BPF_ALU, BPF_DIV, BPF_K),
173 DL(BPF_ALU, BPF_MOD, BPF_X),
174 DL(BPF_ALU, BPF_MOD, BPF_K),
175 DL(BPF_ALU, BPF_NEG, 0),
176 DL(BPF_ALU, BPF_END, BPF_TO_BE),
177 DL(BPF_ALU, BPF_END, BPF_TO_LE),
178 DL(BPF_ALU64, BPF_ADD, BPF_X),
179 DL(BPF_ALU64, BPF_ADD, BPF_K),
180 DL(BPF_ALU64, BPF_SUB, BPF_X),
181 DL(BPF_ALU64, BPF_SUB, BPF_K),
182 DL(BPF_ALU64, BPF_AND, BPF_X),
183 DL(BPF_ALU64, BPF_AND, BPF_K),
184 DL(BPF_ALU64, BPF_OR, BPF_X),
185 DL(BPF_ALU64, BPF_OR, BPF_K),
186 DL(BPF_ALU64, BPF_LSH, BPF_X),
187 DL(BPF_ALU64, BPF_LSH, BPF_K),
188 DL(BPF_ALU64, BPF_RSH, BPF_X),
189 DL(BPF_ALU64, BPF_RSH, BPF_K),
190 DL(BPF_ALU64, BPF_XOR, BPF_X),
191 DL(BPF_ALU64, BPF_XOR, BPF_K),
192 DL(BPF_ALU64, BPF_MUL, BPF_X),
193 DL(BPF_ALU64, BPF_MUL, BPF_K),
194 DL(BPF_ALU64, BPF_MOV, BPF_X),
195 DL(BPF_ALU64, BPF_MOV, BPF_K),
196 DL(BPF_ALU64, BPF_ARSH, BPF_X),
197 DL(BPF_ALU64, BPF_ARSH, BPF_K),
198 DL(BPF_ALU64, BPF_DIV, BPF_X),
199 DL(BPF_ALU64, BPF_DIV, BPF_K),
200 DL(BPF_ALU64, BPF_MOD, BPF_X),
201 DL(BPF_ALU64, BPF_MOD, BPF_K),
202 DL(BPF_ALU64, BPF_NEG, 0),
203 DL(BPF_JMP, BPF_CALL, 0),
204 DL(BPF_JMP, BPF_JA, 0),
205 DL(BPF_JMP, BPF_JEQ, BPF_X),
206 DL(BPF_JMP, BPF_JEQ, BPF_K),
207 DL(BPF_JMP, BPF_JNE, BPF_X),
208 DL(BPF_JMP, BPF_JNE, BPF_K),
209 DL(BPF_JMP, BPF_JGT, BPF_X),
210 DL(BPF_JMP, BPF_JGT, BPF_K),
211 DL(BPF_JMP, BPF_JGE, BPF_X),
212 DL(BPF_JMP, BPF_JGE, BPF_K),
213 DL(BPF_JMP, BPF_JSGT, BPF_X),
214 DL(BPF_JMP, BPF_JSGT, BPF_K),
215 DL(BPF_JMP, BPF_JSGE, BPF_X),
216 DL(BPF_JMP, BPF_JSGE, BPF_K),
217 DL(BPF_JMP, BPF_JSET, BPF_X),
218 DL(BPF_JMP, BPF_JSET, BPF_K),
219 DL(BPF_JMP, BPF_EXIT, 0),
220 DL(BPF_STX, BPF_MEM, BPF_B),
221 DL(BPF_STX, BPF_MEM, BPF_H),
222 DL(BPF_STX, BPF_MEM, BPF_W),
223 DL(BPF_STX, BPF_MEM, BPF_DW),
224 DL(BPF_STX, BPF_XADD, BPF_W),
225 DL(BPF_STX, BPF_XADD, BPF_DW),
226 DL(BPF_ST, BPF_MEM, BPF_B),
227 DL(BPF_ST, BPF_MEM, BPF_H),
228 DL(BPF_ST, BPF_MEM, BPF_W),
229 DL(BPF_ST, BPF_MEM, BPF_DW),
230 DL(BPF_LDX, BPF_MEM, BPF_B),
231 DL(BPF_LDX, BPF_MEM, BPF_H),
232 DL(BPF_LDX, BPF_MEM, BPF_W),
233 DL(BPF_LDX, BPF_MEM, BPF_DW),
234 DL(BPF_LD, BPF_ABS, BPF_W),
235 DL(BPF_LD, BPF_ABS, BPF_H),
236 DL(BPF_LD, BPF_ABS, BPF_B),
237 DL(BPF_LD, BPF_IND, BPF_W),
238 DL(BPF_LD, BPF_IND, BPF_H),
239 DL(BPF_LD, BPF_IND, BPF_B),
240#undef DL
241 };
132 242
133 /* 243 regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
134 * Process array of filter instructions. 244 regs[ARG1_REG] = (u64) (unsigned long) ctx;
135 */ 245
136 for (;; fentry++) { 246select_insn:
137#if defined(CONFIG_X86_32) 247 goto *jumptable[insn->code];
138#define K (fentry->k) 248
139#else 249 /* ALU */
140 const u32 K = fentry->k; 250#define ALU(OPCODE, OP) \
141#endif 251 BPF_ALU64_##OPCODE##_BPF_X: \
142 252 A = A OP X; \
143 switch (fentry->code) { 253 CONT; \
144 case BPF_S_ALU_ADD_X: 254 BPF_ALU_##OPCODE##_BPF_X: \
145 A += X; 255 A = (u32) A OP (u32) X; \
146 continue; 256 CONT; \
147 case BPF_S_ALU_ADD_K: 257 BPF_ALU64_##OPCODE##_BPF_K: \
148 A += K; 258 A = A OP K; \
149 continue; 259 CONT; \
150 case BPF_S_ALU_SUB_X: 260 BPF_ALU_##OPCODE##_BPF_K: \
151 A -= X; 261 A = (u32) A OP (u32) K; \
152 continue; 262 CONT;
153 case BPF_S_ALU_SUB_K: 263
154 A -= K; 264 ALU(BPF_ADD, +)
155 continue; 265 ALU(BPF_SUB, -)
156 case BPF_S_ALU_MUL_X: 266 ALU(BPF_AND, &)
157 A *= X; 267 ALU(BPF_OR, |)
158 continue; 268 ALU(BPF_LSH, <<)
159 case BPF_S_ALU_MUL_K: 269 ALU(BPF_RSH, >>)
160 A *= K; 270 ALU(BPF_XOR, ^)
161 continue; 271 ALU(BPF_MUL, *)
162 case BPF_S_ALU_DIV_X: 272#undef ALU
163 if (X == 0) 273 BPF_ALU_BPF_NEG_0:
164 return 0; 274 A = (u32) -A;
165 A /= X; 275 CONT;
166 continue; 276 BPF_ALU64_BPF_NEG_0:
167 case BPF_S_ALU_DIV_K: 277 A = -A;
168 A /= K; 278 CONT;
169 continue; 279 BPF_ALU_BPF_MOV_BPF_X:
170 case BPF_S_ALU_MOD_X: 280 A = (u32) X;
171 if (X == 0) 281 CONT;
172 return 0; 282 BPF_ALU_BPF_MOV_BPF_K:
173 A %= X; 283 A = (u32) K;
174 continue; 284 CONT;
175 case BPF_S_ALU_MOD_K: 285 BPF_ALU64_BPF_MOV_BPF_X:
176 A %= K; 286 A = X;
177 continue; 287 CONT;
178 case BPF_S_ALU_AND_X: 288 BPF_ALU64_BPF_MOV_BPF_K:
179 A &= X; 289 A = K;
180 continue; 290 CONT;
181 case BPF_S_ALU_AND_K: 291 BPF_ALU64_BPF_ARSH_BPF_X:
182 A &= K; 292 (*(s64 *) &A) >>= X;
183 continue; 293 CONT;
184 case BPF_S_ALU_OR_X: 294 BPF_ALU64_BPF_ARSH_BPF_K:
185 A |= X; 295 (*(s64 *) &A) >>= K;
186 continue; 296 CONT;
187 case BPF_S_ALU_OR_K: 297 BPF_ALU64_BPF_MOD_BPF_X:
188 A |= K; 298 tmp = A;
189 continue; 299 if (X)
190 case BPF_S_ANC_ALU_XOR_X: 300 A = do_div(tmp, X);
191 case BPF_S_ALU_XOR_X: 301 CONT;
192 A ^= X; 302 BPF_ALU_BPF_MOD_BPF_X:
193 continue; 303 tmp = (u32) A;
194 case BPF_S_ALU_XOR_K: 304 if (X)
195 A ^= K; 305 A = do_div(tmp, (u32) X);
196 continue; 306 CONT;
197 case BPF_S_ALU_LSH_X: 307 BPF_ALU64_BPF_MOD_BPF_K:
198 A <<= X; 308 tmp = A;
199 continue; 309 if (K)
200 case BPF_S_ALU_LSH_K: 310 A = do_div(tmp, K);
201 A <<= K; 311 CONT;
202 continue; 312 BPF_ALU_BPF_MOD_BPF_K:
203 case BPF_S_ALU_RSH_X: 313 tmp = (u32) A;
204 A >>= X; 314 if (K)
205 continue; 315 A = do_div(tmp, (u32) K);
206 case BPF_S_ALU_RSH_K: 316 CONT;
207 A >>= K; 317 BPF_ALU64_BPF_DIV_BPF_X:
208 continue; 318 if (X)
209 case BPF_S_ALU_NEG: 319 do_div(A, X);
210 A = -A; 320 CONT;
211 continue; 321 BPF_ALU_BPF_DIV_BPF_X:
212 case BPF_S_JMP_JA: 322 tmp = (u32) A;
213 fentry += K; 323 if (X)
214 continue; 324 do_div(tmp, (u32) X);
215 case BPF_S_JMP_JGT_K: 325 A = (u32) tmp;
216 fentry += (A > K) ? fentry->jt : fentry->jf; 326 CONT;
217 continue; 327 BPF_ALU64_BPF_DIV_BPF_K:
218 case BPF_S_JMP_JGE_K: 328 if (K)
219 fentry += (A >= K) ? fentry->jt : fentry->jf; 329 do_div(A, K);
220 continue; 330 CONT;
221 case BPF_S_JMP_JEQ_K: 331 BPF_ALU_BPF_DIV_BPF_K:
222 fentry += (A == K) ? fentry->jt : fentry->jf; 332 tmp = (u32) A;
223 continue; 333 if (K)
224 case BPF_S_JMP_JSET_K: 334 do_div(tmp, (u32) K);
225 fentry += (A & K) ? fentry->jt : fentry->jf; 335 A = (u32) tmp;
226 continue; 336 CONT;
227 case BPF_S_JMP_JGT_X: 337 BPF_ALU_BPF_END_BPF_TO_BE:
228 fentry += (A > X) ? fentry->jt : fentry->jf; 338 switch (K) {
229 continue; 339 case 16:
230 case BPF_S_JMP_JGE_X: 340 A = (__force u16) cpu_to_be16(A);
231 fentry += (A >= X) ? fentry->jt : fentry->jf; 341 break;
232 continue; 342 case 32:
233 case BPF_S_JMP_JEQ_X: 343 A = (__force u32) cpu_to_be32(A);
234 fentry += (A == X) ? fentry->jt : fentry->jf; 344 break;
235 continue; 345 case 64:
236 case BPF_S_JMP_JSET_X: 346 A = (__force u64) cpu_to_be64(A);
237 fentry += (A & X) ? fentry->jt : fentry->jf; 347 break;
238 continue; 348 }
239 case BPF_S_LD_W_ABS: 349 CONT;
240 k = K; 350 BPF_ALU_BPF_END_BPF_TO_LE:
241load_w: 351 switch (K) {
242 ptr = load_pointer(skb, k, 4, &tmp); 352 case 16:
243 if (ptr != NULL) { 353 A = (__force u16) cpu_to_le16(A);
244 A = get_unaligned_be32(ptr); 354 break;
245 continue; 355 case 32:
246 } 356 A = (__force u32) cpu_to_le32(A);
247 return 0; 357 break;
248 case BPF_S_LD_H_ABS: 358 case 64:
249 k = K; 359 A = (__force u64) cpu_to_le64(A);
250load_h: 360 break;
251 ptr = load_pointer(skb, k, 2, &tmp); 361 }
252 if (ptr != NULL) { 362 CONT;
253 A = get_unaligned_be16(ptr); 363
254 continue; 364 /* CALL */
365 BPF_JMP_BPF_CALL_0:
366 /* Function call scratches R1-R5 registers, preserves R6-R9,
367 * and stores return value into R0.
368 */
369 R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3],
370 regs[4], regs[5]);
371 CONT;
372
373 /* JMP */
374 BPF_JMP_BPF_JA_0:
375 insn += insn->off;
376 CONT;
377 BPF_JMP_BPF_JEQ_BPF_X:
378 if (A == X) {
379 insn += insn->off;
380 CONT_JMP;
381 }
382 CONT;
383 BPF_JMP_BPF_JEQ_BPF_K:
384 if (A == K) {
385 insn += insn->off;
386 CONT_JMP;
387 }
388 CONT;
389 BPF_JMP_BPF_JNE_BPF_X:
390 if (A != X) {
391 insn += insn->off;
392 CONT_JMP;
393 }
394 CONT;
395 BPF_JMP_BPF_JNE_BPF_K:
396 if (A != K) {
397 insn += insn->off;
398 CONT_JMP;
399 }
400 CONT;
401 BPF_JMP_BPF_JGT_BPF_X:
402 if (A > X) {
403 insn += insn->off;
404 CONT_JMP;
405 }
406 CONT;
407 BPF_JMP_BPF_JGT_BPF_K:
408 if (A > K) {
409 insn += insn->off;
410 CONT_JMP;
411 }
412 CONT;
413 BPF_JMP_BPF_JGE_BPF_X:
414 if (A >= X) {
415 insn += insn->off;
416 CONT_JMP;
417 }
418 CONT;
419 BPF_JMP_BPF_JGE_BPF_K:
420 if (A >= K) {
421 insn += insn->off;
422 CONT_JMP;
423 }
424 CONT;
425 BPF_JMP_BPF_JSGT_BPF_X:
426 if (((s64)A) > ((s64)X)) {
427 insn += insn->off;
428 CONT_JMP;
429 }
430 CONT;
431 BPF_JMP_BPF_JSGT_BPF_K:
432 if (((s64)A) > ((s64)K)) {
433 insn += insn->off;
434 CONT_JMP;
435 }
436 CONT;
437 BPF_JMP_BPF_JSGE_BPF_X:
438 if (((s64)A) >= ((s64)X)) {
439 insn += insn->off;
440 CONT_JMP;
441 }
442 CONT;
443 BPF_JMP_BPF_JSGE_BPF_K:
444 if (((s64)A) >= ((s64)K)) {
445 insn += insn->off;
446 CONT_JMP;
447 }
448 CONT;
449 BPF_JMP_BPF_JSET_BPF_X:
450 if (A & X) {
451 insn += insn->off;
452 CONT_JMP;
453 }
454 CONT;
455 BPF_JMP_BPF_JSET_BPF_K:
456 if (A & K) {
457 insn += insn->off;
458 CONT_JMP;
459 }
460 CONT;
461 BPF_JMP_BPF_EXIT_0:
462 return R0;
463
464 /* STX and ST and LDX*/
465#define LDST(SIZEOP, SIZE) \
466 BPF_STX_BPF_MEM_##SIZEOP: \
467 *(SIZE *)(unsigned long) (A + insn->off) = X; \
468 CONT; \
469 BPF_ST_BPF_MEM_##SIZEOP: \
470 *(SIZE *)(unsigned long) (A + insn->off) = K; \
471 CONT; \
472 BPF_LDX_BPF_MEM_##SIZEOP: \
473 A = *(SIZE *)(unsigned long) (X + insn->off); \
474 CONT;
475
476 LDST(BPF_B, u8)
477 LDST(BPF_H, u16)
478 LDST(BPF_W, u32)
479 LDST(BPF_DW, u64)
480#undef LDST
481 BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */
482 atomic_add((u32) X, (atomic_t *)(unsigned long)
483 (A + insn->off));
484 CONT;
485 BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
486 atomic64_add((u64) X, (atomic64_t *)(unsigned long)
487 (A + insn->off));
488 CONT;
489 BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
490 off = K;
491load_word:
492 /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
493 * appearing in the programs where ctx == skb. All programs
494 * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter()
495 * saves it in R6, internal BPF verifier will check that
496 * R6 == ctx.
497 *
498 * BPF_ABS and BPF_IND are wrappers of function calls, so
499 * they scratch R1-R5 registers, preserve R6-R9, and store
500 * return value into R0.
501 *
502 * Implicit input:
503 * ctx
504 *
505 * Explicit input:
506 * X == any register
507 * K == 32-bit immediate
508 *
509 * Output:
510 * R0 - 8/16/32-bit skb data converted to cpu endianness
511 */
512 ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
513 if (likely(ptr != NULL)) {
514 R0 = get_unaligned_be32(ptr);
515 CONT;
516 }
517 return 0;
518 BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
519 off = K;
520load_half:
521 ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
522 if (likely(ptr != NULL)) {
523 R0 = get_unaligned_be16(ptr);
524 CONT;
525 }
526 return 0;
527 BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */
528 off = K;
529load_byte:
530 ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
531 if (likely(ptr != NULL)) {
532 R0 = *(u8 *)ptr;
533 CONT;
534 }
535 return 0;
536 BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
537 off = K + X;
538 goto load_word;
539 BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
540 off = K + X;
541 goto load_half;
542 BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */
543 off = K + X;
544 goto load_byte;
545
546 default_label:
547 /* If we ever reach this, we have a bug somewhere. */
548 WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
549 return 0;
550#undef CONT_JMP
551#undef CONT
552
553#undef R0
554#undef X
555#undef A
556#undef K
557}
558
559u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
560 const struct sock_filter_int *insni)
561 __attribute__ ((alias ("__sk_run_filter")));
562
563u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
564 const struct sock_filter_int *insni)
565 __attribute__ ((alias ("__sk_run_filter")));
566EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);
567
568/* Helper to find the offset of pkt_type in sk_buff structure. We want
569 * to make sure its still a 3bit field starting at a byte boundary;
570 * taken from arch/x86/net/bpf_jit_comp.c.
571 */
572#define PKT_TYPE_MAX 7
573static unsigned int pkt_type_offset(void)
574{
575 struct sk_buff skb_probe = { .pkt_type = ~0, };
576 u8 *ct = (u8 *) &skb_probe;
577 unsigned int off;
578
579 for (off = 0; off < sizeof(struct sk_buff); off++) {
580 if (ct[off] == PKT_TYPE_MAX)
581 return off;
582 }
583
584 pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
585 return -1;
586}
587
588static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
589{
590 struct sk_buff *skb = (struct sk_buff *)(long) ctx;
591
592 return __skb_get_poff(skb);
593}
594
595static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
596{
597 struct sk_buff *skb = (struct sk_buff *)(long) ctx;
598 struct nlattr *nla;
599
600 if (skb_is_nonlinear(skb))
601 return 0;
602
603 if (A > skb->len - sizeof(struct nlattr))
604 return 0;
605
606 nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X);
607 if (nla)
608 return (void *) nla - (void *) skb->data;
609
610 return 0;
611}
612
613static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
614{
615 struct sk_buff *skb = (struct sk_buff *)(long) ctx;
616 struct nlattr *nla;
617
618 if (skb_is_nonlinear(skb))
619 return 0;
620
621 if (A > skb->len - sizeof(struct nlattr))
622 return 0;
623
624 nla = (struct nlattr *) &skb->data[A];
625 if (nla->nla_len > A - skb->len)
626 return 0;
627
628 nla = nla_find_nested(nla, X);
629 if (nla)
630 return (void *) nla - (void *) skb->data;
631
632 return 0;
633}
634
635static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
636{
637 return raw_smp_processor_id();
638}
639
640/* Register mappings for user programs. */
641#define A_REG 0
642#define X_REG 7
643#define TMP_REG 8
644#define ARG2_REG 2
645#define ARG3_REG 3
646
647static bool convert_bpf_extensions(struct sock_filter *fp,
648 struct sock_filter_int **insnp)
649{
650 struct sock_filter_int *insn = *insnp;
651
652 switch (fp->k) {
653 case SKF_AD_OFF + SKF_AD_PROTOCOL:
654 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
655
656 insn->code = BPF_LDX | BPF_MEM | BPF_H;
657 insn->a_reg = A_REG;
658 insn->x_reg = CTX_REG;
659 insn->off = offsetof(struct sk_buff, protocol);
660 insn++;
661
662 /* A = ntohs(A) [emitting a nop or swap16] */
663 insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
664 insn->a_reg = A_REG;
665 insn->imm = 16;
666 break;
667
668 case SKF_AD_OFF + SKF_AD_PKTTYPE:
669 insn->code = BPF_LDX | BPF_MEM | BPF_B;
670 insn->a_reg = A_REG;
671 insn->x_reg = CTX_REG;
672 insn->off = pkt_type_offset();
673 if (insn->off < 0)
674 return false;
675 insn++;
676
677 insn->code = BPF_ALU | BPF_AND | BPF_K;
678 insn->a_reg = A_REG;
679 insn->imm = PKT_TYPE_MAX;
680 break;
681
682 case SKF_AD_OFF + SKF_AD_IFINDEX:
683 case SKF_AD_OFF + SKF_AD_HATYPE:
684 if (FIELD_SIZEOF(struct sk_buff, dev) == 8)
685 insn->code = BPF_LDX | BPF_MEM | BPF_DW;
686 else
687 insn->code = BPF_LDX | BPF_MEM | BPF_W;
688 insn->a_reg = TMP_REG;
689 insn->x_reg = CTX_REG;
690 insn->off = offsetof(struct sk_buff, dev);
691 insn++;
692
693 insn->code = BPF_JMP | BPF_JNE | BPF_K;
694 insn->a_reg = TMP_REG;
695 insn->imm = 0;
696 insn->off = 1;
697 insn++;
698
699 insn->code = BPF_JMP | BPF_EXIT;
700 insn++;
701
702 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
703 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
704
705 insn->a_reg = A_REG;
706 insn->x_reg = TMP_REG;
707
708 if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
709 insn->code = BPF_LDX | BPF_MEM | BPF_W;
710 insn->off = offsetof(struct net_device, ifindex);
711 } else {
712 insn->code = BPF_LDX | BPF_MEM | BPF_H;
713 insn->off = offsetof(struct net_device, type);
714 }
715 break;
716
717 case SKF_AD_OFF + SKF_AD_MARK:
718 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
719
720 insn->code = BPF_LDX | BPF_MEM | BPF_W;
721 insn->a_reg = A_REG;
722 insn->x_reg = CTX_REG;
723 insn->off = offsetof(struct sk_buff, mark);
724 break;
725
726 case SKF_AD_OFF + SKF_AD_RXHASH:
727 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
728
729 insn->code = BPF_LDX | BPF_MEM | BPF_W;
730 insn->a_reg = A_REG;
731 insn->x_reg = CTX_REG;
732 insn->off = offsetof(struct sk_buff, hash);
733 break;
734
735 case SKF_AD_OFF + SKF_AD_QUEUE:
736 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
737
738 insn->code = BPF_LDX | BPF_MEM | BPF_H;
739 insn->a_reg = A_REG;
740 insn->x_reg = CTX_REG;
741 insn->off = offsetof(struct sk_buff, queue_mapping);
742 break;
743
744 case SKF_AD_OFF + SKF_AD_VLAN_TAG:
745 case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
746 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
747
748 insn->code = BPF_LDX | BPF_MEM | BPF_H;
749 insn->a_reg = A_REG;
750 insn->x_reg = CTX_REG;
751 insn->off = offsetof(struct sk_buff, vlan_tci);
752 insn++;
753
754 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
755
756 if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
757 insn->code = BPF_ALU | BPF_AND | BPF_K;
758 insn->a_reg = A_REG;
759 insn->imm = ~VLAN_TAG_PRESENT;
760 } else {
761 insn->code = BPF_ALU | BPF_RSH | BPF_K;
762 insn->a_reg = A_REG;
763 insn->imm = 12;
764 insn++;
765
766 insn->code = BPF_ALU | BPF_AND | BPF_K;
767 insn->a_reg = A_REG;
768 insn->imm = 1;
769 }
770 break;
771
772 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
773 case SKF_AD_OFF + SKF_AD_NLATTR:
774 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
775 case SKF_AD_OFF + SKF_AD_CPU:
776 /* arg1 = ctx */
777 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
778 insn->a_reg = ARG1_REG;
779 insn->x_reg = CTX_REG;
780 insn++;
781
782 /* arg2 = A */
783 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
784 insn->a_reg = ARG2_REG;
785 insn->x_reg = A_REG;
786 insn++;
787
788 /* arg3 = X */
789 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
790 insn->a_reg = ARG3_REG;
791 insn->x_reg = X_REG;
792 insn++;
793
794 /* Emit call(ctx, arg2=A, arg3=X) */
795 insn->code = BPF_JMP | BPF_CALL;
796 switch (fp->k) {
797 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
798 insn->imm = __skb_get_pay_offset - __bpf_call_base;
799 break;
800 case SKF_AD_OFF + SKF_AD_NLATTR:
801 insn->imm = __skb_get_nlattr - __bpf_call_base;
802 break;
803 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
804 insn->imm = __skb_get_nlattr_nest - __bpf_call_base;
805 break;
806 case SKF_AD_OFF + SKF_AD_CPU:
807 insn->imm = __get_raw_cpu_id - __bpf_call_base;
808 break;
809 }
810 break;
811
812 case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
813 insn->code = BPF_ALU | BPF_XOR | BPF_X;
814 insn->a_reg = A_REG;
815 insn->x_reg = X_REG;
816 break;
817
818 default:
819 /* This is just a dummy call to avoid letting the compiler
820 * evict __bpf_call_base() as an optimization. Placed here
821 * where no-one bothers.
822 */
823 BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
824 return false;
825 }
826
827 *insnp = insn;
828 return true;
829}
830
831/**
832 * sk_convert_filter - convert filter program
833 * @prog: the user passed filter program
834 * @len: the length of the user passed filter program
835 * @new_prog: buffer where converted program will be stored
836 * @new_len: pointer to store length of converted program
837 *
838 * Remap 'sock_filter' style BPF instruction set to 'sock_filter_ext' style.
839 * Conversion workflow:
840 *
841 * 1) First pass for calculating the new program length:
842 * sk_convert_filter(old_prog, old_len, NULL, &new_len)
843 *
844 * 2) 2nd pass to remap in two passes: 1st pass finds new
845 * jump offsets, 2nd pass remapping:
846 * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len);
847 * sk_convert_filter(old_prog, old_len, new_prog, &new_len);
848 *
849 * User BPF's register A is mapped to our BPF register 6, user BPF
850 * register X is mapped to BPF register 7; frame pointer is always
851 * register 10; Context 'void *ctx' is stored in register 1, that is,
852 * for socket filters: ctx == 'struct sk_buff *', for seccomp:
853 * ctx == 'struct seccomp_data *'.
854 */
855int sk_convert_filter(struct sock_filter *prog, int len,
856 struct sock_filter_int *new_prog, int *new_len)
857{
858 int new_flen = 0, pass = 0, target, i;
859 struct sock_filter_int *new_insn;
860 struct sock_filter *fp;
861 int *addrs = NULL;
862 u8 bpf_src;
863
864 BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
865 BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG);
866
867 if (len <= 0 || len >= BPF_MAXINSNS)
868 return -EINVAL;
869
870 if (new_prog) {
871 addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL);
872 if (!addrs)
873 return -ENOMEM;
874 }
875
876do_pass:
877 new_insn = new_prog;
878 fp = prog;
879
880 if (new_insn) {
881 new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
882 new_insn->a_reg = CTX_REG;
883 new_insn->x_reg = ARG1_REG;
884 }
885 new_insn++;
886
887 for (i = 0; i < len; fp++, i++) {
888 struct sock_filter_int tmp_insns[6] = { };
889 struct sock_filter_int *insn = tmp_insns;
890
891 if (addrs)
892 addrs[i] = new_insn - new_prog;
893
894 switch (fp->code) {
895 /* All arithmetic insns and skb loads map as-is. */
896 case BPF_ALU | BPF_ADD | BPF_X:
897 case BPF_ALU | BPF_ADD | BPF_K:
898 case BPF_ALU | BPF_SUB | BPF_X:
899 case BPF_ALU | BPF_SUB | BPF_K:
900 case BPF_ALU | BPF_AND | BPF_X:
901 case BPF_ALU | BPF_AND | BPF_K:
902 case BPF_ALU | BPF_OR | BPF_X:
903 case BPF_ALU | BPF_OR | BPF_K:
904 case BPF_ALU | BPF_LSH | BPF_X:
905 case BPF_ALU | BPF_LSH | BPF_K:
906 case BPF_ALU | BPF_RSH | BPF_X:
907 case BPF_ALU | BPF_RSH | BPF_K:
908 case BPF_ALU | BPF_XOR | BPF_X:
909 case BPF_ALU | BPF_XOR | BPF_K:
910 case BPF_ALU | BPF_MUL | BPF_X:
911 case BPF_ALU | BPF_MUL | BPF_K:
912 case BPF_ALU | BPF_DIV | BPF_X:
913 case BPF_ALU | BPF_DIV | BPF_K:
914 case BPF_ALU | BPF_MOD | BPF_X:
915 case BPF_ALU | BPF_MOD | BPF_K:
916 case BPF_ALU | BPF_NEG:
917 case BPF_LD | BPF_ABS | BPF_W:
918 case BPF_LD | BPF_ABS | BPF_H:
919 case BPF_LD | BPF_ABS | BPF_B:
920 case BPF_LD | BPF_IND | BPF_W:
921 case BPF_LD | BPF_IND | BPF_H:
922 case BPF_LD | BPF_IND | BPF_B:
923 /* Check for overloaded BPF extension and
924 * directly convert it if found, otherwise
925 * just move on with mapping.
926 */
927 if (BPF_CLASS(fp->code) == BPF_LD &&
928 BPF_MODE(fp->code) == BPF_ABS &&
929 convert_bpf_extensions(fp, &insn))
930 break;
931
932 insn->code = fp->code;
933 insn->a_reg = A_REG;
934 insn->x_reg = X_REG;
935 insn->imm = fp->k;
936 break;
937
938 /* Jump opcodes map as-is, but offsets need adjustment. */
939 case BPF_JMP | BPF_JA:
940 target = i + fp->k + 1;
941 insn->code = fp->code;
942#define EMIT_JMP \
943 do { \
944 if (target >= len || target < 0) \
945 goto err; \
946 insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
947 /* Adjust pc relative offset for 2nd or 3rd insn. */ \
948 insn->off -= insn - tmp_insns; \
949 } while (0)
950
951 EMIT_JMP;
952 break;
953
954 case BPF_JMP | BPF_JEQ | BPF_K:
955 case BPF_JMP | BPF_JEQ | BPF_X:
956 case BPF_JMP | BPF_JSET | BPF_K:
957 case BPF_JMP | BPF_JSET | BPF_X:
958 case BPF_JMP | BPF_JGT | BPF_K:
959 case BPF_JMP | BPF_JGT | BPF_X:
960 case BPF_JMP | BPF_JGE | BPF_K:
961 case BPF_JMP | BPF_JGE | BPF_X:
962 if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
963 /* BPF immediates are signed, zero extend
964 * immediate into tmp register and use it
965 * in compare insn.
966 */
967 insn->code = BPF_ALU | BPF_MOV | BPF_K;
968 insn->a_reg = TMP_REG;
969 insn->imm = fp->k;
970 insn++;
971
972 insn->a_reg = A_REG;
973 insn->x_reg = TMP_REG;
974 bpf_src = BPF_X;
975 } else {
976 insn->a_reg = A_REG;
977 insn->x_reg = X_REG;
978 insn->imm = fp->k;
979 bpf_src = BPF_SRC(fp->code);
255 } 980 }
256 return 0; 981
257 case BPF_S_LD_B_ABS: 982 /* Common case where 'jump_false' is next insn. */
258 k = K; 983 if (fp->jf == 0) {
259load_b: 984 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
260 ptr = load_pointer(skb, k, 1, &tmp); 985 target = i + fp->jt + 1;
261 if (ptr != NULL) { 986 EMIT_JMP;
262 A = *(u8 *)ptr; 987 break;
263 continue;
264 } 988 }
265 return 0; 989
266 case BPF_S_LD_W_LEN: 990 /* Convert JEQ into JNE when 'jump_true' is next insn. */
267 A = skb->len; 991 if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
268 continue; 992 insn->code = BPF_JMP | BPF_JNE | bpf_src;
269 case BPF_S_LDX_W_LEN: 993 target = i + fp->jf + 1;
270 X = skb->len; 994 EMIT_JMP;
271 continue; 995 break;
272 case BPF_S_LD_W_IND:
273 k = X + K;
274 goto load_w;
275 case BPF_S_LD_H_IND:
276 k = X + K;
277 goto load_h;
278 case BPF_S_LD_B_IND:
279 k = X + K;
280 goto load_b;
281 case BPF_S_LDX_B_MSH:
282 ptr = load_pointer(skb, K, 1, &tmp);
283 if (ptr != NULL) {
284 X = (*(u8 *)ptr & 0xf) << 2;
285 continue;
286 } 996 }
287 return 0; 997
288 case BPF_S_LD_IMM: 998 /* Other jumps are mapped into two insns: Jxx and JA. */
289 A = K; 999 target = i + fp->jt + 1;
290 continue; 1000 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
291 case BPF_S_LDX_IMM: 1001 EMIT_JMP;
292 X = K; 1002 insn++;
293 continue; 1003
294 case BPF_S_LD_MEM: 1004 insn->code = BPF_JMP | BPF_JA;
295 A = mem[K]; 1005 target = i + fp->jf + 1;
296 continue; 1006 EMIT_JMP;
297 case BPF_S_LDX_MEM: 1007 break;
298 X = mem[K]; 1008
299 continue; 1009 /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
300 case BPF_S_MISC_TAX: 1010 case BPF_LDX | BPF_MSH | BPF_B:
301 X = A; 1011 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
302 continue; 1012 insn->a_reg = TMP_REG;
303 case BPF_S_MISC_TXA: 1013 insn->x_reg = A_REG;
304 A = X; 1014 insn++;
305 continue; 1015
306 case BPF_S_RET_K: 1016 insn->code = BPF_LD | BPF_ABS | BPF_B;
307 return K; 1017 insn->a_reg = A_REG;
308 case BPF_S_RET_A: 1018 insn->imm = fp->k;
309 return A; 1019 insn++;
310 case BPF_S_ST: 1020
311 mem[K] = A; 1021 insn->code = BPF_ALU | BPF_AND | BPF_K;
312 continue; 1022 insn->a_reg = A_REG;
313 case BPF_S_STX: 1023 insn->imm = 0xf;
314 mem[K] = X; 1024 insn++;
315 continue; 1025
316 case BPF_S_ANC_PROTOCOL: 1026 insn->code = BPF_ALU | BPF_LSH | BPF_K;
317 A = ntohs(skb->protocol); 1027 insn->a_reg = A_REG;
318 continue; 1028 insn->imm = 2;
319 case BPF_S_ANC_PKTTYPE: 1029 insn++;
320 A = skb->pkt_type; 1030
321 continue; 1031 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
322 case BPF_S_ANC_IFINDEX: 1032 insn->a_reg = X_REG;
323 if (!skb->dev) 1033 insn->x_reg = A_REG;
324 return 0; 1034 insn++;
325 A = skb->dev->ifindex; 1035
326 continue; 1036 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
327 case BPF_S_ANC_MARK: 1037 insn->a_reg = A_REG;
328 A = skb->mark; 1038 insn->x_reg = TMP_REG;
329 continue; 1039 break;
330 case BPF_S_ANC_QUEUE: 1040
331 A = skb->queue_mapping; 1041 /* RET_K, RET_A are remaped into 2 insns. */
332 continue; 1042 case BPF_RET | BPF_A:
333 case BPF_S_ANC_HATYPE: 1043 case BPF_RET | BPF_K:
334 if (!skb->dev) 1044 insn->code = BPF_ALU | BPF_MOV |
335 return 0; 1045 (BPF_RVAL(fp->code) == BPF_K ?
336 A = skb->dev->type; 1046 BPF_K : BPF_X);
337 continue; 1047 insn->a_reg = 0;
338 case BPF_S_ANC_RXHASH: 1048 insn->x_reg = A_REG;
339 A = skb->hash; 1049 insn->imm = fp->k;
340 continue; 1050 insn++;
341 case BPF_S_ANC_CPU: 1051
342 A = raw_smp_processor_id(); 1052 insn->code = BPF_JMP | BPF_EXIT;
343 continue; 1053 break;
344 case BPF_S_ANC_VLAN_TAG: 1054
345 A = vlan_tx_tag_get(skb); 1055 /* Store to stack. */
346 continue; 1056 case BPF_ST:
347 case BPF_S_ANC_VLAN_TAG_PRESENT: 1057 case BPF_STX:
348 A = !!vlan_tx_tag_present(skb); 1058 insn->code = BPF_STX | BPF_MEM | BPF_W;
349 continue; 1059 insn->a_reg = FP_REG;
350 case BPF_S_ANC_PAY_OFFSET: 1060 insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG;
351 A = __skb_get_poff(skb); 1061 insn->off = -(BPF_MEMWORDS - fp->k) * 4;
352 continue; 1062 break;
353 case BPF_S_ANC_NLATTR: { 1063
354 struct nlattr *nla; 1064 /* Load from stack. */
355 1065 case BPF_LD | BPF_MEM:
356 if (skb_is_nonlinear(skb)) 1066 case BPF_LDX | BPF_MEM:
357 return 0; 1067 insn->code = BPF_LDX | BPF_MEM | BPF_W;
358 if (A > skb->len - sizeof(struct nlattr)) 1068 insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
359 return 0; 1069 A_REG : X_REG;
360 1070 insn->x_reg = FP_REG;
361 nla = nla_find((struct nlattr *)&skb->data[A], 1071 insn->off = -(BPF_MEMWORDS - fp->k) * 4;
362 skb->len - A, X); 1072 break;
363 if (nla) 1073
364 A = (void *)nla - (void *)skb->data; 1074 /* A = K or X = K */
365 else 1075 case BPF_LD | BPF_IMM:
366 A = 0; 1076 case BPF_LDX | BPF_IMM:
367 continue; 1077 insn->code = BPF_ALU | BPF_MOV | BPF_K;
368 } 1078 insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
369 case BPF_S_ANC_NLATTR_NEST: { 1079 A_REG : X_REG;
370 struct nlattr *nla; 1080 insn->imm = fp->k;
371 1081 break;
372 if (skb_is_nonlinear(skb)) 1082
373 return 0; 1083 /* X = A */
374 if (A > skb->len - sizeof(struct nlattr)) 1084 case BPF_MISC | BPF_TAX:
375 return 0; 1085 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
376 1086 insn->a_reg = X_REG;
377 nla = (struct nlattr *)&skb->data[A]; 1087 insn->x_reg = A_REG;
378 if (nla->nla_len > A - skb->len) 1088 break;
379 return 0; 1089
380 1090 /* A = X */
381 nla = nla_find_nested(nla, X); 1091 case BPF_MISC | BPF_TXA:
382 if (nla) 1092 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
383 A = (void *)nla - (void *)skb->data; 1093 insn->a_reg = A_REG;
384 else 1094 insn->x_reg = X_REG;
385 A = 0; 1095 break;
386 continue; 1096
387 } 1097 /* A = skb->len or X = skb->len */
388#ifdef CONFIG_SECCOMP_FILTER 1098 case BPF_LD | BPF_W | BPF_LEN:
389 case BPF_S_ANC_SECCOMP_LD_W: 1099 case BPF_LDX | BPF_W | BPF_LEN:
390 A = seccomp_bpf_load(fentry->k); 1100 insn->code = BPF_LDX | BPF_MEM | BPF_W;
391 continue; 1101 insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
392#endif 1102 A_REG : X_REG;
1103 insn->x_reg = CTX_REG;
1104 insn->off = offsetof(struct sk_buff, len);
1105 break;
1106
1107 /* access seccomp_data fields */
1108 case BPF_LDX | BPF_ABS | BPF_W:
1109 insn->code = BPF_LDX | BPF_MEM | BPF_W;
1110 insn->a_reg = A_REG;
1111 insn->x_reg = CTX_REG;
1112 insn->off = fp->k;
1113 break;
1114
393 default: 1115 default:
394 WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n", 1116 goto err;
395 fentry->code, fentry->jt,
396 fentry->jf, fentry->k);
397 return 0;
398 } 1117 }
1118
1119 insn++;
1120 if (new_prog)
1121 memcpy(new_insn, tmp_insns,
1122 sizeof(*insn) * (insn - tmp_insns));
1123
1124 new_insn += insn - tmp_insns;
399 } 1125 }
400 1126
1127 if (!new_prog) {
1128 /* Only calculating new length. */
1129 *new_len = new_insn - new_prog;
1130 return 0;
1131 }
1132
1133 pass++;
1134 if (new_flen != new_insn - new_prog) {
1135 new_flen = new_insn - new_prog;
1136 if (pass > 2)
1137 goto err;
1138
1139 goto do_pass;
1140 }
1141
1142 kfree(addrs);
1143 BUG_ON(*new_len != new_flen);
401 return 0; 1144 return 0;
1145err:
1146 kfree(addrs);
1147 return -EINVAL;
402} 1148}
403EXPORT_SYMBOL(sk_run_filter);
404 1149
405/* 1150/* Security:
406 * Security : 1151 *
407 * A BPF program is able to use 16 cells of memory to store intermediate 1152 * A BPF program is able to use 16 cells of memory to store intermediate
408 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()) 1153 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
1154 *
409 * As we dont want to clear mem[] array for each packet going through 1155 * As we dont want to clear mem[] array for each packet going through
410 * sk_run_filter(), we check that filter loaded by user never try to read 1156 * sk_run_filter(), we check that filter loaded by user never try to read
411 * a cell if not previously written, and we check all branches to be sure 1157 * a cell if not previously written, and we check all branches to be sure
@@ -696,19 +1442,130 @@ void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
696 atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc); 1442 atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
697} 1443}
698 1444
699static int __sk_prepare_filter(struct sk_filter *fp) 1445static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
1446 struct sock *sk,
1447 unsigned int len)
1448{
1449 struct sk_filter *fp_new;
1450
1451 if (sk == NULL)
1452 return krealloc(fp, len, GFP_KERNEL);
1453
1454 fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
1455 if (fp_new) {
1456 memcpy(fp_new, fp, sizeof(struct sk_filter));
1457 /* As we're kepping orig_prog in fp_new along,
1458 * we need to make sure we're not evicting it
1459 * from the old fp.
1460 */
1461 fp->orig_prog = NULL;
1462 sk_filter_uncharge(sk, fp);
1463 }
1464
1465 return fp_new;
1466}
1467
1468static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
1469 struct sock *sk)
1470{
1471 struct sock_filter *old_prog;
1472 struct sk_filter *old_fp;
1473 int i, err, new_len, old_len = fp->len;
1474
1475 /* We are free to overwrite insns et al right here as it
1476 * won't be used at this point in time anymore internally
1477 * after the migration to the internal BPF instruction
1478 * representation.
1479 */
1480 BUILD_BUG_ON(sizeof(struct sock_filter) !=
1481 sizeof(struct sock_filter_int));
1482
1483 /* For now, we need to unfiddle BPF_S_* identifiers in place.
1484 * This can sooner or later on be subject to removal, e.g. when
1485 * JITs have been converted.
1486 */
1487 for (i = 0; i < fp->len; i++)
1488 sk_decode_filter(&fp->insns[i], &fp->insns[i]);
1489
1490 /* Conversion cannot happen on overlapping memory areas,
1491 * so we need to keep the user BPF around until the 2nd
1492 * pass. At this time, the user BPF is stored in fp->insns.
1493 */
1494 old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
1495 GFP_KERNEL);
1496 if (!old_prog) {
1497 err = -ENOMEM;
1498 goto out_err;
1499 }
1500
1501 /* 1st pass: calculate the new program length. */
1502 err = sk_convert_filter(old_prog, old_len, NULL, &new_len);
1503 if (err)
1504 goto out_err_free;
1505
1506 /* Expand fp for appending the new filter representation. */
1507 old_fp = fp;
1508 fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len));
1509 if (!fp) {
1510 /* The old_fp is still around in case we couldn't
1511 * allocate new memory, so uncharge on that one.
1512 */
1513 fp = old_fp;
1514 err = -ENOMEM;
1515 goto out_err_free;
1516 }
1517
1518 fp->bpf_func = sk_run_filter_int_skb;
1519 fp->len = new_len;
1520
1521 /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
1522 err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
1523 if (err)
1524 /* 2nd sk_convert_filter() can fail only if it fails
1525 * to allocate memory, remapping must succeed. Note,
1526 * that at this time old_fp has already been released
1527 * by __sk_migrate_realloc().
1528 */
1529 goto out_err_free;
1530
1531 kfree(old_prog);
1532 return fp;
1533
1534out_err_free:
1535 kfree(old_prog);
1536out_err:
1537 /* Rollback filter setup. */
1538 if (sk != NULL)
1539 sk_filter_uncharge(sk, fp);
1540 else
1541 kfree(fp);
1542 return ERR_PTR(err);
1543}
1544
1545static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
1546 struct sock *sk)
700{ 1547{
701 int err; 1548 int err;
702 1549
703 fp->bpf_func = sk_run_filter; 1550 fp->bpf_func = NULL;
704 fp->jited = 0; 1551 fp->jited = 0;
705 1552
706 err = sk_chk_filter(fp->insns, fp->len); 1553 err = sk_chk_filter(fp->insns, fp->len);
707 if (err) 1554 if (err)
708 return err; 1555 return ERR_PTR(err);
709 1556
1557 /* Probe if we can JIT compile the filter and if so, do
1558 * the compilation of the filter.
1559 */
710 bpf_jit_compile(fp); 1560 bpf_jit_compile(fp);
711 return 0; 1561
1562 /* JIT compiler couldn't process this filter, so do the
1563 * internal BPF translation for the optimized interpreter.
1564 */
1565 if (!fp->jited)
1566 fp = __sk_migrate_filter(fp, sk);
1567
1568 return fp;
712} 1569}
713 1570
714/** 1571/**
@@ -726,7 +1583,6 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
726{ 1583{
727 unsigned int fsize = sk_filter_proglen(fprog); 1584 unsigned int fsize = sk_filter_proglen(fprog);
728 struct sk_filter *fp; 1585 struct sk_filter *fp;
729 int err;
730 1586
731 /* Make sure new filter is there and in the right amounts. */ 1587 /* Make sure new filter is there and in the right amounts. */
732 if (fprog->filter == NULL) 1588 if (fprog->filter == NULL)
@@ -746,15 +1602,15 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
746 */ 1602 */
747 fp->orig_prog = NULL; 1603 fp->orig_prog = NULL;
748 1604
749 err = __sk_prepare_filter(fp); 1605 /* __sk_prepare_filter() already takes care of uncharging
750 if (err) 1606 * memory in case something goes wrong.
751 goto free_mem; 1607 */
1608 fp = __sk_prepare_filter(fp, NULL);
1609 if (IS_ERR(fp))
1610 return PTR_ERR(fp);
752 1611
753 *pfp = fp; 1612 *pfp = fp;
754 return 0; 1613 return 0;
755free_mem:
756 kfree(fp);
757 return err;
758} 1614}
759EXPORT_SYMBOL_GPL(sk_unattached_filter_create); 1615EXPORT_SYMBOL_GPL(sk_unattached_filter_create);
760 1616
@@ -806,11 +1662,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
806 return -ENOMEM; 1662 return -ENOMEM;
807 } 1663 }
808 1664
809 err = __sk_prepare_filter(fp); 1665 /* __sk_prepare_filter() already takes care of uncharging
810 if (err) { 1666 * memory in case something goes wrong.
811 sk_filter_uncharge(sk, fp); 1667 */
812 return err; 1668 fp = __sk_prepare_filter(fp, sk);
813 } 1669 if (IS_ERR(fp))
1670 return PTR_ERR(fp);
814 1671
815 old_fp = rcu_dereference_protected(sk->sk_filter, 1672 old_fp = rcu_dereference_protected(sk->sk_filter,
816 sock_owned_by_user(sk)); 1673 sock_owned_by_user(sk));