aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2014-03-31 00:45:49 -0400
committer: David S. Miller <davem@davemloft.net> 2014-03-31 00:45:49 -0400
commit: 9109e17f7c3ace48629397b44db5ce06bf168644 (patch)
tree: 495b67bcf755829a5409da5b7444ea9b93f60b35 /include
parent: 64c27237a07129758e33f5f824ba5c33b7f57417 (diff)
parent: 9a985cdc5ccb0d557720221d01bd70c19f04bb8c (diff)
Merge branch 'filter-next'
Daniel Borkmann says:

====================
BPF updates

We sat down and have heavily reworked the whole previous patchset from v10 [1] to address all comments/concerns. This patchset therefore *replaces* the internal BPF interpreter with the new layout as discussed in [1], and migrates some exotic callers to properly use the BPF API for a transparent upgrade. All other callers that already use the BPF API in a way it should be used, need no further changes to run the new internals. We also removed the sysctl knob entirely, and do not expose any structure to userland, so that implementation details only reside in kernel space. Since we are replacing the interpreter we had to migrate seccomp in one patch along with the interpreter to not break anything. When attaching a new filter, the flow can be described as follows: i) test if jit compiler is enabled and can compile the user BPF, ii) if so, then go for it, iii) if not, then transparently migrate the filter into the new representation, and run it in the interpreter. Also, we have scratched the jit flag from the len attribute and made it as initial patch in this series as Pablo has suggested in the last feedback, thanks. For details, please refer to the patches themselves.

We did extensive testing of BPF and seccomp on the new interpreter itself and also on the user ABIs and could not find any issues; new performance numbers as posted in patch 8 are also still the same.

Please find more details in the patches themselves.

For all the previous history from v1 to v10, see [1]. We have decided to drop the v11 as we have pedantically reworked the set, but of course, included all previous feedback.

v3 -> v4:
 - Applied feedback from Dave regarding swap insns
 - Rebased on net-next
v2 -> v3:
 - Rebased to latest net-next (i.e. w/ rxhash->hash rename)
 - Fixed patch 8/9 commit message/doc as suggested by Dave
 - Rest is unchanged
v1 -> v2:
 - Rebased to latest net-next
 - Added static to ptp_filter as suggested by Dave
 - Fixed a typo in patch 8's commit message
 - Rest unchanged

Thanks !

 [1] http://thread.gmane.org/gmane.linux.kernel/1665858
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- include/linux/filter.h 118
-rw-r--r-- include/linux/isdn_ppp.h 5
-rw-r--r-- include/linux/ptp_classify.h 14
-rw-r--r-- include/linux/seccomp.h 1
-rw-r--r-- include/net/sock.h 27
5 files changed, 98 insertions, 67 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h
index e568c8ef896b..262dcbb75ffe 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -9,28 +9,81 @@
9#include <linux/workqueue.h> 9#include <linux/workqueue.h>
10#include <uapi/linux/filter.h> 10#include <uapi/linux/filter.h>
11 11
12#ifdef CONFIG_COMPAT 12/* Internally used and optimized filter representation with extended
13/* 13 * instruction set based on top of classic BPF.
14 * A struct sock_filter is architecture independent.
15 */ 14 */
15
16/* instruction classes */
17#define BPF_ALU64 0x07 /* alu mode in double word width */
18
19/* ld/ldx fields */
20#define BPF_DW 0x18 /* double word */
21#define BPF_XADD 0xc0 /* exclusive add */
22
23/* alu/jmp fields */
24#define BPF_MOV 0xb0 /* mov reg to reg */
25#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
26
27/* change endianness of a register */
28#define BPF_END 0xd0 /* flags for endianness conversion: */
29#define BPF_TO_LE 0x00 /* convert to little-endian */
30#define BPF_TO_BE 0x08 /* convert to big-endian */
31#define BPF_FROM_LE BPF_TO_LE
32#define BPF_FROM_BE BPF_TO_BE
33
34#define BPF_JNE 0x50 /* jump != */
35#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
36#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
37#define BPF_CALL 0x80 /* function call */
38#define BPF_EXIT 0x90 /* function return */
39
40/* BPF has 10 general purpose 64-bit registers and stack frame. */
41#define MAX_BPF_REG 11
42
43/* BPF program can access up to 512 bytes of stack space. */
44#define MAX_BPF_STACK 512
45
46/* Arg1, context and stack frame pointer register positions. */
47#define ARG1_REG 1
48#define CTX_REG 6
49#define FP_REG 10
50
51struct sock_filter_int {
52 __u8 code; /* opcode */
53 __u8 a_reg:4; /* dest register */
54 __u8 x_reg:4; /* source register */
55 __s16 off; /* signed offset */
56 __s32 imm; /* signed immediate constant */
57};
58
59#ifdef CONFIG_COMPAT
60/* A struct sock_filter is architecture independent. */
16struct compat_sock_fprog { 61struct compat_sock_fprog {
17 u16 len; 62 u16 len;
18 compat_uptr_t filter; /* struct sock_filter * */ 63 compat_uptr_t filter; /* struct sock_filter * */
19}; 64};
20#endif 65#endif
21 66
67struct sock_fprog_kern {
68 u16 len;
69 struct sock_filter *filter;
70};
71
22struct sk_buff; 72struct sk_buff;
23struct sock; 73struct sock;
74struct seccomp_data;
24 75
25struct sk_filter 76struct sk_filter {
26{
27 atomic_t refcnt; 77 atomic_t refcnt;
28 unsigned int len; /* Number of filter blocks */ 78 u32 jited:1, /* Is our filter JIT'ed? */
79 len:31; /* Number of filter blocks */
80 struct sock_fprog_kern *orig_prog; /* Original BPF program */
29 struct rcu_head rcu; 81 struct rcu_head rcu;
30 unsigned int (*bpf_func)(const struct sk_buff *skb, 82 unsigned int (*bpf_func)(const struct sk_buff *skb,
31 const struct sock_filter *filter); 83 const struct sock_filter_int *filter);
32 union { 84 union {
33 struct sock_filter insns[0]; 85 struct sock_filter insns[0];
86 struct sock_filter_int insnsi[0];
34 struct work_struct work; 87 struct work_struct work;
35 }; 88 };
36}; 89};
@@ -41,25 +94,44 @@ static inline unsigned int sk_filter_size(unsigned int proglen)
41 offsetof(struct sk_filter, insns[proglen])); 94 offsetof(struct sk_filter, insns[proglen]));
42} 95}
43 96
44extern int sk_filter(struct sock *sk, struct sk_buff *skb); 97#define sk_filter_proglen(fprog) \
45extern unsigned int sk_run_filter(const struct sk_buff *skb, 98 (fprog->len * sizeof(fprog->filter[0]))
46 const struct sock_filter *filter); 99
47extern int sk_unattached_filter_create(struct sk_filter **pfp, 100#define SK_RUN_FILTER(filter, ctx) \
48 struct sock_fprog *fprog); 101 (*filter->bpf_func)(ctx, filter->insnsi)
49extern void sk_unattached_filter_destroy(struct sk_filter *fp); 102
50extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); 103int sk_filter(struct sock *sk, struct sk_buff *skb);
51extern int sk_detach_filter(struct sock *sk); 104
52extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); 105u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
53extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); 106 const struct sock_filter_int *insni);
54extern void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); 107u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
108 const struct sock_filter_int *insni);
109
110int sk_convert_filter(struct sock_filter *prog, int len,
111 struct sock_filter_int *new_prog, int *new_len);
112
113int sk_unattached_filter_create(struct sk_filter **pfp,
114 struct sock_fprog *fprog);
115void sk_unattached_filter_destroy(struct sk_filter *fp);
116
117int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
118int sk_detach_filter(struct sock *sk);
119
120int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
121int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
122 unsigned int len);
123void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
124
125void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
126void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
55 127
56#ifdef CONFIG_BPF_JIT 128#ifdef CONFIG_BPF_JIT
57#include <stdarg.h> 129#include <stdarg.h>
58#include <linux/linkage.h> 130#include <linux/linkage.h>
59#include <linux/printk.h> 131#include <linux/printk.h>
60 132
61extern void bpf_jit_compile(struct sk_filter *fp); 133void bpf_jit_compile(struct sk_filter *fp);
62extern void bpf_jit_free(struct sk_filter *fp); 134void bpf_jit_free(struct sk_filter *fp);
63 135
64static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, 136static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
65 u32 pass, void *image) 137 u32 pass, void *image)
@@ -70,7 +142,6 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
70 print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 142 print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET,
71 16, 1, image, proglen, false); 143 16, 1, image, proglen, false);
72} 144}
73#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
74#else 145#else
75#include <linux/slab.h> 146#include <linux/slab.h>
76static inline void bpf_jit_compile(struct sk_filter *fp) 147static inline void bpf_jit_compile(struct sk_filter *fp)
@@ -80,7 +151,6 @@ static inline void bpf_jit_free(struct sk_filter *fp)
80{ 151{
81 kfree(fp); 152 kfree(fp);
82} 153}
83#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
84#endif 154#endif
85 155
86static inline int bpf_tell_extensions(void) 156static inline int bpf_tell_extensions(void)
diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h
index d5f62bc5f4be..8e10f57f109f 100644
--- a/include/linux/isdn_ppp.h
+++ b/include/linux/isdn_ppp.h
@@ -180,9 +180,8 @@ struct ippp_struct {
180 struct slcompress *slcomp; 180 struct slcompress *slcomp;
181#endif 181#endif
182#ifdef CONFIG_IPPP_FILTER 182#ifdef CONFIG_IPPP_FILTER
183 struct sock_filter *pass_filter; /* filter for packets to pass */ 183 struct sk_filter *pass_filter; /* filter for packets to pass */
184 struct sock_filter *active_filter; /* filter for pkts to reset idle */ 184 struct sk_filter *active_filter; /* filter for pkts to reset idle */
185 unsigned pass_len, active_len;
186#endif 185#endif
187 unsigned long debug; 186 unsigned long debug;
188 struct isdn_ppp_compressor *compressor,*decompressor; 187 struct isdn_ppp_compressor *compressor,*decompressor;
diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index 1dc420ba213a..6d3b0a2ef9ce 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -27,11 +27,7 @@
27#include <linux/if_vlan.h> 27#include <linux/if_vlan.h>
28#include <linux/ip.h> 28#include <linux/ip.h>
29#include <linux/filter.h> 29#include <linux/filter.h>
30#ifdef __KERNEL__
31#include <linux/in.h> 30#include <linux/in.h>
32#else
33#include <netinet/in.h>
34#endif
35 31
36#define PTP_CLASS_NONE 0x00 /* not a PTP event message */ 32#define PTP_CLASS_NONE 0x00 /* not a PTP event message */
37#define PTP_CLASS_V1 0x01 /* protocol version 1 */ 33#define PTP_CLASS_V1 0x01 /* protocol version 1 */
@@ -84,14 +80,6 @@
84#define OP_RETA (BPF_RET | BPF_A) 80#define OP_RETA (BPF_RET | BPF_A)
85#define OP_RETK (BPF_RET | BPF_K) 81#define OP_RETK (BPF_RET | BPF_K)
86 82
87static inline int ptp_filter_init(struct sock_filter *f, int len)
88{
89 if (OP_LDH == f[0].code)
90 return sk_chk_filter(f, len);
91 else
92 return 0;
93}
94
95#define PTP_FILTER \ 83#define PTP_FILTER \
96 {OP_LDH, 0, 0, OFF_ETYPE }, /* */ \ 84 {OP_LDH, 0, 0, OFF_ETYPE }, /* */ \
97 {OP_JEQ, 0, 12, ETH_P_IP }, /* f goto L20 */ \ 85 {OP_JEQ, 0, 12, ETH_P_IP }, /* f goto L20 */ \
@@ -137,4 +125,6 @@ static inline int ptp_filter_init(struct sock_filter *f, int len)
137 {OP_RETA, 0, 0, 0 }, /* */ \ 125 {OP_RETA, 0, 0, 0 }, /* */ \
138/*L6x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, 126/*L6x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE },
139 127
128unsigned int ptp_classify_raw(const struct sk_buff *skb);
129
140#endif 130#endif
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 6f19cfd1840e..4054b0994071 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -76,7 +76,6 @@ static inline int seccomp_mode(struct seccomp *s)
76#ifdef CONFIG_SECCOMP_FILTER 76#ifdef CONFIG_SECCOMP_FILTER
77extern void put_seccomp_filter(struct task_struct *tsk); 77extern void put_seccomp_filter(struct task_struct *tsk);
78extern void get_seccomp_filter(struct task_struct *tsk); 78extern void get_seccomp_filter(struct task_struct *tsk);
79extern u32 seccomp_bpf_load(int off);
80#else /* CONFIG_SECCOMP_FILTER */ 79#else /* CONFIG_SECCOMP_FILTER */
81static inline void put_seccomp_filter(struct task_struct *tsk) 80static inline void put_seccomp_filter(struct task_struct *tsk)
82{ 81{
diff --git a/include/net/sock.h b/include/net/sock.h
index 8d7c431a0660..06a5668f05c9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1621,33 +1621,6 @@ void sk_common_release(struct sock *sk);
1621/* Initialise core socket variables */ 1621/* Initialise core socket variables */
1622void sock_init_data(struct socket *sock, struct sock *sk); 1622void sock_init_data(struct socket *sock, struct sock *sk);
1623 1623
1624void sk_filter_release_rcu(struct rcu_head *rcu);
1625
1626/**
1627 * sk_filter_release - release a socket filter
1628 * @fp: filter to remove
1629 *
1630 * Remove a filter from a socket and release its resources.
1631 */
1632
1633static inline void sk_filter_release(struct sk_filter *fp)
1634{
1635 if (atomic_dec_and_test(&fp->refcnt))
1636 call_rcu(&fp->rcu, sk_filter_release_rcu);
1637}
1638
1639static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
1640{
1641 atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
1642 sk_filter_release(fp);
1643}
1644
1645static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1646{
1647 atomic_inc(&fp->refcnt);
1648 atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
1649}
1650
1651/* 1624/*
1652 * Socket reference counting postulates. 1625 * Socket reference counting postulates.
1653 * 1626 *