diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-04-20 05:27:32 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-04-28 02:05:08 -0400 |
commit | 0a14842f5a3c0e88a1e59fac5c3025db39721f74 (patch) | |
tree | 4d0832c4c9ced2503e2d899eb56952f87511d4ab /net | |
parent | b678027cb77b079bc8e5b94172995d173bdb494b (diff) |
net: filter: Just In Time compiler for x86-64
In order to speed up packet filtering, here is an implementation of a
JIT compiler for x86_64.
It is disabled by default, and must be enabled by the admin:
echo 1 >/proc/sys/net/core/bpf_jit_enable
It uses module_alloc() and module_free() to get memory in the 2GB kernel
text range, since we call helper functions from the generated code.
EAX : BPF A accumulator
EBX : BPF X index register
RDI : pointer to skb (first argument given to JIT function)
RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
r9d : skb->len - skb->data_len (headlen)
r8 : skb->data
To get a trace of generated code, use :
echo 2 >/proc/sys/net/core/bpf_jit_enable
Example of generated code :
# tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24
flen=18 proglen=147 pass=3 image=ffffffffa00b5000
JIT code: ffffffffa00b5000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60
JIT code: ffffffffa00b5010: 44 2b 4f 64 4c 8b 87 b8 00 00 00 be 0c 00 00 00
JIT code: ffffffffa00b5020: e8 24 7b f7 e0 3d 00 08 00 00 75 28 be 1a 00 00
JIT code: ffffffffa00b5030: 00 e8 fe 7a f7 e0 24 00 3d 00 14 a8 c0 74 49 be
JIT code: ffffffffa00b5040: 1e 00 00 00 e8 eb 7a f7 e0 24 00 3d 00 14 a8 c0
JIT code: ffffffffa00b5050: 74 36 eb 3b 3d 06 08 00 00 74 07 3d 35 80 00 00
JIT code: ffffffffa00b5060: 75 2d be 1c 00 00 00 e8 c8 7a f7 e0 24 00 3d 00
JIT code: ffffffffa00b5070: 14 a8 c0 74 13 be 26 00 00 00 e8 b5 7a f7 e0 24
JIT code: ffffffffa00b5080: 00 3d 00 14 a8 c0 75 07 b8 ff ff 00 00 eb 02 31
JIT code: ffffffffa00b5090: c0 c9 c3
The BPF program is 144 bytes long, so the native program is almost the same size ;)
(000) ldh [12]
(001) jeq #0x800 jt 2 jf 8
(002) ld [26]
(003) and #0xffffff00
(004) jeq #0xc0a81400 jt 16 jf 5
(005) ld [30]
(006) and #0xffffff00
(007) jeq #0xc0a81400 jt 16 jf 17
(008) jeq #0x806 jt 10 jf 9
(009) jeq #0x8035 jt 10 jf 17
(010) ld [28]
(011) and #0xffffff00
(012) jeq #0xc0a81400 jt 16 jf 13
(013) ld [38]
(014) and #0xffffff00
(015) jeq #0xc0a81400 jt 16 jf 17
(016) ret #65535
(017) ret #0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/Kconfig | 13 | ||||
-rw-r--r-- | net/core/filter.c | 65 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 9 | ||||
-rw-r--r-- | net/packet/af_packet.c | 2 |
4 files changed, 28 insertions, 61 deletions
diff --git a/net/Kconfig b/net/Kconfig index 79cabf1ee68b..745fb02d2fda 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
@@ -232,6 +232,19 @@ config XPS | |||
232 | depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS | 232 | depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS |
233 | default y | 233 | default y |
234 | 234 | ||
235 | config HAVE_BPF_JIT | ||
236 | bool | ||
237 | |||
238 | config BPF_JIT | ||
239 | bool "enable BPF Just In Time compiler" | ||
240 | depends on HAVE_BPF_JIT | ||
241 | ---help--- | ||
242 | Berkeley Packet Filter filtering capabilities are normally handled | ||
243 | by an interpreter. This option allows kernel to generate a native | ||
244 | code when filter is loaded in memory. This should speedup | ||
245 | packet sniffing (libpcap/tcpdump). Note : Admin should enable | ||
246 | this feature changing /proc/sys/net/core/bpf_jit_enable | ||
247 | |||
235 | menu "Network testing" | 248 | menu "Network testing" |
236 | 249 | ||
237 | config NET_PKTGEN | 250 | config NET_PKTGEN |
diff --git a/net/core/filter.c b/net/core/filter.c index afb8afb066bb..0eb8c4466eaa 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -39,65 +39,6 @@ | |||
39 | #include <linux/filter.h> | 39 | #include <linux/filter.h> |
40 | #include <linux/reciprocal_div.h> | 40 | #include <linux/reciprocal_div.h> |
41 | 41 | ||
42 | enum { | ||
43 | BPF_S_RET_K = 1, | ||
44 | BPF_S_RET_A, | ||
45 | BPF_S_ALU_ADD_K, | ||
46 | BPF_S_ALU_ADD_X, | ||
47 | BPF_S_ALU_SUB_K, | ||
48 | BPF_S_ALU_SUB_X, | ||
49 | BPF_S_ALU_MUL_K, | ||
50 | BPF_S_ALU_MUL_X, | ||
51 | BPF_S_ALU_DIV_X, | ||
52 | BPF_S_ALU_AND_K, | ||
53 | BPF_S_ALU_AND_X, | ||
54 | BPF_S_ALU_OR_K, | ||
55 | BPF_S_ALU_OR_X, | ||
56 | BPF_S_ALU_LSH_K, | ||
57 | BPF_S_ALU_LSH_X, | ||
58 | BPF_S_ALU_RSH_K, | ||
59 | BPF_S_ALU_RSH_X, | ||
60 | BPF_S_ALU_NEG, | ||
61 | BPF_S_LD_W_ABS, | ||
62 | BPF_S_LD_H_ABS, | ||
63 | BPF_S_LD_B_ABS, | ||
64 | BPF_S_LD_W_LEN, | ||
65 | BPF_S_LD_W_IND, | ||
66 | BPF_S_LD_H_IND, | ||
67 | BPF_S_LD_B_IND, | ||
68 | BPF_S_LD_IMM, | ||
69 | BPF_S_LDX_W_LEN, | ||
70 | BPF_S_LDX_B_MSH, | ||
71 | BPF_S_LDX_IMM, | ||
72 | BPF_S_MISC_TAX, | ||
73 | BPF_S_MISC_TXA, | ||
74 | BPF_S_ALU_DIV_K, | ||
75 | BPF_S_LD_MEM, | ||
76 | BPF_S_LDX_MEM, | ||
77 | BPF_S_ST, | ||
78 | BPF_S_STX, | ||
79 | BPF_S_JMP_JA, | ||
80 | BPF_S_JMP_JEQ_K, | ||
81 | BPF_S_JMP_JEQ_X, | ||
82 | BPF_S_JMP_JGE_K, | ||
83 | BPF_S_JMP_JGE_X, | ||
84 | BPF_S_JMP_JGT_K, | ||
85 | BPF_S_JMP_JGT_X, | ||
86 | BPF_S_JMP_JSET_K, | ||
87 | BPF_S_JMP_JSET_X, | ||
88 | /* Ancillary data */ | ||
89 | BPF_S_ANC_PROTOCOL, | ||
90 | BPF_S_ANC_PKTTYPE, | ||
91 | BPF_S_ANC_IFINDEX, | ||
92 | BPF_S_ANC_NLATTR, | ||
93 | BPF_S_ANC_NLATTR_NEST, | ||
94 | BPF_S_ANC_MARK, | ||
95 | BPF_S_ANC_QUEUE, | ||
96 | BPF_S_ANC_HATYPE, | ||
97 | BPF_S_ANC_RXHASH, | ||
98 | BPF_S_ANC_CPU, | ||
99 | }; | ||
100 | |||
101 | /* No hurry in this branch */ | 42 | /* No hurry in this branch */ |
102 | static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) | 43 | static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) |
103 | { | 44 | { |
@@ -145,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) | |||
145 | rcu_read_lock(); | 86 | rcu_read_lock(); |
146 | filter = rcu_dereference(sk->sk_filter); | 87 | filter = rcu_dereference(sk->sk_filter); |
147 | if (filter) { | 88 | if (filter) { |
148 | unsigned int pkt_len = sk_run_filter(skb, filter->insns); | 89 | unsigned int pkt_len = SK_RUN_FILTER(filter, skb); |
149 | 90 | ||
150 | err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; | 91 | err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; |
151 | } | 92 | } |
@@ -638,6 +579,7 @@ void sk_filter_release_rcu(struct rcu_head *rcu) | |||
638 | { | 579 | { |
639 | struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); | 580 | struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); |
640 | 581 | ||
582 | bpf_jit_free(fp); | ||
641 | kfree(fp); | 583 | kfree(fp); |
642 | } | 584 | } |
643 | EXPORT_SYMBOL(sk_filter_release_rcu); | 585 | EXPORT_SYMBOL(sk_filter_release_rcu); |
@@ -672,6 +614,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
672 | 614 | ||
673 | atomic_set(&fp->refcnt, 1); | 615 | atomic_set(&fp->refcnt, 1); |
674 | fp->len = fprog->len; | 616 | fp->len = fprog->len; |
617 | fp->bpf_func = sk_run_filter; | ||
675 | 618 | ||
676 | err = sk_chk_filter(fp->insns, fp->len); | 619 | err = sk_chk_filter(fp->insns, fp->len); |
677 | if (err) { | 620 | if (err) { |
@@ -679,6 +622,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
679 | return err; | 622 | return err; |
680 | } | 623 | } |
681 | 624 | ||
625 | bpf_jit_compile(fp); | ||
626 | |||
682 | old_fp = rcu_dereference_protected(sk->sk_filter, | 627 | old_fp = rcu_dereference_protected(sk->sk_filter, |
683 | sock_owned_by_user(sk)); | 628 | sock_owned_by_user(sk)); |
684 | rcu_assign_pointer(sk->sk_filter, fp); | 629 | rcu_assign_pointer(sk->sk_filter, fp); |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 385b6095fdc4..a829e3f60aeb 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -122,6 +122,15 @@ static struct ctl_table net_core_table[] = { | |||
122 | .mode = 0644, | 122 | .mode = 0644, |
123 | .proc_handler = proc_dointvec | 123 | .proc_handler = proc_dointvec |
124 | }, | 124 | }, |
125 | #ifdef CONFIG_BPF_JIT | ||
126 | { | ||
127 | .procname = "bpf_jit_enable", | ||
128 | .data = &bpf_jit_enable, | ||
129 | .maxlen = sizeof(int), | ||
130 | .mode = 0644, | ||
131 | .proc_handler = proc_dointvec | ||
132 | }, | ||
133 | #endif | ||
125 | { | 134 | { |
126 | .procname = "netdev_tstamp_prequeue", | 135 | .procname = "netdev_tstamp_prequeue", |
127 | .data = &netdev_tstamp_prequeue, | 136 | .data = &netdev_tstamp_prequeue, |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b5362e96022b..549527bca87a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -538,7 +538,7 @@ static inline unsigned int run_filter(const struct sk_buff *skb, | |||
538 | rcu_read_lock(); | 538 | rcu_read_lock(); |
539 | filter = rcu_dereference(sk->sk_filter); | 539 | filter = rcu_dereference(sk->sk_filter); |
540 | if (filter != NULL) | 540 | if (filter != NULL) |
541 | res = sk_run_filter(skb, filter->insns); | 541 | res = SK_RUN_FILTER(filter, skb); |
542 | rcu_read_unlock(); | 542 | rcu_read_unlock(); |
543 | 543 | ||
544 | return res; | 544 | return res; |