summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2011-04-20 05:27:32 -0400
committerDavid S. Miller <davem@davemloft.net>2011-04-28 02:05:08 -0400
commit0a14842f5a3c0e88a1e59fac5c3025db39721f74 (patch)
tree4d0832c4c9ced2503e2d899eb56952f87511d4ab /net
parentb678027cb77b079bc8e5b94172995d173bdb494b (diff)
net: filter: Just In Time compiler for x86-64
In order to speedup packet filtering, here is an implementation of a JIT compiler for x86_64 It is disabled by default, and must be enabled by the admin. echo 1 >/proc/sys/net/core/bpf_jit_enable It uses module_alloc() and module_free() to get memory in the 2GB text kernel range since we call helpers functions from the generated code. EAX : BPF A accumulator EBX : BPF X accumulator RDI : pointer to skb (first argument given to JIT function) RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) r9d : skb->len - skb->data_len (headlen) r8 : skb->data To get a trace of generated code, use : echo 2 >/proc/sys/net/core/bpf_jit_enable Example of generated code : # tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24 flen=18 proglen=147 pass=3 image=ffffffffa00b5000 JIT code: ffffffffa00b5000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60 JIT code: ffffffffa00b5010: 44 2b 4f 64 4c 8b 87 b8 00 00 00 be 0c 00 00 00 JIT code: ffffffffa00b5020: e8 24 7b f7 e0 3d 00 08 00 00 75 28 be 1a 00 00 JIT code: ffffffffa00b5030: 00 e8 fe 7a f7 e0 24 00 3d 00 14 a8 c0 74 49 be JIT code: ffffffffa00b5040: 1e 00 00 00 e8 eb 7a f7 e0 24 00 3d 00 14 a8 c0 JIT code: ffffffffa00b5050: 74 36 eb 3b 3d 06 08 00 00 74 07 3d 35 80 00 00 JIT code: ffffffffa00b5060: 75 2d be 1c 00 00 00 e8 c8 7a f7 e0 24 00 3d 00 JIT code: ffffffffa00b5070: 14 a8 c0 74 13 be 26 00 00 00 e8 b5 7a f7 e0 24 JIT code: ffffffffa00b5080: 00 3d 00 14 a8 c0 75 07 b8 ff ff 00 00 eb 02 31 JIT code: ffffffffa00b5090: c0 c9 c3 BPF program is 144 bytes long, so native program is almost same size ;) (000) ldh [12] (001) jeq #0x800 jt 2 jf 8 (002) ld [26] (003) and #0xffffff00 (004) jeq #0xc0a81400 jt 16 jf 5 (005) ld [30] (006) and #0xffffff00 (007) jeq #0xc0a81400 jt 16 jf 17 (008) jeq #0x806 jt 10 jf 9 (009) jeq #0x8035 jt 10 jf 17 (010) ld [28] (011) and #0xffffff00 (012) jeq #0xc0a81400 jt 16 jf 13 (013) ld [38] (014) and #0xffffff00 (015) jeq #0xc0a81400 jt 16 jf 17 (016) ret #65535 (017) ret #0 Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Arnaldo Carvalho de Melo <acme@infradead.org> Cc: Ben Hutchings <bhutchings@solarflare.com> Cc: Hagen Paul Pfeifer <hagen@jauu.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig13
-rw-r--r--net/core/filter.c65
-rw-r--r--net/core/sysctl_net_core.c9
-rw-r--r--net/packet/af_packet.c2
4 files changed, 28 insertions, 61 deletions
diff --git a/net/Kconfig b/net/Kconfig
index 79cabf1ee68b..745fb02d2fda 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,6 +232,19 @@ config XPS
232 depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS 232 depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
233 default y 233 default y
234 234
235config HAVE_BPF_JIT
236 bool
237
238config BPF_JIT
239 bool "enable BPF Just In Time compiler"
240 depends on HAVE_BPF_JIT
241 ---help---
242 Berkeley Packet Filter filtering capabilities are normally handled
243 by an interpreter. This option allows kernel to generate a native
244 code when filter is loaded in memory. This should speedup
245 packet sniffing (libpcap/tcpdump). Note : Admin should enable
246 this feature changing /proc/sys/net/core/bpf_jit_enable
247
235menu "Network testing" 248menu "Network testing"
236 249
237config NET_PKTGEN 250config NET_PKTGEN
diff --git a/net/core/filter.c b/net/core/filter.c
index afb8afb066bb..0eb8c4466eaa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,65 +39,6 @@
39#include <linux/filter.h> 39#include <linux/filter.h>
40#include <linux/reciprocal_div.h> 40#include <linux/reciprocal_div.h>
41 41
42enum {
43 BPF_S_RET_K = 1,
44 BPF_S_RET_A,
45 BPF_S_ALU_ADD_K,
46 BPF_S_ALU_ADD_X,
47 BPF_S_ALU_SUB_K,
48 BPF_S_ALU_SUB_X,
49 BPF_S_ALU_MUL_K,
50 BPF_S_ALU_MUL_X,
51 BPF_S_ALU_DIV_X,
52 BPF_S_ALU_AND_K,
53 BPF_S_ALU_AND_X,
54 BPF_S_ALU_OR_K,
55 BPF_S_ALU_OR_X,
56 BPF_S_ALU_LSH_K,
57 BPF_S_ALU_LSH_X,
58 BPF_S_ALU_RSH_K,
59 BPF_S_ALU_RSH_X,
60 BPF_S_ALU_NEG,
61 BPF_S_LD_W_ABS,
62 BPF_S_LD_H_ABS,
63 BPF_S_LD_B_ABS,
64 BPF_S_LD_W_LEN,
65 BPF_S_LD_W_IND,
66 BPF_S_LD_H_IND,
67 BPF_S_LD_B_IND,
68 BPF_S_LD_IMM,
69 BPF_S_LDX_W_LEN,
70 BPF_S_LDX_B_MSH,
71 BPF_S_LDX_IMM,
72 BPF_S_MISC_TAX,
73 BPF_S_MISC_TXA,
74 BPF_S_ALU_DIV_K,
75 BPF_S_LD_MEM,
76 BPF_S_LDX_MEM,
77 BPF_S_ST,
78 BPF_S_STX,
79 BPF_S_JMP_JA,
80 BPF_S_JMP_JEQ_K,
81 BPF_S_JMP_JEQ_X,
82 BPF_S_JMP_JGE_K,
83 BPF_S_JMP_JGE_X,
84 BPF_S_JMP_JGT_K,
85 BPF_S_JMP_JGT_X,
86 BPF_S_JMP_JSET_K,
87 BPF_S_JMP_JSET_X,
88 /* Ancillary data */
89 BPF_S_ANC_PROTOCOL,
90 BPF_S_ANC_PKTTYPE,
91 BPF_S_ANC_IFINDEX,
92 BPF_S_ANC_NLATTR,
93 BPF_S_ANC_NLATTR_NEST,
94 BPF_S_ANC_MARK,
95 BPF_S_ANC_QUEUE,
96 BPF_S_ANC_HATYPE,
97 BPF_S_ANC_RXHASH,
98 BPF_S_ANC_CPU,
99};
100
101/* No hurry in this branch */ 42/* No hurry in this branch */
102static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) 43static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
103{ 44{
@@ -145,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
145 rcu_read_lock(); 86 rcu_read_lock();
146 filter = rcu_dereference(sk->sk_filter); 87 filter = rcu_dereference(sk->sk_filter);
147 if (filter) { 88 if (filter) {
148 unsigned int pkt_len = sk_run_filter(skb, filter->insns); 89 unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
149 90
150 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; 91 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
151 } 92 }
@@ -638,6 +579,7 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
638{ 579{
639 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); 580 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
640 581
582 bpf_jit_free(fp);
641 kfree(fp); 583 kfree(fp);
642} 584}
643EXPORT_SYMBOL(sk_filter_release_rcu); 585EXPORT_SYMBOL(sk_filter_release_rcu);
@@ -672,6 +614,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
672 614
673 atomic_set(&fp->refcnt, 1); 615 atomic_set(&fp->refcnt, 1);
674 fp->len = fprog->len; 616 fp->len = fprog->len;
617 fp->bpf_func = sk_run_filter;
675 618
676 err = sk_chk_filter(fp->insns, fp->len); 619 err = sk_chk_filter(fp->insns, fp->len);
677 if (err) { 620 if (err) {
@@ -679,6 +622,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
679 return err; 622 return err;
680 } 623 }
681 624
625 bpf_jit_compile(fp);
626
682 old_fp = rcu_dereference_protected(sk->sk_filter, 627 old_fp = rcu_dereference_protected(sk->sk_filter,
683 sock_owned_by_user(sk)); 628 sock_owned_by_user(sk));
684 rcu_assign_pointer(sk->sk_filter, fp); 629 rcu_assign_pointer(sk->sk_filter, fp);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 385b6095fdc4..a829e3f60aeb 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -122,6 +122,15 @@ static struct ctl_table net_core_table[] = {
122 .mode = 0644, 122 .mode = 0644,
123 .proc_handler = proc_dointvec 123 .proc_handler = proc_dointvec
124 }, 124 },
125#ifdef CONFIG_BPF_JIT
126 {
127 .procname = "bpf_jit_enable",
128 .data = &bpf_jit_enable,
129 .maxlen = sizeof(int),
130 .mode = 0644,
131 .proc_handler = proc_dointvec
132 },
133#endif
125 { 134 {
126 .procname = "netdev_tstamp_prequeue", 135 .procname = "netdev_tstamp_prequeue",
127 .data = &netdev_tstamp_prequeue, 136 .data = &netdev_tstamp_prequeue,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b5362e96022b..549527bca87a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -538,7 +538,7 @@ static inline unsigned int run_filter(const struct sk_buff *skb,
538 rcu_read_lock(); 538 rcu_read_lock();
539 filter = rcu_dereference(sk->sk_filter); 539 filter = rcu_dereference(sk->sk_filter);
540 if (filter != NULL) 540 if (filter != NULL)
541 res = sk_run_filter(skb, filter->insns); 541 res = SK_RUN_FILTER(filter, skb);
542 rcu_read_unlock(); 542 rcu_read_unlock();
543 543
544 return res; 544 return res;