diff options
author | Alexei Starovoitov <ast@plumgrid.com> | 2014-12-01 18:06:39 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-12-06 00:47:34 -0500 |
commit | fbe3310840c65f3cf97dd90d23e177d061c376f2 (patch) | |
tree | ee28163a6c53e0131fd2d3d626d02b0610eaed2b /samples/bpf | |
parent | a80857822b0c2ed608c93504bd3687b78f20c619 (diff) |
samples: bpf: large eBPF program in C
sockex2_kern.c is a purposefully large eBPF program written in C.
LLVM compiles ~200 lines of C code into ~300 eBPF instructions.
It's similar to __skb_flow_dissect() to demonstrate that complex packet parsing
can be done by eBPF.
Then it uses the (struct flow_keys)->dst IP address (or a hash of the IPv6 dst
address) to keep stats on the number of packets per IP.
User space loads the eBPF program, attaches it to the loopback interface and
prints dest_ip->#packets stats every second.
Usage:
$sudo samples/bpf/sockex2
ip 127.0.0.1 count 19
ip 127.0.0.1 count 178115
ip 127.0.0.1 count 369437
ip 127.0.0.1 count 559841
ip 127.0.0.1 count 750539
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples/bpf')
-rw-r--r-- | samples/bpf/Makefile | 4 | ||||
-rw-r--r-- | samples/bpf/sockex2_kern.c | 215 | ||||
-rw-r--r-- | samples/bpf/sockex2_user.c | 44 |
3 files changed, 263 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 770d145186c3..b5b3600dcdf5 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile | |||
@@ -5,20 +5,24 @@ obj- := dummy.o | |||
5 | hostprogs-y := test_verifier test_maps | 5 | hostprogs-y := test_verifier test_maps |
6 | hostprogs-y += sock_example | 6 | hostprogs-y += sock_example |
7 | hostprogs-y += sockex1 | 7 | hostprogs-y += sockex1 |
8 | hostprogs-y += sockex2 | ||
8 | 9 | ||
9 | test_verifier-objs := test_verifier.o libbpf.o | 10 | test_verifier-objs := test_verifier.o libbpf.o |
10 | test_maps-objs := test_maps.o libbpf.o | 11 | test_maps-objs := test_maps.o libbpf.o |
11 | sock_example-objs := sock_example.o libbpf.o | 12 | sock_example-objs := sock_example.o libbpf.o |
12 | sockex1-objs := bpf_load.o libbpf.o sockex1_user.o | 13 | sockex1-objs := bpf_load.o libbpf.o sockex1_user.o |
14 | sockex2-objs := bpf_load.o libbpf.o sockex2_user.o | ||
13 | 15 | ||
14 | # Tell kbuild to always build the programs | 16 | # Tell kbuild to always build the programs |
15 | always := $(hostprogs-y) | 17 | always := $(hostprogs-y) |
16 | always += sockex1_kern.o | 18 | always += sockex1_kern.o |
19 | always += sockex2_kern.o | ||
17 | 20 | ||
18 | HOSTCFLAGS += -I$(objtree)/usr/include | 21 | HOSTCFLAGS += -I$(objtree)/usr/include |
19 | 22 | ||
20 | HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable | 23 | HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable |
21 | HOSTLOADLIBES_sockex1 += -lelf | 24 | HOSTLOADLIBES_sockex1 += -lelf |
25 | HOSTLOADLIBES_sockex2 += -lelf | ||
22 | 26 | ||
23 | # point this to your LLVM backend with bpf support | 27 | # point this to your LLVM backend with bpf support |
24 | LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc | 28 | LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc |
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c new file mode 100644 index 000000000000..6f0135f0f217 --- /dev/null +++ b/samples/bpf/sockex2_kern.c | |||
@@ -0,0 +1,215 @@ | |||
1 | #include <uapi/linux/bpf.h> | ||
2 | #include "bpf_helpers.h" | ||
3 | #include <uapi/linux/in.h> | ||
4 | #include <uapi/linux/if.h> | ||
5 | #include <uapi/linux/if_ether.h> | ||
6 | #include <uapi/linux/ip.h> | ||
7 | #include <uapi/linux/ipv6.h> | ||
8 | #include <uapi/linux/if_tunnel.h> | ||
9 | #define IP_MF 0x2000 | ||
10 | #define IP_OFFSET 0x1FFF | ||
11 | |||
/*
 * VLAN tag header. flow_dissector() reads it right after an Ethernet (or
 * GRE) header whose protocol field is ETH_P_8021AD or ETH_P_8021Q.
 */
struct vlan_hdr {
	/* Tag control field; this program never reads it. */
	__be16 h_vlan_TCI;
	/* Protocol of whatever follows the tag. */
	__be16 h_vlan_encapsulated_proto;
};
16 | |||
/*
 * Result of flow_dissector(): the fields extracted from one packet.
 */
struct flow_keys {
	/* IPv4 source address, or the XOR-folded IPv6 source address. */
	__be32 src;
	/* IPv4 destination address, or the XOR-folded IPv6 destination. */
	__be32 dst;
	/* Word loaded at the transport header, viewable as two 16-bit halves. */
	union {
		__be32 ports;
		__be16 port16[2];
	};
	/* Offset at which the ports word was read, truncated to 16 bits. */
	__u16 thoff;
	/* Protocol number reported by the innermost parsed IP header. */
	__u8 ip_proto;
};
27 | |||
/*
 * proto_ports_offset - offset added to the transport-header position
 * before the ports word is loaded
 * @proto: IP protocol number (IPPROTO_*)
 *
 * Returns 4 for IPPROTO_AH and 0 for every other value, including
 * protocols that are not named explicitly. The result is never negative.
 */
static inline int proto_ports_offset(__u64 proto)
{
	if (proto == IPPROTO_AH)
		return 4;

	/* TCP, UDP, DCCP, ESP, SCTP, UDPLITE and every remaining protocol. */
	return 0;
}
44 | |||
45 | static inline int ip_is_fragment(struct sk_buff *ctx, __u64 nhoff) | ||
46 | { | ||
47 | return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off)) | ||
48 | & (IP_MF | IP_OFFSET); | ||
49 | } | ||
50 | |||
51 | static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off) | ||
52 | { | ||
53 | __u64 w0 = load_word(ctx, off); | ||
54 | __u64 w1 = load_word(ctx, off + 4); | ||
55 | __u64 w2 = load_word(ctx, off + 8); | ||
56 | __u64 w3 = load_word(ctx, off + 12); | ||
57 | |||
58 | return (__u32)(w0 ^ w1 ^ w2 ^ w3); | ||
59 | } | ||
60 | |||
61 | static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto, | ||
62 | struct flow_keys *flow) | ||
63 | { | ||
64 | __u64 verlen; | ||
65 | |||
66 | if (unlikely(ip_is_fragment(skb, nhoff))) | ||
67 | *ip_proto = 0; | ||
68 | else | ||
69 | *ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); | ||
70 | |||
71 | if (*ip_proto != IPPROTO_GRE) { | ||
72 | flow->src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); | ||
73 | flow->dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); | ||
74 | } | ||
75 | |||
76 | verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); | ||
77 | if (likely(verlen == 0x45)) | ||
78 | nhoff += 20; | ||
79 | else | ||
80 | nhoff += (verlen & 0xF) << 2; | ||
81 | |||
82 | return nhoff; | ||
83 | } | ||
84 | |||
85 | static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto, | ||
86 | struct flow_keys *flow) | ||
87 | { | ||
88 | *ip_proto = load_byte(skb, | ||
89 | nhoff + offsetof(struct ipv6hdr, nexthdr)); | ||
90 | flow->src = ipv6_addr_hash(skb, | ||
91 | nhoff + offsetof(struct ipv6hdr, saddr)); | ||
92 | flow->dst = ipv6_addr_hash(skb, | ||
93 | nhoff + offsetof(struct ipv6hdr, daddr)); | ||
94 | nhoff += sizeof(struct ipv6hdr); | ||
95 | |||
96 | return nhoff; | ||
97 | } | ||
98 | |||
99 | static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow) | ||
100 | { | ||
101 | __u64 nhoff = ETH_HLEN; | ||
102 | __u64 ip_proto; | ||
103 | __u64 proto = load_half(skb, 12); | ||
104 | int poff; | ||
105 | |||
106 | if (proto == ETH_P_8021AD) { | ||
107 | proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, | ||
108 | h_vlan_encapsulated_proto)); | ||
109 | nhoff += sizeof(struct vlan_hdr); | ||
110 | } | ||
111 | |||
112 | if (proto == ETH_P_8021Q) { | ||
113 | proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, | ||
114 | h_vlan_encapsulated_proto)); | ||
115 | nhoff += sizeof(struct vlan_hdr); | ||
116 | } | ||
117 | |||
118 | if (likely(proto == ETH_P_IP)) | ||
119 | nhoff = parse_ip(skb, nhoff, &ip_proto, flow); | ||
120 | else if (proto == ETH_P_IPV6) | ||
121 | nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); | ||
122 | else | ||
123 | return false; | ||
124 | |||
125 | switch (ip_proto) { | ||
126 | case IPPROTO_GRE: { | ||
127 | struct gre_hdr { | ||
128 | __be16 flags; | ||
129 | __be16 proto; | ||
130 | }; | ||
131 | |||
132 | __u64 gre_flags = load_half(skb, | ||
133 | nhoff + offsetof(struct gre_hdr, flags)); | ||
134 | __u64 gre_proto = load_half(skb, | ||
135 | nhoff + offsetof(struct gre_hdr, proto)); | ||
136 | |||
137 | if (gre_flags & (GRE_VERSION|GRE_ROUTING)) | ||
138 | break; | ||
139 | |||
140 | proto = gre_proto; | ||
141 | nhoff += 4; | ||
142 | if (gre_flags & GRE_CSUM) | ||
143 | nhoff += 4; | ||
144 | if (gre_flags & GRE_KEY) | ||
145 | nhoff += 4; | ||
146 | if (gre_flags & GRE_SEQ) | ||
147 | nhoff += 4; | ||
148 | |||
149 | if (proto == ETH_P_8021Q) { | ||
150 | proto = load_half(skb, | ||
151 | nhoff + offsetof(struct vlan_hdr, | ||
152 | h_vlan_encapsulated_proto)); | ||
153 | nhoff += sizeof(struct vlan_hdr); | ||
154 | } | ||
155 | |||
156 | if (proto == ETH_P_IP) | ||
157 | nhoff = parse_ip(skb, nhoff, &ip_proto, flow); | ||
158 | else if (proto == ETH_P_IPV6) | ||
159 | nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); | ||
160 | else | ||
161 | return false; | ||
162 | break; | ||
163 | } | ||
164 | case IPPROTO_IPIP: | ||
165 | nhoff = parse_ip(skb, nhoff, &ip_proto, flow); | ||
166 | break; | ||
167 | case IPPROTO_IPV6: | ||
168 | nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); | ||
169 | break; | ||
170 | default: | ||
171 | break; | ||
172 | } | ||
173 | |||
174 | flow->ip_proto = ip_proto; | ||
175 | poff = proto_ports_offset(ip_proto); | ||
176 | if (poff >= 0) { | ||
177 | nhoff += poff; | ||
178 | flow->ports = load_word(skb, nhoff); | ||
179 | } | ||
180 | |||
181 | flow->thoff = (__u16) nhoff; | ||
182 | |||
183 | return true; | ||
184 | } | ||
185 | |||
186 | struct bpf_map_def SEC("maps") hash_map = { | ||
187 | .type = BPF_MAP_TYPE_HASH, | ||
188 | .key_size = sizeof(__be32), | ||
189 | .value_size = sizeof(long), | ||
190 | .max_entries = 1024, | ||
191 | }; | ||
192 | |||
193 | SEC("socket2") | ||
194 | int bpf_prog2(struct sk_buff *skb) | ||
195 | { | ||
196 | struct flow_keys flow; | ||
197 | long *value; | ||
198 | u32 key; | ||
199 | |||
200 | if (!flow_dissector(skb, &flow)) | ||
201 | return 0; | ||
202 | |||
203 | key = flow.dst; | ||
204 | value = bpf_map_lookup_elem(&hash_map, &key); | ||
205 | if (value) { | ||
206 | __sync_fetch_and_add(value, 1); | ||
207 | } else { | ||
208 | long val = 1; | ||
209 | |||
210 | bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY); | ||
211 | } | ||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | char _license[] SEC("license") = "GPL"; | ||
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c new file mode 100644 index 000000000000..d2d5f5a790d3 --- /dev/null +++ b/samples/bpf/sockex2_user.c | |||
@@ -0,0 +1,44 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <assert.h> | ||
3 | #include <linux/bpf.h> | ||
4 | #include "libbpf.h" | ||
5 | #include "bpf_load.h" | ||
6 | #include <unistd.h> | ||
7 | #include <arpa/inet.h> | ||
8 | |||
9 | int main(int ac, char **argv) | ||
10 | { | ||
11 | char filename[256]; | ||
12 | FILE *f; | ||
13 | int i, sock; | ||
14 | |||
15 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); | ||
16 | |||
17 | if (load_bpf_file(filename)) { | ||
18 | printf("%s", bpf_log_buf); | ||
19 | return 1; | ||
20 | } | ||
21 | |||
22 | sock = open_raw_sock("lo"); | ||
23 | |||
24 | assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, | ||
25 | sizeof(prog_fd[0])) == 0); | ||
26 | |||
27 | f = popen("ping -c5 localhost", "r"); | ||
28 | (void) f; | ||
29 | |||
30 | for (i = 0; i < 5; i++) { | ||
31 | int key = 0, next_key; | ||
32 | long long value; | ||
33 | |||
34 | while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { | ||
35 | bpf_lookup_elem(map_fd[0], &next_key, &value); | ||
36 | printf("ip %s count %lld\n", | ||
37 | inet_ntoa((struct in_addr){htonl(next_key)}), | ||
38 | value); | ||
39 | key = next_key; | ||
40 | } | ||
41 | sleep(1); | ||
42 | } | ||
43 | return 0; | ||
44 | } | ||