summaryrefslogtreecommitdiffstats
path: root/samples
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@plumgrid.com>2015-05-19 19:59:06 -0400
committerDavid S. Miller <davem@davemloft.net>2015-05-21 17:07:59 -0400
commit530b2c8619f25f9c332c85510579943aa46df515 (patch)
tree632ea9b7f4a1df31760730bf432bae625a4fda8e /samples
parent5bacd7805ab4f07a69c7ef4b1d45ce553d2b1c3a (diff)
samples/bpf: bpf_tail_call example for networking
Usage:
$ sudo ./sockex3
IP src.port -> dst.port bytes packets
127.0.0.1.42010 -> 127.0.0.1.12865 1568 8
127.0.0.1.59526 -> 127.0.0.1.33778 11422636 173070
127.0.0.1.33778 -> 127.0.0.1.59526 11260224828 341974
127.0.0.1.12865 -> 127.0.0.1.42010 1832 12
IP src.port -> dst.port bytes packets
127.0.0.1.42010 -> 127.0.0.1.12865 1568 8
127.0.0.1.59526 -> 127.0.0.1.33778 23198092 351486
127.0.0.1.33778 -> 127.0.0.1.59526 22972698518 698616
127.0.0.1.12865 -> 127.0.0.1.42010 1832 12

This example is similar to sockex2 in that it accumulates per-flow statistics, but it parses packets differently. sockex2 inlines the full packet parser routine into a single bpf program. This sockex3 example has 4 independent programs that parse vlan, mpls, ip, ipv6 and one main program that starts the process. The bpf_tail_call() mechanism allows each program to be small and to be called on demand, potentially multiple times, so that many vlan, mpls, ip in ip, gre encapsulations can be parsed. These and other protocol parsers can be added or removed at runtime. TLVs can be parsed in a similar manner. Note, the tail_call_cnt dynamic check limits the number of tail calls to 32.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/bpf_helpers.h2
-rw-r--r--samples/bpf/sockex3_kern.c303
-rw-r--r--samples/bpf/sockex3_user.c66
4 files changed, 375 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index ded10d05617e..46c6a8cf74d3 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -6,6 +6,7 @@ hostprogs-y := test_verifier test_maps
6hostprogs-y += sock_example 6hostprogs-y += sock_example
7hostprogs-y += sockex1 7hostprogs-y += sockex1
8hostprogs-y += sockex2 8hostprogs-y += sockex2
9hostprogs-y += sockex3
9hostprogs-y += tracex1 10hostprogs-y += tracex1
10hostprogs-y += tracex2 11hostprogs-y += tracex2
11hostprogs-y += tracex3 12hostprogs-y += tracex3
@@ -17,6 +18,7 @@ test_maps-objs := test_maps.o libbpf.o
17sock_example-objs := sock_example.o libbpf.o 18sock_example-objs := sock_example.o libbpf.o
18sockex1-objs := bpf_load.o libbpf.o sockex1_user.o 19sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
19sockex2-objs := bpf_load.o libbpf.o sockex2_user.o 20sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
21sockex3-objs := bpf_load.o libbpf.o sockex3_user.o
20tracex1-objs := bpf_load.o libbpf.o tracex1_user.o 22tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
21tracex2-objs := bpf_load.o libbpf.o tracex2_user.o 23tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
22tracex3-objs := bpf_load.o libbpf.o tracex3_user.o 24tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
@@ -27,6 +29,7 @@ tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
27always := $(hostprogs-y) 29always := $(hostprogs-y)
28always += sockex1_kern.o 30always += sockex1_kern.o
29always += sockex2_kern.o 31always += sockex2_kern.o
32always += sockex3_kern.o
30always += tracex1_kern.o 33always += tracex1_kern.o
31always += tracex2_kern.o 34always += tracex2_kern.o
32always += tracex3_kern.o 35always += tracex3_kern.o
@@ -39,6 +42,7 @@ HOSTCFLAGS += -I$(objtree)/usr/include
39HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable 42HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
40HOSTLOADLIBES_sockex1 += -lelf 43HOSTLOADLIBES_sockex1 += -lelf
41HOSTLOADLIBES_sockex2 += -lelf 44HOSTLOADLIBES_sockex2 += -lelf
45HOSTLOADLIBES_sockex3 += -lelf
42HOSTLOADLIBES_tracex1 += -lelf 46HOSTLOADLIBES_tracex1 += -lelf
43HOSTLOADLIBES_tracex2 += -lelf 47HOSTLOADLIBES_tracex2 += -lelf
44HOSTLOADLIBES_tracex3 += -lelf 48HOSTLOADLIBES_tracex3 += -lelf
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 699ed8dbdd64..f531a0b3282d 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -23,6 +23,8 @@ static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
23 (void *) BPF_FUNC_trace_printk; 23 (void *) BPF_FUNC_trace_printk;
24static void (*bpf_tail_call)(void *ctx, void *map, int index) = 24static void (*bpf_tail_call)(void *ctx, void *map, int index) =
25 (void *) BPF_FUNC_tail_call; 25 (void *) BPF_FUNC_tail_call;
26static unsigned long long (*bpf_get_smp_processor_id)(void) =
27 (void *) BPF_FUNC_get_smp_processor_id;
26 28
27/* llvm builtin functions that eBPF C program may use to 29/* llvm builtin functions that eBPF C program may use to
28 * emit BPF_LD_ABS and BPF_LD_IND instructions 30 * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
new file mode 100644
index 000000000000..2625b987944f
--- /dev/null
+++ b/samples/bpf/sockex3_kern.c
@@ -0,0 +1,303 @@
1/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <uapi/linux/bpf.h>
8#include "bpf_helpers.h"
9#include <uapi/linux/in.h>
10#include <uapi/linux/if.h>
11#include <uapi/linux/if_ether.h>
12#include <uapi/linux/ip.h>
13#include <uapi/linux/ipv6.h>
14#include <uapi/linux/if_tunnel.h>
15#include <uapi/linux/mpls.h>
16#define IP_MF 0x2000
17#define IP_OFFSET 0x1FFF
18
19#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
20
21struct bpf_map_def SEC("maps") jmp_table = {
22 .type = BPF_MAP_TYPE_PROG_ARRAY,
23 .key_size = sizeof(u32),
24 .value_size = sizeof(u32),
25 .max_entries = 8,
26};
27
28#define PARSE_VLAN 1
29#define PARSE_MPLS 2
30#define PARSE_IP 3
31#define PARSE_IPV6 4
32
33/* protocol dispatch routine.
34 * It tail-calls next BPF program depending on eth proto
35 * Note, we could have used:
36 * bpf_tail_call(skb, &jmp_table, proto);
37 * but it would need large prog_array
38 */
39static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
40{
41 switch (proto) {
42 case ETH_P_8021Q:
43 case ETH_P_8021AD:
44 bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
45 break;
46 case ETH_P_MPLS_UC:
47 case ETH_P_MPLS_MC:
48 bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
49 break;
50 case ETH_P_IP:
51 bpf_tail_call(skb, &jmp_table, PARSE_IP);
52 break;
53 case ETH_P_IPV6:
54 bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
55 break;
56 }
57}
58
59struct vlan_hdr {
60 __be16 h_vlan_TCI;
61 __be16 h_vlan_encapsulated_proto;
62};
63
64struct flow_keys {
65 __be32 src;
66 __be32 dst;
67 union {
68 __be32 ports;
69 __be16 port16[2];
70 };
71 __u32 ip_proto;
72};
73
74static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
75{
76 return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
77 & (IP_MF | IP_OFFSET);
78}
79
80static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
81{
82 __u64 w0 = load_word(ctx, off);
83 __u64 w1 = load_word(ctx, off + 4);
84 __u64 w2 = load_word(ctx, off + 8);
85 __u64 w3 = load_word(ctx, off + 12);
86
87 return (__u32)(w0 ^ w1 ^ w2 ^ w3);
88}
89
90struct globals {
91 struct flow_keys flow;
92 __u32 nhoff;
93};
94
95struct bpf_map_def SEC("maps") percpu_map = {
96 .type = BPF_MAP_TYPE_ARRAY,
97 .key_size = sizeof(__u32),
98 .value_size = sizeof(struct globals),
99 .max_entries = 32,
100};
101
102/* user poor man's per_cpu until native support is ready */
103static struct globals *this_cpu_globals(void)
104{
105 u32 key = bpf_get_smp_processor_id();
106
107 return bpf_map_lookup_elem(&percpu_map, &key);
108}
109
110/* some simple stats for user space consumption */
111struct pair {
112 __u64 packets;
113 __u64 bytes;
114};
115
116struct bpf_map_def SEC("maps") hash_map = {
117 .type = BPF_MAP_TYPE_HASH,
118 .key_size = sizeof(struct flow_keys),
119 .value_size = sizeof(struct pair),
120 .max_entries = 1024,
121};
122
123static void update_stats(struct __sk_buff *skb, struct globals *g)
124{
125 struct flow_keys key = g->flow;
126 struct pair *value;
127
128 value = bpf_map_lookup_elem(&hash_map, &key);
129 if (value) {
130 __sync_fetch_and_add(&value->packets, 1);
131 __sync_fetch_and_add(&value->bytes, skb->len);
132 } else {
133 struct pair val = {1, skb->len};
134
135 bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
136 }
137}
138
139static __always_inline void parse_ip_proto(struct __sk_buff *skb,
140 struct globals *g, __u32 ip_proto)
141{
142 __u32 nhoff = g->nhoff;
143 int poff;
144
145 switch (ip_proto) {
146 case IPPROTO_GRE: {
147 struct gre_hdr {
148 __be16 flags;
149 __be16 proto;
150 };
151
152 __u32 gre_flags = load_half(skb,
153 nhoff + offsetof(struct gre_hdr, flags));
154 __u32 gre_proto = load_half(skb,
155 nhoff + offsetof(struct gre_hdr, proto));
156
157 if (gre_flags & (GRE_VERSION|GRE_ROUTING))
158 break;
159
160 nhoff += 4;
161 if (gre_flags & GRE_CSUM)
162 nhoff += 4;
163 if (gre_flags & GRE_KEY)
164 nhoff += 4;
165 if (gre_flags & GRE_SEQ)
166 nhoff += 4;
167
168 g->nhoff = nhoff;
169 parse_eth_proto(skb, gre_proto);
170 break;
171 }
172 case IPPROTO_IPIP:
173 parse_eth_proto(skb, ETH_P_IP);
174 break;
175 case IPPROTO_IPV6:
176 parse_eth_proto(skb, ETH_P_IPV6);
177 break;
178 case IPPROTO_TCP:
179 case IPPROTO_UDP:
180 g->flow.ports = load_word(skb, nhoff);
181 case IPPROTO_ICMP:
182 g->flow.ip_proto = ip_proto;
183 update_stats(skb, g);
184 break;
185 default:
186 break;
187 }
188}
189
190PROG(PARSE_IP)(struct __sk_buff *skb)
191{
192 struct globals *g = this_cpu_globals();
193 __u32 nhoff, verlen, ip_proto;
194
195 if (!g)
196 return 0;
197
198 nhoff = g->nhoff;
199
200 if (unlikely(ip_is_fragment(skb, nhoff)))
201 return 0;
202
203 ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
204
205 if (ip_proto != IPPROTO_GRE) {
206 g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
207 g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
208 }
209
210 verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
211 nhoff += (verlen & 0xF) << 2;
212
213 g->nhoff = nhoff;
214 parse_ip_proto(skb, g, ip_proto);
215 return 0;
216}
217
218PROG(PARSE_IPV6)(struct __sk_buff *skb)
219{
220 struct globals *g = this_cpu_globals();
221 __u32 nhoff, ip_proto;
222
223 if (!g)
224 return 0;
225
226 nhoff = g->nhoff;
227
228 ip_proto = load_byte(skb,
229 nhoff + offsetof(struct ipv6hdr, nexthdr));
230 g->flow.src = ipv6_addr_hash(skb,
231 nhoff + offsetof(struct ipv6hdr, saddr));
232 g->flow.dst = ipv6_addr_hash(skb,
233 nhoff + offsetof(struct ipv6hdr, daddr));
234 nhoff += sizeof(struct ipv6hdr);
235
236 g->nhoff = nhoff;
237 parse_ip_proto(skb, g, ip_proto);
238 return 0;
239}
240
241PROG(PARSE_VLAN)(struct __sk_buff *skb)
242{
243 struct globals *g = this_cpu_globals();
244 __u32 nhoff, proto;
245
246 if (!g)
247 return 0;
248
249 nhoff = g->nhoff;
250
251 proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
252 h_vlan_encapsulated_proto));
253 nhoff += sizeof(struct vlan_hdr);
254 g->nhoff = nhoff;
255
256 parse_eth_proto(skb, proto);
257
258 return 0;
259}
260
261PROG(PARSE_MPLS)(struct __sk_buff *skb)
262{
263 struct globals *g = this_cpu_globals();
264 __u32 nhoff, label;
265
266 if (!g)
267 return 0;
268
269 nhoff = g->nhoff;
270
271 label = load_word(skb, nhoff);
272 nhoff += sizeof(struct mpls_label);
273 g->nhoff = nhoff;
274
275 if (label & MPLS_LS_S_MASK) {
276 __u8 verlen = load_byte(skb, nhoff);
277 if ((verlen & 0xF0) == 4)
278 parse_eth_proto(skb, ETH_P_IP);
279 else
280 parse_eth_proto(skb, ETH_P_IPV6);
281 } else {
282 parse_eth_proto(skb, ETH_P_MPLS_UC);
283 }
284
285 return 0;
286}
287
288SEC("socket/0")
289int main_prog(struct __sk_buff *skb)
290{
291 struct globals *g = this_cpu_globals();
292 __u32 nhoff = ETH_HLEN;
293 __u32 proto = load_half(skb, 12);
294
295 if (!g)
296 return 0;
297
298 g->nhoff = nhoff;
299 parse_eth_proto(skb, proto);
300 return 0;
301}
302
303char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
new file mode 100644
index 000000000000..2617772d060d
--- /dev/null
+++ b/samples/bpf/sockex3_user.c
@@ -0,0 +1,66 @@
1#include <stdio.h>
2#include <assert.h>
3#include <linux/bpf.h>
4#include "libbpf.h"
5#include "bpf_load.h"
6#include <unistd.h>
7#include <arpa/inet.h>
8
/* Key of the flow table filled in by the BPF programs; main() reads
 * src/dst as IPv4 addresses and port16[] as the two ports.
 */
struct flow_keys {
	__be32 src;
	__be32 dst;
	union {
		__be32 ports;		/* both ports as one word */
		__be16 port16[2];	/* the same bytes, one port each */
	};
	__u32 ip_proto;
};
18
/* Value of the flow table: per-flow counters printed by main() */
struct pair {
	__u64 packets;
	__u64 bytes;
};
23
24int main(int argc, char **argv)
25{
26 char filename[256];
27 FILE *f;
28 int i, sock;
29
30 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
31
32 if (load_bpf_file(filename)) {
33 printf("%s", bpf_log_buf);
34 return 1;
35 }
36
37 sock = open_raw_sock("lo");
38
39 assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
40 sizeof(__u32)) == 0);
41
42 if (argc > 1)
43 f = popen("ping -c5 localhost", "r");
44 else
45 f = popen("netperf -l 4 localhost", "r");
46 (void) f;
47
48 for (i = 0; i < 5; i++) {
49 struct flow_keys key = {}, next_key;
50 struct pair value;
51
52 sleep(1);
53 printf("IP src.port -> dst.port bytes packets\n");
54 while (bpf_get_next_key(map_fd[2], &key, &next_key) == 0) {
55 bpf_lookup_elem(map_fd[2], &next_key, &value);
56 printf("%s.%05d -> %s.%05d %12lld %12lld\n",
57 inet_ntoa((struct in_addr){htonl(next_key.src)}),
58 next_key.port16[0],
59 inet_ntoa((struct in_addr){htonl(next_key.dst)}),
60 next_key.port16[1],
61 value.bytes, value.packets);
62 key = next_key;
63 }
64 }
65 return 0;
66}