diff options
| author | Alexei Starovoitov <ast@plumgrid.com> | 2014-12-01 18:06:39 -0500 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2014-12-06 00:47:34 -0500 |
| commit | fbe3310840c65f3cf97dd90d23e177d061c376f2 (patch) | |
| tree | ee28163a6c53e0131fd2d3d626d02b0610eaed2b /samples | |
| parent | a80857822b0c2ed608c93504bd3687b78f20c619 (diff) | |
samples: bpf: large eBPF program in C
sockex2_kern.c is a purposefully large eBPF program in C.
llvm compiles ~200 lines of C code into ~300 eBPF instructions.
It's similar to __skb_flow_dissect() to demonstrate that complex packet parsing
can be done by eBPF.
Then it uses (struct flow_keys)->dst IP address (or hash of ipv6 dst) to keep
stats of the number of packets per IP.
User space loads the eBPF program, attaches it to the loopback interface and prints
dest_ip->#packets stats every second.
Usage:
$ sudo samples/bpf/sockex2
ip 127.0.0.1 count 19
ip 127.0.0.1 count 178115
ip 127.0.0.1 count 369437
ip 127.0.0.1 count 559841
ip 127.0.0.1 count 750539
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
| -rw-r--r-- | samples/bpf/Makefile | 4 | ||||
| -rw-r--r-- | samples/bpf/sockex2_kern.c | 215 | ||||
| -rw-r--r-- | samples/bpf/sockex2_user.c | 44 |
3 files changed, 263 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 770d145186c3..b5b3600dcdf5 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile | |||
| @@ -5,20 +5,24 @@ obj- := dummy.o | |||
| 5 | hostprogs-y := test_verifier test_maps | 5 | hostprogs-y := test_verifier test_maps |
| 6 | hostprogs-y += sock_example | 6 | hostprogs-y += sock_example |
| 7 | hostprogs-y += sockex1 | 7 | hostprogs-y += sockex1 |
| 8 | hostprogs-y += sockex2 | ||
| 8 | 9 | ||
| 9 | test_verifier-objs := test_verifier.o libbpf.o | 10 | test_verifier-objs := test_verifier.o libbpf.o |
| 10 | test_maps-objs := test_maps.o libbpf.o | 11 | test_maps-objs := test_maps.o libbpf.o |
| 11 | sock_example-objs := sock_example.o libbpf.o | 12 | sock_example-objs := sock_example.o libbpf.o |
| 12 | sockex1-objs := bpf_load.o libbpf.o sockex1_user.o | 13 | sockex1-objs := bpf_load.o libbpf.o sockex1_user.o |
| 14 | sockex2-objs := bpf_load.o libbpf.o sockex2_user.o | ||
| 13 | 15 | ||
| 14 | # Tell kbuild to always build the programs | 16 | # Tell kbuild to always build the programs |
| 15 | always := $(hostprogs-y) | 17 | always := $(hostprogs-y) |
| 16 | always += sockex1_kern.o | 18 | always += sockex1_kern.o |
| 19 | always += sockex2_kern.o | ||
| 17 | 20 | ||
| 18 | HOSTCFLAGS += -I$(objtree)/usr/include | 21 | HOSTCFLAGS += -I$(objtree)/usr/include |
| 19 | 22 | ||
| 20 | HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable | 23 | HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable |
| 21 | HOSTLOADLIBES_sockex1 += -lelf | 24 | HOSTLOADLIBES_sockex1 += -lelf |
| 25 | HOSTLOADLIBES_sockex2 += -lelf | ||
| 22 | 26 | ||
| 23 | # point this to your LLVM backend with bpf support | 27 | # point this to your LLVM backend with bpf support |
| 24 | LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc | 28 | LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc |
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c new file mode 100644 index 000000000000..6f0135f0f217 --- /dev/null +++ b/samples/bpf/sockex2_kern.c | |||
| @@ -0,0 +1,215 @@ | |||
| 1 | #include <uapi/linux/bpf.h> | ||
| 2 | #include "bpf_helpers.h" | ||
| 3 | #include <uapi/linux/in.h> | ||
| 4 | #include <uapi/linux/if.h> | ||
| 5 | #include <uapi/linux/if_ether.h> | ||
| 6 | #include <uapi/linux/ip.h> | ||
| 7 | #include <uapi/linux/ipv6.h> | ||
| 8 | #include <uapi/linux/if_tunnel.h> | ||
| 9 | #define IP_MF 0x2000 | ||
| 10 | #define IP_OFFSET 0x1FFF | ||
| 11 | |||
| 12 | struct vlan_hdr { | ||
| 13 | __be16 h_vlan_TCI; | ||
| 14 | __be16 h_vlan_encapsulated_proto; | ||
| 15 | }; | ||
| 16 | |||
| 17 | struct flow_keys { | ||
| 18 | __be32 src; | ||
| 19 | __be32 dst; | ||
| 20 | union { | ||
| 21 | __be32 ports; | ||
| 22 | __be16 port16[2]; | ||
| 23 | }; | ||
| 24 | __u16 thoff; | ||
| 25 | __u8 ip_proto; | ||
| 26 | }; | ||
| 27 | |||
| 28 | static inline int proto_ports_offset(__u64 proto) | ||
| 29 | { | ||
| 30 | switch (proto) { | ||
| 31 | case IPPROTO_TCP: | ||
| 32 | case IPPROTO_UDP: | ||
| 33 | case IPPROTO_DCCP: | ||
| 34 | case IPPROTO_ESP: | ||
| 35 | case IPPROTO_SCTP: | ||
| 36 | case IPPROTO_UDPLITE: | ||
| 37 | return 0; | ||
| 38 | case IPPROTO_AH: | ||
| 39 | return 4; | ||
| 40 | default: | ||
| 41 | return 0; | ||
| 42 | } | ||
| 43 | } | ||
| 44 | |||
| 45 | static inline int ip_is_fragment(struct sk_buff *ctx, __u64 nhoff) | ||
| 46 | { | ||
| 47 | return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off)) | ||
| 48 | & (IP_MF | IP_OFFSET); | ||
| 49 | } | ||
| 50 | |||
| 51 | static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off) | ||
| 52 | { | ||
| 53 | __u64 w0 = load_word(ctx, off); | ||
| 54 | __u64 w1 = load_word(ctx, off + 4); | ||
| 55 | __u64 w2 = load_word(ctx, off + 8); | ||
| 56 | __u64 w3 = load_word(ctx, off + 12); | ||
| 57 | |||
| 58 | return (__u32)(w0 ^ w1 ^ w2 ^ w3); | ||
| 59 | } | ||
| 60 | |||
| 61 | static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto, | ||
| 62 | struct flow_keys *flow) | ||
| 63 | { | ||
| 64 | __u64 verlen; | ||
| 65 | |||
| 66 | if (unlikely(ip_is_fragment(skb, nhoff))) | ||
| 67 | *ip_proto = 0; | ||
| 68 | else | ||
| 69 | *ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); | ||
| 70 | |||
| 71 | if (*ip_proto != IPPROTO_GRE) { | ||
| 72 | flow->src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); | ||
| 73 | flow->dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); | ||
| 74 | } | ||
| 75 | |||
| 76 | verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); | ||
| 77 | if (likely(verlen == 0x45)) | ||
| 78 | nhoff += 20; | ||
| 79 | else | ||
| 80 | nhoff += (verlen & 0xF) << 2; | ||
| 81 | |||
| 82 | return nhoff; | ||
| 83 | } | ||
| 84 | |||
| 85 | static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto, | ||
| 86 | struct flow_keys *flow) | ||
| 87 | { | ||
| 88 | *ip_proto = load_byte(skb, | ||
| 89 | nhoff + offsetof(struct ipv6hdr, nexthdr)); | ||
| 90 | flow->src = ipv6_addr_hash(skb, | ||
| 91 | nhoff + offsetof(struct ipv6hdr, saddr)); | ||
| 92 | flow->dst = ipv6_addr_hash(skb, | ||
| 93 | nhoff + offsetof(struct ipv6hdr, daddr)); | ||
| 94 | nhoff += sizeof(struct ipv6hdr); | ||
| 95 | |||
| 96 | return nhoff; | ||
| 97 | } | ||
| 98 | |||
| 99 | static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow) | ||
| 100 | { | ||
| 101 | __u64 nhoff = ETH_HLEN; | ||
| 102 | __u64 ip_proto; | ||
| 103 | __u64 proto = load_half(skb, 12); | ||
| 104 | int poff; | ||
| 105 | |||
| 106 | if (proto == ETH_P_8021AD) { | ||
| 107 | proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, | ||
| 108 | h_vlan_encapsulated_proto)); | ||
| 109 | nhoff += sizeof(struct vlan_hdr); | ||
| 110 | } | ||
| 111 | |||
| 112 | if (proto == ETH_P_8021Q) { | ||
| 113 | proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, | ||
| 114 | h_vlan_encapsulated_proto)); | ||
| 115 | nhoff += sizeof(struct vlan_hdr); | ||
| 116 | } | ||
| 117 | |||
| 118 | if (likely(proto == ETH_P_IP)) | ||
| 119 | nhoff = parse_ip(skb, nhoff, &ip_proto, flow); | ||
| 120 | else if (proto == ETH_P_IPV6) | ||
| 121 | nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); | ||
| 122 | else | ||
| 123 | return false; | ||
| 124 | |||
| 125 | switch (ip_proto) { | ||
| 126 | case IPPROTO_GRE: { | ||
| 127 | struct gre_hdr { | ||
| 128 | __be16 flags; | ||
| 129 | __be16 proto; | ||
| 130 | }; | ||
| 131 | |||
| 132 | __u64 gre_flags = load_half(skb, | ||
| 133 | nhoff + offsetof(struct gre_hdr, flags)); | ||
| 134 | __u64 gre_proto = load_half(skb, | ||
| 135 | nhoff + offsetof(struct gre_hdr, proto)); | ||
| 136 | |||
| 137 | if (gre_flags & (GRE_VERSION|GRE_ROUTING)) | ||
| 138 | break; | ||
| 139 | |||
| 140 | proto = gre_proto; | ||
| 141 | nhoff += 4; | ||
| 142 | if (gre_flags & GRE_CSUM) | ||
| 143 | nhoff += 4; | ||
| 144 | if (gre_flags & GRE_KEY) | ||
| 145 | nhoff += 4; | ||
| 146 | if (gre_flags & GRE_SEQ) | ||
| 147 | nhoff += 4; | ||
| 148 | |||
| 149 | if (proto == ETH_P_8021Q) { | ||
| 150 | proto = load_half(skb, | ||
| 151 | nhoff + offsetof(struct vlan_hdr, | ||
| 152 | h_vlan_encapsulated_proto)); | ||
| 153 | nhoff += sizeof(struct vlan_hdr); | ||
| 154 | } | ||
| 155 | |||
| 156 | if (proto == ETH_P_IP) | ||
| 157 | nhoff = parse_ip(skb, nhoff, &ip_proto, flow); | ||
| 158 | else if (proto == ETH_P_IPV6) | ||
| 159 | nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); | ||
| 160 | else | ||
| 161 | return false; | ||
| 162 | break; | ||
| 163 | } | ||
| 164 | case IPPROTO_IPIP: | ||
| 165 | nhoff = parse_ip(skb, nhoff, &ip_proto, flow); | ||
| 166 | break; | ||
| 167 | case IPPROTO_IPV6: | ||
| 168 | nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); | ||
| 169 | break; | ||
| 170 | default: | ||
| 171 | break; | ||
| 172 | } | ||
| 173 | |||
| 174 | flow->ip_proto = ip_proto; | ||
| 175 | poff = proto_ports_offset(ip_proto); | ||
| 176 | if (poff >= 0) { | ||
| 177 | nhoff += poff; | ||
| 178 | flow->ports = load_word(skb, nhoff); | ||
| 179 | } | ||
| 180 | |||
| 181 | flow->thoff = (__u16) nhoff; | ||
| 182 | |||
| 183 | return true; | ||
| 184 | } | ||
| 185 | |||
| 186 | struct bpf_map_def SEC("maps") hash_map = { | ||
| 187 | .type = BPF_MAP_TYPE_HASH, | ||
| 188 | .key_size = sizeof(__be32), | ||
| 189 | .value_size = sizeof(long), | ||
| 190 | .max_entries = 1024, | ||
| 191 | }; | ||
| 192 | |||
| 193 | SEC("socket2") | ||
| 194 | int bpf_prog2(struct sk_buff *skb) | ||
| 195 | { | ||
| 196 | struct flow_keys flow; | ||
| 197 | long *value; | ||
| 198 | u32 key; | ||
| 199 | |||
| 200 | if (!flow_dissector(skb, &flow)) | ||
| 201 | return 0; | ||
| 202 | |||
| 203 | key = flow.dst; | ||
| 204 | value = bpf_map_lookup_elem(&hash_map, &key); | ||
| 205 | if (value) { | ||
| 206 | __sync_fetch_and_add(value, 1); | ||
| 207 | } else { | ||
| 208 | long val = 1; | ||
| 209 | |||
| 210 | bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY); | ||
| 211 | } | ||
| 212 | return 0; | ||
| 213 | } | ||
| 214 | |||
| 215 | char _license[] SEC("license") = "GPL"; | ||
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c new file mode 100644 index 000000000000..d2d5f5a790d3 --- /dev/null +++ b/samples/bpf/sockex2_user.c | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | #include <stdio.h> | ||
| 2 | #include <assert.h> | ||
| 3 | #include <linux/bpf.h> | ||
| 4 | #include "libbpf.h" | ||
| 5 | #include "bpf_load.h" | ||
| 6 | #include <unistd.h> | ||
| 7 | #include <arpa/inet.h> | ||
| 8 | |||
| 9 | int main(int ac, char **argv) | ||
| 10 | { | ||
| 11 | char filename[256]; | ||
| 12 | FILE *f; | ||
| 13 | int i, sock; | ||
| 14 | |||
| 15 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); | ||
| 16 | |||
| 17 | if (load_bpf_file(filename)) { | ||
| 18 | printf("%s", bpf_log_buf); | ||
| 19 | return 1; | ||
| 20 | } | ||
| 21 | |||
| 22 | sock = open_raw_sock("lo"); | ||
| 23 | |||
| 24 | assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, | ||
| 25 | sizeof(prog_fd[0])) == 0); | ||
| 26 | |||
| 27 | f = popen("ping -c5 localhost", "r"); | ||
| 28 | (void) f; | ||
| 29 | |||
| 30 | for (i = 0; i < 5; i++) { | ||
| 31 | int key = 0, next_key; | ||
| 32 | long long value; | ||
| 33 | |||
| 34 | while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { | ||
| 35 | bpf_lookup_elem(map_fd[0], &next_key, &value); | ||
| 36 | printf("ip %s count %lld\n", | ||
| 37 | inet_ntoa((struct in_addr){htonl(next_key)}), | ||
| 38 | value); | ||
| 39 | key = next_key; | ||
| 40 | } | ||
| 41 | sleep(1); | ||
| 42 | } | ||
| 43 | return 0; | ||
| 44 | } | ||
