summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin KaFai Lau <kafai@fb.com>2016-12-07 18:53:14 -0500
committerDavid S. Miller <davem@davemloft.net>2016-12-08 14:25:13 -0500
commit12d8bb64e3f65f5287ff17c084d076a28daa8096 (patch)
treee772f55d041956a3e07a487b48b42df8a0b830bf
parentea3349a03519dcd4f32d949cd80ab995623dc5ac (diff)
bpf: xdp: Add XDP example for head adjustment
The XDP prog checks whether the incoming packet matches any VIP:PORT combination in the BPF hashmap. If it does, the prog encapsulates the packet in an IPv4/v6 header as instructed by the value stored in the BPF hashmap and then sends it back out with XDP_TX. The VIP:PORT -> IP-Encap-Info mapping can be specified through the command-line arguments of the user prog.
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/bpf_helpers.h2
-rw-r--r--samples/bpf/bpf_load.c94
-rw-r--r--samples/bpf/bpf_load.h1
-rw-r--r--samples/bpf/xdp1_user.c93
-rw-r--r--samples/bpf/xdp_tx_iptunnel_common.h37
-rw-r--r--samples/bpf/xdp_tx_iptunnel_kern.c236
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c256
8 files changed, 630 insertions, 93 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 00cd3081c038..f2219c1489e5 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -33,6 +33,7 @@ hostprogs-y += trace_event
33hostprogs-y += sampleip 33hostprogs-y += sampleip
34hostprogs-y += tc_l2_redirect 34hostprogs-y += tc_l2_redirect
35hostprogs-y += lwt_len_hist 35hostprogs-y += lwt_len_hist
36hostprogs-y += xdp_tx_iptunnel
36 37
37test_lru_dist-objs := test_lru_dist.o libbpf.o 38test_lru_dist-objs := test_lru_dist.o libbpf.o
38sock_example-objs := sock_example.o libbpf.o 39sock_example-objs := sock_example.o libbpf.o
@@ -67,6 +68,7 @@ trace_event-objs := bpf_load.o libbpf.o trace_event_user.o
67sampleip-objs := bpf_load.o libbpf.o sampleip_user.o 68sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
68tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o 69tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o
69lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o 70lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o
71xdp_tx_iptunnel-objs := bpf_load.o libbpf.o xdp_tx_iptunnel_user.o
70 72
71# Tell kbuild to always build the programs 73# Tell kbuild to always build the programs
72always := $(hostprogs-y) 74always := $(hostprogs-y)
@@ -99,6 +101,7 @@ always += test_current_task_under_cgroup_kern.o
99always += trace_event_kern.o 101always += trace_event_kern.o
100always += sampleip_kern.o 102always += sampleip_kern.o
101always += lwt_len_hist_kern.o 103always += lwt_len_hist_kern.o
104always += xdp_tx_iptunnel_kern.o
102 105
103HOSTCFLAGS += -I$(objtree)/usr/include 106HOSTCFLAGS += -I$(objtree)/usr/include
104HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ 107HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
@@ -129,6 +132,7 @@ HOSTLOADLIBES_trace_event += -lelf
129HOSTLOADLIBES_sampleip += -lelf 132HOSTLOADLIBES_sampleip += -lelf
130HOSTLOADLIBES_tc_l2_redirect += -l elf 133HOSTLOADLIBES_tc_l2_redirect += -l elf
131HOSTLOADLIBES_lwt_len_hist += -l elf 134HOSTLOADLIBES_lwt_len_hist += -l elf
135HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
132 136
133# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: 137# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
134# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang 138# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 8370a6e3839d..faaffe2e139a 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -57,6 +57,8 @@ static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
57 (void *) BPF_FUNC_skb_set_tunnel_opt; 57 (void *) BPF_FUNC_skb_set_tunnel_opt;
58static unsigned long long (*bpf_get_prandom_u32)(void) = 58static unsigned long long (*bpf_get_prandom_u32)(void) =
59 (void *) BPF_FUNC_get_prandom_u32; 59 (void *) BPF_FUNC_get_prandom_u32;
/*
 * Stub through which BPF C programs call the helper identified by
 * BPF_FUNC_xdp_adjust_head. The callers in this patch pass a negative
 * offset (minus the size of the header they are about to prepend) and
 * treat any non-zero return value as failure.
 */
60static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
61 (void *) BPF_FUNC_xdp_adjust_head;
60 62
61/* llvm builtin functions that eBPF C program may use to 63/* llvm builtin functions that eBPF C program may use to
62 * emit BPF_LD_ABS and BPF_LD_IND instructions 64 * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 49b45ccbe153..e30b6de94f2e 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -12,6 +12,10 @@
12#include <linux/bpf.h> 12#include <linux/bpf.h>
13#include <linux/filter.h> 13#include <linux/filter.h>
14#include <linux/perf_event.h> 14#include <linux/perf_event.h>
15#include <linux/netlink.h>
16#include <linux/rtnetlink.h>
17#include <sys/types.h>
18#include <sys/socket.h>
15#include <sys/syscall.h> 19#include <sys/syscall.h>
16#include <sys/ioctl.h> 20#include <sys/ioctl.h>
17#include <sys/mman.h> 21#include <sys/mman.h>
@@ -450,3 +454,93 @@ struct ksym *ksym_search(long key)
450 /* out of range. return _stext */ 454 /* out of range. return _stext */
451 return &syms[0]; 455 return &syms[0];
452} 456}
457
/*
 * Send an rtnetlink RTM_SETLINK request that carries @fd in a nested
 * IFLA_XDP / IFLA_XDP_FD attribute for the interface @ifindex, then wait
 * for the kernel's acknowledgement.
 *
 * @ifindex: index of the network interface to configure.
 * @fd:      BPF program fd placed in the IFLA_XDP_FD attribute. The sample
 *           programs pass -1 when they exit; presumably that detaches the
 *           program -- TODO confirm against the kernel side.
 *
 * Returns 0 when the reply contains no error, -1 otherwise. Every failure
 * is reported on stdout with printf(). The netlink socket is always closed
 * before returning.
 */
458int set_link_xdp_fd(int ifindex, int fd)
459{
460 struct sockaddr_nl sa;
461 int sock, seq = 0, len, ret = -1;
462 char buf[4096];
463 struct nlattr *nla, *nla_xdp;
/* Request layout: netlink header, ifinfomsg, then room for attributes. */
464 struct {
465 struct nlmsghdr nh;
466 struct ifinfomsg ifinfo;
467 char attrbuf[64];
468 } req;
469 struct nlmsghdr *nh;
470 struct nlmsgerr *err;
471
472 memset(&sa, 0, sizeof(sa));
473 sa.nl_family = AF_NETLINK;
474
475 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
476 if (sock < 0) {
477 printf("open netlink socket: %s\n", strerror(errno));
478 return -1;
479 }
480
481 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
482 printf("bind to netlink: %s\n", strerror(errno));
483 goto cleanup;
484 }
485
/* Build the RTM_SETLINK header; NLM_F_ACK asks for an explicit reply. */
486 memset(&req, 0, sizeof(req));
487 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
488 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
489 req.nh.nlmsg_type = RTM_SETLINK;
490 req.nh.nlmsg_pid = 0;
491 req.nh.nlmsg_seq = ++seq;
492 req.ifinfo.ifi_family = AF_UNSPEC;
493 req.ifinfo.ifi_index = ifindex;
/* The outer (nested) attribute starts right after the aligned ifinfomsg. */
494 nla = (struct nlattr *)(((char *)&req)
495 + NLMSG_ALIGN(req.nh.nlmsg_len));
496 nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
497
/* The inner attribute holds the fd and lives in the outer one's payload. */
498 nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
499 nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
500 nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
501 memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
502 nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
503
/* Account for the attribute in the total message length. */
504 req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
505
506 if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
507 printf("send to netlink: %s\n", strerror(errno));
508 goto cleanup;
509 }
510
/* A single recv() is issued; only the messages in that buffer are parsed. */
511 len = recv(sock, buf, sizeof(buf), 0);
512 if (len < 0) {
513 printf("recv from netlink: %s\n", strerror(errno));
514 goto cleanup;
515 }
516
517 for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
518 nh = NLMSG_NEXT(nh, len)) {
/* Replies are expected to carry this process's pid and our sequence number. */
519 if (nh->nlmsg_pid != getpid()) {
520 printf("Wrong pid %d, expected %d\n",
521 nh->nlmsg_pid, getpid());
522 goto cleanup;
523 }
524 if (nh->nlmsg_seq != seq) {
525 printf("Wrong seq %d, expected %d\n",
526 nh->nlmsg_seq, seq);
527 goto cleanup;
528 }
529 switch (nh->nlmsg_type) {
530 case NLMSG_ERROR:
531 err = (struct nlmsgerr *)NLMSG_DATA(nh);
/* An NLMSG_ERROR with error == 0 is treated as success; keep scanning. */
532 if (!err->error)
533 continue;
534 printf("nlmsg error %s\n", strerror(-err->error));
535 goto cleanup;
536 case NLMSG_DONE:
/* This break only leaves the switch; the for loop keeps iterating. */
537 break;
538 }
539 }
540
541 ret = 0;
542
543cleanup:
544 close(sock);
545 return ret;
546}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 4adeeef53ad6..fb46a421ab41 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -31,4 +31,5 @@ struct ksym {
31 31
32int load_kallsyms(void); 32int load_kallsyms(void);
33struct ksym *ksym_search(long key); 33struct ksym *ksym_search(long key);
34int set_link_xdp_fd(int ifindex, int fd);
34#endif 35#endif
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index 2b2150d6d6f7..5f040a0d7712 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -5,111 +5,18 @@
5 * License as published by the Free Software Foundation. 5 * License as published by the Free Software Foundation.
6 */ 6 */
7#include <linux/bpf.h> 7#include <linux/bpf.h>
8#include <linux/netlink.h>
9#include <linux/rtnetlink.h>
10#include <assert.h> 8#include <assert.h>
11#include <errno.h> 9#include <errno.h>
12#include <signal.h> 10#include <signal.h>
13#include <stdio.h> 11#include <stdio.h>
14#include <stdlib.h> 12#include <stdlib.h>
15#include <string.h> 13#include <string.h>
16#include <sys/socket.h>
17#include <unistd.h> 14#include <unistd.h>
18 15
19#include "bpf_load.h" 16#include "bpf_load.h"
20#include "bpf_util.h" 17#include "bpf_util.h"
21#include "libbpf.h" 18#include "libbpf.h"
22 19
23static int set_link_xdp_fd(int ifindex, int fd)
24{
25 struct sockaddr_nl sa;
26 int sock, seq = 0, len, ret = -1;
27 char buf[4096];
28 struct nlattr *nla, *nla_xdp;
29 struct {
30 struct nlmsghdr nh;
31 struct ifinfomsg ifinfo;
32 char attrbuf[64];
33 } req;
34 struct nlmsghdr *nh;
35 struct nlmsgerr *err;
36
37 memset(&sa, 0, sizeof(sa));
38 sa.nl_family = AF_NETLINK;
39
40 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
41 if (sock < 0) {
42 printf("open netlink socket: %s\n", strerror(errno));
43 return -1;
44 }
45
46 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
47 printf("bind to netlink: %s\n", strerror(errno));
48 goto cleanup;
49 }
50
51 memset(&req, 0, sizeof(req));
52 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
53 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
54 req.nh.nlmsg_type = RTM_SETLINK;
55 req.nh.nlmsg_pid = 0;
56 req.nh.nlmsg_seq = ++seq;
57 req.ifinfo.ifi_family = AF_UNSPEC;
58 req.ifinfo.ifi_index = ifindex;
59 nla = (struct nlattr *)(((char *)&req)
60 + NLMSG_ALIGN(req.nh.nlmsg_len));
61 nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
62
63 nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
64 nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
65 nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
66 memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
67 nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
68
69 req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
70
71 if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
72 printf("send to netlink: %s\n", strerror(errno));
73 goto cleanup;
74 }
75
76 len = recv(sock, buf, sizeof(buf), 0);
77 if (len < 0) {
78 printf("recv from netlink: %s\n", strerror(errno));
79 goto cleanup;
80 }
81
82 for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
83 nh = NLMSG_NEXT(nh, len)) {
84 if (nh->nlmsg_pid != getpid()) {
85 printf("Wrong pid %d, expected %d\n",
86 nh->nlmsg_pid, getpid());
87 goto cleanup;
88 }
89 if (nh->nlmsg_seq != seq) {
90 printf("Wrong seq %d, expected %d\n",
91 nh->nlmsg_seq, seq);
92 goto cleanup;
93 }
94 switch (nh->nlmsg_type) {
95 case NLMSG_ERROR:
96 err = (struct nlmsgerr *)NLMSG_DATA(nh);
97 if (!err->error)
98 continue;
99 printf("nlmsg error %s\n", strerror(-err->error));
100 goto cleanup;
101 case NLMSG_DONE:
102 break;
103 }
104 }
105
106 ret = 0;
107
108cleanup:
109 close(sock);
110 return ret;
111}
112
113static int ifindex; 20static int ifindex;
114 21
115static void int_exit(int sig) 22static void int_exit(int sig)
diff --git a/samples/bpf/xdp_tx_iptunnel_common.h b/samples/bpf/xdp_tx_iptunnel_common.h
new file mode 100644
index 000000000000..dd12cc35110f
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptunnel_common.h
@@ -0,0 +1,37 @@
1/* Copyright (c) 2016 Facebook
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#ifndef _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
8#define _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
9
10#include <linux/types.h>
11
12#define MAX_IPTNL_ENTRIES 256U
13
/*
 * Key of the vip2tnl hash map: identifies one virtual service as
 * (destination address, destination port, address family, IP protocol).
 * Both the BPF program and the user program zero-initialise the whole
 * struct with "= {}" before filling it in.
 * NOTE(review): the struct probably ends in padding after "protocol";
 * confirm that the padding is always zero on both sides, since the bytes
 * are part of the hash key.
 */
14struct vip {
/* Destination address; v4 aliases the first word of v6. */
15 union {
16 __u32 v6[4];
17 __u32 v4;
18 } daddr;
/* Destination port in network byte order (user side stores htons(port)). */
19 __u16 dport;
/* AF_INET or AF_INET6. */
20 __u16 family;
/* IP protocol number (iph->protocol / ip6h->nexthdr on the BPF side). */
21 __u8 protocol;
22};
23
/*
 * Value of the vip2tnl hash map: what the BPF program needs to build the
 * outer (tunnel) header and the new Ethernet header for a matching packet.
 */
24struct iptnl_info {
/* Source address copied into the outer IP header. */
25 union {
26 __u32 v6[4];
27 __u32 v4;
28 } saddr;
/* Destination address copied into the outer IP header. */
29 union {
30 __u32 v6[4];
31 __u32 v4;
32 } daddr;
/* AF_INET or AF_INET6; the BPF program only encapsulates v4-in-v4 and v6-in-v6. */
33 __u16 family;
/* Destination MAC address written into the transmitted frame. */
34 __u8 dmac[6];
35};
36
37#endif
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c
new file mode 100644
index 000000000000..85c38ecd3a2d
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptunnel_kern.c
@@ -0,0 +1,236 @@
1/* Copyright (c) 2016 Facebook
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 *
7 * This program shows how to use bpf_xdp_adjust_head() by
8 * encapsulating the incoming packet in an IPv4/v6 header
9 * and then XDP_TX it out.
10 */
11#include <uapi/linux/bpf.h>
12#include <linux/in.h>
13#include <linux/if_ether.h>
14#include <linux/if_packet.h>
15#include <linux/if_vlan.h>
16#include <linux/ip.h>
17#include <linux/ipv6.h>
18#include "bpf_helpers.h"
19#include "xdp_tx_iptunnel_common.h"
20
/*
 * Per-CPU array of 64-bit counters with 256 slots. count_tx() indexes it
 * by IP protocol number and bumps the slot for each packet that is about
 * to be returned with XDP_TX, so despite the "rx" in the name it counts
 * encapsulated/transmitted packets.
 */
21struct bpf_map_def SEC("maps") rxcnt = {
22 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
23 .key_size = sizeof(__u32),
24 .value_size = sizeof(__u64),
25 .max_entries = 256,
26};
27
/*
 * Hash map from a virtual service (struct vip) to the tunnel parameters
 * (struct iptnl_info) used to encapsulate packets addressed to it.
 * Holds at most MAX_IPTNL_ENTRIES entries; looked up by handle_ipv4()
 * and handle_ipv6().
 */
28struct bpf_map_def SEC("maps") vip2tnl = {
29 .type = BPF_MAP_TYPE_HASH,
30 .key_size = sizeof(struct vip),
31 .value_size = sizeof(struct iptnl_info),
32 .max_entries = MAX_IPTNL_ENTRIES,
33};
34
35static __always_inline void count_tx(u32 protocol)
36{
37 u64 *rxcnt_count;
38
39 rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
40 if (rxcnt_count)
41 *rxcnt_count += 1;
42}
43
44static __always_inline int get_dport(void *trans_data, void *data_end,
45 u8 protocol)
46{
47 struct tcphdr *th;
48 struct udphdr *uh;
49
50 switch (protocol) {
51 case IPPROTO_TCP:
52 th = (struct tcphdr *)trans_data;
53 if (th + 1 > data_end)
54 return -1;
55 return th->dest;
56 case IPPROTO_UDP:
57 uh = (struct udphdr *)trans_data;
58 if (uh + 1 > data_end)
59 return -1;
60 return uh->dest;
61 default:
62 return 0;
63 }
64}
65
66static __always_inline void set_ethhdr(struct ethhdr *new_eth,
67 const struct ethhdr *old_eth,
68 const struct iptnl_info *tnl,
69 __be16 h_proto)
70{
71 memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
72 memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
73 new_eth->h_proto = h_proto;
74}
75
76static __always_inline int handle_ipv4(struct xdp_md *xdp)
77{
78 void *data_end = (void *)(long)xdp->data_end;
79 void *data = (void *)(long)xdp->data;
80 struct iptnl_info *tnl;
81 struct ethhdr *new_eth;
82 struct ethhdr *old_eth;
83 struct iphdr *iph = data + sizeof(struct ethhdr);
84 u16 *next_iph_u16;
85 u16 payload_len;
86 struct vip vip = {};
87 int dport;
88 u32 csum = 0;
89 int i;
90
91 if (iph + 1 > data_end)
92 return XDP_DROP;
93
94 dport = get_dport(iph + 1, data_end, iph->protocol);
95 if (dport == -1)
96 return XDP_DROP;
97
98 vip.protocol = iph->protocol;
99 vip.family = AF_INET;
100 vip.daddr.v4 = iph->daddr;
101 vip.dport = dport;
102 payload_len = ntohs(iph->tot_len);
103
104 tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
105 /* It only does v4-in-v4 */
106 if (!tnl || tnl->family != AF_INET)
107 return XDP_PASS;
108
109 /* The vip key is found. Add an IP header and send it out */
110
111 if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
112 return XDP_DROP;
113
114 data = (void *)(long)xdp->data;
115 data_end = (void *)(long)xdp->data_end;
116
117 new_eth = data;
118 iph = data + sizeof(*new_eth);
119 old_eth = data + sizeof(*iph);
120
121 if (new_eth + 1 > data_end ||
122 old_eth + 1 > data_end ||
123 iph + 1 > data_end)
124 return XDP_DROP;
125
126 set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP));
127
128 iph->version = 4;
129 iph->ihl = sizeof(*iph) >> 2;
130 iph->frag_off = 0;
131 iph->protocol = IPPROTO_IPIP;
132 iph->check = 0;
133 iph->tos = 0;
134 iph->tot_len = htons(payload_len + sizeof(*iph));
135 iph->daddr = tnl->daddr.v4;
136 iph->saddr = tnl->saddr.v4;
137 iph->ttl = 8;
138
139 next_iph_u16 = (u16 *)iph;
140#pragma clang loop unroll(full)
141 for (i = 0; i < sizeof(*iph) >> 1; i++)
142 csum += *next_iph_u16++;
143
144 iph->check = ~((csum & 0xffff) + (csum >> 16));
145
146 count_tx(vip.protocol);
147
148 return XDP_TX;
149}
150
/*
 * Handle an IPv6 packet: if its (daddr, dport, nexthdr) is present in
 * vip2tnl with an IPv6 tunnel entry, prepend an outer IPv6 header plus a
 * fresh Ethernet header and bounce the frame with XDP_TX.
 *
 * Returns XDP_DROP when the packet is too short or the head cannot be
 * adjusted, XDP_PASS when there is no matching v6 tunnel entry, and
 * XDP_TX after successful encapsulation.
 *
 * The transport header is taken to start right after the fixed IPv6
 * header (ip6h + 1); nexthdr is used directly as the protocol.
 */
151static __always_inline int handle_ipv6(struct xdp_md *xdp)
152{
153 void *data_end = (void *)(long)xdp->data_end;
154 void *data = (void *)(long)xdp->data;
155 struct iptnl_info *tnl;
156 struct ethhdr *new_eth;
157 struct ethhdr *old_eth;
158 struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
159 __u16 payload_len;
160 struct vip vip = {};
161 int dport;
162
163 if (ip6h + 1 > data_end)
164 return XDP_DROP;
165
166 dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
167 if (dport == -1)
168 return XDP_DROP;
169
/* Build the lookup key from the inner packet. */
170 vip.protocol = ip6h->nexthdr;
171 vip.family = AF_INET6;
172 memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
173 vip.dport = dport;
/* Kept exactly as read from the header; ntohs() is applied where it is used below. */
174 payload_len = ip6h->payload_len;
175
176 tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
177 /* It only does v6-in-v6 */
178 if (!tnl || tnl->family != AF_INET6)
179 return XDP_PASS;
180
181 /* The vip key is found. Add an IP header and send it out */
182
183 if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
184 return XDP_DROP;
185
/* The packet pointers are re-read after the head was moved. */
186 data = (void *)(long)xdp->data;
187 data_end = (void *)(long)xdp->data_end;
188
/*
 * New layout: [new eth][outer ipv6hdr][inner packet...]. The original
 * Ethernet header now sits sizeof(struct ipv6hdr) bytes after the new
 * start of the packet.
 */
189 new_eth = data;
190 ip6h = data + sizeof(*new_eth);
191 old_eth = data + sizeof(*ip6h);
192
193 if (new_eth + 1 > data_end ||
194 old_eth + 1 > data_end ||
195 ip6h + 1 > data_end)
196 return XDP_DROP;
197
198 set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6));
199
200 ip6h->version = 6;
201 ip6h->priority = 0;
202 memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
/* Outer payload = inner payload + the inner IPv6 header that is now carried as data. */
203 ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h));
204 ip6h->nexthdr = IPPROTO_IPV6;
205 ip6h->hop_limit = 8;
206 memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
207 memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
208
209 count_tx(vip.protocol);
210
211 return XDP_TX;
212}
213
214SEC("xdp_tx_iptunnel")
215int _xdp_tx_iptunnel(struct xdp_md *xdp)
216{
217 void *data_end = (void *)(long)xdp->data_end;
218 void *data = (void *)(long)xdp->data;
219 struct ethhdr *eth = data;
220 __u16 h_proto;
221
222 if (eth + 1 > data_end)
223 return XDP_DROP;
224
225 h_proto = eth->h_proto;
226
227 if (h_proto == htons(ETH_P_IP))
228 return handle_ipv4(xdp);
229 else if (h_proto == htons(ETH_P_IPV6))
230
231 return handle_ipv6(xdp);
232 else
233 return XDP_PASS;
234}
235
/* License string emitted into the object file's "license" section. */
236char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
new file mode 100644
index 000000000000..7a71f5c74684
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -0,0 +1,256 @@
1/* Copyright (c) 2016 Facebook
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <linux/bpf.h>
8#include <assert.h>
9#include <errno.h>
10#include <signal.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
14#include <sys/resource.h>
15#include <arpa/inet.h>
16#include <netinet/ether.h>
17#include <unistd.h>
18#include <time.h>
19#include "bpf_load.h"
20#include "libbpf.h"
21#include "bpf_util.h"
22#include "xdp_tx_iptunnel_common.h"
23
/* Seconds poll_stats() sleeps between two statistics dumps. */
#define STATS_INTERVAL_S 2U

/* Interface index given with -i; stays -1 until the option was parsed. */
static int ifindex = -1;

/*
 * SIGINT handler: if an interface was selected, reset its XDP fd to -1
 * via set_link_xdp_fd(), then terminate the process with status 0.
 */
static void int_exit(int sig)
{
	if (ifindex >= 0)
		set_link_xdp_fd(ifindex, -1);

	exit(0);
}
34
35/* simple per-protocol drop counter
36 */
37static void poll_stats(unsigned int kill_after_s)
38{
39 const unsigned int nr_protos = 256;
40 unsigned int nr_cpus = bpf_num_possible_cpus();
41 time_t started_at = time(NULL);
42 __u64 values[nr_cpus], prev[nr_protos][nr_cpus];
43 __u32 proto;
44 int i;
45
46 memset(prev, 0, sizeof(prev));
47
48 while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
49 sleep(STATS_INTERVAL_S);
50
51 for (proto = 0; proto < nr_protos; proto++) {
52 __u64 sum = 0;
53
54 assert(bpf_lookup_elem(map_fd[0], &proto, values) == 0);
55 for (i = 0; i < nr_cpus; i++)
56 sum += (values[i] - prev[proto][i]);
57
58 if (sum)
59 printf("proto %u: sum:%10llu pkts, rate:%10llu pkts/s\n",
60 proto, sum, sum / STATS_INTERVAL_S);
61 memcpy(prev[proto], values, sizeof(values));
62 }
63 }
64}
65
/*
 * Print the help text to stdout.
 *
 * @cmd: program name shown in the "Usage:" line.
 */
static void usage(const char *cmd)
{
	static const char *const option_lines[] = {
		" -i <ifindex> Interface Index\n",
		" -a <vip-service-address> IPv4 or IPv6\n",
		" -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n",
		" -s <source-ip> Used in the IPTunnel header\n",
		" -d <dest-ip> Used in the IPTunnel header\n",
		" -m <dest-MAC> Used in sending the IP Tunneled pkt\n",
		" -T <stop-after-X-seconds> Default: 0 (forever)\n",
		" -P <IP-Protocol> Default is TCP\n",
		" -h Display this help\n",
	};
	unsigned int k;

	fputs("Start a XDP prog which encapsulates incoming packets\n"
	      "in an IPv4/v6 header and XDP_TX it out. The dst <VIP:PORT>\n"
	      "is used to select packets to encapsulate\n\n", stdout);
	printf("Usage: %s [...]\n", cmd);

	for (k = 0; k < sizeof(option_lines) / sizeof(option_lines[0]); k++)
		fputs(option_lines[k], stdout);
}
82
/*
 * Convert the textual address @ipstr into binary form.
 *
 * @ipstr: IPv6 or IPv4 address in presentation format.
 * @addr:  output buffer of four 32-bit words. An IPv6 address fills all
 *         of them; an IPv4 address is stored in addr[0] and the other
 *         three words are cleared.
 *
 * IPv6 is tried first, then IPv4. Returns AF_INET6 or AF_INET on success;
 * otherwise prints a message to stderr and returns AF_UNSPEC.
 */
static int parse_ipstr(const char *ipstr, unsigned int *addr)
{
	if (inet_pton(AF_INET6, ipstr, addr) == 1)
		return AF_INET6;

	if (inet_pton(AF_INET, ipstr, addr) != 1) {
		fprintf(stderr, "%s is an invalid IP\n", ipstr);
		return AF_UNSPEC;
	}

	/* Only the first word holds the IPv4 address; zero the rest. */
	memset(&addr[1], 0, 3 * sizeof(addr[0]));
	return AF_INET;
}
95
96static int parse_ports(const char *port_str, int *min_port, int *max_port)
97{
98 char *end;
99 long tmp_min_port;
100 long tmp_max_port;
101
102 tmp_min_port = strtol(optarg, &end, 10);
103 if (tmp_min_port < 1 || tmp_min_port > 65535) {
104 fprintf(stderr, "Invalid port(s):%s\n", optarg);
105 return 1;
106 }
107
108 if (*end == '-') {
109 end++;
110 tmp_max_port = strtol(end, NULL, 10);
111 if (tmp_max_port < 1 || tmp_max_port > 65535) {
112 fprintf(stderr, "Invalid port(s):%s\n", optarg);
113 return 1;
114 }
115 } else {
116 tmp_max_port = tmp_min_port;
117 }
118
119 if (tmp_min_port > tmp_max_port) {
120 fprintf(stderr, "Invalid port(s):%s\n", optarg);
121 return 1;
122 }
123
124 if (tmp_max_port - tmp_min_port + 1 > MAX_IPTNL_ENTRIES) {
125 fprintf(stderr, "Port range (%s) is larger than %u\n",
126 port_str, MAX_IPTNL_ENTRIES);
127 return 1;
128 }
129 *min_port = tmp_min_port;
130 *max_port = tmp_max_port;
131
132 return 0;
133}
134
/*
 * Entry point of the user-space loader.
 *
 * Parses the command line, loads "<argv[0]>_kern.o", inserts one
 * VIP -> tunnel entry per port of the requested range into the map,
 * sets the XDP program on the chosen interface and then prints
 * statistics until the -T timeout expires (forever when it is 0).
 * On the normal exit path the interface's XDP fd is reset to -1.
 *
 * Returns 0 after poll_stats() returns, and 1 on any usage, load,
 * map-update or link-setup error.
 */
135int main(int argc, char **argv)
136{
/* opt_flags[c] != 0 means option -c is mandatory and has not been seen yet. */
137 unsigned char opt_flags[256] = {};
138 unsigned int kill_after_s = 0;
139 const char *optstr = "i:a:p:s:d:m:T:P:h";
140 int min_port = 0, max_port = 0;
141 struct iptnl_info tnl = {};
142 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
143 struct vip vip = {};
144 char filename[256];
145 int opt;
146 int i;
147
/* Defaults: tunnel family unknown until -s/-d is parsed, protocol TCP. */
148 tnl.family = AF_UNSPEC;
149 vip.protocol = IPPROTO_TCP;
150
/*
 * Mark every lower-case option letter except 'h' as mandatory. The
 * upper-case options (-T, -P) and the ':' separators fail the range
 * test and therefore stay optional.
 */
151 for (i = 0; i < strlen(optstr); i++)
152 if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
153 opt_flags[(unsigned char)optstr[i]] = 1;
154
155 while ((opt = getopt(argc, argv, optstr)) != -1) {
156 unsigned short family;
157 unsigned int *v6;
158
159 switch (opt) {
160 case 'i':
161 ifindex = atoi(optarg);
162 break;
163 case 'a':
164 vip.family = parse_ipstr(optarg, vip.daddr.v6);
165 if (vip.family == AF_UNSPEC)
166 return 1;
167 break;
168 case 'p':
169 if (parse_ports(optarg, &min_port, &max_port))
170 return 1;
171 break;
172 case 'P':
173 vip.protocol = atoi(optarg);
174 break;
/* -s and -d share the parsing code and must agree on the IP version. */
175 case 's':
176 case 'd':
177 if (opt == 's')
178 v6 = tnl.saddr.v6;
179 else
180 v6 = tnl.daddr.v6;
181
182 family = parse_ipstr(optarg, v6);
183 if (family == AF_UNSPEC)
184 return 1;
185 if (tnl.family == AF_UNSPEC) {
186 tnl.family = family;
187 } else if (tnl.family != family) {
188 fprintf(stderr,
189 "The IP version of the src and dst addresses used in the IP encapsulation does not match\n");
190 return 1;
191 }
192 break;
193 case 'm':
194 if (!ether_aton_r(optarg,
195 (struct ether_addr *)tnl.dmac)) {
196 fprintf(stderr, "Invalid mac address:%s\n",
197 optarg);
198 return 1;
199 }
200 break;
201 case 'T':
202 kill_after_s = atoi(optarg);
203 break;
/* -h and unknown options both end up here: print help and fail. */
204 default:
205 usage(argv[0]);
206 return 1;
207 }
/* The option was accepted: it is no longer missing. */
208 opt_flags[opt] = 0;
209 }
210
/* Any flag still set belongs to a mandatory option that was not given. */
211 for (i = 0; i < strlen(optstr); i++) {
212 if (opt_flags[(unsigned int)optstr[i]]) {
213 fprintf(stderr, "Missing argument -%c\n", optstr[i]);
214 usage(argv[0]);
215 return 1;
216 }
217 }
218
219 if (setrlimit(RLIMIT_MEMLOCK, &r)) {
220 perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
221 return 1;
222 }
223
/* The object file name is derived from the name the program was started with. */
224 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
225
226 if (load_bpf_file(filename)) {
227 printf("%s", bpf_log_buf);
228 return 1;
229 }
230
231 if (!prog_fd[0]) {
232 printf("load_bpf_file: %s\n", strerror(errno));
233 return 1;
234 }
235
/* From here on Ctrl-C goes through int_exit(). */
236 signal(SIGINT, int_exit);
237
/*
 * One map entry per port in [min_port, max_port], all with the same
 * tunnel parameters. map_fd[1] is presumably vip2tnl, the second map
 * defined in the _kern.c file -- TODO confirm against the loader.
 */
238 while (min_port <= max_port) {
239 vip.dport = htons(min_port++);
240 if (bpf_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) {
241 perror("bpf_update_elem(&vip2tnl)");
242 return 1;
243 }
244 }
245
246 if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) {
247 printf("link set xdp fd failed\n");
248 return 1;
249 }
250
251 poll_stats(kill_after_s);
252
/* Timeout reached: reset the interface's XDP fd before exiting. */
253 set_link_xdp_fd(ifindex, -1);
254
255 return 0;
256}