diff options
author | Martin KaFai Lau <kafai@fb.com> | 2016-12-07 18:53:14 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-12-08 14:25:13 -0500 |
commit | 12d8bb64e3f65f5287ff17c084d076a28daa8096 (patch) | |
tree | e772f55d041956a3e07a487b48b42df8a0b830bf | |
parent | ea3349a03519dcd4f32d949cd80ab995623dc5ac (diff) |
bpf: xdp: Add XDP example for head adjustment
The XDP prog checks if the incoming packet matches any VIP:PORT
combination in the BPF hashmap. If it does, it will encapsulate
the packet with an IPv4/v6 header as instructed by the value of
the BPF hashmap and then XDP_TX it out.
The VIP:PORT -> IP-Encap-Info mapping can be specified by the
command-line arguments of the user prog.
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | samples/bpf/Makefile | 4 | ||||
-rw-r--r-- | samples/bpf/bpf_helpers.h | 2 | ||||
-rw-r--r-- | samples/bpf/bpf_load.c | 94 | ||||
-rw-r--r-- | samples/bpf/bpf_load.h | 1 | ||||
-rw-r--r-- | samples/bpf/xdp1_user.c | 93 | ||||
-rw-r--r-- | samples/bpf/xdp_tx_iptunnel_common.h | 37 | ||||
-rw-r--r-- | samples/bpf/xdp_tx_iptunnel_kern.c | 236 | ||||
-rw-r--r-- | samples/bpf/xdp_tx_iptunnel_user.c | 256 |
8 files changed, 630 insertions, 93 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 00cd3081c038..f2219c1489e5 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile | |||
@@ -33,6 +33,7 @@ hostprogs-y += trace_event | |||
33 | hostprogs-y += sampleip | 33 | hostprogs-y += sampleip |
34 | hostprogs-y += tc_l2_redirect | 34 | hostprogs-y += tc_l2_redirect |
35 | hostprogs-y += lwt_len_hist | 35 | hostprogs-y += lwt_len_hist |
36 | hostprogs-y += xdp_tx_iptunnel | ||
36 | 37 | ||
37 | test_lru_dist-objs := test_lru_dist.o libbpf.o | 38 | test_lru_dist-objs := test_lru_dist.o libbpf.o |
38 | sock_example-objs := sock_example.o libbpf.o | 39 | sock_example-objs := sock_example.o libbpf.o |
@@ -67,6 +68,7 @@ trace_event-objs := bpf_load.o libbpf.o trace_event_user.o | |||
67 | sampleip-objs := bpf_load.o libbpf.o sampleip_user.o | 68 | sampleip-objs := bpf_load.o libbpf.o sampleip_user.o |
68 | tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o | 69 | tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o |
69 | lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o | 70 | lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o |
71 | xdp_tx_iptunnel-objs := bpf_load.o libbpf.o xdp_tx_iptunnel_user.o | ||
70 | 72 | ||
71 | # Tell kbuild to always build the programs | 73 | # Tell kbuild to always build the programs |
72 | always := $(hostprogs-y) | 74 | always := $(hostprogs-y) |
@@ -99,6 +101,7 @@ always += test_current_task_under_cgroup_kern.o | |||
99 | always += trace_event_kern.o | 101 | always += trace_event_kern.o |
100 | always += sampleip_kern.o | 102 | always += sampleip_kern.o |
101 | always += lwt_len_hist_kern.o | 103 | always += lwt_len_hist_kern.o |
104 | always += xdp_tx_iptunnel_kern.o | ||
102 | 105 | ||
103 | HOSTCFLAGS += -I$(objtree)/usr/include | 106 | HOSTCFLAGS += -I$(objtree)/usr/include |
104 | HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ | 107 | HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ |
@@ -129,6 +132,7 @@ HOSTLOADLIBES_trace_event += -lelf | |||
129 | HOSTLOADLIBES_sampleip += -lelf | 132 | HOSTLOADLIBES_sampleip += -lelf |
130 | HOSTLOADLIBES_tc_l2_redirect += -l elf | 133 | HOSTLOADLIBES_tc_l2_redirect += -l elf |
131 | HOSTLOADLIBES_lwt_len_hist += -l elf | 134 | HOSTLOADLIBES_lwt_len_hist += -l elf |
135 | HOSTLOADLIBES_xdp_tx_iptunnel += -lelf | ||
132 | 136 | ||
133 | # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: | 137 | # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: |
134 | # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang | 138 | # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang |
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 8370a6e3839d..faaffe2e139a 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h | |||
@@ -57,6 +57,8 @@ static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = | |||
57 | (void *) BPF_FUNC_skb_set_tunnel_opt; | 57 | (void *) BPF_FUNC_skb_set_tunnel_opt; |
58 | static unsigned long long (*bpf_get_prandom_u32)(void) = | 58 | static unsigned long long (*bpf_get_prandom_u32)(void) = |
59 | (void *) BPF_FUNC_get_prandom_u32; | 59 | (void *) BPF_FUNC_get_prandom_u32; |
60 | static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = | ||
61 | (void *) BPF_FUNC_xdp_adjust_head; | ||
60 | 62 | ||
61 | /* llvm builtin functions that eBPF C program may use to | 63 | /* llvm builtin functions that eBPF C program may use to |
62 | * emit BPF_LD_ABS and BPF_LD_IND instructions | 64 | * emit BPF_LD_ABS and BPF_LD_IND instructions |
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 49b45ccbe153..e30b6de94f2e 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c | |||
@@ -12,6 +12,10 @@ | |||
12 | #include <linux/bpf.h> | 12 | #include <linux/bpf.h> |
13 | #include <linux/filter.h> | 13 | #include <linux/filter.h> |
14 | #include <linux/perf_event.h> | 14 | #include <linux/perf_event.h> |
15 | #include <linux/netlink.h> | ||
16 | #include <linux/rtnetlink.h> | ||
17 | #include <sys/types.h> | ||
18 | #include <sys/socket.h> | ||
15 | #include <sys/syscall.h> | 19 | #include <sys/syscall.h> |
16 | #include <sys/ioctl.h> | 20 | #include <sys/ioctl.h> |
17 | #include <sys/mman.h> | 21 | #include <sys/mman.h> |
@@ -450,3 +454,93 @@ struct ksym *ksym_search(long key) | |||
450 | /* out of range. return _stext */ | 454 | /* out of range. return _stext */ |
451 | return &syms[0]; | 455 | return &syms[0]; |
452 | } | 456 | } |
457 | |||
458 | int set_link_xdp_fd(int ifindex, int fd) | ||
459 | { | ||
460 | struct sockaddr_nl sa; | ||
461 | int sock, seq = 0, len, ret = -1; | ||
462 | char buf[4096]; | ||
463 | struct nlattr *nla, *nla_xdp; | ||
464 | struct { | ||
465 | struct nlmsghdr nh; | ||
466 | struct ifinfomsg ifinfo; | ||
467 | char attrbuf[64]; | ||
468 | } req; | ||
469 | struct nlmsghdr *nh; | ||
470 | struct nlmsgerr *err; | ||
471 | |||
472 | memset(&sa, 0, sizeof(sa)); | ||
473 | sa.nl_family = AF_NETLINK; | ||
474 | |||
475 | sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); | ||
476 | if (sock < 0) { | ||
477 | printf("open netlink socket: %s\n", strerror(errno)); | ||
478 | return -1; | ||
479 | } | ||
480 | |||
481 | if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { | ||
482 | printf("bind to netlink: %s\n", strerror(errno)); | ||
483 | goto cleanup; | ||
484 | } | ||
485 | |||
486 | memset(&req, 0, sizeof(req)); | ||
487 | req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); | ||
488 | req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; | ||
489 | req.nh.nlmsg_type = RTM_SETLINK; | ||
490 | req.nh.nlmsg_pid = 0; | ||
491 | req.nh.nlmsg_seq = ++seq; | ||
492 | req.ifinfo.ifi_family = AF_UNSPEC; | ||
493 | req.ifinfo.ifi_index = ifindex; | ||
494 | nla = (struct nlattr *)(((char *)&req) | ||
495 | + NLMSG_ALIGN(req.nh.nlmsg_len)); | ||
496 | nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; | ||
497 | |||
498 | nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN); | ||
499 | nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; | ||
500 | nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); | ||
501 | memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); | ||
502 | nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len; | ||
503 | |||
504 | req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); | ||
505 | |||
506 | if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { | ||
507 | printf("send to netlink: %s\n", strerror(errno)); | ||
508 | goto cleanup; | ||
509 | } | ||
510 | |||
511 | len = recv(sock, buf, sizeof(buf), 0); | ||
512 | if (len < 0) { | ||
513 | printf("recv from netlink: %s\n", strerror(errno)); | ||
514 | goto cleanup; | ||
515 | } | ||
516 | |||
517 | for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); | ||
518 | nh = NLMSG_NEXT(nh, len)) { | ||
519 | if (nh->nlmsg_pid != getpid()) { | ||
520 | printf("Wrong pid %d, expected %d\n", | ||
521 | nh->nlmsg_pid, getpid()); | ||
522 | goto cleanup; | ||
523 | } | ||
524 | if (nh->nlmsg_seq != seq) { | ||
525 | printf("Wrong seq %d, expected %d\n", | ||
526 | nh->nlmsg_seq, seq); | ||
527 | goto cleanup; | ||
528 | } | ||
529 | switch (nh->nlmsg_type) { | ||
530 | case NLMSG_ERROR: | ||
531 | err = (struct nlmsgerr *)NLMSG_DATA(nh); | ||
532 | if (!err->error) | ||
533 | continue; | ||
534 | printf("nlmsg error %s\n", strerror(-err->error)); | ||
535 | goto cleanup; | ||
536 | case NLMSG_DONE: | ||
537 | break; | ||
538 | } | ||
539 | } | ||
540 | |||
541 | ret = 0; | ||
542 | |||
543 | cleanup: | ||
544 | close(sock); | ||
545 | return ret; | ||
546 | } | ||
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index 4adeeef53ad6..fb46a421ab41 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h | |||
@@ -31,4 +31,5 @@ struct ksym { | |||
31 | 31 | ||
32 | int load_kallsyms(void); | 32 | int load_kallsyms(void); |
33 | struct ksym *ksym_search(long key); | 33 | struct ksym *ksym_search(long key); |
34 | int set_link_xdp_fd(int ifindex, int fd); | ||
34 | #endif | 35 | #endif |
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 2b2150d6d6f7..5f040a0d7712 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c | |||
@@ -5,111 +5,18 @@ | |||
5 | * License as published by the Free Software Foundation. | 5 | * License as published by the Free Software Foundation. |
6 | */ | 6 | */ |
7 | #include <linux/bpf.h> | 7 | #include <linux/bpf.h> |
8 | #include <linux/netlink.h> | ||
9 | #include <linux/rtnetlink.h> | ||
10 | #include <assert.h> | 8 | #include <assert.h> |
11 | #include <errno.h> | 9 | #include <errno.h> |
12 | #include <signal.h> | 10 | #include <signal.h> |
13 | #include <stdio.h> | 11 | #include <stdio.h> |
14 | #include <stdlib.h> | 12 | #include <stdlib.h> |
15 | #include <string.h> | 13 | #include <string.h> |
16 | #include <sys/socket.h> | ||
17 | #include <unistd.h> | 14 | #include <unistd.h> |
18 | 15 | ||
19 | #include "bpf_load.h" | 16 | #include "bpf_load.h" |
20 | #include "bpf_util.h" | 17 | #include "bpf_util.h" |
21 | #include "libbpf.h" | 18 | #include "libbpf.h" |
22 | 19 | ||
23 | static int set_link_xdp_fd(int ifindex, int fd) | ||
24 | { | ||
25 | struct sockaddr_nl sa; | ||
26 | int sock, seq = 0, len, ret = -1; | ||
27 | char buf[4096]; | ||
28 | struct nlattr *nla, *nla_xdp; | ||
29 | struct { | ||
30 | struct nlmsghdr nh; | ||
31 | struct ifinfomsg ifinfo; | ||
32 | char attrbuf[64]; | ||
33 | } req; | ||
34 | struct nlmsghdr *nh; | ||
35 | struct nlmsgerr *err; | ||
36 | |||
37 | memset(&sa, 0, sizeof(sa)); | ||
38 | sa.nl_family = AF_NETLINK; | ||
39 | |||
40 | sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); | ||
41 | if (sock < 0) { | ||
42 | printf("open netlink socket: %s\n", strerror(errno)); | ||
43 | return -1; | ||
44 | } | ||
45 | |||
46 | if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { | ||
47 | printf("bind to netlink: %s\n", strerror(errno)); | ||
48 | goto cleanup; | ||
49 | } | ||
50 | |||
51 | memset(&req, 0, sizeof(req)); | ||
52 | req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); | ||
53 | req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; | ||
54 | req.nh.nlmsg_type = RTM_SETLINK; | ||
55 | req.nh.nlmsg_pid = 0; | ||
56 | req.nh.nlmsg_seq = ++seq; | ||
57 | req.ifinfo.ifi_family = AF_UNSPEC; | ||
58 | req.ifinfo.ifi_index = ifindex; | ||
59 | nla = (struct nlattr *)(((char *)&req) | ||
60 | + NLMSG_ALIGN(req.nh.nlmsg_len)); | ||
61 | nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; | ||
62 | |||
63 | nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN); | ||
64 | nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; | ||
65 | nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); | ||
66 | memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); | ||
67 | nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len; | ||
68 | |||
69 | req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); | ||
70 | |||
71 | if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { | ||
72 | printf("send to netlink: %s\n", strerror(errno)); | ||
73 | goto cleanup; | ||
74 | } | ||
75 | |||
76 | len = recv(sock, buf, sizeof(buf), 0); | ||
77 | if (len < 0) { | ||
78 | printf("recv from netlink: %s\n", strerror(errno)); | ||
79 | goto cleanup; | ||
80 | } | ||
81 | |||
82 | for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); | ||
83 | nh = NLMSG_NEXT(nh, len)) { | ||
84 | if (nh->nlmsg_pid != getpid()) { | ||
85 | printf("Wrong pid %d, expected %d\n", | ||
86 | nh->nlmsg_pid, getpid()); | ||
87 | goto cleanup; | ||
88 | } | ||
89 | if (nh->nlmsg_seq != seq) { | ||
90 | printf("Wrong seq %d, expected %d\n", | ||
91 | nh->nlmsg_seq, seq); | ||
92 | goto cleanup; | ||
93 | } | ||
94 | switch (nh->nlmsg_type) { | ||
95 | case NLMSG_ERROR: | ||
96 | err = (struct nlmsgerr *)NLMSG_DATA(nh); | ||
97 | if (!err->error) | ||
98 | continue; | ||
99 | printf("nlmsg error %s\n", strerror(-err->error)); | ||
100 | goto cleanup; | ||
101 | case NLMSG_DONE: | ||
102 | break; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | ret = 0; | ||
107 | |||
108 | cleanup: | ||
109 | close(sock); | ||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | static int ifindex; | 20 | static int ifindex; |
114 | 21 | ||
115 | static void int_exit(int sig) | 22 | static void int_exit(int sig) |
diff --git a/samples/bpf/xdp_tx_iptunnel_common.h b/samples/bpf/xdp_tx_iptunnel_common.h new file mode 100644 index 000000000000..dd12cc35110f --- /dev/null +++ b/samples/bpf/xdp_tx_iptunnel_common.h | |||
@@ -0,0 +1,37 @@ | |||
1 | /* Copyright (c) 2016 Facebook | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | */ | ||
7 | #ifndef _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H | ||
8 | #define _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H | ||
9 | |||
10 | #include <linux/types.h> | ||
11 | |||
12 | #define MAX_IPTNL_ENTRIES 256U | ||
13 | |||
14 | struct vip { | ||
15 | union { | ||
16 | __u32 v6[4]; | ||
17 | __u32 v4; | ||
18 | } daddr; | ||
19 | __u16 dport; | ||
20 | __u16 family; | ||
21 | __u8 protocol; | ||
22 | }; | ||
23 | |||
24 | struct iptnl_info { | ||
25 | union { | ||
26 | __u32 v6[4]; | ||
27 | __u32 v4; | ||
28 | } saddr; | ||
29 | union { | ||
30 | __u32 v6[4]; | ||
31 | __u32 v4; | ||
32 | } daddr; | ||
33 | __u16 family; | ||
34 | __u8 dmac[6]; | ||
35 | }; | ||
36 | |||
37 | #endif | ||
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c new file mode 100644 index 000000000000..85c38ecd3a2d --- /dev/null +++ b/samples/bpf/xdp_tx_iptunnel_kern.c | |||
@@ -0,0 +1,236 @@ | |||
1 | /* Copyright (c) 2016 Facebook | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | * | ||
7 | * This program shows how to use bpf_xdp_adjust_head() by | ||
8 | * encapsulating the incoming packet in an IPv4/v6 header | ||
9 | * and then XDP_TX it out. | ||
10 | */ | ||
11 | #include <uapi/linux/bpf.h> | ||
12 | #include <linux/in.h> | ||
13 | #include <linux/if_ether.h> | ||
14 | #include <linux/if_packet.h> | ||
15 | #include <linux/if_vlan.h> | ||
16 | #include <linux/ip.h> | ||
17 | #include <linux/ipv6.h> | ||
18 | #include "bpf_helpers.h" | ||
19 | #include "xdp_tx_iptunnel_common.h" | ||
20 | |||
21 | struct bpf_map_def SEC("maps") rxcnt = { | ||
22 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | ||
23 | .key_size = sizeof(__u32), | ||
24 | .value_size = sizeof(__u64), | ||
25 | .max_entries = 256, | ||
26 | }; | ||
27 | |||
28 | struct bpf_map_def SEC("maps") vip2tnl = { | ||
29 | .type = BPF_MAP_TYPE_HASH, | ||
30 | .key_size = sizeof(struct vip), | ||
31 | .value_size = sizeof(struct iptnl_info), | ||
32 | .max_entries = MAX_IPTNL_ENTRIES, | ||
33 | }; | ||
34 | |||
35 | static __always_inline void count_tx(u32 protocol) | ||
36 | { | ||
37 | u64 *rxcnt_count; | ||
38 | |||
39 | rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol); | ||
40 | if (rxcnt_count) | ||
41 | *rxcnt_count += 1; | ||
42 | } | ||
43 | |||
44 | static __always_inline int get_dport(void *trans_data, void *data_end, | ||
45 | u8 protocol) | ||
46 | { | ||
47 | struct tcphdr *th; | ||
48 | struct udphdr *uh; | ||
49 | |||
50 | switch (protocol) { | ||
51 | case IPPROTO_TCP: | ||
52 | th = (struct tcphdr *)trans_data; | ||
53 | if (th + 1 > data_end) | ||
54 | return -1; | ||
55 | return th->dest; | ||
56 | case IPPROTO_UDP: | ||
57 | uh = (struct udphdr *)trans_data; | ||
58 | if (uh + 1 > data_end) | ||
59 | return -1; | ||
60 | return uh->dest; | ||
61 | default: | ||
62 | return 0; | ||
63 | } | ||
64 | } | ||
65 | |||
66 | static __always_inline void set_ethhdr(struct ethhdr *new_eth, | ||
67 | const struct ethhdr *old_eth, | ||
68 | const struct iptnl_info *tnl, | ||
69 | __be16 h_proto) | ||
70 | { | ||
71 | memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source)); | ||
72 | memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest)); | ||
73 | new_eth->h_proto = h_proto; | ||
74 | } | ||
75 | |||
76 | static __always_inline int handle_ipv4(struct xdp_md *xdp) | ||
77 | { | ||
78 | void *data_end = (void *)(long)xdp->data_end; | ||
79 | void *data = (void *)(long)xdp->data; | ||
80 | struct iptnl_info *tnl; | ||
81 | struct ethhdr *new_eth; | ||
82 | struct ethhdr *old_eth; | ||
83 | struct iphdr *iph = data + sizeof(struct ethhdr); | ||
84 | u16 *next_iph_u16; | ||
85 | u16 payload_len; | ||
86 | struct vip vip = {}; | ||
87 | int dport; | ||
88 | u32 csum = 0; | ||
89 | int i; | ||
90 | |||
91 | if (iph + 1 > data_end) | ||
92 | return XDP_DROP; | ||
93 | |||
94 | dport = get_dport(iph + 1, data_end, iph->protocol); | ||
95 | if (dport == -1) | ||
96 | return XDP_DROP; | ||
97 | |||
98 | vip.protocol = iph->protocol; | ||
99 | vip.family = AF_INET; | ||
100 | vip.daddr.v4 = iph->daddr; | ||
101 | vip.dport = dport; | ||
102 | payload_len = ntohs(iph->tot_len); | ||
103 | |||
104 | tnl = bpf_map_lookup_elem(&vip2tnl, &vip); | ||
105 | /* It only does v4-in-v4 */ | ||
106 | if (!tnl || tnl->family != AF_INET) | ||
107 | return XDP_PASS; | ||
108 | |||
109 | /* The vip key is found. Add an IP header and send it out */ | ||
110 | |||
111 | if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) | ||
112 | return XDP_DROP; | ||
113 | |||
114 | data = (void *)(long)xdp->data; | ||
115 | data_end = (void *)(long)xdp->data_end; | ||
116 | |||
117 | new_eth = data; | ||
118 | iph = data + sizeof(*new_eth); | ||
119 | old_eth = data + sizeof(*iph); | ||
120 | |||
121 | if (new_eth + 1 > data_end || | ||
122 | old_eth + 1 > data_end || | ||
123 | iph + 1 > data_end) | ||
124 | return XDP_DROP; | ||
125 | |||
126 | set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP)); | ||
127 | |||
128 | iph->version = 4; | ||
129 | iph->ihl = sizeof(*iph) >> 2; | ||
130 | iph->frag_off = 0; | ||
131 | iph->protocol = IPPROTO_IPIP; | ||
132 | iph->check = 0; | ||
133 | iph->tos = 0; | ||
134 | iph->tot_len = htons(payload_len + sizeof(*iph)); | ||
135 | iph->daddr = tnl->daddr.v4; | ||
136 | iph->saddr = tnl->saddr.v4; | ||
137 | iph->ttl = 8; | ||
138 | |||
139 | next_iph_u16 = (u16 *)iph; | ||
140 | #pragma clang loop unroll(full) | ||
141 | for (i = 0; i < sizeof(*iph) >> 1; i++) | ||
142 | csum += *next_iph_u16++; | ||
143 | |||
144 | iph->check = ~((csum & 0xffff) + (csum >> 16)); | ||
145 | |||
146 | count_tx(vip.protocol); | ||
147 | |||
148 | return XDP_TX; | ||
149 | } | ||
150 | |||
151 | static __always_inline int handle_ipv6(struct xdp_md *xdp) | ||
152 | { | ||
153 | void *data_end = (void *)(long)xdp->data_end; | ||
154 | void *data = (void *)(long)xdp->data; | ||
155 | struct iptnl_info *tnl; | ||
156 | struct ethhdr *new_eth; | ||
157 | struct ethhdr *old_eth; | ||
158 | struct ipv6hdr *ip6h = data + sizeof(struct ethhdr); | ||
159 | __u16 payload_len; | ||
160 | struct vip vip = {}; | ||
161 | int dport; | ||
162 | |||
163 | if (ip6h + 1 > data_end) | ||
164 | return XDP_DROP; | ||
165 | |||
166 | dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr); | ||
167 | if (dport == -1) | ||
168 | return XDP_DROP; | ||
169 | |||
170 | vip.protocol = ip6h->nexthdr; | ||
171 | vip.family = AF_INET6; | ||
172 | memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr)); | ||
173 | vip.dport = dport; | ||
174 | payload_len = ip6h->payload_len; | ||
175 | |||
176 | tnl = bpf_map_lookup_elem(&vip2tnl, &vip); | ||
177 | /* It only does v6-in-v6 */ | ||
178 | if (!tnl || tnl->family != AF_INET6) | ||
179 | return XDP_PASS; | ||
180 | |||
181 | /* The vip key is found. Add an IP header and send it out */ | ||
182 | |||
183 | if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) | ||
184 | return XDP_DROP; | ||
185 | |||
186 | data = (void *)(long)xdp->data; | ||
187 | data_end = (void *)(long)xdp->data_end; | ||
188 | |||
189 | new_eth = data; | ||
190 | ip6h = data + sizeof(*new_eth); | ||
191 | old_eth = data + sizeof(*ip6h); | ||
192 | |||
193 | if (new_eth + 1 > data_end || | ||
194 | old_eth + 1 > data_end || | ||
195 | ip6h + 1 > data_end) | ||
196 | return XDP_DROP; | ||
197 | |||
198 | set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6)); | ||
199 | |||
200 | ip6h->version = 6; | ||
201 | ip6h->priority = 0; | ||
202 | memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); | ||
203 | ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h)); | ||
204 | ip6h->nexthdr = IPPROTO_IPV6; | ||
205 | ip6h->hop_limit = 8; | ||
206 | memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6)); | ||
207 | memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6)); | ||
208 | |||
209 | count_tx(vip.protocol); | ||
210 | |||
211 | return XDP_TX; | ||
212 | } | ||
213 | |||
214 | SEC("xdp_tx_iptunnel") | ||
215 | int _xdp_tx_iptunnel(struct xdp_md *xdp) | ||
216 | { | ||
217 | void *data_end = (void *)(long)xdp->data_end; | ||
218 | void *data = (void *)(long)xdp->data; | ||
219 | struct ethhdr *eth = data; | ||
220 | __u16 h_proto; | ||
221 | |||
222 | if (eth + 1 > data_end) | ||
223 | return XDP_DROP; | ||
224 | |||
225 | h_proto = eth->h_proto; | ||
226 | |||
227 | if (h_proto == htons(ETH_P_IP)) | ||
228 | return handle_ipv4(xdp); | ||
229 | else if (h_proto == htons(ETH_P_IPV6)) | ||
230 | |||
231 | return handle_ipv6(xdp); | ||
232 | else | ||
233 | return XDP_PASS; | ||
234 | } | ||
235 | |||
236 | char _license[] SEC("license") = "GPL"; | ||
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c new file mode 100644 index 000000000000..7a71f5c74684 --- /dev/null +++ b/samples/bpf/xdp_tx_iptunnel_user.c | |||
@@ -0,0 +1,256 @@ | |||
1 | /* Copyright (c) 2016 Facebook | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | */ | ||
7 | #include <linux/bpf.h> | ||
8 | #include <assert.h> | ||
9 | #include <errno.h> | ||
10 | #include <signal.h> | ||
11 | #include <stdio.h> | ||
12 | #include <stdlib.h> | ||
13 | #include <string.h> | ||
14 | #include <sys/resource.h> | ||
15 | #include <arpa/inet.h> | ||
16 | #include <netinet/ether.h> | ||
17 | #include <unistd.h> | ||
18 | #include <time.h> | ||
19 | #include "bpf_load.h" | ||
20 | #include "libbpf.h" | ||
21 | #include "bpf_util.h" | ||
22 | #include "xdp_tx_iptunnel_common.h" | ||
23 | |||
24 | #define STATS_INTERVAL_S 2U | ||
25 | |||
26 | static int ifindex = -1; | ||
27 | |||
28 | static void int_exit(int sig) | ||
29 | { | ||
30 | if (ifindex > -1) | ||
31 | set_link_xdp_fd(ifindex, -1); | ||
32 | exit(0); | ||
33 | } | ||
34 | |||
35 | /* simple per-protocol counter of packets encapsulated and sent via XDP_TX | ||
36 | */ | ||
37 | static void poll_stats(unsigned int kill_after_s) | ||
38 | { | ||
39 | const unsigned int nr_protos = 256; | ||
40 | unsigned int nr_cpus = bpf_num_possible_cpus(); | ||
41 | time_t started_at = time(NULL); | ||
42 | __u64 values[nr_cpus], prev[nr_protos][nr_cpus]; | ||
43 | __u32 proto; | ||
44 | int i; | ||
45 | |||
46 | memset(prev, 0, sizeof(prev)); | ||
47 | |||
48 | while (!kill_after_s || time(NULL) - started_at <= kill_after_s) { | ||
49 | sleep(STATS_INTERVAL_S); | ||
50 | |||
51 | for (proto = 0; proto < nr_protos; proto++) { | ||
52 | __u64 sum = 0; | ||
53 | |||
54 | assert(bpf_lookup_elem(map_fd[0], &proto, values) == 0); | ||
55 | for (i = 0; i < nr_cpus; i++) | ||
56 | sum += (values[i] - prev[proto][i]); | ||
57 | |||
58 | if (sum) | ||
59 | printf("proto %u: sum:%10llu pkts, rate:%10llu pkts/s\n", | ||
60 | proto, sum, sum / STATS_INTERVAL_S); | ||
61 | memcpy(prev[proto], values, sizeof(values)); | ||
62 | } | ||
63 | } | ||
64 | } | ||
65 | |||
66 | static void usage(const char *cmd) | ||
67 | { | ||
68 | printf("Start a XDP prog which encapsulates incoming packets\n" | ||
69 | "in an IPv4/v6 header and XDP_TX it out. The dst <VIP:PORT>\n" | ||
70 | "is used to select packets to encapsulate\n\n"); | ||
71 | printf("Usage: %s [...]\n", cmd); | ||
72 | printf(" -i <ifindex> Interface Index\n"); | ||
73 | printf(" -a <vip-service-address> IPv4 or IPv6\n"); | ||
74 | printf(" -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n"); | ||
75 | printf(" -s <source-ip> Used in the IPTunnel header\n"); | ||
76 | printf(" -d <dest-ip> Used in the IPTunnel header\n"); | ||
77 | printf(" -m <dest-MAC> Used in sending the IP Tunneled pkt\n"); | ||
78 | printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n"); | ||
79 | printf(" -P <IP-Protocol> Default is TCP\n"); | ||
80 | printf(" -h Display this help\n"); | ||
81 | } | ||
82 | |||
83 | static int parse_ipstr(const char *ipstr, unsigned int *addr) | ||
84 | { | ||
85 | if (inet_pton(AF_INET6, ipstr, addr) == 1) { | ||
86 | return AF_INET6; | ||
87 | } else if (inet_pton(AF_INET, ipstr, addr) == 1) { | ||
88 | addr[1] = addr[2] = addr[3] = 0; | ||
89 | return AF_INET; | ||
90 | } | ||
91 | |||
92 | fprintf(stderr, "%s is an invalid IP\n", ipstr); | ||
93 | return AF_UNSPEC; | ||
94 | } | ||
95 | |||
96 | static int parse_ports(const char *port_str, int *min_port, int *max_port) | ||
97 | { | ||
98 | char *end; | ||
99 | long tmp_min_port; | ||
100 | long tmp_max_port; | ||
101 | |||
102 | tmp_min_port = strtol(optarg, &end, 10); | ||
103 | if (tmp_min_port < 1 || tmp_min_port > 65535) { | ||
104 | fprintf(stderr, "Invalid port(s):%s\n", optarg); | ||
105 | return 1; | ||
106 | } | ||
107 | |||
108 | if (*end == '-') { | ||
109 | end++; | ||
110 | tmp_max_port = strtol(end, NULL, 10); | ||
111 | if (tmp_max_port < 1 || tmp_max_port > 65535) { | ||
112 | fprintf(stderr, "Invalid port(s):%s\n", optarg); | ||
113 | return 1; | ||
114 | } | ||
115 | } else { | ||
116 | tmp_max_port = tmp_min_port; | ||
117 | } | ||
118 | |||
119 | if (tmp_min_port > tmp_max_port) { | ||
120 | fprintf(stderr, "Invalid port(s):%s\n", optarg); | ||
121 | return 1; | ||
122 | } | ||
123 | |||
124 | if (tmp_max_port - tmp_min_port + 1 > MAX_IPTNL_ENTRIES) { | ||
125 | fprintf(stderr, "Port range (%s) is larger than %u\n", | ||
126 | port_str, MAX_IPTNL_ENTRIES); | ||
127 | return 1; | ||
128 | } | ||
129 | *min_port = tmp_min_port; | ||
130 | *max_port = tmp_max_port; | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | int main(int argc, char **argv) | ||
136 | { | ||
137 | unsigned char opt_flags[256] = {}; | ||
138 | unsigned int kill_after_s = 0; | ||
139 | const char *optstr = "i:a:p:s:d:m:T:P:h"; | ||
140 | int min_port = 0, max_port = 0; | ||
141 | struct iptnl_info tnl = {}; | ||
142 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; | ||
143 | struct vip vip = {}; | ||
144 | char filename[256]; | ||
145 | int opt; | ||
146 | int i; | ||
147 | |||
148 | tnl.family = AF_UNSPEC; | ||
149 | vip.protocol = IPPROTO_TCP; | ||
150 | |||
151 | for (i = 0; i < strlen(optstr); i++) | ||
152 | if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z') | ||
153 | opt_flags[(unsigned char)optstr[i]] = 1; | ||
154 | |||
155 | while ((opt = getopt(argc, argv, optstr)) != -1) { | ||
156 | unsigned short family; | ||
157 | unsigned int *v6; | ||
158 | |||
159 | switch (opt) { | ||
160 | case 'i': | ||
161 | ifindex = atoi(optarg); | ||
162 | break; | ||
163 | case 'a': | ||
164 | vip.family = parse_ipstr(optarg, vip.daddr.v6); | ||
165 | if (vip.family == AF_UNSPEC) | ||
166 | return 1; | ||
167 | break; | ||
168 | case 'p': | ||
169 | if (parse_ports(optarg, &min_port, &max_port)) | ||
170 | return 1; | ||
171 | break; | ||
172 | case 'P': | ||
173 | vip.protocol = atoi(optarg); | ||
174 | break; | ||
175 | case 's': | ||
176 | case 'd': | ||
177 | if (opt == 's') | ||
178 | v6 = tnl.saddr.v6; | ||
179 | else | ||
180 | v6 = tnl.daddr.v6; | ||
181 | |||
182 | family = parse_ipstr(optarg, v6); | ||
183 | if (family == AF_UNSPEC) | ||
184 | return 1; | ||
185 | if (tnl.family == AF_UNSPEC) { | ||
186 | tnl.family = family; | ||
187 | } else if (tnl.family != family) { | ||
188 | fprintf(stderr, | ||
189 | "The IP version of the src and dst addresses used in the IP encapsulation does not match\n"); | ||
190 | return 1; | ||
191 | } | ||
192 | break; | ||
193 | case 'm': | ||
194 | if (!ether_aton_r(optarg, | ||
195 | (struct ether_addr *)tnl.dmac)) { | ||
196 | fprintf(stderr, "Invalid mac address:%s\n", | ||
197 | optarg); | ||
198 | return 1; | ||
199 | } | ||
200 | break; | ||
201 | case 'T': | ||
202 | kill_after_s = atoi(optarg); | ||
203 | break; | ||
204 | default: | ||
205 | usage(argv[0]); | ||
206 | return 1; | ||
207 | } | ||
208 | opt_flags[opt] = 0; | ||
209 | } | ||
210 | |||
211 | for (i = 0; i < strlen(optstr); i++) { | ||
212 | if (opt_flags[(unsigned int)optstr[i]]) { | ||
213 | fprintf(stderr, "Missing argument -%c\n", optstr[i]); | ||
214 | usage(argv[0]); | ||
215 | return 1; | ||
216 | } | ||
217 | } | ||
218 | |||
219 | if (setrlimit(RLIMIT_MEMLOCK, &r)) { | ||
220 | perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); | ||
221 | return 1; | ||
222 | } | ||
223 | |||
224 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); | ||
225 | |||
226 | if (load_bpf_file(filename)) { | ||
227 | printf("%s", bpf_log_buf); | ||
228 | return 1; | ||
229 | } | ||
230 | |||
231 | if (!prog_fd[0]) { | ||
232 | printf("load_bpf_file: %s\n", strerror(errno)); | ||
233 | return 1; | ||
234 | } | ||
235 | |||
236 | signal(SIGINT, int_exit); | ||
237 | |||
238 | while (min_port <= max_port) { | ||
239 | vip.dport = htons(min_port++); | ||
240 | if (bpf_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) { | ||
241 | perror("bpf_update_elem(&vip2tnl)"); | ||
242 | return 1; | ||
243 | } | ||
244 | } | ||
245 | |||
246 | if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) { | ||
247 | printf("link set xdp fd failed\n"); | ||
248 | return 1; | ||
249 | } | ||
250 | |||
251 | poll_stats(kill_after_s); | ||
252 | |||
253 | set_link_xdp_fd(ifindex, -1); | ||
254 | |||
255 | return 0; | ||
256 | } | ||