aboutsummaryrefslogtreecommitdiffstats
path: root/samples
diff options
context:
space:
mode:
author: Jesper Dangaard Brouer <brouer@redhat.com> 2018-01-10 12:21:44 -0500
committer: Daniel Borkmann <daniel@iogearbox.net> 2018-01-10 19:02:25 -0500
commit: 36e04a2d78d97cc3a02a168541dfa00c8e4b30f2 (patch)
tree: 1b55d2ad890c2b3bd8ad31e35706e6fa3b94721b /samples
parent: 632130ed3bddca3644d58a5cb6e20915f36e4d44 (diff)
samples/bpf: xdp2skb_meta shows transferring info from XDP to SKB
Creating a bpf sample that shows how to use the XDP 'data_meta' infrastructure, created by Daniel Borkmann. Very few drivers support this feature, but I wanted a functional sample to begin with, when working on adding driver support.

XDP data_meta is about creating a communication channel between BPF programs. This can be XDP tail-progs, but also other SKB based BPF hooks, like in this case the TC clsact hook. In this sample I show that XDP can store info named "mark", and TC/clsact chooses to use this info and store it into the skb->mark.

It is a bit annoying that XDP and TC samples use different tools/libs when attaching their BPF hooks. As the XDP and TC programs need to cooperate and agree on a struct-layout, it is best/easiest if the two programs can be contained within the same BPF restricted-C file.

As the bpf-loader, I chose not to use bpf_load.c (or libbpf), but instead wrote a bash shell script named xdp2skb_meta.sh, which demonstrates how to use the iproute2 cmdline tools 'tc' and 'ip' for loading BPF programs. To make it easy for first-time users, the shell script has command line parsing, and supports --verbose and --dry-run mode, if you just want to see/learn the tc+ip command syntax:

 # ./xdp2skb_meta.sh --dev ixgbe2 --dry-run
 # Dry-run mode: enable VERBOSE and don't call TC+IP
 tc qdisc del dev ixgbe2 clsact
 tc qdisc add dev ixgbe2 clsact
 tc filter add dev ixgbe2 ingress prio 1 handle 1 bpf da obj ./xdp2skb_meta_kern.o sec tc_mark
 # Flush XDP on device: ixgbe2
 ip link set dev ixgbe2 xdp off
 ip link set dev ixgbe2 xdp obj ./xdp2skb_meta_kern.o sec xdp_mark

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'samples')
-rw-r--r-- samples/bpf/Makefile 1
-rwxr-xr-x samples/bpf/xdp2skb_meta.sh 220
-rw-r--r-- samples/bpf/xdp2skb_meta_kern.c 103
3 files changed, 324 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 3ff7a05bea9a..7f61a3d57fa7 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -142,6 +142,7 @@ always += xdp_redirect_map_kern.o
142always += xdp_redirect_cpu_kern.o 142always += xdp_redirect_cpu_kern.o
143always += xdp_monitor_kern.o 143always += xdp_monitor_kern.o
144always += xdp_rxq_info_kern.o 144always += xdp_rxq_info_kern.o
145always += xdp2skb_meta_kern.o
145always += syscall_tp_kern.o 146always += syscall_tp_kern.o
146 147
147HOSTCFLAGS += -I$(objtree)/usr/include 148HOSTCFLAGS += -I$(objtree)/usr/include
diff --git a/samples/bpf/xdp2skb_meta.sh b/samples/bpf/xdp2skb_meta.sh
new file mode 100755
index 000000000000..b9c9549c4c27
--- /dev/null
+++ b/samples/bpf/xdp2skb_meta.sh
@@ -0,0 +1,220 @@
1#!/bin/bash
2#
3# SPDX-License-Identifier: GPL-2.0
4# Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
5#
# Bash-shell example of using the iproute2 tools 'tc' and 'ip' to load
# eBPF programs, both for XDP and clsbpf.  Shell script function
# wrappers and even long-option parsing are illustrated, for ease of
# use.
#
# Related to samples/bpf/xdp2skb_meta_kern.c, which contains BPF-progs
# that need to collaborate between the XDP and TC hooks.  Thus, it is
# convenient that the same tool loads both programs that need to work
# together.
#
# Name of the BPF object file; it is looked up in this script's directory.
BPF_FILE=xdp2skb_meta_kern.o
# "$0" is quoted so a script path containing whitespace is not word-split.
DIR=$(dirname "$0")

# iproute2 binaries used by the tc/ip wrapper functions
export TC=/usr/sbin/tc
export IP=/usr/sbin/ip
21
# Print the command line help text on stdout.
# Every option accepted by the getopt call is listed, including the
# short forms -f and -h that the synopsis line advertises.
function usage() {
    echo ""
    echo "Usage: $0 [-vfh] --dev ethX"
    echo "  -d | --dev     : Network device (required)"
    echo "  -f | --flush   : Cleanup flush TC and XDP progs"
    echo "       --list    : (\$LIST)    List TC and XDP progs"
    echo "  -v | --verbose : (\$VERBOSE) Verbose"
    echo "       --dry-run : (\$DRYRUN)  Dry-run only (echo commands)"
    echo "  -h | --help    : Show this help text"
    echo ""
}
32
33## -- General shell logging cmds --
# Report a fatal error on stderr and terminate the script.
#   $1   : exit code to terminate with
#   $2.. : words of the error message
function err() {
    local rc=$1
    shift
    echo "ERROR: $*" >&2
    exit $rc
}
40
# Print an informational line, prefixed with "# ", but only when
# $VERBOSE is non-empty.  Silent (and successful) otherwise.
function info() {
    [[ -z "$VERBOSE" ]] && return 0
    echo "# $*"
}
46
47## -- Helper function calls --
48
# Wrapper call for TC and IP
#   $1   : command (path) to execute
#   $2   : empty => a failing command aborts the script via err();
#          non-empty => a failing command is tolerated
#   $3.. : arguments handed to the command unchanged
# - With $VERBOSE set, the command line is echoed before execution
# - With $DRYRUN set, nothing is executed
# - Will display the offending command on failure
function _call_cmd() {
    local cmd="$1"
    local allow_fail="$2"
    shift 2
    if [[ -n "$VERBOSE" ]]; then
        # Quote $cmd so a path with whitespace reaches basename intact
        echo "$(basename "$cmd") $*"
    fi
    if [[ -n "$DRYRUN" ]]; then
        return 0
    fi
    "$cmd" "$@"
    local status=$?
    if (( status != 0 )) && [[ -z "$allow_fail" ]]; then
        err 2 "Exec error($status) occurred cmd: \"$cmd $*\""
    fi
    return 0
}
# Run $TC with the given arguments; a failure aborts the script.
function call_tc() { _call_cmd "$TC" "" "$@"; }

# Run $TC with the given arguments; a failure is tolerated.
function call_tc_allow_fail() { _call_cmd "$TC" "allow_fail" "$@"; }

# Run $IP with the given arguments; a failure aborts the script.
function call_ip() { _call_cmd "$IP" "" "$@"; }
78
## --- Parse command line arguments / parameters ---
# The external program "getopt" is used to get --long-options
if ! OPTIONS=$(getopt -o vfhd: \
    --long verbose,flush,help,list,dev:,dry-run -- "$@"); then
    err 4 "Error calling getopt"
fi
eval set -- "$OPTIONS"

# DEV and FLUSH can only be set from the command line; VERBOSE, LIST
# and DRYRUN are not cleared here, so values inherited from the
# environment stay in effect.
unset DEV FLUSH
while true; do
    case "$1" in
        -h | --help )
            usage
            exit 0
            ;;
        -d | --dev ) # device
            DEV=$2
            info "Device set to: DEV=$DEV" >&2
            shift 2
            ;;
        -f | --flush )
            FLUSH=yes
            shift
            ;;
        --list )
            LIST=yes
            shift
            ;;
        -v | --verbose )
            VERBOSE=yes
            shift
            ;;
        --dry-run )
            # Dry-run implies verbose, so the commands get displayed
            DRYRUN=yes
            VERBOSE=yes
            info "Dry-run mode: enable VERBOSE and don't call TC+IP" >&2
            shift
            ;;
        * )
            # Covers the "--" end-of-options marker and anything else:
            # drop the word and stop parsing
            shift
            break
            ;;
    esac
done
130
# The BPF object file is expected next to this script
FILE="$DIR/$BPF_FILE"
[[ -e $FILE ]] || err 3 "Missing BPF object file ($FILE)"

# A network device is mandatory for every mode of operation
if [[ -z $DEV ]]; then
    usage
    err 2 "Please specify network device -- required option --dev"
fi
140
141## -- Function calls --
142
# Show the TC filters attached at the ingress hook of a device.
#   $1 : network device
function list_tc()
{
    local device="$1"
    shift
    info "Listing current TC ingress rules"
    # Quoted: the device name must reach tc as exactly one argument
    call_tc filter show dev "$device" ingress
}
150
# Show the "xdp" lines of the 'ip link show' output for a device.
#   $1 : network device
function list_xdp()
{
    local device="$1"
    shift
    info "Listing current XDP device($device) setting"
    # Echo the command here rather than from inside the pipeline:
    # there, the verbose echo would be consumed by grep and never shown.
    if [[ -n "$VERBOSE" ]]; then
        echo "$(basename "$IP") link show dev $device"
    fi
    if [[ -n "$DRYRUN" ]]; then
        return 0
    fi
    # VERBOSE is blanked for this one call to avoid a second echo
    VERBOSE="" call_ip link show dev "$device" | grep --color=auto xdp
}
158
# Remove the TC ingress filters and the clsact qdisc from a device.
# Both removals are allowed to fail.
#   $1 : network device
function flush_tc()
{
    local device="$1"
    shift
    info "Flush TC on device: $device"
    # Quoted: the device name must reach tc as exactly one argument
    call_tc_allow_fail filter del dev "$device" ingress
    call_tc_allow_fail qdisc del dev "$device" clsact
}
167
# Detach any XDP program from a device ('ip link set ... xdp off').
#   $1 : network device
function flush_xdp()
{
    local device="$1"
    shift
    info "Flush XDP on device: $device"
    # Quoted: the device name must reach ip as exactly one argument
    call_ip link set dev "$device" xdp off
}
175
# Attach section "tc_mark" of a BPF object file to the TC ingress
# hook of a device.
#   $1 : network device
#   $2 : path to the BPF object file
function attach_tc_mark()
{
    local device="$1"
    local file="$2"
    local prog="tc_mark"
    shift 2

    # Re-attach clsact to clear/flush existing rules.  The delete is
    # allowed to fail and its stderr output is discarded.
    call_tc_allow_fail qdisc del dev "$device" clsact 2> /dev/null
    call_tc qdisc add dev "$device" clsact

    # Attach BPF prog.  "$file" is quoted so an object path containing
    # whitespace is handed to tc as a single argument.
    call_tc filter add dev "$device" ingress \
        prio 1 handle 1 bpf da obj "$file" sec "$prog"
}
191
# Attach section "xdp_mark" of a BPF object file as the XDP program
# of a device.
#   $1 : network device
#   $2 : path to the BPF object file
function attach_xdp_mark()
{
    local device="$1"
    local file="$2"
    local prog="xdp_mark"
    shift 2

    # Remove XDP prog in case it's already loaded
    # TODO: Need ip-link option to override/replace existing XDP prog
    flush_xdp "$device"

    # Attach XDP/BPF prog.  "$file" is quoted so an object path
    # containing whitespace is handed to ip as a single argument.
    call_ip link set dev "$device" xdp obj "$file" sec "$prog"
}
206
# --flush: remove the TC and XDP programs, then stop
if [[ -n $FLUSH ]]; then
    flush_tc "$DEV"
    flush_xdp "$DEV"
    exit 0
fi

# --list: show the TC and XDP programs, then stop
if [[ -n $LIST ]]; then
    list_tc "$DEV"
    list_xdp "$DEV"
    exit 0
fi

# Default action: attach both cooperating programs.  Arguments are
# quoted so a device name or object path is never re-split on the way
# into the helper functions.
attach_tc_mark "$DEV" "$FILE"
attach_xdp_mark "$DEV" "$FILE"
diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c
new file mode 100644
index 000000000000..12e1024069c2
--- /dev/null
+++ b/samples/bpf/xdp2skb_meta_kern.c
@@ -0,0 +1,103 @@
1/* SPDX-License-Identifier: GPL-2.0
2 * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
3 *
4 * Example howto transfer info from XDP to SKB, e.g. skb->mark
5 * -----------------------------------------------------------
6 * This uses the XDP data_meta infrastructure, and is a cooperation
7 * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook.
8 *
9 * Notice: This example does not use the BPF C-loader (bpf_load.c),
10 * but instead rely on the iproute2 TC tool for loading BPF-objects.
11 */
12#include <uapi/linux/bpf.h>
13#include <uapi/linux/pkt_cls.h>
14
15#include "bpf_helpers.h"
16
/*
 * Layout of the information kept in the XDP 'data_meta' area, which
 * sits immediately in front of the raw packet payload data.  Its
 * meaning is private to the two BPF programs in this file, which use
 * it as a communication channel: XDP adjusts/increases the area via a
 * bpf-helper, and TC uses boundary checks to see whether any data has
 * been provided.
 *
 * The struct must be 4-byte aligned; the aligned(4) attribute on the
 * struct enforces that.
 *
 * @mark: value written by the XDP program and copied into the SKB
 *        mark by the TC program.
 */
struct meta_info {
	__u32 mark;
} __attribute__((aligned(4)));
30
31SEC("xdp_mark")
32int _xdp_mark(struct xdp_md *ctx)
33{
34 struct meta_info *meta;
35 void *data, *data_end;
36 int ret;
37
38 /* Reserve space in-front data pointer for our meta info.
39 * (Notice drivers not supporting data_meta will fail here!)
40 */
41 ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
42 if (ret < 0)
43 return XDP_ABORTED;
44
45 /* For some unknown reason, these ctx pointers must be read
46 * after bpf_xdp_adjust_meta, else verifier will reject prog.
47 */
48 data = (void *)(unsigned long)ctx->data;
49
50 /* Check data_meta have room for meta_info struct */
51 meta = (void *)(unsigned long)ctx->data_meta;
52 if (meta + 1 > data)
53 return XDP_ABORTED;
54
55 meta->mark = 42;
56
57 return XDP_PASS;
58}
59
60SEC("tc_mark")
61int _tc_mark(struct __sk_buff *ctx)
62{
63 void *data = (void *)(unsigned long)ctx->data;
64 void *data_end = (void *)(unsigned long)ctx->data_end;
65 void *data_meta = (void *)(unsigned long)ctx->data_meta;
66 struct meta_info *meta = data_meta;
67
68 /* Check XDP gave us some data_meta */
69 if (meta + 1 > data) {
70 ctx->mark = 41;
71 /* Skip "accept" if no data_meta is avail */
72 return TC_ACT_OK;
73 }
74
75 /* Hint: See func tc_cls_act_is_valid_access() for BPF_WRITE access */
76 ctx->mark = meta->mark; /* Transfer XDP-mark to SKB-mark */
77
78 return TC_ACT_OK;
79}
80
81/* Manually attaching these programs:
82export DEV=ixgbe2
83export FILE=xdp2skb_meta_kern.o
84
85# via TC command
86tc qdisc del dev $DEV clsact 2> /dev/null
87tc qdisc add dev $DEV clsact
88tc filter add dev $DEV ingress prio 1 handle 1 bpf da obj $FILE sec tc_mark
89tc filter show dev $DEV ingress
90
91# XDP via IP command:
92ip link set dev $DEV xdp off
93ip link set dev $DEV xdp obj $FILE sec xdp_mark
94
95# Use iptable to "see" if SKBs are marked
96iptables -I INPUT -p icmp -m mark --mark 41 # == 0x29
97iptables -I INPUT -p icmp -m mark --mark 42 # == 0x2a
98
99# Hint: catch XDP_ABORTED errors via
100perf record -e xdp:*
101perf script
102
103*/