aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-11-12 23:38:08 -0500
committerDavid S. Miller <davem@davemloft.net>2016-11-12 23:38:08 -0500
commit79774d6bfacb40699ecd5a343e5d4ac5a9cdd173 (patch)
tree5732e718ec730c9da67ad4521dca5bfd2068c2d0
parent23dd8315485acae0acf4452509e2be9fc587d72c (diff)
parent90e02896f1a4627b14624245fbcbc19f8fd916cb (diff)
Merge branch 'fix-bpf_redirect'
Martin KaFai Lau says: ==================== bpf: Fix bpf_redirect to an ipip/ip6tnl dev This patch set fixes a bug in bpf_redirect(dev, flags) when dev is an ipip/ip6tnl. The current problem is IP-EthHdr-IP is sent out instead of IP-IP. Patch 1 adds a dev->type test similar to dev_is_mac_header_xmit() in act_mirred.c which is only available in net-next. We can consider to refactor it once this patch is pulled into net-next from net. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netdevice.h15
-rw-r--r--net/core/dev.c17
-rw-r--r--net/core/filter.c68
-rw-r--r--samples/bpf/Makefile4
-rwxr-xr-xsamples/bpf/tc_l2_redirect.sh173
-rw-r--r--samples/bpf/tc_l2_redirect_kern.c236
-rw-r--r--samples/bpf/tc_l2_redirect_user.c73
7 files changed, 567 insertions, 19 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 91ee3643ccc8..bf04a46f6d5b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
3354bool is_skb_forwardable(const struct net_device *dev, 3354bool is_skb_forwardable(const struct net_device *dev,
3355 const struct sk_buff *skb); 3355 const struct sk_buff *skb);
3356 3356
3357static __always_inline int ____dev_forward_skb(struct net_device *dev,
3358 struct sk_buff *skb)
3359{
3360 if (skb_orphan_frags(skb, GFP_ATOMIC) ||
3361 unlikely(!is_skb_forwardable(dev, skb))) {
3362 atomic_long_inc(&dev->rx_dropped);
3363 kfree_skb(skb);
3364 return NET_RX_DROP;
3365 }
3366
3367 skb_scrub_packet(skb, true);
3368 skb->priority = 0;
3369 return 0;
3370}
3371
3357void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); 3372void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
3358 3373
3359extern int netdev_budget; 3374extern int netdev_budget;
diff --git a/net/core/dev.c b/net/core/dev.c
index eaad4c28069f..6666b28b6815 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
1766 1766
1767int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) 1767int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1768{ 1768{
1769 if (skb_orphan_frags(skb, GFP_ATOMIC) || 1769 int ret = ____dev_forward_skb(dev, skb);
1770 unlikely(!is_skb_forwardable(dev, skb))) {
1771 atomic_long_inc(&dev->rx_dropped);
1772 kfree_skb(skb);
1773 return NET_RX_DROP;
1774 }
1775 1770
1776 skb_scrub_packet(skb, true); 1771 if (likely(!ret)) {
1777 skb->priority = 0; 1772 skb->protocol = eth_type_trans(skb, dev);
1778 skb->protocol = eth_type_trans(skb, dev); 1773 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
1779 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 1774 }
1780 1775
1781 return 0; 1776 return ret;
1782} 1777}
1783EXPORT_SYMBOL_GPL(__dev_forward_skb); 1778EXPORT_SYMBOL_GPL(__dev_forward_skb);
1784 1779
diff --git a/net/core/filter.c b/net/core/filter.c
index 00351cdf7d0c..b391209838ef 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
1628 return dev_forward_skb(dev, skb); 1628 return dev_forward_skb(dev, skb);
1629} 1629}
1630 1630
1631static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
1632 struct sk_buff *skb)
1633{
1634 int ret = ____dev_forward_skb(dev, skb);
1635
1636 if (likely(!ret)) {
1637 skb->dev = dev;
1638 ret = netif_rx(skb);
1639 }
1640
1641 return ret;
1642}
1643
1631static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) 1644static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
1632{ 1645{
1633 int ret; 1646 int ret;
@@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
1647 return ret; 1660 return ret;
1648} 1661}
1649 1662
1663static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
1664 u32 flags)
1665{
1666 /* skb->mac_len is not set on normal egress */
1667 unsigned int mlen = skb->network_header - skb->mac_header;
1668
1669 __skb_pull(skb, mlen);
1670
1671 /* At ingress, the mac header has already been pulled once.
1672 * At egress, skb_pospull_rcsum has to be done in case that
1673 * the skb is originated from ingress (i.e. a forwarded skb)
1674 * to ensure that rcsum starts at net header.
1675 */
1676 if (!skb_at_tc_ingress(skb))
1677 skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
1678 skb_pop_mac_header(skb);
1679 skb_reset_mac_len(skb);
1680 return flags & BPF_F_INGRESS ?
1681 __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
1682}
1683
1684static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
1685 u32 flags)
1686{
1687 bpf_push_mac_rcsum(skb);
1688 return flags & BPF_F_INGRESS ?
1689 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
1690}
1691
1692static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
1693 u32 flags)
1694{
1695 switch (dev->type) {
1696 case ARPHRD_TUNNEL:
1697 case ARPHRD_TUNNEL6:
1698 case ARPHRD_SIT:
1699 case ARPHRD_IPGRE:
1700 case ARPHRD_VOID:
1701 case ARPHRD_NONE:
1702 return __bpf_redirect_no_mac(skb, dev, flags);
1703 default:
1704 return __bpf_redirect_common(skb, dev, flags);
1705 }
1706}
1707
1650BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) 1708BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
1651{ 1709{
1652 struct net_device *dev; 1710 struct net_device *dev;
@@ -1675,10 +1733,7 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
1675 return -ENOMEM; 1733 return -ENOMEM;
1676 } 1734 }
1677 1735
1678 bpf_push_mac_rcsum(clone); 1736 return __bpf_redirect(clone, dev, flags);
1679
1680 return flags & BPF_F_INGRESS ?
1681 __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone);
1682} 1737}
1683 1738
1684static const struct bpf_func_proto bpf_clone_redirect_proto = { 1739static const struct bpf_func_proto bpf_clone_redirect_proto = {
@@ -1722,10 +1777,7 @@ int skb_do_redirect(struct sk_buff *skb)
1722 return -EINVAL; 1777 return -EINVAL;
1723 } 1778 }
1724 1779
1725 bpf_push_mac_rcsum(skb); 1780 return __bpf_redirect(skb, dev, ri->flags);
1726
1727 return ri->flags & BPF_F_INGRESS ?
1728 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
1729} 1781}
1730 1782
1731static const struct bpf_func_proto bpf_redirect_proto = { 1783static const struct bpf_func_proto bpf_redirect_proto = {
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 12b7304d55dc..72c58675973e 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -27,6 +27,7 @@ hostprogs-y += xdp2
27hostprogs-y += test_current_task_under_cgroup 27hostprogs-y += test_current_task_under_cgroup
28hostprogs-y += trace_event 28hostprogs-y += trace_event
29hostprogs-y += sampleip 29hostprogs-y += sampleip
30hostprogs-y += tc_l2_redirect
30 31
31test_verifier-objs := test_verifier.o libbpf.o 32test_verifier-objs := test_verifier.o libbpf.o
32test_maps-objs := test_maps.o libbpf.o 33test_maps-objs := test_maps.o libbpf.o
@@ -56,6 +57,7 @@ test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
56 test_current_task_under_cgroup_user.o 57 test_current_task_under_cgroup_user.o
57trace_event-objs := bpf_load.o libbpf.o trace_event_user.o 58trace_event-objs := bpf_load.o libbpf.o trace_event_user.o
58sampleip-objs := bpf_load.o libbpf.o sampleip_user.o 59sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
60tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o
59 61
60# Tell kbuild to always build the programs 62# Tell kbuild to always build the programs
61always := $(hostprogs-y) 63always := $(hostprogs-y)
@@ -72,6 +74,7 @@ always += test_probe_write_user_kern.o
72always += trace_output_kern.o 74always += trace_output_kern.o
73always += tcbpf1_kern.o 75always += tcbpf1_kern.o
74always += tcbpf2_kern.o 76always += tcbpf2_kern.o
77always += tc_l2_redirect_kern.o
75always += lathist_kern.o 78always += lathist_kern.o
76always += offwaketime_kern.o 79always += offwaketime_kern.o
77always += spintest_kern.o 80always += spintest_kern.o
@@ -111,6 +114,7 @@ HOSTLOADLIBES_xdp2 += -lelf
111HOSTLOADLIBES_test_current_task_under_cgroup += -lelf 114HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
112HOSTLOADLIBES_trace_event += -lelf 115HOSTLOADLIBES_trace_event += -lelf
113HOSTLOADLIBES_sampleip += -lelf 116HOSTLOADLIBES_sampleip += -lelf
117HOSTLOADLIBES_tc_l2_redirect += -l elf
114 118
115# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: 119# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
116# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang 120# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh
new file mode 100755
index 000000000000..80a05591a140
--- /dev/null
+++ b/samples/bpf/tc_l2_redirect.sh
@@ -0,0 +1,173 @@
1#!/bin/bash
2
3[[ -z $TC ]] && TC='tc'
4[[ -z $IP ]] && IP='ip'
5
6REDIRECT_USER='./tc_l2_redirect'
7REDIRECT_BPF='./tc_l2_redirect_kern.o'
8
9RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter)
10IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding)
11
12function config_common {
13 local tun_type=$1
14
15 $IP netns add ns1
16 $IP netns add ns2
17 $IP link add ve1 type veth peer name vens1
18 $IP link add ve2 type veth peer name vens2
19 $IP link set dev ve1 up
20 $IP link set dev ve2 up
21 $IP link set dev ve1 mtu 1500
22 $IP link set dev ve2 mtu 1500
23 $IP link set dev vens1 netns ns1
24 $IP link set dev vens2 netns ns2
25
26 $IP -n ns1 link set dev lo up
27 $IP -n ns1 link set dev vens1 up
28 $IP -n ns1 addr add 10.1.1.101/24 dev vens1
29 $IP -n ns1 addr add 2401:db01::65/64 dev vens1 nodad
30 $IP -n ns1 route add default via 10.1.1.1 dev vens1
31 $IP -n ns1 route add default via 2401:db01::1 dev vens1
32
33 $IP -n ns2 link set dev lo up
34 $IP -n ns2 link set dev vens2 up
35 $IP -n ns2 addr add 10.2.1.102/24 dev vens2
36 $IP -n ns2 addr add 2401:db02::66/64 dev vens2 nodad
37 $IP -n ns2 addr add 10.10.1.102 dev lo
38 $IP -n ns2 addr add 2401:face::66/64 dev lo nodad
39 $IP -n ns2 link add ipt2 type ipip local 10.2.1.102 remote 10.2.1.1
40 $IP -n ns2 link add ip6t2 type ip6tnl mode any local 2401:db02::66 remote 2401:db02::1
41 $IP -n ns2 link set dev ipt2 up
42 $IP -n ns2 link set dev ip6t2 up
43 $IP netns exec ns2 $TC qdisc add dev vens2 clsact
44 $IP netns exec ns2 $TC filter add dev vens2 ingress bpf da obj $REDIRECT_BPF sec drop_non_tun_vip
45 if [[ $tun_type == "ipip" ]]; then
46 $IP -n ns2 route add 10.1.1.0/24 dev ipt2
47 $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
48 $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ipt2.rp_filter=0
49 else
50 $IP -n ns2 route add 10.1.1.0/24 dev ip6t2
51 $IP -n ns2 route add 2401:db01::/64 dev ip6t2
52 $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
53 $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ip6t2.rp_filter=0
54 fi
55
56 $IP addr add 10.1.1.1/24 dev ve1
57 $IP addr add 2401:db01::1/64 dev ve1 nodad
58 $IP addr add 10.2.1.1/24 dev ve2
59 $IP addr add 2401:db02::1/64 dev ve2 nodad
60
61 $TC qdisc add dev ve2 clsact
62 $TC filter add dev ve2 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_forward
63
64 sysctl -q -w net.ipv4.conf.all.rp_filter=0
65 sysctl -q -w net.ipv6.conf.all.forwarding=1
66}
67
68function cleanup {
69 set +e
70 [[ -z $DEBUG ]] || set +x
71 $IP netns delete ns1 >& /dev/null
72 $IP netns delete ns2 >& /dev/null
73 $IP link del ve1 >& /dev/null
74 $IP link del ve2 >& /dev/null
75 $IP link del ipt >& /dev/null
76 $IP link del ip6t >& /dev/null
77 sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER
78 sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING
79 rm -f /sys/fs/bpf/tc/globals/tun_iface
80 [[ -z $DEBUG ]] || set -x
81 set -e
82}
83
84function l2_to_ipip {
85 echo -n "l2_to_ipip $1: "
86
87 local dir=$1
88
89 config_common ipip
90
91 $IP link add ipt type ipip external
92 $IP link set dev ipt up
93 sysctl -q -w net.ipv4.conf.ipt.rp_filter=0
94 sysctl -q -w net.ipv4.conf.ipt.forwarding=1
95
96 if [[ $dir == "egress" ]]; then
97 $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
98 $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
99 sysctl -q -w net.ipv4.conf.ve1.forwarding=1
100 else
101 $TC qdisc add dev ve1 clsact
102 $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
103 fi
104
105 $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ipt/ifindex)
106
107 $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null
108
109 if [[ $dir == "egress" ]]; then
110 # test direct egress to ve2 (i.e. not forwarding from
111 # ve1 to ve2).
112 ping -c1 10.10.1.102 >& /dev/null
113 fi
114
115 cleanup
116
117 echo "OK"
118}
119
120function l2_to_ip6tnl {
121 echo -n "l2_to_ip6tnl $1: "
122
123 local dir=$1
124
125 config_common ip6tnl
126
127 $IP link add ip6t type ip6tnl mode any external
128 $IP link set dev ip6t up
129 sysctl -q -w net.ipv4.conf.ip6t.rp_filter=0
130 sysctl -q -w net.ipv4.conf.ip6t.forwarding=1
131
132 if [[ $dir == "egress" ]]; then
133 $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
134 $IP route add 2401:face::/64 via 2401:db02::66 dev ve2
135 $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
136 sysctl -q -w net.ipv4.conf.ve1.forwarding=1
137 else
138 $TC qdisc add dev ve1 clsact
139 $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
140 fi
141
142 $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ip6t/ifindex)
143
144 $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null
145 $IP netns exec ns1 ping -6 -c1 2401:face::66 >& /dev/null
146
147 if [[ $dir == "egress" ]]; then
148 # test direct egress to ve2 (i.e. not forwarding from
149 # ve1 to ve2).
150 ping -c1 10.10.1.102 >& /dev/null
151 ping -6 -c1 2401:face::66 >& /dev/null
152 fi
153
154 cleanup
155
156 echo "OK"
157}
158
159cleanup
160test_names="l2_to_ipip l2_to_ip6tnl"
161test_dirs="ingress egress"
162if [[ $# -ge 2 ]]; then
163 test_names=$1
164 test_dirs=$2
165elif [[ $# -ge 1 ]]; then
166 test_names=$1
167fi
168
169for t in $test_names; do
170 for d in $test_dirs; do
171 $t $d
172 done
173done
diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c
new file mode 100644
index 000000000000..92a44729dbe4
--- /dev/null
+++ b/samples/bpf/tc_l2_redirect_kern.c
@@ -0,0 +1,236 @@
1/* Copyright (c) 2016 Facebook
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <uapi/linux/bpf.h>
8#include <uapi/linux/if_ether.h>
9#include <uapi/linux/if_packet.h>
10#include <uapi/linux/ip.h>
11#include <uapi/linux/ipv6.h>
12#include <uapi/linux/in.h>
13#include <uapi/linux/tcp.h>
14#include <uapi/linux/filter.h>
15#include <uapi/linux/pkt_cls.h>
16#include <net/ipv6.h>
17#include "bpf_helpers.h"
18
19#define _htonl __builtin_bswap32
20
21#define PIN_GLOBAL_NS 2
22struct bpf_elf_map {
23 __u32 type;
24 __u32 size_key;
25 __u32 size_value;
26 __u32 max_elem;
27 __u32 flags;
28 __u32 id;
29 __u32 pinning;
30};
31
32/* copy of 'struct ethhdr' without __packed */
33struct eth_hdr {
34 unsigned char h_dest[ETH_ALEN];
35 unsigned char h_source[ETH_ALEN];
36 unsigned short h_proto;
37};
38
39struct bpf_elf_map SEC("maps") tun_iface = {
40 .type = BPF_MAP_TYPE_ARRAY,
41 .size_key = sizeof(int),
42 .size_value = sizeof(int),
43 .pinning = PIN_GLOBAL_NS,
44 .max_elem = 1,
45};
46
47static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr)
48{
49 if (eth_proto == htons(ETH_P_IP))
50 return (_htonl(0xffffff00) & daddr) == _htonl(0x0a0a0100);
51 else if (eth_proto == htons(ETH_P_IPV6))
52 return (daddr == _htonl(0x2401face));
53
54 return false;
55}
56
57SEC("l2_to_iptun_ingress_forward")
58int _l2_to_iptun_ingress_forward(struct __sk_buff *skb)
59{
60 struct bpf_tunnel_key tkey = {};
61 void *data = (void *)(long)skb->data;
62 struct eth_hdr *eth = data;
63 void *data_end = (void *)(long)skb->data_end;
64 int key = 0, *ifindex;
65
66 int ret;
67
68 if (data + sizeof(*eth) > data_end)
69 return TC_ACT_OK;
70
71 ifindex = bpf_map_lookup_elem(&tun_iface, &key);
72 if (!ifindex)
73 return TC_ACT_OK;
74
75 if (eth->h_proto == htons(ETH_P_IP)) {
76 char fmt4[] = "ingress forward to ifindex:%d daddr4:%x\n";
77 struct iphdr *iph = data + sizeof(*eth);
78
79 if (data + sizeof(*eth) + sizeof(*iph) > data_end)
80 return TC_ACT_OK;
81
82 if (iph->protocol != IPPROTO_IPIP)
83 return TC_ACT_OK;
84
85 bpf_trace_printk(fmt4, sizeof(fmt4), *ifindex,
86 _htonl(iph->daddr));
87 return bpf_redirect(*ifindex, BPF_F_INGRESS);
88 } else if (eth->h_proto == htons(ETH_P_IPV6)) {
89 char fmt6[] = "ingress forward to ifindex:%d daddr6:%x::%x\n";
90 struct ipv6hdr *ip6h = data + sizeof(*eth);
91
92 if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
93 return TC_ACT_OK;
94
95 if (ip6h->nexthdr != IPPROTO_IPIP &&
96 ip6h->nexthdr != IPPROTO_IPV6)
97 return TC_ACT_OK;
98
99 bpf_trace_printk(fmt6, sizeof(fmt6), *ifindex,
100 _htonl(ip6h->daddr.s6_addr32[0]),
101 _htonl(ip6h->daddr.s6_addr32[3]));
102 return bpf_redirect(*ifindex, BPF_F_INGRESS);
103 }
104
105 return TC_ACT_OK;
106}
107
108SEC("l2_to_iptun_ingress_redirect")
109int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb)
110{
111 struct bpf_tunnel_key tkey = {};
112 void *data = (void *)(long)skb->data;
113 struct eth_hdr *eth = data;
114 void *data_end = (void *)(long)skb->data_end;
115 int key = 0, *ifindex;
116
117 int ret;
118
119 if (data + sizeof(*eth) > data_end)
120 return TC_ACT_OK;
121
122 ifindex = bpf_map_lookup_elem(&tun_iface, &key);
123 if (!ifindex)
124 return TC_ACT_OK;
125
126 if (eth->h_proto == htons(ETH_P_IP)) {
127 char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n";
128 struct iphdr *iph = data + sizeof(*eth);
129 __be32 daddr = iph->daddr;
130
131 if (data + sizeof(*eth) + sizeof(*iph) > data_end)
132 return TC_ACT_OK;
133
134 if (!is_vip_addr(eth->h_proto, daddr))
135 return TC_ACT_OK;
136
137 bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(daddr), *ifindex);
138 } else {
139 return TC_ACT_OK;
140 }
141
142 tkey.tunnel_id = 10000;
143 tkey.tunnel_ttl = 64;
144 tkey.remote_ipv4 = 0x0a020166; /* 10.2.1.102 */
145 bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0);
146 return bpf_redirect(*ifindex, 0);
147}
148
149SEC("l2_to_ip6tun_ingress_redirect")
150int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb)
151{
152 struct bpf_tunnel_key tkey = {};
153 void *data = (void *)(long)skb->data;
154 struct eth_hdr *eth = data;
155 void *data_end = (void *)(long)skb->data_end;
156 int key = 0, *ifindex;
157
158 if (data + sizeof(*eth) > data_end)
159 return TC_ACT_OK;
160
161 ifindex = bpf_map_lookup_elem(&tun_iface, &key);
162 if (!ifindex)
163 return TC_ACT_OK;
164
165 if (eth->h_proto == htons(ETH_P_IP)) {
166 char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n";
167 struct iphdr *iph = data + sizeof(*eth);
168
169 if (data + sizeof(*eth) + sizeof(*iph) > data_end)
170 return TC_ACT_OK;
171
172 if (!is_vip_addr(eth->h_proto, iph->daddr))
173 return TC_ACT_OK;
174
175 bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(iph->daddr),
176 *ifindex);
177 } else if (eth->h_proto == htons(ETH_P_IPV6)) {
178 char fmt6[] = "e/ingress redirect daddr6:%x to ifindex:%d\n";
179 struct ipv6hdr *ip6h = data + sizeof(*eth);
180
181 if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
182 return TC_ACT_OK;
183
184 if (!is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0]))
185 return TC_ACT_OK;
186
187 bpf_trace_printk(fmt6, sizeof(fmt6),
188 _htonl(ip6h->daddr.s6_addr32[0]), *ifindex);
189 } else {
190 return TC_ACT_OK;
191 }
192
193 tkey.tunnel_id = 10000;
194 tkey.tunnel_ttl = 64;
195 /* 2401:db02:0:0:0:0:0:66 */
196 tkey.remote_ipv6[0] = _htonl(0x2401db02);
197 tkey.remote_ipv6[1] = 0;
198 tkey.remote_ipv6[2] = 0;
199 tkey.remote_ipv6[3] = _htonl(0x00000066);
200 bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), BPF_F_TUNINFO_IPV6);
201 return bpf_redirect(*ifindex, 0);
202}
203
204SEC("drop_non_tun_vip")
205int _drop_non_tun_vip(struct __sk_buff *skb)
206{
207 struct bpf_tunnel_key tkey = {};
208 void *data = (void *)(long)skb->data;
209 struct eth_hdr *eth = data;
210 void *data_end = (void *)(long)skb->data_end;
211
212 if (data + sizeof(*eth) > data_end)
213 return TC_ACT_OK;
214
215 if (eth->h_proto == htons(ETH_P_IP)) {
216 struct iphdr *iph = data + sizeof(*eth);
217
218 if (data + sizeof(*eth) + sizeof(*iph) > data_end)
219 return TC_ACT_OK;
220
221 if (is_vip_addr(eth->h_proto, iph->daddr))
222 return TC_ACT_SHOT;
223 } else if (eth->h_proto == htons(ETH_P_IPV6)) {
224 struct ipv6hdr *ip6h = data + sizeof(*eth);
225
226 if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
227 return TC_ACT_OK;
228
229 if (is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0]))
230 return TC_ACT_SHOT;
231 }
232
233 return TC_ACT_OK;
234}
235
236char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c
new file mode 100644
index 000000000000..4013c5337b91
--- /dev/null
+++ b/samples/bpf/tc_l2_redirect_user.c
@@ -0,0 +1,73 @@
1/* Copyright (c) 2016 Facebook
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <linux/unistd.h>
8#include <linux/bpf.h>
9
10#include <stdlib.h>
11#include <stdio.h>
12#include <unistd.h>
13#include <string.h>
14#include <errno.h>
15
16#include "libbpf.h"
17
18static void usage(void)
19{
20 printf("Usage: tc_l2_ipip_redirect [...]\n");
21 printf(" -U <file> Update an already pinned BPF array\n");
22 printf(" -i <ifindex> Interface index\n");
23 printf(" -h Display this help\n");
24}
25
26int main(int argc, char **argv)
27{
28 const char *pinned_file = NULL;
29 int ifindex = -1;
30 int array_key = 0;
31 int array_fd = -1;
32 int ret = -1;
33 int opt;
34
35 while ((opt = getopt(argc, argv, "F:U:i:")) != -1) {
36 switch (opt) {
37 /* General args */
38 case 'U':
39 pinned_file = optarg;
40 break;
41 case 'i':
42 ifindex = atoi(optarg);
43 break;
44 default:
45 usage();
46 goto out;
47 }
48 }
49
50 if (ifindex < 0 || !pinned_file) {
51 usage();
52 goto out;
53 }
54
55 array_fd = bpf_obj_get(pinned_file);
56 if (array_fd < 0) {
57 fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
58 pinned_file, strerror(errno), errno);
59 goto out;
60 }
61
62 /* bpf_tunnel_key.remote_ipv4 expects host byte orders */
63 ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0);
64 if (ret) {
65 perror("bpf_update_elem");
66 goto out;
67 }
68
69out:
70 if (array_fd != -1)
71 close(array_fd);
72 return ret;
73}