aboutsummaryrefslogtreecommitdiffstats
path: root/samples
diff options
context:
space:
mode:
authorChristina Jacob <Christina.Jacob@cavium.com>2017-11-04 23:22:30 -0400
committerDavid S. Miller <davem@davemloft.net>2017-11-07 20:39:41 -0500
commit3e29cd0e6563d5fefd59e7225750ee9922f2dad5 (patch)
treed3a219cd42fd7500d3a874baf2aa57e2f55fbe6b /samples
parent4ad1ceec05e49175d0f967cc87628101e79176f6 (diff)
xdp: Sample xdp program implementing ip forward
Implements port to port forwarding with route table and arp table lookup for ipv4 packets using bpf_redirect helper function and lpm_trie map. Signed-off-by: Christina Jacob <Christina.Jacob@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/xdp_router_ipv4_kern.c186
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c659
3 files changed, 849 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 5994075b080d..3b4945c1eab0 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -29,6 +29,7 @@ hostprogs-y += test_cgrp2_sock
29hostprogs-y += test_cgrp2_sock2 29hostprogs-y += test_cgrp2_sock2
30hostprogs-y += xdp1 30hostprogs-y += xdp1
31hostprogs-y += xdp2 31hostprogs-y += xdp2
32hostprogs-y += xdp_router_ipv4
32hostprogs-y += test_current_task_under_cgroup 33hostprogs-y += test_current_task_under_cgroup
33hostprogs-y += trace_event 34hostprogs-y += trace_event
34hostprogs-y += sampleip 35hostprogs-y += sampleip
@@ -76,6 +77,7 @@ test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o
76xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o 77xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
77# reuse xdp1 source intentionally 78# reuse xdp1 source intentionally
78xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o 79xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
80xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o
79test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ 81test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \
80 test_current_task_under_cgroup_user.o 82 test_current_task_under_cgroup_user.o
81trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o 83trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
@@ -118,6 +120,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
118always += test_cgrp2_tc_kern.o 120always += test_cgrp2_tc_kern.o
119always += xdp1_kern.o 121always += xdp1_kern.o
120always += xdp2_kern.o 122always += xdp2_kern.o
123always += xdp_router_ipv4_kern.o
121always += test_current_task_under_cgroup_kern.o 124always += test_current_task_under_cgroup_kern.o
122always += trace_event_kern.o 125always += trace_event_kern.o
123always += sampleip_kern.o 126always += sampleip_kern.o
@@ -166,6 +169,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
166HOSTLOADLIBES_test_overhead += -lelf -lrt 169HOSTLOADLIBES_test_overhead += -lelf -lrt
167HOSTLOADLIBES_xdp1 += -lelf 170HOSTLOADLIBES_xdp1 += -lelf
168HOSTLOADLIBES_xdp2 += -lelf 171HOSTLOADLIBES_xdp2 += -lelf
172HOSTLOADLIBES_xdp_router_ipv4 += -lelf
169HOSTLOADLIBES_test_current_task_under_cgroup += -lelf 173HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
170HOSTLOADLIBES_trace_event += -lelf 174HOSTLOADLIBES_trace_event += -lelf
171HOSTLOADLIBES_sampleip += -lelf 175HOSTLOADLIBES_sampleip += -lelf
diff --git a/samples/bpf/xdp_router_ipv4_kern.c b/samples/bpf/xdp_router_ipv4_kern.c
new file mode 100644
index 000000000000..993f56bc7b9a
--- /dev/null
+++ b/samples/bpf/xdp_router_ipv4_kern.c
@@ -0,0 +1,186 @@
1/* Copyright (C) 2017 Cavium, Inc.
2 *
3 * This program is free software; you can redistribute it and/or modify it
4 * under the terms of version 2 of the GNU General Public License
5 * as published by the Free Software Foundation.
6 */
7#define KBUILD_MODNAME "foo"
8#include <uapi/linux/bpf.h>
9#include <linux/in.h>
10#include <linux/if_ether.h>
11#include <linux/if_packet.h>
12#include <linux/if_vlan.h>
13#include <linux/ip.h>
14#include <linux/ipv6.h>
15#include "bpf_helpers.h"
16#include <linux/slab.h>
17#include <net/ip_fib.h>
18
19struct trie_value {
20 __u8 prefix[4];
21 __be64 value;
22 int ifindex;
23 int metric;
24 __be32 gw;
25};
26
27/* Key for lpm_trie*/
28union key_4 {
29 u32 b32[2];
30 u8 b8[8];
31};
32
33struct arp_entry {
34 __be64 mac;
35 __be32 dst;
36};
37
38struct direct_map {
39 struct arp_entry arp;
40 int ifindex;
41 __be64 mac;
42};
43
44/* Map for trie implementation*/
45struct bpf_map_def SEC("maps") lpm_map = {
46 .type = BPF_MAP_TYPE_LPM_TRIE,
47 .key_size = 8,
48 .value_size = sizeof(struct trie_value),
49 .max_entries = 50,
50 .map_flags = BPF_F_NO_PREALLOC,
51};
52
53/* Map for counter*/
54struct bpf_map_def SEC("maps") rxcnt = {
55 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
56 .key_size = sizeof(u32),
57 .value_size = sizeof(u64),
58 .max_entries = 256,
59};
60
61/* Map for ARP table*/
62struct bpf_map_def SEC("maps") arp_table = {
63 .type = BPF_MAP_TYPE_HASH,
64 .key_size = sizeof(__be32),
65 .value_size = sizeof(__be64),
66 .max_entries = 50,
67};
68
69/* Map to keep the exact match entries in the route table*/
70struct bpf_map_def SEC("maps") exact_match = {
71 .type = BPF_MAP_TYPE_HASH,
72 .key_size = sizeof(__be32),
73 .value_size = sizeof(struct direct_map),
74 .max_entries = 50,
75};
76
77struct bpf_map_def SEC("maps") tx_port = {
78 .type = BPF_MAP_TYPE_DEVMAP,
79 .key_size = sizeof(int),
80 .value_size = sizeof(int),
81 .max_entries = 100,
82};
83
/* Overwrite the two 6-byte addresses at the start of the frame.
 *
 * data: start of the frame; bytes 0-5 receive dst, bytes 6-11 receive src
 * src:  6-byte source MAC address
 * dst:  6-byte destination MAC address
 */
static inline void set_src_dst_mac(void *data, void *src, void *dst)
{
	unsigned char *frame = data;

	__builtin_memcpy(frame, dst, 6);
	__builtin_memcpy(frame + 6, src, 6);
}
94
95/* Parse IPV4 packet to get SRC, DST IP and protocol */
96static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
97 __be32 *src, __be32 *dest)
98{
99 struct iphdr *iph = data + nh_off;
100
101 if (iph + 1 > data_end)
102 return 0;
103 *src = iph->saddr;
104 *dest = iph->daddr;
105 return iph->protocol;
106}
107
108SEC("xdp_router_ipv4")
109int xdp_router_ipv4_prog(struct xdp_md *ctx)
110{
111 void *data_end = (void *)(long)ctx->data_end;
112 __be64 *dest_mac = NULL, *src_mac = NULL;
113 void *data = (void *)(long)ctx->data;
114 struct trie_value *prefix_value;
115 int rc = XDP_DROP, forward_to;
116 struct ethhdr *eth = data;
117 union key_4 key4;
118 long *value;
119 u16 h_proto;
120 u32 ipproto;
121 u64 nh_off;
122
123 nh_off = sizeof(*eth);
124 if (data + nh_off > data_end)
125 return rc;
126
127 h_proto = eth->h_proto;
128
129 if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
130 struct vlan_hdr *vhdr;
131
132 vhdr = data + nh_off;
133 nh_off += sizeof(struct vlan_hdr);
134 if (data + nh_off > data_end)
135 return rc;
136 h_proto = vhdr->h_vlan_encapsulated_proto;
137 }
138 if (h_proto == htons(ETH_P_ARP)) {
139 return XDP_PASS;
140 } else if (h_proto == htons(ETH_P_IP)) {
141 struct direct_map *direct_entry;
142 __be32 src_ip = 0, dest_ip = 0;
143
144 ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip);
145 direct_entry = bpf_map_lookup_elem(&exact_match, &dest_ip);
146 /* Check for exact match, this would give a faster lookup*/
147 if (direct_entry && direct_entry->mac && direct_entry->arp.mac) {
148 src_mac = &direct_entry->mac;
149 dest_mac = &direct_entry->arp.mac;
150 forward_to = direct_entry->ifindex;
151 } else {
152 /* Look up in the trie for lpm*/
153 key4.b32[0] = 32;
154 key4.b8[4] = dest_ip & 0xff;
155 key4.b8[5] = (dest_ip >> 8) & 0xff;
156 key4.b8[6] = (dest_ip >> 16) & 0xff;
157 key4.b8[7] = (dest_ip >> 24) & 0xff;
158 prefix_value = bpf_map_lookup_elem(&lpm_map, &key4);
159 if (!prefix_value)
160 return XDP_DROP;
161 src_mac = &prefix_value->value;
162 if (!src_mac)
163 return XDP_DROP;
164 dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip);
165 if (!dest_mac) {
166 if (!prefix_value->gw)
167 return XDP_DROP;
168 dest_ip = prefix_value->gw;
169 dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip);
170 }
171 forward_to = prefix_value->ifindex;
172 }
173 } else {
174 ipproto = 0;
175 }
176 if (src_mac && dest_mac) {
177 set_src_dst_mac(data, src_mac, dest_mac);
178 value = bpf_map_lookup_elem(&rxcnt, &ipproto);
179 if (value)
180 *value += 1;
181 return bpf_redirect_map(&tx_port, forward_to, 0);
182 }
183 return rc;
184}
185
186char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
new file mode 100644
index 000000000000..2c1fe3f4b1a4
--- /dev/null
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -0,0 +1,659 @@
1/* Copyright (C) 2017 Cavium, Inc.
2 *
3 * This program is free software; you can redistribute it and/or modify it
4 * under the terms of version 2 of the GNU General Public License
5 * as published by the Free Software Foundation.
6 */
7#include <linux/bpf.h>
8#include <linux/netlink.h>
9#include <linux/rtnetlink.h>
10#include <assert.h>
11#include <errno.h>
12#include <signal.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16#include <sys/socket.h>
17#include <unistd.h>
18#include "bpf_load.h"
19#include "libbpf.h"
20#include <arpa/inet.h>
21#include <fcntl.h>
22#include <poll.h>
23#include <net/if.h>
24#include <netdb.h>
25#include <sys/ioctl.h>
26#include <sys/syscall.h>
27#include "bpf_util.h"
28
/* Netlink sockets opened by monitor_route() and closed by close_and_exit(). */
int sock;
int sock_arp;
/* XDP attach flags chosen in main() and passed to set_link_xdp_fd(). */
int flags = 0;
/* Number of entries in ifindex_list. */
static int total_ifindex;
/* Interface indexes the XDP program is attached to. */
int *ifindex_list;
/* Shared receive buffer for netlink messages. */
char buf[8192];
33
34static int get_route_table(int rtm_family);
35static void int_exit(int sig)
36{
37 int i = 0;
38
39 for (i = 0; i < total_ifindex; i++)
40 set_link_xdp_fd(ifindex_list[i], -1, flags);
41 exit(0);
42}
43
44static void close_and_exit(int sig)
45{
46 int i = 0;
47
48 close(sock);
49 close(sock_arp);
50
51 for (i = 0; i < total_ifindex; i++)
52 set_link_xdp_fd(ifindex_list[i], -1, flags);
53 exit(0);
54}
55
/* Get the MAC address of the interface with the given name.
 *
 * iface: interface name; NULL is rejected.
 *
 * Returns the six address bytes stored in the first six bytes of the
 * result, or (__be64)-1 when the name is NULL, the socket cannot be created
 * or the SIOCGIFHWADDR ioctl fails.
 */
static __be64 getmac(char *iface)
{
	struct ifreq ifr;
	__be64 mac = 0;
	int fd, i;

	if (!iface)
		return -1;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		printf("socket failed leaving....\n");
		return -1;
	}
	/* Zero the request first so the name copied below is always
	 * NUL-terminated.
	 */
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_addr.sa_family = AF_INET;
	strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1);
	if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
		printf("ioctl failed leaving....\n");
		close(fd);
		return -1;
	}
	for (i = 0; i < 6 ; i++)
		*((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i];
	close(fd);
	return mac;
}
75
76static int recv_msg(struct sockaddr_nl sock_addr, int sock)
77{
78 struct nlmsghdr *nh;
79 int len, nll = 0;
80 char *buf_ptr;
81
82 buf_ptr = buf;
83 while (1) {
84 len = recv(sock, buf_ptr, sizeof(buf) - nll, 0);
85 if (len < 0)
86 return len;
87
88 nh = (struct nlmsghdr *)buf_ptr;
89
90 if (nh->nlmsg_type == NLMSG_DONE)
91 break;
92 buf_ptr += len;
93 nll += len;
94 if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH)
95 break;
96
97 if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE)
98 break;
99 }
100 return nll;
101}
102
103/* Function to parse the route entry returned by netlink
104 * Updates the route entry related map entries
105 */
106static void read_route(struct nlmsghdr *nh, int nll)
107{
108 char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
109 struct bpf_lpm_trie_key *prefix_key;
110 struct rtattr *rt_attr;
111 struct rtmsg *rt_msg;
112 int rtm_family;
113 int rtl;
114 int i;
115 struct route_table {
116 int dst_len, iface, metric;
117 char *iface_name;
118 __be32 dst, gw;
119 __be64 mac;
120 } route;
121 struct arp_table {
122 __be64 mac;
123 __be32 dst;
124 };
125
126 struct direct_map {
127 struct arp_table arp;
128 int ifindex;
129 __be64 mac;
130 } direct_entry;
131
132 if (nh->nlmsg_type == RTM_DELROUTE)
133 printf("DELETING Route entry\n");
134 else if (nh->nlmsg_type == RTM_GETROUTE)
135 printf("READING Route entry\n");
136 else if (nh->nlmsg_type == RTM_NEWROUTE)
137 printf("NEW Route entry\n");
138 else
139 printf("%d\n", nh->nlmsg_type);
140
141 memset(&route, 0, sizeof(route));
142 printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n");
143 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
144 rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
145 rtm_family = rt_msg->rtm_family;
146 if (rtm_family == AF_INET)
147 if (rt_msg->rtm_table != RT_TABLE_MAIN)
148 continue;
149 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
150 rtl = RTM_PAYLOAD(nh);
151
152 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
153 switch (rt_attr->rta_type) {
154 case NDA_DST:
155 sprintf(dsts, "%u",
156 (*((__be32 *)RTA_DATA(rt_attr))));
157 break;
158 case RTA_GATEWAY:
159 sprintf(gws, "%u",
160 *((__be32 *)RTA_DATA(rt_attr)));
161 break;
162 case RTA_OIF:
163 sprintf(ifs, "%u",
164 *((int *)RTA_DATA(rt_attr)));
165 break;
166 case RTA_METRICS:
167 sprintf(metrics, "%u",
168 *((int *)RTA_DATA(rt_attr)));
169 default:
170 break;
171 }
172 }
173 sprintf(dsts_len, "%d", rt_msg->rtm_dst_len);
174 route.dst = atoi(dsts);
175 route.dst_len = atoi(dsts_len);
176 route.gw = atoi(gws);
177 route.iface = atoi(ifs);
178 route.metric = atoi(metrics);
179 route.iface_name = alloca(sizeof(char *) * IFNAMSIZ);
180 route.iface_name = if_indextoname(route.iface, route.iface_name);
181 route.mac = getmac(route.iface_name);
182 if (route.mac == -1) {
183 int i = 0;
184
185 for (i = 0; i < total_ifindex; i++)
186 set_link_xdp_fd(ifindex_list[i], -1, flags);
187 exit(0);
188 }
189 assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0);
190 if (rtm_family == AF_INET) {
191 struct trie_value {
192 __u8 prefix[4];
193 __be64 value;
194 int ifindex;
195 int metric;
196 __be32 gw;
197 } *prefix_value;
198
199 prefix_key = alloca(sizeof(*prefix_key) + 3);
200 prefix_value = alloca(sizeof(*prefix_value));
201
202 prefix_key->prefixlen = 32;
203 prefix_key->prefixlen = route.dst_len;
204 direct_entry.mac = route.mac & 0xffffffffffff;
205 direct_entry.ifindex = route.iface;
206 direct_entry.arp.mac = 0;
207 direct_entry.arp.dst = 0;
208 if (route.dst_len == 32) {
209 if (nh->nlmsg_type == RTM_DELROUTE)
210 assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0);
211 else
212 if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0)
213 direct_entry.arp.dst = route.dst;
214 assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0);
215 }
216 for (i = 0; i < 4; i++)
217 prefix_key->data[i] = (route.dst >> i * 8) & 0xff;
218
219 printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n",
220 (int)prefix_key->data[0],
221 (int)prefix_key->data[1],
222 (int)prefix_key->data[2],
223 (int)prefix_key->data[3],
224 route.gw, route.dst_len,
225 route.metric,
226 route.iface_name);
227 if (bpf_map_lookup_elem(map_fd[0], prefix_key,
228 prefix_value) < 0) {
229 for (i = 0; i < 4; i++)
230 prefix_value->prefix[i] = prefix_key->data[i];
231 prefix_value->value = route.mac & 0xffffffffffff;
232 prefix_value->ifindex = route.iface;
233 prefix_value->gw = route.gw;
234 prefix_value->metric = route.metric;
235
236 assert(bpf_map_update_elem(map_fd[0],
237 prefix_key,
238 prefix_value, 0
239 ) == 0);
240 } else {
241 if (nh->nlmsg_type == RTM_DELROUTE) {
242 printf("deleting entry\n");
243 printf("prefix key=%d.%d.%d.%d/%d",
244 prefix_key->data[0],
245 prefix_key->data[1],
246 prefix_key->data[2],
247 prefix_key->data[3],
248 prefix_key->prefixlen);
249 assert(bpf_map_delete_elem(map_fd[0],
250 prefix_key
251 ) == 0);
252 /* Rereading the route table to check if
253 * there is an entry with the same
254 * prefix but a different metric as the
255 * deleted enty.
256 */
257 get_route_table(AF_INET);
258 } else if (prefix_key->data[0] ==
259 prefix_value->prefix[0] &&
260 prefix_key->data[1] ==
261 prefix_value->prefix[1] &&
262 prefix_key->data[2] ==
263 prefix_value->prefix[2] &&
264 prefix_key->data[3] ==
265 prefix_value->prefix[3] &&
266 route.metric >= prefix_value->metric) {
267 continue;
268 } else {
269 for (i = 0; i < 4; i++)
270 prefix_value->prefix[i] =
271 prefix_key->data[i];
272 prefix_value->value =
273 route.mac & 0xffffffffffff;
274 prefix_value->ifindex = route.iface;
275 prefix_value->gw = route.gw;
276 prefix_value->metric = route.metric;
277 assert(bpf_map_update_elem(
278 map_fd[0],
279 prefix_key,
280 prefix_value,
281 0) == 0);
282 }
283 }
284 }
285 memset(&route, 0, sizeof(route));
286 memset(dsts, 0, sizeof(dsts));
287 memset(dsts_len, 0, sizeof(dsts_len));
288 memset(gws, 0, sizeof(gws));
289 memset(ifs, 0, sizeof(ifs));
290 memset(&route, 0, sizeof(route));
291 }
292}
293
294/* Function to read the existing route table when the process is launched*/
295static int get_route_table(int rtm_family)
296{
297 struct sockaddr_nl sa;
298 struct nlmsghdr *nh;
299 int sock, seq = 0;
300 struct msghdr msg;
301 struct iovec iov;
302 int ret = 0;
303 int nll;
304
305 struct {
306 struct nlmsghdr nl;
307 struct rtmsg rt;
308 char buf[8192];
309 } req;
310
311 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
312 if (sock < 0) {
313 printf("open netlink socket: %s\n", strerror(errno));
314 return -1;
315 }
316 memset(&sa, 0, sizeof(sa));
317 sa.nl_family = AF_NETLINK;
318 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
319 printf("bind to netlink: %s\n", strerror(errno));
320 ret = -1;
321 goto cleanup;
322 }
323 memset(&req, 0, sizeof(req));
324 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
325 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
326 req.nl.nlmsg_type = RTM_GETROUTE;
327
328 req.rt.rtm_family = rtm_family;
329 req.rt.rtm_table = RT_TABLE_MAIN;
330 req.nl.nlmsg_pid = 0;
331 req.nl.nlmsg_seq = ++seq;
332 memset(&msg, 0, sizeof(msg));
333 iov.iov_base = (void *)&req.nl;
334 iov.iov_len = req.nl.nlmsg_len;
335 msg.msg_iov = &iov;
336 msg.msg_iovlen = 1;
337 ret = sendmsg(sock, &msg, 0);
338 if (ret < 0) {
339 printf("send to netlink: %s\n", strerror(errno));
340 ret = -1;
341 goto cleanup;
342 }
343 memset(buf, 0, sizeof(buf));
344 nll = recv_msg(sa, sock);
345 if (nll < 0) {
346 printf("recv from netlink: %s\n", strerror(nll));
347 ret = -1;
348 goto cleanup;
349 }
350 nh = (struct nlmsghdr *)buf;
351 read_route(nh, nll);
352cleanup:
353 close(sock);
354 return ret;
355}
356
357/* Function to parse the arp entry returned by netlink
358 * Updates the arp entry related map entries
359 */
360static void read_arp(struct nlmsghdr *nh, int nll)
361{
362 struct rtattr *rt_attr;
363 char dsts[24], mac[24];
364 struct ndmsg *rt_msg;
365 int rtl, ndm_family;
366
367 struct arp_table {
368 __be64 mac;
369 __be32 dst;
370 } arp_entry;
371 struct direct_map {
372 struct arp_table arp;
373 int ifindex;
374 __be64 mac;
375 } direct_entry;
376
377 if (nh->nlmsg_type == RTM_GETNEIGH)
378 printf("READING arp entry\n");
379 printf("Address\tHwAddress\n");
380 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
381 rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
382 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
383 ndm_family = rt_msg->ndm_family;
384 rtl = RTM_PAYLOAD(nh);
385 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
386 switch (rt_attr->rta_type) {
387 case NDA_DST:
388 sprintf(dsts, "%u",
389 *((__be32 *)RTA_DATA(rt_attr)));
390 break;
391 case NDA_LLADDR:
392 sprintf(mac, "%lld",
393 *((__be64 *)RTA_DATA(rt_attr)));
394 break;
395 default:
396 break;
397 }
398 }
399 arp_entry.dst = atoi(dsts);
400 arp_entry.mac = atol(mac);
401 printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac);
402 if (ndm_family == AF_INET) {
403 if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst,
404 &direct_entry) == 0) {
405 if (nh->nlmsg_type == RTM_DELNEIGH) {
406 direct_entry.arp.dst = 0;
407 direct_entry.arp.mac = 0;
408 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
409 direct_entry.arp.dst = arp_entry.dst;
410 direct_entry.arp.mac = arp_entry.mac;
411 }
412 assert(bpf_map_update_elem(map_fd[3],
413 &arp_entry.dst,
414 &direct_entry, 0
415 ) == 0);
416 memset(&direct_entry, 0, sizeof(direct_entry));
417 }
418 if (nh->nlmsg_type == RTM_DELNEIGH) {
419 assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0);
420 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
421 assert(bpf_map_update_elem(map_fd[2],
422 &arp_entry.dst,
423 &arp_entry.mac, 0
424 ) == 0);
425 }
426 }
427 memset(&arp_entry, 0, sizeof(arp_entry));
428 memset(dsts, 0, sizeof(dsts));
429 }
430}
431
432/* Function to read the existing arp table when the process is launched*/
433static int get_arp_table(int rtm_family)
434{
435 struct sockaddr_nl sa;
436 struct nlmsghdr *nh;
437 int sock, seq = 0;
438 struct msghdr msg;
439 struct iovec iov;
440 int ret = 0;
441 int nll;
442 struct {
443 struct nlmsghdr nl;
444 struct ndmsg rt;
445 char buf[8192];
446 } req;
447
448 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
449 if (sock < 0) {
450 printf("open netlink socket: %s\n", strerror(errno));
451 return -1;
452 }
453 memset(&sa, 0, sizeof(sa));
454 sa.nl_family = AF_NETLINK;
455 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
456 printf("bind to netlink: %s\n", strerror(errno));
457 ret = -1;
458 goto cleanup;
459 }
460 memset(&req, 0, sizeof(req));
461 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
462 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
463 req.nl.nlmsg_type = RTM_GETNEIGH;
464 req.rt.ndm_state = NUD_REACHABLE;
465 req.rt.ndm_family = rtm_family;
466 req.nl.nlmsg_pid = 0;
467 req.nl.nlmsg_seq = ++seq;
468 memset(&msg, 0, sizeof(msg));
469 iov.iov_base = (void *)&req.nl;
470 iov.iov_len = req.nl.nlmsg_len;
471 msg.msg_iov = &iov;
472 msg.msg_iovlen = 1;
473 ret = sendmsg(sock, &msg, 0);
474 if (ret < 0) {
475 printf("send to netlink: %s\n", strerror(errno));
476 ret = -1;
477 goto cleanup;
478 }
479 memset(buf, 0, sizeof(buf));
480 nll = recv_msg(sa, sock);
481 if (nll < 0) {
482 printf("recv from netlink: %s\n", strerror(nll));
483 ret = -1;
484 goto cleanup;
485 }
486 nh = (struct nlmsghdr *)buf;
487 read_arp(nh, nll);
488cleanup:
489 close(sock);
490 return ret;
491}
492
493/* Function to keep track and update changes in route and arp table
494 * Give regular statistics of packets forwarded
495 */
496static int monitor_route(void)
497{
498 unsigned int nr_cpus = bpf_num_possible_cpus();
499 const unsigned int nr_keys = 256;
500 struct pollfd fds_route, fds_arp;
501 __u64 prev[nr_keys][nr_cpus];
502 struct sockaddr_nl la, lr;
503 __u64 values[nr_cpus];
504 struct nlmsghdr *nh;
505 int nll, ret = 0;
506 int interval = 5;
507 __u32 key;
508 int i;
509
510 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
511 if (sock < 0) {
512 printf("open netlink socket: %s\n", strerror(errno));
513 return -1;
514 }
515
516 fcntl(sock, F_SETFL, O_NONBLOCK);
517 memset(&lr, 0, sizeof(lr));
518 lr.nl_family = AF_NETLINK;
519 lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
520 if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
521 printf("bind to netlink: %s\n", strerror(errno));
522 ret = -1;
523 goto cleanup;
524 }
525 fds_route.fd = sock;
526 fds_route.events = POLL_IN;
527
528 sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
529 if (sock_arp < 0) {
530 printf("open netlink socket: %s\n", strerror(errno));
531 return -1;
532 }
533
534 fcntl(sock_arp, F_SETFL, O_NONBLOCK);
535 memset(&la, 0, sizeof(la));
536 la.nl_family = AF_NETLINK;
537 la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
538 if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
539 printf("bind to netlink: %s\n", strerror(errno));
540 ret = -1;
541 goto cleanup;
542 }
543 fds_arp.fd = sock_arp;
544 fds_arp.events = POLL_IN;
545
546 memset(prev, 0, sizeof(prev));
547 do {
548 signal(SIGINT, close_and_exit);
549 signal(SIGTERM, close_and_exit);
550
551 sleep(interval);
552 for (key = 0; key < nr_keys; key++) {
553 __u64 sum = 0;
554
555 assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
556 for (i = 0; i < nr_cpus; i++)
557 sum += (values[i] - prev[key][i]);
558 if (sum)
559 printf("proto %u: %10llu pkt/s\n",
560 key, sum / interval);
561 memcpy(prev[key], values, sizeof(values));
562 }
563
564 memset(buf, 0, sizeof(buf));
565 if (poll(&fds_route, 1, 3) == POLL_IN) {
566 nll = recv_msg(lr, sock);
567 if (nll < 0) {
568 printf("recv from netlink: %s\n", strerror(nll));
569 ret = -1;
570 goto cleanup;
571 }
572
573 nh = (struct nlmsghdr *)buf;
574 printf("Routing table updated.\n");
575 read_route(nh, nll);
576 }
577 memset(buf, 0, sizeof(buf));
578 if (poll(&fds_arp, 1, 3) == POLL_IN) {
579 nll = recv_msg(la, sock_arp);
580 if (nll < 0) {
581 printf("recv from netlink: %s\n", strerror(nll));
582 ret = -1;
583 goto cleanup;
584 }
585
586 nh = (struct nlmsghdr *)buf;
587 read_arp(nh, nll);
588 }
589
590 } while (1);
591cleanup:
592 close(sock);
593 return ret;
594}
595
596int main(int ac, char **argv)
597{
598 char filename[256];
599 char **ifname_list;
600 int i = 1;
601
602 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
603 if (ac < 2) {
604 printf("usage: %s [-S] Interface name list\n", argv[0]);
605 return 1;
606 }
607 if (!strcmp(argv[1], "-S")) {
608 flags = XDP_FLAGS_SKB_MODE;
609 total_ifindex = ac - 2;
610 ifname_list = (argv + 2);
611 } else {
612 flags = 0;
613 total_ifindex = ac - 1;
614 ifname_list = (argv + 1);
615 }
616 if (load_bpf_file(filename)) {
617 printf("%s", bpf_log_buf);
618 return 1;
619 }
620 printf("\n**************loading bpf file*********************\n\n\n");
621 if (!prog_fd[0]) {
622 printf("load_bpf_file: %s\n", strerror(errno));
623 return 1;
624 }
625 ifindex_list = (int *)malloc(total_ifindex * sizeof(int *));
626 for (i = 0; i < total_ifindex; i++) {
627 ifindex_list[i] = if_nametoindex(ifname_list[i]);
628 if (!ifindex_list[i]) {
629 printf("Couldn't translate interface name: %s",
630 strerror(errno));
631 return 1;
632 }
633 }
634 for (i = 0; i < total_ifindex; i++) {
635 if (set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) {
636 printf("link set xdp fd failed\n");
637 int recovery_index = i;
638
639 for (i = 0; i < recovery_index; i++)
640 set_link_xdp_fd(ifindex_list[i], -1, flags);
641
642 return 1;
643 }
644 printf("Attached to %d\n", ifindex_list[i]);
645 }
646 signal(SIGINT, int_exit);
647 signal(SIGTERM, int_exit);
648
649 printf("*******************ROUTE TABLE*************************\n\n\n");
650 get_route_table(AF_INET);
651 printf("*******************ARP TABLE***************************\n\n\n");
652 get_arp_table(AF_INET);
653 if (monitor_route() < 0) {
654 printf("Error in receiving route update");
655 return 1;
656 }
657
658 return 0;
659}