diff options
author | Alexei Starovoitov <ast@fb.com> | 2017-12-14 20:55:12 -0500 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2017-12-17 14:34:36 -0500 |
commit | b0b04fc49e3b97a6039b9b658798efdcda71478d (patch) | |
tree | d8736cb4803d6e53ee0a222d67d287e95a8cbfe3 /tools | |
parent | 3bc35c63cb70466c78d3972ceaf8205aa463a192 (diff) |
selftests/bpf: add xdp noinline test
add large semi-artificial XDP test with 18 functions to stress test
bpf call verification logic
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/testing/selftests/bpf/Makefile | 3 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/test_progs.c | 81 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/test_xdp_noinline.c | 833 |
3 files changed, 916 insertions, 1 deletions
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6970d073df5b..7ef9601d04bf 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile | |||
@@ -18,7 +18,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test | |||
18 | TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ | 18 | TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ |
19 | test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ | 19 | test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ |
20 | sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ | 20 | sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ |
21 | test_l4lb_noinline.o | 21 | test_l4lb_noinline.o test_xdp_noinline.o |
22 | 22 | ||
23 | TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ | 23 | TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ |
24 | test_offload.py | 24 | test_offload.py |
@@ -54,6 +54,7 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ | |||
54 | -Wno-compare-distinct-pointer-types | 54 | -Wno-compare-distinct-pointer-types |
55 | 55 | ||
56 | $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline | 56 | $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline |
57 | $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline | ||
57 | 58 | ||
58 | %.o: %.c | 59 | %.o: %.c |
59 | $(CLANG) $(CLANG_FLAGS) \ | 60 | $(CLANG) $(CLANG_FLAGS) \ |
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index abff83bf8d40..6472ca98690e 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c | |||
@@ -257,6 +257,86 @@ static void test_l4lb_all(void) | |||
257 | test_l4lb(file2); | 257 | test_l4lb(file2); |
258 | } | 258 | } |
259 | 259 | ||
260 | static void test_xdp_noinline(void) | ||
261 | { | ||
262 | const char *file = "./test_xdp_noinline.o"; | ||
263 | unsigned int nr_cpus = bpf_num_possible_cpus(); | ||
264 | struct vip key = {.protocol = 6}; | ||
265 | struct vip_meta { | ||
266 | __u32 flags; | ||
267 | __u32 vip_num; | ||
268 | } value = {.vip_num = VIP_NUM}; | ||
269 | __u32 stats_key = VIP_NUM; | ||
270 | struct vip_stats { | ||
271 | __u64 bytes; | ||
272 | __u64 pkts; | ||
273 | } stats[nr_cpus]; | ||
274 | struct real_definition { | ||
275 | union { | ||
276 | __be32 dst; | ||
277 | __be32 dstv6[4]; | ||
278 | }; | ||
279 | __u8 flags; | ||
280 | } real_def = {.dst = MAGIC_VAL}; | ||
281 | __u32 ch_key = 11, real_num = 3; | ||
282 | __u32 duration, retval, size; | ||
283 | int err, i, prog_fd, map_fd; | ||
284 | __u64 bytes = 0, pkts = 0; | ||
285 | struct bpf_object *obj; | ||
286 | char buf[128]; | ||
287 | u32 *magic = (u32 *)buf; | ||
288 | |||
289 | err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); | ||
290 | if (err) { | ||
291 | error_cnt++; | ||
292 | return; | ||
293 | } | ||
294 | |||
295 | map_fd = bpf_find_map(__func__, obj, "vip_map"); | ||
296 | if (map_fd < 0) | ||
297 | goto out; | ||
298 | bpf_map_update_elem(map_fd, &key, &value, 0); | ||
299 | |||
300 | map_fd = bpf_find_map(__func__, obj, "ch_rings"); | ||
301 | if (map_fd < 0) | ||
302 | goto out; | ||
303 | bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); | ||
304 | |||
305 | map_fd = bpf_find_map(__func__, obj, "reals"); | ||
306 | if (map_fd < 0) | ||
307 | goto out; | ||
308 | bpf_map_update_elem(map_fd, &real_num, &real_def, 0); | ||
309 | |||
310 | err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), | ||
311 | buf, &size, &retval, &duration); | ||
312 | CHECK(err || errno || retval != 1 || size != 54 || | ||
313 | *magic != MAGIC_VAL, "ipv4", | ||
314 | "err %d errno %d retval %d size %d magic %x\n", | ||
315 | err, errno, retval, size, *magic); | ||
316 | |||
317 | err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), | ||
318 | buf, &size, &retval, &duration); | ||
319 | CHECK(err || errno || retval != 1 || size != 74 || | ||
320 | *magic != MAGIC_VAL, "ipv6", | ||
321 | "err %d errno %d retval %d size %d magic %x\n", | ||
322 | err, errno, retval, size, *magic); | ||
323 | |||
324 | map_fd = bpf_find_map(__func__, obj, "stats"); | ||
325 | if (map_fd < 0) | ||
326 | goto out; | ||
327 | bpf_map_lookup_elem(map_fd, &stats_key, stats); | ||
328 | for (i = 0; i < nr_cpus; i++) { | ||
329 | bytes += stats[i].bytes; | ||
330 | pkts += stats[i].pkts; | ||
331 | } | ||
332 | if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { | ||
333 | error_cnt++; | ||
334 | printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts); | ||
335 | } | ||
336 | out: | ||
337 | bpf_object__close(obj); | ||
338 | } | ||
339 | |||
260 | static void test_tcp_estats(void) | 340 | static void test_tcp_estats(void) |
261 | { | 341 | { |
262 | const char *file = "./test_tcp_estats.o"; | 342 | const char *file = "./test_tcp_estats.o"; |
@@ -766,6 +846,7 @@ int main(void) | |||
766 | test_pkt_access(); | 846 | test_pkt_access(); |
767 | test_xdp(); | 847 | test_xdp(); |
768 | test_l4lb_all(); | 848 | test_l4lb_all(); |
849 | test_xdp_noinline(); | ||
769 | test_tcp_estats(); | 850 | test_tcp_estats(); |
770 | test_bpf_obj_id(); | 851 | test_bpf_obj_id(); |
771 | test_pkt_md_access(); | 852 | test_pkt_md_access(); |
diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c new file mode 100644 index 000000000000..5e4aac74f9d0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_noinline.c | |||
@@ -0,0 +1,833 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | // Copyright (c) 2017 Facebook | ||
3 | #include <stddef.h> | ||
4 | #include <stdbool.h> | ||
5 | #include <string.h> | ||
6 | #include <linux/pkt_cls.h> | ||
7 | #include <linux/bpf.h> | ||
8 | #include <linux/in.h> | ||
9 | #include <linux/if_ether.h> | ||
10 | #include <linux/ip.h> | ||
11 | #include <linux/ipv6.h> | ||
12 | #include <linux/icmp.h> | ||
13 | #include <linux/icmpv6.h> | ||
14 | #include <linux/tcp.h> | ||
15 | #include <linux/udp.h> | ||
16 | #include "bpf_helpers.h" | ||
17 | |||
18 | #define bpf_printk(fmt, ...) \ | ||
19 | ({ \ | ||
20 | char ____fmt[] = fmt; \ | ||
21 | bpf_trace_printk(____fmt, sizeof(____fmt), \ | ||
22 | ##__VA_ARGS__); \ | ||
23 | }) | ||
24 | |||
25 | static __u32 rol32(__u32 word, unsigned int shift) | ||
26 | { | ||
27 | return (word << shift) | (word >> ((-shift) & 31)); | ||
28 | } | ||
29 | |||
30 | /* copy paste of jhash from kernel sources to make sure llvm | ||
31 | * can compile it into valid sequence of bpf instructions | ||
32 | */ | ||
33 | #define __jhash_mix(a, b, c) \ | ||
34 | { \ | ||
35 | a -= c; a ^= rol32(c, 4); c += b; \ | ||
36 | b -= a; b ^= rol32(a, 6); a += c; \ | ||
37 | c -= b; c ^= rol32(b, 8); b += a; \ | ||
38 | a -= c; a ^= rol32(c, 16); c += b; \ | ||
39 | b -= a; b ^= rol32(a, 19); a += c; \ | ||
40 | c -= b; c ^= rol32(b, 4); b += a; \ | ||
41 | } | ||
42 | |||
43 | #define __jhash_final(a, b, c) \ | ||
44 | { \ | ||
45 | c ^= b; c -= rol32(b, 14); \ | ||
46 | a ^= c; a -= rol32(c, 11); \ | ||
47 | b ^= a; b -= rol32(a, 25); \ | ||
48 | c ^= b; c -= rol32(b, 16); \ | ||
49 | a ^= c; a -= rol32(c, 4); \ | ||
50 | b ^= a; b -= rol32(a, 14); \ | ||
51 | c ^= b; c -= rol32(b, 24); \ | ||
52 | } | ||
53 | |||
54 | #define JHASH_INITVAL 0xdeadbeef | ||
55 | |||
56 | typedef unsigned int u32; | ||
57 | |||
58 | static __attribute__ ((noinline)) | ||
59 | u32 jhash(const void *key, u32 length, u32 initval) | ||
60 | { | ||
61 | u32 a, b, c; | ||
62 | const unsigned char *k = key; | ||
63 | |||
64 | a = b = c = JHASH_INITVAL + length + initval; | ||
65 | |||
66 | while (length > 12) { | ||
67 | a += *(u32 *)(k); | ||
68 | b += *(u32 *)(k + 4); | ||
69 | c += *(u32 *)(k + 8); | ||
70 | __jhash_mix(a, b, c); | ||
71 | length -= 12; | ||
72 | k += 12; | ||
73 | } | ||
74 | switch (length) { | ||
75 | case 12: c += (u32)k[11]<<24; | ||
76 | case 11: c += (u32)k[10]<<16; | ||
77 | case 10: c += (u32)k[9]<<8; | ||
78 | case 9: c += k[8]; | ||
79 | case 8: b += (u32)k[7]<<24; | ||
80 | case 7: b += (u32)k[6]<<16; | ||
81 | case 6: b += (u32)k[5]<<8; | ||
82 | case 5: b += k[4]; | ||
83 | case 4: a += (u32)k[3]<<24; | ||
84 | case 3: a += (u32)k[2]<<16; | ||
85 | case 2: a += (u32)k[1]<<8; | ||
86 | case 1: a += k[0]; | ||
87 | __jhash_final(a, b, c); | ||
88 | case 0: /* Nothing left to add */ | ||
89 | break; | ||
90 | } | ||
91 | |||
92 | return c; | ||
93 | } | ||
94 | |||
95 | static __attribute__ ((noinline)) | ||
96 | u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) | ||
97 | { | ||
98 | a += initval; | ||
99 | b += initval; | ||
100 | c += initval; | ||
101 | __jhash_final(a, b, c); | ||
102 | return c; | ||
103 | } | ||
104 | |||
105 | static __attribute__ ((noinline)) | ||
106 | u32 jhash_2words(u32 a, u32 b, u32 initval) | ||
107 | { | ||
108 | return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); | ||
109 | } | ||
110 | |||
111 | struct flow_key { | ||
112 | union { | ||
113 | __be32 src; | ||
114 | __be32 srcv6[4]; | ||
115 | }; | ||
116 | union { | ||
117 | __be32 dst; | ||
118 | __be32 dstv6[4]; | ||
119 | }; | ||
120 | union { | ||
121 | __u32 ports; | ||
122 | __u16 port16[2]; | ||
123 | }; | ||
124 | __u8 proto; | ||
125 | }; | ||
126 | |||
127 | struct packet_description { | ||
128 | struct flow_key flow; | ||
129 | __u8 flags; | ||
130 | }; | ||
131 | |||
132 | struct ctl_value { | ||
133 | union { | ||
134 | __u64 value; | ||
135 | __u32 ifindex; | ||
136 | __u8 mac[6]; | ||
137 | }; | ||
138 | }; | ||
139 | |||
140 | struct vip_definition { | ||
141 | union { | ||
142 | __be32 vip; | ||
143 | __be32 vipv6[4]; | ||
144 | }; | ||
145 | __u16 port; | ||
146 | __u16 family; | ||
147 | __u8 proto; | ||
148 | }; | ||
149 | |||
150 | struct vip_meta { | ||
151 | __u32 flags; | ||
152 | __u32 vip_num; | ||
153 | }; | ||
154 | |||
155 | struct real_pos_lru { | ||
156 | __u32 pos; | ||
157 | __u64 atime; | ||
158 | }; | ||
159 | |||
160 | struct real_definition { | ||
161 | union { | ||
162 | __be32 dst; | ||
163 | __be32 dstv6[4]; | ||
164 | }; | ||
165 | __u8 flags; | ||
166 | }; | ||
167 | |||
168 | struct lb_stats { | ||
169 | __u64 v2; | ||
170 | __u64 v1; | ||
171 | }; | ||
172 | |||
173 | struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = { | ||
174 | .type = BPF_MAP_TYPE_HASH, | ||
175 | .key_size = sizeof(struct vip_definition), | ||
176 | .value_size = sizeof(struct vip_meta), | ||
177 | .max_entries = 512, | ||
178 | .map_flags = 0, | ||
179 | }; | ||
180 | |||
181 | struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = { | ||
182 | .type = BPF_MAP_TYPE_LRU_HASH, | ||
183 | .key_size = sizeof(struct flow_key), | ||
184 | .value_size = sizeof(struct real_pos_lru), | ||
185 | .max_entries = 300, | ||
186 | .map_flags = 1U << 1, | ||
187 | }; | ||
188 | |||
189 | struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = { | ||
190 | .type = BPF_MAP_TYPE_ARRAY, | ||
191 | .key_size = sizeof(__u32), | ||
192 | .value_size = sizeof(__u32), | ||
193 | .max_entries = 12 * 655, | ||
194 | .map_flags = 0, | ||
195 | }; | ||
196 | |||
197 | struct bpf_map_def __attribute__ ((section("maps"), used)) reals = { | ||
198 | .type = BPF_MAP_TYPE_ARRAY, | ||
199 | .key_size = sizeof(__u32), | ||
200 | .value_size = sizeof(struct real_definition), | ||
201 | .max_entries = 40, | ||
202 | .map_flags = 0, | ||
203 | }; | ||
204 | |||
205 | struct bpf_map_def __attribute__ ((section("maps"), used)) stats = { | ||
206 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | ||
207 | .key_size = sizeof(__u32), | ||
208 | .value_size = sizeof(struct lb_stats), | ||
209 | .max_entries = 515, | ||
210 | .map_flags = 0, | ||
211 | }; | ||
212 | |||
213 | struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { | ||
214 | .type = BPF_MAP_TYPE_ARRAY, | ||
215 | .key_size = sizeof(__u32), | ||
216 | .value_size = sizeof(struct ctl_value), | ||
217 | .max_entries = 16, | ||
218 | .map_flags = 0, | ||
219 | }; | ||
220 | |||
/* Ethernet header as laid out at the start of the packet. */
struct eth_hdr {
	unsigned char	eth_dest[6];
	unsigned char	eth_source[6];
	unsigned short	eth_proto;
};
226 | |||
227 | static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) | ||
228 | { | ||
229 | __u64 off = sizeof(struct eth_hdr); | ||
230 | if (is_ipv6) { | ||
231 | off += sizeof(struct ipv6hdr); | ||
232 | if (is_icmp) | ||
233 | off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr); | ||
234 | } else { | ||
235 | off += sizeof(struct iphdr); | ||
236 | if (is_icmp) | ||
237 | off += sizeof(struct icmphdr) + sizeof(struct iphdr); | ||
238 | } | ||
239 | return off; | ||
240 | } | ||
241 | |||
242 | static __attribute__ ((noinline)) | ||
243 | bool parse_udp(void *data, void *data_end, | ||
244 | bool is_ipv6, struct packet_description *pckt) | ||
245 | { | ||
246 | |||
247 | bool is_icmp = !((pckt->flags & (1 << 0)) == 0); | ||
248 | __u64 off = calc_offset(is_ipv6, is_icmp); | ||
249 | struct udphdr *udp; | ||
250 | udp = data + off; | ||
251 | |||
252 | if (udp + 1 > data_end) | ||
253 | return 0; | ||
254 | if (!is_icmp) { | ||
255 | pckt->flow.port16[0] = udp->source; | ||
256 | pckt->flow.port16[1] = udp->dest; | ||
257 | } else { | ||
258 | pckt->flow.port16[0] = udp->dest; | ||
259 | pckt->flow.port16[1] = udp->source; | ||
260 | } | ||
261 | return 1; | ||
262 | } | ||
263 | |||
264 | static __attribute__ ((noinline)) | ||
265 | bool parse_tcp(void *data, void *data_end, | ||
266 | bool is_ipv6, struct packet_description *pckt) | ||
267 | { | ||
268 | |||
269 | bool is_icmp = !((pckt->flags & (1 << 0)) == 0); | ||
270 | __u64 off = calc_offset(is_ipv6, is_icmp); | ||
271 | struct tcphdr *tcp; | ||
272 | |||
273 | tcp = data + off; | ||
274 | if (tcp + 1 > data_end) | ||
275 | return 0; | ||
276 | if (tcp->syn) | ||
277 | pckt->flags |= (1 << 1); | ||
278 | if (!is_icmp) { | ||
279 | pckt->flow.port16[0] = tcp->source; | ||
280 | pckt->flow.port16[1] = tcp->dest; | ||
281 | } else { | ||
282 | pckt->flow.port16[0] = tcp->dest; | ||
283 | pckt->flow.port16[1] = tcp->source; | ||
284 | } | ||
285 | return 1; | ||
286 | } | ||
287 | |||
288 | static __attribute__ ((noinline)) | ||
289 | bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, | ||
290 | struct packet_description *pckt, | ||
291 | struct real_definition *dst, __u32 pkt_bytes) | ||
292 | { | ||
293 | struct eth_hdr *new_eth; | ||
294 | struct eth_hdr *old_eth; | ||
295 | struct ipv6hdr *ip6h; | ||
296 | __u32 ip_suffix; | ||
297 | void *data_end; | ||
298 | void *data; | ||
299 | |||
300 | if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) | ||
301 | return 0; | ||
302 | data = (void *)(long)xdp->data; | ||
303 | data_end = (void *)(long)xdp->data_end; | ||
304 | new_eth = data; | ||
305 | ip6h = data + sizeof(struct eth_hdr); | ||
306 | old_eth = data + sizeof(struct ipv6hdr); | ||
307 | if (new_eth + 1 > data_end || | ||
308 | old_eth + 1 > data_end || ip6h + 1 > data_end) | ||
309 | return 0; | ||
310 | memcpy(new_eth->eth_dest, cval->mac, 6); | ||
311 | memcpy(new_eth->eth_source, old_eth->eth_dest, 6); | ||
312 | new_eth->eth_proto = 56710; | ||
313 | ip6h->version = 6; | ||
314 | ip6h->priority = 0; | ||
315 | memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); | ||
316 | |||
317 | ip6h->nexthdr = IPPROTO_IPV6; | ||
318 | ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; | ||
319 | ip6h->payload_len = | ||
320 | __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr)); | ||
321 | ip6h->hop_limit = 4; | ||
322 | |||
323 | ip6h->saddr.in6_u.u6_addr32[0] = 1; | ||
324 | ip6h->saddr.in6_u.u6_addr32[1] = 2; | ||
325 | ip6h->saddr.in6_u.u6_addr32[2] = 3; | ||
326 | ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; | ||
327 | memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); | ||
328 | return 1; | ||
329 | } | ||
330 | |||
331 | static __attribute__ ((noinline)) | ||
332 | bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, | ||
333 | struct packet_description *pckt, | ||
334 | struct real_definition *dst, __u32 pkt_bytes) | ||
335 | { | ||
336 | |||
337 | __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]); | ||
338 | struct eth_hdr *new_eth; | ||
339 | struct eth_hdr *old_eth; | ||
340 | __u16 *next_iph_u16; | ||
341 | struct iphdr *iph; | ||
342 | __u32 csum = 0; | ||
343 | void *data_end; | ||
344 | void *data; | ||
345 | |||
346 | ip_suffix <<= 15; | ||
347 | ip_suffix ^= pckt->flow.src; | ||
348 | if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) | ||
349 | return 0; | ||
350 | data = (void *)(long)xdp->data; | ||
351 | data_end = (void *)(long)xdp->data_end; | ||
352 | new_eth = data; | ||
353 | iph = data + sizeof(struct eth_hdr); | ||
354 | old_eth = data + sizeof(struct iphdr); | ||
355 | if (new_eth + 1 > data_end || | ||
356 | old_eth + 1 > data_end || iph + 1 > data_end) | ||
357 | return 0; | ||
358 | memcpy(new_eth->eth_dest, cval->mac, 6); | ||
359 | memcpy(new_eth->eth_source, old_eth->eth_dest, 6); | ||
360 | new_eth->eth_proto = 8; | ||
361 | iph->version = 4; | ||
362 | iph->ihl = 5; | ||
363 | iph->frag_off = 0; | ||
364 | iph->protocol = IPPROTO_IPIP; | ||
365 | iph->check = 0; | ||
366 | iph->tos = 1; | ||
367 | iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr)); | ||
368 | /* don't update iph->daddr, since it will overwrite old eth_proto | ||
369 | * and multiple iterations of bpf_prog_run() will fail | ||
370 | */ | ||
371 | |||
372 | iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst; | ||
373 | iph->ttl = 4; | ||
374 | |||
375 | next_iph_u16 = (__u16 *) iph; | ||
376 | #pragma clang loop unroll(full) | ||
377 | for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) | ||
378 | csum += *next_iph_u16++; | ||
379 | iph->check = ~((csum & 0xffff) + (csum >> 16)); | ||
380 | if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) | ||
381 | return 0; | ||
382 | return 1; | ||
383 | } | ||
384 | |||
385 | static __attribute__ ((noinline)) | ||
386 | bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) | ||
387 | { | ||
388 | struct eth_hdr *new_eth; | ||
389 | struct eth_hdr *old_eth; | ||
390 | |||
391 | old_eth = *data; | ||
392 | new_eth = *data + sizeof(struct ipv6hdr); | ||
393 | memcpy(new_eth->eth_source, old_eth->eth_source, 6); | ||
394 | memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); | ||
395 | if (inner_v4) | ||
396 | new_eth->eth_proto = 8; | ||
397 | else | ||
398 | new_eth->eth_proto = 56710; | ||
399 | if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) | ||
400 | return 0; | ||
401 | *data = (void *)(long)xdp->data; | ||
402 | *data_end = (void *)(long)xdp->data_end; | ||
403 | return 1; | ||
404 | } | ||
405 | |||
406 | static __attribute__ ((noinline)) | ||
407 | bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) | ||
408 | { | ||
409 | struct eth_hdr *new_eth; | ||
410 | struct eth_hdr *old_eth; | ||
411 | |||
412 | old_eth = *data; | ||
413 | new_eth = *data + sizeof(struct iphdr); | ||
414 | memcpy(new_eth->eth_source, old_eth->eth_source, 6); | ||
415 | memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); | ||
416 | new_eth->eth_proto = 8; | ||
417 | if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) | ||
418 | return 0; | ||
419 | *data = (void *)(long)xdp->data; | ||
420 | *data_end = (void *)(long)xdp->data_end; | ||
421 | return 1; | ||
422 | } | ||
423 | |||
424 | static __attribute__ ((noinline)) | ||
425 | int swap_mac_and_send(void *data, void *data_end) | ||
426 | { | ||
427 | unsigned char tmp_mac[6]; | ||
428 | struct eth_hdr *eth; | ||
429 | |||
430 | eth = data; | ||
431 | memcpy(tmp_mac, eth->eth_source, 6); | ||
432 | memcpy(eth->eth_source, eth->eth_dest, 6); | ||
433 | memcpy(eth->eth_dest, tmp_mac, 6); | ||
434 | return XDP_TX; | ||
435 | } | ||
436 | |||
437 | static __attribute__ ((noinline)) | ||
438 | int send_icmp_reply(void *data, void *data_end) | ||
439 | { | ||
440 | struct icmphdr *icmp_hdr; | ||
441 | __u16 *next_iph_u16; | ||
442 | __u32 tmp_addr = 0; | ||
443 | struct iphdr *iph; | ||
444 | __u32 csum1 = 0; | ||
445 | __u32 csum = 0; | ||
446 | __u64 off = 0; | ||
447 | |||
448 | if (data + sizeof(struct eth_hdr) | ||
449 | + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end) | ||
450 | return XDP_DROP; | ||
451 | off += sizeof(struct eth_hdr); | ||
452 | iph = data + off; | ||
453 | off += sizeof(struct iphdr); | ||
454 | icmp_hdr = data + off; | ||
455 | icmp_hdr->type = 0; | ||
456 | icmp_hdr->checksum += 0x0007; | ||
457 | iph->ttl = 4; | ||
458 | tmp_addr = iph->daddr; | ||
459 | iph->daddr = iph->saddr; | ||
460 | iph->saddr = tmp_addr; | ||
461 | iph->check = 0; | ||
462 | next_iph_u16 = (__u16 *) iph; | ||
463 | #pragma clang loop unroll(full) | ||
464 | for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) | ||
465 | csum += *next_iph_u16++; | ||
466 | iph->check = ~((csum & 0xffff) + (csum >> 16)); | ||
467 | return swap_mac_and_send(data, data_end); | ||
468 | } | ||
469 | |||
470 | static __attribute__ ((noinline)) | ||
471 | int send_icmp6_reply(void *data, void *data_end) | ||
472 | { | ||
473 | struct icmp6hdr *icmp_hdr; | ||
474 | struct ipv6hdr *ip6h; | ||
475 | __be32 tmp_addr[4]; | ||
476 | __u64 off = 0; | ||
477 | |||
478 | if (data + sizeof(struct eth_hdr) | ||
479 | + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end) | ||
480 | return XDP_DROP; | ||
481 | off += sizeof(struct eth_hdr); | ||
482 | ip6h = data + off; | ||
483 | off += sizeof(struct ipv6hdr); | ||
484 | icmp_hdr = data + off; | ||
485 | icmp_hdr->icmp6_type = 129; | ||
486 | icmp_hdr->icmp6_cksum -= 0x0001; | ||
487 | ip6h->hop_limit = 4; | ||
488 | memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16); | ||
489 | memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16); | ||
490 | memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16); | ||
491 | return swap_mac_and_send(data, data_end); | ||
492 | } | ||
493 | |||
494 | static __attribute__ ((noinline)) | ||
495 | int parse_icmpv6(void *data, void *data_end, __u64 off, | ||
496 | struct packet_description *pckt) | ||
497 | { | ||
498 | struct icmp6hdr *icmp_hdr; | ||
499 | struct ipv6hdr *ip6h; | ||
500 | |||
501 | icmp_hdr = data + off; | ||
502 | if (icmp_hdr + 1 > data_end) | ||
503 | return XDP_DROP; | ||
504 | if (icmp_hdr->icmp6_type == 128) | ||
505 | return send_icmp6_reply(data, data_end); | ||
506 | if (icmp_hdr->icmp6_type != 3) | ||
507 | return XDP_PASS; | ||
508 | off += sizeof(struct icmp6hdr); | ||
509 | ip6h = data + off; | ||
510 | if (ip6h + 1 > data_end) | ||
511 | return XDP_DROP; | ||
512 | pckt->flow.proto = ip6h->nexthdr; | ||
513 | pckt->flags |= (1 << 0); | ||
514 | memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); | ||
515 | memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16); | ||
516 | return -1; | ||
517 | } | ||
518 | |||
519 | static __attribute__ ((noinline)) | ||
520 | int parse_icmp(void *data, void *data_end, __u64 off, | ||
521 | struct packet_description *pckt) | ||
522 | { | ||
523 | struct icmphdr *icmp_hdr; | ||
524 | struct iphdr *iph; | ||
525 | |||
526 | icmp_hdr = data + off; | ||
527 | if (icmp_hdr + 1 > data_end) | ||
528 | return XDP_DROP; | ||
529 | if (icmp_hdr->type == 8) | ||
530 | return send_icmp_reply(data, data_end); | ||
531 | if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) | ||
532 | return XDP_PASS; | ||
533 | off += sizeof(struct icmphdr); | ||
534 | iph = data + off; | ||
535 | if (iph + 1 > data_end) | ||
536 | return XDP_DROP; | ||
537 | if (iph->ihl != 5) | ||
538 | return XDP_DROP; | ||
539 | pckt->flow.proto = iph->protocol; | ||
540 | pckt->flags |= (1 << 0); | ||
541 | pckt->flow.src = iph->daddr; | ||
542 | pckt->flow.dst = iph->saddr; | ||
543 | return -1; | ||
544 | } | ||
545 | |||
546 | static __attribute__ ((noinline)) | ||
547 | __u32 get_packet_hash(struct packet_description *pckt, | ||
548 | bool hash_16bytes) | ||
549 | { | ||
550 | if (hash_16bytes) | ||
551 | return jhash_2words(jhash(pckt->flow.srcv6, 16, 12), | ||
552 | pckt->flow.ports, 24); | ||
553 | else | ||
554 | return jhash_2words(pckt->flow.src, pckt->flow.ports, | ||
555 | 24); | ||
556 | } | ||
557 | |||
558 | __attribute__ ((noinline)) | ||
559 | static bool get_packet_dst(struct real_definition **real, | ||
560 | struct packet_description *pckt, | ||
561 | struct vip_meta *vip_info, | ||
562 | bool is_ipv6, void *lru_map) | ||
563 | { | ||
564 | struct real_pos_lru new_dst_lru = { }; | ||
565 | bool hash_16bytes = is_ipv6; | ||
566 | __u32 *real_pos, hash, key; | ||
567 | __u64 cur_time; | ||
568 | |||
569 | if (vip_info->flags & (1 << 2)) | ||
570 | hash_16bytes = 1; | ||
571 | if (vip_info->flags & (1 << 3)) { | ||
572 | pckt->flow.port16[0] = pckt->flow.port16[1]; | ||
573 | memset(pckt->flow.srcv6, 0, 16); | ||
574 | } | ||
575 | hash = get_packet_hash(pckt, hash_16bytes); | ||
576 | if (hash != 0x358459b7 /* jhash of ipv4 packet */ && | ||
577 | hash != 0x2f4bc6bb /* jhash of ipv6 packet */) | ||
578 | return 0; | ||
579 | key = 2 * vip_info->vip_num + hash % 2; | ||
580 | real_pos = bpf_map_lookup_elem(&ch_rings, &key); | ||
581 | if (!real_pos) | ||
582 | return 0; | ||
583 | key = *real_pos; | ||
584 | *real = bpf_map_lookup_elem(&reals, &key); | ||
585 | if (!(*real)) | ||
586 | return 0; | ||
587 | if (!(vip_info->flags & (1 << 1))) { | ||
588 | __u32 conn_rate_key = 512 + 2; | ||
589 | struct lb_stats *conn_rate_stats = | ||
590 | bpf_map_lookup_elem(&stats, &conn_rate_key); | ||
591 | |||
592 | if (!conn_rate_stats) | ||
593 | return 1; | ||
594 | cur_time = bpf_ktime_get_ns(); | ||
595 | if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { | ||
596 | conn_rate_stats->v1 = 1; | ||
597 | conn_rate_stats->v2 = cur_time; | ||
598 | } else { | ||
599 | conn_rate_stats->v1 += 1; | ||
600 | if (conn_rate_stats->v1 >= 1) | ||
601 | return 1; | ||
602 | } | ||
603 | if (pckt->flow.proto == IPPROTO_UDP) | ||
604 | new_dst_lru.atime = cur_time; | ||
605 | new_dst_lru.pos = key; | ||
606 | bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); | ||
607 | } | ||
608 | return 1; | ||
609 | } | ||
610 | |||
611 | __attribute__ ((noinline)) | ||
612 | static void connection_table_lookup(struct real_definition **real, | ||
613 | struct packet_description *pckt, | ||
614 | void *lru_map) | ||
615 | { | ||
616 | |||
617 | struct real_pos_lru *dst_lru; | ||
618 | __u64 cur_time; | ||
619 | __u32 key; | ||
620 | |||
621 | dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow); | ||
622 | if (!dst_lru) | ||
623 | return; | ||
624 | if (pckt->flow.proto == IPPROTO_UDP) { | ||
625 | cur_time = bpf_ktime_get_ns(); | ||
626 | if (cur_time - dst_lru->atime > 300000) | ||
627 | return; | ||
628 | dst_lru->atime = cur_time; | ||
629 | } | ||
630 | key = dst_lru->pos; | ||
631 | *real = bpf_map_lookup_elem(&reals, &key); | ||
632 | } | ||
633 | |||
634 | /* don't believe your eyes! | ||
635 | * below function has 6 arguments whereas bpf and llvm allow maximum of 5 | ||
636 | * but since it's _static_ llvm can optimize one argument away | ||
637 | */ | ||
638 | __attribute__ ((noinline)) | ||
639 | static int process_l3_headers_v6(struct packet_description *pckt, | ||
640 | __u8 *protocol, __u64 off, | ||
641 | __u16 *pkt_bytes, void *data, | ||
642 | void *data_end) | ||
643 | { | ||
644 | struct ipv6hdr *ip6h; | ||
645 | __u64 iph_len; | ||
646 | int action; | ||
647 | |||
648 | ip6h = data + off; | ||
649 | if (ip6h + 1 > data_end) | ||
650 | return XDP_DROP; | ||
651 | iph_len = sizeof(struct ipv6hdr); | ||
652 | *protocol = ip6h->nexthdr; | ||
653 | pckt->flow.proto = *protocol; | ||
654 | *pkt_bytes = __builtin_bswap16(ip6h->payload_len); | ||
655 | off += iph_len; | ||
656 | if (*protocol == 45) { | ||
657 | return XDP_DROP; | ||
658 | } else if (*protocol == 59) { | ||
659 | action = parse_icmpv6(data, data_end, off, pckt); | ||
660 | if (action >= 0) | ||
661 | return action; | ||
662 | } else { | ||
663 | memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16); | ||
664 | memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16); | ||
665 | } | ||
666 | return -1; | ||
667 | } | ||
668 | |||
669 | __attribute__ ((noinline)) | ||
670 | static int process_l3_headers_v4(struct packet_description *pckt, | ||
671 | __u8 *protocol, __u64 off, | ||
672 | __u16 *pkt_bytes, void *data, | ||
673 | void *data_end) | ||
674 | { | ||
675 | struct iphdr *iph; | ||
676 | __u64 iph_len; | ||
677 | int action; | ||
678 | |||
679 | iph = data + off; | ||
680 | if (iph + 1 > data_end) | ||
681 | return XDP_DROP; | ||
682 | if (iph->ihl != 5) | ||
683 | return XDP_DROP; | ||
684 | *protocol = iph->protocol; | ||
685 | pckt->flow.proto = *protocol; | ||
686 | *pkt_bytes = __builtin_bswap16(iph->tot_len); | ||
687 | off += 20; | ||
688 | if (iph->frag_off & 65343) | ||
689 | return XDP_DROP; | ||
690 | if (*protocol == IPPROTO_ICMP) { | ||
691 | action = parse_icmp(data, data_end, off, pckt); | ||
692 | if (action >= 0) | ||
693 | return action; | ||
694 | } else { | ||
695 | pckt->flow.src = iph->saddr; | ||
696 | pckt->flow.dst = iph->daddr; | ||
697 | } | ||
698 | return -1; | ||
699 | } | ||
700 | |||
701 | __attribute__ ((noinline)) | ||
702 | static int process_packet(void *data, __u64 off, void *data_end, | ||
703 | bool is_ipv6, struct xdp_md *xdp) | ||
704 | { | ||
705 | |||
706 | struct real_definition *dst = NULL; | ||
707 | struct packet_description pckt = { }; | ||
708 | struct vip_definition vip = { }; | ||
709 | struct lb_stats *data_stats; | ||
710 | struct eth_hdr *eth = data; | ||
711 | void *lru_map = &lru_cache; | ||
712 | struct vip_meta *vip_info; | ||
713 | __u32 lru_stats_key = 513; | ||
714 | __u32 mac_addr_pos = 0; | ||
715 | __u32 stats_key = 512; | ||
716 | struct ctl_value *cval; | ||
717 | __u16 pkt_bytes; | ||
718 | __u64 iph_len; | ||
719 | __u8 protocol; | ||
720 | __u32 vip_num; | ||
721 | int action; | ||
722 | |||
723 | if (is_ipv6) | ||
724 | action = process_l3_headers_v6(&pckt, &protocol, off, | ||
725 | &pkt_bytes, data, data_end); | ||
726 | else | ||
727 | action = process_l3_headers_v4(&pckt, &protocol, off, | ||
728 | &pkt_bytes, data, data_end); | ||
729 | if (action >= 0) | ||
730 | return action; | ||
731 | protocol = pckt.flow.proto; | ||
732 | if (protocol == IPPROTO_TCP) { | ||
733 | if (!parse_tcp(data, data_end, is_ipv6, &pckt)) | ||
734 | return XDP_DROP; | ||
735 | } else if (protocol == IPPROTO_UDP) { | ||
736 | if (!parse_udp(data, data_end, is_ipv6, &pckt)) | ||
737 | return XDP_DROP; | ||
738 | } else { | ||
739 | return XDP_TX; | ||
740 | } | ||
741 | |||
742 | if (is_ipv6) | ||
743 | memcpy(vip.vipv6, pckt.flow.dstv6, 16); | ||
744 | else | ||
745 | vip.vip = pckt.flow.dst; | ||
746 | vip.port = pckt.flow.port16[1]; | ||
747 | vip.proto = pckt.flow.proto; | ||
748 | vip_info = bpf_map_lookup_elem(&vip_map, &vip); | ||
749 | if (!vip_info) { | ||
750 | vip.port = 0; | ||
751 | vip_info = bpf_map_lookup_elem(&vip_map, &vip); | ||
752 | if (!vip_info) | ||
753 | return XDP_PASS; | ||
754 | if (!(vip_info->flags & (1 << 4))) | ||
755 | pckt.flow.port16[1] = 0; | ||
756 | } | ||
757 | if (data_end - data > 1400) | ||
758 | return XDP_DROP; | ||
759 | data_stats = bpf_map_lookup_elem(&stats, &stats_key); | ||
760 | if (!data_stats) | ||
761 | return XDP_DROP; | ||
762 | data_stats->v1 += 1; | ||
763 | if (!dst) { | ||
764 | if (vip_info->flags & (1 << 0)) | ||
765 | pckt.flow.port16[0] = 0; | ||
766 | if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1))) | ||
767 | connection_table_lookup(&dst, &pckt, lru_map); | ||
768 | if (dst) | ||
769 | goto out; | ||
770 | if (pckt.flow.proto == IPPROTO_TCP) { | ||
771 | struct lb_stats *lru_stats = | ||
772 | bpf_map_lookup_elem(&stats, &lru_stats_key); | ||
773 | |||
774 | if (!lru_stats) | ||
775 | return XDP_DROP; | ||
776 | if (pckt.flags & (1 << 1)) | ||
777 | lru_stats->v1 += 1; | ||
778 | else | ||
779 | lru_stats->v2 += 1; | ||
780 | } | ||
781 | if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map)) | ||
782 | return XDP_DROP; | ||
783 | data_stats->v2 += 1; | ||
784 | } | ||
785 | out: | ||
786 | cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos); | ||
787 | if (!cval) | ||
788 | return XDP_DROP; | ||
789 | if (dst->flags & (1 << 0)) { | ||
790 | if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes)) | ||
791 | return XDP_DROP; | ||
792 | } else { | ||
793 | if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes)) | ||
794 | return XDP_DROP; | ||
795 | } | ||
796 | vip_num = vip_info->vip_num; | ||
797 | data_stats = bpf_map_lookup_elem(&stats, &vip_num); | ||
798 | if (!data_stats) | ||
799 | return XDP_DROP; | ||
800 | data_stats->v1 += 1; | ||
801 | data_stats->v2 += pkt_bytes; | ||
802 | |||
803 | data = (void *)(long)xdp->data; | ||
804 | data_end = (void *)(long)xdp->data_end; | ||
805 | if (data + 4 > data_end) | ||
806 | return XDP_DROP; | ||
807 | *(u32 *)data = dst->dst; | ||
808 | return XDP_DROP; | ||
809 | } | ||
810 | |||
811 | __attribute__ ((section("xdp-test"), used)) | ||
812 | int balancer_ingress(struct xdp_md *ctx) | ||
813 | { | ||
814 | void *data = (void *)(long)ctx->data; | ||
815 | void *data_end = (void *)(long)ctx->data_end; | ||
816 | struct eth_hdr *eth = data; | ||
817 | __u32 eth_proto; | ||
818 | __u32 nh_off; | ||
819 | |||
820 | nh_off = sizeof(struct eth_hdr); | ||
821 | if (data + nh_off > data_end) | ||
822 | return XDP_DROP; | ||
823 | eth_proto = eth->eth_proto; | ||
824 | if (eth_proto == 8) | ||
825 | return process_packet(data, nh_off, data_end, 0, ctx); | ||
826 | else if (eth_proto == 56710) | ||
827 | return process_packet(data, nh_off, data_end, 1, ctx); | ||
828 | else | ||
829 | return XDP_DROP; | ||
830 | } | ||
831 | |||
832 | char _license[] __attribute__ ((section("license"), used)) = "GPL"; | ||
833 | int _version __attribute__ ((section("version"), used)) = 1; | ||