diff options
author | Alexei Starovoitov <ast@plumgrid.com> | 2015-04-01 20:12:13 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-04-06 16:42:35 -0400 |
commit | 91bc4822c3d61b9bb7ef66d3b77948a4f9177954 (patch) | |
tree | ca92a811b501957c1e876290d305aaf81c8d9aff | |
parent | 5888b93b750609680735d6b8b737703083ef40ff (diff) |
tc: bpf: add checksum helpers
Commit 608cd71a9c7c ("tc: bpf: generalize pedit action") gave BPF programs
in the tc pipeline the ability to mangle packet data.
This patch adds two helpers bpf_l3_csum_replace() and bpf_l4_csum_replace()
for fixing up the protocol checksums after the packet mangling.
It also adds a 'flags' argument to the bpf_skb_store_bytes() helper to avoid
unnecessary checksum recomputations when BPF programs adjust the l3/l4
checksums themselves, and documents all three helpers in the uapi header.
Moreover, a sample program is added to show how BPF programs can make use
of the mangle and csum helpers.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/uapi/linux/bpf.h | 38 | ||||
-rw-r--r-- | net/core/filter.c | 108 | ||||
-rw-r--r-- | samples/bpf/Makefile | 1 | ||||
-rw-r--r-- | samples/bpf/bpf_helpers.h | 7 | ||||
-rw-r--r-- | samples/bpf/tcbpf1_kern.c | 71 |
5 files changed, 220 insertions, 5 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0db8580f3cca..23df3e7f8e7d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h | |||
@@ -168,7 +168,43 @@ enum bpf_func_id { | |||
168 | BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ | 168 | BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ |
169 | BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ | 169 | BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ |
170 | BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ | 170 | BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ |
171 | BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */ | 171 | |
172 | /** | ||
173 | * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet | ||
174 | * @skb: pointer to skb | ||
175 | * @offset: offset within packet from skb->data | ||
176 | * @from: pointer where to copy bytes from | ||
177 | * @len: number of bytes to store into packet | ||
178 | * @flags: bit 0 - if true, recompute skb->csum | ||
179 | * other bits - reserved | ||
180 | * Return: 0 on success | ||
181 | */ | ||
182 | BPF_FUNC_skb_store_bytes, | ||
183 | |||
184 | /** | ||
185 | * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum | ||
186 | * @skb: pointer to skb | ||
187 | * @offset: offset within packet where IP checksum is located | ||
188 | * @from: old value of header field | ||
189 | * @to: new value of header field | ||
190 | * @flags: bits 0-3 - size of header field | ||
191 | * other bits - reserved | ||
192 | * Return: 0 on success | ||
193 | */ | ||
194 | BPF_FUNC_l3_csum_replace, | ||
195 | |||
196 | /** | ||
197 | * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum | ||
198 | * @skb: pointer to skb | ||
199 | * @offset: offset within packet where TCP/UDP checksum is located | ||
200 | * @from: old value of header field | ||
201 | * @to: new value of header field | ||
202 | * @flags: bits 0-3 - size of header field | ||
203 | * bit 4 - is pseudo header | ||
204 | * other bits - reserved | ||
205 | * Return: 0 on success | ||
206 | */ | ||
207 | BPF_FUNC_l4_csum_replace, | ||
172 | __BPF_FUNC_MAX_ID, | 208 | __BPF_FUNC_MAX_ID, |
173 | }; | 209 | }; |
174 | 210 | ||
diff --git a/net/core/filter.c b/net/core/filter.c index 955a7d77decd..b669e75d2b36 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -1175,7 +1175,9 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) | |||
1175 | return 0; | 1175 | return 0; |
1176 | } | 1176 | } |
1177 | 1177 | ||
1178 | static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | 1178 | #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) |
1179 | |||
1180 | static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) | ||
1179 | { | 1181 | { |
1180 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | 1182 | struct sk_buff *skb = (struct sk_buff *) (long) r1; |
1181 | unsigned int offset = (unsigned int) r2; | 1183 | unsigned int offset = (unsigned int) r2; |
@@ -1192,7 +1194,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | |||
1192 | * | 1194 | * |
1193 | * so check for invalid 'offset' and too large 'len' | 1195 | * so check for invalid 'offset' and too large 'len' |
1194 | */ | 1196 | */ |
1195 | if (offset > 0xffff || len > sizeof(buf)) | 1197 | if (unlikely(offset > 0xffff || len > sizeof(buf))) |
1196 | return -EFAULT; | 1198 | return -EFAULT; |
1197 | 1199 | ||
1198 | if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) | 1200 | if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) |
@@ -1202,7 +1204,8 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | |||
1202 | if (unlikely(!ptr)) | 1204 | if (unlikely(!ptr)) |
1203 | return -EFAULT; | 1205 | return -EFAULT; |
1204 | 1206 | ||
1205 | skb_postpull_rcsum(skb, ptr, len); | 1207 | if (BPF_RECOMPUTE_CSUM(flags)) |
1208 | skb_postpull_rcsum(skb, ptr, len); | ||
1206 | 1209 | ||
1207 | memcpy(ptr, from, len); | 1210 | memcpy(ptr, from, len); |
1208 | 1211 | ||
@@ -1210,7 +1213,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | |||
1210 | /* skb_store_bits cannot return -EFAULT here */ | 1213 | /* skb_store_bits cannot return -EFAULT here */ |
1211 | skb_store_bits(skb, offset, ptr, len); | 1214 | skb_store_bits(skb, offset, ptr, len); |
1212 | 1215 | ||
1213 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 1216 | if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE) |
1214 | skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); | 1217 | skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); |
1215 | return 0; | 1218 | return 0; |
1216 | } | 1219 | } |
@@ -1223,6 +1226,99 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = { | |||
1223 | .arg2_type = ARG_ANYTHING, | 1226 | .arg2_type = ARG_ANYTHING, |
1224 | .arg3_type = ARG_PTR_TO_STACK, | 1227 | .arg3_type = ARG_PTR_TO_STACK, |
1225 | .arg4_type = ARG_CONST_STACK_SIZE, | 1228 | .arg4_type = ARG_CONST_STACK_SIZE, |
1229 | .arg5_type = ARG_ANYTHING, | ||
1230 | }; | ||
1231 | |||
1232 | #define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) | ||
1233 | #define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) | ||
1234 | |||
1235 | static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) | ||
1236 | { | ||
1237 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | ||
1238 | __sum16 sum, *ptr; | ||
1239 | |||
1240 | if (unlikely(offset > 0xffff)) | ||
1241 | return -EFAULT; | ||
1242 | |||
1243 | if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) | ||
1244 | return -EFAULT; | ||
1245 | |||
1246 | ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); | ||
1247 | if (unlikely(!ptr)) | ||
1248 | return -EFAULT; | ||
1249 | |||
1250 | switch (BPF_HEADER_FIELD_SIZE(flags)) { | ||
1251 | case 2: | ||
1252 | csum_replace2(ptr, from, to); | ||
1253 | break; | ||
1254 | case 4: | ||
1255 | csum_replace4(ptr, from, to); | ||
1256 | break; | ||
1257 | default: | ||
1258 | return -EINVAL; | ||
1259 | } | ||
1260 | |||
1261 | if (ptr == &sum) | ||
1262 | /* skb_store_bits guaranteed to not return -EFAULT here */ | ||
1263 | skb_store_bits(skb, offset, ptr, sizeof(sum)); | ||
1264 | |||
1265 | return 0; | ||
1266 | } | ||
1267 | |||
1268 | const struct bpf_func_proto bpf_l3_csum_replace_proto = { | ||
1269 | .func = bpf_l3_csum_replace, | ||
1270 | .gpl_only = false, | ||
1271 | .ret_type = RET_INTEGER, | ||
1272 | .arg1_type = ARG_PTR_TO_CTX, | ||
1273 | .arg2_type = ARG_ANYTHING, | ||
1274 | .arg3_type = ARG_ANYTHING, | ||
1275 | .arg4_type = ARG_ANYTHING, | ||
1276 | .arg5_type = ARG_ANYTHING, | ||
1277 | }; | ||
1278 | |||
1279 | static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) | ||
1280 | { | ||
1281 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | ||
1282 | u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags); | ||
1283 | __sum16 sum, *ptr; | ||
1284 | |||
1285 | if (unlikely(offset > 0xffff)) | ||
1286 | return -EFAULT; | ||
1287 | |||
1288 | if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) | ||
1289 | return -EFAULT; | ||
1290 | |||
1291 | ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); | ||
1292 | if (unlikely(!ptr)) | ||
1293 | return -EFAULT; | ||
1294 | |||
1295 | switch (BPF_HEADER_FIELD_SIZE(flags)) { | ||
1296 | case 2: | ||
1297 | inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); | ||
1298 | break; | ||
1299 | case 4: | ||
1300 | inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo); | ||
1301 | break; | ||
1302 | default: | ||
1303 | return -EINVAL; | ||
1304 | } | ||
1305 | |||
1306 | if (ptr == &sum) | ||
1307 | /* skb_store_bits guaranteed to not return -EFAULT here */ | ||
1308 | skb_store_bits(skb, offset, ptr, sizeof(sum)); | ||
1309 | |||
1310 | return 0; | ||
1311 | } | ||
1312 | |||
1313 | const struct bpf_func_proto bpf_l4_csum_replace_proto = { | ||
1314 | .func = bpf_l4_csum_replace, | ||
1315 | .gpl_only = false, | ||
1316 | .ret_type = RET_INTEGER, | ||
1317 | .arg1_type = ARG_PTR_TO_CTX, | ||
1318 | .arg2_type = ARG_ANYTHING, | ||
1319 | .arg3_type = ARG_ANYTHING, | ||
1320 | .arg4_type = ARG_ANYTHING, | ||
1321 | .arg5_type = ARG_ANYTHING, | ||
1226 | }; | 1322 | }; |
1227 | 1323 | ||
1228 | static const struct bpf_func_proto * | 1324 | static const struct bpf_func_proto * |
@@ -1250,6 +1346,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) | |||
1250 | switch (func_id) { | 1346 | switch (func_id) { |
1251 | case BPF_FUNC_skb_store_bytes: | 1347 | case BPF_FUNC_skb_store_bytes: |
1252 | return &bpf_skb_store_bytes_proto; | 1348 | return &bpf_skb_store_bytes_proto; |
1349 | case BPF_FUNC_l3_csum_replace: | ||
1350 | return &bpf_l3_csum_replace_proto; | ||
1351 | case BPF_FUNC_l4_csum_replace: | ||
1352 | return &bpf_l4_csum_replace_proto; | ||
1253 | default: | 1353 | default: |
1254 | return sk_filter_func_proto(func_id); | 1354 | return sk_filter_func_proto(func_id); |
1255 | } | 1355 | } |
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index b5b3600dcdf5..d24f51bca465 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile | |||
@@ -17,6 +17,7 @@ sockex2-objs := bpf_load.o libbpf.o sockex2_user.o | |||
17 | always := $(hostprogs-y) | 17 | always := $(hostprogs-y) |
18 | always += sockex1_kern.o | 18 | always += sockex1_kern.o |
19 | always += sockex2_kern.o | 19 | always += sockex2_kern.o |
20 | always += tcbpf1_kern.o | ||
20 | 21 | ||
21 | HOSTCFLAGS += -I$(objtree)/usr/include | 22 | HOSTCFLAGS += -I$(objtree)/usr/include |
22 | 23 | ||
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index ca0333146006..72540ec1f003 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h | |||
@@ -37,4 +37,11 @@ struct bpf_map_def { | |||
37 | unsigned int max_entries; | 37 | unsigned int max_entries; |
38 | }; | 38 | }; |
39 | 39 | ||
40 | static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = | ||
41 | (void *) BPF_FUNC_skb_store_bytes; | ||
42 | static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = | ||
43 | (void *) BPF_FUNC_l3_csum_replace; | ||
44 | static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = | ||
45 | (void *) BPF_FUNC_l4_csum_replace; | ||
46 | |||
40 | #endif | 47 | #endif |
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c new file mode 100644 index 000000000000..7cf3f42a6e39 --- /dev/null +++ b/samples/bpf/tcbpf1_kern.c | |||
@@ -0,0 +1,71 @@ | |||
1 | #include <uapi/linux/bpf.h> | ||
2 | #include <uapi/linux/if_ether.h> | ||
3 | #include <uapi/linux/if_packet.h> | ||
4 | #include <uapi/linux/ip.h> | ||
5 | #include <uapi/linux/in.h> | ||
6 | #include <uapi/linux/tcp.h> | ||
7 | #include "bpf_helpers.h" | ||
8 | |||
9 | /* compiler workaround */ | ||
10 | #define _htonl __builtin_bswap32 | ||
11 | |||
12 | static inline void set_dst_mac(struct __sk_buff *skb, char *mac) | ||
13 | { | ||
14 | bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1); | ||
15 | } | ||
16 | |||
17 | /* use 1 below for ingress qdisc and 0 for egress */ | ||
18 | #if 0 | ||
19 | #undef ETH_HLEN | ||
20 | #define ETH_HLEN 0 | ||
21 | #endif | ||
22 | |||
23 | #define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check)) | ||
24 | #define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos)) | ||
25 | |||
26 | static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos) | ||
27 | { | ||
28 | __u8 old_tos = load_byte(skb, TOS_OFF); | ||
29 | |||
30 | bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2); | ||
31 | bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0); | ||
32 | } | ||
33 | |||
34 | #define TCP_CSUM_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, check)) | ||
35 | #define IP_SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr)) | ||
36 | |||
37 | #define IS_PSEUDO 0x10 | ||
38 | |||
39 | static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip) | ||
40 | { | ||
41 | __u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF)); | ||
42 | |||
43 | bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip)); | ||
44 | bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip)); | ||
45 | bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0); | ||
46 | } | ||
47 | |||
48 | #define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest)) | ||
49 | static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port) | ||
50 | { | ||
51 | __u16 old_port = htons(load_half(skb, TCP_DPORT_OFF)); | ||
52 | |||
53 | bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port)); | ||
54 | bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0); | ||
55 | } | ||
56 | |||
57 | SEC("classifier") | ||
58 | int bpf_prog1(struct __sk_buff *skb) | ||
59 | { | ||
60 | __u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); | ||
61 | long *value; | ||
62 | |||
63 | if (proto == IPPROTO_TCP) { | ||
64 | set_ip_tos(skb, 8); | ||
65 | set_tcp_ip_src(skb, 0xA010101); | ||
66 | set_tcp_dest_port(skb, 5001); | ||
67 | } | ||
68 | |||
69 | return 0; | ||
70 | } | ||
71 | char _license[] SEC("license") = "GPL"; | ||