aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Fastabend <john.fastabend@gmail.com>2018-10-19 22:56:49 -0400
committerDaniel Borkmann <daniel@iogearbox.net>2018-10-20 15:37:11 -0400
commit6fff607e2f14bd7c63c06c464a6f93b8efbabe28 (patch)
tree3ac21ecddcd5903aa5eaecb5ba54d2434ccf5184
parent5032d079909d1ac5c2535acc32d5f01cd245d8ea (diff)
bpf: sk_msg program helper bpf_msg_push_data
This allows a user to push data into a msg using sk_msg program types. The format is as follows: bpf_msg_push_data(msg, offset, len, flags) will insert 'len' bytes at offset 'offset'. For example, to prepend 10 bytes at the front of the message the user can call bpf_msg_push_data(msg, 0, 10, 0). This will invalidate the data bounds, so the BPF user will have to recheck data bounds after calling this. After this the msg size will have been updated and the user is free to write into the added bytes. We allow any offset/len as long as it is within the (data, data_end) range. However, a copy will be required if the ring is full, and it's possible for the helper to fail with ENOMEM or EINVAL errors, which need to be handled by the BPF program. This can be used similarly to XDP metadata to pass data between the sk_msg layer and lower layers. Signed-off-by: John Fastabend <john.fastabend@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--include/linux/skmsg.h5
-rw-r--r--include/uapi/linux/bpf.h20
-rw-r--r--net/core/filter.c134
3 files changed, 158 insertions, 1 deletions
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 84e18863f6a4..2a11e9d91dfa 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -207,6 +207,11 @@ static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
207 return &msg->sg.data[which]; 207 return &msg->sg.data[which];
208} 208}
209 209
210static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which)
211{
212 return msg->sg.data[which];
213}
214
210static inline struct page *sk_msg_page(struct sk_msg *msg, int which) 215static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
211{ 216{
212 return sg_page(sk_msg_elem(msg, which)); 217 return sg_page(sk_msg_elem(msg, which));
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a2fb333290dc..852dc17ab47a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2240,6 +2240,23 @@ union bpf_attr {
2240 * pointer that was returned from bpf_sk_lookup_xxx\ (). 2240 * pointer that was returned from bpf_sk_lookup_xxx\ ().
2241 * Return 2241 * Return
2242 * 0 on success, or a negative error in case of failure. 2242 * 0 on success, or a negative error in case of failure.
2243 *
2244 * int bpf_msg_push_data(struct sk_msg_md *msg, u32 start, u32 len, u64 flags)
2245 * Description
2246 * For socket policies, insert *len* bytes into msg at offset
2247 * *start*.
2248 *
2249 * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
2250 * *msg* it may want to insert metadata or options into the msg.
2251 * This can later be read and used by any of the lower layer BPF
2252 * hooks.
2253 *
2254 *		This helper may fail if under memory pressure (a malloc
2255 *		fails); in these cases BPF programs will get an appropriate
2256 *		error and will need to handle it.
2257 *
2258 * Return
2259 * 0 on success, or a negative error in case of failure.
2243 */ 2260 */
2244#define __BPF_FUNC_MAPPER(FN) \ 2261#define __BPF_FUNC_MAPPER(FN) \
2245 FN(unspec), \ 2262 FN(unspec), \
@@ -2331,7 +2348,8 @@ union bpf_attr {
2331 FN(sk_release), \ 2348 FN(sk_release), \
2332 FN(map_push_elem), \ 2349 FN(map_push_elem), \
2333 FN(map_pop_elem), \ 2350 FN(map_pop_elem), \
2334 FN(map_peek_elem), 2351 FN(map_peek_elem), \
2352 FN(msg_push_data),
2335 2353
2336/* integer value in 'imm' field of BPF_CALL instruction selects which helper 2354/* integer value in 'imm' field of BPF_CALL instruction selects which helper
2337 * function eBPF program intends to call 2355 * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 5fd5139e8638..35c6933c2622 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2297,6 +2297,137 @@ static const struct bpf_func_proto bpf_msg_pull_data_proto = {
2297 .arg4_type = ARG_ANYTHING, 2297 .arg4_type = ARG_ANYTHING,
2298}; 2298};
2299 2299
2300BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
2301 u32, len, u64, flags)
2302{
2303 struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
2304 u32 new, i = 0, l, space, copy = 0, offset = 0;
2305 u8 *raw, *to, *from;
2306 struct page *page;
2307
2308 if (unlikely(flags))
2309 return -EINVAL;
2310
2311 /* First find the starting scatterlist element */
2312 i = msg->sg.start;
2313 do {
2314 l = sk_msg_elem(msg, i)->length;
2315
2316 if (start < offset + l)
2317 break;
2318 offset += l;
2319 sk_msg_iter_var_next(i);
2320 } while (i != msg->sg.end);
2321
2322 if (start >= offset + l)
2323 return -EINVAL;
2324
2325 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2326
2327 /* If no space available will fallback to copy, we need at
2328 * least one scatterlist elem available to push data into
2329 * when start aligns to the beginning of an element or two
2330 * when it falls inside an element. We handle the start equals
2331 * offset case because its the common case for inserting a
2332 * header.
2333 */
2334 if (!space || (space == 1 && start != offset))
2335 copy = msg->sg.data[i].length;
2336
2337 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2338 get_order(copy + len));
2339 if (unlikely(!page))
2340 return -ENOMEM;
2341
2342 if (copy) {
2343 int front, back;
2344
2345 raw = page_address(page);
2346
2347 psge = sk_msg_elem(msg, i);
2348 front = start - offset;
2349 back = psge->length - front;
2350 from = sg_virt(psge);
2351
2352 if (front)
2353 memcpy(raw, from, front);
2354
2355 if (back) {
2356 from += front;
2357 to = raw + front + len;
2358
2359 memcpy(to, from, back);
2360 }
2361
2362 put_page(sg_page(psge));
2363 } else if (start - offset) {
2364 psge = sk_msg_elem(msg, i);
2365 rsge = sk_msg_elem_cpy(msg, i);
2366
2367 psge->length = start - offset;
2368 rsge.length -= psge->length;
2369 rsge.offset += start;
2370
2371 sk_msg_iter_var_next(i);
2372 sg_unmark_end(psge);
2373 sk_msg_iter_next(msg, end);
2374 }
2375
2376 /* Slot(s) to place newly allocated data */
2377 new = i;
2378
2379 /* Shift one or two slots as needed */
2380 if (!copy) {
2381 sge = sk_msg_elem_cpy(msg, i);
2382
2383 sk_msg_iter_var_next(i);
2384 sg_unmark_end(&sge);
2385 sk_msg_iter_next(msg, end);
2386
2387 nsge = sk_msg_elem_cpy(msg, i);
2388 if (rsge.length) {
2389 sk_msg_iter_var_next(i);
2390 nnsge = sk_msg_elem_cpy(msg, i);
2391 }
2392
2393 while (i != msg->sg.end) {
2394 msg->sg.data[i] = sge;
2395 sge = nsge;
2396 sk_msg_iter_var_next(i);
2397 if (rsge.length) {
2398 nsge = nnsge;
2399 nnsge = sk_msg_elem_cpy(msg, i);
2400 } else {
2401 nsge = sk_msg_elem_cpy(msg, i);
2402 }
2403 }
2404 }
2405
2406 /* Place newly allocated data buffer */
2407 sk_mem_charge(msg->sk, len);
2408 msg->sg.size += len;
2409 msg->sg.copy[new] = false;
2410 sg_set_page(&msg->sg.data[new], page, len + copy, 0);
2411 if (rsge.length) {
2412 get_page(sg_page(&rsge));
2413 sk_msg_iter_var_next(new);
2414 msg->sg.data[new] = rsge;
2415 }
2416
2417 sk_msg_compute_data_pointers(msg);
2418 return 0;
2419}
2420
2421static const struct bpf_func_proto bpf_msg_push_data_proto = {
2422 .func = bpf_msg_push_data,
2423 .gpl_only = false,
2424 .ret_type = RET_INTEGER,
2425 .arg1_type = ARG_PTR_TO_CTX,
2426 .arg2_type = ARG_ANYTHING,
2427 .arg3_type = ARG_ANYTHING,
2428 .arg4_type = ARG_ANYTHING,
2429};
2430
2300BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) 2431BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
2301{ 2432{
2302 return task_get_classid(skb); 2433 return task_get_classid(skb);
@@ -4854,6 +4985,7 @@ bool bpf_helper_changes_pkt_data(void *func)
4854 func == bpf_xdp_adjust_head || 4985 func == bpf_xdp_adjust_head ||
4855 func == bpf_xdp_adjust_meta || 4986 func == bpf_xdp_adjust_meta ||
4856 func == bpf_msg_pull_data || 4987 func == bpf_msg_pull_data ||
4988 func == bpf_msg_push_data ||
4857 func == bpf_xdp_adjust_tail || 4989 func == bpf_xdp_adjust_tail ||
4858#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 4990#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
4859 func == bpf_lwt_seg6_store_bytes || 4991 func == bpf_lwt_seg6_store_bytes ||
@@ -5130,6 +5262,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5130 return &bpf_msg_cork_bytes_proto; 5262 return &bpf_msg_cork_bytes_proto;
5131 case BPF_FUNC_msg_pull_data: 5263 case BPF_FUNC_msg_pull_data:
5132 return &bpf_msg_pull_data_proto; 5264 return &bpf_msg_pull_data_proto;
5265 case BPF_FUNC_msg_push_data:
5266 return &bpf_msg_push_data_proto;
5133 case BPF_FUNC_get_local_storage: 5267 case BPF_FUNC_get_local_storage:
5134 return &bpf_get_local_storage_proto; 5268 return &bpf_get_local_storage_proto;
5135 default: 5269 default: