diff options
Diffstat (limited to 'kernel/bpf/sockmap.c')
-rw-r--r-- | kernel/bpf/sockmap.c | 99 |
1 files changed, 73 insertions, 26 deletions
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index a3b21385e947..098eca568c2b 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -43,6 +43,7 @@ | |||
43 | #include <net/tcp.h> | 43 | #include <net/tcp.h> |
44 | #include <linux/ptr_ring.h> | 44 | #include <linux/ptr_ring.h> |
45 | #include <net/inet_common.h> | 45 | #include <net/inet_common.h> |
46 | #include <linux/sched/signal.h> | ||
46 | 47 | ||
47 | #define SOCK_CREATE_FLAG_MASK \ | 48 | #define SOCK_CREATE_FLAG_MASK \ |
48 | (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) | 49 | (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) |
@@ -325,6 +326,9 @@ retry: | |||
325 | if (ret > 0) { | 326 | if (ret > 0) { |
326 | if (apply) | 327 | if (apply) |
327 | apply_bytes -= ret; | 328 | apply_bytes -= ret; |
329 | |||
330 | sg->offset += ret; | ||
331 | sg->length -= ret; | ||
328 | size -= ret; | 332 | size -= ret; |
329 | offset += ret; | 333 | offset += ret; |
330 | if (uncharge) | 334 | if (uncharge) |
@@ -332,8 +336,6 @@ retry: | |||
332 | goto retry; | 336 | goto retry; |
333 | } | 337 | } |
334 | 338 | ||
335 | sg->length = size; | ||
336 | sg->offset = offset; | ||
337 | return ret; | 339 | return ret; |
338 | } | 340 | } |
339 | 341 | ||
@@ -391,7 +393,8 @@ static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md) | |||
391 | } while (i != md->sg_end); | 393 | } while (i != md->sg_end); |
392 | } | 394 | } |
393 | 395 | ||
394 | static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md) | 396 | static void free_bytes_sg(struct sock *sk, int bytes, |
397 | struct sk_msg_buff *md, bool charge) | ||
395 | { | 398 | { |
396 | struct scatterlist *sg = md->sg_data; | 399 | struct scatterlist *sg = md->sg_data; |
397 | int i = md->sg_start, free; | 400 | int i = md->sg_start, free; |
@@ -401,11 +404,13 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md) | |||
401 | if (bytes < free) { | 404 | if (bytes < free) { |
402 | sg[i].length -= bytes; | 405 | sg[i].length -= bytes; |
403 | sg[i].offset += bytes; | 406 | sg[i].offset += bytes; |
404 | sk_mem_uncharge(sk, bytes); | 407 | if (charge) |
408 | sk_mem_uncharge(sk, bytes); | ||
405 | break; | 409 | break; |
406 | } | 410 | } |
407 | 411 | ||
408 | sk_mem_uncharge(sk, sg[i].length); | 412 | if (charge) |
413 | sk_mem_uncharge(sk, sg[i].length); | ||
409 | put_page(sg_page(&sg[i])); | 414 | put_page(sg_page(&sg[i])); |
410 | bytes -= sg[i].length; | 415 | bytes -= sg[i].length; |
411 | sg[i].length = 0; | 416 | sg[i].length = 0; |
@@ -416,6 +421,7 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md) | |||
416 | if (i == MAX_SKB_FRAGS) | 421 | if (i == MAX_SKB_FRAGS) |
417 | i = 0; | 422 | i = 0; |
418 | } | 423 | } |
424 | md->sg_start = i; | ||
419 | } | 425 | } |
420 | 426 | ||
421 | static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) | 427 | static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) |
@@ -523,8 +529,6 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes, | |||
523 | i = md->sg_start; | 529 | i = md->sg_start; |
524 | 530 | ||
525 | do { | 531 | do { |
526 | r->sg_data[i] = md->sg_data[i]; | ||
527 | |||
528 | size = (apply && apply_bytes < md->sg_data[i].length) ? | 532 | size = (apply && apply_bytes < md->sg_data[i].length) ? |
529 | apply_bytes : md->sg_data[i].length; | 533 | apply_bytes : md->sg_data[i].length; |
530 | 534 | ||
@@ -535,6 +539,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes, | |||
535 | } | 539 | } |
536 | 540 | ||
537 | sk_mem_charge(sk, size); | 541 | sk_mem_charge(sk, size); |
542 | r->sg_data[i] = md->sg_data[i]; | ||
538 | r->sg_data[i].length = size; | 543 | r->sg_data[i].length = size; |
539 | md->sg_data[i].length -= size; | 544 | md->sg_data[i].length -= size; |
540 | md->sg_data[i].offset += size; | 545 | md->sg_data[i].offset += size; |
@@ -575,10 +580,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send, | |||
575 | struct sk_msg_buff *md, | 580 | struct sk_msg_buff *md, |
576 | int flags) | 581 | int flags) |
577 | { | 582 | { |
583 | bool ingress = !!(md->flags & BPF_F_INGRESS); | ||
578 | struct smap_psock *psock; | 584 | struct smap_psock *psock; |
579 | struct scatterlist *sg; | 585 | struct scatterlist *sg; |
580 | int i, err, free = 0; | 586 | int err = 0; |
581 | bool ingress = !!(md->flags & BPF_F_INGRESS); | ||
582 | 587 | ||
583 | sg = md->sg_data; | 588 | sg = md->sg_data; |
584 | 589 | ||
@@ -606,16 +611,8 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send, | |||
606 | out_rcu: | 611 | out_rcu: |
607 | rcu_read_unlock(); | 612 | rcu_read_unlock(); |
608 | out: | 613 | out: |
609 | i = md->sg_start; | 614 | free_bytes_sg(NULL, send, md, false); |
610 | while (sg[i].length) { | 615 | return err; |
611 | free += sg[i].length; | ||
612 | put_page(sg_page(&sg[i])); | ||
613 | sg[i].length = 0; | ||
614 | i++; | ||
615 | if (i == MAX_SKB_FRAGS) | ||
616 | i = 0; | ||
617 | } | ||
618 | return free; | ||
619 | } | 616 | } |
620 | 617 | ||
621 | static inline void bpf_md_init(struct smap_psock *psock) | 618 | static inline void bpf_md_init(struct smap_psock *psock) |
@@ -700,19 +697,26 @@ more_data: | |||
700 | err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags); | 697 | err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags); |
701 | lock_sock(sk); | 698 | lock_sock(sk); |
702 | 699 | ||
700 | if (unlikely(err < 0)) { | ||
701 | free_start_sg(sk, m); | ||
702 | psock->sg_size = 0; | ||
703 | if (!cork) | ||
704 | *copied -= send; | ||
705 | } else { | ||
706 | psock->sg_size -= send; | ||
707 | } | ||
708 | |||
703 | if (cork) { | 709 | if (cork) { |
704 | free_start_sg(sk, m); | 710 | free_start_sg(sk, m); |
711 | psock->sg_size = 0; | ||
705 | kfree(m); | 712 | kfree(m); |
706 | m = NULL; | 713 | m = NULL; |
714 | err = 0; | ||
707 | } | 715 | } |
708 | if (unlikely(err)) | ||
709 | *copied -= err; | ||
710 | else | ||
711 | psock->sg_size -= send; | ||
712 | break; | 716 | break; |
713 | case __SK_DROP: | 717 | case __SK_DROP: |
714 | default: | 718 | default: |
715 | free_bytes_sg(sk, send, m); | 719 | free_bytes_sg(sk, send, m, true); |
716 | apply_bytes_dec(psock, send); | 720 | apply_bytes_dec(psock, send); |
717 | *copied -= send; | 721 | *copied -= send; |
718 | psock->sg_size -= send; | 722 | psock->sg_size -= send; |
@@ -732,6 +736,26 @@ out_err: | |||
732 | return err; | 736 | return err; |
733 | } | 737 | } |
734 | 738 | ||
/*
 * bpf_wait_data - wait until the psock ingress list or the socket receive
 * queue has something queued, or the wait ends for another reason.
 * @sk:    socket whose wait queue (sk_sleep(sk)) and sk_receive_queue are used
 * @psk:   psock whose ->ingress list is tested in the wake-up condition
 * @flags: not referenced anywhere in this body
 * @timeo: timeout value; its address is handed to sk_wait_event()
 * @err:   not read or written anywhere in this body
 *
 * Returns whatever sk_wait_event() evaluates to.
 * NOTE(review): the bpf_tcp_recvmsg hunk in this diff treats a nonzero
 * return as "data is available" -- confirm against sk_wait_event().
 * NOTE(review): since *err is never assigned here, a caller that
 * initialises it to 0 will never observe an error through it -- confirm
 * whether that is intended.
 */
739 | static int bpf_wait_data(struct sock *sk, | ||
740 | struct smap_psock *psk, int flags, | ||
741 | long timeo, int *err) | ||
742 | { | ||
743 | int rc; | ||
744 | |||
/* Wait-queue entry that uses woken_wake_function as its wake callback. */
745 | DEFINE_WAIT_FUNC(wait, woken_wake_function); | ||
746 | |||
/* Queue ourselves on the socket and flag SOCKWQ_ASYNC_WAITDATA while waiting. */
747 | add_wait_queue(sk_sleep(sk), &wait); | ||
748 | sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); | ||
/* Condition: psock ingress list non-empty OR socket receive queue non-empty. */
749 | rc = sk_wait_event(sk, &timeo, | ||
750 | !list_empty(&psk->ingress) || | ||
751 | !skb_queue_empty(&sk->sk_receive_queue), | ||
752 | &wait); | ||
/* Undo the setup above in reverse order, whatever the wait returned. */
753 | sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); | ||
754 | remove_wait_queue(sk_sleep(sk), &wait); | ||
755 | |||
756 | return rc; | ||
757 | } | ||
758 | |||
735 | static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, | 759 | static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, |
736 | int nonblock, int flags, int *addr_len) | 760 | int nonblock, int flags, int *addr_len) |
737 | { | 761 | { |
@@ -755,6 +779,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, | |||
755 | return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); | 779 | return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); |
756 | 780 | ||
757 | lock_sock(sk); | 781 | lock_sock(sk); |
782 | bytes_ready: | ||
758 | while (copied != len) { | 783 | while (copied != len) { |
759 | struct scatterlist *sg; | 784 | struct scatterlist *sg; |
760 | struct sk_msg_buff *md; | 785 | struct sk_msg_buff *md; |
@@ -809,6 +834,28 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, | |||
809 | } | 834 | } |
810 | } | 835 | } |
811 | 836 | ||
837 | if (!copied) { | ||
838 | long timeo; | ||
839 | int data; | ||
840 | int err = 0; | ||
841 | |||
842 | timeo = sock_rcvtimeo(sk, nonblock); | ||
843 | data = bpf_wait_data(sk, psock, flags, timeo, &err); | ||
844 | |||
845 | if (data) { | ||
846 | if (!skb_queue_empty(&sk->sk_receive_queue)) { | ||
847 | release_sock(sk); | ||
848 | smap_release_sock(psock, sk); | ||
849 | copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); | ||
850 | return copied; | ||
851 | } | ||
852 | goto bytes_ready; | ||
853 | } | ||
854 | |||
855 | if (err) | ||
856 | copied = err; | ||
857 | } | ||
858 | |||
812 | release_sock(sk); | 859 | release_sock(sk); |
813 | smap_release_sock(psock, sk); | 860 | smap_release_sock(psock, sk); |
814 | return copied; | 861 | return copied; |
@@ -1831,7 +1878,7 @@ static int sock_map_update_elem(struct bpf_map *map, | |||
1831 | return err; | 1878 | return err; |
1832 | } | 1879 | } |
1833 | 1880 | ||
1834 | static void sock_map_release(struct bpf_map *map, struct file *map_file) | 1881 | static void sock_map_release(struct bpf_map *map) |
1835 | { | 1882 | { |
1836 | struct bpf_stab *stab = container_of(map, struct bpf_stab, map); | 1883 | struct bpf_stab *stab = container_of(map, struct bpf_stab, map); |
1837 | struct bpf_prog *orig; | 1884 | struct bpf_prog *orig; |
@@ -1855,7 +1902,7 @@ const struct bpf_map_ops sock_map_ops = { | |||
1855 | .map_get_next_key = sock_map_get_next_key, | 1902 | .map_get_next_key = sock_map_get_next_key, |
1856 | .map_update_elem = sock_map_update_elem, | 1903 | .map_update_elem = sock_map_update_elem, |
1857 | .map_delete_elem = sock_map_delete_elem, | 1904 | .map_delete_elem = sock_map_delete_elem, |
1858 | .map_release = sock_map_release, | 1905 | .map_release_uref = sock_map_release, |
1859 | }; | 1906 | }; |
1860 | 1907 | ||
1861 | BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, | 1908 | BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, |