diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 99 |
1 files changed, 64 insertions, 35 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 995a2259bcfc..46efa03d2b11 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -252,6 +252,7 @@ | |||
252 | #include <linux/types.h> | 252 | #include <linux/types.h> |
253 | #include <linux/fcntl.h> | 253 | #include <linux/fcntl.h> |
254 | #include <linux/poll.h> | 254 | #include <linux/poll.h> |
255 | #include <linux/inet_diag.h> | ||
255 | #include <linux/init.h> | 256 | #include <linux/init.h> |
256 | #include <linux/fs.h> | 257 | #include <linux/fs.h> |
257 | #include <linux/skbuff.h> | 258 | #include <linux/skbuff.h> |
@@ -496,7 +497,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
496 | 497 | ||
497 | /* Connected or passive Fast Open socket? */ | 498 | /* Connected or passive Fast Open socket? */ |
498 | if (sk->sk_state != TCP_SYN_SENT && | 499 | if (sk->sk_state != TCP_SYN_SENT && |
499 | (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) { | 500 | (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) { |
500 | int target = sock_rcvlowat(sk, 0, INT_MAX); | 501 | int target = sock_rcvlowat(sk, 0, INT_MAX); |
501 | 502 | ||
502 | if (tp->urg_seq == tp->copied_seq && | 503 | if (tp->urg_seq == tp->copied_seq && |
@@ -520,8 +521,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
520 | 521 | ||
521 | /* Race breaker. If space is freed after | 522 | /* Race breaker. If space is freed after |
522 | * wspace test but before the flags are set, | 523 | * wspace test but before the flags are set, |
523 | * IO signal will be lost. | 524 | * IO signal will be lost. Memory barrier |
525 | * pairs with the input side. | ||
524 | */ | 526 | */ |
527 | smp_mb__after_atomic(); | ||
525 | if (sk_stream_is_writeable(sk)) | 528 | if (sk_stream_is_writeable(sk)) |
526 | mask |= POLLOUT | POLLWRNORM; | 529 | mask |= POLLOUT | POLLWRNORM; |
527 | } | 530 | } |
@@ -1028,7 +1031,7 @@ static inline int select_size(const struct sock *sk, bool sg) | |||
1028 | 1031 | ||
1029 | void tcp_free_fastopen_req(struct tcp_sock *tp) | 1032 | void tcp_free_fastopen_req(struct tcp_sock *tp) |
1030 | { | 1033 | { |
1031 | if (tp->fastopen_req != NULL) { | 1034 | if (tp->fastopen_req) { |
1032 | kfree(tp->fastopen_req); | 1035 | kfree(tp->fastopen_req); |
1033 | tp->fastopen_req = NULL; | 1036 | tp->fastopen_req = NULL; |
1034 | } | 1037 | } |
@@ -1042,12 +1045,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, | |||
1042 | 1045 | ||
1043 | if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) | 1046 | if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) |
1044 | return -EOPNOTSUPP; | 1047 | return -EOPNOTSUPP; |
1045 | if (tp->fastopen_req != NULL) | 1048 | if (tp->fastopen_req) |
1046 | return -EALREADY; /* Another Fast Open is in progress */ | 1049 | return -EALREADY; /* Another Fast Open is in progress */ |
1047 | 1050 | ||
1048 | tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), | 1051 | tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), |
1049 | sk->sk_allocation); | 1052 | sk->sk_allocation); |
1050 | if (unlikely(tp->fastopen_req == NULL)) | 1053 | if (unlikely(!tp->fastopen_req)) |
1051 | return -ENOBUFS; | 1054 | return -ENOBUFS; |
1052 | tp->fastopen_req->data = msg; | 1055 | tp->fastopen_req->data = msg; |
1053 | tp->fastopen_req->size = size; | 1056 | tp->fastopen_req->size = size; |
@@ -1060,8 +1063,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, | |||
1060 | return err; | 1063 | return err; |
1061 | } | 1064 | } |
1062 | 1065 | ||
1063 | int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 1066 | int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) |
1064 | size_t size) | ||
1065 | { | 1067 | { |
1066 | struct tcp_sock *tp = tcp_sk(sk); | 1068 | struct tcp_sock *tp = tcp_sk(sk); |
1067 | struct sk_buff *skb; | 1069 | struct sk_buff *skb; |
@@ -1120,7 +1122,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1120 | 1122 | ||
1121 | sg = !!(sk->sk_route_caps & NETIF_F_SG); | 1123 | sg = !!(sk->sk_route_caps & NETIF_F_SG); |
1122 | 1124 | ||
1123 | while (iov_iter_count(&msg->msg_iter)) { | 1125 | while (msg_data_left(msg)) { |
1124 | int copy = 0; | 1126 | int copy = 0; |
1125 | int max = size_goal; | 1127 | int max = size_goal; |
1126 | 1128 | ||
@@ -1164,8 +1166,8 @@ new_segment: | |||
1164 | } | 1166 | } |
1165 | 1167 | ||
1166 | /* Try to append data to the end of skb. */ | 1168 | /* Try to append data to the end of skb. */ |
1167 | if (copy > iov_iter_count(&msg->msg_iter)) | 1169 | if (copy > msg_data_left(msg)) |
1168 | copy = iov_iter_count(&msg->msg_iter); | 1170 | copy = msg_data_left(msg); |
1169 | 1171 | ||
1170 | /* Where to copy to? */ | 1172 | /* Where to copy to? */ |
1171 | if (skb_availroom(skb) > 0) { | 1173 | if (skb_availroom(skb) > 0) { |
@@ -1222,7 +1224,7 @@ new_segment: | |||
1222 | tcp_skb_pcount_set(skb, 0); | 1224 | tcp_skb_pcount_set(skb, 0); |
1223 | 1225 | ||
1224 | copied += copy; | 1226 | copied += copy; |
1225 | if (!iov_iter_count(&msg->msg_iter)) { | 1227 | if (!msg_data_left(msg)) { |
1226 | tcp_tx_timestamp(sk, skb); | 1228 | tcp_tx_timestamp(sk, skb); |
1227 | goto out; | 1229 | goto out; |
1228 | } | 1230 | } |
@@ -1539,8 +1541,8 @@ EXPORT_SYMBOL(tcp_read_sock); | |||
1539 | * Probably, code can be easily improved even more. | 1541 | * Probably, code can be easily improved even more. |
1540 | */ | 1542 | */ |
1541 | 1543 | ||
1542 | int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 1544 | int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, |
1543 | size_t len, int nonblock, int flags, int *addr_len) | 1545 | int flags, int *addr_len) |
1544 | { | 1546 | { |
1545 | struct tcp_sock *tp = tcp_sk(sk); | 1547 | struct tcp_sock *tp = tcp_sk(sk); |
1546 | int copied = 0; | 1548 | int copied = 0; |
@@ -1914,18 +1916,19 @@ EXPORT_SYMBOL_GPL(tcp_set_state); | |||
1914 | 1916 | ||
1915 | static const unsigned char new_state[16] = { | 1917 | static const unsigned char new_state[16] = { |
1916 | /* current state: new state: action: */ | 1918 | /* current state: new state: action: */ |
1917 | /* (Invalid) */ TCP_CLOSE, | 1919 | [0 /* (Invalid) */] = TCP_CLOSE, |
1918 | /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, | 1920 | [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, |
1919 | /* TCP_SYN_SENT */ TCP_CLOSE, | 1921 | [TCP_SYN_SENT] = TCP_CLOSE, |
1920 | /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, | 1922 | [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, |
1921 | /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1, | 1923 | [TCP_FIN_WAIT1] = TCP_FIN_WAIT1, |
1922 | /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2, | 1924 | [TCP_FIN_WAIT2] = TCP_FIN_WAIT2, |
1923 | /* TCP_TIME_WAIT */ TCP_CLOSE, | 1925 | [TCP_TIME_WAIT] = TCP_CLOSE, |
1924 | /* TCP_CLOSE */ TCP_CLOSE, | 1926 | [TCP_CLOSE] = TCP_CLOSE, |
1925 | /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN, | 1927 | [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN, |
1926 | /* TCP_LAST_ACK */ TCP_LAST_ACK, | 1928 | [TCP_LAST_ACK] = TCP_LAST_ACK, |
1927 | /* TCP_LISTEN */ TCP_CLOSE, | 1929 | [TCP_LISTEN] = TCP_CLOSE, |
1928 | /* TCP_CLOSING */ TCP_CLOSING, | 1930 | [TCP_CLOSING] = TCP_CLOSING, |
1931 | [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */ | ||
1929 | }; | 1932 | }; |
1930 | 1933 | ||
1931 | static int tcp_close_state(struct sock *sk) | 1934 | static int tcp_close_state(struct sock *sk) |
@@ -2138,7 +2141,7 @@ adjudge_to_death: | |||
2138 | * aborted (e.g., closed with unread data) before 3WHS | 2141 | * aborted (e.g., closed with unread data) before 3WHS |
2139 | * finishes. | 2142 | * finishes. |
2140 | */ | 2143 | */ |
2141 | if (req != NULL) | 2144 | if (req) |
2142 | reqsk_fastopen_remove(sk, req, false); | 2145 | reqsk_fastopen_remove(sk, req, false); |
2143 | inet_csk_destroy_sock(sk); | 2146 | inet_csk_destroy_sock(sk); |
2144 | } | 2147 | } |
@@ -2590,11 +2593,12 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); | |||
2590 | #endif | 2593 | #endif |
2591 | 2594 | ||
2592 | /* Return information about state of tcp endpoint in API format. */ | 2595 | /* Return information about state of tcp endpoint in API format. */ |
2593 | void tcp_get_info(const struct sock *sk, struct tcp_info *info) | 2596 | void tcp_get_info(struct sock *sk, struct tcp_info *info) |
2594 | { | 2597 | { |
2595 | const struct tcp_sock *tp = tcp_sk(sk); | 2598 | const struct tcp_sock *tp = tcp_sk(sk); |
2596 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2599 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2597 | u32 now = tcp_time_stamp; | 2600 | u32 now = tcp_time_stamp; |
2601 | u32 rate; | ||
2598 | 2602 | ||
2599 | memset(info, 0, sizeof(*info)); | 2603 | memset(info, 0, sizeof(*info)); |
2600 | 2604 | ||
@@ -2655,10 +2659,16 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) | |||
2655 | 2659 | ||
2656 | info->tcpi_total_retrans = tp->total_retrans; | 2660 | info->tcpi_total_retrans = tp->total_retrans; |
2657 | 2661 | ||
2658 | info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ? | 2662 | rate = READ_ONCE(sk->sk_pacing_rate); |
2659 | sk->sk_pacing_rate : ~0ULL; | 2663 | info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; |
2660 | info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ? | 2664 | |
2661 | sk->sk_max_pacing_rate : ~0ULL; | 2665 | rate = READ_ONCE(sk->sk_max_pacing_rate); |
2666 | info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; | ||
2667 | |||
2668 | spin_lock_bh(&sk->sk_lock.slock); | ||
2669 | info->tcpi_bytes_acked = tp->bytes_acked; | ||
2670 | info->tcpi_bytes_received = tp->bytes_received; | ||
2671 | spin_unlock_bh(&sk->sk_lock.slock); | ||
2662 | } | 2672 | } |
2663 | EXPORT_SYMBOL_GPL(tcp_get_info); | 2673 | EXPORT_SYMBOL_GPL(tcp_get_info); |
2664 | 2674 | ||
@@ -2730,6 +2740,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2730 | return -EFAULT; | 2740 | return -EFAULT; |
2731 | return 0; | 2741 | return 0; |
2732 | } | 2742 | } |
2743 | case TCP_CC_INFO: { | ||
2744 | const struct tcp_congestion_ops *ca_ops; | ||
2745 | union tcp_cc_info info; | ||
2746 | size_t sz = 0; | ||
2747 | int attr; | ||
2748 | |||
2749 | if (get_user(len, optlen)) | ||
2750 | return -EFAULT; | ||
2751 | |||
2752 | ca_ops = icsk->icsk_ca_ops; | ||
2753 | if (ca_ops && ca_ops->get_info) | ||
2754 | sz = ca_ops->get_info(sk, ~0U, &attr, &info); | ||
2755 | |||
2756 | len = min_t(unsigned int, len, sz); | ||
2757 | if (put_user(len, optlen)) | ||
2758 | return -EFAULT; | ||
2759 | if (copy_to_user(optval, &info, len)) | ||
2760 | return -EFAULT; | ||
2761 | return 0; | ||
2762 | } | ||
2733 | case TCP_QUICKACK: | 2763 | case TCP_QUICKACK: |
2734 | val = !icsk->icsk_ack.pingpong; | 2764 | val = !icsk->icsk_ack.pingpong; |
2735 | break; | 2765 | break; |
@@ -2776,7 +2806,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2776 | break; | 2806 | break; |
2777 | 2807 | ||
2778 | case TCP_FASTOPEN: | 2808 | case TCP_FASTOPEN: |
2779 | if (icsk->icsk_accept_queue.fastopenq != NULL) | 2809 | if (icsk->icsk_accept_queue.fastopenq) |
2780 | val = icsk->icsk_accept_queue.fastopenq->max_qlen; | 2810 | val = icsk->icsk_accept_queue.fastopenq->max_qlen; |
2781 | else | 2811 | else |
2782 | val = 0; | 2812 | val = 0; |
@@ -2960,7 +2990,7 @@ void tcp_done(struct sock *sk) | |||
2960 | 2990 | ||
2961 | tcp_set_state(sk, TCP_CLOSE); | 2991 | tcp_set_state(sk, TCP_CLOSE); |
2962 | tcp_clear_xmit_timers(sk); | 2992 | tcp_clear_xmit_timers(sk); |
2963 | if (req != NULL) | 2993 | if (req) |
2964 | reqsk_fastopen_remove(sk, req, false); | 2994 | reqsk_fastopen_remove(sk, req, false); |
2965 | 2995 | ||
2966 | sk->sk_shutdown = SHUTDOWN_MASK; | 2996 | sk->sk_shutdown = SHUTDOWN_MASK; |
@@ -3001,12 +3031,11 @@ static void __init tcp_init_mem(void) | |||
3001 | 3031 | ||
3002 | void __init tcp_init(void) | 3032 | void __init tcp_init(void) |
3003 | { | 3033 | { |
3004 | struct sk_buff *skb = NULL; | ||
3005 | unsigned long limit; | 3034 | unsigned long limit; |
3006 | int max_rshare, max_wshare, cnt; | 3035 | int max_rshare, max_wshare, cnt; |
3007 | unsigned int i; | 3036 | unsigned int i; |
3008 | 3037 | ||
3009 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); | 3038 | sock_skb_cb_check_size(sizeof(struct tcp_skb_cb)); |
3010 | 3039 | ||
3011 | percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); | 3040 | percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); |
3012 | percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); | 3041 | percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); |