Diffstat (limited to 'net/ipv4/tcp.c')

 -rw-r--r--  net/ipv4/tcp.c  | 130
 1 file changed, 85 insertions(+), 45 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b0a26bb25e2e..6afb6d8662b2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -429,7 +429,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		if (tp->urg_seq == tp->copied_seq &&
 		    !sock_flag(sk, SOCK_URGINLINE) &&
 		    tp->urg_data)
-			target--;
+			target++;
 
 		/* Potential race condition. If read of tp below will
 		 * escape above sk->sk_state, we can be illegally awaken
@@ -536,8 +536,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 		tp->nonagle &= ~TCP_NAGLE_PUSH;
 }
 
-static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
-				struct sk_buff *skb)
+static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
 {
 	if (flags & MSG_OOB)
 		tp->snd_up = tp->write_seq;
@@ -546,13 +545,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
 static inline void tcp_push(struct sock *sk, int flags, int mss_now,
 			    int nonagle)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-
 	if (tcp_send_head(sk)) {
-		struct sk_buff *skb = tcp_write_queue_tail(sk);
+		struct tcp_sock *tp = tcp_sk(sk);
+
 		if (!(flags & MSG_MORE) || forced_push(tp))
-			tcp_mark_push(tp, skb);
-		tcp_mark_urg(tp, flags, skb);
+			tcp_mark_push(tp, tcp_write_queue_tail(sk));
+
+		tcp_mark_urg(tp, flags);
 		__tcp_push_pending_frames(sk, mss_now,
 					  (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
 	}
@@ -877,12 +876,12 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 #define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
 #define TCP_OFF(sk)	(sk->sk_sndmsg_off)
 
-static inline int select_size(struct sock *sk)
+static inline int select_size(struct sock *sk, int sg)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int tmp = tp->mss_cache;
 
-	if (sk->sk_route_caps & NETIF_F_SG) {
+	if (sg) {
 		if (sk_can_gso(sk))
 			tmp = 0;
 		else {
@@ -906,7 +905,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	struct sk_buff *skb;
 	int iovlen, flags;
 	int mss_now, size_goal;
-	int err, copied;
+	int sg, err, copied;
 	long timeo;
 
 	lock_sock(sk);
@@ -934,6 +933,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 		goto out_err;
 
+	sg = sk->sk_route_caps & NETIF_F_SG;
+
 	while (--iovlen >= 0) {
 		int seglen = iov->iov_len;
 		unsigned char __user *from = iov->iov_base;
@@ -959,8 +960,9 @@ new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
-			skb = sk_stream_alloc_skb(sk, select_size(sk),
-					sk->sk_allocation);
+			skb = sk_stream_alloc_skb(sk,
+						  select_size(sk, sg),
+						  sk->sk_allocation);
 			if (!skb)
 				goto wait_for_memory;
 
@@ -997,9 +999,7 @@ new_segment:
 					/* We can extend the last page
 					 * fragment. */
 					merge = 1;
-				} else if (i == MAX_SKB_FRAGS ||
-					   (!i &&
-					    !(sk->sk_route_caps & NETIF_F_SG))) {
+				} else if (i == MAX_SKB_FRAGS || !sg) {
 					/* Need to add new fragment and cannot
 					 * do this because interface is non-SG,
 					 * or because all the page slots are
@@ -1254,6 +1254,39 @@ static void tcp_prequeue_process(struct sock *sk)
 	tp->ucopy.memory = 0;
 }
 
+#ifdef CONFIG_NET_DMA
+static void tcp_service_net_dma(struct sock *sk, bool wait)
+{
+	dma_cookie_t done, used;
+	dma_cookie_t last_issued;
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!tp->ucopy.dma_chan)
+		return;
+
+	last_issued = tp->ucopy.dma_cookie;
+	dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+
+	do {
+		if (dma_async_memcpy_complete(tp->ucopy.dma_chan,
+					      last_issued, &done,
+					      &used) == DMA_SUCCESS) {
+			/* Safe to free early-copied skbs now */
+			__skb_queue_purge(&sk->sk_async_wait_queue);
+			break;
+		} else {
+			struct sk_buff *skb;
+			while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
+			       (dma_async_is_complete(skb->dma_cookie, done,
+						      used) == DMA_SUCCESS)) {
+				__skb_dequeue(&sk->sk_async_wait_queue);
+				kfree_skb(skb);
+			}
+		}
+	} while (wait);
+}
+#endif
+
 static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 {
 	struct sk_buff *skb;
@@ -1546,6 +1579,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			/* __ Set realtime policy in scheduler __ */
 		}
 
+#ifdef CONFIG_NET_DMA
+		if (tp->ucopy.dma_chan)
+			dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+#endif
 		if (copied >= target) {
 			/* Do not sleep, just process backlog. */
 			release_sock(sk);
@@ -1554,6 +1591,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			sk_wait_data(sk, &timeo);
 
 #ifdef CONFIG_NET_DMA
+		tcp_service_net_dma(sk, false);  /* Don't block */
 		tp->ucopy.wakeup = 0;
 #endif
 
@@ -1633,6 +1671,9 @@ do_prequeue:
 							copied = -EFAULT;
 						break;
 					}
+
+				dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+
 				if ((offset + used) == skb->len)
 					copied_early = 1;
 
@@ -1702,27 +1743,9 @@ skip_copy:
 	}
 
 #ifdef CONFIG_NET_DMA
-	if (tp->ucopy.dma_chan) {
-		dma_cookie_t done, used;
-
-		dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
-
-		while (dma_async_memcpy_complete(tp->ucopy.dma_chan,
-						 tp->ucopy.dma_cookie, &done,
-						 &used) == DMA_IN_PROGRESS) {
-			/* do partial cleanup of sk_async_wait_queue */
-			while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
-			       (dma_async_is_complete(skb->dma_cookie, done,
-						      used) == DMA_SUCCESS)) {
-				__skb_dequeue(&sk->sk_async_wait_queue);
-				kfree_skb(skb);
-			}
-		}
+	tcp_service_net_dma(sk, true);  /* Wait for queue to drain */
+	tp->ucopy.dma_chan = NULL;
 
-		/* Safe to free early-copied skbs now */
-		__skb_queue_purge(&sk->sk_async_wait_queue);
-		tp->ucopy.dma_chan = NULL;
-	}
 	if (tp->ucopy.pinned_list) {
 		dma_unpin_iovec_pages(tp->ucopy.pinned_list);
 		tp->ucopy.pinned_list = NULL;
@@ -2229,6 +2252,20 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		}
 		break;
 
+	case TCP_THIN_LINEAR_TIMEOUTS:
+		if (val < 0 || val > 1)
+			err = -EINVAL;
+		else
+			tp->thin_lto = val;
+		break;
+
+	case TCP_THIN_DUPACK:
+		if (val < 0 || val > 1)
+			err = -EINVAL;
+		else
+			tp->thin_dupack = val;
+		break;
+
 	case TCP_CORK:
 		/* When set indicates to always queue non-full frames.
 		 * Later the user clears this option and we transmit
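
[Note] A minimal userspace sketch of how an application might enable the two thin-stream options added in the hunk above. This is illustrative and not part of the patch: enable_thin_stream() is a hypothetical helper, and the fallback #define values mirror the constants this feature adds to linux/tcp.h (the #ifndef guards let the system headers win if they already define them). Both options are boolean, which is why the kernel rejects anything outside 0..1.

/* Hypothetical example, not from this commit: enable thin-stream
 * handling on an already-created TCP socket fd. */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef TCP_THIN_LINEAR_TIMEOUTS
#define TCP_THIN_LINEAR_TIMEOUTS 16	/* value introduced in linux/tcp.h */
#endif
#ifndef TCP_THIN_DUPACK
#define TCP_THIN_DUPACK 17		/* value introduced in linux/tcp.h */
#endif

static int enable_thin_stream(int fd)
{
	int one = 1;

	/* A kernel without this patch returns -1 with errno ENOPROTOOPT. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_THIN_LINEAR_TIMEOUTS,
		       &one, sizeof(one)) < 0)
		return -1;
	return setsockopt(fd, IPPROTO_TCP, TCP_THIN_DUPACK,
			  &one, sizeof(one));
}
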
@@ -2788,10 +2825,10 @@ EXPORT_SYMBOL(tcp_gro_complete);
 
 #ifdef CONFIG_TCP_MD5SIG
 static unsigned long tcp_md5sig_users;
-static struct tcp_md5sig_pool **tcp_md5sig_pool;
+static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool;
 static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
 
-static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool)
+static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
 {
 	int cpu;
 	for_each_possible_cpu(cpu) {
@@ -2808,7 +2845,7 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool)
 
 void tcp_free_md5sig_pool(void)
 {
-	struct tcp_md5sig_pool **pool = NULL;
+	struct tcp_md5sig_pool * __percpu *pool = NULL;
 
 	spin_lock_bh(&tcp_md5sig_pool_lock);
 	if (--tcp_md5sig_users == 0) {
@@ -2822,10 +2859,11 @@ void tcp_free_md5sig_pool(void)
 
 EXPORT_SYMBOL(tcp_free_md5sig_pool);
 
-static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk)
+static struct tcp_md5sig_pool * __percpu *
+__tcp_alloc_md5sig_pool(struct sock *sk)
 {
 	int cpu;
-	struct tcp_md5sig_pool **pool;
+	struct tcp_md5sig_pool * __percpu *pool;
 
 	pool = alloc_percpu(struct tcp_md5sig_pool *);
 	if (!pool)
@@ -2852,9 +2890,9 @@ out_free:
 	return NULL;
 }
 
-struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk)
+struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
 {
-	struct tcp_md5sig_pool **pool;
+	struct tcp_md5sig_pool * __percpu *pool;
 	int alloc = 0;
 
 retry:
@@ -2873,7 +2911,9 @@ retry:
 
 	if (alloc) {
 		/* we cannot hold spinlock here because this may sleep. */
-		struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk);
+		struct tcp_md5sig_pool * __percpu *p;
+
+		p = __tcp_alloc_md5sig_pool(sk);
 		spin_lock_bh(&tcp_md5sig_pool_lock);
 		if (!p) {
 			tcp_md5sig_users--;
@@ -2897,7 +2937,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
 
 struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu)
 {
-	struct tcp_md5sig_pool **p;
+	struct tcp_md5sig_pool * __percpu *p;
 	spin_lock_bh(&tcp_md5sig_pool_lock);
 	p = tcp_md5sig_pool;
 	if (p)
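
[Note] The md5sig hunks above only change annotations: __percpu marks the per-CPU address space for sparse and compiles away in normal builds, so the generated code is unchanged. Below is a standalone sketch of the same pattern, a dynamically allocated per-CPU array of pointers; it is illustrative only, and the example_pool names are made up and do not appear in tcp.c.

#include <linux/percpu.h>
#include <linux/slab.h>

struct item {
	int val;
};

static struct item * __percpu *example_pool;	/* one pointer slot per CPU */

static int example_pool_init(void)
{
	int cpu;

	example_pool = alloc_percpu(struct item *);	/* slots come back zeroed */
	if (!example_pool)
		return -ENOMEM;

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(example_pool, cpu) = kzalloc(sizeof(struct item),
							  GFP_KERNEL);
	/* A failed kzalloc() leaves a NULL slot; the free path tolerates it. */
	return 0;
}

static void example_pool_free(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		kfree(*per_cpu_ptr(example_pool, cpu));	/* kfree(NULL) is a no-op */
	free_percpu(example_pool);
	example_pool = NULL;
}

With the annotation in place, sparse can warn when a plain pointer is mixed up with a per-CPU pointer, which is presumably the motivation for the conversion in the hunks above.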
