Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--  net/ipv4/tcp.c  |  130
1 file changed, 85 insertions, 45 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b0a26bb25e2e..6afb6d8662b2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -429,7 +429,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		if (tp->urg_seq == tp->copied_seq &&
 		    !sock_flag(sk, SOCK_URGINLINE) &&
 		    tp->urg_data)
-			target--;
+			target++;
 
 		/* Potential race condition. If read of tp below will
 		 * escape above sk->sk_state, we can be illegally awaken
@@ -536,8 +536,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 	tp->nonagle &= ~TCP_NAGLE_PUSH;
 }
 
-static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
-				struct sk_buff *skb)
+static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
 {
 	if (flags & MSG_OOB)
 		tp->snd_up = tp->write_seq;
@@ -546,13 +545,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
 static inline void tcp_push(struct sock *sk, int flags, int mss_now,
 			    int nonagle)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-
 	if (tcp_send_head(sk)) {
-		struct sk_buff *skb = tcp_write_queue_tail(sk);
+		struct tcp_sock *tp = tcp_sk(sk);
+
 		if (!(flags & MSG_MORE) || forced_push(tp))
-			tcp_mark_push(tp, skb);
-		tcp_mark_urg(tp, flags, skb);
+			tcp_mark_push(tp, tcp_write_queue_tail(sk));
+
+		tcp_mark_urg(tp, flags);
 		__tcp_push_pending_frames(sk, mss_now,
 					  (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
 	}
@@ -877,12 +876,12 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 #define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
 #define TCP_OFF(sk)	(sk->sk_sndmsg_off)
 
-static inline int select_size(struct sock *sk)
+static inline int select_size(struct sock *sk, int sg)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int tmp = tp->mss_cache;
 
-	if (sk->sk_route_caps & NETIF_F_SG) {
+	if (sg) {
 		if (sk_can_gso(sk))
 			tmp = 0;
 		else {
@@ -906,7 +905,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	struct sk_buff *skb;
 	int iovlen, flags;
 	int mss_now, size_goal;
-	int err, copied;
+	int sg, err, copied;
 	long timeo;
 
 	lock_sock(sk);
@@ -934,6 +933,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 		goto out_err;
 
+	sg = sk->sk_route_caps & NETIF_F_SG;
+
 	while (--iovlen >= 0) {
 		int seglen = iov->iov_len;
 		unsigned char __user *from = iov->iov_base;
@@ -959,8 +960,9 @@ new_segment:
 				if (!sk_stream_memory_free(sk))
 					goto wait_for_sndbuf;
 
-				skb = sk_stream_alloc_skb(sk, select_size(sk),
-							  sk->sk_allocation);
+				skb = sk_stream_alloc_skb(sk,
+							  select_size(sk, sg),
+							  sk->sk_allocation);
 				if (!skb)
 					goto wait_for_memory;
 
@@ -997,9 +999,7 @@ new_segment:
 					/* We can extend the last page
 					 * fragment. */
 					merge = 1;
-				} else if (i == MAX_SKB_FRAGS ||
-					   (!i &&
-					   !(sk->sk_route_caps & NETIF_F_SG))) {
+				} else if (i == MAX_SKB_FRAGS || !sg) {
 					/* Need to add new fragment and cannot
 					 * do this because interface is non-SG,
 					 * or because all the page slots are
@@ -1254,6 +1254,39 @@ static void tcp_prequeue_process(struct sock *sk)
 	tp->ucopy.memory = 0;
 }
 
+#ifdef CONFIG_NET_DMA
+static void tcp_service_net_dma(struct sock *sk, bool wait)
+{
+	dma_cookie_t done, used;
+	dma_cookie_t last_issued;
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!tp->ucopy.dma_chan)
+		return;
+
+	last_issued = tp->ucopy.dma_cookie;
+	dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+
+	do {
+		if (dma_async_memcpy_complete(tp->ucopy.dma_chan,
+					      last_issued, &done,
+					      &used) == DMA_SUCCESS) {
+			/* Safe to free early-copied skbs now */
+			__skb_queue_purge(&sk->sk_async_wait_queue);
+			break;
+		} else {
+			struct sk_buff *skb;
+			while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
+			       (dma_async_is_complete(skb->dma_cookie, done,
+						      used) == DMA_SUCCESS)) {
+				__skb_dequeue(&sk->sk_async_wait_queue);
+				kfree_skb(skb);
+			}
+		}
+	} while (wait);
+}
+#endif
+
 static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 {
 	struct sk_buff *skb;
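
For orientation, the helper added above is used in two modes by the tcp_recvmsg() hunks further down: a non-blocking reap while waiting for data, and a blocking drain on the way out. A condensed sketch of that calling pattern (kernel context, taken from those call sites):

#ifdef CONFIG_NET_DMA
	/* In the receive wait loop: free whatever the DMA engine has
	 * already completed, but do not block on it. */
	tcp_service_net_dma(sk, false);	/* Don't block */

	/* ... */

	/* Before tcp_recvmsg() returns: wait for the async copy queue
	 * to drain, then drop the channel. */
	tcp_service_net_dma(sk, true);	/* Wait for queue to drain */
	tp->ucopy.dma_chan = NULL;
#endif
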
@@ -1546,6 +1579,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			/* __ Set realtime policy in scheduler __ */
 		}
 
+#ifdef CONFIG_NET_DMA
+		if (tp->ucopy.dma_chan)
+			dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+#endif
 		if (copied >= target) {
 			/* Do not sleep, just process backlog. */
 			release_sock(sk);
@@ -1554,6 +1591,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			sk_wait_data(sk, &timeo);
 
 #ifdef CONFIG_NET_DMA
+		tcp_service_net_dma(sk, false);  /* Don't block */
 		tp->ucopy.wakeup = 0;
 #endif
 
@@ -1633,6 +1671,9 @@ do_prequeue:
 						copied = -EFAULT;
 					break;
 				}
+
+				dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+
 				if ((offset + used) == skb->len)
 					copied_early = 1;
 
@@ -1702,27 +1743,9 @@ skip_copy:
 	}
 
 #ifdef CONFIG_NET_DMA
-	if (tp->ucopy.dma_chan) {
-		dma_cookie_t done, used;
-
-		dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
-
-		while (dma_async_memcpy_complete(tp->ucopy.dma_chan,
-						 tp->ucopy.dma_cookie, &done,
-						 &used) == DMA_IN_PROGRESS) {
-			/* do partial cleanup of sk_async_wait_queue */
-			while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
-			       (dma_async_is_complete(skb->dma_cookie, done,
-						      used) == DMA_SUCCESS)) {
-				__skb_dequeue(&sk->sk_async_wait_queue);
-				kfree_skb(skb);
-			}
-		}
+	tcp_service_net_dma(sk, true);  /* Wait for queue to drain */
+	tp->ucopy.dma_chan = NULL;
 
-		/* Safe to free early-copied skbs now */
-		__skb_queue_purge(&sk->sk_async_wait_queue);
-		tp->ucopy.dma_chan = NULL;
-	}
 	if (tp->ucopy.pinned_list) {
 		dma_unpin_iovec_pages(tp->ucopy.pinned_list);
 		tp->ucopy.pinned_list = NULL;
@@ -2229,6 +2252,20 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		}
 		break;
 
+	case TCP_THIN_LINEAR_TIMEOUTS:
+		if (val < 0 || val > 1)
+			err = -EINVAL;
+		else
+			tp->thin_lto = val;
+		break;
+
+	case TCP_THIN_DUPACK:
+		if (val < 0 || val > 1)
+			err = -EINVAL;
+		else
+			tp->thin_dupack = val;
+		break;
+
 	case TCP_CORK:
 		/* When set indicates to always queue non-full frames.
 		 * Later the user clears this option and we transmit
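
The two new socket options take a boolean int at the IPPROTO_TCP level. A userspace sketch (not part of the patch; it assumes <netinet/tcp.h> is new enough to define the TCP_THIN_* constants, otherwise the values from the kernel's include/linux/tcp.h would have to be supplied):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <stdio.h>

static int enable_thin_stream(int fd)
{
	int one = 1;

	/* Back off linearly instead of exponentially on retransmission
	 * timeouts while the stream is thin (too few packets in flight
	 * to trigger fast retransmit). */
	if (setsockopt(fd, IPPROTO_TCP, TCP_THIN_LINEAR_TIMEOUTS,
		       &one, sizeof(one)) < 0) {
		perror("TCP_THIN_LINEAR_TIMEOUTS");
		return -1;
	}

	/* Fast-retransmit after a single duplicate ACK on thin streams. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_THIN_DUPACK,
		       &one, sizeof(one)) < 0) {
		perror("TCP_THIN_DUPACK");
		return -1;
	}

	return 0;
}
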
@@ -2788,10 +2825,10 @@ EXPORT_SYMBOL(tcp_gro_complete);
 
 #ifdef CONFIG_TCP_MD5SIG
 static unsigned long tcp_md5sig_users;
-static struct tcp_md5sig_pool **tcp_md5sig_pool;
+static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool;
 static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
 
-static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool)
+static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
 {
 	int cpu;
 	for_each_possible_cpu(cpu) {
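
The __percpu markings in this and the following hunks are sparse annotations: the value returned by alloc_percpu() is a per-cpu cookie that must not be dereferenced directly, only converted through per_cpu_ptr() or the this_cpu_* accessors. Because each per-cpu slot here stores a struct tcp_md5sig_pool pointer, the full type becomes struct tcp_md5sig_pool * __percpu *. A small kernel-context sketch of handling such a value (the function name is made up for illustration):

static void example_md5sig_percpu_walk(void)
{
	struct tcp_md5sig_pool * __percpu *pool;
	int cpu;

	/* One "struct tcp_md5sig_pool *" slot per possible CPU; the
	 * returned cookie carries the __percpu annotation. */
	pool = alloc_percpu(struct tcp_md5sig_pool *);
	if (!pool)
		return;

	for_each_possible_cpu(cpu) {
		/* per_cpu_ptr() strips the annotation and yields an
		 * ordinary pointer to this CPU's slot. */
		struct tcp_md5sig_pool **slot = per_cpu_ptr(pool, cpu);

		WARN_ON(*slot != NULL);	/* alloc_percpu() zeroes the slots */
	}

	free_percpu(pool);
}
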
@@ -2808,7 +2845,7 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool)
 
 void tcp_free_md5sig_pool(void)
 {
-	struct tcp_md5sig_pool **pool = NULL;
+	struct tcp_md5sig_pool * __percpu *pool = NULL;
 
 	spin_lock_bh(&tcp_md5sig_pool_lock);
 	if (--tcp_md5sig_users == 0) {
@@ -2822,10 +2859,11 @@ void tcp_free_md5sig_pool(void)
 
 EXPORT_SYMBOL(tcp_free_md5sig_pool);
 
-static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk)
+static struct tcp_md5sig_pool * __percpu *
+__tcp_alloc_md5sig_pool(struct sock *sk)
 {
 	int cpu;
-	struct tcp_md5sig_pool **pool;
+	struct tcp_md5sig_pool * __percpu *pool;
 
 	pool = alloc_percpu(struct tcp_md5sig_pool *);
 	if (!pool)
@@ -2852,9 +2890,9 @@ out_free:
 	return NULL;
 }
 
-struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk)
+struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
 {
-	struct tcp_md5sig_pool **pool;
+	struct tcp_md5sig_pool * __percpu *pool;
 	int alloc = 0;
 
 retry:
@@ -2873,7 +2911,9 @@ retry:
 
 	if (alloc) {
 		/* we cannot hold spinlock here because this may sleep. */
-		struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk);
+		struct tcp_md5sig_pool * __percpu *p;
+
+		p = __tcp_alloc_md5sig_pool(sk);
 		spin_lock_bh(&tcp_md5sig_pool_lock);
 		if (!p) {
 			tcp_md5sig_users--;
@@ -2897,7 +2937,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
 
 struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu)
 {
-	struct tcp_md5sig_pool **p;
+	struct tcp_md5sig_pool * __percpu *p;
 	spin_lock_bh(&tcp_md5sig_pool_lock);
 	p = tcp_md5sig_pool;
 	if (p)