Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--  net/ipv4/tcp.c  127
1 file changed, 100 insertions(+), 27 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e2b7b8055037..74998f250071 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -263,7 +263,7 @@
 #include <net/tcp.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
-
+#include <net/netdma.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -622,14 +622,10 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 	ssize_t res;
 	struct sock *sk = sock->sk;
 
-#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-
 	if (!(sk->sk_route_caps & NETIF_F_SG) ||
-	    !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))
+	    !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
 		return sock_no_sendpage(sock, page, offset, size, flags);
 
-#undef TCP_ZC_CSUM_FLAGS
-
 	lock_sock(sk);
 	TCP_CHECK_TIMER(sk);
 	res = do_tcp_sendpages(sk, &page, offset, size, flags);
@@ -726,9 +722,7 @@ new_segment:
 				/*
 				 * Check whether we can use HW checksum.
 				 */
-				if (sk->sk_route_caps &
-				    (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
-				     NETIF_F_HW_CSUM))
+				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
 					skb->ip_summed = CHECKSUM_HW;
 
 				skb_entail(sk, tp, skb);
@@ -937,7 +931,7 @@ static int tcp_recv_urg(struct sock *sk, long timeo,
  * calculation of whether or not we must ACK for the sake of
  * a window update.
  */
-static void cleanup_rbuf(struct sock *sk, int copied)
+void tcp_cleanup_rbuf(struct sock *sk, int copied)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int time_to_ack = 0;
@@ -1072,11 +1066,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 				break;
 		}
 		if (skb->h.th->fin) {
-			sk_eat_skb(sk, skb);
+			sk_eat_skb(sk, skb, 0);
 			++seq;
 			break;
 		}
-		sk_eat_skb(sk, skb);
+		sk_eat_skb(sk, skb, 0);
 		if (!desc->count)
 			break;
 	}
@@ -1086,7 +1080,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 
 	/* Clean up data we have read: This will do ACK frames. */
 	if (copied)
-		cleanup_rbuf(sk, copied);
+		tcp_cleanup_rbuf(sk, copied);
 	return copied;
 }
 
@@ -1110,6 +1104,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int target;		/* Read at least this many bytes */
 	long timeo;
 	struct task_struct *user_recv = NULL;
+	int copied_early = 0;
 
 	lock_sock(sk);
 
@@ -1133,6 +1128,17 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
+#ifdef CONFIG_NET_DMA
+	tp->ucopy.dma_chan = NULL;
+	preempt_disable();
+	if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
+	    !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) {
+		preempt_enable_no_resched();
+		tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
+	} else
+		preempt_enable_no_resched();
+#endif
+
 	do {
 		struct sk_buff *skb;
 		u32 offset;
@@ -1220,7 +1226,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			}
 		}
 
-		cleanup_rbuf(sk, copied);
+		tcp_cleanup_rbuf(sk, copied);
 
 		if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
 			/* Install new reader */
@@ -1274,6 +1280,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		} else
 			sk_wait_data(sk, &timeo);
 
+#ifdef CONFIG_NET_DMA
+		tp->ucopy.wakeup = 0;
+#endif
+
 		if (user_recv) {
 			int chunk;
 
@@ -1329,13 +1339,39 @@ do_prequeue:
 		}
 
 		if (!(flags & MSG_TRUNC)) {
-			err = skb_copy_datagram_iovec(skb, offset,
-						      msg->msg_iov, used);
-			if (err) {
-				/* Exception. Bailout! */
-				if (!copied)
-					copied = -EFAULT;
-				break;
+#ifdef CONFIG_NET_DMA
+			if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+				tp->ucopy.dma_chan = get_softnet_dma();
+
+			if (tp->ucopy.dma_chan) {
+				tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
+					tp->ucopy.dma_chan, skb, offset,
+					msg->msg_iov, used,
+					tp->ucopy.pinned_list);
+
+				if (tp->ucopy.dma_cookie < 0) {
+
+					printk(KERN_ALERT "dma_cookie < 0\n");
+
+					/* Exception. Bailout! */
+					if (!copied)
+						copied = -EFAULT;
+					break;
+				}
+				if ((offset + used) == skb->len)
+					copied_early = 1;
+
+			} else
+#endif
+			{
+				err = skb_copy_datagram_iovec(skb, offset,
+						msg->msg_iov, used);
+				if (err) {
+					/* Exception. Bailout! */
+					if (!copied)
+						copied = -EFAULT;
+					break;
+				}
 			}
 		}
 
@@ -1355,15 +1391,19 @@ skip_copy:
 
 		if (skb->h.th->fin)
 			goto found_fin_ok;
-		if (!(flags & MSG_PEEK))
-			sk_eat_skb(sk, skb);
+		if (!(flags & MSG_PEEK)) {
+			sk_eat_skb(sk, skb, copied_early);
+			copied_early = 0;
+		}
 		continue;
 
 	found_fin_ok:
 		/* Process the FIN. */
 		++*seq;
-		if (!(flags & MSG_PEEK))
-			sk_eat_skb(sk, skb);
+		if (!(flags & MSG_PEEK)) {
+			sk_eat_skb(sk, skb, copied_early);
+			copied_early = 0;
+		}
 		break;
 	} while (len > 0);
 
@@ -1386,12 +1426,42 @@ skip_copy:
 		tp->ucopy.len = 0;
 	}
 
+#ifdef CONFIG_NET_DMA
+	if (tp->ucopy.dma_chan) {
+		struct sk_buff *skb;
+		dma_cookie_t done, used;
+
+		dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+
+		while (dma_async_memcpy_complete(tp->ucopy.dma_chan,
+						 tp->ucopy.dma_cookie, &done,
+						 &used) == DMA_IN_PROGRESS) {
+			/* do partial cleanup of sk_async_wait_queue */
+			while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
+			       (dma_async_is_complete(skb->dma_cookie, done,
+						      used) == DMA_SUCCESS)) {
+				__skb_dequeue(&sk->sk_async_wait_queue);
+				kfree_skb(skb);
+			}
+		}
+
+		/* Safe to free early-copied skbs now */
+		__skb_queue_purge(&sk->sk_async_wait_queue);
+		dma_chan_put(tp->ucopy.dma_chan);
+		tp->ucopy.dma_chan = NULL;
+	}
+	if (tp->ucopy.pinned_list) {
+		dma_unpin_iovec_pages(tp->ucopy.pinned_list);
+		tp->ucopy.pinned_list = NULL;
+	}
+#endif
+
 	/* According to UNIX98, msg_name/msg_namelen are ignored
 	 * on connected socket. I was just happy when found this 8) --ANK
 	 */
 
 	/* Clean up data we have read: This will do ACK frames. */
-	cleanup_rbuf(sk, copied);
+	tcp_cleanup_rbuf(sk, copied);
 
 	TCP_CHECK_TIMER(sk);
 	release_sock(sk);
@@ -1658,6 +1728,9 @@ int tcp_disconnect(struct sock *sk, int flags)
 	__skb_queue_purge(&sk->sk_receive_queue);
 	sk_stream_writequeue_purge(sk);
 	__skb_queue_purge(&tp->out_of_order_queue);
+#ifdef CONFIG_NET_DMA
+	__skb_queue_purge(&sk->sk_async_wait_queue);
+#endif
 
 	inet->dport = 0;
 
@@ -1858,7 +1931,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		    (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
 		    inet_csk_ack_scheduled(sk)) {
 			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
-			cleanup_rbuf(sk, 1);
+			tcp_cleanup_rbuf(sk, 1);
 			if (!(val & 1))
 				icsk->icsk_ack.pingpong = 1;
 		}