Diffstat (limited to 'net/ipv4/tcp.c')
 -rw-r--r--   net/ipv4/tcp.c   127
 1 file changed, 100 insertions, 27 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e2b7b8055037..74998f250071 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -263,7 +263,7 @@
 #include <net/tcp.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
-
+#include <net/netdma.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -622,14 +622,10 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
         ssize_t res;
         struct sock *sk = sock->sk;
 
-#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-
         if (!(sk->sk_route_caps & NETIF_F_SG) ||
-            !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))
+            !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
                 return sock_no_sendpage(sock, page, offset, size, flags);
 
-#undef TCP_ZC_CSUM_FLAGS
-
         lock_sock(sk);
         TCP_CHECK_TIMER(sk);
         res = do_tcp_sendpages(sk, &page, offset, size, flags);
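Note: NETIF_F_ALL_CSUM is not defined anywhere in this file; it presumably comes from a prerequisite netdevice.h change that folds together the three checksum-capability flags the old TCP_ZC_CSUM_FLAGS macro spelled out. A sketch of the assumed definition:

/* assumed definitions, include/linux/netdevice.h (not part of this diff) */
#define NETIF_F_GEN_CSUM        (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
#define NETIF_F_ALL_CSUM        (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)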
@@ -726,9 +722,7 @@ new_segment:
                 /*
                  * Check whether we can use HW checksum.
                  */
-                if (sk->sk_route_caps &
-                    (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
-                     NETIF_F_HW_CSUM))
+                if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
                         skb->ip_summed = CHECKSUM_HW;
 
                 skb_entail(sk, tp, skb);
@@ -937,7 +931,7 @@ static int tcp_recv_urg(struct sock *sk, long timeo,
  * calculation of whether or not we must ACK for the sake of
  * a window update.
  */
-static void cleanup_rbuf(struct sock *sk, int copied)
+void tcp_cleanup_rbuf(struct sock *sk, int copied)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         int time_to_ack = 0;
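Dropping static and renaming cleanup_rbuf() to tcp_cleanup_rbuf() only makes sense if callers outside this file need it; a matching declaration is presumably added to the TCP header by a companion hunk, roughly:

/* assumed companion change, include/net/tcp.h (not shown in this diff) */
extern void tcp_cleanup_rbuf(struct sock *sk, int copied);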
@@ -1072,11 +1066,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                         break;
                 }
                 if (skb->h.th->fin) {
-                        sk_eat_skb(sk, skb);
+                        sk_eat_skb(sk, skb, 0);
                         ++seq;
                         break;
                 }
-                sk_eat_skb(sk, skb);
+                sk_eat_skb(sk, skb, 0);
                 if (!desc->count)
                         break;
         }
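sk_eat_skb() gains a third argument here; tcp_read_sock() always passes 0 because it never copies via DMA. Judging by how tcp_recvmsg() uses it further down, the flag presumably tells sk_eat_skb() to park an early-copied skb on sk_async_wait_queue instead of freeing it while the asynchronous copy may still be in flight. A sketch of the assumed helper:

/* assumed new form of sk_eat_skb(), include/net/sock.h (sketch) */
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb,
                              int copied_early)
{
        __skb_unlink(skb, &sk->sk_receive_queue);
#ifdef CONFIG_NET_DMA
        if (copied_early) {
                /* DMA engine may still be copying: defer the free */
                __skb_queue_tail(&sk->sk_async_wait_queue, skb);
                return;
        }
#endif
        __kfree_skb(skb);
}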
@@ -1086,7 +1080,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 
         /* Clean up data we have read: This will do ACK frames. */
         if (copied)
-                cleanup_rbuf(sk, copied);
+                tcp_cleanup_rbuf(sk, copied);
         return copied;
 }
 
@@ -1110,6 +1104,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
         int target;             /* Read at least this many bytes */
         long timeo;
         struct task_struct *user_recv = NULL;
+        int copied_early = 0;
 
         lock_sock(sk);
 
@@ -1133,6 +1128,17 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
         target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
+#ifdef CONFIG_NET_DMA
+        tp->ucopy.dma_chan = NULL;
+        preempt_disable();
+        if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
+            !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) {
+                preempt_enable_no_resched();
+                tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
+        } else
+                preempt_enable_no_resched();
+#endif
+
         do {
                 struct sk_buff *skb;
                 u32 offset;
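This setup block references state that no hunk in this file introduces: sysctl_tcp_dma_copybreak gates offload to reads large enough to justify the DMA setup cost, and tp->ucopy grows bookkeeping for an in-flight asynchronous copy. The assumed companion declarations, sketched:

/* assumed additions elsewhere in this patchset (sketch) */
extern int sysctl_tcp_dma_copybreak;    /* minimum read size for offload */

/* inside struct tcp_sock's ucopy struct, include/linux/tcp.h: */
#ifdef CONFIG_NET_DMA
        struct dma_chan         *dma_chan;      /* channel for async copies */
        int                     wakeup;         /* reader woken mid-copy? */
        struct dma_pinned_list  *pinned_list;   /* pinned user iovec pages */
        dma_cookie_t            dma_cookie;     /* last submitted copy */
#endif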
@@ -1220,7 +1226,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                         }
                 }
 
-                cleanup_rbuf(sk, copied);
+                tcp_cleanup_rbuf(sk, copied);
 
                 if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
                         /* Install new reader */
@@ -1274,6 +1280,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 } else
                         sk_wait_data(sk, &timeo);
 
+#ifdef CONFIG_NET_DMA
+                tp->ucopy.wakeup = 0;
+#endif
+
                 if (user_recv) {
                         int chunk;
 
@@ -1329,13 +1339,39 @@ do_prequeue:
                 }
 
                 if (!(flags & MSG_TRUNC)) {
-                        err = skb_copy_datagram_iovec(skb, offset,
-                                                      msg->msg_iov, used);
-                        if (err) {
-                                /* Exception. Bailout! */
-                                if (!copied)
-                                        copied = -EFAULT;
-                                break;
+#ifdef CONFIG_NET_DMA
+                        if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+                                tp->ucopy.dma_chan = get_softnet_dma();
+
+                        if (tp->ucopy.dma_chan) {
+                                tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
+                                        tp->ucopy.dma_chan, skb, offset,
+                                        msg->msg_iov, used,
+                                        tp->ucopy.pinned_list);
+
+                                if (tp->ucopy.dma_cookie < 0) {
+
+                                        printk(KERN_ALERT "dma_cookie < 0\n");
+
+                                        /* Exception. Bailout! */
+                                        if (!copied)
+                                                copied = -EFAULT;
+                                        break;
+                                }
+                                if ((offset + used) == skb->len)
+                                        copied_early = 1;
+
+                        } else
+#endif
+                        {
+                                err = skb_copy_datagram_iovec(skb, offset,
+                                                msg->msg_iov, used);
+                                if (err) {
+                                        /* Exception. Bailout! */
+                                        if (!copied)
+                                                copied = -EFAULT;
+                                        break;
+                                }
                         }
                 }
 
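dma_skb_copy_datagram_iovec() acts as the asynchronous counterpart of skb_copy_datagram_iovec(): instead of returning 0 or -EFAULT after a CPU copy, it queues the copy on the DMA channel and returns a cookie (negative on submission failure) identifying the last descriptor issued. copied_early is set only when the whole skb was handed off, since a partially consumed skb must stay on the receive queue anyway. Assumed prototype, sketched:

/* assumed prototype from net/netdma.h (sketch, not part of this diff) */
dma_cookie_t dma_skb_copy_datagram_iovec(struct dma_chan *chan,
                                         struct sk_buff *skb, int offset,
                                         struct iovec *to, size_t len,
                                         struct dma_pinned_list *pinned_list);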
@@ -1355,15 +1391,19 @@ skip_copy:
 
                 if (skb->h.th->fin)
                         goto found_fin_ok;
-                if (!(flags & MSG_PEEK))
-                        sk_eat_skb(sk, skb);
+                if (!(flags & MSG_PEEK)) {
+                        sk_eat_skb(sk, skb, copied_early);
+                        copied_early = 0;
+                }
                 continue;
 
         found_fin_ok:
                 /* Process the FIN. */
                 ++*seq;
-                if (!(flags & MSG_PEEK))
-                        sk_eat_skb(sk, skb);
+                if (!(flags & MSG_PEEK)) {
+                        sk_eat_skb(sk, skb, copied_early);
+                        copied_early = 0;
+                }
                 break;
         } while (len > 0);
 
@@ -1386,12 +1426,42 @@ skip_copy:
                 tp->ucopy.len = 0;
         }
 
+#ifdef CONFIG_NET_DMA
+        if (tp->ucopy.dma_chan) {
+                struct sk_buff *skb;
+                dma_cookie_t done, used;
+
+                dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+
+                while (dma_async_memcpy_complete(tp->ucopy.dma_chan,
+                                                 tp->ucopy.dma_cookie, &done,
+                                                 &used) == DMA_IN_PROGRESS) {
+                        /* do partial cleanup of sk_async_wait_queue */
+                        while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
+                               (dma_async_is_complete(skb->dma_cookie, done,
+                                                      used) == DMA_SUCCESS)) {
+                                __skb_dequeue(&sk->sk_async_wait_queue);
+                                kfree_skb(skb);
+                        }
+                }
+
+                /* Safe to free early-copied skbs now */
+                __skb_queue_purge(&sk->sk_async_wait_queue);
+                dma_chan_put(tp->ucopy.dma_chan);
+                tp->ucopy.dma_chan = NULL;
+        }
+        if (tp->ucopy.pinned_list) {
+                dma_unpin_iovec_pages(tp->ucopy.pinned_list);
+                tp->ucopy.pinned_list = NULL;
+        }
+#endif
+
         /* According to UNIX98, msg_name/msg_namelen are ignored
          * on connected socket. I was just happy when found this 8) --ANK
          */
 
         /* Clean up data we have read: This will do ACK frames. */
-        cleanup_rbuf(sk, copied);
+        tcp_cleanup_rbuf(sk, copied);
 
         TCP_CHECK_TIMER(sk);
         release_sock(sk);
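The drain loop above first flushes pending descriptors, then polls the last submitted cookie; while that copy is still in flight, any skb whose own cookie has already completed can be freed from sk_async_wait_queue early. The cookie test presumably follows the usual dmaengine helper, which treats cookies as a monotonically increasing sequence with wraparound; a sketch of the assumed semantics:

/* assumed dmaengine helper (sketch, include/linux/dmaengine.h) */
static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
                dma_cookie_t last_complete, dma_cookie_t last_used)
{
        if (last_complete <= last_used) {
                /* cookie outside the (last_complete, last_used] window
                 * has already been retired */
                if ((cookie <= last_complete) || (cookie > last_used))
                        return DMA_SUCCESS;
        } else {
                /* the cookie counter wrapped */
                if ((cookie <= last_complete) && (cookie > last_used))
                        return DMA_SUCCESS;
        }
        return DMA_IN_PROGRESS;
}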
@@ -1658,6 +1728,9 @@ int tcp_disconnect(struct sock *sk, int flags)
         __skb_queue_purge(&sk->sk_receive_queue);
         sk_stream_writequeue_purge(sk);
         __skb_queue_purge(&tp->out_of_order_queue);
+#ifdef CONFIG_NET_DMA
+        __skb_queue_purge(&sk->sk_async_wait_queue);
+#endif
 
         inet->dport = 0;
 
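tcp_disconnect() can now find early-copied skbs still parked awaiting DMA completion, so the new queue is purged alongside the receive and out-of-order queues. The queue itself is presumably added to struct sock by a companion change, roughly:

/* assumed addition to struct sock, include/net/sock.h (sketch) */
#ifdef CONFIG_NET_DMA
        struct sk_buff_head     sk_async_wait_queue;    /* skbs awaiting DMA */
#endif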
@@ -1858,7 +1931,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
             (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
             inet_csk_ack_scheduled(sk)) {
                 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
-                cleanup_rbuf(sk, 1);
+                tcp_cleanup_rbuf(sk, 1);
                 if (!(val & 1))
                         icsk->icsk_ack.pingpong = 1;
         }