Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--    net/ipv4/tcp.c    127
1 file changed, 100 insertions(+), 27 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e2b7b8055037..74998f250071 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -263,7 +263,7 @@
 #include <net/tcp.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
-
+#include <net/netdma.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -622,14 +622,10 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 	ssize_t res;
 	struct sock *sk = sock->sk;
 
-#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-
 	if (!(sk->sk_route_caps & NETIF_F_SG) ||
-	    !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))
+	    !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
 		return sock_no_sendpage(sock, page, offset, size, flags);
 
-#undef TCP_ZC_CSUM_FLAGS
-
 	lock_sock(sk);
 	TCP_CHECK_TIMER(sk);
 	res = do_tcp_sendpages(sk, &page, offset, size, flags);
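Note: the driver-feature test now uses the shared NETIF_F_ALL_CSUM mask instead of an ad-hoc local macro. The mask is defined outside this diff; assuming the include/linux/netdevice.h definitions of this era, it expands to the same three bits the macro covered, so the sendpage gate keeps its behaviour:

	/* sketch of the era's netdevice.h definitions (not part of this diff) */
	#define NETIF_F_GEN_CSUM	(NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
	#define NETIF_F_ALL_CSUM	(NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)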
@@ -726,9 +722,7 @@ new_segment:
 			/*
 			 * Check whether we can use HW checksum.
 			 */
-			if (sk->sk_route_caps &
-			    (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
-			     NETIF_F_HW_CSUM))
+			if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
 				skb->ip_summed = CHECKSUM_HW;
 
 			skb_entail(sk, tp, skb);
@@ -937,7 +931,7 @@ static int tcp_recv_urg(struct sock *sk, long timeo,
  * calculation of whether or not we must ACK for the sake of
  * a window update.
  */
-static void cleanup_rbuf(struct sock *sk, int copied)
+void tcp_cleanup_rbuf(struct sock *sk, int copied)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int time_to_ack = 0;
@@ -1072,11 +1066,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 			break;
 		}
 		if (skb->h.th->fin) {
-			sk_eat_skb(sk, skb);
+			sk_eat_skb(sk, skb, 0);
 			++seq;
 			break;
 		}
-		sk_eat_skb(sk, skb);
+		sk_eat_skb(sk, skb, 0);
 		if (!desc->count)
 			break;
 	}
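sk_eat_skb() now takes a third argument recording whether the skb's payload was copied early by the DMA engine and may still be in flight. The helper itself lives in include/net/sock.h, outside this diff; a plausible sketch of its CONFIG_NET_DMA variant:

	/* sketch only: the real helper is in include/net/sock.h */
	static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb,
				      int copied_early)
	{
		__skb_unlink(skb, &sk->sk_receive_queue);
		if (copied_early)
			/* the async copy may still be running; park the skb
			 * until tcp_recvmsg() confirms completion */
			__skb_queue_tail(&sk->sk_async_wait_queue, skb);
		else
			__kfree_skb(skb);
	}

tcp_read_sock() always passes 0 here because this path never hands copies to the DMA engine.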
@@ -1086,7 +1080,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 
 	/* Clean up data we have read: This will do ACK frames. */
 	if (copied)
-		cleanup_rbuf(sk, copied);
+		tcp_cleanup_rbuf(sk, copied);
 	return copied;
 }
 
@@ -1110,6 +1104,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int target;		/* Read at least this many bytes */
 	long timeo;
 	struct task_struct *user_recv = NULL;
+	int copied_early = 0;
 
 	lock_sock(sk);
 
@@ -1133,6 +1128,17 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
+#ifdef CONFIG_NET_DMA
+	tp->ucopy.dma_chan = NULL;
+	preempt_disable();
+	if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
+	    !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) {
+		preempt_enable_no_resched();
+		tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
+	} else
+		preempt_enable_no_resched();
+#endif
+
 	do {
 		struct sk_buff *skb;
 		u32 offset;
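The preempt_disable()/preempt_enable_no_resched() bracket exists because __get_cpu_var(softnet_data.net_dma) reads a per-CPU pointer, which is only meaningful while the task cannot migrate to another CPU. Offload is attempted only for reads longer than sysctl_tcp_dma_copybreak, and never for MSG_PEEK or low-latency sockets. dma_pin_iovec_pages() (defined elsewhere in the patch set) pins the user iovec so the DMA engine can write into it; a hypothetical sketch of pinning a single buffer with the era's get_user_pages(), with illustrative names only:

	/* hypothetical sketch, not the real dma_pin_iovec_pages() */
	#include <linux/mm.h>
	#include <linux/pagemap.h>
	#include <linux/slab.h>

	static struct page **pin_user_buffer(void __user *base, size_t len,
					     int *nr_pages)
	{
		unsigned long start = (unsigned long)base & PAGE_MASK;
		unsigned long end = (unsigned long)base + len;
		int nr = (end - start + PAGE_SIZE - 1) >> PAGE_SHIFT;
		struct page **pages;
		int got;

		pages = kmalloc(nr * sizeof(*pages), GFP_KERNEL);
		if (!pages)
			return NULL;

		down_read(&current->mm->mmap_sem);
		/* write=1: the DMA engine stores into these pages */
		got = get_user_pages(current, current->mm, start, nr,
				     1, 0, pages, NULL);
		up_read(&current->mm->mmap_sem);

		if (got != nr) {
			/* partial pin: undo and let the caller fall back
			 * to the ordinary CPU copy */
			while (got > 0)
				page_cache_release(pages[--got]);
			kfree(pages);
			return NULL;
		}
		*nr_pages = nr;
		return pages;
	}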
@@ -1220,7 +1226,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			}
 		}
 
-		cleanup_rbuf(sk, copied);
+		tcp_cleanup_rbuf(sk, copied);
 
 		if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
 			/* Install new reader */
@@ -1274,6 +1280,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		} else
 			sk_wait_data(sk, &timeo);
 
+#ifdef CONFIG_NET_DMA
+		tp->ucopy.wakeup = 0;
+#endif
+
 		if (user_recv) {
 			int chunk;
 
@@ -1329,13 +1339,39 @@ do_prequeue:
 		}
 
 		if (!(flags & MSG_TRUNC)) {
-			err = skb_copy_datagram_iovec(skb, offset,
-						      msg->msg_iov, used);
-			if (err) {
-				/* Exception. Bailout! */
-				if (!copied)
-					copied = -EFAULT;
-				break;
+#ifdef CONFIG_NET_DMA
+			if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+				tp->ucopy.dma_chan = get_softnet_dma();
+
+			if (tp->ucopy.dma_chan) {
+				tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
+					tp->ucopy.dma_chan, skb, offset,
+					msg->msg_iov, used,
+					tp->ucopy.pinned_list);
+
+				if (tp->ucopy.dma_cookie < 0) {
+
+					printk(KERN_ALERT "dma_cookie < 0\n");
+
+					/* Exception. Bailout! */
+					if (!copied)
+						copied = -EFAULT;
+					break;
+				}
+				if ((offset + used) == skb->len)
+					copied_early = 1;
+
+			} else
+#endif
+			{
+				err = skb_copy_datagram_iovec(skb, offset,
+						msg->msg_iov, used);
+				if (err) {
+					/* Exception. Bailout! */
+					if (!copied)
+						copied = -EFAULT;
+					break;
+				}
 			}
 		}
 
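When the DMA submit succeeds, the returned cookie is saved in tp->ucopy.dma_cookie, and copied_early is set only if the skb was consumed to its end, so sk_eat_skb() knows to park it rather than free it. Also note the `} else` / `#endif` / `{` construction: with CONFIG_NET_DMA disabled, the preprocessor leaves just a bare compound block around the ordinary skb_copy_datagram_iovec() call, so the same body serves as either the only branch or the fallback branch. A minimal standalone illustration of that pattern:

	#include <stdio.h>

	#ifdef USE_DMA			/* stand-in for CONFIG_NET_DMA */
	static int dma_usable(void) { return 1; }
	#endif

	int main(void)
	{
	#ifdef USE_DMA
		if (dma_usable()) {
			printf("offloaded copy\n");
		} else
	#endif
		{
			/* only branch when USE_DMA is off,
			 * else-branch when it is on */
			printf("CPU copy\n");
		}
		return 0;
	}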
@@ -1355,15 +1391,19 @@ skip_copy:
 
 		if (skb->h.th->fin)
 			goto found_fin_ok;
-		if (!(flags & MSG_PEEK))
-			sk_eat_skb(sk, skb);
+		if (!(flags & MSG_PEEK)) {
+			sk_eat_skb(sk, skb, copied_early);
+			copied_early = 0;
+		}
 		continue;
 
 	found_fin_ok:
 		/* Process the FIN. */
 		++*seq;
-		if (!(flags & MSG_PEEK))
-			sk_eat_skb(sk, skb);
+		if (!(flags & MSG_PEEK)) {
+			sk_eat_skb(sk, skb, copied_early);
+			copied_early = 0;
+		}
 		break;
 	} while (len > 0);
 
@@ -1386,12 +1426,42 @@ skip_copy:
 		tp->ucopy.len = 0;
 	}
 
+#ifdef CONFIG_NET_DMA
+	if (tp->ucopy.dma_chan) {
+		struct sk_buff *skb;
+		dma_cookie_t done, used;
+
+		dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+
+		while (dma_async_memcpy_complete(tp->ucopy.dma_chan,
+						 tp->ucopy.dma_cookie, &done,
+						 &used) == DMA_IN_PROGRESS) {
+			/* do partial cleanup of sk_async_wait_queue */
+			while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
+			       (dma_async_is_complete(skb->dma_cookie, done,
+						      used) == DMA_SUCCESS)) {
+				__skb_dequeue(&sk->sk_async_wait_queue);
+				kfree_skb(skb);
+			}
+		}
+
+		/* Safe to free early-copied skbs now */
+		__skb_queue_purge(&sk->sk_async_wait_queue);
+		dma_chan_put(tp->ucopy.dma_chan);
+		tp->ucopy.dma_chan = NULL;
+	}
+	if (tp->ucopy.pinned_list) {
+		dma_unpin_iovec_pages(tp->ucopy.pinned_list);
+		tp->ucopy.pinned_list = NULL;
+	}
+#endif
+
 	/* According to UNIX98, msg_name/msg_namelen are ignored
 	 * on connected socket. I was just happy when found this 8) --ANK
 	 */
 
 	/* Clean up data we have read: This will do ACK frames. */
-	cleanup_rbuf(sk, copied);
+	tcp_cleanup_rbuf(sk, copied);
 
 	TCP_CHECK_TIMER(sk);
 	release_sock(sk);
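Before tcp_recvmsg() returns it must not leave any DMA copy outstanding: dma_async_memcpy_issue_pending() kicks the channel, and the loop polls dma_async_memcpy_complete(), freeing parked skbs as their cookies complete. Cookies increase monotonically (with wraparound), and each early-copied skb carries its own copy's cookie in skb->dma_cookie. The comparison helper in include/linux/dmaengine.h looks roughly like this (sketch; the second branch handles wraparound):

	static inline enum dma_status
	dma_async_is_complete(dma_cookie_t cookie, dma_cookie_t last_complete,
			      dma_cookie_t last_used)
	{
		if (last_complete <= last_used) {
			if ((cookie <= last_complete) || (cookie > last_used))
				return DMA_SUCCESS;
		} else {
			if ((cookie <= last_complete) && (cookie > last_used))
				return DMA_SUCCESS;
		}
		return DMA_IN_PROGRESS;
	}

Once the wait loop exits, every cookie up to dma_cookie is complete, so the remaining queue can be purged wholesale and the pinned user pages released.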
@@ -1658,6 +1728,9 @@ int tcp_disconnect(struct sock *sk, int flags)
 	__skb_queue_purge(&sk->sk_receive_queue);
 	sk_stream_writequeue_purge(sk);
 	__skb_queue_purge(&tp->out_of_order_queue);
+#ifdef CONFIG_NET_DMA
+	__skb_queue_purge(&sk->sk_async_wait_queue);
+#endif
 
 	inet->dport = 0;
 
@@ -1858,7 +1931,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		    (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
 		    inet_csk_ack_scheduled(sk)) {
 			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
-			cleanup_rbuf(sk, 1);
+			tcp_cleanup_rbuf(sk, 1);
 			if (!(val & 1))
 				icsk->icsk_ack.pingpong = 1;
 		}
