author     Chris Leech <christopher.leech@intel.com>    2006-05-23 21:05:53 -0400
committer  David S. Miller <davem@sunset.davemloft.net> 2006-06-18 00:25:56 -0400
commit     1a2449a87bb7606113b1aa1a9d3c3e78ef189a1c
tree       86e833a8694f201de138697646e7e8469e9c8ef6
parent     9593782585e0cf70babe787a8463d492a68b1744
[I/OAT]: TCP recv offload to I/OAT
Locks down user pages and sets up for DMA in tcp_recvmsg(), then calls
tcp_dma_try_early_copy() on the tcp_v4_do_rcv() path.
Signed-off-by: Chris Leech <christopher.leech@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
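
In outline: tcp_recvmsg() pins the pages backing the user iovec up front,
each received skb is then handed to an I/OAT channel for an asynchronous
copy into those pages, and the syscall waits for all outstanding copies
before returning. A condensed sketch of that flow using the names from the
patch below — illustrative pseudocode, not literal kernel source; locking
and error paths are omitted, and dma_chan_available() is a hypothetical
stand-in for the patch's __get_cpu_var(softnet_data.net_dma) test:

/* Entry to tcp_recvmsg(): decide whether to try hardware copy offload.
 * dma_chan_available() is a hypothetical stand-in, see note above. */
if (len > sysctl_tcp_dma_copybreak && !(flags & MSG_PEEK) &&
    !sysctl_tcp_low_latency && dma_chan_available())
	tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);

/* Per-skb copy loop: queue an async DMA copy instead of a CPU copy;
 * the returned cookie identifies the transfer. */
tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
		skb, offset, msg->msg_iov, used, tp->ucopy.pinned_list);

/* Before returning to user space: kick the channel, wait on the last
 * cookie, then free early-consumed skbs and unpin the user pages. */
dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
/* ... poll dma_async_memcpy_complete() while DMA_IN_PROGRESS ... */
__skb_queue_purge(&sk->sk_async_wait_queue);
dma_unpin_iovec_pages(tp->ucopy.pinned_list);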
 net/ipv4/tcp.c       | 103
 net/ipv4/tcp_input.c |  74
 net/ipv4/tcp_ipv4.c  |  18
 net/ipv6/tcp_ipv6.c  |  12
 4 files changed, 185 insertions(+), 22 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4e067d25a63c..ff6ccda9ff46 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -263,7 +263,7 @@
 #include <net/tcp.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
-
+#include <net/netdma.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -1110,6 +1110,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int target;		/* Read at least this many bytes */
 	long timeo;
 	struct task_struct *user_recv = NULL;
+	int copied_early = 0;
 
 	lock_sock(sk);
 
@@ -1133,6 +1134,17 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
+#ifdef CONFIG_NET_DMA
+	tp->ucopy.dma_chan = NULL;
+	preempt_disable();
+	if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
+	    !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) {
+		preempt_enable_no_resched();
+		tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
+	} else
+		preempt_enable_no_resched();
+#endif
+
 	do {
 		struct sk_buff *skb;
 		u32 offset;
@@ -1274,6 +1286,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		} else
 			sk_wait_data(sk, &timeo);
 
+#ifdef CONFIG_NET_DMA
+		tp->ucopy.wakeup = 0;
+#endif
+
 		if (user_recv) {
 			int chunk;
 
@@ -1329,13 +1345,39 @@ do_prequeue:
 		}
 
 		if (!(flags & MSG_TRUNC)) {
-			err = skb_copy_datagram_iovec(skb, offset,
-						      msg->msg_iov, used);
-			if (err) {
-				/* Exception. Bailout! */
-				if (!copied)
-					copied = -EFAULT;
-				break;
+#ifdef CONFIG_NET_DMA
+			if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+				tp->ucopy.dma_chan = get_softnet_dma();
+
+			if (tp->ucopy.dma_chan) {
+				tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
+					tp->ucopy.dma_chan, skb, offset,
+					msg->msg_iov, used,
+					tp->ucopy.pinned_list);
+
+				if (tp->ucopy.dma_cookie < 0) {
+
+					printk(KERN_ALERT "dma_cookie < 0\n");
+
+					/* Exception. Bailout! */
+					if (!copied)
+						copied = -EFAULT;
+					break;
+				}
+				if ((offset + used) == skb->len)
+					copied_early = 1;
+
+			} else
+#endif
+			{
+				err = skb_copy_datagram_iovec(skb, offset,
+						msg->msg_iov, used);
+				if (err) {
+					/* Exception. Bailout! */
+					if (!copied)
+						copied = -EFAULT;
+					break;
+				}
 			}
 		}
 
@@ -1355,15 +1397,19 @@ skip_copy:
 
 		if (skb->h.th->fin)
 			goto found_fin_ok;
-		if (!(flags & MSG_PEEK))
-			sk_eat_skb(sk, skb, 0);
+		if (!(flags & MSG_PEEK)) {
+			sk_eat_skb(sk, skb, copied_early);
+			copied_early = 0;
+		}
 		continue;
 
 	found_fin_ok:
 		/* Process the FIN. */
 		++*seq;
-		if (!(flags & MSG_PEEK))
-			sk_eat_skb(sk, skb, 0);
+		if (!(flags & MSG_PEEK)) {
+			sk_eat_skb(sk, skb, copied_early);
+			copied_early = 0;
+		}
 		break;
 	} while (len > 0);
 
@@ -1386,6 +1432,36 @@ skip_copy:
 		tp->ucopy.len = 0;
 	}
 
+#ifdef CONFIG_NET_DMA
+	if (tp->ucopy.dma_chan) {
+		struct sk_buff *skb;
+		dma_cookie_t done, used;
+
+		dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+
+		while (dma_async_memcpy_complete(tp->ucopy.dma_chan,
+						 tp->ucopy.dma_cookie, &done,
+						 &used) == DMA_IN_PROGRESS) {
+			/* do partial cleanup of sk_async_wait_queue */
+			while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
+			       (dma_async_is_complete(skb->dma_cookie, done,
+						      used) == DMA_SUCCESS)) {
+				__skb_dequeue(&sk->sk_async_wait_queue);
+				kfree_skb(skb);
+			}
+		}
+
+		/* Safe to free early-copied skbs now */
+		__skb_queue_purge(&sk->sk_async_wait_queue);
+		dma_chan_put(tp->ucopy.dma_chan);
+		tp->ucopy.dma_chan = NULL;
+	}
+	if (tp->ucopy.pinned_list) {
+		dma_unpin_iovec_pages(tp->ucopy.pinned_list);
+		tp->ucopy.pinned_list = NULL;
+	}
+#endif
+
 	/* According to UNIX98, msg_name/msg_namelen are ignored
 	 * on connected socket. I was just happy when found this 8) --ANK
 	 */
@@ -1658,6 +1734,9 @@ int tcp_disconnect(struct sock *sk, int flags)
 	__skb_queue_purge(&sk->sk_receive_queue);
 	sk_stream_writequeue_purge(sk);
 	__skb_queue_purge(&tp->out_of_order_queue);
+#ifdef CONFIG_NET_DMA
+	__skb_queue_purge(&sk->sk_async_wait_queue);
+#endif
 
 	inet->dport = 0;
 
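
The cleanup block at the end of tcp_recvmsg() relies on DMA cookies being
issued in increasing order: while waiting for the last cookie, any skb on
sk_async_wait_queue whose own dma_cookie has already completed can be freed
immediately. A small self-contained model of that ordering check — plain
userspace C with assumed, simplified semantics for dma_async_is_complete(),
not the kernel's dmaengine API:

#include <stdio.h>

enum dma_status { DMA_SUCCESS, DMA_IN_PROGRESS };

/* Simplified model of dma_async_is_complete(): a transfer is done once
 * the channel's last-completed cookie has reached it (wrap-around of
 * the cookie space is ignored here for clarity). */
static enum dma_status is_complete(int cookie, int last_complete)
{
	return cookie <= last_complete ? DMA_SUCCESS : DMA_IN_PROGRESS;
}

int main(void)
{
	int queued[] = { 1, 2, 3, 4, 5 };	/* skb->dma_cookie values */
	int done = 3;		/* channel reports cookie 3 completed */
	int i;

	for (i = 0; i < 5; i++)
		if (is_complete(queued[i], done) == DMA_SUCCESS)
			printf("cookie %d complete, skb can be freed\n",
			       queued[i]);
	return 0;
}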
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b5521a9d3dc1..c6d62f0a9966 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -71,6 +71,7 @@
 #include <net/inet_common.h>
 #include <linux/ipsec.h>
 #include <asm/unaligned.h>
+#include <net/netdma.h>
 
 int sysctl_tcp_timestamps = 1;
 int sysctl_tcp_window_scaling = 1;
@@ -3785,6 +3786,50 @@ static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
 		__tcp_checksum_complete_user(sk, skb);
 }
 
+#ifdef CONFIG_NET_DMA
+static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int chunk = skb->len - hlen;
+	int dma_cookie;
+	int copied_early = 0;
+
+	if (tp->ucopy.wakeup)
+		return 0;
+
+	if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+		tp->ucopy.dma_chan = get_softnet_dma();
+
+	if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) {
+
+		dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
+			skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
+
+		if (dma_cookie < 0)
+			goto out;
+
+		tp->ucopy.dma_cookie = dma_cookie;
+		copied_early = 1;
+
+		tp->ucopy.len -= chunk;
+		tp->copied_seq += chunk;
+		tcp_rcv_space_adjust(sk);
+
+		if ((tp->ucopy.len == 0) ||
+		    (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) ||
+		    (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
+			tp->ucopy.wakeup = 1;
+			sk->sk_data_ready(sk, 0);
+		}
+	} else if (chunk > 0) {
+		tp->ucopy.wakeup = 1;
+		sk->sk_data_ready(sk, 0);
+	}
+out:
+	return copied_early;
+}
+#endif /* CONFIG_NET_DMA */
+
 /*
  *	TCP receive function for the ESTABLISHED state.
  *
@@ -3901,14 +3946,23 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			}
 		} else {
 			int eaten = 0;
+			int copied_early = 0;
 
-			if (tp->ucopy.task == current &&
-			    tp->copied_seq == tp->rcv_nxt &&
-			    len - tcp_header_len <= tp->ucopy.len &&
-			    sock_owned_by_user(sk)) {
-				__set_current_state(TASK_RUNNING);
+			if (tp->copied_seq == tp->rcv_nxt &&
+			    len - tcp_header_len <= tp->ucopy.len) {
+#ifdef CONFIG_NET_DMA
+				if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
+					copied_early = 1;
+					eaten = 1;
+				}
+#endif
+				if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) {
+					__set_current_state(TASK_RUNNING);
 
-				if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
+					if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
+						eaten = 1;
+				}
+				if (eaten) {
 					/* Predicted packet is in window by definition.
 					 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
 					 * Hence, check seq<=rcv_wup reduces to:
@@ -3924,8 +3978,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				__skb_pull(skb, tcp_header_len);
 				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 				NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER);
-				eaten = 1;
 			}
+			if (copied_early)
+				tcp_cleanup_rbuf(sk, skb->len);
 		}
 		if (!eaten) {
 			if (tcp_checksum_complete_user(sk, skb))
@@ -3966,6 +4021,11 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 		__tcp_ack_snd_check(sk, 0);
 no_ack:
+#ifdef CONFIG_NET_DMA
+		if (copied_early)
+			__skb_queue_tail(&sk->sk_async_wait_queue, skb);
+		else
+#endif
 		if (eaten)
 			__kfree_skb(skb);
 		else
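
A note on the wakeup logic in tcp_dma_try_early_copy(): because the copy is
queued in softirq context while the reader sleeps, the reader only needs to
be woken when its request is fully satisfied, the sender pushed data, or the
receive buffer is filling up. The three-way predicate, isolated here for
illustration with field names as in the patch but simplified scalar types —
not a kernel function:

/* When should the softirq-side early copy wake the sleeping reader?
 * Mirrors the three-way test in tcp_dma_try_early_copy(). */
static int should_wake_reader(int ucopy_len, int saw_psh,
			      int rmem_alloc, int rcvbuf)
{
	return ucopy_len == 0 ||		/* request fully satisfied */
	       saw_psh ||			/* sender set TCP_FLAG_PSH */
	       rmem_alloc > (rcvbuf >> 1);	/* rcvbuf over half full */
}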
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 672950e54c49..25ecc6e2478b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -71,6 +71,7 @@
 #include <net/inet_common.h>
 #include <net/timewait_sock.h>
 #include <net/xfrm.h>
+#include <net/netdma.h>
 
 #include <linux/inet.h>
 #include <linux/ipv6.h>
@@ -1091,8 +1092,18 @@ process:
 	bh_lock_sock(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-		if (!tcp_prequeue(sk, skb))
+#ifdef CONFIG_NET_DMA
+		struct tcp_sock *tp = tcp_sk(sk);
+		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+			tp->ucopy.dma_chan = get_softnet_dma();
+		if (tp->ucopy.dma_chan)
 			ret = tcp_v4_do_rcv(sk, skb);
+		else
+#endif
+		{
+			if (!tcp_prequeue(sk, skb))
+				ret = tcp_v4_do_rcv(sk, skb);
+		}
 	} else
 		sk_add_backlog(sk, skb);
 	bh_unlock_sock(sk);
@@ -1296,6 +1307,11 @@ int tcp_v4_destroy_sock(struct sock *sk)
 	/* Cleans up our, hopefully empty, out_of_order_queue. */
 	__skb_queue_purge(&tp->out_of_order_queue);
 
+#ifdef CONFIG_NET_DMA
+	/* Cleans up our sk_async_wait_queue */
+	__skb_queue_purge(&sk->sk_async_wait_queue);
+#endif
+
 	/* Clean prequeue, it must be empty really */
 	__skb_queue_purge(&tp->ucopy.prequeue);
 
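
The dispatch change in tcp_v4_rcv() is what makes early copies possible at
all: normally a segment sits on the prequeue until the reader runs, but once
a DMA channel is held for the socket, the segment must be processed
immediately in softirq context so the async copy can be issued. Condensed to
a single hypothetical dispatch function — illustrative only; the real code
is open-coded under #ifdef CONFIG_NET_DMA as shown above:

/* Sketch of the receive-dispatch decision in tcp_v4_rcv(). */
static int dispatch(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->ucopy.dma_chan)			/* async copy in flight: */
		return tcp_v4_do_rcv(sk, skb);	/* process in softirq now */
	if (!tcp_prequeue(sk, skb))		/* otherwise try prequeue */
		return tcp_v4_do_rcv(sk, skb);
	return 0;				/* queued for the reader */
}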
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 301eee726b0f..a50eb306e9e2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1218,8 +1218,16 @@ process:
 	bh_lock_sock(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-		if (!tcp_prequeue(sk, skb))
-			ret = tcp_v6_do_rcv(sk, skb);
+#ifdef CONFIG_NET_DMA
+		struct tcp_sock *tp = tcp_sk(sk);
+		if (tp->ucopy.dma_chan)
+			ret = tcp_v6_do_rcv(sk, skb);
+		else
+#endif
+		{
+			if (!tcp_prequeue(sk, skb))
+				ret = tcp_v6_do_rcv(sk, skb);
+		}
 	} else
 		sk_add_backlog(sk, skb);
 	bh_unlock_sock(sk);
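
Taken together, the offload only engages when every gate passes: the read is
larger than sysctl_tcp_dma_copybreak (the knob itself is introduced elsewhere
in this patch series), the caller is not peeking, tcp_low_latency is off so
deferred processing is allowed, and a net_dma channel is available. The same
checks gathered into one summary predicate — not a kernel function:

/* All conditions for the I/OAT receive path, in one place. */
static int tcp_recv_wants_dma(size_t len, int flags, int chan_present)
{
	return len > sysctl_tcp_dma_copybreak &&  /* read is large enough */
	       !(flags & MSG_PEEK) &&             /* data will be consumed */
	       !sysctl_tcp_low_latency &&         /* deferred rx allowed */
	       chan_present;                      /* net_dma channel free */
}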