aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Leech <christopher.leech@intel.com> 2006-05-23 21:05:53 -0400
committer David S. Miller <davem@sunset.davemloft.net> 2006-06-18 00:25:56 -0400
commit 1a2449a87bb7606113b1aa1a9d3c3e78ef189a1c (patch)
tree 86e833a8694f201de138697646e7e8469e9c8ef6
parent 9593782585e0cf70babe787a8463d492a68b1744 (diff)
[I/OAT]: TCP recv offload to I/OAT
Locks down user pages and sets up for DMA in tcp_recvmsg, then calls dma_async_try_early_copy in tcp_v4_do_rcv.

Signed-off-by: Chris Leech <christopher.leech@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- net/ipv4/tcp.c 103
-rw-r--r-- net/ipv4/tcp_input.c 74
-rw-r--r-- net/ipv4/tcp_ipv4.c 18
-rw-r--r-- net/ipv6/tcp_ipv6.c 12
4 files changed, 185 insertions, 22 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4e067d25a63c..ff6ccda9ff46 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -263,7 +263,7 @@
263#include <net/tcp.h> 263#include <net/tcp.h>
264#include <net/xfrm.h> 264#include <net/xfrm.h>
265#include <net/ip.h> 265#include <net/ip.h>
266 266#include <net/netdma.h>
267 267
268#include <asm/uaccess.h> 268#include <asm/uaccess.h>
269#include <asm/ioctls.h> 269#include <asm/ioctls.h>
@@ -1110,6 +1110,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1110 int target; /* Read at least this many bytes */ 1110 int target; /* Read at least this many bytes */
1111 long timeo; 1111 long timeo;
1112 struct task_struct *user_recv = NULL; 1112 struct task_struct *user_recv = NULL;
1113 int copied_early = 0;
1113 1114
1114 lock_sock(sk); 1115 lock_sock(sk);
1115 1116
@@ -1133,6 +1134,17 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1133 1134
1134 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 1135 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1135 1136
1137#ifdef CONFIG_NET_DMA
1138 tp->ucopy.dma_chan = NULL;
1139 preempt_disable();
1140 if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
1141 !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) {
1142 preempt_enable_no_resched();
1143 tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
1144 } else
1145 preempt_enable_no_resched();
1146#endif
1147
1136 do { 1148 do {
1137 struct sk_buff *skb; 1149 struct sk_buff *skb;
1138 u32 offset; 1150 u32 offset;
@@ -1274,6 +1286,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1274 } else 1286 } else
1275 sk_wait_data(sk, &timeo); 1287 sk_wait_data(sk, &timeo);
1276 1288
1289#ifdef CONFIG_NET_DMA
1290 tp->ucopy.wakeup = 0;
1291#endif
1292
1277 if (user_recv) { 1293 if (user_recv) {
1278 int chunk; 1294 int chunk;
1279 1295
@@ -1329,13 +1345,39 @@ do_prequeue:
1329 } 1345 }
1330 1346
1331 if (!(flags & MSG_TRUNC)) { 1347 if (!(flags & MSG_TRUNC)) {
1332 err = skb_copy_datagram_iovec(skb, offset, 1348#ifdef CONFIG_NET_DMA
1333 msg->msg_iov, used); 1349 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1334 if (err) { 1350 tp->ucopy.dma_chan = get_softnet_dma();
1335 /* Exception. Bailout! */ 1351
1336 if (!copied) 1352 if (tp->ucopy.dma_chan) {
1337 copied = -EFAULT; 1353 tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
1338 break; 1354 tp->ucopy.dma_chan, skb, offset,
1355 msg->msg_iov, used,
1356 tp->ucopy.pinned_list);
1357
1358 if (tp->ucopy.dma_cookie < 0) {
1359
1360 printk(KERN_ALERT "dma_cookie < 0\n");
1361
1362 /* Exception. Bailout! */
1363 if (!copied)
1364 copied = -EFAULT;
1365 break;
1366 }
1367 if ((offset + used) == skb->len)
1368 copied_early = 1;
1369
1370 } else
1371#endif
1372 {
1373 err = skb_copy_datagram_iovec(skb, offset,
1374 msg->msg_iov, used);
1375 if (err) {
1376 /* Exception. Bailout! */
1377 if (!copied)
1378 copied = -EFAULT;
1379 break;
1380 }
1339 } 1381 }
1340 } 1382 }
1341 1383
@@ -1355,15 +1397,19 @@ skip_copy:
1355 1397
1356 if (skb->h.th->fin) 1398 if (skb->h.th->fin)
1357 goto found_fin_ok; 1399 goto found_fin_ok;
1358 if (!(flags & MSG_PEEK)) 1400 if (!(flags & MSG_PEEK)) {
1359 sk_eat_skb(sk, skb, 0); 1401 sk_eat_skb(sk, skb, copied_early);
1402 copied_early = 0;
1403 }
1360 continue; 1404 continue;
1361 1405
1362 found_fin_ok: 1406 found_fin_ok:
1363 /* Process the FIN. */ 1407 /* Process the FIN. */
1364 ++*seq; 1408 ++*seq;
1365 if (!(flags & MSG_PEEK)) 1409 if (!(flags & MSG_PEEK)) {
1366 sk_eat_skb(sk, skb, 0); 1410 sk_eat_skb(sk, skb, copied_early);
1411 copied_early = 0;
1412 }
1367 break; 1413 break;
1368 } while (len > 0); 1414 } while (len > 0);
1369 1415
@@ -1386,6 +1432,36 @@ skip_copy:
1386 tp->ucopy.len = 0; 1432 tp->ucopy.len = 0;
1387 } 1433 }
1388 1434
1435#ifdef CONFIG_NET_DMA
1436 if (tp->ucopy.dma_chan) {
1437 struct sk_buff *skb;
1438 dma_cookie_t done, used;
1439
1440 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1441
1442 while (dma_async_memcpy_complete(tp->ucopy.dma_chan,
1443 tp->ucopy.dma_cookie, &done,
1444 &used) == DMA_IN_PROGRESS) {
1445 /* do partial cleanup of sk_async_wait_queue */
1446 while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
1447 (dma_async_is_complete(skb->dma_cookie, done,
1448 used) == DMA_SUCCESS)) {
1449 __skb_dequeue(&sk->sk_async_wait_queue);
1450 kfree_skb(skb);
1451 }
1452 }
1453
1454 /* Safe to free early-copied skbs now */
1455 __skb_queue_purge(&sk->sk_async_wait_queue);
1456 dma_chan_put(tp->ucopy.dma_chan);
1457 tp->ucopy.dma_chan = NULL;
1458 }
1459 if (tp->ucopy.pinned_list) {
1460 dma_unpin_iovec_pages(tp->ucopy.pinned_list);
1461 tp->ucopy.pinned_list = NULL;
1462 }
1463#endif
1464
1389 /* According to UNIX98, msg_name/msg_namelen are ignored 1465 /* According to UNIX98, msg_name/msg_namelen are ignored
1390 * on connected socket. I was just happy when found this 8) --ANK 1466 * on connected socket. I was just happy when found this 8) --ANK
1391 */ 1467 */
@@ -1658,6 +1734,9 @@ int tcp_disconnect(struct sock *sk, int flags)
1658 __skb_queue_purge(&sk->sk_receive_queue); 1734 __skb_queue_purge(&sk->sk_receive_queue);
1659 sk_stream_writequeue_purge(sk); 1735 sk_stream_writequeue_purge(sk);
1660 __skb_queue_purge(&tp->out_of_order_queue); 1736 __skb_queue_purge(&tp->out_of_order_queue);
1737#ifdef CONFIG_NET_DMA
1738 __skb_queue_purge(&sk->sk_async_wait_queue);
1739#endif
1661 1740
1662 inet->dport = 0; 1741 inet->dport = 0;
1663 1742
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b5521a9d3dc1..c6d62f0a9966 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -71,6 +71,7 @@
71#include <net/inet_common.h> 71#include <net/inet_common.h>
72#include <linux/ipsec.h> 72#include <linux/ipsec.h>
73#include <asm/unaligned.h> 73#include <asm/unaligned.h>
74#include <net/netdma.h>
74 75
75int sysctl_tcp_timestamps = 1; 76int sysctl_tcp_timestamps = 1;
76int sysctl_tcp_window_scaling = 1; 77int sysctl_tcp_window_scaling = 1;
@@ -3785,6 +3786,50 @@ static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *sk
3785 __tcp_checksum_complete_user(sk, skb); 3786 __tcp_checksum_complete_user(sk, skb);
3786} 3787}
3787 3788
3789#ifdef CONFIG_NET_DMA
/*
 * tcp_dma_try_early_copy - try to queue a DMA copy of an skb's payload
 * into the iovec recorded in tp->ucopy.
 *
 * @sk:   socket the segment belongs to
 * @skb:  received segment
 * @hlen: offset into @skb at which the data to copy starts; the number
 *        of bytes copied is skb->len - hlen
 *
 * Returns 1 when a copy was queued on tp->ucopy.dma_chan, in which case
 * tp->ucopy.dma_cookie, tp->ucopy.len and tp->copied_seq have been
 * updated. Returns 0 otherwise: a wakeup is already pending, no channel
 * is available, the skb is not marked CHECKSUM_UNNECESSARY, or
 * dma_skb_copy_datagram_iovec() returned a negative cookie.
 */
3790static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
3791{
3792 struct tcp_sock *tp = tcp_sk(sk);
3793 int chunk = skb->len - hlen;
3794 int dma_cookie;
3795 int copied_early = 0;
3796
/* A wakeup has already been signalled; do not start another early copy. */
3797 if (tp->ucopy.wakeup)
3798 return 0;
3799
/* Acquire a channel only if none is held yet and a pinned page list exists. */
3800 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
3801 tp->ucopy.dma_chan = get_softnet_dma();
3802
/* Only skbs flagged CHECKSUM_UNNECESSARY take the DMA copy path. */
3803 if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) {
3804
3805 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
3806 skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
3807
/* Negative cookie: leave all ucopy state untouched and report "not copied". */
3808 if (dma_cookie < 0)
3809 goto out;
3810
3811 tp->ucopy.dma_cookie = dma_cookie;
3812 copied_early = 1;
3813
/* Account for the queued bytes as if they had already been copied. */
3814 tp->ucopy.len -= chunk;
3815 tp->copied_seq += chunk;
3816 tcp_rcv_space_adjust(sk);
3817
/*
 * Signal data-ready when the requested length is exhausted, the segment
 * carries PSH, or sk_rmem_alloc exceeds half of sk_rcvbuf.
 */
3818 if ((tp->ucopy.len == 0) ||
3819 (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) ||
3820 (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
3821 tp->ucopy.wakeup = 1;
3822 sk->sk_data_ready(sk, 0);
3823 }
/* No DMA copy possible but there is data: signal data-ready instead. */
3824 } else if (chunk > 0) {
3825 tp->ucopy.wakeup = 1;
3826 sk->sk_data_ready(sk, 0);
3827 }
3828out:
3829 return copied_early;
3830}
3831#endif /* CONFIG_NET_DMA */
3832
3788/* 3833/*
3789 * TCP receive function for the ESTABLISHED state. 3834 * TCP receive function for the ESTABLISHED state.
3790 * 3835 *
@@ -3901,14 +3946,23 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
3901 } 3946 }
3902 } else { 3947 } else {
3903 int eaten = 0; 3948 int eaten = 0;
3949 int copied_early = 0;
3904 3950
3905 if (tp->ucopy.task == current && 3951 if (tp->copied_seq == tp->rcv_nxt &&
3906 tp->copied_seq == tp->rcv_nxt && 3952 len - tcp_header_len <= tp->ucopy.len) {
3907 len - tcp_header_len <= tp->ucopy.len && 3953#ifdef CONFIG_NET_DMA
3908 sock_owned_by_user(sk)) { 3954 if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
3909 __set_current_state(TASK_RUNNING); 3955 copied_early = 1;
3956 eaten = 1;
3957 }
3958#endif
3959 if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) {
3960 __set_current_state(TASK_RUNNING);
3910 3961
3911 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { 3962 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
3963 eaten = 1;
3964 }
3965 if (eaten) {
3912 /* Predicted packet is in window by definition. 3966 /* Predicted packet is in window by definition.
3913 * seq == rcv_nxt and rcv_wup <= rcv_nxt. 3967 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
3914 * Hence, check seq<=rcv_wup reduces to: 3968 * Hence, check seq<=rcv_wup reduces to:
@@ -3924,8 +3978,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
3924 __skb_pull(skb, tcp_header_len); 3978 __skb_pull(skb, tcp_header_len);
3925 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 3979 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
3926 NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER); 3980 NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER);
3927 eaten = 1;
3928 } 3981 }
3982 if (copied_early)
3983 tcp_cleanup_rbuf(sk, skb->len);
3929 } 3984 }
3930 if (!eaten) { 3985 if (!eaten) {
3931 if (tcp_checksum_complete_user(sk, skb)) 3986 if (tcp_checksum_complete_user(sk, skb))
@@ -3966,6 +4021,11 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
3966 4021
3967 __tcp_ack_snd_check(sk, 0); 4022 __tcp_ack_snd_check(sk, 0);
3968no_ack: 4023no_ack:
4024#ifdef CONFIG_NET_DMA
4025 if (copied_early)
4026 __skb_queue_tail(&sk->sk_async_wait_queue, skb);
4027 else
4028#endif
3969 if (eaten) 4029 if (eaten)
3970 __kfree_skb(skb); 4030 __kfree_skb(skb);
3971 else 4031 else
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 672950e54c49..25ecc6e2478b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -71,6 +71,7 @@
71#include <net/inet_common.h> 71#include <net/inet_common.h>
72#include <net/timewait_sock.h> 72#include <net/timewait_sock.h>
73#include <net/xfrm.h> 73#include <net/xfrm.h>
74#include <net/netdma.h>
74 75
75#include <linux/inet.h> 76#include <linux/inet.h>
76#include <linux/ipv6.h> 77#include <linux/ipv6.h>
@@ -1091,8 +1092,18 @@ process:
1091 bh_lock_sock(sk); 1092 bh_lock_sock(sk);
1092 ret = 0; 1093 ret = 0;
1093 if (!sock_owned_by_user(sk)) { 1094 if (!sock_owned_by_user(sk)) {
1094 if (!tcp_prequeue(sk, skb)) 1095#ifdef CONFIG_NET_DMA
1096 struct tcp_sock *tp = tcp_sk(sk);
1097 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1098 tp->ucopy.dma_chan = get_softnet_dma();
1099 if (tp->ucopy.dma_chan)
1095 ret = tcp_v4_do_rcv(sk, skb); 1100 ret = tcp_v4_do_rcv(sk, skb);
1101 else
1102#endif
1103 {
1104 if (!tcp_prequeue(sk, skb))
1105 ret = tcp_v4_do_rcv(sk, skb);
1106 }
1096 } else 1107 } else
1097 sk_add_backlog(sk, skb); 1108 sk_add_backlog(sk, skb);
1098 bh_unlock_sock(sk); 1109 bh_unlock_sock(sk);
@@ -1296,6 +1307,11 @@ int tcp_v4_destroy_sock(struct sock *sk)
1296 /* Cleans up our, hopefully empty, out_of_order_queue. */ 1307 /* Cleans up our, hopefully empty, out_of_order_queue. */
1297 __skb_queue_purge(&tp->out_of_order_queue); 1308 __skb_queue_purge(&tp->out_of_order_queue);
1298 1309
1310#ifdef CONFIG_NET_DMA
1311 /* Cleans up our sk_async_wait_queue */
1312 __skb_queue_purge(&sk->sk_async_wait_queue);
1313#endif
1314
1299 /* Clean prequeue, it must be empty really */ 1315 /* Clean prequeue, it must be empty really */
1300 __skb_queue_purge(&tp->ucopy.prequeue); 1316 __skb_queue_purge(&tp->ucopy.prequeue);
1301 1317
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 301eee726b0f..a50eb306e9e2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1218,8 +1218,16 @@ process:
1218 bh_lock_sock(sk); 1218 bh_lock_sock(sk);
1219 ret = 0; 1219 ret = 0;
1220 if (!sock_owned_by_user(sk)) { 1220 if (!sock_owned_by_user(sk)) {
1221 if (!tcp_prequeue(sk, skb)) 1221#ifdef CONFIG_NET_DMA
1222 ret = tcp_v6_do_rcv(sk, skb); 1222 struct tcp_sock *tp = tcp_sk(sk);
1223 if (tp->ucopy.dma_chan)
1224 ret = tcp_v6_do_rcv(sk, skb);
1225 else
1226#endif
1227 {
1228 if (!tcp_prequeue(sk, skb))
1229 ret = tcp_v6_do_rcv(sk, skb);
1230 }
1223 } else 1231 } else
1224 sk_add_backlog(sk, skb); 1232 sk_add_backlog(sk, skb);
1225 bh_unlock_sock(sk); 1233 bh_unlock_sock(sk);