aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTrond Myklebust <trond.myklebust@primarydata.com>2015-10-05 10:53:49 -0400
committerTrond Myklebust <trond.myklebust@primarydata.com>2015-10-08 08:27:04 -0400
commitedc1b01cd3b20a5fff049e98f82a2b0d24a34c89 (patch)
tree1f6a52705eeec5ef5e450963cac1d7e38905f446
parent66d7a56a6254389587d0999dcaab1d2634cd4e24 (diff)
SUNRPC: Move TCP receive data path into a workqueue context
Stream protocols such as TCP can often build up a backlog of data to be read due to ordering. Combine this with the fact that some workloads such as NFS read()-intensive workloads need to receive a lot of data per RPC call, and it turns out that receiving the data from inside a softirq context can cause starvation. The following patch moves the TCP data receive into a workqueue context. We still end up calling tcp_read_sock(), but we do so from a process context, meaning that softirqs are enabled for most of the time. With this patch, I see a doubling of read bandwidth when running a multi-threaded iozone workload between a virtual client and server setup. Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
-rw-r--r--include/linux/sunrpc/xprtsock.h2
-rw-r--r--net/sunrpc/xprtsock.c51
2 files changed, 38 insertions, 15 deletions
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 357e44c1a46b..0ece4ba06f06 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -44,6 +44,8 @@ struct sock_xprt {
44 */ 44 */
45 unsigned long sock_state; 45 unsigned long sock_state;
46 struct delayed_work connect_worker; 46 struct delayed_work connect_worker;
47 struct work_struct recv_worker;
48 struct mutex recv_mutex;
47 struct sockaddr_storage srcaddr; 49 struct sockaddr_storage srcaddr;
48 unsigned short srcport; 50 unsigned short srcport;
49 51
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fa8d0c15c8cd..58dc90ccebb6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -823,6 +823,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
823 823
824 kernel_sock_shutdown(sock, SHUT_RDWR); 824 kernel_sock_shutdown(sock, SHUT_RDWR);
825 825
826 mutex_lock(&transport->recv_mutex);
826 write_lock_bh(&sk->sk_callback_lock); 827 write_lock_bh(&sk->sk_callback_lock);
827 transport->inet = NULL; 828 transport->inet = NULL;
828 transport->sock = NULL; 829 transport->sock = NULL;
@@ -833,6 +834,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
833 xprt_clear_connected(xprt); 834 xprt_clear_connected(xprt);
834 write_unlock_bh(&sk->sk_callback_lock); 835 write_unlock_bh(&sk->sk_callback_lock);
835 xs_sock_reset_connection_flags(xprt); 836 xs_sock_reset_connection_flags(xprt);
837 mutex_unlock(&transport->recv_mutex);
836 838
837 trace_rpc_socket_close(xprt, sock); 839 trace_rpc_socket_close(xprt, sock);
838 sock_release(sock); 840 sock_release(sock);
@@ -886,6 +888,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
886 888
887 cancel_delayed_work_sync(&transport->connect_worker); 889 cancel_delayed_work_sync(&transport->connect_worker);
888 xs_close(xprt); 890 xs_close(xprt);
891 cancel_work_sync(&transport->recv_worker);
889 xs_xprt_free(xprt); 892 xs_xprt_free(xprt);
890 module_put(THIS_MODULE); 893 module_put(THIS_MODULE);
891} 894}
@@ -1243,12 +1246,12 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
1243 dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); 1246 dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid));
1244 1247
1245 /* Find and lock the request corresponding to this xid */ 1248 /* Find and lock the request corresponding to this xid */
1246 spin_lock(&xprt->transport_lock); 1249 spin_lock_bh(&xprt->transport_lock);
1247 req = xprt_lookup_rqst(xprt, transport->tcp_xid); 1250 req = xprt_lookup_rqst(xprt, transport->tcp_xid);
1248 if (!req) { 1251 if (!req) {
1249 dprintk("RPC: XID %08x request not found!\n", 1252 dprintk("RPC: XID %08x request not found!\n",
1250 ntohl(transport->tcp_xid)); 1253 ntohl(transport->tcp_xid));
1251 spin_unlock(&xprt->transport_lock); 1254 spin_unlock_bh(&xprt->transport_lock);
1252 return -1; 1255 return -1;
1253 } 1256 }
1254 1257
@@ -1257,7 +1260,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
1257 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) 1260 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
1258 xprt_complete_rqst(req->rq_task, transport->tcp_copied); 1261 xprt_complete_rqst(req->rq_task, transport->tcp_copied);
1259 1262
1260 spin_unlock(&xprt->transport_lock); 1263 spin_unlock_bh(&xprt->transport_lock);
1261 return 0; 1264 return 0;
1262} 1265}
1263 1266
@@ -1277,10 +1280,10 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
1277 struct rpc_rqst *req; 1280 struct rpc_rqst *req;
1278 1281
1279 /* Look up and lock the request corresponding to the given XID */ 1282 /* Look up and lock the request corresponding to the given XID */
1280 spin_lock(&xprt->transport_lock); 1283 spin_lock_bh(&xprt->transport_lock);
1281 req = xprt_lookup_bc_request(xprt, transport->tcp_xid); 1284 req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
1282 if (req == NULL) { 1285 if (req == NULL) {
1283 spin_unlock(&xprt->transport_lock); 1286 spin_unlock_bh(&xprt->transport_lock);
1284 printk(KERN_WARNING "Callback slot table overflowed\n"); 1287 printk(KERN_WARNING "Callback slot table overflowed\n");
1285 xprt_force_disconnect(xprt); 1288 xprt_force_disconnect(xprt);
1286 return -1; 1289 return -1;
@@ -1291,7 +1294,7 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
1291 1294
1292 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) 1295 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
1293 xprt_complete_bc_request(req, transport->tcp_copied); 1296 xprt_complete_bc_request(req, transport->tcp_copied);
1294 spin_unlock(&xprt->transport_lock); 1297 spin_unlock_bh(&xprt->transport_lock);
1295 1298
1296 return 0; 1299 return 0;
1297} 1300}
@@ -1402,19 +1405,33 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
1402 unsigned long total = 0; 1405 unsigned long total = 0;
1403 int read = 0; 1406 int read = 0;
1404 1407
1408 mutex_lock(&transport->recv_mutex);
1405 sk = transport->inet; 1409 sk = transport->inet;
1410 if (sk == NULL)
1411 goto out;
1406 1412
1407 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ 1413 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
1408 for (;;) { 1414 for (;;) {
1415 lock_sock(sk);
1409 read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); 1416 read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
1417 release_sock(sk);
1410 if (read <= 0) 1418 if (read <= 0)
1411 break; 1419 break;
1412 total += read; 1420 total += read;
1413 rd_desc.count = 65536; 1421 rd_desc.count = 65536;
1414 } 1422 }
1423out:
1424 mutex_unlock(&transport->recv_mutex);
1415 trace_xs_tcp_data_ready(xprt, read, total); 1425 trace_xs_tcp_data_ready(xprt, read, total);
1416} 1426}
1417 1427
1428static void xs_tcp_data_receive_workfn(struct work_struct *work)
1429{
1430 struct sock_xprt *transport =
1431 container_of(work, struct sock_xprt, recv_worker);
1432 xs_tcp_data_receive(transport);
1433}
1434
1418/** 1435/**
1419 * xs_tcp_data_ready - "data ready" callback for TCP sockets 1436 * xs_tcp_data_ready - "data ready" callback for TCP sockets
1420 * @sk: socket with data to read 1437 * @sk: socket with data to read
@@ -1437,8 +1454,8 @@ static void xs_tcp_data_ready(struct sock *sk)
1437 */ 1454 */
1438 if (xprt->reestablish_timeout) 1455 if (xprt->reestablish_timeout)
1439 xprt->reestablish_timeout = 0; 1456 xprt->reestablish_timeout = 0;
1457 queue_work(rpciod_workqueue, &transport->recv_worker);
1440 1458
1441 xs_tcp_data_receive(transport);
1442out: 1459out:
1443 read_unlock_bh(&sk->sk_callback_lock); 1460 read_unlock_bh(&sk->sk_callback_lock);
1444} 1461}
@@ -1840,6 +1857,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
1840} 1857}
1841#endif 1858#endif
1842 1859
1860static void xs_dummy_data_receive_workfn(struct work_struct *work)
1861{
1862}
1863
1843static void xs_dummy_setup_socket(struct work_struct *work) 1864static void xs_dummy_setup_socket(struct work_struct *work)
1844{ 1865{
1845} 1866}
@@ -2664,6 +2685,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2664 } 2685 }
2665 2686
2666 new = container_of(xprt, struct sock_xprt, xprt); 2687 new = container_of(xprt, struct sock_xprt, xprt);
2688 mutex_init(&new->recv_mutex);
2667 memcpy(&xprt->addr, args->dstaddr, args->addrlen); 2689 memcpy(&xprt->addr, args->dstaddr, args->addrlen);
2668 xprt->addrlen = args->addrlen; 2690 xprt->addrlen = args->addrlen;
2669 if (args->srcaddr) 2691 if (args->srcaddr)
@@ -2717,6 +2739,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
2717 xprt->ops = &xs_local_ops; 2739 xprt->ops = &xs_local_ops;
2718 xprt->timeout = &xs_local_default_timeout; 2740 xprt->timeout = &xs_local_default_timeout;
2719 2741
2742 INIT_WORK(&transport->recv_worker, xs_dummy_data_receive_workfn);
2720 INIT_DELAYED_WORK(&transport->connect_worker, 2743 INIT_DELAYED_WORK(&transport->connect_worker,
2721 xs_dummy_setup_socket); 2744 xs_dummy_setup_socket);
2722 2745
@@ -2788,21 +2811,20 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2788 2811
2789 xprt->timeout = &xs_udp_default_timeout; 2812 xprt->timeout = &xs_udp_default_timeout;
2790 2813
2814 INIT_WORK(&transport->recv_worker, xs_dummy_data_receive_workfn);
2815 INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);
2816
2791 switch (addr->sa_family) { 2817 switch (addr->sa_family) {
2792 case AF_INET: 2818 case AF_INET:
2793 if (((struct sockaddr_in *)addr)->sin_port != htons(0)) 2819 if (((struct sockaddr_in *)addr)->sin_port != htons(0))
2794 xprt_set_bound(xprt); 2820 xprt_set_bound(xprt);
2795 2821
2796 INIT_DELAYED_WORK(&transport->connect_worker,
2797 xs_udp_setup_socket);
2798 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); 2822 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
2799 break; 2823 break;
2800 case AF_INET6: 2824 case AF_INET6:
2801 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) 2825 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
2802 xprt_set_bound(xprt); 2826 xprt_set_bound(xprt);
2803 2827
2804 INIT_DELAYED_WORK(&transport->connect_worker,
2805 xs_udp_setup_socket);
2806 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); 2828 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
2807 break; 2829 break;
2808 default: 2830 default:
@@ -2867,21 +2889,20 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2867 xprt->ops = &xs_tcp_ops; 2889 xprt->ops = &xs_tcp_ops;
2868 xprt->timeout = &xs_tcp_default_timeout; 2890 xprt->timeout = &xs_tcp_default_timeout;
2869 2891
2892 INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
2893 INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
2894
2870 switch (addr->sa_family) { 2895 switch (addr->sa_family) {
2871 case AF_INET: 2896 case AF_INET:
2872 if (((struct sockaddr_in *)addr)->sin_port != htons(0)) 2897 if (((struct sockaddr_in *)addr)->sin_port != htons(0))
2873 xprt_set_bound(xprt); 2898 xprt_set_bound(xprt);
2874 2899
2875 INIT_DELAYED_WORK(&transport->connect_worker,
2876 xs_tcp_setup_socket);
2877 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); 2900 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
2878 break; 2901 break;
2879 case AF_INET6: 2902 case AF_INET6:
2880 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) 2903 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
2881 xprt_set_bound(xprt); 2904 xprt_set_bound(xprt);
2882 2905
2883 INIT_DELAYED_WORK(&transport->connect_worker,
2884 xs_tcp_setup_socket);
2885 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); 2906 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
2886 break; 2907 break;
2887 default: 2908 default: