author		Trond Myklebust <trond.myklebust@primarydata.com>	2015-10-05 10:53:49 -0400
committer	Trond Myklebust <trond.myklebust@primarydata.com>	2015-10-08 08:27:04 -0400
commit		edc1b01cd3b20a5fff049e98f82a2b0d24a34c89
tree		1f6a52705eeec5ef5e450963cac1d7e38905f446
parent		66d7a56a6254389587d0999dcaab1d2634cd4e24
SUNRPC: Move TCP receive data path into a workqueue context
Stream protocols such as TCP can often build up a backlog of data to be
read, because the data must be delivered in order. Combine this with the
fact that some workloads, such as read()-intensive NFS workloads, need to
receive a lot of data per RPC call, and it turns out that receiving the
data from inside a softirq context can cause starvation.
This patch moves TCP data receive processing into a workqueue context.
We still end up calling tcp_read_sock(), but we now do so from process
context, meaning that softirqs remain enabled for most of the time.
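For context, here is a minimal sketch of the deferral pattern the patch adopts, under placeholder names (my_transport, my_data_ready and my_recv_workfn are illustrative, not part of the patch): the socket's data_ready callback, which runs in softirq context, only queues a work item, and the actual socket draining happens later in process context, serialised against teardown by a mutex. The real change below additionally uses the rpciod workqueue, reads the transport pointer under sk_callback_lock, and loops over tcp_read_sock().

/*
 * Illustrative sketch only -- placeholder names, not the code from this
 * patch.  Shows "defer socket receive from softirq to a workqueue".
 */
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <net/sock.h>

struct my_transport {
	struct sock		*inet;		/* underlying kernel socket */
	struct work_struct	recv_worker;	/* receive deferred to process context */
	struct mutex		recv_mutex;	/* serialises receive against teardown */
};

/* sk_data_ready callback: runs in softirq context, so do no real work here. */
static void my_data_ready(struct sock *sk)
{
	struct my_transport *transport = sk->sk_user_data;

	/* the patch reads its transport pointer under sk_callback_lock */
	if (transport)
		queue_work(system_wq, &transport->recv_worker);
}

/* Work function: runs in process context, where sleeping locks are allowed. */
static void my_recv_workfn(struct work_struct *work)
{
	struct my_transport *transport =
		container_of(work, struct my_transport, recv_worker);

	mutex_lock(&transport->recv_mutex);
	if (transport->inet != NULL) {
		lock_sock(transport->inet);
		/* ... drain the socket here, e.g. with tcp_read_sock() ... */
		release_sock(transport->inet);
	}
	mutex_unlock(&transport->recv_mutex);
}

/* Setup would be: mutex_init(&t->recv_mutex);
 * INIT_WORK(&t->recv_worker, my_recv_workfn);
 * and point the socket's sk_data_ready at my_data_ready. */

Teardown then amounts to clearing the socket pointer while holding recv_mutex and calling cancel_work_sync() on the work item, which is what the xs_reset_transport() and xs_destroy() hunks below do.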
With this patch, I see a doubling of read bandwidth when running a
multi-threaded iozone workload between a virtual client and server setup.
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
-rw-r--r--	include/linux/sunrpc/xprtsock.h |  2
-rw-r--r--	net/sunrpc/xprtsock.c           | 51
2 files changed, 38 insertions, 15 deletions
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 357e44c1a46b..0ece4ba06f06 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -44,6 +44,8 @@ struct sock_xprt {
 	 */
 	unsigned long		sock_state;
 	struct delayed_work	connect_worker;
+	struct work_struct	recv_worker;
+	struct mutex		recv_mutex;
 	struct sockaddr_storage	srcaddr;
 	unsigned short		srcport;
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fa8d0c15c8cd..58dc90ccebb6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -823,6 +823,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
 
 	kernel_sock_shutdown(sock, SHUT_RDWR);
 
+	mutex_lock(&transport->recv_mutex);
 	write_lock_bh(&sk->sk_callback_lock);
 	transport->inet = NULL;
 	transport->sock = NULL;
@@ -833,6 +834,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
 	xprt_clear_connected(xprt);
 	write_unlock_bh(&sk->sk_callback_lock);
 	xs_sock_reset_connection_flags(xprt);
+	mutex_unlock(&transport->recv_mutex);
 
 	trace_rpc_socket_close(xprt, sock);
 	sock_release(sock);
@@ -886,6 +888,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
 
 	cancel_delayed_work_sync(&transport->connect_worker);
 	xs_close(xprt);
+	cancel_work_sync(&transport->recv_worker);
 	xs_xprt_free(xprt);
 	module_put(THIS_MODULE);
 }
@@ -1243,12 +1246,12 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
 	dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid));
 
 	/* Find and lock the request corresponding to this xid */
-	spin_lock(&xprt->transport_lock);
+	spin_lock_bh(&xprt->transport_lock);
 	req = xprt_lookup_rqst(xprt, transport->tcp_xid);
 	if (!req) {
 		dprintk("RPC: XID %08x request not found!\n",
 				ntohl(transport->tcp_xid));
-		spin_unlock(&xprt->transport_lock);
+		spin_unlock_bh(&xprt->transport_lock);
 		return -1;
 	}
 
@@ -1257,7 +1260,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
 	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
 		xprt_complete_rqst(req->rq_task, transport->tcp_copied);
 
-	spin_unlock(&xprt->transport_lock);
+	spin_unlock_bh(&xprt->transport_lock);
 	return 0;
 }
 
@@ -1277,10 +1280,10 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
 	struct rpc_rqst *req;
 
 	/* Look up and lock the request corresponding to the given XID */
-	spin_lock(&xprt->transport_lock);
+	spin_lock_bh(&xprt->transport_lock);
 	req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
 	if (req == NULL) {
-		spin_unlock(&xprt->transport_lock);
+		spin_unlock_bh(&xprt->transport_lock);
 		printk(KERN_WARNING "Callback slot table overflowed\n");
 		xprt_force_disconnect(xprt);
 		return -1;
@@ -1291,7 +1294,7 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
 
 	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
 		xprt_complete_bc_request(req, transport->tcp_copied);
-	spin_unlock(&xprt->transport_lock);
+	spin_unlock_bh(&xprt->transport_lock);
 
 	return 0;
 }
@@ -1402,19 +1405,33 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
 	unsigned long total = 0;
 	int read = 0;
 
+	mutex_lock(&transport->recv_mutex);
 	sk = transport->inet;
+	if (sk == NULL)
+		goto out;
 
 	/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
 	for (;;) {
+		lock_sock(sk);
 		read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
+		release_sock(sk);
 		if (read <= 0)
 			break;
 		total += read;
 		rd_desc.count = 65536;
 	}
+out:
+	mutex_unlock(&transport->recv_mutex);
 	trace_xs_tcp_data_ready(xprt, read, total);
 }
 
+static void xs_tcp_data_receive_workfn(struct work_struct *work)
+{
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, recv_worker);
+	xs_tcp_data_receive(transport);
+}
+
 /**
  * xs_tcp_data_ready - "data ready" callback for TCP sockets
  * @sk: socket with data to read
@@ -1437,8 +1454,8 @@ static void xs_tcp_data_ready(struct sock *sk)
 	 */
 	if (xprt->reestablish_timeout)
 		xprt->reestablish_timeout = 0;
+	queue_work(rpciod_workqueue, &transport->recv_worker);
 
-	xs_tcp_data_receive(transport);
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1840,6 +1857,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
 }
 #endif
 
+static void xs_dummy_data_receive_workfn(struct work_struct *work)
+{
+}
+
 static void xs_dummy_setup_socket(struct work_struct *work)
 {
 }
@@ -2664,6 +2685,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
 	}
 
 	new = container_of(xprt, struct sock_xprt, xprt);
+	mutex_init(&new->recv_mutex);
 	memcpy(&xprt->addr, args->dstaddr, args->addrlen);
 	xprt->addrlen = args->addrlen;
 	if (args->srcaddr)
@@ -2717,6 +2739,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
 	xprt->ops = &xs_local_ops;
 	xprt->timeout = &xs_local_default_timeout;
 
+	INIT_WORK(&transport->recv_worker, xs_dummy_data_receive_workfn);
 	INIT_DELAYED_WORK(&transport->connect_worker,
 			xs_dummy_setup_socket);
 
@@ -2788,21 +2811,20 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 
 	xprt->timeout = &xs_udp_default_timeout;
 
+	INIT_WORK(&transport->recv_worker, xs_dummy_data_receive_workfn);
+	INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);
+
 	switch (addr->sa_family) {
 	case AF_INET:
 		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
 			xprt_set_bound(xprt);
 
-		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_udp_setup_socket);
 		xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
 		break;
 	case AF_INET6:
 		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
 			xprt_set_bound(xprt);
 
-		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_udp_setup_socket);
 		xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
 		break;
 	default:
@@ -2867,21 +2889,20 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	xprt->ops = &xs_tcp_ops;
 	xprt->timeout = &xs_tcp_default_timeout;
 
+	INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
+	INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
+
 	switch (addr->sa_family) {
 	case AF_INET:
 		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
 			xprt_set_bound(xprt);
 
-		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_tcp_setup_socket);
 		xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
 		break;
 	case AF_INET6:
 		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
 			xprt_set_bound(xprt);
 
-		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_tcp_setup_socket);
 		xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
 		break;
 	default: