aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author: Chuck Lever <chuck.lever@oracle.com> 2015-12-16 17:23:11 -0500
committer: Anna Schumaker <Anna.Schumaker@Netapp.com> 2015-12-18 15:34:33 -0500
commit: 68791649a725ac58c88b472ea6187853e67b3415 (patch)
tree: ebf7737b18657445c9874829cf9458855cd96e34 /net
parent: 73eee9b2de1fa08f2a82bb32ac4ec5e605716a91 (diff)
xprtrdma: Invalidate in the RPC reply handler
There is a window between the time the RPC reply handler wakes the waiting RPC task and when xprt_release() invokes ops->buf_free. During this time, memory regions containing the data payload may still be accessed by a broken or malicious server, but the RPC application has already been allowed access to the memory containing the RPC request's data payloads.

The server should be fenced from client memory containing RPC data payloads _before_ the RPC application is allowed to continue.

This change also more strongly enforces send queue accounting. There is a maximum number of RPC calls allowed to be outstanding. When an RPC/RDMA transport is set up, just enough send queue resources are allocated to handle registration, Send, and invalidation WRs for each of those RPCs at the same time.

Before, additional RPC calls could be dispatched while invalidation WRs were still consuming send WQEs. When invalidation WRs backed up, dispatching additional RPCs resulted in a send queue overrun.

Now, the reply handler prevents RPC dispatch until invalidation is complete. This prevents RPC call dispatch until there are enough send queue resources to proceed.

Still to do: If an RPC exits early (say, ^C), the reply handler has no opportunity to perform invalidation. Currently, xprt_rdma_free() still frees remaining RDMA resources, which could deadlock. Additional changes are needed to handle invalidation properly in this case.

Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Devesh Sharma <devesh.sharma@avagotech.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'net')
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c 16
1 files changed, 16 insertions, 0 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c10d9699441c..0f28f2d743ed 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -804,6 +804,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
804 if (req->rl_reply) 804 if (req->rl_reply)
805 goto out_duplicate; 805 goto out_duplicate;
806 806
807 /* Sanity checking has passed. We are now committed
808 * to complete this transaction.
809 */
810 list_del_init(&rqst->rq_list);
811 spin_unlock_bh(&xprt->transport_lock);
807 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" 812 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
808 " RPC request 0x%p xid 0x%08x\n", 813 " RPC request 0x%p xid 0x%08x\n",
809 __func__, rep, req, rqst, 814 __func__, rep, req, rqst,
@@ -888,12 +893,23 @@ badheader:
888 break; 893 break;
889 } 894 }
890 895
896 /* Invalidate and flush the data payloads before waking the
897 * waiting application. This guarantees the memory region is
898 * properly fenced from the server before the application
899 * accesses the data. It also ensures proper send flow
900 * control: waking the next RPC waits until this RPC has
901 * relinquished all its Send Queue entries.
902 */
903 if (req->rl_nchunks)
904 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
905
891 credits = be32_to_cpu(headerp->rm_credit); 906 credits = be32_to_cpu(headerp->rm_credit);
892 if (credits == 0) 907 if (credits == 0)
893 credits = 1; /* don't deadlock */ 908 credits = 1; /* don't deadlock */
894 else if (credits > r_xprt->rx_buf.rb_max_requests) 909 else if (credits > r_xprt->rx_buf.rb_max_requests)
895 credits = r_xprt->rx_buf.rb_max_requests; 910 credits = r_xprt->rx_buf.rb_max_requests;
896 911
912 spin_lock_bh(&xprt->transport_lock);
897 cwnd = xprt->cwnd; 913 cwnd = xprt->cwnd;
898 xprt->cwnd = credits << RPC_CWNDSHIFT; 914 xprt->cwnd = credits << RPC_CWNDSHIFT;
899 if (xprt->cwnd > cwnd) 915 if (xprt->cwnd > cwnd)