author    | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-14 19:08:23 -0500
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-14 19:08:23 -0500
commit    | 75f26df6ae6f8787fc6198609c8be17400a69e25 (patch)
tree      | 04d2173357b402ce1ee097ed7e40bcfc2153b937 /net
parent    | 63f729cb4aa9a224cfd6bb35eab6b4556c29115d (diff)
parent    | 44aab3e09ef947e546ee61c5082c41b86dd15e53 (diff)
Merge tag 'nfs-for-4.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
Stable fixes:
- Fix a regression in the SunRPC socket polling code
- Fix the attribute cache revalidation code
- Fix race in __update_open_stateid()
- Fix an lo->plh_block_lgets imbalance in layoutreturn
- Fix an Oopsable typo in ff_mirror_match_fh()
Features:
- pNFS layout recall performance improvements.
- pNFS/flexfiles: Support server-supplied layoutstats sampling period
Bugfixes + cleanups:
- NFSv4: Don't perform cached access checks before we've OPENed the
file
- Fix starvation issues with background flushes
- Reclaim writes should be flushed as unstable writes if there are
already entries in the commit lists
- Various bugfixes from Chuck to fix NFS/RDMA send queue ordering
problems
- Ensure that we propagate fatal layoutget errors back to the
application
- Fixes for sundry flexfiles layoutstats bugs
- Fix files/flexfiles to not cache invalidated layouts in the DS
commit buckets"
* tag 'nfs-for-4.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (68 commits)
NFS: Fix a compile warning about unused variable in nfs_generic_pg_pgios()
NFSv4: Fix a compile warning about no prototype for nfs4_ioctl()
NFS: Use wait_on_atomic_t() for unlock after readahead
SUNRPC: Fixup socket wait for memory
NFSv4.1/pNFS: Cleanup constify struct pnfs_layout_range arguments
NFSv4.1/pnfs: Cleanup copying of pnfs_layout_range structures
NFSv4.1/pNFS: Cleanup pnfs_mark_matching_lsegs_invalid()
NFSv4.1/pNFS: Fix a race in initiate_file_draining()
NFSv4.1/pNFS: pnfs_error_mark_layout_for_return() must always return layout
NFSv4.1/pNFS: pnfs_mark_matching_lsegs_return() should set the iomode
NFSv4.1/pNFS: Use nfs4_stateid_copy for copying stateids
NFSv4.1/pNFS: Don't pass stateids by value to pnfs_send_layoutreturn()
NFS: Relax requirements in nfs_flush_incompatible
NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid
NFS: Allow multiple commit requests in flight per file
NFS/pNFS: Fix up pNFS write reschedule layering violations and bugs
SUNRPC: Fix a missing break in rpc_anyaddr()
pNFS/flexfiles: Fix an Oopsable typo in ff_mirror_match_fh()
NFS: Fix attribute cache revalidation
NFS: Ensure we revalidate attributes before using execute_ok()
...
Diffstat (limited to 'net')
-rw-r--r-- | net/sunrpc/clnt.c                | 1
-rw-r--r-- | net/sunrpc/xprtrdma/backchannel.c | 26
-rw-r--r-- | net/sunrpc/xprtrdma/fmr_ops.c     | 64
-rw-r--r-- | net/sunrpc/xprtrdma/frwr_ops.c    | 174
-rw-r--r-- | net/sunrpc/xprtrdma/physical_ops.c | 13
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c    | 16
-rw-r--r-- | net/sunrpc/xprtrdma/transport.c   | 3
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c       | 16
-rw-r--r-- | net/sunrpc/xprtrdma/xprt_rdma.h   | 14
-rw-r--r-- | net/sunrpc/xprtsock.c             | 63
10 files changed, 298 insertions, 92 deletions
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 23608eb0ded2..b7f21044f4d8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1217,6 +1217,7 @@ static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen)
 			return -EINVAL;
 		memcpy(buf, &rpc_in6addr_loopback,
 				sizeof(rpc_in6addr_loopback));
+		break;
 	default:
 		dprintk("RPC: %s: address family not supported\n",
 			__func__);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 2dcb44f69e53..cc1251d07297 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -15,7 +15,7 @@
 # define RPCDBG_FACILITY RPCDBG_TRANS
 #endif
 
-#define RPCRDMA_BACKCHANNEL_DEBUG
+#undef RPCRDMA_BACKCHANNEL_DEBUG
 
 static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
 				 struct rpc_rqst *rqst)
@@ -42,8 +42,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
 	size_t size;
 
 	req = rpcrdma_create_req(r_xprt);
-	if (!req)
-		return -ENOMEM;
+	if (IS_ERR(req))
+		return PTR_ERR(req);
 	req->rl_backchannel = true;
 
 	size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
@@ -84,9 +84,7 @@ out_fail:
 static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
 				 unsigned int count)
 {
-	struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
 	struct rpcrdma_rep *rep;
-	unsigned long flags;
 	int rc = 0;
 
 	while (count--) {
@@ -98,9 +96,7 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
 			break;
 		}
 
-		spin_lock_irqsave(&buffers->rb_lock, flags);
-		list_add(&rep->rr_list, &buffers->rb_recv_bufs);
-		spin_unlock_irqrestore(&buffers->rb_lock, flags);
+		rpcrdma_recv_buffer_put(rep);
 	}
 
 	return rc;
@@ -140,6 +136,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
 				__func__);
 			goto out_free;
 		}
+		dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
 
 		rqst->rq_xprt = &r_xprt->rx_xprt;
 		INIT_LIST_HEAD(&rqst->rq_list);
@@ -220,12 +217,14 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 
 	rpclen = rqst->rq_svec[0].iov_len;
 
+#ifdef RPCRDMA_BACKCHANNEL_DEBUG
 	pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
 		__func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
 	pr_info("RPC: %s: RPC/RDMA: %*ph\n",
 		__func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
 	pr_info("RPC: %s: RPC: %*ph\n",
 		__func__, (int)rpclen, rqst->rq_svec[0].iov_base);
+#endif
 
 	req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
 	req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
@@ -269,6 +268,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
 {
 	struct rpc_xprt *xprt = rqst->rq_xprt;
 
+	dprintk("RPC: %s: freeing rqst %p (req %p)\n",
+		__func__, rqst, rpcr_to_rdmar(rqst));
+
 	smp_mb__before_atomic();
 	WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
 	clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
@@ -333,9 +335,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 				struct rpc_rqst, rq_bc_pa_list);
 	list_del(&rqst->rq_bc_pa_list);
 	spin_unlock(&xprt->bc_pa_lock);
-#ifdef RPCRDMA_BACKCHANNEL_DEBUG
-	pr_info("RPC: %s: using rqst %p\n", __func__, rqst);
-#endif
+	dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
 
 	/* Prepare rqst */
 	rqst->rq_reply_bytes_recvd = 0;
@@ -355,10 +355,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 	 * direction reply.
 	 */
 	req = rpcr_to_rdmar(rqst);
-#ifdef RPCRDMA_BACKCHANNEL_DEBUG
-	pr_info("RPC: %s: attaching rep %p to req %p\n",
+	dprintk("RPC: %s: attaching rep %p to req %p\n",
 		__func__, rep, req);
-#endif
 	req->rl_reply = rep;
 
 	/* Defeat the retransmit detection logic in send_request */
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index f1e8dafbd507..c14f3a4bff68 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -179,6 +179,69 @@ out_maperr:
 	return rc;
 }
 
+static void
+__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+	struct ib_device *device = r_xprt->rx_ia.ri_device;
+	struct rpcrdma_mw *mw = seg->rl_mw;
+	int nsegs = seg->mr_nsegs;
+
+	seg->rl_mw = NULL;
+
+	while (nsegs--)
+		rpcrdma_unmap_one(device, seg++);
+
+	rpcrdma_put_mw(r_xprt, mw);
+}
+
+/* Invalidate all memory regions that were registered for "req".
+ *
+ * Sleeps until it is safe for the host CPU to access the
+ * previously mapped memory regions.
+ */
+static void
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+	struct rpcrdma_mr_seg *seg;
+	unsigned int i, nchunks;
+	struct rpcrdma_mw *mw;
+	LIST_HEAD(unmap_list);
+	int rc;
+
+	dprintk("RPC: %s: req %p\n", __func__, req);
+
+	/* ORDER: Invalidate all of the req's MRs first
+	 *
+	 * ib_unmap_fmr() is slow, so use a single call instead
+	 * of one call per mapped MR.
+	 */
+	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+		seg = &req->rl_segments[i];
+		mw = seg->rl_mw;
+
+		list_add(&mw->r.fmr.fmr->list, &unmap_list);
+
+		i += seg->mr_nsegs;
+	}
+	rc = ib_unmap_fmr(&unmap_list);
+	if (rc)
+		pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);
+
+	/* ORDER: Now DMA unmap all of the req's MRs, and return
+	 * them to the free MW list.
+	 */
+	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+		seg = &req->rl_segments[i];
+
+		__fmr_dma_unmap(r_xprt, seg);
+
+		i += seg->mr_nsegs;
+		seg->mr_nsegs = 0;
+	}
+
+	req->rl_nchunks = 0;
+}
+
 /* Use the ib_unmap_fmr() verb to prevent further remote
  * access via RDMA READ or RDMA WRITE.
  */
@@ -231,6 +294,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
 
 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
 	.ro_map = fmr_op_map,
+	.ro_unmap_sync = fmr_op_unmap_sync,
 	.ro_unmap = fmr_op_unmap,
 	.ro_open = fmr_op_open,
 	.ro_maxpages = fmr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 88cf9e7269c2..c6836844bd0e 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -245,12 +245,14 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
 }
 
-/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */
+/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs
+ * to be reset.
+ *
+ * WARNING: Only wr_id and status are reliable at this point
+ */
 static void
-frwr_sendcompletion(struct ib_wc *wc)
+__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r)
 {
-	struct rpcrdma_mw *r;
-
 	if (likely(wc->status == IB_WC_SUCCESS))
 		return;
 
@@ -261,9 +263,23 @@ frwr_sendcompletion(struct ib_wc *wc)
 	else
 		pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
 			__func__, r, ib_wc_status_msg(wc->status), wc->status);
+
 	r->r.frmr.fr_state = FRMR_IS_STALE;
 }
 
+static void
+frwr_sendcompletion(struct ib_wc *wc)
+{
+	struct rpcrdma_mw *r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+	struct rpcrdma_frmr *f = &r->r.frmr;
+
+	if (unlikely(wc->status != IB_WC_SUCCESS))
+		__frwr_sendcompletion_flush(wc, r);
+
+	if (f->fr_waiter)
+		complete(&f->fr_linv_done);
+}
+
 static int
 frwr_op_init(struct rpcrdma_xprt *r_xprt)
 {
@@ -319,7 +335,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	struct rpcrdma_mw *mw;
 	struct rpcrdma_frmr *frmr;
 	struct ib_mr *mr;
-	struct ib_reg_wr reg_wr;
+	struct ib_reg_wr *reg_wr;
 	struct ib_send_wr *bad_wr;
 	int rc, i, n, dma_nents;
 	u8 key;
@@ -335,7 +351,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	} while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
 	frmr = &mw->r.frmr;
 	frmr->fr_state = FRMR_IS_VALID;
+	frmr->fr_waiter = false;
 	mr = frmr->fr_mr;
+	reg_wr = &frmr->fr_regwr;
 
 	if (nsegs > ia->ri_max_frmr_depth)
 		nsegs = ia->ri_max_frmr_depth;
@@ -381,19 +399,19 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	key = (u8)(mr->rkey & 0x000000FF);
 	ib_update_fast_reg_key(mr, ++key);
 
-	reg_wr.wr.next = NULL;
-	reg_wr.wr.opcode = IB_WR_REG_MR;
-	reg_wr.wr.wr_id = (uintptr_t)mw;
-	reg_wr.wr.num_sge = 0;
-	reg_wr.wr.send_flags = 0;
-	reg_wr.mr = mr;
-	reg_wr.key = mr->rkey;
-	reg_wr.access = writing ?
+	reg_wr->wr.next = NULL;
+	reg_wr->wr.opcode = IB_WR_REG_MR;
+	reg_wr->wr.wr_id = (uintptr_t)mw;
+	reg_wr->wr.num_sge = 0;
+	reg_wr->wr.send_flags = 0;
+	reg_wr->mr = mr;
+	reg_wr->key = mr->rkey;
+	reg_wr->access = writing ?
 			IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
 			IB_ACCESS_REMOTE_READ;
 
 	DECR_CQCOUNT(&r_xprt->rx_ep);
-	rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr);
+	rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
 	if (rc)
 		goto out_senderr;
 
@@ -413,6 +431,116 @@ out_senderr:
 	return rc;
 }
 
+static struct ib_send_wr *
+__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
+{
+	struct rpcrdma_mw *mw = seg->rl_mw;
+	struct rpcrdma_frmr *f = &mw->r.frmr;
+	struct ib_send_wr *invalidate_wr;
+
+	f->fr_waiter = false;
+	f->fr_state = FRMR_IS_INVALID;
+	invalidate_wr = &f->fr_invwr;
+
+	memset(invalidate_wr, 0, sizeof(*invalidate_wr));
+	invalidate_wr->wr_id = (unsigned long)(void *)mw;
+	invalidate_wr->opcode = IB_WR_LOCAL_INV;
+	invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
+
+	return invalidate_wr;
+}
+
+static void
+__frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+		 int rc)
+{
+	struct ib_device *device = r_xprt->rx_ia.ri_device;
+	struct rpcrdma_mw *mw = seg->rl_mw;
+	struct rpcrdma_frmr *f = &mw->r.frmr;
+
+	seg->rl_mw = NULL;
+
+	ib_dma_unmap_sg(device, f->sg, f->sg_nents, seg->mr_dir);
+
+	if (!rc)
+		rpcrdma_put_mw(r_xprt, mw);
+	else
+		__frwr_queue_recovery(mw);
+}
+
+/* Invalidate all memory regions that were registered for "req".
+ *
+ * Sleeps until it is safe for the host CPU to access the
+ * previously mapped memory regions.
+ */
+static void
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+	struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_mr_seg *seg;
+	unsigned int i, nchunks;
+	struct rpcrdma_frmr *f;
+	int rc;
+
+	dprintk("RPC: %s: req %p\n", __func__, req);
+
+	/* ORDER: Invalidate all of the req's MRs first
+	 *
+	 * Chain the LOCAL_INV Work Requests and post them with
+	 * a single ib_post_send() call.
+	 */
+	invalidate_wrs = pos = prev = NULL;
+	seg = NULL;
+	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+		seg = &req->rl_segments[i];
+
+		pos = __frwr_prepare_linv_wr(seg);
+
+		if (!invalidate_wrs)
+			invalidate_wrs = pos;
+		else
+			prev->next = pos;
+		prev = pos;
+
+		i += seg->mr_nsegs;
+	}
+	f = &seg->rl_mw->r.frmr;
+
+	/* Strong send queue ordering guarantees that when the
+	 * last WR in the chain completes, all WRs in the chain
+	 * are complete.
+	 */
+	f->fr_invwr.send_flags = IB_SEND_SIGNALED;
+	f->fr_waiter = true;
+	init_completion(&f->fr_linv_done);
+	INIT_CQCOUNT(&r_xprt->rx_ep);
+
+	/* Transport disconnect drains the receive CQ before it
+	 * replaces the QP. The RPC reply handler won't call us
+	 * unless ri_id->qp is a valid pointer.
+	 */
+	rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
+	if (rc)
+		pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
+
+	wait_for_completion(&f->fr_linv_done);
+
+	/* ORDER: Now DMA unmap all of the req's MRs, and return
+	 * them to the free MW list.
+	 */
+	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+		seg = &req->rl_segments[i];
+
+		__frwr_dma_unmap(r_xprt, seg, rc);
+
+		i += seg->mr_nsegs;
+		seg->mr_nsegs = 0;
+	}
+
+	req->rl_nchunks = 0;
+}
+
 /* Post a LOCAL_INV Work Request to prevent further remote access
  * via RDMA READ or RDMA WRITE.
  */
@@ -423,23 +551,24 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_mw *mw = seg1->rl_mw;
 	struct rpcrdma_frmr *frmr = &mw->r.frmr;
-	struct ib_send_wr invalidate_wr, *bad_wr;
+	struct ib_send_wr *invalidate_wr, *bad_wr;
 	int rc, nsegs = seg->mr_nsegs;
 
 	dprintk("RPC: %s: FRMR %p\n", __func__, mw);
 
 	seg1->rl_mw = NULL;
 	frmr->fr_state = FRMR_IS_INVALID;
+	invalidate_wr = &mw->r.frmr.fr_invwr;
 
-	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
-	invalidate_wr.wr_id = (unsigned long)(void *)mw;
-	invalidate_wr.opcode = IB_WR_LOCAL_INV;
-	invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey;
+	memset(invalidate_wr, 0, sizeof(*invalidate_wr));
+	invalidate_wr->wr_id = (uintptr_t)mw;
+	invalidate_wr->opcode = IB_WR_LOCAL_INV;
+	invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
 	DECR_CQCOUNT(&r_xprt->rx_ep);
 
 	ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
 	read_lock(&ia->ri_qplock);
-	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+	rc = ib_post_send(ia->ri_id->qp, invalidate_wr, &bad_wr);
 	read_unlock(&ia->ri_qplock);
 	if (rc)
 		goto out_err;
@@ -471,6 +600,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
 
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
 	.ro_map = frwr_op_map,
+	.ro_unmap_sync = frwr_op_unmap_sync,
 	.ro_unmap = frwr_op_unmap,
 	.ro_open = frwr_op_open,
 	.ro_maxpages = frwr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 617b76f22154..dbb302ecf590 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -83,6 +83,18 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 	return 1;
 }
 
+/* DMA unmap all memory regions that were mapped for "req".
+ */
+static void
+physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+	struct ib_device *device = r_xprt->rx_ia.ri_device;
+	unsigned int i;
+
+	for (i = 0; req->rl_nchunks; --req->rl_nchunks)
+		rpcrdma_unmap_one(device, &req->rl_segments[i++]);
+}
+
 static void
 physical_op_destroy(struct rpcrdma_buffer *buf)
 {
@@ -90,6 +102,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
 
 const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
 	.ro_map = physical_op_map,
+	.ro_unmap_sync = physical_op_unmap_sync,
 	.ro_unmap = physical_op_unmap,
 	.ro_open = physical_op_open,
 	.ro_maxpages = physical_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c10d9699441c..0f28f2d743ed 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -804,6 +804,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	if (req->rl_reply)
 		goto out_duplicate;
 
+	/* Sanity checking has passed. We are now committed
+	 * to complete this transaction.
+	 */
+	list_del_init(&rqst->rq_list);
+	spin_unlock_bh(&xprt->transport_lock);
 	dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
 		" RPC request 0x%p xid 0x%08x\n",
 		__func__, rep, req, rqst,
@@ -888,12 +893,23 @@ badheader:
 		break;
 	}
 
+	/* Invalidate and flush the data payloads before waking the
+	 * waiting application. This guarantees the memory region is
+	 * properly fenced from the server before the application
+	 * accesses the data. It also ensures proper send flow
+	 * control: waking the next RPC waits until this RPC has
+	 * relinquished all its Send Queue entries.
+	 */
+	if (req->rl_nchunks)
+		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
+
 	credits = be32_to_cpu(headerp->rm_credit);
 	if (credits == 0)
 		credits = 1;	/* don't deadlock */
 	else if (credits > r_xprt->rx_buf.rb_max_requests)
 		credits = r_xprt->rx_buf.rb_max_requests;
 
+	spin_lock_bh(&xprt->transport_lock);
 	cwnd = xprt->cwnd;
 	xprt->cwnd = credits << RPC_CWNDSHIFT;
 	if (xprt->cwnd > cwnd)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 8c545f7d7525..740bddcf3488 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -576,6 +576,9 @@ xprt_rdma_free(void *buffer)
 
 	rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
 	req = rb->rg_owner;
+	if (req->rl_backchannel)
+		return;
+
 	r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
 
 	dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index eadd1655145a..732c71ce5dca 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -616,10 +616,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
 	/* set trigger for requesting send completion */
 	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
-	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
-		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
-	else if (ep->rep_cqinit <= 2)
-		ep->rep_cqinit = 0;
+	if (ep->rep_cqinit <= 2)
+		ep->rep_cqinit = 0;	/* always signal? */
 	INIT_CQCOUNT(ep);
 	init_waitqueue_head(&ep->rep_connect_wait);
 	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
@@ -852,10 +850,11 @@ retry:
 
 	if (extras) {
 		rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
-		if (rc)
+		if (rc) {
 			pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
 				__func__, rc);
 			rc = 0;
+		}
 	}
 }
 
@@ -1337,15 +1336,14 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
 	struct rpcrdma_rep *rep;
-	unsigned long flags;
 	int rc;
 
 	while (count--) {
-		spin_lock_irqsave(&buffers->rb_lock, flags);
+		spin_lock(&buffers->rb_lock);
 		if (list_empty(&buffers->rb_recv_bufs))
 			goto out_reqbuf;
 		rep = rpcrdma_buffer_get_rep_locked(buffers);
-		spin_unlock_irqrestore(&buffers->rb_lock, flags);
+		spin_unlock(&buffers->rb_lock);
 
 		rc = rpcrdma_ep_post_recv(ia, ep, rep);
 		if (rc)
@@ -1355,7 +1353,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
 	return 0;
 
 out_reqbuf:
-	spin_unlock_irqrestore(&buffers->rb_lock, flags);
+	spin_unlock(&buffers->rb_lock);
 	pr_warn("%s: no extra receive buffers\n", __func__);
 	return -ENOMEM;
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ac7f8d4f632a..728101ddc44b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -88,12 +88,6 @@ struct rpcrdma_ep {
 	struct delayed_work rep_connect_worker;
 };
 
-/*
- * Force a signaled SEND Work Request every so often,
- * in case the provider needs to do some housekeeping.
- */
-#define RPCRDMA_MAX_UNSIGNALED_SENDS (32)
-
 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
 #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
 
@@ -207,6 +201,12 @@ struct rpcrdma_frmr {
 	enum rpcrdma_frmr_state fr_state;
 	struct work_struct fr_work;
 	struct rpcrdma_xprt *fr_xprt;
+	bool fr_waiter;
+	struct completion fr_linv_done;;
+	union {
+		struct ib_reg_wr fr_regwr;
+		struct ib_send_wr fr_invwr;
+	};
 };
 
 struct rpcrdma_fmr {
@@ -364,6 +364,8 @@ struct rpcrdma_xprt;
 struct rpcrdma_memreg_ops {
 	int (*ro_map)(struct rpcrdma_xprt *,
 			struct rpcrdma_mr_seg *, int, bool);
+	void (*ro_unmap_sync)(struct rpcrdma_xprt *,
+			struct rpcrdma_req *);
 	int (*ro_unmap)(struct rpcrdma_xprt *,
 			struct rpcrdma_mr_seg *);
 	int (*ro_open)(struct rpcrdma_ia *,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2ffaf6a79499..fde2138b81e7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -398,7 +398,6 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
 	if (unlikely(!sock))
 		return -ENOTSOCK;
 
-	clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags);
 	if (base != 0) {
 		addr = NULL;
 		addrlen = 0;
@@ -442,7 +441,6 @@ static void xs_nospace_callback(struct rpc_task *task)
 	struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
 
 	transport->inet->sk_write_pending--;
-	clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
 }
 
 /**
@@ -467,20 +465,11 @@ static int xs_nospace(struct rpc_task *task)
 
 	/* Don't race with disconnect */
 	if (xprt_connected(xprt)) {
-		if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) {
-			/*
-			 * Notify TCP that we're limited by the application
-			 * window size
-			 */
-			set_bit(SOCK_NOSPACE, &transport->sock->flags);
-			sk->sk_write_pending++;
-			/* ...and wait for more buffer space */
-			xprt_wait_for_buffer_space(task, xs_nospace_callback);
-		}
-	} else {
-		clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
+		/* wait for more buffer space */
+		sk->sk_write_pending++;
+		xprt_wait_for_buffer_space(task, xs_nospace_callback);
+	} else
 		ret = -ENOTCONN;
-	}
 
 	spin_unlock_bh(&xprt->transport_lock);
 
@@ -616,9 +605,6 @@ process_status:
 	case -EAGAIN:
 		status = xs_nospace(task);
 		break;
-	default:
-		dprintk("RPC: sendmsg returned unrecognized error %d\n",
-			-status);
 	case -ENETUNREACH:
 	case -ENOBUFS:
 	case -EPIPE:
@@ -626,7 +612,10 @@ process_status:
 	case -EPERM:
 		/* When the server has died, an ICMP port unreachable message
 		 * prompts ECONNREFUSED. */
-		clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
+		break;
+	default:
+		dprintk("RPC: sendmsg returned unrecognized error %d\n",
+			-status);
 	}
 
 	return status;
@@ -706,16 +695,16 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	case -EAGAIN:
 		status = xs_nospace(task);
 		break;
-	default:
-		dprintk("RPC: sendmsg returned unrecognized error %d\n",
-			-status);
 	case -ECONNRESET:
 	case -ECONNREFUSED:
 	case -ENOTCONN:
 	case -EADDRINUSE:
 	case -ENOBUFS:
 	case -EPIPE:
-		clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
+		break;
+	default:
+		dprintk("RPC: sendmsg returned unrecognized error %d\n",
+			-status);
 	}
 
 	return status;
@@ -1609,19 +1598,23 @@ static void xs_tcp_state_change(struct sock *sk)
 
 static void xs_write_space(struct sock *sk)
 {
-	struct socket *sock;
+	struct socket_wq *wq;
 	struct rpc_xprt *xprt;
 
-	if (unlikely(!(sock = sk->sk_socket)))
+	if (!sk->sk_socket)
 		return;
-	clear_bit(SOCK_NOSPACE, &sock->flags);
+	clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 
 	if (unlikely(!(xprt = xprt_from_sock(sk))))
 		return;
-	if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0)
-		return;
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
+		goto out;
 
 	xprt_write_space(xprt);
+out:
+	rcu_read_unlock();
 }
 
 /**
@@ -1907,18 +1900,6 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
 	}
 }
 #else
-static inline void xs_reclassify_socketu(struct socket *sock)
-{
-}
-
-static inline void xs_reclassify_socket4(struct socket *sock)
-{
-}
-
-static inline void xs_reclassify_socket6(struct socket *sock)
-{
-}
-
 static inline void xs_reclassify_socket(int family, struct socket *sock)
 {
 }
@@ -2008,7 +1989,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
 			"transport socket (%d).\n", -status);
 		goto out;
 	}
-	xs_reclassify_socketu(sock);
+	xs_reclassify_socket(AF_LOCAL, sock);
 
 	dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
 		xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);