45 files changed, 1995 insertions, 821 deletions
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 066ac313ae5c..a2c0dfc6fdc0 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -48,13 +48,13 @@ void nlmclnt_next_cookie(struct nlm_cookie *c)
 
 static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner)
 {
-        atomic_inc(&lockowner->count);
+        refcount_inc(&lockowner->count);
         return lockowner;
 }
 
 static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
 {
-        if (!atomic_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
+        if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
                 return;
         list_del(&lockowner->list);
         spin_unlock(&lockowner->host->h_lock);
@@ -105,7 +105,7 @@ static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_
         res = __nlm_find_lockowner(host, owner);
         if (res == NULL && new != NULL) {
                 res = new;
-                atomic_set(&new->count, 1);
+                refcount_set(&new->count, 1);
                 new->owner = owner;
                 new->pid = __nlm_alloc_pid(host);
                 new->host = nlm_get_host(host);
@@ -204,7 +204,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
         for(;;) {
                 call = kzalloc(sizeof(*call), GFP_KERNEL);
                 if (call != NULL) {
-                        atomic_set(&call->a_count, 1);
+                        refcount_set(&call->a_count, 1);
                         locks_init_lock(&call->a_args.lock.fl);
                         locks_init_lock(&call->a_res.lock.fl);
                         call->a_host = nlm_get_host(host);
@@ -222,7 +222,7 @@ void nlmclnt_release_call(struct nlm_rqst *call)
 {
         const struct nlmclnt_operations *nlmclnt_ops = call->a_host->h_nlmclnt_ops;
 
-        if (!atomic_dec_and_test(&call->a_count))
+        if (!refcount_dec_and_test(&call->a_count))
                 return;
         if (nlmclnt_ops && nlmclnt_ops->nlmclnt_release_call)
                 nlmclnt_ops->nlmclnt_release_call(call->a_callback_data);
@@ -678,7 +678,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
                 goto out;
         }
 
-        atomic_inc(&req->a_count);
+        refcount_inc(&req->a_count);
         status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
                         NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
         if (status < 0)
@@ -769,7 +769,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
         nlmclnt_setlockargs(req, fl);
         req->a_args.block = block;
 
-        atomic_inc(&req->a_count);
+        refcount_inc(&req->a_count);
         status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
                         NLMPROC_CANCEL, &nlmclnt_cancel_ops);
         if (status == 0 && req->a_res.status == nlm_lck_denied)
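
The lockd hunks above swap bare atomic_t reference counts for refcount_t, which saturates instead of wrapping on overflow or underflow, turning refcount bugs into warnings rather than use-after-free. The refcount_dec_and_lock() in nlm_put_lockowner() returns true only for the final put, with the spinlock already held, so the list_del() cannot race with a concurrent lookup. A rough userspace analog of that primitive, for illustration only (the names and locking here are hypothetical, not from the patch):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct owner {
        atomic_int count;
        pthread_mutex_t *list_lock;     /* protects the lookup list */
};

/* Drop a reference; when it is the last one, return true with the
 * lock held so the caller can unlink and free safely. */
static bool owner_put_and_lock(struct owner *o)
{
        int c = atomic_load(&o->count);

        /* Fast path: drop a clearly non-final reference without locking. */
        while (c > 1) {
                if (atomic_compare_exchange_weak(&o->count, &c, c - 1))
                        return false;
        }
        /* Possibly the last reference: decrement only under the lock, so
         * a concurrent lookup cannot revive the object while we free it. */
        pthread_mutex_lock(o->list_lock);
        if (atomic_fetch_sub(&o->count, 1) == 1)
                return true;    /* caller unlinks, unlocks, frees */
        pthread_mutex_unlock(o->list_lock);
        return false;
}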
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 826a89184f90..d35cd6be0675 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -114,7 +114,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
         unsigned long now = jiffies;
 
         if (nsm != NULL)
-                atomic_inc(&nsm->sm_count);
+                refcount_inc(&nsm->sm_count);
         else {
                 host = NULL;
                 nsm = nsm_get_handle(ni->net, ni->sap, ni->salen,
@@ -151,7 +151,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
         host->h_state = 0;
         host->h_nsmstate = 0;
         host->h_pidcount = 0;
-        atomic_set(&host->h_count, 1);
+        refcount_set(&host->h_count, 1);
         mutex_init(&host->h_mutex);
         host->h_nextrebind = now + NLM_HOST_REBIND;
         host->h_expires = now + NLM_HOST_EXPIRE;
@@ -290,7 +290,7 @@ void nlmclnt_release_host(struct nlm_host *host)
 
         WARN_ON_ONCE(host->h_server);
 
-        if (atomic_dec_and_test(&host->h_count)) {
+        if (refcount_dec_and_test(&host->h_count)) {
                 WARN_ON_ONCE(!list_empty(&host->h_lockowners));
                 WARN_ON_ONCE(!list_empty(&host->h_granted));
                 WARN_ON_ONCE(!list_empty(&host->h_reclaim));
@@ -388,6 +388,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
         ln->nrhosts++;
         nrhosts++;
 
+        refcount_inc(&host->h_count);
+
         dprintk("lockd: %s created host %s (%s)\n",
                 __func__, host->h_name, host->h_addrbuf);
 
@@ -410,7 +412,7 @@ void nlmsvc_release_host(struct nlm_host *host)
         dprintk("lockd: release server host %s\n", host->h_name);
 
         WARN_ON_ONCE(!host->h_server);
-        atomic_dec(&host->h_count);
+        refcount_dec(&host->h_count);
 }
 
 /*
@@ -504,7 +506,7 @@ struct nlm_host * nlm_get_host(struct nlm_host *host)
 {
         if (host) {
                 dprintk("lockd: get host %s\n", host->h_name);
-                atomic_inc(&host->h_count);
+                refcount_inc(&host->h_count);
                 host->h_expires = jiffies + NLM_HOST_EXPIRE;
         }
         return host;
@@ -593,7 +595,7 @@ static void nlm_complain_hosts(struct net *net)
                 if (net && host->net != net)
                         continue;
                 dprintk(" %s (cnt %d use %d exp %ld net %x)\n",
-                        host->h_name, atomic_read(&host->h_count),
+                        host->h_name, refcount_read(&host->h_count),
                         host->h_inuse, host->h_expires, host->net->ns.inum);
         }
 }
@@ -662,16 +664,16 @@ nlm_gc_hosts(struct net *net)
         for_each_host_safe(host, next, chain, nlm_server_hosts) {
                 if (net && host->net != net)
                         continue;
-                if (atomic_read(&host->h_count) || host->h_inuse
-                    || time_before(jiffies, host->h_expires)) {
+                if (host->h_inuse || time_before(jiffies, host->h_expires)) {
                         dprintk("nlm_gc_hosts skipping %s "
                                 "(cnt %d use %d exp %ld net %x)\n",
-                                host->h_name, atomic_read(&host->h_count),
+                                host->h_name, refcount_read(&host->h_count),
                                 host->h_inuse, host->h_expires,
                                 host->net->ns.inum);
                         continue;
                 }
-                nlm_destroy_host_locked(host);
+                if (refcount_dec_if_one(&host->h_count))
+                        nlm_destroy_host_locked(host);
         }
 
         if (net) {
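
The nlm_gc_hosts() hunk is the one behavioral change in this file: a refcount_t must never be decremented to zero blindly, so instead of reading the counter and then destroying the host, the garbage collector now calls refcount_dec_if_one(), which succeeds only when it can atomically move the count from exactly 1 to 0. A minimal sketch of that operation, assuming C11 atomics rather than the kernel's implementation:

#include <stdatomic.h>
#include <stdbool.h>

/* Succeed only when we are provably the last holder: 1 -> 0 atomically. */
static bool dec_if_one(atomic_int *count)
{
        int expected = 1;

        return atomic_compare_exchange_strong(count, &expected, 0);
}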
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 96cfb2967ac7..654594ef4f94 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -191,7 +191,7 @@ void nsm_unmonitor(const struct nlm_host *host)
         struct nsm_res res;
         int status;
 
-        if (atomic_read(&nsm->sm_count) == 1
+        if (refcount_read(&nsm->sm_count) == 1
          && nsm->sm_monitored && !nsm->sm_sticky) {
                 dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
 
@@ -279,7 +279,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
         if (unlikely(new == NULL))
                 return NULL;
 
-        atomic_set(&new->sm_count, 1);
+        refcount_set(&new->sm_count, 1);
         new->sm_name = (char *)(new + 1);
         memcpy(nsm_addr(new), sap, salen);
         new->sm_addrlen = salen;
@@ -337,13 +337,13 @@ retry:
         cached = nsm_lookup_addr(&ln->nsm_handles, sap);
 
         if (cached != NULL) {
-                atomic_inc(&cached->sm_count);
+                refcount_inc(&cached->sm_count);
                 spin_unlock(&nsm_lock);
                 kfree(new);
                 dprintk("lockd: found nsm_handle for %s (%s), "
                         "cnt %d\n", cached->sm_name,
                         cached->sm_addrbuf,
-                        atomic_read(&cached->sm_count));
+                        refcount_read(&cached->sm_count));
                 return cached;
         }
 
@@ -388,12 +388,12 @@ struct nsm_handle *nsm_reboot_lookup(const struct net *net,
                 return cached;
         }
 
-        atomic_inc(&cached->sm_count);
+        refcount_inc(&cached->sm_count);
         spin_unlock(&nsm_lock);
 
         dprintk("lockd: host %s (%s) rebooted, cnt %d\n",
                 cached->sm_name, cached->sm_addrbuf,
-                atomic_read(&cached->sm_count));
+                refcount_read(&cached->sm_count));
         return cached;
 }
 
@@ -404,7 +404,7 @@ struct nsm_handle *nsm_reboot_lookup(const struct net *net,
  */
 void nsm_release(struct nsm_handle *nsm)
 {
-        if (atomic_dec_and_lock(&nsm->sm_count, &nsm_lock)) {
+        if (refcount_dec_and_lock(&nsm->sm_count, &nsm_lock)) {
                 list_del(&nsm->sm_link);
                 spin_unlock(&nsm_lock);
                 dprintk("lockd: destroyed nsm_handle for %s (%s)\n",
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0d670c5c378f..ea77c66d3cc3 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -295,7 +295,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 
 void nlmsvc_release_call(struct nlm_rqst *call)
 {
-        if (!atomic_dec_and_test(&call->a_count))
+        if (!refcount_dec_and_test(&call->a_count))
                 return;
         nlmsvc_release_host(call->a_host);
         kfree(call);
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 995d707537da..7cb5c38c19e4 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -137,6 +137,11 @@ bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
         return bio;
 }
 
+static bool offset_in_map(u64 offset, struct pnfs_block_dev_map *map)
+{
+        return offset >= map->start && offset < map->start + map->len;
+}
+
 static struct bio *
 do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
                 struct page *page, struct pnfs_block_dev_map *map,
@@ -156,8 +161,8 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
 
         /* translate to physical disk offset */
         disk_addr = (u64)isect << SECTOR_SHIFT;
-        if (disk_addr < map->start || disk_addr >= map->start + map->len) {
-                if (!dev->map(dev, disk_addr, map))
+        if (!offset_in_map(disk_addr, map)) {
+                if (!dev->map(dev, disk_addr, map) || !offset_in_map(disk_addr, map))
                         return ERR_PTR(-EIO);
                 bio = bl_submit_bio(bio);
         }
@@ -184,6 +189,29 @@ retry:
         return bio;
 }
 
+static void bl_mark_devices_unavailable(struct nfs_pgio_header *header, bool rw)
+{
+        struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
+        size_t bytes_left = header->args.count;
+        sector_t isect, extent_length = 0;
+        struct pnfs_block_extent be;
+
+        isect = header->args.offset >> SECTOR_SHIFT;
+        bytes_left += header->args.offset - (isect << SECTOR_SHIFT);
+
+        while (bytes_left > 0) {
+                if (!ext_tree_lookup(bl, isect, &be, rw))
+                        return;
+                extent_length = be.be_length - (isect - be.be_f_offset);
+                nfs4_mark_deviceid_unavailable(be.be_device);
+                isect += extent_length;
+                if (bytes_left > extent_length << SECTOR_SHIFT)
+                        bytes_left -= extent_length << SECTOR_SHIFT;
+                else
+                        bytes_left = 0;
+        }
+}
+
 static void bl_end_io_read(struct bio *bio)
 {
         struct parallel_io *par = bio->bi_private;
@@ -194,6 +222,7 @@ static void bl_end_io_read(struct bio *bio)
                 if (!header->pnfs_error)
                         header->pnfs_error = -EIO;
                 pnfs_set_lo_fail(header->lseg);
+                bl_mark_devices_unavailable(header, false);
         }
 
         bio_put(bio);
@@ -323,6 +352,7 @@ static void bl_end_io_write(struct bio *bio)
                 if (!header->pnfs_error)
                         header->pnfs_error = -EIO;
                 pnfs_set_lo_fail(header->lseg);
+                bl_mark_devices_unavailable(header, true);
         }
         bio_put(bio);
         put_parallel(par);
@@ -552,6 +582,31 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
         return 0;
 }
 
+static struct nfs4_deviceid_node *
+bl_find_get_deviceid(struct nfs_server *server,
+                const struct nfs4_deviceid *id, struct rpc_cred *cred,
+                gfp_t gfp_mask)
+{
+        struct nfs4_deviceid_node *node;
+        unsigned long start, end;
+
+retry:
+        node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
+        if (!node)
+                return ERR_PTR(-ENODEV);
+
+        if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
+                return node;
+
+        end = jiffies;
+        start = end - PNFS_DEVICE_RETRY_TIMEOUT;
+        if (!time_in_range(node->timestamp_unavailable, start, end)) {
+                nfs4_delete_deviceid(node->ld, node->nfs_client, id);
+                goto retry;
+        }
+        return ERR_PTR(-ENODEV);
+}
+
 static int
 bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
                 struct layout_verification *lv, struct list_head *extents,
@@ -573,16 +628,18 @@ bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
         memcpy(&id, p, NFS4_DEVICEID4_SIZE);
         p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
 
-        error = -EIO;
-        be->be_device = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
+        be->be_device = bl_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
                                                 lo->plh_lc_cred, gfp_mask);
-        if (!be->be_device)
+        if (IS_ERR(be->be_device)) {
+                error = PTR_ERR(be->be_device);
                 goto out_free_be;
+        }
 
         /*
          * The next three values are read in as bytes, but stored in the
          * extent structure in 512-byte granularity.
          */
+        error = -EIO;
         if (decode_sector_number(&p, &be->be_f_offset) < 0)
                 goto out_put_deviceid;
         if (decode_sector_number(&p, &be->be_length) < 0)
@@ -692,11 +749,16 @@ out_free_scratch:
         __free_page(scratch);
 out:
         dprintk("%s returns %d\n", __func__, status);
-        if (status) {
+        switch (status) {
+        case -ENODEV:
+                /* Our extent block devices are unavailable */
+                set_bit(NFS_LSEG_UNAVAILABLE, &lseg->pls_flags);
+        case 0:
+                return lseg;
+        default:
                 kfree(lseg);
                 return ERR_PTR(status);
         }
-        return lseg;
 }
 
 static void
@@ -798,6 +860,13 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
         }
 
         pnfs_generic_pg_init_read(pgio, req);
+
+        if (pgio->pg_lseg &&
+                test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {
+                pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
+                pnfs_set_lo_fail(pgio->pg_lseg);
+                nfs_pageio_reset_read_mds(pgio);
+        }
 }
 
 /*
@@ -853,6 +922,14 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
                 wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
 
         pnfs_generic_pg_init_write(pgio, req, wb_size);
+
+        if (pgio->pg_lseg &&
+                test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {
+
+                pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
+                pnfs_set_lo_fail(pgio->pg_lseg);
+                nfs_pageio_reset_write_mds(pgio);
+        }
 }
 
 /*
@@ -887,6 +964,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
         .name = "LAYOUT_BLOCK_VOLUME",
         .owner = THIS_MODULE,
         .flags = PNFS_LAYOUTRET_ON_SETATTR |
+                 PNFS_LAYOUTRET_ON_ERROR |
                  PNFS_READ_WHOLE_PAGE,
         .read_pagelist = bl_read_pagelist,
         .write_pagelist = bl_write_pagelist,
@@ -910,6 +988,7 @@ static struct pnfs_layoutdriver_type scsilayout_type = {
         .name = "LAYOUT_SCSI",
         .owner = THIS_MODULE,
         .flags = PNFS_LAYOUTRET_ON_SETATTR |
+                 PNFS_LAYOUTRET_ON_ERROR |
                  PNFS_READ_WHOLE_PAGE,
         .read_pagelist = bl_read_pagelist,
         .write_pagelist = bl_write_pagelist,
@@ -967,6 +1046,7 @@ static void __exit nfs4blocklayout_exit(void)
 }
 
 MODULE_ALIAS("nfs-layouttype4-3");
+MODULE_ALIAS("nfs-layouttype4-5");
 
 module_init(nfs4blocklayout_init);
 module_exit(nfs4blocklayout_exit);
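
Taken together, the blocklayout hunks implement a fail-fast path: an I/O error marks every device backing the request unavailable (bl_mark_devices_unavailable), later layout processing sees the unavailable bit through bl_find_get_deviceid() and falls back to the MDS, and the cached device is retried only after PNFS_DEVICE_RETRY_TIMEOUT has elapsed, at which point the stale node is deleted and looked up fresh. A userspace sketch of that back-off window, with illustrative names (cache_entry and RETRY_TIMEOUT are not from the patch):

#include <stdbool.h>
#include <time.h>

#define RETRY_TIMEOUT 120       /* seconds; mirrors PNFS_DEVICE_RETRY_TIMEOUT */

struct cache_entry {
        bool unavailable;
        time_t timestamp_unavailable;
};

static bool entry_usable(const struct cache_entry *e, time_t now)
{
        if (!e->unavailable)
                return true;
        /* Still inside the back-off window: keep failing fast. */
        if (now - e->timestamp_unavailable < RETRY_TIMEOUT)
                return false;
        return true;    /* window expired: caller evicts and retries */
}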
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index efc007f00742..716bc75e9ed2 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -92,10 +92,9 @@ struct pnfs_block_volume {
 };
 
 struct pnfs_block_dev_map {
-        sector_t start;
-        sector_t len;
-
-        sector_t disk_offset;
+        u64 start;
+        u64 len;
+        u64 disk_offset;
         struct block_device *bdev;
 };
 
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 95f74bd2c067..a7efd83779d2 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -533,14 +533,11 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
                 goto out_free_volumes;
 
         ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
-        if (ret) {
-                bl_free_device(top);
-                kfree(top);
-                goto out_free_volumes;
-        }
 
         node = &top->node;
         nfs4_init_deviceid_node(node, server, &pdev->dev_id);
+        if (ret)
+                nfs4_mark_deviceid_unavailable(node);
 
 out_free_volumes:
         kfree(volumes);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d2972d537469..8c10b0562e75 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -775,10 +775,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 
         spin_lock(&dreq->lock);
 
-        if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
-                dreq->flags = 0;
+        if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
                 dreq->error = hdr->error;
-        }
         if (dreq->error == 0) {
                 nfs_direct_good_bytes(dreq, hdr);
                 if (nfs_write_need_commit(hdr)) {
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 83fd09fc8f77..ab5de3246c5c 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -48,10 +48,6 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
                 *max_len = len;
                 return FILEID_INVALID;
         }
-        if (IS_AUTOMOUNT(inode)) {
-                *max_len = FILEID_INVALID;
-                goto out;
-        }
 
         p[FILEID_HIGH_OFF] = NFS_FILEID(inode) >> 32;
         p[FILEID_LOW_OFF] = NFS_FILEID(inode);
@@ -59,7 +55,6 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
         p[len - 1] = 0;         /* Padding */
         nfs_copy_fh(clnt_fh, server_fh);
         *max_len = len;
-out:
         dprintk("%s: result fh fileid %llu mode %u size %d\n",
                 __func__, NFS_FILEID(inode), inode->i_mode, *max_len);
         return *max_len;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 4e54d8b5413a..d175724ff566 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -895,9 +895,7 @@ fl_pnfs_update_layout(struct inode *ino,
 
         lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
                                   gfp_flags);
-        if (!lseg)
-                lseg = ERR_PTR(-ENOMEM);
-        if (IS_ERR(lseg))
+        if (IS_ERR_OR_NULL(lseg))
                 goto out;
 
         lo = NFS_I(ino)->layout;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 93552c482992..ceeaf0fb6657 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -735,12 +735,20 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
                 u32 request_mask, unsigned int query_flags)
 {
         struct inode *inode = d_inode(path->dentry);
-        int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
+        struct nfs_server *server = NFS_SERVER(inode);
+        unsigned long cache_validity;
         int err = 0;
+        bool force_sync = query_flags & AT_STATX_FORCE_SYNC;
+        bool do_update = false;
 
         trace_nfs_getattr_enter(inode);
+
+        if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync)
+                goto out_no_update;
+
         /* Flush out writes to the server in order to update c/mtime. */
-        if (S_ISREG(inode->i_mode)) {
+        if ((request_mask & (STATX_CTIME|STATX_MTIME)) &&
+                        S_ISREG(inode->i_mode)) {
                 err = filemap_write_and_wait(inode->i_mapping);
                 if (err)
                         goto out;
@@ -757,24 +765,42 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
          */
         if ((path->mnt->mnt_flags & MNT_NOATIME) ||
             ((path->mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
-                need_atime = 0;
+                request_mask &= ~STATX_ATIME;
 
-        if (need_atime || nfs_need_revalidate_inode(inode)) {
-                struct nfs_server *server = NFS_SERVER(inode);
-
+        /* Is the user requesting attributes that might need revalidation? */
+        if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME|
+                        STATX_MTIME|STATX_UID|STATX_GID|
+                        STATX_SIZE|STATX_BLOCKS)))
+                goto out_no_revalidate;
+
+        /* Check whether the cached attributes are stale */
+        do_update |= force_sync || nfs_attribute_cache_expired(inode);
+        cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+        do_update |= cache_validity &
+                (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL);
+        if (request_mask & STATX_ATIME)
+                do_update |= cache_validity & NFS_INO_INVALID_ATIME;
+        if (request_mask & (STATX_CTIME|STATX_MTIME))
+                do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE;
+        if (do_update) {
+                /* Update the attribute cache */
                 if (!(server->flags & NFS_MOUNT_NOAC))
                         nfs_readdirplus_parent_cache_miss(path->dentry);
                 else
                         nfs_readdirplus_parent_cache_hit(path->dentry);
                 err = __nfs_revalidate_inode(server, inode);
+                if (err)
+                        goto out;
         } else
                 nfs_readdirplus_parent_cache_hit(path->dentry);
-        if (!err) {
-                generic_fillattr(inode, stat);
-                stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
-                if (S_ISDIR(inode->i_mode))
-                        stat->blksize = NFS_SERVER(inode)->dtsize;
-        }
+out_no_revalidate:
+        /* Only return attributes that were revalidated. */
+        stat->result_mask &= request_mask;
+out_no_update:
+        generic_fillattr(inode, stat);
+        stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
+        if (S_ISDIR(inode->i_mode))
+                stat->blksize = NFS_SERVER(inode)->dtsize;
 out:
         trace_nfs_getattr_exit(inode, err);
         return err;
@@ -1144,7 +1170,6 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
 
         if (mapping->nrpages != 0) {
                 if (S_ISREG(inode->i_mode)) {
-                        unmap_mapping_range(mapping, 0, 0, 0);
                         ret = nfs_sync_mapping(mapping);
                         if (ret < 0)
                                 return ret;
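
The nfs_getattr() rewrite wires statx(2) semantics into NFS: AT_STATX_DONT_SYNC short-circuits straight to the cached attributes, and the write flush plus revalidation now happen only when request_mask actually covers attributes that could be stale. A small userspace program (an illustration, not part of the patch; it assumes glibc's statx wrapper) shows the knobs this exposes:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
        struct statx stx;
        const char *path = argc > 1 ? argv[1] : ".";

        /* Ask only for the size, and allow a purely cached answer;
         * on NFS this now skips both the flush and the revalidation. */
        if (statx(AT_FDCWD, path, AT_STATX_DONT_SYNC, STATX_SIZE, &stx) != 0) {
                perror("statx");
                return 1;
        }
        /* stx_mask reports which fields the filesystem actually filled in. */
        printf("size=%llu mask=0x%x\n",
               (unsigned long long)stx.stx_size, stx.stx_mask);
        return 0;
}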
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
index 20fef85d2bb1..9034b4926909 100644
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -99,7 +99,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
 {
         if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) {
                 set_bit(NFS_INO_ODIRECT, &nfsi->flags);
-                nfs_wb_all(inode);
+                nfs_sync_mapping(inode->i_mapping);
         }
 }
 
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 65a7e5da508c..04612c24d394 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -861,6 +861,7 @@ static int nfs4_set_client(struct nfs_server *server,
                 set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
         if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
                 set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
+        server->port = rpc_get_port(addr);
 
         /* Allocate or find a client reference we can use */
         clp = nfs_get_client(&cl_init);
@@ -1123,19 +1124,36 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
         /* Initialise the client representation from the parent server */
         nfs_server_copy_userdata(server, parent_server);
 
-        /* Get a client representation.
-         * Note: NFSv4 always uses TCP, */
+        /* Get a client representation */
+#ifdef CONFIG_SUNRPC_XPRT_RDMA
+        rpc_set_port(data->addr, NFS_RDMA_PORT);
         error = nfs4_set_client(server, data->hostname,
                                 data->addr,
                                 data->addrlen,
                                 parent_client->cl_ipaddr,
-                                rpc_protocol(parent_server->client),
+                                XPRT_TRANSPORT_RDMA,
+                                parent_server->client->cl_timeout,
+                                parent_client->cl_mvops->minor_version,
+                                parent_client->cl_net);
+        if (!error)
+                goto init_server;
+#endif  /* CONFIG_SUNRPC_XPRT_RDMA */
+
+        rpc_set_port(data->addr, NFS_PORT);
+        error = nfs4_set_client(server, data->hostname,
+                                data->addr,
+                                data->addrlen,
+                                parent_client->cl_ipaddr,
+                                XPRT_TRANSPORT_TCP,
                                 parent_server->client->cl_timeout,
                                 parent_client->cl_mvops->minor_version,
                                 parent_client->cl_net);
         if (error < 0)
                 goto error;
 
+#ifdef CONFIG_SUNRPC_XPRT_RDMA
+init_server:
+#endif
         error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
         if (error < 0)
                 goto error;
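
The referral change drops the old "NFSv4 always uses TCP" assumption: when CONFIG_SUNRPC_XPRT_RDMA is built in, the client first tries the referral over RDMA on NFS_RDMA_PORT and quietly falls back to TCP on NFS_PORT if that fails. Stripped of the RPC details, the control flow looks roughly like this (setup_fn and the transport enum are hypothetical, for illustration only):

enum transport { XPRT_RDMA, XPRT_TCP };

/* Hypothetical setup hook: returns 0 on success, negative errno on failure. */
typedef int (*setup_fn)(enum transport t);

static int set_client(setup_fn setup)
{
        int error;

#ifdef CONFIG_SUNRPC_XPRT_RDMA
        error = setup(XPRT_RDMA);
        if (!error)
                return 0;       /* RDMA worked; use it */
#endif
        /* Either RDMA is not compiled in or it failed: fall back to TCP. */
        error = setup(XPRT_TCP);
        return error;
}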
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 30426c1a1bbd..22dc30a679a0 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -568,9 +568,13 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
         struct idmap_msg *im;
         struct idmap *idmap = (struct idmap *)aux;
         struct key *key = cons->key;
-        int ret = -ENOMEM;
+        int ret = -ENOKEY;
+
+        if (!aux)
+                goto out1;
 
         /* msg and im are freed in idmap_pipe_destroy_msg */
+        ret = -ENOMEM;
         data = kzalloc(sizeof(*data), GFP_KERNEL);
         if (!data)
                 goto out1;
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 8c3f327d858d..24f06dcc2b08 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -270,8 +270,6 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
                 if (mountdata->addrlen == 0)
                         continue;
 
-                rpc_set_port(mountdata->addr, NFS_PORT);
-
                 memcpy(page2, buf->data, buf->len);
                 page2[buf->len] = '\0';
                 mountdata->hostname = page2;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 17a03f2c4330..47f3c273245e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2020,7 +2020,7 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
         return ret;
 }
 
-static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, int err)
+static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, struct file_lock *fl, int err)
 {
         switch (err) {
                 default:
@@ -2067,7 +2067,11 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
                         return -EAGAIN;
                 case -ENOMEM:
                 case -NFS4ERR_DENIED:
-                        /* kill_proc(fl->fl_pid, SIGLOST, 1); */
+                        if (fl) {
+                                struct nfs4_lock_state *lsp = fl->fl_u.nfs4_fl.owner;
+                                if (lsp)
+                                        set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
+                        }
                         return 0;
         }
         return err;
@@ -2103,7 +2107,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
                 err = nfs4_open_recover_helper(opendata, FMODE_READ);
         }
         nfs4_opendata_put(opendata);
-        return nfs4_handle_delegation_recall_error(server, state, stateid, err);
+        return nfs4_handle_delegation_recall_error(server, state, stateid, NULL, err);
 }
 
 static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
@@ -3150,6 +3154,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
         struct nfs4_state *state = calldata->state;
         struct nfs_server *server = NFS_SERVER(calldata->inode);
         nfs4_stateid *res_stateid = NULL;
+        struct nfs4_exception exception = {
+                .state = state,
+                .inode = calldata->inode,
+                .stateid = &calldata->arg.stateid,
+        };
 
         dprintk("%s: begin!\n", __func__);
         if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -3215,7 +3224,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
                 case -NFS4ERR_BAD_STATEID:
                         break;
                 default:
-                        if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+                        task->tk_status = nfs4_async_handle_exception(task,
+                                        server, task->tk_status, &exception);
+                        if (exception.retry)
                                 goto out_restart;
         }
         nfs_clear_open_stateid(state, &calldata->arg.stateid,
@@ -5759,6 +5770,10 @@ struct nfs4_delegreturndata {
 static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 {
         struct nfs4_delegreturndata *data = calldata;
+        struct nfs4_exception exception = {
+                .inode = data->inode,
+                .stateid = &data->stateid,
+        };
 
         if (!nfs4_sequence_done(task, &data->res.seq_res))
                 return;
@@ -5820,10 +5835,11 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
                 }
                 /* Fallthrough */
         default:
-                if (nfs4_async_handle_error(task, data->res.server,
-                                            NULL, NULL) == -EAGAIN) {
+                task->tk_status = nfs4_async_handle_exception(task,
+                                data->res.server, task->tk_status,
+                                &exception);
+                if (exception.retry)
                         goto out_restart;
-                }
         }
         data->rpc_status = task->tk_status;
         return;
@@ -6061,6 +6077,10 @@ static void nfs4_locku_release_calldata(void *data)
 static void nfs4_locku_done(struct rpc_task *task, void *data)
 {
         struct nfs4_unlockdata *calldata = data;
+        struct nfs4_exception exception = {
+                .inode = calldata->lsp->ls_state->inode,
+                .stateid = &calldata->arg.stateid,
+        };
 
         if (!nfs4_sequence_done(task, &calldata->res.seq_res))
                 return;
@@ -6084,8 +6104,10 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
                         rpc_restart_call_prepare(task);
                         break;
                 default:
-                        if (nfs4_async_handle_error(task, calldata->server,
-                                                    NULL, NULL) == -EAGAIN)
+                        task->tk_status = nfs4_async_handle_exception(task,
+                                        calldata->server, task->tk_status,
+                                        &exception);
+                        if (exception.retry)
                                 rpc_restart_call_prepare(task);
         }
         nfs_release_seqid(calldata->arg.seqid);
@@ -6741,7 +6763,7 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
         if (err != 0)
                 return err;
         err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
-        return nfs4_handle_delegation_recall_error(server, state, stateid, err);
+        return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err);
 }
 
 struct nfs_release_lockowner_data {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e4f4a09ed9f4..91a4d4eeb235 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1482,6 +1482,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
         struct inode *inode = state->inode;
         struct nfs_inode *nfsi = NFS_I(inode);
         struct file_lock *fl;
+        struct nfs4_lock_state *lsp;
         int status = 0;
         struct file_lock_context *flctx = inode->i_flctx;
         struct list_head *list;
@@ -1522,7 +1523,9 @@ restart:
                 case -NFS4ERR_DENIED:
                 case -NFS4ERR_RECLAIM_BAD:
                 case -NFS4ERR_RECLAIM_CONFLICT:
-                        /* kill_proc(fl->fl_pid, SIGLOST, 1); */
+                        lsp = fl->fl_u.nfs4_fl.owner;
+                        if (lsp)
+                                set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
                         status = 0;
                 }
                 spin_lock(&flctx->flc_lock);
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 0d91d84e5822..c394e4447100 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -32,7 +32,7 @@ static struct ctl_table nfs4_cb_sysctls[] = {
                 .data = &nfs_idmap_cache_timeout,
                 .maxlen = sizeof(int),
                 .mode = 0644,
-                .proc_handler = proc_dointvec_jiffies,
+                .proc_handler = proc_dointvec,
         },
         { }
 };
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 77c6729e57f0..65c9c4175145 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7678,6 +7678,22 @@ nfs4_stat_to_errno(int stat)
         .p_name = #proc, \
 }
 
+#if defined(CONFIG_NFS_V4_1)
+#define PROC41(proc, argtype, restype) \
+        PROC(proc, argtype, restype)
+#else
+#define PROC41(proc, argtype, restype) \
+        STUB(proc)
+#endif
+
+#if defined(CONFIG_NFS_V4_2)
+#define PROC42(proc, argtype, restype) \
+        PROC(proc, argtype, restype)
+#else
+#define PROC42(proc, argtype, restype) \
+        STUB(proc)
+#endif
+
 const struct rpc_procinfo nfs4_procedures[] = {
         PROC(READ, enc_read, dec_read),
         PROC(WRITE, enc_write, dec_write),
@@ -7698,7 +7714,6 @@ const struct rpc_procinfo nfs4_procedures[] = {
         PROC(ACCESS, enc_access, dec_access),
         PROC(GETATTR, enc_getattr, dec_getattr),
         PROC(LOOKUP, enc_lookup, dec_lookup),
-        PROC(LOOKUPP, enc_lookupp, dec_lookupp),
         PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
         PROC(REMOVE, enc_remove, dec_remove),
         PROC(RENAME, enc_rename, dec_rename),
@@ -7717,33 +7732,30 @@ const struct rpc_procinfo nfs4_procedures[] = {
         PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
         PROC(SECINFO, enc_secinfo, dec_secinfo),
         PROC(FSID_PRESENT, enc_fsid_present, dec_fsid_present),
-#if defined(CONFIG_NFS_V4_1)
-        PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
-        PROC(CREATE_SESSION, enc_create_session, dec_create_session),
-        PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
-        PROC(SEQUENCE, enc_sequence, dec_sequence),
-        PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
-        PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
-        PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
-        PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
-        PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
-        PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn),
-        PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name),
-        PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
-        PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
+        PROC41(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
+        PROC41(CREATE_SESSION, enc_create_session, dec_create_session),
+        PROC41(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
+        PROC41(SEQUENCE, enc_sequence, dec_sequence),
+        PROC41(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
+        PROC41(RECLAIM_COMPLETE,enc_reclaim_complete, dec_reclaim_complete),
+        PROC41(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
+        PROC41(LAYOUTGET, enc_layoutget, dec_layoutget),
+        PROC41(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
+        PROC41(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn),
+        PROC41(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name),
+        PROC41(TEST_STATEID, enc_test_stateid, dec_test_stateid),
+        PROC41(FREE_STATEID, enc_free_stateid, dec_free_stateid),
         STUB(GETDEVICELIST),
-        PROC(BIND_CONN_TO_SESSION,
+        PROC41(BIND_CONN_TO_SESSION,
                         enc_bind_conn_to_session, dec_bind_conn_to_session),
-        PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid),
-#endif /* CONFIG_NFS_V4_1 */
-#ifdef CONFIG_NFS_V4_2
-        PROC(SEEK, enc_seek, dec_seek),
-        PROC(ALLOCATE, enc_allocate, dec_allocate),
-        PROC(DEALLOCATE, enc_deallocate, dec_deallocate),
-        PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
-        PROC(CLONE, enc_clone, dec_clone),
-        PROC(COPY, enc_copy, dec_copy),
-#endif /* CONFIG_NFS_V4_2 */
+        PROC41(DESTROY_CLIENTID,enc_destroy_clientid, dec_destroy_clientid),
+        PROC42(SEEK, enc_seek, dec_seek),
+        PROC42(ALLOCATE, enc_allocate, dec_allocate),
+        PROC42(DEALLOCATE, enc_deallocate, dec_deallocate),
+        PROC42(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
+        PROC42(CLONE, enc_clone, dec_clone),
+        PROC42(COPY, enc_copy, dec_copy),
+        PROC(LOOKUPP, enc_lookupp, dec_lookupp),
 };
 
 static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
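
The PROC41/PROC42 macros replace the old #if blocks inside nfs4_procedures[] so that every procedure keeps a fixed slot in the array regardless of kernel configuration: entries for disabled minor versions compile to stubs instead of disappearing, which also lets LOOKUPP move to the end unconditionally. The pattern in miniature, using a made-up CONFIG_FEATURE_X (illustrative only):

#include <stdio.h>

#define ENTRY(name)     { #name, 1 }
#define STUB(name)      { #name, 0 }

#if defined(CONFIG_FEATURE_X)
#define ENTRY_X(name)   ENTRY(name)
#else
#define ENTRY_X(name)   STUB(name)
#endif

struct op { const char *name; int implemented; };

static const struct op ops[] = {
        ENTRY(read),
        ENTRY(write),
        ENTRY_X(exchange_id),   /* stubbed out unless CONFIG_FEATURE_X */
};

int main(void)
{
        /* Array indices stay stable whether or not the feature is built. */
        for (unsigned int i = 0; i < sizeof(ops) / sizeof(ops[0]); i++)
                printf("%s: %s\n", ops[i].name,
                       ops[i].implemented ? "implemented" : "stub");
        return 0;
}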
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 610d89d8942e..bd60f8d1e181 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -797,15 +797,15 @@ TRACE_EVENT(nfs_readpage_done,
         )
 );
 
-/*
- * XXX: I tried using NFS_UNSTABLE and friends in this table, but they
- * all evaluate to 0 for some reason, even if I include linux/nfs.h.
- */
+TRACE_DEFINE_ENUM(NFS_UNSTABLE);
+TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
+TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
+
 #define nfs_show_stable(stable) \
         __print_symbolic(stable, \
-                { 0, " (UNSTABLE)" }, \
-                { 1, " (DATA_SYNC)" }, \
-                { 2, " (FILE_SYNC)" })
+                { NFS_UNSTABLE, "UNSTABLE" }, \
+                { NFS_DATA_SYNC, "DATA_SYNC" }, \
+                { NFS_FILE_SYNC, "FILE_SYNC" })
 
 TRACE_EVENT(nfs_initiate_write,
         TP_PROTO(
@@ -838,12 +838,12 @@ TRACE_EVENT(nfs_initiate_write,
 
         TP_printk(
                 "fileid=%02x:%02x:%llu fhandle=0x%08x "
-                "offset=%lld count=%lu stable=%d%s",
+                "offset=%lld count=%lu stable=%s",
                 MAJOR(__entry->dev), MINOR(__entry->dev),
                 (unsigned long long)__entry->fileid,
                 __entry->fhandle,
                 __entry->offset, __entry->count,
-                __entry->stable, nfs_show_stable(__entry->stable)
+                nfs_show_stable(__entry->stable)
         )
 );
 
@@ -882,13 +882,13 @@ TRACE_EVENT(nfs_writeback_done,
 
         TP_printk(
                 "fileid=%02x:%02x:%llu fhandle=0x%08x "
-                "offset=%lld status=%d stable=%d%s "
+                "offset=%lld status=%d stable=%s "
                 "verifier 0x%016llx",
                 MAJOR(__entry->dev), MINOR(__entry->dev),
                 (unsigned long long)__entry->fileid,
                 __entry->fhandle,
                 __entry->offset, __entry->status,
-                __entry->stable, nfs_show_stable(__entry->stable),
+                nfs_show_stable(__entry->stable),
                 __entry->verifier
         )
 );
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d0543e19098a..18a7626ac638 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -537,7 +537,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
  * @cinfo: Commit information for the call (writes only)
  */
 static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
-                              unsigned int count, unsigned int offset,
+                              unsigned int count,
                               int how, struct nfs_commit_info *cinfo)
 {
         struct nfs_page *req = hdr->req;
@@ -546,10 +546,10 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
          * NB: take care not to mess about with hdr->commit et al. */
 
         hdr->args.fh = NFS_FH(hdr->inode);
-        hdr->args.offset = req_offset(req) + offset;
+        hdr->args.offset = req_offset(req);
         /* pnfs_set_layoutcommit needs this */
         hdr->mds_offset = hdr->args.offset;
-        hdr->args.pgbase = req->wb_pgbase + offset;
+        hdr->args.pgbase = req->wb_pgbase;
         hdr->args.pages = hdr->page_array.pagevec;
         hdr->args.count = count;
         hdr->args.context = get_nfs_open_context(req->wb_context);
@@ -789,7 +789,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
                 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
         /* Set up the argument struct */
-        nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo);
+        nfs_pgio_rpcsetup(hdr, mirror->pg_count, desc->pg_ioflags, &cinfo);
         desc->pg_rpc_callops = &nfs_pgio_common_ops;
         return 0;
 }
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d602fe9e1ac8..c13e826614b5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -655,7 +655,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | |||
655 | return 0; | 655 | return 0; |
656 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) | 656 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) |
657 | if (pnfs_match_lseg_recall(lseg, recall_range, seq)) { | 657 | if (pnfs_match_lseg_recall(lseg, recall_range, seq)) { |
658 | dprintk("%s: freeing lseg %p iomode %d seq %u" | 658 | dprintk("%s: freeing lseg %p iomode %d seq %u " |
659 | "offset %llu length %llu\n", __func__, | 659 | "offset %llu length %llu\n", __func__, |
660 | lseg, lseg->pls_range.iomode, lseg->pls_seq, | 660 | lseg, lseg->pls_range.iomode, lseg->pls_seq, |
661 | lseg->pls_range.offset, lseg->pls_range.length); | 661 | lseg->pls_range.offset, lseg->pls_range.length); |
@@ -2255,7 +2255,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | |||
2255 | nfs_pageio_reset_write_mds(desc); | 2255 | nfs_pageio_reset_write_mds(desc); |
2256 | mirror->pg_recoalesce = 1; | 2256 | mirror->pg_recoalesce = 1; |
2257 | } | 2257 | } |
2258 | hdr->release(hdr); | 2258 | hdr->completion_ops->completion(hdr); |
2259 | } | 2259 | } |
2260 | 2260 | ||
2261 | static enum pnfs_try_status | 2261 | static enum pnfs_try_status |
@@ -2378,7 +2378,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | |||
2378 | nfs_pageio_reset_read_mds(desc); | 2378 | nfs_pageio_reset_read_mds(desc); |
2379 | mirror->pg_recoalesce = 1; | 2379 | mirror->pg_recoalesce = 1; |
2380 | } | 2380 | } |
2381 | hdr->release(hdr); | 2381 | hdr->completion_ops->completion(hdr); |
2382 | } | 2382 | } |
2383 | 2383 | ||
2384 | /* | 2384 | /* |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 8d507c361d98..daf6cbf5c15f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -40,6 +40,7 @@ enum { | |||
40 | NFS_LSEG_ROC, /* roc bit received from server */ | 40 | NFS_LSEG_ROC, /* roc bit received from server */ |
41 | NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */ | 41 | NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */ |
42 | NFS_LSEG_LAYOUTRETURN, /* layoutreturn bit set for layoutreturn */ | 42 | NFS_LSEG_LAYOUTRETURN, /* layoutreturn bit set for layoutreturn */ |
43 | NFS_LSEG_UNAVAILABLE, /* unavailable bit set for a temporary problem */ | ||
43 | }; | 44 | }; |
44 | 45 | ||
45 | /* Individual ip address */ | 46 | /* Individual ip address */ |
@@ -86,6 +87,7 @@ enum pnfs_try_status { | |||
86 | */ | 87 | */ |
87 | #define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ | 88 | #define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ |
88 | #define NFS4_DEF_DS_RETRANS 5 | 89 | #define NFS4_DEF_DS_RETRANS 5 |
90 | #define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ) | ||
89 | 91 | ||
90 | /* error codes for internal use */ | 92 | /* error codes for internal use */ |
91 | #define NFS4ERR_RESET_TO_MDS 12001 | 93 | #define NFS4ERR_RESET_TO_MDS 12001 |
@@ -524,8 +526,10 @@ static inline int pnfs_return_layout(struct inode *ino) | |||
524 | struct nfs_inode *nfsi = NFS_I(ino); | 526 | struct nfs_inode *nfsi = NFS_I(ino); |
525 | struct nfs_server *nfss = NFS_SERVER(ino); | 527 | struct nfs_server *nfss = NFS_SERVER(ino); |
526 | 528 | ||
527 | if (pnfs_enabled_sb(nfss) && nfsi->layout) | 529 | if (pnfs_enabled_sb(nfss) && nfsi->layout) { |
530 | set_bit(NFS_LAYOUT_RETURN_REQUESTED, &nfsi->layout->plh_flags); | ||
528 | return _pnfs_return_layout(ino); | 531 | return _pnfs_return_layout(ino); |
532 | } | ||
529 | 533 | ||
530 | return 0; | 534 | return 0; |
531 | } | 535 | } |
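Editor's note: pnfs_return_layout() now records the request by setting NFS_LAYOUT_RETURN_REQUESTED before entering the slow path. A flag like this is typically consumed with an atomic test-and-clear so that only one thread actually issues the LAYOUTRETURN; a sketch with a hypothetical helper:

	/* Hypothetical consumer: claim the request atomically */
	if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
		send_layoutreturn(lo);	/* hypothetical; real code builds the RPC */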
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 2961fcd7a2df..e8a07b3f9aaa 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c | |||
@@ -43,7 +43,6 @@ | |||
43 | #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) | 43 | #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) |
44 | #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) | 44 | #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) |
45 | 45 | ||
46 | #define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ) | ||
47 | 46 | ||
48 | static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; | 47 | static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; |
49 | static DEFINE_SPINLOCK(nfs4_deviceid_lock); | 48 | static DEFINE_SPINLOCK(nfs4_deviceid_lock); |
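Editor's note: moving PNFS_DEVICE_RETRY_TIMEOUT into pnfs.h lets files other than pnfs_dev.c honor the same back-off window. The constant is a jiffies interval, so consumers compare against it with the time_* helpers; a sketch (field name assumed for illustration):

	if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
		unsigned long start = node->timestamp_unavailable;	/* assumed field */
		unsigned long end = start + PNFS_DEVICE_RETRY_TIMEOUT;

		if (time_in_range(jiffies, start, end))
			return true;	/* still inside the retry window */
	}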
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 12b2d477836b..7428a669d7a7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1835,6 +1835,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) | |||
1835 | set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); | 1835 | set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); |
1836 | next: | 1836 | next: |
1837 | nfs_unlock_and_release_request(req); | 1837 | nfs_unlock_and_release_request(req); |
1838 | /* Latency breaker */ | ||
1839 | cond_resched(); | ||
1838 | } | 1840 | } |
1839 | nfss = NFS_SERVER(data->inode); | 1841 | nfss = NFS_SERVER(data->inode); |
1840 | if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) | 1842 | if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) |
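Editor's note: nfs_commit_release_pages() can walk a very long request list with no natural scheduling point, so a cond_resched() at the bottom of each iteration bounds scheduling latency without changing behavior. The general shape of the pattern, with hypothetical per-item work:

	/* Sketch: "latency breaker" in a long, preemptible loop */
	list_for_each_entry_safe(req, tmp, &head, wb_list) {
		release_one_request(req);	/* hypothetical per-item work */
		cond_resched();			/* yield if a reschedule is pending */
	}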
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index d7d313fb9cd4..4fd95dbeb52f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <net/ipv6.h> | 17 | #include <net/ipv6.h> |
18 | #include <linux/fs.h> | 18 | #include <linux/fs.h> |
19 | #include <linux/kref.h> | 19 | #include <linux/kref.h> |
20 | #include <linux/refcount.h> | ||
20 | #include <linux/utsname.h> | 21 | #include <linux/utsname.h> |
21 | #include <linux/lockd/bind.h> | 22 | #include <linux/lockd/bind.h> |
22 | #include <linux/lockd/xdr.h> | 23 | #include <linux/lockd/xdr.h> |
@@ -58,7 +59,7 @@ struct nlm_host { | |||
58 | u32 h_state; /* pseudo-state counter */ | 59 | u32 h_state; /* pseudo-state counter */ |
59 | u32 h_nsmstate; /* true remote NSM state */ | 60 | u32 h_nsmstate; /* true remote NSM state */ |
60 | u32 h_pidcount; /* Pseudopids */ | 61 | u32 h_pidcount; /* Pseudopids */ |
61 | atomic_t h_count; /* reference count */ | 62 | refcount_t h_count; /* reference count */ |
62 | struct mutex h_mutex; /* mutex for pmap binding */ | 63 | struct mutex h_mutex; /* mutex for pmap binding */ |
63 | unsigned long h_nextrebind; /* next portmap call */ | 64 | unsigned long h_nextrebind; /* next portmap call */ |
64 | unsigned long h_expires; /* eligible for GC */ | 65 | unsigned long h_expires; /* eligible for GC */ |
@@ -83,7 +84,7 @@ struct nlm_host { | |||
83 | 84 | ||
84 | struct nsm_handle { | 85 | struct nsm_handle { |
85 | struct list_head sm_link; | 86 | struct list_head sm_link; |
86 | atomic_t sm_count; | 87 | refcount_t sm_count; |
87 | char *sm_mon_name; | 88 | char *sm_mon_name; |
88 | char *sm_name; | 89 | char *sm_name; |
89 | struct sockaddr_storage sm_addr; | 90 | struct sockaddr_storage sm_addr; |
@@ -122,7 +123,7 @@ static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host) | |||
122 | */ | 123 | */ |
123 | struct nlm_lockowner { | 124 | struct nlm_lockowner { |
124 | struct list_head list; | 125 | struct list_head list; |
125 | atomic_t count; | 126 | refcount_t count; |
126 | 127 | ||
127 | struct nlm_host *host; | 128 | struct nlm_host *host; |
128 | fl_owner_t owner; | 129 | fl_owner_t owner; |
@@ -136,7 +137,7 @@ struct nlm_wait; | |||
136 | */ | 137 | */ |
137 | #define NLMCLNT_OHSIZE ((__NEW_UTS_LEN) + 10u) | 138 | #define NLMCLNT_OHSIZE ((__NEW_UTS_LEN) + 10u) |
138 | struct nlm_rqst { | 139 | struct nlm_rqst { |
139 | atomic_t a_count; | 140 | refcount_t a_count; |
140 | unsigned int a_flags; /* initial RPC task flags */ | 141 | unsigned int a_flags; /* initial RPC task flags */ |
141 | struct nlm_host * a_host; /* host handle */ | 142 | struct nlm_host * a_host; /* host handle */ |
142 | struct nlm_args a_args; /* arguments */ | 143 | struct nlm_args a_args; /* arguments */ |
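Editor's note: the lockd reference counts above move from atomic_t to refcount_t, which saturates on overflow and warns on underflow instead of silently wrapping. The canonical get/put shape these conversions preserve (helper names hypothetical):

	static struct nlm_host *host_get(struct nlm_host *host)
	{
		refcount_inc(&host->h_count);
		return host;
	}

	static void host_put(struct nlm_host *host)
	{
		if (refcount_dec_and_test(&host->h_count))
			kfree(host);	/* the real release path does more */
	}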
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 47adac640191..57ffaa20d564 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h | |||
@@ -457,7 +457,12 @@ enum lock_type4 { | |||
457 | 457 | ||
458 | #define NFS4_DEBUG 1 | 458 | #define NFS4_DEBUG 1 |
459 | 459 | ||
460 | /* Index of predefined Linux client operations */ | 460 | /* |
461 | * Index of predefined Linux client operations | ||
462 | * | ||
463 | * To ensure that /proc/net/rpc/nfs remains correctly ordered, please | ||
464 | * append only to this enum when adding new client operations. | ||
465 | */ | ||
461 | 466 | ||
462 | enum { | 467 | enum { |
463 | NFSPROC4_CLNT_NULL = 0, /* Unused */ | 468 | NFSPROC4_CLNT_NULL = 0, /* Unused */ |
@@ -480,7 +485,6 @@ enum { | |||
480 | NFSPROC4_CLNT_ACCESS, | 485 | NFSPROC4_CLNT_ACCESS, |
481 | NFSPROC4_CLNT_GETATTR, | 486 | NFSPROC4_CLNT_GETATTR, |
482 | NFSPROC4_CLNT_LOOKUP, | 487 | NFSPROC4_CLNT_LOOKUP, |
483 | NFSPROC4_CLNT_LOOKUPP, | ||
484 | NFSPROC4_CLNT_LOOKUP_ROOT, | 488 | NFSPROC4_CLNT_LOOKUP_ROOT, |
485 | NFSPROC4_CLNT_REMOVE, | 489 | NFSPROC4_CLNT_REMOVE, |
486 | NFSPROC4_CLNT_RENAME, | 490 | NFSPROC4_CLNT_RENAME, |
@@ -500,7 +504,6 @@ enum { | |||
500 | NFSPROC4_CLNT_SECINFO, | 504 | NFSPROC4_CLNT_SECINFO, |
501 | NFSPROC4_CLNT_FSID_PRESENT, | 505 | NFSPROC4_CLNT_FSID_PRESENT, |
502 | 506 | ||
503 | /* nfs41 */ | ||
504 | NFSPROC4_CLNT_EXCHANGE_ID, | 507 | NFSPROC4_CLNT_EXCHANGE_ID, |
505 | NFSPROC4_CLNT_CREATE_SESSION, | 508 | NFSPROC4_CLNT_CREATE_SESSION, |
506 | NFSPROC4_CLNT_DESTROY_SESSION, | 509 | NFSPROC4_CLNT_DESTROY_SESSION, |
@@ -518,13 +521,14 @@ enum { | |||
518 | NFSPROC4_CLNT_BIND_CONN_TO_SESSION, | 521 | NFSPROC4_CLNT_BIND_CONN_TO_SESSION, |
519 | NFSPROC4_CLNT_DESTROY_CLIENTID, | 522 | NFSPROC4_CLNT_DESTROY_CLIENTID, |
520 | 523 | ||
521 | /* nfs42 */ | ||
522 | NFSPROC4_CLNT_SEEK, | 524 | NFSPROC4_CLNT_SEEK, |
523 | NFSPROC4_CLNT_ALLOCATE, | 525 | NFSPROC4_CLNT_ALLOCATE, |
524 | NFSPROC4_CLNT_DEALLOCATE, | 526 | NFSPROC4_CLNT_DEALLOCATE, |
525 | NFSPROC4_CLNT_LAYOUTSTATS, | 527 | NFSPROC4_CLNT_LAYOUTSTATS, |
526 | NFSPROC4_CLNT_CLONE, | 528 | NFSPROC4_CLNT_CLONE, |
527 | NFSPROC4_CLNT_COPY, | 529 | NFSPROC4_CLNT_COPY, |
530 | |||
531 | NFSPROC4_CLNT_LOOKUPP, | ||
528 | }; | 532 | }; |
529 | 533 | ||
530 | /* nfs41 types */ | 534 | /* nfs41 types */ |
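Editor's note: because the per-operation counters in /proc/net/rpc/nfs are reported by enum position, reordering this enum would misattribute statistics; hence NFSPROC4_CLNT_LOOKUPP moves to the tail and the /* nfs41 */ and /* nfs42 */ markers, which invited insertion in the middle, are dropped. A future operation is appended the same way (name hypothetical):

	enum {
		/* ... existing entries ... */
		NFSPROC4_CLNT_LOOKUPP,
		NFSPROC4_CLNT_NEWOP,	/* hypothetical: always append here */
	};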
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 71c237e8240e..ed761f751ecb 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h | |||
@@ -179,7 +179,6 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, | |||
179 | int rpc_restart_call_prepare(struct rpc_task *); | 179 | int rpc_restart_call_prepare(struct rpc_task *); |
180 | int rpc_restart_call(struct rpc_task *); | 180 | int rpc_restart_call(struct rpc_task *); |
181 | void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); | 181 | void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); |
182 | int rpc_protocol(struct rpc_clnt *); | ||
183 | struct net * rpc_net_ns(struct rpc_clnt *); | 182 | struct net * rpc_net_ns(struct rpc_clnt *); |
184 | size_t rpc_max_payload(struct rpc_clnt *); | 183 | size_t rpc_max_payload(struct rpc_clnt *); |
185 | size_t rpc_max_bc_payload(struct rpc_clnt *); | 184 | size_t rpc_max_bc_payload(struct rpc_clnt *); |
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 221b7a2e5406..5859563e3c1f 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h | |||
@@ -64,7 +64,7 @@ enum rpcrdma_memreg { | |||
64 | RPCRDMA_MEMWINDOWS, | 64 | RPCRDMA_MEMWINDOWS, |
65 | RPCRDMA_MEMWINDOWS_ASYNC, | 65 | RPCRDMA_MEMWINDOWS_ASYNC, |
66 | RPCRDMA_MTHCAFMR, | 66 | RPCRDMA_MTHCAFMR, |
67 | RPCRDMA_FRMR, | 67 | RPCRDMA_FRWR, |
68 | RPCRDMA_ALLPHYSICAL, | 68 | RPCRDMA_ALLPHYSICAL, |
69 | RPCRDMA_LAST | 69 | RPCRDMA_LAST |
70 | }; | 70 | }; |
diff --git a/include/trace/events/rdma.h b/include/trace/events/rdma.h new file mode 100644 index 000000000000..aa19afc73a4e --- /dev/null +++ b/include/trace/events/rdma.h | |||
@@ -0,0 +1,129 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Copyright (c) 2017 Oracle. All rights reserved. | ||
4 | */ | ||
5 | |||
6 | /* | ||
7 | * enum ib_event_type, from include/rdma/ib_verbs.h | ||
8 | */ | ||
9 | |||
10 | #define IB_EVENT_LIST \ | ||
11 | ib_event(CQ_ERR) \ | ||
12 | ib_event(QP_FATAL) \ | ||
13 | ib_event(QP_REQ_ERR) \ | ||
14 | ib_event(QP_ACCESS_ERR) \ | ||
15 | ib_event(COMM_EST) \ | ||
16 | ib_event(SQ_DRAINED) \ | ||
17 | ib_event(PATH_MIG) \ | ||
18 | ib_event(PATH_MIG_ERR) \ | ||
19 | ib_event(DEVICE_FATAL) \ | ||
20 | ib_event(PORT_ACTIVE) \ | ||
21 | ib_event(PORT_ERR) \ | ||
22 | ib_event(LID_CHANGE) \ | ||
23 | ib_event(PKEY_CHANGE) \ | ||
24 | ib_event(SM_CHANGE) \ | ||
25 | ib_event(SRQ_ERR) \ | ||
26 | ib_event(SRQ_LIMIT_REACHED) \ | ||
27 | ib_event(QP_LAST_WQE_REACHED) \ | ||
28 | ib_event(CLIENT_REREGISTER) \ | ||
29 | ib_event(GID_CHANGE) \ | ||
30 | ib_event_end(WQ_FATAL) | ||
31 | |||
32 | #undef ib_event | ||
33 | #undef ib_event_end | ||
34 | |||
35 | #define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x); | ||
36 | #define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x); | ||
37 | |||
38 | IB_EVENT_LIST | ||
39 | |||
40 | #undef ib_event | ||
41 | #undef ib_event_end | ||
42 | |||
43 | #define ib_event(x) { IB_EVENT_##x, #x }, | ||
44 | #define ib_event_end(x) { IB_EVENT_##x, #x } | ||
45 | |||
46 | #define rdma_show_ib_event(x) \ | ||
47 | __print_symbolic(x, IB_EVENT_LIST) | ||
48 | |||
49 | /* | ||
50 | * enum ib_wc_status, from include/rdma/ib_verbs.h | ||
51 | */ | ||
52 | #define IB_WC_STATUS_LIST \ | ||
53 | ib_wc_status(SUCCESS) \ | ||
54 | ib_wc_status(LOC_LEN_ERR) \ | ||
55 | ib_wc_status(LOC_QP_OP_ERR) \ | ||
56 | ib_wc_status(LOC_EEC_OP_ERR) \ | ||
57 | ib_wc_status(LOC_PROT_ERR) \ | ||
58 | ib_wc_status(WR_FLUSH_ERR) \ | ||
59 | ib_wc_status(MW_BIND_ERR) \ | ||
60 | ib_wc_status(BAD_RESP_ERR) \ | ||
61 | ib_wc_status(LOC_ACCESS_ERR) \ | ||
62 | ib_wc_status(REM_INV_REQ_ERR) \ | ||
63 | ib_wc_status(REM_ACCESS_ERR) \ | ||
64 | ib_wc_status(REM_OP_ERR) \ | ||
65 | ib_wc_status(RETRY_EXC_ERR) \ | ||
66 | ib_wc_status(RNR_RETRY_EXC_ERR) \ | ||
67 | ib_wc_status(LOC_RDD_VIOL_ERR) \ | ||
68 | ib_wc_status(REM_INV_RD_REQ_ERR) \ | ||
69 | ib_wc_status(REM_ABORT_ERR) \ | ||
70 | ib_wc_status(INV_EECN_ERR) \ | ||
71 | ib_wc_status(INV_EEC_STATE_ERR) \ | ||
72 | ib_wc_status(FATAL_ERR) \ | ||
73 | ib_wc_status(RESP_TIMEOUT_ERR) \ | ||
74 | ib_wc_status_end(GENERAL_ERR) | ||
75 | |||
76 | #undef ib_wc_status | ||
77 | #undef ib_wc_status_end | ||
78 | |||
79 | #define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x); | ||
80 | #define ib_wc_status_end(x) TRACE_DEFINE_ENUM(IB_WC_##x); | ||
81 | |||
82 | IB_WC_STATUS_LIST | ||
83 | |||
84 | #undef ib_wc_status | ||
85 | #undef ib_wc_status_end | ||
86 | |||
87 | #define ib_wc_status(x) { IB_WC_##x, #x }, | ||
88 | #define ib_wc_status_end(x) { IB_WC_##x, #x } | ||
89 | |||
90 | #define rdma_show_wc_status(x) \ | ||
91 | __print_symbolic(x, IB_WC_STATUS_LIST) | ||
92 | |||
93 | /* | ||
94 | * enum rdma_cm_event_type, from include/rdma/rdma_cm.h | ||
95 | */ | ||
96 | #define RDMA_CM_EVENT_LIST \ | ||
97 | rdma_cm_event(ADDR_RESOLVED) \ | ||
98 | rdma_cm_event(ADDR_ERROR) \ | ||
99 | rdma_cm_event(ROUTE_RESOLVED) \ | ||
100 | rdma_cm_event(ROUTE_ERROR) \ | ||
101 | rdma_cm_event(CONNECT_REQUEST) \ | ||
102 | rdma_cm_event(CONNECT_RESPONSE) \ | ||
103 | rdma_cm_event(CONNECT_ERROR) \ | ||
104 | rdma_cm_event(UNREACHABLE) \ | ||
105 | rdma_cm_event(REJECTED) \ | ||
106 | rdma_cm_event(ESTABLISHED) \ | ||
107 | rdma_cm_event(DISCONNECTED) \ | ||
108 | rdma_cm_event(DEVICE_REMOVAL) \ | ||
109 | rdma_cm_event(MULTICAST_JOIN) \ | ||
110 | rdma_cm_event(MULTICAST_ERROR) \ | ||
111 | rdma_cm_event(ADDR_CHANGE) \ | ||
112 | rdma_cm_event_end(TIMEWAIT_EXIT) | ||
113 | |||
114 | #undef rdma_cm_event | ||
115 | #undef rdma_cm_event_end | ||
116 | |||
117 | #define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x); | ||
118 | #define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x); | ||
119 | |||
120 | RDMA_CM_EVENT_LIST | ||
121 | |||
122 | #undef rdma_cm_event | ||
123 | #undef rdma_cm_event_end | ||
124 | |||
125 | #define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x }, | ||
126 | #define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x } | ||
127 | |||
128 | #define rdma_show_cm_event(x) \ | ||
129 | __print_symbolic(x, RDMA_CM_EVENT_LIST) | ||
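Editor's note: the new header defines each enum list exactly once, then expands the per-item macros twice — first into TRACE_DEFINE_ENUM() statements so trace tooling can resolve the values, then into { value, "name" } pairs for __print_symbolic(). The same trick reduced to a toy list (all names hypothetical):

	#define COLOR_LIST \
		color(RED) \
		color_end(BLUE)

	#undef color
	#undef color_end
	#define color(x)	TRACE_DEFINE_ENUM(COLOR_##x);
	#define color_end(x)	TRACE_DEFINE_ENUM(COLOR_##x);

	COLOR_LIST

	#undef color
	#undef color_end
	#define color(x)	{ COLOR_##x, #x },
	#define color_end(x)	{ COLOR_##x, #x }

	#define show_color(x)	__print_symbolic(x, COLOR_LIST)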
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h new file mode 100644 index 000000000000..50ed3f8bf534 --- /dev/null +++ b/include/trace/events/rpcrdma.h | |||
@@ -0,0 +1,890 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Copyright (c) 2017 Oracle. All rights reserved. | ||
4 | */ | ||
5 | #undef TRACE_SYSTEM | ||
6 | #define TRACE_SYSTEM rpcrdma | ||
7 | |||
8 | #if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ) | ||
9 | #define _TRACE_RPCRDMA_H | ||
10 | |||
11 | #include <linux/tracepoint.h> | ||
12 | #include <trace/events/rdma.h> | ||
13 | |||
14 | /** | ||
15 | ** Event classes | ||
16 | **/ | ||
17 | |||
18 | DECLARE_EVENT_CLASS(xprtrdma_reply_event, | ||
19 | TP_PROTO( | ||
20 | const struct rpcrdma_rep *rep | ||
21 | ), | ||
22 | |||
23 | TP_ARGS(rep), | ||
24 | |||
25 | TP_STRUCT__entry( | ||
26 | __field(const void *, rep) | ||
27 | __field(const void *, r_xprt) | ||
28 | __field(u32, xid) | ||
29 | __field(u32, version) | ||
30 | __field(u32, proc) | ||
31 | ), | ||
32 | |||
33 | TP_fast_assign( | ||
34 | __entry->rep = rep; | ||
35 | __entry->r_xprt = rep->rr_rxprt; | ||
36 | __entry->xid = be32_to_cpu(rep->rr_xid); | ||
37 | __entry->version = be32_to_cpu(rep->rr_vers); | ||
38 | __entry->proc = be32_to_cpu(rep->rr_proc); | ||
39 | ), | ||
40 | |||
41 | TP_printk("rxprt %p xid=0x%08x rep=%p: version %u proc %u", | ||
42 | __entry->r_xprt, __entry->xid, __entry->rep, | ||
43 | __entry->version, __entry->proc | ||
44 | ) | ||
45 | ); | ||
46 | |||
47 | #define DEFINE_REPLY_EVENT(name) \ | ||
48 | DEFINE_EVENT(xprtrdma_reply_event, name, \ | ||
49 | TP_PROTO( \ | ||
50 | const struct rpcrdma_rep *rep \ | ||
51 | ), \ | ||
52 | TP_ARGS(rep)) | ||
53 | |||
54 | DECLARE_EVENT_CLASS(xprtrdma_rxprt, | ||
55 | TP_PROTO( | ||
56 | const struct rpcrdma_xprt *r_xprt | ||
57 | ), | ||
58 | |||
59 | TP_ARGS(r_xprt), | ||
60 | |||
61 | TP_STRUCT__entry( | ||
62 | __field(const void *, r_xprt) | ||
63 | __string(addr, rpcrdma_addrstr(r_xprt)) | ||
64 | __string(port, rpcrdma_portstr(r_xprt)) | ||
65 | ), | ||
66 | |||
67 | TP_fast_assign( | ||
68 | __entry->r_xprt = r_xprt; | ||
69 | __assign_str(addr, rpcrdma_addrstr(r_xprt)); | ||
70 | __assign_str(port, rpcrdma_portstr(r_xprt)); | ||
71 | ), | ||
72 | |||
73 | TP_printk("peer=[%s]:%s r_xprt=%p", | ||
74 | __get_str(addr), __get_str(port), __entry->r_xprt | ||
75 | ) | ||
76 | ); | ||
77 | |||
78 | #define DEFINE_RXPRT_EVENT(name) \ | ||
79 | DEFINE_EVENT(xprtrdma_rxprt, name, \ | ||
80 | TP_PROTO( \ | ||
81 | const struct rpcrdma_xprt *r_xprt \ | ||
82 | ), \ | ||
83 | TP_ARGS(r_xprt)) | ||
84 | |||
85 | DECLARE_EVENT_CLASS(xprtrdma_rdch_event, | ||
86 | TP_PROTO( | ||
87 | const struct rpc_task *task, | ||
88 | unsigned int pos, | ||
89 | struct rpcrdma_mr *mr, | ||
90 | int nsegs | ||
91 | ), | ||
92 | |||
93 | TP_ARGS(task, pos, mr, nsegs), | ||
94 | |||
95 | TP_STRUCT__entry( | ||
96 | __field(unsigned int, task_id) | ||
97 | __field(unsigned int, client_id) | ||
98 | __field(const void *, mr) | ||
99 | __field(unsigned int, pos) | ||
100 | __field(int, nents) | ||
101 | __field(u32, handle) | ||
102 | __field(u32, length) | ||
103 | __field(u64, offset) | ||
104 | __field(int, nsegs) | ||
105 | ), | ||
106 | |||
107 | TP_fast_assign( | ||
108 | __entry->task_id = task->tk_pid; | ||
109 | __entry->client_id = task->tk_client->cl_clid; | ||
110 | __entry->mr = mr; | ||
111 | __entry->pos = pos; | ||
112 | __entry->nents = mr->mr_nents; | ||
113 | __entry->handle = mr->mr_handle; | ||
114 | __entry->length = mr->mr_length; | ||
115 | __entry->offset = mr->mr_offset; | ||
116 | __entry->nsegs = nsegs; | ||
117 | ), | ||
118 | |||
119 | TP_printk("task:%u@%u mr=%p pos=%u %u@0x%016llx:0x%08x (%s)", | ||
120 | __entry->task_id, __entry->client_id, __entry->mr, | ||
121 | __entry->pos, __entry->length, | ||
122 | (unsigned long long)__entry->offset, __entry->handle, | ||
123 | __entry->nents < __entry->nsegs ? "more" : "last" | ||
124 | ) | ||
125 | ); | ||
126 | |||
127 | #define DEFINE_RDCH_EVENT(name) \ | ||
128 | DEFINE_EVENT(xprtrdma_rdch_event, name, \ | ||
129 | TP_PROTO( \ | ||
130 | const struct rpc_task *task, \ | ||
131 | unsigned int pos, \ | ||
132 | struct rpcrdma_mr *mr, \ | ||
133 | int nsegs \ | ||
134 | ), \ | ||
135 | TP_ARGS(task, pos, mr, nsegs)) | ||
136 | |||
137 | DECLARE_EVENT_CLASS(xprtrdma_wrch_event, | ||
138 | TP_PROTO( | ||
139 | const struct rpc_task *task, | ||
140 | struct rpcrdma_mr *mr, | ||
141 | int nsegs | ||
142 | ), | ||
143 | |||
144 | TP_ARGS(task, mr, nsegs), | ||
145 | |||
146 | TP_STRUCT__entry( | ||
147 | __field(unsigned int, task_id) | ||
148 | __field(unsigned int, client_id) | ||
149 | __field(const void *, mr) | ||
150 | __field(int, nents) | ||
151 | __field(u32, handle) | ||
152 | __field(u32, length) | ||
153 | __field(u64, offset) | ||
154 | __field(int, nsegs) | ||
155 | ), | ||
156 | |||
157 | TP_fast_assign( | ||
158 | __entry->task_id = task->tk_pid; | ||
159 | __entry->client_id = task->tk_client->cl_clid; | ||
160 | __entry->mr = mr; | ||
161 | __entry->nents = mr->mr_nents; | ||
162 | __entry->handle = mr->mr_handle; | ||
163 | __entry->length = mr->mr_length; | ||
164 | __entry->offset = mr->mr_offset; | ||
165 | __entry->nsegs = nsegs; | ||
166 | ), | ||
167 | |||
168 | TP_printk("task:%u@%u mr=%p %u@0x%016llx:0x%08x (%s)", | ||
169 | __entry->task_id, __entry->client_id, __entry->mr, | ||
170 | __entry->length, (unsigned long long)__entry->offset, | ||
171 | __entry->handle, | ||
172 | __entry->nents < __entry->nsegs ? "more" : "last" | ||
173 | ) | ||
174 | ); | ||
175 | |||
176 | #define DEFINE_WRCH_EVENT(name) \ | ||
177 | DEFINE_EVENT(xprtrdma_wrch_event, name, \ | ||
178 | TP_PROTO( \ | ||
179 | const struct rpc_task *task, \ | ||
180 | struct rpcrdma_mr *mr, \ | ||
181 | int nsegs \ | ||
182 | ), \ | ||
183 | TP_ARGS(task, mr, nsegs)) | ||
184 | |||
185 | TRACE_DEFINE_ENUM(FRWR_IS_INVALID); | ||
186 | TRACE_DEFINE_ENUM(FRWR_IS_VALID); | ||
187 | TRACE_DEFINE_ENUM(FRWR_FLUSHED_FR); | ||
188 | TRACE_DEFINE_ENUM(FRWR_FLUSHED_LI); | ||
189 | |||
190 | #define xprtrdma_show_frwr_state(x) \ | ||
191 | __print_symbolic(x, \ | ||
192 | { FRWR_IS_INVALID, "INVALID" }, \ | ||
193 | { FRWR_IS_VALID, "VALID" }, \ | ||
194 | { FRWR_FLUSHED_FR, "FLUSHED_FR" }, \ | ||
195 | { FRWR_FLUSHED_LI, "FLUSHED_LI" }) | ||
196 | |||
197 | DECLARE_EVENT_CLASS(xprtrdma_frwr_done, | ||
198 | TP_PROTO( | ||
199 | const struct ib_wc *wc, | ||
200 | const struct rpcrdma_frwr *frwr | ||
201 | ), | ||
202 | |||
203 | TP_ARGS(wc, frwr), | ||
204 | |||
205 | TP_STRUCT__entry( | ||
206 | __field(const void *, mr) | ||
207 | __field(unsigned int, state) | ||
208 | __field(unsigned int, status) | ||
209 | __field(unsigned int, vendor_err) | ||
210 | ), | ||
211 | |||
212 | TP_fast_assign( | ||
213 | __entry->mr = container_of(frwr, struct rpcrdma_mr, frwr); | ||
214 | __entry->state = frwr->fr_state; | ||
215 | __entry->status = wc->status; | ||
216 | __entry->vendor_err = __entry->status ? wc->vendor_err : 0; | ||
217 | ), | ||
218 | |||
219 | TP_printk( | ||
220 | "mr=%p state=%s: %s (%u/0x%x)", | ||
221 | __entry->mr, xprtrdma_show_frwr_state(__entry->state), | ||
222 | rdma_show_wc_status(__entry->status), | ||
223 | __entry->status, __entry->vendor_err | ||
224 | ) | ||
225 | ); | ||
226 | |||
227 | #define DEFINE_FRWR_DONE_EVENT(name) \ | ||
228 | DEFINE_EVENT(xprtrdma_frwr_done, name, \ | ||
229 | TP_PROTO( \ | ||
230 | const struct ib_wc *wc, \ | ||
231 | const struct rpcrdma_frwr *frwr \ | ||
232 | ), \ | ||
233 | TP_ARGS(wc, frwr)) | ||
234 | |||
235 | DECLARE_EVENT_CLASS(xprtrdma_mr, | ||
236 | TP_PROTO( | ||
237 | const struct rpcrdma_mr *mr | ||
238 | ), | ||
239 | |||
240 | TP_ARGS(mr), | ||
241 | |||
242 | TP_STRUCT__entry( | ||
243 | __field(const void *, mr) | ||
244 | __field(u32, handle) | ||
245 | __field(u32, length) | ||
246 | __field(u64, offset) | ||
247 | ), | ||
248 | |||
249 | TP_fast_assign( | ||
250 | __entry->mr = mr; | ||
251 | __entry->handle = mr->mr_handle; | ||
252 | __entry->length = mr->mr_length; | ||
253 | __entry->offset = mr->mr_offset; | ||
254 | ), | ||
255 | |||
256 | TP_printk("mr=%p %u@0x%016llx:0x%08x", | ||
257 | __entry->mr, __entry->length, | ||
258 | (unsigned long long)__entry->offset, | ||
259 | __entry->handle | ||
260 | ) | ||
261 | ); | ||
262 | |||
263 | #define DEFINE_MR_EVENT(name) \ | ||
264 | DEFINE_EVENT(xprtrdma_mr, name, \ | ||
265 | TP_PROTO( \ | ||
266 | const struct rpcrdma_mr *mr \ | ||
267 | ), \ | ||
268 | TP_ARGS(mr)) | ||
269 | |||
270 | DECLARE_EVENT_CLASS(xprtrdma_cb_event, | ||
271 | TP_PROTO( | ||
272 | const struct rpc_rqst *rqst | ||
273 | ), | ||
274 | |||
275 | TP_ARGS(rqst), | ||
276 | |||
277 | TP_STRUCT__entry( | ||
278 | __field(const void *, rqst) | ||
279 | __field(const void *, rep) | ||
280 | __field(const void *, req) | ||
281 | __field(u32, xid) | ||
282 | ), | ||
283 | |||
284 | TP_fast_assign( | ||
285 | __entry->rqst = rqst; | ||
286 | __entry->req = rpcr_to_rdmar(rqst); | ||
287 | __entry->rep = rpcr_to_rdmar(rqst)->rl_reply; | ||
288 | __entry->xid = be32_to_cpu(rqst->rq_xid); | ||
289 | ), | ||
290 | |||
291 | TP_printk("xid=0x%08x, rqst=%p req=%p rep=%p", | ||
292 | __entry->xid, __entry->rqst, __entry->req, __entry->rep | ||
293 | ) | ||
294 | ); | ||
295 | |||
296 | #define DEFINE_CB_EVENT(name) \ | ||
297 | DEFINE_EVENT(xprtrdma_cb_event, name, \ | ||
298 | TP_PROTO( \ | ||
299 | const struct rpc_rqst *rqst \ | ||
300 | ), \ | ||
301 | TP_ARGS(rqst)) | ||
302 | |||
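Editor's note: each DECLARE_EVENT_CLASS above factors the TP_STRUCT__entry/TP_fast_assign/TP_printk boilerplate once, so stamping out another tracepoint of the same shape is a single line via the matching DEFINE_*_EVENT macro, exactly as the connection and call sections below do (event name hypothetical):

	DEFINE_RXPRT_EVENT(xprtrdma_example_event);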
303 | /** | ||
304 | ** Connection events | ||
305 | **/ | ||
306 | |||
307 | TRACE_EVENT(xprtrdma_conn_upcall, | ||
308 | TP_PROTO( | ||
309 | const struct rpcrdma_xprt *r_xprt, | ||
310 | struct rdma_cm_event *event | ||
311 | ), | ||
312 | |||
313 | TP_ARGS(r_xprt, event), | ||
314 | |||
315 | TP_STRUCT__entry( | ||
316 | __field(const void *, r_xprt) | ||
317 | __field(unsigned int, event) | ||
318 | __field(int, status) | ||
319 | __string(addr, rpcrdma_addrstr(r_xprt)) | ||
320 | __string(port, rpcrdma_portstr(r_xprt)) | ||
321 | ), | ||
322 | |||
323 | TP_fast_assign( | ||
324 | __entry->r_xprt = r_xprt; | ||
325 | __entry->event = event->event; | ||
326 | __entry->status = event->status; | ||
327 | __assign_str(addr, rpcrdma_addrstr(r_xprt)); | ||
328 | __assign_str(port, rpcrdma_portstr(r_xprt)); | ||
329 | ), | ||
330 | |||
331 | TP_printk("peer=[%s]:%s r_xprt=%p: %s (%u/%d)", | ||
332 | __get_str(addr), __get_str(port), | ||
333 | __entry->r_xprt, rdma_show_cm_event(__entry->event), | ||
334 | __entry->event, __entry->status | ||
335 | ) | ||
336 | ); | ||
337 | |||
338 | TRACE_EVENT(xprtrdma_disconnect, | ||
339 | TP_PROTO( | ||
340 | const struct rpcrdma_xprt *r_xprt, | ||
341 | int status | ||
342 | ), | ||
343 | |||
344 | TP_ARGS(r_xprt, status), | ||
345 | |||
346 | TP_STRUCT__entry( | ||
347 | __field(const void *, r_xprt) | ||
348 | __field(int, status) | ||
349 | __field(int, connected) | ||
350 | __string(addr, rpcrdma_addrstr(r_xprt)) | ||
351 | __string(port, rpcrdma_portstr(r_xprt)) | ||
352 | ), | ||
353 | |||
354 | TP_fast_assign( | ||
355 | __entry->r_xprt = r_xprt; | ||
356 | __entry->status = status; | ||
357 | __entry->connected = r_xprt->rx_ep.rep_connected; | ||
358 | __assign_str(addr, rpcrdma_addrstr(r_xprt)); | ||
359 | __assign_str(port, rpcrdma_portstr(r_xprt)); | ||
360 | ), | ||
361 | |||
362 | TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected", | ||
363 | __get_str(addr), __get_str(port), | ||
364 | __entry->r_xprt, __entry->status, | ||
365 | __entry->connected == 1 ? "still " : "dis" | ||
366 | ) | ||
367 | ); | ||
368 | |||
369 | DEFINE_RXPRT_EVENT(xprtrdma_conn_start); | ||
370 | DEFINE_RXPRT_EVENT(xprtrdma_conn_tout); | ||
371 | DEFINE_RXPRT_EVENT(xprtrdma_create); | ||
372 | DEFINE_RXPRT_EVENT(xprtrdma_destroy); | ||
373 | DEFINE_RXPRT_EVENT(xprtrdma_remove); | ||
374 | DEFINE_RXPRT_EVENT(xprtrdma_reinsert); | ||
375 | DEFINE_RXPRT_EVENT(xprtrdma_reconnect); | ||
376 | DEFINE_RXPRT_EVENT(xprtrdma_inject_dsc); | ||
377 | |||
378 | TRACE_EVENT(xprtrdma_qp_error, | ||
379 | TP_PROTO( | ||
380 | const struct rpcrdma_xprt *r_xprt, | ||
381 | const struct ib_event *event | ||
382 | ), | ||
383 | |||
384 | TP_ARGS(r_xprt, event), | ||
385 | |||
386 | TP_STRUCT__entry( | ||
387 | __field(const void *, r_xprt) | ||
388 | __field(unsigned int, event) | ||
389 | __string(name, event->device->name) | ||
390 | __string(addr, rpcrdma_addrstr(r_xprt)) | ||
391 | __string(port, rpcrdma_portstr(r_xprt)) | ||
392 | ), | ||
393 | |||
394 | TP_fast_assign( | ||
395 | __entry->r_xprt = r_xprt; | ||
396 | __entry->event = event->event; | ||
397 | __assign_str(name, event->device->name); | ||
398 | __assign_str(addr, rpcrdma_addrstr(r_xprt)); | ||
399 | __assign_str(port, rpcrdma_portstr(r_xprt)); | ||
400 | ), | ||
401 | |||
402 | TP_printk("peer=[%s]:%s r_xprt=%p: dev %s: %s (%u)", | ||
403 | __get_str(addr), __get_str(port), __entry->r_xprt, | ||
404 | __get_str(name), rdma_show_ib_event(__entry->event), | ||
405 | __entry->event | ||
406 | ) | ||
407 | ); | ||
408 | |||
409 | /** | ||
410 | ** Call events | ||
411 | **/ | ||
412 | |||
413 | TRACE_EVENT(xprtrdma_createmrs, | ||
414 | TP_PROTO( | ||
415 | const struct rpcrdma_xprt *r_xprt, | ||
416 | unsigned int count | ||
417 | ), | ||
418 | |||
419 | TP_ARGS(r_xprt, count), | ||
420 | |||
421 | TP_STRUCT__entry( | ||
422 | __field(const void *, r_xprt) | ||
423 | __field(unsigned int, count) | ||
424 | ), | ||
425 | |||
426 | TP_fast_assign( | ||
427 | __entry->r_xprt = r_xprt; | ||
428 | __entry->count = count; | ||
429 | ), | ||
430 | |||
431 | TP_printk("r_xprt=%p: created %u MRs", | ||
432 | __entry->r_xprt, __entry->count | ||
433 | ) | ||
434 | ); | ||
435 | |||
436 | DEFINE_RXPRT_EVENT(xprtrdma_nomrs); | ||
437 | |||
438 | DEFINE_RDCH_EVENT(xprtrdma_read_chunk); | ||
439 | DEFINE_WRCH_EVENT(xprtrdma_write_chunk); | ||
440 | DEFINE_WRCH_EVENT(xprtrdma_reply_chunk); | ||
441 | |||
442 | TRACE_DEFINE_ENUM(rpcrdma_noch); | ||
443 | TRACE_DEFINE_ENUM(rpcrdma_readch); | ||
444 | TRACE_DEFINE_ENUM(rpcrdma_areadch); | ||
445 | TRACE_DEFINE_ENUM(rpcrdma_writech); | ||
446 | TRACE_DEFINE_ENUM(rpcrdma_replych); | ||
447 | |||
448 | #define xprtrdma_show_chunktype(x) \ | ||
449 | __print_symbolic(x, \ | ||
450 | { rpcrdma_noch, "inline" }, \ | ||
451 | { rpcrdma_readch, "read list" }, \ | ||
452 | { rpcrdma_areadch, "*read list" }, \ | ||
453 | { rpcrdma_writech, "write list" }, \ | ||
454 | { rpcrdma_replych, "reply chunk" }) | ||
455 | |||
456 | TRACE_EVENT(xprtrdma_marshal, | ||
457 | TP_PROTO( | ||
458 | const struct rpc_rqst *rqst, | ||
459 | unsigned int hdrlen, | ||
460 | unsigned int rtype, | ||
461 | unsigned int wtype | ||
462 | ), | ||
463 | |||
464 | TP_ARGS(rqst, hdrlen, rtype, wtype), | ||
465 | |||
466 | TP_STRUCT__entry( | ||
467 | __field(unsigned int, task_id) | ||
468 | __field(unsigned int, client_id) | ||
469 | __field(u32, xid) | ||
470 | __field(unsigned int, hdrlen) | ||
471 | __field(unsigned int, headlen) | ||
472 | __field(unsigned int, pagelen) | ||
473 | __field(unsigned int, taillen) | ||
474 | __field(unsigned int, rtype) | ||
475 | __field(unsigned int, wtype) | ||
476 | ), | ||
477 | |||
478 | TP_fast_assign( | ||
479 | __entry->task_id = rqst->rq_task->tk_pid; | ||
480 | __entry->client_id = rqst->rq_task->tk_client->cl_clid; | ||
481 | __entry->xid = be32_to_cpu(rqst->rq_xid); | ||
482 | __entry->hdrlen = hdrlen; | ||
483 | __entry->headlen = rqst->rq_snd_buf.head[0].iov_len; | ||
484 | __entry->pagelen = rqst->rq_snd_buf.page_len; | ||
485 | __entry->taillen = rqst->rq_snd_buf.tail[0].iov_len; | ||
486 | __entry->rtype = rtype; | ||
487 | __entry->wtype = wtype; | ||
488 | ), | ||
489 | |||
490 | TP_printk("task:%u@%u xid=0x%08x: hdr=%u xdr=%u/%u/%u %s/%s", | ||
491 | __entry->task_id, __entry->client_id, __entry->xid, | ||
492 | __entry->hdrlen, | ||
493 | __entry->headlen, __entry->pagelen, __entry->taillen, | ||
494 | xprtrdma_show_chunktype(__entry->rtype), | ||
495 | xprtrdma_show_chunktype(__entry->wtype) | ||
496 | ) | ||
497 | ); | ||
498 | |||
499 | TRACE_EVENT(xprtrdma_post_send, | ||
500 | TP_PROTO( | ||
501 | const struct rpcrdma_req *req, | ||
502 | int status | ||
503 | ), | ||
504 | |||
505 | TP_ARGS(req, status), | ||
506 | |||
507 | TP_STRUCT__entry( | ||
508 | __field(const void *, req) | ||
509 | __field(int, num_sge) | ||
510 | __field(bool, signaled) | ||
511 | __field(int, status) | ||
512 | ), | ||
513 | |||
514 | TP_fast_assign( | ||
515 | __entry->req = req; | ||
516 | __entry->num_sge = req->rl_sendctx->sc_wr.num_sge; | ||
517 | __entry->signaled = req->rl_sendctx->sc_wr.send_flags & | ||
518 | IB_SEND_SIGNALED; | ||
519 | __entry->status = status; | ||
520 | ), | ||
521 | |||
522 | TP_printk("req=%p, %d SGEs%s, status=%d", | ||
523 | __entry->req, __entry->num_sge, | ||
524 | (__entry->signaled ? ", signaled" : ""), | ||
525 | __entry->status | ||
526 | ) | ||
527 | ); | ||
528 | |||
529 | TRACE_EVENT(xprtrdma_post_recv, | ||
530 | TP_PROTO( | ||
531 | const struct rpcrdma_rep *rep, | ||
532 | int status | ||
533 | ), | ||
534 | |||
535 | TP_ARGS(rep, status), | ||
536 | |||
537 | TP_STRUCT__entry( | ||
538 | __field(const void *, rep) | ||
539 | __field(int, status) | ||
540 | ), | ||
541 | |||
542 | TP_fast_assign( | ||
543 | __entry->rep = rep; | ||
544 | __entry->status = status; | ||
545 | ), | ||
546 | |||
547 | TP_printk("rep=%p status=%d", | ||
548 | __entry->rep, __entry->status | ||
549 | ) | ||
550 | ); | ||
551 | |||
552 | /** | ||
553 | ** Completion events | ||
554 | **/ | ||
555 | |||
556 | TRACE_EVENT(xprtrdma_wc_send, | ||
557 | TP_PROTO( | ||
558 | const struct rpcrdma_sendctx *sc, | ||
559 | const struct ib_wc *wc | ||
560 | ), | ||
561 | |||
562 | TP_ARGS(sc, wc), | ||
563 | |||
564 | TP_STRUCT__entry( | ||
565 | __field(const void *, req) | ||
566 | __field(unsigned int, unmap_count) | ||
567 | __field(unsigned int, status) | ||
568 | __field(unsigned int, vendor_err) | ||
569 | ), | ||
570 | |||
571 | TP_fast_assign( | ||
572 | __entry->req = sc->sc_req; | ||
573 | __entry->unmap_count = sc->sc_unmap_count; | ||
574 | __entry->status = wc->status; | ||
575 | __entry->vendor_err = __entry->status ? wc->vendor_err : 0; | ||
576 | ), | ||
577 | |||
578 | TP_printk("req=%p, unmapped %u pages: %s (%u/0x%x)", | ||
579 | __entry->req, __entry->unmap_count, | ||
580 | rdma_show_wc_status(__entry->status), | ||
581 | __entry->status, __entry->vendor_err | ||
582 | ) | ||
583 | ); | ||
584 | |||
585 | TRACE_EVENT(xprtrdma_wc_receive, | ||
586 | TP_PROTO( | ||
587 | const struct rpcrdma_rep *rep, | ||
588 | const struct ib_wc *wc | ||
589 | ), | ||
590 | |||
591 | TP_ARGS(rep, wc), | ||
592 | |||
593 | TP_STRUCT__entry( | ||
594 | __field(const void *, rep) | ||
595 | __field(unsigned int, byte_len) | ||
596 | __field(unsigned int, status) | ||
597 | __field(unsigned int, vendor_err) | ||
598 | ), | ||
599 | |||
600 | TP_fast_assign( | ||
601 | __entry->rep = rep; | ||
602 | __entry->byte_len = wc->byte_len; | ||
603 | __entry->status = wc->status; | ||
604 | __entry->vendor_err = __entry->status ? wc->vendor_err : 0; | ||
605 | ), | ||
606 | |||
607 | TP_printk("rep=%p, %u bytes: %s (%u/0x%x)", | ||
608 | __entry->rep, __entry->byte_len, | ||
609 | rdma_show_wc_status(__entry->status), | ||
610 | __entry->status, __entry->vendor_err | ||
611 | ) | ||
612 | ); | ||
613 | |||
614 | DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg); | ||
615 | DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li); | ||
616 | DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake); | ||
617 | |||
618 | DEFINE_MR_EVENT(xprtrdma_localinv); | ||
619 | DEFINE_MR_EVENT(xprtrdma_dma_unmap); | ||
620 | DEFINE_MR_EVENT(xprtrdma_remoteinv); | ||
621 | DEFINE_MR_EVENT(xprtrdma_recover_mr); | ||
622 | |||
623 | /** | ||
624 | ** Reply events | ||
625 | **/ | ||
626 | |||
627 | TRACE_EVENT(xprtrdma_reply, | ||
628 | TP_PROTO( | ||
629 | const struct rpc_task *task, | ||
630 | const struct rpcrdma_rep *rep, | ||
631 | const struct rpcrdma_req *req, | ||
632 | unsigned int credits | ||
633 | ), | ||
634 | |||
635 | TP_ARGS(task, rep, req, credits), | ||
636 | |||
637 | TP_STRUCT__entry( | ||
638 | __field(unsigned int, task_id) | ||
639 | __field(unsigned int, client_id) | ||
640 | __field(const void *, rep) | ||
641 | __field(const void *, req) | ||
642 | __field(u32, xid) | ||
643 | __field(unsigned int, credits) | ||
644 | ), | ||
645 | |||
646 | TP_fast_assign( | ||
647 | __entry->task_id = task->tk_pid; | ||
648 | __entry->client_id = task->tk_client->cl_clid; | ||
649 | __entry->rep = rep; | ||
650 | __entry->req = req; | ||
651 | __entry->xid = be32_to_cpu(rep->rr_xid); | ||
652 | __entry->credits = credits; | ||
653 | ), | ||
654 | |||
655 | TP_printk("task:%u@%u xid=0x%08x, %u credits, rep=%p -> req=%p", | ||
656 | __entry->task_id, __entry->client_id, __entry->xid, | ||
657 | __entry->credits, __entry->rep, __entry->req | ||
658 | ) | ||
659 | ); | ||
660 | |||
661 | TRACE_EVENT(xprtrdma_defer_cmp, | ||
662 | TP_PROTO( | ||
663 | const struct rpcrdma_rep *rep | ||
664 | ), | ||
665 | |||
666 | TP_ARGS(rep), | ||
667 | |||
668 | TP_STRUCT__entry( | ||
669 | __field(unsigned int, task_id) | ||
670 | __field(unsigned int, client_id) | ||
671 | __field(const void *, rep) | ||
672 | __field(u32, xid) | ||
673 | ), | ||
674 | |||
675 | TP_fast_assign( | ||
676 | __entry->task_id = rep->rr_rqst->rq_task->tk_pid; | ||
677 | __entry->client_id = rep->rr_rqst->rq_task->tk_client->cl_clid; | ||
678 | __entry->rep = rep; | ||
679 | __entry->xid = be32_to_cpu(rep->rr_xid); | ||
680 | ), | ||
681 | |||
682 | TP_printk("task:%u@%u xid=0x%08x rep=%p", | ||
683 | __entry->task_id, __entry->client_id, __entry->xid, | ||
684 | __entry->rep | ||
685 | ) | ||
686 | ); | ||
687 | |||
688 | DEFINE_REPLY_EVENT(xprtrdma_reply_vers); | ||
689 | DEFINE_REPLY_EVENT(xprtrdma_reply_rqst); | ||
690 | DEFINE_REPLY_EVENT(xprtrdma_reply_short); | ||
691 | DEFINE_REPLY_EVENT(xprtrdma_reply_hdr); | ||
692 | |||
693 | TRACE_EVENT(xprtrdma_fixup, | ||
694 | TP_PROTO( | ||
695 | const struct rpc_rqst *rqst, | ||
696 | int len, | ||
697 | int hdrlen | ||
698 | ), | ||
699 | |||
700 | TP_ARGS(rqst, len, hdrlen), | ||
701 | |||
702 | TP_STRUCT__entry( | ||
703 | __field(unsigned int, task_id) | ||
704 | __field(unsigned int, client_id) | ||
705 | __field(const void *, base) | ||
706 | __field(int, len) | ||
707 | __field(int, hdrlen) | ||
708 | ), | ||
709 | |||
710 | TP_fast_assign( | ||
711 | __entry->task_id = rqst->rq_task->tk_pid; | ||
712 | __entry->client_id = rqst->rq_task->tk_client->cl_clid; | ||
713 | __entry->base = rqst->rq_rcv_buf.head[0].iov_base; | ||
714 | __entry->len = len; | ||
715 | __entry->hdrlen = hdrlen; | ||
716 | ), | ||
717 | |||
718 | TP_printk("task:%u@%u base=%p len=%d hdrlen=%d", | ||
719 | __entry->task_id, __entry->client_id, | ||
720 | __entry->base, __entry->len, __entry->hdrlen | ||
721 | ) | ||
722 | ); | ||
723 | |||
724 | TRACE_EVENT(xprtrdma_fixup_pg, | ||
725 | TP_PROTO( | ||
726 | const struct rpc_rqst *rqst, | ||
727 | int pageno, | ||
728 | const void *pos, | ||
729 | int len, | ||
730 | int curlen | ||
731 | ), | ||
732 | |||
733 | TP_ARGS(rqst, pageno, pos, len, curlen), | ||
734 | |||
735 | TP_STRUCT__entry( | ||
736 | __field(unsigned int, task_id) | ||
737 | __field(unsigned int, client_id) | ||
738 | __field(const void *, pos) | ||
739 | __field(int, pageno) | ||
740 | __field(int, len) | ||
741 | __field(int, curlen) | ||
742 | ), | ||
743 | |||
744 | TP_fast_assign( | ||
745 | __entry->task_id = rqst->rq_task->tk_pid; | ||
746 | __entry->client_id = rqst->rq_task->tk_client->cl_clid; | ||
747 | __entry->pos = pos; | ||
748 | __entry->pageno = pageno; | ||
749 | __entry->len = len; | ||
750 | __entry->curlen = curlen; | ||
751 | ), | ||
752 | |||
753 | TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d", | ||
754 | __entry->task_id, __entry->client_id, | ||
755 | __entry->pageno, __entry->pos, __entry->len, __entry->curlen | ||
756 | ) | ||
757 | ); | ||
758 | |||
759 | TRACE_EVENT(xprtrdma_decode_seg, | ||
760 | TP_PROTO( | ||
761 | u32 handle, | ||
762 | u32 length, | ||
763 | u64 offset | ||
764 | ), | ||
765 | |||
766 | TP_ARGS(handle, length, offset), | ||
767 | |||
768 | TP_STRUCT__entry( | ||
769 | __field(u32, handle) | ||
770 | __field(u32, length) | ||
771 | __field(u64, offset) | ||
772 | ), | ||
773 | |||
774 | TP_fast_assign( | ||
775 | __entry->handle = handle; | ||
776 | __entry->length = length; | ||
777 | __entry->offset = offset; | ||
778 | ), | ||
779 | |||
780 | TP_printk("%u@0x%016llx:0x%08x", | ||
781 | __entry->length, (unsigned long long)__entry->offset, | ||
782 | __entry->handle | ||
783 | ) | ||
784 | ); | ||
785 | |||
786 | /** | ||
787 | ** Allocation/release of rpcrdma_reqs and rpcrdma_reps | ||
788 | **/ | ||
789 | |||
790 | TRACE_EVENT(xprtrdma_allocate, | ||
791 | TP_PROTO( | ||
792 | const struct rpc_task *task, | ||
793 | const struct rpcrdma_req *req | ||
794 | ), | ||
795 | |||
796 | TP_ARGS(task, req), | ||
797 | |||
798 | TP_STRUCT__entry( | ||
799 | __field(unsigned int, task_id) | ||
800 | __field(unsigned int, client_id) | ||
801 | __field(const void *, req) | ||
802 | __field(const void *, rep) | ||
803 | __field(size_t, callsize) | ||
804 | __field(size_t, rcvsize) | ||
805 | ), | ||
806 | |||
807 | TP_fast_assign( | ||
808 | __entry->task_id = task->tk_pid; | ||
809 | __entry->client_id = task->tk_client->cl_clid; | ||
810 | __entry->req = req; | ||
811 | __entry->rep = req ? req->rl_reply : NULL; | ||
812 | __entry->callsize = task->tk_rqstp->rq_callsize; | ||
813 | __entry->rcvsize = task->tk_rqstp->rq_rcvsize; | ||
814 | ), | ||
815 | |||
816 | TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)", | ||
817 | __entry->task_id, __entry->client_id, | ||
818 | __entry->req, __entry->rep, | ||
819 | __entry->callsize, __entry->rcvsize | ||
820 | ) | ||
821 | ); | ||
822 | |||
823 | TRACE_EVENT(xprtrdma_rpc_done, | ||
824 | TP_PROTO( | ||
825 | const struct rpc_task *task, | ||
826 | const struct rpcrdma_req *req | ||
827 | ), | ||
828 | |||
829 | TP_ARGS(task, req), | ||
830 | |||
831 | TP_STRUCT__entry( | ||
832 | __field(unsigned int, task_id) | ||
833 | __field(unsigned int, client_id) | ||
834 | __field(const void *, req) | ||
835 | __field(const void *, rep) | ||
836 | ), | ||
837 | |||
838 | TP_fast_assign( | ||
839 | __entry->task_id = task->tk_pid; | ||
840 | __entry->client_id = task->tk_client->cl_clid; | ||
841 | __entry->req = req; | ||
842 | __entry->rep = req->rl_reply; | ||
843 | ), | ||
844 | |||
845 | TP_printk("task:%u@%u req=%p rep=%p", | ||
846 | __entry->task_id, __entry->client_id, | ||
847 | __entry->req, __entry->rep | ||
848 | ) | ||
849 | ); | ||
850 | |||
851 | DEFINE_RXPRT_EVENT(xprtrdma_noreps); | ||
852 | |||
853 | /** | ||
854 | ** Callback events | ||
855 | **/ | ||
856 | |||
857 | TRACE_EVENT(xprtrdma_cb_setup, | ||
858 | TP_PROTO( | ||
859 | const struct rpcrdma_xprt *r_xprt, | ||
860 | unsigned int reqs | ||
861 | ), | ||
862 | |||
863 | TP_ARGS(r_xprt, reqs), | ||
864 | |||
865 | TP_STRUCT__entry( | ||
866 | __field(const void *, r_xprt) | ||
867 | __field(unsigned int, reqs) | ||
868 | __string(addr, rpcrdma_addrstr(r_xprt)) | ||
869 | __string(port, rpcrdma_portstr(r_xprt)) | ||
870 | ), | ||
871 | |||
872 | TP_fast_assign( | ||
873 | __entry->r_xprt = r_xprt; | ||
874 | __entry->reqs = reqs; | ||
875 | __assign_str(addr, rpcrdma_addrstr(r_xprt)); | ||
876 | __assign_str(port, rpcrdma_portstr(r_xprt)); | ||
877 | ), | ||
878 | |||
879 | TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs", | ||
880 | __get_str(addr), __get_str(port), | ||
881 | __entry->r_xprt, __entry->reqs | ||
882 | ) | ||
883 | ); | ||
884 | |||
885 | DEFINE_CB_EVENT(xprtrdma_cb_call); | ||
886 | DEFINE_CB_EVENT(xprtrdma_cb_reply); | ||
887 | |||
888 | #endif /* _TRACE_RPCRDMA_H */ | ||
889 | |||
890 | #include <trace/define_trace.h> | ||
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 8c153f68509e..970c91a83173 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h | |||
@@ -32,7 +32,7 @@ DECLARE_EVENT_CLASS(rpc_task_status, | |||
32 | __entry->status = task->tk_status; | 32 | __entry->status = task->tk_status; |
33 | ), | 33 | ), |
34 | 34 | ||
35 | TP_printk("task:%u@%u, status %d", | 35 | TP_printk("task:%u@%u status=%d", |
36 | __entry->task_id, __entry->client_id, | 36 | __entry->task_id, __entry->client_id, |
37 | __entry->status) | 37 | __entry->status) |
38 | ); | 38 | ); |
@@ -66,7 +66,7 @@ TRACE_EVENT(rpc_connect_status, | |||
66 | __entry->status = status; | 66 | __entry->status = status; |
67 | ), | 67 | ), |
68 | 68 | ||
69 | TP_printk("task:%u@%u, status %d", | 69 | TP_printk("task:%u@%u status=%d", |
70 | __entry->task_id, __entry->client_id, | 70 | __entry->task_id, __entry->client_id, |
71 | __entry->status) | 71 | __entry->status) |
72 | ); | 72 | ); |
@@ -175,7 +175,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued, | |||
175 | ), | 175 | ), |
176 | 176 | ||
177 | TP_fast_assign( | 177 | TP_fast_assign( |
178 | __entry->client_id = clnt->cl_clid; | 178 | __entry->client_id = clnt ? clnt->cl_clid : -1; |
179 | __entry->task_id = task->tk_pid; | 179 | __entry->task_id = task->tk_pid; |
180 | __entry->timeout = task->tk_timeout; | 180 | __entry->timeout = task->tk_timeout; |
181 | __entry->runstate = task->tk_runstate; | 181 | __entry->runstate = task->tk_runstate; |
@@ -184,7 +184,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued, | |||
184 | __assign_str(q_name, rpc_qname(q)); | 184 | __assign_str(q_name, rpc_qname(q)); |
185 | ), | 185 | ), |
186 | 186 | ||
187 | TP_printk("task:%u@%u flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s", | 187 | TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s", |
188 | __entry->task_id, __entry->client_id, | 188 | __entry->task_id, __entry->client_id, |
189 | __entry->flags, | 189 | __entry->flags, |
190 | __entry->runstate, | 190 | __entry->runstate, |
@@ -390,6 +390,10 @@ DECLARE_EVENT_CLASS(rpc_xprt_event, | |||
390 | __entry->status) | 390 | __entry->status) |
391 | ); | 391 | ); |
392 | 392 | ||
393 | DEFINE_EVENT(rpc_xprt_event, xprt_timer, | ||
394 | TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), | ||
395 | TP_ARGS(xprt, xid, status)); | ||
396 | |||
393 | DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst, | 397 | DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst, |
394 | TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), | 398 | TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), |
395 | TP_ARGS(xprt, xid, status)); | 399 | TP_ARGS(xprt, xid, status)); |
diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h index 057d22a48416..946cb62d64b0 100644 --- a/include/uapi/linux/nfs.h +++ b/include/uapi/linux/nfs.h | |||
@@ -12,6 +12,7 @@ | |||
12 | 12 | ||
13 | #define NFS_PROGRAM 100003 | 13 | #define NFS_PROGRAM 100003 |
14 | #define NFS_PORT 2049 | 14 | #define NFS_PORT 2049 |
15 | #define NFS_RDMA_PORT 20049 | ||
15 | #define NFS_MAXDATA 8192 | 16 | #define NFS_MAXDATA 8192 |
16 | #define NFS_MAXPATHLEN 1024 | 17 | #define NFS_MAXPATHLEN 1024 |
17 | #define NFS_MAXNAMLEN 255 | 18 | #define NFS_MAXNAMLEN 255 |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e2a4184f3c5d..6e432ecd7f99 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -1376,22 +1376,6 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize | |||
1376 | EXPORT_SYMBOL_GPL(rpc_setbufsize); | 1376 | EXPORT_SYMBOL_GPL(rpc_setbufsize); |
1377 | 1377 | ||
1378 | /** | 1378 | /** |
1379 | * rpc_protocol - Get transport protocol number for an RPC client | ||
1380 | * @clnt: RPC client to query | ||
1381 | * | ||
1382 | */ | ||
1383 | int rpc_protocol(struct rpc_clnt *clnt) | ||
1384 | { | ||
1385 | int protocol; | ||
1386 | |||
1387 | rcu_read_lock(); | ||
1388 | protocol = rcu_dereference(clnt->cl_xprt)->prot; | ||
1389 | rcu_read_unlock(); | ||
1390 | return protocol; | ||
1391 | } | ||
1392 | EXPORT_SYMBOL_GPL(rpc_protocol); | ||
1393 | |||
1394 | /** | ||
1395 | * rpc_net_ns - Get the network namespace for this RPC client | 1379 | * rpc_net_ns - Get the network namespace for this RPC client |
1396 | * @clnt: RPC client to query | 1380 | * @clnt: RPC client to query |
1397 | * | 1381 | * |
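Editor's note: with rpc_protocol() removed from the sunrpc API, any remaining caller that needs the transport protocol can open-code the same RCU-protected dereference the deleted helper performed:

	int protocol;

	rcu_read_lock();
	protocol = rcu_dereference(clnt->cl_xprt)->prot;
	rcu_read_unlock();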
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b1b49edd7c4d..896691afbb1a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -755,22 +755,20 @@ static void __rpc_execute(struct rpc_task *task) | |||
755 | void (*do_action)(struct rpc_task *); | 755 | void (*do_action)(struct rpc_task *); |
756 | 756 | ||
757 | /* | 757 | /* |
758 | * Execute any pending callback first. | 758 | * Perform the next FSM step or a pending callback. |
759 | * | ||
760 | * tk_action may be NULL if the task has been killed. | ||
761 | * In particular, note that rpc_killall_tasks may | ||
762 | * do this at any time, so beware when dereferencing. | ||
759 | */ | 763 | */ |
760 | do_action = task->tk_callback; | 764 | do_action = task->tk_action; |
761 | task->tk_callback = NULL; | 765 | if (task->tk_callback) { |
762 | if (do_action == NULL) { | 766 | do_action = task->tk_callback; |
763 | /* | 767 | task->tk_callback = NULL; |
764 | * Perform the next FSM step. | ||
765 | * tk_action may be NULL if the task has been killed. | ||
766 | * In particular, note that rpc_killall_tasks may | ||
767 | * do this at any time, so beware when dereferencing. | ||
768 | */ | ||
769 | do_action = task->tk_action; | ||
770 | if (do_action == NULL) | ||
771 | break; | ||
772 | } | 768 | } |
773 | trace_rpc_task_run_action(task->tk_client, task, task->tk_action); | 769 | if (!do_action) |
770 | break; | ||
771 | trace_rpc_task_run_action(task->tk_client, task, do_action); | ||
774 | do_action(task); | 772 | do_action(task); |
775 | 773 | ||
776 | /* | 774 | /* |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 33b74fd84051..2436fd1125fc 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task) | |||
940 | 940 | ||
941 | if (task->tk_status != -ETIMEDOUT) | 941 | if (task->tk_status != -ETIMEDOUT) |
942 | return; | 942 | return; |
943 | dprintk("RPC: %5u xprt_timer\n", task->tk_pid); | ||
944 | 943 | ||
944 | trace_xprt_timer(xprt, req->rq_xid, task->tk_status); | ||
945 | if (!req->rq_reply_bytes_recvd) { | 945 | if (!req->rq_reply_bytes_recvd) { |
946 | if (xprt->ops->timer) | 946 | if (xprt->ops->timer) |
947 | xprt->ops->timer(xprt, task); | 947 | xprt->ops->timer(xprt, task); |
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 8b818bb3518a..ed1a4a3065ee 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c | |||
@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, | |||
43 | req = rpcrdma_create_req(r_xprt); | 43 | req = rpcrdma_create_req(r_xprt); |
44 | if (IS_ERR(req)) | 44 | if (IS_ERR(req)) |
45 | return PTR_ERR(req); | 45 | return PTR_ERR(req); |
46 | __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags); | ||
47 | 46 | ||
48 | rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, | 47 | rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, |
49 | DMA_TO_DEVICE, GFP_KERNEL); | 48 | DMA_TO_DEVICE, GFP_KERNEL); |
@@ -74,21 +73,13 @@ out_fail: | |||
74 | static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, | 73 | static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, |
75 | unsigned int count) | 74 | unsigned int count) |
76 | { | 75 | { |
77 | struct rpcrdma_rep *rep; | ||
78 | int rc = 0; | 76 | int rc = 0; |
79 | 77 | ||
80 | while (count--) { | 78 | while (count--) { |
81 | rep = rpcrdma_create_rep(r_xprt); | 79 | rc = rpcrdma_create_rep(r_xprt); |
82 | if (IS_ERR(rep)) { | 80 | if (rc) |
83 | pr_err("RPC: %s: reply buffer alloc failed\n", | ||
84 | __func__); | ||
85 | rc = PTR_ERR(rep); | ||
86 | break; | 81 | break; |
87 | } | ||
88 | |||
89 | rpcrdma_recv_buffer_put(rep); | ||
90 | } | 82 | } |
91 | |||
92 | return rc; | 83 | return rc; |
93 | } | 84 | } |
94 | 85 | ||
@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) | |||
129 | rqst->rq_xprt = &r_xprt->rx_xprt; | 120 | rqst->rq_xprt = &r_xprt->rx_xprt; |
130 | INIT_LIST_HEAD(&rqst->rq_list); | 121 | INIT_LIST_HEAD(&rqst->rq_list); |
131 | INIT_LIST_HEAD(&rqst->rq_bc_list); | 122 | INIT_LIST_HEAD(&rqst->rq_bc_list); |
123 | __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); | ||
132 | 124 | ||
133 | if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) | 125 | if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) |
134 | goto out_free; | 126 | goto out_free; |
@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) | |||
148 | 140 | ||
149 | buffer->rb_bc_srv_max_requests = reqs; | 141 | buffer->rb_bc_srv_max_requests = reqs; |
150 | request_module("svcrdma"); | 142 | request_module("svcrdma"); |
151 | 143 | trace_xprtrdma_cb_setup(r_xprt, reqs); | |
152 | return 0; | 144 | return 0; |
153 | 145 | ||
154 | out_free: | 146 | out_free: |
@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) | |||
196 | return maxmsg - RPCRDMA_HDRLEN_MIN; | 188 | return maxmsg - RPCRDMA_HDRLEN_MIN; |
197 | } | 189 | } |
198 | 190 | ||
199 | /** | 191 | static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) |
200 | * rpcrdma_bc_marshal_reply - Send backwards direction reply | ||
201 | * @rqst: buffer containing RPC reply data | ||
202 | * | ||
203 | * Returns zero on success. | ||
204 | */ | ||
205 | int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) | ||
206 | { | 192 | { |
207 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | 193 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); |
208 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 194 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) | |||
226 | if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, | 212 | if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, |
227 | &rqst->rq_snd_buf, rpcrdma_noch)) | 213 | &rqst->rq_snd_buf, rpcrdma_noch)) |
228 | return -EIO; | 214 | return -EIO; |
215 | |||
216 | trace_xprtrdma_cb_reply(rqst); | ||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | /** | ||
221 | * xprt_rdma_bc_send_reply - marshal and send a backchannel reply | ||
222 | * @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf | ||
223 | * | ||
224 | * Caller holds the transport's write lock. | ||
225 | * | ||
226 | * Returns: | ||
227 | * %0 if the RPC message has been sent | ||
228 | * %-ENOTCONN if the caller should reconnect and call again | ||
229 | * %-EIO if a permanent error occurred and the request was not | ||
230 | * sent. Do not try to send this message again. | ||
231 | */ | ||
232 | int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) | ||
233 | { | ||
234 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | ||
235 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | ||
236 | int rc; | ||
237 | |||
238 | if (!xprt_connected(rqst->rq_xprt)) | ||
239 | goto drop_connection; | ||
240 | |||
241 | rc = rpcrdma_bc_marshal_reply(rqst); | ||
242 | if (rc < 0) | ||
243 | goto failed_marshal; | ||
244 | |||
245 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) | ||
246 | goto drop_connection; | ||
229 | return 0; | 247 | return 0; |
248 | |||
249 | failed_marshal: | ||
250 | if (rc != -ENOTCONN) | ||
251 | return rc; | ||
252 | drop_connection: | ||
253 | xprt_disconnect_done(rqst->rq_xprt); | ||
254 | return -ENOTCONN; | ||
230 | } | 255 | } |
231 | 256 | ||
232 | /** | 257 | /** |
@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) | |||
262 | dprintk("RPC: %s: freeing rqst %p (req %p)\n", | 287 | dprintk("RPC: %s: freeing rqst %p (req %p)\n", |
263 | __func__, rqst, rpcr_to_rdmar(rqst)); | 288 | __func__, rqst, rpcr_to_rdmar(rqst)); |
264 | 289 | ||
265 | smp_mb__before_atomic(); | ||
266 | WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); | ||
267 | clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); | ||
268 | smp_mb__after_atomic(); | ||
269 | |||
270 | spin_lock_bh(&xprt->bc_pa_lock); | 290 | spin_lock_bh(&xprt->bc_pa_lock); |
271 | list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); | 291 | list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); |
272 | spin_unlock_bh(&xprt->bc_pa_lock); | 292 | spin_unlock_bh(&xprt->bc_pa_lock); |
@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) | |||
274 | 294 | ||
275 | /** | 295 | /** |
276 | * rpcrdma_bc_receive_call - Handle a backward direction call | 296 | * rpcrdma_bc_receive_call - Handle a backward direction call |
277 | * @xprt: transport receiving the call | 297 | * @r_xprt: transport receiving the call |
278 | * @rep: receive buffer containing the call | 298 | * @rep: receive buffer containing the call |
279 | * | 299 | * |
280 | * Operational assumptions: | 300 | * Operational assumptions: |
@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, | |||
313 | struct rpc_rqst, rq_bc_pa_list); | 333 | struct rpc_rqst, rq_bc_pa_list); |
314 | list_del(&rqst->rq_bc_pa_list); | 334 | list_del(&rqst->rq_bc_pa_list); |
315 | spin_unlock(&xprt->bc_pa_lock); | 335 | spin_unlock(&xprt->bc_pa_lock); |
316 | dprintk("RPC: %s: using rqst %p\n", __func__, rqst); | ||
317 | 336 | ||
318 | /* Prepare rqst */ | 337 | /* Prepare rqst */ |
319 | rqst->rq_reply_bytes_recvd = 0; | 338 | rqst->rq_reply_bytes_recvd = 0; |
@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, | |||
321 | rqst->rq_xid = *p; | 340 | rqst->rq_xid = *p; |
322 | 341 | ||
323 | rqst->rq_private_buf.len = size; | 342 | rqst->rq_private_buf.len = size; |
324 | set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); | ||
325 | 343 | ||
326 | buf = &rqst->rq_rcv_buf; | 344 | buf = &rqst->rq_rcv_buf; |
327 | memset(buf, 0, sizeof(*buf)); | 345 | memset(buf, 0, sizeof(*buf)); |
@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, | |||
335 | * the Upper Layer is done decoding it. | 353 | * the Upper Layer is done decoding it. |
336 | */ | 354 | */ |
337 | req = rpcr_to_rdmar(rqst); | 355 | req = rpcr_to_rdmar(rqst); |
338 | dprintk("RPC: %s: attaching rep %p to req %p\n", | ||
339 | __func__, rep, req); | ||
340 | req->rl_reply = rep; | 356 | req->rl_reply = rep; |
341 | 357 | trace_xprtrdma_cb_call(rqst); | |
342 | /* Defeat the retransmit detection logic in send_request */ | ||
343 | req->rl_connect_cookie = 0; | ||
344 | 358 | ||
345 | /* Queue rqst for ULP's callback service */ | 359 | /* Queue rqst for ULP's callback service */ |
346 | bc_serv = xprt->bc_serv; | 360 | bc_serv = xprt->bc_serv; |
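
Taken together, the surviving lines of rpcrdma_bc_receive_call() reduce the receive path to four steps: claim a preallocated rqst, fill it in, attach the rep, and queue it for the callback service. A condensed sketch of that flow (locking shown, error and overflow paths elided; not a drop-in function):

    spin_lock(&xprt->bc_pa_lock);
    rqst = list_first_entry(&xprt->bc_pa_list,
                            struct rpc_rqst, rq_bc_pa_list);
    list_del(&rqst->rq_bc_pa_list);         /* claim a preallocated rqst */
    spin_unlock(&xprt->bc_pa_lock);

    rqst->rq_reply_bytes_recvd = 0;
    rqst->rq_xid = *p;                      /* XID from the inbound header */
    rqst->rq_private_buf.len = size;

    req = rpcr_to_rdmar(rqst);
    req->rl_reply = rep;                    /* hold rep until the ULP decodes it */
    trace_xprtrdma_cb_call(rqst);           /* replaces the two dprintks */
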
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 29fc84c7ff98..d5f95bb39300 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c | |||
@@ -1,6 +1,6 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | 2 | /* |
3 | * Copyright (c) 2015 Oracle. All rights reserved. | 3 | * Copyright (c) 2015, 2017 Oracle. All rights reserved. |
4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | 4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. |
5 | */ | 5 | */ |
6 | 6 | ||
@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia) | |||
47 | } | 47 | } |
48 | 48 | ||
49 | static int | 49 | static int |
50 | fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) | 50 | fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) |
51 | { | 51 | { |
52 | static struct ib_fmr_attr fmr_attr = { | 52 | static struct ib_fmr_attr fmr_attr = { |
53 | .max_pages = RPCRDMA_MAX_FMR_SGES, | 53 | .max_pages = RPCRDMA_MAX_FMR_SGES, |
@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) | |||
55 | .page_shift = PAGE_SHIFT | 55 | .page_shift = PAGE_SHIFT |
56 | }; | 56 | }; |
57 | 57 | ||
58 | mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, | 58 | mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, |
59 | sizeof(u64), GFP_KERNEL); | 59 | sizeof(u64), GFP_KERNEL); |
60 | if (!mw->fmr.fm_physaddrs) | 60 | if (!mr->fmr.fm_physaddrs) |
61 | goto out_free; | 61 | goto out_free; |
62 | 62 | ||
63 | mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, | 63 | mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, |
64 | sizeof(*mw->mw_sg), GFP_KERNEL); | 64 | sizeof(*mr->mr_sg), GFP_KERNEL); |
65 | if (!mw->mw_sg) | 65 | if (!mr->mr_sg) |
66 | goto out_free; | 66 | goto out_free; |
67 | 67 | ||
68 | sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES); | 68 | sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES); |
69 | 69 | ||
70 | mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, | 70 | mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, |
71 | &fmr_attr); | 71 | &fmr_attr); |
72 | if (IS_ERR(mw->fmr.fm_mr)) | 72 | if (IS_ERR(mr->fmr.fm_mr)) |
73 | goto out_fmr_err; | 73 | goto out_fmr_err; |
74 | 74 | ||
75 | return 0; | 75 | return 0; |
76 | 76 | ||
77 | out_fmr_err: | 77 | out_fmr_err: |
78 | dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, | 78 | dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, |
79 | PTR_ERR(mw->fmr.fm_mr)); | 79 | PTR_ERR(mr->fmr.fm_mr)); |
80 | 80 | ||
81 | out_free: | 81 | out_free: |
82 | kfree(mw->mw_sg); | 82 | kfree(mr->mr_sg); |
83 | kfree(mw->fmr.fm_physaddrs); | 83 | kfree(mr->fmr.fm_physaddrs); |
84 | return -ENOMEM; | 84 | return -ENOMEM; |
85 | } | 85 | } |
86 | 86 | ||
87 | static int | 87 | static int |
88 | __fmr_unmap(struct rpcrdma_mw *mw) | 88 | __fmr_unmap(struct rpcrdma_mr *mr) |
89 | { | 89 | { |
90 | LIST_HEAD(l); | 90 | LIST_HEAD(l); |
91 | int rc; | 91 | int rc; |
92 | 92 | ||
93 | list_add(&mw->fmr.fm_mr->list, &l); | 93 | list_add(&mr->fmr.fm_mr->list, &l); |
94 | rc = ib_unmap_fmr(&l); | 94 | rc = ib_unmap_fmr(&l); |
95 | list_del(&mw->fmr.fm_mr->list); | 95 | list_del(&mr->fmr.fm_mr->list); |
96 | return rc; | 96 | return rc; |
97 | } | 97 | } |
98 | 98 | ||
99 | static void | 99 | static void |
100 | fmr_op_release_mr(struct rpcrdma_mw *r) | 100 | fmr_op_release_mr(struct rpcrdma_mr *mr) |
101 | { | 101 | { |
102 | LIST_HEAD(unmap_list); | 102 | LIST_HEAD(unmap_list); |
103 | int rc; | 103 | int rc; |
104 | 104 | ||
105 | /* Ensure MW is not on any rl_registered list */ | 105 | /* Ensure MR is not on any rl_registered list */ |
106 | if (!list_empty(&r->mw_list)) | 106 | if (!list_empty(&mr->mr_list)) |
107 | list_del(&r->mw_list); | 107 | list_del(&mr->mr_list); |
108 | 108 | ||
109 | kfree(r->fmr.fm_physaddrs); | 109 | kfree(mr->fmr.fm_physaddrs); |
110 | kfree(r->mw_sg); | 110 | kfree(mr->mr_sg); |
111 | 111 | ||
112 | /* In case this one was left mapped, try to unmap it | 112 | /* In case this one was left mapped, try to unmap it |
113 | * to prevent dealloc_fmr from failing with EBUSY | 113 | * to prevent dealloc_fmr from failing with EBUSY |
114 | */ | 114 | */ |
115 | rc = __fmr_unmap(r); | 115 | rc = __fmr_unmap(mr); |
116 | if (rc) | 116 | if (rc) |
117 | pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", | 117 | pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", |
118 | r, rc); | 118 | mr, rc); |
119 | 119 | ||
120 | rc = ib_dealloc_fmr(r->fmr.fm_mr); | 120 | rc = ib_dealloc_fmr(mr->fmr.fm_mr); |
121 | if (rc) | 121 | if (rc) |
122 | pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", | 122 | pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", |
123 | r, rc); | 123 | mr, rc); |
124 | 124 | ||
125 | kfree(r); | 125 | kfree(mr); |
126 | } | 126 | } |
127 | 127 | ||
128 | /* Reset of a single FMR. | 128 | /* Reset of a single FMR. |
129 | */ | 129 | */ |
130 | static void | 130 | static void |
131 | fmr_op_recover_mr(struct rpcrdma_mw *mw) | 131 | fmr_op_recover_mr(struct rpcrdma_mr *mr) |
132 | { | 132 | { |
133 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; | 133 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
134 | int rc; | 134 | int rc; |
135 | 135 | ||
136 | /* ORDER: invalidate first */ | 136 | /* ORDER: invalidate first */ |
137 | rc = __fmr_unmap(mw); | 137 | rc = __fmr_unmap(mr); |
138 | |||
139 | /* ORDER: then DMA unmap */ | ||
140 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | ||
141 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | ||
142 | if (rc) | 138 | if (rc) |
143 | goto out_release; | 139 | goto out_release; |
144 | 140 | ||
145 | rpcrdma_put_mw(r_xprt, mw); | 141 | /* ORDER: then DMA unmap */ |
142 | rpcrdma_mr_unmap_and_put(mr); | ||
143 | |||
146 | r_xprt->rx_stats.mrs_recovered++; | 144 | r_xprt->rx_stats.mrs_recovered++; |
147 | return; | 145 | return; |
148 | 146 | ||
149 | out_release: | 147 | out_release: |
150 | pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw); | 148 | pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr); |
151 | r_xprt->rx_stats.mrs_orphaned++; | 149 | r_xprt->rx_stats.mrs_orphaned++; |
152 | 150 | ||
153 | spin_lock(&r_xprt->rx_buf.rb_mwlock); | 151 | trace_xprtrdma_dma_unmap(mr); |
154 | list_del(&mw->mw_all); | 152 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, |
155 | spin_unlock(&r_xprt->rx_buf.rb_mwlock); | 153 | mr->mr_sg, mr->mr_nents, mr->mr_dir); |
154 | |||
155 | spin_lock(&r_xprt->rx_buf.rb_mrlock); | ||
156 | list_del(&mr->mr_all); | ||
157 | spin_unlock(&r_xprt->rx_buf.rb_mrlock); | ||
156 | 158 | ||
157 | fmr_op_release_mr(mw); | 159 | fmr_op_release_mr(mr); |
158 | } | 160 | } |
159 | 161 | ||
160 | static int | 162 | static int |
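
rpcrdma_mr_unmap_and_put() is introduced elsewhere in this series; judging from the open-coded sequence it replaces at each call site in this file, it is roughly the sketch below. The sketch's function name is made up, and the only assumption is that the helper folds the DMA unmap into the put:

    static void rpcrdma_mr_unmap_and_put_sketch(struct rpcrdma_mr *mr)
    {
            struct rpcrdma_xprt *r_xprt = mr->mr_xprt;

            ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
                            mr->mr_sg, mr->mr_nents, mr->mr_dir);
            rpcrdma_mr_put(mr);     /* back onto the transport's free MR list */
    }

Note the ordering fix in fmr_op_recover_mr() above: the DMA unmap now happens only after __fmr_unmap() succeeds, instead of unconditionally before the error check.
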
@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) | |||
180 | */ | 182 | */ |
181 | static struct rpcrdma_mr_seg * | 183 | static struct rpcrdma_mr_seg * |
182 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | 184 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, |
183 | int nsegs, bool writing, struct rpcrdma_mw **out) | 185 | int nsegs, bool writing, struct rpcrdma_mr **out) |
184 | { | 186 | { |
185 | struct rpcrdma_mr_seg *seg1 = seg; | 187 | struct rpcrdma_mr_seg *seg1 = seg; |
186 | int len, pageoff, i, rc; | 188 | int len, pageoff, i, rc; |
187 | struct rpcrdma_mw *mw; | 189 | struct rpcrdma_mr *mr; |
188 | u64 *dma_pages; | 190 | u64 *dma_pages; |
189 | 191 | ||
190 | mw = rpcrdma_get_mw(r_xprt); | 192 | mr = rpcrdma_mr_get(r_xprt); |
191 | if (!mw) | 193 | if (!mr) |
192 | return ERR_PTR(-ENOBUFS); | 194 | return ERR_PTR(-ENOBUFS); |
193 | 195 | ||
194 | pageoff = offset_in_page(seg1->mr_offset); | 196 | pageoff = offset_in_page(seg1->mr_offset); |
@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
199 | nsegs = RPCRDMA_MAX_FMR_SGES; | 201 | nsegs = RPCRDMA_MAX_FMR_SGES; |
200 | for (i = 0; i < nsegs;) { | 202 | for (i = 0; i < nsegs;) { |
201 | if (seg->mr_page) | 203 | if (seg->mr_page) |
202 | sg_set_page(&mw->mw_sg[i], | 204 | sg_set_page(&mr->mr_sg[i], |
203 | seg->mr_page, | 205 | seg->mr_page, |
204 | seg->mr_len, | 206 | seg->mr_len, |
205 | offset_in_page(seg->mr_offset)); | 207 | offset_in_page(seg->mr_offset)); |
206 | else | 208 | else |
207 | sg_set_buf(&mw->mw_sg[i], seg->mr_offset, | 209 | sg_set_buf(&mr->mr_sg[i], seg->mr_offset, |
208 | seg->mr_len); | 210 | seg->mr_len); |
209 | len += seg->mr_len; | 211 | len += seg->mr_len; |
210 | ++seg; | 212 | ++seg; |
@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
214 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | 216 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) |
215 | break; | 217 | break; |
216 | } | 218 | } |
217 | mw->mw_dir = rpcrdma_data_dir(writing); | 219 | mr->mr_dir = rpcrdma_data_dir(writing); |
218 | 220 | ||
219 | mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, | 221 | mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, |
220 | mw->mw_sg, i, mw->mw_dir); | 222 | mr->mr_sg, i, mr->mr_dir); |
221 | if (!mw->mw_nents) | 223 | if (!mr->mr_nents) |
222 | goto out_dmamap_err; | 224 | goto out_dmamap_err; |
223 | 225 | ||
224 | for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++) | 226 | for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++) |
225 | dma_pages[i] = sg_dma_address(&mw->mw_sg[i]); | 227 | dma_pages[i] = sg_dma_address(&mr->mr_sg[i]); |
226 | rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents, | 228 | rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents, |
227 | dma_pages[0]); | 229 | dma_pages[0]); |
228 | if (rc) | 230 | if (rc) |
229 | goto out_maperr; | 231 | goto out_maperr; |
230 | 232 | ||
231 | mw->mw_handle = mw->fmr.fm_mr->rkey; | 233 | mr->mr_handle = mr->fmr.fm_mr->rkey; |
232 | mw->mw_length = len; | 234 | mr->mr_length = len; |
233 | mw->mw_offset = dma_pages[0] + pageoff; | 235 | mr->mr_offset = dma_pages[0] + pageoff; |
234 | 236 | ||
235 | *out = mw; | 237 | *out = mr; |
236 | return seg; | 238 | return seg; |
237 | 239 | ||
238 | out_dmamap_err: | 240 | out_dmamap_err: |
239 | pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", | 241 | pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", |
240 | mw->mw_sg, i); | 242 | mr->mr_sg, i); |
241 | rpcrdma_put_mw(r_xprt, mw); | 243 | rpcrdma_mr_put(mr); |
242 | return ERR_PTR(-EIO); | 244 | return ERR_PTR(-EIO); |
243 | 245 | ||
244 | out_maperr: | 246 | out_maperr: |
245 | pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", | 247 | pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", |
246 | len, (unsigned long long)dma_pages[0], | 248 | len, (unsigned long long)dma_pages[0], |
247 | pageoff, mw->mw_nents, rc); | 249 | pageoff, mr->mr_nents, rc); |
248 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | 250 | rpcrdma_mr_unmap_and_put(mr); |
249 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | ||
250 | rpcrdma_put_mw(r_xprt, mw); | ||
251 | return ERR_PTR(-EIO); | 251 | return ERR_PTR(-EIO); |
252 | } | 252 | } |
253 | 253 | ||
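
Under the new field names, fmr_op_map() is still the same three-step pipeline; restated in condensed form for orientation (error paths omitted, not a complete function):

    mr->mr_dir = rpcrdma_data_dir(writing);
    mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
                                 mr->mr_sg, i, mr->mr_dir);     /* 1. DMA map */

    for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
            dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);       /* 2. page list */

    rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
                         dma_pages[0]);                         /* 3. register */

    mr->mr_handle = mr->fmr.fm_mr->rkey;    /* advertised in the chunk list */
    mr->mr_length = len;
    mr->mr_offset = dma_pages[0] + pageoff;
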
@@ -256,13 +256,13 @@ out_maperr: | |||
256 | * Sleeps until it is safe for the host CPU to access the | 256 | * Sleeps until it is safe for the host CPU to access the |
257 | * previously mapped memory regions. | 257 | * previously mapped memory regions. |
258 | * | 258 | * |
259 | * Caller ensures that @mws is not empty before the call. This | 259 | * Caller ensures that @mrs is not empty before the call. This |
260 | * function empties the list. | 260 | * function empties the list. |
261 | */ | 261 | */ |
262 | static void | 262 | static void |
263 | fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | 263 | fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) |
264 | { | 264 | { |
265 | struct rpcrdma_mw *mw; | 265 | struct rpcrdma_mr *mr; |
266 | LIST_HEAD(unmap_list); | 266 | LIST_HEAD(unmap_list); |
267 | int rc; | 267 | int rc; |
268 | 268 | ||
@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
271 | * ib_unmap_fmr() is slow, so use a single call instead | 271 | * ib_unmap_fmr() is slow, so use a single call instead |
272 | * of one call per mapped FMR. | 272 | * of one call per mapped FMR. |
273 | */ | 273 | */ |
274 | list_for_each_entry(mw, mws, mw_list) { | 274 | list_for_each_entry(mr, mrs, mr_list) { |
275 | dprintk("RPC: %s: unmapping fmr %p\n", | 275 | dprintk("RPC: %s: unmapping fmr %p\n", |
276 | __func__, &mw->fmr); | 276 | __func__, &mr->fmr); |
277 | list_add_tail(&mw->fmr.fm_mr->list, &unmap_list); | 277 | trace_xprtrdma_localinv(mr); |
278 | list_add_tail(&mr->fmr.fm_mr->list, &unmap_list); | ||
278 | } | 279 | } |
279 | r_xprt->rx_stats.local_inv_needed++; | 280 | r_xprt->rx_stats.local_inv_needed++; |
280 | rc = ib_unmap_fmr(&unmap_list); | 281 | rc = ib_unmap_fmr(&unmap_list); |
@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
284 | /* ORDER: Now DMA unmap all of the req's MRs, and return | 285 | /* ORDER: Now DMA unmap all of the req's MRs, and return |
285 | * them to the free MW list. | 286 | * them to the free MR list. |
286 | */ | 287 | */ |
287 | while (!list_empty(mws)) { | 288 | while (!list_empty(mrs)) { |
288 | mw = rpcrdma_pop_mw(mws); | 289 | mr = rpcrdma_mr_pop(mrs); |
289 | dprintk("RPC: %s: DMA unmapping fmr %p\n", | 290 | list_del(&mr->fmr.fm_mr->list); |
290 | __func__, &mw->fmr); | 291 | rpcrdma_mr_unmap_and_put(mr); |
291 | list_del(&mw->fmr.fm_mr->list); | ||
292 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | ||
293 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | ||
294 | rpcrdma_put_mw(r_xprt, mw); | ||
295 | } | 292 | } |
296 | 293 | ||
297 | return; | 294 | return; |
@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
299 | out_reset: | 296 | out_reset: |
300 | pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); | 297 | pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); |
301 | 298 | ||
302 | while (!list_empty(mws)) { | 299 | while (!list_empty(mrs)) { |
303 | mw = rpcrdma_pop_mw(mws); | 300 | mr = rpcrdma_mr_pop(mrs); |
304 | list_del(&mw->fmr.fm_mr->list); | 301 | list_del(&mr->fmr.fm_mr->list); |
305 | fmr_op_recover_mr(mw); | 302 | fmr_op_recover_mr(mr); |
306 | } | 303 | } |
307 | } | 304 | } |
308 | 305 | ||
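
The comment in fmr_op_unmap_sync() notes that ib_unmap_fmr() is slow; the batching idiom built around that fact is the heart of the function. One verb call covers every MR registered for the RPC (a restatement of the loop above, not new code):

    LIST_HEAD(unmap_list);

    list_for_each_entry(mr, mrs, mr_list)
            list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);

    rc = ib_unmap_fmr(&unmap_list);     /* one slow verb for the whole batch */
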
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 773e66e10a15..90f688f19783 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
@@ -1,11 +1,11 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | 2 | /* |
3 | * Copyright (c) 2015 Oracle. All rights reserved. | 3 | * Copyright (c) 2015, 2017 Oracle. All rights reserved. |
4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | 4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | /* Lightweight memory registration using Fast Registration Work | 7 | /* Lightweight memory registration using Fast Registration Work |
8 | * Requests (FRWR). Also referred to sometimes as FRMR mode. | 8 | * Requests (FRWR). |
9 | * | 9 | * |
10 | * FRWR features ordered asynchronous registration and deregistration | 10 | * FRWR features ordered asynchronous registration and deregistration |
11 | * of arbitrarily sized memory regions. This is the fastest and safest | 11 | * of arbitrarily sized memory regions. This is the fastest and safest |
@@ -15,9 +15,9 @@ | |||
15 | /* Normal operation | 15 | /* Normal operation |
16 | * | 16 | * |
17 | * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG | 17 | * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG |
18 | * Work Request (frmr_op_map). When the RDMA operation is finished, this | 18 | * Work Request (frwr_op_map). When the RDMA operation is finished, this |
19 | * Memory Region is invalidated using a LOCAL_INV Work Request | 19 | * Memory Region is invalidated using a LOCAL_INV Work Request |
20 | * (frmr_op_unmap). | 20 | * (frwr_op_unmap_sync). |
21 | * | 21 | * |
22 | * Typically these Work Requests are not signaled, and neither are RDMA | 22 | * Typically these Work Requests are not signaled, and neither are RDMA |
23 | * SEND Work Requests (with the exception of signaling occasionally to | 23 | * SEND Work Requests (with the exception of signaling occasionally to |
@@ -26,7 +26,7 @@ | |||
26 | * | 26 | * |
27 | * As an optimization, frwr_op_unmap marks MRs INVALID before the | 27 | * As an optimization, frwr_op_unmap marks MRs INVALID before the |
28 | * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on | 28 | * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on |
29 | * rb_mws immediately so that no work (like managing a linked list | 29 | * rb_mrs immediately so that no work (like managing a linked list |
30 | * under a spinlock) is needed in the completion upcall. | 30 | * under a spinlock) is needed in the completion upcall. |
31 | * | 31 | * |
32 | * But this means that frwr_op_map() can occasionally encounter an MR | 32 | * But this means that frwr_op_map() can occasionally encounter an MR |
@@ -60,7 +60,7 @@ | |||
60 | * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered | 60 | * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered |
61 | * with ib_dereg_mr and then are re-initialized. Because MR recovery | 61 | * with ib_dereg_mr and then are re-initialized. Because MR recovery |
62 | * allocates fresh resources, it is deferred to a workqueue, and the | 62 | * allocates fresh resources, it is deferred to a workqueue, and the |
63 | * recovered MRs are placed back on the rb_mws list when recovery is | 63 | * recovered MRs are placed back on the rb_mrs list when recovery is |
64 | * complete. frwr_op_map allocates another MR for the current RPC while | 64 | * complete. frwr_op_map allocates another MR for the current RPC while |
65 | * the broken MR is reset. | 65 | * the broken MR is reset. |
66 | * | 66 | * |
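
The recovery rules described above are driven by a small per-MR state machine. Its states appear throughout this file under their renamed FRWR_* identifiers; the enum itself lives in xprt_rdma.h, so the definition below is an illustrative reconstruction rather than a quote:

    enum rpcrdma_frwr_state {
            FRWR_IS_INVALID,        /* ready for frwr_op_map to use */
            FRWR_IS_VALID,          /* FAST_REG posted; rkey is live */
            FRWR_FLUSHED_FR,        /* FAST_REG WR flushed by disconnect */
            FRWR_FLUSHED_LI,        /* LOCAL_INV WR flushed by disconnect */
    };
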
@@ -96,26 +96,26 @@ out_not_supported: | |||
96 | } | 96 | } |
97 | 97 | ||
98 | static int | 98 | static int |
99 | frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) | 99 | frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) |
100 | { | 100 | { |
101 | unsigned int depth = ia->ri_max_frmr_depth; | 101 | unsigned int depth = ia->ri_max_frwr_depth; |
102 | struct rpcrdma_frmr *f = &r->frmr; | 102 | struct rpcrdma_frwr *frwr = &mr->frwr; |
103 | int rc; | 103 | int rc; |
104 | 104 | ||
105 | f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); | 105 | frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); |
106 | if (IS_ERR(f->fr_mr)) | 106 | if (IS_ERR(frwr->fr_mr)) |
107 | goto out_mr_err; | 107 | goto out_mr_err; |
108 | 108 | ||
109 | r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL); | 109 | mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL); |
110 | if (!r->mw_sg) | 110 | if (!mr->mr_sg) |
111 | goto out_list_err; | 111 | goto out_list_err; |
112 | 112 | ||
113 | sg_init_table(r->mw_sg, depth); | 113 | sg_init_table(mr->mr_sg, depth); |
114 | init_completion(&f->fr_linv_done); | 114 | init_completion(&frwr->fr_linv_done); |
115 | return 0; | 115 | return 0; |
116 | 116 | ||
117 | out_mr_err: | 117 | out_mr_err: |
118 | rc = PTR_ERR(f->fr_mr); | 118 | rc = PTR_ERR(frwr->fr_mr); |
119 | dprintk("RPC: %s: ib_alloc_mr status %i\n", | 119 | dprintk("RPC: %s: ib_alloc_mr status %i\n", |
120 | __func__, rc); | 120 | __func__, rc); |
121 | return rc; | 121 | return rc; |
@@ -124,83 +124,85 @@ out_list_err: | |||
124 | rc = -ENOMEM; | 124 | rc = -ENOMEM; |
125 | dprintk("RPC: %s: sg allocation failure\n", | 125 | dprintk("RPC: %s: sg allocation failure\n", |
126 | __func__); | 126 | __func__); |
127 | ib_dereg_mr(f->fr_mr); | 127 | ib_dereg_mr(frwr->fr_mr); |
128 | return rc; | 128 | return rc; |
129 | } | 129 | } |
130 | 130 | ||
131 | static void | 131 | static void |
132 | frwr_op_release_mr(struct rpcrdma_mw *r) | 132 | frwr_op_release_mr(struct rpcrdma_mr *mr) |
133 | { | 133 | { |
134 | int rc; | 134 | int rc; |
135 | 135 | ||
136 | /* Ensure MW is not on any rl_registered list */ | 136 | /* Ensure MR is not on any rl_registered list */ |
137 | if (!list_empty(&r->mw_list)) | 137 | if (!list_empty(&mr->mr_list)) |
138 | list_del(&r->mw_list); | 138 | list_del(&mr->mr_list); |
139 | 139 | ||
140 | rc = ib_dereg_mr(r->frmr.fr_mr); | 140 | rc = ib_dereg_mr(mr->frwr.fr_mr); |
141 | if (rc) | 141 | if (rc) |
142 | pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", | 142 | pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", |
143 | r, rc); | 143 | mr, rc); |
144 | kfree(r->mw_sg); | 144 | kfree(mr->mr_sg); |
145 | kfree(r); | 145 | kfree(mr); |
146 | } | 146 | } |
147 | 147 | ||
148 | static int | 148 | static int |
149 | __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) | 149 | __frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) |
150 | { | 150 | { |
151 | struct rpcrdma_frmr *f = &r->frmr; | 151 | struct rpcrdma_frwr *frwr = &mr->frwr; |
152 | int rc; | 152 | int rc; |
153 | 153 | ||
154 | rc = ib_dereg_mr(f->fr_mr); | 154 | rc = ib_dereg_mr(frwr->fr_mr); |
155 | if (rc) { | 155 | if (rc) { |
156 | pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", | 156 | pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", |
157 | rc, r); | 157 | rc, mr); |
158 | return rc; | 158 | return rc; |
159 | } | 159 | } |
160 | 160 | ||
161 | f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, | 161 | frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, |
162 | ia->ri_max_frmr_depth); | 162 | ia->ri_max_frwr_depth); |
163 | if (IS_ERR(f->fr_mr)) { | 163 | if (IS_ERR(frwr->fr_mr)) { |
164 | pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", | 164 | pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", |
165 | PTR_ERR(f->fr_mr), r); | 165 | PTR_ERR(frwr->fr_mr), mr); |
166 | return PTR_ERR(f->fr_mr); | 166 | return PTR_ERR(frwr->fr_mr); |
167 | } | 167 | } |
168 | 168 | ||
169 | dprintk("RPC: %s: recovered FRMR %p\n", __func__, f); | 169 | dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr); |
170 | f->fr_state = FRMR_IS_INVALID; | 170 | frwr->fr_state = FRWR_IS_INVALID; |
171 | return 0; | 171 | return 0; |
172 | } | 172 | } |
173 | 173 | ||
174 | /* Reset of a single FRMR. Generate a fresh rkey by replacing the MR. | 174 | /* Reset of a single FRWR. Generate a fresh rkey by replacing the MR. |
175 | */ | 175 | */ |
176 | static void | 176 | static void |
177 | frwr_op_recover_mr(struct rpcrdma_mw *mw) | 177 | frwr_op_recover_mr(struct rpcrdma_mr *mr) |
178 | { | 178 | { |
179 | enum rpcrdma_frmr_state state = mw->frmr.fr_state; | 179 | enum rpcrdma_frwr_state state = mr->frwr.fr_state; |
180 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; | 180 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
181 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 181 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
182 | int rc; | 182 | int rc; |
183 | 183 | ||
184 | rc = __frwr_reset_mr(ia, mw); | 184 | rc = __frwr_mr_reset(ia, mr); |
185 | if (state != FRMR_FLUSHED_LI) | 185 | if (state != FRWR_FLUSHED_LI) { |
186 | trace_xprtrdma_dma_unmap(mr); | ||
186 | ib_dma_unmap_sg(ia->ri_device, | 187 | ib_dma_unmap_sg(ia->ri_device, |
187 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | 188 | mr->mr_sg, mr->mr_nents, mr->mr_dir); |
189 | } | ||
188 | if (rc) | 190 | if (rc) |
189 | goto out_release; | 191 | goto out_release; |
190 | 192 | ||
191 | rpcrdma_put_mw(r_xprt, mw); | 193 | rpcrdma_mr_put(mr); |
192 | r_xprt->rx_stats.mrs_recovered++; | 194 | r_xprt->rx_stats.mrs_recovered++; |
193 | return; | 195 | return; |
194 | 196 | ||
195 | out_release: | 197 | out_release: |
196 | pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw); | 198 | pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr); |
197 | r_xprt->rx_stats.mrs_orphaned++; | 199 | r_xprt->rx_stats.mrs_orphaned++; |
198 | 200 | ||
199 | spin_lock(&r_xprt->rx_buf.rb_mwlock); | 201 | spin_lock(&r_xprt->rx_buf.rb_mrlock); |
200 | list_del(&mw->mw_all); | 202 | list_del(&mr->mr_all); |
201 | spin_unlock(&r_xprt->rx_buf.rb_mwlock); | 203 | spin_unlock(&r_xprt->rx_buf.rb_mrlock); |
202 | 204 | ||
203 | frwr_op_release_mr(mw); | 205 | frwr_op_release_mr(mr); |
204 | } | 206 | } |
205 | 207 | ||
206 | static int | 208 | static int |
@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||
214 | if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) | 216 | if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) |
215 | ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; | 217 | ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; |
216 | 218 | ||
217 | ia->ri_max_frmr_depth = | 219 | ia->ri_max_frwr_depth = |
218 | min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | 220 | min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, |
219 | attrs->max_fast_reg_page_list_len); | 221 | attrs->max_fast_reg_page_list_len); |
220 | dprintk("RPC: %s: device's max FR page list len = %u\n", | 222 | dprintk("RPC: %s: device's max FR page list len = %u\n", |
221 | __func__, ia->ri_max_frmr_depth); | 223 | __func__, ia->ri_max_frwr_depth); |
222 | 224 | ||
223 | /* Add room for frmr register and invalidate WRs. | 225 | /* Add room for frwr register and invalidate WRs. |
224 | * 1. FRMR reg WR for head | 226 | * 1. FRWR reg WR for head |
225 | * 2. FRMR invalidate WR for head | 227 | * 2. FRWR invalidate WR for head |
226 | * 3. N FRMR reg WRs for pagelist | 228 | * 3. N FRWR reg WRs for pagelist |
227 | * 4. N FRMR invalidate WRs for pagelist | 229 | * 4. N FRWR invalidate WRs for pagelist |
228 | * 5. FRMR reg WR for tail | 230 | * 5. FRWR reg WR for tail |
229 | * 6. FRMR invalidate WR for tail | 231 | * 6. FRWR invalidate WR for tail |
230 | * 7. The RDMA_SEND WR | 232 | * 7. The RDMA_SEND WR |
231 | */ | 233 | */ |
232 | depth = 7; | 234 | depth = 7; |
233 | 235 | ||
234 | /* Calculate N if the device max FRMR depth is smaller than | 236 | /* Calculate N if the device max FRWR depth is smaller than |
235 | * RPCRDMA_MAX_DATA_SEGS. | 237 | * RPCRDMA_MAX_DATA_SEGS. |
236 | */ | 238 | */ |
237 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | 239 | if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) { |
238 | delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; | 240 | delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth; |
239 | do { | 241 | do { |
240 | depth += 2; /* FRMR reg + invalidate */ | 242 | depth += 2; /* FRWR reg + invalidate */ |
241 | delta -= ia->ri_max_frmr_depth; | 243 | delta -= ia->ri_max_frwr_depth; |
242 | } while (delta > 0); | 244 | } while (delta > 0); |
243 | } | 245 | } |
244 | 246 | ||
@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||
252 | } | 254 | } |
253 | 255 | ||
254 | ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / | 256 | ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / |
255 | ia->ri_max_frmr_depth); | 257 | ia->ri_max_frwr_depth); |
256 | return 0; | 258 | return 0; |
257 | } | 259 | } |
258 | 260 | ||
@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) | |||
265 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 267 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
266 | 268 | ||
267 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | 269 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, |
268 | RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth); | 270 | RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth); |
269 | } | 271 | } |
270 | 272 | ||
271 | static void | 273 | static void |
@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr) | |||
286 | static void | 288 | static void |
287 | frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) | 289 | frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) |
288 | { | 290 | { |
289 | struct rpcrdma_frmr *frmr; | 291 | struct ib_cqe *cqe = wc->wr_cqe; |
290 | struct ib_cqe *cqe; | 292 | struct rpcrdma_frwr *frwr = |
293 | container_of(cqe, struct rpcrdma_frwr, fr_cqe); | ||
291 | 294 | ||
292 | /* WARNING: Only wr_cqe and status are reliable at this point */ | 295 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
293 | if (wc->status != IB_WC_SUCCESS) { | 296 | if (wc->status != IB_WC_SUCCESS) { |
294 | cqe = wc->wr_cqe; | 297 | frwr->fr_state = FRWR_FLUSHED_FR; |
295 | frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); | ||
296 | frmr->fr_state = FRMR_FLUSHED_FR; | ||
297 | __frwr_sendcompletion_flush(wc, "fastreg"); | 298 | __frwr_sendcompletion_flush(wc, "fastreg"); |
298 | } | 299 | } |
300 | trace_xprtrdma_wc_fastreg(wc, frwr); | ||
299 | } | 301 | } |
300 | 302 | ||
301 | /** | 303 | /** |
@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) | |||
307 | static void | 309 | static void |
308 | frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) | 310 | frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) |
309 | { | 311 | { |
310 | struct rpcrdma_frmr *frmr; | 312 | struct ib_cqe *cqe = wc->wr_cqe; |
311 | struct ib_cqe *cqe; | 313 | struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr, |
314 | fr_cqe); | ||
312 | 315 | ||
313 | /* WARNING: Only wr_cqe and status are reliable at this point */ | 316 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
314 | if (wc->status != IB_WC_SUCCESS) { | 317 | if (wc->status != IB_WC_SUCCESS) { |
315 | cqe = wc->wr_cqe; | 318 | frwr->fr_state = FRWR_FLUSHED_LI; |
316 | frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); | ||
317 | frmr->fr_state = FRMR_FLUSHED_LI; | ||
318 | __frwr_sendcompletion_flush(wc, "localinv"); | 319 | __frwr_sendcompletion_flush(wc, "localinv"); |
319 | } | 320 | } |
321 | trace_xprtrdma_wc_li(wc, frwr); | ||
320 | } | 322 | } |
321 | 323 | ||
322 | /** | 324 | /** |
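
All three flush handlers in this series of hunks now share one shape: the ib_cqe embedded in struct rpcrdma_frwr is registered as wr_cqe when the WR is posted, and container_of() recovers the frwr at completion time. A minimal sketch of the pattern (the handler name is hypothetical; the state constant depends on which WR was flushed):

    static void frwr_wc_sketch(struct ib_cq *cq, struct ib_wc *wc)
    {
            struct rpcrdma_frwr *frwr =
                    container_of(wc->wr_cqe, struct rpcrdma_frwr, fr_cqe);

            /* only wr_cqe and status are reliable on a flushed completion */
            if (wc->status != IB_WC_SUCCESS)
                    frwr->fr_state = FRWR_FLUSHED_FR;   /* or FRWR_FLUSHED_LI */
    }
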
@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) | |||
329 | static void | 331 | static void |
330 | frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) | 332 | frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) |
331 | { | 333 | { |
332 | struct rpcrdma_frmr *frmr; | 334 | struct ib_cqe *cqe = wc->wr_cqe; |
333 | struct ib_cqe *cqe; | 335 | struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr, |
336 | fr_cqe); | ||
334 | 337 | ||
335 | /* WARNING: Only wr_cqe and status are reliable at this point */ | 338 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
336 | cqe = wc->wr_cqe; | ||
337 | frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); | ||
338 | if (wc->status != IB_WC_SUCCESS) { | 339 | if (wc->status != IB_WC_SUCCESS) { |
339 | frmr->fr_state = FRMR_FLUSHED_LI; | 340 | frwr->fr_state = FRWR_FLUSHED_LI; |
340 | __frwr_sendcompletion_flush(wc, "localinv"); | 341 | __frwr_sendcompletion_flush(wc, "localinv"); |
341 | } | 342 | } |
342 | complete(&frmr->fr_linv_done); | 343 | complete(&frwr->fr_linv_done); |
344 | trace_xprtrdma_wc_li_wake(wc, frwr); | ||
343 | } | 345 | } |
344 | 346 | ||
345 | /* Post a REG_MR Work Request to register a memory region | 347 | /* Post a REG_MR Work Request to register a memory region |
@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) | |||
347 | */ | 349 | */ |
348 | static struct rpcrdma_mr_seg * | 350 | static struct rpcrdma_mr_seg * |
349 | frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | 351 | frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, |
350 | int nsegs, bool writing, struct rpcrdma_mw **out) | 352 | int nsegs, bool writing, struct rpcrdma_mr **out) |
351 | { | 353 | { |
352 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 354 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
353 | bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; | 355 | bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; |
354 | struct rpcrdma_mw *mw; | 356 | struct rpcrdma_frwr *frwr; |
355 | struct rpcrdma_frmr *frmr; | 357 | struct rpcrdma_mr *mr; |
356 | struct ib_mr *mr; | 358 | struct ib_mr *ibmr; |
357 | struct ib_reg_wr *reg_wr; | 359 | struct ib_reg_wr *reg_wr; |
358 | struct ib_send_wr *bad_wr; | 360 | struct ib_send_wr *bad_wr; |
359 | int rc, i, n; | 361 | int rc, i, n; |
360 | u8 key; | 362 | u8 key; |
361 | 363 | ||
362 | mw = NULL; | 364 | mr = NULL; |
363 | do { | 365 | do { |
364 | if (mw) | 366 | if (mr) |
365 | rpcrdma_defer_mr_recovery(mw); | 367 | rpcrdma_mr_defer_recovery(mr); |
366 | mw = rpcrdma_get_mw(r_xprt); | 368 | mr = rpcrdma_mr_get(r_xprt); |
367 | if (!mw) | 369 | if (!mr) |
368 | return ERR_PTR(-ENOBUFS); | 370 | return ERR_PTR(-ENOBUFS); |
369 | } while (mw->frmr.fr_state != FRMR_IS_INVALID); | 371 | } while (mr->frwr.fr_state != FRWR_IS_INVALID); |
370 | frmr = &mw->frmr; | 372 | frwr = &mr->frwr; |
371 | frmr->fr_state = FRMR_IS_VALID; | 373 | frwr->fr_state = FRWR_IS_VALID; |
372 | mr = frmr->fr_mr; | 374 | |
373 | reg_wr = &frmr->fr_regwr; | 375 | if (nsegs > ia->ri_max_frwr_depth) |
374 | 376 | nsegs = ia->ri_max_frwr_depth; | |
375 | if (nsegs > ia->ri_max_frmr_depth) | ||
376 | nsegs = ia->ri_max_frmr_depth; | ||
377 | for (i = 0; i < nsegs;) { | 377 | for (i = 0; i < nsegs;) { |
378 | if (seg->mr_page) | 378 | if (seg->mr_page) |
379 | sg_set_page(&mw->mw_sg[i], | 379 | sg_set_page(&mr->mr_sg[i], |
380 | seg->mr_page, | 380 | seg->mr_page, |
381 | seg->mr_len, | 381 | seg->mr_len, |
382 | offset_in_page(seg->mr_offset)); | 382 | offset_in_page(seg->mr_offset)); |
383 | else | 383 | else |
384 | sg_set_buf(&mw->mw_sg[i], seg->mr_offset, | 384 | sg_set_buf(&mr->mr_sg[i], seg->mr_offset, |
385 | seg->mr_len); | 385 | seg->mr_len); |
386 | 386 | ||
387 | ++seg; | 387 | ++seg; |
@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
392 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | 392 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) |
393 | break; | 393 | break; |
394 | } | 394 | } |
395 | mw->mw_dir = rpcrdma_data_dir(writing); | 395 | mr->mr_dir = rpcrdma_data_dir(writing); |
396 | 396 | ||
397 | mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir); | 397 | mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); |
398 | if (!mw->mw_nents) | 398 | if (!mr->mr_nents) |
399 | goto out_dmamap_err; | 399 | goto out_dmamap_err; |
400 | 400 | ||
401 | n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE); | 401 | ibmr = frwr->fr_mr; |
402 | if (unlikely(n != mw->mw_nents)) | 402 | n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); |
403 | if (unlikely(n != mr->mr_nents)) | ||
403 | goto out_mapmr_err; | 404 | goto out_mapmr_err; |
404 | 405 | ||
405 | dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n", | 406 | key = (u8)(ibmr->rkey & 0x000000FF); |
406 | __func__, frmr, mw->mw_nents, mr->length); | 407 | ib_update_fast_reg_key(ibmr, ++key); |
407 | |||
408 | key = (u8)(mr->rkey & 0x000000FF); | ||
409 | ib_update_fast_reg_key(mr, ++key); | ||
410 | 408 | ||
409 | reg_wr = &frwr->fr_regwr; | ||
411 | reg_wr->wr.next = NULL; | 410 | reg_wr->wr.next = NULL; |
412 | reg_wr->wr.opcode = IB_WR_REG_MR; | 411 | reg_wr->wr.opcode = IB_WR_REG_MR; |
413 | frmr->fr_cqe.done = frwr_wc_fastreg; | 412 | frwr->fr_cqe.done = frwr_wc_fastreg; |
414 | reg_wr->wr.wr_cqe = &frmr->fr_cqe; | 413 | reg_wr->wr.wr_cqe = &frwr->fr_cqe; |
415 | reg_wr->wr.num_sge = 0; | 414 | reg_wr->wr.num_sge = 0; |
416 | reg_wr->wr.send_flags = 0; | 415 | reg_wr->wr.send_flags = 0; |
417 | reg_wr->mr = mr; | 416 | reg_wr->mr = ibmr; |
418 | reg_wr->key = mr->rkey; | 417 | reg_wr->key = ibmr->rkey; |
419 | reg_wr->access = writing ? | 418 | reg_wr->access = writing ? |
420 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | 419 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : |
421 | IB_ACCESS_REMOTE_READ; | 420 | IB_ACCESS_REMOTE_READ; |
@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
424 | if (rc) | 423 | if (rc) |
425 | goto out_senderr; | 424 | goto out_senderr; |
426 | 425 | ||
427 | mw->mw_handle = mr->rkey; | 426 | mr->mr_handle = ibmr->rkey; |
428 | mw->mw_length = mr->length; | 427 | mr->mr_length = ibmr->length; |
429 | mw->mw_offset = mr->iova; | 428 | mr->mr_offset = ibmr->iova; |
430 | 429 | ||
431 | *out = mw; | 430 | *out = mr; |
432 | return seg; | 431 | return seg; |
433 | 432 | ||
434 | out_dmamap_err: | 433 | out_dmamap_err: |
435 | pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", | 434 | pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", |
436 | mw->mw_sg, i); | 435 | mr->mr_sg, i); |
437 | frmr->fr_state = FRMR_IS_INVALID; | 436 | frwr->fr_state = FRWR_IS_INVALID; |
438 | rpcrdma_put_mw(r_xprt, mw); | 437 | rpcrdma_mr_put(mr); |
439 | return ERR_PTR(-EIO); | 438 | return ERR_PTR(-EIO); |
440 | 439 | ||
441 | out_mapmr_err: | 440 | out_mapmr_err: |
442 | pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", | 441 | pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", |
443 | frmr->fr_mr, n, mw->mw_nents); | 442 | frwr->fr_mr, n, mr->mr_nents); |
444 | rpcrdma_defer_mr_recovery(mw); | 443 | rpcrdma_mr_defer_recovery(mr); |
445 | return ERR_PTR(-EIO); | 444 | return ERR_PTR(-EIO); |
446 | 445 | ||
447 | out_senderr: | 446 | out_senderr: |
448 | pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc); | 447 | pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc); |
449 | rpcrdma_defer_mr_recovery(mw); | 448 | rpcrdma_mr_defer_recovery(mr); |
450 | return ERR_PTR(-ENOTCONN); | 449 | return ERR_PTR(-ENOTCONN); |
451 | } | 450 | } |
452 | 451 | ||
452 | /* Handle a remotely invalidated MR on the @mrs list | ||
453 | */ | ||
454 | static void | ||
455 | frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) | ||
456 | { | ||
457 | struct rpcrdma_mr *mr; | ||
458 | |||
459 | list_for_each_entry(mr, mrs, mr_list) | ||
460 | if (mr->mr_handle == rep->rr_inv_rkey) { | ||
461 | list_del(&mr->mr_list); | ||
462 | trace_xprtrdma_remoteinv(mr); | ||
463 | mr->frwr.fr_state = FRWR_IS_INVALID; | ||
464 | rpcrdma_mr_unmap_and_put(mr); | ||
465 | break; /* only one invalidated MR per RPC */ | ||
466 | } | ||
467 | } | ||
468 | |||
453 | /* Invalidate all memory regions that were registered for "req". | 469 | /* Invalidate all memory regions that were registered for "req". |
454 | * | 470 | * |
455 | * Sleeps until it is safe for the host CPU to access the | 471 | * Sleeps until it is safe for the host CPU to access the |
456 | * previously mapped memory regions. | 472 | * previously mapped memory regions. |
457 | * | 473 | * |
458 | * Caller ensures that @mws is not empty before the call. This | 474 | * Caller ensures that @mrs is not empty before the call. This |
459 | * function empties the list. | 475 | * function empties the list. |
460 | */ | 476 | */ |
461 | static void | 477 | static void |
462 | frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | 478 | frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) |
463 | { | 479 | { |
464 | struct ib_send_wr *first, **prev, *last, *bad_wr; | 480 | struct ib_send_wr *first, **prev, *last, *bad_wr; |
465 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 481 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
466 | struct rpcrdma_frmr *f; | 482 | struct rpcrdma_frwr *frwr; |
467 | struct rpcrdma_mw *mw; | 483 | struct rpcrdma_mr *mr; |
468 | int count, rc; | 484 | int count, rc; |
469 | 485 | ||
470 | /* ORDER: Invalidate all of the MRs first | 486 | /* ORDER: Invalidate all of the MRs first |
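
frwr_op_reminv() consumes rep->rr_inv_rkey, which the receive completion path presumably records when the peer uses Send With Invalidate. A sketch of the standard verbs idiom, offered as an assumption about how that field is populated (this patch does not show the receive handler):

    if (wc->wc_flags & IB_WC_WITH_INVALIDATE)
            rep->rr_inv_rkey = wc->ex.invalidate_rkey;  /* already invalidated */
    else
            rep->rr_inv_rkey = 0;                       /* needs a LOCAL_INV */
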
@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
472 | * Chain the LOCAL_INV Work Requests and post them with | 488 | * Chain the LOCAL_INV Work Requests and post them with |
473 | * a single ib_post_send() call. | 489 | * a single ib_post_send() call. |
474 | */ | 490 | */ |
475 | f = NULL; | 491 | frwr = NULL; |
476 | count = 0; | 492 | count = 0; |
477 | prev = &first; | 493 | prev = &first; |
478 | list_for_each_entry(mw, mws, mw_list) { | 494 | list_for_each_entry(mr, mrs, mr_list) { |
479 | mw->frmr.fr_state = FRMR_IS_INVALID; | 495 | mr->frwr.fr_state = FRWR_IS_INVALID; |
480 | 496 | ||
481 | if (mw->mw_flags & RPCRDMA_MW_F_RI) | 497 | frwr = &mr->frwr; |
482 | continue; | 498 | trace_xprtrdma_localinv(mr); |
483 | 499 | ||
484 | f = &mw->frmr; | 500 | frwr->fr_cqe.done = frwr_wc_localinv; |
485 | dprintk("RPC: %s: invalidating frmr %p\n", | 501 | last = &frwr->fr_invwr; |
486 | __func__, f); | ||
487 | |||
488 | f->fr_cqe.done = frwr_wc_localinv; | ||
489 | last = &f->fr_invwr; | ||
490 | memset(last, 0, sizeof(*last)); | 502 | memset(last, 0, sizeof(*last)); |
491 | last->wr_cqe = &f->fr_cqe; | 503 | last->wr_cqe = &frwr->fr_cqe; |
492 | last->opcode = IB_WR_LOCAL_INV; | 504 | last->opcode = IB_WR_LOCAL_INV; |
493 | last->ex.invalidate_rkey = mw->mw_handle; | 505 | last->ex.invalidate_rkey = mr->mr_handle; |
494 | count++; | 506 | count++; |
495 | 507 | ||
496 | *prev = last; | 508 | *prev = last; |
497 | prev = &last->next; | 509 | prev = &last->next; |
498 | } | 510 | } |
499 | if (!f) | 511 | if (!frwr) |
500 | goto unmap; | 512 | goto unmap; |
501 | 513 | ||
502 | /* Strong send queue ordering guarantees that when the | 514 | /* Strong send queue ordering guarantees that when the |
@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
504 | * are complete. | 516 | * are complete. |
505 | */ | 517 | */ |
506 | last->send_flags = IB_SEND_SIGNALED; | 518 | last->send_flags = IB_SEND_SIGNALED; |
507 | f->fr_cqe.done = frwr_wc_localinv_wake; | 519 | frwr->fr_cqe.done = frwr_wc_localinv_wake; |
508 | reinit_completion(&f->fr_linv_done); | 520 | reinit_completion(&frwr->fr_linv_done); |
509 | 521 | ||
510 | /* Transport disconnect drains the receive CQ before it | 522 | /* Transport disconnect drains the receive CQ before it |
511 | * replaces the QP. The RPC reply handler won't call us | 523 | * replaces the QP. The RPC reply handler won't call us |
@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) | |||
515 | bad_wr = NULL; | 527 | bad_wr = NULL; |
516 | rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); | 528 | rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); |
517 | if (bad_wr != first) | 529 | if (bad_wr != first) |
518 | wait_for_completion(&f->fr_linv_done); | 530 | wait_for_completion(&frwr->fr_linv_done); |
519 | if (rc) | 531 | if (rc) |
520 | goto reset_mrs; | 532 | goto reset_mrs; |
521 | 533 | ||
522 | /* ORDER: Now DMA unmap all of the MRs, and return | 534 | /* ORDER: Now DMA unmap all of the MRs, and return |
523 | * them to the free MW list. | 535 | * them to the free MR list. |
524 | */ | 536 | */ |
525 | unmap: | 537 | unmap: |
526 | while (!list_empty(mws)) { | 538 | while (!list_empty(mrs)) { |
527 | mw = rpcrdma_pop_mw(mws); | 539 | mr = rpcrdma_mr_pop(mrs); |
528 | dprintk("RPC: %s: DMA unmapping frmr %p\n", | 540 | rpcrdma_mr_unmap_and_put(mr); |
529 | __func__, &mw->frmr); | ||
530 | ib_dma_unmap_sg(ia->ri_device, | ||
531 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | ||
532 | rpcrdma_put_mw(r_xprt, mw); | ||
533 | } | 541 | } |
534 | return; | 542 | return; |
535 | 543 | ||
536 | reset_mrs: | 544 | reset_mrs: |
537 | pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc); | 545 | pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc); |
538 | 546 | ||
539 | /* Find and reset the MRs in the LOCAL_INV WRs that did not | 547 | /* Find and reset the MRs in the LOCAL_INV WRs that did not |
540 | * get posted. | 548 | * get posted. |
541 | */ | 549 | */ |
542 | while (bad_wr) { | 550 | while (bad_wr) { |
543 | f = container_of(bad_wr, struct rpcrdma_frmr, | 551 | frwr = container_of(bad_wr, struct rpcrdma_frwr, |
544 | fr_invwr); | 552 | fr_invwr); |
545 | mw = container_of(f, struct rpcrdma_mw, frmr); | 553 | mr = container_of(frwr, struct rpcrdma_mr, frwr); |
546 | 554 | ||
547 | __frwr_reset_mr(ia, mw); | 555 | __frwr_mr_reset(ia, mr); |
548 | 556 | ||
549 | bad_wr = bad_wr->next; | 557 | bad_wr = bad_wr->next; |
550 | } | 558 | } |
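
The chaining idiom in frwr_op_unmap_sync() is worth restating: every LOCAL_INV WR is linked into a single chain, only the tail is signaled, and strong send-queue ordering lets one completion stand in for all of them. Condensed from the function above (per-WR field setup elided):

    prev = &first;
    list_for_each_entry(mr, mrs, mr_list) {
            frwr = &mr->frwr;
            last = &frwr->fr_invwr;
            /* fill in fr_cqe, IB_WR_LOCAL_INV, mr->mr_handle here */
            *prev = last;                   /* chain after the previous WR */
            prev = &last->next;
    }
    last->send_flags = IB_SEND_SIGNALED;    /* only the tail raises a CQE */
    frwr->fr_cqe.done = frwr_wc_localinv_wake;

    rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
    if (bad_wr != first)
            wait_for_completion(&frwr->fr_linv_done);
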
@@ -553,6 +561,7 @@ reset_mrs: | |||
553 | 561 | ||
554 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { | 562 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { |
555 | .ro_map = frwr_op_map, | 563 | .ro_map = frwr_op_map, |
564 | .ro_reminv = frwr_op_reminv, | ||
556 | .ro_unmap_sync = frwr_op_unmap_sync, | 565 | .ro_unmap_sync = frwr_op_unmap_sync, |
557 | .ro_recover_mr = frwr_op_recover_mr, | 566 | .ro_recover_mr = frwr_op_recover_mr, |
558 | .ro_open = frwr_op_open, | 567 | .ro_open = frwr_op_open, |
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c index 560712bd9fa2..a762d192372b 100644 --- a/net/sunrpc/xprtrdma/module.c +++ b/net/sunrpc/xprtrdma/module.c | |||
@@ -1,18 +1,20 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2015 Oracle. All rights reserved. | 2 | * Copyright (c) 2015, 2017 Oracle. All rights reserved. |
3 | */ | 3 | */ |
4 | 4 | ||
5 | /* rpcrdma.ko module initialization | 5 | /* rpcrdma.ko module initialization |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/types.h> | ||
9 | #include <linux/compiler.h> | ||
8 | #include <linux/module.h> | 10 | #include <linux/module.h> |
9 | #include <linux/init.h> | 11 | #include <linux/init.h> |
10 | #include <linux/sunrpc/svc_rdma.h> | 12 | #include <linux/sunrpc/svc_rdma.h> |
11 | #include "xprt_rdma.h" | ||
12 | 13 | ||
13 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 14 | #include <asm/swab.h> |
14 | # define RPCDBG_FACILITY RPCDBG_TRANS | 15 | |
15 | #endif | 16 | #define CREATE_TRACE_POINTS |
17 | #include "xprt_rdma.h" | ||
16 | 18 | ||
17 | MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); | 19 | MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); |
18 | MODULE_DESCRIPTION("RPC/RDMA Transport"); | 20 | MODULE_DESCRIPTION("RPC/RDMA Transport"); |
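
The module.c hunk follows the usual kernel tracepoint convention: CREATE_TRACE_POINTS must be defined in exactly one compilation unit before the header that declares the tracepoints, so their definitions are emitted exactly once; every other file includes xprt_rdma.h without the define and sees declarations only. The pattern in miniature:

    #define CREATE_TRACE_POINTS     /* expand TRACE_EVENT() into definitions */
    #include "xprt_rdma.h"          /* which pulls in the rpcrdma trace header */
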
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index a3f2ab283aeb..162e5dd82466 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr) | |||
292 | } | 292 | } |
293 | 293 | ||
294 | static void | 294 | static void |
295 | xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) | 295 | xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr) |
296 | { | 296 | { |
297 | *iptr++ = cpu_to_be32(mw->mw_handle); | 297 | *iptr++ = cpu_to_be32(mr->mr_handle); |
298 | *iptr++ = cpu_to_be32(mw->mw_length); | 298 | *iptr++ = cpu_to_be32(mr->mr_length); |
299 | xdr_encode_hyper(iptr, mw->mw_offset); | 299 | xdr_encode_hyper(iptr, mr->mr_offset); |
300 | } | 300 | } |
301 | 301 | ||
302 | static int | 302 | static int |
303 | encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) | 303 | encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr) |
304 | { | 304 | { |
305 | __be32 *p; | 305 | __be32 *p; |
306 | 306 | ||
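
For reference, the three values packed by xdr_encode_rdma_segment() form one RPC-over-RDMA plain segment (RFC 8166), sixteen bytes of XDR on the wire:

    +----------------+----------------+--------------------------------+
    | mr_handle (32) | mr_length (32) |         mr_offset (64)         |
    +----------------+----------------+--------------------------------+

handle is the rkey the server uses for its RDMA Reads and Writes, length is the number of bytes registered, and offset is the 64-bit base address of the region.
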
@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) | |||
308 | if (unlikely(!p)) | 308 | if (unlikely(!p)) |
309 | return -EMSGSIZE; | 309 | return -EMSGSIZE; |
310 | 310 | ||
311 | xdr_encode_rdma_segment(p, mw); | 311 | xdr_encode_rdma_segment(p, mr); |
312 | return 0; | 312 | return 0; |
313 | } | 313 | } |
314 | 314 | ||
315 | static int | 315 | static int |
316 | encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, | 316 | encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr, |
317 | u32 position) | 317 | u32 position) |
318 | { | 318 | { |
319 | __be32 *p; | 319 | __be32 *p; |
@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, | |||
324 | 324 | ||
325 | *p++ = xdr_one; /* Item present */ | 325 | *p++ = xdr_one; /* Item present */ |
326 | *p++ = cpu_to_be32(position); | 326 | *p++ = cpu_to_be32(position); |
327 | xdr_encode_rdma_segment(p, mw); | 327 | xdr_encode_rdma_segment(p, mr); |
328 | return 0; | 328 | return 0; |
329 | } | 329 | } |
330 | 330 | ||
@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
348 | { | 348 | { |
349 | struct xdr_stream *xdr = &req->rl_stream; | 349 | struct xdr_stream *xdr = &req->rl_stream; |
350 | struct rpcrdma_mr_seg *seg; | 350 | struct rpcrdma_mr_seg *seg; |
351 | struct rpcrdma_mw *mw; | 351 | struct rpcrdma_mr *mr; |
352 | unsigned int pos; | 352 | unsigned int pos; |
353 | int nsegs; | 353 | int nsegs; |
354 | 354 | ||
@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
363 | 363 | ||
364 | do { | 364 | do { |
365 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, | 365 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, |
366 | false, &mw); | 366 | false, &mr); |
367 | if (IS_ERR(seg)) | 367 | if (IS_ERR(seg)) |
368 | return PTR_ERR(seg); | 368 | return PTR_ERR(seg); |
369 | rpcrdma_push_mw(mw, &req->rl_registered); | 369 | rpcrdma_mr_push(mr, &req->rl_registered); |
370 | 370 | ||
371 | if (encode_read_segment(xdr, mw, pos) < 0) | 371 | if (encode_read_segment(xdr, mr, pos) < 0) |
372 | return -EMSGSIZE; | 372 | return -EMSGSIZE; |
373 | 373 | ||
374 | dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", | 374 | trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs); |
375 | rqst->rq_task->tk_pid, __func__, pos, | ||
376 | mw->mw_length, (unsigned long long)mw->mw_offset, | ||
377 | mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); | ||
378 | |||
379 | r_xprt->rx_stats.read_chunk_count++; | 375 | r_xprt->rx_stats.read_chunk_count++; |
380 | nsegs -= mw->mw_nents; | 376 | nsegs -= mr->mr_nents; |
381 | } while (nsegs); | 377 | } while (nsegs); |
382 | 378 | ||
383 | return 0; | 379 | return 0; |
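
The read-list loop above is the same skeleton the write-list and reply-chunk encoders use below; only the segment encoder, the tracepoint, and the statistics counter differ. The shared shape, condensed for illustration (the writing flag is false for the read list and true for the other two):

    do {
            seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
                                               writing, &mr);
            if (IS_ERR(seg))
                    return PTR_ERR(seg);
            rpcrdma_mr_push(mr, &req->rl_registered);   /* invalidated later */

            if (encode_rdma_segment(xdr, mr) < 0)
                    return -EMSGSIZE;

            nsegs -= mr->mr_nents;  /* ro_map may cover fewer than asked */
    } while (nsegs);
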
@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
404 | { | 400 | { |
405 | struct xdr_stream *xdr = &req->rl_stream; | 401 | struct xdr_stream *xdr = &req->rl_stream; |
406 | struct rpcrdma_mr_seg *seg; | 402 | struct rpcrdma_mr_seg *seg; |
407 | struct rpcrdma_mw *mw; | 403 | struct rpcrdma_mr *mr; |
408 | int nsegs, nchunks; | 404 | int nsegs, nchunks; |
409 | __be32 *segcount; | 405 | __be32 *segcount; |
410 | 406 | ||
@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
425 | nchunks = 0; | 421 | nchunks = 0; |
426 | do { | 422 | do { |
427 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, | 423 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, |
428 | true, &mw); | 424 | true, &mr); |
429 | if (IS_ERR(seg)) | 425 | if (IS_ERR(seg)) |
430 | return PTR_ERR(seg); | 426 | return PTR_ERR(seg); |
431 | rpcrdma_push_mw(mw, &req->rl_registered); | 427 | rpcrdma_mr_push(mr, &req->rl_registered); |
432 | 428 | ||
433 | if (encode_rdma_segment(xdr, mw) < 0) | 429 | if (encode_rdma_segment(xdr, mr) < 0) |
434 | return -EMSGSIZE; | 430 | return -EMSGSIZE; |
435 | 431 | ||
436 | dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", | 432 | trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs); |
437 | rqst->rq_task->tk_pid, __func__, | ||
438 | mw->mw_length, (unsigned long long)mw->mw_offset, | ||
439 | mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); | ||
440 | |||
441 | r_xprt->rx_stats.write_chunk_count++; | 433 | r_xprt->rx_stats.write_chunk_count++; |
442 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; | 434 | r_xprt->rx_stats.total_rdma_request += mr->mr_length; |
443 | nchunks++; | 435 | nchunks++; |
444 | nsegs -= mw->mw_nents; | 436 | nsegs -= mr->mr_nents; |
445 | } while (nsegs); | 437 | } while (nsegs); |
446 | 438 | ||
447 | /* Update count of segments in this Write chunk */ | 439 | /* Update count of segments in this Write chunk */ |
@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
468 | { | 460 | { |
469 | struct xdr_stream *xdr = &req->rl_stream; | 461 | struct xdr_stream *xdr = &req->rl_stream; |
470 | struct rpcrdma_mr_seg *seg; | 462 | struct rpcrdma_mr_seg *seg; |
471 | struct rpcrdma_mw *mw; | 463 | struct rpcrdma_mr *mr; |
472 | int nsegs, nchunks; | 464 | int nsegs, nchunks; |
473 | __be32 *segcount; | 465 | __be32 *segcount; |
474 | 466 | ||
@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
487 | nchunks = 0; | 479 | nchunks = 0; |
488 | do { | 480 | do { |
489 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, | 481 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, |
490 | true, &mw); | 482 | true, &mr); |
491 | if (IS_ERR(seg)) | 483 | if (IS_ERR(seg)) |
492 | return PTR_ERR(seg); | 484 | return PTR_ERR(seg); |
493 | rpcrdma_push_mw(mw, &req->rl_registered); | 485 | rpcrdma_mr_push(mr, &req->rl_registered); |
494 | 486 | ||
495 | if (encode_rdma_segment(xdr, mw) < 0) | 487 | if (encode_rdma_segment(xdr, mr) < 0) |
496 | return -EMSGSIZE; | 488 | return -EMSGSIZE; |
497 | 489 | ||
498 | dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", | 490 | trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs); |
499 | rqst->rq_task->tk_pid, __func__, | ||
500 | mw->mw_length, (unsigned long long)mw->mw_offset, | ||
501 | mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); | ||
502 | |||
503 | r_xprt->rx_stats.reply_chunk_count++; | 491 | r_xprt->rx_stats.reply_chunk_count++; |
504 | r_xprt->rx_stats.total_rdma_request += seg->mr_len; | 492 | r_xprt->rx_stats.total_rdma_request += mr->mr_length; |
505 | nchunks++; | 493 | nchunks++; |
506 | nsegs -= mw->mw_nents; | 494 | nsegs -= mr->mr_nents; |
507 | } while (nsegs); | 495 | } while (nsegs); |
508 | 496 | ||
509 | /* Update count of segments in the Reply chunk */ | 497 | /* Update count of segments in the Reply chunk */ |
@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc) | |||
524 | struct ib_sge *sge; | 512 | struct ib_sge *sge; |
525 | unsigned int count; | 513 | unsigned int count; |
526 | 514 | ||
527 | dprintk("RPC: %s: unmapping %u sges for sc=%p\n", | ||
528 | __func__, sc->sc_unmap_count, sc); | ||
529 | |||
530 | /* The first two SGEs contain the transport header and | 515 | /* The first two SGEs contain the transport header and |
531 | * the inline buffer. These are always left mapped so | 516 | * the inline buffer. These are always left mapped so |
532 | * they can be cheaply re-used. | 517 | * they can be cheaply re-used. |
@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) | |||
754 | __be32 *p; | 739 | __be32 *p; |
755 | int ret; | 740 | int ret; |
756 | 741 | ||
757 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | ||
758 | if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)) | ||
759 | return rpcrdma_bc_marshal_reply(rqst); | ||
760 | #endif | ||
761 | |||
762 | rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); | 742 | rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); |
763 | xdr_init_encode(xdr, &req->rl_hdrbuf, | 743 | xdr_init_encode(xdr, &req->rl_hdrbuf, |
764 | req->rl_rdmabuf->rg_base); | 744 | req->rl_rdmabuf->rg_base); |
@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) | |||
821 | rtype = rpcrdma_areadch; | 801 | rtype = rpcrdma_areadch; |
822 | } | 802 | } |
823 | 803 | ||
804 | /* If this is a retransmit, discard previously registered | ||
805 | * chunks. Very likely the connection has been replaced, | ||
806 | * so these registrations are invalid and unusable. | ||
807 | */ | ||
808 | while (unlikely(!list_empty(&req->rl_registered))) { | ||
809 | struct rpcrdma_mr *mr; | ||
810 | |||
811 | mr = rpcrdma_mr_pop(&req->rl_registered); | ||
812 | rpcrdma_mr_defer_recovery(mr); | ||
813 | } | ||
814 | |||
824 | /* This implementation supports the following combinations | 815 | /* This implementation supports the following combinations |
825 | * of chunk lists in one RPC-over-RDMA Call message: | 816 | * of chunk lists in one RPC-over-RDMA Call message: |
826 | * | 817 | * |
@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) | |||
868 | if (ret) | 859 | if (ret) |
869 | goto out_err; | 860 | goto out_err; |
870 | 861 | ||
871 | dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n", | 862 | trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype); |
872 | rqst->rq_task->tk_pid, __func__, | ||
873 | transfertypes[rtype], transfertypes[wtype], | ||
874 | xdr_stream_pos(xdr)); | ||
875 | 863 | ||
876 | ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), | 864 | ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), |
877 | &rqst->rq_snd_buf, rtype); | 865 | &rqst->rq_snd_buf, rtype); |
@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | |||
926 | curlen = rqst->rq_rcv_buf.head[0].iov_len; | 914 | curlen = rqst->rq_rcv_buf.head[0].iov_len; |
927 | if (curlen > copy_len) | 915 | if (curlen > copy_len) |
928 | curlen = copy_len; | 916 | curlen = copy_len; |
929 | dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", | 917 | trace_xprtrdma_fixup(rqst, copy_len, curlen); |
930 | __func__, srcp, copy_len, curlen); | ||
931 | srcp += curlen; | 918 | srcp += curlen; |
932 | copy_len -= curlen; | 919 | copy_len -= curlen; |
933 | 920 | ||
@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | |||
947 | if (curlen > pagelist_len) | 934 | if (curlen > pagelist_len) |
948 | curlen = pagelist_len; | 935 | curlen = pagelist_len; |
949 | 936 | ||
950 | dprintk("RPC: %s: page %d" | 937 | trace_xprtrdma_fixup_pg(rqst, i, srcp, |
951 | " srcp 0x%p len %d curlen %d\n", | 938 | copy_len, curlen); |
952 | __func__, i, srcp, copy_len, curlen); | ||
953 | destp = kmap_atomic(ppages[i]); | 939 | destp = kmap_atomic(ppages[i]); |
954 | memcpy(destp + page_base, srcp, curlen); | 940 | memcpy(destp + page_base, srcp, curlen); |
955 | flush_dcache_page(ppages[i]); | 941 | flush_dcache_page(ppages[i]); |
@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | |||
984 | return fixup_copy_count; | 970 | return fixup_copy_count; |
985 | } | 971 | } |
986 | 972 | ||
987 | /* Caller must guarantee @rep remains stable during this call. | ||
988 | */ | ||
989 | static void | ||
990 | rpcrdma_mark_remote_invalidation(struct list_head *mws, | ||
991 | struct rpcrdma_rep *rep) | ||
992 | { | ||
993 | struct rpcrdma_mw *mw; | ||
994 | |||
995 | if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)) | ||
996 | return; | ||
997 | |||
998 | list_for_each_entry(mw, mws, mw_list) | ||
999 | if (mw->mw_handle == rep->rr_inv_rkey) { | ||
1000 | mw->mw_flags = RPCRDMA_MW_F_RI; | ||
1001 | break; /* only one invalidated MR per RPC */ | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | /* By convention, backchannel calls arrive via rdma_msg type | 973 | /* By convention, backchannel calls arrive via rdma_msg type |
1006 | * messages, and never populate the chunk lists. This makes | 974 | * messages, and never populate the chunk lists. This makes |
1007 | * the RPC/RDMA header small and fixed in size, so it is | 975 | * the RPC/RDMA header small and fixed in size, so it is |
@@ -1058,26 +1026,19 @@ out_short: | |||
1058 | 1026 | ||
1059 | static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) | 1027 | static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) |
1060 | { | 1028 | { |
1029 | u32 handle; | ||
1030 | u64 offset; | ||
1061 | __be32 *p; | 1031 | __be32 *p; |
1062 | 1032 | ||
1063 | p = xdr_inline_decode(xdr, 4 * sizeof(*p)); | 1033 | p = xdr_inline_decode(xdr, 4 * sizeof(*p)); |
1064 | if (unlikely(!p)) | 1034 | if (unlikely(!p)) |
1065 | return -EIO; | 1035 | return -EIO; |
1066 | 1036 | ||
1067 | ifdebug(FACILITY) { | 1037 | handle = be32_to_cpup(p++); |
1068 | u64 offset; | 1038 | *length = be32_to_cpup(p++); |
1069 | u32 handle; | 1039 | xdr_decode_hyper(p, &offset); |
1070 | |||
1071 | handle = be32_to_cpup(p++); | ||
1072 | *length = be32_to_cpup(p++); | ||
1073 | xdr_decode_hyper(p, &offset); | ||
1074 | dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n", | ||
1075 | __func__, *length, (unsigned long long)offset, | ||
1076 | handle); | ||
1077 | } else { | ||
1078 | *length = be32_to_cpup(p + 1); | ||
1079 | } | ||
1080 | 1040 | ||
1041 | trace_xprtrdma_decode_seg(handle, *length, offset); | ||
1081 | return 0; | 1042 | return 0; |
1082 | } | 1043 | } |
1083 | 1044 | ||
@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length) | |||
1098 | *length += seglength; | 1059 | *length += seglength; |
1099 | } | 1060 | } |
1100 | 1061 | ||
1101 | dprintk("RPC: %s: segcount=%u, %u bytes\n", | ||
1102 | __func__, be32_to_cpup(p), *length); | ||
1103 | return 0; | 1062 | return 0; |
1104 | } | 1063 | } |
1105 | 1064 | ||
@@ -1296,8 +1255,7 @@ out: | |||
1296 | * being marshaled. | 1255 | * being marshaled. |
1297 | */ | 1256 | */ |
1298 | out_badheader: | 1257 | out_badheader: |
1299 | dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", | 1258 | trace_xprtrdma_reply_hdr(rep); |
1300 | rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc)); | ||
1301 | r_xprt->rx_stats.bad_reply_count++; | 1259 | r_xprt->rx_stats.bad_reply_count++; |
1302 | status = -EIO; | 1260 | status = -EIO; |
1303 | goto out; | 1261 | goto out; |
@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work) | |||
1339 | struct rpcrdma_rep *rep = | 1297 | struct rpcrdma_rep *rep = |
1340 | container_of(work, struct rpcrdma_rep, rr_work); | 1298 | container_of(work, struct rpcrdma_rep, rr_work); |
1341 | struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); | 1299 | struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); |
1300 | struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; | ||
1342 | 1301 | ||
1343 | rpcrdma_mark_remote_invalidation(&req->rl_registered, rep); | 1302 | trace_xprtrdma_defer_cmp(rep); |
1344 | rpcrdma_release_rqst(rep->rr_rxprt, req); | 1303 | if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) |
1304 | r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered); | ||
1305 | rpcrdma_release_rqst(r_xprt, req); | ||
1345 | rpcrdma_complete_rqst(rep); | 1306 | rpcrdma_complete_rqst(rep); |
1346 | } | 1307 | } |
1347 | 1308 | ||
@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
1360 | u32 credits; | 1321 | u32 credits; |
1361 | __be32 *p; | 1322 | __be32 *p; |
1362 | 1323 | ||
1363 | dprintk("RPC: %s: incoming rep %p\n", __func__, rep); | ||
1364 | |||
1365 | if (rep->rr_hdrbuf.head[0].iov_len == 0) | 1324 | if (rep->rr_hdrbuf.head[0].iov_len == 0) |
1366 | goto out_badstatus; | 1325 | goto out_badstatus; |
1367 | 1326 | ||
@@ -1405,8 +1364,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
1405 | rep->rr_rqst = rqst; | 1364 | rep->rr_rqst = rqst; |
1406 | clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); | 1365 | clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); |
1407 | 1366 | ||
1408 | dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", | 1367 | trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); |
1409 | __func__, rep, req, be32_to_cpu(rep->rr_xid)); | ||
1410 | 1368 | ||
1411 | queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); | 1369 | queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); |
1412 | return; | 1370 | return; |
@@ -1420,8 +1378,7 @@ out_badstatus: | |||
1420 | return; | 1378 | return; |
1421 | 1379 | ||
1422 | out_badversion: | 1380 | out_badversion: |
1423 | dprintk("RPC: %s: invalid version %d\n", | 1381 | trace_xprtrdma_reply_vers(rep); |
1424 | __func__, be32_to_cpu(rep->rr_vers)); | ||
1425 | goto repost; | 1382 | goto repost; |
1426 | 1383 | ||
1427 | /* The RPC transaction has already been terminated, or the header | 1384 | /* The RPC transaction has already been terminated, or the header |
@@ -1429,12 +1386,11 @@ out_badversion: | |||
1429 | */ | 1386 | */ |
1430 | out_norqst: | 1387 | out_norqst: |
1431 | spin_unlock(&xprt->recv_lock); | 1388 | spin_unlock(&xprt->recv_lock); |
1432 | dprintk("RPC: %s: no match for incoming xid 0x%08x\n", | 1389 | trace_xprtrdma_reply_rqst(rep); |
1433 | __func__, be32_to_cpu(rep->rr_xid)); | ||
1434 | goto repost; | 1390 | goto repost; |
1435 | 1391 | ||
1436 | out_shortreply: | 1392 | out_shortreply: |
1437 | dprintk("RPC: %s: short/invalid reply\n", __func__); | 1393 | trace_xprtrdma_reply_short(rep); |
1438 | 1394 | ||
1439 | /* If no pending RPC transaction was matched, post a replacement | 1395 | /* If no pending RPC transaction was matched, post a replacement |
1440 | * receive buffer before returning. | 1396 | * receive buffer before returning. |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 6ee1ad8978f3..4b1ecfe979cf 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -67,8 +67,7 @@ | |||
67 | static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; | 67 | static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; |
68 | unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; | 68 | unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; |
69 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; | 69 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
70 | static unsigned int xprt_rdma_inline_write_padding; | 70 | unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; |
71 | unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; | ||
72 | int xprt_rdma_pad_optimize; | 71 | int xprt_rdma_pad_optimize; |
73 | 72 | ||
74 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 73 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
@@ -81,6 +80,7 @@ static unsigned int zero; | |||
81 | static unsigned int max_padding = PAGE_SIZE; | 80 | static unsigned int max_padding = PAGE_SIZE; |
82 | static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; | 81 | static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; |
83 | static unsigned int max_memreg = RPCRDMA_LAST - 1; | 82 | static unsigned int max_memreg = RPCRDMA_LAST - 1; |
83 | static unsigned int dummy; | ||
84 | 84 | ||
85 | static struct ctl_table_header *sunrpc_table_header; | 85 | static struct ctl_table_header *sunrpc_table_header; |
86 | 86 | ||
@@ -114,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = { | |||
114 | }, | 114 | }, |
115 | { | 115 | { |
116 | .procname = "rdma_inline_write_padding", | 116 | .procname = "rdma_inline_write_padding", |
117 | .data = &xprt_rdma_inline_write_padding, | 117 | .data = &dummy, |
118 | .maxlen = sizeof(unsigned int), | 118 | .maxlen = sizeof(unsigned int), |
119 | .mode = 0644, | 119 | .mode = 0644, |
120 | .proc_handler = proc_dointvec_minmax, | 120 | .proc_handler = proc_dointvec_minmax, |
@@ -259,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work) | |||
259 | 259 | ||
260 | xprt_clear_connected(xprt); | 260 | xprt_clear_connected(xprt); |
261 | 261 | ||
262 | dprintk("RPC: %s: %sconnect\n", __func__, | ||
263 | r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); | ||
264 | rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); | 262 | rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); |
265 | if (rc) | 263 | if (rc) |
266 | xprt_wake_pending_tasks(xprt, rc); | 264 | xprt_wake_pending_tasks(xprt, rc); |
267 | 265 | ||
268 | dprintk("RPC: %s: exit\n", __func__); | ||
269 | xprt_clear_connecting(xprt); | 266 | xprt_clear_connecting(xprt); |
270 | } | 267 | } |
271 | 268 | ||
@@ -275,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) | |||
275 | struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, | 272 | struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, |
276 | rx_xprt); | 273 | rx_xprt); |
277 | 274 | ||
278 | pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); | 275 | trace_xprtrdma_inject_dsc(r_xprt); |
279 | rdma_disconnect(r_xprt->rx_ia.ri_id); | 276 | rdma_disconnect(r_xprt->rx_ia.ri_id); |
280 | } | 277 | } |
281 | 278 | ||
@@ -295,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) | |||
295 | { | 292 | { |
296 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 293 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
297 | 294 | ||
298 | dprintk("RPC: %s: called\n", __func__); | 295 | trace_xprtrdma_destroy(r_xprt); |
299 | 296 | ||
300 | cancel_delayed_work_sync(&r_xprt->rx_connect_worker); | 297 | cancel_delayed_work_sync(&r_xprt->rx_connect_worker); |
301 | 298 | ||
@@ -306,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) | |||
306 | rpcrdma_ia_close(&r_xprt->rx_ia); | 303 | rpcrdma_ia_close(&r_xprt->rx_ia); |
307 | 304 | ||
308 | xprt_rdma_free_addresses(xprt); | 305 | xprt_rdma_free_addresses(xprt); |
309 | |||
310 | xprt_free(xprt); | 306 | xprt_free(xprt); |
311 | 307 | ||
312 | dprintk("RPC: %s: returning\n", __func__); | ||
313 | |||
314 | module_put(THIS_MODULE); | 308 | module_put(THIS_MODULE); |
315 | } | 309 | } |
316 | 310 | ||
@@ -361,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args) | |||
361 | /* | 355 | /* |
362 | * Set up RDMA-specific connect data. | 356 | * Set up RDMA-specific connect data. |
363 | */ | 357 | */ |
364 | 358 | sap = args->dstaddr; | |
365 | sap = (struct sockaddr *)&cdata.addr; | ||
366 | memcpy(sap, args->dstaddr, args->addrlen); | ||
367 | 359 | ||
368 | /* Ensure xprt->addr holds valid server TCP (not RDMA) | 360 | /* Ensure xprt->addr holds valid server TCP (not RDMA) |
369 | * address, for any side protocols which peek at it */ | 361 | * address, for any side protocols which peek at it */ |
@@ -373,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args) | |||
373 | 365 | ||
374 | if (rpc_get_port(sap)) | 366 | if (rpc_get_port(sap)) |
375 | xprt_set_bound(xprt); | 367 | xprt_set_bound(xprt); |
368 | xprt_rdma_format_addresses(xprt, sap); | ||
376 | 369 | ||
377 | cdata.max_requests = xprt->max_reqs; | 370 | cdata.max_requests = xprt->max_reqs; |
378 | 371 | ||
@@ -387,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args) | |||
387 | if (cdata.inline_rsize > cdata.rsize) | 380 | if (cdata.inline_rsize > cdata.rsize) |
388 | cdata.inline_rsize = cdata.rsize; | 381 | cdata.inline_rsize = cdata.rsize; |
389 | 382 | ||
390 | cdata.padding = xprt_rdma_inline_write_padding; | ||
391 | |||
392 | /* | 383 | /* |
393 | * Create new transport instance, which includes initialized | 384 | * Create new transport instance, which includes initialized |
394 | * o ia | 385 | * o ia |
@@ -398,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args) | |||
398 | 389 | ||
399 | new_xprt = rpcx_to_rdmax(xprt); | 390 | new_xprt = rpcx_to_rdmax(xprt); |
400 | 391 | ||
401 | rc = rpcrdma_ia_open(new_xprt, sap); | 392 | rc = rpcrdma_ia_open(new_xprt); |
402 | if (rc) | 393 | if (rc) |
403 | goto out1; | 394 | goto out1; |
404 | 395 | ||
@@ -407,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args) | |||
407 | */ | 398 | */ |
408 | new_xprt->rx_data = cdata; | 399 | new_xprt->rx_data = cdata; |
409 | new_ep = &new_xprt->rx_ep; | 400 | new_ep = &new_xprt->rx_ep; |
410 | new_ep->rep_remote_addr = cdata.addr; | ||
411 | 401 | ||
412 | rc = rpcrdma_ep_create(&new_xprt->rx_ep, | 402 | rc = rpcrdma_ep_create(&new_xprt->rx_ep, |
413 | &new_xprt->rx_ia, &new_xprt->rx_data); | 403 | &new_xprt->rx_ia, &new_xprt->rx_data); |
414 | if (rc) | 404 | if (rc) |
415 | goto out2; | 405 | goto out2; |
416 | 406 | ||
417 | /* | ||
418 | * Allocate pre-registered send and receive buffers for headers and | ||
419 | * any inline data. Also specify any padding which will be provided | ||
420 | * from a preregistered zero buffer. | ||
421 | */ | ||
422 | rc = rpcrdma_buffer_create(new_xprt); | 407 | rc = rpcrdma_buffer_create(new_xprt); |
423 | if (rc) | 408 | if (rc) |
424 | goto out3; | 409 | goto out3; |
425 | 410 | ||
426 | /* | ||
427 | * Register a callback for connection events. This is necessary because | ||
428 | * connection loss notification is async. We also catch connection loss | ||
429 | * when reaping receives. | ||
430 | */ | ||
431 | INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, | 411 | INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, |
432 | xprt_rdma_connect_worker); | 412 | xprt_rdma_connect_worker); |
433 | 413 | ||
434 | xprt_rdma_format_addresses(xprt, sap); | ||
435 | xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); | 414 | xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); |
436 | if (xprt->max_payload == 0) | 415 | if (xprt->max_payload == 0) |
437 | goto out4; | 416 | goto out4; |
@@ -445,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args) | |||
445 | dprintk("RPC: %s: %s:%s\n", __func__, | 424 | dprintk("RPC: %s: %s:%s\n", __func__, |
446 | xprt->address_strings[RPC_DISPLAY_ADDR], | 425 | xprt->address_strings[RPC_DISPLAY_ADDR], |
447 | xprt->address_strings[RPC_DISPLAY_PORT]); | 426 | xprt->address_strings[RPC_DISPLAY_PORT]); |
427 | trace_xprtrdma_create(new_xprt); | ||
448 | return xprt; | 428 | return xprt; |
449 | 429 | ||
450 | out4: | 430 | out4: |
451 | xprt_rdma_free_addresses(xprt); | 431 | rpcrdma_buffer_destroy(&new_xprt->rx_buf); |
452 | rc = -EINVAL; | 432 | rc = -ENODEV; |
453 | out3: | 433 | out3: |
454 | rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); | 434 | rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); |
455 | out2: | 435 | out2: |
456 | rpcrdma_ia_close(&new_xprt->rx_ia); | 436 | rpcrdma_ia_close(&new_xprt->rx_ia); |
457 | out1: | 437 | out1: |
438 | trace_xprtrdma_destroy(new_xprt); | ||
439 | xprt_rdma_free_addresses(xprt); | ||
458 | xprt_free(xprt); | 440 | xprt_free(xprt); |
459 | return ERR_PTR(rc); | 441 | return ERR_PTR(rc); |
460 | } | 442 | } |
@@ -488,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt) | |||
488 | rpcrdma_ep_disconnect(ep, ia); | 470 | rpcrdma_ep_disconnect(ep, ia); |
489 | } | 471 | } |
490 | 472 | ||
473 | /** | ||
474 | * xprt_rdma_set_port - update server port with rpcbind result | ||
475 | * @xprt: controlling RPC transport | ||
476 | * @port: new port value | ||
477 | * | ||
478 | * Transport connect status is unchanged. | ||
479 | */ | ||
491 | static void | 480 | static void |
492 | xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) | 481 | xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) |
493 | { | 482 | { |
494 | struct sockaddr_in *sap; | 483 | struct sockaddr *sap = (struct sockaddr *)&xprt->addr; |
484 | char buf[8]; | ||
495 | 485 | ||
496 | sap = (struct sockaddr_in *)&xprt->addr; | 486 | dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n", |
497 | sap->sin_port = htons(port); | 487 | __func__, xprt, |
498 | sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; | 488 | xprt->address_strings[RPC_DISPLAY_ADDR], |
499 | sap->sin_port = htons(port); | 489 | xprt->address_strings[RPC_DISPLAY_PORT], |
500 | dprintk("RPC: %s: %u\n", __func__, port); | 490 | port); |
491 | |||
492 | rpc_set_port(sap, port); | ||
493 | |||
494 | kfree(xprt->address_strings[RPC_DISPLAY_PORT]); | ||
495 | snprintf(buf, sizeof(buf), "%u", port); | ||
496 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); | ||
497 | |||
498 | kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); | ||
499 | snprintf(buf, sizeof(buf), "%4hx", port); | ||
500 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); | ||
501 | } | 501 | } |
502 | 502 | ||
503 | /** | 503 | /** |
@@ -516,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) | |||
516 | static void | 516 | static void |
517 | xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) | 517 | xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) |
518 | { | 518 | { |
519 | dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt); | ||
520 | |||
521 | xprt_force_disconnect(xprt); | 519 | xprt_force_disconnect(xprt); |
522 | } | 520 | } |
523 | 521 | ||
@@ -640,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task) | |||
640 | 638 | ||
641 | req = rpcrdma_buffer_get(&r_xprt->rx_buf); | 639 | req = rpcrdma_buffer_get(&r_xprt->rx_buf); |
642 | if (req == NULL) | 640 | if (req == NULL) |
643 | return -ENOMEM; | 641 | goto out_get; |
644 | 642 | ||
645 | flags = RPCRDMA_DEF_GFP; | 643 | flags = RPCRDMA_DEF_GFP; |
646 | if (RPC_IS_SWAPPER(task)) | 644 | if (RPC_IS_SWAPPER(task)) |
@@ -653,19 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task) | |||
653 | if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) | 651 | if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) |
654 | goto out_fail; | 652 | goto out_fail; |
655 | 653 | ||
656 | dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n", | ||
657 | task->tk_pid, __func__, rqst->rq_callsize, | ||
658 | rqst->rq_rcvsize, req); | ||
659 | |||
660 | req->rl_cpu = smp_processor_id(); | 654 | req->rl_cpu = smp_processor_id(); |
661 | req->rl_connect_cookie = 0; /* our reserved value */ | 655 | req->rl_connect_cookie = 0; /* our reserved value */ |
662 | rpcrdma_set_xprtdata(rqst, req); | 656 | rpcrdma_set_xprtdata(rqst, req); |
663 | rqst->rq_buffer = req->rl_sendbuf->rg_base; | 657 | rqst->rq_buffer = req->rl_sendbuf->rg_base; |
664 | rqst->rq_rbuffer = req->rl_recvbuf->rg_base; | 658 | rqst->rq_rbuffer = req->rl_recvbuf->rg_base; |
659 | trace_xprtrdma_allocate(task, req); | ||
665 | return 0; | 660 | return 0; |
666 | 661 | ||
667 | out_fail: | 662 | out_fail: |
668 | rpcrdma_buffer_put(req); | 663 | rpcrdma_buffer_put(req); |
664 | out_get: | ||
665 | trace_xprtrdma_allocate(task, NULL); | ||
669 | return -ENOMEM; | 666 | return -ENOMEM; |
670 | } | 667 | } |
671 | 668 | ||
@@ -682,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task) | |||
682 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | 679 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); |
683 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 680 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
684 | 681 | ||
685 | if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags)) | ||
686 | return; | ||
687 | |||
688 | dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); | ||
689 | |||
690 | if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) | 682 | if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) |
691 | rpcrdma_release_rqst(r_xprt, req); | 683 | rpcrdma_release_rqst(r_xprt, req); |
684 | trace_xprtrdma_rpc_done(task, req); | ||
692 | rpcrdma_buffer_put(req); | 685 | rpcrdma_buffer_put(req); |
693 | } | 686 | } |
694 | 687 | ||
@@ -698,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task) | |||
698 | * | 691 | * |
699 | * Caller holds the transport's write lock. | 692 | * Caller holds the transport's write lock. |
700 | * | 693 | * |
701 | * Return values: | 694 | * Returns: |
702 | * 0: The request has been sent | 695 | * %0 if the RPC message has been sent |
703 | * ENOTCONN: Caller needs to invoke connect logic then call again | 696 | * %-ENOTCONN if the caller should reconnect and call again |
704 | * ENOBUFS: Call again later to send the request | 697 | * %-ENOBUFS if the caller should call again later |
705 | * EIO: A permanent error occurred. The request was not sent, | 698 | * %-EIO if a permanent error occurred and the request was not |
706 | * and don't try it again | 699 | * sent. Do not try to send this message again. |
707 | * | ||
708 | * send_request invokes the meat of RPC RDMA. It must do the following: | ||
709 | * | ||
710 | * 1. Marshal the RPC request into an RPC RDMA request, which means | ||
711 | * putting a header in front of data, and creating IOVs for RDMA | ||
712 | * from those in the request. | ||
713 | * 2. In marshaling, detect opportunities for RDMA, and use them. | ||
714 | * 3. Post a recv message to set up asynch completion, then send | ||
715 | * the request (rpcrdma_ep_post). | ||
716 | * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). | ||
717 | */ | 700 | */ |
718 | static int | 701 | static int |
719 | xprt_rdma_send_request(struct rpc_task *task) | 702 | xprt_rdma_send_request(struct rpc_task *task) |
@@ -724,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
724 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 707 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
725 | int rc = 0; | 708 | int rc = 0; |
726 | 709 | ||
710 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | ||
711 | if (unlikely(!rqst->rq_buffer)) | ||
712 | return xprt_rdma_bc_send_reply(rqst); | ||
713 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | ||
714 | |||
727 | if (!xprt_connected(xprt)) | 715 | if (!xprt_connected(xprt)) |
728 | goto drop_connection; | 716 | goto drop_connection; |
729 | 717 | ||
730 | /* On retransmit, remove any previously registered chunks */ | ||
731 | if (unlikely(!list_empty(&req->rl_registered))) | ||
732 | r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, | ||
733 | &req->rl_registered); | ||
734 | |||
735 | rc = rpcrdma_marshal_req(r_xprt, rqst); | 718 | rc = rpcrdma_marshal_req(r_xprt, rqst); |
736 | if (rc < 0) | 719 | if (rc < 0) |
737 | goto failed_marshal; | 720 | goto failed_marshal; |
@@ -744,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
744 | goto drop_connection; | 727 | goto drop_connection; |
745 | req->rl_connect_cookie = xprt->connect_cookie; | 728 | req->rl_connect_cookie = xprt->connect_cookie; |
746 | 729 | ||
747 | set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); | 730 | __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); |
748 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) | 731 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) |
749 | goto drop_connection; | 732 | goto drop_connection; |
750 | 733 | ||
@@ -904,8 +887,7 @@ int xprt_rdma_init(void) | |||
904 | "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", | 887 | "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", |
905 | xprt_rdma_slot_table_entries, | 888 | xprt_rdma_slot_table_entries, |
906 | xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); | 889 | xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); |
907 | dprintk("\tPadding %d\n\tMemreg %d\n", | 890 | dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy); |
908 | xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); | ||
909 | 891 | ||
910 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 892 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
911 | if (!sunrpc_table_header) | 893 | if (!sunrpc_table_header) |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8607c029c0dd..f4eb63e8e689 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -71,8 +71,8 @@ | |||
71 | /* | 71 | /* |
72 | * internal functions | 72 | * internal functions |
73 | */ | 73 | */ |
74 | static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt); | 74 | static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); |
75 | static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); | 75 | static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); |
76 | static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); | 76 | static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); |
77 | 77 | ||
78 | struct workqueue_struct *rpcrdma_receive_wq __read_mostly; | 78 | struct workqueue_struct *rpcrdma_receive_wq __read_mostly; |
@@ -108,7 +108,10 @@ static void | |||
108 | rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) | 108 | rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) |
109 | { | 109 | { |
110 | struct rpcrdma_ep *ep = context; | 110 | struct rpcrdma_ep *ep = context; |
111 | struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, | ||
112 | rx_ep); | ||
111 | 113 | ||
114 | trace_xprtrdma_qp_error(r_xprt, event); | ||
112 | pr_err("rpcrdma: %s on device %s ep %p\n", | 115 | pr_err("rpcrdma: %s on device %s ep %p\n", |
113 | ib_event_msg(event->event), event->device->name, context); | 116 | ib_event_msg(event->event), event->device->name, context); |
114 | 117 | ||
@@ -133,6 +136,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) | |||
133 | container_of(cqe, struct rpcrdma_sendctx, sc_cqe); | 136 | container_of(cqe, struct rpcrdma_sendctx, sc_cqe); |
134 | 137 | ||
135 | /* WARNING: Only wr_cqe and status are reliable at this point */ | 138 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
139 | trace_xprtrdma_wc_send(sc, wc); | ||
136 | if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) | 140 | if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) |
137 | pr_err("rpcrdma: Send: %s (%u/0x%x)\n", | 141 | pr_err("rpcrdma: Send: %s (%u/0x%x)\n", |
138 | ib_wc_status_msg(wc->status), | 142 | ib_wc_status_msg(wc->status), |
@@ -155,13 +159,11 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) | |||
155 | rr_cqe); | 159 | rr_cqe); |
156 | 160 | ||
157 | /* WARNING: Only wr_id and status are reliable at this point */ | 161 | /* WARNING: Only wr_id and status are reliable at this point */ |
162 | trace_xprtrdma_wc_receive(rep, wc); | ||
158 | if (wc->status != IB_WC_SUCCESS) | 163 | if (wc->status != IB_WC_SUCCESS) |
159 | goto out_fail; | 164 | goto out_fail; |
160 | 165 | ||
161 | /* status == SUCCESS means all fields in wc are trustworthy */ | 166 | /* status == SUCCESS means all fields in wc are trustworthy */ |
162 | dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n", | ||
163 | __func__, rep, wc->byte_len); | ||
164 | |||
165 | rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); | 167 | rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); |
166 | rep->rr_wc_flags = wc->wc_flags; | 168 | rep->rr_wc_flags = wc->wc_flags; |
167 | rep->rr_inv_rkey = wc->ex.invalidate_rkey; | 169 | rep->rr_inv_rkey = wc->ex.invalidate_rkey; |
@@ -192,7 +194,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, | |||
192 | unsigned int rsize, wsize; | 194 | unsigned int rsize, wsize; |
193 | 195 | ||
194 | /* Default settings for RPC-over-RDMA Version One */ | 196 | /* Default settings for RPC-over-RDMA Version One */ |
195 | r_xprt->rx_ia.ri_reminv_expected = false; | ||
196 | r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; | 197 | r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; |
197 | rsize = RPCRDMA_V1_DEF_INLINE_SIZE; | 198 | rsize = RPCRDMA_V1_DEF_INLINE_SIZE; |
198 | wsize = RPCRDMA_V1_DEF_INLINE_SIZE; | 199 | wsize = RPCRDMA_V1_DEF_INLINE_SIZE; |
@@ -200,7 +201,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, | |||
200 | if (pmsg && | 201 | if (pmsg && |
201 | pmsg->cp_magic == rpcrdma_cmp_magic && | 202 | pmsg->cp_magic == rpcrdma_cmp_magic && |
202 | pmsg->cp_version == RPCRDMA_CMP_VERSION) { | 203 | pmsg->cp_version == RPCRDMA_CMP_VERSION) { |
203 | r_xprt->rx_ia.ri_reminv_expected = true; | ||
204 | r_xprt->rx_ia.ri_implicit_roundup = true; | 204 | r_xprt->rx_ia.ri_implicit_roundup = true; |
205 | rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); | 205 | rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); |
206 | wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); | 206 | wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); |
@@ -221,11 +221,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
221 | struct rpcrdma_xprt *xprt = id->context; | 221 | struct rpcrdma_xprt *xprt = id->context; |
222 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 222 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
223 | struct rpcrdma_ep *ep = &xprt->rx_ep; | 223 | struct rpcrdma_ep *ep = &xprt->rx_ep; |
224 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
225 | struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr; | ||
226 | #endif | ||
227 | int connstate = 0; | 224 | int connstate = 0; |
228 | 225 | ||
226 | trace_xprtrdma_conn_upcall(xprt, event); | ||
229 | switch (event->event) { | 227 | switch (event->event) { |
230 | case RDMA_CM_EVENT_ADDR_RESOLVED: | 228 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
231 | case RDMA_CM_EVENT_ROUTE_RESOLVED: | 229 | case RDMA_CM_EVENT_ROUTE_RESOLVED: |
@@ -234,21 +232,17 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
234 | break; | 232 | break; |
235 | case RDMA_CM_EVENT_ADDR_ERROR: | 233 | case RDMA_CM_EVENT_ADDR_ERROR: |
236 | ia->ri_async_rc = -EHOSTUNREACH; | 234 | ia->ri_async_rc = -EHOSTUNREACH; |
237 | dprintk("RPC: %s: CM address resolution error, ep 0x%p\n", | ||
238 | __func__, ep); | ||
239 | complete(&ia->ri_done); | 235 | complete(&ia->ri_done); |
240 | break; | 236 | break; |
241 | case RDMA_CM_EVENT_ROUTE_ERROR: | 237 | case RDMA_CM_EVENT_ROUTE_ERROR: |
242 | ia->ri_async_rc = -ENETUNREACH; | 238 | ia->ri_async_rc = -ENETUNREACH; |
243 | dprintk("RPC: %s: CM route resolution error, ep 0x%p\n", | ||
244 | __func__, ep); | ||
245 | complete(&ia->ri_done); | 239 | complete(&ia->ri_done); |
246 | break; | 240 | break; |
247 | case RDMA_CM_EVENT_DEVICE_REMOVAL: | 241 | case RDMA_CM_EVENT_DEVICE_REMOVAL: |
248 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 242 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
249 | pr_info("rpcrdma: removing device %s for %pIS:%u\n", | 243 | pr_info("rpcrdma: removing device %s for %s:%s\n", |
250 | ia->ri_device->name, | 244 | ia->ri_device->name, |
251 | sap, rpc_get_port(sap)); | 245 | rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt)); |
252 | #endif | 246 | #endif |
253 | set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); | 247 | set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); |
254 | ep->rep_connected = -ENODEV; | 248 | ep->rep_connected = -ENODEV; |
@@ -271,8 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
271 | connstate = -ENETDOWN; | 265 | connstate = -ENETDOWN; |
272 | goto connected; | 266 | goto connected; |
273 | case RDMA_CM_EVENT_REJECTED: | 267 | case RDMA_CM_EVENT_REJECTED: |
274 | dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n", | 268 | dprintk("rpcrdma: connection to %s:%s rejected: %s\n", |
275 | sap, rpc_get_port(sap), | 269 | rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt), |
276 | rdma_reject_msg(id, event->status)); | 270 | rdma_reject_msg(id, event->status)); |
277 | connstate = -ECONNREFUSED; | 271 | connstate = -ECONNREFUSED; |
278 | if (event->status == IB_CM_REJ_STALE_CONN) | 272 | if (event->status == IB_CM_REJ_STALE_CONN) |
@@ -287,8 +281,9 @@ connected: | |||
287 | wake_up_all(&ep->rep_connect_wait); | 281 | wake_up_all(&ep->rep_connect_wait); |
288 | /*FALLTHROUGH*/ | 282 | /*FALLTHROUGH*/ |
289 | default: | 283 | default: |
290 | dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n", | 284 | dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n", |
291 | __func__, sap, rpc_get_port(sap), | 285 | __func__, |
286 | rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt), | ||
292 | ia->ri_device->name, ia->ri_ops->ro_displayname, | 287 | ia->ri_device->name, ia->ri_ops->ro_displayname, |
293 | ep, rdma_event_msg(event->event)); | 288 | ep, rdma_event_msg(event->event)); |
294 | break; | 289 | break; |
@@ -298,13 +293,14 @@ connected: | |||
298 | } | 293 | } |
299 | 294 | ||
300 | static struct rdma_cm_id * | 295 | static struct rdma_cm_id * |
301 | rpcrdma_create_id(struct rpcrdma_xprt *xprt, | 296 | rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) |
302 | struct rpcrdma_ia *ia, struct sockaddr *addr) | ||
303 | { | 297 | { |
304 | unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; | 298 | unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; |
305 | struct rdma_cm_id *id; | 299 | struct rdma_cm_id *id; |
306 | int rc; | 300 | int rc; |
307 | 301 | ||
302 | trace_xprtrdma_conn_start(xprt); | ||
303 | |||
308 | init_completion(&ia->ri_done); | 304 | init_completion(&ia->ri_done); |
309 | init_completion(&ia->ri_remove_done); | 305 | init_completion(&ia->ri_remove_done); |
310 | 306 | ||
@@ -318,7 +314,9 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
318 | } | 314 | } |
319 | 315 | ||
320 | ia->ri_async_rc = -ETIMEDOUT; | 316 | ia->ri_async_rc = -ETIMEDOUT; |
321 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); | 317 | rc = rdma_resolve_addr(id, NULL, |
318 | (struct sockaddr *)&xprt->rx_xprt.addr, | ||
319 | RDMA_RESOLVE_TIMEOUT); | ||
322 | if (rc) { | 320 | if (rc) { |
323 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", | 321 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", |
324 | __func__, rc); | 322 | __func__, rc); |
@@ -326,8 +324,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
326 | } | 324 | } |
327 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); | 325 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); |
328 | if (rc < 0) { | 326 | if (rc < 0) { |
329 | dprintk("RPC: %s: wait() exited: %i\n", | 327 | trace_xprtrdma_conn_tout(xprt); |
330 | __func__, rc); | ||
331 | goto out; | 328 | goto out; |
332 | } | 329 | } |
333 | 330 | ||
@@ -344,8 +341,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
344 | } | 341 | } |
345 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); | 342 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); |
346 | if (rc < 0) { | 343 | if (rc < 0) { |
347 | dprintk("RPC: %s: wait() exited: %i\n", | 344 | trace_xprtrdma_conn_tout(xprt); |
348 | __func__, rc); | ||
349 | goto out; | 345 | goto out; |
350 | } | 346 | } |
351 | rc = ia->ri_async_rc; | 347 | rc = ia->ri_async_rc; |
@@ -365,19 +361,18 @@ out: | |||
365 | 361 | ||
366 | /** | 362 | /** |
367 | * rpcrdma_ia_open - Open and initialize an Interface Adapter. | 363 | * rpcrdma_ia_open - Open and initialize an Interface Adapter. |
368 | * @xprt: controlling transport | 364 | * @xprt: transport with IA to (re)initialize |
369 | * @addr: IP address of remote peer | ||
370 | * | 365 | * |
371 | * Returns 0 on success, negative errno if an appropriate | 366 | * Returns 0 on success, negative errno if an appropriate |
372 | * Interface Adapter could not be found and opened. | 367 | * Interface Adapter could not be found and opened. |
373 | */ | 368 | */ |
374 | int | 369 | int |
375 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr) | 370 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt) |
376 | { | 371 | { |
377 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 372 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
378 | int rc; | 373 | int rc; |
379 | 374 | ||
380 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); | 375 | ia->ri_id = rpcrdma_create_id(xprt, ia); |
381 | if (IS_ERR(ia->ri_id)) { | 376 | if (IS_ERR(ia->ri_id)) { |
382 | rc = PTR_ERR(ia->ri_id); | 377 | rc = PTR_ERR(ia->ri_id); |
383 | goto out_err; | 378 | goto out_err; |
@@ -392,7 +387,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr) | |||
392 | } | 387 | } |
393 | 388 | ||
394 | switch (xprt_rdma_memreg_strategy) { | 389 | switch (xprt_rdma_memreg_strategy) { |
395 | case RPCRDMA_FRMR: | 390 | case RPCRDMA_FRWR: |
396 | if (frwr_is_supported(ia)) { | 391 | if (frwr_is_supported(ia)) { |
397 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; | 392 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; |
398 | break; | 393 | break; |
@@ -462,10 +457,12 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) | |||
462 | rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); | 457 | rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); |
463 | rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); | 458 | rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); |
464 | } | 459 | } |
465 | rpcrdma_destroy_mrs(buf); | 460 | rpcrdma_mrs_destroy(buf); |
466 | 461 | ||
467 | /* Allow waiters to continue */ | 462 | /* Allow waiters to continue */ |
468 | complete(&ia->ri_remove_done); | 463 | complete(&ia->ri_remove_done); |
464 | |||
465 | trace_xprtrdma_remove(r_xprt); | ||
469 | } | 466 | } |
470 | 467 | ||
471 | /** | 468 | /** |
@@ -476,7 +473,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) | |||
476 | void | 473 | void |
477 | rpcrdma_ia_close(struct rpcrdma_ia *ia) | 474 | rpcrdma_ia_close(struct rpcrdma_ia *ia) |
478 | { | 475 | { |
479 | dprintk("RPC: %s: entering\n", __func__); | ||
480 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { | 476 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { |
481 | if (ia->ri_id->qp) | 477 | if (ia->ri_id->qp) |
482 | rdma_destroy_qp(ia->ri_id); | 478 | rdma_destroy_qp(ia->ri_id); |
@@ -630,9 +626,6 @@ out1: | |||
630 | void | 626 | void |
631 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | 627 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
632 | { | 628 | { |
633 | dprintk("RPC: %s: entering, connected is %d\n", | ||
634 | __func__, ep->rep_connected); | ||
635 | |||
636 | cancel_delayed_work_sync(&ep->rep_connect_worker); | 629 | cancel_delayed_work_sync(&ep->rep_connect_worker); |
637 | 630 | ||
638 | if (ia->ri_id->qp) { | 631 | if (ia->ri_id->qp) { |
@@ -653,13 +646,12 @@ static int | |||
653 | rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, | 646 | rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, |
654 | struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | 647 | struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
655 | { | 648 | { |
656 | struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr; | ||
657 | int rc, err; | 649 | int rc, err; |
658 | 650 | ||
659 | pr_info("%s: r_xprt = %p\n", __func__, r_xprt); | 651 | trace_xprtrdma_reinsert(r_xprt); |
660 | 652 | ||
661 | rc = -EHOSTUNREACH; | 653 | rc = -EHOSTUNREACH; |
662 | if (rpcrdma_ia_open(r_xprt, sap)) | 654 | if (rpcrdma_ia_open(r_xprt)) |
663 | goto out1; | 655 | goto out1; |
664 | 656 | ||
665 | rc = -ENOMEM; | 657 | rc = -ENOMEM; |
@@ -676,7 +668,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, | |||
676 | goto out3; | 668 | goto out3; |
677 | } | 669 | } |
678 | 670 | ||
679 | rpcrdma_create_mrs(r_xprt); | 671 | rpcrdma_mrs_create(r_xprt); |
680 | return 0; | 672 | return 0; |
681 | 673 | ||
682 | out3: | 674 | out3: |
@@ -691,16 +683,15 @@ static int | |||
691 | rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, | 683 | rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, |
692 | struct rpcrdma_ia *ia) | 684 | struct rpcrdma_ia *ia) |
693 | { | 685 | { |
694 | struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr; | ||
695 | struct rdma_cm_id *id, *old; | 686 | struct rdma_cm_id *id, *old; |
696 | int err, rc; | 687 | int err, rc; |
697 | 688 | ||
698 | dprintk("RPC: %s: reconnecting...\n", __func__); | 689 | trace_xprtrdma_reconnect(r_xprt); |
699 | 690 | ||
700 | rpcrdma_ep_disconnect(ep, ia); | 691 | rpcrdma_ep_disconnect(ep, ia); |
701 | 692 | ||
702 | rc = -EHOSTUNREACH; | 693 | rc = -EHOSTUNREACH; |
703 | id = rpcrdma_create_id(r_xprt, ia, sap); | 694 | id = rpcrdma_create_id(r_xprt, ia); |
704 | if (IS_ERR(id)) | 695 | if (IS_ERR(id)) |
705 | goto out; | 696 | goto out; |
706 | 697 | ||
@@ -817,16 +808,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
817 | int rc; | 808 | int rc; |
818 | 809 | ||
819 | rc = rdma_disconnect(ia->ri_id); | 810 | rc = rdma_disconnect(ia->ri_id); |
820 | if (!rc) { | 811 | if (!rc) |
821 | /* returns without wait if not connected */ | 812 | /* returns without wait if not connected */ |
822 | wait_event_interruptible(ep->rep_connect_wait, | 813 | wait_event_interruptible(ep->rep_connect_wait, |
823 | ep->rep_connected != 1); | 814 | ep->rep_connected != 1); |
824 | dprintk("RPC: %s: after wait, %sconnected\n", __func__, | 815 | else |
825 | (ep->rep_connected == 1) ? "still " : "dis"); | ||
826 | } else { | ||
827 | dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc); | ||
828 | ep->rep_connected = rc; | 816 | ep->rep_connected = rc; |
829 | } | 817 | trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt, |
818 | rx_ep), rc); | ||
830 | 819 | ||
831 | ib_drain_qp(ia->ri_id->qp); | 820 | ib_drain_qp(ia->ri_id->qp); |
832 | } | 821 | } |
@@ -998,15 +987,15 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) | |||
998 | { | 987 | { |
999 | struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, | 988 | struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, |
1000 | rb_recovery_worker.work); | 989 | rb_recovery_worker.work); |
1001 | struct rpcrdma_mw *mw; | 990 | struct rpcrdma_mr *mr; |
1002 | 991 | ||
1003 | spin_lock(&buf->rb_recovery_lock); | 992 | spin_lock(&buf->rb_recovery_lock); |
1004 | while (!list_empty(&buf->rb_stale_mrs)) { | 993 | while (!list_empty(&buf->rb_stale_mrs)) { |
1005 | mw = rpcrdma_pop_mw(&buf->rb_stale_mrs); | 994 | mr = rpcrdma_mr_pop(&buf->rb_stale_mrs); |
1006 | spin_unlock(&buf->rb_recovery_lock); | 995 | spin_unlock(&buf->rb_recovery_lock); |
1007 | 996 | ||
1008 | dprintk("RPC: %s: recovering MR %p\n", __func__, mw); | 997 | trace_xprtrdma_recover_mr(mr); |
1009 | mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw); | 998 | mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr); |
1010 | 999 | ||
1011 | spin_lock(&buf->rb_recovery_lock); | 1000 | spin_lock(&buf->rb_recovery_lock); |
1012 | } | 1001 | } |
@@ -1014,20 +1003,20 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) | |||
1014 | } | 1003 | } |
1015 | 1004 | ||
1016 | void | 1005 | void |
1017 | rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw) | 1006 | rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr) |
1018 | { | 1007 | { |
1019 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; | 1008 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
1020 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 1009 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
1021 | 1010 | ||
1022 | spin_lock(&buf->rb_recovery_lock); | 1011 | spin_lock(&buf->rb_recovery_lock); |
1023 | rpcrdma_push_mw(mw, &buf->rb_stale_mrs); | 1012 | rpcrdma_mr_push(mr, &buf->rb_stale_mrs); |
1024 | spin_unlock(&buf->rb_recovery_lock); | 1013 | spin_unlock(&buf->rb_recovery_lock); |
1025 | 1014 | ||
1026 | schedule_delayed_work(&buf->rb_recovery_worker, 0); | 1015 | schedule_delayed_work(&buf->rb_recovery_worker, 0); |
1027 | } | 1016 | } |
1028 | 1017 | ||
1029 | static void | 1018 | static void |
1030 | rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) | 1019 | rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) |
1031 | { | 1020 | { |
1032 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 1021 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
1033 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1022 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
@@ -1036,32 +1025,32 @@ rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) | |||
1036 | LIST_HEAD(all); | 1025 | LIST_HEAD(all); |
1037 | 1026 | ||
1038 | for (count = 0; count < 32; count++) { | 1027 | for (count = 0; count < 32; count++) { |
1039 | struct rpcrdma_mw *mw; | 1028 | struct rpcrdma_mr *mr; |
1040 | int rc; | 1029 | int rc; |
1041 | 1030 | ||
1042 | mw = kzalloc(sizeof(*mw), GFP_KERNEL); | 1031 | mr = kzalloc(sizeof(*mr), GFP_KERNEL); |
1043 | if (!mw) | 1032 | if (!mr) |
1044 | break; | 1033 | break; |
1045 | 1034 | ||
1046 | rc = ia->ri_ops->ro_init_mr(ia, mw); | 1035 | rc = ia->ri_ops->ro_init_mr(ia, mr); |
1047 | if (rc) { | 1036 | if (rc) { |
1048 | kfree(mw); | 1037 | kfree(mr); |
1049 | break; | 1038 | break; |
1050 | } | 1039 | } |
1051 | 1040 | ||
1052 | mw->mw_xprt = r_xprt; | 1041 | mr->mr_xprt = r_xprt; |
1053 | 1042 | ||
1054 | list_add(&mw->mw_list, &free); | 1043 | list_add(&mr->mr_list, &free); |
1055 | list_add(&mw->mw_all, &all); | 1044 | list_add(&mr->mr_all, &all); |
1056 | } | 1045 | } |
1057 | 1046 | ||
1058 | spin_lock(&buf->rb_mwlock); | 1047 | spin_lock(&buf->rb_mrlock); |
1059 | list_splice(&free, &buf->rb_mws); | 1048 | list_splice(&free, &buf->rb_mrs); |
1060 | list_splice(&all, &buf->rb_all); | 1049 | list_splice(&all, &buf->rb_all); |
1061 | r_xprt->rx_stats.mrs_allocated += count; | 1050 | r_xprt->rx_stats.mrs_allocated += count; |
1062 | spin_unlock(&buf->rb_mwlock); | 1051 | spin_unlock(&buf->rb_mrlock); |
1063 | 1052 | ||
1064 | dprintk("RPC: %s: created %u MRs\n", __func__, count); | 1053 | trace_xprtrdma_createmrs(r_xprt, count); |
1065 | } | 1054 | } |
1066 | 1055 | ||
1067 | static void | 1056 | static void |
@@ -1072,7 +1061,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work) | |||
1072 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, | 1061 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, |
1073 | rx_buf); | 1062 | rx_buf); |
1074 | 1063 | ||
1075 | rpcrdma_create_mrs(r_xprt); | 1064 | rpcrdma_mrs_create(r_xprt); |
1076 | } | 1065 | } |
1077 | 1066 | ||
1078 | struct rpcrdma_req * | 1067 | struct rpcrdma_req * |
@@ -1093,10 +1082,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) | |||
1093 | return req; | 1082 | return req; |
1094 | } | 1083 | } |
1095 | 1084 | ||
1096 | struct rpcrdma_rep * | 1085 | /** |
1086 | * rpcrdma_create_rep - Allocate an rpcrdma_rep object | ||
1087 | * @r_xprt: controlling transport | ||
1088 | * | ||
1089 | * Returns 0 on success or a negative errno on failure. | ||
1090 | */ | ||
1091 | int | ||
1097 | rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) | 1092 | rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) |
1098 | { | 1093 | { |
1099 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; | 1094 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
1095 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
1100 | struct rpcrdma_rep *rep; | 1096 | struct rpcrdma_rep *rep; |
1101 | int rc; | 1097 | int rc; |
1102 | 1098 | ||
@@ -1121,12 +1117,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) | |||
1121 | rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; | 1117 | rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; |
1122 | rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; | 1118 | rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; |
1123 | rep->rr_recv_wr.num_sge = 1; | 1119 | rep->rr_recv_wr.num_sge = 1; |
1124 | return rep; | 1120 | |
1121 | spin_lock(&buf->rb_lock); | ||
1122 | list_add(&rep->rr_list, &buf->rb_recv_bufs); | ||
1123 | spin_unlock(&buf->rb_lock); | ||
1124 | return 0; | ||
1125 | 1125 | ||
1126 | out_free: | 1126 | out_free: |
1127 | kfree(rep); | 1127 | kfree(rep); |
1128 | out: | 1128 | out: |
1129 | return ERR_PTR(rc); | 1129 | dprintk("RPC: %s: reply buffer %d alloc failed\n", |
1130 | __func__, rc); | ||
1131 | return rc; | ||
1130 | } | 1132 | } |
1131 | 1133 | ||
1132 | int | 1134 | int |
@@ -1137,10 +1139,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1137 | 1139 | ||
1138 | buf->rb_max_requests = r_xprt->rx_data.max_requests; | 1140 | buf->rb_max_requests = r_xprt->rx_data.max_requests; |
1139 | buf->rb_bc_srv_max_requests = 0; | 1141 | buf->rb_bc_srv_max_requests = 0; |
1140 | spin_lock_init(&buf->rb_mwlock); | 1142 | spin_lock_init(&buf->rb_mrlock); |
1141 | spin_lock_init(&buf->rb_lock); | 1143 | spin_lock_init(&buf->rb_lock); |
1142 | spin_lock_init(&buf->rb_recovery_lock); | 1144 | spin_lock_init(&buf->rb_recovery_lock); |
1143 | INIT_LIST_HEAD(&buf->rb_mws); | 1145 | INIT_LIST_HEAD(&buf->rb_mrs); |
1144 | INIT_LIST_HEAD(&buf->rb_all); | 1146 | INIT_LIST_HEAD(&buf->rb_all); |
1145 | INIT_LIST_HEAD(&buf->rb_stale_mrs); | 1147 | INIT_LIST_HEAD(&buf->rb_stale_mrs); |
1146 | INIT_DELAYED_WORK(&buf->rb_refresh_worker, | 1148 | INIT_DELAYED_WORK(&buf->rb_refresh_worker, |
@@ -1148,7 +1150,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1148 | INIT_DELAYED_WORK(&buf->rb_recovery_worker, | 1150 | INIT_DELAYED_WORK(&buf->rb_recovery_worker, |
1149 | rpcrdma_mr_recovery_worker); | 1151 | rpcrdma_mr_recovery_worker); |
1150 | 1152 | ||
1151 | rpcrdma_create_mrs(r_xprt); | 1153 | rpcrdma_mrs_create(r_xprt); |
1152 | 1154 | ||
1153 | INIT_LIST_HEAD(&buf->rb_send_bufs); | 1155 | INIT_LIST_HEAD(&buf->rb_send_bufs); |
1154 | INIT_LIST_HEAD(&buf->rb_allreqs); | 1156 | INIT_LIST_HEAD(&buf->rb_allreqs); |
@@ -1167,17 +1169,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1167 | } | 1169 | } |
1168 | 1170 | ||
1169 | INIT_LIST_HEAD(&buf->rb_recv_bufs); | 1171 | INIT_LIST_HEAD(&buf->rb_recv_bufs); |
1170 | for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) { | 1172 | for (i = 0; i <= buf->rb_max_requests; i++) { |
1171 | struct rpcrdma_rep *rep; | 1173 | rc = rpcrdma_create_rep(r_xprt); |
1172 | 1174 | if (rc) | |
1173 | rep = rpcrdma_create_rep(r_xprt); | ||
1174 | if (IS_ERR(rep)) { | ||
1175 | dprintk("RPC: %s: reply buffer %d alloc failed\n", | ||
1176 | __func__, i); | ||
1177 | rc = PTR_ERR(rep); | ||
1178 | goto out; | 1175 | goto out; |
1179 | } | ||
1180 | list_add(&rep->rr_list, &buf->rb_recv_bufs); | ||
1181 | } | 1176 | } |
1182 | 1177 | ||
1183 | rc = rpcrdma_sendctxs_create(r_xprt); | 1178 | rc = rpcrdma_sendctxs_create(r_xprt); |
@@ -1229,26 +1224,26 @@ rpcrdma_destroy_req(struct rpcrdma_req *req) | |||
1229 | } | 1224 | } |
1230 | 1225 | ||
1231 | static void | 1226 | static void |
1232 | rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf) | 1227 | rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) |
1233 | { | 1228 | { |
1234 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, | 1229 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, |
1235 | rx_buf); | 1230 | rx_buf); |
1236 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | 1231 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); |
1237 | struct rpcrdma_mw *mw; | 1232 | struct rpcrdma_mr *mr; |
1238 | unsigned int count; | 1233 | unsigned int count; |
1239 | 1234 | ||
1240 | count = 0; | 1235 | count = 0; |
1241 | spin_lock(&buf->rb_mwlock); | 1236 | spin_lock(&buf->rb_mrlock); |
1242 | while (!list_empty(&buf->rb_all)) { | 1237 | while (!list_empty(&buf->rb_all)) { |
1243 | mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | 1238 | mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all); |
1244 | list_del(&mw->mw_all); | 1239 | list_del(&mr->mr_all); |
1245 | 1240 | ||
1246 | spin_unlock(&buf->rb_mwlock); | 1241 | spin_unlock(&buf->rb_mrlock); |
1247 | ia->ri_ops->ro_release_mr(mw); | 1242 | ia->ri_ops->ro_release_mr(mr); |
1248 | count++; | 1243 | count++; |
1249 | spin_lock(&buf->rb_mwlock); | 1244 | spin_lock(&buf->rb_mrlock); |
1250 | } | 1245 | } |
1251 | spin_unlock(&buf->rb_mwlock); | 1246 | spin_unlock(&buf->rb_mrlock); |
1252 | r_xprt->rx_stats.mrs_allocated = 0; | 1247 | r_xprt->rx_stats.mrs_allocated = 0; |
1253 | 1248 | ||
1254 | dprintk("RPC: %s: released %u MRs\n", __func__, count); | 1249 | dprintk("RPC: %s: released %u MRs\n", __func__, count); |
@@ -1285,27 +1280,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1285 | spin_unlock(&buf->rb_reqslock); | 1280 | spin_unlock(&buf->rb_reqslock); |
1286 | buf->rb_recv_count = 0; | 1281 | buf->rb_recv_count = 0; |
1287 | 1282 | ||
1288 | rpcrdma_destroy_mrs(buf); | 1283 | rpcrdma_mrs_destroy(buf); |
1289 | } | 1284 | } |
1290 | 1285 | ||
1291 | struct rpcrdma_mw * | 1286 | /** |
1292 | rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) | 1287 | * rpcrdma_mr_get - Allocate an rpcrdma_mr object |
1288 | * @r_xprt: controlling transport | ||
1289 | * | ||
1290 | * Returns an initialized rpcrdma_mr or NULL if no free | ||
1291 | * rpcrdma_mr objects are available. | ||
1292 | */ | ||
1293 | struct rpcrdma_mr * | ||
1294 | rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) | ||
1293 | { | 1295 | { |
1294 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 1296 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
1295 | struct rpcrdma_mw *mw = NULL; | 1297 | struct rpcrdma_mr *mr = NULL; |
1296 | 1298 | ||
1297 | spin_lock(&buf->rb_mwlock); | 1299 | spin_lock(&buf->rb_mrlock); |
1298 | if (!list_empty(&buf->rb_mws)) | 1300 | if (!list_empty(&buf->rb_mrs)) |
1299 | mw = rpcrdma_pop_mw(&buf->rb_mws); | 1301 | mr = rpcrdma_mr_pop(&buf->rb_mrs); |
1300 | spin_unlock(&buf->rb_mwlock); | 1302 | spin_unlock(&buf->rb_mrlock); |
1301 | 1303 | ||
1302 | if (!mw) | 1304 | if (!mr) |
1303 | goto out_nomws; | 1305 | goto out_nomrs; |
1304 | mw->mw_flags = 0; | 1306 | return mr; |
1305 | return mw; | ||
1306 | 1307 | ||
1307 | out_nomws: | 1308 | out_nomrs: |
1308 | dprintk("RPC: %s: no MWs available\n", __func__); | 1309 | trace_xprtrdma_nomrs(r_xprt); |
1309 | if (r_xprt->rx_ep.rep_connected != -ENODEV) | 1310 | if (r_xprt->rx_ep.rep_connected != -ENODEV) |
1310 | schedule_delayed_work(&buf->rb_refresh_worker, 0); | 1311 | schedule_delayed_work(&buf->rb_refresh_worker, 0); |
1311 | 1312 | ||
@@ -1315,14 +1316,39 @@ out_nomws: | |||
1315 | return NULL; | 1316 | return NULL; |
1316 | } | 1317 | } |
1317 | 1318 | ||
1319 | static void | ||
1320 | __rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr) | ||
1321 | { | ||
1322 | spin_lock(&buf->rb_mrlock); | ||
1323 | rpcrdma_mr_push(mr, &buf->rb_mrs); | ||
1324 | spin_unlock(&buf->rb_mrlock); | ||
1325 | } | ||
1326 | |||
1327 | /** | ||
1328 | * rpcrdma_mr_put - Release an rpcrdma_mr object | ||
1329 | * @mr: object to release | ||
1330 | * | ||
1331 | */ | ||
1318 | void | 1332 | void |
1319 | rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) | 1333 | rpcrdma_mr_put(struct rpcrdma_mr *mr) |
1320 | { | 1334 | { |
1321 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 1335 | __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr); |
1336 | } | ||
1337 | |||
1338 | /** | ||
1339 | * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it | ||
1340 | * @mr: object to release | ||
1341 | * | ||
1342 | */ | ||
1343 | void | ||
1344 | rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) | ||
1345 | { | ||
1346 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; | ||
1322 | 1347 | ||
1323 | spin_lock(&buf->rb_mwlock); | 1348 | trace_xprtrdma_dma_unmap(mr); |
1324 | rpcrdma_push_mw(mw, &buf->rb_mws); | 1349 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, |
1325 | spin_unlock(&buf->rb_mwlock); | 1350 | mr->mr_sg, mr->mr_nents, mr->mr_dir); |
1351 | __rpcrdma_mr_put(&r_xprt->rx_buf, mr); | ||
1326 | } | 1352 | } |
1327 | 1353 | ||
1328 | static struct rpcrdma_rep * | 1354 | static struct rpcrdma_rep * |
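The two new release entry points share one locked push; only rpcrdma_mr_unmap_and_put() undoes the DMA mapping first. A hedged userspace sketch of the split, where unmap_buffer() is a placeholder for the ib_dma_unmap_sg() call shown above:

#include <pthread.h>

struct mr {
	struct mr *next;
	void *mapping;		/* stands in for mr_sg/mr_nents/mr_dir */
};

struct pool {
	pthread_mutex_t lock;
	struct mr *free_list;
};

static void unmap_buffer(struct mr *m)
{
	m->mapping = NULL;	/* placeholder for ib_dma_unmap_sg() */
}

static void __mr_put(struct pool *p, struct mr *m)
{
	pthread_mutex_lock(&p->lock);
	m->next = p->free_list;
	p->free_list = m;
	pthread_mutex_unlock(&p->lock);
}

/* Fast path: MR was never mapped, or the caller already unmapped it. */
static void mr_put(struct pool *p, struct mr *m)
{
	__mr_put(p, m);
}

/* Slow path: undo the DMA mapping before recycling the MR. */
static void mr_unmap_and_put(struct pool *p, struct mr *m)
{
	unmap_buffer(m);
	__mr_put(p, m);
}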
@@ -1359,11 +1385,11 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1359 | req = rpcrdma_buffer_get_req_locked(buffers); | 1385 | req = rpcrdma_buffer_get_req_locked(buffers); |
1360 | req->rl_reply = rpcrdma_buffer_get_rep(buffers); | 1386 | req->rl_reply = rpcrdma_buffer_get_rep(buffers); |
1361 | spin_unlock(&buffers->rb_lock); | 1387 | spin_unlock(&buffers->rb_lock); |
1388 | |||
1362 | return req; | 1389 | return req; |
1363 | 1390 | ||
1364 | out_reqbuf: | 1391 | out_reqbuf: |
1365 | spin_unlock(&buffers->rb_lock); | 1392 | spin_unlock(&buffers->rb_lock); |
1366 | pr_warn("RPC: %s: out of request buffers\n", __func__); | ||
1367 | return NULL; | 1393 | return NULL; |
1368 | } | 1394 | } |
1369 | 1395 | ||
@@ -1519,9 +1545,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
1519 | req->rl_reply = NULL; | 1545 | req->rl_reply = NULL; |
1520 | } | 1546 | } |
1521 | 1547 | ||
1522 | dprintk("RPC: %s: posting %d s/g entries\n", | ||
1523 | __func__, send_wr->num_sge); | ||
1524 | |||
1525 | if (!ep->rep_send_count || | 1548 | if (!ep->rep_send_count || |
1526 | test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { | 1549 | test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { |
1527 | send_wr->send_flags |= IB_SEND_SIGNALED; | 1550 | send_wr->send_flags |= IB_SEND_SIGNALED; |
@@ -1530,14 +1553,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
1530 | send_wr->send_flags &= ~IB_SEND_SIGNALED; | 1553 | send_wr->send_flags &= ~IB_SEND_SIGNALED; |
1531 | --ep->rep_send_count; | 1554 | --ep->rep_send_count; |
1532 | } | 1555 | } |
1556 | |||
1533 | rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); | 1557 | rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); |
1558 | trace_xprtrdma_post_send(req, rc); | ||
1534 | if (rc) | 1559 | if (rc) |
1535 | goto out_postsend_err; | 1560 | return -ENOTCONN; |
1536 | return 0; | 1561 | return 0; |
1537 | |||
1538 | out_postsend_err: | ||
1539 | pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc); | ||
1540 | return -ENOTCONN; | ||
1541 | } | 1562 | } |
1542 | 1563 | ||
1543 | int | 1564 | int |
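rpcrdma_ep_post() requests a signaled Send completion only when the countdown in ep->rep_send_count reaches zero, or when Send resources must be reclaimed, so most sends complete silently and completion handling is amortized. A sketch of that countdown, with SEND_BATCH as a made-up re-arm value rather than the transport's real batch size:

#include <stdbool.h>

#define SEND_BATCH 16	/* assumed batch size, for illustration only */

struct endpoint {
	unsigned int send_count;	/* sends left before next signal */
};

/* Returns true when this send should carry IB_SEND_SIGNALED. */
static bool send_needs_signal(struct endpoint *ep, bool must_flush)
{
	if (!ep->send_count || must_flush) {
		ep->send_count = SEND_BATCH;	/* re-arm the counter */
		return true;
	}
	--ep->send_count;
	return false;
}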
@@ -1550,23 +1571,20 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
1550 | if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) | 1571 | if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) |
1551 | goto out_map; | 1572 | goto out_map; |
1552 | rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); | 1573 | rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); |
1574 | trace_xprtrdma_post_recv(rep, rc); | ||
1553 | if (rc) | 1575 | if (rc) |
1554 | goto out_postrecv; | 1576 | return -ENOTCONN; |
1555 | return 0; | 1577 | return 0; |
1556 | 1578 | ||
1557 | out_map: | 1579 | out_map: |
1558 | pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); | 1580 | pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); |
1559 | return -EIO; | 1581 | return -EIO; |
1560 | |||
1561 | out_postrecv: | ||
1562 | pr_err("rpcrdma: ib_post_recv returned %i\n", rc); | ||
1563 | return -ENOTCONN; | ||
1564 | } | 1582 | } |
1565 | 1583 | ||
1566 | /** | 1584 | /** |
1567 | * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests | 1585 | * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests |
1568 | * @r_xprt: transport associated with these backchannel resources | 1586 | * @r_xprt: transport associated with these backchannel resources |
1569 | * @min_reqs: minimum number of incoming requests expected | 1587 | * @count: minimum number of incoming requests expected |
1570 | * | 1588 | * |
1571 | * Returns zero if all requested buffers were posted, or a negative errno. | 1589 | * Returns zero if all requested buffers were posted, or a negative errno. |
1572 | */ | 1590 | */ |
@@ -1594,7 +1612,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) | |||
1594 | 1612 | ||
1595 | out_reqbuf: | 1613 | out_reqbuf: |
1596 | spin_unlock(&buffers->rb_lock); | 1614 | spin_unlock(&buffers->rb_lock); |
1597 | pr_warn("%s: no extra receive buffers\n", __func__); | 1615 | trace_xprtrdma_noreps(r_xprt); |
1598 | return -ENOMEM; | 1616 | return -ENOMEM; |
1599 | 1617 | ||
1600 | out_rc: | 1618 | out_rc: |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 1342f743f1c4..69883a960a3f 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -73,11 +73,10 @@ struct rpcrdma_ia { | |||
73 | struct completion ri_remove_done; | 73 | struct completion ri_remove_done; |
74 | int ri_async_rc; | 74 | int ri_async_rc; |
75 | unsigned int ri_max_segs; | 75 | unsigned int ri_max_segs; |
76 | unsigned int ri_max_frmr_depth; | 76 | unsigned int ri_max_frwr_depth; |
77 | unsigned int ri_max_inline_write; | 77 | unsigned int ri_max_inline_write; |
78 | unsigned int ri_max_inline_read; | 78 | unsigned int ri_max_inline_read; |
79 | unsigned int ri_max_send_sges; | 79 | unsigned int ri_max_send_sges; |
80 | bool ri_reminv_expected; | ||
81 | bool ri_implicit_roundup; | 80 | bool ri_implicit_roundup; |
82 | enum ib_mr_type ri_mrtype; | 81 | enum ib_mr_type ri_mrtype; |
83 | unsigned long ri_flags; | 82 | unsigned long ri_flags; |
@@ -101,7 +100,6 @@ struct rpcrdma_ep { | |||
101 | wait_queue_head_t rep_connect_wait; | 100 | wait_queue_head_t rep_connect_wait; |
102 | struct rpcrdma_connect_private rep_cm_private; | 101 | struct rpcrdma_connect_private rep_cm_private; |
103 | struct rdma_conn_param rep_remote_cma; | 102 | struct rdma_conn_param rep_remote_cma; |
104 | struct sockaddr_storage rep_remote_addr; | ||
105 | struct delayed_work rep_connect_worker; | 103 | struct delayed_work rep_connect_worker; |
106 | }; | 104 | }; |
107 | 105 | ||
@@ -232,29 +230,29 @@ enum { | |||
232 | }; | 230 | }; |
233 | 231 | ||
234 | /* | 232 | /* |
235 | * struct rpcrdma_mw - external memory region metadata | 233 | * struct rpcrdma_mr - external memory region metadata |
236 | * | 234 | * |
237 | * An external memory region is any buffer or page that is registered | 235 | * An external memory region is any buffer or page that is registered |
238 | * on the fly (ie, not pre-registered). | 236 | * on the fly (ie, not pre-registered). |
239 | * | 237 | * |
240 | * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During | 238 | * Each rpcrdma_buffer has a list of free MRs anchored in rb_mrs. During |
241 | * call_allocate, rpcrdma_buffer_get() assigns one to each segment in | 239 | * call_allocate, rpcrdma_buffer_get() assigns one to each segment in |
242 | * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep | 240 | * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep |
243 | * track of registration metadata while each RPC is pending. | 241 | * track of registration metadata while each RPC is pending. |
244 | * rpcrdma_deregister_external() uses this metadata to unmap and | 242 | * rpcrdma_deregister_external() uses this metadata to unmap and |
245 | * release these resources when an RPC is complete. | 243 | * release these resources when an RPC is complete. |
246 | */ | 244 | */ |
247 | enum rpcrdma_frmr_state { | 245 | enum rpcrdma_frwr_state { |
248 | FRMR_IS_INVALID, /* ready to be used */ | 246 | FRWR_IS_INVALID, /* ready to be used */ |
249 | FRMR_IS_VALID, /* in use */ | 247 | FRWR_IS_VALID, /* in use */ |
250 | FRMR_FLUSHED_FR, /* flushed FASTREG WR */ | 248 | FRWR_FLUSHED_FR, /* flushed FASTREG WR */ |
251 | FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ | 249 | FRWR_FLUSHED_LI, /* flushed LOCALINV WR */ |
252 | }; | 250 | }; |
253 | 251 | ||
254 | struct rpcrdma_frmr { | 252 | struct rpcrdma_frwr { |
255 | struct ib_mr *fr_mr; | 253 | struct ib_mr *fr_mr; |
256 | struct ib_cqe fr_cqe; | 254 | struct ib_cqe fr_cqe; |
257 | enum rpcrdma_frmr_state fr_state; | 255 | enum rpcrdma_frwr_state fr_state; |
258 | struct completion fr_linv_done; | 256 | struct completion fr_linv_done; |
259 | union { | 257 | union { |
260 | struct ib_reg_wr fr_regwr; | 258 | struct ib_reg_wr fr_regwr; |
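The FRMR-to-FRWR rename keeps the four-state tracker: an MR is free, in use, or stranded by a flushed FastReg or LocalInv work request, and the two flushed states stay distinct because recovery differs for each. A toy transition helper inferred from the state comments above, not the kernel's actual code:

enum frwr_state {
	FRWR_IS_INVALID,	/* ready to be used */
	FRWR_IS_VALID,		/* in use */
	FRWR_FLUSHED_FR,	/* flushed FASTREG WR */
	FRWR_FLUSHED_LI,	/* flushed LOCALINV WR */
};

/* Inferred lifecycle: registration moves INVALID -> VALID, a clean
 * invalidation moves VALID -> INVALID, and a flushed WR parks the MR
 * in one of the FLUSHED states until recovery resets it. */
static enum frwr_state frwr_on_flush(enum frwr_state cur, int was_fastreg)
{
	if (cur != FRWR_IS_VALID)
		return cur;
	return was_fastreg ? FRWR_FLUSHED_FR : FRWR_FLUSHED_LI;
}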
@@ -267,26 +265,20 @@ struct rpcrdma_fmr { | |||
267 | u64 *fm_physaddrs; | 265 | u64 *fm_physaddrs; |
268 | }; | 266 | }; |
269 | 267 | ||
270 | struct rpcrdma_mw { | 268 | struct rpcrdma_mr { |
271 | struct list_head mw_list; | 269 | struct list_head mr_list; |
272 | struct scatterlist *mw_sg; | 270 | struct scatterlist *mr_sg; |
273 | int mw_nents; | 271 | int mr_nents; |
274 | enum dma_data_direction mw_dir; | 272 | enum dma_data_direction mr_dir; |
275 | unsigned long mw_flags; | ||
276 | union { | 273 | union { |
277 | struct rpcrdma_fmr fmr; | 274 | struct rpcrdma_fmr fmr; |
278 | struct rpcrdma_frmr frmr; | 275 | struct rpcrdma_frwr frwr; |
279 | }; | 276 | }; |
280 | struct rpcrdma_xprt *mw_xprt; | 277 | struct rpcrdma_xprt *mr_xprt; |
281 | u32 mw_handle; | 278 | u32 mr_handle; |
282 | u32 mw_length; | 279 | u32 mr_length; |
283 | u64 mw_offset; | 280 | u64 mr_offset; |
284 | struct list_head mw_all; | 281 | struct list_head mr_all; |
285 | }; | ||
286 | |||
287 | /* mw_flags */ | ||
288 | enum { | ||
289 | RPCRDMA_MW_F_RI = 1, | ||
290 | }; | 282 | }; |
291 | 283 | ||
292 | /* | 284 | /* |
@@ -362,8 +354,7 @@ struct rpcrdma_req { | |||
362 | 354 | ||
363 | /* rl_flags */ | 355 | /* rl_flags */ |
364 | enum { | 356 | enum { |
365 | RPCRDMA_REQ_F_BACKCHANNEL = 0, | 357 | RPCRDMA_REQ_F_PENDING = 0, |
366 | RPCRDMA_REQ_F_PENDING, | ||
367 | RPCRDMA_REQ_F_TX_RESOURCES, | 358 | RPCRDMA_REQ_F_TX_RESOURCES, |
368 | }; | 359 | }; |
369 | 360 | ||
@@ -374,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) | |||
374 | } | 365 | } |
375 | 366 | ||
376 | static inline struct rpcrdma_req * | 367 | static inline struct rpcrdma_req * |
377 | rpcr_to_rdmar(struct rpc_rqst *rqst) | 368 | rpcr_to_rdmar(const struct rpc_rqst *rqst) |
378 | { | 369 | { |
379 | return rqst->rq_xprtdata; | 370 | return rqst->rq_xprtdata; |
380 | } | 371 | } |
381 | 372 | ||
382 | static inline void | 373 | static inline void |
383 | rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) | 374 | rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list) |
384 | { | 375 | { |
385 | list_add_tail(&mw->mw_list, list); | 376 | list_add_tail(&mr->mr_list, list); |
386 | } | 377 | } |
387 | 378 | ||
388 | static inline struct rpcrdma_mw * | 379 | static inline struct rpcrdma_mr * |
389 | rpcrdma_pop_mw(struct list_head *list) | 380 | rpcrdma_mr_pop(struct list_head *list) |
390 | { | 381 | { |
391 | struct rpcrdma_mw *mw; | 382 | struct rpcrdma_mr *mr; |
392 | 383 | ||
393 | mw = list_first_entry(list, struct rpcrdma_mw, mw_list); | 384 | mr = list_first_entry(list, struct rpcrdma_mr, mr_list); |
394 | list_del(&mw->mw_list); | 385 | list_del(&mr->mr_list); |
395 | return mw; | 386 | return mr; |
396 | } | 387 | } |
397 | 388 | ||
398 | /* | 389 | /* |
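rpcrdma_mr_push() and rpcrdma_mr_pop() above are thin wrappers over the kernel's intrusive struct list_head: the link is embedded in the object and container_of() recovers the enclosing struct, so one list implementation serves every type. A freestanding illustration with a minimal reimplementation of those helpers:

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void list_init(struct list_head *h) { h->prev = h->next = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev; n->next = h;
	h->prev->next = n; h->prev = n;
}

static void list_del(struct list_head *n)
{
	n->prev->next = n->next; n->next->prev = n->prev;
	n->prev = n->next = n;
}

struct mr {
	int id;
	struct list_head mr_list;	/* link embedded in the object */
};

static struct mr *mr_pop(struct list_head *h)
{
	struct mr *m = container_of(h->next, struct mr, mr_list);

	list_del(&m->mr_list);
	return m;
}

int main(void)
{
	struct list_head free_list;
	struct mr a = { .id = 1 }, b = { .id = 2 };

	list_init(&free_list);
	list_add_tail(&a.mr_list, &free_list);
	list_add_tail(&b.mr_list, &free_list);
	printf("popped id %d\n", mr_pop(&free_list)->id);	/* 1: FIFO */
	return 0;
}

Push-to-tail plus pop-from-head makes the free list FIFO, which spreads reuse evenly across the MR pool.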
@@ -402,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list) | |||
402 | * One of these is associated with a transport instance | 393 | * One of these is associated with a transport instance |
403 | */ | 394 | */ |
404 | struct rpcrdma_buffer { | 395 | struct rpcrdma_buffer { |
405 | spinlock_t rb_mwlock; /* protect rb_mws list */ | 396 | spinlock_t rb_mrlock; /* protect rb_mrs list */ |
406 | struct list_head rb_mws; | 397 | struct list_head rb_mrs; |
407 | struct list_head rb_all; | 398 | struct list_head rb_all; |
408 | 399 | ||
409 | unsigned long rb_sc_head; | 400 | unsigned long rb_sc_head; |
@@ -438,13 +429,11 @@ struct rpcrdma_buffer { | |||
438 | * This data should be set with mount options | 429 | * This data should be set with mount options |
439 | */ | 430 | */ |
440 | struct rpcrdma_create_data_internal { | 431 | struct rpcrdma_create_data_internal { |
441 | struct sockaddr_storage addr; /* RDMA server address */ | ||
442 | unsigned int max_requests; /* max requests (slots) in flight */ | 432 | unsigned int max_requests; /* max requests (slots) in flight */ |
443 | unsigned int rsize; /* mount rsize - max read hdr+data */ | 433 | unsigned int rsize; /* mount rsize - max read hdr+data */ |
444 | unsigned int wsize; /* mount wsize - max write hdr+data */ | 434 | unsigned int wsize; /* mount wsize - max write hdr+data */ |
445 | unsigned int inline_rsize; /* max non-rdma read data payload */ | 435 | unsigned int inline_rsize; /* max non-rdma read data payload */ |
446 | unsigned int inline_wsize; /* max non-rdma write data payload */ | 436 | unsigned int inline_wsize; /* max non-rdma write data payload */ |
447 | unsigned int padding; /* non-rdma write header padding */ | ||
448 | }; | 437 | }; |
449 | 438 | ||
450 | /* | 439 | /* |
@@ -484,17 +473,19 @@ struct rpcrdma_memreg_ops { | |||
484 | struct rpcrdma_mr_seg * | 473 | struct rpcrdma_mr_seg * |
485 | (*ro_map)(struct rpcrdma_xprt *, | 474 | (*ro_map)(struct rpcrdma_xprt *, |
486 | struct rpcrdma_mr_seg *, int, bool, | 475 | struct rpcrdma_mr_seg *, int, bool, |
487 | struct rpcrdma_mw **); | 476 | struct rpcrdma_mr **); |
477 | void (*ro_reminv)(struct rpcrdma_rep *rep, | ||
478 | struct list_head *mrs); | ||
488 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, | 479 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, |
489 | struct list_head *); | 480 | struct list_head *); |
490 | void (*ro_recover_mr)(struct rpcrdma_mw *); | 481 | void (*ro_recover_mr)(struct rpcrdma_mr *mr); |
491 | int (*ro_open)(struct rpcrdma_ia *, | 482 | int (*ro_open)(struct rpcrdma_ia *, |
492 | struct rpcrdma_ep *, | 483 | struct rpcrdma_ep *, |
493 | struct rpcrdma_create_data_internal *); | 484 | struct rpcrdma_create_data_internal *); |
494 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); | 485 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); |
495 | int (*ro_init_mr)(struct rpcrdma_ia *, | 486 | int (*ro_init_mr)(struct rpcrdma_ia *, |
496 | struct rpcrdma_mw *); | 487 | struct rpcrdma_mr *); |
497 | void (*ro_release_mr)(struct rpcrdma_mw *); | 488 | void (*ro_release_mr)(struct rpcrdma_mr *mr); |
498 | const char *ro_displayname; | 489 | const char *ro_displayname; |
499 | const int ro_send_w_inv_ok; | 490 | const int ro_send_w_inv_ok; |
500 | }; | 491 | }; |
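The rpcrdma_memreg_ops table above is C's vtable idiom: each registration mode (FMR, FRWR) fills in a table of function pointers, now including the ro_reminv hook for remote invalidation, and the core dispatches through ia->ri_ops without caring which mode is live. A minimal sketch of the dispatch style, with toy signatures rather than the kernel's:

#include <stdio.h>

struct memreg_ops {
	int  (*ro_open)(void);
	void (*ro_release_mr)(void *mr);
	const char *ro_displayname;
};

static int frwr_open(void) { return 0; }
static void frwr_release(void *mr) { (void)mr; }

static const struct memreg_ops frwr_ops = {
	.ro_open	= frwr_open,
	.ro_release_mr	= frwr_release,
	.ro_displayname	= "frwr",
};

struct ia {
	const struct memreg_ops *ri_ops;	/* chosen at setup time */
};

int main(void)
{
	struct ia ia = { .ri_ops = &frwr_ops };

	printf("memory registration: %s\n", ia.ri_ops->ro_displayname);
	return ia.ri_ops->ro_open();
}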
@@ -525,6 +516,18 @@ struct rpcrdma_xprt { | |||
525 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) | 516 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) |
526 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) | 517 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) |
527 | 518 | ||
519 | static inline const char * | ||
520 | rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt) | ||
521 | { | ||
522 | return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]; | ||
523 | } | ||
524 | |||
525 | static inline const char * | ||
526 | rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt) | ||
527 | { | ||
528 | return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT]; | ||
529 | } | ||
530 | |||
528 | /* Setting this to 0 ensures interoperability with early servers. | 531 | /* Setting this to 0 ensures interoperability with early servers. |
529 | * Setting this to 1 enhances certain unaligned read/write performance. | 532 | * Setting this to 1 enhances certain unaligned read/write performance. |
530 | * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ | 533 | * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ |
@@ -538,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy; | |||
538 | /* | 541 | /* |
539 | * Interface Adapter calls - xprtrdma/verbs.c | 542 | * Interface Adapter calls - xprtrdma/verbs.c |
540 | */ | 543 | */ |
541 | int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr); | 544 | int rpcrdma_ia_open(struct rpcrdma_xprt *xprt); |
542 | void rpcrdma_ia_remove(struct rpcrdma_ia *ia); | 545 | void rpcrdma_ia_remove(struct rpcrdma_ia *ia); |
543 | void rpcrdma_ia_close(struct rpcrdma_ia *); | 546 | void rpcrdma_ia_close(struct rpcrdma_ia *); |
544 | bool frwr_is_supported(struct rpcrdma_ia *); | 547 | bool frwr_is_supported(struct rpcrdma_ia *); |
@@ -564,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); | |||
564 | * Buffer calls - xprtrdma/verbs.c | 567 | * Buffer calls - xprtrdma/verbs.c |
565 | */ | 568 | */ |
566 | struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); | 569 | struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); |
567 | struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *); | ||
568 | void rpcrdma_destroy_req(struct rpcrdma_req *); | 570 | void rpcrdma_destroy_req(struct rpcrdma_req *); |
571 | int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt); | ||
569 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); | 572 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); |
570 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); | 573 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); |
571 | struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); | 574 | struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); |
572 | void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); | 575 | void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); |
573 | 576 | ||
574 | struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); | 577 | struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); |
575 | void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); | 578 | void rpcrdma_mr_put(struct rpcrdma_mr *mr); |
579 | void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr); | ||
580 | void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr); | ||
581 | |||
576 | struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); | 582 | struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); |
577 | void rpcrdma_buffer_put(struct rpcrdma_req *); | 583 | void rpcrdma_buffer_put(struct rpcrdma_req *); |
578 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | 584 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); |
579 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | 585 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); |
580 | 586 | ||
581 | void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *); | ||
582 | |||
583 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, | 587 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, |
584 | gfp_t); | 588 | gfp_t); |
585 | bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); | 589 | bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); |
@@ -663,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *); | |||
663 | size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); | 667 | size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); |
664 | int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); | 668 | int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); |
665 | void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); | 669 | void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); |
666 | int rpcrdma_bc_marshal_reply(struct rpc_rqst *); | 670 | int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst); |
667 | void xprt_rdma_bc_free_rqst(struct rpc_rqst *); | 671 | void xprt_rdma_bc_free_rqst(struct rpc_rqst *); |
668 | void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); | 672 | void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); |
669 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | 673 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ |
@@ -671,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); | |||
671 | extern struct xprt_class xprt_rdma_bc; | 675 | extern struct xprt_class xprt_rdma_bc; |
672 | 676 | ||
673 | #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ | 677 | #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ |
678 | |||
679 | #include <trace/events/rpcrdma.h> | ||
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6d0cc3b8f932..18803021f242 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -52,6 +52,8 @@ | |||
52 | 52 | ||
53 | #include "sunrpc.h" | 53 | #include "sunrpc.h" |
54 | 54 | ||
55 | #define RPC_TCP_READ_CHUNK_SZ (3*512*1024) | ||
56 | |||
55 | static void xs_close(struct rpc_xprt *xprt); | 57 | static void xs_close(struct rpc_xprt *xprt); |
56 | static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, | 58 | static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, |
57 | struct socket *sock); | 59 | struct socket *sock); |
@@ -1003,6 +1005,7 @@ static void xs_local_data_receive(struct sock_xprt *transport) | |||
1003 | struct sock *sk; | 1005 | struct sock *sk; |
1004 | int err; | 1006 | int err; |
1005 | 1007 | ||
1008 | restart: | ||
1006 | mutex_lock(&transport->recv_mutex); | 1009 | mutex_lock(&transport->recv_mutex); |
1007 | sk = transport->inet; | 1010 | sk = transport->inet; |
1008 | if (sk == NULL) | 1011 | if (sk == NULL) |
@@ -1016,6 +1019,11 @@ static void xs_local_data_receive(struct sock_xprt *transport) | |||
1016 | } | 1019 | } |
1017 | if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) | 1020 | if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) |
1018 | break; | 1021 | break; |
1022 | if (need_resched()) { | ||
1023 | mutex_unlock(&transport->recv_mutex); | ||
1024 | cond_resched(); | ||
1025 | goto restart; | ||
1026 | } | ||
1019 | } | 1027 | } |
1020 | out: | 1028 | out: |
1021 | mutex_unlock(&transport->recv_mutex); | 1029 | mutex_unlock(&transport->recv_mutex); |
@@ -1094,6 +1102,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) | |||
1094 | struct sock *sk; | 1102 | struct sock *sk; |
1095 | int err; | 1103 | int err; |
1096 | 1104 | ||
1105 | restart: | ||
1097 | mutex_lock(&transport->recv_mutex); | 1106 | mutex_lock(&transport->recv_mutex); |
1098 | sk = transport->inet; | 1107 | sk = transport->inet; |
1099 | if (sk == NULL) | 1108 | if (sk == NULL) |
@@ -1107,6 +1116,11 @@ static void xs_udp_data_receive(struct sock_xprt *transport) | |||
1107 | } | 1116 | } |
1108 | if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) | 1117 | if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) |
1109 | break; | 1118 | break; |
1119 | if (need_resched()) { | ||
1120 | mutex_unlock(&transport->recv_mutex); | ||
1121 | cond_resched(); | ||
1122 | goto restart; | ||
1123 | } | ||
1110 | } | 1124 | } |
1111 | out: | 1125 | out: |
1112 | mutex_unlock(&transport->recv_mutex); | 1126 | mutex_unlock(&transport->recv_mutex); |
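Both datagram receive paths above, and the TCP path below, gain the same shape: if need_resched() fires mid-drain, drop recv_mutex, yield, and restart from the top rather than hogging a CPU while holding the mutex. A pthread/sched_yield analogue of the pattern:

#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

static pthread_mutex_t recv_mutex = PTHREAD_MUTEX_INITIALIZER;

static bool should_yield(void)	/* stand-in for need_resched() */
{
	return false;
}

static bool receive_one(void)	/* stand-in for one dgram/record */
{
	return false;		/* no more data queued in this stub */
}

static void data_receive(void)
{
restart:
	pthread_mutex_lock(&recv_mutex);
	for (;;) {
		if (!receive_one())
			break;
		if (should_yield()) {
			/* Never yield while holding the lock. */
			pthread_mutex_unlock(&recv_mutex);
			sched_yield();	/* cond_resched() analogue */
			goto restart;
		}
	}
	pthread_mutex_unlock(&recv_mutex);
}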
@@ -1479,6 +1493,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns | |||
1479 | .offset = offset, | 1493 | .offset = offset, |
1480 | .count = len, | 1494 | .count = len, |
1481 | }; | 1495 | }; |
1496 | size_t ret; | ||
1482 | 1497 | ||
1483 | dprintk("RPC: xs_tcp_data_recv started\n"); | 1498 | dprintk("RPC: xs_tcp_data_recv started\n"); |
1484 | do { | 1499 | do { |
@@ -1507,9 +1522,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns | |||
1507 | /* Skip over any trailing bytes on short reads */ | 1522 | /* Skip over any trailing bytes on short reads */ |
1508 | xs_tcp_read_discard(transport, &desc); | 1523 | xs_tcp_read_discard(transport, &desc); |
1509 | } while (desc.count); | 1524 | } while (desc.count); |
1525 | ret = len - desc.count; | ||
1526 | if (ret < rd_desc->count) | ||
1527 | rd_desc->count -= ret; | ||
1528 | else | ||
1529 | rd_desc->count = 0; | ||
1510 | trace_xs_tcp_data_recv(transport); | 1530 | trace_xs_tcp_data_recv(transport); |
1511 | dprintk("RPC: xs_tcp_data_recv done\n"); | 1531 | dprintk("RPC: xs_tcp_data_recv done\n"); |
1512 | return len - desc.count; | 1532 | return ret; |
1513 | } | 1533 | } |
1514 | 1534 | ||
1515 | static void xs_tcp_data_receive(struct sock_xprt *transport) | 1535 | static void xs_tcp_data_receive(struct sock_xprt *transport) |
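In the xs_tcp_data_recv() hunk above, the function now debits what it consumed from rd_desc->count, so the caller can tell a drained socket (budget left over) from an exhausted budget (possibly more data queued). The arithmetic in isolation:

#include <stddef.h>

struct read_desc {
	size_t count;		/* remaining read budget */
};

/* Debit the budget by what was consumed, clamping at zero, and
 * return the consumed byte count (the function's old return value). */
static size_t debit_budget(struct read_desc *rd, size_t len,
			   size_t leftover)
{
	size_t consumed = len - leftover;

	if (consumed < rd->count)
		rd->count -= consumed;
	else
		rd->count = 0;
	return consumed;
}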
@@ -1517,30 +1537,34 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) | |||
1517 | struct rpc_xprt *xprt = &transport->xprt; | 1537 | struct rpc_xprt *xprt = &transport->xprt; |
1518 | struct sock *sk; | 1538 | struct sock *sk; |
1519 | read_descriptor_t rd_desc = { | 1539 | read_descriptor_t rd_desc = { |
1520 | .count = 2*1024*1024, | ||
1521 | .arg.data = xprt, | 1540 | .arg.data = xprt, |
1522 | }; | 1541 | }; |
1523 | unsigned long total = 0; | 1542 | unsigned long total = 0; |
1524 | int loop; | ||
1525 | int read = 0; | 1543 | int read = 0; |
1526 | 1544 | ||
1545 | restart: | ||
1527 | mutex_lock(&transport->recv_mutex); | 1546 | mutex_lock(&transport->recv_mutex); |
1528 | sk = transport->inet; | 1547 | sk = transport->inet; |
1529 | if (sk == NULL) | 1548 | if (sk == NULL) |
1530 | goto out; | 1549 | goto out; |
1531 | 1550 | ||
1532 | /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ | 1551 | /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ |
1533 | for (loop = 0; loop < 64; loop++) { | 1552 | for (;;) { |
1553 | rd_desc.count = RPC_TCP_READ_CHUNK_SZ; | ||
1534 | lock_sock(sk); | 1554 | lock_sock(sk); |
1535 | read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); | 1555 | read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); |
1536 | if (read <= 0) { | 1556 | if (rd_desc.count != 0 || read < 0) { |
1537 | clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); | 1557 | clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); |
1538 | release_sock(sk); | 1558 | release_sock(sk); |
1539 | break; | 1559 | break; |
1540 | } | 1560 | } |
1541 | release_sock(sk); | 1561 | release_sock(sk); |
1542 | total += read; | 1562 | total += read; |
1543 | rd_desc.count = 65536; | 1563 | if (need_resched()) { |
1564 | mutex_unlock(&transport->recv_mutex); | ||
1565 | cond_resched(); | ||
1566 | goto restart; | ||
1567 | } | ||
1544 | } | 1568 | } |
1545 | if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) | 1569 | if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) |
1546 | queue_work(xprtiod_workqueue, &transport->recv_worker); | 1570 | queue_work(xprtiod_workqueue, &transport->recv_worker); |
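The final hunk replaces the old fixed 64-pass loop with a per-pass byte budget: each iteration grants tcp_read_sock() up to RPC_TCP_READ_CHUNK_SZ bytes, and the loop ends when a pass comes back short (rd_desc.count != 0) or errors, with any late-arriving data picked up by requeuing the worker. A self-contained sketch of that control flow against a simulated socket:

#include <stddef.h>

#define READ_CHUNK_SZ (3 * 512 * 1024)	/* mirrors RPC_TCP_READ_CHUNK_SZ */

struct read_desc { size_t count; };

static size_t pending = 5 * (size_t)READ_CHUNK_SZ / 2;	/* simulated queue */

/* Stand-in for tcp_read_sock(): consumes up to rd->count bytes,
 * debiting the budget, and returns the number read. */
static long read_chunk(struct read_desc *rd)
{
	size_t n = pending < rd->count ? pending : rd->count;

	pending -= n;
	rd->count -= n;
	return (long)n;
}

static long drain_socket(void)
{
	struct read_desc rd;
	long total = 0, read;

	for (;;) {
		rd.count = READ_CHUNK_SZ;	/* fresh budget each pass */
		read = read_chunk(&rd);
		/* A short pass means the socket is drained (or an
		 * error occurred); a full pass means more may be queued. */
		if (rd.count != 0 || read < 0)
			break;
		total += read;
	}
	return total;
}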