commit    972a2bf7dfe39ebf49dd47f68d27c416392e53b1
tree      1fc6277f7b24c854b3c48a9e082b6625c18145a0
parent    7be3cb019db1cbd5fd5ffe6d64a23fefa4b6f229
parent    a8fd0feeca35cb8f9ddd950191f4aeb777f52f89
author    Linus Torvalds <torvalds@linux-foundation.org> 2019-09-26 15:20:14 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-09-26 15:20:14 -0400
Merge tag 'nfs-for-5.4-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker:
"Stable bugfixes:
- Dequeue the request from the receive queue while we're re-encoding
# v4.20+
- Fix buffer handling of GSS MIC without slack # 5.1
Features:
- Increase xprtrdma maximum transport header and slot table sizes
- Add support for nfs4_call_sync() calls using a custom
rpc_task_struct
- Optimize the default readahead size
- Enable pNFS filelayout LAYOUTGET on OPEN
Other bugfixes and cleanups:
- Fix possible null-pointer dereferences and memory leaks
- Various NFS over RDMA cleanups
- Various NFS over RDMA comment updates
- Don't receive TCP data into a reset request buffer
- Don't try to parse incomplete RPC messages
- Fix congestion window race with disconnect
- Clean up pNFS return-on-close error handling
- Fixes for NFS4ERR_OLD_STATEID handling"
* tag 'nfs-for-5.4-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (53 commits)
pNFS/filelayout: enable LAYOUTGET on OPEN
NFS: Optimise the default readahead size
NFSv4: Handle NFS4ERR_OLD_STATEID in LOCKU
NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE
NFSv4: Fix OPEN_DOWNGRADE error handling
pNFS: Handle NFS4ERR_OLD_STATEID on layoutreturn by bumping the state seqid
NFSv4: Add a helper to increment stateid seqids
NFSv4: Handle RPC level errors in LAYOUTRETURN
NFSv4: Handle NFS4ERR_DELAY correctly in return-on-close
NFSv4: Clean up pNFS return-on-close error handling
pNFS: Ensure we do clear the return-on-close layout stateid on fatal errors
NFS: remove unused check for negative dentry
NFSv3: use nfs_add_or_obtain() to create and reference inodes
NFS: Refactor nfs_instantiate() for dentry referencing callers
SUNRPC: Fix congestion window race with disconnect
SUNRPC: Don't try to parse incomplete RPC messages
SUNRPC: Rename xdr_buf_read_netobj to xdr_buf_read_mic
SUNRPC: Fix buffer handling of GSS MIC without slack
SUNRPC: RPC level errors should always set task->tk_rpc_status
SUNRPC: Don't receive TCP data into a request buffer that has been reset
...
29 files changed, 835 insertions, 580 deletions
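Several of the changes in this pull revolve around NFS4ERR_OLD_STATEID, which a server returns when the client presents a stateid whose seqid lags the server's current one. The helpers added in the diffs below compare and bump seqids with wrap-safe serial-number arithmetic. A minimal userspace sketch of that comparison (our own illustration, not code from the series; it mirrors nfs4_stateid_is_newer()):

    #include <stdint.h>
    #include <stdio.h>

    /* Wrap-safe "is a strictly newer than b?", modulo 2^32. */
    static int seqid_is_newer(uint32_t a, uint32_t b)
    {
        return (int32_t)(a - b) > 0;
    }

    int main(void)
    {
        printf("%d\n", seqid_is_newer(2, 1));           /* 1 */
        printf("%d\n", seqid_is_newer(1, 0xffffffffu)); /* 1: newer across the wrap */
        printf("%d\n", seqid_is_newer(5, 5));           /* 0 */
        return 0;
    }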
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 0adfd8840110..e180033e35cf 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1669,10 +1669,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 
 #endif /* CONFIG_NFSV4 */
 
-/*
- * Code common to create, mkdir, and mknod.
- */
-int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
+struct dentry *
+nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
 		struct nfs_fattr *fattr,
 		struct nfs4_label *label)
 {
@@ -1680,13 +1678,10 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 	struct inode *dir = d_inode(parent);
 	struct inode *inode;
 	struct dentry *d;
-	int error = -EACCES;
+	int error;
 
 	d_drop(dentry);
 
-	/* We may have been initialized further down */
-	if (d_really_is_positive(dentry))
-		goto out;
 	if (fhandle->size == 0) {
 		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
 		if (error)
@@ -1702,18 +1697,32 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 	}
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
 	d = d_splice_alias(inode, dentry);
-	if (IS_ERR(d)) {
-		error = PTR_ERR(d);
-		goto out_error;
-	}
-	dput(d);
 out:
 	dput(parent);
-	return 0;
+	return d;
 out_error:
 	nfs_mark_for_revalidate(dir);
-	dput(parent);
-	return error;
+	d = ERR_PTR(error);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(nfs_add_or_obtain);
+
+/*
+ * Code common to create, mkdir, and mknod.
+ */
+int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr,
+		struct nfs4_label *label)
+{
+	struct dentry *d;
+
+	d = nfs_add_or_obtain(dentry, fhandle, fattr, label);
+	if (IS_ERR(d))
+		return PTR_ERR(d);
+
+	/* Callers don't care */
+	dput(d);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(nfs_instantiate);
 
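The fs/nfs/dir.c refactor above splits instantiation in two: nfs_add_or_obtain() hands back the dentry that actually went positive (possibly an alias found by d_splice_alias(), or an ERR_PTR on failure), while nfs_instantiate() keeps its old 0/-errno contract for callers that don't care which dentry won. A runnable userspace sketch of the ERR_PTR convention the new wrapper relies on (our own demo; the kernel's versions live in <linux/err.h>):

    #include <stdio.h>
    #include <errno.h>

    #define MAX_ERRNO 4095

    static void *ERR_PTR(long error) { return (void *)error; }
    static long PTR_ERR(const void *ptr) { return (long)ptr; }
    static int IS_ERR(const void *ptr)
    {
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    static int value = 42;

    /* Stand-in for nfs_add_or_obtain(): pointer on success, ERR_PTR on failure. */
    static void *obtain(int fail)
    {
        return fail ? ERR_PTR(-EACCES) : &value;
    }

    /* Stand-in for nfs_instantiate(): flattens the pointer back to 0/-errno. */
    static int instantiate(int fail)
    {
        void *d = obtain(fail);

        if (IS_ERR(d))
            return (int)PTR_ERR(d);
        /* Callers don't care which object we got; drop it and report success. */
        return 0;
    }

    int main(void)
    {
        printf("%d %d\n", instantiate(0), instantiate(1)); /* 0 -13 */
        return 0;
    }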
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 3cb073c50fa6..c9b605f6c9cb 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -1164,6 +1164,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
 	.id = LAYOUT_NFSV4_1_FILES,
 	.name = "LAYOUT_NFSV4_1_FILES",
 	.owner = THIS_MODULE,
+	.flags = PNFS_LAYOUTGET_ON_OPEN,
 	.max_layoutget_response = 4096, /* 1 page or so... */
 	.alloc_layout_hdr = filelayout_alloc_layout_hdr,
 	.free_layout_hdr = filelayout_free_layout_hdr,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e64f810223be..447a3c17fa8e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -16,14 +16,6 @@ extern const struct export_operations nfs_export_ops;
 
 struct nfs_string;
 
-/* Maximum number of readahead requests
- * FIXME: this should really be a sysctl so that users may tune it to suit
- * their needs. People that do NFS over a slow network, might for
- * instance want to reduce it to something closer to 1 for improved
- * interactive response.
- */
-#define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
-
 static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
 {
 	if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index a3ad2d46fd42..9eb2f1a503ab 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -279,15 +279,17 @@ static struct nfs3_createdata *nfs3_alloc_createdata(void)
 	return data;
 }
 
-static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data)
+static struct dentry *
+nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data)
 {
 	int status;
 
 	status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0);
 	nfs_post_op_update_inode(dir, data->res.dir_attr);
-	if (status == 0)
-		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
-	return status;
+	if (status != 0)
+		return ERR_PTR(status);
+
+	return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr, NULL);
 }
 
 static void nfs3_free_createdata(struct nfs3_createdata *data)
@@ -304,6 +306,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 {
 	struct posix_acl *default_acl, *acl;
 	struct nfs3_createdata *data;
+	struct dentry *d_alias;
 	int status = -ENOMEM;
 
 	dprintk("NFS call create %pd\n", dentry);
@@ -330,7 +333,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		goto out;
 
 	for (;;) {
-		status = nfs3_do_create(dir, dentry, data);
+		d_alias = nfs3_do_create(dir, dentry, data);
+		status = PTR_ERR_OR_ZERO(d_alias);
 
 		if (status != -ENOTSUPP)
 			break;
@@ -355,6 +359,9 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	if (status != 0)
 		goto out_release_acls;
 
+	if (d_alias)
+		dentry = d_alias;
+
 	/* When we created the file with exclusive semantics, make
 	 * sure we set the attributes afterwards. */
 	if (data->arg.create.createmode == NFS3_CREATE_EXCLUSIVE) {
@@ -372,11 +379,13 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		nfs_post_op_update_inode(d_inode(dentry), data->res.fattr);
 		dprintk("NFS reply setattr (post-create): %d\n", status);
 		if (status != 0)
-			goto out_release_acls;
+			goto out_dput;
 	}
 
 	status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
+out_dput:
+	dput(d_alias);
 out_release_acls:
 	posix_acl_release(acl);
 	posix_acl_release(default_acl);
@@ -504,6 +513,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
 		  unsigned int len, struct iattr *sattr)
 {
 	struct nfs3_createdata *data;
+	struct dentry *d_alias;
 	int status = -ENOMEM;
 
 	if (len > NFS3_MAXPATHLEN)
@@ -522,7 +532,11 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
 	data->arg.symlink.pathlen = len;
 	data->arg.symlink.sattr = sattr;
 
-	status = nfs3_do_create(dir, dentry, data);
+	d_alias = nfs3_do_create(dir, dentry, data);
+	status = PTR_ERR_OR_ZERO(d_alias);
+
+	if (status == 0)
+		dput(d_alias);
 
 	nfs3_free_createdata(data);
 out:
@@ -535,6 +549,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 {
 	struct posix_acl *default_acl, *acl;
 	struct nfs3_createdata *data;
+	struct dentry *d_alias;
 	int status = -ENOMEM;
 
 	dprintk("NFS call mkdir %pd\n", dentry);
@@ -553,12 +568,18 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	data->arg.mkdir.len = dentry->d_name.len;
 	data->arg.mkdir.sattr = sattr;
 
-	status = nfs3_do_create(dir, dentry, data);
+	d_alias = nfs3_do_create(dir, dentry, data);
+	status = PTR_ERR_OR_ZERO(d_alias);
+
 	if (status != 0)
 		goto out_release_acls;
 
+	if (d_alias)
+		dentry = d_alias;
+
 	status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
+	dput(d_alias);
 out_release_acls:
 	posix_acl_release(acl);
 	posix_acl_release(default_acl);
@@ -660,6 +681,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 {
 	struct posix_acl *default_acl, *acl;
 	struct nfs3_createdata *data;
+	struct dentry *d_alias;
 	int status = -ENOMEM;
 
 	dprintk("NFS call mknod %pd %u:%u\n", dentry,
@@ -698,12 +720,17 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		goto out;
 	}
 
-	status = nfs3_do_create(dir, dentry, data);
+	d_alias = nfs3_do_create(dir, dentry, data);
+	status = PTR_ERR_OR_ZERO(d_alias);
 	if (status != 0)
 		goto out_release_acls;
 
+	if (d_alias)
+		dentry = d_alias;
+
 	status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
+	dput(d_alias);
 out_release_acls:
 	posix_acl_release(acl);
 	posix_acl_release(default_acl);
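In the NFSv3 paths above, nfs3_do_create() may now return a referenced alias dentry; each caller redirects the follow-up setattr/setacl work at that alias and then drops the extra reference exactly once (dput(NULL) being a safe no-op, as in the kernel). A tiny userspace model of that hand-off (ours, heavily simplified):

    #include <assert.h>
    #include <stdlib.h>

    struct obj {
        int refcount;
    };

    static struct obj *get(struct obj *o)
    {
        if (o)
            o->refcount++;
        return o;
    }

    static void put(struct obj *o)          /* models dput() */
    {
        if (o && --o->refcount == 0)
            free(o);
    }

    int main(void)
    {
        struct obj *dentry = calloc(1, sizeof(*dentry));
        struct obj *d_alias;

        dentry->refcount = 1;
        /* Model the create path handing back a referenced alias. */
        d_alias = get(dentry);
        if (d_alias)
            dentry = d_alias;   /* operate on the alias from now on */
        /* ... setattr/setacl against "dentry" would go here ... */
        put(d_alias);           /* the single dput(d_alias) in the patch */
        assert(dentry->refcount == 1);
        put(dentry);
        return 0;
    }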
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3564da1ba8a1..16b2e5cc3e94 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -491,8 +491,6 @@ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t,
 		const struct nfs_lock_context *, nfs4_stateid *,
 		const struct cred **);
-extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst,
-		struct nfs4_state *state);
 extern bool nfs4_copy_open_stateid(nfs4_stateid *dst,
 		struct nfs4_state *state);
 
@@ -574,6 +572,15 @@ static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stat
 	return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
 }
 
+static inline void nfs4_stateid_seqid_inc(nfs4_stateid *s1)
+{
+	u32 seqid = be32_to_cpu(s1->seqid);
+
+	if (++seqid == 0)
+		++seqid;
+	s1->seqid = cpu_to_be32(seqid);
+}
+
 static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
 {
 	return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
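nfs4_stateid_seqid_inc() skips zero when the 32-bit seqid wraps: a seqid of zero has a special meaning on the wire in NFSv4.x (it matches whatever the server's current seqid is), so the bumped value must never land there. A quick userspace check of the wrap behaviour (ours; htonl()/ntohl() stand in for cpu_to_be32()/be32_to_cpu()):

    #include <arpa/inet.h>
    #include <assert.h>
    #include <stdint.h>

    static uint32_t seqid_inc(uint32_t seqid_be)
    {
        uint32_t seqid = ntohl(seqid_be);

        if (++seqid == 0)   /* wrapped: 0 is reserved on the wire */
            ++seqid;
        return htonl(seqid);
    }

    int main(void)
    {
        assert(ntohl(seqid_inc(htonl(1))) == 2);
        assert(ntohl(seqid_inc(htonl(0xffffffffu))) == 1); /* skips 0 */
        return 0;
    }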
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1406858bae6c..11eafcfc490b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1073,14 +1073,26 @@ static const struct rpc_call_ops nfs40_call_sync_ops = {
 	.rpc_call_done = nfs40_call_sync_done,
 };
 
+static int nfs4_call_sync_custom(struct rpc_task_setup *task_setup)
+{
+	int ret;
+	struct rpc_task *task;
+
+	task = rpc_run_task(task_setup);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+
+	ret = task->tk_status;
+	rpc_put_task(task);
+	return ret;
+}
+
 static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
 				   struct nfs_server *server,
 				   struct rpc_message *msg,
 				   struct nfs4_sequence_args *args,
 				   struct nfs4_sequence_res *res)
 {
-	int ret;
-	struct rpc_task *task;
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_call_sync_data data = {
 		.seq_server = server,
@@ -1094,14 +1106,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
 		.callback_data = &data
 	};
 
-	task = rpc_run_task(&task_setup);
-	if (IS_ERR(task))
-		ret = PTR_ERR(task);
-	else {
-		ret = task->tk_status;
-		rpc_put_task(task);
-	}
-	return ret;
+	return nfs4_call_sync_custom(&task_setup);
 }
 
 int nfs4_call_sync(struct rpc_clnt *clnt,
@@ -3308,6 +3313,75 @@ nfs4_wait_on_layoutreturn(struct inode *inode, struct rpc_task *task)
 	return pnfs_wait_on_layoutreturn(inode, task);
 }
 
+/*
+ * Update the seqid of an open stateid
+ */
+static void nfs4_sync_open_stateid(nfs4_stateid *dst,
+		struct nfs4_state *state)
+{
+	__be32 seqid_open;
+	u32 dst_seqid;
+	int seq;
+
+	for (;;) {
+		if (!nfs4_valid_open_stateid(state))
+			break;
+		seq = read_seqbegin(&state->seqlock);
+		if (!nfs4_state_match_open_stateid_other(state, dst)) {
+			nfs4_stateid_copy(dst, &state->open_stateid);
+			if (read_seqretry(&state->seqlock, seq))
+				continue;
+			break;
+		}
+		seqid_open = state->open_stateid.seqid;
+		if (read_seqretry(&state->seqlock, seq))
+			continue;
+
+		dst_seqid = be32_to_cpu(dst->seqid);
+		if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) < 0)
+			dst->seqid = seqid_open;
+		break;
+	}
+}
+
+/*
+ * Update the seqid of an open stateid after receiving
+ * NFS4ERR_OLD_STATEID
+ */
+static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
+		struct nfs4_state *state)
+{
+	__be32 seqid_open;
+	u32 dst_seqid;
+	bool ret;
+	int seq;
+
+	for (;;) {
+		ret = false;
+		if (!nfs4_valid_open_stateid(state))
+			break;
+		seq = read_seqbegin(&state->seqlock);
+		if (!nfs4_state_match_open_stateid_other(state, dst)) {
+			if (read_seqretry(&state->seqlock, seq))
+				continue;
+			break;
+		}
+		seqid_open = state->open_stateid.seqid;
+		if (read_seqretry(&state->seqlock, seq))
+			continue;
+
+		dst_seqid = be32_to_cpu(dst->seqid);
+		if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) >= 0)
+			dst->seqid = cpu_to_be32(dst_seqid + 1);
+		else
+			dst->seqid = seqid_open;
+		ret = true;
+		break;
+	}
+
+	return ret;
+}
+
 struct nfs4_closedata {
 	struct inode *inode;
 	struct nfs4_state *state;
@@ -3358,32 +3432,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status);
 
 	/* Handle Layoutreturn errors */
-	if (calldata->arg.lr_args && task->tk_status != 0) {
-		switch (calldata->res.lr_ret) {
-		default:
-			calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
-			break;
-		case 0:
-			calldata->arg.lr_args = NULL;
-			calldata->res.lr_res = NULL;
-			break;
-		case -NFS4ERR_OLD_STATEID:
-			if (nfs4_layoutreturn_refresh_stateid(&calldata->arg.lr_args->stateid,
-					&calldata->arg.lr_args->range,
-					calldata->inode))
-				goto lr_restart;
-			/* Fallthrough */
-		case -NFS4ERR_ADMIN_REVOKED:
-		case -NFS4ERR_DELEG_REVOKED:
-		case -NFS4ERR_EXPIRED:
-		case -NFS4ERR_BAD_STATEID:
-		case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
-		case -NFS4ERR_WRONG_CRED:
-			calldata->arg.lr_args = NULL;
-			calldata->res.lr_res = NULL;
-			goto lr_restart;
-		}
-	}
+	if (pnfs_roc_done(task, calldata->inode,
+				&calldata->arg.lr_args,
+				&calldata->res.lr_res,
+				&calldata->res.lr_ret) == -EAGAIN)
+		goto out_restart;
 
 	/* hmm. we are done with the inode, and in the process of freeing
 	 * the state_owner. we keep this around to process errors
@@ -3403,7 +3456,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 			break;
 		case -NFS4ERR_OLD_STATEID:
 			/* Did we race with OPEN? */
-			if (nfs4_refresh_open_stateid(&calldata->arg.stateid,
+			if (nfs4_refresh_open_old_stateid(&calldata->arg.stateid,
 						state))
 				goto out_restart;
 			goto out_release;
@@ -3415,7 +3468,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 					task->tk_msg.rpc_cred);
 			/* Fallthrough */
 		case -NFS4ERR_BAD_STATEID:
-			break;
+			if (calldata->arg.fmode == 0)
+				break;
+			/* Fallthrough */
 		default:
 			task->tk_status = nfs4_async_handle_exception(task,
 					server, task->tk_status, &exception);
@@ -3430,8 +3485,6 @@ out_release:
 	nfs_refresh_inode(calldata->inode, &calldata->fattr);
 	dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
 	return;
-lr_restart:
-	calldata->res.lr_ret = 0;
 out_restart:
 	task->tk_status = 0;
 	rpc_restart_call_prepare(task);
@@ -3472,8 +3525,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 	} else if (is_rdwr)
 		calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
 
-	if (!nfs4_valid_open_stateid(state) ||
-	    !nfs4_refresh_open_stateid(&calldata->arg.stateid, state))
+	nfs4_sync_open_stateid(&calldata->arg.stateid, state);
+	if (!nfs4_valid_open_stateid(state))
 		call_close = 0;
 	spin_unlock(&state->owner->so_lock);
 
@@ -6018,7 +6071,6 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 		.rpc_resp = res,
 		.rpc_cred = cred,
 	};
-	struct rpc_task *task;
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = clp->cl_rpcclient,
 		.rpc_message = &msg,
@@ -6051,17 +6103,12 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 	dprintk("NFS call setclientid auth=%s, '%s'\n",
 		clp->cl_rpcclient->cl_auth->au_ops->au_name,
 		clp->cl_owner_id);
-	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task)) {
-		status = PTR_ERR(task);
-		goto out;
-	}
-	status = task->tk_status;
+
+	status = nfs4_call_sync_custom(&task_setup_data);
 	if (setclientid.sc_cred) {
 		clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
 		put_rpccred(setclientid.sc_cred);
 	}
-	rpc_put_task(task);
 out:
 	trace_nfs4_setclientid(clp, status);
 	dprintk("NFS reply setclientid: %d\n", status);
@@ -6129,32 +6176,11 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 	trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
 
 	/* Handle Layoutreturn errors */
-	if (data->args.lr_args && task->tk_status != 0) {
-		switch(data->res.lr_ret) {
-		default:
-			data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
-			break;
-		case 0:
-			data->args.lr_args = NULL;
-			data->res.lr_res = NULL;
-			break;
-		case -NFS4ERR_OLD_STATEID:
-			if (nfs4_layoutreturn_refresh_stateid(&data->args.lr_args->stateid,
-					&data->args.lr_args->range,
-					data->inode))
-				goto lr_restart;
-			/* Fallthrough */
-		case -NFS4ERR_ADMIN_REVOKED:
-		case -NFS4ERR_DELEG_REVOKED:
-		case -NFS4ERR_EXPIRED:
-		case -NFS4ERR_BAD_STATEID:
-		case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
-		case -NFS4ERR_WRONG_CRED:
-			data->args.lr_args = NULL;
-			data->res.lr_res = NULL;
-			goto lr_restart;
-		}
-	}
+	if (pnfs_roc_done(task, data->inode,
+				&data->args.lr_args,
+				&data->res.lr_res,
+				&data->res.lr_ret) == -EAGAIN)
+		goto out_restart;
 
 	switch (task->tk_status) {
 	case 0:
@@ -6192,8 +6218,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 	}
 	data->rpc_status = task->tk_status;
 	return;
-lr_restart:
-	data->res.lr_ret = 0;
 out_restart:
 	task->tk_status = 0;
 	rpc_restart_call_prepare(task);
@@ -6386,6 +6410,42 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *
 	return err;
 }
 
+/*
+ * Update the seqid of a lock stateid after receiving
+ * NFS4ERR_OLD_STATEID
+ */
+static bool nfs4_refresh_lock_old_stateid(nfs4_stateid *dst,
+		struct nfs4_lock_state *lsp)
+{
+	struct nfs4_state *state = lsp->ls_state;
+	bool ret = false;
+
+	spin_lock(&state->state_lock);
+	if (!nfs4_stateid_match_other(dst, &lsp->ls_stateid))
+		goto out;
+	if (!nfs4_stateid_is_newer(&lsp->ls_stateid, dst))
+		nfs4_stateid_seqid_inc(dst);
+	else
+		dst->seqid = lsp->ls_stateid.seqid;
+	ret = true;
+out:
+	spin_unlock(&state->state_lock);
+	return ret;
+}
+
+static bool nfs4_sync_lock_stateid(nfs4_stateid *dst,
+		struct nfs4_lock_state *lsp)
+{
+	struct nfs4_state *state = lsp->ls_state;
+	bool ret;
+
+	spin_lock(&state->state_lock);
+	ret = !nfs4_stateid_match_other(dst, &lsp->ls_stateid);
+	nfs4_stateid_copy(dst, &lsp->ls_stateid);
+	spin_unlock(&state->state_lock);
+	return ret;
+}
+
 struct nfs4_unlockdata {
 	struct nfs_locku_args arg;
 	struct nfs_locku_res res;
@@ -6403,7 +6463,8 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
 		struct nfs_seqid *seqid)
 {
 	struct nfs4_unlockdata *p;
-	struct inode *inode = lsp->ls_state->inode;
+	struct nfs4_state *state = lsp->ls_state;
+	struct inode *inode = state->inode;
 
 	p = kzalloc(sizeof(*p), GFP_NOFS);
 	if (p == NULL)
@@ -6419,6 +6480,9 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
 	locks_init_lock(&p->fl);
 	locks_copy_lock(&p->fl, fl);
 	p->server = NFS_SERVER(inode);
+	spin_lock(&state->state_lock);
+	nfs4_stateid_copy(&p->arg.stateid, &lsp->ls_stateid);
+	spin_unlock(&state->state_lock);
 	return p;
 }
 
@@ -6457,10 +6521,14 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 					task->tk_msg.rpc_cred);
 		/* Fall through */
 	case -NFS4ERR_BAD_STATEID:
-	case -NFS4ERR_OLD_STATEID:
 	case -NFS4ERR_STALE_STATEID:
-		if (!nfs4_stateid_match(&calldata->arg.stateid,
-				&calldata->lsp->ls_stateid))
+		if (nfs4_sync_lock_stateid(&calldata->arg.stateid,
+					calldata->lsp))
+			rpc_restart_call_prepare(task);
+		break;
+	case -NFS4ERR_OLD_STATEID:
+		if (nfs4_refresh_lock_old_stateid(&calldata->arg.stateid,
+					calldata->lsp))
 			rpc_restart_call_prepare(task);
 		break;
 	default:
@@ -6483,7 +6551,6 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 
 	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		goto out_wait;
-	nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid);
 	if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
 		/* Note: exit _without_ running nfs4_locku_done */
 		goto out_no_action;
@@ -7645,6 +7712,8 @@ int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred)
 static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors, bool use_integrity)
 {
 	int status;
+	struct rpc_clnt *clnt = NFS_SERVER(dir)->client;
+	struct nfs_client *clp = NFS_SERVER(dir)->nfs_client;
 	struct nfs4_secinfo_arg args = {
 		.dir_fh = NFS_FH(dir),
 		.name = name,
@@ -7657,26 +7726,37 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
-	struct rpc_clnt *clnt = NFS_SERVER(dir)->client;
+	struct nfs4_call_sync_data data = {
+		.seq_server = NFS_SERVER(dir),
+		.seq_args = &args.seq_args,
+		.seq_res = &res.seq_res,
+	};
+	struct rpc_task_setup task_setup = {
+		.rpc_client = clnt,
+		.rpc_message = &msg,
+		.callback_ops = clp->cl_mvops->call_sync_ops,
+		.callback_data = &data,
+		.flags = RPC_TASK_NO_ROUND_ROBIN,
+	};
 	const struct cred *cred = NULL;
 
 	if (use_integrity) {
-		clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient;
-		cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client);
+		clnt = clp->cl_rpcclient;
+		task_setup.rpc_client = clnt;
+
+		cred = nfs4_get_clid_cred(clp);
 		msg.rpc_cred = cred;
 	}
 
 	dprintk("NFS call secinfo %s\n", name->name);
 
-	nfs4_state_protect(NFS_SERVER(dir)->nfs_client,
-		NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg);
+	nfs4_state_protect(clp, NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg);
+	nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+	status = nfs4_call_sync_custom(&task_setup);
 
-	status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args,
-				&res.seq_res, RPC_TASK_NO_ROUND_ROBIN);
 	dprintk("NFS reply secinfo: %d\n", status);
 
 	put_cred(cred);
-
 	return status;
 }
 
@@ -8344,7 +8424,6 @@ static const struct rpc_call_ops nfs4_get_lease_time_ops = {
 
 int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 {
-	struct rpc_task *task;
 	struct nfs4_get_lease_time_args args;
 	struct nfs4_get_lease_time_res res = {
 		.lr_fsinfo = fsinfo,
@@ -8366,17 +8445,9 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 		.callback_data = &data,
 		.flags = RPC_TASK_TIMEOUT,
 	};
-	int status;
 
 	nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0, 1);
-	task = rpc_run_task(&task_setup);
-
-	if (IS_ERR(task))
-		return PTR_ERR(task);
-
-	status = task->tk_status;
-	rpc_put_task(task);
-	return status;
+	return nfs4_call_sync_custom(&task_setup);
 }
 
 #ifdef CONFIG_NFS_V4_1
@@ -8845,7 +8916,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
 				   const struct cred *cred)
 {
 	struct nfs4_reclaim_complete_data *calldata;
-	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE],
 		.rpc_cred = cred,
@@ -8854,7 +8924,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
 		.rpc_client = clp->cl_rpcclient,
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_reclaim_complete_call_ops,
-		.flags = RPC_TASK_ASYNC | RPC_TASK_NO_ROUND_ROBIN,
+		.flags = RPC_TASK_NO_ROUND_ROBIN,
 	};
 	int status = -ENOMEM;
 
@@ -8869,15 +8939,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
 	msg.rpc_argp = &calldata->arg;
 	msg.rpc_resp = &calldata->res;
 	task_setup_data.callback_data = calldata;
-	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task)) {
-		status = PTR_ERR(task);
-		goto out;
-	}
-	status = rpc_wait_for_completion_task(task);
-	if (status == 0)
-		status = task->tk_status;
-	rpc_put_task(task);
+	status = nfs4_call_sync_custom(&task_setup_data);
 out:
 	dprintk("<-- %s status=%d\n", __func__, status);
 	return status;
@@ -9103,10 +9165,19 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
 	if (!nfs41_sequence_process(task, &lrp->res.seq_res))
 		return;
 
+	/*
+	 * Was there an RPC level error? Assume the call succeeded,
+	 * and that we need to release the layout
+	 */
+	if (task->tk_rpc_status != 0 && RPC_WAS_SENT(task)) {
+		lrp->res.lrs_present = 0;
+		return;
+	}
+
 	server = NFS_SERVER(lrp->args.inode);
 	switch (task->tk_status) {
 	case -NFS4ERR_OLD_STATEID:
-		if (nfs4_layoutreturn_refresh_stateid(&lrp->args.stateid,
+		if (nfs4_layout_refresh_old_stateid(&lrp->args.stateid,
 					&lrp->args.range,
 					lrp->args.inode))
 			goto out_restart;
@@ -9362,18 +9433,32 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_resp = &res,
 	};
 	struct rpc_clnt *clnt = server->client;
+	struct nfs4_call_sync_data data = {
+		.seq_server = server,
+		.seq_args = &args.seq_args,
+		.seq_res = &res.seq_res,
+	};
+	struct rpc_task_setup task_setup = {
+		.rpc_client = server->client,
+		.rpc_message = &msg,
+		.callback_ops = server->nfs_client->cl_mvops->call_sync_ops,
+		.callback_data = &data,
+		.flags = RPC_TASK_NO_ROUND_ROBIN,
+	};
 	const struct cred *cred = NULL;
 	int status;
 
 	if (use_integrity) {
 		clnt = server->nfs_client->cl_rpcclient;
+		task_setup.rpc_client = clnt;
+
 		cred = nfs4_get_clid_cred(server->nfs_client);
 		msg.rpc_cred = cred;
 	}
 
 	dprintk("--> %s\n", __func__);
-	status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
-				&res.seq_res, RPC_TASK_NO_ROUND_ROBIN);
+	nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+	status = nfs4_call_sync_custom(&task_setup);
 	dprintk("<-- %s status=%d\n", __func__, status);
 
 	put_cred(cred);
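Two retry idioms recur in the nfs4proc.c changes above: the rpc_run_task()/tk_status/rpc_put_task() sequence now centralized in nfs4_call_sync_custom(), and the seqlock read loops in nfs4_sync_open_stateid()/nfs4_refresh_open_old_stateid(), which re-read the open stateid whenever a writer raced with the read. A simplified userspace model of the seqlock read side (ours; the real kernel primitives add memory barriers and writer-side locking):

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic unsigned seq;    /* even: stable, odd: writer in progress */
    static int shared_value;

    static unsigned read_seqbegin(void)
    {
        unsigned s;

        while ((s = atomic_load(&seq)) & 1)
            ;                       /* spin while a writer is mid-update */
        return s;
    }

    static int read_seqretry(unsigned s)
    {
        return atomic_load(&seq) != s;  /* counter moved: retry the read */
    }

    int main(void)
    {
        unsigned s;
        int copy;

        do {
            s = read_seqbegin();
            copy = shared_value;    /* speculative, lock-free read */
        } while (read_seqretry(s));
        printf("%d\n", copy);
        return 0;
    }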
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index cad4e064b328..0c6d53dc3672 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1015,22 +1015,6 @@ out:
 	return ret;
 }
 
-bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
-{
-	bool ret;
-	int seq;
-
-	do {
-		ret = false;
-		seq = read_seqbegin(&state->seqlock);
-		if (nfs4_state_match_open_stateid_other(state, dst)) {
-			dst->seqid = state->open_stateid.seqid;
-			ret = true;
-		}
-	} while (read_seqretry(&state->seqlock, seq));
-	return ret;
-}
-
 bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
 {
 	bool ret;
@@ -2095,8 +2079,10 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
 	}
 
 	status = nfs4_begin_drain_session(clp);
-	if (status != 0)
-		return status;
+	if (status != 0) {
+		result = status;
+		goto out;
+	}
 
 	status = nfs4_replace_transport(server, locations);
 	if (status != 0) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 46a8d636d151..ab07db0f07cd 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1174,7 +1174,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 		} else
 			*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
 	}
-	if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
+	if (label && (bmval[2] & FATTR4_WORD2_SECURITY_LABEL)) {
 		*p++ = cpu_to_be32(label->lfs);
 		*p++ = cpu_to_be32(label->pi);
 		*p++ = cpu_to_be32(label->len);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 4525d5acae38..bb80034a7661 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -359,9 +359,10 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
 }
 
 /*
- * Update the seqid of a layout stateid
+ * Update the seqid of a layout stateid after receiving
+ * NFS4ERR_OLD_STATEID
  */
-bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 		struct pnfs_layout_range *dst_range,
 		struct inode *inode)
 {
@@ -377,7 +378,15 @@ bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
 
 	spin_lock(&inode->i_lock);
 	lo = NFS_I(inode)->layout;
-	if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
+	if (lo && pnfs_layout_is_valid(lo) &&
+			nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
+		/* Is our call using the most recent seqid? If so, bump it */
+		if (!nfs4_stateid_is_newer(&lo->plh_stateid, dst)) {
+			nfs4_stateid_seqid_inc(dst);
+			ret = true;
+			goto out;
+		}
+		/* Try to update the seqid to the most recent */
 		err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
 		if (err != -EBUSY) {
 			dst->seqid = lo->plh_stateid.seqid;
@@ -385,6 +394,7 @@ bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
 			ret = true;
 		}
 	}
+out:
 	spin_unlock(&inode->i_lock);
 	pnfs_free_lseg_list(&head);
 	return ret;
@@ -1440,6 +1450,52 @@ out_noroc:
 	return false;
 }
 
+int pnfs_roc_done(struct rpc_task *task, struct inode *inode,
+		struct nfs4_layoutreturn_args **argpp,
+		struct nfs4_layoutreturn_res **respp,
+		int *ret)
+{
+	struct nfs4_layoutreturn_args *arg = *argpp;
+	int retval = -EAGAIN;
+
+	if (!arg)
+		return 0;
+	/* Handle Layoutreturn errors */
+	switch (*ret) {
+	case 0:
+		retval = 0;
+		break;
+	case -NFS4ERR_NOMATCHING_LAYOUT:
+		/* Was there an RPC level error? If not, retry */
+		if (task->tk_rpc_status == 0)
+			break;
+		/* If the call was not sent, let caller handle it */
+		if (!RPC_WAS_SENT(task))
+			return 0;
+		/*
+		 * Otherwise, assume the call succeeded and
+		 * that we need to release the layout
+		 */
+		*ret = 0;
+		(*respp)->lrs_present = 0;
+		retval = 0;
+		break;
+	case -NFS4ERR_DELAY:
+		/* Let the caller handle the retry */
+		*ret = -NFS4ERR_NOMATCHING_LAYOUT;
+		return 0;
+	case -NFS4ERR_OLD_STATEID:
+		if (!nfs4_layout_refresh_old_stateid(&arg->stateid,
+					&arg->range, inode))
+			break;
+		*ret = -NFS4ERR_NOMATCHING_LAYOUT;
+		return -EAGAIN;
+	}
+	*argpp = NULL;
+	*respp = NULL;
+	return retval;
+}
+
 void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
 		struct nfs4_layoutreturn_res *res,
 		int ret)
@@ -1449,10 +1505,15 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
 	const nfs4_stateid *res_stateid = NULL;
 	struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
 
-	if (ret == 0) {
-		arg_stateid = &args->stateid;
+	switch (ret) {
+	case -NFS4ERR_NOMATCHING_LAYOUT:
+		break;
+	case 0:
 		if (res->lrs_present)
 			res_stateid = &res->stateid;
+		/* Fallthrough */
+	default:
+		arg_stateid = &args->stateid;
 	}
 	pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range,
 			res_stateid);
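pnfs_roc_done() gives nfs4_close_done() and nfs4_delegreturn_done() one shared contract for piggybacked LAYOUTRETURN errors: a return of -EAGAIN means the layout stateid was refreshed and the args were kept, so the restarted RPC retries the layoutreturn; the other paths consume *argpp/*respp so a restarted compound won't resend it. A heavily simplified userspace model of just that contract (ours):

    #include <assert.h>
    #include <stddef.h>

    enum { LR_OK = 0, LR_OLD_STATEID = 1 };

    static int refresh_stateid(void)
    {
        return 1;   /* pretend we found a newer seqid to retry with */
    }

    static int roc_done(int lr_status, int **argpp)
    {
        if (!*argpp)
            return 0;           /* no LAYOUTRETURN was piggybacked */
        if (lr_status == LR_OLD_STATEID && refresh_stateid())
            return -1;          /* stands for -EAGAIN: args kept, retry */
        *argpp = NULL;          /* consumed: do not resend layoutreturn */
        return 0;
    }

    int main(void)
    {
        int arg = 0, *argp = &arg;

        assert(roc_done(LR_OLD_STATEID, &argp) == -1 && argp != NULL);
        assert(roc_done(LR_OK, &argp) == 0 && argp == NULL);
        return 0;
    }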
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index f15609c003d8..f8a38065c7e4 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -261,7 +261,7 @@ int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
 		bool is_recall);
 int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
 		bool is_recall);
-bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 		struct pnfs_layout_range *dst_range,
 		struct inode *inode);
 void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
@@ -282,6 +282,10 @@ bool pnfs_roc(struct inode *ino,
 		struct nfs4_layoutreturn_args *args,
 		struct nfs4_layoutreturn_res *res,
 		const struct cred *cred);
+int pnfs_roc_done(struct rpc_task *task, struct inode *inode,
+		struct nfs4_layoutreturn_args **argpp,
+		struct nfs4_layoutreturn_res **respp,
+		int *ret);
 void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
 		struct nfs4_layoutreturn_res *res,
 		int ret);
@@ -701,6 +705,15 @@ pnfs_roc(struct inode *ino,
 	return false;
 }
 
+static inline int
+pnfs_roc_done(struct rpc_task *task, struct inode *inode,
+		struct nfs4_layoutreturn_args **argpp,
+		struct nfs4_layoutreturn_res **respp,
+		int *ret)
+{
+	return 0;
+}
+
 static inline void
 pnfs_roc_release(struct nfs4_layoutreturn_args *args,
 		struct nfs4_layoutreturn_res *res,
@@ -785,7 +798,7 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
 {
 }
 
-static inline bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+static inline bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
 		struct pnfs_layout_range *dst_range,
 		struct inode *inode)
 {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 19a76cfa8b1f..a84df7d63403 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2645,6 +2645,13 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
 }
 EXPORT_SYMBOL_GPL(nfs_clone_sb_security);
 
+static void nfs_set_readahead(struct backing_dev_info *bdi,
+			      unsigned long iomax_pages)
+{
+	bdi->ra_pages = VM_READAHEAD_PAGES;
+	bdi->io_pages = iomax_pages;
+}
+
 struct dentry *nfs_fs_mount_common(struct nfs_server *server,
 				   int flags, const char *dev_name,
 				   struct nfs_mount_info *mount_info,
@@ -2687,7 +2694,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
 			mntroot = ERR_PTR(error);
 			goto error_splat_super;
 		}
-		s->s_bdi->ra_pages = server->rpages * NFS_MAX_READAHEAD;
+		nfs_set_readahead(s->s_bdi, server->rpages);
 		server->super = s;
 	}
 
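For scale: the old NFS default readahead was server->rpages * NFS_MAX_READAHEAD, with NFS_MAX_READAHEAD = RPC_DEF_SLOT_TABLE - 1 = 15 (see the fs/nfs/internal.h removal above), while the new default is the VM-wide VM_READAHEAD_PAGES (128 KB), with bdi->io_pages still capping a single I/O at the server's rsize. A back-of-the-envelope check (ours; assumes 4 KB pages and a 1 MB rsize):

    #include <stdio.h>

    int main(void)
    {
        const unsigned long page_size = 4096;           /* assumed */
        const unsigned long rsize = 1024 * 1024;        /* assumed 1 MB */
        const unsigned long rpages = rsize / page_size;
        const unsigned long rpc_def_slot_table = 16;

        /* Old default: server->rpages * (RPC_DEF_SLOT_TABLE - 1) pages. */
        printf("old: %lu KB\n",
               rpages * (rpc_def_slot_table - 1) * page_size / 1024);
        /* New default: VM_READAHEAD_PAGES, i.e. SZ_128K / PAGE_SIZE pages. */
        printf("new: %lu KB\n", (128 * 1024 / page_size) * page_size / 1024);
        return 0;   /* prints old: 15360 KB, new: 128 KB */
    }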
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0a11712a80e3..570a60c2f4f4 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -490,6 +490,9 @@ extern const struct file_operations nfs_dir_operations;
 extern const struct dentry_operations nfs_dentry_operations;
 
 extern void nfs_force_lookup_revalidate(struct inode *dir);
+extern struct dentry *nfs_add_or_obtain(struct dentry *dentry,
+			struct nfs_fh *fh, struct nfs_fattr *fattr,
+			struct nfs4_label *label);
 extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
 			struct nfs_fattr *fattr, struct nfs4_label *label);
 extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 27536b961552..a6ef35184ef1 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -242,9 +242,6 @@ void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *queue,
 void		rpc_sleep_on_priority(struct rpc_wait_queue *,
 					struct rpc_task *,
 					int priority);
-void		rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
-					struct rpc_wait_queue *queue,
-					struct rpc_task *task);
 void		rpc_wake_up_queued_task(struct rpc_wait_queue *,
 					struct rpc_task *);
 void		rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *,
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8a87d8bcb197..f33e5013bdfb 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h | |||
@@ -186,7 +186,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) | |||
186 | extern void xdr_shift_buf(struct xdr_buf *, size_t); | 186 | extern void xdr_shift_buf(struct xdr_buf *, size_t); |
187 | extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); | 187 | extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); |
188 | extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); | 188 | extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); |
189 | extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int); | 189 | extern int xdr_buf_read_mic(struct xdr_buf *, struct xdr_netobj *, unsigned int); |
190 | extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); | 190 | extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); |
191 | extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); | 191 | extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); |
192 | 192 | ||
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 13e108bcc9eb..d783e15ba898 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -352,6 +352,7 @@ bool xprt_prepare_transmit(struct rpc_task *task); | |||
352 | void xprt_request_enqueue_transmit(struct rpc_task *task); | 352 | void xprt_request_enqueue_transmit(struct rpc_task *task); |
353 | void xprt_request_enqueue_receive(struct rpc_task *task); | 353 | void xprt_request_enqueue_receive(struct rpc_task *task); |
354 | void xprt_request_wait_receive(struct rpc_task *task); | 354 | void xprt_request_wait_receive(struct rpc_task *task); |
355 | void xprt_request_dequeue_xprt(struct rpc_task *task); | ||
355 | bool xprt_request_need_retransmit(struct rpc_task *task); | 356 | bool xprt_request_need_retransmit(struct rpc_task *task); |
356 | void xprt_transmit(struct rpc_task *task); | 357 | void xprt_transmit(struct rpc_task *task); |
357 | void xprt_end_transmit(struct rpc_task *task); | 358 | void xprt_end_transmit(struct rpc_task *task); |
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 86fc38ff0355..16c239e0d6dd 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h | |||
@@ -49,9 +49,9 @@ | |||
49 | * fully-chunked NFS message (read chunks are the largest). Note only | 49 | * fully-chunked NFS message (read chunks are the largest). Note only |
50 | * a single chunk type per message is supported currently. | 50 | * a single chunk type per message is supported currently. |
51 | */ | 51 | */ |
52 | #define RPCRDMA_MIN_SLOT_TABLE (2U) | 52 | #define RPCRDMA_MIN_SLOT_TABLE (4U) |
53 | #define RPCRDMA_DEF_SLOT_TABLE (128U) | 53 | #define RPCRDMA_DEF_SLOT_TABLE (128U) |
54 | #define RPCRDMA_MAX_SLOT_TABLE (256U) | 54 | #define RPCRDMA_MAX_SLOT_TABLE (16384U) |
55 | 55 | ||
56 | #define RPCRDMA_MIN_INLINE (1024) /* min inline thresh */ | 56 | #define RPCRDMA_MIN_INLINE (1024) /* min inline thresh */ |
57 | #define RPCRDMA_DEF_INLINE (4096) /* default inline thresh */ | 57 | #define RPCRDMA_DEF_INLINE (4096) /* default inline thresh */ |
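Raising RPCRDMA_MAX_SLOT_TABLE from 256 to 16384 lets the client honor much larger credit grants from the server, and the higher minimum of 4 keeps a small reserve even on constrained transports. Presumably the tunable is still clamped to these bounds; a sketch of that clamping (the variable name xprt_rdma_slot_table_entries is an assumption, not shown in this diff):

    static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;

    static unsigned int rdma_slot_table(void)
    {
            unsigned int n = xprt_rdma_slot_table_entries;

            if (n < RPCRDMA_MIN_SLOT_TABLE)         /* now 4 */
                    n = RPCRDMA_MIN_SLOT_TABLE;
            if (n > RPCRDMA_MAX_SLOT_TABLE)         /* now 16384 */
                    n = RPCRDMA_MAX_SLOT_TABLE;
            return n;
    }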
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index f6a4eaa85a3e..a13830616107 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h | |||
@@ -451,20 +451,81 @@ TRACE_EVENT(xprtrdma_createmrs, | |||
451 | 451 | ||
452 | TP_STRUCT__entry( | 452 | TP_STRUCT__entry( |
453 | __field(const void *, r_xprt) | 453 | __field(const void *, r_xprt) |
454 | __string(addr, rpcrdma_addrstr(r_xprt)) | ||
455 | __string(port, rpcrdma_portstr(r_xprt)) | ||
454 | __field(unsigned int, count) | 456 | __field(unsigned int, count) |
455 | ), | 457 | ), |
456 | 458 | ||
457 | TP_fast_assign( | 459 | TP_fast_assign( |
458 | __entry->r_xprt = r_xprt; | 460 | __entry->r_xprt = r_xprt; |
459 | __entry->count = count; | 461 | __entry->count = count; |
462 | __assign_str(addr, rpcrdma_addrstr(r_xprt)); | ||
463 | __assign_str(port, rpcrdma_portstr(r_xprt)); | ||
460 | ), | 464 | ), |
461 | 465 | ||
462 | TP_printk("r_xprt=%p: created %u MRs", | 466 | TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs", |
463 | __entry->r_xprt, __entry->count | 467 | __get_str(addr), __get_str(port), __entry->r_xprt, |
468 | __entry->count | ||
464 | ) | 469 | ) |
465 | ); | 470 | ); |
466 | 471 | ||
467 | DEFINE_RXPRT_EVENT(xprtrdma_nomrs); | 472 | TRACE_EVENT(xprtrdma_mr_get, |
473 | TP_PROTO( | ||
474 | const struct rpcrdma_req *req | ||
475 | ), | ||
476 | |||
477 | TP_ARGS(req), | ||
478 | |||
479 | TP_STRUCT__entry( | ||
480 | __field(const void *, req) | ||
481 | __field(unsigned int, task_id) | ||
482 | __field(unsigned int, client_id) | ||
483 | __field(u32, xid) | ||
484 | ), | ||
485 | |||
486 | TP_fast_assign( | ||
487 | const struct rpc_rqst *rqst = &req->rl_slot; | ||
488 | |||
489 | __entry->req = req; | ||
490 | __entry->task_id = rqst->rq_task->tk_pid; | ||
491 | __entry->client_id = rqst->rq_task->tk_client->cl_clid; | ||
492 | __entry->xid = be32_to_cpu(rqst->rq_xid); | ||
493 | ), | ||
494 | |||
495 | TP_printk("task:%u@%u xid=0x%08x req=%p", | ||
496 | __entry->task_id, __entry->client_id, __entry->xid, | ||
497 | __entry->req | ||
498 | ) | ||
499 | ); | ||
500 | |||
501 | TRACE_EVENT(xprtrdma_nomrs, | ||
502 | TP_PROTO( | ||
503 | const struct rpcrdma_req *req | ||
504 | ), | ||
505 | |||
506 | TP_ARGS(req), | ||
507 | |||
508 | TP_STRUCT__entry( | ||
509 | __field(const void *, req) | ||
510 | __field(unsigned int, task_id) | ||
511 | __field(unsigned int, client_id) | ||
512 | __field(u32, xid) | ||
513 | ), | ||
514 | |||
515 | TP_fast_assign( | ||
516 | const struct rpc_rqst *rqst = &req->rl_slot; | ||
517 | |||
518 | __entry->req = req; | ||
519 | __entry->task_id = rqst->rq_task->tk_pid; | ||
520 | __entry->client_id = rqst->rq_task->tk_client->cl_clid; | ||
521 | __entry->xid = be32_to_cpu(rqst->rq_xid); | ||
522 | ), | ||
523 | |||
524 | TP_printk("task:%u@%u xid=0x%08x req=%p", | ||
525 | __entry->task_id, __entry->client_id, __entry->xid, | ||
526 | __entry->req | ||
527 | ) | ||
528 | ); | ||
468 | 529 | ||
469 | DEFINE_RDCH_EVENT(read); | 530 | DEFINE_RDCH_EVENT(read); |
470 | DEFINE_WRCH_EVENT(write); | 531 | DEFINE_WRCH_EVENT(write); |
@@ -623,21 +684,21 @@ TRACE_EVENT(xprtrdma_post_send, | |||
623 | 684 | ||
624 | TRACE_EVENT(xprtrdma_post_recv, | 685 | TRACE_EVENT(xprtrdma_post_recv, |
625 | TP_PROTO( | 686 | TP_PROTO( |
626 | const struct ib_cqe *cqe | 687 | const struct rpcrdma_rep *rep |
627 | ), | 688 | ), |
628 | 689 | ||
629 | TP_ARGS(cqe), | 690 | TP_ARGS(rep), |
630 | 691 | ||
631 | TP_STRUCT__entry( | 692 | TP_STRUCT__entry( |
632 | __field(const void *, cqe) | 693 | __field(const void *, rep) |
633 | ), | 694 | ), |
634 | 695 | ||
635 | TP_fast_assign( | 696 | TP_fast_assign( |
636 | __entry->cqe = cqe; | 697 | __entry->rep = rep; |
637 | ), | 698 | ), |
638 | 699 | ||
639 | TP_printk("cqe=%p", | 700 | TP_printk("rep=%p", |
640 | __entry->cqe | 701 | __entry->rep |
641 | ) | 702 | ) |
642 | ); | 703 | ); |
643 | 704 | ||
@@ -715,14 +776,15 @@ TRACE_EVENT(xprtrdma_wc_receive, | |||
715 | TP_ARGS(wc), | 776 | TP_ARGS(wc), |
716 | 777 | ||
717 | TP_STRUCT__entry( | 778 | TP_STRUCT__entry( |
718 | __field(const void *, cqe) | 779 | __field(const void *, rep) |
719 | __field(u32, byte_len) | 780 | __field(u32, byte_len) |
720 | __field(unsigned int, status) | 781 | __field(unsigned int, status) |
721 | __field(u32, vendor_err) | 782 | __field(u32, vendor_err) |
722 | ), | 783 | ), |
723 | 784 | ||
724 | TP_fast_assign( | 785 | TP_fast_assign( |
725 | __entry->cqe = wc->wr_cqe; | 786 | __entry->rep = container_of(wc->wr_cqe, struct rpcrdma_rep, |
787 | rr_cqe); | ||
726 | __entry->status = wc->status; | 788 | __entry->status = wc->status; |
727 | if (wc->status) { | 789 | if (wc->status) { |
728 | __entry->byte_len = 0; | 790 | __entry->byte_len = 0; |
@@ -733,8 +795,8 @@ TRACE_EVENT(xprtrdma_wc_receive, | |||
733 | } | 795 | } |
734 | ), | 796 | ), |
735 | 797 | ||
736 | TP_printk("cqe=%p %u bytes: %s (%u/0x%x)", | 798 | TP_printk("rep=%p %u bytes: %s (%u/0x%x)", |
737 | __entry->cqe, __entry->byte_len, | 799 | __entry->rep, __entry->byte_len, |
738 | rdma_show_wc_status(__entry->status), | 800 | rdma_show_wc_status(__entry->status), |
739 | __entry->status, __entry->vendor_err | 801 | __entry->status, __entry->vendor_err |
740 | ) | 802 | ) |
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4ce42c62458e..d75fddca44c9 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c | |||
@@ -1960,7 +1960,7 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, | |||
1960 | 1960 | ||
1961 | if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len)) | 1961 | if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len)) |
1962 | goto unwrap_failed; | 1962 | goto unwrap_failed; |
1963 | if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) | 1963 | if (xdr_buf_read_mic(rcv_buf, &mic, mic_offset)) |
1964 | goto unwrap_failed; | 1964 | goto unwrap_failed; |
1965 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); | 1965 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); |
1966 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | 1966 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) |
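For reference, the RPCSEC_GSS integrity reply body that this path walks is an rpc_gss_integ_data (RFC 2203): one XDR opaque carrying the sequence number plus the RPC results, followed by one XDR opaque carrying the MIC computed over the first. That layout is why the code takes an integ_len subsegment at data_offset and then reads the MIC at mic_offset (the length fields are 4-byte XDR words):

    /*
     *   | integ_len | seq_num + RPC results ...  | mic_len | MIC bytes |
     *               ^ data_offset                ^ mic_offset
     *
     * gss_verify_mic() then checks the MIC against the integ_len-byte
     * region, exactly the subsegment extracted above.
     */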
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a07b516e503a..f7f78566be46 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -1837,7 +1837,7 @@ call_allocate(struct rpc_task *task) | |||
1837 | return; | 1837 | return; |
1838 | } | 1838 | } |
1839 | 1839 | ||
1840 | rpc_exit(task, -ERESTARTSYS); | 1840 | rpc_call_rpcerror(task, -ERESTARTSYS); |
1841 | } | 1841 | } |
1842 | 1842 | ||
1843 | static int | 1843 | static int |
@@ -1862,6 +1862,7 @@ rpc_xdr_encode(struct rpc_task *task) | |||
1862 | req->rq_rbuffer, | 1862 | req->rq_rbuffer, |
1863 | req->rq_rcvsize); | 1863 | req->rq_rcvsize); |
1864 | 1864 | ||
1865 | req->rq_reply_bytes_recvd = 0; | ||
1865 | req->rq_snd_buf.head[0].iov_len = 0; | 1866 | req->rq_snd_buf.head[0].iov_len = 0; |
1866 | xdr_init_encode(&xdr, &req->rq_snd_buf, | 1867 | xdr_init_encode(&xdr, &req->rq_snd_buf, |
1867 | req->rq_snd_buf.head[0].iov_base, req); | 1868 | req->rq_snd_buf.head[0].iov_base, req); |
@@ -1881,6 +1882,8 @@ call_encode(struct rpc_task *task) | |||
1881 | if (!rpc_task_need_encode(task)) | 1882 | if (!rpc_task_need_encode(task)) |
1882 | goto out; | 1883 | goto out; |
1883 | dprint_status(task); | 1884 | dprint_status(task); |
1885 | /* Dequeue task from the receive queue while we're encoding */ | ||
1886 | xprt_request_dequeue_xprt(task); | ||
1884 | /* Encode here so that rpcsec_gss can use correct sequence number. */ | 1887 | /* Encode here so that rpcsec_gss can use correct sequence number. */ |
1885 | rpc_xdr_encode(task); | 1888 | rpc_xdr_encode(task); |
1886 | /* Did the encode result in an error condition? */ | 1889 | /* Did the encode result in an error condition? */ |
@@ -2479,6 +2482,7 @@ call_decode(struct rpc_task *task) | |||
2479 | struct rpc_clnt *clnt = task->tk_client; | 2482 | struct rpc_clnt *clnt = task->tk_client; |
2480 | struct rpc_rqst *req = task->tk_rqstp; | 2483 | struct rpc_rqst *req = task->tk_rqstp; |
2481 | struct xdr_stream xdr; | 2484 | struct xdr_stream xdr; |
2485 | int err; | ||
2482 | 2486 | ||
2483 | dprint_status(task); | 2487 | dprint_status(task); |
2484 | 2488 | ||
@@ -2501,6 +2505,15 @@ call_decode(struct rpc_task *task) | |||
2501 | * before it changed req->rq_reply_bytes_recvd. | 2505 | * before it changed req->rq_reply_bytes_recvd. |
2502 | */ | 2506 | */ |
2503 | smp_rmb(); | 2507 | smp_rmb(); |
2508 | |||
2509 | /* | ||
2510 | * Did we ever call xprt_complete_rqst()? If not, we should assume | ||
2511 | * the message is incomplete. | ||
2512 | */ | ||
2513 | err = -EAGAIN; | ||
2514 | if (!req->rq_reply_bytes_recvd) | ||
2515 | goto out; | ||
2516 | |||
2504 | req->rq_rcv_buf.len = req->rq_private_buf.len; | 2517 | req->rq_rcv_buf.len = req->rq_private_buf.len; |
2505 | 2518 | ||
2506 | /* Check that the softirq receive buffer is valid */ | 2519 | /* Check that the softirq receive buffer is valid */ |
@@ -2509,7 +2522,9 @@ call_decode(struct rpc_task *task) | |||
2509 | 2522 | ||
2510 | xdr_init_decode(&xdr, &req->rq_rcv_buf, | 2523 | xdr_init_decode(&xdr, &req->rq_rcv_buf, |
2511 | req->rq_rcv_buf.head[0].iov_base, req); | 2524 | req->rq_rcv_buf.head[0].iov_base, req); |
2512 | switch (rpc_decode_header(task, &xdr)) { | 2525 | err = rpc_decode_header(task, &xdr); |
2526 | out: | ||
2527 | switch (err) { | ||
2513 | case 0: | 2528 | case 0: |
2514 | task->tk_action = rpc_exit_task; | 2529 | task->tk_action = rpc_exit_task; |
2515 | task->tk_status = rpcauth_unwrap_resp(task, &xdr); | 2530 | task->tk_status = rpcauth_unwrap_resp(task, &xdr); |
@@ -2518,9 +2533,6 @@ call_decode(struct rpc_task *task) | |||
2518 | return; | 2533 | return; |
2519 | case -EAGAIN: | 2534 | case -EAGAIN: |
2520 | task->tk_status = 0; | 2535 | task->tk_status = 0; |
2521 | xdr_free_bvec(&req->rq_rcv_buf); | ||
2522 | req->rq_reply_bytes_recvd = 0; | ||
2523 | req->rq_rcv_buf.len = 0; | ||
2524 | if (task->tk_client->cl_discrtry) | 2536 | if (task->tk_client->cl_discrtry) |
2525 | xprt_conditional_disconnect(req->rq_xprt, | 2537 | xprt_conditional_disconnect(req->rq_xprt, |
2526 | req->rq_connect_cookie); | 2538 | req->rq_connect_cookie); |
@@ -2561,7 +2573,7 @@ rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr) | |||
2561 | return 0; | 2573 | return 0; |
2562 | out_fail: | 2574 | out_fail: |
2563 | trace_rpc_bad_callhdr(task); | 2575 | trace_rpc_bad_callhdr(task); |
2564 | rpc_exit(task, error); | 2576 | rpc_call_rpcerror(task, error); |
2565 | return error; | 2577 | return error; |
2566 | } | 2578 | } |
2567 | 2579 | ||
@@ -2628,7 +2640,7 @@ out_garbage: | |||
2628 | return -EAGAIN; | 2640 | return -EAGAIN; |
2629 | } | 2641 | } |
2630 | out_err: | 2642 | out_err: |
2631 | rpc_exit(task, error); | 2643 | rpc_call_rpcerror(task, error); |
2632 | return error; | 2644 | return error; |
2633 | 2645 | ||
2634 | out_unparsable: | 2646 | out_unparsable: |
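Taken together, the call_decode() changes make the outcome a three-way decision: no received bytes means xprt_complete_rqst() never ran and the reply is incomplete; a header decode of 0 means proceed; -EAGAIN means keep waiting. A compact userspace model of that decision (illustrative only; the real function also handles disconnect-on-retry via cl_discrtry):

    #include <errno.h>

    enum decode_action { DECODE_OK, DECODE_RETRY, DECODE_FATAL };

    static enum decode_action classify_reply(unsigned int reply_bytes_recvd,
                                             int header_status)
    {
            /* xprt_complete_rqst() never ran: message is incomplete. */
            if (reply_bytes_recvd == 0)
                    return DECODE_RETRY;

            switch (header_status) {
            case 0:
                    return DECODE_OK;     /* unwrap and decode the body */
            case -EAGAIN:
                    return DECODE_RETRY;  /* wait for a complete reply */
            default:
                    return DECODE_FATAL;  /* rpc_call_rpcerror() path */
            }
    }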
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 1f275aba786f..360afe153193 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -541,33 +541,14 @@ rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq, | |||
541 | return NULL; | 541 | return NULL; |
542 | } | 542 | } |
543 | 543 | ||
544 | static void | ||
545 | rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq, | ||
546 | struct rpc_wait_queue *queue, struct rpc_task *task) | ||
547 | { | ||
548 | rpc_wake_up_task_on_wq_queue_action_locked(wq, queue, task, NULL, NULL); | ||
549 | } | ||
550 | |||
551 | /* | 544 | /* |
552 | * Wake up a queued task while the queue lock is being held | 545 | * Wake up a queued task while the queue lock is being held |
553 | */ | 546 | */ |
554 | static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) | 547 | static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, |
555 | { | 548 | struct rpc_task *task) |
556 | rpc_wake_up_task_on_wq_queue_locked(rpciod_workqueue, queue, task); | ||
557 | } | ||
558 | |||
559 | /* | ||
560 | * Wake up a task on a specific queue | ||
561 | */ | ||
562 | void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq, | ||
563 | struct rpc_wait_queue *queue, | ||
564 | struct rpc_task *task) | ||
565 | { | 549 | { |
566 | if (!RPC_IS_QUEUED(task)) | 550 | rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue, |
567 | return; | 551 | task, NULL, NULL); |
568 | spin_lock(&queue->lock); | ||
569 | rpc_wake_up_task_on_wq_queue_locked(wq, queue, task); | ||
570 | spin_unlock(&queue->lock); | ||
571 | } | 552 | } |
572 | 553 | ||
573 | /* | 554 | /* |
@@ -930,8 +911,10 @@ static void __rpc_execute(struct rpc_task *task) | |||
930 | /* | 911 | /* |
931 | * Signalled tasks should exit rather than sleep. | 912 | * Signalled tasks should exit rather than sleep. |
932 | */ | 913 | */ |
933 | if (RPC_SIGNALLED(task)) | 914 | if (RPC_SIGNALLED(task)) { |
915 | task->tk_rpc_status = -ERESTARTSYS; | ||
934 | rpc_exit(task, -ERESTARTSYS); | 916 | rpc_exit(task, -ERESTARTSYS); |
917 | } | ||
935 | 918 | ||
936 | /* | 919 | /* |
937 | * The queue->lock protects against races with | 920 | * The queue->lock protects against races with |
@@ -967,6 +950,7 @@ static void __rpc_execute(struct rpc_task *task) | |||
967 | */ | 950 | */ |
968 | dprintk("RPC: %5u got signal\n", task->tk_pid); | 951 | dprintk("RPC: %5u got signal\n", task->tk_pid); |
969 | set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); | 952 | set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); |
953 | task->tk_rpc_status = -ERESTARTSYS; | ||
970 | rpc_exit(task, -ERESTARTSYS); | 954 | rpc_exit(task, -ERESTARTSYS); |
971 | } | 955 | } |
972 | dprintk("RPC: %5u sync task resuming\n", task->tk_pid); | 956 | dprintk("RPC: %5u sync task resuming\n", task->tk_pid); |
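These hunks set tk_rpc_status by hand because they call rpc_exit() directly; the clnt.c changes above route the same invariant through rpc_call_rpcerror(). Its assumed shape, consistent with how the diff uses it (a sketch, not the verbatim helper):

    /* Record the RPC-level error before terminating the task, so that
     * callers can tell transport failures apart from server status. */
    static void rpc_call_rpcerror(struct rpc_task *task, int status)
    {
            task->tk_rpc_status = status;
            rpc_exit(task, status);
    }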
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 48c93b9e525e..14ba9e72a204 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c | |||
@@ -560,7 +560,7 @@ EXPORT_SYMBOL_GPL(xdr_init_encode); | |||
560 | * required at the end of encoding, or any other time when the xdr_buf | 560 | * required at the end of encoding, or any other time when the xdr_buf |
561 | * data might be read. | 561 | * data might be read. |
562 | */ | 562 | */ |
563 | void xdr_commit_encode(struct xdr_stream *xdr) | 563 | inline void xdr_commit_encode(struct xdr_stream *xdr) |
564 | { | 564 | { |
565 | int shift = xdr->scratch.iov_len; | 565 | int shift = xdr->scratch.iov_len; |
566 | void *page; | 566 | void *page; |
@@ -1236,43 +1236,60 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) | |||
1236 | } | 1236 | } |
1237 | EXPORT_SYMBOL_GPL(xdr_encode_word); | 1237 | EXPORT_SYMBOL_GPL(xdr_encode_word); |
1238 | 1238 | ||
1239 | /* If the netobj starting offset bytes from the start of xdr_buf is contained | 1239 | /** |
1240 | * entirely in the head or the tail, set object to point to it; otherwise | 1240 | * xdr_buf_read_mic() - obtain the address of the GSS mic from xdr buf |
1241 | * try to find space for it at the end of the tail, copy it there, and | 1241 | * @buf: pointer to buffer containing a mic |
1242 | * set obj to point to it. */ | 1242 | * @mic: on success, returns the address of the mic |
1243 | int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset) | 1243 | * @offset: the offset in buf where mic may be found |
1244 | * | ||
1245 | * This function may modify the xdr buf if the mic is found to be straddling | ||
1246 | * a boundary between head, pages, and tail. On success the mic can be read | ||
1247 | * from the address returned. There is no need to free the mic. | ||
1248 | * | ||
1249 | * Return: 0 on success, otherwise a negative errno. | ||
1250 | */ | ||
1251 | int xdr_buf_read_mic(struct xdr_buf *buf, struct xdr_netobj *mic, unsigned int offset) | ||
1244 | { | 1252 | { |
1245 | struct xdr_buf subbuf; | 1253 | struct xdr_buf subbuf; |
1254 | unsigned int boundary; | ||
1246 | 1255 | ||
1247 | if (xdr_decode_word(buf, offset, &obj->len)) | 1256 | if (xdr_decode_word(buf, offset, &mic->len)) |
1248 | return -EFAULT; | 1257 | return -EFAULT; |
1249 | if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len)) | 1258 | offset += 4; |
1259 | |||
1260 | /* Is the mic partially in the head? */ | ||
1261 | boundary = buf->head[0].iov_len; | ||
1262 | if (offset < boundary && (offset + mic->len) > boundary) | ||
1263 | xdr_shift_buf(buf, boundary - offset); | ||
1264 | |||
1265 | /* Is the mic partially in the pages? */ | ||
1266 | boundary += buf->page_len; | ||
1267 | if (offset < boundary && (offset + mic->len) > boundary) | ||
1268 | xdr_shrink_pagelen(buf, boundary - offset); | ||
1269 | |||
1270 | if (xdr_buf_subsegment(buf, &subbuf, offset, mic->len)) | ||
1250 | return -EFAULT; | 1271 | return -EFAULT; |
1251 | 1272 | ||
1252 | /* Is the obj contained entirely in the head? */ | 1273 | /* Is the mic contained entirely in the head? */ |
1253 | obj->data = subbuf.head[0].iov_base; | 1274 | mic->data = subbuf.head[0].iov_base; |
1254 | if (subbuf.head[0].iov_len == obj->len) | 1275 | if (subbuf.head[0].iov_len == mic->len) |
1255 | return 0; | 1276 | return 0; |
1256 | /* ..or is the obj contained entirely in the tail? */ | 1277 | /* ..or is the mic contained entirely in the tail? */ |
1257 | obj->data = subbuf.tail[0].iov_base; | 1278 | mic->data = subbuf.tail[0].iov_base; |
1258 | if (subbuf.tail[0].iov_len == obj->len) | 1279 | if (subbuf.tail[0].iov_len == mic->len) |
1259 | return 0; | 1280 | return 0; |
1260 | 1281 | ||
1261 | /* use end of tail as storage for obj: | 1282 | /* Find a contiguous area in @buf to hold all of @mic */ |
1262 | * (We don't copy to the beginning because then we'd have | 1283 | if (mic->len > buf->buflen - buf->len) |
1263 | * to worry about doing a potentially overlapping copy. | ||
1264 | * This assumes the object is at most half the length of the | ||
1265 | * tail.) */ | ||
1266 | if (obj->len > buf->buflen - buf->len) | ||
1267 | return -ENOMEM; | 1284 | return -ENOMEM; |
1268 | if (buf->tail[0].iov_len != 0) | 1285 | if (buf->tail[0].iov_len != 0) |
1269 | obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len; | 1286 | mic->data = buf->tail[0].iov_base + buf->tail[0].iov_len; |
1270 | else | 1287 | else |
1271 | obj->data = buf->head[0].iov_base + buf->head[0].iov_len; | 1288 | mic->data = buf->head[0].iov_base + buf->head[0].iov_len; |
1272 | __read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len); | 1289 | __read_bytes_from_xdr_buf(&subbuf, mic->data, mic->len); |
1273 | return 0; | 1290 | return 0; |
1274 | } | 1291 | } |
1275 | EXPORT_SYMBOL_GPL(xdr_buf_read_netobj); | 1292 | EXPORT_SYMBOL_GPL(xdr_buf_read_mic); |
1276 | 1293 | ||
1277 | /* Returns 0 on success, or else a negative error code. */ | 1294 | /* Returns 0 on success, or else a negative error code. */ |
1278 | static int | 1295 | static int |
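The two new boundary checks are the heart of the fix: a MIC that merely ends at a boundary is fine, but one that spans head/pages or pages/tail must first be made contiguous. A userspace model of the straddle test (illustrative; all lengths in bytes):

    #include <stdbool.h>
    #include <stdio.h>

    static bool straddles(unsigned int off, unsigned int len,
                          unsigned int boundary)
    {
            return off < boundary && off + len > boundary;
    }

    int main(void)
    {
            unsigned int head_len = 100, page_len = 4096;
            unsigned int mic_off = 90, mic_len = 20;  /* spans head/pages */

            if (straddles(mic_off, mic_len, head_len))
                    puts("shift buf so the MIC stops crossing head/pages");
            else if (straddles(mic_off, mic_len, head_len + page_len))
                    puts("shrink page_len so the MIC stops crossing pages/tail");
            else
                    puts("MIC is already contiguous");
            return 0;
    }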
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2e71f5455c6c..8a45b3ccc313 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -456,6 +456,12 @@ void xprt_release_rqst_cong(struct rpc_task *task) | |||
456 | } | 456 | } |
457 | EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); | 457 | EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); |
458 | 458 | ||
459 | static void xprt_clear_congestion_window_wait_locked(struct rpc_xprt *xprt) | ||
460 | { | ||
461 | if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state)) | ||
462 | __xprt_lock_write_next_cong(xprt); | ||
463 | } | ||
464 | |||
459 | /* | 465 | /* |
460 | * Clear the congestion window wait flag and wake up the next | 466 | * Clear the congestion window wait flag and wake up the next |
461 | * entry on xprt->sending | 467 | * entry on xprt->sending |
@@ -671,6 +677,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt) | |||
671 | spin_lock(&xprt->transport_lock); | 677 | spin_lock(&xprt->transport_lock); |
672 | xprt_clear_connected(xprt); | 678 | xprt_clear_connected(xprt); |
673 | xprt_clear_write_space_locked(xprt); | 679 | xprt_clear_write_space_locked(xprt); |
680 | xprt_clear_congestion_window_wait_locked(xprt); | ||
674 | xprt_wake_pending_tasks(xprt, -ENOTCONN); | 681 | xprt_wake_pending_tasks(xprt, -ENOTCONN); |
675 | spin_unlock(&xprt->transport_lock); | 682 | spin_unlock(&xprt->transport_lock); |
676 | } | 683 | } |
@@ -1324,6 +1331,36 @@ xprt_request_dequeue_transmit(struct rpc_task *task) | |||
1324 | } | 1331 | } |
1325 | 1332 | ||
1326 | /** | 1333 | /** |
1334 | * xprt_request_dequeue_xprt - remove a task from the transmit and receive queues | ||
1335 | * @task: pointer to rpc_task | ||
1336 | * | ||
1337 | * Remove a task from the transmit and receive queues, and ensure that | ||
1338 | * it is not pinned by the receive work item. | ||
1339 | */ | ||
1340 | void | ||
1341 | xprt_request_dequeue_xprt(struct rpc_task *task) | ||
1342 | { | ||
1343 | struct rpc_rqst *req = task->tk_rqstp; | ||
1344 | struct rpc_xprt *xprt = req->rq_xprt; | ||
1345 | |||
1346 | if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) || | ||
1347 | test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) || | ||
1348 | xprt_is_pinned_rqst(req)) { | ||
1349 | spin_lock(&xprt->queue_lock); | ||
1350 | xprt_request_dequeue_transmit_locked(task); | ||
1351 | xprt_request_dequeue_receive_locked(task); | ||
1352 | while (xprt_is_pinned_rqst(req)) { | ||
1353 | set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate); | ||
1354 | spin_unlock(&xprt->queue_lock); | ||
1355 | xprt_wait_on_pinned_rqst(req); | ||
1356 | spin_lock(&xprt->queue_lock); | ||
1357 | clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate); | ||
1358 | } | ||
1359 | spin_unlock(&xprt->queue_lock); | ||
1360 | } | ||
1361 | } | ||
1362 | |||
1363 | /** | ||
1327 | * xprt_request_prepare - prepare an encoded request for transport | 1364 | * xprt_request_prepare - prepare an encoded request for transport |
1328 | * @req: pointer to rpc_rqst | 1365 | * @req: pointer to rpc_rqst |
1329 | * | 1366 | * |
@@ -1747,28 +1784,6 @@ void xprt_retry_reserve(struct rpc_task *task) | |||
1747 | xprt_do_reserve(xprt, task); | 1784 | xprt_do_reserve(xprt, task); |
1748 | } | 1785 | } |
1749 | 1786 | ||
1750 | static void | ||
1751 | xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req) | ||
1752 | { | ||
1753 | struct rpc_xprt *xprt = req->rq_xprt; | ||
1754 | |||
1755 | if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) || | ||
1756 | test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) || | ||
1757 | xprt_is_pinned_rqst(req)) { | ||
1758 | spin_lock(&xprt->queue_lock); | ||
1759 | xprt_request_dequeue_transmit_locked(task); | ||
1760 | xprt_request_dequeue_receive_locked(task); | ||
1761 | while (xprt_is_pinned_rqst(req)) { | ||
1762 | set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate); | ||
1763 | spin_unlock(&xprt->queue_lock); | ||
1764 | xprt_wait_on_pinned_rqst(req); | ||
1765 | spin_lock(&xprt->queue_lock); | ||
1766 | clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate); | ||
1767 | } | ||
1768 | spin_unlock(&xprt->queue_lock); | ||
1769 | } | ||
1770 | } | ||
1771 | |||
1772 | /** | 1787 | /** |
1773 | * xprt_release - release an RPC request slot | 1788 | * xprt_release - release an RPC request slot |
1774 | * @task: task which is finished with the slot | 1789 | * @task: task which is finished with the slot |
@@ -1788,7 +1803,7 @@ void xprt_release(struct rpc_task *task) | |||
1788 | } | 1803 | } |
1789 | 1804 | ||
1790 | xprt = req->rq_xprt; | 1805 | xprt = req->rq_xprt; |
1791 | xprt_request_dequeue_all(task, req); | 1806 | xprt_request_dequeue_xprt(task); |
1792 | spin_lock(&xprt->transport_lock); | 1807 | spin_lock(&xprt->transport_lock); |
1793 | xprt->ops->release_xprt(xprt, task); | 1808 | xprt->ops->release_xprt(xprt, task); |
1794 | if (xprt->ops->release_request) | 1809 | if (xprt->ops->release_request) |
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 59e624b1d7a0..50e075fcdd8f 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c | |||
@@ -54,9 +54,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) | |||
54 | 54 | ||
55 | unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *xprt) | 55 | unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *xprt) |
56 | { | 56 | { |
57 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 57 | return RPCRDMA_BACKWARD_WRS >> 1; |
58 | |||
59 | return r_xprt->rx_buf.rb_bc_srv_max_requests; | ||
60 | } | 58 | } |
61 | 59 | ||
62 | static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) | 60 | static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) |
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 0b6dad7580a1..30065a28628c 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
@@ -7,67 +7,37 @@ | |||
7 | /* Lightweight memory registration using Fast Registration Work | 7 | /* Lightweight memory registration using Fast Registration Work |
8 | * Requests (FRWR). | 8 | * Requests (FRWR). |
9 | * | 9 | * |
10 | * FRWR features ordered asynchronous registration and deregistration | 10 | * FRWR features ordered asynchronous registration and invalidation |
11 | * of arbitrarily sized memory regions. This is the fastest and safest | 11 | * of arbitrarily-sized memory regions. This is the fastest and safest |
12 | * but most complex memory registration mode. | 12 | * but most complex memory registration mode. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | /* Normal operation | 15 | /* Normal operation |
16 | * | 16 | * |
17 | * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG | 17 | * A Memory Region is prepared for RDMA Read or Write using a FAST_REG |
18 | * Work Request (frwr_map). When the RDMA operation is finished, this | 18 | * Work Request (frwr_map). When the RDMA operation is finished, this |
19 | * Memory Region is invalidated using a LOCAL_INV Work Request | 19 | * Memory Region is invalidated using a LOCAL_INV Work Request |
20 | * (frwr_unmap_sync). | 20 | * (frwr_unmap_async and frwr_unmap_sync). |
21 | * | 21 | * |
22 | * Typically these Work Requests are not signaled, and neither are RDMA | 22 | * Typically FAST_REG Work Requests are not signaled, and neither are |
23 | * SEND Work Requests (with the exception of signaling occasionally to | 23 | * RDMA Send Work Requests (with the exception of signaling occasionally |
24 | * prevent provider work queue overflows). This greatly reduces HCA | 24 | * to prevent provider work queue overflows). This greatly reduces HCA |
25 | * interrupt workload. | 25 | * interrupt workload. |
26 | * | ||
27 | * As an optimization, frwr_unmap marks MRs INVALID before the | ||
28 | * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on | ||
29 | * rb_mrs immediately so that no work (like managing a linked list | ||
30 | * under a spinlock) is needed in the completion upcall. | ||
31 | * | ||
32 | * But this means that frwr_map() can occasionally encounter an MR | ||
33 | * that is INVALID but the LOCAL_INV WR has not completed. Work Queue | ||
34 | * ordering prevents a subsequent FAST_REG WR from executing against | ||
35 | * that MR while it is still being invalidated. | ||
36 | */ | 26 | */ |
37 | 27 | ||
38 | /* Transport recovery | 28 | /* Transport recovery |
39 | * | 29 | * |
40 | * ->op_map and the transport connect worker cannot run at the same | 30 | * frwr_map and frwr_unmap_* cannot run at the same time the transport |
41 | * time, but ->op_unmap can fire while the transport connect worker | 31 | * connect worker is running. The connect worker holds the transport |
42 | * is running. Thus MR recovery is handled in ->op_map, to guarantee | 32 | * send lock, just as ->send_request does. This prevents frwr_map and |
43 | * that recovered MRs are owned by a sending RPC, and not one where | 33 | * the connect worker from running concurrently. When a connection is |
44 | * ->op_unmap could fire at the same time transport reconnect is | 34 | * closed, the Receive completion queue is drained before allowing | |
45 | * being done. | 35 | * the connect worker to get control. This prevents frwr_unmap and the |
46 | * | 36 | * connect worker from running concurrently. |
47 | * When the underlying transport disconnects, MRs are left in one of | 37 | * |
48 | * four states: | 38 | * When the underlying transport disconnects, MRs that are in flight |
49 | * | 39 | * are flushed and are likely unusable. Thus all flushed MRs are |
50 | * INVALID: The MR was not in use before the QP entered ERROR state. | 40 | * destroyed. New MRs are created on demand. |
51 | * | ||
52 | * VALID: The MR was registered before the QP entered ERROR state. | ||
53 | * | ||
54 | * FLUSHED_FR: The MR was being registered when the QP entered ERROR | ||
55 | * state, and the pending WR was flushed. | ||
56 | * | ||
57 | * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR | ||
58 | * state, and the pending WR was flushed. | ||
59 | * | ||
60 | * When frwr_map encounters FLUSHED and VALID MRs, they are recovered | ||
61 | * with ib_dereg_mr and then are re-initialized. Because MR recovery | ||
62 | * allocates fresh resources, it is deferred to a workqueue, and the | ||
63 | * recovered MRs are placed back on the rb_mrs list when recovery is | ||
64 | * complete. frwr_map allocates another MR for the current RPC while | ||
65 | * the broken MR is reset. | ||
66 | * | ||
67 | * To ensure that frwr_map doesn't encounter an MR that is marked | ||
68 | * INVALID but that is about to be flushed due to a previous transport | ||
69 | * disconnect, the transport connect worker attempts to drain all | ||
70 | * pending send queue WRs before the transport is reconnected. | ||
71 | */ | 41 | */ |
72 | 42 | ||
73 | #include <linux/sunrpc/rpc_rdma.h> | 43 | #include <linux/sunrpc/rpc_rdma.h> |
@@ -118,15 +88,8 @@ void frwr_release_mr(struct rpcrdma_mr *mr) | |||
118 | kfree(mr); | 88 | kfree(mr); |
119 | } | 89 | } |
120 | 90 | ||
121 | /* MRs are dynamically allocated, so simply clean up and release the MR. | 91 | static void frwr_mr_recycle(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) |
122 | * A replacement MR will subsequently be allocated on demand. | ||
123 | */ | ||
124 | static void | ||
125 | frwr_mr_recycle_worker(struct work_struct *work) | ||
126 | { | 92 | { |
127 | struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle); | ||
128 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; | ||
129 | |||
130 | trace_xprtrdma_mr_recycle(mr); | 93 | trace_xprtrdma_mr_recycle(mr); |
131 | 94 | ||
132 | if (mr->mr_dir != DMA_NONE) { | 95 | if (mr->mr_dir != DMA_NONE) { |
@@ -136,14 +99,40 @@ frwr_mr_recycle_worker(struct work_struct *work) | |||
136 | mr->mr_dir = DMA_NONE; | 99 | mr->mr_dir = DMA_NONE; |
137 | } | 100 | } |
138 | 101 | ||
139 | spin_lock(&r_xprt->rx_buf.rb_mrlock); | 102 | spin_lock(&r_xprt->rx_buf.rb_lock); |
140 | list_del(&mr->mr_all); | 103 | list_del(&mr->mr_all); |
141 | r_xprt->rx_stats.mrs_recycled++; | 104 | r_xprt->rx_stats.mrs_recycled++; |
142 | spin_unlock(&r_xprt->rx_buf.rb_mrlock); | 105 | spin_unlock(&r_xprt->rx_buf.rb_lock); |
143 | 106 | ||
144 | frwr_release_mr(mr); | 107 | frwr_release_mr(mr); |
145 | } | 108 | } |
146 | 109 | ||
110 | /* MRs are dynamically allocated, so simply clean up and release the MR. | ||
111 | * A replacement MR will subsequently be allocated on demand. | ||
112 | */ | ||
113 | static void | ||
114 | frwr_mr_recycle_worker(struct work_struct *work) | ||
115 | { | ||
116 | struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, | ||
117 | mr_recycle); | ||
118 | |||
119 | frwr_mr_recycle(mr->mr_xprt, mr); | ||
120 | } | ||
121 | |||
122 | /* frwr_recycle - Discard MRs | ||
123 | * @req: request to reset | ||
124 | * | ||
125 | * Used after a reconnect. These MRs could be in flight; we can't | ||
126 | * tell. The safe thing to do is to release them. | ||
127 | */ | ||
128 | void frwr_recycle(struct rpcrdma_req *req) | ||
129 | { | ||
130 | struct rpcrdma_mr *mr; | ||
131 | |||
132 | while ((mr = rpcrdma_mr_pop(&req->rl_registered))) | ||
133 | frwr_mr_recycle(mr->mr_xprt, mr); | ||
134 | } | ||
135 | |||
147 | /* frwr_reset - Place MRs back on the free list | 136 | /* frwr_reset - Place MRs back on the free list |
148 | * @req: request to reset | 137 | * @req: request to reset |
149 | * | 138 | * |
@@ -156,12 +145,10 @@ frwr_mr_recycle_worker(struct work_struct *work) | |||
156 | */ | 145 | */ |
157 | void frwr_reset(struct rpcrdma_req *req) | 146 | void frwr_reset(struct rpcrdma_req *req) |
158 | { | 147 | { |
159 | while (!list_empty(&req->rl_registered)) { | 148 | struct rpcrdma_mr *mr; |
160 | struct rpcrdma_mr *mr; | ||
161 | 149 | ||
162 | mr = rpcrdma_mr_pop(&req->rl_registered); | 150 | while ((mr = rpcrdma_mr_pop(&req->rl_registered))) |
163 | rpcrdma_mr_unmap_and_put(mr); | 151 | rpcrdma_mr_put(mr); |
164 | } | ||
165 | } | 152 | } |
166 | 153 | ||
167 | /** | 154 | /** |
@@ -179,11 +166,14 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) | |||
179 | struct ib_mr *frmr; | 166 | struct ib_mr *frmr; |
180 | int rc; | 167 | int rc; |
181 | 168 | ||
169 | /* NB: ib_alloc_mr and device drivers typically allocate | ||
170 | * memory with GFP_KERNEL. | ||
171 | */ | ||
182 | frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); | 172 | frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); |
183 | if (IS_ERR(frmr)) | 173 | if (IS_ERR(frmr)) |
184 | goto out_mr_err; | 174 | goto out_mr_err; |
185 | 175 | ||
186 | sg = kcalloc(depth, sizeof(*sg), GFP_KERNEL); | 176 | sg = kcalloc(depth, sizeof(*sg), GFP_NOFS); |
187 | if (!sg) | 177 | if (!sg) |
188 | goto out_list_err; | 178 | goto out_list_err; |
189 | 179 | ||
@@ -203,8 +193,6 @@ out_mr_err: | |||
203 | return rc; | 193 | return rc; |
204 | 194 | ||
205 | out_list_err: | 195 | out_list_err: |
206 | dprintk("RPC: %s: sg allocation failure\n", | ||
207 | __func__); | ||
208 | ib_dereg_mr(frmr); | 196 | ib_dereg_mr(frmr); |
209 | return -ENOMEM; | 197 | return -ENOMEM; |
210 | } | 198 | } |
@@ -290,8 +278,8 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) | |||
290 | ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; | 278 | ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; |
291 | ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ | 279 | ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ |
292 | 280 | ||
293 | ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / | 281 | ia->ri_max_segs = |
294 | ia->ri_max_frwr_depth); | 282 | DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ia->ri_max_frwr_depth); |
295 | /* Reply chunks require segments for head and tail buffers */ | 283 | /* Reply chunks require segments for head and tail buffers */ |
296 | ia->ri_max_segs += 2; | 284 | ia->ri_max_segs += 2; |
297 | if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS) | 285 | if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS) |
@@ -323,31 +311,25 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt) | |||
323 | * @nsegs: number of segments remaining | 311 | * @nsegs: number of segments remaining |
324 | * @writing: true when RDMA Write will be used | 312 | * @writing: true when RDMA Write will be used |
325 | * @xid: XID of RPC using the registered memory | 313 | * @xid: XID of RPC using the registered memory |
326 | * @out: initialized MR | 314 | * @mr: MR to fill in |
327 | * | 315 | * |
328 | * Prepare a REG_MR Work Request to register a memory region | 316 | * Prepare a REG_MR Work Request to register a memory region |
329 | * for remote access via RDMA READ or RDMA WRITE. | 317 | * for remote access via RDMA READ or RDMA WRITE. |
330 | * | 318 | * |
331 | * Returns the next segment or a negative errno pointer. | 319 | * Returns the next segment or a negative errno pointer. |
332 | * On success, the prepared MR is planted in @out. | 320 | * On success, @mr is filled in. |
333 | */ | 321 | */ |
334 | struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, | 322 | struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, |
335 | struct rpcrdma_mr_seg *seg, | 323 | struct rpcrdma_mr_seg *seg, |
336 | int nsegs, bool writing, __be32 xid, | 324 | int nsegs, bool writing, __be32 xid, |
337 | struct rpcrdma_mr **out) | 325 | struct rpcrdma_mr *mr) |
338 | { | 326 | { |
339 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 327 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
340 | bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; | ||
341 | struct rpcrdma_mr *mr; | ||
342 | struct ib_mr *ibmr; | ||
343 | struct ib_reg_wr *reg_wr; | 328 | struct ib_reg_wr *reg_wr; |
329 | struct ib_mr *ibmr; | ||
344 | int i, n; | 330 | int i, n; |
345 | u8 key; | 331 | u8 key; |
346 | 332 | ||
347 | mr = rpcrdma_mr_get(r_xprt); | ||
348 | if (!mr) | ||
349 | goto out_getmr_err; | ||
350 | |||
351 | if (nsegs > ia->ri_max_frwr_depth) | 333 | if (nsegs > ia->ri_max_frwr_depth) |
352 | nsegs = ia->ri_max_frwr_depth; | 334 | nsegs = ia->ri_max_frwr_depth; |
353 | for (i = 0; i < nsegs;) { | 335 | for (i = 0; i < nsegs;) { |
@@ -362,7 +344,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, | |||
362 | 344 | ||
363 | ++seg; | 345 | ++seg; |
364 | ++i; | 346 | ++i; |
365 | if (holes_ok) | 347 | if (ia->ri_mrtype == IB_MR_TYPE_SG_GAPS) |
366 | continue; | 348 | continue; |
367 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | 349 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || |
368 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | 350 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) |
@@ -397,22 +379,15 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, | |||
397 | mr->mr_offset = ibmr->iova; | 379 | mr->mr_offset = ibmr->iova; |
398 | trace_xprtrdma_mr_map(mr); | 380 | trace_xprtrdma_mr_map(mr); |
399 | 381 | ||
400 | *out = mr; | ||
401 | return seg; | 382 | return seg; |
402 | 383 | ||
403 | out_getmr_err: | ||
404 | xprt_wait_for_buffer_space(&r_xprt->rx_xprt); | ||
405 | return ERR_PTR(-EAGAIN); | ||
406 | |||
407 | out_dmamap_err: | 384 | out_dmamap_err: |
408 | mr->mr_dir = DMA_NONE; | 385 | mr->mr_dir = DMA_NONE; |
409 | trace_xprtrdma_frwr_sgerr(mr, i); | 386 | trace_xprtrdma_frwr_sgerr(mr, i); |
410 | rpcrdma_mr_put(mr); | ||
411 | return ERR_PTR(-EIO); | 387 | return ERR_PTR(-EIO); |
412 | 388 | ||
413 | out_mapmr_err: | 389 | out_mapmr_err: |
414 | trace_xprtrdma_frwr_maperr(mr, n); | 390 | trace_xprtrdma_frwr_maperr(mr, n); |
415 | rpcrdma_mr_recycle(mr); | ||
416 | return ERR_PTR(-EIO); | 391 | return ERR_PTR(-EIO); |
417 | } | 392 | } |
418 | 393 | ||
@@ -485,7 +460,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) | |||
485 | if (mr->mr_handle == rep->rr_inv_rkey) { | 460 | if (mr->mr_handle == rep->rr_inv_rkey) { |
486 | list_del_init(&mr->mr_list); | 461 | list_del_init(&mr->mr_list); |
487 | trace_xprtrdma_mr_remoteinv(mr); | 462 | trace_xprtrdma_mr_remoteinv(mr); |
488 | rpcrdma_mr_unmap_and_put(mr); | 463 | rpcrdma_mr_put(mr); |
489 | break; /* only one invalidated MR per RPC */ | 464 | break; /* only one invalidated MR per RPC */ |
490 | } | 465 | } |
491 | } | 466 | } |
@@ -495,7 +470,7 @@ static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr) | |||
495 | if (wc->status != IB_WC_SUCCESS) | 470 | if (wc->status != IB_WC_SUCCESS) |
496 | rpcrdma_mr_recycle(mr); | 471 | rpcrdma_mr_recycle(mr); |
497 | else | 472 | else |
498 | rpcrdma_mr_unmap_and_put(mr); | 473 | rpcrdma_mr_put(mr); |
499 | } | 474 | } |
500 | 475 | ||
501 | /** | 476 | /** |
@@ -532,8 +507,8 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) | |||
532 | 507 | ||
533 | /* WARNING: Only wr_cqe and status are reliable at this point */ | 508 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
534 | trace_xprtrdma_wc_li_wake(wc, frwr); | 509 | trace_xprtrdma_wc_li_wake(wc, frwr); |
535 | complete(&frwr->fr_linv_done); | ||
536 | __frwr_release_mr(wc, mr); | 510 | __frwr_release_mr(wc, mr); |
511 | complete(&frwr->fr_linv_done); | ||
537 | } | 512 | } |
538 | 513 | ||
539 | /** | 514 | /** |
@@ -562,8 +537,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | |||
562 | */ | 537 | */ |
563 | frwr = NULL; | 538 | frwr = NULL; |
564 | prev = &first; | 539 | prev = &first; |
565 | while (!list_empty(&req->rl_registered)) { | 540 | while ((mr = rpcrdma_mr_pop(&req->rl_registered))) { |
566 | mr = rpcrdma_mr_pop(&req->rl_registered); | ||
567 | 541 | ||
568 | trace_xprtrdma_mr_localinv(mr); | 542 | trace_xprtrdma_mr_localinv(mr); |
569 | r_xprt->rx_stats.local_inv_needed++; | 543 | r_xprt->rx_stats.local_inv_needed++; |
@@ -632,11 +606,15 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc) | |||
632 | struct rpcrdma_frwr *frwr = | 606 | struct rpcrdma_frwr *frwr = |
633 | container_of(cqe, struct rpcrdma_frwr, fr_cqe); | 607 | container_of(cqe, struct rpcrdma_frwr, fr_cqe); |
634 | struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr); | 608 | struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr); |
609 | struct rpcrdma_rep *rep = mr->mr_req->rl_reply; | ||
635 | 610 | ||
636 | /* WARNING: Only wr_cqe and status are reliable at this point */ | 611 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
637 | trace_xprtrdma_wc_li_done(wc, frwr); | 612 | trace_xprtrdma_wc_li_done(wc, frwr); |
638 | rpcrdma_complete_rqst(frwr->fr_req->rl_reply); | ||
639 | __frwr_release_mr(wc, mr); | 613 | __frwr_release_mr(wc, mr); |
614 | |||
615 | /* Ensure @rep is generated before __frwr_release_mr */ | ||
616 | smp_rmb(); | ||
617 | rpcrdma_complete_rqst(rep); | ||
640 | } | 618 | } |
641 | 619 | ||
642 | /** | 620 | /** |
@@ -662,15 +640,13 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | |||
662 | */ | 640 | */ |
663 | frwr = NULL; | 641 | frwr = NULL; |
664 | prev = &first; | 642 | prev = &first; |
665 | while (!list_empty(&req->rl_registered)) { | 643 | while ((mr = rpcrdma_mr_pop(&req->rl_registered))) { |
666 | mr = rpcrdma_mr_pop(&req->rl_registered); | ||
667 | 644 | ||
668 | trace_xprtrdma_mr_localinv(mr); | 645 | trace_xprtrdma_mr_localinv(mr); |
669 | r_xprt->rx_stats.local_inv_needed++; | 646 | r_xprt->rx_stats.local_inv_needed++; |
670 | 647 | ||
671 | frwr = &mr->frwr; | 648 | frwr = &mr->frwr; |
672 | frwr->fr_cqe.done = frwr_wc_localinv; | 649 | frwr->fr_cqe.done = frwr_wc_localinv; |
673 | frwr->fr_req = req; | ||
674 | last = &frwr->fr_invwr; | 650 | last = &frwr->fr_invwr; |
675 | last->next = NULL; | 651 | last->next = NULL; |
676 | last->wr_cqe = &frwr->fr_cqe; | 652 | last->wr_cqe = &frwr->fr_cqe; |
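The rewritten loops in frwr_reset(), frwr_unmap_sync(), and frwr_unmap_async() all depend on rpcrdma_mr_pop() returning NULL when the list is empty. A sketch of the assumed helper (list_first_entry_or_null() is the standard <linux/list.h> accessor):

    static struct rpcrdma_mr *rpcrdma_mr_pop(struct list_head *list)
    {
            struct rpcrdma_mr *mr;

            mr = list_first_entry_or_null(list, struct rpcrdma_mr, mr_list);
            if (mr)
                    list_del_init(&mr->mr_list);
            return mr;
    }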
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 4345e6912392..b86b5fd62d9f 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -342,6 +342,32 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr, | |||
342 | return 0; | 342 | return 0; |
343 | } | 343 | } |
344 | 344 | ||
345 | static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt, | ||
346 | struct rpcrdma_req *req, | ||
347 | struct rpcrdma_mr_seg *seg, | ||
348 | int nsegs, bool writing, | ||
349 | struct rpcrdma_mr **mr) | ||
350 | { | ||
351 | *mr = rpcrdma_mr_pop(&req->rl_free_mrs); | ||
352 | if (!*mr) { | ||
353 | *mr = rpcrdma_mr_get(r_xprt); | ||
354 | if (!*mr) | ||
355 | goto out_getmr_err; | ||
356 | trace_xprtrdma_mr_get(req); | ||
357 | (*mr)->mr_req = req; | ||
358 | } | ||
359 | |||
360 | rpcrdma_mr_push(*mr, &req->rl_registered); | ||
361 | return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr); | ||
362 | |||
363 | out_getmr_err: | ||
364 | trace_xprtrdma_nomrs(req); | ||
365 | xprt_wait_for_buffer_space(&r_xprt->rx_xprt); | ||
366 | if (r_xprt->rx_ep.rep_connected != -ENODEV) | ||
367 | schedule_work(&r_xprt->rx_buf.rb_refresh_worker); | ||
368 | return ERR_PTR(-EAGAIN); | ||
369 | } | ||
370 | |||
345 | /* Register and XDR encode the Read list. Supports encoding a list of read | 371 | /* Register and XDR encode the Read list. Supports encoding a list of read |
346 | * segments that belong to a single read chunk. | 372 | * segments that belong to a single read chunk. |
347 | * | 373 | * |
@@ -356,9 +382,10 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr, | |||
356 | * | 382 | * |
357 | * Only a single @pos value is currently supported. | 383 | * Only a single @pos value is currently supported. |
358 | */ | 384 | */ |
359 | static noinline int | 385 | static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, |
360 | rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | 386 | struct rpcrdma_req *req, |
361 | struct rpc_rqst *rqst, enum rpcrdma_chunktype rtype) | 387 | struct rpc_rqst *rqst, |
388 | enum rpcrdma_chunktype rtype) | ||
362 | { | 389 | { |
363 | struct xdr_stream *xdr = &req->rl_stream; | 390 | struct xdr_stream *xdr = &req->rl_stream; |
364 | struct rpcrdma_mr_seg *seg; | 391 | struct rpcrdma_mr_seg *seg; |
@@ -379,10 +406,9 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
379 | return nsegs; | 406 | return nsegs; |
380 | 407 | ||
381 | do { | 408 | do { |
382 | seg = frwr_map(r_xprt, seg, nsegs, false, rqst->rq_xid, &mr); | 409 | seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr); |
383 | if (IS_ERR(seg)) | 410 | if (IS_ERR(seg)) |
384 | return PTR_ERR(seg); | 411 | return PTR_ERR(seg); |
385 | rpcrdma_mr_push(mr, &req->rl_registered); | ||
386 | 412 | ||
387 | if (encode_read_segment(xdr, mr, pos) < 0) | 413 | if (encode_read_segment(xdr, mr, pos) < 0) |
388 | return -EMSGSIZE; | 414 | return -EMSGSIZE; |
@@ -411,9 +437,10 @@ done: | |||
411 | * | 437 | * |
412 | * Only a single Write chunk is currently supported. | 438 | * Only a single Write chunk is currently supported. |
413 | */ | 439 | */ |
414 | static noinline int | 440 | static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, |
415 | rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | 441 | struct rpcrdma_req *req, |
416 | struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype) | 442 | struct rpc_rqst *rqst, |
443 | enum rpcrdma_chunktype wtype) | ||
417 | { | 444 | { |
418 | struct xdr_stream *xdr = &req->rl_stream; | 445 | struct xdr_stream *xdr = &req->rl_stream; |
419 | struct rpcrdma_mr_seg *seg; | 446 | struct rpcrdma_mr_seg *seg; |
@@ -440,10 +467,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
440 | 467 | ||
441 | nchunks = 0; | 468 | nchunks = 0; |
442 | do { | 469 | do { |
443 | seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr); | 470 | seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr); |
444 | if (IS_ERR(seg)) | 471 | if (IS_ERR(seg)) |
445 | return PTR_ERR(seg); | 472 | return PTR_ERR(seg); |
446 | rpcrdma_mr_push(mr, &req->rl_registered); | ||
447 | 473 | ||
448 | if (encode_rdma_segment(xdr, mr) < 0) | 474 | if (encode_rdma_segment(xdr, mr) < 0) |
449 | return -EMSGSIZE; | 475 | return -EMSGSIZE; |
@@ -474,9 +500,10 @@ done: | |||
474 | * Returns zero on success, or a negative errno if a failure occurred. | 500 | * Returns zero on success, or a negative errno if a failure occurred. |
475 | * @xdr is advanced to the next position in the stream. | 501 | * @xdr is advanced to the next position in the stream. |
476 | */ | 502 | */ |
477 | static noinline int | 503 | static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, |
478 | rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | 504 | struct rpcrdma_req *req, |
479 | struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype) | 505 | struct rpc_rqst *rqst, |
506 | enum rpcrdma_chunktype wtype) | ||
480 | { | 507 | { |
481 | struct xdr_stream *xdr = &req->rl_stream; | 508 | struct xdr_stream *xdr = &req->rl_stream; |
482 | struct rpcrdma_mr_seg *seg; | 509 | struct rpcrdma_mr_seg *seg; |
@@ -501,10 +528,9 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
501 | 528 | ||
502 | nchunks = 0; | 529 | nchunks = 0; |
503 | do { | 530 | do { |
504 | seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr); | 531 | seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr); |
505 | if (IS_ERR(seg)) | 532 | if (IS_ERR(seg)) |
506 | return PTR_ERR(seg); | 533 | return PTR_ERR(seg); |
507 | rpcrdma_mr_push(mr, &req->rl_registered); | ||
508 | 534 | ||
509 | if (encode_rdma_segment(xdr, mr) < 0) | 535 | if (encode_rdma_segment(xdr, mr) < 0) |
510 | return -EMSGSIZE; | 536 | return -EMSGSIZE; |
@@ -841,12 +867,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) | |||
841 | * chunks. Very likely the connection has been replaced, | 867 | * chunks. Very likely the connection has been replaced, |
842 | * so these registrations are invalid and unusable. | 868 | * so these registrations are invalid and unusable. |
843 | */ | 869 | */ |
844 | while (unlikely(!list_empty(&req->rl_registered))) { | 870 | frwr_recycle(req); |
845 | struct rpcrdma_mr *mr; | ||
846 | |||
847 | mr = rpcrdma_mr_pop(&req->rl_registered); | ||
848 | rpcrdma_mr_recycle(mr); | ||
849 | } | ||
850 | 871 | ||
851 | /* This implementation supports the following combinations | 872 | /* This implementation supports the following combinations |
852 | * of chunk lists in one RPC-over-RDMA Call message: | 873 | * of chunk lists in one RPC-over-RDMA Call message: |
@@ -1240,8 +1261,6 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep) | |||
1240 | struct rpc_rqst *rqst = rep->rr_rqst; | 1261 | struct rpc_rqst *rqst = rep->rr_rqst; |
1241 | int status; | 1262 | int status; |
1242 | 1263 | ||
1243 | xprt->reestablish_timeout = 0; | ||
1244 | |||
1245 | switch (rep->rr_proc) { | 1264 | switch (rep->rr_proc) { |
1246 | case rdma_msg: | 1265 | case rdma_msg: |
1247 | status = rpcrdma_decode_msg(r_xprt, rep, rqst); | 1266 | status = rpcrdma_decode_msg(r_xprt, rep, rqst); |
@@ -1300,6 +1319,12 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
1300 | u32 credits; | 1319 | u32 credits; |
1301 | __be32 *p; | 1320 | __be32 *p; |
1302 | 1321 | ||
1322 | /* Any data means we had a useful conversation, so | ||
1323 | * we don't need to delay the next reconnect. | ||
1324 | */ | ||
1325 | if (xprt->reestablish_timeout) | ||
1326 | xprt->reestablish_timeout = 0; | ||
1327 | |||
1303 | /* Fixed transport header fields */ | 1328 | /* Fixed transport header fields */ |
1304 | xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, | 1329 | xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, |
1305 | rep->rr_hdrbuf.head[0].iov_base, NULL); | 1330 | rep->rr_hdrbuf.head[0].iov_base, NULL); |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 2ec349ed4770..160558b4135e 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -423,8 +423,6 @@ void xprt_rdma_close(struct rpc_xprt *xprt) | |||
423 | 423 | ||
424 | if (ep->rep_connected == -ENODEV) | 424 | if (ep->rep_connected == -ENODEV) |
425 | return; | 425 | return; |
426 | if (ep->rep_connected > 0) | ||
427 | xprt->reestablish_timeout = 0; | ||
428 | rpcrdma_ep_disconnect(ep, ia); | 426 | rpcrdma_ep_disconnect(ep, ia); |
429 | 427 | ||
430 | /* Prepare @xprt for the next connection by reinitializing | 428 | /* Prepare @xprt for the next connection by reinitializing |
@@ -434,6 +432,7 @@ void xprt_rdma_close(struct rpc_xprt *xprt) | |||
434 | xprt->cwnd = RPC_CWNDSHIFT; | 432 | xprt->cwnd = RPC_CWNDSHIFT; |
435 | 433 | ||
436 | out: | 434 | out: |
435 | xprt->reestablish_timeout = 0; | ||
437 | ++xprt->connect_cookie; | 436 | ++xprt->connect_cookie; |
438 | xprt_disconnect_done(xprt); | 437 | xprt_disconnect_done(xprt); |
439 | } | 438 | } |
@@ -494,9 +493,9 @@ xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) | |||
494 | * @reconnect_timeout: reconnect timeout after server disconnects | 493 | * @reconnect_timeout: reconnect timeout after server disconnects |
495 | * | 494 | * |
496 | */ | 495 | */ |
497 | static void xprt_rdma_tcp_set_connect_timeout(struct rpc_xprt *xprt, | 496 | static void xprt_rdma_set_connect_timeout(struct rpc_xprt *xprt, |
498 | unsigned long connect_timeout, | 497 | unsigned long connect_timeout, |
499 | unsigned long reconnect_timeout) | 498 | unsigned long reconnect_timeout) |
500 | { | 499 | { |
501 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 500 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
502 | 501 | ||
@@ -571,6 +570,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) | |||
571 | return; | 570 | return; |
572 | 571 | ||
573 | out_sleep: | 572 | out_sleep: |
573 | set_bit(XPRT_CONGESTED, &xprt->state); | ||
574 | rpc_sleep_on(&xprt->backlog, task, NULL); | 574 | rpc_sleep_on(&xprt->backlog, task, NULL); |
575 | task->tk_status = -EAGAIN; | 575 | task->tk_status = -EAGAIN; |
576 | } | 576 | } |
@@ -589,7 +589,8 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) | |||
589 | 589 | ||
590 | memset(rqst, 0, sizeof(*rqst)); | 590 | memset(rqst, 0, sizeof(*rqst)); |
591 | rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); | 591 | rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); |
592 | rpc_wake_up_next(&xprt->backlog); | 592 | if (unlikely(!rpc_wake_up_next(&xprt->backlog))) |
593 | clear_bit(XPRT_CONGESTED, &xprt->state); | ||
593 | } | 594 | } |
594 | 595 | ||
595 | static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt, | 596 | static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt, |
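These two transport.c hunks close a wake-up race on the backlog queue: a task that fails to get a slot now sets XPRT_CONGESTED before it sleeps, and the slot-free path clears the bit only when rpc_wake_up_next() finds nobody to wake. A minimal sketch of the resulting invariant, using the names from the diff:

/* XPRT_CONGESTED stays set while any task may be waiting on
 * xprt->backlog; it is cleared only once a wake-up attempt finds
 * the queue empty, so no waiter can be stranded. */
static void example_backlog_sleep(struct rpc_xprt *xprt,
				  struct rpc_task *task)
{
	set_bit(XPRT_CONGESTED, &xprt->state);	/* before sleeping */
	rpc_sleep_on(&xprt->backlog, task, NULL);
	task->tk_status = -EAGAIN;
}

static void example_backlog_wake(struct rpc_xprt *xprt)
{
	if (unlikely(!rpc_wake_up_next(&xprt->backlog)))
		clear_bit(XPRT_CONGESTED, &xprt->state);
}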
@@ -803,7 +804,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = { | |||
803 | .send_request = xprt_rdma_send_request, | 804 | .send_request = xprt_rdma_send_request, |
804 | .close = xprt_rdma_close, | 805 | .close = xprt_rdma_close, |
805 | .destroy = xprt_rdma_destroy, | 806 | .destroy = xprt_rdma_destroy, |
806 | .set_connect_timeout = xprt_rdma_tcp_set_connect_timeout, | 807 | .set_connect_timeout = xprt_rdma_set_connect_timeout, |
807 | .print_stats = xprt_rdma_print_stats, | 808 | .print_stats = xprt_rdma_print_stats, |
808 | .enable_swap = xprt_rdma_enable_swap, | 809 | .enable_swap = xprt_rdma_enable_swap, |
809 | .disable_swap = xprt_rdma_disable_swap, | 810 | .disable_swap = xprt_rdma_disable_swap, |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index b10aa16557f0..3a907537e2cf 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <linux/slab.h> | 53 | #include <linux/slab.h> |
54 | #include <linux/sunrpc/addr.h> | 54 | #include <linux/sunrpc/addr.h> |
55 | #include <linux/sunrpc/svc_rdma.h> | 55 | #include <linux/sunrpc/svc_rdma.h> |
56 | #include <linux/log2.h> | ||
56 | 57 | ||
57 | #include <asm-generic/barrier.h> | 58 | #include <asm-generic/barrier.h> |
58 | #include <asm/bitops.h> | 59 | #include <asm/bitops.h> |
@@ -74,8 +75,10 @@ | |||
74 | * internal functions | 75 | * internal functions |
75 | */ | 76 | */ |
76 | static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); | 77 | static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); |
78 | static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf); | ||
77 | static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); | 79 | static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); |
78 | static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); | 80 | static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); |
81 | static void rpcrdma_mr_free(struct rpcrdma_mr *mr); | ||
79 | static struct rpcrdma_regbuf * | 82 | static struct rpcrdma_regbuf * |
80 | rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, | 83 | rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, |
81 | gfp_t flags); | 84 | gfp_t flags); |
@@ -405,9 +408,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) | |||
405 | struct rpcrdma_ep *ep = &r_xprt->rx_ep; | 408 | struct rpcrdma_ep *ep = &r_xprt->rx_ep; |
406 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 409 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
407 | struct rpcrdma_req *req; | 410 | struct rpcrdma_req *req; |
408 | struct rpcrdma_rep *rep; | ||
409 | 411 | ||
410 | cancel_delayed_work_sync(&buf->rb_refresh_worker); | 412 | cancel_work_sync(&buf->rb_refresh_worker); |
411 | 413 | ||
412 | /* This is similar to rpcrdma_ep_destroy, but: | 414 | /* This is similar to rpcrdma_ep_destroy, but: |
413 | * - Don't cancel the connect worker. | 415 | * - Don't cancel the connect worker. |
@@ -429,8 +431,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) | |||
429 | /* The ULP is responsible for ensuring all DMA | 431 | /* The ULP is responsible for ensuring all DMA |
430 | * mappings and MRs are gone. | 432 | * mappings and MRs are gone. |
431 | */ | 433 | */ |
432 | list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list) | 434 | rpcrdma_reps_destroy(buf); |
433 | rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); | ||
434 | list_for_each_entry(req, &buf->rb_allreqs, rl_all) { | 435 | list_for_each_entry(req, &buf->rb_allreqs, rl_all) { |
435 | rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf); | 436 | rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf); |
436 | rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); | 437 | rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); |
@@ -604,10 +605,10 @@ void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt) | |||
604 | * Unlike a normal reconnection, a fresh PD and a new set | 605 | * Unlike a normal reconnection, a fresh PD and a new set |
605 | * of MRs and buffers is needed. | 606 | * of MRs and buffers is needed. |
606 | */ | 607 | */ |
607 | static int | 608 | static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, |
608 | rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, | 609 | struct ib_qp_init_attr *qp_init_attr) |
609 | struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | ||
610 | { | 610 | { |
611 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
611 | int rc, err; | 612 | int rc, err; |
612 | 613 | ||
613 | trace_xprtrdma_reinsert(r_xprt); | 614 | trace_xprtrdma_reinsert(r_xprt); |
@@ -624,7 +625,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, | |||
624 | } | 625 | } |
625 | 626 | ||
626 | rc = -ENETUNREACH; | 627 | rc = -ENETUNREACH; |
627 | err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); | 628 | err = rdma_create_qp(ia->ri_id, ia->ri_pd, qp_init_attr); |
628 | if (err) { | 629 | if (err) { |
629 | pr_err("rpcrdma: rdma_create_qp returned %d\n", err); | 630 | pr_err("rpcrdma: rdma_create_qp returned %d\n", err); |
630 | goto out3; | 631 | goto out3; |
@@ -641,16 +642,16 @@ out1: | |||
641 | return rc; | 642 | return rc; |
642 | } | 643 | } |
643 | 644 | ||
644 | static int | 645 | static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, |
645 | rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, | 646 | struct ib_qp_init_attr *qp_init_attr) |
646 | struct rpcrdma_ia *ia) | ||
647 | { | 647 | { |
648 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
648 | struct rdma_cm_id *id, *old; | 649 | struct rdma_cm_id *id, *old; |
649 | int err, rc; | 650 | int err, rc; |
650 | 651 | ||
651 | trace_xprtrdma_reconnect(r_xprt); | 652 | trace_xprtrdma_reconnect(r_xprt); |
652 | 653 | ||
653 | rpcrdma_ep_disconnect(ep, ia); | 654 | rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia); |
654 | 655 | ||
655 | rc = -EHOSTUNREACH; | 656 | rc = -EHOSTUNREACH; |
656 | id = rpcrdma_create_id(r_xprt, ia); | 657 | id = rpcrdma_create_id(r_xprt, ia); |
@@ -672,7 +673,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, | |||
672 | goto out_destroy; | 673 | goto out_destroy; |
673 | } | 674 | } |
674 | 675 | ||
675 | err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); | 676 | err = rdma_create_qp(id, ia->ri_pd, qp_init_attr); |
676 | if (err) | 677 | if (err) |
677 | goto out_destroy; | 678 | goto out_destroy; |
678 | 679 | ||
@@ -697,25 +698,27 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
697 | struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, | 698 | struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, |
698 | rx_ia); | 699 | rx_ia); |
699 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; | 700 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
701 | struct ib_qp_init_attr qp_init_attr; | ||
700 | int rc; | 702 | int rc; |
701 | 703 | ||
702 | retry: | 704 | retry: |
705 | memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr)); | ||
703 | switch (ep->rep_connected) { | 706 | switch (ep->rep_connected) { |
704 | case 0: | 707 | case 0: |
705 | dprintk("RPC: %s: connecting...\n", __func__); | 708 | dprintk("RPC: %s: connecting...\n", __func__); |
706 | rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); | 709 | rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr); |
707 | if (rc) { | 710 | if (rc) { |
708 | rc = -ENETUNREACH; | 711 | rc = -ENETUNREACH; |
709 | goto out_noupdate; | 712 | goto out_noupdate; |
710 | } | 713 | } |
711 | break; | 714 | break; |
712 | case -ENODEV: | 715 | case -ENODEV: |
713 | rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia); | 716 | rc = rpcrdma_ep_recreate_xprt(r_xprt, &qp_init_attr); |
714 | if (rc) | 717 | if (rc) |
715 | goto out_noupdate; | 718 | goto out_noupdate; |
716 | break; | 719 | break; |
717 | default: | 720 | default: |
718 | rc = rpcrdma_ep_reconnect(r_xprt, ep, ia); | 721 | rc = rpcrdma_ep_reconnect(r_xprt, &qp_init_attr); |
719 | if (rc) | 722 | if (rc) |
720 | goto out; | 723 | goto out; |
721 | } | 724 | } |
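Note the new pattern in rpcrdma_ep_connect() above: every pass through the retry loop copies ep->rep_attr into an on-stack ib_qp_init_attr and hands the copy to rdma_create_qp(). The likely rationale (inferred, not stated in the hunk) is that the verbs layer may write adjusted values back into the attr structure it is given, so the stored template must stay pristine across reconnects. In isolation:

/* Illustrative helper: give rdma_create_qp() a scratch copy so
 * the saved QP template is never modified (assumption: the
 * provider may adjust the attrs it is passed). */
static int example_create_qp(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct ib_qp_init_attr qp_init_attr;

	memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr));
	return rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr);
}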
@@ -729,6 +732,8 @@ retry: | |||
729 | if (rc) | 732 | if (rc) |
730 | goto out; | 733 | goto out; |
731 | 734 | ||
735 | if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) | ||
736 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; | ||
732 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); | 737 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); |
733 | if (ep->rep_connected <= 0) { | 738 | if (ep->rep_connected <= 0) { |
734 | if (ep->rep_connected == -EAGAIN) | 739 | if (ep->rep_connected == -EAGAIN) |
@@ -942,14 +947,12 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) | |||
942 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 947 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
943 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 948 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
944 | unsigned int count; | 949 | unsigned int count; |
945 | LIST_HEAD(free); | ||
946 | LIST_HEAD(all); | ||
947 | 950 | ||
948 | for (count = 0; count < ia->ri_max_segs; count++) { | 951 | for (count = 0; count < ia->ri_max_segs; count++) { |
949 | struct rpcrdma_mr *mr; | 952 | struct rpcrdma_mr *mr; |
950 | int rc; | 953 | int rc; |
951 | 954 | ||
952 | mr = kzalloc(sizeof(*mr), GFP_KERNEL); | 955 | mr = kzalloc(sizeof(*mr), GFP_NOFS); |
953 | if (!mr) | 956 | if (!mr) |
954 | break; | 957 | break; |
955 | 958 | ||
@@ -961,15 +964,13 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) | |||
961 | 964 | ||
962 | mr->mr_xprt = r_xprt; | 965 | mr->mr_xprt = r_xprt; |
963 | 966 | ||
964 | list_add(&mr->mr_list, &free); | 967 | spin_lock(&buf->rb_lock); |
965 | list_add(&mr->mr_all, &all); | 968 | list_add(&mr->mr_list, &buf->rb_mrs); |
969 | list_add(&mr->mr_all, &buf->rb_all_mrs); | ||
970 | spin_unlock(&buf->rb_lock); | ||
966 | } | 971 | } |
967 | 972 | ||
968 | spin_lock(&buf->rb_mrlock); | ||
969 | list_splice(&free, &buf->rb_mrs); | ||
970 | list_splice(&all, &buf->rb_all); | ||
971 | r_xprt->rx_stats.mrs_allocated += count; | 973 | r_xprt->rx_stats.mrs_allocated += count; |
972 | spin_unlock(&buf->rb_mrlock); | ||
973 | trace_xprtrdma_createmrs(r_xprt, count); | 974 | trace_xprtrdma_createmrs(r_xprt, count); |
974 | } | 975 | } |
975 | 976 | ||
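Two changes in rpcrdma_mrs_create() above: the allocation switches to GFP_NOFS, presumably because MR refresh can run on behalf of NFS writeback and must not recurse into filesystem reclaim, and each MR is published directly onto buf->rb_mrs / buf->rb_all_mrs under the consolidated rb_lock instead of being staged on local lists and spliced in afterwards. A stand-alone sketch of the growth pattern (illustrative types):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct example_item {
	struct list_head link;
};

struct example_pool {
	spinlock_t lock;		/* assume initialized elsewhere */
	struct list_head items;
};

/* Allocate outside the lock, publish under it, one item at a
 * time; a failed allocation simply ends the refill early. */
static void example_pool_grow(struct example_pool *pool, unsigned int count)
{
	while (count--) {
		struct example_item *item;

		item = kzalloc(sizeof(*item), GFP_NOFS);
		if (!item)
			break;

		spin_lock(&pool->lock);
		list_add(&item->link, &pool->items);
		spin_unlock(&pool->lock);
	}
}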
@@ -977,7 +978,7 @@ static void | |||
977 | rpcrdma_mr_refresh_worker(struct work_struct *work) | 978 | rpcrdma_mr_refresh_worker(struct work_struct *work) |
978 | { | 979 | { |
979 | struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, | 980 | struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, |
980 | rb_refresh_worker.work); | 981 | rb_refresh_worker); |
981 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, | 982 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, |
982 | rx_buf); | 983 | rx_buf); |
983 | 984 | ||
@@ -999,12 +1000,18 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, | |||
999 | struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; | 1000 | struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; |
1000 | struct rpcrdma_regbuf *rb; | 1001 | struct rpcrdma_regbuf *rb; |
1001 | struct rpcrdma_req *req; | 1002 | struct rpcrdma_req *req; |
1003 | size_t maxhdrsize; | ||
1002 | 1004 | ||
1003 | req = kzalloc(sizeof(*req), flags); | 1005 | req = kzalloc(sizeof(*req), flags); |
1004 | if (req == NULL) | 1006 | if (req == NULL) |
1005 | goto out1; | 1007 | goto out1; |
1006 | 1008 | ||
1007 | rb = rpcrdma_regbuf_alloc(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags); | 1009 | /* Compute maximum header buffer size in bytes */ |
1010 | maxhdrsize = rpcrdma_fixed_maxsz + 3 + | ||
1011 | r_xprt->rx_ia.ri_max_segs * rpcrdma_readchunk_maxsz; | ||
1012 | maxhdrsize *= sizeof(__be32); | ||
1013 | rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), | ||
1014 | DMA_TO_DEVICE, flags); | ||
1008 | if (!rb) | 1015 | if (!rb) |
1009 | goto out2; | 1016 | goto out2; |
1010 | req->rl_rdmabuf = rb; | 1017 | req->rl_rdmabuf = rb; |
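A quick sanity check on the new sizing logic, assuming the chunk-encoding constants near the top of rpc_rdma.c are rpcrdma_fixed_maxsz = 4 and rpcrdma_readchunk_maxsz = 6 XDR words (treat those values as an assumption of this note; the extra 3 words would cover the three chunk-list discriminators):

/* Worked example under the assumptions stated above:
 *
 *   ri_max_segs =  8: (4 + 3 +  8 * 6) * 4 = 220 bytes
 *                     -> __roundup_pow_of_two() gives 256, the
 *                        old fixed RPCRDMA_HDRBUF_SIZE.
 *   ri_max_segs = 16: (4 + 3 + 16 * 6) * 4 = 412 bytes
 *                     -> a 512-byte header buffer.
 */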
@@ -1018,6 +1025,7 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, | |||
1018 | if (!req->rl_recvbuf) | 1025 | if (!req->rl_recvbuf) |
1019 | goto out4; | 1026 | goto out4; |
1020 | 1027 | ||
1028 | INIT_LIST_HEAD(&req->rl_free_mrs); | ||
1021 | INIT_LIST_HEAD(&req->rl_registered); | 1029 | INIT_LIST_HEAD(&req->rl_registered); |
1022 | spin_lock(&buffer->rb_lock); | 1030 | spin_lock(&buffer->rb_lock); |
1023 | list_add(&req->rl_all, &buffer->rb_allreqs); | 1031 | list_add(&req->rl_all, &buffer->rb_allreqs); |
@@ -1065,6 +1073,40 @@ out: | |||
1065 | return NULL; | 1073 | return NULL; |
1066 | } | 1074 | } |
1067 | 1075 | ||
1076 | static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) | ||
1077 | { | ||
1078 | rpcrdma_regbuf_free(rep->rr_rdmabuf); | ||
1079 | kfree(rep); | ||
1080 | } | ||
1081 | |||
1082 | static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf) | ||
1083 | { | ||
1084 | struct llist_node *node; | ||
1085 | |||
1086 | /* Calls to llist_del_first are required to be serialized */ | ||
1087 | node = llist_del_first(&buf->rb_free_reps); | ||
1088 | if (!node) | ||
1089 | return NULL; | ||
1090 | return llist_entry(node, struct rpcrdma_rep, rr_node); | ||
1091 | } | ||
1092 | |||
1093 | static void rpcrdma_rep_put(struct rpcrdma_buffer *buf, | ||
1094 | struct rpcrdma_rep *rep) | ||
1095 | { | ||
1096 | if (!rep->rr_temp) | ||
1097 | llist_add(&rep->rr_node, &buf->rb_free_reps); | ||
1098 | else | ||
1099 | rpcrdma_rep_destroy(rep); | ||
1100 | } | ||
1101 | |||
1102 | static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) | ||
1103 | { | ||
1104 | struct rpcrdma_rep *rep; | ||
1105 | |||
1106 | while ((rep = rpcrdma_rep_get_locked(buf)) != NULL) | ||
1107 | rpcrdma_rep_destroy(rep); | ||
1108 | } | ||
1109 | |||
1068 | /** | 1110 | /** |
1069 | * rpcrdma_buffer_create - Create initial set of req/rep objects | 1111 | * rpcrdma_buffer_create - Create initial set of req/rep objects |
1070 | * @r_xprt: transport instance to (re)initialize | 1112 | * @r_xprt: transport instance to (re)initialize |
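The helpers added above replace the spinlocked rb_recv_bufs list with an llist: rpcrdma_rep_put() may be called from any completion context and pushes with a single atomic exchange, while rpcrdma_rep_get_locked() depends on its callers being serialized, as llist_del_first() requires. Later hunks make the Receive-posting loop and the teardown paths the only consumers. The same pattern in isolation (illustrative names):

#include <linux/llist.h>

struct example_rep {
	struct llist_node node;
};

static LLIST_HEAD(example_free_reps);

/* Producers may run concurrently from any context: llist_add()
 * is one atomic exchange, no lock required. */
static void example_rep_put(struct example_rep *rep)
{
	llist_add(&rep->node, &example_free_reps);
}

/* Consumers must serialize among themselves, as the comment in
 * the hunk above notes for llist_del_first(). */
static struct example_rep *example_rep_get(void)
{
	struct llist_node *node = llist_del_first(&example_free_reps);

	return node ? llist_entry(node, struct example_rep, node) : NULL;
}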
@@ -1078,12 +1120,10 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1078 | 1120 | ||
1079 | buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests; | 1121 | buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests; |
1080 | buf->rb_bc_srv_max_requests = 0; | 1122 | buf->rb_bc_srv_max_requests = 0; |
1081 | spin_lock_init(&buf->rb_mrlock); | ||
1082 | spin_lock_init(&buf->rb_lock); | 1123 | spin_lock_init(&buf->rb_lock); |
1083 | INIT_LIST_HEAD(&buf->rb_mrs); | 1124 | INIT_LIST_HEAD(&buf->rb_mrs); |
1084 | INIT_LIST_HEAD(&buf->rb_all); | 1125 | INIT_LIST_HEAD(&buf->rb_all_mrs); |
1085 | INIT_DELAYED_WORK(&buf->rb_refresh_worker, | 1126 | INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker); |
1086 | rpcrdma_mr_refresh_worker); | ||
1087 | 1127 | ||
1088 | rpcrdma_mrs_create(r_xprt); | 1128 | rpcrdma_mrs_create(r_xprt); |
1089 | 1129 | ||
@@ -1102,7 +1142,7 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1102 | } | 1142 | } |
1103 | 1143 | ||
1104 | buf->rb_credits = 1; | 1144 | buf->rb_credits = 1; |
1105 | INIT_LIST_HEAD(&buf->rb_recv_bufs); | 1145 | init_llist_head(&buf->rb_free_reps); |
1106 | 1146 | ||
1107 | rc = rpcrdma_sendctxs_create(r_xprt); | 1147 | rc = rpcrdma_sendctxs_create(r_xprt); |
1108 | if (rc) | 1148 | if (rc) |
@@ -1114,12 +1154,6 @@ out: | |||
1114 | return rc; | 1154 | return rc; |
1115 | } | 1155 | } |
1116 | 1156 | ||
1117 | static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) | ||
1118 | { | ||
1119 | rpcrdma_regbuf_free(rep->rr_rdmabuf); | ||
1120 | kfree(rep); | ||
1121 | } | ||
1122 | |||
1123 | /** | 1157 | /** |
1124 | * rpcrdma_req_destroy - Destroy an rpcrdma_req object | 1158 | * rpcrdma_req_destroy - Destroy an rpcrdma_req object |
1125 | * @req: unused object to be destroyed | 1159 | * @req: unused object to be destroyed |
@@ -1127,11 +1161,13 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) | |||
1127 | * This function assumes that the caller prevents concurrent device | 1161 | * This function assumes that the caller prevents concurrent device |
1128 | * unload and transport tear-down. | 1162 | * unload and transport tear-down. |
1129 | */ | 1163 | */ |
1130 | void | 1164 | void rpcrdma_req_destroy(struct rpcrdma_req *req) |
1131 | rpcrdma_req_destroy(struct rpcrdma_req *req) | ||
1132 | { | 1165 | { |
1133 | list_del(&req->rl_all); | 1166 | list_del(&req->rl_all); |
1134 | 1167 | ||
1168 | while (!list_empty(&req->rl_free_mrs)) | ||
1169 | rpcrdma_mr_free(rpcrdma_mr_pop(&req->rl_free_mrs)); | ||
1170 | |||
1135 | rpcrdma_regbuf_free(req->rl_recvbuf); | 1171 | rpcrdma_regbuf_free(req->rl_recvbuf); |
1136 | rpcrdma_regbuf_free(req->rl_sendbuf); | 1172 | rpcrdma_regbuf_free(req->rl_sendbuf); |
1137 | rpcrdma_regbuf_free(req->rl_rdmabuf); | 1173 | rpcrdma_regbuf_free(req->rl_rdmabuf); |
@@ -1147,25 +1183,19 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) | |||
1147 | unsigned int count; | 1183 | unsigned int count; |
1148 | 1184 | ||
1149 | count = 0; | 1185 | count = 0; |
1150 | spin_lock(&buf->rb_mrlock); | 1186 | spin_lock(&buf->rb_lock); |
1151 | while (!list_empty(&buf->rb_all)) { | 1187 | while ((mr = list_first_entry_or_null(&buf->rb_all_mrs, |
1152 | mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all); | 1188 | struct rpcrdma_mr, |
1189 | mr_all)) != NULL) { | ||
1153 | list_del(&mr->mr_all); | 1190 | list_del(&mr->mr_all); |
1154 | 1191 | spin_unlock(&buf->rb_lock); | |
1155 | spin_unlock(&buf->rb_mrlock); | ||
1156 | |||
1157 | /* Ensure MW is not on any rl_registered list */ | ||
1158 | if (!list_empty(&mr->mr_list)) | ||
1159 | list_del(&mr->mr_list); | ||
1160 | 1192 | ||
1161 | frwr_release_mr(mr); | 1193 | frwr_release_mr(mr); |
1162 | count++; | 1194 | count++; |
1163 | spin_lock(&buf->rb_mrlock); | 1195 | spin_lock(&buf->rb_lock); |
1164 | } | 1196 | } |
1165 | spin_unlock(&buf->rb_mrlock); | 1197 | spin_unlock(&buf->rb_lock); |
1166 | r_xprt->rx_stats.mrs_allocated = 0; | 1198 | r_xprt->rx_stats.mrs_allocated = 0; |
1167 | |||
1168 | dprintk("RPC: %s: released %u MRs\n", __func__, count); | ||
1169 | } | 1199 | } |
1170 | 1200 | ||
1171 | /** | 1201 | /** |
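rpcrdma_mrs_destroy() above also moves to the consolidated rb_lock and a list_first_entry_or_null() loop. The shape is the usual "drain a locked list with a sleeping destructor" pattern: pop under the lock, release with the lock dropped, since frwr_release_mr() ends in ib_dereg_mr(), which can sleep. In isolation (illustrative types):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct example_item {
	struct list_head link;
};

struct example_pool {
	spinlock_t lock;
	struct list_head items;
};

static void example_item_release(struct example_item *item)
{
	kfree(item);	/* stands in for a destructor that may sleep */
}

static void example_pool_drain(struct example_pool *pool)
{
	struct example_item *item;

	spin_lock(&pool->lock);
	while ((item = list_first_entry_or_null(&pool->items,
						struct example_item,
						link)) != NULL) {
		list_del(&item->link);
		spin_unlock(&pool->lock);

		example_item_release(item);

		spin_lock(&pool->lock);
	}
	spin_unlock(&pool->lock);
}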
@@ -1179,18 +1209,10 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) | |||
1179 | void | 1209 | void |
1180 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | 1210 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) |
1181 | { | 1211 | { |
1182 | cancel_delayed_work_sync(&buf->rb_refresh_worker); | 1212 | cancel_work_sync(&buf->rb_refresh_worker); |
1183 | 1213 | ||
1184 | rpcrdma_sendctxs_destroy(buf); | 1214 | rpcrdma_sendctxs_destroy(buf); |
1185 | 1215 | rpcrdma_reps_destroy(buf); | |
1186 | while (!list_empty(&buf->rb_recv_bufs)) { | ||
1187 | struct rpcrdma_rep *rep; | ||
1188 | |||
1189 | rep = list_first_entry(&buf->rb_recv_bufs, | ||
1190 | struct rpcrdma_rep, rr_list); | ||
1191 | list_del(&rep->rr_list); | ||
1192 | rpcrdma_rep_destroy(rep); | ||
1193 | } | ||
1194 | 1216 | ||
1195 | while (!list_empty(&buf->rb_send_bufs)) { | 1217 | while (!list_empty(&buf->rb_send_bufs)) { |
1196 | struct rpcrdma_req *req; | 1218 | struct rpcrdma_req *req; |
@@ -1215,54 +1237,20 @@ struct rpcrdma_mr * | |||
1215 | rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) | 1237 | rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) |
1216 | { | 1238 | { |
1217 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 1239 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
1218 | struct rpcrdma_mr *mr = NULL; | 1240 | struct rpcrdma_mr *mr; |
1219 | |||
1220 | spin_lock(&buf->rb_mrlock); | ||
1221 | if (!list_empty(&buf->rb_mrs)) | ||
1222 | mr = rpcrdma_mr_pop(&buf->rb_mrs); | ||
1223 | spin_unlock(&buf->rb_mrlock); | ||
1224 | 1241 | ||
1225 | if (!mr) | 1242 | spin_lock(&buf->rb_lock); |
1226 | goto out_nomrs; | 1243 | mr = rpcrdma_mr_pop(&buf->rb_mrs); |
1244 | spin_unlock(&buf->rb_lock); | ||
1227 | return mr; | 1245 | return mr; |
1228 | |||
1229 | out_nomrs: | ||
1230 | trace_xprtrdma_nomrs(r_xprt); | ||
1231 | if (r_xprt->rx_ep.rep_connected != -ENODEV) | ||
1232 | schedule_delayed_work(&buf->rb_refresh_worker, 0); | ||
1233 | |||
1234 | /* Allow the reply handler and refresh worker to run */ | ||
1235 | cond_resched(); | ||
1236 | |||
1237 | return NULL; | ||
1238 | } | ||
1239 | |||
1240 | static void | ||
1241 | __rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr) | ||
1242 | { | ||
1243 | spin_lock(&buf->rb_mrlock); | ||
1244 | rpcrdma_mr_push(mr, &buf->rb_mrs); | ||
1245 | spin_unlock(&buf->rb_mrlock); | ||
1246 | } | ||
1247 | |||
1248 | /** | ||
1249 | * rpcrdma_mr_put - Release an rpcrdma_mr object | ||
1250 | * @mr: object to release | ||
1251 | * | ||
1252 | */ | ||
1253 | void | ||
1254 | rpcrdma_mr_put(struct rpcrdma_mr *mr) | ||
1255 | { | ||
1256 | __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr); | ||
1257 | } | 1246 | } |
1258 | 1247 | ||
1259 | /** | 1248 | /** |
1260 | * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it | 1249 | * rpcrdma_mr_put - DMA unmap an MR and release it |
1261 | * @mr: object to release | 1250 | * @mr: MR to release |
1262 | * | 1251 | * |
1263 | */ | 1252 | */ |
1264 | void | 1253 | void rpcrdma_mr_put(struct rpcrdma_mr *mr) |
1265 | rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) | ||
1266 | { | 1254 | { |
1267 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; | 1255 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
1268 | 1256 | ||
@@ -1272,7 +1260,19 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) | |||
1272 | mr->mr_sg, mr->mr_nents, mr->mr_dir); | 1260 | mr->mr_sg, mr->mr_nents, mr->mr_dir); |
1273 | mr->mr_dir = DMA_NONE; | 1261 | mr->mr_dir = DMA_NONE; |
1274 | } | 1262 | } |
1275 | __rpcrdma_mr_put(&r_xprt->rx_buf, mr); | 1263 | |
1264 | rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs); | ||
1265 | } | ||
1266 | |||
1267 | static void rpcrdma_mr_free(struct rpcrdma_mr *mr) | ||
1268 | { | ||
1269 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; | ||
1270 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
1271 | |||
1272 | mr->mr_req = NULL; | ||
1273 | spin_lock(&buf->rb_lock); | ||
1274 | rpcrdma_mr_push(mr, &buf->rb_mrs); | ||
1275 | spin_unlock(&buf->rb_lock); | ||
1276 | } | 1276 | } |
1277 | 1277 | ||
1278 | /** | 1278 | /** |
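Together with the rl_free_mrs list added to rpcrdma_req earlier in this diff, the rewritten rpcrdma_mr_get()/rpcrdma_mr_put() above split MR recycling into a fast and a slow path: a DMA-unmapped MR goes back onto the list of the request that used it, with no locking because the request owns that list, and only rpcrdma_mr_free(), on request teardown, returns MRs to the shared buf->rb_mrs pool under rb_lock. Condensed (hypothetical helper names, fields from the diff):

static void example_mr_put_fast(struct rpcrdma_mr *mr)
{
	/* Owner-only list: no lock needed. */
	rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}

static void example_mr_free_slow(struct rpcrdma_mr *mr,
				 struct rpcrdma_buffer *buf)
{
	mr->mr_req = NULL;
	spin_lock(&buf->rb_lock);
	rpcrdma_mr_push(mr, &buf->rb_mrs);
	spin_unlock(&buf->rb_lock);
}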
@@ -1303,39 +1303,24 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1303 | */ | 1303 | */ |
1304 | void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) | 1304 | void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) |
1305 | { | 1305 | { |
1306 | struct rpcrdma_rep *rep = req->rl_reply; | 1306 | if (req->rl_reply) |
1307 | 1307 | rpcrdma_rep_put(buffers, req->rl_reply); | |
1308 | req->rl_reply = NULL; | 1308 | req->rl_reply = NULL; |
1309 | 1309 | ||
1310 | spin_lock(&buffers->rb_lock); | 1310 | spin_lock(&buffers->rb_lock); |
1311 | list_add(&req->rl_list, &buffers->rb_send_bufs); | 1311 | list_add(&req->rl_list, &buffers->rb_send_bufs); |
1312 | if (rep) { | ||
1313 | if (!rep->rr_temp) { | ||
1314 | list_add(&rep->rr_list, &buffers->rb_recv_bufs); | ||
1315 | rep = NULL; | ||
1316 | } | ||
1317 | } | ||
1318 | spin_unlock(&buffers->rb_lock); | 1312 | spin_unlock(&buffers->rb_lock); |
1319 | if (rep) | ||
1320 | rpcrdma_rep_destroy(rep); | ||
1321 | } | 1313 | } |
1322 | 1314 | ||
1323 | /* | 1315 | /** |
1324 | * Put reply buffers back into pool when not attached to | 1316 | * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list |
1325 | * request. This happens in error conditions. | 1317 | * @rep: rep to release |
1318 | * | ||
1319 | * Used after error conditions. | ||
1326 | */ | 1320 | */ |
1327 | void | 1321 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) |
1328 | rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | ||
1329 | { | 1322 | { |
1330 | struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; | 1323 | rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep); |
1331 | |||
1332 | if (!rep->rr_temp) { | ||
1333 | spin_lock(&buffers->rb_lock); | ||
1334 | list_add(&rep->rr_list, &buffers->rb_recv_bufs); | ||
1335 | spin_unlock(&buffers->rb_lock); | ||
1336 | } else { | ||
1337 | rpcrdma_rep_destroy(rep); | ||
1338 | } | ||
1339 | } | 1324 | } |
1340 | 1325 | ||
1341 | /* Returns a pointer to a rpcrdma_regbuf object, or NULL. | 1326 | /* Returns a pointer to a rpcrdma_regbuf object, or NULL. |
@@ -1483,7 +1468,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) | |||
1483 | count = 0; | 1468 | count = 0; |
1484 | 1469 | ||
1485 | needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); | 1470 | needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); |
1486 | if (ep->rep_receive_count > needed) | 1471 | if (likely(ep->rep_receive_count > needed)) |
1487 | goto out; | 1472 | goto out; |
1488 | needed -= ep->rep_receive_count; | 1473 | needed -= ep->rep_receive_count; |
1489 | if (!temp) | 1474 | if (!temp) |
@@ -1491,22 +1476,10 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) | |||
1491 | 1476 | ||
1492 | /* fast path: all needed reps can be found on the free list */ | 1477 | /* fast path: all needed reps can be found on the free list */ |
1493 | wr = NULL; | 1478 | wr = NULL; |
1494 | spin_lock(&buf->rb_lock); | ||
1495 | while (needed) { | 1479 | while (needed) { |
1496 | rep = list_first_entry_or_null(&buf->rb_recv_bufs, | 1480 | rep = rpcrdma_rep_get_locked(buf); |
1497 | struct rpcrdma_rep, rr_list); | ||
1498 | if (!rep) | 1481 | if (!rep) |
1499 | break; | 1482 | rep = rpcrdma_rep_create(r_xprt, temp); |
1500 | |||
1501 | list_del(&rep->rr_list); | ||
1502 | rep->rr_recv_wr.next = wr; | ||
1503 | wr = &rep->rr_recv_wr; | ||
1504 | --needed; | ||
1505 | } | ||
1506 | spin_unlock(&buf->rb_lock); | ||
1507 | |||
1508 | while (needed) { | ||
1509 | rep = rpcrdma_rep_create(r_xprt, temp); | ||
1510 | if (!rep) | 1483 | if (!rep) |
1511 | break; | 1484 | break; |
1512 | 1485 | ||
@@ -1523,7 +1496,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) | |||
1523 | if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) | 1496 | if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) |
1524 | goto release_wrs; | 1497 | goto release_wrs; |
1525 | 1498 | ||
1526 | trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); | 1499 | trace_xprtrdma_post_recv(rep); |
1527 | ++count; | 1500 | ++count; |
1528 | } | 1501 | } |
1529 | 1502 | ||
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 92ce09fcea74..65e6b0eb862e 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/atomic.h> /* atomic_t, etc */ | 47 | #include <linux/atomic.h> /* atomic_t, etc */ |
48 | #include <linux/kref.h> /* struct kref */ | 48 | #include <linux/kref.h> /* struct kref */ |
49 | #include <linux/workqueue.h> /* struct work_struct */ | 49 | #include <linux/workqueue.h> /* struct work_struct */ |
50 | #include <linux/llist.h> | ||
50 | 51 | ||
51 | #include <rdma/rdma_cm.h> /* RDMA connection api */ | 52 | #include <rdma/rdma_cm.h> /* RDMA connection api */ |
52 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ | 53 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ |
@@ -117,9 +118,6 @@ struct rpcrdma_ep { | |||
117 | #endif | 118 | #endif |
118 | 119 | ||
119 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV | 120 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV |
120 | * | ||
121 | * The below structure appears at the front of a large region of kmalloc'd | ||
122 | * memory, which always starts on a good alignment boundary. | ||
123 | */ | 121 | */ |
124 | 122 | ||
125 | struct rpcrdma_regbuf { | 123 | struct rpcrdma_regbuf { |
@@ -158,25 +156,22 @@ static inline void *rdmab_data(const struct rpcrdma_regbuf *rb) | |||
158 | 156 | ||
159 | /* To ensure a transport can always make forward progress, | 157 | /* To ensure a transport can always make forward progress, |
160 | * the number of RDMA segments allowed in header chunk lists | 158 | * the number of RDMA segments allowed in header chunk lists |
161 | * is capped at 8. This prevents less-capable devices and | 159 | * is capped at 16. This prevents less-capable devices from |
162 | * memory registrations from overrunning the Send buffer | 160 | * overrunning the Send buffer while building chunk lists. |
163 | * while building chunk lists. | ||
164 | * | 161 | * |
165 | * Elements of the Read list take up more room than the | 162 | * Elements of the Read list take up more room than the |
166 | * Write list or Reply chunk. 8 read segments means the Read | 163 | * Write list or Reply chunk. 16 read segments means the |
167 | * list (or Write list or Reply chunk) cannot consume more | 164 | * chunk lists cannot consume more than |
168 | * than | ||
169 | * | ||
170 | * ((8 + 2) * read segment size) + 1 XDR words, or 244 bytes. | ||
171 | * | 165 | * |
172 | * And the fixed part of the header is another 24 bytes. | 166 | * ((16 + 2) * read segment size) + 1 XDR words, |
173 | * | 167 | * |
174 | * The smallest inline threshold is 1024 bytes, ensuring that | 168 | * or about 400 bytes. The fixed part of the header is |
175 | * at least 750 bytes are available for RPC messages. | 169 | * another 24 bytes. Thus when the inline threshold is |
170 | * 1024 bytes, at least 600 bytes are available for RPC | ||
171 | * message bodies. | ||
176 | */ | 172 | */ |
177 | enum { | 173 | enum { |
178 | RPCRDMA_MAX_HDR_SEGS = 8, | 174 | RPCRDMA_MAX_HDR_SEGS = 16, |
179 | RPCRDMA_HDRBUF_SIZE = 256, | ||
180 | }; | 175 | }; |
181 | 176 | ||
182 | /* | 177 | /* |
@@ -206,7 +201,7 @@ struct rpcrdma_rep { | |||
206 | struct rpc_rqst *rr_rqst; | 201 | struct rpc_rqst *rr_rqst; |
207 | struct xdr_buf rr_hdrbuf; | 202 | struct xdr_buf rr_hdrbuf; |
208 | struct xdr_stream rr_stream; | 203 | struct xdr_stream rr_stream; |
209 | struct list_head rr_list; | 204 | struct llist_node rr_node; |
210 | struct ib_recv_wr rr_recv_wr; | 205 | struct ib_recv_wr rr_recv_wr; |
211 | }; | 206 | }; |
212 | 207 | ||
@@ -240,20 +235,20 @@ struct rpcrdma_sendctx { | |||
240 | * An external memory region is any buffer or page that is registered | 235 | * An external memory region is any buffer or page that is registered |
241 | * on the fly (ie, not pre-registered). | 236 | * on the fly (ie, not pre-registered). |
242 | */ | 237 | */ |
243 | struct rpcrdma_req; | ||
244 | struct rpcrdma_frwr { | 238 | struct rpcrdma_frwr { |
245 | struct ib_mr *fr_mr; | 239 | struct ib_mr *fr_mr; |
246 | struct ib_cqe fr_cqe; | 240 | struct ib_cqe fr_cqe; |
247 | struct completion fr_linv_done; | 241 | struct completion fr_linv_done; |
248 | struct rpcrdma_req *fr_req; | ||
249 | union { | 242 | union { |
250 | struct ib_reg_wr fr_regwr; | 243 | struct ib_reg_wr fr_regwr; |
251 | struct ib_send_wr fr_invwr; | 244 | struct ib_send_wr fr_invwr; |
252 | }; | 245 | }; |
253 | }; | 246 | }; |
254 | 247 | ||
248 | struct rpcrdma_req; | ||
255 | struct rpcrdma_mr { | 249 | struct rpcrdma_mr { |
256 | struct list_head mr_list; | 250 | struct list_head mr_list; |
251 | struct rpcrdma_req *mr_req; | ||
257 | struct scatterlist *mr_sg; | 252 | struct scatterlist *mr_sg; |
258 | int mr_nents; | 253 | int mr_nents; |
259 | enum dma_data_direction mr_dir; | 254 | enum dma_data_direction mr_dir; |
@@ -331,7 +326,8 @@ struct rpcrdma_req { | |||
331 | struct list_head rl_all; | 326 | struct list_head rl_all; |
332 | struct kref rl_kref; | 327 | struct kref rl_kref; |
333 | 328 | ||
334 | struct list_head rl_registered; /* registered segments */ | 329 | struct list_head rl_free_mrs; |
330 | struct list_head rl_registered; | ||
335 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; | 331 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; |
336 | }; | 332 | }; |
337 | 333 | ||
@@ -344,7 +340,7 @@ rpcr_to_rdmar(const struct rpc_rqst *rqst) | |||
344 | static inline void | 340 | static inline void |
345 | rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list) | 341 | rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list) |
346 | { | 342 | { |
347 | list_add_tail(&mr->mr_list, list); | 343 | list_add(&mr->mr_list, list); |
348 | } | 344 | } |
349 | 345 | ||
350 | static inline struct rpcrdma_mr * | 346 | static inline struct rpcrdma_mr * |
@@ -352,8 +348,9 @@ rpcrdma_mr_pop(struct list_head *list) | |||
352 | { | 348 | { |
353 | struct rpcrdma_mr *mr; | 349 | struct rpcrdma_mr *mr; |
354 | 350 | ||
355 | mr = list_first_entry(list, struct rpcrdma_mr, mr_list); | 351 | mr = list_first_entry_or_null(list, struct rpcrdma_mr, mr_list); |
356 | list_del_init(&mr->mr_list); | 352 | if (mr) |
353 | list_del_init(&mr->mr_list); | ||
357 | return mr; | 354 | return mr; |
358 | } | 355 | } |
359 | 356 | ||
@@ -364,19 +361,19 @@ rpcrdma_mr_pop(struct list_head *list) | |||
364 | * One of these is associated with a transport instance | 361 | * One of these is associated with a transport instance |
365 | */ | 362 | */ |
366 | struct rpcrdma_buffer { | 363 | struct rpcrdma_buffer { |
367 | spinlock_t rb_mrlock; /* protect rb_mrs list */ | 364 | spinlock_t rb_lock; |
365 | struct list_head rb_send_bufs; | ||
368 | struct list_head rb_mrs; | 366 | struct list_head rb_mrs; |
369 | struct list_head rb_all; | ||
370 | 367 | ||
371 | unsigned long rb_sc_head; | 368 | unsigned long rb_sc_head; |
372 | unsigned long rb_sc_tail; | 369 | unsigned long rb_sc_tail; |
373 | unsigned long rb_sc_last; | 370 | unsigned long rb_sc_last; |
374 | struct rpcrdma_sendctx **rb_sc_ctxs; | 371 | struct rpcrdma_sendctx **rb_sc_ctxs; |
375 | 372 | ||
376 | spinlock_t rb_lock; /* protect buf lists */ | ||
377 | struct list_head rb_send_bufs; | ||
378 | struct list_head rb_recv_bufs; | ||
379 | struct list_head rb_allreqs; | 373 | struct list_head rb_allreqs; |
374 | struct list_head rb_all_mrs; | ||
375 | |||
376 | struct llist_head rb_free_reps; | ||
380 | 377 | ||
381 | u32 rb_max_requests; | 378 | u32 rb_max_requests; |
382 | u32 rb_credits; /* most recent credit grant */ | 379 | u32 rb_credits; /* most recent credit grant */ |
@@ -384,7 +381,7 @@ struct rpcrdma_buffer { | |||
384 | u32 rb_bc_srv_max_requests; | 381 | u32 rb_bc_srv_max_requests; |
385 | u32 rb_bc_max_requests; | 382 | u32 rb_bc_max_requests; |
386 | 383 | ||
387 | struct delayed_work rb_refresh_worker; | 384 | struct work_struct rb_refresh_worker; |
388 | }; | 385 | }; |
389 | 386 | ||
390 | /* | 387 | /* |
@@ -490,7 +487,6 @@ struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt); | |||
490 | 487 | ||
491 | struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); | 488 | struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); |
492 | void rpcrdma_mr_put(struct rpcrdma_mr *mr); | 489 | void rpcrdma_mr_put(struct rpcrdma_mr *mr); |
493 | void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr); | ||
494 | 490 | ||
495 | static inline void | 491 | static inline void |
496 | rpcrdma_mr_recycle(struct rpcrdma_mr *mr) | 492 | rpcrdma_mr_recycle(struct rpcrdma_mr *mr) |
@@ -546,6 +542,7 @@ rpcrdma_data_dir(bool writing) | |||
546 | /* Memory registration calls xprtrdma/frwr_ops.c | 542 | /* Memory registration calls xprtrdma/frwr_ops.c |
547 | */ | 543 | */ |
548 | bool frwr_is_supported(struct ib_device *device); | 544 | bool frwr_is_supported(struct ib_device *device); |
545 | void frwr_recycle(struct rpcrdma_req *req); | ||
549 | void frwr_reset(struct rpcrdma_req *req); | 546 | void frwr_reset(struct rpcrdma_req *req); |
550 | int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep); | 547 | int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep); |
551 | int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); | 548 | int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); |
@@ -554,7 +551,7 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt); | |||
554 | struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, | 551 | struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, |
555 | struct rpcrdma_mr_seg *seg, | 552 | struct rpcrdma_mr_seg *seg, |
556 | int nsegs, bool writing, __be32 xid, | 553 | int nsegs, bool writing, __be32 xid, |
557 | struct rpcrdma_mr **mr); | 554 | struct rpcrdma_mr *mr); |
558 | int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req); | 555 | int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req); |
559 | void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs); | 556 | void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs); |
560 | void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); | 557 | void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); |
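Two related hardenings in this header: rpcrdma_mr_pop() now tolerates an empty list and returns NULL, and frwr_map() takes a caller-supplied struct rpcrdma_mr * instead of allocating one through a double pointer, so acquiring an MR, and handling pool exhaustion, becomes the caller's job. A hypothetical caller flow (the real chunk encoders in rpc_rdma.c may differ in detail):

static struct rpcrdma_mr_seg *
example_map_segment(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		    struct rpcrdma_mr_seg *seg, int nsegs, bool writing,
		    __be32 xid)
{
	struct rpcrdma_mr *mr;

	mr = rpcrdma_mr_pop(&req->rl_free_mrs);	/* per-request cache */
	if (!mr) {
		mr = rpcrdma_mr_get(r_xprt);	/* shared pool */
		if (!mr)
			return ERR_PTR(-EAGAIN);
		mr->mr_req = req;
	}
	return frwr_map(r_xprt, seg, nsegs, writing, xid, mr);
}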
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e2176c167a57..9ac88722fa83 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -562,10 +562,14 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags) | |||
562 | printk(KERN_WARNING "Callback slot table overflowed\n"); | 562 | printk(KERN_WARNING "Callback slot table overflowed\n"); |
563 | return -ESHUTDOWN; | 563 | return -ESHUTDOWN; |
564 | } | 564 | } |
565 | if (transport->recv.copied && !req->rq_private_buf.len) | ||
566 | return -ESHUTDOWN; | ||
565 | 567 | ||
566 | ret = xs_read_stream_request(transport, msg, flags, req); | 568 | ret = xs_read_stream_request(transport, msg, flags, req); |
567 | if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) | 569 | if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
568 | xprt_complete_bc_request(req, transport->recv.copied); | 570 | xprt_complete_bc_request(req, transport->recv.copied); |
571 | else | ||
572 | req->rq_private_buf.len = transport->recv.copied; | ||
569 | 573 | ||
570 | return ret; | 574 | return ret; |
571 | } | 575 | } |
@@ -587,7 +591,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags) | |||
587 | /* Look up and lock the request corresponding to the given XID */ | 591 | /* Look up and lock the request corresponding to the given XID */ |
588 | spin_lock(&xprt->queue_lock); | 592 | spin_lock(&xprt->queue_lock); |
589 | req = xprt_lookup_rqst(xprt, transport->recv.xid); | 593 | req = xprt_lookup_rqst(xprt, transport->recv.xid); |
590 | if (!req) { | 594 | if (!req || (transport->recv.copied && !req->rq_private_buf.len)) { |
591 | msg->msg_flags |= MSG_TRUNC; | 595 | msg->msg_flags |= MSG_TRUNC; |
592 | goto out; | 596 | goto out; |
593 | } | 597 | } |
@@ -599,6 +603,8 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags) | |||
599 | spin_lock(&xprt->queue_lock); | 603 | spin_lock(&xprt->queue_lock); |
600 | if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) | 604 | if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
601 | xprt_complete_rqst(req->rq_task, transport->recv.copied); | 605 | xprt_complete_rqst(req->rq_task, transport->recv.copied); |
606 | else | ||
607 | req->rq_private_buf.len = transport->recv.copied; | ||
602 | xprt_unpin_rqst(req); | 608 | xprt_unpin_rqst(req); |
603 | out: | 609 | out: |
604 | spin_unlock(&xprt->queue_lock); | 610 | spin_unlock(&xprt->queue_lock); |
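The xprtsock.c hunks above defend against a request whose receive buffer is reset in the middle of a multi-pass read: when a read stops short of MSG_EOR, the bytes copied so far are recorded in rq_private_buf.len; if a later pass finds transport->recv.copied non-zero while rq_private_buf.len is zero, the buffers were reinitialized mid-message, so the remainder is truncated (reply path) or the channel shut down with -ESHUTDOWN (backchannel path) rather than copied into a fresh buffer. The detection test in isolation:

/* Sketch of the mid-message reset check (names from the diff):
 * we had already copied part of this message, yet the request no
 * longer records having received anything, so its buffer was
 * reset underneath us. */
static bool example_buffer_was_reset(const struct sock_xprt *transport,
				     const struct rpc_rqst *req)
{
	return transport->recv.copied && !req->rq_private_buf.len;
}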