diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-26 20:33:59 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-26 20:33:59 -0400 |
| commit | 59953fba87e5e535657403cc6439d24187929559 (patch) | |
| tree | 4f92cc3bcacf052cb3fb895512af5a7d3dad86cb | |
| parent | 9ec3a646fe09970f801ab15e0f1694060b9f19af (diff) | |
| parent | f139b6c676c7e49b66016b28bf3f8ec5c54be891 (diff) | |
Merge tag 'nfs-for-4.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Another set of mainly bugfixes and a couple of cleanups. No new
functionality in this round.
Highlights include:
Stable patches:
- Fix a regression in /proc/self/mountstats
- Fix the pNFS flexfiles O_DIRECT support
- Fix high load average due to callback thread sleeping
Bugfixes:
- Various patches to fix the pNFS layoutcommit support
- Do not cache pNFS deviceids unless server notifications are enabled
- Fix a SUNRPC transport reconnection regression
- Make debugfs file creation failure non-fatal in SUNRPC
- Another fix for circular directory warnings on NFSv4 "junctioned"
mountpoints
- Fix locking around NFSv4.2 fallocate() support
- Truncating NFSv4 file opens should also sync O_DIRECT writes
- Prevent infinite loop in rpcrdma_ep_create()
Features:
- Various improvements to the RDMA transport code's handling of
memory registration
- Various code cleanups"
* tag 'nfs-for-4.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (55 commits)
fs/nfs: fix new compiler warning about boolean in switch
nfs: Remove unneeded casts in nfs
NFS: Don't attempt to decode missing directory entries
Revert "nfs: replace nfs_add_stats with nfs_inc_stats when add one"
NFS: Rename idmap.c to nfs4idmap.c
NFS: Move nfs_idmap.h into fs/nfs/
NFS: Remove CONFIG_NFS_V4 checks from nfs_idmap.h
NFS: Add a stub for GETDEVICELIST
nfs: remove WARN_ON_ONCE from nfs_direct_good_bytes
nfs: fix DIO good bytes calculation
nfs: Fetch MOUNTED_ON_FILEID when updating an inode
sunrpc: make debugfs file creation failure non-fatal
nfs: fix high load average due to callback thread sleeping
NFS: Reduce time spent holding the i_mutex during fallocate()
NFS: Don't zap caches on fallocate()
xprtrdma: Make rpcrdma_{un}map_one() into inline functions
xprtrdma: Handle non-SEND completions via a callout
xprtrdma: Add "open" memreg op
xprtrdma: Add "destroy MRs" memreg op
xprtrdma: Add "reset MRs" memreg op
...
49 files changed, 1150 insertions, 914 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 1e987acf20c9..8664417955a2 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
| @@ -22,7 +22,7 @@ nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o | |||
| 22 | obj-$(CONFIG_NFS_V4) += nfsv4.o | 22 | obj-$(CONFIG_NFS_V4) += nfsv4.o |
| 23 | CFLAGS_nfs4trace.o += -I$(src) | 23 | CFLAGS_nfs4trace.o += -I$(src) |
| 24 | nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ | 24 | nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ |
| 25 | delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ | 25 | delegation.o nfs4idmap.o callback.o callback_xdr.o callback_proc.o \ |
| 26 | nfs4namespace.o nfs4getroot.o nfs4client.o nfs4session.o \ | 26 | nfs4namespace.o nfs4getroot.o nfs4client.o nfs4session.o \ |
| 27 | dns_resolve.o nfs4trace.o | 27 | dns_resolve.o nfs4trace.o |
| 28 | nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o | 28 | nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o |
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 1cac3c175d18..d2554fe140a3 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
| @@ -890,6 +890,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = { | |||
| 890 | .free_deviceid_node = bl_free_deviceid_node, | 890 | .free_deviceid_node = bl_free_deviceid_node, |
| 891 | .pg_read_ops = &bl_pg_read_ops, | 891 | .pg_read_ops = &bl_pg_read_ops, |
| 892 | .pg_write_ops = &bl_pg_write_ops, | 892 | .pg_write_ops = &bl_pg_write_ops, |
| 893 | .sync = pnfs_generic_sync, | ||
| 893 | }; | 894 | }; |
| 894 | 895 | ||
| 895 | static int __init nfs4blocklayout_init(void) | 896 | static int __init nfs4blocklayout_init(void) |
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 5aed4f98df41..e535599a0719 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
| @@ -33,7 +33,7 @@ bl_free_deviceid_node(struct nfs4_deviceid_node *d) | |||
| 33 | container_of(d, struct pnfs_block_dev, node); | 33 | container_of(d, struct pnfs_block_dev, node); |
| 34 | 34 | ||
| 35 | bl_free_device(dev); | 35 | bl_free_device(dev); |
| 36 | kfree(dev); | 36 | kfree_rcu(dev, node.rcu); |
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | static int | 39 | static int |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 351be9205bf8..8d129bb7355a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
| @@ -128,7 +128,7 @@ nfs41_callback_svc(void *vrqstp) | |||
| 128 | if (try_to_freeze()) | 128 | if (try_to_freeze()) |
| 129 | continue; | 129 | continue; |
| 130 | 130 | ||
| 131 | prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_UNINTERRUPTIBLE); | 131 | prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); |
| 132 | spin_lock_bh(&serv->sv_cb_lock); | 132 | spin_lock_bh(&serv->sv_cb_lock); |
| 133 | if (!list_empty(&serv->sv_cb_list)) { | 133 | if (!list_empty(&serv->sv_cb_list)) { |
| 134 | req = list_first_entry(&serv->sv_cb_list, | 134 | req = list_first_entry(&serv->sv_cb_list, |
| @@ -142,10 +142,10 @@ nfs41_callback_svc(void *vrqstp) | |||
| 142 | error); | 142 | error); |
| 143 | } else { | 143 | } else { |
| 144 | spin_unlock_bh(&serv->sv_cb_lock); | 144 | spin_unlock_bh(&serv->sv_cb_lock); |
| 145 | /* schedule_timeout to game the hung task watchdog */ | 145 | schedule(); |
| 146 | schedule_timeout(60 * HZ); | ||
| 147 | finish_wait(&serv->sv_cb_waitq, &wq); | 146 | finish_wait(&serv->sv_cb_waitq, &wq); |
| 148 | } | 147 | } |
| 148 | flush_signals(current); | ||
| 149 | } | 149 | } |
| 150 | return 0; | 150 | return 0; |
| 151 | } | 151 | } |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 19874151e95c..892aefff3630 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
| @@ -31,7 +31,6 @@ | |||
| 31 | #include <linux/lockd/bind.h> | 31 | #include <linux/lockd/bind.h> |
| 32 | #include <linux/seq_file.h> | 32 | #include <linux/seq_file.h> |
| 33 | #include <linux/mount.h> | 33 | #include <linux/mount.h> |
| 34 | #include <linux/nfs_idmap.h> | ||
| 35 | #include <linux/vfs.h> | 34 | #include <linux/vfs.h> |
| 36 | #include <linux/inet.h> | 35 | #include <linux/inet.h> |
| 37 | #include <linux/in6.h> | 36 | #include <linux/in6.h> |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index a6ad68865880..029d688a969f 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
| @@ -378,7 +378,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct | |||
| 378 | if (freeme == NULL) | 378 | if (freeme == NULL) |
| 379 | goto out; | 379 | goto out; |
| 380 | } | 380 | } |
| 381 | list_add_rcu(&delegation->super_list, &server->delegations); | 381 | list_add_tail_rcu(&delegation->super_list, &server->delegations); |
| 382 | rcu_assign_pointer(nfsi->delegation, delegation); | 382 | rcu_assign_pointer(nfsi->delegation, delegation); |
| 383 | delegation = NULL; | 383 | delegation = NULL; |
| 384 | 384 | ||
| @@ -514,7 +514,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode) | |||
| 514 | 514 | ||
| 515 | delegation = nfs_inode_detach_delegation(inode); | 515 | delegation = nfs_inode_detach_delegation(inode); |
| 516 | if (delegation != NULL) | 516 | if (delegation != NULL) |
| 517 | nfs_do_return_delegation(inode, delegation, 0); | 517 | nfs_do_return_delegation(inode, delegation, 1); |
| 518 | } | 518 | } |
| 519 | 519 | ||
| 520 | /** | 520 | /** |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 1e51ecd61854..b2c8b31b2be7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
| @@ -543,6 +543,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en | |||
| 543 | if (scratch == NULL) | 543 | if (scratch == NULL) |
| 544 | return -ENOMEM; | 544 | return -ENOMEM; |
| 545 | 545 | ||
| 546 | if (buflen == 0) | ||
| 547 | goto out_nopages; | ||
| 548 | |||
| 546 | xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); | 549 | xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); |
| 547 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | 550 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); |
| 548 | 551 | ||
| @@ -564,6 +567,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en | |||
| 564 | break; | 567 | break; |
| 565 | } while (!entry->eof); | 568 | } while (!entry->eof); |
| 566 | 569 | ||
| 570 | out_nopages: | ||
| 567 | if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { | 571 | if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { |
| 568 | array = nfs_readdir_get_array(page); | 572 | array = nfs_readdir_get_array(page); |
| 569 | if (!IS_ERR(array)) { | 573 | if (!IS_ERR(array)) { |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b2cbc3a6cdd9..38678d9a5cc4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
| @@ -129,22 +129,25 @@ nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) | |||
| 129 | int i; | 129 | int i; |
| 130 | ssize_t count; | 130 | ssize_t count; |
| 131 | 131 | ||
| 132 | WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count); | 132 | if (dreq->mirror_count == 1) { |
| 133 | 133 | dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes; | |
| 134 | count = dreq->mirrors[hdr->pgio_mirror_idx].count; | 134 | dreq->count += hdr->good_bytes; |
| 135 | if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) { | 135 | } else { |
| 136 | count = hdr->io_start + hdr->good_bytes - dreq->io_start; | 136 | /* mirrored writes */ |
| 137 | dreq->mirrors[hdr->pgio_mirror_idx].count = count; | 137 | count = dreq->mirrors[hdr->pgio_mirror_idx].count; |
| 138 | } | 138 | if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) { |
| 139 | 139 | count = hdr->io_start + hdr->good_bytes - dreq->io_start; | |
| 140 | /* update the dreq->count by finding the minimum agreed count from all | 140 | dreq->mirrors[hdr->pgio_mirror_idx].count = count; |
| 141 | * mirrors */ | 141 | } |
| 142 | count = dreq->mirrors[0].count; | 142 | /* update the dreq->count by finding the minimum agreed count from all |
| 143 | * mirrors */ | ||
| 144 | count = dreq->mirrors[0].count; | ||
| 143 | 145 | ||
| 144 | for (i = 1; i < dreq->mirror_count; i++) | 146 | for (i = 1; i < dreq->mirror_count; i++) |
| 145 | count = min(count, dreq->mirrors[i].count); | 147 | count = min(count, dreq->mirrors[i].count); |
| 146 | 148 | ||
| 147 | dreq->count = count; | 149 | dreq->count = count; |
| 150 | } | ||
| 148 | } | 151 | } |
| 149 | 152 | ||
| 150 | /* | 153 | /* |
| @@ -258,18 +261,11 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) | |||
| 258 | if (!IS_SWAPFILE(inode)) | 261 | if (!IS_SWAPFILE(inode)) |
| 259 | return 0; | 262 | return 0; |
| 260 | 263 | ||
| 261 | #ifndef CONFIG_NFS_SWAP | ||
| 262 | dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", | ||
| 263 | iocb->ki_filp, (long long) pos, iter->nr_segs); | ||
| 264 | |||
| 265 | return -EINVAL; | ||
| 266 | #else | ||
| 267 | VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); | 264 | VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); |
| 268 | 265 | ||
| 269 | if (iov_iter_rw(iter) == READ) | 266 | if (iov_iter_rw(iter) == READ) |
| 270 | return nfs_file_direct_read(iocb, iter, pos); | 267 | return nfs_file_direct_read(iocb, iter, pos); |
| 271 | return nfs_file_direct_write(iocb, iter); | 268 | return nfs_file_direct_write(iocb, iter); |
| 272 | #endif /* CONFIG_NFS_SWAP */ | ||
| 273 | } | 269 | } |
| 274 | 270 | ||
| 275 | static void nfs_direct_release_pages(struct page **pages, unsigned int npages) | 271 | static void nfs_direct_release_pages(struct page **pages, unsigned int npages) |
| @@ -1030,6 +1026,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) | |||
| 1030 | if (i_size_read(inode) < iocb->ki_pos) | 1026 | if (i_size_read(inode) < iocb->ki_pos) |
| 1031 | i_size_write(inode, iocb->ki_pos); | 1027 | i_size_write(inode, iocb->ki_pos); |
| 1032 | spin_unlock(&inode->i_lock); | 1028 | spin_unlock(&inode->i_lock); |
| 1029 | generic_write_sync(file, pos, result); | ||
| 1033 | } | 1030 | } |
| 1034 | } | 1031 | } |
| 1035 | nfs_direct_req_release(dreq); | 1032 | nfs_direct_req_release(dreq); |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c40e4363e746..8b8d83a526ce 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
| @@ -280,6 +280,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 280 | 280 | ||
| 281 | trace_nfs_fsync_enter(inode); | 281 | trace_nfs_fsync_enter(inode); |
| 282 | 282 | ||
| 283 | nfs_inode_dio_wait(inode); | ||
| 283 | do { | 284 | do { |
| 284 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 285 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
| 285 | if (ret != 0) | 286 | if (ret != 0) |
| @@ -782,7 +783,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) | |||
| 782 | * Flush all pending writes before doing anything | 783 | * Flush all pending writes before doing anything |
| 783 | * with locks.. | 784 | * with locks.. |
| 784 | */ | 785 | */ |
| 785 | nfs_sync_mapping(filp->f_mapping); | 786 | vfs_fsync(filp, 0); |
| 786 | 787 | ||
| 787 | l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); | 788 | l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); |
| 788 | if (!IS_ERR(l_ctx)) { | 789 | if (!IS_ERR(l_ctx)) { |
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 91e88a7ecef0..a46bf6de9ce4 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
| @@ -258,7 +258,8 @@ filelayout_set_layoutcommit(struct nfs_pgio_header *hdr) | |||
| 258 | hdr->res.verf->committed != NFS_DATA_SYNC) | 258 | hdr->res.verf->committed != NFS_DATA_SYNC) |
| 259 | return; | 259 | return; |
| 260 | 260 | ||
| 261 | pnfs_set_layoutcommit(hdr); | 261 | pnfs_set_layoutcommit(hdr->inode, hdr->lseg, |
| 262 | hdr->mds_offset + hdr->res.count); | ||
| 262 | dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, | 263 | dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, |
| 263 | (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); | 264 | (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); |
| 264 | } | 265 | } |
| @@ -373,7 +374,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, | |||
| 373 | } | 374 | } |
| 374 | 375 | ||
| 375 | if (data->verf.committed == NFS_UNSTABLE) | 376 | if (data->verf.committed == NFS_UNSTABLE) |
| 376 | pnfs_commit_set_layoutcommit(data); | 377 | pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); |
| 377 | 378 | ||
| 378 | return 0; | 379 | return 0; |
| 379 | } | 380 | } |
| @@ -1086,7 +1087,7 @@ filelayout_alloc_deviceid_node(struct nfs_server *server, | |||
| 1086 | } | 1087 | } |
| 1087 | 1088 | ||
| 1088 | static void | 1089 | static void |
| 1089 | filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) | 1090 | filelayout_free_deviceid_node(struct nfs4_deviceid_node *d) |
| 1090 | { | 1091 | { |
| 1091 | nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); | 1092 | nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); |
| 1092 | } | 1093 | } |
| @@ -1137,7 +1138,8 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
| 1137 | .read_pagelist = filelayout_read_pagelist, | 1138 | .read_pagelist = filelayout_read_pagelist, |
| 1138 | .write_pagelist = filelayout_write_pagelist, | 1139 | .write_pagelist = filelayout_write_pagelist, |
| 1139 | .alloc_deviceid_node = filelayout_alloc_deviceid_node, | 1140 | .alloc_deviceid_node = filelayout_alloc_deviceid_node, |
| 1140 | .free_deviceid_node = filelayout_free_deveiceid_node, | 1141 | .free_deviceid_node = filelayout_free_deviceid_node, |
| 1142 | .sync = pnfs_nfs_generic_sync, | ||
| 1141 | }; | 1143 | }; |
| 1142 | 1144 | ||
| 1143 | static int __init nfs4filelayout_init(void) | 1145 | static int __init nfs4filelayout_init(void) |
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 4f372e224603..4946ef40ba87 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
| @@ -55,7 +55,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | |||
| 55 | nfs4_pnfs_ds_put(ds); | 55 | nfs4_pnfs_ds_put(ds); |
| 56 | } | 56 | } |
| 57 | kfree(dsaddr->stripe_indices); | 57 | kfree(dsaddr->stripe_indices); |
| 58 | kfree(dsaddr); | 58 | kfree_rcu(dsaddr, id_node.rcu); |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | /* Decode opaque device data and return the result */ | 61 | /* Decode opaque device data and return the result */ |
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 315cc68945b9..7d05089e52d6 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
| @@ -11,10 +11,10 @@ | |||
| 11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| 12 | 12 | ||
| 13 | #include <linux/sunrpc/metrics.h> | 13 | #include <linux/sunrpc/metrics.h> |
| 14 | #include <linux/nfs_idmap.h> | ||
| 15 | 14 | ||
| 16 | #include "flexfilelayout.h" | 15 | #include "flexfilelayout.h" |
| 17 | #include "../nfs4session.h" | 16 | #include "../nfs4session.h" |
| 17 | #include "../nfs4idmap.h" | ||
| 18 | #include "../internal.h" | 18 | #include "../internal.h" |
| 19 | #include "../delegation.h" | 19 | #include "../delegation.h" |
| 20 | #include "../nfs4trace.h" | 20 | #include "../nfs4trace.h" |
| @@ -891,7 +891,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task, | |||
| 891 | static void | 891 | static void |
| 892 | ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) | 892 | ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) |
| 893 | { | 893 | { |
| 894 | pnfs_set_layoutcommit(hdr); | 894 | pnfs_set_layoutcommit(hdr->inode, hdr->lseg, |
| 895 | hdr->mds_offset + hdr->res.count); | ||
| 895 | dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, | 896 | dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, |
| 896 | (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); | 897 | (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); |
| 897 | } | 898 | } |
| @@ -1074,7 +1075,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, | |||
| 1074 | } | 1075 | } |
| 1075 | 1076 | ||
| 1076 | if (data->verf.committed == NFS_UNSTABLE) | 1077 | if (data->verf.committed == NFS_UNSTABLE) |
| 1077 | pnfs_commit_set_layoutcommit(data); | 1078 | pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); |
| 1078 | 1079 | ||
| 1079 | return 0; | 1080 | return 0; |
| 1080 | } | 1081 | } |
| @@ -1414,7 +1415,7 @@ ff_layout_get_ds_info(struct inode *inode) | |||
| 1414 | } | 1415 | } |
| 1415 | 1416 | ||
| 1416 | static void | 1417 | static void |
| 1417 | ff_layout_free_deveiceid_node(struct nfs4_deviceid_node *d) | 1418 | ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d) |
| 1418 | { | 1419 | { |
| 1419 | nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds, | 1420 | nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds, |
| 1420 | id_node)); | 1421 | id_node)); |
| @@ -1498,7 +1499,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { | |||
| 1498 | .pg_read_ops = &ff_layout_pg_read_ops, | 1499 | .pg_read_ops = &ff_layout_pg_read_ops, |
| 1499 | .pg_write_ops = &ff_layout_pg_write_ops, | 1500 | .pg_write_ops = &ff_layout_pg_write_ops, |
| 1500 | .get_ds_info = ff_layout_get_ds_info, | 1501 | .get_ds_info = ff_layout_get_ds_info, |
| 1501 | .free_deviceid_node = ff_layout_free_deveiceid_node, | 1502 | .free_deviceid_node = ff_layout_free_deviceid_node, |
| 1502 | .mark_request_commit = pnfs_layout_mark_request_commit, | 1503 | .mark_request_commit = pnfs_layout_mark_request_commit, |
| 1503 | .clear_request_commit = pnfs_generic_clear_request_commit, | 1504 | .clear_request_commit = pnfs_generic_clear_request_commit, |
| 1504 | .scan_commit_lists = pnfs_generic_scan_commit_lists, | 1505 | .scan_commit_lists = pnfs_generic_scan_commit_lists, |
| @@ -1508,6 +1509,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { | |||
| 1508 | .write_pagelist = ff_layout_write_pagelist, | 1509 | .write_pagelist = ff_layout_write_pagelist, |
| 1509 | .alloc_deviceid_node = ff_layout_alloc_deviceid_node, | 1510 | .alloc_deviceid_node = ff_layout_alloc_deviceid_node, |
| 1510 | .encode_layoutreturn = ff_layout_encode_layoutreturn, | 1511 | .encode_layoutreturn = ff_layout_encode_layoutreturn, |
| 1512 | .sync = pnfs_nfs_generic_sync, | ||
| 1511 | }; | 1513 | }; |
| 1512 | 1514 | ||
| 1513 | static int __init nfs4flexfilelayout_init(void) | 1515 | static int __init nfs4flexfilelayout_init(void) |
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e2c01f204a95..77a2d026aa12 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
| @@ -30,7 +30,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | |||
| 30 | { | 30 | { |
| 31 | nfs4_print_deviceid(&mirror_ds->id_node.deviceid); | 31 | nfs4_print_deviceid(&mirror_ds->id_node.deviceid); |
| 32 | nfs4_pnfs_ds_put(mirror_ds->ds); | 32 | nfs4_pnfs_ds_put(mirror_ds->ds); |
| 33 | kfree(mirror_ds); | 33 | kfree_rcu(mirror_ds, id_node.rcu); |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | /* Decode opaque device data and construct new_ds using it */ | 36 | /* Decode opaque device data and construct new_ds using it */ |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3689e95da79a..f734562c6d24 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
| @@ -133,6 +133,13 @@ void nfs_evict_inode(struct inode *inode) | |||
| 133 | nfs_clear_inode(inode); | 133 | nfs_clear_inode(inode); |
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | int nfs_sync_inode(struct inode *inode) | ||
| 137 | { | ||
| 138 | nfs_inode_dio_wait(inode); | ||
| 139 | return nfs_wb_all(inode); | ||
| 140 | } | ||
| 141 | EXPORT_SYMBOL_GPL(nfs_sync_inode); | ||
| 142 | |||
| 136 | /** | 143 | /** |
| 137 | * nfs_sync_mapping - helper to flush all mmapped dirty data to disk | 144 | * nfs_sync_mapping - helper to flush all mmapped dirty data to disk |
| 138 | */ | 145 | */ |
| @@ -192,7 +199,6 @@ void nfs_zap_caches(struct inode *inode) | |||
| 192 | nfs_zap_caches_locked(inode); | 199 | nfs_zap_caches_locked(inode); |
| 193 | spin_unlock(&inode->i_lock); | 200 | spin_unlock(&inode->i_lock); |
| 194 | } | 201 | } |
| 195 | EXPORT_SYMBOL_GPL(nfs_zap_caches); | ||
| 196 | 202 | ||
| 197 | void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) | 203 | void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) |
| 198 | { | 204 | { |
| @@ -525,10 +531,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 525 | trace_nfs_setattr_enter(inode); | 531 | trace_nfs_setattr_enter(inode); |
| 526 | 532 | ||
| 527 | /* Write all dirty data */ | 533 | /* Write all dirty data */ |
| 528 | if (S_ISREG(inode->i_mode)) { | 534 | if (S_ISREG(inode->i_mode)) |
| 529 | nfs_inode_dio_wait(inode); | 535 | nfs_sync_inode(inode); |
| 530 | nfs_wb_all(inode); | ||
| 531 | } | ||
| 532 | 536 | ||
| 533 | fattr = nfs_alloc_fattr(); | 537 | fattr = nfs_alloc_fattr(); |
| 534 | if (fattr == NULL) | 538 | if (fattr == NULL) |
| @@ -644,8 +648,9 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
| 644 | trace_nfs_getattr_enter(inode); | 648 | trace_nfs_getattr_enter(inode); |
| 645 | /* Flush out writes to the server in order to update c/mtime. */ | 649 | /* Flush out writes to the server in order to update c/mtime. */ |
| 646 | if (S_ISREG(inode->i_mode)) { | 650 | if (S_ISREG(inode->i_mode)) { |
| 647 | nfs_inode_dio_wait(inode); | 651 | mutex_lock(&inode->i_mutex); |
| 648 | err = filemap_write_and_wait(inode->i_mapping); | 652 | err = nfs_sync_inode(inode); |
| 653 | mutex_unlock(&inode->i_mutex); | ||
| 649 | if (err) | 654 | if (err) |
| 650 | goto out; | 655 | goto out; |
| 651 | } | 656 | } |
| @@ -1588,6 +1593,19 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa | |||
| 1588 | } | 1593 | } |
| 1589 | EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); | 1594 | EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); |
| 1590 | 1595 | ||
| 1596 | |||
| 1597 | static inline bool nfs_fileid_valid(struct nfs_inode *nfsi, | ||
| 1598 | struct nfs_fattr *fattr) | ||
| 1599 | { | ||
| 1600 | bool ret1 = true, ret2 = true; | ||
| 1601 | |||
| 1602 | if (fattr->valid & NFS_ATTR_FATTR_FILEID) | ||
| 1603 | ret1 = (nfsi->fileid == fattr->fileid); | ||
| 1604 | if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) | ||
| 1605 | ret2 = (nfsi->fileid == fattr->mounted_on_fileid); | ||
| 1606 | return ret1 || ret2; | ||
| 1607 | } | ||
| 1608 | |||
| 1591 | /* | 1609 | /* |
| 1592 | * Many nfs protocol calls return the new file attributes after | 1610 | * Many nfs protocol calls return the new file attributes after |
| 1593 | * an operation. Here we update the inode to reflect the state | 1611 | * an operation. Here we update the inode to reflect the state |
| @@ -1614,7 +1632,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1614 | nfs_display_fhandle_hash(NFS_FH(inode)), | 1632 | nfs_display_fhandle_hash(NFS_FH(inode)), |
| 1615 | atomic_read(&inode->i_count), fattr->valid); | 1633 | atomic_read(&inode->i_count), fattr->valid); |
| 1616 | 1634 | ||
| 1617 | if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) { | 1635 | if (!nfs_fileid_valid(nfsi, fattr)) { |
| 1618 | printk(KERN_ERR "NFS: server %s error: fileid changed\n" | 1636 | printk(KERN_ERR "NFS: server %s error: fileid changed\n" |
| 1619 | "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", | 1637 | "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", |
| 1620 | NFS_SERVER(inode)->nfs_client->cl_hostname, | 1638 | NFS_SERVER(inode)->nfs_client->cl_hostname, |
| @@ -1819,7 +1837,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1819 | struct inode *nfs_alloc_inode(struct super_block *sb) | 1837 | struct inode *nfs_alloc_inode(struct super_block *sb) |
| 1820 | { | 1838 | { |
| 1821 | struct nfs_inode *nfsi; | 1839 | struct nfs_inode *nfsi; |
| 1822 | nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL); | 1840 | nfsi = kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL); |
| 1823 | if (!nfsi) | 1841 | if (!nfsi) |
| 1824 | return NULL; | 1842 | return NULL; |
| 1825 | nfsi->flags = 0UL; | 1843 | nfsi->flags = 0UL; |
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index cb170722769c..3a9e75235f30 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
| @@ -36,13 +36,16 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, | |||
| 36 | loff_t offset, loff_t len) | 36 | loff_t offset, loff_t len) |
| 37 | { | 37 | { |
| 38 | struct inode *inode = file_inode(filep); | 38 | struct inode *inode = file_inode(filep); |
| 39 | struct nfs_server *server = NFS_SERVER(inode); | ||
| 39 | struct nfs42_falloc_args args = { | 40 | struct nfs42_falloc_args args = { |
| 40 | .falloc_fh = NFS_FH(inode), | 41 | .falloc_fh = NFS_FH(inode), |
| 41 | .falloc_offset = offset, | 42 | .falloc_offset = offset, |
| 42 | .falloc_length = len, | 43 | .falloc_length = len, |
| 44 | .falloc_bitmask = server->cache_consistency_bitmask, | ||
| 45 | }; | ||
| 46 | struct nfs42_falloc_res res = { | ||
| 47 | .falloc_server = server, | ||
| 43 | }; | 48 | }; |
| 44 | struct nfs42_falloc_res res; | ||
| 45 | struct nfs_server *server = NFS_SERVER(inode); | ||
| 46 | int status; | 49 | int status; |
| 47 | 50 | ||
| 48 | msg->rpc_argp = &args; | 51 | msg->rpc_argp = &args; |
| @@ -52,8 +55,17 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, | |||
| 52 | if (status) | 55 | if (status) |
| 53 | return status; | 56 | return status; |
| 54 | 57 | ||
| 55 | return nfs4_call_sync(server->client, server, msg, | 58 | res.falloc_fattr = nfs_alloc_fattr(); |
| 56 | &args.seq_args, &res.seq_res, 0); | 59 | if (!res.falloc_fattr) |
| 60 | return -ENOMEM; | ||
| 61 | |||
| 62 | status = nfs4_call_sync(server->client, server, msg, | ||
| 63 | &args.seq_args, &res.seq_res, 0); | ||
| 64 | if (status == 0) | ||
| 65 | status = nfs_post_op_update_inode(inode, res.falloc_fattr); | ||
| 66 | |||
| 67 | kfree(res.falloc_fattr); | ||
| 68 | return status; | ||
| 57 | } | 69 | } |
| 58 | 70 | ||
| 59 | static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, | 71 | static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, |
| @@ -84,9 +96,13 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len) | |||
| 84 | if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE)) | 96 | if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE)) |
| 85 | return -EOPNOTSUPP; | 97 | return -EOPNOTSUPP; |
| 86 | 98 | ||
| 99 | mutex_lock(&inode->i_mutex); | ||
| 100 | |||
| 87 | err = nfs42_proc_fallocate(&msg, filep, offset, len); | 101 | err = nfs42_proc_fallocate(&msg, filep, offset, len); |
| 88 | if (err == -EOPNOTSUPP) | 102 | if (err == -EOPNOTSUPP) |
| 89 | NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE; | 103 | NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE; |
| 104 | |||
| 105 | mutex_unlock(&inode->i_mutex); | ||
| 90 | return err; | 106 | return err; |
| 91 | } | 107 | } |
| 92 | 108 | ||
| @@ -101,9 +117,16 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) | |||
| 101 | if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE)) | 117 | if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE)) |
| 102 | return -EOPNOTSUPP; | 118 | return -EOPNOTSUPP; |
| 103 | 119 | ||
| 120 | nfs_wb_all(inode); | ||
| 121 | mutex_lock(&inode->i_mutex); | ||
| 122 | |||
| 104 | err = nfs42_proc_fallocate(&msg, filep, offset, len); | 123 | err = nfs42_proc_fallocate(&msg, filep, offset, len); |
| 124 | if (err == 0) | ||
| 125 | truncate_pagecache_range(inode, offset, (offset + len) -1); | ||
| 105 | if (err == -EOPNOTSUPP) | 126 | if (err == -EOPNOTSUPP) |
| 106 | NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE; | 127 | NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE; |
| 128 | |||
| 129 | mutex_unlock(&inode->i_mutex); | ||
| 107 | return err; | 130 | return err; |
| 108 | } | 131 | } |
| 109 | 132 | ||
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 038a7e1521fa..1a25b27248f2 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
| @@ -25,16 +25,20 @@ | |||
| 25 | 25 | ||
| 26 | #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ | 26 | #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ |
| 27 | encode_putfh_maxsz + \ | 27 | encode_putfh_maxsz + \ |
| 28 | encode_allocate_maxsz) | 28 | encode_allocate_maxsz + \ |
| 29 | encode_getattr_maxsz) | ||
| 29 | #define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \ | 30 | #define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \ |
| 30 | decode_putfh_maxsz + \ | 31 | decode_putfh_maxsz + \ |
| 31 | decode_allocate_maxsz) | 32 | decode_allocate_maxsz + \ |
| 33 | decode_getattr_maxsz) | ||
| 32 | #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ | 34 | #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ |
| 33 | encode_putfh_maxsz + \ | 35 | encode_putfh_maxsz + \ |
| 34 | encode_deallocate_maxsz) | 36 | encode_deallocate_maxsz + \ |
| 37 | encode_getattr_maxsz) | ||
| 35 | #define NFS4_dec_deallocate_sz (compound_decode_hdr_maxsz + \ | 38 | #define NFS4_dec_deallocate_sz (compound_decode_hdr_maxsz + \ |
| 36 | decode_putfh_maxsz + \ | 39 | decode_putfh_maxsz + \ |
| 37 | decode_deallocate_maxsz) | 40 | decode_deallocate_maxsz + \ |
| 41 | decode_getattr_maxsz) | ||
| 38 | #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ | 42 | #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ |
| 39 | encode_putfh_maxsz + \ | 43 | encode_putfh_maxsz + \ |
| 40 | encode_seek_maxsz) | 44 | encode_seek_maxsz) |
| @@ -92,6 +96,7 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, | |||
| 92 | encode_sequence(xdr, &args->seq_args, &hdr); | 96 | encode_sequence(xdr, &args->seq_args, &hdr); |
| 93 | encode_putfh(xdr, args->falloc_fh, &hdr); | 97 | encode_putfh(xdr, args->falloc_fh, &hdr); |
| 94 | encode_allocate(xdr, args, &hdr); | 98 | encode_allocate(xdr, args, &hdr); |
| 99 | encode_getfattr(xdr, args->falloc_bitmask, &hdr); | ||
| 95 | encode_nops(&hdr); | 100 | encode_nops(&hdr); |
| 96 | } | 101 | } |
| 97 | 102 | ||
| @@ -110,6 +115,7 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, | |||
| 110 | encode_sequence(xdr, &args->seq_args, &hdr); | 115 | encode_sequence(xdr, &args->seq_args, &hdr); |
| 111 | encode_putfh(xdr, args->falloc_fh, &hdr); | 116 | encode_putfh(xdr, args->falloc_fh, &hdr); |
| 112 | encode_deallocate(xdr, args, &hdr); | 117 | encode_deallocate(xdr, args, &hdr); |
| 118 | encode_getfattr(xdr, args->falloc_bitmask, &hdr); | ||
| 113 | encode_nops(&hdr); | 119 | encode_nops(&hdr); |
| 114 | } | 120 | } |
| 115 | 121 | ||
| @@ -183,6 +189,9 @@ static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp, | |||
| 183 | if (status) | 189 | if (status) |
| 184 | goto out; | 190 | goto out; |
| 185 | status = decode_allocate(xdr, res); | 191 | status = decode_allocate(xdr, res); |
| 192 | if (status) | ||
| 193 | goto out; | ||
| 194 | decode_getfattr(xdr, res->falloc_fattr, res->falloc_server); | ||
| 186 | out: | 195 | out: |
| 187 | return status; | 196 | return status; |
| 188 | } | 197 | } |
| @@ -207,6 +216,9 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp, | |||
| 207 | if (status) | 216 | if (status) |
| 208 | goto out; | 217 | goto out; |
| 209 | status = decode_deallocate(xdr, res); | 218 | status = decode_deallocate(xdr, res); |
| 219 | if (status) | ||
| 220 | goto out; | ||
| 221 | decode_getfattr(xdr, res->falloc_fattr, res->falloc_server); | ||
| 210 | out: | 222 | out: |
| 211 | return status; | 223 | return status; |
| 212 | } | 224 | } |
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 51c2dbd1e942..e42be52a8c18 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c | |||
| @@ -4,7 +4,6 @@ | |||
| 4 | */ | 4 | */ |
| 5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
| 6 | #include <linux/nfs_fs.h> | 6 | #include <linux/nfs_fs.h> |
| 7 | #include <linux/nfs_idmap.h> | ||
| 8 | #include <linux/nfs_mount.h> | 7 | #include <linux/nfs_mount.h> |
| 9 | #include <linux/sunrpc/addr.h> | 8 | #include <linux/sunrpc/addr.h> |
| 10 | #include <linux/sunrpc/auth.h> | 9 | #include <linux/sunrpc/auth.h> |
| @@ -15,6 +14,7 @@ | |||
| 15 | #include "callback.h" | 14 | #include "callback.h" |
| 16 | #include "delegation.h" | 15 | #include "delegation.h" |
| 17 | #include "nfs4session.h" | 16 | #include "nfs4session.h" |
| 17 | #include "nfs4idmap.h" | ||
| 18 | #include "pnfs.h" | 18 | #include "pnfs.h" |
| 19 | #include "netns.h" | 19 | #include "netns.h" |
| 20 | 20 | ||
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 619eca34e70f..f58c17b3b480 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c | |||
| @@ -10,6 +10,8 @@ | |||
| 10 | #include "fscache.h" | 10 | #include "fscache.h" |
| 11 | #include "pnfs.h" | 11 | #include "pnfs.h" |
| 12 | 12 | ||
| 13 | #include "nfstrace.h" | ||
| 14 | |||
| 13 | #ifdef CONFIG_NFS_V4_2 | 15 | #ifdef CONFIG_NFS_V4_2 |
| 14 | #include "nfs42.h" | 16 | #include "nfs42.h" |
| 15 | #endif | 17 | #endif |
| @@ -57,7 +59,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) | |||
| 57 | if (openflags & O_TRUNC) { | 59 | if (openflags & O_TRUNC) { |
| 58 | attr.ia_valid |= ATTR_SIZE; | 60 | attr.ia_valid |= ATTR_SIZE; |
| 59 | attr.ia_size = 0; | 61 | attr.ia_size = 0; |
| 60 | nfs_wb_all(inode); | 62 | nfs_sync_inode(inode); |
| 61 | } | 63 | } |
| 62 | 64 | ||
| 63 | inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened); | 65 | inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened); |
| @@ -100,6 +102,9 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 100 | int ret; | 102 | int ret; |
| 101 | struct inode *inode = file_inode(file); | 103 | struct inode *inode = file_inode(file); |
| 102 | 104 | ||
| 105 | trace_nfs_fsync_enter(inode); | ||
| 106 | |||
| 107 | nfs_inode_dio_wait(inode); | ||
| 103 | do { | 108 | do { |
| 104 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 109 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
| 105 | if (ret != 0) | 110 | if (ret != 0) |
| @@ -107,7 +112,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 107 | mutex_lock(&inode->i_mutex); | 112 | mutex_lock(&inode->i_mutex); |
| 108 | ret = nfs_file_fsync_commit(file, start, end, datasync); | 113 | ret = nfs_file_fsync_commit(file, start, end, datasync); |
| 109 | if (!ret) | 114 | if (!ret) |
| 110 | ret = pnfs_layoutcommit_inode(inode, true); | 115 | ret = pnfs_sync_inode(inode, !!datasync); |
| 111 | mutex_unlock(&inode->i_mutex); | 116 | mutex_unlock(&inode->i_mutex); |
| 112 | /* | 117 | /* |
| 113 | * If nfs_file_fsync_commit detected a server reboot, then | 118 | * If nfs_file_fsync_commit detected a server reboot, then |
| @@ -118,6 +123,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 118 | end = LLONG_MAX; | 123 | end = LLONG_MAX; |
| 119 | } while (ret == -EAGAIN); | 124 | } while (ret == -EAGAIN); |
| 120 | 125 | ||
| 126 | trace_nfs_fsync_exit(inode, ret); | ||
| 121 | return ret; | 127 | return ret; |
| 122 | } | 128 | } |
| 123 | 129 | ||
| @@ -152,15 +158,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t | |||
| 152 | if (ret < 0) | 158 | if (ret < 0) |
| 153 | return ret; | 159 | return ret; |
| 154 | 160 | ||
| 155 | mutex_lock(&inode->i_mutex); | ||
| 156 | if (mode & FALLOC_FL_PUNCH_HOLE) | 161 | if (mode & FALLOC_FL_PUNCH_HOLE) |
| 157 | ret = nfs42_proc_deallocate(filep, offset, len); | 162 | return nfs42_proc_deallocate(filep, offset, len); |
| 158 | else | 163 | return nfs42_proc_allocate(filep, offset, len); |
| 159 | ret = nfs42_proc_allocate(filep, offset, len); | ||
| 160 | mutex_unlock(&inode->i_mutex); | ||
| 161 | |||
| 162 | nfs_zap_caches(inode); | ||
| 163 | return ret; | ||
| 164 | } | 164 | } |
| 165 | #endif /* CONFIG_NFS_V4_2 */ | 165 | #endif /* CONFIG_NFS_V4_2 */ |
| 166 | 166 | ||
diff --git a/fs/nfs/idmap.c b/fs/nfs/nfs4idmap.c index 857e2a99acc8..2e1737c40a29 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/nfs4idmap.c | |||
| @@ -36,7 +36,6 @@ | |||
| 36 | #include <linux/types.h> | 36 | #include <linux/types.h> |
| 37 | #include <linux/parser.h> | 37 | #include <linux/parser.h> |
| 38 | #include <linux/fs.h> | 38 | #include <linux/fs.h> |
| 39 | #include <linux/nfs_idmap.h> | ||
| 40 | #include <net/net_namespace.h> | 39 | #include <net/net_namespace.h> |
| 41 | #include <linux/sunrpc/rpc_pipe_fs.h> | 40 | #include <linux/sunrpc/rpc_pipe_fs.h> |
| 42 | #include <linux/nfs_fs.h> | 41 | #include <linux/nfs_fs.h> |
| @@ -49,6 +48,7 @@ | |||
| 49 | 48 | ||
| 50 | #include "internal.h" | 49 | #include "internal.h" |
| 51 | #include "netns.h" | 50 | #include "netns.h" |
| 51 | #include "nfs4idmap.h" | ||
| 52 | #include "nfs4trace.h" | 52 | #include "nfs4trace.h" |
| 53 | 53 | ||
| 54 | #define NFS_UINT_MAXLEN 11 | 54 | #define NFS_UINT_MAXLEN 11 |
diff --git a/include/linux/nfs_idmap.h b/fs/nfs/nfs4idmap.h index 333844e38f66..de44d7330ab3 100644 --- a/include/linux/nfs_idmap.h +++ b/fs/nfs/nfs4idmap.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * include/linux/nfs_idmap.h | 2 | * fs/nfs/nfs4idmap.h |
| 3 | * | 3 | * |
| 4 | * UID and GID to name mapping for clients. | 4 | * UID and GID to name mapping for clients. |
| 5 | * | 5 | * |
| @@ -46,19 +46,8 @@ struct nfs_server; | |||
| 46 | struct nfs_fattr; | 46 | struct nfs_fattr; |
| 47 | struct nfs4_string; | 47 | struct nfs4_string; |
| 48 | 48 | ||
| 49 | #if IS_ENABLED(CONFIG_NFS_V4) | ||
| 50 | int nfs_idmap_init(void); | 49 | int nfs_idmap_init(void); |
| 51 | void nfs_idmap_quit(void); | 50 | void nfs_idmap_quit(void); |
| 52 | #else | ||
| 53 | static inline int nfs_idmap_init(void) | ||
| 54 | { | ||
| 55 | return 0; | ||
| 56 | } | ||
| 57 | |||
| 58 | static inline void nfs_idmap_quit(void) | ||
| 59 | {} | ||
| 60 | #endif | ||
| 61 | |||
| 62 | int nfs_idmap_new(struct nfs_client *); | 51 | int nfs_idmap_new(struct nfs_client *); |
| 63 | void nfs_idmap_delete(struct nfs_client *); | 52 | void nfs_idmap_delete(struct nfs_client *); |
| 64 | 53 | ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 98e533f2c94a..45b35b9b1e36 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -51,7 +51,6 @@ | |||
| 51 | #include <linux/namei.h> | 51 | #include <linux/namei.h> |
| 52 | #include <linux/mount.h> | 52 | #include <linux/mount.h> |
| 53 | #include <linux/module.h> | 53 | #include <linux/module.h> |
| 54 | #include <linux/nfs_idmap.h> | ||
| 55 | #include <linux/xattr.h> | 54 | #include <linux/xattr.h> |
| 56 | #include <linux/utsname.h> | 55 | #include <linux/utsname.h> |
| 57 | #include <linux/freezer.h> | 56 | #include <linux/freezer.h> |
| @@ -63,6 +62,7 @@ | |||
| 63 | #include "callback.h" | 62 | #include "callback.h" |
| 64 | #include "pnfs.h" | 63 | #include "pnfs.h" |
| 65 | #include "netns.h" | 64 | #include "netns.h" |
| 65 | #include "nfs4idmap.h" | ||
| 66 | #include "nfs4session.h" | 66 | #include "nfs4session.h" |
| 67 | #include "fscache.h" | 67 | #include "fscache.h" |
| 68 | 68 | ||
| @@ -185,7 +185,8 @@ const u32 nfs4_fattr_bitmap[3] = { | |||
| 185 | | FATTR4_WORD1_SPACE_USED | 185 | | FATTR4_WORD1_SPACE_USED |
| 186 | | FATTR4_WORD1_TIME_ACCESS | 186 | | FATTR4_WORD1_TIME_ACCESS |
| 187 | | FATTR4_WORD1_TIME_METADATA | 187 | | FATTR4_WORD1_TIME_METADATA |
| 188 | | FATTR4_WORD1_TIME_MODIFY, | 188 | | FATTR4_WORD1_TIME_MODIFY |
| 189 | | FATTR4_WORD1_MOUNTED_ON_FILEID, | ||
| 189 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | 190 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL |
| 190 | FATTR4_WORD2_SECURITY_LABEL | 191 | FATTR4_WORD2_SECURITY_LABEL |
| 191 | #endif | 192 | #endif |
| @@ -3095,16 +3096,13 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 3095 | struct nfs_fsinfo *info, | 3096 | struct nfs_fsinfo *info, |
| 3096 | bool auth_probe) | 3097 | bool auth_probe) |
| 3097 | { | 3098 | { |
| 3098 | int status; | 3099 | int status = 0; |
| 3099 | 3100 | ||
| 3100 | switch (auth_probe) { | 3101 | if (!auth_probe) |
| 3101 | case false: | ||
| 3102 | status = nfs4_lookup_root(server, fhandle, info); | 3102 | status = nfs4_lookup_root(server, fhandle, info); |
| 3103 | if (status != -NFS4ERR_WRONGSEC) | 3103 | |
| 3104 | break; | 3104 | if (auth_probe || status == NFS4ERR_WRONGSEC) |
| 3105 | default: | ||
| 3106 | status = nfs4_do_find_root_sec(server, fhandle, info); | 3105 | status = nfs4_do_find_root_sec(server, fhandle, info); |
| 3107 | } | ||
| 3108 | 3106 | ||
| 3109 | if (status == 0) | 3107 | if (status == 0) |
| 3110 | status = nfs4_server_capabilities(server, fhandle); | 3108 | status = nfs4_server_capabilities(server, fhandle); |
| @@ -7944,6 +7942,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, | |||
| 7944 | { | 7942 | { |
| 7945 | struct nfs4_getdeviceinfo_args args = { | 7943 | struct nfs4_getdeviceinfo_args args = { |
| 7946 | .pdev = pdev, | 7944 | .pdev = pdev, |
| 7945 | .notify_types = NOTIFY_DEVICEID4_CHANGE | | ||
| 7946 | NOTIFY_DEVICEID4_DELETE, | ||
| 7947 | }; | 7947 | }; |
| 7948 | struct nfs4_getdeviceinfo_res res = { | 7948 | struct nfs4_getdeviceinfo_res res = { |
| 7949 | .pdev = pdev, | 7949 | .pdev = pdev, |
| @@ -7958,6 +7958,11 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, | |||
| 7958 | 7958 | ||
| 7959 | dprintk("--> %s\n", __func__); | 7959 | dprintk("--> %s\n", __func__); |
| 7960 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); | 7960 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
| 7961 | if (res.notification & ~args.notify_types) | ||
| 7962 | dprintk("%s: unsupported notification\n", __func__); | ||
| 7963 | if (res.notification != args.notify_types) | ||
| 7964 | pdev->nocache = 1; | ||
| 7965 | |||
| 7961 | dprintk("<-- %s status=%d\n", __func__, status); | 7966 | dprintk("<-- %s status=%d\n", __func__, status); |
| 7962 | 7967 | ||
| 7963 | return status; | 7968 | return status; |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3b2b20534a3a..2782cfca2265 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
| @@ -42,7 +42,6 @@ | |||
| 42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
| 43 | #include <linux/fs.h> | 43 | #include <linux/fs.h> |
| 44 | #include <linux/nfs_fs.h> | 44 | #include <linux/nfs_fs.h> |
| 45 | #include <linux/nfs_idmap.h> | ||
| 46 | #include <linux/kthread.h> | 45 | #include <linux/kthread.h> |
| 47 | #include <linux/module.h> | 46 | #include <linux/module.h> |
| 48 | #include <linux/random.h> | 47 | #include <linux/random.h> |
| @@ -57,6 +56,7 @@ | |||
| 57 | #include "callback.h" | 56 | #include "callback.h" |
| 58 | #include "delegation.h" | 57 | #include "delegation.h" |
| 59 | #include "internal.h" | 58 | #include "internal.h" |
| 59 | #include "nfs4idmap.h" | ||
| 60 | #include "nfs4session.h" | 60 | #include "nfs4session.h" |
| 61 | #include "pnfs.h" | 61 | #include "pnfs.h" |
| 62 | #include "netns.h" | 62 | #include "netns.h" |
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 75090feeafad..6fb7cb6b3f4b 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c | |||
| @@ -3,12 +3,12 @@ | |||
| 3 | */ | 3 | */ |
| 4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
| 5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
| 6 | #include <linux/nfs_idmap.h> | ||
| 7 | #include <linux/nfs4_mount.h> | 6 | #include <linux/nfs4_mount.h> |
| 8 | #include <linux/nfs_fs.h> | 7 | #include <linux/nfs_fs.h> |
| 9 | #include "delegation.h" | 8 | #include "delegation.h" |
| 10 | #include "internal.h" | 9 | #include "internal.h" |
| 11 | #include "nfs4_fs.h" | 10 | #include "nfs4_fs.h" |
| 11 | #include "nfs4idmap.h" | ||
| 12 | #include "dns_resolve.h" | 12 | #include "dns_resolve.h" |
| 13 | #include "pnfs.h" | 13 | #include "pnfs.h" |
| 14 | #include "nfs.h" | 14 | #include "nfs.h" |
| @@ -91,10 +91,11 @@ static void nfs4_evict_inode(struct inode *inode) | |||
| 91 | { | 91 | { |
| 92 | truncate_inode_pages_final(&inode->i_data); | 92 | truncate_inode_pages_final(&inode->i_data); |
| 93 | clear_inode(inode); | 93 | clear_inode(inode); |
| 94 | pnfs_return_layout(inode); | ||
| 95 | pnfs_destroy_layout(NFS_I(inode)); | ||
| 96 | /* If we are holding a delegation, return it! */ | 94 | /* If we are holding a delegation, return it! */ |
| 97 | nfs_inode_return_delegation_noreclaim(inode); | 95 | nfs_inode_return_delegation_noreclaim(inode); |
| 96 | /* Note that above delegreturn would trigger pnfs return-on-close */ | ||
| 97 | pnfs_return_layout(inode); | ||
| 98 | pnfs_destroy_layout(NFS_I(inode)); | ||
| 98 | /* First call standard NFS clear_inode() code */ | 99 | /* First call standard NFS clear_inode() code */ |
| 99 | nfs_clear_inode(inode); | 100 | nfs_clear_inode(inode); |
| 100 | } | 101 | } |
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index b6ebe7e445f6..0fbd3ab1be22 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c | |||
| @@ -6,10 +6,10 @@ | |||
| 6 | * Copyright (c) 2006 Trond Myklebust <Trond.Myklebust@netapp.com> | 6 | * Copyright (c) 2006 Trond Myklebust <Trond.Myklebust@netapp.com> |
| 7 | */ | 7 | */ |
| 8 | #include <linux/sysctl.h> | 8 | #include <linux/sysctl.h> |
| 9 | #include <linux/nfs_idmap.h> | ||
| 10 | #include <linux/nfs_fs.h> | 9 | #include <linux/nfs_fs.h> |
| 11 | 10 | ||
| 12 | #include "nfs4_fs.h" | 11 | #include "nfs4_fs.h" |
| 12 | #include "nfs4idmap.h" | ||
| 13 | #include "callback.h" | 13 | #include "callback.h" |
| 14 | 14 | ||
| 15 | static const int nfs_set_port_min = 0; | 15 | static const int nfs_set_port_min = 0; |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5c399ec41079..0aea97841d30 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
| @@ -52,10 +52,10 @@ | |||
| 52 | #include <linux/nfs.h> | 52 | #include <linux/nfs.h> |
| 53 | #include <linux/nfs4.h> | 53 | #include <linux/nfs4.h> |
| 54 | #include <linux/nfs_fs.h> | 54 | #include <linux/nfs_fs.h> |
| 55 | #include <linux/nfs_idmap.h> | ||
| 56 | 55 | ||
| 57 | #include "nfs4_fs.h" | 56 | #include "nfs4_fs.h" |
| 58 | #include "internal.h" | 57 | #include "internal.h" |
| 58 | #include "nfs4idmap.h" | ||
| 59 | #include "nfs4session.h" | 59 | #include "nfs4session.h" |
| 60 | #include "pnfs.h" | 60 | #include "pnfs.h" |
| 61 | #include "netns.h" | 61 | #include "netns.h" |
| @@ -1920,7 +1920,7 @@ encode_getdeviceinfo(struct xdr_stream *xdr, | |||
| 1920 | 1920 | ||
| 1921 | p = reserve_space(xdr, 4 + 4); | 1921 | p = reserve_space(xdr, 4 + 4); |
| 1922 | *p++ = cpu_to_be32(1); /* bitmap length */ | 1922 | *p++ = cpu_to_be32(1); /* bitmap length */ |
| 1923 | *p++ = cpu_to_be32(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE); | 1923 | *p++ = cpu_to_be32(args->notify_types); |
| 1924 | } | 1924 | } |
| 1925 | 1925 | ||
| 1926 | static void | 1926 | static void |
| @@ -5753,8 +5753,9 @@ out_overflow: | |||
| 5753 | 5753 | ||
| 5754 | #if defined(CONFIG_NFS_V4_1) | 5754 | #if defined(CONFIG_NFS_V4_1) |
| 5755 | static int decode_getdeviceinfo(struct xdr_stream *xdr, | 5755 | static int decode_getdeviceinfo(struct xdr_stream *xdr, |
| 5756 | struct pnfs_device *pdev) | 5756 | struct nfs4_getdeviceinfo_res *res) |
| 5757 | { | 5757 | { |
| 5758 | struct pnfs_device *pdev = res->pdev; | ||
| 5758 | __be32 *p; | 5759 | __be32 *p; |
| 5759 | uint32_t len, type; | 5760 | uint32_t len, type; |
| 5760 | int status; | 5761 | int status; |
| @@ -5802,12 +5803,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, | |||
| 5802 | if (unlikely(!p)) | 5803 | if (unlikely(!p)) |
| 5803 | goto out_overflow; | 5804 | goto out_overflow; |
| 5804 | 5805 | ||
| 5805 | if (be32_to_cpup(p++) & | 5806 | res->notification = be32_to_cpup(p++); |
| 5806 | ~(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE)) { | ||
| 5807 | dprintk("%s: unsupported notification\n", | ||
| 5808 | __func__); | ||
| 5809 | } | ||
| 5810 | |||
| 5811 | for (i = 1; i < len; i++) { | 5807 | for (i = 1; i < len; i++) { |
| 5812 | if (be32_to_cpup(p++)) { | 5808 | if (be32_to_cpup(p++)) { |
| 5813 | dprintk("%s: unsupported notification\n", | 5809 | dprintk("%s: unsupported notification\n", |
| @@ -7061,7 +7057,7 @@ static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, | |||
| 7061 | status = decode_sequence(xdr, &res->seq_res, rqstp); | 7057 | status = decode_sequence(xdr, &res->seq_res, rqstp); |
| 7062 | if (status != 0) | 7058 | if (status != 0) |
| 7063 | goto out; | 7059 | goto out; |
| 7064 | status = decode_getdeviceinfo(xdr, res->pdev); | 7060 | status = decode_getdeviceinfo(xdr, res); |
| 7065 | out: | 7061 | out: |
| 7066 | return status; | 7062 | return status; |
| 7067 | } | 7063 | } |
| @@ -7365,6 +7361,11 @@ nfs4_stat_to_errno(int stat) | |||
| 7365 | .p_name = #proc, \ | 7361 | .p_name = #proc, \ |
| 7366 | } | 7362 | } |
| 7367 | 7363 | ||
| 7364 | #define STUB(proc) \ | ||
| 7365 | [NFSPROC4_CLNT_##proc] = { \ | ||
| 7366 | .p_name = #proc, \ | ||
| 7367 | } | ||
| 7368 | |||
| 7368 | struct rpc_procinfo nfs4_procedures[] = { | 7369 | struct rpc_procinfo nfs4_procedures[] = { |
| 7369 | PROC(READ, enc_read, dec_read), | 7370 | PROC(READ, enc_read, dec_read), |
| 7370 | PROC(WRITE, enc_write, dec_write), | 7371 | PROC(WRITE, enc_write, dec_write), |
| @@ -7417,6 +7418,7 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
| 7417 | PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), | 7418 | PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), |
| 7418 | PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), | 7419 | PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), |
| 7419 | PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), | 7420 | PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), |
| 7421 | STUB(GETDEVICELIST), | ||
| 7420 | PROC(BIND_CONN_TO_SESSION, | 7422 | PROC(BIND_CONN_TO_SESSION, |
| 7421 | enc_bind_conn_to_session, dec_bind_conn_to_session), | 7423 | enc_bind_conn_to_session, dec_bind_conn_to_session), |
| 7422 | PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), | 7424 | PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), |
diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c index 4eb0aead69b6..c74f7af23d77 100644 --- a/fs/nfs/nfstrace.c +++ b/fs/nfs/nfstrace.c | |||
| @@ -7,3 +7,6 @@ | |||
| 7 | 7 | ||
| 8 | #define CREATE_TRACE_POINTS | 8 | #define CREATE_TRACE_POINTS |
| 9 | #include "nfstrace.h" | 9 | #include "nfstrace.h" |
| 10 | |||
| 11 | EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter); | ||
| 12 | EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit); | ||
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 24e1d7403c0b..5aaed363556a 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
| @@ -57,7 +57,7 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d) | |||
| 57 | 57 | ||
| 58 | dprintk("%s: free od=%p\n", __func__, de->od.od); | 58 | dprintk("%s: free od=%p\n", __func__, de->od.od); |
| 59 | osduld_put_device(de->od.od); | 59 | osduld_put_device(de->od.od); |
| 60 | kfree(de); | 60 | kfree_rcu(d, rcu); |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | struct objio_segment { | 63 | struct objio_segment { |
| @@ -637,6 +637,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { | |||
| 637 | .pg_read_ops = &objio_pg_read_ops, | 637 | .pg_read_ops = &objio_pg_read_ops, |
| 638 | .pg_write_ops = &objio_pg_write_ops, | 638 | .pg_write_ops = &objio_pg_write_ops, |
| 639 | 639 | ||
| 640 | .sync = pnfs_generic_sync, | ||
| 641 | |||
| 640 | .free_deviceid_node = objio_free_deviceid_node, | 642 | .free_deviceid_node = objio_free_deviceid_node, |
| 641 | 643 | ||
| 642 | .encode_layoutcommit = objlayout_encode_layoutcommit, | 644 | .encode_layoutcommit = objlayout_encode_layoutcommit, |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4f802b02fbb9..230606243be6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
| @@ -1090,6 +1090,7 @@ bool pnfs_roc(struct inode *ino) | |||
| 1090 | pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */ | 1090 | pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */ |
| 1091 | spin_unlock(&ino->i_lock); | 1091 | spin_unlock(&ino->i_lock); |
| 1092 | pnfs_free_lseg_list(&tmp_list); | 1092 | pnfs_free_lseg_list(&tmp_list); |
| 1093 | pnfs_layoutcommit_inode(ino, true); | ||
| 1093 | return true; | 1094 | return true; |
| 1094 | 1095 | ||
| 1095 | out_noroc: | 1096 | out_noroc: |
| @@ -1104,8 +1105,10 @@ out_noroc: | |||
| 1104 | } | 1105 | } |
| 1105 | } | 1106 | } |
| 1106 | spin_unlock(&ino->i_lock); | 1107 | spin_unlock(&ino->i_lock); |
| 1107 | if (layoutreturn) | 1108 | if (layoutreturn) { |
| 1109 | pnfs_layoutcommit_inode(ino, true); | ||
| 1108 | pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); | 1110 | pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); |
| 1111 | } | ||
| 1109 | return false; | 1112 | return false; |
| 1110 | } | 1113 | } |
| 1111 | 1114 | ||
| @@ -1841,7 +1844,8 @@ void pnfs_ld_write_done(struct nfs_pgio_header *hdr) | |||
| 1841 | { | 1844 | { |
| 1842 | trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); | 1845 | trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); |
| 1843 | if (!hdr->pnfs_error) { | 1846 | if (!hdr->pnfs_error) { |
| 1844 | pnfs_set_layoutcommit(hdr); | 1847 | pnfs_set_layoutcommit(hdr->inode, hdr->lseg, |
| 1848 | hdr->mds_offset + hdr->res.count); | ||
| 1845 | hdr->mds_ops->rpc_call_done(&hdr->task, hdr); | 1849 | hdr->mds_ops->rpc_call_done(&hdr->task, hdr); |
| 1846 | } else | 1850 | } else |
| 1847 | pnfs_ld_handle_write_error(hdr); | 1851 | pnfs_ld_handle_write_error(hdr); |
| @@ -1902,7 +1906,6 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) | |||
| 1902 | pnfs_put_lseg(hdr->lseg); | 1906 | pnfs_put_lseg(hdr->lseg); |
| 1903 | nfs_pgio_header_free(hdr); | 1907 | nfs_pgio_header_free(hdr); |
| 1904 | } | 1908 | } |
| 1905 | EXPORT_SYMBOL_GPL(pnfs_writehdr_free); | ||
| 1906 | 1909 | ||
| 1907 | int | 1910 | int |
| 1908 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | 1911 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) |
| @@ -2032,7 +2035,6 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) | |||
| 2032 | pnfs_put_lseg(hdr->lseg); | 2035 | pnfs_put_lseg(hdr->lseg); |
| 2033 | nfs_pgio_header_free(hdr); | 2036 | nfs_pgio_header_free(hdr); |
| 2034 | } | 2037 | } |
| 2035 | EXPORT_SYMBOL_GPL(pnfs_readhdr_free); | ||
| 2036 | 2038 | ||
| 2037 | int | 2039 | int |
| 2038 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | 2040 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) |
| @@ -2099,64 +2101,34 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) | |||
| 2099 | EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); | 2101 | EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); |
| 2100 | 2102 | ||
| 2101 | void | 2103 | void |
| 2102 | pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) | 2104 | pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg, |
| 2105 | loff_t end_pos) | ||
| 2103 | { | 2106 | { |
| 2104 | struct inode *inode = hdr->inode; | ||
| 2105 | struct nfs_inode *nfsi = NFS_I(inode); | 2107 | struct nfs_inode *nfsi = NFS_I(inode); |
| 2106 | loff_t end_pos = hdr->mds_offset + hdr->res.count; | ||
| 2107 | bool mark_as_dirty = false; | 2108 | bool mark_as_dirty = false; |
| 2108 | 2109 | ||
| 2109 | spin_lock(&inode->i_lock); | 2110 | spin_lock(&inode->i_lock); |
| 2110 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | 2111 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { |
| 2111 | mark_as_dirty = true; | ||
| 2112 | dprintk("%s: Set layoutcommit for inode %lu ", | ||
| 2113 | __func__, inode->i_ino); | ||
| 2114 | } | ||
| 2115 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { | ||
| 2116 | /* references matched in nfs4_layoutcommit_release */ | ||
| 2117 | pnfs_get_lseg(hdr->lseg); | ||
| 2118 | } | ||
| 2119 | if (end_pos > nfsi->layout->plh_lwb) | ||
| 2120 | nfsi->layout->plh_lwb = end_pos; | 2112 | nfsi->layout->plh_lwb = end_pos; |
| 2121 | spin_unlock(&inode->i_lock); | ||
| 2122 | dprintk("%s: lseg %p end_pos %llu\n", | ||
| 2123 | __func__, hdr->lseg, nfsi->layout->plh_lwb); | ||
| 2124 | |||
| 2125 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one | ||
| 2126 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ | ||
| 2127 | if (mark_as_dirty) | ||
| 2128 | mark_inode_dirty_sync(inode); | ||
| 2129 | } | ||
| 2130 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); | ||
| 2131 | |||
| 2132 | void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data) | ||
| 2133 | { | ||
| 2134 | struct inode *inode = data->inode; | ||
| 2135 | struct nfs_inode *nfsi = NFS_I(inode); | ||
| 2136 | bool mark_as_dirty = false; | ||
| 2137 | |||
| 2138 | spin_lock(&inode->i_lock); | ||
| 2139 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | ||
| 2140 | mark_as_dirty = true; | 2113 | mark_as_dirty = true; |
| 2141 | dprintk("%s: Set layoutcommit for inode %lu ", | 2114 | dprintk("%s: Set layoutcommit for inode %lu ", |
| 2142 | __func__, inode->i_ino); | 2115 | __func__, inode->i_ino); |
| 2143 | } | 2116 | } else if (end_pos > nfsi->layout->plh_lwb) |
| 2144 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) { | 2117 | nfsi->layout->plh_lwb = end_pos; |
| 2118 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) { | ||
| 2145 | /* references matched in nfs4_layoutcommit_release */ | 2119 | /* references matched in nfs4_layoutcommit_release */ |
| 2146 | pnfs_get_lseg(data->lseg); | 2120 | pnfs_get_lseg(lseg); |
| 2147 | } | 2121 | } |
| 2148 | if (data->lwb > nfsi->layout->plh_lwb) | ||
| 2149 | nfsi->layout->plh_lwb = data->lwb; | ||
| 2150 | spin_unlock(&inode->i_lock); | 2122 | spin_unlock(&inode->i_lock); |
| 2151 | dprintk("%s: lseg %p end_pos %llu\n", | 2123 | dprintk("%s: lseg %p end_pos %llu\n", |
| 2152 | __func__, data->lseg, nfsi->layout->plh_lwb); | 2124 | __func__, lseg, nfsi->layout->plh_lwb); |
| 2153 | 2125 | ||
| 2154 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one | 2126 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one |
| 2155 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ | 2127 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ |
| 2156 | if (mark_as_dirty) | 2128 | if (mark_as_dirty) |
| 2157 | mark_inode_dirty_sync(inode); | 2129 | mark_inode_dirty_sync(inode); |
| 2158 | } | 2130 | } |
| 2159 | EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit); | 2131 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); |
| 2160 | 2132 | ||
| 2161 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) | 2133 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) |
| 2162 | { | 2134 | { |
| @@ -2216,7 +2188,6 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
| 2216 | pnfs_list_write_lseg(inode, &data->lseg_list); | 2188 | pnfs_list_write_lseg(inode, &data->lseg_list); |
| 2217 | 2189 | ||
| 2218 | end_pos = nfsi->layout->plh_lwb; | 2190 | end_pos = nfsi->layout->plh_lwb; |
| 2219 | nfsi->layout->plh_lwb = 0; | ||
| 2220 | 2191 | ||
| 2221 | nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); | 2192 | nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); |
| 2222 | spin_unlock(&inode->i_lock); | 2193 | spin_unlock(&inode->i_lock); |
| @@ -2233,11 +2204,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
| 2233 | status = ld->prepare_layoutcommit(&data->args); | 2204 | status = ld->prepare_layoutcommit(&data->args); |
| 2234 | if (status) { | 2205 | if (status) { |
| 2235 | spin_lock(&inode->i_lock); | 2206 | spin_lock(&inode->i_lock); |
| 2236 | if (end_pos < nfsi->layout->plh_lwb) | 2207 | set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); |
| 2208 | if (end_pos > nfsi->layout->plh_lwb) | ||
| 2237 | nfsi->layout->plh_lwb = end_pos; | 2209 | nfsi->layout->plh_lwb = end_pos; |
| 2238 | spin_unlock(&inode->i_lock); | 2210 | spin_unlock(&inode->i_lock); |
| 2239 | put_rpccred(data->cred); | 2211 | put_rpccred(data->cred); |
| 2240 | set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); | ||
| 2241 | goto clear_layoutcommitting; | 2212 | goto clear_layoutcommitting; |
| 2242 | } | 2213 | } |
| 2243 | } | 2214 | } |
| @@ -2258,6 +2229,13 @@ clear_layoutcommitting: | |||
| 2258 | } | 2229 | } |
| 2259 | EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); | 2230 | EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); |
| 2260 | 2231 | ||
| 2232 | int | ||
| 2233 | pnfs_generic_sync(struct inode *inode, bool datasync) | ||
| 2234 | { | ||
| 2235 | return pnfs_layoutcommit_inode(inode, true); | ||
| 2236 | } | ||
| 2237 | EXPORT_SYMBOL_GPL(pnfs_generic_sync); | ||
| 2238 | |||
| 2261 | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) | 2239 | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) |
| 2262 | { | 2240 | { |
| 2263 | struct nfs4_threshold *thp; | 2241 | struct nfs4_threshold *thp; |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 084c9144f86d..1e6308f82fc3 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
| @@ -155,6 +155,8 @@ struct pnfs_layoutdriver_type { | |||
| 155 | int how, | 155 | int how, |
| 156 | struct nfs_commit_info *cinfo); | 156 | struct nfs_commit_info *cinfo); |
| 157 | 157 | ||
| 158 | int (*sync)(struct inode *inode, bool datasync); | ||
| 159 | |||
| 158 | /* | 160 | /* |
| 159 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted | 161 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted |
| 160 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS | 162 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS |
| @@ -203,6 +205,7 @@ struct pnfs_device { | |||
| 203 | struct page **pages; | 205 | struct page **pages; |
| 204 | unsigned int pgbase; | 206 | unsigned int pgbase; |
| 205 | unsigned int pglen; /* reply buffer length */ | 207 | unsigned int pglen; /* reply buffer length */ |
| 208 | unsigned char nocache : 1;/* May not be cached */ | ||
| 206 | }; | 209 | }; |
| 207 | 210 | ||
| 208 | #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 | 211 | #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 |
| @@ -263,10 +266,11 @@ bool pnfs_roc(struct inode *ino); | |||
| 263 | void pnfs_roc_release(struct inode *ino); | 266 | void pnfs_roc_release(struct inode *ino); |
| 264 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 267 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
| 265 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); | 268 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); |
| 266 | void pnfs_set_layoutcommit(struct nfs_pgio_header *); | 269 | void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); |
| 267 | void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); | ||
| 268 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | 270 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); |
| 269 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 271 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
| 272 | int pnfs_generic_sync(struct inode *inode, bool datasync); | ||
| 273 | int pnfs_nfs_generic_sync(struct inode *inode, bool datasync); | ||
| 270 | int _pnfs_return_layout(struct inode *); | 274 | int _pnfs_return_layout(struct inode *); |
| 271 | int pnfs_commit_and_return_layout(struct inode *); | 275 | int pnfs_commit_and_return_layout(struct inode *); |
| 272 | void pnfs_ld_write_done(struct nfs_pgio_header *); | 276 | void pnfs_ld_write_done(struct nfs_pgio_header *); |
| @@ -291,6 +295,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, | |||
| 291 | enum { | 295 | enum { |
| 292 | NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ | 296 | NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ |
| 293 | NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */ | 297 | NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */ |
| 298 | NFS_DEVICEID_NOCACHE, /* device may not be cached */ | ||
| 294 | }; | 299 | }; |
| 295 | 300 | ||
| 296 | /* pnfs_dev.c */ | 301 | /* pnfs_dev.c */ |
| @@ -302,6 +307,7 @@ struct nfs4_deviceid_node { | |||
| 302 | unsigned long flags; | 307 | unsigned long flags; |
| 303 | unsigned long timestamp_unavailable; | 308 | unsigned long timestamp_unavailable; |
| 304 | struct nfs4_deviceid deviceid; | 309 | struct nfs4_deviceid deviceid; |
| 310 | struct rcu_head rcu; | ||
| 305 | atomic_t ref; | 311 | atomic_t ref; |
| 306 | }; | 312 | }; |
| 307 | 313 | ||
| @@ -486,6 +492,14 @@ pnfs_ld_read_whole_page(struct inode *inode) | |||
| 486 | return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; | 492 | return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; |
| 487 | } | 493 | } |
| 488 | 494 | ||
| 495 | static inline int | ||
| 496 | pnfs_sync_inode(struct inode *inode, bool datasync) | ||
| 497 | { | ||
| 498 | if (!pnfs_enabled_sb(NFS_SERVER(inode))) | ||
| 499 | return 0; | ||
| 500 | return NFS_SERVER(inode)->pnfs_curr_ld->sync(inode, datasync); | ||
| 501 | } | ||
| 502 | |||
| 489 | static inline bool | 503 | static inline bool |
| 490 | pnfs_layoutcommit_outstanding(struct inode *inode) | 504 | pnfs_layoutcommit_outstanding(struct inode *inode) |
| 491 | { | 505 | { |
| @@ -568,6 +582,12 @@ pnfs_ld_read_whole_page(struct inode *inode) | |||
| 568 | return false; | 582 | return false; |
| 569 | } | 583 | } |
| 570 | 584 | ||
| 585 | static inline int | ||
| 586 | pnfs_sync_inode(struct inode *inode, bool datasync) | ||
| 587 | { | ||
| 588 | return 0; | ||
| 589 | } | ||
| 590 | |||
| 571 | static inline bool | 591 | static inline bool |
| 572 | pnfs_roc(struct inode *ino) | 592 | pnfs_roc(struct inode *ino) |
| 573 | { | 593 | { |
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index aa2ec0015183..2961fcd7a2df 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c | |||
| @@ -149,6 +149,8 @@ nfs4_get_device_info(struct nfs_server *server, | |||
| 149 | */ | 149 | */ |
| 150 | d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev, | 150 | d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev, |
| 151 | gfp_flags); | 151 | gfp_flags); |
| 152 | if (d && pdev->nocache) | ||
| 153 | set_bit(NFS_DEVICEID_NOCACHE, &d->flags); | ||
| 152 | 154 | ||
| 153 | out_free_pages: | 155 | out_free_pages: |
| 154 | for (i = 0; i < max_pages; i++) | 156 | for (i = 0; i < max_pages; i++) |
| @@ -175,8 +177,8 @@ __nfs4_find_get_deviceid(struct nfs_server *server, | |||
| 175 | rcu_read_lock(); | 177 | rcu_read_lock(); |
| 176 | d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id, | 178 | d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id, |
| 177 | hash); | 179 | hash); |
| 178 | if (d != NULL) | 180 | if (d != NULL && !atomic_inc_not_zero(&d->ref)) |
| 179 | atomic_inc(&d->ref); | 181 | d = NULL; |
| 180 | rcu_read_unlock(); | 182 | rcu_read_unlock(); |
| 181 | return d; | 183 | return d; |
| 182 | } | 184 | } |
| @@ -235,12 +237,11 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
| 235 | return; | 237 | return; |
| 236 | } | 238 | } |
| 237 | hlist_del_init_rcu(&d->node); | 239 | hlist_del_init_rcu(&d->node); |
| 240 | clear_bit(NFS_DEVICEID_NOCACHE, &d->flags); | ||
| 238 | spin_unlock(&nfs4_deviceid_lock); | 241 | spin_unlock(&nfs4_deviceid_lock); |
| 239 | synchronize_rcu(); | ||
| 240 | 242 | ||
| 241 | /* balance the initial ref set in pnfs_insert_deviceid */ | 243 | /* balance the initial ref set in pnfs_insert_deviceid */ |
| 242 | if (atomic_dec_and_test(&d->ref)) | 244 | nfs4_put_deviceid_node(d); |
| 243 | d->ld->free_deviceid_node(d); | ||
| 244 | } | 245 | } |
| 245 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); | 246 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); |
| 246 | 247 | ||
| @@ -271,6 +272,11 @@ EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); | |||
| 271 | bool | 272 | bool |
| 272 | nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) | 273 | nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) |
| 273 | { | 274 | { |
| 275 | if (test_bit(NFS_DEVICEID_NOCACHE, &d->flags)) { | ||
| 276 | if (atomic_add_unless(&d->ref, -1, 2)) | ||
| 277 | return false; | ||
| 278 | nfs4_delete_deviceid(d->ld, d->nfs_client, &d->deviceid); | ||
| 279 | } | ||
| 274 | if (!atomic_dec_and_test(&d->ref)) | 280 | if (!atomic_dec_and_test(&d->ref)) |
| 275 | return false; | 281 | return false; |
| 276 | d->ld->free_deviceid_node(d); | 282 | d->ld->free_deviceid_node(d); |
| @@ -314,6 +320,7 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash) | |||
| 314 | if (d->nfs_client == clp && atomic_read(&d->ref)) { | 320 | if (d->nfs_client == clp && atomic_read(&d->ref)) { |
| 315 | hlist_del_init_rcu(&d->node); | 321 | hlist_del_init_rcu(&d->node); |
| 316 | hlist_add_head(&d->tmpnode, &tmp); | 322 | hlist_add_head(&d->tmpnode, &tmp); |
| 323 | clear_bit(NFS_DEVICEID_NOCACHE, &d->flags); | ||
| 317 | } | 324 | } |
| 318 | rcu_read_unlock(); | 325 | rcu_read_unlock(); |
| 319 | spin_unlock(&nfs4_deviceid_lock); | 326 | spin_unlock(&nfs4_deviceid_lock); |
| @@ -321,12 +328,10 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash) | |||
| 321 | if (hlist_empty(&tmp)) | 328 | if (hlist_empty(&tmp)) |
| 322 | return; | 329 | return; |
| 323 | 330 | ||
| 324 | synchronize_rcu(); | ||
| 325 | while (!hlist_empty(&tmp)) { | 331 | while (!hlist_empty(&tmp)) { |
| 326 | d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode); | 332 | d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode); |
| 327 | hlist_del(&d->tmpnode); | 333 | hlist_del(&d->tmpnode); |
| 328 | if (atomic_dec_and_test(&d->ref)) | 334 | nfs4_put_deviceid_node(d); |
| 329 | d->ld->free_deviceid_node(d); | ||
| 330 | } | 335 | } |
| 331 | } | 336 | } |
| 332 | 337 | ||
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 54e36b38fb5f..f37e25b6311c 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c | |||
| @@ -561,7 +561,7 @@ static bool load_v3_ds_connect(void) | |||
| 561 | return(get_v3_ds_connect != NULL); | 561 | return(get_v3_ds_connect != NULL); |
| 562 | } | 562 | } |
| 563 | 563 | ||
| 564 | void __exit nfs4_pnfs_v3_ds_connect_unload(void) | 564 | void nfs4_pnfs_v3_ds_connect_unload(void) |
| 565 | { | 565 | { |
| 566 | if (get_v3_ds_connect) { | 566 | if (get_v3_ds_connect) { |
| 567 | symbol_put(nfs3_set_ds_client); | 567 | symbol_put(nfs3_set_ds_client); |
| @@ -868,3 +868,13 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, | |||
| 868 | nfs_request_add_commit_list(req, list, cinfo); | 868 | nfs_request_add_commit_list(req, list, cinfo); |
| 869 | } | 869 | } |
| 870 | EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); | 870 | EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); |
| 871 | |||
| 872 | int | ||
| 873 | pnfs_nfs_generic_sync(struct inode *inode, bool datasync) | ||
| 874 | { | ||
| 875 | if (datasync) | ||
| 876 | return 0; | ||
| 877 | return pnfs_layoutcommit_inode(inode, true); | ||
| 878 | } | ||
| 879 | EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync); | ||
| 880 | |||
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a5b7427c3754..ae0ff7a11b40 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
| @@ -284,7 +284,7 @@ int nfs_readpage(struct file *file, struct page *page) | |||
| 284 | dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", | 284 | dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", |
| 285 | page, PAGE_CACHE_SIZE, page_file_index(page)); | 285 | page, PAGE_CACHE_SIZE, page_file_index(page)); |
| 286 | nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); | 286 | nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); |
| 287 | nfs_inc_stats(inode, NFSIOS_READPAGES); | 287 | nfs_add_stats(inode, NFSIOS_READPAGES, 1); |
| 288 | 288 | ||
| 289 | /* | 289 | /* |
| 290 | * Try to flush any pending writes to the file.. | 290 | * Try to flush any pending writes to the file.. |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 21f8f52bf37d..f175b833b6ba 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -43,7 +43,6 @@ | |||
| 43 | #include <linux/seq_file.h> | 43 | #include <linux/seq_file.h> |
| 44 | #include <linux/mount.h> | 44 | #include <linux/mount.h> |
| 45 | #include <linux/namei.h> | 45 | #include <linux/namei.h> |
| 46 | #include <linux/nfs_idmap.h> | ||
| 47 | #include <linux/vfs.h> | 46 | #include <linux/vfs.h> |
| 48 | #include <linux/inet.h> | 47 | #include <linux/inet.h> |
| 49 | #include <linux/in6.h> | 48 | #include <linux/in6.h> |
| @@ -2193,7 +2192,7 @@ nfs_compare_remount_data(struct nfs_server *nfss, | |||
| 2193 | data->version != nfss->nfs_client->rpc_ops->version || | 2192 | data->version != nfss->nfs_client->rpc_ops->version || |
| 2194 | data->minorversion != nfss->nfs_client->cl_minorversion || | 2193 | data->minorversion != nfss->nfs_client->cl_minorversion || |
| 2195 | data->retrans != nfss->client->cl_timeout->to_retries || | 2194 | data->retrans != nfss->client->cl_timeout->to_retries || |
| 2196 | data->selected_flavor != nfss->client->cl_auth->au_flavor || | 2195 | !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) || |
| 2197 | data->acregmin != nfss->acregmin / HZ || | 2196 | data->acregmin != nfss->acregmin / HZ || |
| 2198 | data->acregmax != nfss->acregmax / HZ || | 2197 | data->acregmax != nfss->acregmax / HZ || |
| 2199 | data->acdirmin != nfss->acdirmin / HZ || | 2198 | data->acdirmin != nfss->acdirmin / HZ || |
| @@ -2241,7 +2240,6 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) | |||
| 2241 | data->wsize = nfss->wsize; | 2240 | data->wsize = nfss->wsize; |
| 2242 | data->retrans = nfss->client->cl_timeout->to_retries; | 2241 | data->retrans = nfss->client->cl_timeout->to_retries; |
| 2243 | data->selected_flavor = nfss->client->cl_auth->au_flavor; | 2242 | data->selected_flavor = nfss->client->cl_auth->au_flavor; |
| 2244 | data->auth_info = nfss->auth_info; | ||
| 2245 | data->acregmin = nfss->acregmin / HZ; | 2243 | data->acregmin = nfss->acregmin / HZ; |
| 2246 | data->acregmax = nfss->acregmax / HZ; | 2244 | data->acregmax = nfss->acregmax / HZ; |
| 2247 | data->acdirmin = nfss->acdirmin / HZ; | 2245 | data->acdirmin = nfss->acdirmin / HZ; |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3612b4622337..d12a4be613a5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -580,7 +580,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st | |||
| 580 | int ret; | 580 | int ret; |
| 581 | 581 | ||
| 582 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); | 582 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); |
| 583 | nfs_inc_stats(inode, NFSIOS_WRITEPAGES); | 583 | nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); |
| 584 | 584 | ||
| 585 | nfs_pageio_cond_complete(pgio, page_file_index(page)); | 585 | nfs_pageio_cond_complete(pgio, page_file_index(page)); |
| 586 | ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); | 586 | ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); |
| @@ -1840,17 +1840,16 @@ EXPORT_SYMBOL_GPL(nfs_write_inode); | |||
| 1840 | */ | 1840 | */ |
| 1841 | int nfs_wb_all(struct inode *inode) | 1841 | int nfs_wb_all(struct inode *inode) |
| 1842 | { | 1842 | { |
| 1843 | struct writeback_control wbc = { | ||
| 1844 | .sync_mode = WB_SYNC_ALL, | ||
| 1845 | .nr_to_write = LONG_MAX, | ||
| 1846 | .range_start = 0, | ||
| 1847 | .range_end = LLONG_MAX, | ||
| 1848 | }; | ||
| 1849 | int ret; | 1843 | int ret; |
| 1850 | 1844 | ||
| 1851 | trace_nfs_writeback_inode_enter(inode); | 1845 | trace_nfs_writeback_inode_enter(inode); |
| 1852 | 1846 | ||
| 1853 | ret = sync_inode(inode, &wbc); | 1847 | ret = filemap_write_and_wait(inode->i_mapping); |
| 1848 | if (!ret) { | ||
| 1849 | ret = nfs_commit_inode(inode, FLUSH_SYNC); | ||
| 1850 | if (!ret) | ||
| 1851 | pnfs_sync_inode(inode, true); | ||
| 1852 | } | ||
| 1854 | 1853 | ||
| 1855 | trace_nfs_writeback_inode_exit(inode, ret); | 1854 | trace_nfs_writeback_inode_exit(inode, ret); |
| 1856 | return ret; | 1855 | return ret; |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 410abd172feb..b95f914ce083 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
| @@ -511,6 +511,7 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned | |||
| 511 | * Try to write back everything synchronously (but check the | 511 | * Try to write back everything synchronously (but check the |
| 512 | * return value!) | 512 | * return value!) |
| 513 | */ | 513 | */ |
| 514 | extern int nfs_sync_inode(struct inode *inode); | ||
| 514 | extern int nfs_wb_all(struct inode *inode); | 515 | extern int nfs_wb_all(struct inode *inode); |
| 515 | extern int nfs_wb_page(struct inode *inode, struct page* page); | 516 | extern int nfs_wb_page(struct inode *inode, struct page* page); |
| 516 | extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); | 517 | extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); |
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4cb3eaa89cf7..93ab6071bbe9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
| @@ -255,11 +255,13 @@ struct nfs4_layoutget { | |||
| 255 | struct nfs4_getdeviceinfo_args { | 255 | struct nfs4_getdeviceinfo_args { |
| 256 | struct nfs4_sequence_args seq_args; | 256 | struct nfs4_sequence_args seq_args; |
| 257 | struct pnfs_device *pdev; | 257 | struct pnfs_device *pdev; |
| 258 | __u32 notify_types; | ||
| 258 | }; | 259 | }; |
| 259 | 260 | ||
| 260 | struct nfs4_getdeviceinfo_res { | 261 | struct nfs4_getdeviceinfo_res { |
| 261 | struct nfs4_sequence_res seq_res; | 262 | struct nfs4_sequence_res seq_res; |
| 262 | struct pnfs_device *pdev; | 263 | struct pnfs_device *pdev; |
| 264 | __u32 notification; | ||
| 263 | }; | 265 | }; |
| 264 | 266 | ||
| 265 | struct nfs4_layoutcommit_args { | 267 | struct nfs4_layoutcommit_args { |
| @@ -1271,11 +1273,15 @@ struct nfs42_falloc_args { | |||
| 1271 | nfs4_stateid falloc_stateid; | 1273 | nfs4_stateid falloc_stateid; |
| 1272 | u64 falloc_offset; | 1274 | u64 falloc_offset; |
| 1273 | u64 falloc_length; | 1275 | u64 falloc_length; |
| 1276 | const u32 *falloc_bitmask; | ||
| 1274 | }; | 1277 | }; |
| 1275 | 1278 | ||
| 1276 | struct nfs42_falloc_res { | 1279 | struct nfs42_falloc_res { |
| 1277 | struct nfs4_sequence_res seq_res; | 1280 | struct nfs4_sequence_res seq_res; |
| 1278 | unsigned int status; | 1281 | unsigned int status; |
| 1282 | |||
| 1283 | struct nfs_fattr *falloc_fattr; | ||
| 1284 | const struct nfs_server *falloc_server; | ||
| 1279 | }; | 1285 | }; |
| 1280 | 1286 | ||
| 1281 | struct nfs42_seek_args { | 1287 | struct nfs42_seek_args { |
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index aadc6a04e1ac..807371357160 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h | |||
| @@ -142,12 +142,18 @@ typedef __be32 rpc_fraghdr; | |||
| 142 | (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4)) | 142 | (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4)) |
| 143 | 143 | ||
| 144 | /* | 144 | /* |
| 145 | * RFC1833/RFC3530 rpcbind (v3+) well-known netid's. | 145 | * Well-known netids. See: |
| 146 | * | ||
| 147 | * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml | ||
| 146 | */ | 148 | */ |
| 147 | #define RPCBIND_NETID_UDP "udp" | 149 | #define RPCBIND_NETID_UDP "udp" |
| 148 | #define RPCBIND_NETID_TCP "tcp" | 150 | #define RPCBIND_NETID_TCP "tcp" |
| 151 | #define RPCBIND_NETID_RDMA "rdma" | ||
| 152 | #define RPCBIND_NETID_SCTP "sctp" | ||
| 149 | #define RPCBIND_NETID_UDP6 "udp6" | 153 | #define RPCBIND_NETID_UDP6 "udp6" |
| 150 | #define RPCBIND_NETID_TCP6 "tcp6" | 154 | #define RPCBIND_NETID_TCP6 "tcp6" |
| 155 | #define RPCBIND_NETID_RDMA6 "rdma6" | ||
| 156 | #define RPCBIND_NETID_SCTP6 "sctp6" | ||
| 151 | #define RPCBIND_NETID_LOCAL "local" | 157 | #define RPCBIND_NETID_LOCAL "local" |
| 152 | 158 | ||
| 153 | /* | 159 | /* |
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 64a0a0a97b23..c984c85981ea 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h | |||
| @@ -41,11 +41,6 @@ | |||
| 41 | #define _LINUX_SUNRPC_XPRTRDMA_H | 41 | #define _LINUX_SUNRPC_XPRTRDMA_H |
| 42 | 42 | ||
| 43 | /* | 43 | /* |
| 44 | * rpcbind (v3+) RDMA netid. | ||
| 45 | */ | ||
| 46 | #define RPCBIND_NETID_RDMA "rdma" | ||
| 47 | |||
| 48 | /* | ||
| 49 | * Constants. Max RPC/NFS header is big enough to account for | 44 | * Constants. Max RPC/NFS header is big enough to account for |
| 50 | * additional marshaling buffers passed down by Linux client. | 45 | * additional marshaling buffers passed down by Linux client. |
| 51 | * | 46 | * |
diff --git a/include/uapi/linux/nfs_idmap.h b/include/uapi/linux/nfs_idmap.h index 8d4b1c7b24d4..038e36c96669 100644 --- a/include/uapi/linux/nfs_idmap.h +++ b/include/uapi/linux/nfs_idmap.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * include/linux/nfs_idmap.h | 2 | * include/uapi/linux/nfs_idmap.h |
| 3 | * | 3 | * |
| 4 | * UID and GID to name mapping for clients. | 4 | * UID and GID to name mapping for clients. |
| 5 | * | 5 | * |
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b91fd9c597b4..337ca851a350 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
| @@ -89,8 +89,8 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) | |||
| 89 | if (!task->tk_timeout) | 89 | if (!task->tk_timeout) |
| 90 | return; | 90 | return; |
| 91 | 91 | ||
| 92 | dprintk("RPC: %5u setting alarm for %lu ms\n", | 92 | dprintk("RPC: %5u setting alarm for %u ms\n", |
| 93 | task->tk_pid, task->tk_timeout * 1000 / HZ); | 93 | task->tk_pid, jiffies_to_msecs(task->tk_timeout)); |
| 94 | 94 | ||
| 95 | task->u.tk_wait.expires = jiffies + task->tk_timeout; | 95 | task->u.tk_wait.expires = jiffies + task->tk_timeout; |
| 96 | if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) | 96 | if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9949722d99ce..1d4fe24af06a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
| @@ -326,6 +326,15 @@ out_unlock: | |||
| 326 | xprt_clear_locked(xprt); | 326 | xprt_clear_locked(xprt); |
| 327 | } | 327 | } |
| 328 | 328 | ||
| 329 | static void xprt_task_clear_bytes_sent(struct rpc_task *task) | ||
| 330 | { | ||
| 331 | if (task != NULL) { | ||
| 332 | struct rpc_rqst *req = task->tk_rqstp; | ||
| 333 | if (req != NULL) | ||
| 334 | req->rq_bytes_sent = 0; | ||
| 335 | } | ||
| 336 | } | ||
| 337 | |||
| 329 | /** | 338 | /** |
| 330 | * xprt_release_xprt - allow other requests to use a transport | 339 | * xprt_release_xprt - allow other requests to use a transport |
| 331 | * @xprt: transport with other tasks potentially waiting | 340 | * @xprt: transport with other tasks potentially waiting |
| @@ -336,11 +345,7 @@ out_unlock: | |||
| 336 | void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) | 345 | void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) |
| 337 | { | 346 | { |
| 338 | if (xprt->snd_task == task) { | 347 | if (xprt->snd_task == task) { |
| 339 | if (task != NULL) { | 348 | xprt_task_clear_bytes_sent(task); |
| 340 | struct rpc_rqst *req = task->tk_rqstp; | ||
| 341 | if (req != NULL) | ||
| 342 | req->rq_bytes_sent = 0; | ||
| 343 | } | ||
| 344 | xprt_clear_locked(xprt); | 349 | xprt_clear_locked(xprt); |
| 345 | __xprt_lock_write_next(xprt); | 350 | __xprt_lock_write_next(xprt); |
| 346 | } | 351 | } |
| @@ -358,11 +363,7 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt); | |||
| 358 | void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) | 363 | void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) |
| 359 | { | 364 | { |
| 360 | if (xprt->snd_task == task) { | 365 | if (xprt->snd_task == task) { |
| 361 | if (task != NULL) { | 366 | xprt_task_clear_bytes_sent(task); |
| 362 | struct rpc_rqst *req = task->tk_rqstp; | ||
| 363 | if (req != NULL) | ||
| 364 | req->rq_bytes_sent = 0; | ||
| 365 | } | ||
| 366 | xprt_clear_locked(xprt); | 367 | xprt_clear_locked(xprt); |
| 367 | __xprt_lock_write_next_cong(xprt); | 368 | __xprt_lock_write_next_cong(xprt); |
| 368 | } | 369 | } |
| @@ -700,6 +701,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt, | |||
| 700 | goto out; | 701 | goto out; |
| 701 | if (xprt->snd_task != task) | 702 | if (xprt->snd_task != task) |
| 702 | goto out; | 703 | goto out; |
| 704 | xprt_task_clear_bytes_sent(task); | ||
| 703 | xprt->snd_task = cookie; | 705 | xprt->snd_task = cookie; |
| 704 | ret = true; | 706 | ret = true; |
| 705 | out: | 707 | out: |
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index da5136fd5694..579f72bbcf4b 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o | 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o |
| 2 | 2 | ||
| 3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o | 3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o \ |
| 4 | fmr_ops.o frwr_ops.o physical_ops.o | ||
| 4 | 5 | ||
| 5 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o | 6 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o |
| 6 | 7 | ||
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c new file mode 100644 index 000000000000..302d4ebf6fbf --- /dev/null +++ b/net/sunrpc/xprtrdma/fmr_ops.c | |||
| @@ -0,0 +1,208 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
| 3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
| 4 | */ | ||
| 5 | |||
| 6 | /* Lightweight memory registration using Fast Memory Regions (FMR). | ||
| 7 | * Referred to sometimes as MTHCAFMR mode. | ||
| 8 | * | ||
| 9 | * FMR uses synchronous memory registration and deregistration. | ||
| 10 | * FMR registration is known to be fast, but FMR deregistration | ||
| 11 | * can take tens of usecs to complete. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include "xprt_rdma.h" | ||
| 15 | |||
| 16 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
| 17 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
| 18 | #endif | ||
| 19 | |||
| 20 | /* Maximum scatter/gather per FMR */ | ||
| 21 | #define RPCRDMA_MAX_FMR_SGES (64) | ||
| 22 | |||
| 23 | static int | ||
| 24 | fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
| 25 | struct rpcrdma_create_data_internal *cdata) | ||
| 26 | { | ||
| 27 | return 0; | ||
| 28 | } | ||
| 29 | |||
| 30 | /* FMR mode conveys up to 64 pages of payload per chunk segment. | ||
| 31 | */ | ||
| 32 | static size_t | ||
| 33 | fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
| 34 | { | ||
| 35 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
| 36 | rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES); | ||
| 37 | } | ||
| 38 | |||
| 39 | static int | ||
| 40 | fmr_op_init(struct rpcrdma_xprt *r_xprt) | ||
| 41 | { | ||
| 42 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
| 43 | int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; | ||
| 44 | struct ib_fmr_attr fmr_attr = { | ||
| 45 | .max_pages = RPCRDMA_MAX_FMR_SGES, | ||
| 46 | .max_maps = 1, | ||
| 47 | .page_shift = PAGE_SHIFT | ||
| 48 | }; | ||
| 49 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
| 50 | struct rpcrdma_mw *r; | ||
| 51 | int i, rc; | ||
| 52 | |||
| 53 | INIT_LIST_HEAD(&buf->rb_mws); | ||
| 54 | INIT_LIST_HEAD(&buf->rb_all); | ||
| 55 | |||
| 56 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
| 57 | dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); | ||
| 58 | |||
| 59 | while (i--) { | ||
| 60 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
| 61 | if (!r) | ||
| 62 | return -ENOMEM; | ||
| 63 | |||
| 64 | r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); | ||
| 65 | if (IS_ERR(r->r.fmr)) | ||
| 66 | goto out_fmr_err; | ||
| 67 | |||
| 68 | list_add(&r->mw_list, &buf->rb_mws); | ||
| 69 | list_add(&r->mw_all, &buf->rb_all); | ||
| 70 | } | ||
| 71 | return 0; | ||
| 72 | |||
| 73 | out_fmr_err: | ||
| 74 | rc = PTR_ERR(r->r.fmr); | ||
| 75 | dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); | ||
| 76 | kfree(r); | ||
| 77 | return rc; | ||
| 78 | } | ||
| 79 | |||
| 80 | /* Use the ib_map_phys_fmr() verb to register a memory region | ||
| 81 | * for remote access via RDMA READ or RDMA WRITE. | ||
| 82 | */ | ||
| 83 | static int | ||
| 84 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
| 85 | int nsegs, bool writing) | ||
| 86 | { | ||
| 87 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
| 88 | struct ib_device *device = ia->ri_id->device; | ||
| 89 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | ||
| 90 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 91 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
| 92 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
| 93 | int len, pageoff, i, rc; | ||
| 94 | |||
| 95 | pageoff = offset_in_page(seg1->mr_offset); | ||
| 96 | seg1->mr_offset -= pageoff; /* start of page */ | ||
| 97 | seg1->mr_len += pageoff; | ||
| 98 | len = -pageoff; | ||
| 99 | if (nsegs > RPCRDMA_MAX_FMR_SGES) | ||
| 100 | nsegs = RPCRDMA_MAX_FMR_SGES; | ||
| 101 | for (i = 0; i < nsegs;) { | ||
| 102 | rpcrdma_map_one(device, seg, direction); | ||
| 103 | physaddrs[i] = seg->mr_dma; | ||
| 104 | len += seg->mr_len; | ||
| 105 | ++seg; | ||
| 106 | ++i; | ||
| 107 | /* Check for holes */ | ||
| 108 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
| 109 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | |||
| 113 | rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma); | ||
| 114 | if (rc) | ||
| 115 | goto out_maperr; | ||
| 116 | |||
| 117 | seg1->mr_rkey = mw->r.fmr->rkey; | ||
| 118 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
| 119 | seg1->mr_nsegs = i; | ||
| 120 | seg1->mr_len = len; | ||
| 121 | return i; | ||
| 122 | |||
| 123 | out_maperr: | ||
| 124 | dprintk("RPC: %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", | ||
| 125 | __func__, len, (unsigned long long)seg1->mr_dma, | ||
| 126 | pageoff, i, rc); | ||
| 127 | while (i--) | ||
| 128 | rpcrdma_unmap_one(device, --seg); | ||
| 129 | return rc; | ||
| 130 | } | ||
| 131 | |||
| 132 | /* Use the ib_unmap_fmr() verb to prevent further remote | ||
| 133 | * access via RDMA READ or RDMA WRITE. | ||
| 134 | */ | ||
| 135 | static int | ||
| 136 | fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
| 137 | { | ||
| 138 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
| 139 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 140 | struct ib_device *device; | ||
| 141 | int rc, nsegs = seg->mr_nsegs; | ||
| 142 | LIST_HEAD(l); | ||
| 143 | |||
| 144 | list_add(&seg1->rl_mw->r.fmr->list, &l); | ||
| 145 | rc = ib_unmap_fmr(&l); | ||
| 146 | read_lock(&ia->ri_qplock); | ||
| 147 | device = ia->ri_id->device; | ||
| 148 | while (seg1->mr_nsegs--) | ||
| 149 | rpcrdma_unmap_one(device, seg++); | ||
| 150 | read_unlock(&ia->ri_qplock); | ||
| 151 | if (rc) | ||
| 152 | goto out_err; | ||
| 153 | return nsegs; | ||
| 154 | |||
| 155 | out_err: | ||
| 156 | dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); | ||
| 157 | return nsegs; | ||
| 158 | } | ||
| 159 | |||
| 160 | /* After a disconnect, unmap all FMRs. | ||
| 161 | * | ||
| 162 | * This is invoked only in the transport connect worker in order | ||
| 163 | * to serialize with rpcrdma_register_fmr_external(). | ||
| 164 | */ | ||
| 165 | static void | ||
| 166 | fmr_op_reset(struct rpcrdma_xprt *r_xprt) | ||
| 167 | { | ||
| 168 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
| 169 | struct rpcrdma_mw *r; | ||
| 170 | LIST_HEAD(list); | ||
| 171 | int rc; | ||
| 172 | |||
| 173 | list_for_each_entry(r, &buf->rb_all, mw_all) | ||
| 174 | list_add(&r->r.fmr->list, &list); | ||
| 175 | |||
| 176 | rc = ib_unmap_fmr(&list); | ||
| 177 | if (rc) | ||
| 178 | dprintk("RPC: %s: ib_unmap_fmr failed %i\n", | ||
| 179 | __func__, rc); | ||
| 180 | } | ||
| 181 | |||
| 182 | static void | ||
| 183 | fmr_op_destroy(struct rpcrdma_buffer *buf) | ||
| 184 | { | ||
| 185 | struct rpcrdma_mw *r; | ||
| 186 | int rc; | ||
| 187 | |||
| 188 | while (!list_empty(&buf->rb_all)) { | ||
| 189 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
| 190 | list_del(&r->mw_all); | ||
| 191 | rc = ib_dealloc_fmr(r->r.fmr); | ||
| 192 | if (rc) | ||
| 193 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | ||
| 194 | __func__, rc); | ||
| 195 | kfree(r); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | |||
| 199 | const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { | ||
| 200 | .ro_map = fmr_op_map, | ||
| 201 | .ro_unmap = fmr_op_unmap, | ||
| 202 | .ro_open = fmr_op_open, | ||
| 203 | .ro_maxpages = fmr_op_maxpages, | ||
| 204 | .ro_init = fmr_op_init, | ||
| 205 | .ro_reset = fmr_op_reset, | ||
| 206 | .ro_destroy = fmr_op_destroy, | ||
| 207 | .ro_displayname = "fmr", | ||
| 208 | }; | ||
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c new file mode 100644 index 000000000000..dff0481dbcf8 --- /dev/null +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
| @@ -0,0 +1,353 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
| 3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
| 4 | */ | ||
| 5 | |||
| 6 | /* Lightweight memory registration using Fast Registration Work | ||
| 7 | * Requests (FRWR). Also referred to sometimes as FRMR mode. | ||
| 8 | * | ||
| 9 | * FRWR features ordered asynchronous registration and deregistration | ||
| 10 | * of arbitrarily sized memory regions. This is the fastest and safest | ||
| 11 | * but most complex memory registration mode. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include "xprt_rdma.h" | ||
| 15 | |||
| 16 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
| 17 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
| 18 | #endif | ||
| 19 | |||
| 20 | static int | ||
| 21 | __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, | ||
| 22 | unsigned int depth) | ||
| 23 | { | ||
| 24 | struct rpcrdma_frmr *f = &r->r.frmr; | ||
| 25 | int rc; | ||
| 26 | |||
| 27 | f->fr_mr = ib_alloc_fast_reg_mr(pd, depth); | ||
| 28 | if (IS_ERR(f->fr_mr)) | ||
| 29 | goto out_mr_err; | ||
| 30 | f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); | ||
| 31 | if (IS_ERR(f->fr_pgl)) | ||
| 32 | goto out_list_err; | ||
| 33 | return 0; | ||
| 34 | |||
| 35 | out_mr_err: | ||
| 36 | rc = PTR_ERR(f->fr_mr); | ||
| 37 | dprintk("RPC: %s: ib_alloc_fast_reg_mr status %i\n", | ||
| 38 | __func__, rc); | ||
| 39 | return rc; | ||
| 40 | |||
| 41 | out_list_err: | ||
| 42 | rc = PTR_ERR(f->fr_pgl); | ||
| 43 | dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", | ||
| 44 | __func__, rc); | ||
| 45 | ib_dereg_mr(f->fr_mr); | ||
| 46 | return rc; | ||
| 47 | } | ||
| 48 | |||
| 49 | static void | ||
| 50 | __frwr_release(struct rpcrdma_mw *r) | ||
| 51 | { | ||
| 52 | int rc; | ||
| 53 | |||
| 54 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
| 55 | if (rc) | ||
| 56 | dprintk("RPC: %s: ib_dereg_mr status %i\n", | ||
| 57 | __func__, rc); | ||
| 58 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
| 59 | } | ||
| 60 | |||
| 61 | static int | ||
| 62 | frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
| 63 | struct rpcrdma_create_data_internal *cdata) | ||
| 64 | { | ||
| 65 | struct ib_device_attr *devattr = &ia->ri_devattr; | ||
| 66 | int depth, delta; | ||
| 67 | |||
| 68 | ia->ri_max_frmr_depth = | ||
| 69 | min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
| 70 | devattr->max_fast_reg_page_list_len); | ||
| 71 | dprintk("RPC: %s: device's max FR page list len = %u\n", | ||
| 72 | __func__, ia->ri_max_frmr_depth); | ||
| 73 | |||
| 74 | /* Add room for frmr register and invalidate WRs. | ||
| 75 | * 1. FRMR reg WR for head | ||
| 76 | * 2. FRMR invalidate WR for head | ||
| 77 | * 3. N FRMR reg WRs for pagelist | ||
| 78 | * 4. N FRMR invalidate WRs for pagelist | ||
| 79 | * 5. FRMR reg WR for tail | ||
| 80 | * 6. FRMR invalidate WR for tail | ||
| 81 | * 7. The RDMA_SEND WR | ||
| 82 | */ | ||
| 83 | depth = 7; | ||
| 84 | |||
| 85 | /* Calculate N if the device max FRMR depth is smaller than | ||
| 86 | * RPCRDMA_MAX_DATA_SEGS. | ||
| 87 | */ | ||
| 88 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
| 89 | delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; | ||
| 90 | do { | ||
| 91 | depth += 2; /* FRMR reg + invalidate */ | ||
| 92 | delta -= ia->ri_max_frmr_depth; | ||
| 93 | } while (delta > 0); | ||
| 94 | } | ||
| 95 | |||
| 96 | ep->rep_attr.cap.max_send_wr *= depth; | ||
| 97 | if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { | ||
| 98 | cdata->max_requests = devattr->max_qp_wr / depth; | ||
| 99 | if (!cdata->max_requests) | ||
| 100 | return -EINVAL; | ||
| 101 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * | ||
| 102 | depth; | ||
| 103 | } | ||
| 104 | |||
| 105 | return 0; | ||
| 106 | } | ||
| 107 | |||
| 108 | /* FRWR mode conveys a list of pages per chunk segment. The | ||
| 109 | * maximum length of that list is the FRWR page list depth. | ||
| 110 | */ | ||
| 111 | static size_t | ||
| 112 | frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
| 113 | { | ||
| 114 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
| 115 | |||
| 116 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
| 117 | rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); | ||
| 118 | } | ||
| 119 | |||
| 120 | /* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */ | ||
| 121 | static void | ||
| 122 | frwr_sendcompletion(struct ib_wc *wc) | ||
| 123 | { | ||
| 124 | struct rpcrdma_mw *r; | ||
| 125 | |||
| 126 | if (likely(wc->status == IB_WC_SUCCESS)) | ||
| 127 | return; | ||
| 128 | |||
| 129 | /* WARNING: Only wr_id and status are reliable at this point */ | ||
| 130 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | ||
| 131 | dprintk("RPC: %s: frmr %p (stale), status %d\n", | ||
| 132 | __func__, r, wc->status); | ||
| 133 | r->r.frmr.fr_state = FRMR_IS_STALE; | ||
| 134 | } | ||
| 135 | |||
| 136 | static int | ||
| 137 | frwr_op_init(struct rpcrdma_xprt *r_xprt) | ||
| 138 | { | ||
| 139 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
| 140 | struct ib_device *device = r_xprt->rx_ia.ri_id->device; | ||
| 141 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
| 142 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
| 143 | int i; | ||
| 144 | |||
| 145 | INIT_LIST_HEAD(&buf->rb_mws); | ||
| 146 | INIT_LIST_HEAD(&buf->rb_all); | ||
| 147 | |||
| 148 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
| 149 | dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); | ||
| 150 | |||
| 151 | while (i--) { | ||
| 152 | struct rpcrdma_mw *r; | ||
| 153 | int rc; | ||
| 154 | |||
| 155 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
| 156 | if (!r) | ||
| 157 | return -ENOMEM; | ||
| 158 | |||
| 159 | rc = __frwr_init(r, pd, device, depth); | ||
| 160 | if (rc) { | ||
| 161 | kfree(r); | ||
| 162 | return rc; | ||
| 163 | } | ||
| 164 | |||
| 165 | list_add(&r->mw_list, &buf->rb_mws); | ||
| 166 | list_add(&r->mw_all, &buf->rb_all); | ||
| 167 | r->mw_sendcompletion = frwr_sendcompletion; | ||
| 168 | } | ||
| 169 | |||
| 170 | return 0; | ||
| 171 | } | ||
| 172 | |||
| 173 | /* Post a FAST_REG Work Request to register a memory region | ||
| 174 | * for remote access via RDMA READ or RDMA WRITE. | ||
| 175 | */ | ||
| 176 | static int | ||
| 177 | frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
| 178 | int nsegs, bool writing) | ||
| 179 | { | ||
| 180 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
| 181 | struct ib_device *device = ia->ri_id->device; | ||
| 182 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | ||
| 183 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 184 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
| 185 | struct rpcrdma_frmr *frmr = &mw->r.frmr; | ||
| 186 | struct ib_mr *mr = frmr->fr_mr; | ||
| 187 | struct ib_send_wr fastreg_wr, *bad_wr; | ||
| 188 | u8 key; | ||
| 189 | int len, pageoff; | ||
| 190 | int i, rc; | ||
| 191 | int seg_len; | ||
| 192 | u64 pa; | ||
| 193 | int page_no; | ||
| 194 | |||
| 195 | pageoff = offset_in_page(seg1->mr_offset); | ||
| 196 | seg1->mr_offset -= pageoff; /* start of page */ | ||
| 197 | seg1->mr_len += pageoff; | ||
| 198 | len = -pageoff; | ||
| 199 | if (nsegs > ia->ri_max_frmr_depth) | ||
| 200 | nsegs = ia->ri_max_frmr_depth; | ||
| 201 | for (page_no = i = 0; i < nsegs;) { | ||
| 202 | rpcrdma_map_one(device, seg, direction); | ||
| 203 | pa = seg->mr_dma; | ||
| 204 | for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { | ||
| 205 | frmr->fr_pgl->page_list[page_no++] = pa; | ||
| 206 | pa += PAGE_SIZE; | ||
| 207 | } | ||
| 208 | len += seg->mr_len; | ||
| 209 | ++seg; | ||
| 210 | ++i; | ||
| 211 | /* Check for holes */ | ||
| 212 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
| 213 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
| 214 | break; | ||
| 215 | } | ||
| 216 | dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", | ||
| 217 | __func__, mw, i, len); | ||
| 218 | |||
| 219 | frmr->fr_state = FRMR_IS_VALID; | ||
| 220 | |||
| 221 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | ||
| 222 | fastreg_wr.wr_id = (unsigned long)(void *)mw; | ||
| 223 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
| 224 | fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff; | ||
| 225 | fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; | ||
| 226 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
| 227 | fastreg_wr.wr.fast_reg.page_list_len = page_no; | ||
| 228 | fastreg_wr.wr.fast_reg.length = len; | ||
| 229 | fastreg_wr.wr.fast_reg.access_flags = writing ? | ||
| 230 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | ||
| 231 | IB_ACCESS_REMOTE_READ; | ||
| 232 | key = (u8)(mr->rkey & 0x000000FF); | ||
| 233 | ib_update_fast_reg_key(mr, ++key); | ||
| 234 | fastreg_wr.wr.fast_reg.rkey = mr->rkey; | ||
| 235 | |||
| 236 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 237 | rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); | ||
| 238 | if (rc) | ||
| 239 | goto out_senderr; | ||
| 240 | |||
| 241 | seg1->mr_rkey = mr->rkey; | ||
| 242 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
| 243 | seg1->mr_nsegs = i; | ||
| 244 | seg1->mr_len = len; | ||
| 245 | return i; | ||
| 246 | |||
| 247 | out_senderr: | ||
| 248 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | ||
| 249 | ib_update_fast_reg_key(mr, --key); | ||
| 250 | frmr->fr_state = FRMR_IS_INVALID; | ||
| 251 | while (i--) | ||
| 252 | rpcrdma_unmap_one(device, --seg); | ||
| 253 | return rc; | ||
| 254 | } | ||
| 255 | |||
| 256 | /* Post a LOCAL_INV Work Request to prevent further remote access | ||
| 257 | * via RDMA READ or RDMA WRITE. | ||
| 258 | */ | ||
| 259 | static int | ||
| 260 | frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
| 261 | { | ||
| 262 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 263 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
| 264 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
| 265 | int rc, nsegs = seg->mr_nsegs; | ||
| 266 | struct ib_device *device; | ||
| 267 | |||
| 268 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; | ||
| 269 | |||
| 270 | memset(&invalidate_wr, 0, sizeof(invalidate_wr)); | ||
| 271 | invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; | ||
| 272 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
| 273 | invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; | ||
| 274 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 275 | |||
| 276 | read_lock(&ia->ri_qplock); | ||
| 277 | device = ia->ri_id->device; | ||
| 278 | while (seg1->mr_nsegs--) | ||
| 279 | rpcrdma_unmap_one(device, seg++); | ||
| 280 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
| 281 | read_unlock(&ia->ri_qplock); | ||
| 282 | if (rc) | ||
| 283 | goto out_err; | ||
| 284 | return nsegs; | ||
| 285 | |||
| 286 | out_err: | ||
| 287 | /* Force rpcrdma_buffer_get() to retry */ | ||
| 288 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; | ||
| 289 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | ||
| 290 | return nsegs; | ||
| 291 | } | ||
| 292 | |||
| 293 | /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in | ||
| 294 | * an unusable state. Find FRMRs in this state and dereg / reg | ||
| 295 | * each. FRMRs that are VALID and attached to an rpcrdma_req are | ||
| 296 | * also torn down. | ||
| 297 | * | ||
| 298 | * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. | ||
| 299 | * | ||
| 300 | * This is invoked only in the transport connect worker in order | ||
| 301 | * to serialize with rpcrdma_register_frmr_external(). | ||
| 302 | */ | ||
| 303 | static void | ||
| 304 | frwr_op_reset(struct rpcrdma_xprt *r_xprt) | ||
| 305 | { | ||
| 306 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
| 307 | struct ib_device *device = r_xprt->rx_ia.ri_id->device; | ||
| 308 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
| 309 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
| 310 | struct rpcrdma_mw *r; | ||
| 311 | int rc; | ||
| 312 | |||
| 313 | list_for_each_entry(r, &buf->rb_all, mw_all) { | ||
| 314 | if (r->r.frmr.fr_state == FRMR_IS_INVALID) | ||
| 315 | continue; | ||
| 316 | |||
| 317 | __frwr_release(r); | ||
| 318 | rc = __frwr_init(r, pd, device, depth); | ||
| 319 | if (rc) { | ||
| 320 | dprintk("RPC: %s: mw %p left %s\n", | ||
| 321 | __func__, r, | ||
| 322 | (r->r.frmr.fr_state == FRMR_IS_STALE ? | ||
| 323 | "stale" : "valid")); | ||
| 324 | continue; | ||
| 325 | } | ||
| 326 | |||
| 327 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
| 328 | } | ||
| 329 | } | ||
| 330 | |||
| 331 | static void | ||
| 332 | frwr_op_destroy(struct rpcrdma_buffer *buf) | ||
| 333 | { | ||
| 334 | struct rpcrdma_mw *r; | ||
| 335 | |||
| 336 | while (!list_empty(&buf->rb_all)) { | ||
| 337 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
| 338 | list_del(&r->mw_all); | ||
| 339 | __frwr_release(r); | ||
| 340 | kfree(r); | ||
| 341 | } | ||
| 342 | } | ||
| 343 | |||
| 344 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { | ||
| 345 | .ro_map = frwr_op_map, | ||
| 346 | .ro_unmap = frwr_op_unmap, | ||
| 347 | .ro_open = frwr_op_open, | ||
| 348 | .ro_maxpages = frwr_op_maxpages, | ||
| 349 | .ro_init = frwr_op_init, | ||
| 350 | .ro_reset = frwr_op_reset, | ||
| 351 | .ro_destroy = frwr_op_destroy, | ||
| 352 | .ro_displayname = "frwr", | ||
| 353 | }; | ||
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c new file mode 100644 index 000000000000..ba518af16787 --- /dev/null +++ b/net/sunrpc/xprtrdma/physical_ops.c | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
| 3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
| 4 | */ | ||
| 5 | |||
| 6 | /* No-op chunk preparation. All client memory is pre-registered. | ||
| 7 | * Sometimes referred to as ALLPHYSICAL mode. | ||
| 8 | * | ||
| 9 | * Physical registration is simple because all client memory is | ||
| 10 | * pre-registered and never deregistered. This mode is good for | ||
| 11 | * adapter bring up, but is considered not safe: the server is | ||
| 12 | * trusted not to abuse its access to client memory not involved | ||
| 13 | * in RDMA I/O. | ||
| 14 | */ | ||
| 15 | |||
| 16 | #include "xprt_rdma.h" | ||
| 17 | |||
| 18 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
| 19 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
| 20 | #endif | ||
| 21 | |||
/* Physical registration leaves the endpoint attributes and create
 * data untouched; this always reports success.
 */
static int
physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
		 struct rpcrdma_create_data_internal *cdata)
{
	return 0;
}
| 28 | |||
| 29 | /* PHYSICAL memory registration conveys one page per chunk segment. | ||
| 30 | */ | ||
| 31 | static size_t | ||
| 32 | physical_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
| 33 | { | ||
| 34 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
| 35 | rpcrdma_max_segments(r_xprt)); | ||
| 36 | } | ||
| 37 | |||
/* No per-transport registration resources are set up in physical
 * mode; this always reports success.
 */
static int
physical_op_init(struct rpcrdma_xprt *r_xprt)
{
	return 0;
}
| 43 | |||
| 44 | /* The client's physical memory is already exposed for | ||
| 45 | * remote access via RDMA READ or RDMA WRITE. | ||
| 46 | */ | ||
| 47 | static int | ||
| 48 | physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
| 49 | int nsegs, bool writing) | ||
| 50 | { | ||
| 51 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
| 52 | |||
| 53 | rpcrdma_map_one(ia->ri_id->device, seg, | ||
| 54 | rpcrdma_data_dir(writing)); | ||
| 55 | seg->mr_rkey = ia->ri_bind_mem->rkey; | ||
| 56 | seg->mr_base = seg->mr_dma; | ||
| 57 | seg->mr_nsegs = 1; | ||
| 58 | return 1; | ||
| 59 | } | ||
| 60 | |||
| 61 | /* Unmap a memory region, but leave it registered. | ||
| 62 | */ | ||
| 63 | static int | ||
| 64 | physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
| 65 | { | ||
| 66 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
| 67 | |||
| 68 | read_lock(&ia->ri_qplock); | ||
| 69 | rpcrdma_unmap_one(ia->ri_id->device, seg); | ||
| 70 | read_unlock(&ia->ri_qplock); | ||
| 71 | |||
| 72 | return 1; | ||
| 73 | } | ||
| 74 | |||
/* Intentionally empty: physical mode does nothing on reset. */
static void
physical_op_reset(struct rpcrdma_xprt *r_xprt)
{
}
| 79 | |||
/* Intentionally empty: physical mode releases nothing on destroy. */
static void
physical_op_destroy(struct rpcrdma_buffer *buf)
{
}
| 84 | |||
| 85 | const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { | ||
| 86 | .ro_map = physical_op_map, | ||
| 87 | .ro_unmap = physical_op_unmap, | ||
| 88 | .ro_open = physical_op_open, | ||
| 89 | .ro_maxpages = physical_op_maxpages, | ||
| 90 | .ro_init = physical_op_init, | ||
| 91 | .ro_reset = physical_op_reset, | ||
| 92 | .ro_destroy = physical_op_destroy, | ||
| 93 | .ro_displayname = "physical", | ||
| 94 | }; | ||
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 91ffde82fa0c..2c53ea9e1b83 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
| @@ -53,6 +53,14 @@ | |||
| 53 | # define RPCDBG_FACILITY RPCDBG_TRANS | 53 | # define RPCDBG_FACILITY RPCDBG_TRANS |
| 54 | #endif | 54 | #endif |
| 55 | 55 | ||
| 56 | enum rpcrdma_chunktype { | ||
| 57 | rpcrdma_noch = 0, | ||
| 58 | rpcrdma_readch, | ||
| 59 | rpcrdma_areadch, | ||
| 60 | rpcrdma_writech, | ||
| 61 | rpcrdma_replych | ||
| 62 | }; | ||
| 63 | |||
| 56 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 64 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
| 57 | static const char transfertypes[][12] = { | 65 | static const char transfertypes[][12] = { |
| 58 | "pure inline", /* no chunks */ | 66 | "pure inline", /* no chunks */ |
| @@ -179,6 +187,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
| 179 | struct rpcrdma_write_array *warray = NULL; | 187 | struct rpcrdma_write_array *warray = NULL; |
| 180 | struct rpcrdma_write_chunk *cur_wchunk = NULL; | 188 | struct rpcrdma_write_chunk *cur_wchunk = NULL; |
| 181 | __be32 *iptr = headerp->rm_body.rm_chunks; | 189 | __be32 *iptr = headerp->rm_body.rm_chunks; |
| 190 | int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool); | ||
| 182 | 191 | ||
| 183 | if (type == rpcrdma_readch || type == rpcrdma_areadch) { | 192 | if (type == rpcrdma_readch || type == rpcrdma_areadch) { |
| 184 | /* a read chunk - server will RDMA Read our memory */ | 193 | /* a read chunk - server will RDMA Read our memory */ |
| @@ -201,9 +210,9 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
| 201 | if (nsegs < 0) | 210 | if (nsegs < 0) |
| 202 | return nsegs; | 211 | return nsegs; |
| 203 | 212 | ||
| 213 | map = r_xprt->rx_ia.ri_ops->ro_map; | ||
| 204 | do { | 214 | do { |
| 205 | n = rpcrdma_register_external(seg, nsegs, | 215 | n = map(r_xprt, seg, nsegs, cur_wchunk != NULL); |
| 206 | cur_wchunk != NULL, r_xprt); | ||
| 207 | if (n <= 0) | 216 | if (n <= 0) |
| 208 | goto out; | 217 | goto out; |
| 209 | if (cur_rchunk) { /* read */ | 218 | if (cur_rchunk) { /* read */ |
| @@ -275,34 +284,13 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
| 275 | return (unsigned char *)iptr - (unsigned char *)headerp; | 284 | return (unsigned char *)iptr - (unsigned char *)headerp; |
| 276 | 285 | ||
| 277 | out: | 286 | out: |
| 278 | if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) { | 287 | if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) |
| 279 | for (pos = 0; nchunks--;) | 288 | return n; |
| 280 | pos += rpcrdma_deregister_external( | ||
| 281 | &req->rl_segments[pos], r_xprt); | ||
| 282 | } | ||
| 283 | return n; | ||
| 284 | } | ||
| 285 | 289 | ||
| 286 | /* | 290 | for (pos = 0; nchunks--;) |
| 287 | * Marshal chunks. This routine returns the header length | 291 | pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, |
| 288 | * consumed by marshaling. | 292 | &req->rl_segments[pos]); |
| 289 | * | 293 | return n; |
| 290 | * Returns positive RPC/RDMA header size, or negative errno. | ||
| 291 | */ | ||
| 292 | |||
| 293 | ssize_t | ||
| 294 | rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result) | ||
| 295 | { | ||
| 296 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | ||
| 297 | struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf); | ||
| 298 | |||
| 299 | if (req->rl_rtype != rpcrdma_noch) | ||
| 300 | result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, | ||
| 301 | headerp, req->rl_rtype); | ||
| 302 | else if (req->rl_wtype != rpcrdma_noch) | ||
| 303 | result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, | ||
| 304 | headerp, req->rl_wtype); | ||
| 305 | return result; | ||
| 306 | } | 294 | } |
| 307 | 295 | ||
| 308 | /* | 296 | /* |
| @@ -397,6 +385,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 397 | char *base; | 385 | char *base; |
| 398 | size_t rpclen, padlen; | 386 | size_t rpclen, padlen; |
| 399 | ssize_t hdrlen; | 387 | ssize_t hdrlen; |
| 388 | enum rpcrdma_chunktype rtype, wtype; | ||
| 400 | struct rpcrdma_msg *headerp; | 389 | struct rpcrdma_msg *headerp; |
| 401 | 390 | ||
| 402 | /* | 391 | /* |
| @@ -433,13 +422,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 433 | * into pages; otherwise use reply chunks. | 422 | * into pages; otherwise use reply chunks. |
| 434 | */ | 423 | */ |
| 435 | if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) | 424 | if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) |
| 436 | req->rl_wtype = rpcrdma_noch; | 425 | wtype = rpcrdma_noch; |
| 437 | else if (rqst->rq_rcv_buf.page_len == 0) | 426 | else if (rqst->rq_rcv_buf.page_len == 0) |
| 438 | req->rl_wtype = rpcrdma_replych; | 427 | wtype = rpcrdma_replych; |
| 439 | else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | 428 | else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) |
| 440 | req->rl_wtype = rpcrdma_writech; | 429 | wtype = rpcrdma_writech; |
| 441 | else | 430 | else |
| 442 | req->rl_wtype = rpcrdma_replych; | 431 | wtype = rpcrdma_replych; |
| 443 | 432 | ||
| 444 | /* | 433 | /* |
| 445 | * Chunks needed for arguments? | 434 | * Chunks needed for arguments? |
| @@ -456,16 +445,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 456 | * TBD check NFSv4 setacl | 445 | * TBD check NFSv4 setacl |
| 457 | */ | 446 | */ |
| 458 | if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) | 447 | if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) |
| 459 | req->rl_rtype = rpcrdma_noch; | 448 | rtype = rpcrdma_noch; |
| 460 | else if (rqst->rq_snd_buf.page_len == 0) | 449 | else if (rqst->rq_snd_buf.page_len == 0) |
| 461 | req->rl_rtype = rpcrdma_areadch; | 450 | rtype = rpcrdma_areadch; |
| 462 | else | 451 | else |
| 463 | req->rl_rtype = rpcrdma_readch; | 452 | rtype = rpcrdma_readch; |
| 464 | 453 | ||
| 465 | /* The following simplification is not true forever */ | 454 | /* The following simplification is not true forever */ |
| 466 | if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych) | 455 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) |
| 467 | req->rl_wtype = rpcrdma_noch; | 456 | wtype = rpcrdma_noch; |
| 468 | if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) { | 457 | if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { |
| 469 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", | 458 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", |
| 470 | __func__); | 459 | __func__); |
| 471 | return -EIO; | 460 | return -EIO; |
| @@ -479,7 +468,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 479 | * When padding is in use and applies to the transfer, insert | 468 | * When padding is in use and applies to the transfer, insert |
| 480 | * it and change the message type. | 469 | * it and change the message type. |
| 481 | */ | 470 | */ |
| 482 | if (req->rl_rtype == rpcrdma_noch) { | 471 | if (rtype == rpcrdma_noch) { |
| 483 | 472 | ||
| 484 | padlen = rpcrdma_inline_pullup(rqst, | 473 | padlen = rpcrdma_inline_pullup(rqst, |
| 485 | RPCRDMA_INLINE_PAD_VALUE(rqst)); | 474 | RPCRDMA_INLINE_PAD_VALUE(rqst)); |
| @@ -494,7 +483,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 494 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; | 483 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; |
| 495 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; | 484 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; |
| 496 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ | 485 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ |
| 497 | if (req->rl_wtype != rpcrdma_noch) { | 486 | if (wtype != rpcrdma_noch) { |
| 498 | dprintk("RPC: %s: invalid chunk list\n", | 487 | dprintk("RPC: %s: invalid chunk list\n", |
| 499 | __func__); | 488 | __func__); |
| 500 | return -EIO; | 489 | return -EIO; |
| @@ -515,18 +504,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 515 | * on receive. Therefore, we request a reply chunk | 504 | * on receive. Therefore, we request a reply chunk |
| 516 | * for non-writes wherever feasible and efficient. | 505 | * for non-writes wherever feasible and efficient. |
| 517 | */ | 506 | */ |
| 518 | if (req->rl_wtype == rpcrdma_noch) | 507 | if (wtype == rpcrdma_noch) |
| 519 | req->rl_wtype = rpcrdma_replych; | 508 | wtype = rpcrdma_replych; |
| 520 | } | 509 | } |
| 521 | } | 510 | } |
| 522 | 511 | ||
| 523 | hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen); | 512 | if (rtype != rpcrdma_noch) { |
| 513 | hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, | ||
| 514 | headerp, rtype); | ||
| 515 | wtype = rtype; /* simplify dprintk */ | ||
| 516 | |||
| 517 | } else if (wtype != rpcrdma_noch) { | ||
| 518 | hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, | ||
| 519 | headerp, wtype); | ||
| 520 | } | ||
| 524 | if (hdrlen < 0) | 521 | if (hdrlen < 0) |
| 525 | return hdrlen; | 522 | return hdrlen; |
| 526 | 523 | ||
| 527 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" | 524 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" |
| 528 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 525 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
| 529 | __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen, | 526 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, |
| 530 | headerp, base, rdmab_lkey(req->rl_rdmabuf)); | 527 | headerp, base, rdmab_lkey(req->rl_rdmabuf)); |
| 531 | 528 | ||
| 532 | /* | 529 | /* |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 2e192baa59f3..54f23b1be986 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
| @@ -157,12 +157,47 @@ static struct ctl_table sunrpc_table[] = { | |||
| 157 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ | 157 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ |
| 158 | 158 | ||
| 159 | static void | 159 | static void |
| 160 | xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) | ||
| 161 | { | ||
| 162 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | ||
| 163 | char buf[20]; | ||
| 164 | |||
| 165 | snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); | ||
| 166 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
| 167 | |||
| 168 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA; | ||
| 169 | } | ||
| 170 | |||
| 171 | static void | ||
| 172 | xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap) | ||
| 173 | { | ||
| 174 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | ||
| 175 | char buf[40]; | ||
| 176 | |||
| 177 | snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); | ||
| 178 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
| 179 | |||
| 180 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; | ||
| 181 | } | ||
| 182 | |||
| 183 | static void | ||
| 160 | xprt_rdma_format_addresses(struct rpc_xprt *xprt) | 184 | xprt_rdma_format_addresses(struct rpc_xprt *xprt) |
| 161 | { | 185 | { |
| 162 | struct sockaddr *sap = (struct sockaddr *) | 186 | struct sockaddr *sap = (struct sockaddr *) |
| 163 | &rpcx_to_rdmad(xprt).addr; | 187 | &rpcx_to_rdmad(xprt).addr; |
| 164 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | 188 | char buf[128]; |
| 165 | char buf[64]; | 189 | |
| 190 | switch (sap->sa_family) { | ||
| 191 | case AF_INET: | ||
| 192 | xprt_rdma_format_addresses4(xprt, sap); | ||
| 193 | break; | ||
| 194 | case AF_INET6: | ||
| 195 | xprt_rdma_format_addresses6(xprt, sap); | ||
| 196 | break; | ||
| 197 | default: | ||
| 198 | pr_err("rpcrdma: Unrecognized address family\n"); | ||
| 199 | return; | ||
| 200 | } | ||
| 166 | 201 | ||
| 167 | (void)rpc_ntop(sap, buf, sizeof(buf)); | 202 | (void)rpc_ntop(sap, buf, sizeof(buf)); |
| 168 | xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); | 203 | xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); |
| @@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) | |||
| 170 | snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); | 205 | snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); |
| 171 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); | 206 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); |
| 172 | 207 | ||
| 173 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; | ||
| 174 | |||
| 175 | snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); | ||
| 176 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
| 177 | |||
| 178 | snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); | 208 | snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); |
| 179 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); | 209 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); |
| 180 | 210 | ||
| 181 | /* netid */ | 211 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; |
| 182 | xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; | ||
| 183 | } | 212 | } |
| 184 | 213 | ||
| 185 | static void | 214 | static void |
| @@ -377,7 +406,10 @@ xprt_setup_rdma(struct xprt_create *args) | |||
| 377 | xprt_rdma_connect_worker); | 406 | xprt_rdma_connect_worker); |
| 378 | 407 | ||
| 379 | xprt_rdma_format_addresses(xprt); | 408 | xprt_rdma_format_addresses(xprt); |
| 380 | xprt->max_payload = rpcrdma_max_payload(new_xprt); | 409 | xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); |
| 410 | if (xprt->max_payload == 0) | ||
| 411 | goto out4; | ||
| 412 | xprt->max_payload <<= PAGE_SHIFT; | ||
| 381 | dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", | 413 | dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", |
| 382 | __func__, xprt->max_payload); | 414 | __func__, xprt->max_payload); |
| 383 | 415 | ||
| @@ -552,8 +584,8 @@ xprt_rdma_free(void *buffer) | |||
| 552 | 584 | ||
| 553 | for (i = 0; req->rl_nchunks;) { | 585 | for (i = 0; req->rl_nchunks;) { |
| 554 | --req->rl_nchunks; | 586 | --req->rl_nchunks; |
| 555 | i += rpcrdma_deregister_external( | 587 | i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, |
| 556 | &req->rl_segments[i], r_xprt); | 588 | &req->rl_segments[i]); |
| 557 | } | 589 | } |
| 558 | 590 | ||
| 559 | rpcrdma_buffer_put(req); | 591 | rpcrdma_buffer_put(req); |
| @@ -579,10 +611,7 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
| 579 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 611 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 580 | int rc = 0; | 612 | int rc = 0; |
| 581 | 613 | ||
| 582 | if (req->rl_niovs == 0) | 614 | rc = rpcrdma_marshal_req(rqst); |
| 583 | rc = rpcrdma_marshal_req(rqst); | ||
| 584 | else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL) | ||
| 585 | rc = rpcrdma_marshal_chunks(rqst, 0); | ||
| 586 | if (rc < 0) | 615 | if (rc < 0) |
| 587 | goto failed_marshal; | 616 | goto failed_marshal; |
| 588 | 617 | ||
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index e28909fddd30..4870d272e006 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
| @@ -50,6 +50,7 @@ | |||
| 50 | #include <linux/interrupt.h> | 50 | #include <linux/interrupt.h> |
| 51 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
| 52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
| 53 | #include <linux/sunrpc/addr.h> | ||
| 53 | #include <asm/bitops.h> | 54 | #include <asm/bitops.h> |
| 54 | 55 | ||
| 55 | #include "xprt_rdma.h" | 56 | #include "xprt_rdma.h" |
| @@ -62,9 +63,6 @@ | |||
| 62 | # define RPCDBG_FACILITY RPCDBG_TRANS | 63 | # define RPCDBG_FACILITY RPCDBG_TRANS |
| 63 | #endif | 64 | #endif |
| 64 | 65 | ||
| 65 | static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); | ||
| 66 | static void rpcrdma_reset_fmrs(struct rpcrdma_ia *); | ||
| 67 | |||
| 68 | /* | 66 | /* |
| 69 | * internal functions | 67 | * internal functions |
| 70 | */ | 68 | */ |
| @@ -188,7 +186,7 @@ static const char * const wc_status[] = { | |||
| 188 | "remote access error", | 186 | "remote access error", |
| 189 | "remote operation error", | 187 | "remote operation error", |
| 190 | "transport retry counter exceeded", | 188 | "transport retry counter exceeded", |
| 191 | "RNR retrycounter exceeded", | 189 | "RNR retry counter exceeded", |
| 192 | "local RDD violation error", | 190 | "local RDD violation error", |
| 193 | "remove invalid RD request", | 191 | "remove invalid RD request", |
| 194 | "operation aborted", | 192 | "operation aborted", |
| @@ -206,21 +204,17 @@ static const char * const wc_status[] = { | |||
| 206 | static void | 204 | static void |
| 207 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) | 205 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) |
| 208 | { | 206 | { |
| 209 | if (likely(wc->status == IB_WC_SUCCESS)) | ||
| 210 | return; | ||
| 211 | |||
| 212 | /* WARNING: Only wr_id and status are reliable at this point */ | 207 | /* WARNING: Only wr_id and status are reliable at this point */ |
| 213 | if (wc->wr_id == 0ULL) { | 208 | if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) { |
| 214 | if (wc->status != IB_WC_WR_FLUSH_ERR) | 209 | if (wc->status != IB_WC_SUCCESS && |
| 210 | wc->status != IB_WC_WR_FLUSH_ERR) | ||
| 215 | pr_err("RPC: %s: SEND: %s\n", | 211 | pr_err("RPC: %s: SEND: %s\n", |
| 216 | __func__, COMPLETION_MSG(wc->status)); | 212 | __func__, COMPLETION_MSG(wc->status)); |
| 217 | } else { | 213 | } else { |
| 218 | struct rpcrdma_mw *r; | 214 | struct rpcrdma_mw *r; |
| 219 | 215 | ||
| 220 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | 216 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; |
| 221 | r->r.frmr.fr_state = FRMR_IS_STALE; | 217 | r->mw_sendcompletion(wc); |
| 222 | pr_err("RPC: %s: frmr %p (stale): %s\n", | ||
| 223 | __func__, r, COMPLETION_MSG(wc->status)); | ||
| 224 | } | 218 | } |
| 225 | } | 219 | } |
| 226 | 220 | ||
| @@ -424,7 +418,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
| 424 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 418 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
| 425 | struct rpcrdma_ep *ep = &xprt->rx_ep; | 419 | struct rpcrdma_ep *ep = &xprt->rx_ep; |
| 426 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 420 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
| 427 | struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; | 421 | struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr; |
| 428 | #endif | 422 | #endif |
| 429 | struct ib_qp_attr *attr = &ia->ri_qp_attr; | 423 | struct ib_qp_attr *attr = &ia->ri_qp_attr; |
| 430 | struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; | 424 | struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; |
| @@ -480,9 +474,8 @@ connected: | |||
| 480 | wake_up_all(&ep->rep_connect_wait); | 474 | wake_up_all(&ep->rep_connect_wait); |
| 481 | /*FALLTHROUGH*/ | 475 | /*FALLTHROUGH*/ |
| 482 | default: | 476 | default: |
| 483 | dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n", | 477 | dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n", |
| 484 | __func__, &addr->sin_addr.s_addr, | 478 | __func__, sap, rpc_get_port(sap), ep, |
| 485 | ntohs(addr->sin_port), ep, | ||
| 486 | CONNECTION_MSG(event->event)); | 479 | CONNECTION_MSG(event->event)); |
| 487 | break; | 480 | break; |
| 488 | } | 481 | } |
| @@ -491,19 +484,16 @@ connected: | |||
| 491 | if (connstate == 1) { | 484 | if (connstate == 1) { |
| 492 | int ird = attr->max_dest_rd_atomic; | 485 | int ird = attr->max_dest_rd_atomic; |
| 493 | int tird = ep->rep_remote_cma.responder_resources; | 486 | int tird = ep->rep_remote_cma.responder_resources; |
| 494 | printk(KERN_INFO "rpcrdma: connection to %pI4:%u " | 487 | |
| 495 | "on %s, memreg %d slots %d ird %d%s\n", | 488 | pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", |
| 496 | &addr->sin_addr.s_addr, | 489 | sap, rpc_get_port(sap), |
| 497 | ntohs(addr->sin_port), | ||
| 498 | ia->ri_id->device->name, | 490 | ia->ri_id->device->name, |
| 499 | ia->ri_memreg_strategy, | 491 | ia->ri_ops->ro_displayname, |
| 500 | xprt->rx_buf.rb_max_requests, | 492 | xprt->rx_buf.rb_max_requests, |
| 501 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); | 493 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); |
| 502 | } else if (connstate < 0) { | 494 | } else if (connstate < 0) { |
| 503 | printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n", | 495 | pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n", |
| 504 | &addr->sin_addr.s_addr, | 496 | sap, rpc_get_port(sap), connstate); |
| 505 | ntohs(addr->sin_port), | ||
| 506 | connstate); | ||
| 507 | } | 497 | } |
| 508 | #endif | 498 | #endif |
| 509 | 499 | ||
| @@ -621,17 +611,13 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 621 | 611 | ||
| 622 | if (memreg == RPCRDMA_FRMR) { | 612 | if (memreg == RPCRDMA_FRMR) { |
| 623 | /* Requires both frmr reg and local dma lkey */ | 613 | /* Requires both frmr reg and local dma lkey */ |
| 624 | if ((devattr->device_cap_flags & | 614 | if (((devattr->device_cap_flags & |
| 625 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | 615 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != |
| 626 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | 616 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) || |
| 617 | (devattr->max_fast_reg_page_list_len == 0)) { | ||
| 627 | dprintk("RPC: %s: FRMR registration " | 618 | dprintk("RPC: %s: FRMR registration " |
| 628 | "not supported by HCA\n", __func__); | 619 | "not supported by HCA\n", __func__); |
| 629 | memreg = RPCRDMA_MTHCAFMR; | 620 | memreg = RPCRDMA_MTHCAFMR; |
| 630 | } else { | ||
| 631 | /* Mind the ia limit on FRMR page list depth */ | ||
| 632 | ia->ri_max_frmr_depth = min_t(unsigned int, | ||
| 633 | RPCRDMA_MAX_DATA_SEGS, | ||
| 634 | devattr->max_fast_reg_page_list_len); | ||
| 635 | } | 621 | } |
| 636 | } | 622 | } |
| 637 | if (memreg == RPCRDMA_MTHCAFMR) { | 623 | if (memreg == RPCRDMA_MTHCAFMR) { |
| @@ -652,13 +638,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 652 | */ | 638 | */ |
| 653 | switch (memreg) { | 639 | switch (memreg) { |
| 654 | case RPCRDMA_FRMR: | 640 | case RPCRDMA_FRMR: |
| 641 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; | ||
| 655 | break; | 642 | break; |
| 656 | case RPCRDMA_ALLPHYSICAL: | 643 | case RPCRDMA_ALLPHYSICAL: |
| 644 | ia->ri_ops = &rpcrdma_physical_memreg_ops; | ||
| 657 | mem_priv = IB_ACCESS_LOCAL_WRITE | | 645 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
| 658 | IB_ACCESS_REMOTE_WRITE | | 646 | IB_ACCESS_REMOTE_WRITE | |
| 659 | IB_ACCESS_REMOTE_READ; | 647 | IB_ACCESS_REMOTE_READ; |
| 660 | goto register_setup; | 648 | goto register_setup; |
| 661 | case RPCRDMA_MTHCAFMR: | 649 | case RPCRDMA_MTHCAFMR: |
| 650 | ia->ri_ops = &rpcrdma_fmr_memreg_ops; | ||
| 662 | if (ia->ri_have_dma_lkey) | 651 | if (ia->ri_have_dma_lkey) |
| 663 | break; | 652 | break; |
| 664 | mem_priv = IB_ACCESS_LOCAL_WRITE; | 653 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
| @@ -678,8 +667,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 678 | rc = -ENOMEM; | 667 | rc = -ENOMEM; |
| 679 | goto out3; | 668 | goto out3; |
| 680 | } | 669 | } |
| 681 | dprintk("RPC: %s: memory registration strategy is %d\n", | 670 | dprintk("RPC: %s: memory registration strategy is '%s'\n", |
| 682 | __func__, memreg); | 671 | __func__, ia->ri_ops->ro_displayname); |
| 683 | 672 | ||
| 684 | /* Else will do memory reg/dereg for each chunk */ | 673 | /* Else will do memory reg/dereg for each chunk */ |
| 685 | ia->ri_memreg_strategy = memreg; | 674 | ia->ri_memreg_strategy = memreg; |
| @@ -743,49 +732,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 743 | 732 | ||
| 744 | ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; | 733 | ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; |
| 745 | ep->rep_attr.qp_context = ep; | 734 | ep->rep_attr.qp_context = ep; |
| 746 | /* send_cq and recv_cq initialized below */ | ||
| 747 | ep->rep_attr.srq = NULL; | 735 | ep->rep_attr.srq = NULL; |
| 748 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 736 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
| 749 | switch (ia->ri_memreg_strategy) { | 737 | rc = ia->ri_ops->ro_open(ia, ep, cdata); |
| 750 | case RPCRDMA_FRMR: { | 738 | if (rc) |
| 751 | int depth = 7; | 739 | return rc; |
| 752 | |||
| 753 | /* Add room for frmr register and invalidate WRs. | ||
| 754 | * 1. FRMR reg WR for head | ||
| 755 | * 2. FRMR invalidate WR for head | ||
| 756 | * 3. N FRMR reg WRs for pagelist | ||
| 757 | * 4. N FRMR invalidate WRs for pagelist | ||
| 758 | * 5. FRMR reg WR for tail | ||
| 759 | * 6. FRMR invalidate WR for tail | ||
| 760 | * 7. The RDMA_SEND WR | ||
| 761 | */ | ||
| 762 | |||
| 763 | /* Calculate N if the device max FRMR depth is smaller than | ||
| 764 | * RPCRDMA_MAX_DATA_SEGS. | ||
| 765 | */ | ||
| 766 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
| 767 | int delta = RPCRDMA_MAX_DATA_SEGS - | ||
| 768 | ia->ri_max_frmr_depth; | ||
| 769 | |||
| 770 | do { | ||
| 771 | depth += 2; /* FRMR reg + invalidate */ | ||
| 772 | delta -= ia->ri_max_frmr_depth; | ||
| 773 | } while (delta > 0); | ||
| 774 | |||
| 775 | } | ||
| 776 | ep->rep_attr.cap.max_send_wr *= depth; | ||
| 777 | if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { | ||
| 778 | cdata->max_requests = devattr->max_qp_wr / depth; | ||
| 779 | if (!cdata->max_requests) | ||
| 780 | return -EINVAL; | ||
| 781 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * | ||
| 782 | depth; | ||
| 783 | } | ||
| 784 | break; | ||
| 785 | } | ||
| 786 | default: | ||
| 787 | break; | ||
| 788 | } | ||
| 789 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; | 740 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; |
| 790 | ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); | 741 | ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); |
| 791 | ep->rep_attr.cap.max_recv_sge = 1; | 742 | ep->rep_attr.cap.max_recv_sge = 1; |
| @@ -944,21 +895,9 @@ retry: | |||
| 944 | rpcrdma_ep_disconnect(ep, ia); | 895 | rpcrdma_ep_disconnect(ep, ia); |
| 945 | rpcrdma_flush_cqs(ep); | 896 | rpcrdma_flush_cqs(ep); |
| 946 | 897 | ||
| 947 | switch (ia->ri_memreg_strategy) { | ||
| 948 | case RPCRDMA_FRMR: | ||
| 949 | rpcrdma_reset_frmrs(ia); | ||
| 950 | break; | ||
| 951 | case RPCRDMA_MTHCAFMR: | ||
| 952 | rpcrdma_reset_fmrs(ia); | ||
| 953 | break; | ||
| 954 | case RPCRDMA_ALLPHYSICAL: | ||
| 955 | break; | ||
| 956 | default: | ||
| 957 | rc = -EIO; | ||
| 958 | goto out; | ||
| 959 | } | ||
| 960 | |||
| 961 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | 898 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); |
| 899 | ia->ri_ops->ro_reset(xprt); | ||
| 900 | |||
| 962 | id = rpcrdma_create_id(xprt, ia, | 901 | id = rpcrdma_create_id(xprt, ia, |
| 963 | (struct sockaddr *)&xprt->rx_data.addr); | 902 | (struct sockaddr *)&xprt->rx_data.addr); |
| 964 | if (IS_ERR(id)) { | 903 | if (IS_ERR(id)) { |
| @@ -1123,91 +1062,6 @@ out: | |||
| 1123 | return ERR_PTR(rc); | 1062 | return ERR_PTR(rc); |
| 1124 | } | 1063 | } |
| 1125 | 1064 | ||
| 1126 | static int | ||
| 1127 | rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) | ||
| 1128 | { | ||
| 1129 | int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; | ||
| 1130 | struct ib_fmr_attr fmr_attr = { | ||
| 1131 | .max_pages = RPCRDMA_MAX_DATA_SEGS, | ||
| 1132 | .max_maps = 1, | ||
| 1133 | .page_shift = PAGE_SHIFT | ||
| 1134 | }; | ||
| 1135 | struct rpcrdma_mw *r; | ||
| 1136 | int i, rc; | ||
| 1137 | |||
| 1138 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
| 1139 | dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); | ||
| 1140 | |||
| 1141 | while (i--) { | ||
| 1142 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
| 1143 | if (r == NULL) | ||
| 1144 | return -ENOMEM; | ||
| 1145 | |||
| 1146 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr); | ||
| 1147 | if (IS_ERR(r->r.fmr)) { | ||
| 1148 | rc = PTR_ERR(r->r.fmr); | ||
| 1149 | dprintk("RPC: %s: ib_alloc_fmr failed %i\n", | ||
| 1150 | __func__, rc); | ||
| 1151 | goto out_free; | ||
| 1152 | } | ||
| 1153 | |||
| 1154 | list_add(&r->mw_list, &buf->rb_mws); | ||
| 1155 | list_add(&r->mw_all, &buf->rb_all); | ||
| 1156 | } | ||
| 1157 | return 0; | ||
| 1158 | |||
| 1159 | out_free: | ||
| 1160 | kfree(r); | ||
| 1161 | return rc; | ||
| 1162 | } | ||
| 1163 | |||
| 1164 | static int | ||
| 1165 | rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) | ||
| 1166 | { | ||
| 1167 | struct rpcrdma_frmr *f; | ||
| 1168 | struct rpcrdma_mw *r; | ||
| 1169 | int i, rc; | ||
| 1170 | |||
| 1171 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
| 1172 | dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); | ||
| 1173 | |||
| 1174 | while (i--) { | ||
| 1175 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
| 1176 | if (r == NULL) | ||
| 1177 | return -ENOMEM; | ||
| 1178 | f = &r->r.frmr; | ||
| 1179 | |||
| 1180 | f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
| 1181 | ia->ri_max_frmr_depth); | ||
| 1182 | if (IS_ERR(f->fr_mr)) { | ||
| 1183 | rc = PTR_ERR(f->fr_mr); | ||
| 1184 | dprintk("RPC: %s: ib_alloc_fast_reg_mr " | ||
| 1185 | "failed %i\n", __func__, rc); | ||
| 1186 | goto out_free; | ||
| 1187 | } | ||
| 1188 | |||
| 1189 | f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
| 1190 | ia->ri_max_frmr_depth); | ||
| 1191 | if (IS_ERR(f->fr_pgl)) { | ||
| 1192 | rc = PTR_ERR(f->fr_pgl); | ||
| 1193 | dprintk("RPC: %s: ib_alloc_fast_reg_page_list " | ||
| 1194 | "failed %i\n", __func__, rc); | ||
| 1195 | |||
| 1196 | ib_dereg_mr(f->fr_mr); | ||
| 1197 | goto out_free; | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | list_add(&r->mw_list, &buf->rb_mws); | ||
| 1201 | list_add(&r->mw_all, &buf->rb_all); | ||
| 1202 | } | ||
| 1203 | |||
| 1204 | return 0; | ||
| 1205 | |||
| 1206 | out_free: | ||
| 1207 | kfree(r); | ||
| 1208 | return rc; | ||
| 1209 | } | ||
| 1210 | |||
| 1211 | int | 1065 | int |
| 1212 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | 1066 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) |
| 1213 | { | 1067 | { |
| @@ -1244,22 +1098,9 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
| 1244 | buf->rb_recv_bufs = (struct rpcrdma_rep **) p; | 1098 | buf->rb_recv_bufs = (struct rpcrdma_rep **) p; |
| 1245 | p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; | 1099 | p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; |
| 1246 | 1100 | ||
| 1247 | INIT_LIST_HEAD(&buf->rb_mws); | 1101 | rc = ia->ri_ops->ro_init(r_xprt); |
| 1248 | INIT_LIST_HEAD(&buf->rb_all); | 1102 | if (rc) |
| 1249 | switch (ia->ri_memreg_strategy) { | 1103 | goto out; |
| 1250 | case RPCRDMA_FRMR: | ||
| 1251 | rc = rpcrdma_init_frmrs(ia, buf); | ||
| 1252 | if (rc) | ||
| 1253 | goto out; | ||
| 1254 | break; | ||
| 1255 | case RPCRDMA_MTHCAFMR: | ||
| 1256 | rc = rpcrdma_init_fmrs(ia, buf); | ||
| 1257 | if (rc) | ||
| 1258 | goto out; | ||
| 1259 | break; | ||
| 1260 | default: | ||
| 1261 | break; | ||
| 1262 | } | ||
| 1263 | 1104 | ||
| 1264 | for (i = 0; i < buf->rb_max_requests; i++) { | 1105 | for (i = 0; i < buf->rb_max_requests; i++) { |
| 1265 | struct rpcrdma_req *req; | 1106 | struct rpcrdma_req *req; |
| @@ -1311,47 +1152,6 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) | |||
| 1311 | kfree(req); | 1152 | kfree(req); |
| 1312 | } | 1153 | } |
| 1313 | 1154 | ||
| 1314 | static void | ||
| 1315 | rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) | ||
| 1316 | { | ||
| 1317 | struct rpcrdma_mw *r; | ||
| 1318 | int rc; | ||
| 1319 | |||
| 1320 | while (!list_empty(&buf->rb_all)) { | ||
| 1321 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
| 1322 | list_del(&r->mw_all); | ||
| 1323 | list_del(&r->mw_list); | ||
| 1324 | |||
| 1325 | rc = ib_dealloc_fmr(r->r.fmr); | ||
| 1326 | if (rc) | ||
| 1327 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | ||
| 1328 | __func__, rc); | ||
| 1329 | |||
| 1330 | kfree(r); | ||
| 1331 | } | ||
| 1332 | } | ||
| 1333 | |||
| 1334 | static void | ||
| 1335 | rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf) | ||
| 1336 | { | ||
| 1337 | struct rpcrdma_mw *r; | ||
| 1338 | int rc; | ||
| 1339 | |||
| 1340 | while (!list_empty(&buf->rb_all)) { | ||
| 1341 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
| 1342 | list_del(&r->mw_all); | ||
| 1343 | list_del(&r->mw_list); | ||
| 1344 | |||
| 1345 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
| 1346 | if (rc) | ||
| 1347 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", | ||
| 1348 | __func__, rc); | ||
| 1349 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
| 1350 | |||
| 1351 | kfree(r); | ||
| 1352 | } | ||
| 1353 | } | ||
| 1354 | |||
| 1355 | void | 1155 | void |
| 1356 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | 1156 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) |
| 1357 | { | 1157 | { |
| @@ -1372,104 +1172,11 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
| 1372 | rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); | 1172 | rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); |
| 1373 | } | 1173 | } |
| 1374 | 1174 | ||
| 1375 | switch (ia->ri_memreg_strategy) { | 1175 | ia->ri_ops->ro_destroy(buf); |
| 1376 | case RPCRDMA_FRMR: | ||
| 1377 | rpcrdma_destroy_frmrs(buf); | ||
| 1378 | break; | ||
| 1379 | case RPCRDMA_MTHCAFMR: | ||
| 1380 | rpcrdma_destroy_fmrs(buf); | ||
| 1381 | break; | ||
| 1382 | default: | ||
| 1383 | break; | ||
| 1384 | } | ||
| 1385 | 1176 | ||
| 1386 | kfree(buf->rb_pool); | 1177 | kfree(buf->rb_pool); |
| 1387 | } | 1178 | } |
| 1388 | 1179 | ||
| 1389 | /* After a disconnect, unmap all FMRs. | ||
| 1390 | * | ||
| 1391 | * This is invoked only in the transport connect worker in order | ||
| 1392 | * to serialize with rpcrdma_register_fmr_external(). | ||
| 1393 | */ | ||
| 1394 | static void | ||
| 1395 | rpcrdma_reset_fmrs(struct rpcrdma_ia *ia) | ||
| 1396 | { | ||
| 1397 | struct rpcrdma_xprt *r_xprt = | ||
| 1398 | container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
| 1399 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
| 1400 | struct list_head *pos; | ||
| 1401 | struct rpcrdma_mw *r; | ||
| 1402 | LIST_HEAD(l); | ||
| 1403 | int rc; | ||
| 1404 | |||
| 1405 | list_for_each(pos, &buf->rb_all) { | ||
| 1406 | r = list_entry(pos, struct rpcrdma_mw, mw_all); | ||
| 1407 | |||
| 1408 | INIT_LIST_HEAD(&l); | ||
| 1409 | list_add(&r->r.fmr->list, &l); | ||
| 1410 | rc = ib_unmap_fmr(&l); | ||
| 1411 | if (rc) | ||
| 1412 | dprintk("RPC: %s: ib_unmap_fmr failed %i\n", | ||
| 1413 | __func__, rc); | ||
| 1414 | } | ||
| 1415 | } | ||
| 1416 | |||
| 1417 | /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in | ||
| 1418 | * an unusable state. Find FRMRs in this state and dereg / reg | ||
| 1419 | * each. FRMRs that are VALID and attached to an rpcrdma_req are | ||
| 1420 | * also torn down. | ||
| 1421 | * | ||
| 1422 | * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. | ||
| 1423 | * | ||
| 1424 | * This is invoked only in the transport connect worker in order | ||
| 1425 | * to serialize with rpcrdma_register_frmr_external(). | ||
| 1426 | */ | ||
| 1427 | static void | ||
| 1428 | rpcrdma_reset_frmrs(struct rpcrdma_ia *ia) | ||
| 1429 | { | ||
| 1430 | struct rpcrdma_xprt *r_xprt = | ||
| 1431 | container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
| 1432 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
| 1433 | struct list_head *pos; | ||
| 1434 | struct rpcrdma_mw *r; | ||
| 1435 | int rc; | ||
| 1436 | |||
| 1437 | list_for_each(pos, &buf->rb_all) { | ||
| 1438 | r = list_entry(pos, struct rpcrdma_mw, mw_all); | ||
| 1439 | |||
| 1440 | if (r->r.frmr.fr_state == FRMR_IS_INVALID) | ||
| 1441 | continue; | ||
| 1442 | |||
| 1443 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
| 1444 | if (rc) | ||
| 1445 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", | ||
| 1446 | __func__, rc); | ||
| 1447 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
| 1448 | |||
| 1449 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
| 1450 | ia->ri_max_frmr_depth); | ||
| 1451 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
| 1452 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
| 1453 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
| 1454 | " failed %i\n", __func__, rc); | ||
| 1455 | continue; | ||
| 1456 | } | ||
| 1457 | r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( | ||
| 1458 | ia->ri_id->device, | ||
| 1459 | ia->ri_max_frmr_depth); | ||
| 1460 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
| 1461 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
| 1462 | dprintk("RPC: %s: " | ||
| 1463 | "ib_alloc_fast_reg_page_list " | ||
| 1464 | "failed %i\n", __func__, rc); | ||
| 1465 | |||
| 1466 | ib_dereg_mr(r->r.frmr.fr_mr); | ||
| 1467 | continue; | ||
| 1468 | } | ||
| 1469 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
| 1470 | } | ||
| 1471 | } | ||
| 1472 | |||
| 1473 | /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving | 1180 | /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving |
| 1474 | * some req segments uninitialized. | 1181 | * some req segments uninitialized. |
| 1475 | */ | 1182 | */ |
| @@ -1509,7 +1216,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) | |||
| 1509 | } | 1216 | } |
| 1510 | } | 1217 | } |
| 1511 | 1218 | ||
| 1512 | /* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external(). | 1219 | /* rpcrdma_unmap_one() was already done during deregistration. |
| 1513 | * Redo only the ib_post_send(). | 1220 | * Redo only the ib_post_send(). |
| 1514 | */ | 1221 | */ |
| 1515 | static void | 1222 | static void |
| @@ -1729,6 +1436,14 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | |||
| 1729 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. | 1436 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. |
| 1730 | */ | 1437 | */ |
| 1731 | 1438 | ||
| 1439 | void | ||
| 1440 | rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg) | ||
| 1441 | { | ||
| 1442 | dprintk("RPC: map_one: offset %p iova %llx len %zu\n", | ||
| 1443 | seg->mr_offset, | ||
| 1444 | (unsigned long long)seg->mr_dma, seg->mr_dmalen); | ||
| 1445 | } | ||
| 1446 | |||
| 1732 | static int | 1447 | static int |
| 1733 | rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, | 1448 | rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, |
| 1734 | struct ib_mr **mrp, struct ib_sge *iov) | 1449 | struct ib_mr **mrp, struct ib_sge *iov) |
| @@ -1854,287 +1569,6 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) | |||
| 1854 | } | 1569 | } |
| 1855 | 1570 | ||
| 1856 | /* | 1571 | /* |
| 1857 | * Wrappers for chunk registration, shared by read/write chunk code. | ||
| 1858 | */ | ||
| 1859 | |||
| 1860 | static void | ||
| 1861 | rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) | ||
| 1862 | { | ||
| 1863 | seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | ||
| 1864 | seg->mr_dmalen = seg->mr_len; | ||
| 1865 | if (seg->mr_page) | ||
| 1866 | seg->mr_dma = ib_dma_map_page(ia->ri_id->device, | ||
| 1867 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
| 1868 | seg->mr_dmalen, seg->mr_dir); | ||
| 1869 | else | ||
| 1870 | seg->mr_dma = ib_dma_map_single(ia->ri_id->device, | ||
| 1871 | seg->mr_offset, | ||
| 1872 | seg->mr_dmalen, seg->mr_dir); | ||
| 1873 | if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) { | ||
| 1874 | dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n", | ||
| 1875 | __func__, | ||
| 1876 | (unsigned long long)seg->mr_dma, | ||
| 1877 | seg->mr_offset, seg->mr_dmalen); | ||
| 1878 | } | ||
| 1879 | } | ||
| 1880 | |||
| 1881 | static void | ||
| 1882 | rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | ||
| 1883 | { | ||
| 1884 | if (seg->mr_page) | ||
| 1885 | ib_dma_unmap_page(ia->ri_id->device, | ||
| 1886 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
| 1887 | else | ||
| 1888 | ib_dma_unmap_single(ia->ri_id->device, | ||
| 1889 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
| 1890 | } | ||
| 1891 | |||
| 1892 | static int | ||
| 1893 | rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | ||
| 1894 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
| 1895 | struct rpcrdma_xprt *r_xprt) | ||
| 1896 | { | ||
| 1897 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1898 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
| 1899 | struct rpcrdma_frmr *frmr = &mw->r.frmr; | ||
| 1900 | struct ib_mr *mr = frmr->fr_mr; | ||
| 1901 | struct ib_send_wr fastreg_wr, *bad_wr; | ||
| 1902 | u8 key; | ||
| 1903 | int len, pageoff; | ||
| 1904 | int i, rc; | ||
| 1905 | int seg_len; | ||
| 1906 | u64 pa; | ||
| 1907 | int page_no; | ||
| 1908 | |||
| 1909 | pageoff = offset_in_page(seg1->mr_offset); | ||
| 1910 | seg1->mr_offset -= pageoff; /* start of page */ | ||
| 1911 | seg1->mr_len += pageoff; | ||
| 1912 | len = -pageoff; | ||
| 1913 | if (*nsegs > ia->ri_max_frmr_depth) | ||
| 1914 | *nsegs = ia->ri_max_frmr_depth; | ||
| 1915 | for (page_no = i = 0; i < *nsegs;) { | ||
| 1916 | rpcrdma_map_one(ia, seg, writing); | ||
| 1917 | pa = seg->mr_dma; | ||
| 1918 | for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { | ||
| 1919 | frmr->fr_pgl->page_list[page_no++] = pa; | ||
| 1920 | pa += PAGE_SIZE; | ||
| 1921 | } | ||
| 1922 | len += seg->mr_len; | ||
| 1923 | ++seg; | ||
| 1924 | ++i; | ||
| 1925 | /* Check for holes */ | ||
| 1926 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
| 1927 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
| 1928 | break; | ||
| 1929 | } | ||
| 1930 | dprintk("RPC: %s: Using frmr %p to map %d segments\n", | ||
| 1931 | __func__, mw, i); | ||
| 1932 | |||
| 1933 | frmr->fr_state = FRMR_IS_VALID; | ||
| 1934 | |||
| 1935 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | ||
| 1936 | fastreg_wr.wr_id = (unsigned long)(void *)mw; | ||
| 1937 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
| 1938 | fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma; | ||
| 1939 | fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; | ||
| 1940 | fastreg_wr.wr.fast_reg.page_list_len = page_no; | ||
| 1941 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
| 1942 | fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; | ||
| 1943 | if (fastreg_wr.wr.fast_reg.length < len) { | ||
| 1944 | rc = -EIO; | ||
| 1945 | goto out_err; | ||
| 1946 | } | ||
| 1947 | |||
| 1948 | /* Bump the key */ | ||
| 1949 | key = (u8)(mr->rkey & 0x000000FF); | ||
| 1950 | ib_update_fast_reg_key(mr, ++key); | ||
| 1951 | |||
| 1952 | fastreg_wr.wr.fast_reg.access_flags = (writing ? | ||
| 1953 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | ||
| 1954 | IB_ACCESS_REMOTE_READ); | ||
| 1955 | fastreg_wr.wr.fast_reg.rkey = mr->rkey; | ||
| 1956 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1957 | |||
| 1958 | rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); | ||
| 1959 | if (rc) { | ||
| 1960 | dprintk("RPC: %s: failed ib_post_send for register," | ||
| 1961 | " status %i\n", __func__, rc); | ||
| 1962 | ib_update_fast_reg_key(mr, --key); | ||
| 1963 | goto out_err; | ||
| 1964 | } else { | ||
| 1965 | seg1->mr_rkey = mr->rkey; | ||
| 1966 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
| 1967 | seg1->mr_nsegs = i; | ||
| 1968 | seg1->mr_len = len; | ||
| 1969 | } | ||
| 1970 | *nsegs = i; | ||
| 1971 | return 0; | ||
| 1972 | out_err: | ||
| 1973 | frmr->fr_state = FRMR_IS_INVALID; | ||
| 1974 | while (i--) | ||
| 1975 | rpcrdma_unmap_one(ia, --seg); | ||
| 1976 | return rc; | ||
| 1977 | } | ||
| 1978 | |||
| 1979 | static int | ||
| 1980 | rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, | ||
| 1981 | struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) | ||
| 1982 | { | ||
| 1983 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1984 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
| 1985 | int rc; | ||
| 1986 | |||
| 1987 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; | ||
| 1988 | |||
| 1989 | memset(&invalidate_wr, 0, sizeof invalidate_wr); | ||
| 1990 | invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; | ||
| 1991 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
| 1992 | invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; | ||
| 1993 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1994 | |||
| 1995 | read_lock(&ia->ri_qplock); | ||
| 1996 | while (seg1->mr_nsegs--) | ||
| 1997 | rpcrdma_unmap_one(ia, seg++); | ||
| 1998 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
| 1999 | read_unlock(&ia->ri_qplock); | ||
| 2000 | if (rc) { | ||
| 2001 | /* Force rpcrdma_buffer_get() to retry */ | ||
| 2002 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; | ||
| 2003 | dprintk("RPC: %s: failed ib_post_send for invalidate," | ||
| 2004 | " status %i\n", __func__, rc); | ||
| 2005 | } | ||
| 2006 | return rc; | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | static int | ||
| 2010 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, | ||
| 2011 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
| 2012 | { | ||
| 2013 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 2014 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
| 2015 | int len, pageoff, i, rc; | ||
| 2016 | |||
| 2017 | pageoff = offset_in_page(seg1->mr_offset); | ||
| 2018 | seg1->mr_offset -= pageoff; /* start of page */ | ||
| 2019 | seg1->mr_len += pageoff; | ||
| 2020 | len = -pageoff; | ||
| 2021 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
| 2022 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
| 2023 | for (i = 0; i < *nsegs;) { | ||
| 2024 | rpcrdma_map_one(ia, seg, writing); | ||
| 2025 | physaddrs[i] = seg->mr_dma; | ||
| 2026 | len += seg->mr_len; | ||
| 2027 | ++seg; | ||
| 2028 | ++i; | ||
| 2029 | /* Check for holes */ | ||
| 2030 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
| 2031 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
| 2032 | break; | ||
| 2033 | } | ||
| 2034 | rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma); | ||
| 2035 | if (rc) { | ||
| 2036 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
| 2037 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
| 2038 | len, (unsigned long long)seg1->mr_dma, | ||
| 2039 | pageoff, i, rc); | ||
| 2040 | while (i--) | ||
| 2041 | rpcrdma_unmap_one(ia, --seg); | ||
| 2042 | } else { | ||
| 2043 | seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey; | ||
| 2044 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
| 2045 | seg1->mr_nsegs = i; | ||
| 2046 | seg1->mr_len = len; | ||
| 2047 | } | ||
| 2048 | *nsegs = i; | ||
| 2049 | return rc; | ||
| 2050 | } | ||
| 2051 | |||
| 2052 | static int | ||
| 2053 | rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | ||
| 2054 | struct rpcrdma_ia *ia) | ||
| 2055 | { | ||
| 2056 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 2057 | LIST_HEAD(l); | ||
| 2058 | int rc; | ||
| 2059 | |||
| 2060 | list_add(&seg1->rl_mw->r.fmr->list, &l); | ||
| 2061 | rc = ib_unmap_fmr(&l); | ||
| 2062 | read_lock(&ia->ri_qplock); | ||
| 2063 | while (seg1->mr_nsegs--) | ||
| 2064 | rpcrdma_unmap_one(ia, seg++); | ||
| 2065 | read_unlock(&ia->ri_qplock); | ||
| 2066 | if (rc) | ||
| 2067 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
| 2068 | " status %i\n", __func__, rc); | ||
| 2069 | return rc; | ||
| 2070 | } | ||
| 2071 | |||
| 2072 | int | ||
| 2073 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | ||
| 2074 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | ||
| 2075 | { | ||
| 2076 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
| 2077 | int rc = 0; | ||
| 2078 | |||
| 2079 | switch (ia->ri_memreg_strategy) { | ||
| 2080 | |||
| 2081 | case RPCRDMA_ALLPHYSICAL: | ||
| 2082 | rpcrdma_map_one(ia, seg, writing); | ||
| 2083 | seg->mr_rkey = ia->ri_bind_mem->rkey; | ||
| 2084 | seg->mr_base = seg->mr_dma; | ||
| 2085 | seg->mr_nsegs = 1; | ||
| 2086 | nsegs = 1; | ||
| 2087 | break; | ||
| 2088 | |||
| 2089 | /* Registration using frmr registration */ | ||
| 2090 | case RPCRDMA_FRMR: | ||
| 2091 | rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); | ||
| 2092 | break; | ||
| 2093 | |||
| 2094 | /* Registration using fmr memory registration */ | ||
| 2095 | case RPCRDMA_MTHCAFMR: | ||
| 2096 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); | ||
| 2097 | break; | ||
| 2098 | |||
| 2099 | default: | ||
| 2100 | return -EIO; | ||
| 2101 | } | ||
| 2102 | if (rc) | ||
| 2103 | return rc; | ||
| 2104 | |||
| 2105 | return nsegs; | ||
| 2106 | } | ||
| 2107 | |||
| 2108 | int | ||
| 2109 | rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | ||
| 2110 | struct rpcrdma_xprt *r_xprt) | ||
| 2111 | { | ||
| 2112 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
| 2113 | int nsegs = seg->mr_nsegs, rc; | ||
| 2114 | |||
| 2115 | switch (ia->ri_memreg_strategy) { | ||
| 2116 | |||
| 2117 | case RPCRDMA_ALLPHYSICAL: | ||
| 2118 | read_lock(&ia->ri_qplock); | ||
| 2119 | rpcrdma_unmap_one(ia, seg); | ||
| 2120 | read_unlock(&ia->ri_qplock); | ||
| 2121 | break; | ||
| 2122 | |||
| 2123 | case RPCRDMA_FRMR: | ||
| 2124 | rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); | ||
| 2125 | break; | ||
| 2126 | |||
| 2127 | case RPCRDMA_MTHCAFMR: | ||
| 2128 | rc = rpcrdma_deregister_fmr_external(seg, ia); | ||
| 2129 | break; | ||
| 2130 | |||
| 2131 | default: | ||
| 2132 | break; | ||
| 2133 | } | ||
| 2134 | return nsegs; | ||
| 2135 | } | ||
| 2136 | |||
| 2137 | /* | ||
| 2138 | * Prepost any receive buffer, then post send. | 1572 | * Prepost any receive buffer, then post send. |
| 2139 | * | 1573 | * |
| 2140 | * Receive buffer is donated to hardware, reclaimed upon recv completion. | 1574 | * Receive buffer is donated to hardware, reclaimed upon recv completion. |
| @@ -2156,7 +1590,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
| 2156 | } | 1590 | } |
| 2157 | 1591 | ||
| 2158 | send_wr.next = NULL; | 1592 | send_wr.next = NULL; |
| 2159 | send_wr.wr_id = 0ULL; /* no send cookie */ | 1593 | send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION; |
| 2160 | send_wr.sg_list = req->rl_send_iov; | 1594 | send_wr.sg_list = req->rl_send_iov; |
| 2161 | send_wr.num_sge = req->rl_niovs; | 1595 | send_wr.num_sge = req->rl_niovs; |
| 2162 | send_wr.opcode = IB_WR_SEND; | 1596 | send_wr.opcode = IB_WR_SEND; |
| @@ -2215,43 +1649,24 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
| 2215 | return rc; | 1649 | return rc; |
| 2216 | } | 1650 | } |
| 2217 | 1651 | ||
| 2218 | /* Physical mapping means one Read/Write list entry per-page. | 1652 | /* How many chunk list items fit within our inline buffers? |
| 2219 | * All list entries must fit within an inline buffer | ||
| 2220 | * | ||
| 2221 | * NB: The server must return a Write list for NFS READ, | ||
| 2222 | * which has the same constraint. Factor in the inline | ||
| 2223 | * rsize as well. | ||
| 2224 | */ | 1653 | */ |
| 2225 | static size_t | 1654 | unsigned int |
| 2226 | rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt) | 1655 | rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt) |
| 2227 | { | 1656 | { |
| 2228 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; | 1657 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
| 2229 | unsigned int inline_size, pages; | 1658 | int bytes, segments; |
| 2230 | 1659 | ||
| 2231 | inline_size = min_t(unsigned int, | 1660 | bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize); |
| 2232 | cdata->inline_wsize, cdata->inline_rsize); | 1661 | bytes -= RPCRDMA_HDRLEN_MIN; |
| 2233 | inline_size -= RPCRDMA_HDRLEN_MIN; | 1662 | if (bytes < sizeof(struct rpcrdma_segment) * 2) { |
| 2234 | pages = inline_size / sizeof(struct rpcrdma_segment); | 1663 | pr_warn("RPC: %s: inline threshold too small\n", |
| 2235 | return pages << PAGE_SHIFT; | 1664 | __func__); |
| 2236 | } | 1665 | return 0; |
| 2237 | |||
| 2238 | static size_t | ||
| 2239 | rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt) | ||
| 2240 | { | ||
| 2241 | return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; | ||
| 2242 | } | ||
| 2243 | |||
| 2244 | size_t | ||
| 2245 | rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt) | ||
| 2246 | { | ||
| 2247 | size_t result; | ||
| 2248 | |||
| 2249 | switch (r_xprt->rx_ia.ri_memreg_strategy) { | ||
| 2250 | case RPCRDMA_ALLPHYSICAL: | ||
| 2251 | result = rpcrdma_physical_max_payload(r_xprt); | ||
| 2252 | break; | ||
| 2253 | default: | ||
| 2254 | result = rpcrdma_mr_max_payload(r_xprt); | ||
| 2255 | } | 1666 | } |
| 2256 | return result; | 1667 | |
| 1668 | segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1); | ||
| 1669 | dprintk("RPC: %s: max chunk list size = %d segments\n", | ||
| 1670 | __func__, segments); | ||
| 1671 | return segments; | ||
| 2257 | } | 1672 | } |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0a16fb6f0885..78e0b8beaa36 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
| @@ -60,6 +60,7 @@ | |||
| 60 | * Interface Adapter -- one per transport instance | 60 | * Interface Adapter -- one per transport instance |
| 61 | */ | 61 | */ |
| 62 | struct rpcrdma_ia { | 62 | struct rpcrdma_ia { |
| 63 | const struct rpcrdma_memreg_ops *ri_ops; | ||
| 63 | rwlock_t ri_qplock; | 64 | rwlock_t ri_qplock; |
| 64 | struct rdma_cm_id *ri_id; | 65 | struct rdma_cm_id *ri_id; |
| 65 | struct ib_pd *ri_pd; | 66 | struct ib_pd *ri_pd; |
| @@ -105,6 +106,10 @@ struct rpcrdma_ep { | |||
| 105 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) | 106 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) |
| 106 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) | 107 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) |
| 107 | 108 | ||
| 109 | /* Force completion handler to ignore the signal | ||
| 110 | */ | ||
| 111 | #define RPCRDMA_IGNORE_COMPLETION (0ULL) | ||
| 112 | |||
| 108 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV | 113 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV |
| 109 | * | 114 | * |
| 110 | * The below structure appears at the front of a large region of kmalloc'd | 115 | * The below structure appears at the front of a large region of kmalloc'd |
| @@ -143,14 +148,6 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) | |||
| 143 | return (struct rpcrdma_msg *)rb->rg_base; | 148 | return (struct rpcrdma_msg *)rb->rg_base; |
| 144 | } | 149 | } |
| 145 | 150 | ||
| 146 | enum rpcrdma_chunktype { | ||
| 147 | rpcrdma_noch = 0, | ||
| 148 | rpcrdma_readch, | ||
| 149 | rpcrdma_areadch, | ||
| 150 | rpcrdma_writech, | ||
| 151 | rpcrdma_replych | ||
| 152 | }; | ||
| 153 | |||
| 154 | /* | 151 | /* |
| 155 | * struct rpcrdma_rep -- this structure encapsulates state required to recv | 152 | * struct rpcrdma_rep -- this structure encapsulates state required to recv |
| 156 | * and complete a reply, asychronously. It needs several pieces of | 153 | * and complete a reply, asychronously. It needs several pieces of |
| @@ -213,6 +210,7 @@ struct rpcrdma_mw { | |||
| 213 | struct ib_fmr *fmr; | 210 | struct ib_fmr *fmr; |
| 214 | struct rpcrdma_frmr frmr; | 211 | struct rpcrdma_frmr frmr; |
| 215 | } r; | 212 | } r; |
| 213 | void (*mw_sendcompletion)(struct ib_wc *); | ||
| 216 | struct list_head mw_list; | 214 | struct list_head mw_list; |
| 217 | struct list_head mw_all; | 215 | struct list_head mw_all; |
| 218 | }; | 216 | }; |
| @@ -258,7 +256,6 @@ struct rpcrdma_req { | |||
| 258 | unsigned int rl_niovs; /* 0, 2 or 4 */ | 256 | unsigned int rl_niovs; /* 0, 2 or 4 */ |
| 259 | unsigned int rl_nchunks; /* non-zero if chunks */ | 257 | unsigned int rl_nchunks; /* non-zero if chunks */ |
| 260 | unsigned int rl_connect_cookie; /* retry detection */ | 258 | unsigned int rl_connect_cookie; /* retry detection */ |
| 261 | enum rpcrdma_chunktype rl_rtype, rl_wtype; | ||
| 262 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ | 259 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ |
| 263 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ | 260 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ |
| 264 | struct ib_sge rl_send_iov[4]; /* for active requests */ | 261 | struct ib_sge rl_send_iov[4]; /* for active requests */ |
| @@ -340,6 +337,29 @@ struct rpcrdma_stats { | |||
| 340 | }; | 337 | }; |
| 341 | 338 | ||
| 342 | /* | 339 | /* |
| 340 | * Per-registration mode operations | ||
| 341 | */ | ||
| 342 | struct rpcrdma_xprt; | ||
| 343 | struct rpcrdma_memreg_ops { | ||
| 344 | int (*ro_map)(struct rpcrdma_xprt *, | ||
| 345 | struct rpcrdma_mr_seg *, int, bool); | ||
| 346 | int (*ro_unmap)(struct rpcrdma_xprt *, | ||
| 347 | struct rpcrdma_mr_seg *); | ||
| 348 | int (*ro_open)(struct rpcrdma_ia *, | ||
| 349 | struct rpcrdma_ep *, | ||
| 350 | struct rpcrdma_create_data_internal *); | ||
| 351 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); | ||
| 352 | int (*ro_init)(struct rpcrdma_xprt *); | ||
| 353 | void (*ro_reset)(struct rpcrdma_xprt *); | ||
| 354 | void (*ro_destroy)(struct rpcrdma_buffer *); | ||
| 355 | const char *ro_displayname; | ||
| 356 | }; | ||
| 357 | |||
| 358 | extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; | ||
| 359 | extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; | ||
| 360 | extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops; | ||
| 361 | |||
| 362 | /* | ||
| 343 | * RPCRDMA transport -- encapsulates the structures above for | 363 | * RPCRDMA transport -- encapsulates the structures above for |
| 344 | * integration with RPC. | 364 | * integration with RPC. |
| 345 | * | 365 | * |
| @@ -398,16 +418,56 @@ void rpcrdma_buffer_put(struct rpcrdma_req *); | |||
| 398 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | 418 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); |
| 399 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | 419 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); |
| 400 | 420 | ||
| 401 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, | ||
| 402 | int, int, struct rpcrdma_xprt *); | ||
| 403 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, | ||
| 404 | struct rpcrdma_xprt *); | ||
| 405 | |||
| 406 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, | 421 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, |
| 407 | size_t, gfp_t); | 422 | size_t, gfp_t); |
| 408 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, | 423 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, |
| 409 | struct rpcrdma_regbuf *); | 424 | struct rpcrdma_regbuf *); |
| 410 | 425 | ||
| 426 | unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); | ||
| 427 | |||
| 428 | /* | ||
| 429 | * Wrappers for chunk registration, shared by read/write chunk code. | ||
| 430 | */ | ||
| 431 | |||
| 432 | void rpcrdma_mapping_error(struct rpcrdma_mr_seg *); | ||
| 433 | |||
| 434 | static inline enum dma_data_direction | ||
| 435 | rpcrdma_data_dir(bool writing) | ||
| 436 | { | ||
| 437 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | ||
| 438 | } | ||
| 439 | |||
| 440 | static inline void | ||
| 441 | rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg, | ||
| 442 | enum dma_data_direction direction) | ||
| 443 | { | ||
| 444 | seg->mr_dir = direction; | ||
| 445 | seg->mr_dmalen = seg->mr_len; | ||
| 446 | |||
| 447 | if (seg->mr_page) | ||
| 448 | seg->mr_dma = ib_dma_map_page(device, | ||
| 449 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
| 450 | seg->mr_dmalen, seg->mr_dir); | ||
| 451 | else | ||
| 452 | seg->mr_dma = ib_dma_map_single(device, | ||
| 453 | seg->mr_offset, | ||
| 454 | seg->mr_dmalen, seg->mr_dir); | ||
| 455 | |||
| 456 | if (ib_dma_mapping_error(device, seg->mr_dma)) | ||
| 457 | rpcrdma_mapping_error(seg); | ||
| 458 | } | ||
| 459 | |||
| 460 | static inline void | ||
| 461 | rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg) | ||
| 462 | { | ||
| 463 | if (seg->mr_page) | ||
| 464 | ib_dma_unmap_page(device, | ||
| 465 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
| 466 | else | ||
| 467 | ib_dma_unmap_single(device, | ||
| 468 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
| 469 | } | ||
| 470 | |||
| 411 | /* | 471 | /* |
| 412 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c | 472 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c |
| 413 | */ | 473 | */ |
| @@ -418,9 +478,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); | |||
| 418 | /* | 478 | /* |
| 419 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | 479 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c |
| 420 | */ | 480 | */ |
| 421 | ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t); | ||
| 422 | int rpcrdma_marshal_req(struct rpc_rqst *); | 481 | int rpcrdma_marshal_req(struct rpc_rqst *); |
| 423 | size_t rpcrdma_max_payload(struct rpcrdma_xprt *); | ||
| 424 | 482 | ||
| 425 | /* Temporary NFS request map cache. Created in svc_rdma.c */ | 483 | /* Temporary NFS request map cache. Created in svc_rdma.c */ |
| 426 | extern struct kmem_cache *svc_rdma_map_cachep; | 484 | extern struct kmem_cache *svc_rdma_map_cachep; |
