author		Linus Torvalds <torvalds@linux-foundation.org>	2015-04-26 20:33:59 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-26 20:33:59 -0400
commit		59953fba87e5e535657403cc6439d24187929559 (patch)
tree		4f92cc3bcacf052cb3fb895512af5a7d3dad86cb
parent		9ec3a646fe09970f801ab15e0f1694060b9f19af (diff)
parent		f139b6c676c7e49b66016b28bf3f8ec5c54be891 (diff)
Merge tag 'nfs-for-4.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Another set of mainly bugfixes and a couple of cleanups. No new
functionality in this round.
Highlights include:
Stable patches:
- Fix a regression in /proc/self/mountstats
- Fix the pNFS flexfiles O_DIRECT support
- Fix high load average due to callback thread sleeping
Bugfixes:
- Various patches to fix the pNFS layoutcommit support
- Do not cache pNFS deviceids unless server notifications are enabled
- Fix a SUNRPC transport reconnection regression
- make debugfs file creation failure non-fatal in SUNRPC
- Another fix for circular directory warnings on NFSv4 "junctioned"
mountpoints
- Fix locking around NFSv4.2 fallocate() support
- Truncating NFSv4 file opens should also sync O_DIRECT writes
- Prevent infinite loop in rpcrdma_ep_create()
Features:
- Various improvements to the RDMA transport code's handling of
memory registration
- Various code cleanups"
* tag 'nfs-for-4.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (55 commits)
fs/nfs: fix new compiler warning about boolean in switch
nfs: Remove unneeded casts in nfs
NFS: Don't attempt to decode missing directory entries
Revert "nfs: replace nfs_add_stats with nfs_inc_stats when add one"
NFS: Rename idmap.c to nfs4idmap.c
NFS: Move nfs_idmap.h into fs/nfs/
NFS: Remove CONFIG_NFS_V4 checks from nfs_idmap.h
NFS: Add a stub for GETDEVICELIST
nfs: remove WARN_ON_ONCE from nfs_direct_good_bytes
nfs: fix DIO good bytes calculation
nfs: Fetch MOUNTED_ON_FILEID when updating an inode
sunrpc: make debugfs file creation failure non-fatal
nfs: fix high load average due to callback thread sleeping
NFS: Reduce time spent holding the i_mutex during fallocate()
NFS: Don't zap caches on fallocate()
xprtrdma: Make rpcrdma_{un}map_one() into inline functions
xprtrdma: Handle non-SEND completions via a callout
xprtrdma: Add "open" memreg op
xprtrdma: Add "destroy MRs" memreg op
xprtrdma: Add "reset MRs" memreg op
...
49 files changed, 1150 insertions(+), 914 deletions(-)
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 1e987acf20c9..8664417955a2 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -22,7 +22,7 @@ nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
 obj-$(CONFIG_NFS_V4) += nfsv4.o
 CFLAGS_nfs4trace.o += -I$(src)
 nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
-	  delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
+	  delegation.o nfs4idmap.o callback.o callback_xdr.o callback_proc.o \
 	  nfs4namespace.o nfs4getroot.o nfs4client.o nfs4session.o \
 	  dns_resolve.o nfs4trace.o
 nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 1cac3c175d18..d2554fe140a3 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -890,6 +890,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
 	.free_deviceid_node	= bl_free_deviceid_node,
 	.pg_read_ops		= &bl_pg_read_ops,
 	.pg_write_ops		= &bl_pg_write_ops,
+	.sync			= pnfs_generic_sync,
 };
 
 static int __init nfs4blocklayout_init(void)
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 5aed4f98df41..e535599a0719 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -33,7 +33,7 @@ bl_free_deviceid_node(struct nfs4_deviceid_node *d)
 		container_of(d, struct pnfs_block_dev, node);
 
 	bl_free_device(dev);
-	kfree(dev);
+	kfree_rcu(dev, node.rcu);
 }
 
 static int
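
The kfree() to kfree_rcu() conversions in this series (here and in the filelayout, flexfilelayout and objlayout drivers below) support the "do not cache pNFS deviceids unless server notifications are enabled" work: deviceid nodes are found by lockless lookups under rcu_read_lock(), so the free must be deferred until an RCU grace period has elapsed. kfree_rcu() takes the pointer plus the name of a struct rcu_head member embedded in the object (a nested member such as node.rcu, as above, also works). A hedged sketch of the idiom with a simplified stand-in type, not the real nfs4_deviceid_node layout:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_dev_node {
	int id;
	struct rcu_head rcu;	/* storage used for the deferred free */
};

static void my_dev_free(struct my_dev_node *node)
{
	/* Readers may still be walking the deviceid list under
	 * rcu_read_lock(); a plain kfree() here could free memory out
	 * from under them.  kfree_rcu() queues the object and frees it
	 * only after all current RCU readers have finished. */
	kfree_rcu(node, rcu);
}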
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 351be9205bf8..8d129bb7355a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -128,7 +128,7 @@ nfs41_callback_svc(void *vrqstp)
 		if (try_to_freeze())
 			continue;
 
-		prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
 		spin_lock_bh(&serv->sv_cb_lock);
 		if (!list_empty(&serv->sv_cb_list)) {
 			req = list_first_entry(&serv->sv_cb_list,
@@ -142,10 +142,10 @@ nfs41_callback_svc(void *vrqstp)
 				error);
 		} else {
 			spin_unlock_bh(&serv->sv_cb_lock);
-			/* schedule_timeout to game the hung task watchdog */
-			schedule_timeout(60 * HZ);
+			schedule();
 			finish_wait(&serv->sv_cb_waitq, &wq);
 		}
+		flush_signals(current);
 	}
 	return 0;
 }
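
Background on this fix: the Linux load average counts tasks sleeping in TASK_UNINTERRUPTIBLE as if they were runnable, so an idle callback kthread parked uninterruptibly inflates the load figure, and the hung-task watchdog (which only monitors uninterruptible sleepers) is what forced the old code to wake every 60 seconds. Sleeping interruptibly avoids both, at the cost of having to discard any signals received. A minimal sketch of the corrected wait pattern follows; my_waitq, my_lock, my_list and my_handle_request() are hypothetical stand-ins, not the SUNRPC names.

#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(my_waitq);
static DEFINE_SPINLOCK(my_lock);
static LIST_HEAD(my_list);

static void my_handle_request(void);

static int my_callback_thread(void *data)
{
	DEFINE_WAIT(wq);

	while (!kthread_should_stop()) {
		/* TASK_INTERRUPTIBLE: idle time no longer counts toward
		 * the load average, and the hung-task watchdog ignores
		 * interruptible sleepers, so no periodic wakeup is needed. */
		prepare_to_wait(&my_waitq, &wq, TASK_INTERRUPTIBLE);
		spin_lock_bh(&my_lock);
		if (!list_empty(&my_list)) {
			spin_unlock_bh(&my_lock);
			finish_wait(&my_waitq, &wq);
			my_handle_request();
		} else {
			spin_unlock_bh(&my_lock);
			schedule();	/* sleep until woken */
			finish_wait(&my_waitq, &wq);
		}
		/* an interruptible sleep can be woken by signals; drop
		 * them so the kthread keeps servicing callbacks */
		flush_signals(current);
	}
	return 0;
}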
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 19874151e95c..892aefff3630 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -31,7 +31,6 @@
 #include <linux/lockd/bind.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
-#include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
 #include <linux/inet.h>
 #include <linux/in6.h>
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index a6ad68865880..029d688a969f 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -378,7 +378,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 		if (freeme == NULL)
 			goto out;
 	}
-	list_add_rcu(&delegation->super_list, &server->delegations);
+	list_add_tail_rcu(&delegation->super_list, &server->delegations);
 	rcu_assign_pointer(nfsi->delegation, delegation);
 	delegation = NULL;
 
@@ -514,7 +514,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
 
 	delegation = nfs_inode_detach_delegation(inode);
 	if (delegation != NULL)
-		nfs_do_return_delegation(inode, delegation, 0);
+		nfs_do_return_delegation(inode, delegation, 1);
 }
 
 /**
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 1e51ecd61854..b2c8b31b2be7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -543,6 +543,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 	if (scratch == NULL)
 		return -ENOMEM;
 
+	if (buflen == 0)
+		goto out_nopages;
+
 	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
 	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
@@ -564,6 +567,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 			break;
 	} while (!entry->eof);
 
+out_nopages:
 	if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
 		array = nfs_readdir_get_array(page);
 		if (!IS_ERR(array)) {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b2cbc3a6cdd9..38678d9a5cc4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -129,22 +129,25 @@ nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
 	int i;
 	ssize_t count;
 
-	WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count);
-
-	count = dreq->mirrors[hdr->pgio_mirror_idx].count;
-	if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
-		count = hdr->io_start + hdr->good_bytes - dreq->io_start;
-		dreq->mirrors[hdr->pgio_mirror_idx].count = count;
-	}
-
-	/* update the dreq->count by finding the minimum agreed count from all
-	 * mirrors */
-	count = dreq->mirrors[0].count;
+	if (dreq->mirror_count == 1) {
+		dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
+		dreq->count += hdr->good_bytes;
+	} else {
+		/* mirrored writes */
+		count = dreq->mirrors[hdr->pgio_mirror_idx].count;
+		if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
+			count = hdr->io_start + hdr->good_bytes - dreq->io_start;
+			dreq->mirrors[hdr->pgio_mirror_idx].count = count;
+		}
+		/* update the dreq->count by finding the minimum agreed count from all
+		 * mirrors */
+		count = dreq->mirrors[0].count;
 
-	for (i = 1; i < dreq->mirror_count; i++)
-		count = min(count, dreq->mirrors[i].count);
+		for (i = 1; i < dreq->mirror_count; i++)
+			count = min(count, dreq->mirrors[i].count);
 
-	dreq->count = count;
+		dreq->count = count;
+	}
 }
 
 /*
@@ -258,18 +261,11 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 	if (!IS_SWAPFILE(inode))
 		return 0;
 
-#ifndef CONFIG_NFS_SWAP
-	dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
-			iocb->ki_filp, (long long) pos, iter->nr_segs);
-
-	return -EINVAL;
-#else
 	VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
 
 	if (iov_iter_rw(iter) == READ)
 		return nfs_file_direct_read(iocb, iter, pos);
 	return nfs_file_direct_write(iocb, iter);
-#endif /* CONFIG_NFS_SWAP */
 }
 
 static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@@ -1030,6 +1026,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 			if (i_size_read(inode) < iocb->ki_pos)
 				i_size_write(inode, iocb->ki_pos);
 			spin_unlock(&inode->i_lock);
+			generic_write_sync(file, pos, result);
 		}
 	}
 	nfs_direct_req_release(dreq);
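
The rewritten nfs_direct_good_bytes() separates the common single-mirror case, where good_bytes can simply be accumulated, from mirrored writes, where a byte only counts once every mirror has acknowledged it, so dreq->count is recomputed as the minimum over all mirrors. A small userspace restatement of that arithmetic (hypothetical types, logic only):

#include <stddef.h>

struct mirror { size_t count; };	/* stand-in for nfs_direct_mirror */

/* Bytes acknowledged by ALL mirrors: the minimum per-mirror count. */
static size_t good_bytes_all_mirrors(const struct mirror *m, int mirror_count)
{
	size_t count = m[0].count;
	int i;

	for (i = 1; i < mirror_count; i++)
		if (m[i].count < count)
			count = m[i].count;
	return count;
}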
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c40e4363e746..8b8d83a526ce 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -280,6 +280,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 	trace_nfs_fsync_enter(inode);
 
+	nfs_inode_dio_wait(inode);
 	do {
 		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 		if (ret != 0)
@@ -782,7 +783,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 	 * Flush all pending writes before doing anything
 	 * with locks..
 	 */
-	nfs_sync_mapping(filp->f_mapping);
+	vfs_fsync(filp, 0);
 
 	l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
 	if (!IS_ERR(l_ctx)) {
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 91e88a7ecef0..a46bf6de9ce4 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -258,7 +258,8 @@ filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
 	    hdr->res.verf->committed != NFS_DATA_SYNC)
 		return;
 
-	pnfs_set_layoutcommit(hdr);
+	pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
+			hdr->mds_offset + hdr->res.count);
 	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
 		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
@@ -373,7 +374,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
 	}
 
 	if (data->verf.committed == NFS_UNSTABLE)
-		pnfs_commit_set_layoutcommit(data);
+		pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
 
 	return 0;
 }
@@ -1086,7 +1087,7 @@ filelayout_alloc_deviceid_node(struct nfs_server *server,
 }
 
 static void
-filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
+filelayout_free_deviceid_node(struct nfs4_deviceid_node *d)
 {
 	nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
 }
@@ -1137,7 +1138,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
 	.read_pagelist		= filelayout_read_pagelist,
 	.write_pagelist		= filelayout_write_pagelist,
 	.alloc_deviceid_node	= filelayout_alloc_deviceid_node,
-	.free_deviceid_node	= filelayout_free_deveiceid_node,
+	.free_deviceid_node	= filelayout_free_deviceid_node,
+	.sync			= pnfs_nfs_generic_sync,
 };
 
 static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 4f372e224603..4946ef40ba87 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -55,7 +55,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 		nfs4_pnfs_ds_put(ds);
 	}
 	kfree(dsaddr->stripe_indices);
-	kfree(dsaddr);
+	kfree_rcu(dsaddr, id_node.rcu);
 }
 
 /* Decode opaque device data and return the result */
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 315cc68945b9..7d05089e52d6 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -11,10 +11,10 @@
 #include <linux/module.h>
 
 #include <linux/sunrpc/metrics.h>
-#include <linux/nfs_idmap.h>
 
 #include "flexfilelayout.h"
 #include "../nfs4session.h"
+#include "../nfs4idmap.h"
 #include "../internal.h"
 #include "../delegation.h"
 #include "../nfs4trace.h"
@@ -891,7 +891,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
 static void
 ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr)
 {
-	pnfs_set_layoutcommit(hdr);
+	pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
+			hdr->mds_offset + hdr->res.count);
 	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
 		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
@@ -1074,7 +1075,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
 	}
 
 	if (data->verf.committed == NFS_UNSTABLE)
-		pnfs_commit_set_layoutcommit(data);
+		pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
 
 	return 0;
 }
@@ -1414,7 +1415,7 @@ ff_layout_get_ds_info(struct inode *inode)
 }
 
 static void
-ff_layout_free_deveiceid_node(struct nfs4_deviceid_node *d)
+ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d)
 {
 	nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds,
 						  id_node));
@@ -1498,7 +1499,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
 	.pg_read_ops		= &ff_layout_pg_read_ops,
 	.pg_write_ops		= &ff_layout_pg_write_ops,
 	.get_ds_info		= ff_layout_get_ds_info,
-	.free_deviceid_node	= ff_layout_free_deveiceid_node,
+	.free_deviceid_node	= ff_layout_free_deviceid_node,
 	.mark_request_commit	= pnfs_layout_mark_request_commit,
 	.clear_request_commit	= pnfs_generic_clear_request_commit,
 	.scan_commit_lists	= pnfs_generic_scan_commit_lists,
@@ -1508,6 +1509,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
 	.write_pagelist		= ff_layout_write_pagelist,
 	.alloc_deviceid_node	= ff_layout_alloc_deviceid_node,
 	.encode_layoutreturn	= ff_layout_encode_layoutreturn,
+	.sync			= pnfs_nfs_generic_sync,
 };
 
 static int __init nfs4flexfilelayout_init(void)
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e2c01f204a95..77a2d026aa12 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -30,7 +30,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
 {
 	nfs4_print_deviceid(&mirror_ds->id_node.deviceid);
 	nfs4_pnfs_ds_put(mirror_ds->ds);
-	kfree(mirror_ds);
+	kfree_rcu(mirror_ds, id_node.rcu);
 }
 
 /* Decode opaque device data and construct new_ds using it */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3689e95da79a..f734562c6d24 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -133,6 +133,13 @@ void nfs_evict_inode(struct inode *inode)
 	nfs_clear_inode(inode);
 }
 
+int nfs_sync_inode(struct inode *inode)
+{
+	nfs_inode_dio_wait(inode);
+	return nfs_wb_all(inode);
+}
+EXPORT_SYMBOL_GPL(nfs_sync_inode);
+
 /**
  * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
 */
@@ -192,7 +199,6 @@ void nfs_zap_caches(struct inode *inode)
 	nfs_zap_caches_locked(inode);
 	spin_unlock(&inode->i_lock);
 }
-EXPORT_SYMBOL_GPL(nfs_zap_caches);
 
 void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
 {
@@ -525,10 +531,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 	trace_nfs_setattr_enter(inode);
 
 	/* Write all dirty data */
-	if (S_ISREG(inode->i_mode)) {
-		nfs_inode_dio_wait(inode);
-		nfs_wb_all(inode);
-	}
+	if (S_ISREG(inode->i_mode))
+		nfs_sync_inode(inode);
 
 	fattr = nfs_alloc_fattr();
 	if (fattr == NULL)
@@ -644,8 +648,9 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	trace_nfs_getattr_enter(inode);
 	/* Flush out writes to the server in order to update c/mtime. */
 	if (S_ISREG(inode->i_mode)) {
-		nfs_inode_dio_wait(inode);
-		err = filemap_write_and_wait(inode->i_mapping);
+		mutex_lock(&inode->i_mutex);
+		err = nfs_sync_inode(inode);
+		mutex_unlock(&inode->i_mutex);
 		if (err)
 			goto out;
 	}
@@ -1588,6 +1593,19 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
 }
 EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc);
 
+
+static inline bool nfs_fileid_valid(struct nfs_inode *nfsi,
+				    struct nfs_fattr *fattr)
+{
+	bool ret1 = true, ret2 = true;
+
+	if (fattr->valid & NFS_ATTR_FATTR_FILEID)
+		ret1 = (nfsi->fileid == fattr->fileid);
+	if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
+		ret2 = (nfsi->fileid == fattr->mounted_on_fileid);
+	return ret1 || ret2;
+}
+
 /*
  * Many nfs protocol calls return the new file attributes after
  * an operation. Here we update the inode to reflect the state
@@ -1614,7 +1632,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		nfs_display_fhandle_hash(NFS_FH(inode)),
 		atomic_read(&inode->i_count), fattr->valid);
 
-	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) {
+	if (!nfs_fileid_valid(nfsi, fattr)) {
 		printk(KERN_ERR "NFS: server %s error: fileid changed\n"
 			"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
 			NFS_SERVER(inode)->nfs_client->cl_hostname,
@@ -1819,7 +1837,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 struct inode *nfs_alloc_inode(struct super_block *sb)
 {
 	struct nfs_inode *nfsi;
-	nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
+	nfsi = kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
 	if (!nfsi)
 		return NULL;
 	nfsi->flags = 0UL;
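
The nfs_fileid_valid() helper added above pairs with the MOUNTED_ON_FILEID changes in nfs4proc.c further down: on an NFSv4 "junctioned" mountpoint the server reports the mounted-on fileid for the covering object, so insisting on a fileid match alone produced false "fileid changed" errors. Attributes are now accepted when either fileid matches. A hedged userspace restatement of the check (the flag bits are illustrative, not the kernel's values):

#include <stdbool.h>
#include <stdint.h>

#define ATTR_FILEID		0x1	/* illustrative flag bits */
#define ATTR_MOUNTED_ON_FILEID	0x2

static bool fileid_valid(uint64_t cached, uint32_t valid,
			 uint64_t fileid, uint64_t mounted_on_fileid)
{
	bool ok_fileid = true, ok_mounted_on = true;

	if (valid & ATTR_FILEID)
		ok_fileid = (cached == fileid);
	if (valid & ATTR_MOUNTED_ON_FILEID)
		ok_mounted_on = (cached == mounted_on_fileid);
	/* either match is enough: a junctioned mountpoint legitimately
	 * reports the mounted-on fileid instead of the object's own */
	return ok_fileid || ok_mounted_on;
}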
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index cb170722769c..3a9e75235f30 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -36,13 +36,16 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
 		loff_t offset, loff_t len)
 {
 	struct inode *inode = file_inode(filep);
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs42_falloc_args args = {
 		.falloc_fh	= NFS_FH(inode),
 		.falloc_offset	= offset,
 		.falloc_length	= len,
+		.falloc_bitmask	= server->cache_consistency_bitmask,
+	};
+	struct nfs42_falloc_res res = {
+		.falloc_server	= server,
 	};
-	struct nfs42_falloc_res res;
-	struct nfs_server *server = NFS_SERVER(inode);
 	int status;
 
 	msg->rpc_argp = &args;
@@ -52,8 +55,17 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
 	if (status)
 		return status;
 
-	return nfs4_call_sync(server->client, server, msg,
-			      &args.seq_args, &res.seq_res, 0);
+	res.falloc_fattr = nfs_alloc_fattr();
+	if (!res.falloc_fattr)
+		return -ENOMEM;
+
+	status = nfs4_call_sync(server->client, server, msg,
+				&args.seq_args, &res.seq_res, 0);
+	if (status == 0)
+		status = nfs_post_op_update_inode(inode, res.falloc_fattr);
+
+	kfree(res.falloc_fattr);
+	return status;
 }
 
 static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
@@ -84,9 +96,13 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
 	if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
 		return -EOPNOTSUPP;
 
+	mutex_lock(&inode->i_mutex);
+
 	err = nfs42_proc_fallocate(&msg, filep, offset, len);
 	if (err == -EOPNOTSUPP)
 		NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
+
+	mutex_unlock(&inode->i_mutex);
 	return err;
 }
 
@@ -101,9 +117,16 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
 	if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE))
 		return -EOPNOTSUPP;
 
+	nfs_wb_all(inode);
+	mutex_lock(&inode->i_mutex);
+
 	err = nfs42_proc_fallocate(&msg, filep, offset, len);
+	if (err == 0)
+		truncate_pagecache_range(inode, offset, (offset + len) -1);
 	if (err == -EOPNOTSUPP)
 		NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
+
+	mutex_unlock(&inode->i_mutex);
 	return err;
 }
 
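
The fallocate locking fix follows a standard NFSv4 pattern: append a GETATTR to the compound, decode the post-op attributes into a freshly allocated nfs_fattr, and feed them to nfs_post_op_update_inode() so cached attributes stay coherent without blowing away the entire cache (the nfs_zap_caches() call removed from nfs4file.c below). A hedged sketch of that result-handling shape; my_rpc_call() is hypothetical, while nfs_alloc_fattr() and nfs_post_op_update_inode() are the real helpers used above:

#include <linux/nfs_fs.h>
#include <linux/slab.h>

static int my_rpc_call(struct inode *inode, struct nfs_fattr *fattr);

static int my_fallocate_rpc(struct inode *inode)
{
	struct nfs_fattr *fattr;
	int status;

	/* the compound now ends with GETATTR, so reserve space for the
	 * decoded post-op attributes before issuing the call */
	fattr = nfs_alloc_fattr();
	if (fattr == NULL)
		return -ENOMEM;

	status = my_rpc_call(inode, fattr);
	if (status == 0)
		/* refresh cached attributes instead of zapping them all */
		status = nfs_post_op_update_inode(inode, fattr);

	kfree(fattr);
	return status;
}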
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 038a7e1521fa..1a25b27248f2 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -25,16 +25,20 @@
 
 #define NFS4_enc_allocate_sz		(compound_encode_hdr_maxsz + \
 					 encode_putfh_maxsz + \
-					 encode_allocate_maxsz)
+					 encode_allocate_maxsz + \
+					 encode_getattr_maxsz)
 #define NFS4_dec_allocate_sz		(compound_decode_hdr_maxsz + \
 					 decode_putfh_maxsz + \
-					 decode_allocate_maxsz)
+					 decode_allocate_maxsz + \
+					 decode_getattr_maxsz)
 #define NFS4_enc_deallocate_sz		(compound_encode_hdr_maxsz + \
 					 encode_putfh_maxsz + \
-					 encode_deallocate_maxsz)
+					 encode_deallocate_maxsz + \
+					 encode_getattr_maxsz)
 #define NFS4_dec_deallocate_sz		(compound_decode_hdr_maxsz + \
 					 decode_putfh_maxsz + \
-					 decode_deallocate_maxsz)
+					 decode_deallocate_maxsz + \
+					 decode_getattr_maxsz)
 #define NFS4_enc_seek_sz		(compound_encode_hdr_maxsz + \
 					 encode_putfh_maxsz + \
 					 encode_seek_maxsz)
@@ -92,6 +96,7 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->falloc_fh, &hdr);
 	encode_allocate(xdr, args, &hdr);
+	encode_getfattr(xdr, args->falloc_bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -110,6 +115,7 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->falloc_fh, &hdr);
 	encode_deallocate(xdr, args, &hdr);
+	encode_getfattr(xdr, args->falloc_bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -183,6 +189,9 @@ static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
 	if (status)
 		goto out;
 	status = decode_allocate(xdr, res);
+	if (status)
+		goto out;
+	decode_getfattr(xdr, res->falloc_fattr, res->falloc_server);
 out:
 	return status;
 }
@@ -207,6 +216,9 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
 	if (status)
 		goto out;
 	status = decode_deallocate(xdr, res);
+	if (status)
+		goto out;
+	decode_getfattr(xdr, res->falloc_fattr, res->falloc_server);
 out:
 	return status;
 }
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 51c2dbd1e942..e42be52a8c18 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -4,7 +4,6 @@
  */
 #include <linux/module.h>
 #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
 #include <linux/nfs_mount.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/auth.h>
@@ -15,6 +14,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "nfs4session.h"
+#include "nfs4idmap.h"
 #include "pnfs.h"
 #include "netns.h"
 
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 619eca34e70f..f58c17b3b480 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -10,6 +10,8 @@
 #include "fscache.h"
 #include "pnfs.h"
 
+#include "nfstrace.h"
+
 #ifdef CONFIG_NFS_V4_2
 #include "nfs42.h"
 #endif
@@ -57,7 +59,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
 	if (openflags & O_TRUNC) {
 		attr.ia_valid |= ATTR_SIZE;
 		attr.ia_size = 0;
-		nfs_wb_all(inode);
+		nfs_sync_inode(inode);
 	}
 
 	inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened);
@@ -100,6 +102,9 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	int ret;
 	struct inode *inode = file_inode(file);
 
+	trace_nfs_fsync_enter(inode);
+
+	nfs_inode_dio_wait(inode);
 	do {
 		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 		if (ret != 0)
@@ -107,7 +112,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 		mutex_lock(&inode->i_mutex);
 		ret = nfs_file_fsync_commit(file, start, end, datasync);
 		if (!ret)
-			ret = pnfs_layoutcommit_inode(inode, true);
+			ret = pnfs_sync_inode(inode, !!datasync);
 		mutex_unlock(&inode->i_mutex);
 		/*
 		 * If nfs_file_fsync_commit detected a server reboot, then
@@ -118,6 +123,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 		end = LLONG_MAX;
 	} while (ret == -EAGAIN);
 
+	trace_nfs_fsync_exit(inode, ret);
 	return ret;
 }
 
@@ -152,15 +158,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
 	if (ret < 0)
 		return ret;
 
-	mutex_lock(&inode->i_mutex);
 	if (mode & FALLOC_FL_PUNCH_HOLE)
-		ret = nfs42_proc_deallocate(filep, offset, len);
-	else
-		ret = nfs42_proc_allocate(filep, offset, len);
-	mutex_unlock(&inode->i_mutex);
-
-	nfs_zap_caches(inode);
-	return ret;
+		return nfs42_proc_deallocate(filep, offset, len);
+	return nfs42_proc_allocate(filep, offset, len);
 }
 #endif /* CONFIG_NFS_V4_2 */
 
diff --git a/fs/nfs/idmap.c b/fs/nfs/nfs4idmap.c
index 857e2a99acc8..2e1737c40a29 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -36,7 +36,6 @@
 #include <linux/types.h>
 #include <linux/parser.h>
 #include <linux/fs.h>
-#include <linux/nfs_idmap.h>
 #include <net/net_namespace.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/nfs_fs.h>
@@ -49,6 +48,7 @@
 
 #include "internal.h"
 #include "netns.h"
+#include "nfs4idmap.h"
 #include "nfs4trace.h"
 
 #define NFS_UINT_MAXLEN 11
diff --git a/include/linux/nfs_idmap.h b/fs/nfs/nfs4idmap.h
index 333844e38f66..de44d7330ab3 100644
--- a/include/linux/nfs_idmap.h
+++ b/fs/nfs/nfs4idmap.h
@@ -1,5 +1,5 @@
 /*
- * include/linux/nfs_idmap.h
+ * fs/nfs/nfs4idmap.h
  *
  * UID and GID to name mapping for clients.
  *
@@ -46,19 +46,8 @@ struct nfs_server;
 struct nfs_fattr;
 struct nfs4_string;
 
-#if IS_ENABLED(CONFIG_NFS_V4)
 int nfs_idmap_init(void);
 void nfs_idmap_quit(void);
-#else
-static inline int nfs_idmap_init(void)
-{
-	return 0;
-}
-
-static inline void nfs_idmap_quit(void)
-{}
-#endif
-
 int nfs_idmap_new(struct nfs_client *);
 void nfs_idmap_delete(struct nfs_client *);
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 98e533f2c94a..45b35b9b1e36 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,7 +51,6 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/module.h>
-#include <linux/nfs_idmap.h>
 #include <linux/xattr.h>
 #include <linux/utsname.h>
 #include <linux/freezer.h>
@@ -63,6 +62,7 @@
 #include "callback.h"
 #include "pnfs.h"
 #include "netns.h"
+#include "nfs4idmap.h"
 #include "nfs4session.h"
 #include "fscache.h"
 
@@ -185,7 +185,8 @@ const u32 nfs4_fattr_bitmap[3] = {
 	| FATTR4_WORD1_SPACE_USED
 	| FATTR4_WORD1_TIME_ACCESS
 	| FATTR4_WORD1_TIME_METADATA
-	| FATTR4_WORD1_TIME_MODIFY,
+	| FATTR4_WORD1_TIME_MODIFY
+	| FATTR4_WORD1_MOUNTED_ON_FILEID,
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
 	FATTR4_WORD2_SECURITY_LABEL
 #endif
@@ -3095,16 +3096,13 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
 			 struct nfs_fsinfo *info,
 			 bool auth_probe)
 {
-	int status;
+	int status = 0;
 
-	switch (auth_probe) {
-	case false:
+	if (!auth_probe)
 		status = nfs4_lookup_root(server, fhandle, info);
-		if (status != -NFS4ERR_WRONGSEC)
-			break;
-	default:
+
+	if (auth_probe || status == NFS4ERR_WRONGSEC)
 		status = nfs4_do_find_root_sec(server, fhandle, info);
-	}
 
 	if (status == 0)
 		status = nfs4_server_capabilities(server, fhandle);
@@ -7944,6 +7942,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server,
 {
 	struct nfs4_getdeviceinfo_args args = {
 		.pdev = pdev,
+		.notify_types = NOTIFY_DEVICEID4_CHANGE |
+			NOTIFY_DEVICEID4_DELETE,
 	};
 	struct nfs4_getdeviceinfo_res res = {
 		.pdev = pdev,
@@ -7958,6 +7958,11 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server,
 
 	dprintk("--> %s\n", __func__);
 	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+	if (res.notification & ~args.notify_types)
+		dprintk("%s: unsupported notification\n", __func__);
+	if (res.notification != args.notify_types)
+		pdev->nocache = 1;
+
 	dprintk("<-- %s status=%d\n", __func__, status);
 
 	return status;
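
The GETDEVICEINFO change implements the "do not cache pNFS deviceids unless server notifications are enabled" bullet from the pull message: the client asks for CHANGE and DELETE notifications in args.notify_types, the server replies with the bitmap it actually granted (now surfaced as res.notification), and if the granted set differs from the requested one the device is flagged nocache, since the client cannot rely on hearing about changes or deletions. A one-line restatement of the cacheability rule, with illustrative bit values:

#include <stdbool.h>
#include <stdint.h>

#define MY_NOTIFY_CHANGE	0x2	/* illustrative; the kernel uses */
#define MY_NOTIFY_DELETE	0x4	/* NOTIFY_DEVICEID4_{CHANGE,DELETE} */

/* A deviceid is safe to cache only if the server granted every
 * notification the client requested; anything less and the cached
 * entry could silently go stale. */
static bool deviceid_cacheable(uint32_t requested, uint32_t granted)
{
	return granted == requested;
}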
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 3b2b20534a3a..2782cfca2265 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -42,7 +42,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
@@ -57,6 +56,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "internal.h"
+#include "nfs4idmap.h"
 #include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 75090feeafad..6fb7cb6b3f4b 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -3,12 +3,12 @@
  */
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/nfs_idmap.h>
 #include <linux/nfs4_mount.h>
 #include <linux/nfs_fs.h>
 #include "delegation.h"
 #include "internal.h"
 #include "nfs4_fs.h"
+#include "nfs4idmap.h"
 #include "dns_resolve.h"
 #include "pnfs.h"
 #include "nfs.h"
@@ -91,10 +91,11 @@ static void nfs4_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages_final(&inode->i_data);
 	clear_inode(inode);
-	pnfs_return_layout(inode);
-	pnfs_destroy_layout(NFS_I(inode));
 	/* If we are holding a delegation, return it! */
 	nfs_inode_return_delegation_noreclaim(inode);
+	/* Note that above delegreturn would trigger pnfs return-on-close */
+	pnfs_return_layout(inode);
+	pnfs_destroy_layout(NFS_I(inode));
 	/* First call standard NFS clear_inode() code */
 	nfs_clear_inode(inode);
 }
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index b6ebe7e445f6..0fbd3ab1be22 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -6,10 +6,10 @@
  * Copyright (c) 2006 Trond Myklebust <Trond.Myklebust@netapp.com>
  */
 #include <linux/sysctl.h>
-#include <linux/nfs_idmap.h>
 #include <linux/nfs_fs.h>
 
 #include "nfs4_fs.h"
+#include "nfs4idmap.h"
 #include "callback.h"
 
 static const int nfs_set_port_min = 0;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5c399ec41079..0aea97841d30 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,10 +52,10 @@
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
 
 #include "nfs4_fs.h"
 #include "internal.h"
+#include "nfs4idmap.h"
 #include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
@@ -1920,7 +1920,7 @@ encode_getdeviceinfo(struct xdr_stream *xdr,
 
 	p = reserve_space(xdr, 4 + 4);
 	*p++ = cpu_to_be32(1);			/* bitmap length */
-	*p++ = cpu_to_be32(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE);
+	*p++ = cpu_to_be32(args->notify_types);
 }
 
 static void
@@ -5753,8 +5753,9 @@ out_overflow:
 
 #if defined(CONFIG_NFS_V4_1)
 static int decode_getdeviceinfo(struct xdr_stream *xdr,
-				struct pnfs_device *pdev)
+				struct nfs4_getdeviceinfo_res *res)
 {
+	struct pnfs_device *pdev = res->pdev;
 	__be32 *p;
 	uint32_t len, type;
 	int status;
@@ -5802,12 +5803,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
 	if (unlikely(!p))
 		goto out_overflow;
 
-	if (be32_to_cpup(p++) &
-	    ~(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE)) {
-		dprintk("%s: unsupported notification\n",
-			__func__);
-	}
-
+	res->notification = be32_to_cpup(p++);
 	for (i = 1; i < len; i++) {
 		if (be32_to_cpup(p++)) {
 			dprintk("%s: unsupported notification\n",
@@ -7061,7 +7057,7 @@ static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
 	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status != 0)
 		goto out;
-	status = decode_getdeviceinfo(xdr, res->pdev);
+	status = decode_getdeviceinfo(xdr, res);
 out:
 	return status;
 }
@@ -7365,6 +7361,11 @@ nfs4_stat_to_errno(int stat)
 	.p_name = #proc, \
 }
 
+#define STUB(proc) \
+[NFSPROC4_CLNT_##proc] = { \
+	.p_name = #proc, \
+}
+
 struct rpc_procinfo nfs4_procedures[] = {
 	PROC(READ, enc_read, dec_read),
 	PROC(WRITE, enc_write, dec_write),
@@ -7417,6 +7418,7 @@ struct rpc_procinfo nfs4_procedures[] = {
 	PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name),
 	PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
 	PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
+	STUB(GETDEVICELIST),
 	PROC(BIND_CONN_TO_SESSION,
 			enc_bind_conn_to_session, dec_bind_conn_to_session),
 	PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid),
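
The STUB() macro relies on C designated initializers: nfs4_procedures[] is indexed by procedure number, so the GETDEVICELIST slot can be populated with just a name and no encode/decode handlers, keeping the table dense while letting the no-longer-implemented operation fail cleanly. A compilable illustration of the idiom with simplified stand-in types:

#include <stdio.h>

enum { CLNT_READ, CLNT_GETDEVICELIST, CLNT_MAX };

struct procinfo {
	void (*encode)(void);
	void (*decode)(void);
	const char *name;
};

static void enc_read(void) { }
static void dec_read(void) { }

#define PROC(proc, enc, dec) \
	[CLNT_##proc] = { .encode = enc, .decode = dec, .name = #proc }
#define STUB(proc) \
	[CLNT_##proc] = { .name = #proc }	/* named slot, no handlers */

static const struct procinfo procs[CLNT_MAX] = {
	PROC(READ, enc_read, dec_read),
	STUB(GETDEVICELIST),
};

int main(void)
{
	printf("%s has handlers: %s\n", procs[CLNT_GETDEVICELIST].name,
	       procs[CLNT_GETDEVICELIST].encode ? "yes" : "no");
	return 0;
}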
diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c
index 4eb0aead69b6..c74f7af23d77 100644
--- a/fs/nfs/nfstrace.c
+++ b/fs/nfs/nfstrace.c
@@ -7,3 +7,6 @@
 
 #define CREATE_TRACE_POINTS
 #include "nfstrace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 24e1d7403c0b..5aaed363556a 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -57,7 +57,7 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d)
 
 	dprintk("%s: free od=%p\n", __func__, de->od.od);
 	osduld_put_device(de->od.od);
-	kfree(de);
+	kfree_rcu(d, rcu);
 }
 
 struct objio_segment {
@@ -637,6 +637,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
 	.pg_read_ops             = &objio_pg_read_ops,
 	.pg_write_ops            = &objio_pg_write_ops,
 
+	.sync                    = pnfs_generic_sync,
+
 	.free_deviceid_node      = objio_free_deviceid_node,
 
 	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 4f802b02fbb9..230606243be6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1090,6 +1090,7 @@ bool pnfs_roc(struct inode *ino)
 	pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */
 	spin_unlock(&ino->i_lock);
 	pnfs_free_lseg_list(&tmp_list);
+	pnfs_layoutcommit_inode(ino, true);
 	return true;
 
 out_noroc:
@@ -1104,8 +1105,10 @@ out_noroc:
 		}
 	}
 	spin_unlock(&ino->i_lock);
-	if (layoutreturn)
+	if (layoutreturn) {
+		pnfs_layoutcommit_inode(ino, true);
 		pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+	}
 	return false;
 }
 
@@ -1841,7 +1844,8 @@ void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
 {
 	trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
 	if (!hdr->pnfs_error) {
-		pnfs_set_layoutcommit(hdr);
+		pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
+				hdr->mds_offset + hdr->res.count);
 		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
 	} else
 		pnfs_ld_handle_write_error(hdr);
@@ -1902,7 +1906,6 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
 	pnfs_put_lseg(hdr->lseg);
 	nfs_pgio_header_free(hdr);
 }
-EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
 
 int
 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
@@ -2032,7 +2035,6 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
 	pnfs_put_lseg(hdr->lseg);
 	nfs_pgio_header_free(hdr);
 }
-EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
 
 int
 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
@@ -2099,64 +2101,34 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
 EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
 
 void
-pnfs_set_layoutcommit(struct nfs_pgio_header *hdr)
+pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
+		loff_t end_pos)
 {
-	struct inode *inode = hdr->inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
-	loff_t end_pos = hdr->mds_offset + hdr->res.count;
 	bool mark_as_dirty = false;
 
 	spin_lock(&inode->i_lock);
 	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
-		mark_as_dirty = true;
-		dprintk("%s: Set layoutcommit for inode %lu ",
-			__func__, inode->i_ino);
-	}
-	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
-		/* references matched in nfs4_layoutcommit_release */
-		pnfs_get_lseg(hdr->lseg);
-	}
-	if (end_pos > nfsi->layout->plh_lwb)
 		nfsi->layout->plh_lwb = end_pos;
-	spin_unlock(&inode->i_lock);
-	dprintk("%s: lseg %p end_pos %llu\n",
-		__func__, hdr->lseg, nfsi->layout->plh_lwb);
-
-	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
-	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
-	if (mark_as_dirty)
-		mark_inode_dirty_sync(inode);
-}
-EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
-
-void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data)
-{
-	struct inode *inode = data->inode;
-	struct nfs_inode *nfsi = NFS_I(inode);
-	bool mark_as_dirty = false;
-
-	spin_lock(&inode->i_lock);
-	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
 		mark_as_dirty = true;
 		dprintk("%s: Set layoutcommit for inode %lu ",
 			__func__, inode->i_ino);
-	}
-	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) {
+	} else if (end_pos > nfsi->layout->plh_lwb)
+		nfsi->layout->plh_lwb = end_pos;
+	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
 		/* references matched in nfs4_layoutcommit_release */
-		pnfs_get_lseg(data->lseg);
+		pnfs_get_lseg(lseg);
 	}
-	if (data->lwb > nfsi->layout->plh_lwb)
-		nfsi->layout->plh_lwb = data->lwb;
 	spin_unlock(&inode->i_lock);
 	dprintk("%s: lseg %p end_pos %llu\n",
-		__func__, data->lseg, nfsi->layout->plh_lwb);
+		__func__, lseg, nfsi->layout->plh_lwb);
 
 	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
 	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
 	if (mark_as_dirty)
 		mark_inode_dirty_sync(inode);
 }
-EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit);
+EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
 
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
 {
@@ -2216,7 +2188,6 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 	pnfs_list_write_lseg(inode, &data->lseg_list);
2217 | 2189 | ||
2218 | end_pos = nfsi->layout->plh_lwb; | 2190 | end_pos = nfsi->layout->plh_lwb; |
2219 | nfsi->layout->plh_lwb = 0; | ||
2220 | 2191 | ||
2221 | nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); | 2192 | nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); |
2222 | spin_unlock(&inode->i_lock); | 2193 | spin_unlock(&inode->i_lock); |
@@ -2233,11 +2204,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
2233 | status = ld->prepare_layoutcommit(&data->args); | 2204 | status = ld->prepare_layoutcommit(&data->args); |
2234 | if (status) { | 2205 | if (status) { |
2235 | spin_lock(&inode->i_lock); | 2206 | spin_lock(&inode->i_lock); |
2236 | if (end_pos < nfsi->layout->plh_lwb) | 2207 | set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); |
2208 | if (end_pos > nfsi->layout->plh_lwb) | ||
2237 | nfsi->layout->plh_lwb = end_pos; | 2209 | nfsi->layout->plh_lwb = end_pos; |
2238 | spin_unlock(&inode->i_lock); | 2210 | spin_unlock(&inode->i_lock); |
2239 | put_rpccred(data->cred); | 2211 | put_rpccred(data->cred); |
2240 | set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); | ||
2241 | goto clear_layoutcommitting; | 2212 | goto clear_layoutcommitting; |
2242 | } | 2213 | } |
2243 | } | 2214 | } |
@@ -2258,6 +2229,13 @@ clear_layoutcommitting: | |||
2258 | } | 2229 | } |
2259 | EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); | 2230 | EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); |
2260 | 2231 | ||
2232 | int | ||
2233 | pnfs_generic_sync(struct inode *inode, bool datasync) | ||
2234 | { | ||
2235 | return pnfs_layoutcommit_inode(inode, true); | ||
2236 | } | ||
2237 | EXPORT_SYMBOL_GPL(pnfs_generic_sync); | ||
2238 | |||
2261 | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) | 2239 | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) |
2262 | { | 2240 | { |
2263 | struct nfs4_threshold *thp; | 2241 | struct nfs4_threshold *thp; |
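The pnfs.c hunks above fold pnfs_commit_set_layoutcommit() into a single pnfs_set_layoutcommit() that takes the inode, lseg, and end offset directly, and they stop clearing plh_lwb in pnfs_layoutcommit_inode(): the first writer after a layoutcommit sets NFS_INO_LAYOUTCOMMIT and seeds the last-write-byte mark, while later writers only extend it. A minimal userspace sketch of that pattern, with a pthread mutex standing in for the inode spinlock and every name hypothetical:

    /* Illustrative sketch (not kernel code): set a "commit needed"
     * flag on the first writer only, and keep a high-water mark of
     * the last written byte, all under one lock.
     */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct layout_state {
        pthread_mutex_t lock;
        bool commit_needed;   /* analogous to NFS_INO_LAYOUTCOMMIT */
        long long lwb;        /* last write byte, like plh_lwb */
    };

    /* Returns true when the caller should mark the inode dirty. */
    static bool record_write(struct layout_state *ls, long long end_pos)
    {
        bool first = false;

        pthread_mutex_lock(&ls->lock);
        if (!ls->commit_needed) {
            ls->commit_needed = true;
            ls->lwb = end_pos;       /* first write defines the mark */
            first = true;
        } else if (end_pos > ls->lwb) {
            ls->lwb = end_pos;       /* later writes only extend it */
        }
        pthread_mutex_unlock(&ls->lock);
        return first;
    }

    int main(void)
    {
        struct layout_state ls = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

        printf("dirty=%d\n", record_write(&ls, 4096));   /* dirty=1 */
        printf("dirty=%d\n", record_write(&ls, 8192));   /* dirty=0 */
        printf("lwb=%lld\n", ls.lwb);                    /* lwb=8192 */
        return 0;
    }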
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 084c9144f86d..1e6308f82fc3 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -155,6 +155,8 @@ struct pnfs_layoutdriver_type { | |||
155 | int how, | 155 | int how, |
156 | struct nfs_commit_info *cinfo); | 156 | struct nfs_commit_info *cinfo); |
157 | 157 | ||
158 | int (*sync)(struct inode *inode, bool datasync); | ||
159 | |||
158 | /* | 160 | /* |
159 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted | 161 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted |
160 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS | 162 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS |
@@ -203,6 +205,7 @@ struct pnfs_device { | |||
203 | struct page **pages; | 205 | struct page **pages; |
204 | unsigned int pgbase; | 206 | unsigned int pgbase; |
205 | unsigned int pglen; /* reply buffer length */ | 207 | unsigned int pglen; /* reply buffer length */ |
208 | unsigned char nocache : 1;/* May not be cached */ | ||
206 | }; | 209 | }; |
207 | 210 | ||
208 | #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 | 211 | #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 |
@@ -263,10 +266,11 @@ bool pnfs_roc(struct inode *ino); | |||
263 | void pnfs_roc_release(struct inode *ino); | 266 | void pnfs_roc_release(struct inode *ino); |
264 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 267 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
265 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); | 268 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); |
266 | void pnfs_set_layoutcommit(struct nfs_pgio_header *); | 269 | void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); |
267 | void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); | ||
268 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | 270 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); |
269 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 271 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
272 | int pnfs_generic_sync(struct inode *inode, bool datasync); | ||
273 | int pnfs_nfs_generic_sync(struct inode *inode, bool datasync); | ||
270 | int _pnfs_return_layout(struct inode *); | 274 | int _pnfs_return_layout(struct inode *); |
271 | int pnfs_commit_and_return_layout(struct inode *); | 275 | int pnfs_commit_and_return_layout(struct inode *); |
272 | void pnfs_ld_write_done(struct nfs_pgio_header *); | 276 | void pnfs_ld_write_done(struct nfs_pgio_header *); |
@@ -291,6 +295,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, | |||
291 | enum { | 295 | enum { |
292 | NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ | 296 | NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ |
293 | NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */ | 297 | NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */ |
298 | NFS_DEVICEID_NOCACHE, /* device may not be cached */ | ||
294 | }; | 299 | }; |
295 | 300 | ||
296 | /* pnfs_dev.c */ | 301 | /* pnfs_dev.c */ |
@@ -302,6 +307,7 @@ struct nfs4_deviceid_node { | |||
302 | unsigned long flags; | 307 | unsigned long flags; |
303 | unsigned long timestamp_unavailable; | 308 | unsigned long timestamp_unavailable; |
304 | struct nfs4_deviceid deviceid; | 309 | struct nfs4_deviceid deviceid; |
310 | struct rcu_head rcu; | ||
305 | atomic_t ref; | 311 | atomic_t ref; |
306 | }; | 312 | }; |
307 | 313 | ||
@@ -486,6 +492,14 @@ pnfs_ld_read_whole_page(struct inode *inode) | |||
486 | return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; | 492 | return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; |
487 | } | 493 | } |
488 | 494 | ||
495 | static inline int | ||
496 | pnfs_sync_inode(struct inode *inode, bool datasync) | ||
497 | { | ||
498 | if (!pnfs_enabled_sb(NFS_SERVER(inode))) | ||
499 | return 0; | ||
500 | return NFS_SERVER(inode)->pnfs_curr_ld->sync(inode, datasync); | ||
501 | } | ||
502 | |||
489 | static inline bool | 503 | static inline bool |
490 | pnfs_layoutcommit_outstanding(struct inode *inode) | 504 | pnfs_layoutcommit_outstanding(struct inode *inode) |
491 | { | 505 | { |
@@ -568,6 +582,12 @@ pnfs_ld_read_whole_page(struct inode *inode) | |||
568 | return false; | 582 | return false; |
569 | } | 583 | } |
570 | 584 | ||
585 | static inline int | ||
586 | pnfs_sync_inode(struct inode *inode, bool datasync) | ||
587 | { | ||
588 | return 0; | ||
589 | } | ||
590 | |||
571 | static inline bool | 591 | static inline bool |
572 | pnfs_roc(struct inode *ino) | 592 | pnfs_roc(struct inode *ino) |
573 | { | 593 | { |
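pnfs.h now defines pnfs_sync_inode() twice: a real inline that dispatches to the layout driver's new ->sync method when pNFS is compiled in, and a no-op stub otherwise, so callers such as nfs_wb_all() need no #ifdef. A self-contained sketch of that enabled/stub split, where HAVE_FEATURE and every identifier are stand-ins:

    #include <stdbool.h>
    #include <stdio.h>

    #define HAVE_FEATURE 1   /* stands in for CONFIG_NFS_V4_1 */

    struct object { bool feature_on; };

    #if HAVE_FEATURE
    static inline int feature_sync(struct object *o, bool datasync)
    {
        if (!o->feature_on)
            return 0;                 /* no pNFS-like support: skip */
        printf("syncing (datasync=%d)\n", datasync);
        return 0;
    }
    #else
    static inline int feature_sync(struct object *o, bool datasync)
    {
        (void)o; (void)datasync;
        return 0;                     /* compiled out: no-op stub */
    }
    #endif

    int main(void)
    {
        struct object o = { .feature_on = true };
        return feature_sync(&o, true);
    }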
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index aa2ec0015183..2961fcd7a2df 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c | |||
@@ -149,6 +149,8 @@ nfs4_get_device_info(struct nfs_server *server, | |||
149 | */ | 149 | */ |
150 | d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev, | 150 | d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev, |
151 | gfp_flags); | 151 | gfp_flags); |
152 | if (d && pdev->nocache) | ||
153 | set_bit(NFS_DEVICEID_NOCACHE, &d->flags); | ||
152 | 154 | ||
153 | out_free_pages: | 155 | out_free_pages: |
154 | for (i = 0; i < max_pages; i++) | 156 | for (i = 0; i < max_pages; i++) |
@@ -175,8 +177,8 @@ __nfs4_find_get_deviceid(struct nfs_server *server, | |||
175 | rcu_read_lock(); | 177 | rcu_read_lock(); |
176 | d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id, | 178 | d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id, |
177 | hash); | 179 | hash); |
178 | if (d != NULL) | 180 | if (d != NULL && !atomic_inc_not_zero(&d->ref)) |
179 | atomic_inc(&d->ref); | 181 | d = NULL; |
180 | rcu_read_unlock(); | 182 | rcu_read_unlock(); |
181 | return d; | 183 | return d; |
182 | } | 184 | } |
@@ -235,12 +237,11 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
235 | return; | 237 | return; |
236 | } | 238 | } |
237 | hlist_del_init_rcu(&d->node); | 239 | hlist_del_init_rcu(&d->node); |
240 | clear_bit(NFS_DEVICEID_NOCACHE, &d->flags); | ||
238 | spin_unlock(&nfs4_deviceid_lock); | 241 | spin_unlock(&nfs4_deviceid_lock); |
239 | synchronize_rcu(); | ||
240 | 242 | ||
241 | /* balance the initial ref set in pnfs_insert_deviceid */ | 243 | /* balance the initial ref set in pnfs_insert_deviceid */ |
242 | if (atomic_dec_and_test(&d->ref)) | 244 | nfs4_put_deviceid_node(d); |
243 | d->ld->free_deviceid_node(d); | ||
244 | } | 245 | } |
245 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); | 246 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); |
246 | 247 | ||
@@ -271,6 +272,11 @@ EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); | |||
271 | bool | 272 | bool |
272 | nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) | 273 | nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) |
273 | { | 274 | { |
275 | if (test_bit(NFS_DEVICEID_NOCACHE, &d->flags)) { | ||
276 | if (atomic_add_unless(&d->ref, -1, 2)) | ||
277 | return false; | ||
278 | nfs4_delete_deviceid(d->ld, d->nfs_client, &d->deviceid); | ||
279 | } | ||
274 | if (!atomic_dec_and_test(&d->ref)) | 280 | if (!atomic_dec_and_test(&d->ref)) |
275 | return false; | 281 | return false; |
276 | d->ld->free_deviceid_node(d); | 282 | d->ld->free_deviceid_node(d); |
@@ -314,6 +320,7 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash) | |||
314 | if (d->nfs_client == clp && atomic_read(&d->ref)) { | 320 | if (d->nfs_client == clp && atomic_read(&d->ref)) { |
315 | hlist_del_init_rcu(&d->node); | 321 | hlist_del_init_rcu(&d->node); |
316 | hlist_add_head(&d->tmpnode, &tmp); | 322 | hlist_add_head(&d->tmpnode, &tmp); |
323 | clear_bit(NFS_DEVICEID_NOCACHE, &d->flags); | ||
317 | } | 324 | } |
318 | rcu_read_unlock(); | 325 | rcu_read_unlock(); |
319 | spin_unlock(&nfs4_deviceid_lock); | 326 | spin_unlock(&nfs4_deviceid_lock); |
@@ -321,12 +328,10 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash) | |||
321 | if (hlist_empty(&tmp)) | 328 | if (hlist_empty(&tmp)) |
322 | return; | 329 | return; |
323 | 330 | ||
324 | synchronize_rcu(); | ||
325 | while (!hlist_empty(&tmp)) { | 331 | while (!hlist_empty(&tmp)) { |
326 | d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode); | 332 | d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode); |
327 | hlist_del(&d->tmpnode); | 333 | hlist_del(&d->tmpnode); |
328 | if (atomic_dec_and_test(&d->ref)) | 334 | nfs4_put_deviceid_node(d); |
329 | d->ld->free_deviceid_node(d); | ||
330 | } | 335 | } |
331 | } | 336 | } |
332 | 337 | ||
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 54e36b38fb5f..f37e25b6311c 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c | |||
@@ -561,7 +561,7 @@ static bool load_v3_ds_connect(void) | |||
561 | return(get_v3_ds_connect != NULL); | 561 | return(get_v3_ds_connect != NULL); |
562 | } | 562 | } |
563 | 563 | ||
564 | void __exit nfs4_pnfs_v3_ds_connect_unload(void) | 564 | void nfs4_pnfs_v3_ds_connect_unload(void) |
565 | { | 565 | { |
566 | if (get_v3_ds_connect) { | 566 | if (get_v3_ds_connect) { |
567 | symbol_put(nfs3_set_ds_client); | 567 | symbol_put(nfs3_set_ds_client); |
@@ -868,3 +868,13 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, | |||
868 | nfs_request_add_commit_list(req, list, cinfo); | 868 | nfs_request_add_commit_list(req, list, cinfo); |
869 | } | 869 | } |
870 | EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); | 870 | EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); |
871 | |||
872 | int | ||
873 | pnfs_nfs_generic_sync(struct inode *inode, bool datasync) | ||
874 | { | ||
875 | if (datasync) | ||
876 | return 0; | ||
877 | return pnfs_layoutcommit_inode(inode, true); | ||
878 | } | ||
879 | EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync); | ||
880 | |||
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a5b7427c3754..ae0ff7a11b40 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -284,7 +284,7 @@ int nfs_readpage(struct file *file, struct page *page) | |||
284 | dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", | 284 | dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", |
285 | page, PAGE_CACHE_SIZE, page_file_index(page)); | 285 | page, PAGE_CACHE_SIZE, page_file_index(page)); |
286 | nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); | 286 | nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); |
287 | nfs_inc_stats(inode, NFSIOS_READPAGES); | 287 | nfs_add_stats(inode, NFSIOS_READPAGES, 1); |
288 | 288 | ||
289 | /* | 289 | /* |
290 | * Try to flush any pending writes to the file.. | 290 | * Try to flush any pending writes to the file.. |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 21f8f52bf37d..f175b833b6ba 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -43,7 +43,6 @@ | |||
43 | #include <linux/seq_file.h> | 43 | #include <linux/seq_file.h> |
44 | #include <linux/mount.h> | 44 | #include <linux/mount.h> |
45 | #include <linux/namei.h> | 45 | #include <linux/namei.h> |
46 | #include <linux/nfs_idmap.h> | ||
47 | #include <linux/vfs.h> | 46 | #include <linux/vfs.h> |
48 | #include <linux/inet.h> | 47 | #include <linux/inet.h> |
49 | #include <linux/in6.h> | 48 | #include <linux/in6.h> |
@@ -2193,7 +2192,7 @@ nfs_compare_remount_data(struct nfs_server *nfss, | |||
2193 | data->version != nfss->nfs_client->rpc_ops->version || | 2192 | data->version != nfss->nfs_client->rpc_ops->version || |
2194 | data->minorversion != nfss->nfs_client->cl_minorversion || | 2193 | data->minorversion != nfss->nfs_client->cl_minorversion || |
2195 | data->retrans != nfss->client->cl_timeout->to_retries || | 2194 | data->retrans != nfss->client->cl_timeout->to_retries || |
2196 | data->selected_flavor != nfss->client->cl_auth->au_flavor || | 2195 | !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) || |
2197 | data->acregmin != nfss->acregmin / HZ || | 2196 | data->acregmin != nfss->acregmin / HZ || |
2198 | data->acregmax != nfss->acregmax / HZ || | 2197 | data->acregmax != nfss->acregmax / HZ || |
2199 | data->acdirmin != nfss->acdirmin / HZ || | 2198 | data->acdirmin != nfss->acdirmin / HZ || |
@@ -2241,7 +2240,6 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) | |||
2241 | data->wsize = nfss->wsize; | 2240 | data->wsize = nfss->wsize; |
2242 | data->retrans = nfss->client->cl_timeout->to_retries; | 2241 | data->retrans = nfss->client->cl_timeout->to_retries; |
2243 | data->selected_flavor = nfss->client->cl_auth->au_flavor; | 2242 | data->selected_flavor = nfss->client->cl_auth->au_flavor; |
2244 | data->auth_info = nfss->auth_info; | ||
2245 | data->acregmin = nfss->acregmin / HZ; | 2243 | data->acregmin = nfss->acregmin / HZ; |
2246 | data->acregmax = nfss->acregmax / HZ; | 2244 | data->acregmax = nfss->acregmax / HZ; |
2247 | data->acdirmin = nfss->acdirmin / HZ; | 2245 | data->acdirmin = nfss->acdirmin / HZ; |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3612b4622337..d12a4be613a5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -580,7 +580,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st | |||
580 | int ret; | 580 | int ret; |
581 | 581 | ||
582 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); | 582 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); |
583 | nfs_inc_stats(inode, NFSIOS_WRITEPAGES); | 583 | nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); |
584 | 584 | ||
585 | nfs_pageio_cond_complete(pgio, page_file_index(page)); | 585 | nfs_pageio_cond_complete(pgio, page_file_index(page)); |
586 | ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); | 586 | ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); |
@@ -1840,17 +1840,16 @@ EXPORT_SYMBOL_GPL(nfs_write_inode); | |||
1840 | */ | 1840 | */ |
1841 | int nfs_wb_all(struct inode *inode) | 1841 | int nfs_wb_all(struct inode *inode) |
1842 | { | 1842 | { |
1843 | struct writeback_control wbc = { | ||
1844 | .sync_mode = WB_SYNC_ALL, | ||
1845 | .nr_to_write = LONG_MAX, | ||
1846 | .range_start = 0, | ||
1847 | .range_end = LLONG_MAX, | ||
1848 | }; | ||
1849 | int ret; | 1843 | int ret; |
1850 | 1844 | ||
1851 | trace_nfs_writeback_inode_enter(inode); | 1845 | trace_nfs_writeback_inode_enter(inode); |
1852 | 1846 | ||
1853 | ret = sync_inode(inode, &wbc); | 1847 | ret = filemap_write_and_wait(inode->i_mapping); |
1848 | if (!ret) { | ||
1849 | ret = nfs_commit_inode(inode, FLUSH_SYNC); | ||
1850 | if (!ret) | ||
1851 | pnfs_sync_inode(inode, true); | ||
1852 | } | ||
1854 | 1853 | ||
1855 | trace_nfs_writeback_inode_exit(inode, ret); | 1854 | trace_nfs_writeback_inode_exit(inode, ret); |
1856 | return ret; | 1855 | return ret; |
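nfs_wb_all() no longer builds a writeback_control for sync_inode(); it now runs three stages in order, flushing dirty pages, sending a COMMIT, and finally (on pNFS mounts) a LAYOUTCOMMIT through the new pnfs_sync_inode(), stopping at the first stage that fails. Note the patch discards the layoutcommit return value. A sketch of that sequencing, with placeholder stage functions named after their kernel counterparts:

    #include <stdio.h>

    static int write_and_wait(void) { return 0; }  /* filemap_write_and_wait */
    static int commit(void)         { return 0; }  /* nfs_commit_inode */
    static int layout_sync(void)    { return 0; }  /* pnfs_sync_inode */

    static int wb_all(void)
    {
        int ret = write_and_wait();

        if (!ret) {
            ret = commit();
            if (!ret)
                layout_sync();   /* result ignored, as in the patch */
        }
        return ret;              /* first failing stage wins */
    }

    int main(void)
    {
        printf("wb_all -> %d\n", wb_all());
        return 0;
    }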
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 410abd172feb..b95f914ce083 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -511,6 +511,7 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned | |||
511 | * Try to write back everything synchronously (but check the | 511 | * Try to write back everything synchronously (but check the |
512 | * return value!) | 512 | * return value!) |
513 | */ | 513 | */ |
514 | extern int nfs_sync_inode(struct inode *inode); | ||
514 | extern int nfs_wb_all(struct inode *inode); | 515 | extern int nfs_wb_all(struct inode *inode); |
515 | extern int nfs_wb_page(struct inode *inode, struct page* page); | 516 | extern int nfs_wb_page(struct inode *inode, struct page* page); |
516 | extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); | 517 | extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); |
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4cb3eaa89cf7..93ab6071bbe9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
@@ -255,11 +255,13 @@ struct nfs4_layoutget { | |||
255 | struct nfs4_getdeviceinfo_args { | 255 | struct nfs4_getdeviceinfo_args { |
256 | struct nfs4_sequence_args seq_args; | 256 | struct nfs4_sequence_args seq_args; |
257 | struct pnfs_device *pdev; | 257 | struct pnfs_device *pdev; |
258 | __u32 notify_types; | ||
258 | }; | 259 | }; |
259 | 260 | ||
260 | struct nfs4_getdeviceinfo_res { | 261 | struct nfs4_getdeviceinfo_res { |
261 | struct nfs4_sequence_res seq_res; | 262 | struct nfs4_sequence_res seq_res; |
262 | struct pnfs_device *pdev; | 263 | struct pnfs_device *pdev; |
264 | __u32 notification; | ||
263 | }; | 265 | }; |
264 | 266 | ||
265 | struct nfs4_layoutcommit_args { | 267 | struct nfs4_layoutcommit_args { |
@@ -1271,11 +1273,15 @@ struct nfs42_falloc_args { | |||
1271 | nfs4_stateid falloc_stateid; | 1273 | nfs4_stateid falloc_stateid; |
1272 | u64 falloc_offset; | 1274 | u64 falloc_offset; |
1273 | u64 falloc_length; | 1275 | u64 falloc_length; |
1276 | const u32 *falloc_bitmask; | ||
1274 | }; | 1277 | }; |
1275 | 1278 | ||
1276 | struct nfs42_falloc_res { | 1279 | struct nfs42_falloc_res { |
1277 | struct nfs4_sequence_res seq_res; | 1280 | struct nfs4_sequence_res seq_res; |
1278 | unsigned int status; | 1281 | unsigned int status; |
1282 | |||
1283 | struct nfs_fattr *falloc_fattr; | ||
1284 | const struct nfs_server *falloc_server; | ||
1279 | }; | 1285 | }; |
1280 | 1286 | ||
1281 | struct nfs42_seek_args { | 1287 | struct nfs42_seek_args { |
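The new notify_types argument and notification result in the GETDEVICEINFO structures carry the deviceid-notification negotiation that drives the NFS_DEVICEID_NOCACHE handling above: the client asks for change/delete notifications and, per this series, declines to cache a deviceid the server will not send notifications for. A sketch of that decision; the bit values here are invented, not the RFC 5661 constants:

    #include <stdbool.h>
    #include <stdio.h>

    #define NOTIFY_DEVICEID_CHANGE (1u << 1)   /* hypothetical layout */
    #define NOTIFY_DEVICEID_DELETE (1u << 2)

    static bool deviceid_cacheable(unsigned int granted)
    {
        /* Cache only if the server granted at least one notification. */
        return granted & (NOTIFY_DEVICEID_CHANGE | NOTIFY_DEVICEID_DELETE);
    }

    int main(void)
    {
        unsigned int granted = 0;   /* server ignored our notify_types */

        printf("nocache = %d\n", !deviceid_cacheable(granted));
        return 0;
    }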
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index aadc6a04e1ac..807371357160 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h | |||
@@ -142,12 +142,18 @@ typedef __be32 rpc_fraghdr; | |||
142 | (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4)) | 142 | (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4)) |
143 | 143 | ||
144 | /* | 144 | /* |
145 | * RFC1833/RFC3530 rpcbind (v3+) well-known netid's. | 145 | * Well-known netids. See: |
146 | * | ||
147 | * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml | ||
146 | */ | 148 | */ |
147 | #define RPCBIND_NETID_UDP "udp" | 149 | #define RPCBIND_NETID_UDP "udp" |
148 | #define RPCBIND_NETID_TCP "tcp" | 150 | #define RPCBIND_NETID_TCP "tcp" |
151 | #define RPCBIND_NETID_RDMA "rdma" | ||
152 | #define RPCBIND_NETID_SCTP "sctp" | ||
149 | #define RPCBIND_NETID_UDP6 "udp6" | 153 | #define RPCBIND_NETID_UDP6 "udp6" |
150 | #define RPCBIND_NETID_TCP6 "tcp6" | 154 | #define RPCBIND_NETID_TCP6 "tcp6" |
155 | #define RPCBIND_NETID_RDMA6 "rdma6" | ||
156 | #define RPCBIND_NETID_SCTP6 "sctp6" | ||
151 | #define RPCBIND_NETID_LOCAL "local" | 157 | #define RPCBIND_NETID_LOCAL "local" |
152 | 158 | ||
153 | /* | 159 | /* |
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 64a0a0a97b23..c984c85981ea 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h | |||
@@ -41,11 +41,6 @@ | |||
41 | #define _LINUX_SUNRPC_XPRTRDMA_H | 41 | #define _LINUX_SUNRPC_XPRTRDMA_H |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * rpcbind (v3+) RDMA netid. | ||
45 | */ | ||
46 | #define RPCBIND_NETID_RDMA "rdma" | ||
47 | |||
48 | /* | ||
49 | * Constants. Max RPC/NFS header is big enough to account for | 44 | * Constants. Max RPC/NFS header is big enough to account for |
50 | * additional marshaling buffers passed down by Linux client. | 45 | * additional marshaling buffers passed down by Linux client. |
51 | * | 46 | * |
diff --git a/include/uapi/linux/nfs_idmap.h b/include/uapi/linux/nfs_idmap.h index 8d4b1c7b24d4..038e36c96669 100644 --- a/include/uapi/linux/nfs_idmap.h +++ b/include/uapi/linux/nfs_idmap.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * include/linux/nfs_idmap.h | 2 | * include/uapi/linux/nfs_idmap.h |
3 | * | 3 | * |
4 | * UID and GID to name mapping for clients. | 4 | * UID and GID to name mapping for clients. |
5 | * | 5 | * |
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b91fd9c597b4..337ca851a350 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -89,8 +89,8 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) | |||
89 | if (!task->tk_timeout) | 89 | if (!task->tk_timeout) |
90 | return; | 90 | return; |
91 | 91 | ||
92 | dprintk("RPC: %5u setting alarm for %lu ms\n", | 92 | dprintk("RPC: %5u setting alarm for %u ms\n", |
93 | task->tk_pid, task->tk_timeout * 1000 / HZ); | 93 | task->tk_pid, jiffies_to_msecs(task->tk_timeout)); |
94 | 94 | ||
95 | task->u.tk_wait.expires = jiffies + task->tk_timeout; | 95 | task->u.tk_wait.expires = jiffies + task->tk_timeout; |
96 | if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) | 96 | if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) |
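The dprintk change in __rpc_add_timer() replaces the open-coded `timeout * 1000 / HZ` with jiffies_to_msecs(): the multiply can wrap for large jiffies counts on 32-bit, and the canonical helper also handles configurations where HZ does not divide 1000 evenly. A rough userspace stand-in for the easy case (1000 % HZ == 0), purely to show the unit conversion:

    #include <stdio.h>

    #define HZ 250   /* example CONFIG_HZ */

    /* Approximates jiffies_to_msecs() when 1000 % HZ == 0. */
    static unsigned int jiffies_to_msecs_sketch(unsigned long j)
    {
        return j * (1000 / HZ);
    }

    int main(void)
    {
        unsigned long timeout = 15 * HZ;   /* a 15 second RPC timeout */

        printf("timeout = %u ms\n", jiffies_to_msecs_sketch(timeout));
        return 0;
    }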
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9949722d99ce..1d4fe24af06a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -326,6 +326,15 @@ out_unlock: | |||
326 | xprt_clear_locked(xprt); | 326 | xprt_clear_locked(xprt); |
327 | } | 327 | } |
328 | 328 | ||
329 | static void xprt_task_clear_bytes_sent(struct rpc_task *task) | ||
330 | { | ||
331 | if (task != NULL) { | ||
332 | struct rpc_rqst *req = task->tk_rqstp; | ||
333 | if (req != NULL) | ||
334 | req->rq_bytes_sent = 0; | ||
335 | } | ||
336 | } | ||
337 | |||
329 | /** | 338 | /** |
330 | * xprt_release_xprt - allow other requests to use a transport | 339 | * xprt_release_xprt - allow other requests to use a transport |
331 | * @xprt: transport with other tasks potentially waiting | 340 | * @xprt: transport with other tasks potentially waiting |
@@ -336,11 +345,7 @@ out_unlock: | |||
336 | void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) | 345 | void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) |
337 | { | 346 | { |
338 | if (xprt->snd_task == task) { | 347 | if (xprt->snd_task == task) { |
339 | if (task != NULL) { | 348 | xprt_task_clear_bytes_sent(task); |
340 | struct rpc_rqst *req = task->tk_rqstp; | ||
341 | if (req != NULL) | ||
342 | req->rq_bytes_sent = 0; | ||
343 | } | ||
344 | xprt_clear_locked(xprt); | 349 | xprt_clear_locked(xprt); |
345 | __xprt_lock_write_next(xprt); | 350 | __xprt_lock_write_next(xprt); |
346 | } | 351 | } |
@@ -358,11 +363,7 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt); | |||
358 | void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) | 363 | void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) |
359 | { | 364 | { |
360 | if (xprt->snd_task == task) { | 365 | if (xprt->snd_task == task) { |
361 | if (task != NULL) { | 366 | xprt_task_clear_bytes_sent(task); |
362 | struct rpc_rqst *req = task->tk_rqstp; | ||
363 | if (req != NULL) | ||
364 | req->rq_bytes_sent = 0; | ||
365 | } | ||
366 | xprt_clear_locked(xprt); | 367 | xprt_clear_locked(xprt); |
367 | __xprt_lock_write_next_cong(xprt); | 368 | __xprt_lock_write_next_cong(xprt); |
368 | } | 369 | } |
@@ -700,6 +701,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt, | |||
700 | goto out; | 701 | goto out; |
701 | if (xprt->snd_task != task) | 702 | if (xprt->snd_task != task) |
702 | goto out; | 703 | goto out; |
704 | xprt_task_clear_bytes_sent(task); | ||
703 | xprt->snd_task = cookie; | 705 | xprt->snd_task = cookie; |
704 | ret = true; | 706 | ret = true; |
705 | out: | 707 | out: |
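Both xprt_release_xprt() and xprt_release_xprt_cong() carried the same NULL-checked reset of rq_bytes_sent, now hoisted into xprt_task_clear_bytes_sent(); the functionally significant change is the third call site in xprt_lock_connect(), which addresses the reconnection regression by ensuring a request with partial-send state retransmits from byte zero once the connect worker takes over the transport. A toy illustration of clearing stale progress on a lock handoff (all names invented):

    #include <stddef.h>
    #include <stdio.h>

    struct rqst { size_t bytes_sent; };

    static void task_clear_bytes_sent(struct rqst *req)
    {
        if (req)                 /* mirrors the NULL checks in the diff */
            req->bytes_sent = 0;
    }

    static void lock_for_connect(struct rqst *owner)
    {
        /* Connect work is taking over the transport: any partial send
         * by the previous owner must restart from byte 0 afterwards.
         */
        task_clear_bytes_sent(owner);
    }

    int main(void)
    {
        struct rqst req = { .bytes_sent = 120 };

        lock_for_connect(&req);
        printf("bytes_sent after handoff: %zu\n", req.bytes_sent);
        return 0;
    }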
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index da5136fd5694..579f72bbcf4b 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile | |||
@@ -1,6 +1,7 @@ | |||
1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o | 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o |
2 | 2 | ||
3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o | 3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o \ |
4 | fmr_ops.o frwr_ops.o physical_ops.o | ||
4 | 5 | ||
5 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o | 6 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o |
6 | 7 | ||
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c new file mode 100644 index 000000000000..302d4ebf6fbf --- /dev/null +++ b/net/sunrpc/xprtrdma/fmr_ops.c | |||
@@ -0,0 +1,208 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
4 | */ | ||
5 | |||
6 | /* Lightweight memory registration using Fast Memory Regions (FMR). | ||
7 | * Referred to sometimes as MTHCAFMR mode. | ||
8 | * | ||
9 | * FMR uses synchronous memory registration and deregistration. | ||
10 | * FMR registration is known to be fast, but FMR deregistration | ||
11 | * can take tens of usecs to complete. | ||
12 | */ | ||
13 | |||
14 | #include "xprt_rdma.h" | ||
15 | |||
16 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
17 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
18 | #endif | ||
19 | |||
20 | /* Maximum scatter/gather per FMR */ | ||
21 | #define RPCRDMA_MAX_FMR_SGES (64) | ||
22 | |||
23 | static int | ||
24 | fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
25 | struct rpcrdma_create_data_internal *cdata) | ||
26 | { | ||
27 | return 0; | ||
28 | } | ||
29 | |||
30 | /* FMR mode conveys up to 64 pages of payload per chunk segment. | ||
31 | */ | ||
32 | static size_t | ||
33 | fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
34 | { | ||
35 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
36 | rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES); | ||
37 | } | ||
38 | |||
39 | static int | ||
40 | fmr_op_init(struct rpcrdma_xprt *r_xprt) | ||
41 | { | ||
42 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
43 | int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; | ||
44 | struct ib_fmr_attr fmr_attr = { | ||
45 | .max_pages = RPCRDMA_MAX_FMR_SGES, | ||
46 | .max_maps = 1, | ||
47 | .page_shift = PAGE_SHIFT | ||
48 | }; | ||
49 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
50 | struct rpcrdma_mw *r; | ||
51 | int i, rc; | ||
52 | |||
53 | INIT_LIST_HEAD(&buf->rb_mws); | ||
54 | INIT_LIST_HEAD(&buf->rb_all); | ||
55 | |||
56 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
57 | dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); | ||
58 | |||
59 | while (i--) { | ||
60 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
61 | if (!r) | ||
62 | return -ENOMEM; | ||
63 | |||
64 | r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); | ||
65 | if (IS_ERR(r->r.fmr)) | ||
66 | goto out_fmr_err; | ||
67 | |||
68 | list_add(&r->mw_list, &buf->rb_mws); | ||
69 | list_add(&r->mw_all, &buf->rb_all); | ||
70 | } | ||
71 | return 0; | ||
72 | |||
73 | out_fmr_err: | ||
74 | rc = PTR_ERR(r->r.fmr); | ||
75 | dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); | ||
76 | kfree(r); | ||
77 | return rc; | ||
78 | } | ||
79 | |||
80 | /* Use the ib_map_phys_fmr() verb to register a memory region | ||
81 | * for remote access via RDMA READ or RDMA WRITE. | ||
82 | */ | ||
83 | static int | ||
84 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
85 | int nsegs, bool writing) | ||
86 | { | ||
87 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
88 | struct ib_device *device = ia->ri_id->device; | ||
89 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | ||
90 | struct rpcrdma_mr_seg *seg1 = seg; | ||
91 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
92 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
93 | int len, pageoff, i, rc; | ||
94 | |||
95 | pageoff = offset_in_page(seg1->mr_offset); | ||
96 | seg1->mr_offset -= pageoff; /* start of page */ | ||
97 | seg1->mr_len += pageoff; | ||
98 | len = -pageoff; | ||
99 | if (nsegs > RPCRDMA_MAX_FMR_SGES) | ||
100 | nsegs = RPCRDMA_MAX_FMR_SGES; | ||
101 | for (i = 0; i < nsegs;) { | ||
102 | rpcrdma_map_one(device, seg, direction); | ||
103 | physaddrs[i] = seg->mr_dma; | ||
104 | len += seg->mr_len; | ||
105 | ++seg; | ||
106 | ++i; | ||
107 | /* Check for holes */ | ||
108 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
109 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
110 | break; | ||
111 | } | ||
112 | |||
113 | rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma); | ||
114 | if (rc) | ||
115 | goto out_maperr; | ||
116 | |||
117 | seg1->mr_rkey = mw->r.fmr->rkey; | ||
118 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
119 | seg1->mr_nsegs = i; | ||
120 | seg1->mr_len = len; | ||
121 | return i; | ||
122 | |||
123 | out_maperr: | ||
124 | dprintk("RPC: %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", | ||
125 | __func__, len, (unsigned long long)seg1->mr_dma, | ||
126 | pageoff, i, rc); | ||
127 | while (i--) | ||
128 | rpcrdma_unmap_one(device, --seg); | ||
129 | return rc; | ||
130 | } | ||
131 | |||
132 | /* Use the ib_unmap_fmr() verb to prevent further remote | ||
133 | * access via RDMA READ or RDMA WRITE. | ||
134 | */ | ||
135 | static int | ||
136 | fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
137 | { | ||
138 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
139 | struct rpcrdma_mr_seg *seg1 = seg; | ||
140 | struct ib_device *device; | ||
141 | int rc, nsegs = seg->mr_nsegs; | ||
142 | LIST_HEAD(l); | ||
143 | |||
144 | list_add(&seg1->rl_mw->r.fmr->list, &l); | ||
145 | rc = ib_unmap_fmr(&l); | ||
146 | read_lock(&ia->ri_qplock); | ||
147 | device = ia->ri_id->device; | ||
148 | while (seg1->mr_nsegs--) | ||
149 | rpcrdma_unmap_one(device, seg++); | ||
150 | read_unlock(&ia->ri_qplock); | ||
151 | if (rc) | ||
152 | goto out_err; | ||
153 | return nsegs; | ||
154 | |||
155 | out_err: | ||
156 | dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); | ||
157 | return nsegs; | ||
158 | } | ||
159 | |||
160 | /* After a disconnect, unmap all FMRs. | ||
161 | * | ||
162 | * This is invoked only in the transport connect worker in order | ||
163 | * to serialize with rpcrdma_register_fmr_external(). | ||
164 | */ | ||
165 | static void | ||
166 | fmr_op_reset(struct rpcrdma_xprt *r_xprt) | ||
167 | { | ||
168 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
169 | struct rpcrdma_mw *r; | ||
170 | LIST_HEAD(list); | ||
171 | int rc; | ||
172 | |||
173 | list_for_each_entry(r, &buf->rb_all, mw_all) | ||
174 | list_add(&r->r.fmr->list, &list); | ||
175 | |||
176 | rc = ib_unmap_fmr(&list); | ||
177 | if (rc) | ||
178 | dprintk("RPC: %s: ib_unmap_fmr failed %i\n", | ||
179 | __func__, rc); | ||
180 | } | ||
181 | |||
182 | static void | ||
183 | fmr_op_destroy(struct rpcrdma_buffer *buf) | ||
184 | { | ||
185 | struct rpcrdma_mw *r; | ||
186 | int rc; | ||
187 | |||
188 | while (!list_empty(&buf->rb_all)) { | ||
189 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
190 | list_del(&r->mw_all); | ||
191 | rc = ib_dealloc_fmr(r->r.fmr); | ||
192 | if (rc) | ||
193 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | ||
194 | __func__, rc); | ||
195 | kfree(r); | ||
196 | } | ||
197 | } | ||
198 | |||
199 | const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { | ||
200 | .ro_map = fmr_op_map, | ||
201 | .ro_unmap = fmr_op_unmap, | ||
202 | .ro_open = fmr_op_open, | ||
203 | .ro_maxpages = fmr_op_maxpages, | ||
204 | .ro_init = fmr_op_init, | ||
205 | .ro_reset = fmr_op_reset, | ||
206 | .ro_destroy = fmr_op_destroy, | ||
207 | .ro_displayname = "fmr", | ||
208 | }; | ||
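fmr_op_map() batches up to RPCRDMA_MAX_FMR_SGES segments into one ib_map_phys_fmr() call, but only while the region stays physically hole-free: a segment that ends mid-page, or a successor that starts mid-page, terminates the batch. A standalone sketch of that boundary rule (frwr_op_map() below applies the same check), with an arbitrary page size and test segments:

    #include <stdio.h>

    #define PAGE_SIZE 4096
    #define PAGE_MASK (PAGE_SIZE - 1)

    struct seg { unsigned long offset; unsigned long len; };

    /* Returns how many leading segments form one hole-free region. */
    static int coalescible(const struct seg *seg, int nsegs)
    {
        int i;

        for (i = 1; i < nsegs; i++) {
            /* previous segment must end on a page boundary ... */
            if ((seg[i - 1].offset + seg[i - 1].len) & PAGE_MASK)
                break;
            /* ... and the next one must start on a page boundary */
            if (seg[i].offset & PAGE_MASK)
                break;
        }
        return i;
    }

    int main(void)
    {
        struct seg segs[] = {
            { 100, PAGE_SIZE - 100 },  /* ends page-aligned */
            { 0,   PAGE_SIZE },        /* full page */
            { 0,   512 },              /* ends mid-page: last in region */
            { 0,   PAGE_SIZE },        /* excluded: hole before it */
        };

        printf("%d segments in first region\n", coalescible(segs, 4));
        return 0;
    }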
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c new file mode 100644 index 000000000000..dff0481dbcf8 --- /dev/null +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
@@ -0,0 +1,353 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
4 | */ | ||
5 | |||
6 | /* Lightweight memory registration using Fast Registration Work | ||
7 | * Requests (FRWR). Also referred to sometimes as FRMR mode. | ||
8 | * | ||
9 | * FRWR features ordered asynchronous registration and deregistration | ||
10 | * of arbitrarily sized memory regions. This is the fastest and safest | ||
11 | * but most complex memory registration mode. | ||
12 | */ | ||
13 | |||
14 | #include "xprt_rdma.h" | ||
15 | |||
16 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
17 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
18 | #endif | ||
19 | |||
20 | static int | ||
21 | __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, | ||
22 | unsigned int depth) | ||
23 | { | ||
24 | struct rpcrdma_frmr *f = &r->r.frmr; | ||
25 | int rc; | ||
26 | |||
27 | f->fr_mr = ib_alloc_fast_reg_mr(pd, depth); | ||
28 | if (IS_ERR(f->fr_mr)) | ||
29 | goto out_mr_err; | ||
30 | f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); | ||
31 | if (IS_ERR(f->fr_pgl)) | ||
32 | goto out_list_err; | ||
33 | return 0; | ||
34 | |||
35 | out_mr_err: | ||
36 | rc = PTR_ERR(f->fr_mr); | ||
37 | dprintk("RPC: %s: ib_alloc_fast_reg_mr status %i\n", | ||
38 | __func__, rc); | ||
39 | return rc; | ||
40 | |||
41 | out_list_err: | ||
42 | rc = PTR_ERR(f->fr_pgl); | ||
43 | dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", | ||
44 | __func__, rc); | ||
45 | ib_dereg_mr(f->fr_mr); | ||
46 | return rc; | ||
47 | } | ||
48 | |||
49 | static void | ||
50 | __frwr_release(struct rpcrdma_mw *r) | ||
51 | { | ||
52 | int rc; | ||
53 | |||
54 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
55 | if (rc) | ||
56 | dprintk("RPC: %s: ib_dereg_mr status %i\n", | ||
57 | __func__, rc); | ||
58 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
59 | } | ||
60 | |||
61 | static int | ||
62 | frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
63 | struct rpcrdma_create_data_internal *cdata) | ||
64 | { | ||
65 | struct ib_device_attr *devattr = &ia->ri_devattr; | ||
66 | int depth, delta; | ||
67 | |||
68 | ia->ri_max_frmr_depth = | ||
69 | min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
70 | devattr->max_fast_reg_page_list_len); | ||
71 | dprintk("RPC: %s: device's max FR page list len = %u\n", | ||
72 | __func__, ia->ri_max_frmr_depth); | ||
73 | |||
74 | /* Add room for frmr register and invalidate WRs. | ||
75 | * 1. FRMR reg WR for head | ||
76 | * 2. FRMR invalidate WR for head | ||
77 | * 3. N FRMR reg WRs for pagelist | ||
78 | * 4. N FRMR invalidate WRs for pagelist | ||
79 | * 5. FRMR reg WR for tail | ||
80 | * 6. FRMR invalidate WR for tail | ||
81 | * 7. The RDMA_SEND WR | ||
82 | */ | ||
83 | depth = 7; | ||
84 | |||
85 | /* Calculate N if the device max FRMR depth is smaller than | ||
86 | * RPCRDMA_MAX_DATA_SEGS. | ||
87 | */ | ||
88 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
89 | delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; | ||
90 | do { | ||
91 | depth += 2; /* FRMR reg + invalidate */ | ||
92 | delta -= ia->ri_max_frmr_depth; | ||
93 | } while (delta > 0); | ||
94 | } | ||
95 | |||
96 | ep->rep_attr.cap.max_send_wr *= depth; | ||
97 | if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { | ||
98 | cdata->max_requests = devattr->max_qp_wr / depth; | ||
99 | if (!cdata->max_requests) | ||
100 | return -EINVAL; | ||
101 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * | ||
102 | depth; | ||
103 | } | ||
104 | |||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | /* FRWR mode conveys a list of pages per chunk segment. The | ||
109 | * maximum length of that list is the FRWR page list depth. | ||
110 | */ | ||
111 | static size_t | ||
112 | frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
113 | { | ||
114 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
115 | |||
116 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
117 | rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); | ||
118 | } | ||
119 | |||
120 | /* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */ | ||
121 | static void | ||
122 | frwr_sendcompletion(struct ib_wc *wc) | ||
123 | { | ||
124 | struct rpcrdma_mw *r; | ||
125 | |||
126 | if (likely(wc->status == IB_WC_SUCCESS)) | ||
127 | return; | ||
128 | |||
129 | /* WARNING: Only wr_id and status are reliable at this point */ | ||
130 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | ||
131 | dprintk("RPC: %s: frmr %p (stale), status %d\n", | ||
132 | __func__, r, wc->status); | ||
133 | r->r.frmr.fr_state = FRMR_IS_STALE; | ||
134 | } | ||
135 | |||
136 | static int | ||
137 | frwr_op_init(struct rpcrdma_xprt *r_xprt) | ||
138 | { | ||
139 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
140 | struct ib_device *device = r_xprt->rx_ia.ri_id->device; | ||
141 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
142 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
143 | int i; | ||
144 | |||
145 | INIT_LIST_HEAD(&buf->rb_mws); | ||
146 | INIT_LIST_HEAD(&buf->rb_all); | ||
147 | |||
148 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
149 | dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); | ||
150 | |||
151 | while (i--) { | ||
152 | struct rpcrdma_mw *r; | ||
153 | int rc; | ||
154 | |||
155 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
156 | if (!r) | ||
157 | return -ENOMEM; | ||
158 | |||
159 | rc = __frwr_init(r, pd, device, depth); | ||
160 | if (rc) { | ||
161 | kfree(r); | ||
162 | return rc; | ||
163 | } | ||
164 | |||
165 | list_add(&r->mw_list, &buf->rb_mws); | ||
166 | list_add(&r->mw_all, &buf->rb_all); | ||
167 | r->mw_sendcompletion = frwr_sendcompletion; | ||
168 | } | ||
169 | |||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | /* Post a FAST_REG Work Request to register a memory region | ||
174 | * for remote access via RDMA READ or RDMA WRITE. | ||
175 | */ | ||
176 | static int | ||
177 | frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
178 | int nsegs, bool writing) | ||
179 | { | ||
180 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
181 | struct ib_device *device = ia->ri_id->device; | ||
182 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | ||
183 | struct rpcrdma_mr_seg *seg1 = seg; | ||
184 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
185 | struct rpcrdma_frmr *frmr = &mw->r.frmr; | ||
186 | struct ib_mr *mr = frmr->fr_mr; | ||
187 | struct ib_send_wr fastreg_wr, *bad_wr; | ||
188 | u8 key; | ||
189 | int len, pageoff; | ||
190 | int i, rc; | ||
191 | int seg_len; | ||
192 | u64 pa; | ||
193 | int page_no; | ||
194 | |||
195 | pageoff = offset_in_page(seg1->mr_offset); | ||
196 | seg1->mr_offset -= pageoff; /* start of page */ | ||
197 | seg1->mr_len += pageoff; | ||
198 | len = -pageoff; | ||
199 | if (nsegs > ia->ri_max_frmr_depth) | ||
200 | nsegs = ia->ri_max_frmr_depth; | ||
201 | for (page_no = i = 0; i < nsegs;) { | ||
202 | rpcrdma_map_one(device, seg, direction); | ||
203 | pa = seg->mr_dma; | ||
204 | for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { | ||
205 | frmr->fr_pgl->page_list[page_no++] = pa; | ||
206 | pa += PAGE_SIZE; | ||
207 | } | ||
208 | len += seg->mr_len; | ||
209 | ++seg; | ||
210 | ++i; | ||
211 | /* Check for holes */ | ||
212 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
213 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
214 | break; | ||
215 | } | ||
216 | dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", | ||
217 | __func__, mw, i, len); | ||
218 | |||
219 | frmr->fr_state = FRMR_IS_VALID; | ||
220 | |||
221 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | ||
222 | fastreg_wr.wr_id = (unsigned long)(void *)mw; | ||
223 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
224 | fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff; | ||
225 | fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; | ||
226 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
227 | fastreg_wr.wr.fast_reg.page_list_len = page_no; | ||
228 | fastreg_wr.wr.fast_reg.length = len; | ||
229 | fastreg_wr.wr.fast_reg.access_flags = writing ? | ||
230 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | ||
231 | IB_ACCESS_REMOTE_READ; | ||
232 | key = (u8)(mr->rkey & 0x000000FF); | ||
233 | ib_update_fast_reg_key(mr, ++key); | ||
234 | fastreg_wr.wr.fast_reg.rkey = mr->rkey; | ||
235 | |||
236 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
237 | rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); | ||
238 | if (rc) | ||
239 | goto out_senderr; | ||
240 | |||
241 | seg1->mr_rkey = mr->rkey; | ||
242 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
243 | seg1->mr_nsegs = i; | ||
244 | seg1->mr_len = len; | ||
245 | return i; | ||
246 | |||
247 | out_senderr: | ||
248 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | ||
249 | ib_update_fast_reg_key(mr, --key); | ||
250 | frmr->fr_state = FRMR_IS_INVALID; | ||
251 | while (i--) | ||
252 | rpcrdma_unmap_one(device, --seg); | ||
253 | return rc; | ||
254 | } | ||
255 | |||
256 | /* Post a LOCAL_INV Work Request to prevent further remote access | ||
257 | * via RDMA READ or RDMA WRITE. | ||
258 | */ | ||
259 | static int | ||
260 | frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
261 | { | ||
262 | struct rpcrdma_mr_seg *seg1 = seg; | ||
263 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
264 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
265 | int rc, nsegs = seg->mr_nsegs; | ||
266 | struct ib_device *device; | ||
267 | |||
268 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; | ||
269 | |||
270 | memset(&invalidate_wr, 0, sizeof(invalidate_wr)); | ||
271 | invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; | ||
272 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
273 | invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; | ||
274 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
275 | |||
276 | read_lock(&ia->ri_qplock); | ||
277 | device = ia->ri_id->device; | ||
278 | while (seg1->mr_nsegs--) | ||
279 | rpcrdma_unmap_one(device, seg++); | ||
280 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
281 | read_unlock(&ia->ri_qplock); | ||
282 | if (rc) | ||
283 | goto out_err; | ||
284 | return nsegs; | ||
285 | |||
286 | out_err: | ||
287 | /* Force rpcrdma_buffer_get() to retry */ | ||
288 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; | ||
289 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | ||
290 | return nsegs; | ||
291 | } | ||
292 | |||
293 | /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in | ||
294 | * an unusable state. Find FRMRs in this state and dereg / reg | ||
295 | * each. FRMRs that are VALID and attached to an rpcrdma_req are | ||
296 | * also torn down. | ||
297 | * | ||
298 | * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. | ||
299 | * | ||
300 | * This is invoked only in the transport connect worker in order | ||
301 | * to serialize with rpcrdma_register_frmr_external(). | ||
302 | */ | ||
303 | static void | ||
304 | frwr_op_reset(struct rpcrdma_xprt *r_xprt) | ||
305 | { | ||
306 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
307 | struct ib_device *device = r_xprt->rx_ia.ri_id->device; | ||
308 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
309 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
310 | struct rpcrdma_mw *r; | ||
311 | int rc; | ||
312 | |||
313 | list_for_each_entry(r, &buf->rb_all, mw_all) { | ||
314 | if (r->r.frmr.fr_state == FRMR_IS_INVALID) | ||
315 | continue; | ||
316 | |||
317 | __frwr_release(r); | ||
318 | rc = __frwr_init(r, pd, device, depth); | ||
319 | if (rc) { | ||
320 | dprintk("RPC: %s: mw %p left %s\n", | ||
321 | __func__, r, | ||
322 | (r->r.frmr.fr_state == FRMR_IS_STALE ? | ||
323 | "stale" : "valid")); | ||
324 | continue; | ||
325 | } | ||
326 | |||
327 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
328 | } | ||
329 | } | ||
330 | |||
331 | static void | ||
332 | frwr_op_destroy(struct rpcrdma_buffer *buf) | ||
333 | { | ||
334 | struct rpcrdma_mw *r; | ||
335 | |||
336 | while (!list_empty(&buf->rb_all)) { | ||
337 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
338 | list_del(&r->mw_all); | ||
339 | __frwr_release(r); | ||
340 | kfree(r); | ||
341 | } | ||
342 | } | ||
343 | |||
344 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { | ||
345 | .ro_map = frwr_op_map, | ||
346 | .ro_unmap = frwr_op_unmap, | ||
347 | .ro_open = frwr_op_open, | ||
348 | .ro_maxpages = frwr_op_maxpages, | ||
349 | .ro_init = frwr_op_init, | ||
350 | .ro_reset = frwr_op_reset, | ||
351 | .ro_destroy = frwr_op_destroy, | ||
352 | .ro_displayname = "frwr", | ||
353 | }; | ||
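frwr_op_open() scales the send queue so every RPC has room for its FAST_REG and LOCAL_INV work requests: seven WRs cover head, tail, pagelist, and the final SEND, plus one reg/invalidate pair per extra FRMR whenever the device's fast-register depth falls below RPCRDMA_MAX_DATA_SEGS. The arithmetic, extracted into a standalone function with example numbers:

    #include <stdio.h>

    static int frwr_depth(int max_data_segs, int device_depth)
    {
        int depth = 7;   /* the seven WRs listed in the diff's comment */
        int delta;

        if (device_depth < max_data_segs) {
            delta = max_data_segs - device_depth;
            do {
                depth += 2;             /* one reg + one invalidate */
                delta -= device_depth;
            } while (delta > 0);
        }
        return depth;
    }

    int main(void)
    {
        /* e.g. 64 max data segments against a device limit of 16 pages:
         * 7 + 2 * ceil(48 / 16) = 13 WRs per request.
         */
        printf("depth = %d\n", frwr_depth(64, 16));
        return 0;
    }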
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c new file mode 100644 index 000000000000..ba518af16787 --- /dev/null +++ b/net/sunrpc/xprtrdma/physical_ops.c | |||
@@ -0,0 +1,94 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
4 | */ | ||
5 | |||
6 | /* No-op chunk preparation. All client memory is pre-registered. | ||
7 | * Sometimes referred to as ALLPHYSICAL mode. | ||
8 | * | ||
9 | * Physical registration is simple because all client memory is | ||
10 | * pre-registered and never deregistered. This mode is good for | ||
11 | * adapter bring up, but is considered not safe: the server is | ||
12 | * trusted not to abuse its access to client memory not involved | ||
13 | * in RDMA I/O. | ||
14 | */ | ||
15 | |||
16 | #include "xprt_rdma.h" | ||
17 | |||
18 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
19 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
20 | #endif | ||
21 | |||
22 | static int | ||
23 | physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
24 | struct rpcrdma_create_data_internal *cdata) | ||
25 | { | ||
26 | return 0; | ||
27 | } | ||
28 | |||
29 | /* PHYSICAL memory registration conveys one page per chunk segment. | ||
30 | */ | ||
31 | static size_t | ||
32 | physical_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
33 | { | ||
34 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
35 | rpcrdma_max_segments(r_xprt)); | ||
36 | } | ||
37 | |||
38 | static int | ||
39 | physical_op_init(struct rpcrdma_xprt *r_xprt) | ||
40 | { | ||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | /* The client's physical memory is already exposed for | ||
45 | * remote access via RDMA READ or RDMA WRITE. | ||
46 | */ | ||
47 | static int | ||
48 | physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
49 | int nsegs, bool writing) | ||
50 | { | ||
51 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
52 | |||
53 | rpcrdma_map_one(ia->ri_id->device, seg, | ||
54 | rpcrdma_data_dir(writing)); | ||
55 | seg->mr_rkey = ia->ri_bind_mem->rkey; | ||
56 | seg->mr_base = seg->mr_dma; | ||
57 | seg->mr_nsegs = 1; | ||
58 | return 1; | ||
59 | } | ||
60 | |||
61 | /* Unmap a memory region, but leave it registered. | ||
62 | */ | ||
63 | static int | ||
64 | physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | ||
65 | { | ||
66 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
67 | |||
68 | read_lock(&ia->ri_qplock); | ||
69 | rpcrdma_unmap_one(ia->ri_id->device, seg); | ||
70 | read_unlock(&ia->ri_qplock); | ||
71 | |||
72 | return 1; | ||
73 | } | ||
74 | |||
75 | static void | ||
76 | physical_op_reset(struct rpcrdma_xprt *r_xprt) | ||
77 | { | ||
78 | } | ||
79 | |||
80 | static void | ||
81 | physical_op_destroy(struct rpcrdma_buffer *buf) | ||
82 | { | ||
83 | } | ||
84 | |||
85 | const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { | ||
86 | .ro_map = physical_op_map, | ||
87 | .ro_unmap = physical_op_unmap, | ||
88 | .ro_open = physical_op_open, | ||
89 | .ro_maxpages = physical_op_maxpages, | ||
90 | .ro_init = physical_op_init, | ||
91 | .ro_reset = physical_op_reset, | ||
92 | .ro_destroy = physical_op_destroy, | ||
93 | .ro_displayname = "physical", | ||
94 | }; | ||
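fmr_ops.c, frwr_ops.c, and physical_ops.c all export the same rpcrdma_memreg_ops shape, so the transport selects one table at setup time and the marshaling code calls ri_ops->ro_map() with no per-mode branches. A compact illustration of that function-pointer vtable pattern; the structures here are toys, not the kernel's:

    #include <stdio.h>

    struct memreg_ops {
        int (*map)(int nsegs);       /* stands in for ro_map */
        const char *displayname;     /* stands in for ro_displayname */
    };

    static int physical_map(int nsegs) { (void)nsegs; return 1; }
    static int frwr_map(int nsegs)     { return nsegs; }

    static const struct memreg_ops physical_ops = { physical_map, "physical" };
    static const struct memreg_ops frwr_ops     = { frwr_map, "frwr" };

    int main(void)
    {
        const struct memreg_ops *ops = &frwr_ops;   /* chosen at setup */

        /* Callers dispatch through the table, never on a mode enum. */
        printf("%s mapped %d segment(s)\n",
               ops->displayname, ops->map(8));
        return 0;
    }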
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 91ffde82fa0c..2c53ea9e1b83 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -53,6 +53,14 @@ | |||
53 | # define RPCDBG_FACILITY RPCDBG_TRANS | 53 | # define RPCDBG_FACILITY RPCDBG_TRANS |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | enum rpcrdma_chunktype { | ||
57 | rpcrdma_noch = 0, | ||
58 | rpcrdma_readch, | ||
59 | rpcrdma_areadch, | ||
60 | rpcrdma_writech, | ||
61 | rpcrdma_replych | ||
62 | }; | ||
63 | |||
56 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 64 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
57 | static const char transfertypes[][12] = { | 65 | static const char transfertypes[][12] = { |
58 | "pure inline", /* no chunks */ | 66 | "pure inline", /* no chunks */ |
@@ -179,6 +187,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
179 | struct rpcrdma_write_array *warray = NULL; | 187 | struct rpcrdma_write_array *warray = NULL; |
180 | struct rpcrdma_write_chunk *cur_wchunk = NULL; | 188 | struct rpcrdma_write_chunk *cur_wchunk = NULL; |
181 | __be32 *iptr = headerp->rm_body.rm_chunks; | 189 | __be32 *iptr = headerp->rm_body.rm_chunks; |
190 | int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool); | ||
182 | 191 | ||
183 | if (type == rpcrdma_readch || type == rpcrdma_areadch) { | 192 | if (type == rpcrdma_readch || type == rpcrdma_areadch) { |
184 | /* a read chunk - server will RDMA Read our memory */ | 193 | /* a read chunk - server will RDMA Read our memory */ |
@@ -201,9 +210,9 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
201 | if (nsegs < 0) | 210 | if (nsegs < 0) |
202 | return nsegs; | 211 | return nsegs; |
203 | 212 | ||
213 | map = r_xprt->rx_ia.ri_ops->ro_map; | ||
204 | do { | 214 | do { |
205 | n = rpcrdma_register_external(seg, nsegs, | 215 | n = map(r_xprt, seg, nsegs, cur_wchunk != NULL); |
206 | cur_wchunk != NULL, r_xprt); | ||
207 | if (n <= 0) | 216 | if (n <= 0) |
208 | goto out; | 217 | goto out; |
209 | if (cur_rchunk) { /* read */ | 218 | if (cur_rchunk) { /* read */ |
@@ -275,34 +284,13 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
275 | return (unsigned char *)iptr - (unsigned char *)headerp; | 284 | return (unsigned char *)iptr - (unsigned char *)headerp; |
276 | 285 | ||
277 | out: | 286 | out: |
278 | if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) { | 287 | if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) |
279 | for (pos = 0; nchunks--;) | 288 | return n; |
280 | pos += rpcrdma_deregister_external( | ||
281 | &req->rl_segments[pos], r_xprt); | ||
282 | } | ||
283 | return n; | ||
284 | } | ||
285 | 289 | ||
286 | /* | 290 | for (pos = 0; nchunks--;) |
287 | * Marshal chunks. This routine returns the header length | 291 | pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, |
288 | * consumed by marshaling. | 292 | &req->rl_segments[pos]); |
289 | * | 293 | return n; |
290 | * Returns positive RPC/RDMA header size, or negative errno. | ||
291 | */ | ||
292 | |||
293 | ssize_t | ||
294 | rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result) | ||
295 | { | ||
296 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | ||
297 | struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf); | ||
298 | |||
299 | if (req->rl_rtype != rpcrdma_noch) | ||
300 | result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, | ||
301 | headerp, req->rl_rtype); | ||
302 | else if (req->rl_wtype != rpcrdma_noch) | ||
303 | result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, | ||
304 | headerp, req->rl_wtype); | ||
305 | return result; | ||
306 | } | 294 | } |
307 | 295 | ||
308 | /* | 296 | /* |
@@ -397,6 +385,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
397 | char *base; | 385 | char *base; |
398 | size_t rpclen, padlen; | 386 | size_t rpclen, padlen; |
399 | ssize_t hdrlen; | 387 | ssize_t hdrlen; |
388 | enum rpcrdma_chunktype rtype, wtype; | ||
400 | struct rpcrdma_msg *headerp; | 389 | struct rpcrdma_msg *headerp; |
401 | 390 | ||
402 | /* | 391 | /* |
@@ -433,13 +422,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
433 | * into pages; otherwise use reply chunks. | 422 | * into pages; otherwise use reply chunks. |
434 | */ | 423 | */ |
435 | if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) | 424 | if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) |
436 | req->rl_wtype = rpcrdma_noch; | 425 | wtype = rpcrdma_noch; |
437 | else if (rqst->rq_rcv_buf.page_len == 0) | 426 | else if (rqst->rq_rcv_buf.page_len == 0) |
438 | req->rl_wtype = rpcrdma_replych; | 427 | wtype = rpcrdma_replych; |
439 | else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | 428 | else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) |
440 | req->rl_wtype = rpcrdma_writech; | 429 | wtype = rpcrdma_writech; |
441 | else | 430 | else |
442 | req->rl_wtype = rpcrdma_replych; | 431 | wtype = rpcrdma_replych; |
443 | 432 | ||
444 | /* | 433 | /* |
445 | * Chunks needed for arguments? | 434 | * Chunks needed for arguments? |
@@ -456,16 +445,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
456 | * TBD check NFSv4 setacl | 445 | * TBD check NFSv4 setacl |
457 | */ | 446 | */ |
458 | if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) | 447 | if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) |
459 | req->rl_rtype = rpcrdma_noch; | 448 | rtype = rpcrdma_noch; |
460 | else if (rqst->rq_snd_buf.page_len == 0) | 449 | else if (rqst->rq_snd_buf.page_len == 0) |
461 | req->rl_rtype = rpcrdma_areadch; | 450 | rtype = rpcrdma_areadch; |
462 | else | 451 | else |
463 | req->rl_rtype = rpcrdma_readch; | 452 | rtype = rpcrdma_readch; |
464 | 453 | ||
465 | /* The following simplification is not true forever */ | 454 | /* The following simplification is not true forever */ |
466 | if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych) | 455 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) |
467 | req->rl_wtype = rpcrdma_noch; | 456 | wtype = rpcrdma_noch; |
468 | if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) { | 457 | if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { |
469 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", | 458 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", |
470 | __func__); | 459 | __func__); |
471 | return -EIO; | 460 | return -EIO; |
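The two hunks above replace the per-request rl_rtype/rl_wtype fields with locals computed once per marshaling pass. Below is a minimal user-space sketch of the reply-side (wtype) decision, with assumed threshold and buffer sizes; the enum and helper names are illustrative stand-ins, not kernel symbols.

#include <stdbool.h>
#include <stdio.h>

enum chunktype { noch, readch, areadch, writech, replych };

/* model of the reply-side decision: small replies stay inline,
 * page-less replies use a reply chunk, XDRBUF_READ replies use
 * write chunks, everything else falls back to a reply chunk */
static enum chunktype choose_wtype(size_t buflen, size_t inline_rsize,
				   size_t page_len, bool xdrbuf_read)
{
	if (buflen <= inline_rsize)
		return noch;
	if (page_len == 0)
		return replych;
	return xdrbuf_read ? writech : replych;
}

int main(void)
{
	/* assumed values: 1 KiB inline threshold, 16 KiB paged reply */
	enum chunktype w = choose_wtype(16384, 1024, 12288, true);

	printf("wtype = %s\n", w == writech ? "writech" : "other");
	return 0;
}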
@@ -479,7 +468,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
479 | * When padding is in use and applies to the transfer, insert | 468 | * When padding is in use and applies to the transfer, insert |
480 | * it and change the message type. | 469 | * it and change the message type. |
481 | */ | 470 | */ |
482 | if (req->rl_rtype == rpcrdma_noch) { | 471 | if (rtype == rpcrdma_noch) { |
483 | 472 | ||
484 | padlen = rpcrdma_inline_pullup(rqst, | 473 | padlen = rpcrdma_inline_pullup(rqst, |
485 | RPCRDMA_INLINE_PAD_VALUE(rqst)); | 474 | RPCRDMA_INLINE_PAD_VALUE(rqst)); |
@@ -494,7 +483,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
494 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; | 483 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; |
495 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; | 484 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; |
496 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ | 485 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ |
497 | if (req->rl_wtype != rpcrdma_noch) { | 486 | if (wtype != rpcrdma_noch) { |
498 | dprintk("RPC: %s: invalid chunk list\n", | 487 | dprintk("RPC: %s: invalid chunk list\n", |
499 | __func__); | 488 | __func__); |
500 | return -EIO; | 489 | return -EIO; |
@@ -515,18 +504,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
515 | * on receive. Therefore, we request a reply chunk | 504 | * on receive. Therefore, we request a reply chunk |
516 | * for non-writes wherever feasible and efficient. | 505 | * for non-writes wherever feasible and efficient. |
517 | */ | 506 | */ |
518 | if (req->rl_wtype == rpcrdma_noch) | 507 | if (wtype == rpcrdma_noch) |
519 | req->rl_wtype = rpcrdma_replych; | 508 | wtype = rpcrdma_replych; |
520 | } | 509 | } |
521 | } | 510 | } |
522 | 511 | ||
523 | hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen); | 512 | if (rtype != rpcrdma_noch) { |
513 | hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, | ||
514 | headerp, rtype); | ||
515 | wtype = rtype; /* simplify dprintk */ | ||
516 | |||
517 | } else if (wtype != rpcrdma_noch) { | ||
518 | hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, | ||
519 | headerp, wtype); | ||
520 | } | ||
524 | if (hdrlen < 0) | 521 | if (hdrlen < 0) |
525 | return hdrlen; | 522 | return hdrlen; |
526 | 523 | ||
527 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" | 524 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" |
528 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 525 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
529 | __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen, | 526 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, |
530 | headerp, base, rdmab_lkey(req->rl_rdmabuf)); | 527 | headerp, base, rdmab_lkey(req->rl_rdmabuf)); |
531 | 528 | ||
532 | /* | 529 | /* |
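Taken together, the rpc_rdma.c changes route chunk registration through the new per-IA ops table instead of an open-coded strategy switch, and fold rpcrdma_marshal_chunks() back into rpcrdma_marshal_req(). A self-contained C model of the dispatch pattern follows; the types and names are deliberately simplified stand-ins, not the kernel's.

#include <stdio.h>

/* Simplified stand-ins; the kernel's structs carry much more state. */
struct mr_seg { size_t len; };

struct memreg_ops {
	int (*ro_map)(struct mr_seg *seg, int nsegs, int writing);
	const char *ro_displayname;
};

static int frwr_map(struct mr_seg *seg, int nsegs, int writing)
{
	/* a real version would post a FAST_REG_MR work request here */
	(void)seg; (void)writing;
	return nsegs;	/* number of segments consumed */
}

static const struct memreg_ops frwr_ops = {
	.ro_map = frwr_map,
	.ro_displayname = "frwr",
};

int main(void)
{
	const struct memreg_ops *ops = &frwr_ops; /* chosen at IA open time */
	struct mr_seg seg = { .len = 4096 };

	/* call sites dispatch through the table instead of switching
	 * on ri_memreg_strategy */
	int n = ops->ro_map(&seg, 1, 1);
	printf("%s mapped %d segment(s)\n", ops->ro_displayname, n);
	return 0;
}

The payoff is that adding or removing a registration mode touches one ops table, not every call site.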
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 2e192baa59f3..54f23b1be986 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -157,12 +157,47 @@ static struct ctl_table sunrpc_table[] = { | |||
157 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ | 157 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ |
158 | 158 | ||
159 | static void | 159 | static void |
160 | xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) | ||
161 | { | ||
162 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | ||
163 | char buf[20]; | ||
164 | |||
165 | snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); | ||
166 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
167 | |||
168 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA; | ||
169 | } | ||
170 | |||
171 | static void | ||
172 | xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap) | ||
173 | { | ||
174 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | ||
175 | char buf[40]; | ||
176 | |||
177 | snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); | ||
178 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
179 | |||
180 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; | ||
181 | } | ||
182 | |||
183 | static void | ||
160 | xprt_rdma_format_addresses(struct rpc_xprt *xprt) | 184 | xprt_rdma_format_addresses(struct rpc_xprt *xprt) |
161 | { | 185 | { |
162 | struct sockaddr *sap = (struct sockaddr *) | 186 | struct sockaddr *sap = (struct sockaddr *) |
163 | &rpcx_to_rdmad(xprt).addr; | 187 | &rpcx_to_rdmad(xprt).addr; |
164 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | 188 | char buf[128]; |
165 | char buf[64]; | 189 | |
190 | switch (sap->sa_family) { | ||
191 | case AF_INET: | ||
192 | xprt_rdma_format_addresses4(xprt, sap); | ||
193 | break; | ||
194 | case AF_INET6: | ||
195 | xprt_rdma_format_addresses6(xprt, sap); | ||
196 | break; | ||
197 | default: | ||
198 | pr_err("rpcrdma: Unrecognized address family\n"); | ||
199 | return; | ||
200 | } | ||
166 | 201 | ||
167 | (void)rpc_ntop(sap, buf, sizeof(buf)); | 202 | (void)rpc_ntop(sap, buf, sizeof(buf)); |
168 | xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); | 203 | xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); |
@@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) | |||
170 | snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); | 205 | snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); |
171 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); | 206 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); |
172 | 207 | ||
173 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; | ||
174 | |||
175 | snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); | ||
176 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | ||
177 | |||
178 | snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); | 208 | snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); |
179 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); | 209 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); |
180 | 210 | ||
181 | /* netid */ | 211 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; |
182 | xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; | ||
183 | } | 212 | } |
184 | 213 | ||
185 | static void | 214 | static void |
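Splitting xprt_rdma_format_addresses() per address family lets the IPv6 path use its own hex formatter. Here is a user-space approximation, assuming the kernel's %pi6 specifier emits the 16 address bytes as 32 hex digits with no separators (the helper name is hypothetical):

#include <arpa/inet.h>
#include <stdio.h>

static void format_hex_addr(const struct sockaddr *sap, char *buf, size_t len)
{
	switch (sap->sa_family) {
	case AF_INET: {
		const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
		snprintf(buf, len, "%08x", ntohl(sin->sin_addr.s_addr));
		break;
	}
	case AF_INET6: {
		const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
		size_t off = 0;
		for (int i = 0; i < 16 && off + 3 <= len; i++)
			off += snprintf(buf + off, len - off, "%02x",
					sin6->sin6_addr.s6_addr[i]);
		break;
	}
	default:
		if (len)
			buf[0] = '\0';
	}
}

int main(void)
{
	struct sockaddr_in sin = { .sin_family = AF_INET };
	char buf[40];

	inet_pton(AF_INET, "192.0.2.1", &sin.sin_addr);
	format_hex_addr((struct sockaddr *)&sin, buf, sizeof(buf));
	printf("%s\n", buf);	/* prints c0000201 */
	return 0;
}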
@@ -377,7 +406,10 @@ xprt_setup_rdma(struct xprt_create *args) | |||
377 | xprt_rdma_connect_worker); | 406 | xprt_rdma_connect_worker); |
378 | 407 | ||
379 | xprt_rdma_format_addresses(xprt); | 408 | xprt_rdma_format_addresses(xprt); |
380 | xprt->max_payload = rpcrdma_max_payload(new_xprt); | 409 | xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); |
410 | if (xprt->max_payload == 0) | ||
411 | goto out4; | ||
412 | xprt->max_payload <<= PAGE_SHIFT; | ||
381 | dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", | 413 | dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", |
382 | __func__, xprt->max_payload); | 414 | __func__, xprt->max_payload); |
383 | 415 | ||
@@ -552,8 +584,8 @@ xprt_rdma_free(void *buffer) | |||
552 | 584 | ||
553 | for (i = 0; req->rl_nchunks;) { | 585 | for (i = 0; req->rl_nchunks;) { |
554 | --req->rl_nchunks; | 586 | --req->rl_nchunks; |
555 | i += rpcrdma_deregister_external( | 587 | i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, |
556 | &req->rl_segments[i], r_xprt); | 588 | &req->rl_segments[i]); |
557 | } | 589 | } |
558 | 590 | ||
559 | rpcrdma_buffer_put(req); | 591 | rpcrdma_buffer_put(req); |
@@ -579,10 +611,7 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
579 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 611 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
580 | int rc = 0; | 612 | int rc = 0; |
581 | 613 | ||
582 | if (req->rl_niovs == 0) | 614 | rc = rpcrdma_marshal_req(rqst); |
583 | rc = rpcrdma_marshal_req(rqst); | ||
584 | else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL) | ||
585 | rc = rpcrdma_marshal_chunks(rqst, 0); | ||
586 | if (rc < 0) | 615 | if (rc < 0) |
587 | goto failed_marshal; | 616 | goto failed_marshal; |
588 | 617 | ||
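max_payload now comes from the registration mode's ro_maxpages callback, converted from pages to bytes. A tiny sketch of that computation; the page size and the callback's return value below are assumptions, not values from the patch:

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumption: 4 KiB pages */

/* hypothetical stand-in for new_xprt->rx_ia.ri_ops->ro_maxpages() */
static size_t ro_maxpages(void) { return 256; }

int main(void)
{
	size_t max_payload = ro_maxpages();

	if (max_payload == 0)
		return 1;		/* transport setup bails out instead */
	max_payload <<= PAGE_SHIFT;	/* pages -> bytes */
	printf("transport data payload maximum: %zu bytes\n", max_payload);
	return 0;
}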
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index e28909fddd30..4870d272e006 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/interrupt.h> | 50 | #include <linux/interrupt.h> |
51 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
53 | #include <linux/sunrpc/addr.h> | ||
53 | #include <asm/bitops.h> | 54 | #include <asm/bitops.h> |
54 | 55 | ||
55 | #include "xprt_rdma.h" | 56 | #include "xprt_rdma.h" |
@@ -62,9 +63,6 @@ | |||
62 | # define RPCDBG_FACILITY RPCDBG_TRANS | 63 | # define RPCDBG_FACILITY RPCDBG_TRANS |
63 | #endif | 64 | #endif |
64 | 65 | ||
65 | static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); | ||
66 | static void rpcrdma_reset_fmrs(struct rpcrdma_ia *); | ||
67 | |||
68 | /* | 66 | /* |
69 | * internal functions | 67 | * internal functions |
70 | */ | 68 | */ |
@@ -188,7 +186,7 @@ static const char * const wc_status[] = { | |||
188 | "remote access error", | 186 | "remote access error", |
189 | "remote operation error", | 187 | "remote operation error", |
190 | "transport retry counter exceeded", | 188 | "transport retry counter exceeded", |
191 | "RNR retrycounter exceeded", | 189 | "RNR retry counter exceeded", |
192 | "local RDD violation error", | 190 | "local RDD violation error", |
193 | "remove invalid RD request", | 191 | "remove invalid RD request", |
194 | "operation aborted", | 192 | "operation aborted", |
@@ -206,21 +204,17 @@ static const char * const wc_status[] = { | |||
206 | static void | 204 | static void |
207 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) | 205 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) |
208 | { | 206 | { |
209 | if (likely(wc->status == IB_WC_SUCCESS)) | ||
210 | return; | ||
211 | |||
212 | /* WARNING: Only wr_id and status are reliable at this point */ | 207 | /* WARNING: Only wr_id and status are reliable at this point */ |
213 | if (wc->wr_id == 0ULL) { | 208 | if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) { |
214 | if (wc->status != IB_WC_WR_FLUSH_ERR) | 209 | if (wc->status != IB_WC_SUCCESS && |
210 | wc->status != IB_WC_WR_FLUSH_ERR) | ||
215 | pr_err("RPC: %s: SEND: %s\n", | 211 | pr_err("RPC: %s: SEND: %s\n", |
216 | __func__, COMPLETION_MSG(wc->status)); | 212 | __func__, COMPLETION_MSG(wc->status)); |
217 | } else { | 213 | } else { |
218 | struct rpcrdma_mw *r; | 214 | struct rpcrdma_mw *r; |
219 | 215 | ||
220 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | 216 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; |
221 | r->r.frmr.fr_state = FRMR_IS_STALE; | 217 | r->mw_sendcompletion(wc); |
222 | pr_err("RPC: %s: frmr %p (stale): %s\n", | ||
223 | __func__, r, COMPLETION_MSG(wc->status)); | ||
224 | } | 218 | } |
225 | } | 219 | } |
226 | 220 | ||
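With RPCRDMA_IGNORE_COMPLETION, the send completion handler no longer hardwires FRMR recovery; a non-ignored wr_id carries a memory-window pointer whose per-mode callout runs instead. A pared-down model of that dispatch, with the struct fields simplified to the minimum:

#include <stdio.h>

#define IGNORE_COMPLETION 0ULL	/* mirrors RPCRDMA_IGNORE_COMPLETION */

/* pared-down stand-ins for struct ib_wc and struct rpcrdma_mw */
struct wc { unsigned long long wr_id; int status; };
struct mw { void (*mw_sendcompletion)(struct wc *); };

static void frwr_sendcompletion(struct wc *wc)
{
	/* the real callout would mark the FRMR stale on flush errors */
	printf("mw completion, status %d\n", wc->status);
}

static void process_send_wc(struct wc *wc)
{
	if (wc->wr_id == IGNORE_COMPLETION) {
		if (wc->status != 0)
			printf("SEND error: status %d\n", wc->status);
		return;
	}
	/* wr_id carries a pointer to the MW; invoke its handler */
	((struct mw *)(unsigned long)wc->wr_id)->mw_sendcompletion(wc);
}

int main(void)
{
	struct mw w = { .mw_sendcompletion = frwr_sendcompletion };
	struct wc c = { .wr_id = (unsigned long)&w, .status = 0 };

	process_send_wc(&c);
	return 0;
}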
@@ -424,7 +418,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
424 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 418 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
425 | struct rpcrdma_ep *ep = &xprt->rx_ep; | 419 | struct rpcrdma_ep *ep = &xprt->rx_ep; |
426 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 420 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
427 | struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; | 421 | struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr; |
428 | #endif | 422 | #endif |
429 | struct ib_qp_attr *attr = &ia->ri_qp_attr; | 423 | struct ib_qp_attr *attr = &ia->ri_qp_attr; |
430 | struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; | 424 | struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; |
@@ -480,9 +474,8 @@ connected: | |||
480 | wake_up_all(&ep->rep_connect_wait); | 474 | wake_up_all(&ep->rep_connect_wait); |
481 | /*FALLTHROUGH*/ | 475 | /*FALLTHROUGH*/ |
482 | default: | 476 | default: |
483 | dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n", | 477 | dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n", |
484 | __func__, &addr->sin_addr.s_addr, | 478 | __func__, sap, rpc_get_port(sap), ep, |
485 | ntohs(addr->sin_port), ep, | ||
486 | CONNECTION_MSG(event->event)); | 479 | CONNECTION_MSG(event->event)); |
487 | break; | 480 | break; |
488 | } | 481 | } |
@@ -491,19 +484,16 @@ connected: | |||
491 | if (connstate == 1) { | 484 | if (connstate == 1) { |
492 | int ird = attr->max_dest_rd_atomic; | 485 | int ird = attr->max_dest_rd_atomic; |
493 | int tird = ep->rep_remote_cma.responder_resources; | 486 | int tird = ep->rep_remote_cma.responder_resources; |
494 | printk(KERN_INFO "rpcrdma: connection to %pI4:%u " | 487 | |
495 | "on %s, memreg %d slots %d ird %d%s\n", | 488 | pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", |
496 | &addr->sin_addr.s_addr, | 489 | sap, rpc_get_port(sap), |
497 | ntohs(addr->sin_port), | ||
498 | ia->ri_id->device->name, | 490 | ia->ri_id->device->name, |
499 | ia->ri_memreg_strategy, | 491 | ia->ri_ops->ro_displayname, |
500 | xprt->rx_buf.rb_max_requests, | 492 | xprt->rx_buf.rb_max_requests, |
501 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); | 493 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); |
502 | } else if (connstate < 0) { | 494 | } else if (connstate < 0) { |
503 | printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n", | 495 | pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n", |
504 | &addr->sin_addr.s_addr, | 496 | sap, rpc_get_port(sap), connstate); |
505 | ntohs(addr->sin_port), | ||
506 | connstate); | ||
507 | } | 497 | } |
508 | #endif | 498 | #endif |
509 | 499 | ||
@@ -621,17 +611,13 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
621 | 611 | ||
622 | if (memreg == RPCRDMA_FRMR) { | 612 | if (memreg == RPCRDMA_FRMR) { |
623 | /* Requires both frmr reg and local dma lkey */ | 613 | /* Requires both frmr reg and local dma lkey */ |
624 | if ((devattr->device_cap_flags & | 614 | if (((devattr->device_cap_flags & |
625 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | 615 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != |
626 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | 616 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) || |
617 | (devattr->max_fast_reg_page_list_len == 0)) { | ||
627 | dprintk("RPC: %s: FRMR registration " | 618 | dprintk("RPC: %s: FRMR registration " |
628 | "not supported by HCA\n", __func__); | 619 | "not supported by HCA\n", __func__); |
629 | memreg = RPCRDMA_MTHCAFMR; | 620 | memreg = RPCRDMA_MTHCAFMR; |
630 | } else { | ||
631 | /* Mind the ia limit on FRMR page list depth */ | ||
632 | ia->ri_max_frmr_depth = min_t(unsigned int, | ||
633 | RPCRDMA_MAX_DATA_SEGS, | ||
634 | devattr->max_fast_reg_page_list_len); | ||
635 | } | 621 | } |
636 | } | 622 | } |
637 | if (memreg == RPCRDMA_MTHCAFMR) { | 623 | if (memreg == RPCRDMA_MTHCAFMR) { |
@@ -652,13 +638,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
652 | */ | 638 | */ |
653 | switch (memreg) { | 639 | switch (memreg) { |
654 | case RPCRDMA_FRMR: | 640 | case RPCRDMA_FRMR: |
641 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; | ||
655 | break; | 642 | break; |
656 | case RPCRDMA_ALLPHYSICAL: | 643 | case RPCRDMA_ALLPHYSICAL: |
644 | ia->ri_ops = &rpcrdma_physical_memreg_ops; | ||
657 | mem_priv = IB_ACCESS_LOCAL_WRITE | | 645 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
658 | IB_ACCESS_REMOTE_WRITE | | 646 | IB_ACCESS_REMOTE_WRITE | |
659 | IB_ACCESS_REMOTE_READ; | 647 | IB_ACCESS_REMOTE_READ; |
660 | goto register_setup; | 648 | goto register_setup; |
661 | case RPCRDMA_MTHCAFMR: | 649 | case RPCRDMA_MTHCAFMR: |
650 | ia->ri_ops = &rpcrdma_fmr_memreg_ops; | ||
662 | if (ia->ri_have_dma_lkey) | 651 | if (ia->ri_have_dma_lkey) |
663 | break; | 652 | break; |
664 | mem_priv = IB_ACCESS_LOCAL_WRITE; | 653 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
@@ -678,8 +667,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
678 | rc = -ENOMEM; | 667 | rc = -ENOMEM; |
679 | goto out3; | 668 | goto out3; |
680 | } | 669 | } |
681 | dprintk("RPC: %s: memory registration strategy is %d\n", | 670 | dprintk("RPC: %s: memory registration strategy is '%s'\n", |
682 | __func__, memreg); | 671 | __func__, ia->ri_ops->ro_displayname); |
683 | 672 | ||
684 | /* Else will do memory reg/dereg for each chunk */ | 673 | /* Else will do memory reg/dereg for each chunk */ |
685 | ia->ri_memreg_strategy = memreg; | 674 | ia->ri_memreg_strategy = memreg; |
@@ -743,49 +732,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
743 | 732 | ||
744 | ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; | 733 | ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; |
745 | ep->rep_attr.qp_context = ep; | 734 | ep->rep_attr.qp_context = ep; |
746 | /* send_cq and recv_cq initialized below */ | ||
747 | ep->rep_attr.srq = NULL; | 735 | ep->rep_attr.srq = NULL; |
748 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 736 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
749 | switch (ia->ri_memreg_strategy) { | 737 | rc = ia->ri_ops->ro_open(ia, ep, cdata); |
750 | case RPCRDMA_FRMR: { | 738 | if (rc) |
751 | int depth = 7; | 739 | return rc; |
752 | |||
753 | /* Add room for frmr register and invalidate WRs. | ||
754 | * 1. FRMR reg WR for head | ||
755 | * 2. FRMR invalidate WR for head | ||
756 | * 3. N FRMR reg WRs for pagelist | ||
757 | * 4. N FRMR invalidate WRs for pagelist | ||
758 | * 5. FRMR reg WR for tail | ||
759 | * 6. FRMR invalidate WR for tail | ||
760 | * 7. The RDMA_SEND WR | ||
761 | */ | ||
762 | |||
763 | /* Calculate N if the device max FRMR depth is smaller than | ||
764 | * RPCRDMA_MAX_DATA_SEGS. | ||
765 | */ | ||
766 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
767 | int delta = RPCRDMA_MAX_DATA_SEGS - | ||
768 | ia->ri_max_frmr_depth; | ||
769 | |||
770 | do { | ||
771 | depth += 2; /* FRMR reg + invalidate */ | ||
772 | delta -= ia->ri_max_frmr_depth; | ||
773 | } while (delta > 0); | ||
774 | |||
775 | } | ||
776 | ep->rep_attr.cap.max_send_wr *= depth; | ||
777 | if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { | ||
778 | cdata->max_requests = devattr->max_qp_wr / depth; | ||
779 | if (!cdata->max_requests) | ||
780 | return -EINVAL; | ||
781 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * | ||
782 | depth; | ||
783 | } | ||
784 | break; | ||
785 | } | ||
786 | default: | ||
787 | break; | ||
788 | } | ||
789 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; | 740 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; |
790 | ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); | 741 | ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); |
791 | ep->rep_attr.cap.max_recv_sge = 1; | 742 | ep->rep_attr.cap.max_recv_sge = 1; |
@@ -944,21 +895,9 @@ retry: | |||
944 | rpcrdma_ep_disconnect(ep, ia); | 895 | rpcrdma_ep_disconnect(ep, ia); |
945 | rpcrdma_flush_cqs(ep); | 896 | rpcrdma_flush_cqs(ep); |
946 | 897 | ||
947 | switch (ia->ri_memreg_strategy) { | ||
948 | case RPCRDMA_FRMR: | ||
949 | rpcrdma_reset_frmrs(ia); | ||
950 | break; | ||
951 | case RPCRDMA_MTHCAFMR: | ||
952 | rpcrdma_reset_fmrs(ia); | ||
953 | break; | ||
954 | case RPCRDMA_ALLPHYSICAL: | ||
955 | break; | ||
956 | default: | ||
957 | rc = -EIO; | ||
958 | goto out; | ||
959 | } | ||
960 | |||
961 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | 898 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); |
899 | ia->ri_ops->ro_reset(xprt); | ||
900 | |||
962 | id = rpcrdma_create_id(xprt, ia, | 901 | id = rpcrdma_create_id(xprt, ia, |
963 | (struct sockaddr *)&xprt->rx_data.addr); | 902 | (struct sockaddr *)&xprt->rx_data.addr); |
964 | if (IS_ERR(id)) { | 903 | if (IS_ERR(id)) { |
@@ -1123,91 +1062,6 @@ out: | |||
1123 | return ERR_PTR(rc); | 1062 | return ERR_PTR(rc); |
1124 | } | 1063 | } |
1125 | 1064 | ||
1126 | static int | ||
1127 | rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) | ||
1128 | { | ||
1129 | int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; | ||
1130 | struct ib_fmr_attr fmr_attr = { | ||
1131 | .max_pages = RPCRDMA_MAX_DATA_SEGS, | ||
1132 | .max_maps = 1, | ||
1133 | .page_shift = PAGE_SHIFT | ||
1134 | }; | ||
1135 | struct rpcrdma_mw *r; | ||
1136 | int i, rc; | ||
1137 | |||
1138 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
1139 | dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); | ||
1140 | |||
1141 | while (i--) { | ||
1142 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
1143 | if (r == NULL) | ||
1144 | return -ENOMEM; | ||
1145 | |||
1146 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr); | ||
1147 | if (IS_ERR(r->r.fmr)) { | ||
1148 | rc = PTR_ERR(r->r.fmr); | ||
1149 | dprintk("RPC: %s: ib_alloc_fmr failed %i\n", | ||
1150 | __func__, rc); | ||
1151 | goto out_free; | ||
1152 | } | ||
1153 | |||
1154 | list_add(&r->mw_list, &buf->rb_mws); | ||
1155 | list_add(&r->mw_all, &buf->rb_all); | ||
1156 | } | ||
1157 | return 0; | ||
1158 | |||
1159 | out_free: | ||
1160 | kfree(r); | ||
1161 | return rc; | ||
1162 | } | ||
1163 | |||
1164 | static int | ||
1165 | rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) | ||
1166 | { | ||
1167 | struct rpcrdma_frmr *f; | ||
1168 | struct rpcrdma_mw *r; | ||
1169 | int i, rc; | ||
1170 | |||
1171 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
1172 | dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); | ||
1173 | |||
1174 | while (i--) { | ||
1175 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
1176 | if (r == NULL) | ||
1177 | return -ENOMEM; | ||
1178 | f = &r->r.frmr; | ||
1179 | |||
1180 | f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1181 | ia->ri_max_frmr_depth); | ||
1182 | if (IS_ERR(f->fr_mr)) { | ||
1183 | rc = PTR_ERR(f->fr_mr); | ||
1184 | dprintk("RPC: %s: ib_alloc_fast_reg_mr " | ||
1185 | "failed %i\n", __func__, rc); | ||
1186 | goto out_free; | ||
1187 | } | ||
1188 | |||
1189 | f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
1190 | ia->ri_max_frmr_depth); | ||
1191 | if (IS_ERR(f->fr_pgl)) { | ||
1192 | rc = PTR_ERR(f->fr_pgl); | ||
1193 | dprintk("RPC: %s: ib_alloc_fast_reg_page_list " | ||
1194 | "failed %i\n", __func__, rc); | ||
1195 | |||
1196 | ib_dereg_mr(f->fr_mr); | ||
1197 | goto out_free; | ||
1198 | } | ||
1199 | |||
1200 | list_add(&r->mw_list, &buf->rb_mws); | ||
1201 | list_add(&r->mw_all, &buf->rb_all); | ||
1202 | } | ||
1203 | |||
1204 | return 0; | ||
1205 | |||
1206 | out_free: | ||
1207 | kfree(r); | ||
1208 | return rc; | ||
1209 | } | ||
1210 | |||
1211 | int | 1065 | int |
1212 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | 1066 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) |
1213 | { | 1067 | { |
@@ -1244,22 +1098,9 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1244 | buf->rb_recv_bufs = (struct rpcrdma_rep **) p; | 1098 | buf->rb_recv_bufs = (struct rpcrdma_rep **) p; |
1245 | p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; | 1099 | p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; |
1246 | 1100 | ||
1247 | INIT_LIST_HEAD(&buf->rb_mws); | 1101 | rc = ia->ri_ops->ro_init(r_xprt); |
1248 | INIT_LIST_HEAD(&buf->rb_all); | 1102 | if (rc) |
1249 | switch (ia->ri_memreg_strategy) { | 1103 | goto out; |
1250 | case RPCRDMA_FRMR: | ||
1251 | rc = rpcrdma_init_frmrs(ia, buf); | ||
1252 | if (rc) | ||
1253 | goto out; | ||
1254 | break; | ||
1255 | case RPCRDMA_MTHCAFMR: | ||
1256 | rc = rpcrdma_init_fmrs(ia, buf); | ||
1257 | if (rc) | ||
1258 | goto out; | ||
1259 | break; | ||
1260 | default: | ||
1261 | break; | ||
1262 | } | ||
1263 | 1104 | ||
1264 | for (i = 0; i < buf->rb_max_requests; i++) { | 1105 | for (i = 0; i < buf->rb_max_requests; i++) { |
1265 | struct rpcrdma_req *req; | 1106 | struct rpcrdma_req *req; |
@@ -1311,47 +1152,6 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) | |||
1311 | kfree(req); | 1152 | kfree(req); |
1312 | } | 1153 | } |
1313 | 1154 | ||
1314 | static void | ||
1315 | rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) | ||
1316 | { | ||
1317 | struct rpcrdma_mw *r; | ||
1318 | int rc; | ||
1319 | |||
1320 | while (!list_empty(&buf->rb_all)) { | ||
1321 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
1322 | list_del(&r->mw_all); | ||
1323 | list_del(&r->mw_list); | ||
1324 | |||
1325 | rc = ib_dealloc_fmr(r->r.fmr); | ||
1326 | if (rc) | ||
1327 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | ||
1328 | __func__, rc); | ||
1329 | |||
1330 | kfree(r); | ||
1331 | } | ||
1332 | } | ||
1333 | |||
1334 | static void | ||
1335 | rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf) | ||
1336 | { | ||
1337 | struct rpcrdma_mw *r; | ||
1338 | int rc; | ||
1339 | |||
1340 | while (!list_empty(&buf->rb_all)) { | ||
1341 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
1342 | list_del(&r->mw_all); | ||
1343 | list_del(&r->mw_list); | ||
1344 | |||
1345 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1346 | if (rc) | ||
1347 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", | ||
1348 | __func__, rc); | ||
1349 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1350 | |||
1351 | kfree(r); | ||
1352 | } | ||
1353 | } | ||
1354 | |||
1355 | void | 1155 | void |
1356 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | 1156 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) |
1357 | { | 1157 | { |
@@ -1372,104 +1172,11 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1372 | rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); | 1172 | rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); |
1373 | } | 1173 | } |
1374 | 1174 | ||
1375 | switch (ia->ri_memreg_strategy) { | 1175 | ia->ri_ops->ro_destroy(buf); |
1376 | case RPCRDMA_FRMR: | ||
1377 | rpcrdma_destroy_frmrs(buf); | ||
1378 | break; | ||
1379 | case RPCRDMA_MTHCAFMR: | ||
1380 | rpcrdma_destroy_fmrs(buf); | ||
1381 | break; | ||
1382 | default: | ||
1383 | break; | ||
1384 | } | ||
1385 | 1176 | ||
1386 | kfree(buf->rb_pool); | 1177 | kfree(buf->rb_pool); |
1387 | } | 1178 | } |
1388 | 1179 | ||
1389 | /* After a disconnect, unmap all FMRs. | ||
1390 | * | ||
1391 | * This is invoked only in the transport connect worker in order | ||
1392 | * to serialize with rpcrdma_register_fmr_external(). | ||
1393 | */ | ||
1394 | static void | ||
1395 | rpcrdma_reset_fmrs(struct rpcrdma_ia *ia) | ||
1396 | { | ||
1397 | struct rpcrdma_xprt *r_xprt = | ||
1398 | container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
1399 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
1400 | struct list_head *pos; | ||
1401 | struct rpcrdma_mw *r; | ||
1402 | LIST_HEAD(l); | ||
1403 | int rc; | ||
1404 | |||
1405 | list_for_each(pos, &buf->rb_all) { | ||
1406 | r = list_entry(pos, struct rpcrdma_mw, mw_all); | ||
1407 | |||
1408 | INIT_LIST_HEAD(&l); | ||
1409 | list_add(&r->r.fmr->list, &l); | ||
1410 | rc = ib_unmap_fmr(&l); | ||
1411 | if (rc) | ||
1412 | dprintk("RPC: %s: ib_unmap_fmr failed %i\n", | ||
1413 | __func__, rc); | ||
1414 | } | ||
1415 | } | ||
1416 | |||
1417 | /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in | ||
1418 | * an unusable state. Find FRMRs in this state and dereg / reg | ||
1419 | * each. FRMRs that are VALID and attached to an rpcrdma_req are | ||
1420 | * also torn down. | ||
1421 | * | ||
1422 | * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. | ||
1423 | * | ||
1424 | * This is invoked only in the transport connect worker in order | ||
1425 | * to serialize with rpcrdma_register_frmr_external(). | ||
1426 | */ | ||
1427 | static void | ||
1428 | rpcrdma_reset_frmrs(struct rpcrdma_ia *ia) | ||
1429 | { | ||
1430 | struct rpcrdma_xprt *r_xprt = | ||
1431 | container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
1432 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
1433 | struct list_head *pos; | ||
1434 | struct rpcrdma_mw *r; | ||
1435 | int rc; | ||
1436 | |||
1437 | list_for_each(pos, &buf->rb_all) { | ||
1438 | r = list_entry(pos, struct rpcrdma_mw, mw_all); | ||
1439 | |||
1440 | if (r->r.frmr.fr_state == FRMR_IS_INVALID) | ||
1441 | continue; | ||
1442 | |||
1443 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1444 | if (rc) | ||
1445 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", | ||
1446 | __func__, rc); | ||
1447 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1448 | |||
1449 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1450 | ia->ri_max_frmr_depth); | ||
1451 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
1452 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
1453 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
1454 | " failed %i\n", __func__, rc); | ||
1455 | continue; | ||
1456 | } | ||
1457 | r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( | ||
1458 | ia->ri_id->device, | ||
1459 | ia->ri_max_frmr_depth); | ||
1460 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
1461 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
1462 | dprintk("RPC: %s: " | ||
1463 | "ib_alloc_fast_reg_page_list " | ||
1464 | "failed %i\n", __func__, rc); | ||
1465 | |||
1466 | ib_dereg_mr(r->r.frmr.fr_mr); | ||
1467 | continue; | ||
1468 | } | ||
1469 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
1470 | } | ||
1471 | } | ||
1472 | |||
1473 | /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving | 1180 | /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving |
1474 | * some req segments uninitialized. | 1181 | * some req segments uninitialized. |
1475 | */ | 1182 | */ |
@@ -1509,7 +1216,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) | |||
1509 | } | 1216 | } |
1510 | } | 1217 | } |
1511 | 1218 | ||
1512 | /* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external(). | 1219 | /* rpcrdma_unmap_one() was already done during deregistration. |
1513 | * Redo only the ib_post_send(). | 1220 | * Redo only the ib_post_send(). |
1514 | */ | 1221 | */ |
1515 | static void | 1222 | static void |
@@ -1729,6 +1436,14 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | |||
1729 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. | 1436 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. |
1730 | */ | 1437 | */ |
1731 | 1438 | ||
1439 | void | ||
1440 | rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg) | ||
1441 | { | ||
1442 | dprintk("RPC: map_one: offset %p iova %llx len %zu\n", | ||
1443 | seg->mr_offset, | ||
1444 | (unsigned long long)seg->mr_dma, seg->mr_dmalen); | ||
1445 | } | ||
1446 | |||
1732 | static int | 1447 | static int |
1733 | rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, | 1448 | rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, |
1734 | struct ib_mr **mrp, struct ib_sge *iov) | 1449 | struct ib_mr **mrp, struct ib_sge *iov) |
@@ -1854,287 +1569,6 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) | |||
1854 | } | 1569 | } |
1855 | 1570 | ||
1856 | /* | 1571 | /* |
1857 | * Wrappers for chunk registration, shared by read/write chunk code. | ||
1858 | */ | ||
1859 | |||
1860 | static void | ||
1861 | rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) | ||
1862 | { | ||
1863 | seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | ||
1864 | seg->mr_dmalen = seg->mr_len; | ||
1865 | if (seg->mr_page) | ||
1866 | seg->mr_dma = ib_dma_map_page(ia->ri_id->device, | ||
1867 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
1868 | seg->mr_dmalen, seg->mr_dir); | ||
1869 | else | ||
1870 | seg->mr_dma = ib_dma_map_single(ia->ri_id->device, | ||
1871 | seg->mr_offset, | ||
1872 | seg->mr_dmalen, seg->mr_dir); | ||
1873 | if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) { | ||
1874 | dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n", | ||
1875 | __func__, | ||
1876 | (unsigned long long)seg->mr_dma, | ||
1877 | seg->mr_offset, seg->mr_dmalen); | ||
1878 | } | ||
1879 | } | ||
1880 | |||
1881 | static void | ||
1882 | rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | ||
1883 | { | ||
1884 | if (seg->mr_page) | ||
1885 | ib_dma_unmap_page(ia->ri_id->device, | ||
1886 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
1887 | else | ||
1888 | ib_dma_unmap_single(ia->ri_id->device, | ||
1889 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
1890 | } | ||
1891 | |||
1892 | static int | ||
1893 | rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1894 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1895 | struct rpcrdma_xprt *r_xprt) | ||
1896 | { | ||
1897 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1898 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
1899 | struct rpcrdma_frmr *frmr = &mw->r.frmr; | ||
1900 | struct ib_mr *mr = frmr->fr_mr; | ||
1901 | struct ib_send_wr fastreg_wr, *bad_wr; | ||
1902 | u8 key; | ||
1903 | int len, pageoff; | ||
1904 | int i, rc; | ||
1905 | int seg_len; | ||
1906 | u64 pa; | ||
1907 | int page_no; | ||
1908 | |||
1909 | pageoff = offset_in_page(seg1->mr_offset); | ||
1910 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1911 | seg1->mr_len += pageoff; | ||
1912 | len = -pageoff; | ||
1913 | if (*nsegs > ia->ri_max_frmr_depth) | ||
1914 | *nsegs = ia->ri_max_frmr_depth; | ||
1915 | for (page_no = i = 0; i < *nsegs;) { | ||
1916 | rpcrdma_map_one(ia, seg, writing); | ||
1917 | pa = seg->mr_dma; | ||
1918 | for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { | ||
1919 | frmr->fr_pgl->page_list[page_no++] = pa; | ||
1920 | pa += PAGE_SIZE; | ||
1921 | } | ||
1922 | len += seg->mr_len; | ||
1923 | ++seg; | ||
1924 | ++i; | ||
1925 | /* Check for holes */ | ||
1926 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1927 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1928 | break; | ||
1929 | } | ||
1930 | dprintk("RPC: %s: Using frmr %p to map %d segments\n", | ||
1931 | __func__, mw, i); | ||
1932 | |||
1933 | frmr->fr_state = FRMR_IS_VALID; | ||
1934 | |||
1935 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | ||
1936 | fastreg_wr.wr_id = (unsigned long)(void *)mw; | ||
1937 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
1938 | fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma; | ||
1939 | fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; | ||
1940 | fastreg_wr.wr.fast_reg.page_list_len = page_no; | ||
1941 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
1942 | fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; | ||
1943 | if (fastreg_wr.wr.fast_reg.length < len) { | ||
1944 | rc = -EIO; | ||
1945 | goto out_err; | ||
1946 | } | ||
1947 | |||
1948 | /* Bump the key */ | ||
1949 | key = (u8)(mr->rkey & 0x000000FF); | ||
1950 | ib_update_fast_reg_key(mr, ++key); | ||
1951 | |||
1952 | fastreg_wr.wr.fast_reg.access_flags = (writing ? | ||
1953 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | ||
1954 | IB_ACCESS_REMOTE_READ); | ||
1955 | fastreg_wr.wr.fast_reg.rkey = mr->rkey; | ||
1956 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1957 | |||
1958 | rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); | ||
1959 | if (rc) { | ||
1960 | dprintk("RPC: %s: failed ib_post_send for register," | ||
1961 | " status %i\n", __func__, rc); | ||
1962 | ib_update_fast_reg_key(mr, --key); | ||
1963 | goto out_err; | ||
1964 | } else { | ||
1965 | seg1->mr_rkey = mr->rkey; | ||
1966 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1967 | seg1->mr_nsegs = i; | ||
1968 | seg1->mr_len = len; | ||
1969 | } | ||
1970 | *nsegs = i; | ||
1971 | return 0; | ||
1972 | out_err: | ||
1973 | frmr->fr_state = FRMR_IS_INVALID; | ||
1974 | while (i--) | ||
1975 | rpcrdma_unmap_one(ia, --seg); | ||
1976 | return rc; | ||
1977 | } | ||
1978 | |||
1979 | static int | ||
1980 | rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1981 | struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) | ||
1982 | { | ||
1983 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1984 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
1985 | int rc; | ||
1986 | |||
1987 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; | ||
1988 | |||
1989 | memset(&invalidate_wr, 0, sizeof invalidate_wr); | ||
1990 | invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; | ||
1991 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
1992 | invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; | ||
1993 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1994 | |||
1995 | read_lock(&ia->ri_qplock); | ||
1996 | while (seg1->mr_nsegs--) | ||
1997 | rpcrdma_unmap_one(ia, seg++); | ||
1998 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
1999 | read_unlock(&ia->ri_qplock); | ||
2000 | if (rc) { | ||
2001 | /* Force rpcrdma_buffer_get() to retry */ | ||
2002 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; | ||
2003 | dprintk("RPC: %s: failed ib_post_send for invalidate," | ||
2004 | " status %i\n", __func__, rc); | ||
2005 | } | ||
2006 | return rc; | ||
2007 | } | ||
2008 | |||
2009 | static int | ||
2010 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, | ||
2011 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
2012 | { | ||
2013 | struct rpcrdma_mr_seg *seg1 = seg; | ||
2014 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
2015 | int len, pageoff, i, rc; | ||
2016 | |||
2017 | pageoff = offset_in_page(seg1->mr_offset); | ||
2018 | seg1->mr_offset -= pageoff; /* start of page */ | ||
2019 | seg1->mr_len += pageoff; | ||
2020 | len = -pageoff; | ||
2021 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
2022 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
2023 | for (i = 0; i < *nsegs;) { | ||
2024 | rpcrdma_map_one(ia, seg, writing); | ||
2025 | physaddrs[i] = seg->mr_dma; | ||
2026 | len += seg->mr_len; | ||
2027 | ++seg; | ||
2028 | ++i; | ||
2029 | /* Check for holes */ | ||
2030 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
2031 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
2032 | break; | ||
2033 | } | ||
2034 | rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma); | ||
2035 | if (rc) { | ||
2036 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
2037 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
2038 | len, (unsigned long long)seg1->mr_dma, | ||
2039 | pageoff, i, rc); | ||
2040 | while (i--) | ||
2041 | rpcrdma_unmap_one(ia, --seg); | ||
2042 | } else { | ||
2043 | seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey; | ||
2044 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
2045 | seg1->mr_nsegs = i; | ||
2046 | seg1->mr_len = len; | ||
2047 | } | ||
2048 | *nsegs = i; | ||
2049 | return rc; | ||
2050 | } | ||
2051 | |||
2052 | static int | ||
2053 | rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | ||
2054 | struct rpcrdma_ia *ia) | ||
2055 | { | ||
2056 | struct rpcrdma_mr_seg *seg1 = seg; | ||
2057 | LIST_HEAD(l); | ||
2058 | int rc; | ||
2059 | |||
2060 | list_add(&seg1->rl_mw->r.fmr->list, &l); | ||
2061 | rc = ib_unmap_fmr(&l); | ||
2062 | read_lock(&ia->ri_qplock); | ||
2063 | while (seg1->mr_nsegs--) | ||
2064 | rpcrdma_unmap_one(ia, seg++); | ||
2065 | read_unlock(&ia->ri_qplock); | ||
2066 | if (rc) | ||
2067 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
2068 | " status %i\n", __func__, rc); | ||
2069 | return rc; | ||
2070 | } | ||
2071 | |||
2072 | int | ||
2073 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | ||
2074 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | ||
2075 | { | ||
2076 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
2077 | int rc = 0; | ||
2078 | |||
2079 | switch (ia->ri_memreg_strategy) { | ||
2080 | |||
2081 | case RPCRDMA_ALLPHYSICAL: | ||
2082 | rpcrdma_map_one(ia, seg, writing); | ||
2083 | seg->mr_rkey = ia->ri_bind_mem->rkey; | ||
2084 | seg->mr_base = seg->mr_dma; | ||
2085 | seg->mr_nsegs = 1; | ||
2086 | nsegs = 1; | ||
2087 | break; | ||
2088 | |||
2089 | /* Registration using frmr registration */ | ||
2090 | case RPCRDMA_FRMR: | ||
2091 | rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); | ||
2092 | break; | ||
2093 | |||
2094 | /* Registration using fmr memory registration */ | ||
2095 | case RPCRDMA_MTHCAFMR: | ||
2096 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); | ||
2097 | break; | ||
2098 | |||
2099 | default: | ||
2100 | return -EIO; | ||
2101 | } | ||
2102 | if (rc) | ||
2103 | return rc; | ||
2104 | |||
2105 | return nsegs; | ||
2106 | } | ||
2107 | |||
2108 | int | ||
2109 | rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | ||
2110 | struct rpcrdma_xprt *r_xprt) | ||
2111 | { | ||
2112 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
2113 | int nsegs = seg->mr_nsegs, rc; | ||
2114 | |||
2115 | switch (ia->ri_memreg_strategy) { | ||
2116 | |||
2117 | case RPCRDMA_ALLPHYSICAL: | ||
2118 | read_lock(&ia->ri_qplock); | ||
2119 | rpcrdma_unmap_one(ia, seg); | ||
2120 | read_unlock(&ia->ri_qplock); | ||
2121 | break; | ||
2122 | |||
2123 | case RPCRDMA_FRMR: | ||
2124 | rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); | ||
2125 | break; | ||
2126 | |||
2127 | case RPCRDMA_MTHCAFMR: | ||
2128 | rc = rpcrdma_deregister_fmr_external(seg, ia); | ||
2129 | break; | ||
2130 | |||
2131 | default: | ||
2132 | break; | ||
2133 | } | ||
2134 | return nsegs; | ||
2135 | } | ||
2136 | |||
2137 | /* | ||
2138 | * Prepost any receive buffer, then post send. | 1572 | * Prepost any receive buffer, then post send. |
2139 | * | 1573 | * |
2140 | * Receive buffer is donated to hardware, reclaimed upon recv completion. | 1574 | * Receive buffer is donated to hardware, reclaimed upon recv completion. |
@@ -2156,7 +1590,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
2156 | } | 1590 | } |
2157 | 1591 | ||
2158 | send_wr.next = NULL; | 1592 | send_wr.next = NULL; |
2159 | send_wr.wr_id = 0ULL; /* no send cookie */ | 1593 | send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION; |
2160 | send_wr.sg_list = req->rl_send_iov; | 1594 | send_wr.sg_list = req->rl_send_iov; |
2161 | send_wr.num_sge = req->rl_niovs; | 1595 | send_wr.num_sge = req->rl_niovs; |
2162 | send_wr.opcode = IB_WR_SEND; | 1596 | send_wr.opcode = IB_WR_SEND; |
@@ -2215,43 +1649,24 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
2215 | return rc; | 1649 | return rc; |
2216 | } | 1650 | } |
2217 | 1651 | ||
2218 | /* Physical mapping means one Read/Write list entry per-page. | 1652 | /* How many chunk list items fit within our inline buffers? |
2219 | * All list entries must fit within an inline buffer | ||
2220 | * | ||
2221 | * NB: The server must return a Write list for NFS READ, | ||
2222 | * which has the same constraint. Factor in the inline | ||
2223 | * rsize as well. | ||
2224 | */ | 1653 | */ |
2225 | static size_t | 1654 | unsigned int |
2226 | rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt) | 1655 | rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt) |
2227 | { | 1656 | { |
2228 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; | 1657 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
2229 | unsigned int inline_size, pages; | 1658 | int bytes, segments; |
2230 | 1659 | ||
2231 | inline_size = min_t(unsigned int, | 1660 | bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize); |
2232 | cdata->inline_wsize, cdata->inline_rsize); | 1661 | bytes -= RPCRDMA_HDRLEN_MIN; |
2233 | inline_size -= RPCRDMA_HDRLEN_MIN; | 1662 | if (bytes < sizeof(struct rpcrdma_segment) * 2) { |
2234 | pages = inline_size / sizeof(struct rpcrdma_segment); | 1663 | pr_warn("RPC: %s: inline threshold too small\n", |
2235 | return pages << PAGE_SHIFT; | 1664 | __func__); |
2236 | } | 1665 | return 0; |
2237 | |||
2238 | static size_t | ||
2239 | rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt) | ||
2240 | { | ||
2241 | return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; | ||
2242 | } | ||
2243 | |||
2244 | size_t | ||
2245 | rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt) | ||
2246 | { | ||
2247 | size_t result; | ||
2248 | |||
2249 | switch (r_xprt->rx_ia.ri_memreg_strategy) { | ||
2250 | case RPCRDMA_ALLPHYSICAL: | ||
2251 | result = rpcrdma_physical_max_payload(r_xprt); | ||
2252 | break; | ||
2253 | default: | ||
2254 | result = rpcrdma_mr_max_payload(r_xprt); | ||
2255 | } | 1666 | } |
2256 | return result; | 1667 | |
1668 | segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1); | ||
1669 | dprintk("RPC: %s: max chunk list size = %d segments\n", | ||
1670 | __func__, segments); | ||
1671 | return segments; | ||
2257 | } | 1672 | } |
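rpcrdma_max_segments() derives the chunk-list capacity from the smaller of the two inline thresholds, then rounds the segment count down to a power of two via fls(). A runnable sketch with assumed sizes; verify RPCRDMA_HDRLEN_MIN and sizeof(struct rpcrdma_segment) against the headers before relying on these numbers:

#include <stdio.h>

/* find last (most significant) set bit, 1-based; models the kernel's fls() */
static int fls_(unsigned int x)
{
	int r = 0;
	while (x) { r++; x >>= 1; }
	return r;
}

int main(void)
{
	/* assumed sizes: 1 KiB inline thresholds, a 28-byte minimum
	 * RPC/RDMA header, and 16 bytes per segment (handle, length,
	 * offset) */
	int inline_wsize = 1024, inline_rsize = 1024;
	int hdrlen_min = 28, seg_size = 16;

	int bytes = (inline_wsize < inline_rsize ? inline_wsize
						 : inline_rsize) - hdrlen_min;
	if (bytes < seg_size * 2) {
		fprintf(stderr, "inline threshold too small\n");
		return 1;
	}
	/* round the segment count down to a power of two */
	int segments = 1 << (fls_(bytes / seg_size) - 1);
	printf("max chunk list size = %d segments\n", segments);  /* 32 */
	return 0;
}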
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0a16fb6f0885..78e0b8beaa36 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -60,6 +60,7 @@ | |||
60 | * Interface Adapter -- one per transport instance | 60 | * Interface Adapter -- one per transport instance |
61 | */ | 61 | */ |
62 | struct rpcrdma_ia { | 62 | struct rpcrdma_ia { |
63 | const struct rpcrdma_memreg_ops *ri_ops; | ||
63 | rwlock_t ri_qplock; | 64 | rwlock_t ri_qplock; |
64 | struct rdma_cm_id *ri_id; | 65 | struct rdma_cm_id *ri_id; |
65 | struct ib_pd *ri_pd; | 66 | struct ib_pd *ri_pd; |
@@ -105,6 +106,10 @@ struct rpcrdma_ep { | |||
105 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) | 106 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) |
106 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) | 107 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) |
107 | 108 | ||
109 | /* Force completion handler to ignore the signal | ||
110 | */ | ||
111 | #define RPCRDMA_IGNORE_COMPLETION (0ULL) | ||
112 | |||
108 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV | 113 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV |
109 | * | 114 | * |
110 | * The below structure appears at the front of a large region of kmalloc'd | 115 | * The below structure appears at the front of a large region of kmalloc'd |
@@ -143,14 +148,6 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) | |||
143 | return (struct rpcrdma_msg *)rb->rg_base; | 148 | return (struct rpcrdma_msg *)rb->rg_base; |
144 | } | 149 | } |
145 | 150 | ||
146 | enum rpcrdma_chunktype { | ||
147 | rpcrdma_noch = 0, | ||
148 | rpcrdma_readch, | ||
149 | rpcrdma_areadch, | ||
150 | rpcrdma_writech, | ||
151 | rpcrdma_replych | ||
152 | }; | ||
153 | |||
154 | /* | 151 | /* |
155 | * struct rpcrdma_rep -- this structure encapsulates state required to recv | 152 | * struct rpcrdma_rep -- this structure encapsulates state required to recv |
156 | * and complete a reply, asynchronously. It needs several pieces of | 153 |
@@ -213,6 +210,7 @@ struct rpcrdma_mw { | |||
213 | struct ib_fmr *fmr; | 210 | struct ib_fmr *fmr; |
214 | struct rpcrdma_frmr frmr; | 211 | struct rpcrdma_frmr frmr; |
215 | } r; | 212 | } r; |
213 | void (*mw_sendcompletion)(struct ib_wc *); | ||
216 | struct list_head mw_list; | 214 | struct list_head mw_list; |
217 | struct list_head mw_all; | 215 | struct list_head mw_all; |
218 | }; | 216 | }; |
@@ -258,7 +256,6 @@ struct rpcrdma_req { | |||
258 | unsigned int rl_niovs; /* 0, 2 or 4 */ | 256 | unsigned int rl_niovs; /* 0, 2 or 4 */ |
259 | unsigned int rl_nchunks; /* non-zero if chunks */ | 257 | unsigned int rl_nchunks; /* non-zero if chunks */ |
260 | unsigned int rl_connect_cookie; /* retry detection */ | 258 | unsigned int rl_connect_cookie; /* retry detection */ |
261 | enum rpcrdma_chunktype rl_rtype, rl_wtype; | ||
262 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ | 259 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ |
263 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ | 260 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ |
264 | struct ib_sge rl_send_iov[4]; /* for active requests */ | 261 | struct ib_sge rl_send_iov[4]; /* for active requests */ |
@@ -340,6 +337,29 @@ struct rpcrdma_stats { | |||
340 | }; | 337 | }; |
341 | 338 | ||
342 | /* | 339 | /* |
340 | * Per-registration mode operations | ||
341 | */ | ||
342 | struct rpcrdma_xprt; | ||
343 | struct rpcrdma_memreg_ops { | ||
344 | int (*ro_map)(struct rpcrdma_xprt *, | ||
345 | struct rpcrdma_mr_seg *, int, bool); | ||
346 | int (*ro_unmap)(struct rpcrdma_xprt *, | ||
347 | struct rpcrdma_mr_seg *); | ||
348 | int (*ro_open)(struct rpcrdma_ia *, | ||
349 | struct rpcrdma_ep *, | ||
350 | struct rpcrdma_create_data_internal *); | ||
351 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); | ||
352 | int (*ro_init)(struct rpcrdma_xprt *); | ||
353 | void (*ro_reset)(struct rpcrdma_xprt *); | ||
354 | void (*ro_destroy)(struct rpcrdma_buffer *); | ||
355 | const char *ro_displayname; | ||
356 | }; | ||
357 | |||
358 | extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; | ||
359 | extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; | ||
360 | extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops; | ||
361 | |||
362 | /* | ||
343 | * RPCRDMA transport -- encapsulates the structures above for | 363 | * RPCRDMA transport -- encapsulates the structures above for |
344 | * integration with RPC. | 364 | * integration with RPC. |
345 | * | 365 | * |
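The three extern ops tables above are selected once, in rpcrdma_ia_open(), and every later registration decision flows through ri_ops. A compact model of that selection step (stand-in types; only ro_displayname is modeled here):

#include <stdio.h>

struct memreg_ops { const char *ro_displayname; };

/* stand-ins for the three extern ops tables declared above */
static const struct memreg_ops fmr_ops      = { "fmr" };
static const struct memreg_ops frwr_ops     = { "frwr" };
static const struct memreg_ops physical_ops = { "physical" };

enum memreg { ALLPHYSICAL, MTHCAFMR, FRMR };

static const struct memreg_ops *select_ops(enum memreg strategy)
{
	switch (strategy) {
	case FRMR:        return &frwr_ops;
	case MTHCAFMR:    return &fmr_ops;
	case ALLPHYSICAL: return &physical_ops;
	}
	return NULL;
}

int main(void)
{
	const struct memreg_ops *ops = select_ops(FRMR);

	printf("memory registration strategy is '%s'\n", ops->ro_displayname);
	return 0;
}

Const tables plus a one-time selection keep the hot paths branch-free on strategy and make each mode independently testable.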
@@ -398,16 +418,56 @@ void rpcrdma_buffer_put(struct rpcrdma_req *); | |||
398 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | 418 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); |
399 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | 419 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); |
400 | 420 | ||
401 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, | ||
402 | int, int, struct rpcrdma_xprt *); | ||
403 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, | ||
404 | struct rpcrdma_xprt *); | ||
405 | |||
406 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, | 421 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, |
407 | size_t, gfp_t); | 422 | size_t, gfp_t); |
408 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, | 423 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, |
409 | struct rpcrdma_regbuf *); | 424 | struct rpcrdma_regbuf *); |
410 | 425 | ||
426 | unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); | ||
427 | |||
428 | /* | ||
429 | * Wrappers for chunk registration, shared by read/write chunk code. | ||
430 | */ | ||
431 | |||
432 | void rpcrdma_mapping_error(struct rpcrdma_mr_seg *); | ||
433 | |||
434 | static inline enum dma_data_direction | ||
435 | rpcrdma_data_dir(bool writing) | ||
436 | { | ||
437 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | ||
438 | } | ||
439 | |||
440 | static inline void | ||
441 | rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg, | ||
442 | enum dma_data_direction direction) | ||
443 | { | ||
444 | seg->mr_dir = direction; | ||
445 | seg->mr_dmalen = seg->mr_len; | ||
446 | |||
447 | if (seg->mr_page) | ||
448 | seg->mr_dma = ib_dma_map_page(device, | ||
449 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
450 | seg->mr_dmalen, seg->mr_dir); | ||
451 | else | ||
452 | seg->mr_dma = ib_dma_map_single(device, | ||
453 | seg->mr_offset, | ||
454 | seg->mr_dmalen, seg->mr_dir); | ||
455 | |||
456 | if (ib_dma_mapping_error(device, seg->mr_dma)) | ||
457 | rpcrdma_mapping_error(seg); | ||
458 | } | ||
459 | |||
460 | static inline void | ||
461 | rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg) | ||
462 | { | ||
463 | if (seg->mr_page) | ||
464 | ib_dma_unmap_page(device, | ||
465 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
466 | else | ||
467 | ib_dma_unmap_single(device, | ||
468 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
469 | } | ||
470 | |||
411 | /* | 471 | /* |
412 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c | 472 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c |
413 | */ | 473 | */ |
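Moving rpcrdma_map_one()/rpcrdma_unmap_one() into the header as inlines lets each registration mode call them directly, and rpcrdma_data_dir() encodes the DMA direction rule: "writing" means the server will RDMA Write into this memory, so data flows from the device toward the CPU. A trivial user-space check of that rule:

#include <stdbool.h>
#include <stdio.h>

enum dma_data_direction { DMA_TO_DEVICE, DMA_FROM_DEVICE };

/* matches the inline rpcrdma_data_dir() above */
static enum dma_data_direction data_dir(bool writing)
{
	return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
}

int main(void)
{
	printf("read chunk  -> %s\n",
	       data_dir(false) == DMA_TO_DEVICE ? "DMA_TO_DEVICE" : "?");
	printf("write chunk -> %s\n",
	       data_dir(true) == DMA_FROM_DEVICE ? "DMA_FROM_DEVICE" : "?");
	return 0;
}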
@@ -418,9 +478,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); | |||
418 | /* | 478 | /* |
419 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | 479 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c |
420 | */ | 480 | */ |
421 | ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t); | ||
422 | int rpcrdma_marshal_req(struct rpc_rqst *); | 481 | int rpcrdma_marshal_req(struct rpc_rqst *); |
423 | size_t rpcrdma_max_payload(struct rpcrdma_xprt *); | ||
424 | 482 | ||
425 | /* Temporary NFS request map cache. Created in svc_rdma.c */ | 483 | /* Temporary NFS request map cache. Created in svc_rdma.c */ |
426 | extern struct kmem_cache *svc_rdma_map_cachep; | 484 | extern struct kmem_cache *svc_rdma_map_cachep; |