author    Linus Torvalds <torvalds@linux-foundation.org>  2014-06-10 18:02:42 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-06-10 18:02:42 -0400
commit    d1e1cda862c16252087374ac75949b0e89a5717e (patch)
tree      544ce467bed23638949a1991b4f7b00e7472baa4
parent    07888238f55056605cd23aa4ea3ca97d5e15938f (diff)
parent    a914722f333b3359d2f4f12919380a334176bb89 (diff)
Merge tag 'nfs-for-3.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
- massive cleanup of the NFS read/write code by Anna and Dros
- support multiple NFS read/write requests per page in order to deal
with non-page aligned pNFS striping. Also cleans up the r/wsize <
page size code nicely.
- stable fix for ensuring inode is declared uptodate only after all
the attributes have been checked.
- stable fix for a kernel Oops when remounting
- NFS over RDMA client fixes
- move the pNFS files layout driver into its own subdirectory"
* tag 'nfs-for-3.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (79 commits)
NFS: populate ->net in mount data when remounting
pnfs: fix lockup caused by pnfs_generic_pg_test
NFSv4.1: Fix typo in dprintk
NFSv4.1: Comment is now wrong and redundant to code
NFS: Use raw_write_seqcount_begin/end int nfs4_reclaim_open_state
xprtrdma: Disconnect on registration failure
xprtrdma: Remove BUG_ON() call sites
xprtrdma: Avoid deadlock when credit window is reset
SUNRPC: Move congestion window constants to header file
xprtrdma: Reset connection timeout after successful reconnect
xprtrdma: Use macros for reconnection timeout constants
xprtrdma: Allocate missing pagelist
xprtrdma: Remove Tavor MTU setting
xprtrdma: Ensure ia->ri_id->qp is not NULL when reconnecting
xprtrdma: Reduce the number of hardway buffer allocations
xprtrdma: Limit work done by completion handler
xprtrmda: Reduce calls to ib_poll_cq() in completion handlers
xprtrmda: Reduce lock contention in completion handlers
xprtrdma: Split the completion queue
xprtrdma: Make rpcrdma_ep_destroy() return void
...
41 files changed, 1804 insertions(+), 1896 deletions(-)
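A recurring change in the diffs below is the ->pg_test coalescing hook: it used to answer a bool ("may this request join the current I/O?") and now returns a size_t byte count, so a request that only partially fits (for example, one straddling a pNFS stripe boundary) can be trimmed instead of rejected outright. A minimal userspace sketch of the new contract follows; the names (stripe_unit, req_off, req_len) are illustrative, not the kernel's own helpers:

#include <stdint.h>
#include <stdio.h>

/* Old contract: all-or-nothing coalescing decision. */
static int pg_test_bool(uint64_t prev_off, uint64_t req_off, uint32_t stripe_unit)
{
	return (prev_off / stripe_unit) == (req_off / stripe_unit);
}

/* New contract: 0 = cannot coalesce, otherwise how many bytes fit
 * in the current stripe (at most the request's own length). */
static uint32_t pg_test_size(uint64_t req_off, uint32_t req_len, uint32_t stripe_unit)
{
	uint32_t stripe_offset = (uint32_t)(req_off % stripe_unit);
	uint32_t room = stripe_unit - stripe_offset;

	return room < req_len ? room : req_len;
}

int main(void)
{
	/* A 2048-byte request starting 1024 bytes before a 4096-byte
	 * stripe boundary: the old test can only accept or reject it
	 * whole, the new one accepts exactly the first 1024 bytes. */
	printf("old: %d\n", pg_test_bool(0, 3072, 4096));
	printf("new: %u bytes\n", pg_test_size(3072, 2048, 4096));
	return 0;
}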
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 03192a66c143..4782e0840dcc 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -29,8 +29,6 @@ nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
 nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
 nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
 
-obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
-nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
-
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
 obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 65d849bdf77a..9b431f44fad9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,7 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
 			SetPageUptodate(bvec->bv_page);
 
 	if (err) {
-		struct nfs_read_data *rdata = par->data;
+		struct nfs_pgio_data *rdata = par->data;
 		struct nfs_pgio_header *header = rdata->header;
 
 		if (!header->pnfs_error)
@@ -224,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err)
 static void bl_read_cleanup(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_read_data *rdata;
+	struct nfs_pgio_data *rdata;
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	rdata = container_of(task, struct nfs_read_data, task);
+	rdata = container_of(task, struct nfs_pgio_data, task);
 	pnfs_ld_read_done(rdata);
 }
 
 static void
 bl_end_par_io_read(void *data, int unused)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	rdata->task.tk_status = rdata->header->pnfs_error;
 	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
@@ -242,7 +242,7 @@ bl_end_par_io_read(void *data, int unused)
 }
 
 static enum pnfs_try_status
-bl_read_pagelist(struct nfs_read_data *rdata)
+bl_read_pagelist(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *header = rdata->header;
 	int i, hole;
@@ -390,7 +390,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 	}
 
 	if (unlikely(err)) {
-		struct nfs_write_data *data = par->data;
+		struct nfs_pgio_data *data = par->data;
 		struct nfs_pgio_header *header = data->header;
 
 		if (!header->pnfs_error)
@@ -405,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
 {
 	struct parallel_io *par = bio->bi_private;
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct nfs_write_data *data = par->data;
+	struct nfs_pgio_data *data = par->data;
 	struct nfs_pgio_header *header = data->header;
 
 	if (!uptodate) {
@@ -423,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err)
 static void bl_write_cleanup(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_write_data *wdata;
+	struct nfs_pgio_data *wdata;
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	wdata = container_of(task, struct nfs_write_data, task);
+	wdata = container_of(task, struct nfs_pgio_data, task);
 	if (likely(!wdata->header->pnfs_error)) {
 		/* Marks for LAYOUTCOMMIT */
 		mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
@@ -438,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work)
 /* Called when last of bios associated with a bl_write_pagelist call finishes */
 static void bl_end_par_io_write(void *data, int num_se)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (unlikely(wdata->header->pnfs_error)) {
 		bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
@@ -673,7 +673,7 @@ check_page:
 }
 
 static enum pnfs_try_status
-bl_write_pagelist(struct nfs_write_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
 {
 	struct nfs_pgio_header *header = wdata->header;
 	int i, ret, npg_zero, pg_index, last = 0;
@@ -1189,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 	pnfs_generic_pg_init_read(pgio, req);
 }
 
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
 bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		struct nfs_page *req)
 {
 	if (pgio->pg_dreq != NULL &&
 	    !is_aligned_req(req, SECTOR_SIZE))
-		return false;
+		return 0;
 
 	return pnfs_generic_pg_test(pgio, prev, req);
 }
@@ -1241,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 	}
 }
 
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
 bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		 struct nfs_page *req)
 {
 	if (pgio->pg_dreq != NULL &&
 	    !is_aligned_req(req, PAGE_CACHE_SIZE))
-		return false;
+		return 0;
 
 	return pnfs_generic_pg_test(pgio, prev, req);
 }
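Under the new contract the block layout's gate for unaligned direct I/O stays behaviourally the same: instead of returning false it coalesces zero bytes. A hedged sketch of that kind of alignment gate; is_aligned() below is illustrative and stands in for the kernel's is_aligned_req(), which is not shown in this diff:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SIZE 512u

/* Illustrative stand-in for the kernel's alignment check. */
static bool is_aligned(uint64_t offset, uint32_t len)
{
	return (offset % SECTOR_SIZE) == 0 && (len % SECTOR_SIZE) == 0;
}

/* New-style pg_test: 0 = do not coalesce, otherwise coalescible bytes. */
static uint32_t bl_test(uint64_t offset, uint32_t len, bool direct_io)
{
	if (direct_io && !is_aligned(offset, len))
		return 0;	/* was "return false" before this merge */
	return len;		/* the kernel defers to the generic test here */
}

int main(void)
{
	printf("%u\n", bl_test(513, 1024, true));	/* 0: unaligned */
	printf("%u\n", bl_test(512, 1024, true));	/* 1024 */
	return 0;
}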
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b8797ae6831f..4ad7bc388679 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
 	return atomic_dec_and_test(&dreq->io_count);
 }
 
+/*
+ * nfs_direct_select_verf - select the right verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
+ * @ds_idx - index of data server in data server list, only valid if ds_clp set
+ *
+ * returns the correct verifier to use given the role of the server
+ */
+static struct nfs_writeverf *
+nfs_direct_select_verf(struct nfs_direct_req *dreq,
+		       struct nfs_client *ds_clp,
+		       int ds_idx)
+{
+	struct nfs_writeverf *verfp = &dreq->verf;
+
+#ifdef CONFIG_NFS_V4_1
+	if (ds_clp) {
+		/* pNFS is in use, use the DS verf */
+		if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
+			verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
+		else
+			WARN_ON_ONCE(1);
+	}
+#endif
+	return verfp;
+}
+
+
+/*
+ * nfs_direct_set_hdr_verf - set the write/commit verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verfs
+ *
+ * Set the server's (MDS or DS) "seen" verifier
+ */
+static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
+				    struct nfs_pgio_header *hdr)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+				      hdr->data->ds_idx);
+	WARN_ON_ONCE(verfp->committed >= 0);
+	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+	WARN_ON_ONCE(verfp->committed < 0);
+}
+
+/*
+ * nfs_direct_cmp_hdr_verf - compare verifier for pgio header
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verf
+ *
+ * set the server's "seen" verf if not initialized.
+ * returns result of comparison between @hdr->verf and the "seen"
+ * verf of the server used by @hdr (DS or MDS)
+ */
+static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
+					  struct nfs_pgio_header *hdr)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+					 hdr->data->ds_idx);
+	if (verfp->committed < 0) {
+		nfs_direct_set_hdr_verf(dreq, hdr);
+		return 0;
+	}
+	return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+}
+
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
+/*
+ * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
+ * @dreq - direct request possibly spanning multiple servers
+ * @data - commit data to validate against previously seen verf
+ *
+ * returns result of comparison between @data->verf and the verf of
+ * the server used by @data (DS or MDS)
+ */
+static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
+					   struct nfs_commit_data *data)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, data->ds_clp,
+					 data->ds_commit_index);
+	WARN_ON_ONCE(verfp->committed < 0);
+	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+}
+#endif
+
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -168,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 	kref_get(&dreq->kref);
 	init_completion(&dreq->completion);
 	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */
 	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
 	spin_lock_init(&dreq->lock);
 
@@ -380,8 +472,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 			/* XXX do we need to do the eof zeroing found in async_filler? */
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
+			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -424,7 +515,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
-	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
+	nfs_pageio_init_read(&desc, dreq->inode, false,
 			     &nfs_direct_read_completion_ops);
 	get_dreq(dreq);
 	desc.pg_dreq = dreq;
@@ -564,7 +655,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	dreq->count = 0;
 	get_dreq(dreq);
 
-	NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE,
+	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 
@@ -603,7 +694,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 		dprintk("NFS: %5u commit failed with error %d.\n",
 			data->task.tk_pid, status);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-	} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+	} else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
 		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 	}
@@ -750,8 +841,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
+			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -813,13 +903,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 		if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
 			bit = NFS_IOHDR_NEED_RESCHED;
 		else if (dreq->flags == 0) {
-			memcpy(&dreq->verf, hdr->verf,
-			       sizeof(dreq->verf));
+			nfs_direct_set_hdr_verf(dreq, hdr);
 			bit = NFS_IOHDR_NEED_COMMIT;
 			dreq->flags = NFS_ODIRECT_DO_COMMIT;
 		} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
-			if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
-				dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+			if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+				dreq->flags =
+					NFS_ODIRECT_RESCHED_WRITES;
 				bit = NFS_IOHDR_NEED_RESCHED;
 			} else
 				bit = NFS_IOHDR_NEED_COMMIT;
@@ -829,6 +919,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 	spin_unlock(&dreq->lock);
 
 	while (!list_empty(&hdr->pages)) {
+		bool do_destroy = true;
+
 		req = nfs_list_entry(hdr->pages.next);
 		nfs_list_remove_request(req);
 		switch (bit) {
@@ -836,6 +928,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 		case NFS_IOHDR_NEED_COMMIT:
 			kref_get(&req->wb_kref);
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+			do_destroy = false;
 		}
 		nfs_unlock_and_release_request(req);
 	}
@@ -874,7 +967,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
-	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
+	nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 	get_dreq(dreq);
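The new direct-I/O helpers above implement one idea: with pNFS a single O_DIRECT request may hit several data servers, so one write verifier per dreq is no longer enough. Each server slot starts out marked unset and is filled by the first reply; a later mismatch means that server rebooted between WRITE and COMMIT and the data must be rewritten. A self-contained sketch of that set-or-compare pattern, with simplified types and a -1 sentinel mirroring NFS_INVALID_STABLE_HOW:

#include <stdio.h>
#include <string.h>

struct writeverf {
	int committed;          /* < 0 means "no verifier seen yet" */
	unsigned char data[8];  /* opaque boot verifier from the server */
};

/* First reply from a server records its verifier; later replies must
 * match it, or the writes are rescheduled (returns nonzero). */
static int set_or_cmp_verf(struct writeverf *seen, const struct writeverf *new)
{
	if (seen->committed < 0) {
		*seen = *new;
		return 0;
	}
	return memcmp(seen->data, new->data, sizeof(seen->data));
}

int main(void)
{
	struct writeverf slot = { .committed = -1 };
	struct writeverf boot1 = { 0, "AAAAAAA" }, boot2 = { 0, "BBBBBBB" };

	printf("%d\n", set_or_cmp_verf(&slot, &boot1));      /* 0: recorded */
	printf("%d\n", set_or_cmp_verf(&slot, &boot1));      /* 0: matches  */
	printf("%d\n", set_or_cmp_verf(&slot, &boot2) != 0); /* 1: reboot   */
	return 0;
}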
diff --git a/fs/nfs/filelayout/Makefile b/fs/nfs/filelayout/Makefile
new file mode 100644
index 000000000000..8516cdffb9e9
--- /dev/null
+++ b/fs/nfs/filelayout/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the pNFS Files Layout Driver kernel module
+#
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
+nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/filelayout/filelayout.c
index b9a35c05b60f..d2eba1c13b7e 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -35,11 +35,11 @@
 
 #include <linux/sunrpc/metrics.h>
 
-#include "nfs4session.h"
-#include "internal.h"
-#include "delegation.h"
-#include "nfs4filelayout.h"
-#include "nfs4trace.h"
+#include "../nfs4session.h"
+#include "../internal.h"
+#include "../delegation.h"
+#include "filelayout.h"
+#include "../nfs4trace.h"
 
 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
 
@@ -84,7 +84,7 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
 	BUG();
 }
 
-static void filelayout_reset_write(struct nfs_write_data *data)
+static void filelayout_reset_write(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct rpc_task *task = &data->task;
@@ -105,7 +105,7 @@ static void filelayout_reset_write(struct nfs_write_data *data)
 	}
 }
 
-static void filelayout_reset_read(struct nfs_read_data *data)
+static void filelayout_reset_read(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct rpc_task *task = &data->task;
@@ -243,7 +243,7 @@ wait_on_recovery:
 /* NFS_PROTO call done callback routines */
 
 static int filelayout_read_done_cb(struct rpc_task *task,
-				struct nfs_read_data *data)
+				struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	int err;
@@ -270,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
  * rfc5661 is not clear about which credential should be used.
  */
 static void
-filelayout_set_layoutcommit(struct nfs_write_data *wdata)
+filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 
@@ -279,7 +279,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
 		return;
 
 	pnfs_set_layoutcommit(wdata);
-	dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
 		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
 
@@ -305,7 +305,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
  */
 static void filelayout_read_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
 		rpc_exit(task, -EIO);
@@ -317,7 +317,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 		rpc_exit(task, 0);
 		return;
 	}
-	rdata->read_done_cb = filelayout_read_done_cb;
+	rdata->pgio_done_cb = filelayout_read_done_cb;
 
 	if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
 			&rdata->args.seq_args,
@@ -331,7 +331,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_read_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
@@ -347,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
 
 static void filelayout_read_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_read_release(void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 	struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
 
 	filelayout_fenceme(lo->plh_inode, lo);
@@ -363,7 +363,7 @@ static void filelayout_read_release(void *data)
 }
 
 static int filelayout_write_done_cb(struct rpc_task *task,
-				struct nfs_write_data *data)
+				struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	int err;
@@ -419,7 +419,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
 
 static void filelayout_write_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
 		rpc_exit(task, -EIO);
@@ -443,7 +443,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_write_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
 	    task->tk_status == 0) {
@@ -457,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
 
 static void filelayout_write_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_write_release(void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 	struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
 
 	filelayout_fenceme(lo->plh_inode, lo);
@@ -529,7 +529,7 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
 };
 
 static enum pnfs_try_status
-filelayout_read_pagelist(struct nfs_read_data *data)
+filelayout_read_pagelist(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -560,6 +560,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 	/* No multipath support. Use first DS */
 	atomic_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
+	data->ds_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
 	if (fh)
 		data->args.fh = fh;
@@ -568,14 +569,14 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 	data->mds_offset = offset;
 
 	/* Perform an asynchronous read to ds */
-	nfs_initiate_read(ds_clnt, data,
-				  &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
+	nfs_initiate_pgio(ds_clnt, data,
+			    &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
 }
 
 /* Perform async writes. */
 static enum pnfs_try_status
-filelayout_write_pagelist(struct nfs_write_data *data, int sync)
+filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -600,20 +601,18 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 		__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
 		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
 
-	data->write_done_cb = filelayout_write_done_cb;
+	data->pgio_done_cb = filelayout_write_done_cb;
 	atomic_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
+	data->ds_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
 	if (fh)
 		data->args.fh = fh;
-	/*
-	 * Get the file offset on the dserver. Set the write offset to
-	 * this offset and save the original offset.
-	 */
+
 	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
 
 	/* Perform an asynchronous write */
-	nfs_initiate_write(ds_clnt, data,
+	nfs_initiate_pgio(ds_clnt, data,
 				    &filelayout_write_call_ops, sync,
 				    RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
@@ -637,7 +636,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 	struct nfs4_deviceid_node *d;
 	struct nfs4_file_layout_dsaddr *dsaddr;
 	int status = -EINVAL;
-	struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
 
 	dprintk("--> %s\n", __func__);
 
@@ -655,7 +653,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 		goto out;
 	}
 
-	if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
+	if (!fl->stripe_unit) {
 		dprintk("%s Invalid stripe unit (%u)\n",
 			__func__, fl->stripe_unit);
 		goto out;
@@ -692,12 +690,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 		goto out_put;
 	}
 
-	if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
-		dprintk("%s Stripe unit (%u) not aligned with rsize %u "
-			"wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
-			nfss->wsize);
-	}
-
 	status = 0;
 out:
 	dprintk("--> %s returns %d\n", __func__, status);
@@ -850,11 +842,15 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 {
 	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
 	struct pnfs_commit_bucket *buckets;
-	int size;
+	int size, i;
 
 	if (fl->commit_through_mds)
 		return 0;
-	if (cinfo->ds->nbuckets != 0) {
+
+	size = (fl->stripe_type == STRIPE_SPARSE) ?
+		fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+	if (cinfo->ds->nbuckets >= size) {
 		/* This assumes there is only one IOMODE_RW lseg. What
 		 * we really want to do is have a layout_hdr level
 		 * dictionary of <multipath_list4, fh> keys, each
@@ -864,30 +860,36 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 		return 0;
 	}
 
-	size = (fl->stripe_type == STRIPE_SPARSE) ?
-		fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
 	buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
 			  gfp_flags);
 	if (!buckets)
 		return -ENOMEM;
-	else {
-		int i;
+	for (i = 0; i < size; i++) {
+		INIT_LIST_HEAD(&buckets[i].written);
+		INIT_LIST_HEAD(&buckets[i].committing);
+		/* mark direct verifier as unset */
+		buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
+	}
 
-		spin_lock(cinfo->lock);
-		if (cinfo->ds->nbuckets != 0)
-			kfree(buckets);
-		else {
-			cinfo->ds->buckets = buckets;
-			cinfo->ds->nbuckets = size;
-			for (i = 0; i < size; i++) {
-				INIT_LIST_HEAD(&buckets[i].written);
-				INIT_LIST_HEAD(&buckets[i].committing);
-			}
-		}
-		spin_unlock(cinfo->lock);
-		return 0;
+	spin_lock(cinfo->lock);
+	if (cinfo->ds->nbuckets >= size)
+		goto out;
+	for (i = 0; i < cinfo->ds->nbuckets; i++) {
+		list_splice(&cinfo->ds->buckets[i].written,
+			    &buckets[i].written);
+		list_splice(&cinfo->ds->buckets[i].committing,
+			    &buckets[i].committing);
+		buckets[i].direct_verf.committed =
+			cinfo->ds->buckets[i].direct_verf.committed;
+		buckets[i].wlseg = cinfo->ds->buckets[i].wlseg;
+		buckets[i].clseg = cinfo->ds->buckets[i].clseg;
 	}
+	swap(cinfo->ds->buckets, buckets);
+	cinfo->ds->nbuckets = size;
+out:
+	spin_unlock(cinfo->lock);
+	kfree(buckets);
+	return 0;
 }
 
 static struct pnfs_layout_segment *
@@ -915,47 +917,51 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 /*
  * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
  *
- * return true  : coalesce page
- * return false : don't coalesce page
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
  */
-static bool
+static size_t
 filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		   struct nfs_page *req)
 {
+	unsigned int size;
 	u64 p_stripe, r_stripe;
-	u32 stripe_unit;
+	u32 stripe_offset;
+	u64 segment_offset = pgio->pg_lseg->pls_range.offset;
+	u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
 
-	if (!pnfs_generic_pg_test(pgio, prev, req) ||
-	    !nfs_generic_pg_test(pgio, prev, req))
-		return false;
+	/* calls nfs_generic_pg_test */
+	size = pnfs_generic_pg_test(pgio, prev, req);
+	if (!size)
+		return 0;
 
-	p_stripe = (u64)req_offset(prev);
-	r_stripe = (u64)req_offset(req);
-	stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
+	/* see if req and prev are in the same stripe */
+	if (prev) {
+		p_stripe = (u64)req_offset(prev) - segment_offset;
+		r_stripe = (u64)req_offset(req) - segment_offset;
+		do_div(p_stripe, stripe_unit);
+		do_div(r_stripe, stripe_unit);
 
-	do_div(p_stripe, stripe_unit);
-	do_div(r_stripe, stripe_unit);
+		if (p_stripe != r_stripe)
+			return 0;
+	}
 
-	return (p_stripe == r_stripe);
+	/* calculate remaining bytes in the current stripe */
+	div_u64_rem((u64)req_offset(req) - segment_offset,
+			stripe_unit,
+			&stripe_offset);
+	WARN_ON_ONCE(stripe_offset > stripe_unit);
+	if (stripe_offset >= stripe_unit)
+		return 0;
+	return min(stripe_unit - (unsigned int)stripe_offset, size);
 }
 
 static void
 filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
			struct nfs_page *req)
 {
-	WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
-	if (req->wb_offset != req->wb_pgbase) {
-		/*
-		 * Handling unaligned pages is difficult, because have to
-		 * somehow split a req in two in certain cases in the
-		 * pg.test code. Avoid this by just not using pnfs
-		 * in this case.
-		 */
-		nfs_pageio_reset_read_mds(pgio);
-		return;
-	}
-	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+	if (!pgio->pg_lseg)
+		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 					   req->wb_context,
 					   0,
 					   NFS4_MAX_UINT64,
@@ -973,11 +979,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 	struct nfs_commit_info cinfo;
 	int status;
 
-	WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
-	if (req->wb_offset != req->wb_pgbase)
-		goto out_mds;
-	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+	if (!pgio->pg_lseg)
+		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 					   req->wb_context,
 					   0,
 					   NFS4_MAX_UINT64,
@@ -1067,6 +1070,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
 	 */
 	j = nfs4_fl_calc_j_index(lseg, req_offset(req));
 	i = select_bucket_index(fl, j);
+	spin_lock(cinfo->lock);
 	buckets = cinfo->ds->buckets;
 	list = &buckets[i].written;
 	if (list_empty(list)) {
@@ -1080,6 +1084,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
 	}
 	set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
 	cinfo->ds->nwritten++;
+	spin_unlock(cinfo->lock);
 	return list;
 }
 
@@ -1176,6 +1181,7 @@ transfer_commit_list(struct list_head *src, struct list_head *dst,
 	return ret;
 }
 
+/* Note called with cinfo->lock held. */
 static int
 filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
 			       struct nfs_commit_info *cinfo,
@@ -1220,15 +1226,18 @@ static void filelayout_recover_commit_reqs(struct list_head *dst,
 				struct nfs_commit_info *cinfo)
 {
 	struct pnfs_commit_bucket *b;
+	struct pnfs_layout_segment *freeme;
 	int i;
 
+restart:
 	spin_lock(cinfo->lock);
 	for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
 		if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
-			spin_unlock(cinfo->lock);
-			pnfs_put_lseg(b->wlseg);
+			freeme = b->wlseg;
 			b->wlseg = NULL;
-			spin_lock(cinfo->lock);
+			spin_unlock(cinfo->lock);
+			pnfs_put_lseg(freeme);
+			goto restart;
 		}
 	}
 	cinfo->ds->nwritten = 0;
@@ -1243,6 +1252,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
 	struct nfs_commit_data *data;
 	int i, j;
 	unsigned int nreq = 0;
+	struct pnfs_layout_segment *freeme;
 
 	fl_cinfo = cinfo->ds;
 	bucket = fl_cinfo->buckets;
@@ -1253,8 +1263,10 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
 		if (!data)
 			break;
 		data->ds_commit_index = i;
+		spin_lock(cinfo->lock);
 		data->lseg = bucket->clseg;
 		bucket->clseg = NULL;
+		spin_unlock(cinfo->lock);
 		list_add(&data->pages, list);
 		nreq++;
 	}
@@ -1264,8 +1276,11 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
 		if (list_empty(&bucket->committing))
 			continue;
 		nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
-		pnfs_put_lseg(bucket->clseg);
+		spin_lock(cinfo->lock);
+		freeme = bucket->clseg;
 		bucket->clseg = NULL;
+		spin_unlock(cinfo->lock);
+		pnfs_put_lseg(freeme);
 	}
 	/* Caller will clean up entries put on list */
 	return nreq;
@@ -1330,7 +1345,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
 	struct nfs4_filelayout *flo;
 
 	flo = kzalloc(sizeof(*flo), gfp_flags);
-	return &flo->generic_hdr;
+	return flo != NULL ? &flo->generic_hdr : NULL;
 }
 
 static void
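Several of the filelayout changes above follow the same locking discipline: pnfs_put_lseg() may free the layout segment and must not be called while cinfo->lock is held, so the pointer is detached under the lock and the reference is dropped only after the lock is released. A sketch of that "freeme" pattern, with a pthread mutex standing in for the kernel spinlock and illustrative names throughout:

#include <pthread.h>
#include <stdlib.h>

struct lseg { int refcount; };
struct bucket { struct lseg *clseg; };

static pthread_mutex_t cinfo_lock = PTHREAD_MUTEX_INITIALIZER;

/* May free the segment, so it must not run under cinfo_lock. */
static void put_lseg(struct lseg *l)
{
	if (l && --l->refcount == 0)
		free(l);
}

static void clear_bucket(struct bucket *b)
{
	struct lseg *freeme;

	pthread_mutex_lock(&cinfo_lock);
	freeme = b->clseg;		/* detach under the lock */
	b->clseg = NULL;
	pthread_mutex_unlock(&cinfo_lock);
	put_lseg(freeme);		/* release outside the lock */
}

int main(void)
{
	struct lseg *l = calloc(1, sizeof(*l));
	struct bucket b = { .clseg = l };

	l->refcount = 1;
	clear_bucket(&b);
	return 0;
}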
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/filelayout/filelayout.h
index cebd20e7e923..ffbddf2219ea 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -30,7 +30,7 @@
 #ifndef FS_NFS_NFS4FILELAYOUT_H
 #define FS_NFS_NFS4FILELAYOUT_H
 
-#include "pnfs.h"
+#include "../pnfs.h"
 
 /*
  * Default data server connection timeout and retrans vaules.
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index b9c61efe9660..44bf0140a4c7 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -33,9 +33,9 @@
 #include <linux/module.h>
 #include <linux/sunrpc/addr.h>
 
-#include "internal.h"
-#include "nfs4session.h"
-#include "nfs4filelayout.h"
+#include "../internal.h"
+#include "../nfs4session.h"
+#include "filelayout.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PNFS_LD
 
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 66984a9aafaa..b94f80420a58 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -120,7 +120,8 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
 
 	security_d_instantiate(ret, inode);
 	spin_lock(&ret->d_lock);
-	if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
+	if (IS_ROOT(ret) && !ret->d_fsdata &&
+	    !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
 		ret->d_fsdata = name;
 		name = NULL;
 	}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e6f7398d2b3c..c496f8a74639 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1575,18 +1575,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			inode->i_version = fattr->change_attr;
 		}
 	} else if (server->caps & NFS_CAP_CHANGE_ATTR)
-		invalid |= save_cache_validity;
+		nfsi->cache_validity |= save_cache_validity;
 
 	if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
 		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
 	} else if (server->caps & NFS_CAP_MTIME)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_REVAL_FORCED);
 
 	if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
 		memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
 	} else if (server->caps & NFS_CAP_CTIME)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_REVAL_FORCED);
 
 	/* Check if our cached file size is stale */
@@ -1608,7 +1610,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 				(long long)new_isize);
 		}
 	} else
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_REVAL_PAGECACHE
 				| NFS_INO_REVAL_FORCED);
 
@@ -1616,7 +1619,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 	if (fattr->valid & NFS_ATTR_FATTR_ATIME)
 		memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
 	else if (server->caps & NFS_CAP_ATIME)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATIME
 				| NFS_INO_REVAL_FORCED);
 
 	if (fattr->valid & NFS_ATTR_FATTR_MODE) {
@@ -1627,7 +1631,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 		}
 	} else if (server->caps & NFS_CAP_MODE)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL
 				| NFS_INO_REVAL_FORCED);
@@ -1638,7 +1643,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			inode->i_uid = fattr->uid;
 		}
 	} else if (server->caps & NFS_CAP_OWNER)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL
 				| NFS_INO_REVAL_FORCED);
@@ -1649,7 +1655,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			inode->i_gid = fattr->gid;
 		}
 	} else if (server->caps & NFS_CAP_OWNER_GROUP)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL
 				| NFS_INO_REVAL_FORCED);
@@ -1662,7 +1669,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			set_nlink(inode, fattr->nlink);
 		}
 	} else if (server->caps & NFS_CAP_NLINK)
-		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_ATTR
 				| NFS_INO_REVAL_FORCED);
 
 	if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
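The inode.c hunks above all make the same substitution: flags for attributes the server did not return go straight into nfsi->cache_validity instead of the local invalid accumulator, so they survive the point where the inode is declared uptodate (the stable fix called out in the pull message). A deliberately simplified toy illustration of why the local accumulator can lose them; the filtering condition shown is an assumption standing in for "the accumulated flags may never be applied", not the exact kernel code path:

#include <stdio.h>

#define INO_INVALID_ATTR 0x1UL

struct nfs_inode { unsigned long cache_validity; };

int main(void)
{
	struct nfs_inode nfsi = { .cache_validity = INO_INVALID_ATTR };
	unsigned long save_cache_validity, invalid = 0;
	int skip_apply = 1;	/* the filtering case */

	save_cache_validity = nfsi.cache_validity;
	nfsi.cache_validity = 0;	/* cleared while updating */

	/* Old: remember "attribute not checked" only in the local... */
	invalid |= save_cache_validity;
	/* ...which may never make it back: */
	if (!skip_apply)
		nfsi.cache_validity |= invalid;
	printf("old: %#lx\n", nfsi.cache_validity);	/* 0: flag lost */

	/* New: write through immediately, so the flag survives. */
	nfsi.cache_validity |= save_cache_validity;
	printf("new: %#lx\n", nfsi.cache_validity);	/* 0x1 */
	return 0;
}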
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index dd8bfc2e2464..8b69cba1bb04 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -231,13 +231,20 @@ extern void nfs_destroy_writepagecache(void);
 
 extern int __init nfs_init_directcache(void);
 extern void nfs_destroy_directcache(void);
-extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
 extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 			struct nfs_pgio_header *hdr,
 			void (*release)(struct nfs_pgio_header *hdr));
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
 int nfs_iocounter_wait(struct nfs_io_counter *c);
 
+extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
+void nfs_rw_header_free(struct nfs_pgio_header *);
+void nfs_pgio_data_release(struct nfs_pgio_data *);
+int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
+int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
+		      const struct rpc_call_ops *, int, int);
+
 static inline void nfs_iocounter_init(struct nfs_io_counter *c)
 {
 	c->flags = 0;
@@ -395,19 +402,11 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool
 
 struct nfs_pgio_completion_ops;
 /* read.c */
-extern struct nfs_read_header *nfs_readhdr_alloc(void);
-extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-			struct inode *inode,
+			struct inode *inode, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
-extern int nfs_initiate_read(struct rpc_clnt *clnt,
-			     struct nfs_read_data *data,
-			     const struct rpc_call_ops *call_ops, int flags);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
-			      struct nfs_pgio_header *hdr);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* super.c */
 void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
@@ -422,19 +421,10 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 
 /* write.c */
 extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-			struct inode *inode, int ioflags,
+			struct inode *inode, int ioflags, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
-extern struct nfs_write_header *nfs_writehdr_alloc(void);
-extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
429 | extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, | ||
430 | struct nfs_pgio_header *hdr); | ||
431 | extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); | 426 | extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); |
432 | extern void nfs_writedata_release(struct nfs_write_data *wdata); | ||
433 | extern void nfs_commit_free(struct nfs_commit_data *p); | 427 | extern void nfs_commit_free(struct nfs_commit_data *p); |
434 | extern int nfs_initiate_write(struct rpc_clnt *clnt, | ||
435 | struct nfs_write_data *data, | ||
436 | const struct rpc_call_ops *call_ops, | ||
437 | int how, int flags); | ||
438 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); | 428 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); |
439 | extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); | 429 | extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); |
440 | extern int nfs_initiate_commit(struct rpc_clnt *clnt, | 430 | extern int nfs_initiate_commit(struct rpc_clnt *clnt, |
@@ -447,6 +437,7 @@ extern void nfs_init_commit(struct nfs_commit_data *data, | |||
447 | struct nfs_commit_info *cinfo); | 437 | struct nfs_commit_info *cinfo); |
448 | int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, | 438 | int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, |
449 | struct nfs_commit_info *cinfo, int max); | 439 | struct nfs_commit_info *cinfo, int max); |
440 | unsigned long nfs_reqs_to_commit(struct nfs_commit_info *); | ||
450 | int nfs_scan_commit(struct inode *inode, struct list_head *dst, | 441 | int nfs_scan_commit(struct inode *inode, struct list_head *dst, |
451 | struct nfs_commit_info *cinfo); | 442 | struct nfs_commit_info *cinfo); |
452 | void nfs_mark_request_commit(struct nfs_page *req, | 443 | void nfs_mark_request_commit(struct nfs_page *req, |
@@ -492,7 +483,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) | |||
492 | extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); | 483 | extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); |
493 | 484 | ||
494 | /* nfs4proc.c */ | 485 | /* nfs4proc.c */ |
495 | extern void __nfs4_read_done_cb(struct nfs_read_data *); | 486 | extern void __nfs4_read_done_cb(struct nfs_pgio_data *); |
496 | extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, | 487 | extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, |
497 | const struct rpc_timeout *timeparms, | 488 | const struct rpc_timeout *timeparms, |
498 | const char *ip_addr); | 489 | const char *ip_addr); |
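The internal.h hunks replace the parallel read/write prototypes (nfs_readhdr_alloc, nfs_writehdr_alloc, nfs_initiate_read, nfs_initiate_write, nfs_generic_pagein, nfs_generic_flush, and the release helpers) with a single pgio family: nfs_rw_header_alloc/free, nfs_pgio_data_release, nfs_generic_pgio and nfs_initiate_pgio, keyed by a struct nfs_rw_ops argument. A compilable sketch of that consolidation pattern; the struct members and helper below are illustrative, not the kernel's definitions:

#include <stdio.h>

struct pgio_data { const char *what; };	/* stand-in for nfs_pgio_data */

/* Per-direction operations: one table for reads, one for writes,
 * consumed by a single generic I/O path. */
struct rw_ops {
	int mode;				/* e.g. FMODE_READ or FMODE_WRITE */
	void (*setup)(struct pgio_data *d);
};

static void read_setup(struct pgio_data *d)  { d->what = "READ"; }
static void write_setup(struct pgio_data *d) { d->what = "WRITE"; }

static const struct rw_ops read_ops  = { 1, read_setup };
static const struct rw_ops write_ops = { 2, write_setup };

/* One initiator instead of a read initiator and a write initiator. */
static int initiate_pgio(const struct rw_ops *ops, struct pgio_data *d)
{
	ops->setup(d);
	printf("initiating %s (mode %d)\n", d->what, ops->mode);
	return 0;
}

int main(void)
{
	struct pgio_data d;
	initiate_pgio(&read_ops, &d);
	initiate_pgio(&write_ops, &d);
	return 0;
}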
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 62db136339ea..5f61b83f4a1c 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c | |||
@@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) | |||
103 | /* | 103 | /* |
104 | * typedef opaque nfsdata<>; | 104 | * typedef opaque nfsdata<>; |
105 | */ | 105 | */ |
106 | static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result) | 106 | static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result) |
107 | { | 107 | { |
108 | u32 recvd, count; | 108 | u32 recvd, count; |
109 | __be32 *p; | 109 | __be32 *p; |
@@ -613,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, | |||
613 | * }; | 613 | * }; |
614 | */ | 614 | */ |
615 | static void encode_readargs(struct xdr_stream *xdr, | 615 | static void encode_readargs(struct xdr_stream *xdr, |
616 | const struct nfs_readargs *args) | 616 | const struct nfs_pgio_args *args) |
617 | { | 617 | { |
618 | u32 offset = args->offset; | 618 | u32 offset = args->offset; |
619 | u32 count = args->count; | 619 | u32 count = args->count; |
@@ -629,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr, | |||
629 | 629 | ||
630 | static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, | 630 | static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, |
631 | struct xdr_stream *xdr, | 631 | struct xdr_stream *xdr, |
632 | const struct nfs_readargs *args) | 632 | const struct nfs_pgio_args *args) |
633 | { | 633 | { |
634 | encode_readargs(xdr, args); | 634 | encode_readargs(xdr, args); |
635 | prepare_reply_buffer(req, args->pages, args->pgbase, | 635 | prepare_reply_buffer(req, args->pages, args->pgbase, |
@@ -649,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, | |||
649 | * }; | 649 | * }; |
650 | */ | 650 | */ |
651 | static void encode_writeargs(struct xdr_stream *xdr, | 651 | static void encode_writeargs(struct xdr_stream *xdr, |
652 | const struct nfs_writeargs *args) | 652 | const struct nfs_pgio_args *args) |
653 | { | 653 | { |
654 | u32 offset = args->offset; | 654 | u32 offset = args->offset; |
655 | u32 count = args->count; | 655 | u32 count = args->count; |
@@ -669,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr, | |||
669 | 669 | ||
670 | static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req, | 670 | static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req, |
671 | struct xdr_stream *xdr, | 671 | struct xdr_stream *xdr, |
672 | const struct nfs_writeargs *args) | 672 | const struct nfs_pgio_args *args) |
673 | { | 673 | { |
674 | encode_writeargs(xdr, args); | 674 | encode_writeargs(xdr, args); |
675 | xdr->buf->flags |= XDRBUF_WRITE; | 675 | xdr->buf->flags |= XDRBUF_WRITE; |
@@ -857,7 +857,7 @@ out_default: | |||
857 | * }; | 857 | * }; |
858 | */ | 858 | */ |
859 | static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, | 859 | static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, |
860 | struct nfs_readres *result) | 860 | struct nfs_pgio_res *result) |
861 | { | 861 | { |
862 | enum nfs_stat status; | 862 | enum nfs_stat status; |
863 | int error; | 863 | int error; |
@@ -878,7 +878,7 @@ out_default: | |||
878 | } | 878 | } |
879 | 879 | ||
880 | static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, | 880 | static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, |
881 | struct nfs_writeres *result) | 881 | struct nfs_pgio_res *result) |
882 | { | 882 | { |
883 | /* All NFSv2 writes are "file sync" writes */ | 883 | /* All NFSv2 writes are "file sync" writes */ |
884 | result->verf->committed = NFS_FILE_SYNC; | 884 | result->verf->committed = NFS_FILE_SYNC; |
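The nfs2xdr.c hunks are mechanical: nfs_readargs and nfs_writeargs collapse into nfs_pgio_args, nfs_readres and nfs_writeres into nfs_pgio_res, and the same substitution repeats in nfs3xdr.c and nfs4xdr.c below. The merged type presumably carries the union of both directions' fields; a rough shape inferred from the args->offset/count/pgbase/pages uses visible in these encoders, not copied from nfs_xdr.h:

#include <stddef.h>

/* Guessed shape of a unified page-I/O argument type; field names
 * follow the uses visible in the encoders above. */
struct pgio_args_sketch {
	unsigned long long offset;	/* byte offset into the file */
	unsigned int count;		/* bytes to read or write */
	unsigned int pgbase;		/* offset into the first page */
	void **pages;			/* page vector backing the I/O */
};

/* One encoder per direction can now share this type. */
static size_t encode_read_sketch(const struct pgio_args_sketch *a)
{
	return (size_t)a->count;	/* placeholder for real XDR encoding */
}

int main(void)
{
	struct pgio_args_sketch a = { 0, 4096, 0, NULL };
	return encode_read_sketch(&a) == 4096 ? 0 : 1;
}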
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index db60149c4579..e7daa42bbc86 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -795,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | |||
795 | return status; | 795 | return status; |
796 | } | 796 | } |
797 | 797 | ||
798 | static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) | 798 | static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) |
799 | { | 799 | { |
800 | struct inode *inode = data->header->inode; | 800 | struct inode *inode = data->header->inode; |
801 | 801 | ||
@@ -807,18 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
807 | return 0; | 807 | return 0; |
808 | } | 808 | } |
809 | 809 | ||
810 | static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) | 810 | static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
811 | { | 811 | { |
812 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; | 812 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; |
813 | } | 813 | } |
814 | 814 | ||
815 | static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | 815 | static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) |
816 | { | 816 | { |
817 | rpc_call_start(task); | 817 | rpc_call_start(task); |
818 | return 0; | 818 | return 0; |
819 | } | 819 | } |
820 | 820 | ||
821 | static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) | 821 | static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) |
822 | { | 822 | { |
823 | struct inode *inode = data->header->inode; | 823 | struct inode *inode = data->header->inode; |
824 | 824 | ||
@@ -829,17 +829,11 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
829 | return 0; | 829 | return 0; |
830 | } | 830 | } |
831 | 831 | ||
832 | static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) | 832 | static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
833 | { | 833 | { |
834 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; | 834 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; |
835 | } | 835 | } |
836 | 836 | ||
837 | static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | ||
838 | { | ||
839 | rpc_call_start(task); | ||
840 | return 0; | ||
841 | } | ||
842 | |||
843 | static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) | 837 | static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) |
844 | { | 838 | { |
845 | rpc_call_start(task); | 839 | rpc_call_start(task); |
@@ -946,13 +940,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = { | |||
946 | .fsinfo = nfs3_proc_fsinfo, | 940 | .fsinfo = nfs3_proc_fsinfo, |
947 | .pathconf = nfs3_proc_pathconf, | 941 | .pathconf = nfs3_proc_pathconf, |
948 | .decode_dirent = nfs3_decode_dirent, | 942 | .decode_dirent = nfs3_decode_dirent, |
943 | .pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare, | ||
949 | .read_setup = nfs3_proc_read_setup, | 944 | .read_setup = nfs3_proc_read_setup, |
950 | .read_pageio_init = nfs_pageio_init_read, | ||
951 | .read_rpc_prepare = nfs3_proc_read_rpc_prepare, | ||
952 | .read_done = nfs3_read_done, | 945 | .read_done = nfs3_read_done, |
953 | .write_setup = nfs3_proc_write_setup, | 946 | .write_setup = nfs3_proc_write_setup, |
954 | .write_pageio_init = nfs_pageio_init_write, | ||
955 | .write_rpc_prepare = nfs3_proc_write_rpc_prepare, | ||
956 | .write_done = nfs3_write_done, | 947 | .write_done = nfs3_write_done, |
957 | .commit_setup = nfs3_proc_commit_setup, | 948 | .commit_setup = nfs3_proc_commit_setup, |
958 | .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare, | 949 | .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare, |
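In nfs3proc.c the identical bodies of nfs3_proc_read_rpc_prepare() and nfs3_proc_write_rpc_prepare() fold into one nfs3_proc_pgio_rpc_prepare(), and the rpc_ops table drops its per-direction pageio_init and rpc_prepare slots in favor of a single .pgio_rpc_prepare hook. A toy model of the resulting call shape (hypothetical caller, not kernel code):

#include <stdio.h>

struct task;                    /* opaque, like struct rpc_task */
struct data { int dir; };       /* stand-in for struct nfs_pgio_data */

/* Single prepare hook, as in nfs3_proc_pgio_rpc_prepare() above:
 * NFSv3 has no session sequencing, so it just starts the call. */
static int pgio_rpc_prepare(struct task *t, struct data *d)
{
	(void)t;
	printf("rpc_call_start for dir=%d\n", d->dir);
	return 0;
}

struct rpc_ops { int (*pgio_rpc_prepare)(struct task *, struct data *); };
static const struct rpc_ops v3_ops = { pgio_rpc_prepare };

int main(void)
{
	struct data rd = { 0 }, wr = { 1 };
	/* one hook serves both directions; callers no longer branch */
	v3_ops.pgio_rpc_prepare(NULL, &rd);
	v3_ops.pgio_rpc_prepare(NULL, &wr);
	return 0;
}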
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index fa6d72131c19..8f4cbe7f4aa8 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -953,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, | |||
953 | * }; | 953 | * }; |
954 | */ | 954 | */ |
955 | static void encode_read3args(struct xdr_stream *xdr, | 955 | static void encode_read3args(struct xdr_stream *xdr, |
956 | const struct nfs_readargs *args) | 956 | const struct nfs_pgio_args *args) |
957 | { | 957 | { |
958 | __be32 *p; | 958 | __be32 *p; |
959 | 959 | ||
@@ -966,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr, | |||
966 | 966 | ||
967 | static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, | 967 | static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, |
968 | struct xdr_stream *xdr, | 968 | struct xdr_stream *xdr, |
969 | const struct nfs_readargs *args) | 969 | const struct nfs_pgio_args *args) |
970 | { | 970 | { |
971 | encode_read3args(xdr, args); | 971 | encode_read3args(xdr, args); |
972 | prepare_reply_buffer(req, args->pages, args->pgbase, | 972 | prepare_reply_buffer(req, args->pages, args->pgbase, |
@@ -992,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, | |||
992 | * }; | 992 | * }; |
993 | */ | 993 | */ |
994 | static void encode_write3args(struct xdr_stream *xdr, | 994 | static void encode_write3args(struct xdr_stream *xdr, |
995 | const struct nfs_writeargs *args) | 995 | const struct nfs_pgio_args *args) |
996 | { | 996 | { |
997 | __be32 *p; | 997 | __be32 *p; |
998 | 998 | ||
@@ -1008,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr, | |||
1008 | 1008 | ||
1009 | static void nfs3_xdr_enc_write3args(struct rpc_rqst *req, | 1009 | static void nfs3_xdr_enc_write3args(struct rpc_rqst *req, |
1010 | struct xdr_stream *xdr, | 1010 | struct xdr_stream *xdr, |
1011 | const struct nfs_writeargs *args) | 1011 | const struct nfs_pgio_args *args) |
1012 | { | 1012 | { |
1013 | encode_write3args(xdr, args); | 1013 | encode_write3args(xdr, args); |
1014 | xdr->buf->flags |= XDRBUF_WRITE; | 1014 | xdr->buf->flags |= XDRBUF_WRITE; |
@@ -1589,7 +1589,7 @@ out_default: | |||
1589 | * }; | 1589 | * }; |
1590 | */ | 1590 | */ |
1591 | static int decode_read3resok(struct xdr_stream *xdr, | 1591 | static int decode_read3resok(struct xdr_stream *xdr, |
1592 | struct nfs_readres *result) | 1592 | struct nfs_pgio_res *result) |
1593 | { | 1593 | { |
1594 | u32 eof, count, ocount, recvd; | 1594 | u32 eof, count, ocount, recvd; |
1595 | __be32 *p; | 1595 | __be32 *p; |
@@ -1625,7 +1625,7 @@ out_overflow: | |||
1625 | } | 1625 | } |
1626 | 1626 | ||
1627 | static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, | 1627 | static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, |
1628 | struct nfs_readres *result) | 1628 | struct nfs_pgio_res *result) |
1629 | { | 1629 | { |
1630 | enum nfs_stat status; | 1630 | enum nfs_stat status; |
1631 | int error; | 1631 | int error; |
@@ -1673,7 +1673,7 @@ out_status: | |||
1673 | * }; | 1673 | * }; |
1674 | */ | 1674 | */ |
1675 | static int decode_write3resok(struct xdr_stream *xdr, | 1675 | static int decode_write3resok(struct xdr_stream *xdr, |
1676 | struct nfs_writeres *result) | 1676 | struct nfs_pgio_res *result) |
1677 | { | 1677 | { |
1678 | __be32 *p; | 1678 | __be32 *p; |
1679 | 1679 | ||
@@ -1697,7 +1697,7 @@ out_eio: | |||
1697 | } | 1697 | } |
1698 | 1698 | ||
1699 | static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, | 1699 | static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, |
1700 | struct nfs_writeres *result) | 1700 | struct nfs_pgio_res *result) |
1701 | { | 1701 | { |
1702 | enum nfs_stat status; | 1702 | enum nfs_stat status; |
1703 | int error; | 1703 | int error; |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e1d1badbe53c..f63cb87cd730 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -337,7 +337,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, | |||
337 | */ | 337 | */ |
338 | static inline void | 338 | static inline void |
339 | nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, | 339 | nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, |
340 | struct rpc_message *msg, struct nfs_write_data *wdata) | 340 | struct rpc_message *msg, struct nfs_pgio_data *wdata) |
341 | { | 341 | { |
342 | if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && | 342 | if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && |
343 | !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) | 343 | !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) |
@@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags, | |||
369 | 369 | ||
370 | static inline void | 370 | static inline void |
371 | nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, | 371 | nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, |
372 | struct rpc_message *msg, struct nfs_write_data *wdata) | 372 | struct rpc_message *msg, struct nfs_pgio_data *wdata) |
373 | { | 373 | { |
374 | } | 374 | } |
375 | #endif /* CONFIG_NFS_V4_1 */ | 375 | #endif /* CONFIG_NFS_V4_1 */ |
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 8de3407e0360..464db9dd6318 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c | |||
@@ -100,8 +100,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
100 | break; | 100 | break; |
101 | mutex_lock(&inode->i_mutex); | 101 | mutex_lock(&inode->i_mutex); |
102 | ret = nfs_file_fsync_commit(file, start, end, datasync); | 102 | ret = nfs_file_fsync_commit(file, start, end, datasync); |
103 | if (!ret && !datasync) | 103 | if (!ret) |
104 | /* application has asked for meta-data sync */ | ||
105 | ret = pnfs_layoutcommit_inode(inode, true); | 104 | ret = pnfs_layoutcommit_inode(inode, true); |
106 | mutex_unlock(&inode->i_mutex); | 105 | mutex_unlock(&inode->i_mutex); |
107 | /* | 106 | /* |
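The nfs4_file_fsync() hunk drops the `!datasync` guard, so pnfs_layoutcommit_inode() now runs on every successful fsync rather than only on metadata syncs; the deleted comment went with the condition it described. The post-change control flow in isolation, with stubbed helpers:

#include <stdio.h>

static int fsync_commit_ok = 0;		/* pretend commit result */

static int file_fsync_commit(void)  { return fsync_commit_ok; }
static int layoutcommit_inode(void) { puts("LAYOUTCOMMIT"); return 0; }

/* After the change: LAYOUTCOMMIT follows any successful commit,
 * for datasync and full fsync alike. */
static int nfs4_fsync_sketch(int datasync)
{
	int ret = file_fsync_commit();
	(void)datasync;			/* no longer consulted here */
	if (!ret)
		ret = layoutcommit_inode();
	return ret;
}

int main(void)
{
	return nfs4_fsync_sketch(1);	/* datasync path now commits too */
}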
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7f55fed8dc64..285ad5334018 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -2027,7 +2027,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) | |||
2027 | return status; | 2027 | return status; |
2028 | } | 2028 | } |
2029 | if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) | 2029 | if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) |
2030 | _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); | 2030 | nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); |
2031 | return 0; | 2031 | return 0; |
2032 | } | 2032 | } |
2033 | 2033 | ||
@@ -4033,12 +4033,12 @@ static bool nfs4_error_stateid_expired(int err) | |||
4033 | return false; | 4033 | return false; |
4034 | } | 4034 | } |
4035 | 4035 | ||
4036 | void __nfs4_read_done_cb(struct nfs_read_data *data) | 4036 | void __nfs4_read_done_cb(struct nfs_pgio_data *data) |
4037 | { | 4037 | { |
4038 | nfs_invalidate_atime(data->header->inode); | 4038 | nfs_invalidate_atime(data->header->inode); |
4039 | } | 4039 | } |
4040 | 4040 | ||
4041 | static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) | 4041 | static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) |
4042 | { | 4042 | { |
4043 | struct nfs_server *server = NFS_SERVER(data->header->inode); | 4043 | struct nfs_server *server = NFS_SERVER(data->header->inode); |
4044 | 4044 | ||
@@ -4055,7 +4055,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) | |||
4055 | } | 4055 | } |
4056 | 4056 | ||
4057 | static bool nfs4_read_stateid_changed(struct rpc_task *task, | 4057 | static bool nfs4_read_stateid_changed(struct rpc_task *task, |
4058 | struct nfs_readargs *args) | 4058 | struct nfs_pgio_args *args) |
4059 | { | 4059 | { |
4060 | 4060 | ||
4061 | if (!nfs4_error_stateid_expired(task->tk_status) || | 4061 | if (!nfs4_error_stateid_expired(task->tk_status) || |
@@ -4068,7 +4068,7 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, | |||
4068 | return true; | 4068 | return true; |
4069 | } | 4069 | } |
4070 | 4070 | ||
4071 | static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) | 4071 | static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) |
4072 | { | 4072 | { |
4073 | 4073 | ||
4074 | dprintk("--> %s\n", __func__); | 4074 | dprintk("--> %s\n", __func__); |
@@ -4077,19 +4077,19 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
4077 | return -EAGAIN; | 4077 | return -EAGAIN; |
4078 | if (nfs4_read_stateid_changed(task, &data->args)) | 4078 | if (nfs4_read_stateid_changed(task, &data->args)) |
4079 | return -EAGAIN; | 4079 | return -EAGAIN; |
4080 | return data->read_done_cb ? data->read_done_cb(task, data) : | 4080 | return data->pgio_done_cb ? data->pgio_done_cb(task, data) : |
4081 | nfs4_read_done_cb(task, data); | 4081 | nfs4_read_done_cb(task, data); |
4082 | } | 4082 | } |
4083 | 4083 | ||
4084 | static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) | 4084 | static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
4085 | { | 4085 | { |
4086 | data->timestamp = jiffies; | 4086 | data->timestamp = jiffies; |
4087 | data->read_done_cb = nfs4_read_done_cb; | 4087 | data->pgio_done_cb = nfs4_read_done_cb; |
4088 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; | 4088 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; |
4089 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); | 4089 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); |
4090 | } | 4090 | } |
4091 | 4091 | ||
4092 | static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | 4092 | static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) |
4093 | { | 4093 | { |
4094 | if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), | 4094 | if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), |
4095 | &data->args.seq_args, | 4095 | &data->args.seq_args, |
@@ -4097,14 +4097,14 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat | |||
4097 | task)) | 4097 | task)) |
4098 | return 0; | 4098 | return 0; |
4099 | if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, | 4099 | if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, |
4100 | data->args.lock_context, FMODE_READ) == -EIO) | 4100 | data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) |
4101 | return -EIO; | 4101 | return -EIO; |
4102 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) | 4102 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) |
4103 | return -EIO; | 4103 | return -EIO; |
4104 | return 0; | 4104 | return 0; |
4105 | } | 4105 | } |
4106 | 4106 | ||
4107 | static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) | 4107 | static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) |
4108 | { | 4108 | { |
4109 | struct inode *inode = data->header->inode; | 4109 | struct inode *inode = data->header->inode; |
4110 | 4110 | ||
@@ -4121,7 +4121,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data | |||
4121 | } | 4121 | } |
4122 | 4122 | ||
4123 | static bool nfs4_write_stateid_changed(struct rpc_task *task, | 4123 | static bool nfs4_write_stateid_changed(struct rpc_task *task, |
4124 | struct nfs_writeargs *args) | 4124 | struct nfs_pgio_args *args) |
4125 | { | 4125 | { |
4126 | 4126 | ||
4127 | if (!nfs4_error_stateid_expired(task->tk_status) || | 4127 | if (!nfs4_error_stateid_expired(task->tk_status) || |
@@ -4134,18 +4134,18 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task, | |||
4134 | return true; | 4134 | return true; |
4135 | } | 4135 | } |
4136 | 4136 | ||
4137 | static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) | 4137 | static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) |
4138 | { | 4138 | { |
4139 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | 4139 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
4140 | return -EAGAIN; | 4140 | return -EAGAIN; |
4141 | if (nfs4_write_stateid_changed(task, &data->args)) | 4141 | if (nfs4_write_stateid_changed(task, &data->args)) |
4142 | return -EAGAIN; | 4142 | return -EAGAIN; |
4143 | return data->write_done_cb ? data->write_done_cb(task, data) : | 4143 | return data->pgio_done_cb ? data->pgio_done_cb(task, data) : |
4144 | nfs4_write_done_cb(task, data); | 4144 | nfs4_write_done_cb(task, data); |
4145 | } | 4145 | } |
4146 | 4146 | ||
4147 | static | 4147 | static |
4148 | bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) | 4148 | bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) |
4149 | { | 4149 | { |
4150 | const struct nfs_pgio_header *hdr = data->header; | 4150 | const struct nfs_pgio_header *hdr = data->header; |
4151 | 4151 | ||
@@ -4158,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) | |||
4158 | return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; | 4158 | return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; |
4159 | } | 4159 | } |
4160 | 4160 | ||
4161 | static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) | 4161 | static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
4162 | { | 4162 | { |
4163 | struct nfs_server *server = NFS_SERVER(data->header->inode); | 4163 | struct nfs_server *server = NFS_SERVER(data->header->inode); |
4164 | 4164 | ||
@@ -4168,8 +4168,8 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag | |||
4168 | } else | 4168 | } else |
4169 | data->args.bitmask = server->cache_consistency_bitmask; | 4169 | data->args.bitmask = server->cache_consistency_bitmask; |
4170 | 4170 | ||
4171 | if (!data->write_done_cb) | 4171 | if (!data->pgio_done_cb) |
4172 | data->write_done_cb = nfs4_write_done_cb; | 4172 | data->pgio_done_cb = nfs4_write_done_cb; |
4173 | data->res.server = server; | 4173 | data->res.server = server; |
4174 | data->timestamp = jiffies; | 4174 | data->timestamp = jiffies; |
4175 | 4175 | ||
@@ -4177,21 +4177,6 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag | |||
4177 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); | 4177 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); |
4178 | } | 4178 | } |
4179 | 4179 | ||
4180 | static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | ||
4181 | { | ||
4182 | if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), | ||
4183 | &data->args.seq_args, | ||
4184 | &data->res.seq_res, | ||
4185 | task)) | ||
4186 | return 0; | ||
4187 | if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, | ||
4188 | data->args.lock_context, FMODE_WRITE) == -EIO) | ||
4189 | return -EIO; | ||
4190 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) | ||
4191 | return -EIO; | ||
4192 | return 0; | ||
4193 | } | ||
4194 | |||
4195 | static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) | 4180 | static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) |
4196 | { | 4181 | { |
4197 | nfs4_setup_sequence(NFS_SERVER(data->inode), | 4182 | nfs4_setup_sequence(NFS_SERVER(data->inode), |
@@ -8432,13 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
8432 | .pathconf = nfs4_proc_pathconf, | 8417 | .pathconf = nfs4_proc_pathconf, |
8433 | .set_capabilities = nfs4_server_capabilities, | 8418 | .set_capabilities = nfs4_server_capabilities, |
8434 | .decode_dirent = nfs4_decode_dirent, | 8419 | .decode_dirent = nfs4_decode_dirent, |
8420 | .pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare, | ||
8435 | .read_setup = nfs4_proc_read_setup, | 8421 | .read_setup = nfs4_proc_read_setup, |
8436 | .read_pageio_init = pnfs_pageio_init_read, | ||
8437 | .read_rpc_prepare = nfs4_proc_read_rpc_prepare, | ||
8438 | .read_done = nfs4_read_done, | 8422 | .read_done = nfs4_read_done, |
8439 | .write_setup = nfs4_proc_write_setup, | 8423 | .write_setup = nfs4_proc_write_setup, |
8440 | .write_pageio_init = pnfs_pageio_init_write, | ||
8441 | .write_rpc_prepare = nfs4_proc_write_rpc_prepare, | ||
8442 | .write_done = nfs4_write_done, | 8424 | .write_done = nfs4_write_done, |
8443 | .commit_setup = nfs4_proc_commit_setup, | 8425 | .commit_setup = nfs4_proc_commit_setup, |
8444 | .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, | 8426 | .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, |
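Two things happen in the nfs4proc.c hunks: the separate read_done_cb and write_done_cb pointers merge into one pgio_done_cb, and the unified nfs4_proc_pgio_rpc_prepare() takes the open mode from data->header->rw_ops->rw_mode instead of hard-coding FMODE_READ or FMODE_WRITE, which is what lets the v4 write-side prepare routine disappear entirely. A sketch of the mode-from-ops pattern; struct layout invented for illustration:

#include <assert.h>

#define FMODE_READ  0x1
#define FMODE_WRITE 0x2

struct rw_ops  { int rw_mode; };
struct header  { const struct rw_ops *rw_ops; };
struct pgio    { struct header *header; };

/* With the mode carried by the ops table, one prepare routine can
 * request the right stateid for either direction. */
static int set_rw_stateid_sketch(int mode)
{
	return (mode == FMODE_READ || mode == FMODE_WRITE) ? 0 : -1;
}

static int pgio_rpc_prepare_sketch(struct pgio *d)
{
	return set_rw_stateid_sketch(d->header->rw_ops->rw_mode);
}

int main(void)
{
	static const struct rw_ops rops = { FMODE_READ };
	static const struct rw_ops wops = { FMODE_WRITE };
	struct header rh = { &rops }, wh = { &wops };
	struct pgio rd = { &rh }, wr = { &wh };
	assert(pgio_rpc_prepare_sketch(&rd) == 0);
	assert(pgio_rpc_prepare_sketch(&wr) == 0);
	return 0;
}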
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c0583b9bef71..848f6853c59e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -1456,7 +1456,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs | |||
1456 | * server that doesn't support a grace period. | 1456 | * server that doesn't support a grace period. |
1457 | */ | 1457 | */ |
1458 | spin_lock(&sp->so_lock); | 1458 | spin_lock(&sp->so_lock); |
1459 | write_seqcount_begin(&sp->so_reclaim_seqcount); | 1459 | raw_write_seqcount_begin(&sp->so_reclaim_seqcount); |
1460 | restart: | 1460 | restart: |
1461 | list_for_each_entry(state, &sp->so_states, open_states) { | 1461 | list_for_each_entry(state, &sp->so_states, open_states) { |
1462 | if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) | 1462 | if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) |
@@ -1519,13 +1519,13 @@ restart: | |||
1519 | spin_lock(&sp->so_lock); | 1519 | spin_lock(&sp->so_lock); |
1520 | goto restart; | 1520 | goto restart; |
1521 | } | 1521 | } |
1522 | write_seqcount_end(&sp->so_reclaim_seqcount); | 1522 | raw_write_seqcount_end(&sp->so_reclaim_seqcount); |
1523 | spin_unlock(&sp->so_lock); | 1523 | spin_unlock(&sp->so_lock); |
1524 | return 0; | 1524 | return 0; |
1525 | out_err: | 1525 | out_err: |
1526 | nfs4_put_open_state(state); | 1526 | nfs4_put_open_state(state); |
1527 | spin_lock(&sp->so_lock); | 1527 | spin_lock(&sp->so_lock); |
1528 | write_seqcount_end(&sp->so_reclaim_seqcount); | 1528 | raw_write_seqcount_end(&sp->so_reclaim_seqcount); |
1529 | spin_unlock(&sp->so_lock); | 1529 | spin_unlock(&sp->so_lock); |
1530 | return status; | 1530 | return status; |
1531 | } | 1531 | } |
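nfs4_reclaim_open_state() switches to raw_write_seqcount_begin/end. The raw variants bump the sequence counter without the instrumentation of the plain ones, presumably to keep the annotations from tripping over this writer section, which deliberately spans a drop-and-retake of sp->so_lock at the `restart` loop above. A userspace sketch of the seqcount writer/reader protocol itself (hand-rolled counter, not the kernel API):

#include <stdatomic.h>
#include <assert.h>

/* Minimal seqcount: writers bump to odd on entry, even on exit;
 * readers retry whenever they observe an odd or changed value. */
static atomic_uint seq;
static int protected_value;

static void write_seq_begin(void) { atomic_fetch_add(&seq, 1); }
static void write_seq_end(void)   { atomic_fetch_add(&seq, 1); }

static unsigned read_seq_begin(void)
{
	unsigned s;
	while ((s = atomic_load(&seq)) & 1)
		;				/* writer active: spin */
	return s;
}

static int read_seq_retry(unsigned s)
{
	return atomic_load(&seq) != s;
}

int main(void)
{
	unsigned s;
	int v;

	write_seq_begin();
	protected_value = 42;
	write_seq_end();

	do {
		s = read_seq_begin();
		v = protected_value;
	} while (read_seq_retry(s));
	assert(v == 42);
	return 0;
}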
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 849cf146db30..0a744f3a86f6 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h | |||
@@ -932,7 +932,7 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); | |||
932 | 932 | ||
933 | DECLARE_EVENT_CLASS(nfs4_read_event, | 933 | DECLARE_EVENT_CLASS(nfs4_read_event, |
934 | TP_PROTO( | 934 | TP_PROTO( |
935 | const struct nfs_read_data *data, | 935 | const struct nfs_pgio_data *data, |
936 | int error | 936 | int error |
937 | ), | 937 | ), |
938 | 938 | ||
@@ -972,7 +972,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, | |||
972 | #define DEFINE_NFS4_READ_EVENT(name) \ | 972 | #define DEFINE_NFS4_READ_EVENT(name) \ |
973 | DEFINE_EVENT(nfs4_read_event, name, \ | 973 | DEFINE_EVENT(nfs4_read_event, name, \ |
974 | TP_PROTO( \ | 974 | TP_PROTO( \ |
975 | const struct nfs_read_data *data, \ | 975 | const struct nfs_pgio_data *data, \ |
976 | int error \ | 976 | int error \ |
977 | ), \ | 977 | ), \ |
978 | TP_ARGS(data, error)) | 978 | TP_ARGS(data, error)) |
@@ -983,7 +983,7 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); | |||
983 | 983 | ||
984 | DECLARE_EVENT_CLASS(nfs4_write_event, | 984 | DECLARE_EVENT_CLASS(nfs4_write_event, |
985 | TP_PROTO( | 985 | TP_PROTO( |
986 | const struct nfs_write_data *data, | 986 | const struct nfs_pgio_data *data, |
987 | int error | 987 | int error |
988 | ), | 988 | ), |
989 | 989 | ||
@@ -1024,7 +1024,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, | |||
1024 | #define DEFINE_NFS4_WRITE_EVENT(name) \ | 1024 | #define DEFINE_NFS4_WRITE_EVENT(name) \ |
1025 | DEFINE_EVENT(nfs4_write_event, name, \ | 1025 | DEFINE_EVENT(nfs4_write_event, name, \ |
1026 | TP_PROTO( \ | 1026 | TP_PROTO( \ |
1027 | const struct nfs_write_data *data, \ | 1027 | const struct nfs_pgio_data *data, \ |
1028 | int error \ | 1028 | int error \ |
1029 | ), \ | 1029 | ), \ |
1030 | TP_ARGS(data, error)) | 1030 | TP_ARGS(data, error)) |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 73ce8d4fe2c8..939ae606cfa4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -1556,7 +1556,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) | |||
1556 | encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); | 1556 | encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); |
1557 | } | 1557 | } |
1558 | 1558 | ||
1559 | static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) | 1559 | static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args, |
1560 | struct compound_hdr *hdr) | ||
1560 | { | 1561 | { |
1561 | __be32 *p; | 1562 | __be32 *p; |
1562 | 1563 | ||
@@ -1701,7 +1702,8 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4 | |||
1701 | encode_nfs4_verifier(xdr, &arg->confirm); | 1702 | encode_nfs4_verifier(xdr, &arg->confirm); |
1702 | } | 1703 | } |
1703 | 1704 | ||
1704 | static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) | 1705 | static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args, |
1706 | struct compound_hdr *hdr) | ||
1705 | { | 1707 | { |
1706 | __be32 *p; | 1708 | __be32 *p; |
1707 | 1709 | ||
@@ -2451,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
2451 | * Encode a READ request | 2453 | * Encode a READ request |
2452 | */ | 2454 | */ |
2453 | static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, | 2455 | static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, |
2454 | struct nfs_readargs *args) | 2456 | struct nfs_pgio_args *args) |
2455 | { | 2457 | { |
2456 | struct compound_hdr hdr = { | 2458 | struct compound_hdr hdr = { |
2457 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | 2459 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), |
@@ -2513,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
2513 | * Encode a WRITE request | 2515 | * Encode a WRITE request |
2514 | */ | 2516 | */ |
2515 | static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, | 2517 | static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, |
2516 | struct nfs_writeargs *args) | 2518 | struct nfs_pgio_args *args) |
2517 | { | 2519 | { |
2518 | struct compound_hdr hdr = { | 2520 | struct compound_hdr hdr = { |
2519 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | 2521 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), |
@@ -5085,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr) | |||
5085 | return decode_op_hdr(xdr, OP_PUTROOTFH); | 5087 | return decode_op_hdr(xdr, OP_PUTROOTFH); |
5086 | } | 5088 | } |
5087 | 5089 | ||
5088 | static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) | 5090 | static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, |
5091 | struct nfs_pgio_res *res) | ||
5089 | { | 5092 | { |
5090 | __be32 *p; | 5093 | __be32 *p; |
5091 | uint32_t count, eof, recvd; | 5094 | uint32_t count, eof, recvd; |
@@ -5339,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr) | |||
5339 | return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); | 5342 | return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); |
5340 | } | 5343 | } |
5341 | 5344 | ||
5342 | static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) | 5345 | static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res) |
5343 | { | 5346 | { |
5344 | __be32 *p; | 5347 | __be32 *p; |
5345 | int status; | 5348 | int status; |
@@ -6636,7 +6639,7 @@ out: | |||
6636 | * Decode Read response | 6639 | * Decode Read response |
6637 | */ | 6640 | */ |
6638 | static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | 6641 | static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, |
6639 | struct nfs_readres *res) | 6642 | struct nfs_pgio_res *res) |
6640 | { | 6643 | { |
6641 | struct compound_hdr hdr; | 6644 | struct compound_hdr hdr; |
6642 | int status; | 6645 | int status; |
@@ -6661,7 +6664,7 @@ out: | |||
6661 | * Decode WRITE response | 6664 | * Decode WRITE response |
6662 | */ | 6665 | */ |
6663 | static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | 6666 | static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, |
6664 | struct nfs_writeres *res) | 6667 | struct nfs_pgio_res *res) |
6665 | { | 6668 | { |
6666 | struct compound_hdr hdr; | 6669 | struct compound_hdr hdr; |
6667 | int status; | 6670 | int status; |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 5457745dd4f1..611320753db2 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private) | |||
439 | objlayout_read_done(&objios->oir, status, objios->sync); | 439 | objlayout_read_done(&objios->oir, status, objios->sync); |
440 | } | 440 | } |
441 | 441 | ||
442 | int objio_read_pagelist(struct nfs_read_data *rdata) | 442 | int objio_read_pagelist(struct nfs_pgio_data *rdata) |
443 | { | 443 | { |
444 | struct nfs_pgio_header *hdr = rdata->header; | 444 | struct nfs_pgio_header *hdr = rdata->header; |
445 | struct objio_state *objios; | 445 | struct objio_state *objios; |
@@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private) | |||
487 | static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) | 487 | static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) |
488 | { | 488 | { |
489 | struct objio_state *objios = priv; | 489 | struct objio_state *objios = priv; |
490 | struct nfs_write_data *wdata = objios->oir.rpcdata; | 490 | struct nfs_pgio_data *wdata = objios->oir.rpcdata; |
491 | struct address_space *mapping = wdata->header->inode->i_mapping; | 491 | struct address_space *mapping = wdata->header->inode->i_mapping; |
492 | pgoff_t index = offset / PAGE_SIZE; | 492 | pgoff_t index = offset / PAGE_SIZE; |
493 | struct page *page; | 493 | struct page *page; |
@@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = { | |||
531 | .put_page = &__r4w_put_page, | 531 | .put_page = &__r4w_put_page, |
532 | }; | 532 | }; |
533 | 533 | ||
534 | int objio_write_pagelist(struct nfs_write_data *wdata, int how) | 534 | int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) |
535 | { | 535 | { |
536 | struct nfs_pgio_header *hdr = wdata->header; | 536 | struct nfs_pgio_header *hdr = wdata->header; |
537 | struct objio_state *objios; | 537 | struct objio_state *objios; |
@@ -564,14 +564,22 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how) | |||
564 | return 0; | 564 | return 0; |
565 | } | 565 | } |
566 | 566 | ||
567 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | 567 | /* |
568 | * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number | ||
569 | * of bytes (maximum @req->wb_bytes) that can be coalesced. | ||
570 | */ | ||
571 | static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, | ||
568 | struct nfs_page *prev, struct nfs_page *req) | 572 | struct nfs_page *prev, struct nfs_page *req) |
569 | { | 573 | { |
570 | if (!pnfs_generic_pg_test(pgio, prev, req)) | 574 | unsigned int size; |
571 | return false; | 575 | |
576 | size = pnfs_generic_pg_test(pgio, prev, req); | ||
577 | |||
578 | if (!size || pgio->pg_count + req->wb_bytes > | ||
579 | (unsigned long)pgio->pg_layout_private) | ||
580 | return 0; | ||
572 | 581 | ||
573 | return pgio->pg_count + req->wb_bytes <= | 582 | return min(size, req->wb_bytes); |
574 | (unsigned long)pgio->pg_layout_private; | ||
575 | } | 583 | } |
576 | 584 | ||
577 | static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 585 | static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
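objio_pg_test() moves from a boolean answer to the size_t contract spelled out in the added comment: return 0 when @req cannot be coalesced into @pgio, otherwise the number of bytes (at most req->wb_bytes) that can be. The arithmetic in isolation, with simplified userspace types:

#include <assert.h>
#include <stddef.h>

static size_t min_sz(size_t a, size_t b) { return a < b ? a : b; }

/* Sketch of the size-returning pg_test contract: 0 means "flush the
 * descriptor first", anything else is how many bytes may join it. */
static size_t pg_test_sketch(size_t generic_ok,	/* from generic test */
			     size_t pg_count,	/* bytes queued so far */
			     size_t req_bytes,	/* this request's bytes */
			     size_t limit)	/* e.g. layout-private cap */
{
	if (!generic_ok || pg_count + req_bytes > limit)
		return 0;
	return min_sz(generic_ok, req_bytes);
}

int main(void)
{
	assert(pg_test_sketch(4096, 0, 4096, 8192) == 4096); /* coalesce */
	assert(pg_test_sketch(4096, 8192, 1, 8192) == 0);    /* over cap */
	assert(pg_test_sketch(0, 0, 4096, 8192) == 0);       /* generic said no */
	return 0;
}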
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index e4f9cbfec67b..765d3f54e986 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -53,10 +53,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) | |||
53 | struct objlayout *objlay; | 53 | struct objlayout *objlay; |
54 | 54 | ||
55 | objlay = kzalloc(sizeof(struct objlayout), gfp_flags); | 55 | objlay = kzalloc(sizeof(struct objlayout), gfp_flags); |
56 | if (objlay) { | 56 | if (!objlay) |
57 | spin_lock_init(&objlay->lock); | 57 | return NULL; |
58 | INIT_LIST_HEAD(&objlay->err_list); | 58 | spin_lock_init(&objlay->lock); |
59 | } | 59 | INIT_LIST_HEAD(&objlay->err_list); |
60 | dprintk("%s: Return %p\n", __func__, objlay); | 60 | dprintk("%s: Return %p\n", __func__, objlay); |
61 | return &objlay->pnfs_layout; | 61 | return &objlay->pnfs_layout; |
62 | } | 62 | } |
@@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, | |||
229 | static void _rpc_read_complete(struct work_struct *work) | 229 | static void _rpc_read_complete(struct work_struct *work) |
230 | { | 230 | { |
231 | struct rpc_task *task; | 231 | struct rpc_task *task; |
232 | struct nfs_read_data *rdata; | 232 | struct nfs_pgio_data *rdata; |
233 | 233 | ||
234 | dprintk("%s enter\n", __func__); | 234 | dprintk("%s enter\n", __func__); |
235 | task = container_of(work, struct rpc_task, u.tk_work); | 235 | task = container_of(work, struct rpc_task, u.tk_work); |
236 | rdata = container_of(task, struct nfs_read_data, task); | 236 | rdata = container_of(task, struct nfs_pgio_data, task); |
237 | 237 | ||
238 | pnfs_ld_read_done(rdata); | 238 | pnfs_ld_read_done(rdata); |
239 | } | 239 | } |
@@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work) | |||
241 | void | 241 | void |
242 | objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) | 242 | objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) |
243 | { | 243 | { |
244 | struct nfs_read_data *rdata = oir->rpcdata; | 244 | struct nfs_pgio_data *rdata = oir->rpcdata; |
245 | 245 | ||
246 | oir->status = rdata->task.tk_status = status; | 246 | oir->status = rdata->task.tk_status = status; |
247 | if (status >= 0) | 247 | if (status >= 0) |
@@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) | |||
266 | * Perform sync or async reads. | 266 | * Perform sync or async reads. |
267 | */ | 267 | */ |
268 | enum pnfs_try_status | 268 | enum pnfs_try_status |
269 | objlayout_read_pagelist(struct nfs_read_data *rdata) | 269 | objlayout_read_pagelist(struct nfs_pgio_data *rdata) |
270 | { | 270 | { |
271 | struct nfs_pgio_header *hdr = rdata->header; | 271 | struct nfs_pgio_header *hdr = rdata->header; |
272 | struct inode *inode = hdr->inode; | 272 | struct inode *inode = hdr->inode; |
@@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) | |||
312 | static void _rpc_write_complete(struct work_struct *work) | 312 | static void _rpc_write_complete(struct work_struct *work) |
313 | { | 313 | { |
314 | struct rpc_task *task; | 314 | struct rpc_task *task; |
315 | struct nfs_write_data *wdata; | 315 | struct nfs_pgio_data *wdata; |
316 | 316 | ||
317 | dprintk("%s enter\n", __func__); | 317 | dprintk("%s enter\n", __func__); |
318 | task = container_of(work, struct rpc_task, u.tk_work); | 318 | task = container_of(work, struct rpc_task, u.tk_work); |
319 | wdata = container_of(task, struct nfs_write_data, task); | 319 | wdata = container_of(task, struct nfs_pgio_data, task); |
320 | 320 | ||
321 | pnfs_ld_write_done(wdata); | 321 | pnfs_ld_write_done(wdata); |
322 | } | 322 | } |
@@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work) | |||
324 | void | 324 | void |
325 | objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) | 325 | objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) |
326 | { | 326 | { |
327 | struct nfs_write_data *wdata = oir->rpcdata; | 327 | struct nfs_pgio_data *wdata = oir->rpcdata; |
328 | 328 | ||
329 | oir->status = wdata->task.tk_status = status; | 329 | oir->status = wdata->task.tk_status = status; |
330 | if (status >= 0) { | 330 | if (status >= 0) { |
@@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) | |||
351 | * Perform sync or async writes. | 351 | * Perform sync or async writes. |
352 | */ | 352 | */ |
353 | enum pnfs_try_status | 353 | enum pnfs_try_status |
354 | objlayout_write_pagelist(struct nfs_write_data *wdata, | 354 | objlayout_write_pagelist(struct nfs_pgio_data *wdata, |
355 | int how) | 355 | int how) |
356 | { | 356 | { |
357 | struct nfs_pgio_header *hdr = wdata->header; | 357 | struct nfs_pgio_header *hdr = wdata->header; |
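objlayout.c's completion workers recover the I/O structure from its embedded rpc_task via container_of(); only the type name changes in these hunks. For reference, the idiom itself, re-derived generically in userspace (not the kernel.h macro):

#include <assert.h>
#include <stddef.h>

/* container_of: map a pointer to an embedded member back to the
 * enclosing structure. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct task { int dummy; };
struct pgio_data {
	int id;
	struct task task;	/* embedded, like nfs_pgio_data.task */
};

int main(void)
{
	struct pgio_data d = { 7, { 0 } };
	struct task *t = &d.task;
	struct pgio_data *back = container_of(t, struct pgio_data, task);

	assert(back == &d && back->id == 7);
	return 0;
}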
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 87aa1dec6120..01e041029a6c 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h | |||
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg); | |||
119 | */ | 119 | */ |
120 | extern void objio_free_result(struct objlayout_io_res *oir); | 120 | extern void objio_free_result(struct objlayout_io_res *oir); |
121 | 121 | ||
122 | extern int objio_read_pagelist(struct nfs_read_data *rdata); | 122 | extern int objio_read_pagelist(struct nfs_pgio_data *rdata); |
123 | extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); | 123 | extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); |
124 | 124 | ||
125 | /* | 125 | /* |
126 | * callback API | 126 | * callback API |
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg( | |||
168 | extern void objlayout_free_lseg(struct pnfs_layout_segment *); | 168 | extern void objlayout_free_lseg(struct pnfs_layout_segment *); |
169 | 169 | ||
170 | extern enum pnfs_try_status objlayout_read_pagelist( | 170 | extern enum pnfs_try_status objlayout_read_pagelist( |
171 | struct nfs_read_data *); | 171 | struct nfs_pgio_data *); |
172 | 172 | ||
173 | extern enum pnfs_try_status objlayout_write_pagelist( | 173 | extern enum pnfs_try_status objlayout_write_pagelist( |
174 | struct nfs_write_data *, | 174 | struct nfs_pgio_data *, |
175 | int how); | 175 | int how); |
176 | 176 | ||
177 | extern void objlayout_encode_layoutcommit( | 177 | extern void objlayout_encode_layoutcommit( |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 03ed984ab4d8..b6ee3a6ee96d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -24,9 +24,14 @@ | |||
24 | #include "internal.h" | 24 | #include "internal.h" |
25 | #include "pnfs.h" | 25 | #include "pnfs.h" |
26 | 26 | ||
27 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE | ||
28 | |||
27 | static struct kmem_cache *nfs_page_cachep; | 29 | static struct kmem_cache *nfs_page_cachep; |
30 | static const struct rpc_call_ops nfs_pgio_common_ops; | ||
31 | |||
32 | static void nfs_free_request(struct nfs_page *); | ||
28 | 33 | ||
29 | bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) | 34 | static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) |
30 | { | 35 | { |
31 | p->npages = pagecount; | 36 | p->npages = pagecount; |
32 | if (pagecount <= ARRAY_SIZE(p->page_array)) | 37 | if (pagecount <= ARRAY_SIZE(p->page_array)) |
@@ -133,11 +138,156 @@ nfs_iocounter_wait(struct nfs_io_counter *c) | |||
133 | return __nfs_iocounter_wait(c); | 138 | return __nfs_iocounter_wait(c); |
134 | } | 139 | } |
135 | 140 | ||
141 | static int nfs_wait_bit_uninterruptible(void *word) | ||
142 | { | ||
143 | io_schedule(); | ||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * nfs_page_group_lock - lock the head of the page group | ||
149 | * @req - request in group that is to be locked | ||
150 | * | ||
151 | * this lock must be held if modifying the page group list | ||
152 | */ | ||
153 | void | ||
154 | nfs_page_group_lock(struct nfs_page *req) | ||
155 | { | ||
156 | struct nfs_page *head = req->wb_head; | ||
157 | |||
158 | WARN_ON_ONCE(head != head->wb_head); | ||
159 | |||
160 | wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, | ||
161 | nfs_wait_bit_uninterruptible, | ||
162 | TASK_UNINTERRUPTIBLE); | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * nfs_page_group_unlock - unlock the head of the page group | ||
167 | * @req - request in group that is to be unlocked | ||
168 | */ | ||
169 | void | ||
170 | nfs_page_group_unlock(struct nfs_page *req) | ||
171 | { | ||
172 | struct nfs_page *head = req->wb_head; | ||
173 | |||
174 | WARN_ON_ONCE(head != head->wb_head); | ||
175 | |||
176 | smp_mb__before_atomic(); | ||
177 | clear_bit(PG_HEADLOCK, &head->wb_flags); | ||
178 | smp_mb__after_atomic(); | ||
179 | wake_up_bit(&head->wb_flags, PG_HEADLOCK); | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * nfs_page_group_sync_on_bit_locked | ||
184 | * | ||
185 | * must be called with page group lock held | ||
186 | */ | ||
187 | static bool | ||
188 | nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) | ||
189 | { | ||
190 | struct nfs_page *head = req->wb_head; | ||
191 | struct nfs_page *tmp; | ||
192 | |||
193 | WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags)); | ||
194 | WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags)); | ||
195 | |||
196 | tmp = req->wb_this_page; | ||
197 | while (tmp != req) { | ||
198 | if (!test_bit(bit, &tmp->wb_flags)) | ||
199 | return false; | ||
200 | tmp = tmp->wb_this_page; | ||
201 | } | ||
202 | |||
203 | /* true! reset all bits */ | ||
204 | tmp = req; | ||
205 | do { | ||
206 | clear_bit(bit, &tmp->wb_flags); | ||
207 | tmp = tmp->wb_this_page; | ||
208 | } while (tmp != req); | ||
209 | |||
210 | return true; | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | * nfs_page_group_sync_on_bit - set bit on current request, but only | ||
215 | * return true if the bit is set for all requests in page group | ||
216 | * @req - request in page group | ||
217 | * @bit - PG_* bit that is used to sync page group | ||
218 | */ | ||
219 | bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) | ||
220 | { | ||
221 | bool ret; | ||
222 | |||
223 | nfs_page_group_lock(req); | ||
224 | ret = nfs_page_group_sync_on_bit_locked(req, bit); | ||
225 | nfs_page_group_unlock(req); | ||
226 | |||
227 | return ret; | ||
228 | } | ||
229 | |||
230 | /* | ||
231 | * nfs_page_group_init - Initialize the page group linkage for @req | ||
232 | * @req - a new nfs request | ||
233 | * @prev - the previous request in page group, or NULL if @req is the first | ||
234 | * or only request in the group (the head). | ||
235 | */ | ||
236 | static inline void | ||
237 | nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) | ||
238 | { | ||
239 | WARN_ON_ONCE(prev == req); | ||
240 | |||
241 | if (!prev) { | ||
242 | req->wb_head = req; | ||
243 | req->wb_this_page = req; | ||
244 | } else { | ||
245 | WARN_ON_ONCE(prev->wb_this_page != prev->wb_head); | ||
246 | WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags)); | ||
247 | req->wb_head = prev->wb_head; | ||
248 | req->wb_this_page = prev->wb_this_page; | ||
249 | prev->wb_this_page = req; | ||
250 | |||
251 | /* grab extra ref if head request has extra ref from | ||
252 | * the write/commit path to handle handoff between write | ||
253 | * and commit lists */ | ||
254 | if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) | ||
255 | kref_get(&req->wb_kref); | ||
256 | } | ||
257 | } | ||
258 | |||
259 | /* | ||
260 | * nfs_page_group_destroy - sync the destruction of page groups | ||
261 | * @req - request that no longer needs the page group | ||
262 | * | ||
263 | * releases the page group reference from each member once all | ||
264 | * members have called this function. | ||
265 | */ | ||
266 | static void | ||
267 | nfs_page_group_destroy(struct kref *kref) | ||
268 | { | ||
269 | struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); | ||
270 | struct nfs_page *tmp, *next; | ||
271 | |||
272 | if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) | ||
273 | return; | ||
274 | |||
275 | tmp = req; | ||
276 | do { | ||
277 | next = tmp->wb_this_page; | ||
278 | /* unlink and free */ | ||
279 | tmp->wb_this_page = tmp; | ||
280 | tmp->wb_head = tmp; | ||
281 | nfs_free_request(tmp); | ||
282 | tmp = next; | ||
283 | } while (tmp != req); | ||
284 | } | ||
285 | |||
136 | /** | 286 | /** |
137 | * nfs_create_request - Create an NFS read/write request. | 287 | * nfs_create_request - Create an NFS read/write request. |
138 | * @ctx: open context to use | 288 | * @ctx: open context to use |
139 | * @inode: inode to which the request is attached | ||
140 | * @page: page to write | 289 | * @page: page to write |
290 | * @last: last nfs request created for this page group or NULL if head | ||
141 | * @offset: starting offset within the page for the write | 291 | * @offset: starting offset within the page for the write |
142 | * @count: number of bytes to read/write | 292 | * @count: number of bytes to read/write |
143 | * | 293 | * |
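The big pagelist.c addition above introduces page groups: several nfs_page requests covering one page, linked in a circular list through wb_this_page with wb_head pointing at the head, serialized by the PG_HEADLOCK bit, and synchronized via nfs_page_group_sync_on_bit(), which reports true only once every member has set the given bit and then clears it on all of them. A self-contained model of the circular list and the sync-on-bit walk; userspace structs, same traversal logic as the functions above:

#include <assert.h>
#include <stddef.h>

struct req {
	unsigned long flags;
	struct req *head;		/* like wb_head */
	struct req *this_page;		/* like wb_this_page (circular) */
};

static void group_init(struct req *r, struct req *prev)
{
	r->flags = 0;
	if (!prev) {			/* first request: its own group */
		r->head = r;
		r->this_page = r;
	} else {			/* splice in after the head */
		r->head = prev->head;
		r->this_page = prev->this_page;
		prev->this_page = r;
	}
}

/* True only when every member of the group has set @bit; the bits
 * are then cleared so the group can synchronize on @bit again. */
static int group_sync_on_bit(struct req *req, unsigned long bit)
{
	struct req *tmp;

	req->flags |= bit;
	for (tmp = req->this_page; tmp != req; tmp = tmp->this_page)
		if (!(tmp->flags & bit))
			return 0;
	tmp = req;
	do {
		tmp->flags &= ~bit;
		tmp = tmp->this_page;
	} while (tmp != req);
	return 1;
}

int main(void)
{
	struct req a, b, c;

	group_init(&a, NULL);
	group_init(&b, &a);
	group_init(&c, &a);		/* group is now a -> c -> b -> a */
	assert(!group_sync_on_bit(&a, 0x1));	/* b, c not done yet */
	assert(!group_sync_on_bit(&b, 0x1));
	assert(group_sync_on_bit(&c, 0x1));	/* last member completes it */
	assert(!(a.flags & 0x1));		/* bits were reset */
	return 0;
}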
@@ -146,9 +296,9 @@ nfs_iocounter_wait(struct nfs_io_counter *c) | |||
146 | * User should ensure it is safe to sleep in this function. | 296 | * User should ensure it is safe to sleep in this function. |
147 | */ | 297 | */ |
148 | struct nfs_page * | 298 | struct nfs_page * |
149 | nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, | 299 | nfs_create_request(struct nfs_open_context *ctx, struct page *page, |
150 | struct page *page, | 300 | struct nfs_page *last, unsigned int offset, |
151 | unsigned int offset, unsigned int count) | 301 | unsigned int count) |
152 | { | 302 | { |
153 | struct nfs_page *req; | 303 | struct nfs_page *req; |
154 | struct nfs_lock_context *l_ctx; | 304 | struct nfs_lock_context *l_ctx; |
@@ -180,6 +330,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, | |||
180 | req->wb_bytes = count; | 330 | req->wb_bytes = count; |
181 | req->wb_context = get_nfs_open_context(ctx); | 331 | req->wb_context = get_nfs_open_context(ctx); |
182 | kref_init(&req->wb_kref); | 332 | kref_init(&req->wb_kref); |
333 | nfs_page_group_init(req, last); | ||
183 | return req; | 334 | return req; |
184 | } | 335 | } |
185 | 336 | ||
@@ -237,16 +388,22 @@ static void nfs_clear_request(struct nfs_page *req) | |||
237 | } | 388 | } |
238 | } | 389 | } |
239 | 390 | ||
240 | |||
241 | /** | 391 | /** |
242 | * nfs_release_request - Release the count on an NFS read/write request | 392 | * nfs_release_request - Release the count on an NFS read/write request |
243 | * @req: request to release | 393 | * @req: request to release |
244 | * | 394 | * |
245 | * Note: Should never be called with the spinlock held! | 395 | * Note: Should never be called with the spinlock held! |
246 | */ | 396 | */ |
247 | static void nfs_free_request(struct kref *kref) | 397 | static void nfs_free_request(struct nfs_page *req) |
248 | { | 398 | { |
249 | struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); | 399 | WARN_ON_ONCE(req->wb_this_page != req); |
400 | |||
401 | /* extra debug: make sure no sync bits are still set */ | ||
402 | WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); | ||
403 | WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags)); | ||
404 | WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags)); | ||
405 | WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags)); | ||
406 | WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags)); | ||
250 | 407 | ||
251 | /* Release struct file and open context */ | 408 | /* Release struct file and open context */ |
252 | nfs_clear_request(req); | 409 | nfs_clear_request(req); |
@@ -255,13 +412,7 @@ static void nfs_free_request(struct kref *kref) | |||
255 | 412 | ||
256 | void nfs_release_request(struct nfs_page *req) | 413 | void nfs_release_request(struct nfs_page *req) |
257 | { | 414 | { |
258 | kref_put(&req->wb_kref, nfs_free_request); | 415 | kref_put(&req->wb_kref, nfs_page_group_destroy); |
259 | } | ||
260 | |||
261 | static int nfs_wait_bit_uninterruptible(void *word) | ||
262 | { | ||
263 | io_schedule(); | ||
264 | return 0; | ||
265 | } | 416 | } |
266 | 417 | ||
267 | /** | 418 | /** |
@@ -279,22 +430,249 @@ nfs_wait_on_request(struct nfs_page *req) | |||
279 | TASK_UNINTERRUPTIBLE); | 430 | TASK_UNINTERRUPTIBLE); |
280 | } | 431 | } |
281 | 432 | ||
282 | bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) | 433 | /* |
434 | * nfs_generic_pg_test - determine if requests can be coalesced | ||
435 | * @desc: pointer to descriptor | ||
436 | * @prev: previous request in desc, or NULL | ||
437 | * @req: this request | ||
438 | * | ||
439 | * Returns zero if @req cannot be coalesced into @desc, otherwise it returns | ||
440 | * the number of bytes of @req that can be coalesced. | ||
441 | */ | ||
442 | size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | ||
443 | struct nfs_page *prev, struct nfs_page *req) | ||
283 | { | 444 | { |
284 | /* | 445 | if (desc->pg_count > desc->pg_bsize) { |
285 | * FIXME: ideally we should be able to coalesce all requests | 446 | /* should never happen */ |
286 | * that are not block boundary aligned, but currently this | 447 | WARN_ON_ONCE(1); |
287 | * is problematic for the case of bsize < PAGE_CACHE_SIZE, | ||
288 | * since nfs_flush_multi and nfs_pagein_multi assume you | ||
289 | * can have only one struct nfs_page. | ||
290 | */ | ||
291 | if (desc->pg_bsize < PAGE_SIZE) | ||
292 | return 0; | 448 | return 0; |
449 | } | ||
293 | 450 | ||
294 | return desc->pg_count + req->wb_bytes <= desc->pg_bsize; | 451 | return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); |
295 | } | 452 | } |
296 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); | 453 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); |
297 | 454 | ||
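With this change pg_test no longer answers yes/no: it reports how many bytes of the request still fit in the descriptor, and 0 means nothing fits. A small sketch of the arithmetic (the function name and sample values are hypothetical):

	#include <stdio.h>

	/* Model of the new pg_test contract: return 0 if nothing fits,
	 * otherwise the number of bytes of the request that fit. */
	static size_t generic_pg_test(size_t bsize, size_t count, size_t wb_bytes)
	{
		if (count > bsize)	/* should never happen */
			return 0;
		return (bsize - count) < wb_bytes ? (bsize - count) : wb_bytes;
	}

	int main(void)
	{
		/* rsize 4096, 3072 bytes already coalesced, 4096-byte request:
		 * only 1024 bytes fit, so the caller trims the request and the
		 * remaining 3072 bytes become a subrequest. */
		printf("%zu\n", generic_pg_test(4096, 3072, 4096)); /* 1024 */
		printf("%zu\n", generic_pg_test(4096, 4096, 512));  /* 0 */
		return 0;
	}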
455 | static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) | ||
456 | { | ||
457 | return container_of(hdr, struct nfs_rw_header, header); | ||
458 | } | ||
459 | |||
460 | /** | ||
461 | * nfs_rw_header_alloc - Allocate a header for a read or write | ||
462 | * @ops: Read or write function vector | ||
463 | */ | ||
464 | struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops) | ||
465 | { | ||
466 | struct nfs_rw_header *header = ops->rw_alloc_header(); | ||
467 | |||
468 | if (header) { | ||
469 | struct nfs_pgio_header *hdr = &header->header; | ||
470 | |||
471 | INIT_LIST_HEAD(&hdr->pages); | ||
472 | spin_lock_init(&hdr->lock); | ||
473 | atomic_set(&hdr->refcnt, 0); | ||
474 | hdr->rw_ops = ops; | ||
475 | } | ||
476 | return header; | ||
477 | } | ||
478 | EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); | ||
479 | |||
480 | /* | ||
481 | * nfs_rw_header_free - Free a read or write header | ||
482 | * @hdr: The header to free | ||
483 | */ | ||
484 | void nfs_rw_header_free(struct nfs_pgio_header *hdr) | ||
485 | { | ||
486 | hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); | ||
487 | } | ||
488 | EXPORT_SYMBOL_GPL(nfs_rw_header_free); | ||
489 | |||
490 | /** | ||
491 | * nfs_pgio_data_alloc - Allocate pageio data | ||
492 | * @hdr: The header making a request | ||
493 | * @pagecount: Number of pages to create | ||
494 | */ | ||
495 | static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, | ||
496 | unsigned int pagecount) | ||
497 | { | ||
498 | struct nfs_pgio_data *data, *prealloc; | ||
499 | |||
500 | prealloc = &NFS_RW_HEADER(hdr)->rpc_data; | ||
501 | if (prealloc->header == NULL) | ||
502 | data = prealloc; | ||
503 | else | ||
504 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
505 | if (!data) | ||
506 | goto out; | ||
507 | |||
508 | if (nfs_pgarray_set(&data->pages, pagecount)) { | ||
509 | data->header = hdr; | ||
510 | atomic_inc(&hdr->refcnt); | ||
511 | } else { | ||
512 | if (data != prealloc) | ||
513 | kfree(data); | ||
514 | data = NULL; | ||
515 | } | ||
516 | out: | ||
517 | return data; | ||
518 | } | ||
519 | |||
520 | /** | ||
521 | * nfs_pgio_data_release - Properly free pageio data | ||
522 | * @data: The data to release | ||
523 | */ | ||
524 | void nfs_pgio_data_release(struct nfs_pgio_data *data) | ||
525 | { | ||
526 | struct nfs_pgio_header *hdr = data->header; | ||
527 | struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr); | ||
528 | |||
529 | put_nfs_open_context(data->args.context); | ||
530 | if (data->pages.pagevec != data->pages.page_array) | ||
531 | kfree(data->pages.pagevec); | ||
532 | if (data == &pageio_header->rpc_data) { | ||
533 | data->header = NULL; | ||
534 | data = NULL; | ||
535 | } | ||
536 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
537 | hdr->completion_ops->completion(hdr); | ||
538 | /* Note: we only free the rpc_task after callbacks are done. | ||
539 | * See the comment in rpc_free_task() for why | ||
540 | */ | ||
541 | kfree(data); | ||
542 | } | ||
543 | EXPORT_SYMBOL_GPL(nfs_pgio_data_release); | ||
544 | |||
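nfs_pgio_data_alloc() and nfs_pgio_data_release() above reuse a slot embedded in the rw header for the common single-RPC case, falling back to the heap only when that slot is busy. A stripped-down model of the trick (the types and helpers here are invented for illustration; the real code also manages page arrays and header refcounts):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Model of the preallocation in nfs_pgio_data_alloc(): the header
	 * embeds one data slot, and header == NULL marks it as free. */
	struct pgio_data {
		void *header;	/* non-NULL while in use */
	};

	struct rw_header {
		struct pgio_data rpc_data;	/* preallocated slot */
	};

	static struct pgio_data *data_alloc(struct rw_header *hdr)
	{
		struct pgio_data *prealloc = &hdr->rpc_data;
		struct pgio_data *data;

		if (prealloc->header == NULL)
			data = prealloc;	/* fast path: embedded slot */
		else
			data = calloc(1, sizeof(*data));
		if (data)
			data->header = hdr;
		return data;
	}

	static void data_release(struct rw_header *hdr, struct pgio_data *data)
	{
		if (data == &hdr->rpc_data) {
			data->header = NULL;	/* mark the slot free again */
			return;
		}
		free(data);
	}

	int main(void)
	{
		struct rw_header hdr;

		memset(&hdr, 0, sizeof(hdr));
		struct pgio_data *a = data_alloc(&hdr);	/* embedded slot */
		struct pgio_data *b = data_alloc(&hdr);	/* heap fallback */
		printf("a embedded: %d, b embedded: %d\n",
		       a == &hdr.rpc_data, b == &hdr.rpc_data);
		data_release(&hdr, b);
		data_release(&hdr, a);
		return 0;
	}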
545 | /** | ||
546 | * nfs_pgio_rpcsetup - Set up arguments for a pageio call | ||
547 | * @data: The pageio data | ||
548 | * @count: Number of bytes to read or write | ||
549 | * @offset: Initial offset | ||
550 | * @how: How to commit data (writes only) | ||
551 | * @cinfo: Commit information for the call (writes only) | ||
552 | */ | ||
553 | static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, | ||
554 | unsigned int count, unsigned int offset, | ||
555 | int how, struct nfs_commit_info *cinfo) | ||
556 | { | ||
557 | struct nfs_page *req = data->header->req; | ||
558 | |||
559 | /* Set up the RPC argument and reply structs | ||
560 | * NB: take care not to mess about with data->commit et al. */ | ||
561 | |||
562 | data->args.fh = NFS_FH(data->header->inode); | ||
563 | data->args.offset = req_offset(req) + offset; | ||
564 | /* pnfs_set_layoutcommit needs this */ | ||
565 | data->mds_offset = data->args.offset; | ||
566 | data->args.pgbase = req->wb_pgbase + offset; | ||
567 | data->args.pages = data->pages.pagevec; | ||
568 | data->args.count = count; | ||
569 | data->args.context = get_nfs_open_context(req->wb_context); | ||
570 | data->args.lock_context = req->wb_lock_context; | ||
571 | data->args.stable = NFS_UNSTABLE; | ||
572 | switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { | ||
573 | case 0: | ||
574 | break; | ||
575 | case FLUSH_COND_STABLE: | ||
576 | if (nfs_reqs_to_commit(cinfo)) | ||
577 | break; | ||
578 | default: | ||
579 | data->args.stable = NFS_FILE_SYNC; | ||
580 | } | ||
581 | |||
582 | data->res.fattr = &data->fattr; | ||
583 | data->res.count = count; | ||
584 | data->res.eof = 0; | ||
585 | data->res.verf = &data->verf; | ||
586 | nfs_fattr_init(&data->fattr); | ||
587 | } | ||
588 | |||
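The switch in nfs_pgio_rpcsetup() relies on fall-through: FLUSH_COND_STABLE stays unstable only when a separate COMMIT is already queued, otherwise it drops into the default case and upgrades to NFS_FILE_SYNC. A compilable model of just that decision (the flag values here are illustrative, not the kernel's):

	#include <stdio.h>

	enum stable_how { NFS_UNSTABLE, NFS_FILE_SYNC };

	#define FLUSH_STABLE		1	/* values are illustrative */
	#define FLUSH_COND_STABLE	2

	/* Model of the stable-flag selection in nfs_pgio_rpcsetup(). */
	static enum stable_how pick_stable(int how, int reqs_to_commit)
	{
		enum stable_how stable = NFS_UNSTABLE;

		switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
		case 0:
			break;
		case FLUSH_COND_STABLE:
			if (reqs_to_commit)
				break;
			/* fall through */
		default:
			stable = NFS_FILE_SYNC;
		}
		return stable;
	}

	int main(void)
	{
		printf("%d\n", pick_stable(0, 0));			/* UNSTABLE */
		printf("%d\n", pick_stable(FLUSH_COND_STABLE, 1));	/* UNSTABLE */
		printf("%d\n", pick_stable(FLUSH_COND_STABLE, 0));	/* FILE_SYNC */
		printf("%d\n", pick_stable(FLUSH_STABLE, 0));		/* FILE_SYNC */
		return 0;
	}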
589 | /** | ||
590 | * nfs_pgio_prepare - Prepare pageio data to go over the wire | ||
591 | * @task: The current task | ||
592 | * @calldata: pageio data to prepare | ||
593 | */ | ||
594 | static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) | ||
595 | { | ||
596 | struct nfs_pgio_data *data = calldata; | ||
597 | int err; | ||
598 | err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); | ||
599 | if (err) | ||
600 | rpc_exit(task, err); | ||
601 | } | ||
602 | |||
603 | int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, | ||
604 | const struct rpc_call_ops *call_ops, int how, int flags) | ||
605 | { | ||
606 | struct rpc_task *task; | ||
607 | struct rpc_message msg = { | ||
608 | .rpc_argp = &data->args, | ||
609 | .rpc_resp = &data->res, | ||
610 | .rpc_cred = data->header->cred, | ||
611 | }; | ||
612 | struct rpc_task_setup task_setup_data = { | ||
613 | .rpc_client = clnt, | ||
614 | .task = &data->task, | ||
615 | .rpc_message = &msg, | ||
616 | .callback_ops = call_ops, | ||
617 | .callback_data = data, | ||
618 | .workqueue = nfsiod_workqueue, | ||
619 | .flags = RPC_TASK_ASYNC | flags, | ||
620 | }; | ||
621 | int ret = 0; | ||
622 | |||
623 | data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); | ||
624 | |||
625 | dprintk("NFS: %5u initiated pgio call " | ||
626 | "(req %s/%llu, %u bytes @ offset %llu)\n", | ||
627 | data->task.tk_pid, | ||
628 | data->header->inode->i_sb->s_id, | ||
629 | (unsigned long long)NFS_FILEID(data->header->inode), | ||
630 | data->args.count, | ||
631 | (unsigned long long)data->args.offset); | ||
632 | |||
633 | task = rpc_run_task(&task_setup_data); | ||
634 | if (IS_ERR(task)) { | ||
635 | ret = PTR_ERR(task); | ||
636 | goto out; | ||
637 | } | ||
638 | if (how & FLUSH_SYNC) { | ||
639 | ret = rpc_wait_for_completion_task(task); | ||
640 | if (ret == 0) | ||
641 | ret = task->tk_status; | ||
642 | } | ||
643 | rpc_put_task(task); | ||
644 | out: | ||
645 | return ret; | ||
646 | } | ||
647 | EXPORT_SYMBOL_GPL(nfs_initiate_pgio); | ||
648 | |||
649 | /** | ||
650 | * nfs_pgio_error - Clean up from a pageio error | ||
651 | * @desc: IO descriptor | ||
652 | * @hdr: pageio header | ||
653 | */ | ||
654 | static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, | ||
655 | struct nfs_pgio_header *hdr) | ||
656 | { | ||
657 | set_bit(NFS_IOHDR_REDO, &hdr->flags); | ||
658 | nfs_pgio_data_release(hdr->data); | ||
659 | hdr->data = NULL; | ||
660 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
661 | return -ENOMEM; | ||
662 | } | ||
663 | |||
664 | /** | ||
665 | * nfs_pgio_release - Release pageio data | ||
666 | * @calldata: The pageio data to release | ||
667 | */ | ||
668 | static void nfs_pgio_release(void *calldata) | ||
669 | { | ||
670 | struct nfs_pgio_data *data = calldata; | ||
671 | if (data->header->rw_ops->rw_release) | ||
672 | data->header->rw_ops->rw_release(data); | ||
673 | nfs_pgio_data_release(data); | ||
674 | } | ||
675 | |||
298 | /** | 676 | /** |
299 | * nfs_pageio_init - initialise a page io descriptor | 677 | * nfs_pageio_init - initialise a page io descriptor |
300 | * @desc: pointer to descriptor | 678 | * @desc: pointer to descriptor |
@@ -307,6 +685,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
307 | struct inode *inode, | 685 | struct inode *inode, |
308 | const struct nfs_pageio_ops *pg_ops, | 686 | const struct nfs_pageio_ops *pg_ops, |
309 | const struct nfs_pgio_completion_ops *compl_ops, | 687 | const struct nfs_pgio_completion_ops *compl_ops, |
688 | const struct nfs_rw_ops *rw_ops, | ||
310 | size_t bsize, | 689 | size_t bsize, |
311 | int io_flags) | 690 | int io_flags) |
312 | { | 691 | { |
@@ -320,6 +699,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
320 | desc->pg_inode = inode; | 699 | desc->pg_inode = inode; |
321 | desc->pg_ops = pg_ops; | 700 | desc->pg_ops = pg_ops; |
322 | desc->pg_completion_ops = compl_ops; | 701 | desc->pg_completion_ops = compl_ops; |
702 | desc->pg_rw_ops = rw_ops; | ||
323 | desc->pg_ioflags = io_flags; | 703 | desc->pg_ioflags = io_flags; |
324 | desc->pg_error = 0; | 704 | desc->pg_error = 0; |
325 | desc->pg_lseg = NULL; | 705 | desc->pg_lseg = NULL; |
@@ -328,6 +708,94 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
328 | } | 708 | } |
329 | EXPORT_SYMBOL_GPL(nfs_pageio_init); | 709 | EXPORT_SYMBOL_GPL(nfs_pageio_init); |
330 | 710 | ||
711 | /** | ||
712 | * nfs_pgio_result - Basic pageio error handling | ||
713 | * @task: The task that ran | ||
714 | * @calldata: Pageio data to check | ||
715 | */ | ||
716 | static void nfs_pgio_result(struct rpc_task *task, void *calldata) | ||
717 | { | ||
718 | struct nfs_pgio_data *data = calldata; | ||
719 | struct inode *inode = data->header->inode; | ||
720 | |||
721 | dprintk("NFS: %s: %5u, (status %d)\n", __func__, | ||
722 | task->tk_pid, task->tk_status); | ||
723 | |||
724 | if (data->header->rw_ops->rw_done(task, data, inode) != 0) | ||
725 | return; | ||
726 | if (task->tk_status < 0) | ||
727 | nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); | ||
728 | else | ||
729 | data->header->rw_ops->rw_result(task, data); | ||
730 | } | ||
731 | |||
732 | /* | ||
733 | * Set up the RPC data for the read or write requests queued in @desc | ||
734 | * and attach them to @hdr. The pages must have been locked by the caller. | ||
735 | * | ||
736 | * It may happen that the page we're passed is not marked dirty. | ||
737 | * This is the case if nfs_updatepage detects a conflicting request | ||
738 | * that has been written but not committed. | ||
739 | */ | ||
740 | int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, | ||
741 | struct nfs_pgio_header *hdr) | ||
742 | { | ||
743 | struct nfs_page *req; | ||
744 | struct page **pages; | ||
745 | struct nfs_pgio_data *data; | ||
746 | struct list_head *head = &desc->pg_list; | ||
747 | struct nfs_commit_info cinfo; | ||
748 | |||
749 | data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, | ||
750 | desc->pg_count)); | ||
751 | if (!data) | ||
752 | return nfs_pgio_error(desc, hdr); | ||
753 | |||
754 | nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); | ||
755 | pages = data->pages.pagevec; | ||
756 | while (!list_empty(head)) { | ||
757 | req = nfs_list_entry(head->next); | ||
758 | nfs_list_remove_request(req); | ||
759 | nfs_list_add_request(req, &hdr->pages); | ||
760 | *pages++ = req->wb_page; | ||
761 | } | ||
762 | |||
763 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | ||
764 | (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) | ||
765 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | ||
766 | |||
767 | /* Set up the argument struct */ | ||
768 | nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); | ||
769 | hdr->data = data; | ||
770 | desc->pg_rpc_callops = &nfs_pgio_common_ops; | ||
771 | return 0; | ||
772 | } | ||
773 | EXPORT_SYMBOL_GPL(nfs_generic_pgio); | ||
774 | |||
775 | static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) | ||
776 | { | ||
777 | struct nfs_rw_header *rw_hdr; | ||
778 | struct nfs_pgio_header *hdr; | ||
779 | int ret; | ||
780 | |||
781 | rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); | ||
782 | if (!rw_hdr) { | ||
783 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
784 | return -ENOMEM; | ||
785 | } | ||
786 | hdr = &rw_hdr->header; | ||
787 | nfs_pgheader_init(desc, hdr, nfs_rw_header_free); | ||
788 | atomic_inc(&hdr->refcnt); | ||
789 | ret = nfs_generic_pgio(desc, hdr); | ||
790 | if (ret == 0) | ||
791 | ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), | ||
792 | hdr->data, desc->pg_rpc_callops, | ||
793 | desc->pg_ioflags, 0); | ||
794 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
795 | hdr->completion_ops->completion(hdr); | ||
796 | return ret; | ||
797 | } | ||
798 | |||
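nfs_generic_pg_pgios() brackets the I/O with a header reference: the caller takes one ref before nfs_generic_pgio(), each nfs_pgio_data takes another, and whichever put drops the count to zero fires ->completion. A toy model of that bracket (a plain int stands in for atomic_t; the names are invented):

	#include <stdio.h>

	/* Model of the completion refcount used around nfs_generic_pgio(). */
	struct hdr {
		int refcnt;
	};

	static void put_hdr(struct hdr *h, const char *who)
	{
		if (--h->refcnt == 0)
			printf("%s: refcnt hit 0, run ->completion(hdr)\n", who);
		else
			printf("%s: refcnt now %d\n", who, h->refcnt);
	}

	int main(void)
	{
		struct hdr h = { .refcnt = 0 };

		h.refcnt++;	/* caller's ref, as in nfs_generic_pg_pgios() */
		h.refcnt++;	/* data's ref, taken in nfs_pgio_data_alloc() */
		put_hdr(&h, "caller");		/* send path drops its ref */
		put_hdr(&h, "rpc_release");	/* completion runs at I/O end */
		return 0;
	}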
331 | static bool nfs_match_open_context(const struct nfs_open_context *ctx1, | 799 | static bool nfs_match_open_context(const struct nfs_open_context *ctx1, |
332 | const struct nfs_open_context *ctx2) | 800 | const struct nfs_open_context *ctx2) |
333 | { | 801 | { |
@@ -356,18 +824,23 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, | |||
356 | struct nfs_page *req, | 824 | struct nfs_page *req, |
357 | struct nfs_pageio_descriptor *pgio) | 825 | struct nfs_pageio_descriptor *pgio) |
358 | { | 826 | { |
359 | if (!nfs_match_open_context(req->wb_context, prev->wb_context)) | 827 | size_t size; |
360 | return false; | 828 | |
361 | if (req->wb_context->dentry->d_inode->i_flock != NULL && | 829 | if (prev) { |
362 | !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) | 830 | if (!nfs_match_open_context(req->wb_context, prev->wb_context)) |
363 | return false; | 831 | return false; |
364 | if (req->wb_pgbase != 0) | 832 | if (req->wb_context->dentry->d_inode->i_flock != NULL && |
365 | return false; | 833 | !nfs_match_lock_context(req->wb_lock_context, |
366 | if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) | 834 | prev->wb_lock_context)) |
367 | return false; | 835 | return false; |
368 | if (req_offset(req) != req_offset(prev) + prev->wb_bytes) | 836 | if (req_offset(req) != req_offset(prev) + prev->wb_bytes) |
369 | return false; | 837 | return false; |
370 | return pgio->pg_ops->pg_test(pgio, prev, req); | 838 | } |
839 | size = pgio->pg_ops->pg_test(pgio, prev, req); | ||
840 | WARN_ON_ONCE(size > req->wb_bytes); | ||
841 | if (size && size < req->wb_bytes) | ||
842 | req->wb_bytes = size; | ||
843 | return size > 0; | ||
371 | } | 844 | } |
372 | 845 | ||
373 | /** | 846 | /** |
@@ -381,17 +854,16 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, | |||
381 | static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | 854 | static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, |
382 | struct nfs_page *req) | 855 | struct nfs_page *req) |
383 | { | 856 | { |
857 | struct nfs_page *prev = NULL; | ||
384 | if (desc->pg_count != 0) { | 858 | if (desc->pg_count != 0) { |
385 | struct nfs_page *prev; | ||
386 | |||
387 | prev = nfs_list_entry(desc->pg_list.prev); | 859 | prev = nfs_list_entry(desc->pg_list.prev); |
388 | if (!nfs_can_coalesce_requests(prev, req, desc)) | ||
389 | return 0; | ||
390 | } else { | 860 | } else { |
391 | if (desc->pg_ops->pg_init) | 861 | if (desc->pg_ops->pg_init) |
392 | desc->pg_ops->pg_init(desc, req); | 862 | desc->pg_ops->pg_init(desc, req); |
393 | desc->pg_base = req->wb_pgbase; | 863 | desc->pg_base = req->wb_pgbase; |
394 | } | 864 | } |
865 | if (!nfs_can_coalesce_requests(prev, req, desc)) | ||
866 | return 0; | ||
395 | nfs_list_remove_request(req); | 867 | nfs_list_remove_request(req); |
396 | nfs_list_add_request(req, &desc->pg_list); | 868 | nfs_list_add_request(req, &desc->pg_list); |
397 | desc->pg_count += req->wb_bytes; | 869 | desc->pg_count += req->wb_bytes; |
@@ -421,22 +893,73 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) | |||
421 | * @desc: destination io descriptor | 893 | * @desc: destination io descriptor |
422 | * @req: request | 894 | * @req: request |
423 | * | 895 | * |
896 | * This may split a request into subrequests which are all part of the | ||
897 | * same page group. | ||
898 | * | ||
424 | * Returns true if the request 'req' was successfully coalesced into the | 899 | * Returns true if the request 'req' was successfully coalesced into the |
425 | * existing list of pages 'desc'. | 900 | * existing list of pages 'desc'. |
426 | */ | 901 | */ |
427 | static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | 902 | static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, |
428 | struct nfs_page *req) | 903 | struct nfs_page *req) |
429 | { | 904 | { |
430 | while (!nfs_pageio_do_add_request(desc, req)) { | 905 | struct nfs_page *subreq; |
431 | desc->pg_moreio = 1; | 906 | unsigned int bytes_left = 0; |
432 | nfs_pageio_doio(desc); | 907 | unsigned int offset, pgbase; |
433 | if (desc->pg_error < 0) | 908 | |
434 | return 0; | 909 | nfs_page_group_lock(req); |
435 | desc->pg_moreio = 0; | 910 | |
436 | if (desc->pg_recoalesce) | 911 | subreq = req; |
437 | return 0; | 912 | bytes_left = subreq->wb_bytes; |
438 | } | 913 | offset = subreq->wb_offset; |
914 | pgbase = subreq->wb_pgbase; | ||
915 | |||
916 | do { | ||
917 | if (!nfs_pageio_do_add_request(desc, subreq)) { | ||
918 | /* make sure pg_test call(s) did nothing */ | ||
919 | WARN_ON_ONCE(subreq->wb_bytes != bytes_left); | ||
920 | WARN_ON_ONCE(subreq->wb_offset != offset); | ||
921 | WARN_ON_ONCE(subreq->wb_pgbase != pgbase); | ||
922 | |||
923 | nfs_page_group_unlock(req); | ||
924 | desc->pg_moreio = 1; | ||
925 | nfs_pageio_doio(desc); | ||
926 | if (desc->pg_error < 0) | ||
927 | return 0; | ||
928 | desc->pg_moreio = 0; | ||
929 | if (desc->pg_recoalesce) | ||
930 | return 0; | ||
931 | /* retry add_request for this subreq */ | ||
932 | nfs_page_group_lock(req); | ||
933 | continue; | ||
934 | } | ||
935 | |||
936 | /* check for buggy pg_test call(s) */ | ||
937 | WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE); | ||
938 | WARN_ON_ONCE(subreq->wb_bytes > bytes_left); | ||
939 | WARN_ON_ONCE(subreq->wb_bytes == 0); | ||
940 | |||
941 | bytes_left -= subreq->wb_bytes; | ||
942 | offset += subreq->wb_bytes; | ||
943 | pgbase += subreq->wb_bytes; | ||
944 | |||
945 | if (bytes_left) { | ||
946 | subreq = nfs_create_request(req->wb_context, | ||
947 | req->wb_page, | ||
948 | subreq, pgbase, bytes_left); | ||
949 | if (IS_ERR(subreq)) | ||
950 | goto err_ptr; | ||
951 | nfs_lock_request(subreq); | ||
952 | subreq->wb_offset = offset; | ||
953 | subreq->wb_index = req->wb_index; | ||
954 | } | ||
955 | } while (bytes_left > 0); | ||
956 | |||
957 | nfs_page_group_unlock(req); | ||
439 | return 1; | 958 | return 1; |
959 | err_ptr: | ||
960 | desc->pg_error = PTR_ERR(subreq); | ||
961 | nfs_page_group_unlock(req); | ||
962 | return 0; | ||
440 | } | 963 | } |
441 | 964 | ||
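__nfs_pageio_add_request() now consumes a request in pg_test-sized bites: whatever does not fit is wrapped in a subrequest whose offset and pgbase are advanced by the bytes already queued. A user-space sketch of the split arithmetic (a fixed per-RPC budget stands in for pg_test; the numbers are arbitrary):

	#include <stdio.h>

	/* Model of the splitting loop: each pass takes as many bytes as the
	 * budget allows and the remainder becomes a new subrequest at the
	 * advanced offset/pgbase. */
	int main(void)
	{
		unsigned int bytes_left = 4096;	/* original request */
		unsigned int offset = 0, pgbase = 0;
		unsigned int bsize = 1024;	/* per-RPC budget, hypothetical */

		while (bytes_left > 0) {
			unsigned int take = bytes_left < bsize ? bytes_left : bsize;

			printf("subreq: offset=%u pgbase=%u bytes=%u\n",
			       offset, pgbase, take);
			bytes_left -= take;
			offset += take;
			pgbase += take;
		}
		return 0;
	}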
442 | static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) | 965 | static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) |
@@ -535,3 +1058,13 @@ void nfs_destroy_nfspagecache(void) | |||
535 | kmem_cache_destroy(nfs_page_cachep); | 1058 | kmem_cache_destroy(nfs_page_cachep); |
536 | } | 1059 | } |
537 | 1060 | ||
1061 | static const struct rpc_call_ops nfs_pgio_common_ops = { | ||
1062 | .rpc_call_prepare = nfs_pgio_prepare, | ||
1063 | .rpc_call_done = nfs_pgio_result, | ||
1064 | .rpc_release = nfs_pgio_release, | ||
1065 | }; | ||
1066 | |||
1067 | const struct nfs_pageio_ops nfs_pgio_rw_ops = { | ||
1068 | .pg_test = nfs_generic_pg_test, | ||
1069 | .pg_doio = nfs_generic_pg_pgios, | ||
1070 | }; | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fd9536e494bc..6fdcd233d6f7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -1388,11 +1388,6 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r | |||
1388 | 1388 | ||
1389 | WARN_ON_ONCE(pgio->pg_lseg != NULL); | 1389 | WARN_ON_ONCE(pgio->pg_lseg != NULL); |
1390 | 1390 | ||
1391 | if (req->wb_offset != req->wb_pgbase) { | ||
1392 | nfs_pageio_reset_read_mds(pgio); | ||
1393 | return; | ||
1394 | } | ||
1395 | |||
1396 | if (pgio->pg_dreq == NULL) | 1391 | if (pgio->pg_dreq == NULL) |
1397 | rd_size = i_size_read(pgio->pg_inode) - req_offset(req); | 1392 | rd_size = i_size_read(pgio->pg_inode) - req_offset(req); |
1398 | else | 1393 | else |
@@ -1417,11 +1412,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||
1417 | { | 1412 | { |
1418 | WARN_ON_ONCE(pgio->pg_lseg != NULL); | 1413 | WARN_ON_ONCE(pgio->pg_lseg != NULL); |
1419 | 1414 | ||
1420 | if (req->wb_offset != req->wb_pgbase) { | ||
1421 | nfs_pageio_reset_write_mds(pgio); | ||
1422 | return; | ||
1423 | } | ||
1424 | |||
1425 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1415 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
1426 | req->wb_context, | 1416 | req->wb_context, |
1427 | req_offset(req), | 1417 | req_offset(req), |
@@ -1434,56 +1424,49 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||
1434 | } | 1424 | } |
1435 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); | 1425 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); |
1436 | 1426 | ||
1437 | void | 1427 | /* |
1438 | pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, | 1428 | * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number |
1439 | const struct nfs_pgio_completion_ops *compl_ops) | 1429 | * of bytes (maximum @req->wb_bytes) that can be coalesced. |
1440 | { | 1430 | */ |
1441 | struct nfs_server *server = NFS_SERVER(inode); | 1431 | size_t |
1442 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; | ||
1443 | |||
1444 | if (ld == NULL) | ||
1445 | nfs_pageio_init_read(pgio, inode, compl_ops); | ||
1446 | else | ||
1447 | nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); | ||
1448 | } | ||
1449 | |||
1450 | void | ||
1451 | pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, | ||
1452 | int ioflags, | ||
1453 | const struct nfs_pgio_completion_ops *compl_ops) | ||
1454 | { | ||
1455 | struct nfs_server *server = NFS_SERVER(inode); | ||
1456 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; | ||
1457 | |||
1458 | if (ld == NULL) | ||
1459 | nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); | ||
1460 | else | ||
1461 | nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags); | ||
1462 | } | ||
1463 | |||
1464 | bool | ||
1465 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 1432 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
1466 | struct nfs_page *req) | 1433 | struct nfs_page *req) |
1467 | { | 1434 | { |
1468 | if (pgio->pg_lseg == NULL) | 1435 | unsigned int size; |
1469 | return nfs_generic_pg_test(pgio, prev, req); | 1436 | u64 seg_end, req_start, seg_left; |
1437 | |||
1438 | size = nfs_generic_pg_test(pgio, prev, req); | ||
1439 | if (!size) | ||
1440 | return 0; | ||
1470 | 1441 | ||
1471 | /* | 1442 | /* |
1472 | * Test if a nfs_page is fully contained in the pnfs_layout_range. | 1443 | * 'size' contains the number of bytes left in the current page (up |
1473 | * Note that this test makes several assumptions: | 1444 | * to the original size asked for in @req->wb_bytes). |
1474 | * - that the previous nfs_page in the struct nfs_pageio_descriptor | 1445 | * |
1475 | * is known to lie within the range. | 1446 | * Calculate how many bytes are left in the layout segment |
1476 | * - that the nfs_page being tested is known to be contiguous with the | 1447 | * and if there are fewer bytes than 'size', return that instead.
1477 | * previous nfs_page. | ||
1478 | * - Layout ranges are page aligned, so we only have to test the | ||
1479 | * start offset of the request. | ||
1480 | * | 1448 | * |
1481 | * Please also note that 'end_offset' is actually the offset of the | 1449 | * Please also note that 'end_offset' is actually the offset of the |
1482 | * first byte that lies outside the pnfs_layout_range. FIXME? | 1450 | * first byte that lies outside the pnfs_layout_range. FIXME? |
1483 | * | 1451 | * |
1484 | */ | 1452 | */ |
1485 | return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, | 1453 | if (pgio->pg_lseg) { |
1486 | pgio->pg_lseg->pls_range.length); | 1454 | seg_end = end_offset(pgio->pg_lseg->pls_range.offset, |
1455 | pgio->pg_lseg->pls_range.length); | ||
1456 | req_start = req_offset(req); | ||
1457 | WARN_ON_ONCE(req_start > seg_end); | ||
1458 | /* start of request is past the last byte of this segment */ | ||
1459 | if (req_start >= seg_end) | ||
1460 | return 0; | ||
1461 | |||
1462 | /* adjust 'size' iff there are fewer bytes left in the | ||
1463 | * segment than what nfs_generic_pg_test returned */ | ||
1464 | seg_left = seg_end - req_start; | ||
1465 | if (seg_left < size) | ||
1466 | size = (unsigned int)seg_left; | ||
1467 | } | ||
1468 | |||
1469 | return size; | ||
1487 | } | 1470 | } |
1488 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); | 1471 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); |
1489 | 1472 | ||
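pnfs_generic_pg_test() thus clamps twice: first to what the generic test allows, then to the bytes left before seg_end, the first byte outside the layout segment. A worked model of the clamp (standalone function; the sample numbers are arbitrary):

	#include <stdio.h>
	#include <stdint.h>

	/* Model of the clamp in pnfs_generic_pg_test(): start with the size
	 * the generic test allows, then trim it to the bytes remaining in
	 * the layout segment. */
	static unsigned int pg_test_clamp(unsigned int size, uint64_t seg_offset,
					  uint64_t seg_length, uint64_t req_start)
	{
		uint64_t seg_end = seg_offset + seg_length; /* first byte outside */

		if (!size || req_start >= seg_end)
			return 0;
		if (seg_end - req_start < size)
			size = (unsigned int)(seg_end - req_start);
		return size;
	}

	int main(void)
	{
		/* Segment covers [0, 10240); a 4096-byte request starting at
		 * 8192 is trimmed to the 2048 bytes the segment still covers. */
		printf("%u\n", pg_test_clamp(4096, 0, 10240, 8192));	/* 2048 */
		printf("%u\n", pg_test_clamp(4096, 0, 10240, 10240));	/* 0 */
		return 0;
	}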
@@ -1496,7 +1479,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, | |||
1496 | LIST_HEAD(failed); | 1479 | LIST_HEAD(failed); |
1497 | 1480 | ||
1498 | /* Resend all requests through the MDS */ | 1481 | /* Resend all requests through the MDS */ |
1499 | nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); | 1482 | nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); |
1500 | pgio.pg_dreq = dreq; | 1483 | pgio.pg_dreq = dreq; |
1501 | while (!list_empty(head)) { | 1484 | while (!list_empty(head)) { |
1502 | struct nfs_page *req = nfs_list_entry(head->next); | 1485 | struct nfs_page *req = nfs_list_entry(head->next); |
@@ -1519,7 +1502,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, | |||
1519 | } | 1502 | } |
1520 | EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); | 1503 | EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); |
1521 | 1504 | ||
1522 | static void pnfs_ld_handle_write_error(struct nfs_write_data *data) | 1505 | static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) |
1523 | { | 1506 | { |
1524 | struct nfs_pgio_header *hdr = data->header; | 1507 | struct nfs_pgio_header *hdr = data->header; |
1525 | 1508 | ||
@@ -1538,7 +1521,7 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) | |||
1538 | /* | 1521 | /* |
1539 | * Called by non rpc-based layout drivers | 1522 | * Called by non rpc-based layout drivers |
1540 | */ | 1523 | */ |
1541 | void pnfs_ld_write_done(struct nfs_write_data *data) | 1524 | void pnfs_ld_write_done(struct nfs_pgio_data *data) |
1542 | { | 1525 | { |
1543 | struct nfs_pgio_header *hdr = data->header; | 1526 | struct nfs_pgio_header *hdr = data->header; |
1544 | 1527 | ||
@@ -1554,7 +1537,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done); | |||
1554 | 1537 | ||
1555 | static void | 1538 | static void |
1556 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | 1539 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, |
1557 | struct nfs_write_data *data) | 1540 | struct nfs_pgio_data *data) |
1558 | { | 1541 | { |
1559 | struct nfs_pgio_header *hdr = data->header; | 1542 | struct nfs_pgio_header *hdr = data->header; |
1560 | 1543 | ||
@@ -1563,11 +1546,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | |||
1563 | nfs_pageio_reset_write_mds(desc); | 1546 | nfs_pageio_reset_write_mds(desc); |
1564 | desc->pg_recoalesce = 1; | 1547 | desc->pg_recoalesce = 1; |
1565 | } | 1548 | } |
1566 | nfs_writedata_release(data); | 1549 | nfs_pgio_data_release(data); |
1567 | } | 1550 | } |
1568 | 1551 | ||
1569 | static enum pnfs_try_status | 1552 | static enum pnfs_try_status |
1570 | pnfs_try_to_write_data(struct nfs_write_data *wdata, | 1553 | pnfs_try_to_write_data(struct nfs_pgio_data *wdata, |
1571 | const struct rpc_call_ops *call_ops, | 1554 | const struct rpc_call_ops *call_ops, |
1572 | struct pnfs_layout_segment *lseg, | 1555 | struct pnfs_layout_segment *lseg, |
1573 | int how) | 1556 | int how) |
@@ -1589,41 +1572,36 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, | |||
1589 | } | 1572 | } |
1590 | 1573 | ||
1591 | static void | 1574 | static void |
1592 | pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) | 1575 | pnfs_do_write(struct nfs_pageio_descriptor *desc, |
1576 | struct nfs_pgio_header *hdr, int how) | ||
1593 | { | 1577 | { |
1594 | struct nfs_write_data *data; | 1578 | struct nfs_pgio_data *data = hdr->data; |
1595 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | 1579 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; |
1596 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 1580 | struct pnfs_layout_segment *lseg = desc->pg_lseg; |
1581 | enum pnfs_try_status trypnfs; | ||
1597 | 1582 | ||
1598 | desc->pg_lseg = NULL; | 1583 | desc->pg_lseg = NULL; |
1599 | while (!list_empty(head)) { | 1584 | trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); |
1600 | enum pnfs_try_status trypnfs; | 1585 | if (trypnfs == PNFS_NOT_ATTEMPTED) |
1601 | 1586 | pnfs_write_through_mds(desc, data); | |
1602 | data = list_first_entry(head, struct nfs_write_data, list); | ||
1603 | list_del_init(&data->list); | ||
1604 | |||
1605 | trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); | ||
1606 | if (trypnfs == PNFS_NOT_ATTEMPTED) | ||
1607 | pnfs_write_through_mds(desc, data); | ||
1608 | } | ||
1609 | pnfs_put_lseg(lseg); | 1587 | pnfs_put_lseg(lseg); |
1610 | } | 1588 | } |
1611 | 1589 | ||
1612 | static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) | 1590 | static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) |
1613 | { | 1591 | { |
1614 | pnfs_put_lseg(hdr->lseg); | 1592 | pnfs_put_lseg(hdr->lseg); |
1615 | nfs_writehdr_free(hdr); | 1593 | nfs_rw_header_free(hdr); |
1616 | } | 1594 | } |
1617 | EXPORT_SYMBOL_GPL(pnfs_writehdr_free); | 1595 | EXPORT_SYMBOL_GPL(pnfs_writehdr_free); |
1618 | 1596 | ||
1619 | int | 1597 | int |
1620 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | 1598 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) |
1621 | { | 1599 | { |
1622 | struct nfs_write_header *whdr; | 1600 | struct nfs_rw_header *whdr; |
1623 | struct nfs_pgio_header *hdr; | 1601 | struct nfs_pgio_header *hdr; |
1624 | int ret; | 1602 | int ret; |
1625 | 1603 | ||
1626 | whdr = nfs_writehdr_alloc(); | 1604 | whdr = nfs_rw_header_alloc(desc->pg_rw_ops); |
1627 | if (!whdr) { | 1605 | if (!whdr) { |
1628 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 1606 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); |
1629 | pnfs_put_lseg(desc->pg_lseg); | 1607 | pnfs_put_lseg(desc->pg_lseg); |
@@ -1634,12 +1612,12 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | |||
1634 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); | 1612 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); |
1635 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | 1613 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); |
1636 | atomic_inc(&hdr->refcnt); | 1614 | atomic_inc(&hdr->refcnt); |
1637 | ret = nfs_generic_flush(desc, hdr); | 1615 | ret = nfs_generic_pgio(desc, hdr); |
1638 | if (ret != 0) { | 1616 | if (ret != 0) { |
1639 | pnfs_put_lseg(desc->pg_lseg); | 1617 | pnfs_put_lseg(desc->pg_lseg); |
1640 | desc->pg_lseg = NULL; | 1618 | desc->pg_lseg = NULL; |
1641 | } else | 1619 | } else |
1642 | pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); | 1620 | pnfs_do_write(desc, hdr, desc->pg_ioflags); |
1643 | if (atomic_dec_and_test(&hdr->refcnt)) | 1621 | if (atomic_dec_and_test(&hdr->refcnt)) |
1644 | hdr->completion_ops->completion(hdr); | 1622 | hdr->completion_ops->completion(hdr); |
1645 | return ret; | 1623 | return ret; |
@@ -1655,7 +1633,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, | |||
1655 | LIST_HEAD(failed); | 1633 | LIST_HEAD(failed); |
1656 | 1634 | ||
1657 | /* Resend all requests through the MDS */ | 1635 | /* Resend all requests through the MDS */ |
1658 | nfs_pageio_init_read(&pgio, inode, compl_ops); | 1636 | nfs_pageio_init_read(&pgio, inode, true, compl_ops); |
1659 | pgio.pg_dreq = dreq; | 1637 | pgio.pg_dreq = dreq; |
1660 | while (!list_empty(head)) { | 1638 | while (!list_empty(head)) { |
1661 | struct nfs_page *req = nfs_list_entry(head->next); | 1639 | struct nfs_page *req = nfs_list_entry(head->next); |
@@ -1674,7 +1652,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, | |||
1674 | } | 1652 | } |
1675 | EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); | 1653 | EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); |
1676 | 1654 | ||
1677 | static void pnfs_ld_handle_read_error(struct nfs_read_data *data) | 1655 | static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) |
1678 | { | 1656 | { |
1679 | struct nfs_pgio_header *hdr = data->header; | 1657 | struct nfs_pgio_header *hdr = data->header; |
1680 | 1658 | ||
@@ -1693,7 +1671,7 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) | |||
1693 | /* | 1671 | /* |
1694 | * Called by non rpc-based layout drivers | 1672 | * Called by non rpc-based layout drivers |
1695 | */ | 1673 | */ |
1696 | void pnfs_ld_read_done(struct nfs_read_data *data) | 1674 | void pnfs_ld_read_done(struct nfs_pgio_data *data) |
1697 | { | 1675 | { |
1698 | struct nfs_pgio_header *hdr = data->header; | 1676 | struct nfs_pgio_header *hdr = data->header; |
1699 | 1677 | ||
@@ -1709,7 +1687,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done); | |||
1709 | 1687 | ||
1710 | static void | 1688 | static void |
1711 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | 1689 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, |
1712 | struct nfs_read_data *data) | 1690 | struct nfs_pgio_data *data) |
1713 | { | 1691 | { |
1714 | struct nfs_pgio_header *hdr = data->header; | 1692 | struct nfs_pgio_header *hdr = data->header; |
1715 | 1693 | ||
@@ -1718,14 +1696,14 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | |||
1718 | nfs_pageio_reset_read_mds(desc); | 1696 | nfs_pageio_reset_read_mds(desc); |
1719 | desc->pg_recoalesce = 1; | 1697 | desc->pg_recoalesce = 1; |
1720 | } | 1698 | } |
1721 | nfs_readdata_release(data); | 1699 | nfs_pgio_data_release(data); |
1722 | } | 1700 | } |
1723 | 1701 | ||
1724 | /* | 1702 | /* |
1725 | * Call the appropriate parallel I/O subsystem read function. | 1703 | * Call the appropriate parallel I/O subsystem read function. |
1726 | */ | 1704 | */ |
1727 | static enum pnfs_try_status | 1705 | static enum pnfs_try_status |
1728 | pnfs_try_to_read_data(struct nfs_read_data *rdata, | 1706 | pnfs_try_to_read_data(struct nfs_pgio_data *rdata, |
1729 | const struct rpc_call_ops *call_ops, | 1707 | const struct rpc_call_ops *call_ops, |
1730 | struct pnfs_layout_segment *lseg) | 1708 | struct pnfs_layout_segment *lseg) |
1731 | { | 1709 | { |
@@ -1747,41 +1725,35 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, | |||
1747 | } | 1725 | } |
1748 | 1726 | ||
1749 | static void | 1727 | static void |
1750 | pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) | 1728 | pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) |
1751 | { | 1729 | { |
1752 | struct nfs_read_data *data; | 1730 | struct nfs_pgio_data *data = hdr->data; |
1753 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | 1731 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; |
1754 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 1732 | struct pnfs_layout_segment *lseg = desc->pg_lseg; |
1733 | enum pnfs_try_status trypnfs; | ||
1755 | 1734 | ||
1756 | desc->pg_lseg = NULL; | 1735 | desc->pg_lseg = NULL; |
1757 | while (!list_empty(head)) { | 1736 | trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); |
1758 | enum pnfs_try_status trypnfs; | 1737 | if (trypnfs == PNFS_NOT_ATTEMPTED) |
1759 | 1738 | pnfs_read_through_mds(desc, data); | |
1760 | data = list_first_entry(head, struct nfs_read_data, list); | ||
1761 | list_del_init(&data->list); | ||
1762 | |||
1763 | trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); | ||
1764 | if (trypnfs == PNFS_NOT_ATTEMPTED) | ||
1765 | pnfs_read_through_mds(desc, data); | ||
1766 | } | ||
1767 | pnfs_put_lseg(lseg); | 1739 | pnfs_put_lseg(lseg); |
1768 | } | 1740 | } |
1769 | 1741 | ||
1770 | static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) | 1742 | static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) |
1771 | { | 1743 | { |
1772 | pnfs_put_lseg(hdr->lseg); | 1744 | pnfs_put_lseg(hdr->lseg); |
1773 | nfs_readhdr_free(hdr); | 1745 | nfs_rw_header_free(hdr); |
1774 | } | 1746 | } |
1775 | EXPORT_SYMBOL_GPL(pnfs_readhdr_free); | 1747 | EXPORT_SYMBOL_GPL(pnfs_readhdr_free); |
1776 | 1748 | ||
1777 | int | 1749 | int |
1778 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | 1750 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) |
1779 | { | 1751 | { |
1780 | struct nfs_read_header *rhdr; | 1752 | struct nfs_rw_header *rhdr; |
1781 | struct nfs_pgio_header *hdr; | 1753 | struct nfs_pgio_header *hdr; |
1782 | int ret; | 1754 | int ret; |
1783 | 1755 | ||
1784 | rhdr = nfs_readhdr_alloc(); | 1756 | rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); |
1785 | if (!rhdr) { | 1757 | if (!rhdr) { |
1786 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 1758 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); |
1787 | ret = -ENOMEM; | 1759 | ret = -ENOMEM; |
@@ -1793,12 +1765,12 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | |||
1793 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); | 1765 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); |
1794 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | 1766 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); |
1795 | atomic_inc(&hdr->refcnt); | 1767 | atomic_inc(&hdr->refcnt); |
1796 | ret = nfs_generic_pagein(desc, hdr); | 1768 | ret = nfs_generic_pgio(desc, hdr); |
1797 | if (ret != 0) { | 1769 | if (ret != 0) { |
1798 | pnfs_put_lseg(desc->pg_lseg); | 1770 | pnfs_put_lseg(desc->pg_lseg); |
1799 | desc->pg_lseg = NULL; | 1771 | desc->pg_lseg = NULL; |
1800 | } else | 1772 | } else |
1801 | pnfs_do_multiple_reads(desc, &hdr->rpc_list); | 1773 | pnfs_do_read(desc, hdr); |
1802 | if (atomic_dec_and_test(&hdr->refcnt)) | 1774 | if (atomic_dec_and_test(&hdr->refcnt)) |
1803 | hdr->completion_ops->completion(hdr); | 1775 | hdr->completion_ops->completion(hdr); |
1804 | return ret; | 1776 | return ret; |
@@ -1848,7 +1820,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) | |||
1848 | EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); | 1820 | EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); |
1849 | 1821 | ||
1850 | void | 1822 | void |
1851 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) | 1823 | pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) |
1852 | { | 1824 | { |
1853 | struct nfs_pgio_header *hdr = wdata->header; | 1825 | struct nfs_pgio_header *hdr = wdata->header; |
1854 | struct inode *inode = hdr->inode; | 1826 | struct inode *inode = hdr->inode; |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index c3058a076596..4fb309a2b4c4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type { | |||
113 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted | 113 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted |
114 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS | 114 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS |
115 | */ | 115 | */ |
116 | enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); | 116 | enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); |
117 | enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); | 117 | enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); |
118 | 118 | ||
119 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); | 119 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); |
120 | 120 | ||
@@ -180,11 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | |||
180 | void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); | 180 | void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); |
181 | void pnfs_put_lseg(struct pnfs_layout_segment *lseg); | 181 | void pnfs_put_lseg(struct pnfs_layout_segment *lseg); |
182 | 182 | ||
183 | void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, | ||
184 | const struct nfs_pgio_completion_ops *); | ||
185 | void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, | ||
186 | int, const struct nfs_pgio_completion_ops *); | ||
187 | |||
188 | void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); | 183 | void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); |
189 | void unset_pnfs_layoutdriver(struct nfs_server *); | 184 | void unset_pnfs_layoutdriver(struct nfs_server *); |
190 | void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); | 185 | void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); |
@@ -192,7 +187,8 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); | |||
192 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | 187 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, |
193 | struct nfs_page *req, u64 wb_size); | 188 | struct nfs_page *req, u64 wb_size); |
194 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); | 189 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); |
195 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); | 190 | size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, |
191 | struct nfs_page *prev, struct nfs_page *req); | ||
196 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); | 192 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); |
197 | struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); | 193 | struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); |
198 | void pnfs_free_lseg_list(struct list_head *tmp_list); | 194 | void pnfs_free_lseg_list(struct list_head *tmp_list); |
@@ -217,13 +213,13 @@ bool pnfs_roc(struct inode *ino); | |||
217 | void pnfs_roc_release(struct inode *ino); | 213 | void pnfs_roc_release(struct inode *ino); |
218 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 214 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
219 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); | 215 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); |
220 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); | 216 | void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); |
221 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | 217 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); |
222 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 218 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
223 | int _pnfs_return_layout(struct inode *); | 219 | int _pnfs_return_layout(struct inode *); |
224 | int pnfs_commit_and_return_layout(struct inode *); | 220 | int pnfs_commit_and_return_layout(struct inode *); |
225 | void pnfs_ld_write_done(struct nfs_write_data *); | 221 | void pnfs_ld_write_done(struct nfs_pgio_data *); |
226 | void pnfs_ld_read_done(struct nfs_read_data *); | 222 | void pnfs_ld_read_done(struct nfs_pgio_data *); |
227 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, | 223 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, |
228 | struct nfs_open_context *ctx, | 224 | struct nfs_open_context *ctx, |
229 | loff_t pos, | 225 | loff_t pos, |
@@ -461,18 +457,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) | |||
461 | { | 457 | { |
462 | } | 458 | } |
463 | 459 | ||
464 | static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, | ||
465 | const struct nfs_pgio_completion_ops *compl_ops) | ||
466 | { | ||
467 | nfs_pageio_init_read(pgio, inode, compl_ops); | ||
468 | } | ||
469 | |||
470 | static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, | ||
471 | const struct nfs_pgio_completion_ops *compl_ops) | ||
472 | { | ||
473 | nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); | ||
474 | } | ||
475 | |||
476 | static inline int | 460 | static inline int |
477 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, | 461 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, |
478 | struct nfs_commit_info *cinfo) | 462 | struct nfs_commit_info *cinfo) |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index e55ce9e8b034..c171ce1a8a30 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -578,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | |||
578 | return 0; | 578 | return 0; |
579 | } | 579 | } |
580 | 580 | ||
581 | static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) | 581 | static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) |
582 | { | 582 | { |
583 | struct inode *inode = data->header->inode; | 583 | struct inode *inode = data->header->inode; |
584 | 584 | ||
@@ -594,18 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
594 | return 0; | 594 | return 0; |
595 | } | 595 | } |
596 | 596 | ||
597 | static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) | 597 | static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
598 | { | 598 | { |
599 | msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; | 599 | msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; |
600 | } | 600 | } |
601 | 601 | ||
602 | static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | 602 | static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) |
603 | { | 603 | { |
604 | rpc_call_start(task); | 604 | rpc_call_start(task); |
605 | return 0; | 605 | return 0; |
606 | } | 606 | } |
607 | 607 | ||
608 | static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) | 608 | static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) |
609 | { | 609 | { |
610 | struct inode *inode = data->header->inode; | 610 | struct inode *inode = data->header->inode; |
611 | 611 | ||
@@ -614,19 +614,13 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
614 | return 0; | 614 | return 0; |
615 | } | 615 | } |
616 | 616 | ||
617 | static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) | 617 | static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
618 | { | 618 | { |
619 | /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ | 619 | /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ |
620 | data->args.stable = NFS_FILE_SYNC; | 620 | data->args.stable = NFS_FILE_SYNC; |
621 | msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; | 621 | msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; |
622 | } | 622 | } |
623 | 623 | ||
624 | static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | ||
625 | { | ||
626 | rpc_call_start(task); | ||
627 | return 0; | ||
628 | } | ||
629 | |||
630 | static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) | 624 | static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) |
631 | { | 625 | { |
632 | BUG(); | 626 | BUG(); |
@@ -734,13 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = { | |||
734 | .fsinfo = nfs_proc_fsinfo, | 728 | .fsinfo = nfs_proc_fsinfo, |
735 | .pathconf = nfs_proc_pathconf, | 729 | .pathconf = nfs_proc_pathconf, |
736 | .decode_dirent = nfs2_decode_dirent, | 730 | .decode_dirent = nfs2_decode_dirent, |
731 | .pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare, | ||
737 | .read_setup = nfs_proc_read_setup, | 732 | .read_setup = nfs_proc_read_setup, |
738 | .read_pageio_init = nfs_pageio_init_read, | ||
739 | .read_rpc_prepare = nfs_proc_read_rpc_prepare, | ||
740 | .read_done = nfs_read_done, | 733 | .read_done = nfs_read_done, |
741 | .write_setup = nfs_proc_write_setup, | 734 | .write_setup = nfs_proc_write_setup, |
742 | .write_pageio_init = nfs_pageio_init_write, | ||
743 | .write_rpc_prepare = nfs_proc_write_rpc_prepare, | ||
744 | .write_done = nfs_write_done, | 735 | .write_done = nfs_write_done, |
745 | .commit_setup = nfs_proc_commit_setup, | 736 | .commit_setup = nfs_proc_commit_setup, |
746 | .commit_rpc_prepare = nfs_proc_commit_rpc_prepare, | 737 | .commit_rpc_prepare = nfs_proc_commit_rpc_prepare, |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 411aedda14bb..e818a475ca64 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -24,85 +24,24 @@ | |||
24 | #include "internal.h" | 24 | #include "internal.h" |
25 | #include "iostat.h" | 25 | #include "iostat.h" |
26 | #include "fscache.h" | 26 | #include "fscache.h" |
27 | #include "pnfs.h" | ||
27 | 28 | ||
28 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE | 29 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE |
29 | 30 | ||
30 | static const struct nfs_pageio_ops nfs_pageio_read_ops; | ||
31 | static const struct rpc_call_ops nfs_read_common_ops; | ||
32 | static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; | 31 | static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; |
32 | static const struct nfs_rw_ops nfs_rw_read_ops; | ||
33 | 33 | ||
34 | static struct kmem_cache *nfs_rdata_cachep; | 34 | static struct kmem_cache *nfs_rdata_cachep; |
35 | 35 | ||
36 | struct nfs_read_header *nfs_readhdr_alloc(void) | 36 | static struct nfs_rw_header *nfs_readhdr_alloc(void) |
37 | { | 37 | { |
38 | struct nfs_read_header *rhdr; | 38 | return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); |
39 | |||
40 | rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); | ||
41 | if (rhdr) { | ||
42 | struct nfs_pgio_header *hdr = &rhdr->header; | ||
43 | |||
44 | INIT_LIST_HEAD(&hdr->pages); | ||
45 | INIT_LIST_HEAD(&hdr->rpc_list); | ||
46 | spin_lock_init(&hdr->lock); | ||
47 | atomic_set(&hdr->refcnt, 0); | ||
48 | } | ||
49 | return rhdr; | ||
50 | } | 39 | } |
51 | EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); | ||
52 | 40 | ||
53 | static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, | 41 | static void nfs_readhdr_free(struct nfs_rw_header *rhdr) |
54 | unsigned int pagecount) | ||
55 | { | 42 | { |
56 | struct nfs_read_data *data, *prealloc; | ||
57 | |||
58 | prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data; | ||
59 | if (prealloc->header == NULL) | ||
60 | data = prealloc; | ||
61 | else | ||
62 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
63 | if (!data) | ||
64 | goto out; | ||
65 | |||
66 | if (nfs_pgarray_set(&data->pages, pagecount)) { | ||
67 | data->header = hdr; | ||
68 | atomic_inc(&hdr->refcnt); | ||
69 | } else { | ||
70 | if (data != prealloc) | ||
71 | kfree(data); | ||
72 | data = NULL; | ||
73 | } | ||
74 | out: | ||
75 | return data; | ||
76 | } | ||
77 | |||
78 | void nfs_readhdr_free(struct nfs_pgio_header *hdr) | ||
79 | { | ||
80 | struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header); | ||
81 | |||
82 | kmem_cache_free(nfs_rdata_cachep, rhdr); | 43 | kmem_cache_free(nfs_rdata_cachep, rhdr); |
83 | } | 44 | } |
84 | EXPORT_SYMBOL_GPL(nfs_readhdr_free); | ||
85 | |||
86 | void nfs_readdata_release(struct nfs_read_data *rdata) | ||
87 | { | ||
88 | struct nfs_pgio_header *hdr = rdata->header; | ||
89 | struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header); | ||
90 | |||
91 | put_nfs_open_context(rdata->args.context); | ||
92 | if (rdata->pages.pagevec != rdata->pages.page_array) | ||
93 | kfree(rdata->pages.pagevec); | ||
94 | if (rdata == &read_header->rpc_data) { | ||
95 | rdata->header = NULL; | ||
96 | rdata = NULL; | ||
97 | } | ||
98 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
99 | hdr->completion_ops->completion(hdr); | ||
100 | /* Note: we only free the rpc_task after callbacks are done. | ||
101 | * See the comment in rpc_free_task() for why | ||
102 | */ | ||
103 | kfree(rdata); | ||
104 | } | ||
105 | EXPORT_SYMBOL_GPL(nfs_readdata_release); | ||
106 | 45 | ||
107 | static | 46 | static |
108 | int nfs_return_empty_page(struct page *page) | 47 | int nfs_return_empty_page(struct page *page) |
@@ -114,17 +53,24 @@ int nfs_return_empty_page(struct page *page) | |||
114 | } | 53 | } |
115 | 54 | ||
116 | void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, | 55 | void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, |
117 | struct inode *inode, | 56 | struct inode *inode, bool force_mds, |
118 | const struct nfs_pgio_completion_ops *compl_ops) | 57 | const struct nfs_pgio_completion_ops *compl_ops) |
119 | { | 58 | { |
120 | nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, | 59 | struct nfs_server *server = NFS_SERVER(inode); |
121 | NFS_SERVER(inode)->rsize, 0); | 60 | const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops; |
61 | |||
62 | #ifdef CONFIG_NFS_V4_1 | ||
63 | if (server->pnfs_curr_ld && !force_mds) | ||
64 | pg_ops = server->pnfs_curr_ld->pg_read_ops; | ||
65 | #endif | ||
66 | nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops, | ||
67 | server->rsize, 0); | ||
122 | } | 68 | } |
123 | EXPORT_SYMBOL_GPL(nfs_pageio_init_read); | 69 | EXPORT_SYMBOL_GPL(nfs_pageio_init_read); |
124 | 70 | ||
125 | void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) | 71 | void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) |
126 | { | 72 | { |
127 | pgio->pg_ops = &nfs_pageio_read_ops; | 73 | pgio->pg_ops = &nfs_pgio_rw_ops; |
128 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; | 74 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; |
129 | } | 75 | } |
130 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); | 76 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); |
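The reworked init helper makes one choice: use the pNFS layout driver's pg_read_ops when a driver is registered and the caller has not forced I/O through the metadata server, otherwise fall back to the shared nfs_pgio_rw_ops with the server's rsize. A standalone model of just that selection, with toy types in place of the kernel's (the kernel consults server->pnfs_curr_ld):

    #include <stdbool.h>
    #include <stdio.h>

    struct pageio_ops { const char *name; };

    static const struct pageio_ops generic_rw_ops = { "nfs_pgio_rw_ops" };
    static const struct pageio_ops pnfs_read_ops  = { "pg_read_ops" };

    static const struct pageio_ops *
    select_ops(bool have_layout_driver, bool force_mds)
    {
            /* Generic MDS ops unless a pNFS layout driver is present
             * and the caller did not force MDS I/O. */
            if (have_layout_driver && !force_mds)
                    return &pnfs_read_ops;
            return &generic_rw_ops;
    }

    int main(void)
    {
            printf("%s\n", select_ops(true, false)->name); /* pg_read_ops */
            printf("%s\n", select_ops(true, true)->name);  /* nfs_pgio_rw_ops */
            return 0;
    }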
@@ -139,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
139 | len = nfs_page_length(page); | 85 | len = nfs_page_length(page); |
140 | if (len == 0) | 86 | if (len == 0) |
141 | return nfs_return_empty_page(page); | 87 | return nfs_return_empty_page(page); |
142 | new = nfs_create_request(ctx, inode, page, 0, len); | 88 | new = nfs_create_request(ctx, page, NULL, 0, len); |
143 | if (IS_ERR(new)) { | 89 | if (IS_ERR(new)) { |
144 | unlock_page(page); | 90 | unlock_page(page); |
145 | return PTR_ERR(new); | 91 | return PTR_ERR(new); |
@@ -147,7 +93,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
147 | if (len < PAGE_CACHE_SIZE) | 93 | if (len < PAGE_CACHE_SIZE) |
148 | zero_user_segment(page, len, PAGE_CACHE_SIZE); | 94 | zero_user_segment(page, len, PAGE_CACHE_SIZE); |
149 | 95 | ||
150 | NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); | 96 | nfs_pageio_init_read(&pgio, inode, false, |
97 | &nfs_async_read_completion_ops); | ||
151 | nfs_pageio_add_request(&pgio, new); | 98 | nfs_pageio_add_request(&pgio, new); |
152 | nfs_pageio_complete(&pgio); | 99 | nfs_pageio_complete(&pgio); |
153 | NFS_I(inode)->read_io += pgio.pg_bytes_written; | 100 | NFS_I(inode)->read_io += pgio.pg_bytes_written; |
@@ -158,10 +105,16 @@ static void nfs_readpage_release(struct nfs_page *req) | |||
158 | { | 105 | { |
159 | struct inode *d_inode = req->wb_context->dentry->d_inode; | 106 | struct inode *d_inode = req->wb_context->dentry->d_inode; |
160 | 107 | ||
161 | if (PageUptodate(req->wb_page)) | 108 | dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id, |
162 | nfs_readpage_to_fscache(d_inode, req->wb_page, 0); | 109 | (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes, |
110 | (long long)req_offset(req)); | ||
163 | 111 | ||
164 | unlock_page(req->wb_page); | 112 | if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { |
113 | if (PageUptodate(req->wb_page)) | ||
114 | nfs_readpage_to_fscache(d_inode, req->wb_page, 0); | ||
115 | |||
116 | unlock_page(req->wb_page); | ||
117 | } | ||
165 | 118 | ||
166 | dprintk("NFS: read done (%s/%Lu %d@%Ld)\n", | 119 | dprintk("NFS: read done (%s/%Lu %d@%Ld)\n", |
167 | req->wb_context->dentry->d_inode->i_sb->s_id, | 120 | req->wb_context->dentry->d_inode->i_sb->s_id, |
@@ -171,7 +124,12 @@ static void nfs_readpage_release(struct nfs_page *req) | |||
171 | nfs_release_request(req); | 124 | nfs_release_request(req); |
172 | } | 125 | } |
173 | 126 | ||
174 | /* Note io was page aligned */ | 127 | static void nfs_page_group_set_uptodate(struct nfs_page *req) |
128 | { | ||
129 | if (nfs_page_group_sync_on_bit(req, PG_UPTODATE)) | ||
130 | SetPageUptodate(req->wb_page); | ||
131 | } | ||
132 | |||
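nfs_page_group_sync_on_bit() appears here for the first time in this file; judging by its callers, it records the bit on this subrequest and returns true only for the request that completes the group, so the page-wide action runs exactly once. A toy counter model of that inferred behavior (the kernel version synchronizes through wb_flags under the group lock):

    #include <stdbool.h>
    #include <stdio.h>

    struct group { int nmembers; int nset; };

    static bool group_sync_on_bit(struct group *g)
    {
            return ++g->nset == g->nmembers;    /* true exactly once */
    }

    int main(void)
    {
            struct group g = { .nmembers = 3, .nset = 0 };

            for (int i = 0; i < 3; i++)
                    if (group_sync_on_bit(&g))
                            printf("subrequest %d completes the group\n", i);
            return 0;
    }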
175 | static void nfs_read_completion(struct nfs_pgio_header *hdr) | 133 | static void nfs_read_completion(struct nfs_pgio_header *hdr) |
176 | { | 134 | { |
177 | unsigned long bytes = 0; | 135 | unsigned long bytes = 0; |
@@ -181,21 +139,32 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) | |||
181 | while (!list_empty(&hdr->pages)) { | 139 | while (!list_empty(&hdr->pages)) { |
182 | struct nfs_page *req = nfs_list_entry(hdr->pages.next); | 140 | struct nfs_page *req = nfs_list_entry(hdr->pages.next); |
183 | struct page *page = req->wb_page; | 141 | struct page *page = req->wb_page; |
142 | unsigned long start = req->wb_pgbase; | ||
143 | unsigned long end = req->wb_pgbase + req->wb_bytes; | ||
184 | 144 | ||
185 | if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { | 145 | if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { |
186 | if (bytes > hdr->good_bytes) | 146 | /* note: regions of the page not covered by a |
187 | zero_user(page, 0, PAGE_SIZE); | 147 | * request are zeroed in nfs_readpage_async / |
188 | else if (hdr->good_bytes - bytes < PAGE_SIZE) | 148 | * readpage_async_filler */ |
189 | zero_user_segment(page, | 149 | if (bytes > hdr->good_bytes) { |
190 | hdr->good_bytes & ~PAGE_MASK, | 150 | /* nothing in this request was good, so zero |
191 | PAGE_SIZE); | 151 | * the full extent of the request */ |
152 | zero_user_segment(page, start, end); | ||
153 | |||
154 | } else if (hdr->good_bytes - bytes < req->wb_bytes) { | ||
155 | /* part of this request has good bytes, but | ||
156 | * not all. zero the bad bytes */ | ||
157 | start += hdr->good_bytes - bytes; | ||
158 | WARN_ON(start < req->wb_pgbase); | ||
159 | zero_user_segment(page, start, end); | ||
160 | } | ||
192 | } | 161 | } |
193 | bytes += req->wb_bytes; | 162 | bytes += req->wb_bytes; |
194 | if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { | 163 | if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { |
195 | if (bytes <= hdr->good_bytes) | 164 | if (bytes <= hdr->good_bytes) |
196 | SetPageUptodate(page); | 165 | nfs_page_group_set_uptodate(req); |
197 | } else | 166 | } else |
198 | SetPageUptodate(page); | 167 | nfs_page_group_set_uptodate(req); |
199 | nfs_list_remove_request(req); | 168 | nfs_list_remove_request(req); |
200 | nfs_readpage_release(req); | 169 | nfs_readpage_release(req); |
201 | } | 170 | } |
@@ -203,95 +172,14 @@ out: | |||
203 | hdr->release(hdr); | 172 | hdr->release(hdr); |
204 | } | 173 | } |
205 | 174 | ||
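On NFS_IOHDR_EOF, the loop above decides per subrequest how much to zero: bytes holds the running total of the requests already walked, so a request is entirely bad once bytes exceeds hdr->good_bytes, and partially bad when the remaining good bytes fall short of wb_bytes. A worked model with three 512-byte subrequests and 700 good bytes (values chosen purely for illustration):

    #include <stdio.h>

    int main(void)
    {
            unsigned long good_bytes = 700, bytes = 0;
            unsigned int wb_bytes = 512;

            for (int i = 0; i < 3; i++) {
                    if (bytes > good_bytes)
                            printf("req%d: fully bad, zero all %u bytes\n",
                                   i, wb_bytes);
                    else if (good_bytes - bytes < wb_bytes)
                            printf("req%d: zero from offset %lu\n",
                                   i, good_bytes - bytes);
                    else
                            printf("req%d: fully good\n", i);
                    bytes += wb_bytes;
            }
            return 0;
    }
    /* req0: fully good; req1: zero from offset 188; req2: fully bad */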
206 | int nfs_initiate_read(struct rpc_clnt *clnt, | 175 | static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, |
207 | struct nfs_read_data *data, | 176 | struct rpc_task_setup *task_setup_data, int how) |
208 | const struct rpc_call_ops *call_ops, int flags) | ||
209 | { | 177 | { |
210 | struct inode *inode = data->header->inode; | 178 | struct inode *inode = data->header->inode; |
211 | int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; | 179 | int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; |
212 | struct rpc_task *task; | ||
213 | struct rpc_message msg = { | ||
214 | .rpc_argp = &data->args, | ||
215 | .rpc_resp = &data->res, | ||
216 | .rpc_cred = data->header->cred, | ||
217 | }; | ||
218 | struct rpc_task_setup task_setup_data = { | ||
219 | .task = &data->task, | ||
220 | .rpc_client = clnt, | ||
221 | .rpc_message = &msg, | ||
222 | .callback_ops = call_ops, | ||
223 | .callback_data = data, | ||
224 | .workqueue = nfsiod_workqueue, | ||
225 | .flags = RPC_TASK_ASYNC | swap_flags | flags, | ||
226 | }; | ||
227 | 180 | ||
228 | /* Set up the initial task struct. */ | 181 | task_setup_data->flags |= swap_flags; |
229 | NFS_PROTO(inode)->read_setup(data, &msg); | 182 | NFS_PROTO(inode)->read_setup(data, msg); |
230 | |||
231 | dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ " | ||
232 | "offset %llu)\n", | ||
233 | data->task.tk_pid, | ||
234 | inode->i_sb->s_id, | ||
235 | (unsigned long long)NFS_FILEID(inode), | ||
236 | data->args.count, | ||
237 | (unsigned long long)data->args.offset); | ||
238 | |||
239 | task = rpc_run_task(&task_setup_data); | ||
240 | if (IS_ERR(task)) | ||
241 | return PTR_ERR(task); | ||
242 | rpc_put_task(task); | ||
243 | return 0; | ||
244 | } | ||
245 | EXPORT_SYMBOL_GPL(nfs_initiate_read); | ||
246 | |||
247 | /* | ||
248 | * Set up the NFS read request struct | ||
249 | */ | ||
250 | static void nfs_read_rpcsetup(struct nfs_read_data *data, | ||
251 | unsigned int count, unsigned int offset) | ||
252 | { | ||
253 | struct nfs_page *req = data->header->req; | ||
254 | |||
255 | data->args.fh = NFS_FH(data->header->inode); | ||
256 | data->args.offset = req_offset(req) + offset; | ||
257 | data->args.pgbase = req->wb_pgbase + offset; | ||
258 | data->args.pages = data->pages.pagevec; | ||
259 | data->args.count = count; | ||
260 | data->args.context = get_nfs_open_context(req->wb_context); | ||
261 | data->args.lock_context = req->wb_lock_context; | ||
262 | |||
263 | data->res.fattr = &data->fattr; | ||
264 | data->res.count = count; | ||
265 | data->res.eof = 0; | ||
266 | nfs_fattr_init(&data->fattr); | ||
267 | } | ||
268 | |||
269 | static int nfs_do_read(struct nfs_read_data *data, | ||
270 | const struct rpc_call_ops *call_ops) | ||
271 | { | ||
272 | struct inode *inode = data->header->inode; | ||
273 | |||
274 | return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0); | ||
275 | } | ||
276 | |||
277 | static int | ||
278 | nfs_do_multiple_reads(struct list_head *head, | ||
279 | const struct rpc_call_ops *call_ops) | ||
280 | { | ||
281 | struct nfs_read_data *data; | ||
282 | int ret = 0; | ||
283 | |||
284 | while (!list_empty(head)) { | ||
285 | int ret2; | ||
286 | |||
287 | data = list_first_entry(head, struct nfs_read_data, list); | ||
288 | list_del_init(&data->list); | ||
289 | |||
290 | ret2 = nfs_do_read(data, call_ops); | ||
291 | if (ret == 0) | ||
292 | ret = ret2; | ||
293 | } | ||
294 | return ret; | ||
295 | } | 183 | } |
296 | 184 | ||
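After this change the per-direction initiate hook no longer builds the rpc_message or runs the task; the shared pageio code does that, and the hook only folds in direction-specific details (swap flags here, priority on the write side) before calling the protocol's read_setup. A simplified sketch of that division of labor, with stand-in types and flag values, not the kernel's:

    #include <stdio.h>

    struct task_setup { int flags; int priority; };

    typedef void (*initiate_fn)(struct task_setup *setup, int how);

    static void read_initiate(struct task_setup *setup, int how)
    {
            (void)how;              /* reads ignore the flush mode */
            setup->flags |= 0x01;   /* stand-in for NFS_RPC_SWAPFLAGS */
    }

    static void generic_pgio_run(initiate_fn initiate, int how)
    {
            struct task_setup setup = { .flags = 0x100 /* async */ };

            initiate(&setup, how);  /* direction-specific tweak only */
            printf("run task: flags=%#x priority=%d\n",
                   setup.flags, setup.priority);
    }

    int main(void)
    {
            generic_pgio_run(read_initiate, 0);
            return 0;
    }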
297 | static void | 185 | static void |
@@ -311,143 +199,14 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { | |||
311 | .completion = nfs_read_completion, | 199 | .completion = nfs_read_completion, |
312 | }; | 200 | }; |
313 | 201 | ||
314 | static void nfs_pagein_error(struct nfs_pageio_descriptor *desc, | ||
315 | struct nfs_pgio_header *hdr) | ||
316 | { | ||
317 | set_bit(NFS_IOHDR_REDO, &hdr->flags); | ||
318 | while (!list_empty(&hdr->rpc_list)) { | ||
319 | struct nfs_read_data *data = list_first_entry(&hdr->rpc_list, | ||
320 | struct nfs_read_data, list); | ||
321 | list_del(&data->list); | ||
322 | nfs_readdata_release(data); | ||
323 | } | ||
324 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
325 | } | ||
326 | |||
327 | /* | ||
328 | * Generate multiple requests to fill a single page. | ||
329 | * | ||
330 | * We optimize to reduce the number of read operations on the wire. If we | ||
331 | * detect that we're reading a page, or an area of a page, that is past the | ||
332 | * end of file, we do not generate NFS read operations but just clear the | ||
333 | * parts of the page that would have come back zero from the server anyway. | ||
334 | * | ||
335 | * We rely on the cached value of i_size to make this determination; another | ||
336 | * client can fill pages on the server past our cached end-of-file, but we | ||
337 | * won't see the new data until our attribute cache is updated. This is more | ||
338 | * or less conventional NFS client behavior. | ||
339 | */ | ||
340 | static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, | ||
341 | struct nfs_pgio_header *hdr) | ||
342 | { | ||
343 | struct nfs_page *req = hdr->req; | ||
344 | struct page *page = req->wb_page; | ||
345 | struct nfs_read_data *data; | ||
346 | size_t rsize = desc->pg_bsize, nbytes; | ||
347 | unsigned int offset; | ||
348 | |||
349 | offset = 0; | ||
350 | nbytes = desc->pg_count; | ||
351 | do { | ||
352 | size_t len = min(nbytes,rsize); | ||
353 | |||
354 | data = nfs_readdata_alloc(hdr, 1); | ||
355 | if (!data) { | ||
356 | nfs_pagein_error(desc, hdr); | ||
357 | return -ENOMEM; | ||
358 | } | ||
359 | data->pages.pagevec[0] = page; | ||
360 | nfs_read_rpcsetup(data, len, offset); | ||
361 | list_add(&data->list, &hdr->rpc_list); | ||
362 | nbytes -= len; | ||
363 | offset += len; | ||
364 | } while (nbytes != 0); | ||
365 | |||
366 | nfs_list_remove_request(req); | ||
367 | nfs_list_add_request(req, &hdr->pages); | ||
368 | desc->pg_rpc_callops = &nfs_read_common_ops; | ||
369 | return 0; | ||
370 | } | ||
371 | |||
372 | static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, | ||
373 | struct nfs_pgio_header *hdr) | ||
374 | { | ||
375 | struct nfs_page *req; | ||
376 | struct page **pages; | ||
377 | struct nfs_read_data *data; | ||
378 | struct list_head *head = &desc->pg_list; | ||
379 | |||
380 | data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, | ||
381 | desc->pg_count)); | ||
382 | if (!data) { | ||
383 | nfs_pagein_error(desc, hdr); | ||
384 | return -ENOMEM; | ||
385 | } | ||
386 | |||
387 | pages = data->pages.pagevec; | ||
388 | while (!list_empty(head)) { | ||
389 | req = nfs_list_entry(head->next); | ||
390 | nfs_list_remove_request(req); | ||
391 | nfs_list_add_request(req, &hdr->pages); | ||
392 | *pages++ = req->wb_page; | ||
393 | } | ||
394 | |||
395 | nfs_read_rpcsetup(data, desc->pg_count, 0); | ||
396 | list_add(&data->list, &hdr->rpc_list); | ||
397 | desc->pg_rpc_callops = &nfs_read_common_ops; | ||
398 | return 0; | ||
399 | } | ||
400 | |||
401 | int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, | ||
402 | struct nfs_pgio_header *hdr) | ||
403 | { | ||
404 | if (desc->pg_bsize < PAGE_CACHE_SIZE) | ||
405 | return nfs_pagein_multi(desc, hdr); | ||
406 | return nfs_pagein_one(desc, hdr); | ||
407 | } | ||
408 | EXPORT_SYMBOL_GPL(nfs_generic_pagein); | ||
409 | |||
410 | static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | ||
411 | { | ||
412 | struct nfs_read_header *rhdr; | ||
413 | struct nfs_pgio_header *hdr; | ||
414 | int ret; | ||
415 | |||
416 | rhdr = nfs_readhdr_alloc(); | ||
417 | if (!rhdr) { | ||
418 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
419 | return -ENOMEM; | ||
420 | } | ||
421 | hdr = &rhdr->header; | ||
422 | nfs_pgheader_init(desc, hdr, nfs_readhdr_free); | ||
423 | atomic_inc(&hdr->refcnt); | ||
424 | ret = nfs_generic_pagein(desc, hdr); | ||
425 | if (ret == 0) | ||
426 | ret = nfs_do_multiple_reads(&hdr->rpc_list, | ||
427 | desc->pg_rpc_callops); | ||
428 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
429 | hdr->completion_ops->completion(hdr); | ||
430 | return ret; | ||
431 | } | ||
432 | |||
433 | static const struct nfs_pageio_ops nfs_pageio_read_ops = { | ||
434 | .pg_test = nfs_generic_pg_test, | ||
435 | .pg_doio = nfs_generic_pg_readpages, | ||
436 | }; | ||
437 | |||
438 | /* | 202 | /* |
439 | * This is the callback from RPC telling us whether a reply was | 203 | * This is the callback from RPC telling us whether a reply was |
440 | * received or some error occurred (timeout or socket shutdown). | 204 | * received or some error occurred (timeout or socket shutdown). |
441 | */ | 205 | */ |
442 | int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) | 206 | static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, |
207 | struct inode *inode) | ||
443 | { | 208 | { |
444 | struct inode *inode = data->header->inode; | 209 | int status = NFS_PROTO(inode)->read_done(task, data); |
445 | int status; | ||
446 | |||
447 | dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, | ||
448 | task->tk_status); | ||
449 | |||
450 | status = NFS_PROTO(inode)->read_done(task, data); | ||
451 | if (status != 0) | 210 | if (status != 0) |
452 | return status; | 211 | return status; |
453 | 212 | ||
@@ -460,10 +219,10 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) | |||
460 | return 0; | 219 | return 0; |
461 | } | 220 | } |
462 | 221 | ||
463 | static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) | 222 | static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) |
464 | { | 223 | { |
465 | struct nfs_readargs *argp = &data->args; | 224 | struct nfs_pgio_args *argp = &data->args; |
466 | struct nfs_readres *resp = &data->res; | 225 | struct nfs_pgio_res *resp = &data->res; |
467 | 226 | ||
468 | /* This is a short read! */ | 227 | /* This is a short read! */ |
469 | nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); | 228 | nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); |
@@ -480,17 +239,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data | |||
480 | rpc_restart_call_prepare(task); | 239 | rpc_restart_call_prepare(task); |
481 | } | 240 | } |
482 | 241 | ||
483 | static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) | 242 | static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) |
484 | { | 243 | { |
485 | struct nfs_read_data *data = calldata; | ||
486 | struct nfs_pgio_header *hdr = data->header; | 244 | struct nfs_pgio_header *hdr = data->header; |
487 | 245 | ||
488 | /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ | 246 | if (data->res.eof) { |
489 | if (nfs_readpage_result(task, data) != 0) | ||
490 | return; | ||
491 | if (task->tk_status < 0) | ||
492 | nfs_set_pgio_error(hdr, task->tk_status, data->args.offset); | ||
493 | else if (data->res.eof) { | ||
494 | loff_t bound; | 247 | loff_t bound; |
495 | 248 | ||
496 | bound = data->args.offset + data->res.count; | 249 | bound = data->args.offset + data->res.count; |
@@ -505,26 +258,6 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) | |||
505 | nfs_readpage_retry(task, data); | 258 | nfs_readpage_retry(task, data); |
506 | } | 259 | } |
507 | 260 | ||
508 | static void nfs_readpage_release_common(void *calldata) | ||
509 | { | ||
510 | nfs_readdata_release(calldata); | ||
511 | } | ||
512 | |||
513 | void nfs_read_prepare(struct rpc_task *task, void *calldata) | ||
514 | { | ||
515 | struct nfs_read_data *data = calldata; | ||
516 | int err; | ||
517 | err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); | ||
518 | if (err) | ||
519 | rpc_exit(task, err); | ||
520 | } | ||
521 | |||
522 | static const struct rpc_call_ops nfs_read_common_ops = { | ||
523 | .rpc_call_prepare = nfs_read_prepare, | ||
524 | .rpc_call_done = nfs_readpage_result_common, | ||
525 | .rpc_release = nfs_readpage_release_common, | ||
526 | }; | ||
527 | |||
528 | /* | 261 | /* |
529 | * Read a page over NFS. | 262 | * Read a page over NFS. |
530 | * We read the page synchronously in the following case: | 263 | * We read the page synchronously in the following case: |
@@ -592,7 +325,6 @@ static int | |||
592 | readpage_async_filler(void *data, struct page *page) | 325 | readpage_async_filler(void *data, struct page *page) |
593 | { | 326 | { |
594 | struct nfs_readdesc *desc = (struct nfs_readdesc *)data; | 327 | struct nfs_readdesc *desc = (struct nfs_readdesc *)data; |
595 | struct inode *inode = page_file_mapping(page)->host; | ||
596 | struct nfs_page *new; | 328 | struct nfs_page *new; |
597 | unsigned int len; | 329 | unsigned int len; |
598 | int error; | 330 | int error; |
@@ -601,7 +333,7 @@ readpage_async_filler(void *data, struct page *page) | |||
601 | if (len == 0) | 333 | if (len == 0) |
602 | return nfs_return_empty_page(page); | 334 | return nfs_return_empty_page(page); |
603 | 335 | ||
604 | new = nfs_create_request(desc->ctx, inode, page, 0, len); | 336 | new = nfs_create_request(desc->ctx, page, NULL, 0, len); |
605 | if (IS_ERR(new)) | 337 | if (IS_ERR(new)) |
606 | goto out_error; | 338 | goto out_error; |
607 | 339 | ||
@@ -654,7 +386,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
654 | if (ret == 0) | 386 | if (ret == 0) |
655 | goto read_complete; /* all pages were read */ | 387 | goto read_complete; /* all pages were read */ |
656 | 388 | ||
657 | NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); | 389 | nfs_pageio_init_read(&pgio, inode, false, |
390 | &nfs_async_read_completion_ops); | ||
658 | 391 | ||
659 | ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); | 392 | ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); |
660 | 393 | ||
@@ -671,7 +404,7 @@ out: | |||
671 | int __init nfs_init_readpagecache(void) | 404 | int __init nfs_init_readpagecache(void) |
672 | { | 405 | { |
673 | nfs_rdata_cachep = kmem_cache_create("nfs_read_data", | 406 | nfs_rdata_cachep = kmem_cache_create("nfs_read_data", |
674 | sizeof(struct nfs_read_header), | 407 | sizeof(struct nfs_rw_header), |
675 | 0, SLAB_HWCACHE_ALIGN, | 408 | 0, SLAB_HWCACHE_ALIGN, |
676 | NULL); | 409 | NULL); |
677 | if (nfs_rdata_cachep == NULL) | 410 | if (nfs_rdata_cachep == NULL) |
@@ -684,3 +417,12 @@ void nfs_destroy_readpagecache(void) | |||
684 | { | 417 | { |
685 | kmem_cache_destroy(nfs_rdata_cachep); | 418 | kmem_cache_destroy(nfs_rdata_cachep); |
686 | } | 419 | } |
420 | |||
421 | static const struct nfs_rw_ops nfs_rw_read_ops = { | ||
422 | .rw_mode = FMODE_READ, | ||
423 | .rw_alloc_header = nfs_readhdr_alloc, | ||
424 | .rw_free_header = nfs_readhdr_free, | ||
425 | .rw_done = nfs_readpage_done, | ||
426 | .rw_result = nfs_readpage_result, | ||
427 | .rw_initiate = nfs_initiate_read, | ||
428 | }; | ||
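The file now closes by publishing nfs_rw_read_ops; the generic layer drives header allocation, completion, and RPC setup through this table without knowing which direction it is serving. A minimal model of that dispatch (the field names follow the struct above, the types and bodies are stand-ins):

    #include <stdio.h>

    struct rw_ops {
            const char *name;
            void *(*alloc_header)(void);
            void (*free_header)(void *hdr);
    };

    static void *read_alloc(void)
    {
            puts("alloc read header");
            return NULL;            /* placeholder */
    }

    static void read_free(void *hdr)
    {
            (void)hdr;
            puts("free read header");
    }

    static const struct rw_ops read_ops = {
            .name = "read",
            .alloc_header = read_alloc,
            .free_header = read_free,
    };

    static void generic_io(const struct rw_ops *ops)
    {
            void *hdr = ops->alloc_header();    /* read or write alike */
            ops->free_header(hdr);
    }

    int main(void)
    {
            generic_io(&read_ops);
            return 0;
    }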
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2cb56943e232..084af1060d79 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -2180,11 +2180,23 @@ out_no_address: | |||
2180 | return -EINVAL; | 2180 | return -EINVAL; |
2181 | } | 2181 | } |
2182 | 2182 | ||
2183 | #define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ | ||
2184 | | NFS_MOUNT_SECURE \ | ||
2185 | | NFS_MOUNT_TCP \ | ||
2186 | | NFS_MOUNT_VER3 \ | ||
2187 | | NFS_MOUNT_KERBEROS \ | ||
2188 | | NFS_MOUNT_NONLM \ | ||
2189 | | NFS_MOUNT_BROKEN_SUID \ | ||
2190 | | NFS_MOUNT_STRICTLOCK \ | ||
2191 | | NFS_MOUNT_UNSHARED \ | ||
2192 | | NFS_MOUNT_NORESVPORT \ | ||
2193 | | NFS_MOUNT_LEGACY_INTERFACE) | ||
2194 | |||
2183 | static int | 2195 | static int |
2184 | nfs_compare_remount_data(struct nfs_server *nfss, | 2196 | nfs_compare_remount_data(struct nfs_server *nfss, |
2185 | struct nfs_parsed_mount_data *data) | 2197 | struct nfs_parsed_mount_data *data) |
2186 | { | 2198 | { |
2187 | if (data->flags != nfss->flags || | 2199 | if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK || |
2188 | data->rsize != nfss->rsize || | 2200 | data->rsize != nfss->rsize || |
2189 | data->wsize != nfss->wsize || | 2201 | data->wsize != nfss->wsize || |
2190 | data->version != nfss->nfs_client->rpc_ops->version || | 2202 | data->version != nfss->nfs_client->rpc_ops->version || |
@@ -2248,6 +2260,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) | |||
2248 | data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; | 2260 | data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; |
2249 | data->version = nfsvers; | 2261 | data->version = nfsvers; |
2250 | data->minorversion = nfss->nfs_client->cl_minorversion; | 2262 | data->minorversion = nfss->nfs_client->cl_minorversion; |
2263 | data->net = current->nsproxy->net_ns; | ||
2251 | memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, | 2264 | memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, |
2252 | data->nfs_server.addrlen); | 2265 | data->nfs_server.addrlen); |
2253 | 2266 | ||
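The remount check above moves from strict flag equality to a masked comparison: (data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK is nonzero only when the two flag words disagree in a bit the mask does not exclude, so flags like NFS_MOUNT_LEGACY_INTERFACE may differ without failing the remount. A worked example with illustrative bit values:

    #include <stdio.h>

    int main(void)
    {
            unsigned int mask  = ~0x1u; /* ignore bit 0, compare the rest */
            unsigned int saved = 0x05;  /* bits 0 and 2 set */
            unsigned int new1  = 0x04;  /* differs only in the ignored bit */
            unsigned int new2  = 0x01;  /* differs in a compared bit (2) */

            printf("%u\n", ((saved ^ new1) & mask) != 0); /* 0: equal */
            printf("%u\n", ((saved ^ new2) & mask) != 0); /* 1: mismatch */
            return 0;
    }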
@@ -2347,18 +2360,6 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) | |||
2347 | nfs_initialise_sb(sb); | 2360 | nfs_initialise_sb(sb); |
2348 | } | 2361 | } |
2349 | 2362 | ||
2350 | #define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ | ||
2351 | | NFS_MOUNT_SECURE \ | ||
2352 | | NFS_MOUNT_TCP \ | ||
2353 | | NFS_MOUNT_VER3 \ | ||
2354 | | NFS_MOUNT_KERBEROS \ | ||
2355 | | NFS_MOUNT_NONLM \ | ||
2356 | | NFS_MOUNT_BROKEN_SUID \ | ||
2357 | | NFS_MOUNT_STRICTLOCK \ | ||
2358 | | NFS_MOUNT_UNSHARED \ | ||
2359 | | NFS_MOUNT_NORESVPORT \ | ||
2360 | | NFS_MOUNT_LEGACY_INTERFACE) | ||
2361 | |||
2362 | static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) | 2363 | static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) |
2363 | { | 2364 | { |
2364 | const struct nfs_server *a = s->s_fs_info; | 2365 | const struct nfs_server *a = s->s_fs_info; |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ffb9459f180b..3ee5af4e738e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -42,10 +42,10 @@ | |||
42 | * Local function declarations | 42 | * Local function declarations |
43 | */ | 43 | */ |
44 | static void nfs_redirty_request(struct nfs_page *req); | 44 | static void nfs_redirty_request(struct nfs_page *req); |
45 | static const struct rpc_call_ops nfs_write_common_ops; | ||
46 | static const struct rpc_call_ops nfs_commit_ops; | 45 | static const struct rpc_call_ops nfs_commit_ops; |
47 | static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; | 46 | static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; |
48 | static const struct nfs_commit_completion_ops nfs_commit_completion_ops; | 47 | static const struct nfs_commit_completion_ops nfs_commit_completion_ops; |
48 | static const struct nfs_rw_ops nfs_rw_write_ops; | ||
49 | 49 | ||
50 | static struct kmem_cache *nfs_wdata_cachep; | 50 | static struct kmem_cache *nfs_wdata_cachep; |
51 | static mempool_t *nfs_wdata_mempool; | 51 | static mempool_t *nfs_wdata_mempool; |
@@ -70,76 +70,19 @@ void nfs_commit_free(struct nfs_commit_data *p) | |||
70 | } | 70 | } |
71 | EXPORT_SYMBOL_GPL(nfs_commit_free); | 71 | EXPORT_SYMBOL_GPL(nfs_commit_free); |
72 | 72 | ||
73 | struct nfs_write_header *nfs_writehdr_alloc(void) | 73 | static struct nfs_rw_header *nfs_writehdr_alloc(void) |
74 | { | 74 | { |
75 | struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); | 75 | struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); |
76 | |||
77 | if (p) { | ||
78 | struct nfs_pgio_header *hdr = &p->header; | ||
79 | 76 | ||
77 | if (p) | ||
80 | memset(p, 0, sizeof(*p)); | 78 | memset(p, 0, sizeof(*p)); |
81 | INIT_LIST_HEAD(&hdr->pages); | ||
82 | INIT_LIST_HEAD(&hdr->rpc_list); | ||
83 | spin_lock_init(&hdr->lock); | ||
84 | atomic_set(&hdr->refcnt, 0); | ||
85 | hdr->verf = &p->verf; | ||
86 | } | ||
87 | return p; | 79 | return p; |
88 | } | 80 | } |
89 | EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); | ||
90 | |||
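Unlike the read side's slab zalloc, write headers come from a mempool so writeback can make progress under memory pressure, and the allocation is zeroed by hand because a mempool hands back recycled objects with stale contents. A toy one-slot pool showing why the memset() in nfs_writehdr_alloc() survives the cleanup (userspace model, not the kernel mempool API):

    #include <stdio.h>
    #include <string.h>

    struct rw_header { int flags; };

    static struct rw_header reserve;    /* the pool's emergency object */
    static int reserve_used;

    static struct rw_header *pool_alloc(void)
    {
            if (reserve_used)
                    return NULL;        /* a real mempool would sleep */
            reserve_used = 1;
            return &reserve;            /* contents are whatever was left */
    }

    int main(void)
    {
            reserve.flags = 0xdead;     /* stale state from a past user */

            struct rw_header *p = pool_alloc();
            if (p)
                    memset(p, 0, sizeof(*p));   /* mirror the kernel code */
            printf("flags=%#x\n", p ? p->flags : -1);   /* flags=0 */
            return 0;
    }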
91 | static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, | ||
92 | unsigned int pagecount) | ||
93 | { | ||
94 | struct nfs_write_data *data, *prealloc; | ||
95 | |||
96 | prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; | ||
97 | if (prealloc->header == NULL) | ||
98 | data = prealloc; | ||
99 | else | ||
100 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
101 | if (!data) | ||
102 | goto out; | ||
103 | |||
104 | if (nfs_pgarray_set(&data->pages, pagecount)) { | ||
105 | data->header = hdr; | ||
106 | atomic_inc(&hdr->refcnt); | ||
107 | } else { | ||
108 | if (data != prealloc) | ||
109 | kfree(data); | ||
110 | data = NULL; | ||
111 | } | ||
112 | out: | ||
113 | return data; | ||
114 | } | ||
115 | 81 | ||
116 | void nfs_writehdr_free(struct nfs_pgio_header *hdr) | 82 | static void nfs_writehdr_free(struct nfs_rw_header *whdr) |
117 | { | 83 | { |
118 | struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); | ||
119 | mempool_free(whdr, nfs_wdata_mempool); | 84 | mempool_free(whdr, nfs_wdata_mempool); |
120 | } | 85 | } |
121 | EXPORT_SYMBOL_GPL(nfs_writehdr_free); | ||
122 | |||
123 | void nfs_writedata_release(struct nfs_write_data *wdata) | ||
124 | { | ||
125 | struct nfs_pgio_header *hdr = wdata->header; | ||
126 | struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); | ||
127 | |||
128 | put_nfs_open_context(wdata->args.context); | ||
129 | if (wdata->pages.pagevec != wdata->pages.page_array) | ||
130 | kfree(wdata->pages.pagevec); | ||
131 | if (wdata == &write_header->rpc_data) { | ||
132 | wdata->header = NULL; | ||
133 | wdata = NULL; | ||
134 | } | ||
135 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
136 | hdr->completion_ops->completion(hdr); | ||
137 | /* Note: we only free the rpc_task after callbacks are done. | ||
138 | * See the comment in rpc_free_task() for why | ||
139 | */ | ||
140 | kfree(wdata); | ||
141 | } | ||
142 | EXPORT_SYMBOL_GPL(nfs_writedata_release); | ||
143 | 86 | ||
144 | static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) | 87 | static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) |
145 | { | 88 | { |
@@ -211,18 +154,78 @@ static void nfs_set_pageerror(struct page *page) | |||
211 | nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); | 154 | nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); |
212 | } | 155 | } |
213 | 156 | ||
157 | /* | ||
158 | * nfs_page_group_search_locked | ||
159 | * @head - head request of page group | ||
160 | * @page_offset - offset into page | ||
161 | * | ||
162 | * Search page group with head @head to find a request that contains the | ||
163 | * page offset @page_offset. | ||
164 | * | ||
165 | * Returns a pointer to the first matching nfs request, or NULL if no | ||
166 | * match is found. | ||
167 | * | ||
168 | * Must be called with the page group lock held | ||
169 | */ | ||
170 | static struct nfs_page * | ||
171 | nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset) | ||
172 | { | ||
173 | struct nfs_page *req; | ||
174 | |||
175 | WARN_ON_ONCE(head != head->wb_head); | ||
176 | WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags)); | ||
177 | |||
178 | req = head; | ||
179 | do { | ||
180 | if (page_offset >= req->wb_pgbase && | ||
181 | page_offset < (req->wb_pgbase + req->wb_bytes)) | ||
182 | return req; | ||
183 | |||
184 | req = req->wb_this_page; | ||
185 | } while (req != head); | ||
186 | |||
187 | return NULL; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * nfs_page_group_covers_page | ||
192 | * @head - head request of page group | ||
193 | * | ||
194 | * Return true if the page group with head @head covers the whole page, | ||
195 | * returns false otherwise | ||
196 | */ | ||
197 | static bool nfs_page_group_covers_page(struct nfs_page *req) | ||
198 | { | ||
199 | struct nfs_page *tmp; | ||
200 | unsigned int pos = 0; | ||
201 | unsigned int len = nfs_page_length(req->wb_page); | ||
202 | |||
203 | nfs_page_group_lock(req); | ||
204 | |||
205 | do { | ||
206 | tmp = nfs_page_group_search_locked(req->wb_head, pos); | ||
207 | if (tmp) { | ||
208 | /* no way this should happen */ | ||
209 | WARN_ON_ONCE(tmp->wb_pgbase != pos); | ||
210 | pos += tmp->wb_bytes - (pos - tmp->wb_pgbase); | ||
211 | } | ||
212 | } while (tmp && pos < len); | ||
213 | |||
214 | nfs_page_group_unlock(req); | ||
215 | WARN_ON_ONCE(pos > len); | ||
216 | return pos == len; | ||
217 | } | ||
218 | |||
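nfs_page_group_covers_page() advances pos past each request found at the current offset and declares the page covered only if the walk reaches the page length with no gap. A standalone model of the walk, checking one fully covered page and one with a hole (sizes illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    struct req { unsigned int pgbase, bytes; };

    static bool covers(const struct req *reqs, int n, unsigned int len)
    {
            unsigned int pos = 0;

            while (pos < len) {
                    bool found = false;
                    for (int i = 0; i < n; i++) {
                            if (pos >= reqs[i].pgbase &&
                                pos < reqs[i].pgbase + reqs[i].bytes) {
                                    /* skip past this request's coverage */
                                    pos = reqs[i].pgbase + reqs[i].bytes;
                                    found = true;
                                    break;
                            }
                    }
                    if (!found)
                            return false;   /* gap: page not covered */
            }
            return pos == len;
    }

    int main(void)
    {
            struct req full[] = { {0, 1024}, {1024, 1024}, {2048, 1024} };
            struct req gap[]  = { {0, 1024}, {2048, 1024} };

            printf("%d %d\n", covers(full, 3, 3072),
                   covers(gap, 2, 3072));   /* prints: 1 0 */
            return 0;
    }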
214 | /* We can set the PG_uptodate flag if we see that a write request | 219 | /* We can set the PG_uptodate flag if we see that a write request |
215 | * covers the full page. | 220 | * covers the full page. |
216 | */ | 221 | */ |
217 | static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) | 222 | static void nfs_mark_uptodate(struct nfs_page *req) |
218 | { | 223 | { |
219 | if (PageUptodate(page)) | 224 | if (PageUptodate(req->wb_page)) |
220 | return; | ||
221 | if (base != 0) | ||
222 | return; | 225 | return; |
223 | if (count != nfs_page_length(page)) | 226 | if (!nfs_page_group_covers_page(req)) |
224 | return; | 227 | return; |
225 | SetPageUptodate(page); | 228 | SetPageUptodate(req->wb_page); |
226 | } | 229 | } |
227 | 230 | ||
228 | static int wb_priority(struct writeback_control *wbc) | 231 | static int wb_priority(struct writeback_control *wbc) |
@@ -258,12 +261,15 @@ static void nfs_set_page_writeback(struct page *page) | |||
258 | } | 261 | } |
259 | } | 262 | } |
260 | 263 | ||
261 | static void nfs_end_page_writeback(struct page *page) | 264 | static void nfs_end_page_writeback(struct nfs_page *req) |
262 | { | 265 | { |
263 | struct inode *inode = page_file_mapping(page)->host; | 266 | struct inode *inode = page_file_mapping(req->wb_page)->host; |
264 | struct nfs_server *nfss = NFS_SERVER(inode); | 267 | struct nfs_server *nfss = NFS_SERVER(inode); |
265 | 268 | ||
266 | end_page_writeback(page); | 269 | if (!nfs_page_group_sync_on_bit(req, PG_WB_END)) |
270 | return; | ||
271 | |||
272 | end_page_writeback(req->wb_page); | ||
267 | if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) | 273 | if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) |
268 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | 274 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); |
269 | } | 275 | } |
@@ -354,10 +360,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc | |||
354 | struct nfs_pageio_descriptor pgio; | 360 | struct nfs_pageio_descriptor pgio; |
355 | int err; | 361 | int err; |
356 | 362 | ||
357 | NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, | 363 | nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), |
358 | page->mapping->host, | 364 | false, &nfs_async_write_completion_ops); |
359 | wb_priority(wbc), | ||
360 | &nfs_async_write_completion_ops); | ||
361 | err = nfs_do_writepage(page, wbc, &pgio); | 365 | err = nfs_do_writepage(page, wbc, &pgio); |
362 | nfs_pageio_complete(&pgio); | 366 | nfs_pageio_complete(&pgio); |
363 | if (err < 0) | 367 | if (err < 0) |
@@ -400,7 +404,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) | |||
400 | 404 | ||
401 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); | 405 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); |
402 | 406 | ||
403 | NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops); | 407 | nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, |
408 | &nfs_async_write_completion_ops); | ||
404 | err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); | 409 | err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); |
405 | nfs_pageio_complete(&pgio); | 410 | nfs_pageio_complete(&pgio); |
406 | 411 | ||
@@ -425,6 +430,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
425 | { | 430 | { |
426 | struct nfs_inode *nfsi = NFS_I(inode); | 431 | struct nfs_inode *nfsi = NFS_I(inode); |
427 | 432 | ||
433 | WARN_ON_ONCE(req->wb_this_page != req); | ||
434 | |||
428 | /* Lock the request! */ | 435 | /* Lock the request! */ |
429 | nfs_lock_request(req); | 436 | nfs_lock_request(req); |
430 | 437 | ||
@@ -441,6 +448,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
441 | set_page_private(req->wb_page, (unsigned long)req); | 448 | set_page_private(req->wb_page, (unsigned long)req); |
442 | } | 449 | } |
443 | nfsi->npages++; | 450 | nfsi->npages++; |
451 | set_bit(PG_INODE_REF, &req->wb_flags); | ||
444 | kref_get(&req->wb_kref); | 452 | kref_get(&req->wb_kref); |
445 | spin_unlock(&inode->i_lock); | 453 | spin_unlock(&inode->i_lock); |
446 | } | 454 | } |
@@ -452,15 +460,20 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
452 | { | 460 | { |
453 | struct inode *inode = req->wb_context->dentry->d_inode; | 461 | struct inode *inode = req->wb_context->dentry->d_inode; |
454 | struct nfs_inode *nfsi = NFS_I(inode); | 462 | struct nfs_inode *nfsi = NFS_I(inode); |
463 | struct nfs_page *head; | ||
455 | 464 | ||
456 | spin_lock(&inode->i_lock); | 465 | if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { |
457 | if (likely(!PageSwapCache(req->wb_page))) { | 466 | head = req->wb_head; |
458 | set_page_private(req->wb_page, 0); | 467 | |
459 | ClearPagePrivate(req->wb_page); | 468 | spin_lock(&inode->i_lock); |
460 | clear_bit(PG_MAPPED, &req->wb_flags); | 469 | if (likely(!PageSwapCache(head->wb_page))) { |
470 | set_page_private(head->wb_page, 0); | ||
471 | ClearPagePrivate(head->wb_page); | ||
472 | clear_bit(PG_MAPPED, &head->wb_flags); | ||
473 | } | ||
474 | nfsi->npages--; | ||
475 | spin_unlock(&inode->i_lock); | ||
461 | } | 476 | } |
462 | nfsi->npages--; | ||
463 | spin_unlock(&inode->i_lock); | ||
464 | nfs_release_request(req); | 477 | nfs_release_request(req); |
465 | } | 478 | } |
466 | 479 | ||
@@ -583,7 +596,7 @@ nfs_clear_request_commit(struct nfs_page *req) | |||
583 | } | 596 | } |
584 | 597 | ||
585 | static inline | 598 | static inline |
586 | int nfs_write_need_commit(struct nfs_write_data *data) | 599 | int nfs_write_need_commit(struct nfs_pgio_data *data) |
587 | { | 600 | { |
588 | if (data->verf.committed == NFS_DATA_SYNC) | 601 | if (data->verf.committed == NFS_DATA_SYNC) |
589 | return data->header->lseg == NULL; | 602 | return data->header->lseg == NULL; |
@@ -614,7 +627,7 @@ nfs_clear_request_commit(struct nfs_page *req) | |||
614 | } | 627 | } |
615 | 628 | ||
616 | static inline | 629 | static inline |
617 | int nfs_write_need_commit(struct nfs_write_data *data) | 630 | int nfs_write_need_commit(struct nfs_pgio_data *data) |
618 | { | 631 | { |
619 | return 0; | 632 | return 0; |
620 | } | 633 | } |
@@ -625,6 +638,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) | |||
625 | { | 638 | { |
626 | struct nfs_commit_info cinfo; | 639 | struct nfs_commit_info cinfo; |
627 | unsigned long bytes = 0; | 640 | unsigned long bytes = 0; |
641 | bool do_destroy; | ||
628 | 642 | ||
629 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) | 643 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) |
630 | goto out; | 644 | goto out; |
@@ -645,7 +659,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) | |||
645 | goto next; | 659 | goto next; |
646 | } | 660 | } |
647 | if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { | 661 | if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { |
648 | memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf)); | 662 | memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); |
649 | nfs_mark_request_commit(req, hdr->lseg, &cinfo); | 663 | nfs_mark_request_commit(req, hdr->lseg, &cinfo); |
650 | goto next; | 664 | goto next; |
651 | } | 665 | } |
@@ -653,7 +667,8 @@ remove_req: | |||
653 | nfs_inode_remove_request(req); | 667 | nfs_inode_remove_request(req); |
654 | next: | 668 | next: |
655 | nfs_unlock_request(req); | 669 | nfs_unlock_request(req); |
656 | nfs_end_page_writeback(req->wb_page); | 670 | nfs_end_page_writeback(req); |
671 | do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); | ||
657 | nfs_release_request(req); | 672 | nfs_release_request(req); |
658 | } | 673 | } |
659 | out: | 674 | out: |
@@ -661,7 +676,7 @@ out: | |||
661 | } | 676 | } |
662 | 677 | ||
663 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | 678 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) |
664 | static unsigned long | 679 | unsigned long |
665 | nfs_reqs_to_commit(struct nfs_commit_info *cinfo) | 680 | nfs_reqs_to_commit(struct nfs_commit_info *cinfo) |
666 | { | 681 | { |
667 | return cinfo->mds->ncommit; | 682 | return cinfo->mds->ncommit; |
@@ -718,7 +733,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, | |||
718 | } | 733 | } |
719 | 734 | ||
720 | #else | 735 | #else |
721 | static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) | 736 | unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) |
722 | { | 737 | { |
723 | return 0; | 738 | return 0; |
724 | } | 739 | } |
@@ -758,6 +773,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, | |||
758 | if (req == NULL) | 773 | if (req == NULL) |
759 | goto out_unlock; | 774 | goto out_unlock; |
760 | 775 | ||
776 | /* should be handled by nfs_flush_incompatible */ | ||
777 | WARN_ON_ONCE(req->wb_head != req); | ||
778 | WARN_ON_ONCE(req->wb_this_page != req); | ||
779 | |||
761 | rqend = req->wb_offset + req->wb_bytes; | 780 | rqend = req->wb_offset + req->wb_bytes; |
762 | /* | 781 | /* |
763 | * Tell the caller to flush out the request if | 782 | * Tell the caller to flush out the request if |
@@ -819,7 +838,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, | |||
819 | req = nfs_try_to_update_request(inode, page, offset, bytes); | 838 | req = nfs_try_to_update_request(inode, page, offset, bytes); |
820 | if (req != NULL) | 839 | if (req != NULL) |
821 | goto out; | 840 | goto out; |
822 | req = nfs_create_request(ctx, inode, page, offset, bytes); | 841 | req = nfs_create_request(ctx, page, NULL, offset, bytes); |
823 | if (IS_ERR(req)) | 842 | if (IS_ERR(req)) |
824 | goto out; | 843 | goto out; |
825 | nfs_inode_add_request(inode, req); | 844 | nfs_inode_add_request(inode, req); |
@@ -837,7 +856,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, | |||
837 | return PTR_ERR(req); | 856 | return PTR_ERR(req); |
838 | /* Update file length */ | 857 | /* Update file length */ |
839 | nfs_grow_file(page, offset, count); | 858 | nfs_grow_file(page, offset, count); |
840 | nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); | 859 | nfs_mark_uptodate(req); |
841 | nfs_mark_request_dirty(req); | 860 | nfs_mark_request_dirty(req); |
842 | nfs_unlock_and_release_request(req); | 861 | nfs_unlock_and_release_request(req); |
843 | return 0; | 862 | return 0; |
@@ -863,6 +882,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page) | |||
863 | return 0; | 882 | return 0; |
864 | l_ctx = req->wb_lock_context; | 883 | l_ctx = req->wb_lock_context; |
865 | do_flush = req->wb_page != page || req->wb_context != ctx; | 884 | do_flush = req->wb_page != page || req->wb_context != ctx; |
885 | /* for now, flush if more than 1 request in page_group */ | ||
886 | do_flush |= req->wb_this_page != req; | ||
866 | if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { | 887 | if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { |
867 | do_flush |= l_ctx->lockowner.l_owner != current->files | 888 | do_flush |= l_ctx->lockowner.l_owner != current->files |
868 | || l_ctx->lockowner.l_pid != current->tgid; | 889 | || l_ctx->lockowner.l_pid != current->tgid; |
@@ -990,126 +1011,17 @@ static int flush_task_priority(int how) | |||
990 | return RPC_PRIORITY_NORMAL; | 1011 | return RPC_PRIORITY_NORMAL; |
991 | } | 1012 | } |
992 | 1013 | ||
993 | int nfs_initiate_write(struct rpc_clnt *clnt, | 1014 | static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, |
994 | struct nfs_write_data *data, | 1015 | struct rpc_task_setup *task_setup_data, int how) |
995 | const struct rpc_call_ops *call_ops, | ||
996 | int how, int flags) | ||
997 | { | 1016 | { |
998 | struct inode *inode = data->header->inode; | 1017 | struct inode *inode = data->header->inode; |
999 | int priority = flush_task_priority(how); | 1018 | int priority = flush_task_priority(how); |
1000 | struct rpc_task *task; | ||
1001 | struct rpc_message msg = { | ||
1002 | .rpc_argp = &data->args, | ||
1003 | .rpc_resp = &data->res, | ||
1004 | .rpc_cred = data->header->cred, | ||
1005 | }; | ||
1006 | struct rpc_task_setup task_setup_data = { | ||
1007 | .rpc_client = clnt, | ||
1008 | .task = &data->task, | ||
1009 | .rpc_message = &msg, | ||
1010 | .callback_ops = call_ops, | ||
1011 | .callback_data = data, | ||
1012 | .workqueue = nfsiod_workqueue, | ||
1013 | .flags = RPC_TASK_ASYNC | flags, | ||
1014 | .priority = priority, | ||
1015 | }; | ||
1016 | int ret = 0; | ||
1017 | |||
1018 | /* Set up the initial task struct. */ | ||
1019 | NFS_PROTO(inode)->write_setup(data, &msg); | ||
1020 | 1019 | ||
1021 | dprintk("NFS: %5u initiated write call " | 1020 | task_setup_data->priority = priority; |
1022 | "(req %s/%llu, %u bytes @ offset %llu)\n", | 1021 | NFS_PROTO(inode)->write_setup(data, msg); |
1023 | data->task.tk_pid, | ||
1024 | inode->i_sb->s_id, | ||
1025 | (unsigned long long)NFS_FILEID(inode), | ||
1026 | data->args.count, | ||
1027 | (unsigned long long)data->args.offset); | ||
1028 | 1022 | ||
1029 | nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, | 1023 | nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, |
1030 | &task_setup_data.rpc_client, &msg, data); | 1024 | &task_setup_data->rpc_client, msg, data); |
1031 | |||
1032 | task = rpc_run_task(&task_setup_data); | ||
1033 | if (IS_ERR(task)) { | ||
1034 | ret = PTR_ERR(task); | ||
1035 | goto out; | ||
1036 | } | ||
1037 | if (how & FLUSH_SYNC) { | ||
1038 | ret = rpc_wait_for_completion_task(task); | ||
1039 | if (ret == 0) | ||
1040 | ret = task->tk_status; | ||
1041 | } | ||
1042 | rpc_put_task(task); | ||
1043 | out: | ||
1044 | return ret; | ||
1045 | } | ||
1046 | EXPORT_SYMBOL_GPL(nfs_initiate_write); | ||
1047 | |||
1048 | /* | ||
1049 | * Set up the argument/result storage required for the RPC call. | ||
1050 | */ | ||
1051 | static void nfs_write_rpcsetup(struct nfs_write_data *data, | ||
1052 | unsigned int count, unsigned int offset, | ||
1053 | int how, struct nfs_commit_info *cinfo) | ||
1054 | { | ||
1055 | struct nfs_page *req = data->header->req; | ||
1056 | |||
1057 | /* Set up the RPC argument and reply structs | ||
1058 | * NB: take care not to mess about with data->commit et al. */ | ||
1059 | |||
1060 | data->args.fh = NFS_FH(data->header->inode); | ||
1061 | data->args.offset = req_offset(req) + offset; | ||
1062 | /* pnfs_set_layoutcommit needs this */ | ||
1063 | data->mds_offset = data->args.offset; | ||
1064 | data->args.pgbase = req->wb_pgbase + offset; | ||
1065 | data->args.pages = data->pages.pagevec; | ||
1066 | data->args.count = count; | ||
1067 | data->args.context = get_nfs_open_context(req->wb_context); | ||
1068 | data->args.lock_context = req->wb_lock_context; | ||
1069 | data->args.stable = NFS_UNSTABLE; | ||
1070 | switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { | ||
1071 | case 0: | ||
1072 | break; | ||
1073 | case FLUSH_COND_STABLE: | ||
1074 | if (nfs_reqs_to_commit(cinfo)) | ||
1075 | break; | ||
1076 | default: | ||
1077 | data->args.stable = NFS_FILE_SYNC; | ||
1078 | } | ||
1079 | |||
1080 | data->res.fattr = &data->fattr; | ||
1081 | data->res.count = count; | ||
1082 | data->res.verf = &data->verf; | ||
1083 | nfs_fattr_init(&data->fattr); | ||
1084 | } | ||
1085 | |||
1086 | static int nfs_do_write(struct nfs_write_data *data, | ||
1087 | const struct rpc_call_ops *call_ops, | ||
1088 | int how) | ||
1089 | { | ||
1090 | struct inode *inode = data->header->inode; | ||
1091 | |||
1092 | return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0); | ||
1093 | } | ||
1094 | |||
1095 | static int nfs_do_multiple_writes(struct list_head *head, | ||
1096 | const struct rpc_call_ops *call_ops, | ||
1097 | int how) | ||
1098 | { | ||
1099 | struct nfs_write_data *data; | ||
1100 | int ret = 0; | ||
1101 | |||
1102 | while (!list_empty(head)) { | ||
1103 | int ret2; | ||
1104 | |||
1105 | data = list_first_entry(head, struct nfs_write_data, list); | ||
1106 | list_del_init(&data->list); | ||
1107 | |||
1108 | ret2 = nfs_do_write(data, call_ops, how); | ||
1109 | if (ret == 0) | ||
1110 | ret = ret2; | ||
1111 | } | ||
1112 | return ret; | ||
1113 | } | 1025 | } |
1114 | 1026 | ||
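The stable-write decision that lived in the removed nfs_write_rpcsetup() defaulted to UNSTABLE, always upgraded to FILE_SYNC for FLUSH_STABLE, and upgraded for FLUSH_COND_STABLE only when nothing was already queued for commit, since a COMMIT would follow anyway. A standalone model of that fall-through (the flag and enum values here are illustrative, not the kernel's):

    #include <stdio.h>

    enum { FLUSH_STABLE = 4, FLUSH_COND_STABLE = 8 };
    enum { NFS_UNSTABLE, NFS_FILE_SYNC };

    static int stable_how(int how, int reqs_to_commit)
    {
            switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
            case 0:
                    return NFS_UNSTABLE;
            case FLUSH_COND_STABLE:
                    if (reqs_to_commit)
                            return NFS_UNSTABLE; /* a COMMIT is coming */
                    /* fall through */
            default:
                    return NFS_FILE_SYNC;
            }
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   stable_how(0, 0),                  /* 0: unstable */
                   stable_how(FLUSH_COND_STABLE, 5),  /* 0: unstable */
                   stable_how(FLUSH_COND_STABLE, 0)); /* 1: file_sync */
            return 0;
    }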
1115 | /* If a nfs_flush_* function fails, it should remove reqs from @head and | 1027 | /* If a nfs_flush_* function fails, it should remove reqs from @head and |
@@ -1120,7 +1032,7 @@ static void nfs_redirty_request(struct nfs_page *req) | |||
1120 | { | 1032 | { |
1121 | nfs_mark_request_dirty(req); | 1033 | nfs_mark_request_dirty(req); |
1122 | nfs_unlock_request(req); | 1034 | nfs_unlock_request(req); |
1123 | nfs_end_page_writeback(req->wb_page); | 1035 | nfs_end_page_writeback(req); |
1124 | nfs_release_request(req); | 1036 | nfs_release_request(req); |
1125 | } | 1037 | } |
1126 | 1038 | ||
@@ -1140,173 +1052,30 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { | |||
1140 | .completion = nfs_write_completion, | 1052 | .completion = nfs_write_completion, |
1141 | }; | 1053 | }; |
1142 | 1054 | ||
1143 | static void nfs_flush_error(struct nfs_pageio_descriptor *desc, | ||
1144 | struct nfs_pgio_header *hdr) | ||
1145 | { | ||
1146 | set_bit(NFS_IOHDR_REDO, &hdr->flags); | ||
1147 | while (!list_empty(&hdr->rpc_list)) { | ||
1148 | struct nfs_write_data *data = list_first_entry(&hdr->rpc_list, | ||
1149 | struct nfs_write_data, list); | ||
1150 | list_del(&data->list); | ||
1151 | nfs_writedata_release(data); | ||
1152 | } | ||
1153 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
1154 | } | ||
1155 | |||
1156 | /* | ||
1157 | * Generate multiple small requests to write out a single | ||
1158 | * contiguous dirty area on one page. | ||
1159 | */ | ||
1160 | static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, | ||
1161 | struct nfs_pgio_header *hdr) | ||
1162 | { | ||
1163 | struct nfs_page *req = hdr->req; | ||
1164 | struct page *page = req->wb_page; | ||
1165 | struct nfs_write_data *data; | ||
1166 | size_t wsize = desc->pg_bsize, nbytes; | ||
1167 | unsigned int offset; | ||
1168 | int requests = 0; | ||
1169 | struct nfs_commit_info cinfo; | ||
1170 | |||
1171 | nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); | ||
1172 | |||
1173 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | ||
1174 | (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) || | ||
1175 | desc->pg_count > wsize)) | ||
1176 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | ||
1177 | |||
1178 | |||
1179 | offset = 0; | ||
1180 | nbytes = desc->pg_count; | ||
1181 | do { | ||
1182 | size_t len = min(nbytes, wsize); | ||
1183 | |||
1184 | data = nfs_writedata_alloc(hdr, 1); | ||
1185 | if (!data) { | ||
1186 | nfs_flush_error(desc, hdr); | ||
1187 | return -ENOMEM; | ||
1188 | } | ||
1189 | data->pages.pagevec[0] = page; | ||
1190 | nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); | ||
1191 | list_add(&data->list, &hdr->rpc_list); | ||
1192 | requests++; | ||
1193 | nbytes -= len; | ||
1194 | offset += len; | ||
1195 | } while (nbytes != 0); | ||
1196 | nfs_list_remove_request(req); | ||
1197 | nfs_list_add_request(req, &hdr->pages); | ||
1198 | desc->pg_rpc_callops = &nfs_write_common_ops; | ||
1199 | return 0; | ||
1200 | } | ||
1201 | |||
1202 | /* | ||
1203 | * Create an RPC task for the given write request and kick it. | ||
1204 | * The page must have been locked by the caller. | ||
1205 | * | ||
1206 | * It may happen that the page we're passed is not marked dirty. | ||
1207 | * This is the case if nfs_updatepage detects a conflicting request | ||
1208 | * that has been written but not committed. | ||
1209 | */ | ||
1210 | static int nfs_flush_one(struct nfs_pageio_descriptor *desc, | ||
1211 | struct nfs_pgio_header *hdr) | ||
1212 | { | ||
1213 | struct nfs_page *req; | ||
1214 | struct page **pages; | ||
1215 | struct nfs_write_data *data; | ||
1216 | struct list_head *head = &desc->pg_list; | ||
1217 | struct nfs_commit_info cinfo; | ||
1218 | |||
1219 | data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, | ||
1220 | desc->pg_count)); | ||
1221 | if (!data) { | ||
1222 | nfs_flush_error(desc, hdr); | ||
1223 | return -ENOMEM; | ||
1224 | } | ||
1225 | |||
1226 | nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); | ||
1227 | pages = data->pages.pagevec; | ||
1228 | while (!list_empty(head)) { | ||
1229 | req = nfs_list_entry(head->next); | ||
1230 | nfs_list_remove_request(req); | ||
1231 | nfs_list_add_request(req, &hdr->pages); | ||
1232 | *pages++ = req->wb_page; | ||
1233 | } | ||
1234 | |||
1235 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | ||
1236 | (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) | ||
1237 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | ||
1238 | |||
1239 | /* Set up the argument struct */ | ||
1240 | nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); | ||
1241 | list_add(&data->list, &hdr->rpc_list); | ||
1242 | desc->pg_rpc_callops = &nfs_write_common_ops; | ||
1243 | return 0; | ||
1244 | } | ||
1245 | |||
1246 | int nfs_generic_flush(struct nfs_pageio_descriptor *desc, | ||
1247 | struct nfs_pgio_header *hdr) | ||
1248 | { | ||
1249 | if (desc->pg_bsize < PAGE_CACHE_SIZE) | ||
1250 | return nfs_flush_multi(desc, hdr); | ||
1251 | return nfs_flush_one(desc, hdr); | ||
1252 | } | ||
1253 | EXPORT_SYMBOL_GPL(nfs_generic_flush); | ||
1254 | |||
1255 | static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | ||
1256 | { | ||
1257 | struct nfs_write_header *whdr; | ||
1258 | struct nfs_pgio_header *hdr; | ||
1259 | int ret; | ||
1260 | |||
1261 | whdr = nfs_writehdr_alloc(); | ||
1262 | if (!whdr) { | ||
1263 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
1264 | return -ENOMEM; | ||
1265 | } | ||
1266 | hdr = &whdr->header; | ||
1267 | nfs_pgheader_init(desc, hdr, nfs_writehdr_free); | ||
1268 | atomic_inc(&hdr->refcnt); | ||
1269 | ret = nfs_generic_flush(desc, hdr); | ||
1270 | if (ret == 0) | ||
1271 | ret = nfs_do_multiple_writes(&hdr->rpc_list, | ||
1272 | desc->pg_rpc_callops, | ||
1273 | desc->pg_ioflags); | ||
1274 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
1275 | hdr->completion_ops->completion(hdr); | ||
1276 | return ret; | ||
1277 | } | ||
1278 | |||
1279 | static const struct nfs_pageio_ops nfs_pageio_write_ops = { | ||
1280 | .pg_test = nfs_generic_pg_test, | ||
1281 | .pg_doio = nfs_generic_pg_writepages, | ||
1282 | }; | ||
1283 | |||
1284 | void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, | 1055 | void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, |
1285 | struct inode *inode, int ioflags, | 1056 | struct inode *inode, int ioflags, bool force_mds, |
1286 | const struct nfs_pgio_completion_ops *compl_ops) | 1057 | const struct nfs_pgio_completion_ops *compl_ops) |
1287 | { | 1058 | { |
1288 | nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, | 1059 | struct nfs_server *server = NFS_SERVER(inode); |
1289 | NFS_SERVER(inode)->wsize, ioflags); | 1060 | const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops; |
1061 | |||
1062 | #ifdef CONFIG_NFS_V4_1 | ||
1063 | if (server->pnfs_curr_ld && !force_mds) | ||
1064 | pg_ops = server->pnfs_curr_ld->pg_write_ops; | ||
1065 | #endif | ||
1066 | nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops, | ||
1067 | server->wsize, ioflags); | ||
1290 | } | 1068 | } |
1291 | EXPORT_SYMBOL_GPL(nfs_pageio_init_write); | 1069 | EXPORT_SYMBOL_GPL(nfs_pageio_init_write); |
1292 | 1070 | ||
1293 | void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) | 1071 | void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) |
1294 | { | 1072 | { |
1295 | pgio->pg_ops = &nfs_pageio_write_ops; | 1073 | pgio->pg_ops = &nfs_pgio_rw_ops; |
1296 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; | 1074 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; |
1297 | } | 1075 | } |
1298 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); | 1076 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); |
1299 | 1077 | ||
1300 | 1078 | ||
1301 | void nfs_write_prepare(struct rpc_task *task, void *calldata) | ||
1302 | { | ||
1303 | struct nfs_write_data *data = calldata; | ||
1304 | int err; | ||
1305 | err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); | ||
1306 | if (err) | ||
1307 | rpc_exit(task, err); | ||
1308 | } | ||
1309 | |||
1310 | void nfs_commit_prepare(struct rpc_task *task, void *calldata) | 1079 | void nfs_commit_prepare(struct rpc_task *task, void *calldata) |
1311 | { | 1080 | { |
1312 | struct nfs_commit_data *data = calldata; | 1081 | struct nfs_commit_data *data = calldata; |
@@ -1314,23 +1083,8 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) | |||
1314 | NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); | 1083 | NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); |
1315 | } | 1084 | } |
1316 | 1085 | ||
1317 | /* | 1086 | static void nfs_writeback_release_common(struct nfs_pgio_data *data) |
1318 | * Handle a write reply that flushes a whole page. | ||
1319 | * | ||
1320 | * FIXME: There is an inherent race with invalidate_inode_pages and | ||
1321 | * writebacks since the page->count is kept > 1 for as long | ||
1322 | * as the page has a write request pending. | ||
1323 | */ | ||
1324 | static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) | ||
1325 | { | ||
1326 | struct nfs_write_data *data = calldata; | ||
1327 | |||
1328 | nfs_writeback_done(task, data); | ||
1329 | } | ||
1330 | |||
1331 | static void nfs_writeback_release_common(void *calldata) | ||
1332 | { | 1087 | { |
1333 | struct nfs_write_data *data = calldata; | ||
1334 | struct nfs_pgio_header *hdr = data->header; | 1088 | struct nfs_pgio_header *hdr = data->header; |
1335 | int status = data->task.tk_status; | 1089 | int status = data->task.tk_status; |
1336 | 1090 | ||
@@ -1339,34 +1093,46 @@ static void nfs_writeback_release_common(void *calldata) | |||
1339 | if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) | 1093 | if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) |
1340 | ; /* Do nothing */ | 1094 | ; /* Do nothing */ |
1341 | else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) | 1095 | else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) |
1342 | memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); | 1096 | memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf)); |
1343 | else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf))) | 1097 | else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf))) |
1344 | set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); | 1098 | set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); |
1345 | spin_unlock(&hdr->lock); | 1099 | spin_unlock(&hdr->lock); |
1346 | } | 1100 | } |
1347 | nfs_writedata_release(data); | ||
1348 | } | 1101 | } |
1349 | 1102 | ||
1350 | static const struct rpc_call_ops nfs_write_common_ops = { | 1103 | /* |
1351 | .rpc_call_prepare = nfs_write_prepare, | 1104 | * Special version of should_remove_suid() that ignores capabilities. |
1352 | .rpc_call_done = nfs_writeback_done_common, | 1105 | */ |
1353 | .rpc_release = nfs_writeback_release_common, | 1106 | static int nfs_should_remove_suid(const struct inode *inode) |
1354 | }; | 1107 | { |
1108 | umode_t mode = inode->i_mode; | ||
1109 | int kill = 0; | ||
1110 | |||
1111 | /* suid always must be killed */ | ||
1112 | if (unlikely(mode & S_ISUID)) | ||
1113 | kill = ATTR_KILL_SUID; | ||
1355 | 1114 | ||
1115 | /* | ||
1116 | * sgid without any exec bits is just a mandatory locking mark; leave | ||
1117 | * it alone. If some exec bits are set, it's a real sgid; kill it. | ||
1118 | */ | ||
1119 | if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) | ||
1120 | kill |= ATTR_KILL_SGID; | ||
1121 | |||
1122 | if (unlikely(kill && S_ISREG(mode))) | ||
1123 | return kill; | ||
1124 | |||
1125 | return 0; | ||
1126 | } | ||
1356 | 1127 | ||
1357 | /* | 1128 | /* |
1358 | * This function is called when the WRITE call is complete. | 1129 | * This function is called when the WRITE call is complete. |
1359 | */ | 1130 | */ |
1360 | void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | 1131 | static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, |
1132 | struct inode *inode) | ||
1361 | { | 1133 | { |
1362 | struct nfs_writeargs *argp = &data->args; | ||
1363 | struct nfs_writeres *resp = &data->res; | ||
1364 | struct inode *inode = data->header->inode; | ||
1365 | int status; | 1134 | int status; |
1366 | 1135 | ||
1367 | dprintk("NFS: %5u nfs_writeback_done (status %d)\n", | ||
1368 | task->tk_pid, task->tk_status); | ||
1369 | |||
1370 | /* | 1136 | /* |
1371 | * ->write_done will attempt to use post-op attributes to detect | 1137 | * ->write_done will attempt to use post-op attributes to detect |
1372 | * conflicting writes by other clients. A strict interpretation | 1138 | * conflicting writes by other clients. A strict interpretation |
@@ -1376,11 +1142,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
1376 | */ | 1142 | */ |
1377 | status = NFS_PROTO(inode)->write_done(task, data); | 1143 | status = NFS_PROTO(inode)->write_done(task, data); |
1378 | if (status != 0) | 1144 | if (status != 0) |
1379 | return; | 1145 | return status; |
1380 | nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); | 1146 | nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); |
1381 | 1147 | ||
1382 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | 1148 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) |
1383 | if (resp->verf->committed < argp->stable && task->tk_status >= 0) { | 1149 | if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { |
1384 | /* We tried a write call, but the server did not | 1150 | /* We tried a write call, but the server did not |
1385 | * commit data to stable storage even though we | 1151 | * commit data to stable storage even though we |
1386 | * requested it. | 1152 | * requested it. |
@@ -1396,18 +1162,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
1396 | dprintk("NFS: faulty NFS server %s:" | 1162 | dprintk("NFS: faulty NFS server %s:" |
1397 | " (committed = %d) != (stable = %d)\n", | 1163 | " (committed = %d) != (stable = %d)\n", |
1398 | NFS_SERVER(inode)->nfs_client->cl_hostname, | 1164 | NFS_SERVER(inode)->nfs_client->cl_hostname, |
1399 | resp->verf->committed, argp->stable); | 1165 | data->res.verf->committed, data->args.stable); |
1400 | complain = jiffies + 300 * HZ; | 1166 | complain = jiffies + 300 * HZ; |
1401 | } | 1167 | } |
1402 | } | 1168 | } |
1403 | #endif | 1169 | #endif |
1404 | if (task->tk_status < 0) | 1170 | |
1405 | nfs_set_pgio_error(data->header, task->tk_status, argp->offset); | 1171 | /* Deal with the suid/sgid bit corner case */ |
1406 | else if (resp->count < argp->count) { | 1172 | if (nfs_should_remove_suid(inode)) |
1173 | nfs_mark_for_revalidate(inode); | ||
1174 | return 0; | ||
1175 | } | ||
1176 | |||
1177 | /* | ||
1178 | * This function is called when the WRITE call is complete. | ||
1179 | */ | ||
1180 | static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) | ||
1181 | { | ||
1182 | struct nfs_pgio_args *argp = &data->args; | ||
1183 | struct nfs_pgio_res *resp = &data->res; | ||
1184 | |||
1185 | if (resp->count < argp->count) { | ||
1407 | static unsigned long complain; | 1186 | static unsigned long complain; |
1408 | 1187 | ||
1409 | /* This a short write! */ | 1188 | /* This a short write! */ |
1410 | nfs_inc_stats(inode, NFSIOS_SHORTWRITE); | 1189 | nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); |
1411 | 1190 | ||
1412 | /* Has the server at least made some progress? */ | 1191 | /* Has the server at least made some progress? */ |
1413 | if (resp->count == 0) { | 1192 | if (resp->count == 0) { |
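The short-write logic above, carried over into nfs_writeback_result(), distinguishes a server that made partial progress from one that made none: partial progress shrinks the request and resends the remainder, zero progress is treated as a server error. A minimal userspace sketch of that retry rule; the struct and helper names here are hypothetical, not the kernel's:

#include <stddef.h>

struct wr_state { size_t offset, count, done; };

/* returns 0 to accept the result, 1 to resend the rest, -1 on a stuck server */
static int handle_short_write(struct wr_state *w, size_t written)
{
        if (written >= w->count)
                return 0;       /* full write, nothing more to do */
        if (written == 0)
                return -1;      /* no progress at all: error out */
        /* partial progress: advance past what the server accepted */
        w->offset += written;
        w->count  -= written;
        w->done   += written;
        return 1;               /* resend the remainder */
}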
@@ -1874,7 +1653,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, | |||
1874 | int __init nfs_init_writepagecache(void) | 1653 | int __init nfs_init_writepagecache(void) |
1875 | { | 1654 | { |
1876 | nfs_wdata_cachep = kmem_cache_create("nfs_write_data", | 1655 | nfs_wdata_cachep = kmem_cache_create("nfs_write_data", |
1877 | sizeof(struct nfs_write_header), | 1656 | sizeof(struct nfs_rw_header), |
1878 | 0, SLAB_HWCACHE_ALIGN, | 1657 | 0, SLAB_HWCACHE_ALIGN, |
1879 | NULL); | 1658 | NULL); |
1880 | if (nfs_wdata_cachep == NULL) | 1659 | if (nfs_wdata_cachep == NULL) |
@@ -1936,3 +1715,12 @@ void nfs_destroy_writepagecache(void) | |||
1936 | kmem_cache_destroy(nfs_wdata_cachep); | 1715 | kmem_cache_destroy(nfs_wdata_cachep); |
1937 | } | 1716 | } |
1938 | 1717 | ||
1718 | static const struct nfs_rw_ops nfs_rw_write_ops = { | ||
1719 | .rw_mode = FMODE_WRITE, | ||
1720 | .rw_alloc_header = nfs_writehdr_alloc, | ||
1721 | .rw_free_header = nfs_writehdr_free, | ||
1722 | .rw_release = nfs_writeback_release_common, | ||
1723 | .rw_done = nfs_writeback_done, | ||
1724 | .rw_result = nfs_writeback_result, | ||
1725 | .rw_initiate = nfs_initiate_write, | ||
1726 | }; | ||
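The nfs_rw_write_ops table added above is the write half of the new shared read/write path: generic pageio code now calls through these hooks instead of write-specific RPC callbacks. A minimal sketch of the dispatch pattern, with the generic caller invented for illustration (the real shared code is not part of this hunk):

#include <stdio.h>

struct pgio_data { int status; };

struct rw_ops {
        int  (*rw_done)(struct pgio_data *);    /* protocol completion */
        void (*rw_result)(struct pgio_data *);  /* short-I/O handling */
};

static int  write_done(struct pgio_data *d)   { return d->status; }
static void write_result(struct pgio_data *d) { printf("status %d\n", d->status); }

static const struct rw_ops write_rw_ops = {
        .rw_done   = write_done,
        .rw_result = write_result,
};

/* one generic completion path serves both reads and writes */
static void pgio_complete(struct pgio_data *d, const struct rw_ops *ops)
{
        if (ops->rw_done(d) == 0)
                ops->rw_result(d);
}

int main(void)
{
        struct pgio_data d = { .status = 0 };
        pgio_complete(&d, &write_rw_ops);
        return 0;
}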
diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 3e794c12e90a..610af5155ef2 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h | |||
@@ -46,6 +46,9 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc | |||
46 | enum nfs3_stable_how { | 46 | enum nfs3_stable_how { |
47 | NFS_UNSTABLE = 0, | 47 | NFS_UNSTABLE = 0, |
48 | NFS_DATA_SYNC = 1, | 48 | NFS_DATA_SYNC = 1, |
49 | NFS_FILE_SYNC = 2 | 49 | NFS_FILE_SYNC = 2, |
50 | |||
51 | /* used by direct.c to mark verf as invalid */ | ||
52 | NFS_INVALID_STABLE_HOW = -1 | ||
50 | }; | 53 | }; |
51 | #endif /* _LINUX_NFS_H */ | 54 | #endif /* _LINUX_NFS_H */ |
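NFS_INVALID_STABLE_HOW sits outside the 0..2 value space the protocol puts on the wire, so the direct I/O code can mark a write verifier as not-yet-seen without an extra flag. A small sketch of that sentinel usage, assuming the struct layout from this patch; the helper names are illustrative:

enum nfs3_stable_how {
        NFS_UNSTABLE = 0,
        NFS_DATA_SYNC = 1,
        NFS_FILE_SYNC = 2,
        NFS_INVALID_STABLE_HOW = -1     /* never appears on the wire */
};

struct nfs_write_verifier { char data[8]; };

struct nfs_writeverf {
        struct nfs_write_verifier verifier;
        enum nfs3_stable_how committed;
};

static void invalidate_verf(struct nfs_writeverf *v)
{
        v->committed = NFS_INVALID_STABLE_HOW;  /* "no verifier seen yet" */
}

static int verf_is_valid(const struct nfs_writeverf *v)
{
        return v->committed != NFS_INVALID_STABLE_HOW;
}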
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fa6918b0f829..919576b8e2cf 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -520,7 +520,6 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); | |||
520 | extern int nfs_writepages(struct address_space *, struct writeback_control *); | 520 | extern int nfs_writepages(struct address_space *, struct writeback_control *); |
521 | extern int nfs_flush_incompatible(struct file *file, struct page *page); | 521 | extern int nfs_flush_incompatible(struct file *file, struct page *page); |
522 | extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); | 522 | extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); |
523 | extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); | ||
524 | 523 | ||
525 | /* | 524 | /* |
526 | * Try to write back everything synchronously (but check the | 525 | * Try to write back everything synchronously (but check the |
@@ -553,7 +552,6 @@ nfs_have_writebacks(struct inode *inode) | |||
553 | extern int nfs_readpage(struct file *, struct page *); | 552 | extern int nfs_readpage(struct file *, struct page *); |
554 | extern int nfs_readpages(struct file *, struct address_space *, | 553 | extern int nfs_readpages(struct file *, struct address_space *, |
555 | struct list_head *, unsigned); | 554 | struct list_head *, unsigned); |
556 | extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); | ||
557 | extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, | 555 | extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, |
558 | struct page *); | 556 | struct page *); |
559 | 557 | ||
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 92ce5783b707..7d9096d95d4a 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h | |||
@@ -22,12 +22,17 @@ | |||
22 | * Valid flags for a dirty buffer | 22 | * Valid flags for a dirty buffer |
23 | */ | 23 | */ |
24 | enum { | 24 | enum { |
25 | PG_BUSY = 0, | 25 | PG_BUSY = 0, /* nfs_{un}lock_request */ |
26 | PG_MAPPED, | 26 | PG_MAPPED, /* page private set for buffered io */ |
27 | PG_CLEAN, | 27 | PG_CLEAN, /* write succeeded */ |
28 | PG_NEED_COMMIT, | 28 | PG_COMMIT_TO_DS, /* used by pnfs layouts */ |
29 | PG_NEED_RESCHED, | 29 | PG_INODE_REF, /* extra ref held by inode (head req only) */ |
30 | PG_COMMIT_TO_DS, | 30 | PG_HEADLOCK, /* page group lock of wb_head */ |
31 | PG_TEARDOWN, /* page group sync for destroy */ | ||
32 | PG_UNLOCKPAGE, /* page group sync bit in read path */ | ||
33 | PG_UPTODATE, /* page group sync bit in read path */ | ||
34 | PG_WB_END, /* page group sync bit in write path */ | ||
35 | PG_REMOVE, /* page group sync bit in write path */ | ||
31 | }; | 36 | }; |
32 | 37 | ||
33 | struct nfs_inode; | 38 | struct nfs_inode; |
@@ -43,15 +48,29 @@ struct nfs_page { | |||
43 | struct kref wb_kref; /* reference count */ | 48 | struct kref wb_kref; /* reference count */ |
44 | unsigned long wb_flags; | 49 | unsigned long wb_flags; |
45 | struct nfs_write_verifier wb_verf; /* Commit cookie */ | 50 | struct nfs_write_verifier wb_verf; /* Commit cookie */ |
51 | struct nfs_page *wb_this_page; /* list of reqs for this page */ | ||
52 | struct nfs_page *wb_head; /* head pointer for req list */ | ||
46 | }; | 53 | }; |
47 | 54 | ||
48 | struct nfs_pageio_descriptor; | 55 | struct nfs_pageio_descriptor; |
49 | struct nfs_pageio_ops { | 56 | struct nfs_pageio_ops { |
50 | void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *); | 57 | void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *); |
51 | bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); | 58 | size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, |
59 | struct nfs_page *); | ||
52 | int (*pg_doio)(struct nfs_pageio_descriptor *); | 60 | int (*pg_doio)(struct nfs_pageio_descriptor *); |
53 | }; | 61 | }; |
54 | 62 | ||
63 | struct nfs_rw_ops { | ||
64 | const fmode_t rw_mode; | ||
65 | struct nfs_rw_header *(*rw_alloc_header)(void); | ||
66 | void (*rw_free_header)(struct nfs_rw_header *); | ||
67 | void (*rw_release)(struct nfs_pgio_data *); | ||
68 | int (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *); | ||
69 | void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *); | ||
70 | void (*rw_initiate)(struct nfs_pgio_data *, struct rpc_message *, | ||
71 | struct rpc_task_setup *, int); | ||
72 | }; | ||
73 | |||
55 | struct nfs_pageio_descriptor { | 74 | struct nfs_pageio_descriptor { |
56 | struct list_head pg_list; | 75 | struct list_head pg_list; |
57 | unsigned long pg_bytes_written; | 76 | unsigned long pg_bytes_written; |
@@ -63,6 +82,7 @@ struct nfs_pageio_descriptor { | |||
63 | 82 | ||
64 | struct inode *pg_inode; | 83 | struct inode *pg_inode; |
65 | const struct nfs_pageio_ops *pg_ops; | 84 | const struct nfs_pageio_ops *pg_ops; |
85 | const struct nfs_rw_ops *pg_rw_ops; | ||
66 | int pg_ioflags; | 86 | int pg_ioflags; |
67 | int pg_error; | 87 | int pg_error; |
68 | const struct rpc_call_ops *pg_rpc_callops; | 88 | const struct rpc_call_ops *pg_rpc_callops; |
@@ -75,29 +95,33 @@ struct nfs_pageio_descriptor { | |||
75 | #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) | 95 | #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) |
76 | 96 | ||
77 | extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, | 97 | extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, |
78 | struct inode *inode, | ||
79 | struct page *page, | 98 | struct page *page, |
99 | struct nfs_page *last, | ||
80 | unsigned int offset, | 100 | unsigned int offset, |
81 | unsigned int count); | 101 | unsigned int count); |
82 | extern void nfs_release_request(struct nfs_page *req); | 102 | extern void nfs_release_request(struct nfs_page *); |
83 | 103 | ||
84 | 104 | ||
85 | extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | 105 | extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, |
86 | struct inode *inode, | 106 | struct inode *inode, |
87 | const struct nfs_pageio_ops *pg_ops, | 107 | const struct nfs_pageio_ops *pg_ops, |
88 | const struct nfs_pgio_completion_ops *compl_ops, | 108 | const struct nfs_pgio_completion_ops *compl_ops, |
109 | const struct nfs_rw_ops *rw_ops, | ||
89 | size_t bsize, | 110 | size_t bsize, |
90 | int how); | 111 | int how); |
91 | extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, | 112 | extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, |
92 | struct nfs_page *); | 113 | struct nfs_page *); |
93 | extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); | 114 | extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); |
94 | extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); | 115 | extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); |
95 | extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | 116 | extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, |
96 | struct nfs_page *prev, | 117 | struct nfs_page *prev, |
97 | struct nfs_page *req); | 118 | struct nfs_page *req); |
98 | extern int nfs_wait_on_request(struct nfs_page *); | 119 | extern int nfs_wait_on_request(struct nfs_page *); |
99 | extern void nfs_unlock_request(struct nfs_page *req); | 120 | extern void nfs_unlock_request(struct nfs_page *req); |
100 | extern void nfs_unlock_and_release_request(struct nfs_page *req); | 121 | extern void nfs_unlock_and_release_request(struct nfs_page *); |
122 | extern void nfs_page_group_lock(struct nfs_page *); | ||
123 | extern void nfs_page_group_unlock(struct nfs_page *); | ||
124 | extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); | ||
101 | 125 | ||
102 | /* | 126 | /* |
103 | * Lock the page of an asynchronous request | 127 | * Lock the page of an asynchronous request |
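The new wb_this_page and wb_head fields let several sub-page requests form a page group, with the added PG_HEADLOCK/PG_TEARDOWN/PG_WB_END bits synchronizing the members. Assuming wb_this_page forms a circular list rooted at wb_head (my reading of the field comments; the authoritative logic is not in this hunk), walking a group looks roughly like:

struct nfs_page_sketch {
        struct nfs_page_sketch *wb_this_page;   /* next request on this page */
        struct nfs_page_sketch *wb_head;        /* head of the group */
        unsigned int wb_pgbase, wb_bytes;
};

/* sum the bytes covered by every request in one page group */
static unsigned int group_bytes(struct nfs_page_sketch *head)
{
        struct nfs_page_sketch *req = head;
        unsigned int total = 0;

        do {
                total += req->wb_bytes;
                req = req->wb_this_page;
        } while (req != head);          /* list wraps back to the head */
        return total;
}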
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6fb5b2335b59..9a1396e70310 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
@@ -489,31 +489,21 @@ struct nfs4_delegreturnres { | |||
489 | }; | 489 | }; |
490 | 490 | ||
491 | /* | 491 | /* |
492 | * Arguments to the read call. | 492 | * Arguments to the write call. |
493 | */ | 493 | */ |
494 | struct nfs_readargs { | 494 | struct nfs_write_verifier { |
495 | struct nfs4_sequence_args seq_args; | 495 | char data[8]; |
496 | struct nfs_fh * fh; | ||
497 | struct nfs_open_context *context; | ||
498 | struct nfs_lock_context *lock_context; | ||
499 | nfs4_stateid stateid; | ||
500 | __u64 offset; | ||
501 | __u32 count; | ||
502 | unsigned int pgbase; | ||
503 | struct page ** pages; | ||
504 | }; | 496 | }; |
505 | 497 | ||
506 | struct nfs_readres { | 498 | struct nfs_writeverf { |
507 | struct nfs4_sequence_res seq_res; | 499 | struct nfs_write_verifier verifier; |
508 | struct nfs_fattr * fattr; | 500 | enum nfs3_stable_how committed; |
509 | __u32 count; | ||
510 | int eof; | ||
511 | }; | 501 | }; |
512 | 502 | ||
513 | /* | 503 | /* |
514 | * Arguments to the write call. | 504 | * Arguments shared by the read and write call. |
515 | */ | 505 | */ |
516 | struct nfs_writeargs { | 506 | struct nfs_pgio_args { |
517 | struct nfs4_sequence_args seq_args; | 507 | struct nfs4_sequence_args seq_args; |
518 | struct nfs_fh * fh; | 508 | struct nfs_fh * fh; |
519 | struct nfs_open_context *context; | 509 | struct nfs_open_context *context; |
@@ -521,27 +511,20 @@ struct nfs_writeargs { | |||
521 | nfs4_stateid stateid; | 511 | nfs4_stateid stateid; |
522 | __u64 offset; | 512 | __u64 offset; |
523 | __u32 count; | 513 | __u32 count; |
524 | enum nfs3_stable_how stable; | ||
525 | unsigned int pgbase; | 514 | unsigned int pgbase; |
526 | struct page ** pages; | 515 | struct page ** pages; |
527 | const u32 * bitmask; | 516 | const u32 * bitmask; /* used by write */ |
528 | }; | 517 | enum nfs3_stable_how stable; /* used by write */ |
529 | |||
530 | struct nfs_write_verifier { | ||
531 | char data[8]; | ||
532 | }; | 518 | }; |
533 | 519 | ||
534 | struct nfs_writeverf { | 520 | struct nfs_pgio_res { |
535 | struct nfs_write_verifier verifier; | ||
536 | enum nfs3_stable_how committed; | ||
537 | }; | ||
538 | |||
539 | struct nfs_writeres { | ||
540 | struct nfs4_sequence_res seq_res; | 521 | struct nfs4_sequence_res seq_res; |
541 | struct nfs_fattr * fattr; | 522 | struct nfs_fattr * fattr; |
542 | struct nfs_writeverf * verf; | ||
543 | __u32 count; | 523 | __u32 count; |
544 | const struct nfs_server *server; | 524 | int eof; /* used by read */ |
525 | struct nfs_writeverf * verf; /* used by write */ | ||
526 | const struct nfs_server *server; /* used by write */ | ||
527 | |||
545 | }; | 528 | }; |
546 | 529 | ||
547 | /* | 530 | /* |
@@ -1129,6 +1112,7 @@ struct pnfs_commit_bucket { | |||
1129 | struct list_head committing; | 1112 | struct list_head committing; |
1130 | struct pnfs_layout_segment *wlseg; | 1113 | struct pnfs_layout_segment *wlseg; |
1131 | struct pnfs_layout_segment *clseg; | 1114 | struct pnfs_layout_segment *clseg; |
1115 | struct nfs_writeverf direct_verf; | ||
1132 | }; | 1116 | }; |
1133 | 1117 | ||
1134 | struct pnfs_ds_commit_info { | 1118 | struct pnfs_ds_commit_info { |
@@ -1264,20 +1248,6 @@ struct nfs_page_array { | |||
1264 | struct page *page_array[NFS_PAGEVEC_SIZE]; | 1248 | struct page *page_array[NFS_PAGEVEC_SIZE]; |
1265 | }; | 1249 | }; |
1266 | 1250 | ||
1267 | struct nfs_read_data { | ||
1268 | struct nfs_pgio_header *header; | ||
1269 | struct list_head list; | ||
1270 | struct rpc_task task; | ||
1271 | struct nfs_fattr fattr; /* fattr storage */ | ||
1272 | struct nfs_readargs args; | ||
1273 | struct nfs_readres res; | ||
1274 | unsigned long timestamp; /* For lease renewal */ | ||
1275 | int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); | ||
1276 | __u64 mds_offset; | ||
1277 | struct nfs_page_array pages; | ||
1278 | struct nfs_client *ds_clp; /* pNFS data server */ | ||
1279 | }; | ||
1280 | |||
1281 | /* used as flag bits in nfs_pgio_header */ | 1251 | /* used as flag bits in nfs_pgio_header */ |
1282 | enum { | 1252 | enum { |
1283 | NFS_IOHDR_ERROR = 0, | 1253 | NFS_IOHDR_ERROR = 0, |
@@ -1287,19 +1257,22 @@ enum { | |||
1287 | NFS_IOHDR_NEED_RESCHED, | 1257 | NFS_IOHDR_NEED_RESCHED, |
1288 | }; | 1258 | }; |
1289 | 1259 | ||
1260 | struct nfs_pgio_data; | ||
1261 | |||
1290 | struct nfs_pgio_header { | 1262 | struct nfs_pgio_header { |
1291 | struct inode *inode; | 1263 | struct inode *inode; |
1292 | struct rpc_cred *cred; | 1264 | struct rpc_cred *cred; |
1293 | struct list_head pages; | 1265 | struct list_head pages; |
1294 | struct list_head rpc_list; | 1266 | struct nfs_pgio_data *data; |
1295 | atomic_t refcnt; | 1267 | atomic_t refcnt; |
1296 | struct nfs_page *req; | 1268 | struct nfs_page *req; |
1297 | struct nfs_writeverf *verf; | 1269 | struct nfs_writeverf verf; /* Used for writes */ |
1298 | struct pnfs_layout_segment *lseg; | 1270 | struct pnfs_layout_segment *lseg; |
1299 | loff_t io_start; | 1271 | loff_t io_start; |
1300 | const struct rpc_call_ops *mds_ops; | 1272 | const struct rpc_call_ops *mds_ops; |
1301 | void (*release) (struct nfs_pgio_header *hdr); | 1273 | void (*release) (struct nfs_pgio_header *hdr); |
1302 | const struct nfs_pgio_completion_ops *completion_ops; | 1274 | const struct nfs_pgio_completion_ops *completion_ops; |
1275 | const struct nfs_rw_ops *rw_ops; | ||
1303 | struct nfs_direct_req *dreq; | 1276 | struct nfs_direct_req *dreq; |
1304 | void *layout_private; | 1277 | void *layout_private; |
1305 | spinlock_t lock; | 1278 | spinlock_t lock; |
@@ -1310,30 +1283,24 @@ struct nfs_pgio_header { | |||
1310 | unsigned long flags; | 1283 | unsigned long flags; |
1311 | }; | 1284 | }; |
1312 | 1285 | ||
1313 | struct nfs_read_header { | 1286 | struct nfs_pgio_data { |
1314 | struct nfs_pgio_header header; | ||
1315 | struct nfs_read_data rpc_data; | ||
1316 | }; | ||
1317 | |||
1318 | struct nfs_write_data { | ||
1319 | struct nfs_pgio_header *header; | 1287 | struct nfs_pgio_header *header; |
1320 | struct list_head list; | ||
1321 | struct rpc_task task; | 1288 | struct rpc_task task; |
1322 | struct nfs_fattr fattr; | 1289 | struct nfs_fattr fattr; |
1323 | struct nfs_writeverf verf; | 1290 | struct nfs_writeverf verf; /* Used for writes */ |
1324 | struct nfs_writeargs args; /* argument struct */ | 1291 | struct nfs_pgio_args args; /* argument struct */ |
1325 | struct nfs_writeres res; /* result struct */ | 1292 | struct nfs_pgio_res res; /* result struct */ |
1326 | unsigned long timestamp; /* For lease renewal */ | 1293 | unsigned long timestamp; /* For lease renewal */ |
1327 | int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); | 1294 | int (*pgio_done_cb) (struct rpc_task *task, struct nfs_pgio_data *data); |
1328 | __u64 mds_offset; /* Filelayout dense stripe */ | 1295 | __u64 mds_offset; /* Filelayout dense stripe */ |
1329 | struct nfs_page_array pages; | 1296 | struct nfs_page_array pages; |
1330 | struct nfs_client *ds_clp; /* pNFS data server */ | 1297 | struct nfs_client *ds_clp; /* pNFS data server */ |
1298 | int ds_idx; /* ds index if ds_clp is set */ | ||
1331 | }; | 1299 | }; |
1332 | 1300 | ||
1333 | struct nfs_write_header { | 1301 | struct nfs_rw_header { |
1334 | struct nfs_pgio_header header; | 1302 | struct nfs_pgio_header header; |
1335 | struct nfs_write_data rpc_data; | 1303 | struct nfs_pgio_data rpc_data; |
1336 | struct nfs_writeverf verf; | ||
1337 | }; | 1304 | }; |
1338 | 1305 | ||
1339 | struct nfs_mds_commit_info { | 1306 | struct nfs_mds_commit_info { |
@@ -1465,16 +1432,11 @@ struct nfs_rpc_ops { | |||
1465 | struct nfs_pathconf *); | 1432 | struct nfs_pathconf *); |
1466 | int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); | 1433 | int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); |
1467 | int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); | 1434 | int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); |
1468 | void (*read_setup) (struct nfs_read_data *, struct rpc_message *); | 1435 | int (*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *); |
1469 | void (*read_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, | 1436 | void (*read_setup) (struct nfs_pgio_data *, struct rpc_message *); |
1470 | const struct nfs_pgio_completion_ops *); | 1437 | int (*read_done) (struct rpc_task *, struct nfs_pgio_data *); |
1471 | int (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *); | 1438 | void (*write_setup) (struct nfs_pgio_data *, struct rpc_message *); |
1472 | int (*read_done) (struct rpc_task *, struct nfs_read_data *); | 1439 | int (*write_done) (struct rpc_task *, struct nfs_pgio_data *); |
1473 | void (*write_setup) (struct nfs_write_data *, struct rpc_message *); | ||
1474 | void (*write_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, int, | ||
1475 | const struct nfs_pgio_completion_ops *); | ||
1476 | int (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); | ||
1477 | int (*write_done) (struct rpc_task *, struct nfs_write_data *); | ||
1478 | void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); | 1440 | void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); |
1479 | void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); | 1441 | void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); |
1480 | int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); | 1442 | int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); |
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3876f0f1dfd3..fcbfe8783243 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -24,6 +24,12 @@ | |||
24 | #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) | 24 | #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) |
25 | #define RPC_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE_LIMIT | 25 | #define RPC_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE_LIMIT |
26 | 26 | ||
27 | #define RPC_CWNDSHIFT (8U) | ||
28 | #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) | ||
29 | #define RPC_INITCWND RPC_CWNDSCALE | ||
30 | #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) | ||
31 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) | ||
32 | |||
27 | /* | 33 | /* |
28 | * This describes a timeout strategy | 34 | * This describes a timeout strategy |
29 | */ | 35 | */ |
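Moving the congestion-window macros into the header lets xprtrdma reuse them. The window is kept in fixed point with RPC_CWNDSHIFT fractional bits, so one request slot equals RPC_CWNDSCALE (256). A worked example of the arithmetic:

#include <stdio.h>

#define RPC_CWNDSHIFT   (8U)
#define RPC_CWNDSCALE   (1U << RPC_CWNDSHIFT)   /* 256 == one request slot */

int main(void)
{
        unsigned long cwnd = 3 * RPC_CWNDSCALE; /* room for three requests */
        unsigned long cong = 2 * RPC_CWNDSCALE; /* two requests in flight */

        /* RPCXPRT_CONGESTED(): congested once cong >= cwnd */
        printf("congested: %d\n", cong >= cwnd);                /* prints 0 */
        printf("max cwnd, 16 slots: %u\n", 16U << RPC_CWNDSHIFT); /* 4096 */
        return 0;
}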
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 27ce26240932..92d5ab99fbf3 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c | |||
@@ -218,10 +218,8 @@ static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor) | |||
218 | 218 | ||
219 | spin_lock(®istered_mechs_lock); | 219 | spin_lock(®istered_mechs_lock); |
220 | list_for_each_entry(pos, ®istered_mechs, gm_list) { | 220 | list_for_each_entry(pos, ®istered_mechs, gm_list) { |
221 | if (!mech_supports_pseudoflavor(pos, pseudoflavor)) { | 221 | if (!mech_supports_pseudoflavor(pos, pseudoflavor)) |
222 | module_put(pos->gm_owner); | ||
223 | continue; | 222 | continue; |
224 | } | ||
225 | if (try_module_get(pos->gm_owner)) | 223 | if (try_module_get(pos->gm_owner)) |
226 | gm = pos; | 224 | gm = pos; |
227 | break; | 225 | break; |
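The gss_mech_switch fix removes a module_put() on entries whose reference was never taken; only the matching mechanism gets a try_module_get(). A sketch of the reference discipline, with a toy refcount standing in for the module API:

#include <stddef.h>

struct mech { int refcount; int flavor; struct mech *next; };

/* take a reference only on the entry that is actually selected */
static struct mech *find_and_get_mech(struct mech *head, int flavor)
{
        struct mech *m;

        for (m = head; m != NULL; m = m->next) {
                if (m->flavor != flavor)
                        continue;       /* nothing acquired, nothing to put */
                m->refcount++;          /* stands in for try_module_get() */
                return m;
        }
        return NULL;
}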
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 25578afe1548..c0365c14b858 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -832,7 +832,8 @@ static void rpc_async_schedule(struct work_struct *work) | |||
832 | * @size: requested byte size | 832 | * @size: requested byte size |
833 | * | 833 | * |
834 | * To prevent rpciod from hanging, this allocator never sleeps, | 834 | * To prevent rpciod from hanging, this allocator never sleeps, |
835 | * returning NULL if the request cannot be serviced immediately. | 835 | * returning NULL and suppressing warning if the request cannot be serviced |
836 | * immediately. | ||
836 | * The caller can arrange to sleep in a way that is safe for rpciod. | 837 | * The caller can arrange to sleep in a way that is safe for rpciod. |
837 | * | 838 | * |
838 | * Most requests are 'small' (under 2KiB) and can be serviced from a | 839 | * Most requests are 'small' (under 2KiB) and can be serviced from a |
@@ -845,7 +846,7 @@ static void rpc_async_schedule(struct work_struct *work) | |||
845 | void *rpc_malloc(struct rpc_task *task, size_t size) | 846 | void *rpc_malloc(struct rpc_task *task, size_t size) |
846 | { | 847 | { |
847 | struct rpc_buffer *buf; | 848 | struct rpc_buffer *buf; |
848 | gfp_t gfp = GFP_NOWAIT; | 849 | gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; |
849 | 850 | ||
850 | if (RPC_IS_SWAPPER(task)) | 851 | if (RPC_IS_SWAPPER(task)) |
851 | gfp |= __GFP_MEMALLOC; | 852 | gfp |= __GFP_MEMALLOC; |
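rpc_malloc() runs in contexts where rpciod must not sleep, so a failed GFP_NOWAIT allocation is an expected outcome rather than a warning-worthy event; adding __GFP_NOWARN silences the allocation-failure splat. The pattern, in kernel terms (this function is illustrative, not the sunrpc code itself):

#include <linux/slab.h>

/* NULL here is normal under memory pressure; the caller arranges to
 * retry from a context where sleeping is safe instead of warning */
static void *fastpath_alloc(size_t size)
{
        return kmalloc(size, GFP_NOWAIT | __GFP_NOWARN);
}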
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 89d051de6b3e..c3b2b3369e52 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -71,24 +71,6 @@ static void xprt_destroy(struct rpc_xprt *xprt); | |||
71 | static DEFINE_SPINLOCK(xprt_list_lock); | 71 | static DEFINE_SPINLOCK(xprt_list_lock); |
72 | static LIST_HEAD(xprt_list); | 72 | static LIST_HEAD(xprt_list); |
73 | 73 | ||
74 | /* | ||
75 | * The transport code maintains an estimate on the maximum number of out- | ||
76 | * standing RPC requests, using a smoothed version of the congestion | ||
77 | * avoidance implemented in 44BSD. This is basically the Van Jacobson | ||
78 | * congestion algorithm: If a retransmit occurs, the congestion window is | ||
79 | * halved; otherwise, it is incremented by 1/cwnd when | ||
80 | * | ||
81 | * - a reply is received and | ||
82 | * - a full number of requests are outstanding and | ||
83 | * - the congestion window hasn't been updated recently. | ||
84 | */ | ||
85 | #define RPC_CWNDSHIFT (8U) | ||
86 | #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) | ||
87 | #define RPC_INITCWND RPC_CWNDSCALE | ||
88 | #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) | ||
89 | |||
90 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) | ||
91 | |||
92 | /** | 74 | /** |
93 | * xprt_register_transport - register a transport implementation | 75 | * xprt_register_transport - register a transport implementation |
94 | * @transport: transport to register | 76 | * @transport: transport to register |
@@ -446,7 +428,15 @@ EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); | |||
446 | * @task: recently completed RPC request used to adjust window | 428 | * @task: recently completed RPC request used to adjust window |
447 | * @result: result code of completed RPC request | 429 | * @result: result code of completed RPC request |
448 | * | 430 | * |
449 | * We use a time-smoothed congestion estimator to avoid heavy oscillation. | 431 | * The transport code maintains an estimate on the maximum number of out- |
432 | * standing RPC requests, using a smoothed version of the congestion | ||
433 | * avoidance implemented in 44BSD. This is basically the Van Jacobson | ||
434 | * congestion algorithm: If a retransmit occurs, the congestion window is | ||
435 | * halved; otherwise, it is incremented by 1/cwnd when | ||
436 | * | ||
437 | * - a reply is received and | ||
438 | * - a full number of requests are outstanding and | ||
439 | * - the congestion window hasn't been updated recently. | ||
450 | */ | 440 | */ |
451 | void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result) | 441 | void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result) |
452 | { | 442 | { |
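The relocated comment describes the 44BSD/Van Jacobson rule: halve the window on a retransmit, otherwise grow it by 1/cwnd per reply. In the fixed-point representation that additive step becomes cwnd += RPC_CWNDSCALE * RPC_CWNDSCALE / cwnd. A sketch of the update under that reading (the authoritative version is xprt_adjust_cwnd() itself):

#define RPC_CWNDSHIFT   (8U)
#define RPC_CWNDSCALE   (1U << RPC_CWNDSHIFT)

static unsigned long adjust_cwnd(unsigned long cwnd, unsigned long maxcwnd,
                                 int retransmitted)
{
        if (retransmitted) {
                cwnd >>= 1;             /* multiplicative decrease */
                if (cwnd < RPC_CWNDSCALE)
                        cwnd = RPC_CWNDSCALE;
        } else {
                /* additive increase: one slot's worth of 1/cwnd, fixed point */
                cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE) / cwnd;
                if (cwnd > maxcwnd)
                        cwnd = maxcwnd;
        }
        return cwnd;
}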
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 96ead526b125..693966d3f33b 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -78,8 +78,7 @@ static const char transfertypes[][12] = { | |||
78 | * elements. Segments are then coalesced when registered, if possible | 78 | * elements. Segments are then coalesced when registered, if possible |
79 | * within the selected memreg mode. | 79 | * within the selected memreg mode. |
80 | * | 80 | * |
81 | * Note, this routine is never called if the connection's memory | 81 | * Returns positive number of segments converted, or a negative errno. |
82 | * registration strategy is 0 (bounce buffers). | ||
83 | */ | 82 | */ |
84 | 83 | ||
85 | static int | 84 | static int |
@@ -102,10 +101,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
102 | page_base = xdrbuf->page_base & ~PAGE_MASK; | 101 | page_base = xdrbuf->page_base & ~PAGE_MASK; |
103 | p = 0; | 102 | p = 0; |
104 | while (len && n < nsegs) { | 103 | while (len && n < nsegs) { |
104 | if (!ppages[p]) { | ||
105 | /* alloc the pagelist for receiving buffer */ | ||
106 | ppages[p] = alloc_page(GFP_ATOMIC); | ||
107 | if (!ppages[p]) | ||
108 | return -ENOMEM; | ||
109 | } | ||
105 | seg[n].mr_page = ppages[p]; | 110 | seg[n].mr_page = ppages[p]; |
106 | seg[n].mr_offset = (void *)(unsigned long) page_base; | 111 | seg[n].mr_offset = (void *)(unsigned long) page_base; |
107 | seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); | 112 | seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); |
108 | BUG_ON(seg[n].mr_len > PAGE_SIZE); | 113 | if (seg[n].mr_len > PAGE_SIZE) |
114 | return -EIO; | ||
109 | len -= seg[n].mr_len; | 115 | len -= seg[n].mr_len; |
110 | ++n; | 116 | ++n; |
111 | ++p; | 117 | ++p; |
@@ -114,7 +120,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
114 | 120 | ||
115 | /* Message overflows the seg array */ | 121 | /* Message overflows the seg array */ |
116 | if (len && n == nsegs) | 122 | if (len && n == nsegs) |
117 | return 0; | 123 | return -EIO; |
118 | 124 | ||
119 | if (xdrbuf->tail[0].iov_len) { | 125 | if (xdrbuf->tail[0].iov_len) { |
120 | /* the rpcrdma protocol allows us to omit any trailing | 126 | /* the rpcrdma protocol allows us to omit any trailing |
@@ -123,7 +129,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
123 | return n; | 129 | return n; |
124 | if (n == nsegs) | 130 | if (n == nsegs) |
125 | /* Tail remains, but we're out of segments */ | 131 | /* Tail remains, but we're out of segments */ |
126 | return 0; | 132 | return -EIO; |
127 | seg[n].mr_page = NULL; | 133 | seg[n].mr_page = NULL; |
128 | seg[n].mr_offset = xdrbuf->tail[0].iov_base; | 134 | seg[n].mr_offset = xdrbuf->tail[0].iov_base; |
129 | seg[n].mr_len = xdrbuf->tail[0].iov_len; | 135 | seg[n].mr_len = xdrbuf->tail[0].iov_len; |
@@ -164,15 +170,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
164 | * Reply chunk (a counted array): | 170 | * Reply chunk (a counted array): |
165 | * N elements: | 171 | * N elements: |
166 | * 1 - N - HLOO - HLOO - ... - HLOO | 172 | * 1 - N - HLOO - HLOO - ... - HLOO |
173 | * | ||
174 | * Returns positive RPC/RDMA header size, or negative errno. | ||
167 | */ | 175 | */ |
168 | 176 | ||
169 | static unsigned int | 177 | static ssize_t |
170 | rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | 178 | rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, |
171 | struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) | 179 | struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) |
172 | { | 180 | { |
173 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 181 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
174 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | 182 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); |
175 | int nsegs, nchunks = 0; | 183 | int n, nsegs, nchunks = 0; |
176 | unsigned int pos; | 184 | unsigned int pos; |
177 | struct rpcrdma_mr_seg *seg = req->rl_segments; | 185 | struct rpcrdma_mr_seg *seg = req->rl_segments; |
178 | struct rpcrdma_read_chunk *cur_rchunk = NULL; | 186 | struct rpcrdma_read_chunk *cur_rchunk = NULL; |
@@ -198,12 +206,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
198 | pos = target->head[0].iov_len; | 206 | pos = target->head[0].iov_len; |
199 | 207 | ||
200 | nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); | 208 | nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); |
201 | if (nsegs == 0) | 209 | if (nsegs < 0) |
202 | return 0; | 210 | return nsegs; |
203 | 211 | ||
204 | do { | 212 | do { |
205 | /* bind/register the memory, then build chunk from result. */ | 213 | n = rpcrdma_register_external(seg, nsegs, |
206 | int n = rpcrdma_register_external(seg, nsegs, | ||
207 | cur_wchunk != NULL, r_xprt); | 214 | cur_wchunk != NULL, r_xprt); |
208 | if (n <= 0) | 215 | if (n <= 0) |
209 | goto out; | 216 | goto out; |
@@ -248,10 +255,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
248 | /* success. all failures return above */ | 255 | /* success. all failures return above */ |
249 | req->rl_nchunks = nchunks; | 256 | req->rl_nchunks = nchunks; |
250 | 257 | ||
251 | BUG_ON(nchunks == 0); | ||
252 | BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) | ||
253 | && (nchunks > 3)); | ||
254 | |||
255 | /* | 258 | /* |
256 | * finish off header. If write, marshal discrim and nchunks. | 259 | * finish off header. If write, marshal discrim and nchunks. |
257 | */ | 260 | */ |
@@ -278,8 +281,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
278 | out: | 281 | out: |
279 | for (pos = 0; nchunks--;) | 282 | for (pos = 0; nchunks--;) |
280 | pos += rpcrdma_deregister_external( | 283 | pos += rpcrdma_deregister_external( |
281 | &req->rl_segments[pos], r_xprt, NULL); | 284 | &req->rl_segments[pos], r_xprt); |
282 | return 0; | 285 | return n; |
283 | } | 286 | } |
284 | 287 | ||
285 | /* | 288 | /* |
@@ -361,6 +364,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) | |||
361 | * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. | 364 | * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. |
362 | * [2] -- optional padding. | 365 | * [2] -- optional padding. |
363 | * [3] -- if padded, header only in [1] and data here. | 366 | * [3] -- if padded, header only in [1] and data here. |
367 | * | ||
368 | * Returns zero on success, otherwise a negative errno. | ||
364 | */ | 369 | */ |
365 | 370 | ||
366 | int | 371 | int |
@@ -370,7 +375,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
370 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 375 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
371 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 376 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
372 | char *base; | 377 | char *base; |
373 | size_t hdrlen, rpclen, padlen; | 378 | size_t rpclen, padlen; |
379 | ssize_t hdrlen; | ||
374 | enum rpcrdma_chunktype rtype, wtype; | 380 | enum rpcrdma_chunktype rtype, wtype; |
375 | struct rpcrdma_msg *headerp; | 381 | struct rpcrdma_msg *headerp; |
376 | 382 | ||
@@ -441,14 +447,10 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
441 | /* The following simplification is not true forever */ | 447 | /* The following simplification is not true forever */ |
442 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) | 448 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) |
443 | wtype = rpcrdma_noch; | 449 | wtype = rpcrdma_noch; |
444 | BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch); | 450 | if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { |
445 | 451 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", | |
446 | if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS && | 452 | __func__); |
447 | (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) { | 453 | return -EIO; |
448 | /* forced to "pure inline"? */ | ||
449 | dprintk("RPC: %s: too much data (%d/%d) for inline\n", | ||
450 | __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len); | ||
451 | return -1; | ||
452 | } | 454 | } |
453 | 455 | ||
454 | hdrlen = 28; /*sizeof *headerp;*/ | 456 | hdrlen = 28; /*sizeof *headerp;*/ |
@@ -474,8 +476,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
474 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; | 476 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; |
475 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; | 477 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; |
476 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ | 478 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ |
477 | BUG_ON(wtype != rpcrdma_noch); | 479 | if (wtype != rpcrdma_noch) { |
478 | 480 | dprintk("RPC: %s: invalid chunk list\n", | |
481 | __func__); | ||
482 | return -EIO; | ||
483 | } | ||
479 | } else { | 484 | } else { |
480 | headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; | 485 | headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; |
481 | headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; | 486 | headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; |
@@ -492,8 +497,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
492 | * on receive. Therefore, we request a reply chunk | 497 | * on receive. Therefore, we request a reply chunk |
493 | * for non-writes wherever feasible and efficient. | 498 | * for non-writes wherever feasible and efficient. |
494 | */ | 499 | */ |
495 | if (wtype == rpcrdma_noch && | 500 | if (wtype == rpcrdma_noch) |
496 | r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER) | ||
497 | wtype = rpcrdma_replych; | 501 | wtype = rpcrdma_replych; |
498 | } | 502 | } |
499 | } | 503 | } |
@@ -511,9 +515,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
511 | hdrlen = rpcrdma_create_chunks(rqst, | 515 | hdrlen = rpcrdma_create_chunks(rqst, |
512 | &rqst->rq_rcv_buf, headerp, wtype); | 516 | &rqst->rq_rcv_buf, headerp, wtype); |
513 | } | 517 | } |
514 | 518 | if (hdrlen < 0) | |
515 | if (hdrlen == 0) | 519 | return hdrlen; |
516 | return -1; | ||
517 | 520 | ||
518 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" | 521 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" |
519 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 522 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
@@ -680,15 +683,11 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | |||
680 | rqst->rq_private_buf = rqst->rq_rcv_buf; | 683 | rqst->rq_private_buf = rqst->rq_rcv_buf; |
681 | } | 684 | } |
682 | 685 | ||
683 | /* | ||
684 | * This function is called when an async event is posted to | ||
685 | * the connection which changes the connection state. All it | ||
686 | * does at this point is mark the connection up/down, the rpc | ||
687 | * timers do the rest. | ||
688 | */ | ||
689 | void | 686 | void |
690 | rpcrdma_conn_func(struct rpcrdma_ep *ep) | 687 | rpcrdma_connect_worker(struct work_struct *work) |
691 | { | 688 | { |
689 | struct rpcrdma_ep *ep = | ||
690 | container_of(work, struct rpcrdma_ep, rep_connect_worker.work); | ||
692 | struct rpc_xprt *xprt = ep->rep_xprt; | 691 | struct rpc_xprt *xprt = ep->rep_xprt; |
693 | 692 | ||
694 | spin_lock_bh(&xprt->transport_lock); | 693 | spin_lock_bh(&xprt->transport_lock); |
@@ -705,13 +704,15 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) | |||
705 | } | 704 | } |
706 | 705 | ||
707 | /* | 706 | /* |
708 | * This function is called when memory window unbind which we are waiting | 707 | * This function is called when an async event is posted to |
709 | * for completes. Just use rr_func (zeroed by upcall) to signal completion. | 708 | * the connection which changes the connection state. All it |
709 | * does at this point is mark the connection up/down, the rpc | ||
710 | * timers do the rest. | ||
710 | */ | 711 | */ |
711 | static void | 712 | void |
712 | rpcrdma_unbind_func(struct rpcrdma_rep *rep) | 713 | rpcrdma_conn_func(struct rpcrdma_ep *ep) |
713 | { | 714 | { |
714 | wake_up(&rep->rr_unbind); | 715 | schedule_delayed_work(&ep->rep_connect_worker, 0); |
715 | } | 716 | } |
716 | 717 | ||
717 | /* | 718 | /* |
@@ -728,7 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
728 | struct rpc_xprt *xprt = rep->rr_xprt; | 729 | struct rpc_xprt *xprt = rep->rr_xprt; |
729 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 730 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
730 | __be32 *iptr; | 731 | __be32 *iptr; |
731 | int i, rdmalen, status; | 732 | int rdmalen, status; |
733 | unsigned long cwnd; | ||
732 | 734 | ||
733 | /* Check status. If bad, signal disconnect and return rep to pool */ | 735 | /* Check status. If bad, signal disconnect and return rep to pool */ |
734 | if (rep->rr_len == ~0U) { | 736 | if (rep->rr_len == ~0U) { |
@@ -783,6 +785,7 @@ repost: | |||
783 | 785 | ||
784 | /* from here on, the reply is no longer an orphan */ | 786 | /* from here on, the reply is no longer an orphan */ |
785 | req->rl_reply = rep; | 787 | req->rl_reply = rep; |
788 | xprt->reestablish_timeout = 0; | ||
786 | 789 | ||
787 | /* check for expected message types */ | 790 | /* check for expected message types */ |
788 | /* The order of some of these tests is important. */ | 791 | /* The order of some of these tests is important. */ |
@@ -857,26 +860,10 @@ badheader: | |||
857 | break; | 860 | break; |
858 | } | 861 | } |
859 | 862 | ||
860 | /* If using mw bind, start the deregister process now. */ | 863 | cwnd = xprt->cwnd; |
861 | /* (Note: if mr_free(), cannot perform it here, in tasklet context) */ | 864 | xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT; |
862 | if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) { | 865 | if (xprt->cwnd > cwnd) |
863 | case RPCRDMA_MEMWINDOWS: | 866 | xprt_release_rqst_cong(rqst->rq_task); |
864 | for (i = 0; req->rl_nchunks-- > 1;) | ||
865 | i += rpcrdma_deregister_external( | ||
866 | &req->rl_segments[i], r_xprt, NULL); | ||
867 | /* Optionally wait (not here) for unbinds to complete */ | ||
868 | rep->rr_func = rpcrdma_unbind_func; | ||
869 | (void) rpcrdma_deregister_external(&req->rl_segments[i], | ||
870 | r_xprt, rep); | ||
871 | break; | ||
872 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
873 | for (i = 0; req->rl_nchunks--;) | ||
874 | i += rpcrdma_deregister_external(&req->rl_segments[i], | ||
875 | r_xprt, NULL); | ||
876 | break; | ||
877 | default: | ||
878 | break; | ||
879 | } | ||
880 | 867 | ||
881 | dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", | 868 | dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", |
882 | __func__, xprt, rqst, status); | 869 | __func__, xprt, rqst, status); |
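With xprt_rdma_reserve_xprt() gone, the reply handler above now drives flow control directly: the server's credit grant is shifted into the fixed-point window, and a queued request is released whenever the window grows. A sketch of that mapping; release_waiter() is a hypothetical stand-in for xprt_release_rqst_cong():

#define RPC_CWNDSHIFT (8U)

static void update_cwnd_from_credits(unsigned long *cwnd,
                                     unsigned int credits,
                                     void (*release_waiter)(void))
{
        unsigned long old = *cwnd;

        /* each server-granted credit is worth one fixed-point slot */
        *cwnd = (unsigned long)credits << RPC_CWNDSHIFT;
        if (*cwnd > old)
                release_waiter();       /* window grew: unblock a waiter */
}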
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 1eb9c468d0c9..66f91f0d071a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -149,6 +149,11 @@ static struct ctl_table sunrpc_table[] = { | |||
149 | 149 | ||
150 | #endif | 150 | #endif |
151 | 151 | ||
152 | #define RPCRDMA_BIND_TO (60U * HZ) | ||
153 | #define RPCRDMA_INIT_REEST_TO (5U * HZ) | ||
154 | #define RPCRDMA_MAX_REEST_TO (30U * HZ) | ||
155 | #define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) | ||
156 | |||
152 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ | 157 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ |
153 | 158 | ||
154 | static void | 159 | static void |
@@ -229,7 +234,6 @@ static void | |||
229 | xprt_rdma_destroy(struct rpc_xprt *xprt) | 234 | xprt_rdma_destroy(struct rpc_xprt *xprt) |
230 | { | 235 | { |
231 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 236 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
232 | int rc; | ||
233 | 237 | ||
234 | dprintk("RPC: %s: called\n", __func__); | 238 | dprintk("RPC: %s: called\n", __func__); |
235 | 239 | ||
@@ -238,10 +242,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) | |||
238 | xprt_clear_connected(xprt); | 242 | xprt_clear_connected(xprt); |
239 | 243 | ||
240 | rpcrdma_buffer_destroy(&r_xprt->rx_buf); | 244 | rpcrdma_buffer_destroy(&r_xprt->rx_buf); |
241 | rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); | 245 | rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); |
242 | if (rc) | ||
243 | dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n", | ||
244 | __func__, rc); | ||
245 | rpcrdma_ia_close(&r_xprt->rx_ia); | 246 | rpcrdma_ia_close(&r_xprt->rx_ia); |
246 | 247 | ||
247 | xprt_rdma_free_addresses(xprt); | 248 | xprt_rdma_free_addresses(xprt); |
@@ -289,9 +290,9 @@ xprt_setup_rdma(struct xprt_create *args) | |||
289 | 290 | ||
290 | /* 60 second timeout, no retries */ | 291 | /* 60 second timeout, no retries */ |
291 | xprt->timeout = &xprt_rdma_default_timeout; | 292 | xprt->timeout = &xprt_rdma_default_timeout; |
292 | xprt->bind_timeout = (60U * HZ); | 293 | xprt->bind_timeout = RPCRDMA_BIND_TO; |
293 | xprt->reestablish_timeout = (5U * HZ); | 294 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; |
294 | xprt->idle_timeout = (5U * 60 * HZ); | 295 | xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; |
295 | 296 | ||
296 | xprt->resvport = 0; /* privileged port not needed */ | 297 | xprt->resvport = 0; /* privileged port not needed */ |
297 | xprt->tsh_size = 0; /* RPC-RDMA handles framing */ | 298 | xprt->tsh_size = 0; /* RPC-RDMA handles framing */ |
@@ -391,7 +392,7 @@ out4: | |||
391 | xprt_rdma_free_addresses(xprt); | 392 | xprt_rdma_free_addresses(xprt); |
392 | rc = -EINVAL; | 393 | rc = -EINVAL; |
393 | out3: | 394 | out3: |
394 | (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); | 395 | rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); |
395 | out2: | 396 | out2: |
396 | rpcrdma_ia_close(&new_xprt->rx_ia); | 397 | rpcrdma_ia_close(&new_xprt->rx_ia); |
397 | out1: | 398 | out1: |
@@ -436,10 +437,10 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) | |||
436 | schedule_delayed_work(&r_xprt->rdma_connect, | 437 | schedule_delayed_work(&r_xprt->rdma_connect, |
437 | xprt->reestablish_timeout); | 438 | xprt->reestablish_timeout); |
438 | xprt->reestablish_timeout <<= 1; | 439 | xprt->reestablish_timeout <<= 1; |
439 | if (xprt->reestablish_timeout > (30 * HZ)) | 440 | if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) |
440 | xprt->reestablish_timeout = (30 * HZ); | 441 | xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; |
441 | else if (xprt->reestablish_timeout < (5 * HZ)) | 442 | else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) |
442 | xprt->reestablish_timeout = (5 * HZ); | 443 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; |
443 | } else { | 444 | } else { |
444 | schedule_delayed_work(&r_xprt->rdma_connect, 0); | 445 | schedule_delayed_work(&r_xprt->rdma_connect, 0); |
445 | if (!RPC_IS_ASYNC(task)) | 446 | if (!RPC_IS_ASYNC(task)) |
@@ -447,23 +448,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) | |||
447 | } | 448 | } |
448 | } | 449 | } |
449 | 450 | ||
450 | static int | ||
451 | xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) | ||
452 | { | ||
453 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
454 | int credits = atomic_read(&r_xprt->rx_buf.rb_credits); | ||
455 | |||
456 | /* == RPC_CWNDSCALE @ init, but *after* setup */ | ||
457 | if (r_xprt->rx_buf.rb_cwndscale == 0UL) { | ||
458 | r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; | ||
459 | dprintk("RPC: %s: cwndscale %lu\n", __func__, | ||
460 | r_xprt->rx_buf.rb_cwndscale); | ||
461 | BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); | ||
462 | } | ||
463 | xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; | ||
464 | return xprt_reserve_xprt_cong(xprt, task); | ||
465 | } | ||
466 | |||
467 | /* | 451 | /* |
468 | * The RDMA allocate/free functions need the task structure as a place | 452 | * The RDMA allocate/free functions need the task structure as a place |
469 | * to hide the struct rpcrdma_req, which is necessary for the actual send/recv | 453 | * to hide the struct rpcrdma_req, which is necessary for the actual send/recv |
@@ -479,7 +463,8 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
479 | struct rpcrdma_req *req, *nreq; | 463 | struct rpcrdma_req *req, *nreq; |
480 | 464 | ||
481 | req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); | 465 | req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); |
482 | BUG_ON(NULL == req); | 466 | if (req == NULL) |
467 | return NULL; | ||
483 | 468 | ||
484 | if (size > req->rl_size) { | 469 | if (size > req->rl_size) { |
485 | dprintk("RPC: %s: size %zd too large for buffer[%zd]: " | 470 | dprintk("RPC: %s: size %zd too large for buffer[%zd]: " |
@@ -503,18 +488,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
503 | * If the allocation or registration fails, the RPC framework | 488 | * If the allocation or registration fails, the RPC framework |
504 | * will (doggedly) retry. | 489 | * will (doggedly) retry. |
505 | */ | 490 | */ |
506 | if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy == | ||
507 | RPCRDMA_BOUNCEBUFFERS) { | ||
508 | /* forced to "pure inline" */ | ||
509 | dprintk("RPC: %s: too much data (%zd) for inline " | ||
510 | "(r/w max %d/%d)\n", __func__, size, | ||
511 | rpcx_to_rdmad(xprt).inline_rsize, | ||
512 | rpcx_to_rdmad(xprt).inline_wsize); | ||
513 | size = req->rl_size; | ||
514 | rpc_exit(task, -EIO); /* fail the operation */ | ||
515 | rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; | ||
516 | goto out; | ||
517 | } | ||
518 | if (task->tk_flags & RPC_TASK_SWAPPER) | 491 | if (task->tk_flags & RPC_TASK_SWAPPER) |
519 | nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); | 492 | nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); |
520 | else | 493 | else |
@@ -543,7 +516,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
543 | req = nreq; | 516 | req = nreq; |
544 | } | 517 | } |
545 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); | 518 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); |
546 | out: | ||
547 | req->rl_connect_cookie = 0; /* our reserved value */ | 519 | req->rl_connect_cookie = 0; /* our reserved value */ |
548 | return req->rl_xdr_buf; | 520 | return req->rl_xdr_buf; |
549 | 521 | ||
@@ -579,9 +551,7 @@ xprt_rdma_free(void *buffer) | |||
579 | __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); | 551 | __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); |
580 | 552 | ||
581 | /* | 553 | /* |
582 | * Finish the deregistration. When using mw bind, this was | 554 | * Finish the deregistration. The process is considered |
583 | * begun in rpcrdma_reply_handler(). In all other modes, we | ||
584 | * do it here, in thread context. The process is considered | ||
585 | * complete when the rr_func vector becomes NULL - this | 555 | * complete when the rr_func vector becomes NULL - this |
586 | * was put in place during rpcrdma_reply_handler() - the wait | 556 | * was put in place during rpcrdma_reply_handler() - the wait |
587 | * call below will not block if the dereg is "done". If | 557 | * call below will not block if the dereg is "done". If |
@@ -590,12 +560,7 @@ xprt_rdma_free(void *buffer) | |||
590 | for (i = 0; req->rl_nchunks;) { | 560 | for (i = 0; req->rl_nchunks;) { |
591 | --req->rl_nchunks; | 561 | --req->rl_nchunks; |
592 | i += rpcrdma_deregister_external( | 562 | i += rpcrdma_deregister_external( |
593 | &req->rl_segments[i], r_xprt, NULL); | 563 | &req->rl_segments[i], r_xprt); |
594 | } | ||
595 | |||
596 | if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) { | ||
597 | rep->rr_func = NULL; /* abandon the callback */ | ||
598 | req->rl_reply = NULL; | ||
599 | } | 564 | } |
600 | 565 | ||
601 | if (req->rl_iov.length == 0) { /* see allocate above */ | 566 | if (req->rl_iov.length == 0) { /* see allocate above */ |
@@ -630,13 +595,12 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
630 | struct rpc_xprt *xprt = rqst->rq_xprt; | 595 | struct rpc_xprt *xprt = rqst->rq_xprt; |
631 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 596 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
632 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 597 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
598 | int rc; | ||
633 | 599 | ||
634 | /* marshal the send itself */ | 600 | if (req->rl_niovs == 0) { |
635 | if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) { | 601 | rc = rpcrdma_marshal_req(rqst); |
636 | r_xprt->rx_stats.failed_marshal_count++; | 602 | if (rc < 0) |
637 | dprintk("RPC: %s: rpcrdma_marshal_req failed\n", | 603 | goto failed_marshal; |
638 | __func__); | ||
639 | return -EIO; | ||
640 | } | 604 | } |
641 | 605 | ||
642 | if (req->rl_reply == NULL) /* e.g. reconnection */ | 606 | if (req->rl_reply == NULL) /* e.g. reconnection */ |
@@ -660,6 +624,12 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
660 | rqst->rq_bytes_sent = 0; | 624 | rqst->rq_bytes_sent = 0; |
661 | return 0; | 625 | return 0; |
662 | 626 | ||
627 | failed_marshal: | ||
628 | r_xprt->rx_stats.failed_marshal_count++; | ||
629 | dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", | ||
630 | __func__, rc); | ||
631 | if (rc == -EIO) | ||
632 | return -EIO; | ||
663 | drop_connection: | 633 | drop_connection: |
664 | xprt_disconnect_done(xprt); | 634 | xprt_disconnect_done(xprt); |
665 | return -ENOTCONN; /* implies disconnect */ | 635 | return -ENOTCONN; /* implies disconnect */ |
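The reworked error path distinguishes a hard marshaling failure from a transient one: rpcrdma_marshal_req() returning -EIO fails the RPC outright, while any other negative status falls through to drop_connection so the RPC core can reconnect and retransmit. A standalone sketch of that two-tier pattern; marshal(), disconnect() and the stats counter are hypothetical stand-ins:

	#include <errno.h>
	#include <stdio.h>

	static int failed_marshal_count;

	/* Pretend marshaling fails with a transient error. */
	static int marshal(int fd) { (void)fd; return -EMSGSIZE; }
	static void disconnect(int fd) { printf("dropping connection %d\n", fd); }

	static int send_request(int fd)
	{
		int rc = marshal(fd);

		if (rc < 0) {
			failed_marshal_count++;
			if (rc == -EIO)
				return -EIO;	/* hard failure: fail this RPC */
			goto drop_connection;
		}
		return 0;			/* posted normally */

	drop_connection:
		disconnect(fd);
		return -ENOTCONN;		/* transient: reconnect and retry */
	}

	int main(void)
	{
		return send_request(3) == -ENOTCONN ? 0 : 1;
	}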
@@ -705,7 +675,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | |||
705 | */ | 675 | */ |
706 | 676 | ||
707 | static struct rpc_xprt_ops xprt_rdma_procs = { | 677 | static struct rpc_xprt_ops xprt_rdma_procs = { |
708 | .reserve_xprt = xprt_rdma_reserve_xprt, | 678 | .reserve_xprt = xprt_reserve_xprt_cong, |
709 | .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ | 679 | .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ |
710 | .alloc_slot = xprt_alloc_slot, | 680 | .alloc_slot = xprt_alloc_slot, |
711 | .release_request = xprt_release_rqst_cong, /* ditto */ | 681 | .release_request = xprt_release_rqst_cong, /* ditto */ |
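Dropping the transport-private reserve_xprt in favor of the generic xprt_reserve_xprt_cong means RPC/RDMA now relies on the stock sunrpc congestion window to bound in-flight requests. A toy model of that AIMD window (constants assumed for illustration; the real arithmetic lives in the generic code's xprt_adjust_cwnd()):

	#include <stdio.h>

	#define CWNDSCALE 256			/* one request slot (assumed) */
	#define MAXCWND   (32 * CWNDSCALE)

	/* Grow roughly one slot per window of successful replies,
	 * halve on timeout. */
	static unsigned long adjust(unsigned long cwnd, int timedout)
	{
		if (timedout) {
			cwnd >>= 1;
			if (cwnd < CWNDSCALE)
				cwnd = CWNDSCALE;
		} else {
			cwnd += (CWNDSCALE * CWNDSCALE + (cwnd >> 1)) / cwnd;
			if (cwnd > MAXCWND)
				cwnd = MAXCWND;
		}
		return cwnd;
	}

	int main(void)
	{
		unsigned long cwnd = CWNDSCALE;
		int i;

		for (i = 0; i < 16; i++)
			cwnd = adjust(cwnd, 0);
		printf("after 16 replies: %lu slots\n", cwnd / CWNDSCALE);
		printf("after a timeout:  %lu slots\n",
		       adjust(cwnd, 1) / CWNDSCALE);
		return 0;
	}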
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 93726560eaa8..13dbd1c389ff 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -48,8 +48,8 @@ | |||
48 | */ | 48 | */ |
49 | 49 | ||
50 | #include <linux/interrupt.h> | 50 | #include <linux/interrupt.h> |
51 | #include <linux/pci.h> /* for Tavor hack below */ | ||
52 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
52 | #include <asm/bitops.h> | ||
53 | 53 | ||
54 | #include "xprt_rdma.h" | 54 | #include "xprt_rdma.h" |
55 | 55 | ||
@@ -142,98 +142,139 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) | |||
142 | } | 142 | } |
143 | } | 143 | } |
144 | 144 | ||
145 | static inline | 145 | static void |
146 | void rpcrdma_event_process(struct ib_wc *wc) | 146 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) |
147 | { | 147 | { |
148 | struct rpcrdma_mw *frmr; | 148 | struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; |
149 | struct rpcrdma_rep *rep = | ||
150 | (struct rpcrdma_rep *)(unsigned long) wc->wr_id; | ||
151 | 149 | ||
152 | dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n", | 150 | dprintk("RPC: %s: frmr %p status %X opcode %d\n", |
153 | __func__, rep, wc->status, wc->opcode, wc->byte_len); | 151 | __func__, frmr, wc->status, wc->opcode); |
154 | 152 | ||
155 | if (!rep) /* send or bind completion that we don't care about */ | 153 | if (wc->wr_id == 0ULL) |
156 | return; | 154 | return; |
157 | 155 | if (wc->status != IB_WC_SUCCESS) | |
158 | if (IB_WC_SUCCESS != wc->status) { | ||
159 | dprintk("RPC: %s: WC opcode %d status %X, connection lost\n", | ||
160 | __func__, wc->opcode, wc->status); | ||
161 | rep->rr_len = ~0U; | ||
162 | if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV) | ||
163 | rpcrdma_schedule_tasklet(rep); | ||
164 | return; | 156 | return; |
165 | } | ||
166 | 157 | ||
167 | switch (wc->opcode) { | 158 | if (wc->opcode == IB_WC_FAST_REG_MR) |
168 | case IB_WC_FAST_REG_MR: | ||
169 | frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | ||
170 | frmr->r.frmr.state = FRMR_IS_VALID; | 159 | frmr->r.frmr.state = FRMR_IS_VALID; |
171 | break; | 160 | else if (wc->opcode == IB_WC_LOCAL_INV) |
172 | case IB_WC_LOCAL_INV: | ||
173 | frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | ||
174 | frmr->r.frmr.state = FRMR_IS_INVALID; | 161 | frmr->r.frmr.state = FRMR_IS_INVALID; |
175 | break; | ||
176 | case IB_WC_RECV: | ||
177 | rep->rr_len = wc->byte_len; | ||
178 | ib_dma_sync_single_for_cpu( | ||
179 | rdmab_to_ia(rep->rr_buffer)->ri_id->device, | ||
180 | rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); | ||
181 | /* Keep (only) the most recent credits, after checking validity */ | ||
182 | if (rep->rr_len >= 16) { | ||
183 | struct rpcrdma_msg *p = | ||
184 | (struct rpcrdma_msg *) rep->rr_base; | ||
185 | unsigned int credits = ntohl(p->rm_credit); | ||
186 | if (credits == 0) { | ||
187 | dprintk("RPC: %s: server" | ||
188 | " dropped credits to 0!\n", __func__); | ||
189 | /* don't deadlock */ | ||
190 | credits = 1; | ||
191 | } else if (credits > rep->rr_buffer->rb_max_requests) { | ||
192 | dprintk("RPC: %s: server" | ||
193 | " over-crediting: %d (%d)\n", | ||
194 | __func__, credits, | ||
195 | rep->rr_buffer->rb_max_requests); | ||
196 | credits = rep->rr_buffer->rb_max_requests; | ||
197 | } | ||
198 | atomic_set(&rep->rr_buffer->rb_credits, credits); | ||
199 | } | ||
200 | /* fall through */ | ||
201 | case IB_WC_BIND_MW: | ||
202 | rpcrdma_schedule_tasklet(rep); | ||
203 | break; | ||
204 | default: | ||
205 | dprintk("RPC: %s: unexpected WC event %X\n", | ||
206 | __func__, wc->opcode); | ||
207 | break; | ||
208 | } | ||
209 | } | 162 | } |
210 | 163 | ||
211 | static inline int | 164 | static int |
212 | rpcrdma_cq_poll(struct ib_cq *cq) | 165 | rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) |
213 | { | 166 | { |
214 | struct ib_wc wc; | 167 | struct ib_wc *wcs; |
215 | int rc; | 168 | int budget, count, rc; |
216 | 169 | ||
217 | for (;;) { | 170 | budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; |
218 | rc = ib_poll_cq(cq, 1, &wc); | 171 | do { |
219 | if (rc < 0) { | 172 | wcs = ep->rep_send_wcs; |
220 | dprintk("RPC: %s: ib_poll_cq failed %i\n", | 173 | |
221 | __func__, rc); | 174 | rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); |
175 | if (rc <= 0) | ||
222 | return rc; | 176 | return rc; |
223 | } | ||
224 | if (rc == 0) | ||
225 | break; | ||
226 | 177 | ||
227 | rpcrdma_event_process(&wc); | 178 | count = rc; |
179 | while (count-- > 0) | ||
180 | rpcrdma_sendcq_process_wc(wcs++); | ||
181 | } while (rc == RPCRDMA_POLLSIZE && --budget); | ||
182 | return 0; | ||
183 | } | ||
184 | |||
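rpcrdma_sendcq_poll() (and its receive-side twin below) drains the CQ in batches of RPCRDMA_POLLSIZE and caps the total at RPCRDMA_WC_BUDGET completions per upcall, so a flooded queue cannot monopolize interrupt context; a short batch means the CQ is empty and the loop stops early. A userspace sketch of the same budgeted drain, with poll_cq() standing in for ib_poll_cq():

	#include <stdio.h>

	#define WC_BUDGET 128
	#define POLLSIZE  16

	/* Stand-in for ib_poll_cq(): dequeue up to n completions. */
	static int poll_cq(int *cq_depth, int wcs[], int n)
	{
		int i, got = (*cq_depth < n) ? *cq_depth : n;

		for (i = 0; i < got; i++)
			wcs[i] = 0;		/* dummy work completion */
		*cq_depth -= got;
		return got;
	}

	static void process_wc(int wc) { (void)wc; }

	static int drain_cq(int *cq_depth)
	{
		int wcs[POLLSIZE];
		int budget = WC_BUDGET / POLLSIZE;	/* 8 batches, max */
		int count, rc;

		do {
			rc = poll_cq(cq_depth, wcs, POLLSIZE);
			if (rc <= 0)
				return rc;		/* empty or error */
			count = 0;
			while (count < rc)
				process_wc(wcs[count++]);
		} while (rc == POLLSIZE && --budget);	/* short batch: drained */
		return 0;
	}

	int main(void)
	{
		int depth = 200;	/* pretend 200 completions are pending */

		drain_cq(&depth);
		printf("%d left for the next upcall\n", depth);	/* 72 */
		return 0;
	}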
185 | /* | ||
186 | * Handle send, fast_reg_mr, and local_inv completions. | ||
187 | * | ||
188 | * Send events are typically suppressed and thus do not result | ||
189 | * in an upcall. Occasionally one is signaled, however. This | ||
190 | * prevents the provider's completion queue from wrapping and | ||
191 | * losing a completion. | ||
192 | */ | ||
193 | static void | ||
194 | rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) | ||
195 | { | ||
196 | struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; | ||
197 | int rc; | ||
198 | |||
199 | rc = rpcrdma_sendcq_poll(cq, ep); | ||
200 | if (rc) { | ||
201 | dprintk("RPC: %s: ib_poll_cq failed: %i\n", | ||
202 | __func__, rc); | ||
203 | return; | ||
228 | } | 204 | } |
229 | 205 | ||
206 | rc = ib_req_notify_cq(cq, | ||
207 | IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); | ||
208 | if (rc == 0) | ||
209 | return; | ||
210 | if (rc < 0) { | ||
211 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | ||
212 | __func__, rc); | ||
213 | return; | ||
214 | } | ||
215 | |||
216 | rpcrdma_sendcq_poll(cq, ep); | ||
217 | } | ||
218 | |||
219 | static void | ||
220 | rpcrdma_recvcq_process_wc(struct ib_wc *wc) | ||
221 | { | ||
222 | struct rpcrdma_rep *rep = | ||
223 | (struct rpcrdma_rep *)(unsigned long)wc->wr_id; | ||
224 | |||
225 | dprintk("RPC: %s: rep %p status %X opcode %X length %u\n", | ||
226 | __func__, rep, wc->status, wc->opcode, wc->byte_len); | ||
227 | |||
228 | if (wc->status != IB_WC_SUCCESS) { | ||
229 | rep->rr_len = ~0U; | ||
230 | goto out_schedule; | ||
231 | } | ||
232 | if (wc->opcode != IB_WC_RECV) | ||
233 | return; | ||
234 | |||
235 | rep->rr_len = wc->byte_len; | ||
236 | ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, | ||
237 | rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); | ||
238 | |||
239 | if (rep->rr_len >= 16) { | ||
240 | struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base; | ||
241 | unsigned int credits = ntohl(p->rm_credit); | ||
242 | |||
243 | if (credits == 0) | ||
244 | credits = 1; /* don't deadlock */ | ||
245 | else if (credits > rep->rr_buffer->rb_max_requests) | ||
246 | credits = rep->rr_buffer->rb_max_requests; | ||
247 | atomic_set(&rep->rr_buffer->rb_credits, credits); | ||
248 | } | ||
249 | |||
250 | out_schedule: | ||
251 | rpcrdma_schedule_tasklet(rep); | ||
252 | } | ||
253 | |||
254 | static int | ||
255 | rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) | ||
256 | { | ||
257 | struct ib_wc *wcs; | ||
258 | int budget, count, rc; | ||
259 | |||
260 | budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; | ||
261 | do { | ||
262 | wcs = ep->rep_recv_wcs; | ||
263 | |||
264 | rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); | ||
265 | if (rc <= 0) | ||
266 | return rc; | ||
267 | |||
268 | count = rc; | ||
269 | while (count-- > 0) | ||
270 | rpcrdma_recvcq_process_wc(wcs++); | ||
271 | } while (rc == RPCRDMA_POLLSIZE && --budget); | ||
230 | return 0; | 272 | return 0; |
231 | } | 273 | } |
232 | 274 | ||
233 | /* | 275 | /* |
234 | * rpcrdma_cq_event_upcall | 276 | * Handle receive completions. |
235 | * | 277 | * |
236 | * This upcall handles recv, send, bind and unbind events. | ||
237 | * It is reentrant but processes single events in order to maintain | 278 | * It is reentrant but processes single events in order to maintain |
238 | * ordering of receives to keep server credits. | 279 | * ordering of receives to keep server credits. |
239 | * | 280 | * |
@@ -242,26 +283,31 @@ rpcrdma_cq_poll(struct ib_cq *cq) | |||
242 | * connection shutdown. That is, the structures required for | 283 | * connection shutdown. That is, the structures required for |
243 | * the completion of the reply handler must remain intact until | 284 | * the completion of the reply handler must remain intact until |
244 | * all memory has been reclaimed. | 285 | * all memory has been reclaimed. |
245 | * | ||
246 | * Note that send events are suppressed and do not result in an upcall. | ||
247 | */ | 286 | */ |
248 | static void | 287 | static void |
249 | rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context) | 288 | rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) |
250 | { | 289 | { |
290 | struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; | ||
251 | int rc; | 291 | int rc; |
252 | 292 | ||
253 | rc = rpcrdma_cq_poll(cq); | 293 | rc = rpcrdma_recvcq_poll(cq, ep); |
254 | if (rc) | 294 | if (rc) { |
295 | dprintk("RPC: %s: ib_poll_cq failed: %i\n", | ||
296 | __func__, rc); | ||
255 | return; | 297 | return; |
298 | } | ||
256 | 299 | ||
257 | rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); | 300 | rc = ib_req_notify_cq(cq, |
258 | if (rc) { | 301 | IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); |
259 | dprintk("RPC: %s: ib_req_notify_cq failed %i\n", | 302 | if (rc == 0) |
303 | return; | ||
304 | if (rc < 0) { | ||
305 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | ||
260 | __func__, rc); | 306 | __func__, rc); |
261 | return; | 307 | return; |
262 | } | 308 | } |
263 | 309 | ||
264 | rpcrdma_cq_poll(cq); | 310 | rpcrdma_recvcq_poll(cq, ep); |
265 | } | 311 | } |
266 | 312 | ||
267 | #ifdef RPC_DEBUG | 313 | #ifdef RPC_DEBUG |
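Both new upcalls re-arm the CQ with IB_CQ_REPORT_MISSED_EVENTS to close the classic poll/notify race: completions arriving between the final ib_poll_cq() and ib_req_notify_cq() would otherwise sit unreported until the next unrelated interrupt. Condensed, the shared shape is as follows, with poll_and_process() as a hypothetical stand-in for the *_poll helpers:

	static void cq_upcall(struct ib_cq *cq, void *cq_context)
	{
		if (poll_and_process(cq, cq_context) < 0)
			return;			/* polling error: give up */

		/* Re-arm. REPORT_MISSED_EVENTS makes the call return a
		 * positive value when completions slipped in after the
		 * last poll, in which case one more pass collects them. */
		if (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
					 IB_CQ_REPORT_MISSED_EVENTS) > 0)
			poll_and_process(cq, cq_context);
	}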
@@ -493,54 +539,32 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
493 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; | 539 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; |
494 | } | 540 | } |
495 | 541 | ||
496 | switch (memreg) { | 542 | if (memreg == RPCRDMA_FRMR) { |
497 | case RPCRDMA_MEMWINDOWS: | ||
498 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
499 | if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { | ||
500 | dprintk("RPC: %s: MEMWINDOWS registration " | ||
501 | "specified but not supported by adapter, " | ||
502 | "using slower RPCRDMA_REGISTER\n", | ||
503 | __func__); | ||
504 | memreg = RPCRDMA_REGISTER; | ||
505 | } | ||
506 | break; | ||
507 | case RPCRDMA_MTHCAFMR: | ||
508 | if (!ia->ri_id->device->alloc_fmr) { | ||
509 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
510 | dprintk("RPC: %s: MTHCAFMR registration " | ||
511 | "specified but not supported by adapter, " | ||
512 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
513 | __func__); | ||
514 | memreg = RPCRDMA_ALLPHYSICAL; | ||
515 | #else | ||
516 | dprintk("RPC: %s: MTHCAFMR registration " | ||
517 | "specified but not supported by adapter, " | ||
518 | "using slower RPCRDMA_REGISTER\n", | ||
519 | __func__); | ||
520 | memreg = RPCRDMA_REGISTER; | ||
521 | #endif | ||
522 | } | ||
523 | break; | ||
524 | case RPCRDMA_FRMR: | ||
525 | /* Requires both frmr reg and local dma lkey */ | 543 | /* Requires both frmr reg and local dma lkey */ |
526 | if ((devattr.device_cap_flags & | 544 | if ((devattr.device_cap_flags & |
527 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | 545 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != |
528 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | 546 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { |
529 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
530 | dprintk("RPC: %s: FRMR registration " | 547 | dprintk("RPC: %s: FRMR registration " |
531 | "specified but not supported by adapter, " | 548 | "not supported by HCA\n", __func__); |
532 | "using riskier RPCRDMA_ALLPHYSICAL\n", | 549 | memreg = RPCRDMA_MTHCAFMR; |
533 | __func__); | 550 | } else { |
551 | /* Mind the ia limit on FRMR page list depth */ | ||
552 | ia->ri_max_frmr_depth = min_t(unsigned int, | ||
553 | RPCRDMA_MAX_DATA_SEGS, | ||
554 | devattr.max_fast_reg_page_list_len); | ||
555 | } | ||
556 | } | ||
557 | if (memreg == RPCRDMA_MTHCAFMR) { | ||
558 | if (!ia->ri_id->device->alloc_fmr) { | ||
559 | dprintk("RPC: %s: MTHCAFMR registration " | ||
560 | "not supported by HCA\n", __func__); | ||
561 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
534 | memreg = RPCRDMA_ALLPHYSICAL; | 562 | memreg = RPCRDMA_ALLPHYSICAL; |
535 | #else | 563 | #else |
536 | dprintk("RPC: %s: FRMR registration " | 564 | rc = -ENOMEM; |
537 | "specified but not supported by adapter, " | 565 | goto out2; |
538 | "using slower RPCRDMA_REGISTER\n", | ||
539 | __func__); | ||
540 | memreg = RPCRDMA_REGISTER; | ||
541 | #endif | 566 | #endif |
542 | } | 567 | } |
543 | break; | ||
544 | } | 568 | } |
545 | 569 | ||
546 | /* | 570 | /* |
@@ -552,8 +576,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
552 | * adapter. | 576 | * adapter. |
553 | */ | 577 | */ |
554 | switch (memreg) { | 578 | switch (memreg) { |
555 | case RPCRDMA_BOUNCEBUFFERS: | ||
556 | case RPCRDMA_REGISTER: | ||
557 | case RPCRDMA_FRMR: | 579 | case RPCRDMA_FRMR: |
558 | break; | 580 | break; |
559 | #if RPCRDMA_PERSISTENT_REGISTRATION | 581 | #if RPCRDMA_PERSISTENT_REGISTRATION |
@@ -563,30 +585,26 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
563 | IB_ACCESS_REMOTE_READ; | 585 | IB_ACCESS_REMOTE_READ; |
564 | goto register_setup; | 586 | goto register_setup; |
565 | #endif | 587 | #endif |
566 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
567 | case RPCRDMA_MEMWINDOWS: | ||
568 | mem_priv = IB_ACCESS_LOCAL_WRITE | | ||
569 | IB_ACCESS_MW_BIND; | ||
570 | goto register_setup; | ||
571 | case RPCRDMA_MTHCAFMR: | 588 | case RPCRDMA_MTHCAFMR: |
572 | if (ia->ri_have_dma_lkey) | 589 | if (ia->ri_have_dma_lkey) |
573 | break; | 590 | break; |
574 | mem_priv = IB_ACCESS_LOCAL_WRITE; | 591 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
592 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
575 | register_setup: | 593 | register_setup: |
594 | #endif | ||
576 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); | 595 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); |
577 | if (IS_ERR(ia->ri_bind_mem)) { | 596 | if (IS_ERR(ia->ri_bind_mem)) { |
578 | printk(KERN_ALERT "%s: ib_get_dma_mr for " | 597 | printk(KERN_ALERT "%s: ib_get_dma_mr for " |
579 | "phys register failed with %lX\n\t" | 598 | "phys register failed with %lX\n", |
580 | "Will continue with degraded performance\n", | ||
581 | __func__, PTR_ERR(ia->ri_bind_mem)); | 599 | __func__, PTR_ERR(ia->ri_bind_mem)); |
582 | memreg = RPCRDMA_REGISTER; | 600 | rc = -ENOMEM; |
583 | ia->ri_bind_mem = NULL; | 601 | goto out2; |
584 | } | 602 | } |
585 | break; | 603 | break; |
586 | default: | 604 | default: |
587 | printk(KERN_ERR "%s: invalid memory registration mode %d\n", | 605 | printk(KERN_ERR "RPC: Unsupported memory " |
588 | __func__, memreg); | 606 | "registration mode: %d\n", memreg); |
589 | rc = -EINVAL; | 607 | rc = -ENOMEM; |
590 | goto out2; | 608 | goto out2; |
591 | } | 609 | } |
592 | dprintk("RPC: %s: memory registration strategy is %d\n", | 610 | dprintk("RPC: %s: memory registration strategy is %d\n", |
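The fallback chain during setup is now explicit and terminal: a default FRMR request degrades to FMR when the HCA lacks fast-registration support, FMR degrades to ALLPHYSICAL only when persistent registration is compiled in, and otherwise the open fails instead of silently dropping to the removed RPCRDMA_REGISTER mode. Distilled to booleans (a sketch, assuming the default FRMR request):

	enum memreg { FRMR, MTHCAFMR, ALLPHYSICAL, UNSUPPORTED };

	/* Capability flags distilled from ib_query_device() and the
	 * device's alloc_fmr method; persistent_reg mirrors the
	 * RPCRDMA_PERSISTENT_REGISTRATION compile-time switch. */
	static enum memreg pick_strategy(int has_frmr_and_lkey, int has_fmr,
					 int persistent_reg)
	{
		if (has_frmr_and_lkey)
			return FRMR;
		if (has_fmr)
			return MTHCAFMR;
		return persistent_reg ? ALLPHYSICAL : UNSUPPORTED;
	}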
@@ -640,6 +658,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
640 | struct rpcrdma_create_data_internal *cdata) | 658 | struct rpcrdma_create_data_internal *cdata) |
641 | { | 659 | { |
642 | struct ib_device_attr devattr; | 660 | struct ib_device_attr devattr; |
661 | struct ib_cq *sendcq, *recvcq; | ||
643 | int rc, err; | 662 | int rc, err; |
644 | 663 | ||
645 | rc = ib_query_device(ia->ri_id->device, &devattr); | 664 | rc = ib_query_device(ia->ri_id->device, &devattr); |
@@ -659,32 +678,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
659 | ep->rep_attr.srq = NULL; | 678 | ep->rep_attr.srq = NULL; |
660 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 679 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
661 | switch (ia->ri_memreg_strategy) { | 680 | switch (ia->ri_memreg_strategy) { |
662 | case RPCRDMA_FRMR: | 681 | case RPCRDMA_FRMR: { |
682 | int depth = 7; | ||
683 | |||
663 | /* Add room for frmr register and invalidate WRs. | 684 | /* Add room for frmr register and invalidate WRs. |
664 | * 1. FRMR reg WR for head | 685 | * 1. FRMR reg WR for head |
665 | * 2. FRMR invalidate WR for head | 686 | * 2. FRMR invalidate WR for head |
666 | * 3. FRMR reg WR for pagelist | 687 | * 3. N FRMR reg WRs for pagelist |
667 | * 4. FRMR invalidate WR for pagelist | 688 | * 4. N FRMR invalidate WRs for pagelist |
668 | * 5. FRMR reg WR for tail | 689 | * 5. FRMR reg WR for tail |
669 | * 6. FRMR invalidate WR for tail | 690 | * 6. FRMR invalidate WR for tail |
670 | * 7. The RDMA_SEND WR | 691 | * 7. The RDMA_SEND WR |
671 | */ | 692 | */ |
672 | ep->rep_attr.cap.max_send_wr *= 7; | 693 | |
694 | /* Calculate N if the device max FRMR depth is smaller than | ||
695 | * RPCRDMA_MAX_DATA_SEGS. | ||
696 | */ | ||
697 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
698 | int delta = RPCRDMA_MAX_DATA_SEGS - | ||
699 | ia->ri_max_frmr_depth; | ||
700 | |||
701 | do { | ||
702 | depth += 2; /* FRMR reg + invalidate */ | ||
703 | delta -= ia->ri_max_frmr_depth; | ||
704 | } while (delta > 0); | ||
705 | |||
706 | } | ||
707 | ep->rep_attr.cap.max_send_wr *= depth; | ||
673 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { | 708 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { |
674 | cdata->max_requests = devattr.max_qp_wr / 7; | 709 | cdata->max_requests = devattr.max_qp_wr / depth; |
675 | if (!cdata->max_requests) | 710 | if (!cdata->max_requests) |
676 | return -EINVAL; | 711 | return -EINVAL; |
677 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7; | 712 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * |
713 | depth; | ||
678 | } | 714 | } |
679 | break; | 715 | break; |
680 | case RPCRDMA_MEMWINDOWS_ASYNC: | 716 | } |
681 | case RPCRDMA_MEMWINDOWS: | ||
682 | /* Add room for mw_binds+unbinds - overkill! */ | ||
683 | ep->rep_attr.cap.max_send_wr++; | ||
684 | ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS); | ||
685 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) | ||
686 | return -EINVAL; | ||
687 | break; | ||
688 | default: | 717 | default: |
689 | break; | 718 | break; |
690 | } | 719 | } |
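Worked example of the new multiplier, assuming RPCRDMA_MAX_DATA_SEGS is 64: on a device whose fast-reg page list is capped at 16, delta starts at 48 and the loop adds three extra register/invalidate pairs, so each request reserves 13 send WRs instead of 7. A runnable check of the arithmetic:

	#include <stdio.h>

	int main(void)
	{
		int max_data_segs = 64;		/* RPCRDMA_MAX_DATA_SEGS (assumed) */
		int max_frmr_depth = 16;	/* device limit, for illustration */
		int depth = 7;			/* head/pagelist/tail pairs + SEND */

		if (max_frmr_depth < max_data_segs) {
			int delta = max_data_segs - max_frmr_depth;

			do {
				depth += 2;	/* extra FRMR reg + invalidate */
				delta -= max_frmr_depth;
			} while (delta > 0);
		}
		printf("max_send_wr multiplier = %d\n", depth);	/* prints 13 */
		return 0;
	}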
@@ -705,46 +734,51 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
705 | ep->rep_attr.cap.max_recv_sge); | 734 | ep->rep_attr.cap.max_recv_sge); |
706 | 735 | ||
707 | /* set trigger for requesting send completion */ | 736 | /* set trigger for requesting send completion */ |
708 | ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/; | 737 | ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; |
709 | switch (ia->ri_memreg_strategy) { | ||
710 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
711 | case RPCRDMA_MEMWINDOWS: | ||
712 | ep->rep_cqinit -= RPCRDMA_MAX_SEGS; | ||
713 | break; | ||
714 | default: | ||
715 | break; | ||
716 | } | ||
717 | if (ep->rep_cqinit <= 2) | 738 | if (ep->rep_cqinit <= 2) |
718 | ep->rep_cqinit = 0; | 739 | ep->rep_cqinit = 0; |
719 | INIT_CQCOUNT(ep); | 740 | INIT_CQCOUNT(ep); |
720 | ep->rep_ia = ia; | 741 | ep->rep_ia = ia; |
721 | init_waitqueue_head(&ep->rep_connect_wait); | 742 | init_waitqueue_head(&ep->rep_connect_wait); |
743 | INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); | ||
722 | 744 | ||
723 | /* | 745 | sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, |
724 | * Create a single cq for receive dto and mw_bind (only ever | 746 | rpcrdma_cq_async_error_upcall, ep, |
725 | * care about unbind, really). Send completions are suppressed. | ||
726 | * Use single threaded tasklet upcalls to maintain ordering. | ||
727 | */ | ||
728 | ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall, | ||
729 | rpcrdma_cq_async_error_upcall, NULL, | ||
730 | ep->rep_attr.cap.max_recv_wr + | ||
731 | ep->rep_attr.cap.max_send_wr + 1, 0); | 747 | ep->rep_attr.cap.max_send_wr + 1, 0); |
732 | if (IS_ERR(ep->rep_cq)) { | 748 | if (IS_ERR(sendcq)) { |
733 | rc = PTR_ERR(ep->rep_cq); | 749 | rc = PTR_ERR(sendcq); |
734 | dprintk("RPC: %s: ib_create_cq failed: %i\n", | 750 | dprintk("RPC: %s: failed to create send CQ: %i\n", |
735 | __func__, rc); | 751 | __func__, rc); |
736 | goto out1; | 752 | goto out1; |
737 | } | 753 | } |
738 | 754 | ||
739 | rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP); | 755 | rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP); |
756 | if (rc) { | ||
757 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | ||
758 | __func__, rc); | ||
759 | goto out2; | ||
760 | } | ||
761 | |||
762 | recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, | ||
763 | rpcrdma_cq_async_error_upcall, ep, | ||
764 | ep->rep_attr.cap.max_recv_wr + 1, 0); | ||
765 | if (IS_ERR(recvcq)) { | ||
766 | rc = PTR_ERR(recvcq); | ||
767 | dprintk("RPC: %s: failed to create recv CQ: %i\n", | ||
768 | __func__, rc); | ||
769 | goto out2; | ||
770 | } | ||
771 | |||
772 | rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP); | ||
740 | if (rc) { | 773 | if (rc) { |
741 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | 774 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", |
742 | __func__, rc); | 775 | __func__, rc); |
776 | ib_destroy_cq(recvcq); | ||
743 | goto out2; | 777 | goto out2; |
744 | } | 778 | } |
745 | 779 | ||
746 | ep->rep_attr.send_cq = ep->rep_cq; | 780 | ep->rep_attr.send_cq = sendcq; |
747 | ep->rep_attr.recv_cq = ep->rep_cq; | 781 | ep->rep_attr.recv_cq = recvcq; |
748 | 782 | ||
749 | /* Initialize cma parameters */ | 783 | /* Initialize cma parameters */ |
750 | 784 | ||
@@ -754,9 +788,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
754 | 788 | ||
755 | /* Client offers RDMA Read but does not initiate */ | 789 | /* Client offers RDMA Read but does not initiate */ |
756 | ep->rep_remote_cma.initiator_depth = 0; | 790 | ep->rep_remote_cma.initiator_depth = 0; |
757 | if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) | 791 | if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ |
758 | ep->rep_remote_cma.responder_resources = 0; | ||
759 | else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ | ||
760 | ep->rep_remote_cma.responder_resources = 32; | 792 | ep->rep_remote_cma.responder_resources = 32; |
761 | else | 793 | else |
762 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; | 794 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; |
@@ -768,7 +800,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
768 | return 0; | 800 | return 0; |
769 | 801 | ||
770 | out2: | 802 | out2: |
771 | err = ib_destroy_cq(ep->rep_cq); | 803 | err = ib_destroy_cq(sendcq); |
772 | if (err) | 804 | if (err) |
773 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", | 805 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", |
774 | __func__, err); | 806 | __func__, err); |
@@ -782,11 +814,8 @@ out1: | |||
782 | * Disconnect and destroy endpoint. After this, the only | 814 | * Disconnect and destroy endpoint. After this, the only |
783 | * valid operations on the ep are to free it (if dynamically | 815 | * valid operations on the ep are to free it (if dynamically |
784 | * allocated) or re-create it. | 816 | * allocated) or re-create it. |
785 | * | ||
786 | * The caller's error handling must be sure to not leak the endpoint | ||
787 | * if this function fails. | ||
788 | */ | 817 | */ |
789 | int | 818 | void |
790 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | 819 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
791 | { | 820 | { |
792 | int rc; | 821 | int rc; |
@@ -794,6 +823,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
794 | dprintk("RPC: %s: entering, connected is %d\n", | 823 | dprintk("RPC: %s: entering, connected is %d\n", |
795 | __func__, ep->rep_connected); | 824 | __func__, ep->rep_connected); |
796 | 825 | ||
826 | cancel_delayed_work_sync(&ep->rep_connect_worker); | ||
827 | |||
797 | if (ia->ri_id->qp) { | 828 | if (ia->ri_id->qp) { |
798 | rc = rpcrdma_ep_disconnect(ep, ia); | 829 | rc = rpcrdma_ep_disconnect(ep, ia); |
799 | if (rc) | 830 | if (rc) |
@@ -809,13 +840,17 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
809 | ep->rep_pad_mr = NULL; | 840 | ep->rep_pad_mr = NULL; |
810 | } | 841 | } |
811 | 842 | ||
812 | rpcrdma_clean_cq(ep->rep_cq); | 843 | rpcrdma_clean_cq(ep->rep_attr.recv_cq); |
813 | rc = ib_destroy_cq(ep->rep_cq); | 844 | rc = ib_destroy_cq(ep->rep_attr.recv_cq); |
814 | if (rc) | 845 | if (rc) |
815 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", | 846 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", |
816 | __func__, rc); | 847 | __func__, rc); |
817 | 848 | ||
818 | return rc; | 849 | rpcrdma_clean_cq(ep->rep_attr.send_cq); |
850 | rc = ib_destroy_cq(ep->rep_attr.send_cq); | ||
851 | if (rc) | ||
852 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", | ||
853 | __func__, rc); | ||
819 | } | 854 | } |
820 | 855 | ||
821 | /* | 856 | /* |
@@ -831,17 +866,20 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
831 | if (ep->rep_connected != 0) { | 866 | if (ep->rep_connected != 0) { |
832 | struct rpcrdma_xprt *xprt; | 867 | struct rpcrdma_xprt *xprt; |
833 | retry: | 868 | retry: |
869 | dprintk("RPC: %s: reconnecting...\n", __func__); | ||
834 | rc = rpcrdma_ep_disconnect(ep, ia); | 870 | rc = rpcrdma_ep_disconnect(ep, ia); |
835 | if (rc && rc != -ENOTCONN) | 871 | if (rc && rc != -ENOTCONN) |
836 | dprintk("RPC: %s: rpcrdma_ep_disconnect" | 872 | dprintk("RPC: %s: rpcrdma_ep_disconnect" |
837 | " status %i\n", __func__, rc); | 873 | " status %i\n", __func__, rc); |
838 | rpcrdma_clean_cq(ep->rep_cq); | 874 | |
875 | rpcrdma_clean_cq(ep->rep_attr.recv_cq); | ||
876 | rpcrdma_clean_cq(ep->rep_attr.send_cq); | ||
839 | 877 | ||
840 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | 878 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); |
841 | id = rpcrdma_create_id(xprt, ia, | 879 | id = rpcrdma_create_id(xprt, ia, |
842 | (struct sockaddr *)&xprt->rx_data.addr); | 880 | (struct sockaddr *)&xprt->rx_data.addr); |
843 | if (IS_ERR(id)) { | 881 | if (IS_ERR(id)) { |
844 | rc = PTR_ERR(id); | 882 | rc = -EHOSTUNREACH; |
845 | goto out; | 883 | goto out; |
846 | } | 884 | } |
847 | /* TEMP TEMP TEMP - fail if new device: | 885 | /* TEMP TEMP TEMP - fail if new device: |
@@ -855,35 +893,32 @@ retry: | |||
855 | printk("RPC: %s: can't reconnect on " | 893 | printk("RPC: %s: can't reconnect on " |
856 | "different device!\n", __func__); | 894 | "different device!\n", __func__); |
857 | rdma_destroy_id(id); | 895 | rdma_destroy_id(id); |
858 | rc = -ENETDOWN; | 896 | rc = -ENETUNREACH; |
859 | goto out; | 897 | goto out; |
860 | } | 898 | } |
861 | /* END TEMP */ | 899 | /* END TEMP */ |
900 | rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); | ||
901 | if (rc) { | ||
902 | dprintk("RPC: %s: rdma_create_qp failed %i\n", | ||
903 | __func__, rc); | ||
904 | rdma_destroy_id(id); | ||
905 | rc = -ENETUNREACH; | ||
906 | goto out; | ||
907 | } | ||
862 | rdma_destroy_qp(ia->ri_id); | 908 | rdma_destroy_qp(ia->ri_id); |
863 | rdma_destroy_id(ia->ri_id); | 909 | rdma_destroy_id(ia->ri_id); |
864 | ia->ri_id = id; | 910 | ia->ri_id = id; |
911 | } else { | ||
912 | dprintk("RPC: %s: connecting...\n", __func__); | ||
913 | rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); | ||
914 | if (rc) { | ||
915 | dprintk("RPC: %s: rdma_create_qp failed %i\n", | ||
916 | __func__, rc); | ||
917 | /* do not update ep->rep_connected */ | ||
918 | return -ENETUNREACH; | ||
919 | } | ||
865 | } | 920 | } |
866 | 921 | ||
867 | rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); | ||
868 | if (rc) { | ||
869 | dprintk("RPC: %s: rdma_create_qp failed %i\n", | ||
870 | __func__, rc); | ||
871 | goto out; | ||
872 | } | ||
873 | |||
874 | /* XXX Tavor device performs badly with 2K MTU! */ | ||
875 | if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | ||
876 | struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device); | ||
877 | if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR && | ||
878 | (pcid->vendor == PCI_VENDOR_ID_MELLANOX || | ||
879 | pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) { | ||
880 | struct ib_qp_attr attr = { | ||
881 | .path_mtu = IB_MTU_1024 | ||
882 | }; | ||
883 | rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU); | ||
884 | } | ||
885 | } | ||
886 | |||
887 | ep->rep_connected = 0; | 922 | ep->rep_connected = 0; |
888 | 923 | ||
889 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); | 924 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); |
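Note the ordering on the reconnect path: the replacement QP is created on the new cm_id before anything old is torn down, so a failed rdma_create_qp() leaves the transport holding its previous, still-valid id. Only once the new pair exists are the old QP and id destroyed and the pointer swapped. The skeleton, with hypothetical stand-ins for the rdma_cm calls:

	#include <errno.h>

	struct cm_id;
	extern struct cm_id *create_id(void);
	extern int create_qp(struct cm_id *id);
	extern void destroy_qp(struct cm_id *id);
	extern void destroy_id(struct cm_id *id);

	static int reconnect(struct cm_id **idp)
	{
		struct cm_id *id = create_id();

		if (!id)
			return -EHOSTUNREACH;	/* old id/QP untouched */
		if (create_qp(id)) {
			destroy_id(id);
			return -ENETUNREACH;	/* likewise */
		}
		destroy_qp(*idp);	/* replacement ready: retire the */
		destroy_id(*idp);	/* old resources and swap */
		*idp = id;
		return 0;
	}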
@@ -944,7 +979,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
944 | { | 979 | { |
945 | int rc; | 980 | int rc; |
946 | 981 | ||
947 | rpcrdma_clean_cq(ep->rep_cq); | 982 | rpcrdma_clean_cq(ep->rep_attr.recv_cq); |
983 | rpcrdma_clean_cq(ep->rep_attr.send_cq); | ||
948 | rc = rdma_disconnect(ia->ri_id); | 984 | rc = rdma_disconnect(ia->ri_id); |
949 | if (!rc) { | 985 | if (!rc) { |
950 | /* returns without wait if not connected */ | 986 | /* returns without wait if not connected */ |
@@ -967,7 +1003,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
967 | struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) | 1003 | struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) |
968 | { | 1004 | { |
969 | char *p; | 1005 | char *p; |
970 | size_t len; | 1006 | size_t len, rlen, wlen; |
971 | int i, rc; | 1007 | int i, rc; |
972 | struct rpcrdma_mw *r; | 1008 | struct rpcrdma_mw *r; |
973 | 1009 | ||
@@ -997,11 +1033,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
997 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | 1033 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * |
998 | sizeof(struct rpcrdma_mw); | 1034 | sizeof(struct rpcrdma_mw); |
999 | break; | 1035 | break; |
1000 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
1001 | case RPCRDMA_MEMWINDOWS: | ||
1002 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | ||
1003 | sizeof(struct rpcrdma_mw); | ||
1004 | break; | ||
1005 | default: | 1036 | default: |
1006 | break; | 1037 | break; |
1007 | } | 1038 | } |
@@ -1032,32 +1063,29 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
1032 | } | 1063 | } |
1033 | p += cdata->padding; | 1064 | p += cdata->padding; |
1034 | 1065 | ||
1035 | /* | ||
1036 | * Allocate the fmr's, or mw's for mw_bind chunk registration. | ||
1037 | * We "cycle" the mw's in order to minimize rkey reuse, | ||
1038 | * and also reduce unbind-to-bind collision. | ||
1039 | */ | ||
1040 | INIT_LIST_HEAD(&buf->rb_mws); | 1066 | INIT_LIST_HEAD(&buf->rb_mws); |
1041 | r = (struct rpcrdma_mw *)p; | 1067 | r = (struct rpcrdma_mw *)p; |
1042 | switch (ia->ri_memreg_strategy) { | 1068 | switch (ia->ri_memreg_strategy) { |
1043 | case RPCRDMA_FRMR: | 1069 | case RPCRDMA_FRMR: |
1044 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { | 1070 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { |
1045 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | 1071 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, |
1046 | RPCRDMA_MAX_SEGS); | 1072 | ia->ri_max_frmr_depth); |
1047 | if (IS_ERR(r->r.frmr.fr_mr)) { | 1073 | if (IS_ERR(r->r.frmr.fr_mr)) { |
1048 | rc = PTR_ERR(r->r.frmr.fr_mr); | 1074 | rc = PTR_ERR(r->r.frmr.fr_mr); |
1049 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | 1075 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" |
1050 | " failed %i\n", __func__, rc); | 1076 | " failed %i\n", __func__, rc); |
1051 | goto out; | 1077 | goto out; |
1052 | } | 1078 | } |
1053 | r->r.frmr.fr_pgl = | 1079 | r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( |
1054 | ib_alloc_fast_reg_page_list(ia->ri_id->device, | 1080 | ia->ri_id->device, |
1055 | RPCRDMA_MAX_SEGS); | 1081 | ia->ri_max_frmr_depth); |
1056 | if (IS_ERR(r->r.frmr.fr_pgl)) { | 1082 | if (IS_ERR(r->r.frmr.fr_pgl)) { |
1057 | rc = PTR_ERR(r->r.frmr.fr_pgl); | 1083 | rc = PTR_ERR(r->r.frmr.fr_pgl); |
1058 | dprintk("RPC: %s: " | 1084 | dprintk("RPC: %s: " |
1059 | "ib_alloc_fast_reg_page_list " | 1085 | "ib_alloc_fast_reg_page_list " |
1060 | "failed %i\n", __func__, rc); | 1086 | "failed %i\n", __func__, rc); |
1087 | |||
1088 | ib_dereg_mr(r->r.frmr.fr_mr); | ||
1061 | goto out; | 1089 | goto out; |
1062 | } | 1090 | } |
1063 | list_add(&r->mw_list, &buf->rb_mws); | 1091 | list_add(&r->mw_list, &buf->rb_mws); |
@@ -1082,21 +1110,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
1082 | ++r; | 1110 | ++r; |
1083 | } | 1111 | } |
1084 | break; | 1112 | break; |
1085 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
1086 | case RPCRDMA_MEMWINDOWS: | ||
1087 | /* Allocate one extra request's worth, for full cycling */ | ||
1088 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | ||
1089 | r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1); | ||
1090 | if (IS_ERR(r->r.mw)) { | ||
1091 | rc = PTR_ERR(r->r.mw); | ||
1092 | dprintk("RPC: %s: ib_alloc_mw" | ||
1093 | " failed %i\n", __func__, rc); | ||
1094 | goto out; | ||
1095 | } | ||
1096 | list_add(&r->mw_list, &buf->rb_mws); | ||
1097 | ++r; | ||
1098 | } | ||
1099 | break; | ||
1100 | default: | 1113 | default: |
1101 | break; | 1114 | break; |
1102 | } | 1115 | } |
@@ -1105,16 +1118,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
1105 | * Allocate/init the request/reply buffers. Doing this | 1118 | * Allocate/init the request/reply buffers. Doing this |
1106 | * using kmalloc for now -- one for each buf. | 1119 | * using kmalloc for now -- one for each buf. |
1107 | */ | 1120 | */ |
1121 | wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req)); | ||
1122 | rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep)); | ||
1123 | dprintk("RPC: %s: wlen = %zu, rlen = %zu\n", | ||
1124 | __func__, wlen, rlen); | ||
1125 | |||
1108 | for (i = 0; i < buf->rb_max_requests; i++) { | 1126 | for (i = 0; i < buf->rb_max_requests; i++) { |
1109 | struct rpcrdma_req *req; | 1127 | struct rpcrdma_req *req; |
1110 | struct rpcrdma_rep *rep; | 1128 | struct rpcrdma_rep *rep; |
1111 | 1129 | ||
1112 | len = cdata->inline_wsize + sizeof(struct rpcrdma_req); | 1130 | req = kmalloc(wlen, GFP_KERNEL); |
1113 | /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */ | ||
1114 | /* Typical ~2400b, so rounding up saves work later */ | ||
1115 | if (len < 4096) | ||
1116 | len = 4096; | ||
1117 | req = kmalloc(len, GFP_KERNEL); | ||
1118 | if (req == NULL) { | 1131 | if (req == NULL) { |
1119 | dprintk("RPC: %s: request buffer %d alloc" | 1132 | dprintk("RPC: %s: request buffer %d alloc" |
1120 | " failed\n", __func__, i); | 1133 | " failed\n", __func__, i); |
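The fixed round-up-to-4096 heuristic gives way to sizing each buffer from the configured inline sizes: 1 << fls(x) is the smallest power of two strictly greater than x, so the "typical ~2400b" case from the old comment still lands on a 4096-byte allocation, while larger inline settings are rounded to a power of two rather than capped by a guess. A quick standalone check:

	#include <stdio.h>

	/* Userspace analogue of the kernel's fls(): 1-based index of
	 * the most-significant set bit; fls(0) == 0. */
	static int fls_(unsigned int x)
	{
		int r = 0;

		while (x) {
			r++;
			x >>= 1;
		}
		return r;
	}

	int main(void)
	{
		unsigned int need = 2400;	/* "typical ~2400b" */
		unsigned int wlen = 1u << fls_(need);

		printf("need %u -> allocate %u\n", need, wlen);	/* 4096 */
		return 0;
	}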
@@ -1126,16 +1139,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
1126 | buf->rb_send_bufs[i]->rl_buffer = buf; | 1139 | buf->rb_send_bufs[i]->rl_buffer = buf; |
1127 | 1140 | ||
1128 | rc = rpcrdma_register_internal(ia, req->rl_base, | 1141 | rc = rpcrdma_register_internal(ia, req->rl_base, |
1129 | len - offsetof(struct rpcrdma_req, rl_base), | 1142 | wlen - offsetof(struct rpcrdma_req, rl_base), |
1130 | &buf->rb_send_bufs[i]->rl_handle, | 1143 | &buf->rb_send_bufs[i]->rl_handle, |
1131 | &buf->rb_send_bufs[i]->rl_iov); | 1144 | &buf->rb_send_bufs[i]->rl_iov); |
1132 | if (rc) | 1145 | if (rc) |
1133 | goto out; | 1146 | goto out; |
1134 | 1147 | ||
1135 | buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req); | 1148 | buf->rb_send_bufs[i]->rl_size = wlen - |
1149 | sizeof(struct rpcrdma_req); | ||
1136 | 1150 | ||
1137 | len = cdata->inline_rsize + sizeof(struct rpcrdma_rep); | 1151 | rep = kmalloc(rlen, GFP_KERNEL); |
1138 | rep = kmalloc(len, GFP_KERNEL); | ||
1139 | if (rep == NULL) { | 1152 | if (rep == NULL) { |
1140 | dprintk("RPC: %s: reply buffer %d alloc failed\n", | 1153 | dprintk("RPC: %s: reply buffer %d alloc failed\n", |
1141 | __func__, i); | 1154 | __func__, i); |
@@ -1145,10 +1158,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
1145 | memset(rep, 0, sizeof(struct rpcrdma_rep)); | 1158 | memset(rep, 0, sizeof(struct rpcrdma_rep)); |
1146 | buf->rb_recv_bufs[i] = rep; | 1159 | buf->rb_recv_bufs[i] = rep; |
1147 | buf->rb_recv_bufs[i]->rr_buffer = buf; | 1160 | buf->rb_recv_bufs[i]->rr_buffer = buf; |
1148 | init_waitqueue_head(&rep->rr_unbind); | ||
1149 | 1161 | ||
1150 | rc = rpcrdma_register_internal(ia, rep->rr_base, | 1162 | rc = rpcrdma_register_internal(ia, rep->rr_base, |
1151 | len - offsetof(struct rpcrdma_rep, rr_base), | 1163 | rlen - offsetof(struct rpcrdma_rep, rr_base), |
1152 | &buf->rb_recv_bufs[i]->rr_handle, | 1164 | &buf->rb_recv_bufs[i]->rr_handle, |
1153 | &buf->rb_recv_bufs[i]->rr_iov); | 1165 | &buf->rb_recv_bufs[i]->rr_iov); |
1154 | if (rc) | 1166 | if (rc) |
@@ -1179,7 +1191,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1179 | 1191 | ||
1180 | /* clean up in reverse order from create | 1192 | /* clean up in reverse order from create |
1181 | * 1. recv mr memory (mr free, then kfree) | 1193 | * 1. recv mr memory (mr free, then kfree) |
1182 | * 1a. bind mw memory | ||
1183 | * 2. send mr memory (mr free, then kfree) | 1194 | * 2. send mr memory (mr free, then kfree) |
1184 | * 3. padding (if any) [moved to rpcrdma_ep_destroy] | 1195 | * 3. padding (if any) [moved to rpcrdma_ep_destroy] |
1185 | * 4. arrays | 1196 | * 4. arrays |
@@ -1194,41 +1205,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1194 | kfree(buf->rb_recv_bufs[i]); | 1205 | kfree(buf->rb_recv_bufs[i]); |
1195 | } | 1206 | } |
1196 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { | 1207 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { |
1197 | while (!list_empty(&buf->rb_mws)) { | ||
1198 | r = list_entry(buf->rb_mws.next, | ||
1199 | struct rpcrdma_mw, mw_list); | ||
1200 | list_del(&r->mw_list); | ||
1201 | switch (ia->ri_memreg_strategy) { | ||
1202 | case RPCRDMA_FRMR: | ||
1203 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1204 | if (rc) | ||
1205 | dprintk("RPC: %s:" | ||
1206 | " ib_dereg_mr" | ||
1207 | " failed %i\n", | ||
1208 | __func__, rc); | ||
1209 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1210 | break; | ||
1211 | case RPCRDMA_MTHCAFMR: | ||
1212 | rc = ib_dealloc_fmr(r->r.fmr); | ||
1213 | if (rc) | ||
1214 | dprintk("RPC: %s:" | ||
1215 | " ib_dealloc_fmr" | ||
1216 | " failed %i\n", | ||
1217 | __func__, rc); | ||
1218 | break; | ||
1219 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
1220 | case RPCRDMA_MEMWINDOWS: | ||
1221 | rc = ib_dealloc_mw(r->r.mw); | ||
1222 | if (rc) | ||
1223 | dprintk("RPC: %s:" | ||
1224 | " ib_dealloc_mw" | ||
1225 | " failed %i\n", | ||
1226 | __func__, rc); | ||
1227 | break; | ||
1228 | default: | ||
1229 | break; | ||
1230 | } | ||
1231 | } | ||
1232 | rpcrdma_deregister_internal(ia, | 1208 | rpcrdma_deregister_internal(ia, |
1233 | buf->rb_send_bufs[i]->rl_handle, | 1209 | buf->rb_send_bufs[i]->rl_handle, |
1234 | &buf->rb_send_bufs[i]->rl_iov); | 1210 | &buf->rb_send_bufs[i]->rl_iov); |
@@ -1236,6 +1212,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1236 | } | 1212 | } |
1237 | } | 1213 | } |
1238 | 1214 | ||
1215 | while (!list_empty(&buf->rb_mws)) { | ||
1216 | r = list_entry(buf->rb_mws.next, | ||
1217 | struct rpcrdma_mw, mw_list); | ||
1218 | list_del(&r->mw_list); | ||
1219 | switch (ia->ri_memreg_strategy) { | ||
1220 | case RPCRDMA_FRMR: | ||
1221 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1222 | if (rc) | ||
1223 | dprintk("RPC: %s:" | ||
1224 | " ib_dereg_mr" | ||
1225 | " failed %i\n", | ||
1226 | __func__, rc); | ||
1227 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1228 | break; | ||
1229 | case RPCRDMA_MTHCAFMR: | ||
1230 | rc = ib_dealloc_fmr(r->r.fmr); | ||
1231 | if (rc) | ||
1232 | dprintk("RPC: %s:" | ||
1233 | " ib_dealloc_fmr" | ||
1234 | " failed %i\n", | ||
1235 | __func__, rc); | ||
1236 | break; | ||
1237 | default: | ||
1238 | break; | ||
1239 | } | ||
1240 | } | ||
1241 | |||
1239 | kfree(buf->rb_pool); | 1242 | kfree(buf->rb_pool); |
1240 | } | 1243 | } |
1241 | 1244 | ||
@@ -1299,21 +1302,17 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) | |||
1299 | int i; | 1302 | int i; |
1300 | unsigned long flags; | 1303 | unsigned long flags; |
1301 | 1304 | ||
1302 | BUG_ON(req->rl_nchunks != 0); | ||
1303 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1305 | spin_lock_irqsave(&buffers->rb_lock, flags); |
1304 | buffers->rb_send_bufs[--buffers->rb_send_index] = req; | 1306 | buffers->rb_send_bufs[--buffers->rb_send_index] = req; |
1305 | req->rl_niovs = 0; | 1307 | req->rl_niovs = 0; |
1306 | if (req->rl_reply) { | 1308 | if (req->rl_reply) { |
1307 | buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; | 1309 | buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; |
1308 | init_waitqueue_head(&req->rl_reply->rr_unbind); | ||
1309 | req->rl_reply->rr_func = NULL; | 1310 | req->rl_reply->rr_func = NULL; |
1310 | req->rl_reply = NULL; | 1311 | req->rl_reply = NULL; |
1311 | } | 1312 | } |
1312 | switch (ia->ri_memreg_strategy) { | 1313 | switch (ia->ri_memreg_strategy) { |
1313 | case RPCRDMA_FRMR: | 1314 | case RPCRDMA_FRMR: |
1314 | case RPCRDMA_MTHCAFMR: | 1315 | case RPCRDMA_MTHCAFMR: |
1315 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
1316 | case RPCRDMA_MEMWINDOWS: | ||
1317 | /* | 1316 | /* |
1318 | * Cycle mw's back in reverse order, and "spin" them. | 1317 | * Cycle mw's back in reverse order, and "spin" them. |
1319 | * This delays and scrambles reuse as much as possible. | 1318 | * This delays and scrambles reuse as much as possible. |
@@ -1358,8 +1357,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) | |||
1358 | 1357 | ||
1359 | /* | 1358 | /* |
1360 | * Put reply buffers back into pool when not attached to | 1359 | * Put reply buffers back into pool when not attached to |
1361 | * request. This happens in error conditions, and when | 1360 | * request. This happens in error conditions. |
1362 | * aborting unbinds. Pre-decrement counter/array index. | ||
1363 | */ | 1361 | */ |
1364 | void | 1362 | void |
1365 | rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | 1363 | rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) |
@@ -1498,8 +1496,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | |||
1498 | seg1->mr_offset -= pageoff; /* start of page */ | 1496 | seg1->mr_offset -= pageoff; /* start of page */ |
1499 | seg1->mr_len += pageoff; | 1497 | seg1->mr_len += pageoff; |
1500 | len = -pageoff; | 1498 | len = -pageoff; |
1501 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | 1499 | if (*nsegs > ia->ri_max_frmr_depth) |
1502 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | 1500 | *nsegs = ia->ri_max_frmr_depth; |
1503 | for (page_no = i = 0; i < *nsegs;) { | 1501 | for (page_no = i = 0; i < *nsegs;) { |
1504 | rpcrdma_map_one(ia, seg, writing); | 1502 | rpcrdma_map_one(ia, seg, writing); |
1505 | pa = seg->mr_dma; | 1503 | pa = seg->mr_dma; |
@@ -1536,10 +1534,6 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | |||
1536 | } else | 1534 | } else |
1537 | post_wr = &frmr_wr; | 1535 | post_wr = &frmr_wr; |
1538 | 1536 | ||
1539 | /* Bump the key */ | ||
1540 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | ||
1541 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | ||
1542 | |||
1543 | /* Prepare FRMR WR */ | 1537 | /* Prepare FRMR WR */ |
1544 | memset(&frmr_wr, 0, sizeof frmr_wr); | 1538 | memset(&frmr_wr, 0, sizeof frmr_wr); |
1545 | frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; | 1539 | frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; |
@@ -1550,7 +1544,16 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | |||
1550 | frmr_wr.wr.fast_reg.page_list_len = page_no; | 1544 | frmr_wr.wr.fast_reg.page_list_len = page_no; |
1551 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | 1545 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; |
1552 | frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; | 1546 | frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; |
1553 | BUG_ON(frmr_wr.wr.fast_reg.length < len); | 1547 | if (frmr_wr.wr.fast_reg.length < len) { |
1548 | while (seg1->mr_nsegs--) | ||
1549 | rpcrdma_unmap_one(ia, seg++); | ||
1550 | return -EIO; | ||
1551 | } | ||
1552 | |||
1553 | /* Bump the key */ | ||
1554 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | ||
1555 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | ||
1556 | |||
1554 | frmr_wr.wr.fast_reg.access_flags = (writing ? | 1557 | frmr_wr.wr.fast_reg.access_flags = (writing ? |
1555 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | 1558 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : |
1556 | IB_ACCESS_REMOTE_READ); | 1559 | IB_ACCESS_REMOTE_READ); |
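Two things happen in this hunk: the BUG_ON() becomes a clean unwind that unmaps the segments and returns -EIO, and the rkey bump moves below that check so a request that bails out no longer consumes a key generation. The "key" is the low-order byte of the rkey, cycled so that a stale remote access carrying an old rkey fails the HCA's protection check. A standalone illustration mirroring ib_update_fast_reg_key():

	#include <stdio.h>
	#include <stdint.h>

	/* Only the low byte changes; the upper 24 bits keep
	 * identifying the same MR. */
	static uint32_t update_key(uint32_t rkey, uint8_t newkey)
	{
		return (rkey & 0xffffff00u) | newkey;
	}

	int main(void)
	{
		uint32_t rkey = 0x12345607u;	/* example value */
		uint8_t key = (uint8_t)(rkey & 0xff);

		rkey = update_key(rkey, ++key);
		printf("new rkey = 0x%08x\n", rkey);	/* 0x12345608 */
		return 0;
	}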
@@ -1661,135 +1664,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | |||
1661 | return rc; | 1664 | return rc; |
1662 | } | 1665 | } |
1663 | 1666 | ||
1664 | static int | ||
1665 | rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, | ||
1666 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1667 | struct rpcrdma_xprt *r_xprt) | ||
1668 | { | ||
1669 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1670 | IB_ACCESS_REMOTE_READ); | ||
1671 | struct ib_mw_bind param; | ||
1672 | int rc; | ||
1673 | |||
1674 | *nsegs = 1; | ||
1675 | rpcrdma_map_one(ia, seg, writing); | ||
1676 | param.bind_info.mr = ia->ri_bind_mem; | ||
1677 | param.wr_id = 0ULL; /* no send cookie */ | ||
1678 | param.bind_info.addr = seg->mr_dma; | ||
1679 | param.bind_info.length = seg->mr_len; | ||
1680 | param.send_flags = 0; | ||
1681 | param.bind_info.mw_access_flags = mem_priv; | ||
1682 | |||
1683 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1684 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
1685 | if (rc) { | ||
1686 | dprintk("RPC: %s: failed ib_bind_mw " | ||
1687 | "%u@0x%llx status %i\n", | ||
1688 | __func__, seg->mr_len, | ||
1689 | (unsigned long long)seg->mr_dma, rc); | ||
1690 | rpcrdma_unmap_one(ia, seg); | ||
1691 | } else { | ||
1692 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
1693 | seg->mr_base = param.bind_info.addr; | ||
1694 | seg->mr_nsegs = 1; | ||
1695 | } | ||
1696 | return rc; | ||
1697 | } | ||
1698 | |||
1699 | static int | ||
1700 | rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, | ||
1701 | struct rpcrdma_ia *ia, | ||
1702 | struct rpcrdma_xprt *r_xprt, void **r) | ||
1703 | { | ||
1704 | struct ib_mw_bind param; | ||
1705 | LIST_HEAD(l); | ||
1706 | int rc; | ||
1707 | |||
1708 | BUG_ON(seg->mr_nsegs != 1); | ||
1709 | param.bind_info.mr = ia->ri_bind_mem; | ||
1710 | param.bind_info.addr = 0ULL; /* unbind */ | ||
1711 | param.bind_info.length = 0; | ||
1712 | param.bind_info.mw_access_flags = 0; | ||
1713 | if (*r) { | ||
1714 | param.wr_id = (u64) (unsigned long) *r; | ||
1715 | param.send_flags = IB_SEND_SIGNALED; | ||
1716 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
1717 | } else { | ||
1718 | param.wr_id = 0ULL; | ||
1719 | param.send_flags = 0; | ||
1720 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1721 | } | ||
1722 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
1723 | rpcrdma_unmap_one(ia, seg); | ||
1724 | if (rc) | ||
1725 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
1726 | " status %i\n", __func__, rc); | ||
1727 | else | ||
1728 | *r = NULL; /* will upcall on completion */ | ||
1729 | return rc; | ||
1730 | } | ||
1731 | |||
1732 | static int | ||
1733 | rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, | ||
1734 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
1735 | { | ||
1736 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1737 | IB_ACCESS_REMOTE_READ); | ||
1738 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1739 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
1740 | int len, i, rc = 0; | ||
1741 | |||
1742 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1743 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1744 | for (len = 0, i = 0; i < *nsegs;) { | ||
1745 | rpcrdma_map_one(ia, seg, writing); | ||
1746 | ipb[i].addr = seg->mr_dma; | ||
1747 | ipb[i].size = seg->mr_len; | ||
1748 | len += seg->mr_len; | ||
1749 | ++seg; | ||
1750 | ++i; | ||
1751 | /* Check for holes */ | ||
1752 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1753 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1754 | break; | ||
1755 | } | ||
1756 | seg1->mr_base = seg1->mr_dma; | ||
1757 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
1758 | ipb, i, mem_priv, &seg1->mr_base); | ||
1759 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
1760 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
1761 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
1762 | "%u@0x%llx (%d)... status %i\n", | ||
1763 | __func__, len, | ||
1764 | (unsigned long long)seg1->mr_dma, i, rc); | ||
1765 | while (i--) | ||
1766 | rpcrdma_unmap_one(ia, --seg); | ||
1767 | } else { | ||
1768 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
1769 | seg1->mr_nsegs = i; | ||
1770 | seg1->mr_len = len; | ||
1771 | } | ||
1772 | *nsegs = i; | ||
1773 | return rc; | ||
1774 | } | ||
1775 | |||
1776 | static int | ||
1777 | rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, | ||
1778 | struct rpcrdma_ia *ia) | ||
1779 | { | ||
1780 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1781 | int rc; | ||
1782 | |||
1783 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | ||
1784 | seg1->mr_chunk.rl_mr = NULL; | ||
1785 | while (seg1->mr_nsegs--) | ||
1786 | rpcrdma_unmap_one(ia, seg++); | ||
1787 | if (rc) | ||
1788 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
1789 | " status %i\n", __func__, rc); | ||
1790 | return rc; | ||
1791 | } | ||
1792 | |||
1793 | int | 1667 | int |
1794 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | 1668 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, |
1795 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | 1669 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) |
@@ -1819,16 +1693,8 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
1819 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); | 1693 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); |
1820 | break; | 1694 | break; |
1821 | 1695 | ||
1822 | /* Registration using memory windows */ | ||
1823 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
1824 | case RPCRDMA_MEMWINDOWS: | ||
1825 | rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); | ||
1826 | break; | ||
1827 | |||
1828 | /* Default registration each time */ | ||
1829 | default: | 1696 | default: |
1830 | rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); | 1697 | return -1; |
1831 | break; | ||
1832 | } | 1698 | } |
1833 | if (rc) | 1699 | if (rc) |
1834 | return -1; | 1700 | return -1; |
@@ -1838,7 +1704,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
1838 | 1704 | ||
1839 | int | 1705 | int |
1840 | rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | 1706 | rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, |
1841 | struct rpcrdma_xprt *r_xprt, void *r) | 1707 | struct rpcrdma_xprt *r_xprt) |
1842 | { | 1708 | { |
1843 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1709 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
1844 | int nsegs = seg->mr_nsegs, rc; | 1710 | int nsegs = seg->mr_nsegs, rc; |
@@ -1847,9 +1713,7 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
1847 | 1713 | ||
1848 | #if RPCRDMA_PERSISTENT_REGISTRATION | 1714 | #if RPCRDMA_PERSISTENT_REGISTRATION |
1849 | case RPCRDMA_ALLPHYSICAL: | 1715 | case RPCRDMA_ALLPHYSICAL: |
1850 | BUG_ON(nsegs != 1); | ||
1851 | rpcrdma_unmap_one(ia, seg); | 1716 | rpcrdma_unmap_one(ia, seg); |
1852 | rc = 0; | ||
1853 | break; | 1717 | break; |
1854 | #endif | 1718 | #endif |
1855 | 1719 | ||
@@ -1861,21 +1725,9 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
1861 | rc = rpcrdma_deregister_fmr_external(seg, ia); | 1725 | rc = rpcrdma_deregister_fmr_external(seg, ia); |
1862 | break; | 1726 | break; |
1863 | 1727 | ||
1864 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
1865 | case RPCRDMA_MEMWINDOWS: | ||
1866 | rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); | ||
1867 | break; | ||
1868 | |||
1869 | default: | 1728 | default: |
1870 | rc = rpcrdma_deregister_default_external(seg, ia); | ||
1871 | break; | 1729 | break; |
1872 | } | 1730 | } |
1873 | if (r) { | ||
1874 | struct rpcrdma_rep *rep = r; | ||
1875 | void (*func)(struct rpcrdma_rep *) = rep->rr_func; | ||
1876 | rep->rr_func = NULL; | ||
1877 | func(rep); /* dereg done, callback now */ | ||
1878 | } | ||
1879 | return nsegs; | 1731 | return nsegs; |
1880 | } | 1732 | } |
1881 | 1733 | ||
@@ -1950,7 +1802,6 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
1950 | ib_dma_sync_single_for_cpu(ia->ri_id->device, | 1802 | ib_dma_sync_single_for_cpu(ia->ri_id->device, |
1951 | rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); | 1803 | rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); |
1952 | 1804 | ||
1953 | DECR_CQCOUNT(ep); | ||
1954 | rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); | 1805 | rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); |
1955 | 1806 | ||
1956 | if (rc) | 1807 | if (rc) |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index cc1445dc1d1a..89e7cd479705 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/wait.h> /* wait_queue_head_t, etc */ | 43 | #include <linux/wait.h> /* wait_queue_head_t, etc */ |
44 | #include <linux/spinlock.h> /* spinlock_t, etc */ | 44 | #include <linux/spinlock.h> /* spinlock_t, etc */ |
45 | #include <linux/atomic.h> /* atomic_t, etc */ | 45 | #include <linux/atomic.h> /* atomic_t, etc */ |
46 | #include <linux/workqueue.h> /* struct work_struct */ | ||
46 | 47 | ||
47 | #include <rdma/rdma_cm.h> /* RDMA connection api */ | 48 | #include <rdma/rdma_cm.h> /* RDMA connection api */ |
48 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ | 49 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ |
@@ -66,18 +67,21 @@ struct rpcrdma_ia { | |||
66 | struct completion ri_done; | 67 | struct completion ri_done; |
67 | int ri_async_rc; | 68 | int ri_async_rc; |
68 | enum rpcrdma_memreg ri_memreg_strategy; | 69 | enum rpcrdma_memreg ri_memreg_strategy; |
70 | unsigned int ri_max_frmr_depth; | ||
69 | }; | 71 | }; |
70 | 72 | ||
71 | /* | 73 | /* |
72 | * RDMA Endpoint -- one per transport instance | 74 | * RDMA Endpoint -- one per transport instance |
73 | */ | 75 | */ |
74 | 76 | ||
77 | #define RPCRDMA_WC_BUDGET (128) | ||
78 | #define RPCRDMA_POLLSIZE (16) | ||
79 | |||
75 | struct rpcrdma_ep { | 80 | struct rpcrdma_ep { |
76 | atomic_t rep_cqcount; | 81 | atomic_t rep_cqcount; |
77 | int rep_cqinit; | 82 | int rep_cqinit; |
78 | int rep_connected; | 83 | int rep_connected; |
79 | struct rpcrdma_ia *rep_ia; | 84 | struct rpcrdma_ia *rep_ia; |
80 | struct ib_cq *rep_cq; | ||
81 | struct ib_qp_init_attr rep_attr; | 85 | struct ib_qp_init_attr rep_attr; |
82 | wait_queue_head_t rep_connect_wait; | 86 | wait_queue_head_t rep_connect_wait; |
83 | struct ib_sge rep_pad; /* holds zeroed pad */ | 87 | struct ib_sge rep_pad; /* holds zeroed pad */ |
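The new ri_max_frmr_depth field records how deep a fast-register page list the device actually supports, so the FRMR code can stop assuming RPCRDMA_MAX_DATA_SEGS everywhere, and RPCRDMA_WC_BUDGET/RPCRDMA_POLLSIZE bound how much work a completion handler may do per upcall. A hedged sketch of how the depth field would be filled in at IA open time (devattr is an assumed struct ib_device_attr queried from the device; the clamp itself is the point):

	/* sketch: never build a fast-reg page list deeper than the HCA allows */
	ia->ri_max_frmr_depth = min_t(unsigned int,
				      RPCRDMA_MAX_DATA_SEGS,
				      devattr.max_fast_reg_page_list_len);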
@@ -86,6 +90,9 @@ struct rpcrdma_ep { | |||
86 | struct rpc_xprt *rep_xprt; /* for rep_func */ | 90 | struct rpc_xprt *rep_xprt; /* for rep_func */ |
87 | struct rdma_conn_param rep_remote_cma; | 91 | struct rdma_conn_param rep_remote_cma; |
88 | struct sockaddr_storage rep_remote_addr; | 92 | struct sockaddr_storage rep_remote_addr; |
93 | struct delayed_work rep_connect_worker; | ||
94 | struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE]; | ||
95 | struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE]; | ||
89 | }; | 96 | }; |
90 | 97 | ||
91 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) | 98 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) |
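The per-endpoint rep_send_wcs/rep_recv_wcs scratch arrays let each completion handler pull up to RPCRDMA_POLLSIZE work completions per ib_poll_cq() call instead of one at a time, which is where the reduced lock contention and fewer poll calls in the shortlog come from. A sketch of the batched-polling pattern this enables; the function and helper names (rpcrdma_sendcq_poll, rpcrdma_sendcq_process_wc) are assumptions based on this diff, not verified symbols:

/* sketch: drain the send CQ in RPCRDMA_POLLSIZE-sized batches */
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int count, rc;

	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;	/* 0: CQ drained; <0: poll error */

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE);	/* a full batch may mean more pending */
	return 0;
}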
@@ -124,7 +131,6 @@ struct rpcrdma_rep { | |||
124 | struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ | 131 | struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ |
125 | void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ | 132 | void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ |
126 | struct list_head rr_list; /* tasklet list */ | 133 | struct list_head rr_list; /* tasklet list */ |
127 | wait_queue_head_t rr_unbind; /* optional unbind wait */ | ||
128 | struct ib_sge rr_iov; /* for posting */ | 134 | struct ib_sge rr_iov; /* for posting */ |
129 | struct ib_mr *rr_handle; /* handle for mem in rr_iov */ | 135 | struct ib_mr *rr_handle; /* handle for mem in rr_iov */ |
130 | char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ | 136 | char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ |
@@ -159,7 +165,6 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ | |||
159 | struct ib_mr *rl_mr; /* if registered directly */ | 165 | struct ib_mr *rl_mr; /* if registered directly */ |
160 | struct rpcrdma_mw { /* if registered from region */ | 166 | struct rpcrdma_mw { /* if registered from region */ |
161 | union { | 167 | union { |
162 | struct ib_mw *mw; | ||
163 | struct ib_fmr *fmr; | 168 | struct ib_fmr *fmr; |
164 | struct { | 169 | struct { |
165 | struct ib_fast_reg_page_list *fr_pgl; | 170 | struct ib_fast_reg_page_list *fr_pgl; |
@@ -207,7 +212,6 @@ struct rpcrdma_req { | |||
207 | struct rpcrdma_buffer { | 212 | struct rpcrdma_buffer { |
208 | spinlock_t rb_lock; /* protects indexes */ | 213 | spinlock_t rb_lock; /* protects indexes */ |
209 | atomic_t rb_credits; /* most recent server credits */ | 214 | atomic_t rb_credits; /* most recent server credits */ |
210 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ | ||
211 | int rb_max_requests;/* client max requests */ | 215 | int rb_max_requests;/* client max requests */ |
212 | struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ | 216 | struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ |
213 | int rb_send_index; | 217 | int rb_send_index; |
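Removing the cached rb_cwndscale means the transport no longer mirrors the generic RPC congestion-window scale into its buffer; the reply handler can compute the window directly from the server's credit grant using the constants now exported by the SUNRPC header. A hedged sketch of that computation (rep and rqst stand for the reply and its matched request, locking is elided, and RPC_CWNDSHIFT is the generic sunrpc constant):

	/* sketch: set the congestion window straight from granted credits */
	cwnd = xprt->cwnd;
	xprt->cwnd = atomic_read(&rep->rr_buffer->rb_credits) << RPC_CWNDSHIFT;
	if (xprt->cwnd > cwnd)
		xprt_release_rqst_cong(rqst->rq_task);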
@@ -300,7 +304,7 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); | |||
300 | */ | 304 | */ |
301 | int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, | 305 | int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, |
302 | struct rpcrdma_create_data_internal *); | 306 | struct rpcrdma_create_data_internal *); |
303 | int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); | 307 | void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); |
304 | int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); | 308 | int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
305 | int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); | 309 | int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
306 | 310 | ||
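Changing rpcrdma_ep_destroy() from int to void acknowledges that endpoint teardown has no failure mode its callers can act on. A minimal sketch of the caller side after the change (the surrounding destroy sequence is assumed from context, not quoted):

	/* sketch: teardown path no longer captures a return code */
	rpcrdma_buffer_destroy(&r_xprt->rx_buf);
	rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
	rpcrdma_ia_close(&r_xprt->rx_ia);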
@@ -330,11 +334,12 @@ int rpcrdma_deregister_internal(struct rpcrdma_ia *, | |||
330 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, | 334 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, |
331 | int, int, struct rpcrdma_xprt *); | 335 | int, int, struct rpcrdma_xprt *); |
332 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, | 336 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, |
333 | struct rpcrdma_xprt *, void *); | 337 | struct rpcrdma_xprt *); |
334 | 338 | ||
335 | /* | 339 | /* |
336 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c | 340 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c |
337 | */ | 341 | */ |
342 | void rpcrdma_connect_worker(struct work_struct *); | ||
338 | void rpcrdma_conn_func(struct rpcrdma_ep *); | 343 | void rpcrdma_conn_func(struct rpcrdma_ep *); |
339 | void rpcrdma_reply_handler(struct rpcrdma_rep *); | 344 | void rpcrdma_reply_handler(struct rpcrdma_rep *); |
340 | 345 | ||
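The new rpcrdma_connect_worker() declaration is the workqueue entry point behind the rep_connect_worker field added to struct rpcrdma_ep above (and the reason for the new linux/workqueue.h include): connection state changes are reported to the RPC layer from process context instead of from the completion path. A hedged sketch of what such a worker looks like; only the container_of() plumbing follows from the declarations in this header, and the body is an illustrative reconstruction:

/* sketch: delayed-work handler recovering the endpoint that queued it */
void
rpcrdma_connect_worker(struct work_struct *work)
{
	struct rpcrdma_ep *ep =
		container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
	struct rpc_xprt *xprt = ep->rep_xprt;

	spin_lock_bh(&xprt->transport_lock);
	if (ep->rep_connected > 0) {
		if (!xprt_test_and_set_connected(xprt))
			xprt_wake_pending_tasks(xprt, 0);
	} else {
		if (xprt_test_and_clear_connected(xprt))
			xprt_wake_pending_tasks(xprt, -ENOTCONN);
	}
	spin_unlock_bh(&xprt->transport_lock);
}

The endpoint would arm this at creation time with something like INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker), again an assumption based on the field and prototype added in this diff.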