author     Linus Torvalds <torvalds@linux-foundation.org>  2014-06-10 18:02:42 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-06-10 18:02:42 -0400
commit     d1e1cda862c16252087374ac75949b0e89a5717e
tree       544ce467bed23638949a1991b4f7b00e7472baa4 /fs/nfs/pagelist.c
parent     07888238f55056605cd23aa4ea3ca97d5e15938f
parent     a914722f333b3359d2f4f12919380a334176bb89
Merge tag 'nfs-for-3.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
- massive cleanup of the NFS read/write code by Anna and Dros
- support multiple NFS read/write requests per page in order to deal
with non-page aligned pNFS striping. Also cleans up the r/wsize <
page size code nicely.
- stable fix for ensuring inode is declared uptodate only after all
the attributes have been checked.
- stable fix for a kernel Oops when remounting
- NFS over RDMA client fixes
- move the pNFS files layout driver into its own subdirectory"
* tag 'nfs-for-3.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (79 commits)
NFS: populate ->net in mount data when remounting
pnfs: fix lockup caused by pnfs_generic_pg_test
NFSv4.1: Fix typo in dprintk
NFSv4.1: Comment is now wrong and redundant to code
NFS: Use raw_write_seqcount_begin/end int nfs4_reclaim_open_state
xprtrdma: Disconnect on registration failure
xprtrdma: Remove BUG_ON() call sites
xprtrdma: Avoid deadlock when credit window is reset
SUNRPC: Move congestion window constants to header file
xprtrdma: Reset connection timeout after successful reconnect
xprtrdma: Use macros for reconnection timeout constants
xprtrdma: Allocate missing pagelist
xprtrdma: Remove Tavor MTU setting
xprtrdma: Ensure ia->ri_id->qp is not NULL when reconnecting
xprtrdma: Reduce the number of hardway buffer allocations
xprtrdma: Limit work done by completion handler
xprtrmda: Reduce calls to ib_poll_cq() in completion handlers
xprtrmda: Reduce lock contention in completion handlers
xprtrdma: Split the completion queue
xprtrdma: Make rpcrdma_ep_destroy() return void
...
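
The "multiple NFS read/write requests per page" item above rests on a small circular list: as the pagelist.c diff below shows, each struct nfs_page gains a wb_head pointer to its group head and a wb_this_page pointer to the next member, spliced together by nfs_page_group_init(). A rough userspace model of just that linkage (a sketch; the struct and helper here are invented stand-ins for the kernel code):

    #include <assert.h>
    #include <stdio.h>

    /* Stand-in for struct nfs_page; only the group linkage is modeled. */
    struct req {
        struct req *head;      /* models wb_head */
        struct req *this_page; /* models wb_this_page */
        unsigned int pgbase, bytes;
    };

    /* Mirrors nfs_page_group_init(): a NULL prev makes r a group of one
     * (its own head); otherwise r is spliced into prev's ring. */
    static void group_init(struct req *r, struct req *prev)
    {
        if (!prev) {
            r->head = r;
            r->this_page = r;
        } else {
            r->head = prev->head;
            r->this_page = prev->this_page;
            prev->this_page = r;
        }
    }

    int main(void)
    {
        struct req a = { .pgbase = 0, .bytes = 1024 };
        struct req b = { .pgbase = 1024, .bytes = 1024 };

        group_init(&a, NULL);
        group_init(&b, &a);

        /* walk the ring exactly once, as the sync helpers in the diff do */
        int members = 1;
        for (struct req *tmp = a.this_page; tmp != &a; tmp = tmp->this_page)
            members++;
        printf("group of %d requests\n", members);
        assert(members == 2 && b.head == &a);
        return 0;
    }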
Diffstat (limited to 'fs/nfs/pagelist.c')
 -rw-r--r--  fs/nfs/pagelist.c | 633
 1 file changed, 583 insertions(+), 50 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 03ed984ab4d8..b6ee3a6ee96d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -24,9 +24,14 @@
 #include "internal.h"
 #include "pnfs.h"
 
+#define NFSDBG_FACILITY NFSDBG_PAGECACHE
+
 static struct kmem_cache *nfs_page_cachep;
+static const struct rpc_call_ops nfs_pgio_common_ops;
+
+static void nfs_free_request(struct nfs_page *);
 
-bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
 {
 	p->npages = pagecount;
 	if (pagecount <= ARRAY_SIZE(p->page_array))
@@ -133,11 +138,156 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
 	return __nfs_iocounter_wait(c);
 }
 
+static int nfs_wait_bit_uninterruptible(void *word)
+{
+	io_schedule();
+	return 0;
+}
+
+/*
+ * nfs_page_group_lock - lock the head of the page group
+ * @req - request in group that is to be locked
+ *
+ * this lock must be held if modifying the page group list
+ */
+void
+nfs_page_group_lock(struct nfs_page *req)
+{
+	struct nfs_page *head = req->wb_head;
+
+	WARN_ON_ONCE(head != head->wb_head);
+
+	wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+			 nfs_wait_bit_uninterruptible,
+			 TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * nfs_page_group_unlock - unlock the head of the page group
+ * @req - request in group that is to be unlocked
+ */
+void
+nfs_page_group_unlock(struct nfs_page *req)
+{
+	struct nfs_page *head = req->wb_head;
+
+	WARN_ON_ONCE(head != head->wb_head);
+
+	smp_mb__before_atomic();
+	clear_bit(PG_HEADLOCK, &head->wb_flags);
+	smp_mb__after_atomic();
+	wake_up_bit(&head->wb_flags, PG_HEADLOCK);
+}
+
+/*
+ * nfs_page_group_sync_on_bit_locked
+ *
+ * must be called with page group lock held
+ */
+static bool
+nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+{
+	struct nfs_page *head = req->wb_head;
+	struct nfs_page *tmp;
+
+	WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags));
+	WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags));
+
+	tmp = req->wb_this_page;
+	while (tmp != req) {
+		if (!test_bit(bit, &tmp->wb_flags))
+			return false;
+		tmp = tmp->wb_this_page;
+	}
+
+	/* true! reset all bits */
+	tmp = req;
+	do {
+		clear_bit(bit, &tmp->wb_flags);
+		tmp = tmp->wb_this_page;
+	} while (tmp != req);
+
+	return true;
+}
+
+/*
+ * nfs_page_group_sync_on_bit - set bit on current request, but only
+ *   return true if the bit is set for all requests in page group
+ * @req - request in page group
+ * @bit - PG_* bit that is used to sync page group
+ */
+bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
+{
+	bool ret;
+
+	nfs_page_group_lock(req);
+	ret = nfs_page_group_sync_on_bit_locked(req, bit);
+	nfs_page_group_unlock(req);
+
+	return ret;
+}
+
+/*
+ * nfs_page_group_init - Initialize the page group linkage for @req
+ * @req - a new nfs request
+ * @prev - the previous request in page group, or NULL if @req is the first
+ *         or only request in the group (the head).
+ */
+static inline void
+nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
+{
+	WARN_ON_ONCE(prev == req);
+
+	if (!prev) {
+		req->wb_head = req;
+		req->wb_this_page = req;
+	} else {
+		WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
+		WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
+		req->wb_head = prev->wb_head;
+		req->wb_this_page = prev->wb_this_page;
+		prev->wb_this_page = req;
+
+		/* grab extra ref if head request has extra ref from
+		 * the write/commit path to handle handoff between write
+		 * and commit lists */
+		if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags))
+			kref_get(&req->wb_kref);
+	}
+}
+
+/*
+ * nfs_page_group_destroy - sync the destruction of page groups
+ * @req - request that no longer needs the page group
+ *
+ * releases the page group reference from each member once all
+ * members have called this function.
+ */
+static void
+nfs_page_group_destroy(struct kref *kref)
+{
+	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+	struct nfs_page *tmp, *next;
+
+	if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
+		return;
+
+	tmp = req;
+	do {
+		next = tmp->wb_this_page;
+		/* unlink and free */
+		tmp->wb_this_page = tmp;
+		tmp->wb_head = tmp;
+		nfs_free_request(tmp);
+		tmp = next;
+	} while (tmp != req);
+}
+
 /**
  * nfs_create_request - Create an NFS read/write request.
  * @ctx: open context to use
- * @inode: inode to which the request is attached
  * @page: page to write
+ * @last: last nfs request created for this page group or NULL if head
  * @offset: starting offset within the page for the write
  * @count: number of bytes to read/write
  *
@@ -146,9 +296,9 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
  * User should ensure it is safe to sleep in this function.
  */
 struct nfs_page *
-nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
-		   struct page *page,
-		   unsigned int offset, unsigned int count)
+nfs_create_request(struct nfs_open_context *ctx, struct page *page,
+		   struct nfs_page *last, unsigned int offset,
+		   unsigned int count)
 {
 	struct nfs_page *req;
 	struct nfs_lock_context *l_ctx;
@@ -180,6 +330,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	req->wb_bytes = count;
 	req->wb_context = get_nfs_open_context(ctx);
 	kref_init(&req->wb_kref);
+	nfs_page_group_init(req, last);
 	return req;
 }
 
@@ -237,16 +388,22 @@ static void nfs_clear_request(struct nfs_page *req)
 	}
 }
 
-
 /**
  * nfs_release_request - Release the count on an NFS read/write request
  * @req: request to release
  *
  * Note: Should never be called with the spinlock held!
  */
-static void nfs_free_request(struct kref *kref)
+static void nfs_free_request(struct nfs_page *req)
 {
-	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+	WARN_ON_ONCE(req->wb_this_page != req);
+
+	/* extra debug: make sure no sync bits are still set */
+	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags));
 
 	/* Release struct file and open context */
 	nfs_clear_request(req);
@@ -255,13 +412,7 @@ static void nfs_free_request(struct kref *kref)
 
 void nfs_release_request(struct nfs_page *req)
 {
-	kref_put(&req->wb_kref, nfs_free_request);
-}
-
-static int nfs_wait_bit_uninterruptible(void *word)
-{
-	io_schedule();
-	return 0;
+	kref_put(&req->wb_kref, nfs_page_group_destroy);
 }
 
 /**
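
With this change, the final kref_put() on any member funnels into nfs_page_group_destroy() above, which frees the group only once every member has passed through PG_TEARDOWN via nfs_page_group_sync_on_bit(). A rough single-threaded model of that sync-on-bit idea (invented types; the kernel's locking and atomics are omitted):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct req {
        struct req *this_page; /* next member of the circular group */
        bool bit;              /* models one PG_* sync bit */
    };

    /* Models nfs_page_group_sync_on_bit(): set the bit on one member,
     * report true only when every member of the ring has set it, and
     * clear all bits for the last caller. */
    static bool sync_on_bit(struct req *req)
    {
        struct req *tmp;

        req->bit = true;
        for (tmp = req->this_page; tmp != req; tmp = tmp->this_page)
            if (!tmp->bit)
                return false;

        tmp = req;
        do { /* last member in: reset all bits */
            tmp->bit = false;
            tmp = tmp->this_page;
        } while (tmp != req);
        return true;
    }

    int main(void)
    {
        struct req a = { .bit = false }, b = { .bit = false };
        a.this_page = &b;
        b.this_page = &a;

        assert(!sync_on_bit(&a)); /* b has not reached teardown yet */
        assert(sync_on_bit(&b));  /* now everyone has: destroy proceeds */
        printf("teardown allowed on the last caller\n");
        return 0;
    }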
@@ -279,22 +430,249 @@ nfs_wait_on_request(struct nfs_page *req)
 			TASK_UNINTERRUPTIBLE);
 }
 
-bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+/*
+ * nfs_generic_pg_test - determine if requests can be coalesced
+ * @desc: pointer to descriptor
+ * @prev: previous request in desc, or NULL
+ * @req: this request
+ *
+ * Returns zero if @req can be coalesced into @desc, otherwise it returns
+ * the size of the request.
+ */
+size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+			   struct nfs_page *prev, struct nfs_page *req)
 {
-	/*
-	 * FIXME: ideally we should be able to coalesce all requests
-	 * that are not block boundary aligned, but currently this
-	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
-	 * since nfs_flush_multi and nfs_pagein_multi assume you
-	 * can have only one struct nfs_page.
-	 */
-	if (desc->pg_bsize < PAGE_SIZE)
+	if (desc->pg_count > desc->pg_bsize) {
+		/* should never happen */
+		WARN_ON_ONCE(1);
 		return 0;
+	}
 
-	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
+	return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes);
 }
 EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 
+static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
+{
+	return container_of(hdr, struct nfs_rw_header, header);
+}
+
+/**
+ * nfs_rw_header_alloc - Allocate a header for a read or write
+ * @ops: Read or write function vector
+ */
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
+{
+	struct nfs_rw_header *header = ops->rw_alloc_header();
+
+	if (header) {
+		struct nfs_pgio_header *hdr = &header->header;
+
+		INIT_LIST_HEAD(&hdr->pages);
+		spin_lock_init(&hdr->lock);
+		atomic_set(&hdr->refcnt, 0);
+		hdr->rw_ops = ops;
+	}
+	return header;
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
+
+/*
+ * nfs_rw_header_free - Free a read or write header
+ * @hdr: The header to free
+ */
+void nfs_rw_header_free(struct nfs_pgio_header *hdr)
+{
+	hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_free);
+
+/**
+ * nfs_pgio_data_alloc - Allocate pageio data
+ * @hdr: The header making a request
+ * @pagecount: Number of pages to create
+ */
+static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr,
+						 unsigned int pagecount)
+{
+	struct nfs_pgio_data *data, *prealloc;
+
+	prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
+	if (prealloc->header == NULL)
+		data = prealloc;
+	else
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		goto out;
+
+	if (nfs_pgarray_set(&data->pages, pagecount)) {
+		data->header = hdr;
+		atomic_inc(&hdr->refcnt);
+	} else {
+		if (data != prealloc)
+			kfree(data);
+		data = NULL;
+	}
+out:
+	return data;
+}
+
+/**
+ * nfs_pgio_data_release - Properly free pageio data
+ * @data: The data to release
+ */
+void nfs_pgio_data_release(struct nfs_pgio_data *data)
+{
+	struct nfs_pgio_header *hdr = data->header;
+	struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr);
+
+	put_nfs_open_context(data->args.context);
+	if (data->pages.pagevec != data->pages.page_array)
+		kfree(data->pages.pagevec);
+	if (data == &pageio_header->rpc_data) {
+		data->header = NULL;
+		data = NULL;
+	}
+	if (atomic_dec_and_test(&hdr->refcnt))
+		hdr->completion_ops->completion(hdr);
+	/* Note: we only free the rpc_task after callbacks are done.
+	 * See the comment in rpc_free_task() for why
+	 */
+	kfree(data);
+}
+EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
+
+/**
+ * nfs_pgio_rpcsetup - Set up arguments for a pageio call
+ * @data: The pageio data
+ * @count: Number of bytes to read
+ * @offset: Initial offset
+ * @how: How to commit data (writes only)
+ * @cinfo: Commit information for the call (writes only)
+ */
+static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
+			      unsigned int count, unsigned int offset,
+			      int how, struct nfs_commit_info *cinfo)
+{
+	struct nfs_page *req = data->header->req;
+
+	/* Set up the RPC argument and reply structs
+	 * NB: take care not to mess about with data->commit et al. */
+
+	data->args.fh = NFS_FH(data->header->inode);
+	data->args.offset = req_offset(req) + offset;
+	/* pnfs_set_layoutcommit needs this */
+	data->mds_offset = data->args.offset;
+	data->args.pgbase = req->wb_pgbase + offset;
+	data->args.pages = data->pages.pagevec;
+	data->args.count = count;
+	data->args.context = get_nfs_open_context(req->wb_context);
+	data->args.lock_context = req->wb_lock_context;
+	data->args.stable = NFS_UNSTABLE;
+	switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
+	case 0:
+		break;
+	case FLUSH_COND_STABLE:
+		if (nfs_reqs_to_commit(cinfo))
+			break;
+	default:
+		data->args.stable = NFS_FILE_SYNC;
+	}
+
+	data->res.fattr = &data->fattr;
+	data->res.count = count;
+	data->res.eof = 0;
+	data->res.verf = &data->verf;
+	nfs_fattr_init(&data->fattr);
+}
+
+/**
+ * nfs_pgio_prepare - Prepare pageio data to go over the wire
+ * @task: The current task
+ * @calldata: pageio data to prepare
+ */
+static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	int err;
+	err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
+	if (err)
+		rpc_exit(task, err);
+}
+
+int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
+		      const struct rpc_call_ops *call_ops, int how, int flags)
+{
+	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->header->cred,
+	};
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = clnt,
+		.task = &data->task,
+		.rpc_message = &msg,
+		.callback_ops = call_ops,
+		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
+		.flags = RPC_TASK_ASYNC | flags,
+	};
+	int ret = 0;
+
+	data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
+
+	dprintk("NFS: %5u initiated pgio call "
+		"(req %s/%llu, %u bytes @ offset %llu)\n",
+		data->task.tk_pid,
+		data->header->inode->i_sb->s_id,
+		(unsigned long long)NFS_FILEID(data->header->inode),
+		data->args.count,
+		(unsigned long long)data->args.offset);
+
+	task = rpc_run_task(&task_setup_data);
+	if (IS_ERR(task)) {
+		ret = PTR_ERR(task);
+		goto out;
+	}
+	if (how & FLUSH_SYNC) {
+		ret = rpc_wait_for_completion_task(task);
+		if (ret == 0)
+			ret = task->tk_status;
+	}
+	rpc_put_task(task);
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
+
+/**
+ * nfs_pgio_error - Clean up from a pageio error
+ * @desc: IO descriptor
+ * @hdr: pageio header
+ */
+static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
+			  struct nfs_pgio_header *hdr)
+{
+	set_bit(NFS_IOHDR_REDO, &hdr->flags);
+	nfs_pgio_data_release(hdr->data);
+	hdr->data = NULL;
+	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+	return -ENOMEM;
+}
+
+/**
+ * nfs_pgio_release - Release pageio data
+ * @calldata: The pageio data to release
+ */
+static void nfs_pgio_release(void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	if (data->header->rw_ops->rw_release)
+		data->header->rw_ops->rw_release(data);
+	nfs_pgio_data_release(data);
+}
+
 /**
  * nfs_pageio_init - initialise a page io descriptor
  * @desc: pointer to descriptor
@@ -307,6 +685,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 		     struct inode *inode,
 		     const struct nfs_pageio_ops *pg_ops,
 		     const struct nfs_pgio_completion_ops *compl_ops,
+		     const struct nfs_rw_ops *rw_ops,
 		     size_t bsize,
 		     int io_flags)
 {
@@ -320,6 +699,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_inode = inode;
 	desc->pg_ops = pg_ops;
 	desc->pg_completion_ops = compl_ops;
+	desc->pg_rw_ops = rw_ops;
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
@@ -328,6 +708,94 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init);
 
+/**
+ * nfs_pgio_result - Basic pageio error handling
+ * @task: The task that ran
+ * @calldata: Pageio data to check
+ */
+static void nfs_pgio_result(struct rpc_task *task, void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	struct inode *inode = data->header->inode;
+
+	dprintk("NFS: %s: %5u, (status %d)\n", __func__,
+		task->tk_pid, task->tk_status);
+
+	if (data->header->rw_ops->rw_done(task, data, inode) != 0)
+		return;
+	if (task->tk_status < 0)
+		nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
+	else
+		data->header->rw_ops->rw_result(task, data);
+}
+
+/*
+ * Create an RPC task for the given read or write request and kick it.
+ * The page must have been locked by the caller.
+ *
+ * It may happen that the page we're passed is not marked dirty.
+ * This is the case if nfs_updatepage detects a conflicting request
+ * that has been written but not committed.
+ */
+int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
+		     struct nfs_pgio_header *hdr)
+{
+	struct nfs_page *req;
+	struct page **pages;
+	struct nfs_pgio_data *data;
+	struct list_head *head = &desc->pg_list;
+	struct nfs_commit_info cinfo;
+
+	data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base,
+							   desc->pg_count));
+	if (!data)
+		return nfs_pgio_error(desc, hdr);
+
+	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
+	pages = data->pages.pagevec;
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_list_add_request(req, &hdr->pages);
+		*pages++ = req->wb_page;
+	}
+
+	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
+	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
+		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
+
+	/* Set up the argument struct */
+	nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
+	hdr->data = data;
+	desc->pg_rpc_callops = &nfs_pgio_common_ops;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nfs_generic_pgio);
+
+static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
+{
+	struct nfs_rw_header *rw_hdr;
+	struct nfs_pgio_header *hdr;
+	int ret;
+
+	rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops);
+	if (!rw_hdr) {
+		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+		return -ENOMEM;
+	}
+	hdr = &rw_hdr->header;
+	nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
+	atomic_inc(&hdr->refcnt);
+	ret = nfs_generic_pgio(desc, hdr);
+	if (ret == 0)
+		ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
+					hdr->data, desc->pg_rpc_callops,
+					desc->pg_ioflags, 0);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		hdr->completion_ops->completion(hdr);
+	return ret;
+}
+
 static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
 				   const struct nfs_open_context *ctx2)
 {
@@ -356,18 +824,23 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
 				      struct nfs_page *req,
 				      struct nfs_pageio_descriptor *pgio)
 {
-	if (!nfs_match_open_context(req->wb_context, prev->wb_context))
-		return false;
-	if (req->wb_context->dentry->d_inode->i_flock != NULL &&
-	    !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context))
-		return false;
-	if (req->wb_pgbase != 0)
-		return false;
-	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
-		return false;
-	if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
-		return false;
-	return pgio->pg_ops->pg_test(pgio, prev, req);
+	size_t size;
+
+	if (prev) {
+		if (!nfs_match_open_context(req->wb_context, prev->wb_context))
+			return false;
+		if (req->wb_context->dentry->d_inode->i_flock != NULL &&
+		    !nfs_match_lock_context(req->wb_lock_context,
+					    prev->wb_lock_context))
+			return false;
+		if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+			return false;
+	}
+	size = pgio->pg_ops->pg_test(pgio, prev, req);
+	WARN_ON_ONCE(size > req->wb_bytes);
+	if (size && size < req->wb_bytes)
+		req->wb_bytes = size;
+	return size > 0;
 }
 
 /**
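
The pg_test contract changes here from yes/no to a byte count: nfs_generic_pg_test() earlier in the diff now returns how much of the request still fits in the descriptor, min(pg_bsize - pg_count, wb_bytes), and nfs_can_coalesce_requests() trims req->wb_bytes to that answer. A standalone sketch of the arithmetic (sizes made up for illustration):

    #include <stdio.h>

    /* Models the new nfs_generic_pg_test() return value: the bytes of
     * the request that still fit, rather than a boolean. */
    static size_t generic_pg_test(size_t pg_bsize, size_t pg_count,
                                  size_t wb_bytes)
    {
        if (pg_count > pg_bsize)
            return 0; /* the "should never happen" guard in the patch */
        size_t room = pg_bsize - pg_count;
        return room < wb_bytes ? room : wb_bytes;
    }

    int main(void)
    {
        /* e.g. a 32768-byte window with 28672 bytes already queued:
         * only 4096 of an 8192-byte request coalesces; the caller
         * trims wb_bytes and the rest becomes a subrequest. */
        printf("coalesce %zu of 8192 bytes\n",
               generic_pg_test(32768, 28672, 8192));
        return 0;
    }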
@@ -381,17 +854,16 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
 static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
 				     struct nfs_page *req)
 {
+	struct nfs_page *prev = NULL;
 	if (desc->pg_count != 0) {
-		struct nfs_page *prev;
-
 		prev = nfs_list_entry(desc->pg_list.prev);
-		if (!nfs_can_coalesce_requests(prev, req, desc))
-			return 0;
 	} else {
 		if (desc->pg_ops->pg_init)
 			desc->pg_ops->pg_init(desc, req);
 		desc->pg_base = req->wb_pgbase;
 	}
+	if (!nfs_can_coalesce_requests(prev, req, desc))
+		return 0;
 	nfs_list_remove_request(req);
 	nfs_list_add_request(req, &desc->pg_list);
 	desc->pg_count += req->wb_bytes;
@@ -421,22 +893,73 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
  * @desc: destination io descriptor
  * @req: request
  *
+ * This may split a request into subrequests which are all part of the
+ * same page group.
+ *
  * Returns true if the request 'req' was successfully coalesced into the
  * existing list of pages 'desc'.
  */
 static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 				    struct nfs_page *req)
 {
-	while (!nfs_pageio_do_add_request(desc, req)) {
-		desc->pg_moreio = 1;
-		nfs_pageio_doio(desc);
-		if (desc->pg_error < 0)
-			return 0;
-		desc->pg_moreio = 0;
-		if (desc->pg_recoalesce)
-			return 0;
-	}
+	struct nfs_page *subreq;
+	unsigned int bytes_left = 0;
+	unsigned int offset, pgbase;
+
+	nfs_page_group_lock(req);
+
+	subreq = req;
+	bytes_left = subreq->wb_bytes;
+	offset = subreq->wb_offset;
+	pgbase = subreq->wb_pgbase;
+
+	do {
+		if (!nfs_pageio_do_add_request(desc, subreq)) {
+			/* make sure pg_test call(s) did nothing */
+			WARN_ON_ONCE(subreq->wb_bytes != bytes_left);
+			WARN_ON_ONCE(subreq->wb_offset != offset);
+			WARN_ON_ONCE(subreq->wb_pgbase != pgbase);
+
+			nfs_page_group_unlock(req);
+			desc->pg_moreio = 1;
+			nfs_pageio_doio(desc);
+			if (desc->pg_error < 0)
+				return 0;
+			desc->pg_moreio = 0;
+			if (desc->pg_recoalesce)
+				return 0;
+			/* retry add_request for this subreq */
+			nfs_page_group_lock(req);
+			continue;
+		}
+
+		/* check for buggy pg_test call(s) */
+		WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE);
+		WARN_ON_ONCE(subreq->wb_bytes > bytes_left);
+		WARN_ON_ONCE(subreq->wb_bytes == 0);
+
+		bytes_left -= subreq->wb_bytes;
+		offset += subreq->wb_bytes;
+		pgbase += subreq->wb_bytes;
+
+		if (bytes_left) {
+			subreq = nfs_create_request(req->wb_context,
+						    req->wb_page,
+						    subreq, pgbase, bytes_left);
+			if (IS_ERR(subreq))
+				goto err_ptr;
+			nfs_lock_request(subreq);
+			subreq->wb_offset = offset;
+			subreq->wb_index = req->wb_index;
+		}
+	} while (bytes_left > 0);
+
+	nfs_page_group_unlock(req);
 	return 1;
+err_ptr:
+	desc->pg_error = PTR_ERR(subreq);
+	nfs_page_group_unlock(req);
+	return 0;
 }
 
 static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
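
__nfs_pageio_add_request() turns that trimming into splitting: when only part of a subrequest coalesces, the leftover bytes become a new nfs_create_request() in the same page group and the loop retries until nothing is left. A stripped-down model of the bytes_left/offset bookkeeping alone (no descriptor, locking, or error handling):

    #include <stdio.h>

    int main(void)
    {
        unsigned int bytes_left = 8192; /* whole request, two 4K blocks */
        unsigned int offset = 0;
        unsigned int fits = 4096;       /* what pg_test lets through */
        int subreqs = 0;

        do {
            unsigned int take = bytes_left < fits ? bytes_left : fits;
            printf("subreq %d: offset %u, %u bytes\n", subreqs, offset, take);
            bytes_left -= take;
            offset += take;
            subreqs++;
        } while (bytes_left > 0);

        printf("request split into %d subrequests\n", subreqs);
        return 0;
    }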
@@ -535,3 +1058,13 @@ void nfs_destroy_nfspagecache(void)
 	kmem_cache_destroy(nfs_page_cachep);
 }
 
+static const struct rpc_call_ops nfs_pgio_common_ops = {
+	.rpc_call_prepare = nfs_pgio_prepare,
+	.rpc_call_done = nfs_pgio_result,
+	.rpc_release = nfs_pgio_release,
+};
+
+const struct nfs_pageio_ops nfs_pgio_rw_ops = {
+	.pg_test = nfs_generic_pg_test,
+	.pg_doio = nfs_generic_pg_pgios,
+};
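
The two ops tables that now close the file carry the read/write consolidation: generic pageio code dispatches through nfs_pgio_common_ops and nfs_pgio_rw_ops, and the read and write paths plug their own nfs_rw_ops into nfs_pageio_init(). A toy version of that function-pointer dispatch (all names invented):

    #include <stdio.h>

    /* Each I/O direction supplies its own entries; the generic path only
     * sees the table. Loosely modeled on rw_initiate in nfs_rw_ops. */
    struct rw_ops {
        void (*initiate)(unsigned int count);
    };

    static void read_initiate(unsigned int count)
    {
        printf("READ  %u bytes\n", count);
    }

    static void write_initiate(unsigned int count)
    {
        printf("WRITE %u bytes\n", count);
    }

    static const struct rw_ops read_ops  = { read_initiate };
    static const struct rw_ops write_ops = { write_initiate };

    /* The generic path knows nothing about the direction it drives. */
    static void generic_pgio(const struct rw_ops *ops, unsigned int count)
    {
        ops->initiate(count);
    }

    int main(void)
    {
        generic_pgio(&read_ops, 4096);
        generic_pgio(&write_ops, 8192);
        return 0;
    }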