| author | Chuck Lever <cel@netapp.com> | 2006-06-20 12:57:03 -0400 |
| --- | --- | --- |
| committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-06-24 13:11:39 -0400 |
| commit | 82b145c5a572f7fa7211dffe2097234dc91bcecc | |
| tree | 35689aa653d29f17681f13d89d592c88e7c112e5 /fs/nfs | |
| parent | 06cf6f2ed0b19629700794727d86ed57b9c0583e | |
NFS: alloc nfs_read/write_data as direct I/O is scheduled
Re-arrange the logic in the NFS direct I/O path so that nfs_read/write_data
structs are allocated just before they are scheduled, rather than
allocating them all at once before we start scheduling requests.
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs/nfs')
| -rw-r--r-- | fs/nfs/direct.c | 210 |

1 file changed, 65 insertions, 145 deletions
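The shape of the change is easier to see outside the kernel. Before, nfs_direct_read_alloc()/nfs_direct_write_alloc() built a whole list of request structs up front; after the patch, the scheduling loop allocates each struct just before dispatching it, and a reference count on the dreq (get_dreq()/put_dreq()) tracks outstanding work so completion cannot fire early. Below is a minimal user-space sketch of that pattern; it is illustrative only: dispatch_chunk(), rpc_complete(), and the immediate synchronous "completion" are stand-ins, not kernel interfaces.

```c
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for struct nfs_direct_req: only the reference count matters here. */
struct dreq {
	int refs;
};

static void get_dreq(struct dreq *d) { d->refs++; }
static int  put_dreq(struct dreq *d) { return --d->refs == 0; }
static void direct_complete(struct dreq *d) { puts("all I/O complete"); }

/* Models the RPC completion callback: drop this request's reference. */
static void rpc_complete(struct dreq *d)
{
	if (put_dreq(d))
		direct_complete(d);
}

/* Allocate one request struct just before dispatch; on failure, stop scheduling. */
static int dispatch_chunk(struct dreq *d, size_t bytes)
{
	void *data = malloc(64);	/* stands in for nfs_readdata_alloc() */
	if (!data)
		return 0;		/* like the new break on -ENOMEM */
	get_dreq(d);			/* one reference per in-flight request */
	printf("scheduled %zu bytes\n", bytes);
	free(data);
	rpc_complete(d);		/* pretend the RPC finished immediately */
	return 1;
}

int main(void)
{
	struct dreq d = { .refs = 1 };	/* the scheduler's own reference */
	size_t count = 10, rsize = 4;	/* 10-byte "buffer", 4-byte chunks */

	while (count != 0) {
		size_t bytes = count < rsize ? count : rsize;
		if (!dispatch_chunk(&d, bytes))
			break;		/* already-sent requests still complete */
		count -= bytes;
	}
	if (put_dreq(&d))		/* drop scheduler ref; last one completes */
		direct_complete(&d);
	return 0;
}
```

The scheduler holds its own reference for the whole loop (the initial refs of 1, dropped by the final put_dreq()), so the completion path cannot run while chunks are still being dispatched, even if every RPC finishes instantly. That is the role the patch's leading get_dreq() and the extra kref_get() in nfs_direct_req_alloc() play.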
```diff
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b1630d53fbb..e25b7595b7a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -68,8 +68,6 @@ struct nfs_direct_req {
         struct kref             kref;           /* release manager */
 
         /* I/O parameters */
-        struct list_head        list,           /* nfs_read/write_data structs */
-                                rewrite_list;   /* saved nfs_write_data structs */
         struct nfs_open_context *ctx;           /* file open context info */
         struct kiocb *          iocb;           /* controlling i/o request */
         struct inode *          inode;          /* target file of i/o */
@@ -82,6 +80,7 @@ struct nfs_direct_req {
         struct completion       completion;     /* wait for i/o completion */
 
         /* commit state */
+        struct list_head        rewrite_list;   /* saved nfs_write_data structs */
         struct nfs_write_data * commit_data;    /* special write_data for commits */
         int                     flags;
 #define NFS_ODIRECT_DO_COMMIT           (1)     /* an unstable reply was received */
@@ -116,6 +115,11 @@ static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
         return page_count;
 }
 
+static inline unsigned int nfs_max_pages(unsigned int size)
+{
+        return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+}
+
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -164,8 +168,8 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
                 return NULL;
 
         kref_init(&dreq->kref);
+        kref_get(&dreq->kref);
         init_completion(&dreq->completion);
-        INIT_LIST_HEAD(&dreq->list);
         INIT_LIST_HEAD(&dreq->rewrite_list);
         dreq->iocb = NULL;
         dreq->ctx = NULL;
@@ -228,49 +232,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 }
 
 /*
- * Note we also set the number of requests we have in the dreq when we are
- * done.  This prevents races with I/O completion so we will always wait
- * until all requests have been dispatched and completed.
- */
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
-{
-        struct list_head *list;
-        struct nfs_direct_req *dreq;
-        unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-        dreq = nfs_direct_req_alloc();
-        if (!dreq)
-                return NULL;
-
-        list = &dreq->list;
-        for(;;) {
-                struct nfs_read_data *data = nfs_readdata_alloc(rpages);
-
-                if (unlikely(!data)) {
-                        while (!list_empty(list)) {
-                                data = list_entry(list->next,
-                                                  struct nfs_read_data, pages);
-                                list_del(&data->pages);
-                                nfs_readdata_free(data);
-                        }
-                        kref_put(&dreq->kref, nfs_direct_req_release);
-                        return NULL;
-                }
-
-                INIT_LIST_HEAD(&data->pages);
-                list_add(&data->pages, list);
-
-                data->req = (struct nfs_page *) dreq;
-                get_dreq(dreq);
-                if (nbytes <= rsize)
-                        break;
-                nbytes -= rsize;
-        }
-        kref_get(&dreq->kref);
-        return dreq;
-}
-
-/*
  * We must hold a reference to all the pages in this direct read request
  * until the RPCs complete.  This could be long *after* we are woken up in
  * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
@@ -305,42 +266,53 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 };
 
 /*
- * For each nfs_read_data struct that was allocated on the list, dispatch
- * an NFS READ operation.  If get_user_pages() fails, we stop sending reads.
- * Read length accounting is handled by nfs_direct_read_result().
- * Otherwise, if no requests have been sent, just return an error.
+ * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
+ * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
+ * bail and stop sending more reads.  Read length accounting is
+ * handled automatically by nfs_direct_read_result().  Otherwise, if
+ * no requests have been sent, just return an error.
  */
 static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
 {
         struct nfs_open_context *ctx = dreq->ctx;
         struct inode *inode = ctx->dentry->d_inode;
-        struct list_head *list = &dreq->list;
         size_t rsize = NFS_SERVER(inode)->rsize;
+        unsigned int rpages = nfs_max_pages(rsize);
         unsigned int pgbase;
         int result;
         ssize_t started = 0;
-        struct nfs_read_data *data;
+
+        get_dreq(dreq);
 
         pgbase = user_addr & ~PAGE_MASK;
         do {
+                struct nfs_read_data *data;
                 size_t bytes;
 
+                result = -ENOMEM;
+                data = nfs_readdata_alloc(rpages);
+                if (unlikely(!data))
+                        break;
+
                 bytes = rsize;
                 if (count < rsize)
                         bytes = count;
 
-                BUG_ON(list_empty(list));
-                data = list_entry(list->next, struct nfs_read_data, pages);
-                list_del_init(&data->pages);
-
                 data->npages = nfs_direct_count_pages(user_addr, bytes);
                 down_read(&current->mm->mmap_sem);
                 result = get_user_pages(current, current->mm, user_addr,
                                         data->npages, 1, 0, data->pagevec, NULL);
                 up_read(&current->mm->mmap_sem);
-                if (unlikely(result < data->npages))
-                        goto out_err;
+                if (unlikely(result < data->npages)) {
+                        if (result > 0)
+                                nfs_direct_release_pages(data->pagevec, result);
+                        nfs_readdata_release(data);
+                        break;
+                }
 
+                get_dreq(dreq);
+
+                data->req = (struct nfs_page *) dreq;
                 data->inode = inode;
                 data->cred = ctx->cred;
                 data->args.fh = NFS_FH(inode);
@@ -378,21 +350,9 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 
                 count -= bytes;
         } while (count != 0);
-        BUG_ON(!list_empty(list));
-        return 0;
 
-out_err:
-        if (result > 0)
-                nfs_direct_release_pages(data->pagevec, result);
-
-        list_add(&data->pages, list);
-        while (!list_empty(list)) {
-                data = list_entry(list->next, struct nfs_read_data, pages);
-                list_del(&data->pages);
-                nfs_readdata_free(data);
-                if (put_dreq(dreq))
-                        nfs_direct_complete(dreq);
-        }
+        if (put_dreq(dreq))
+                nfs_direct_complete(dreq);
 
         if (started)
                 return 0;
@@ -401,13 +361,13 @@ out_err:
 
 static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
 {
-        ssize_t result;
+        ssize_t result = 0;
         sigset_t oldset;
         struct inode *inode = iocb->ki_filp->f_mapping->host;
         struct rpc_clnt *clnt = NFS_CLIENT(inode);
         struct nfs_direct_req *dreq;
 
-        dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
+        dreq = nfs_direct_req_alloc();
         if (!dreq)
                 return -ENOMEM;
 
@@ -428,9 +388,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 {
-        list_splice_init(&dreq->rewrite_list, &dreq->list);
-        while (!list_empty(&dreq->list)) {
-                struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+        while (!list_empty(&dreq->rewrite_list)) {
+                struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
                 list_del(&data->pages);
                 nfs_direct_release_pages(data->pagevec, data->npages);
                 nfs_writedata_release(data);
@@ -584,47 +543,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
-static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
-{
-        struct list_head *list;
-        struct nfs_direct_req *dreq;
-        unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-        dreq = nfs_direct_req_alloc();
-        if (!dreq)
-                return NULL;
-
-        list = &dreq->list;
-        for(;;) {
-                struct nfs_write_data *data = nfs_writedata_alloc(wpages);
-
-                if (unlikely(!data)) {
-                        while (!list_empty(list)) {
-                                data = list_entry(list->next,
-                                                  struct nfs_write_data, pages);
-                                list_del(&data->pages);
-                                nfs_writedata_free(data);
-                        }
-                        kref_put(&dreq->kref, nfs_direct_req_release);
-                        return NULL;
-                }
-
-                INIT_LIST_HEAD(&data->pages);
-                list_add(&data->pages, list);
-
-                data->req = (struct nfs_page *) dreq;
-                get_dreq(dreq);
-                if (nbytes <= wsize)
-                        break;
-                nbytes -= wsize;
-        }
-
-        nfs_alloc_commit_data(dreq);
-
-        kref_get(&dreq->kref);
-        return dreq;
-}
-
 static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
         struct nfs_write_data *data = calldata;
@@ -677,43 +595,55 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
 };
 
 /*
- * For each nfs_write_data struct that was allocated on the list, dispatch
- * an NFS WRITE operation.  If get_user_pages() fails, we stop sending writes.
- * Write length accounting is handled by nfs_direct_write_result().
- * Otherwise, if no requests have been sent, just return an error.
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes.  Write length accounting is
+ * handled automatically by nfs_direct_write_result().  Otherwise, if
+ * no requests have been sent, just return an error.
  */
 static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
 {
         struct nfs_open_context *ctx = dreq->ctx;
         struct inode *inode = ctx->dentry->d_inode;
-        struct list_head *list = &dreq->list;
         size_t wsize = NFS_SERVER(inode)->wsize;
+        unsigned int wpages = nfs_max_pages(wsize);
         unsigned int pgbase;
         int result;
         ssize_t started = 0;
-        struct nfs_write_data *data;
+
+        get_dreq(dreq);
 
         pgbase = user_addr & ~PAGE_MASK;
         do {
+                struct nfs_write_data *data;
                 size_t bytes;
 
+                result = -ENOMEM;
+                data = nfs_writedata_alloc(wpages);
+                if (unlikely(!data))
+                        break;
+
                 bytes = wsize;
                 if (count < wsize)
                         bytes = count;
 
-                BUG_ON(list_empty(list));
-                data = list_entry(list->next, struct nfs_write_data, pages);
-
                 data->npages = nfs_direct_count_pages(user_addr, bytes);
                 down_read(&current->mm->mmap_sem);
                 result = get_user_pages(current, current->mm, user_addr,
                                         data->npages, 0, 0, data->pagevec, NULL);
                 up_read(&current->mm->mmap_sem);
-                if (unlikely(result < data->npages))
-                        goto out_err;
+                if (unlikely(result < data->npages)) {
+                        if (result > 0)
+                                nfs_direct_release_pages(data->pagevec, result);
+                        nfs_writedata_release(data);
+                        break;
+                }
+
+                get_dreq(dreq);
 
                 list_move_tail(&data->pages, &dreq->rewrite_list);
 
+                data->req = (struct nfs_page *) dreq;
                 data->inode = inode;
                 data->cred = ctx->cred;
                 data->args.fh = NFS_FH(inode);
@@ -752,21 +682,9 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
 
                 count -= bytes;
         } while (count != 0);
-        BUG_ON(!list_empty(list));
-        return 0;
-
-out_err:
-        if (result > 0)
-                nfs_direct_release_pages(data->pagevec, result);
 
-        list_add(&data->pages, list);
-        while (!list_empty(list)) {
-                data = list_entry(list->next, struct nfs_write_data, pages);
-                list_del(&data->pages);
-                nfs_writedata_free(data);
-                if (put_dreq(dreq))
-                        nfs_direct_write_complete(dreq, inode);
-        }
+        if (put_dreq(dreq))
+                nfs_direct_write_complete(dreq, inode);
 
         if (started)
                 return 0;
@@ -775,7 +693,7 @@ out_err:
 
 static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
 {
-        ssize_t result;
+        ssize_t result = 0;
         sigset_t oldset;
         struct inode *inode = iocb->ki_filp->f_mapping->host;
         struct rpc_clnt *clnt = NFS_CLIENT(inode);
@@ -783,9 +701,11 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
         size_t wsize = NFS_SERVER(inode)->wsize;
         int sync = 0;
 
-        dreq = nfs_direct_write_alloc(count, wsize);
+        dreq = nfs_direct_req_alloc();
         if (!dreq)
                 return -ENOMEM;
+        nfs_alloc_commit_data(dreq);
+
         if (dreq->commit_data == NULL || count < wsize)
                 sync = FLUSH_STABLE;
 
```
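One small helper is worth a note: the new nfs_max_pages() turns a byte count into a page count, rounding up, via the standard add-then-shift idiom for ceiling division by the page size. A quick user-space check of the arithmetic (4 KiB pages assumed for illustration; PAGE_CACHE_SIZE and PAGE_CACHE_SHIFT are the kernel's names for these constants):

```c
#include <assert.h>

#define PAGE_SHIFT 12                   /* assume 4 KiB pages for illustration */
#define PAGE_SIZE  (1u << PAGE_SHIFT)

/* Same expression as the patch's nfs_max_pages(): ceil(size / PAGE_SIZE). */
static unsigned int max_pages(unsigned int size)
{
        return (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

int main(void)
{
        assert(max_pages(1) == 1);      /* any nonzero size needs a page */
        assert(max_pages(4096) == 1);   /* exact multiple: no rounding */
        assert(max_pages(4097) == 2);   /* one byte over: round up */
        assert(max_pages(32768) == 8);  /* a typical 32 KiB rsize */
        return 0;
}
```

This is how the scheduling loops size the nfs_read/write_data pagevec for each rsize'd or wsize'd chunk, replacing the open-coded copies of the same expression that the deleted alloc helpers carried.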