about | summary | refs | log | tree | commit | diff | stats
path: root/fs/nfs/direct.c
diff options
context:
space:
mode:
author Chuck Lever <cel@netapp.com> 2006-06-20 12:57:03 -0400
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2006-06-24 13:11:39 -0400
commit 82b145c5a572f7fa7211dffe2097234dc91bcecc (patch)
tree 35689aa653d29f17681f13d89d592c88e7c112e5 /fs/nfs/direct.c
parent 06cf6f2ed0b19629700794727d86ed57b9c0583e (diff)
NFS: alloc nfs_read/write_data as direct I/O is scheduled
Re-arrange the logic in the NFS direct I/O path so that nfs_read/write_data structs are allocated just before they are scheduled, rather than allocating them all at once before we start scheduling requests. Signed-off-by: Chuck Lever <cel@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs/nfs/direct.c')
-rw-r--r-- fs/nfs/direct.c 210
1 files changed, 65 insertions, 145 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b1630d53fbb1..e25b7595b7ad 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -68,8 +68,6 @@ struct nfs_direct_req {
68 struct kref kref; /* release manager */ 68 struct kref kref; /* release manager */
69 69
70 /* I/O parameters */ 70 /* I/O parameters */
71 struct list_head list, /* nfs_read/write_data structs */
72 rewrite_list; /* saved nfs_write_data structs */
73 struct nfs_open_context *ctx; /* file open context info */ 71 struct nfs_open_context *ctx; /* file open context info */
74 struct kiocb * iocb; /* controlling i/o request */ 72 struct kiocb * iocb; /* controlling i/o request */
75 struct inode * inode; /* target file of i/o */ 73 struct inode * inode; /* target file of i/o */
@@ -82,6 +80,7 @@ struct nfs_direct_req {
82 struct completion completion; /* wait for i/o completion */ 80 struct completion completion; /* wait for i/o completion */
83 81
84 /* commit state */ 82 /* commit state */
83 struct list_head rewrite_list; /* saved nfs_write_data structs */
85 struct nfs_write_data * commit_data; /* special write_data for commits */ 84 struct nfs_write_data * commit_data; /* special write_data for commits */
86 int flags; 85 int flags;
87#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ 86#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
@@ -116,6 +115,11 @@ static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
116 return page_count; 115 return page_count;
117} 116}
118 117
118static inline unsigned int nfs_max_pages(unsigned int size)
119{
120 return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
121}
122
119/** 123/**
120 * nfs_direct_IO - NFS address space operation for direct I/O 124 * nfs_direct_IO - NFS address space operation for direct I/O
121 * @rw: direction (read or write) 125 * @rw: direction (read or write)
@@ -164,8 +168,8 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
164 return NULL; 168 return NULL;
165 169
166 kref_init(&dreq->kref); 170 kref_init(&dreq->kref);
171 kref_get(&dreq->kref);
167 init_completion(&dreq->completion); 172 init_completion(&dreq->completion);
168 INIT_LIST_HEAD(&dreq->list);
169 INIT_LIST_HEAD(&dreq->rewrite_list); 173 INIT_LIST_HEAD(&dreq->rewrite_list);
170 dreq->iocb = NULL; 174 dreq->iocb = NULL;
171 dreq->ctx = NULL; 175 dreq->ctx = NULL;
@@ -228,49 +232,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
228} 232}
229 233
230/* 234/*
231 * Note we also set the number of requests we have in the dreq when we are
232 * done. This prevents races with I/O completion so we will always wait
233 * until all requests have been dispatched and completed.
234 */
235static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
236{
237 struct list_head *list;
238 struct nfs_direct_req *dreq;
239 unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
240
241 dreq = nfs_direct_req_alloc();
242 if (!dreq)
243 return NULL;
244
245 list = &dreq->list;
246 for(;;) {
247 struct nfs_read_data *data = nfs_readdata_alloc(rpages);
248
249 if (unlikely(!data)) {
250 while (!list_empty(list)) {
251 data = list_entry(list->next,
252 struct nfs_read_data, pages);
253 list_del(&data->pages);
254 nfs_readdata_free(data);
255 }
256 kref_put(&dreq->kref, nfs_direct_req_release);
257 return NULL;
258 }
259
260 INIT_LIST_HEAD(&data->pages);
261 list_add(&data->pages, list);
262
263 data->req = (struct nfs_page *) dreq;
264 get_dreq(dreq);
265 if (nbytes <= rsize)
266 break;
267 nbytes -= rsize;
268 }
269 kref_get(&dreq->kref);
270 return dreq;
271}
272
273/*
274 * We must hold a reference to all the pages in this direct read request 235 * We must hold a reference to all the pages in this direct read request
275 * until the RPCs complete. This could be long *after* we are woken up in 236 * until the RPCs complete. This could be long *after* we are woken up in
276 * nfs_direct_wait (for instance, if someone hits ^C on a slow server). 237 * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
@@ -305,42 +266,53 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
305}; 266};
306 267
307/* 268/*
308 * For each nfs_read_data struct that was allocated on the list, dispatch 269 * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
309 * an NFS READ operation. If get_user_pages() fails, we stop sending reads. 270 * operation. If nfs_readdata_alloc() or get_user_pages() fails,
310 * Read length accounting is handled by nfs_direct_read_result(). 271 * bail and stop sending more reads. Read length accounting is
311 * Otherwise, if no requests have been sent, just return an error. 272 * handled automatically by nfs_direct_read_result(). Otherwise, if
273 * no requests have been sent, just return an error.
312 */ 274 */
313static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) 275static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
314{ 276{
315 struct nfs_open_context *ctx = dreq->ctx; 277 struct nfs_open_context *ctx = dreq->ctx;
316 struct inode *inode = ctx->dentry->d_inode; 278 struct inode *inode = ctx->dentry->d_inode;
317 struct list_head *list = &dreq->list;
318 size_t rsize = NFS_SERVER(inode)->rsize; 279 size_t rsize = NFS_SERVER(inode)->rsize;
280 unsigned int rpages = nfs_max_pages(rsize);
319 unsigned int pgbase; 281 unsigned int pgbase;
320 int result; 282 int result;
321 ssize_t started = 0; 283 ssize_t started = 0;
322 struct nfs_read_data *data; 284
285 get_dreq(dreq);
323 286
324 pgbase = user_addr & ~PAGE_MASK; 287 pgbase = user_addr & ~PAGE_MASK;
325 do { 288 do {
289 struct nfs_read_data *data;
326 size_t bytes; 290 size_t bytes;
327 291
292 result = -ENOMEM;
293 data = nfs_readdata_alloc(rpages);
294 if (unlikely(!data))
295 break;
296
328 bytes = rsize; 297 bytes = rsize;
329 if (count < rsize) 298 if (count < rsize)
330 bytes = count; 299 bytes = count;
331 300
332 BUG_ON(list_empty(list));
333 data = list_entry(list->next, struct nfs_read_data, pages);
334 list_del_init(&data->pages);
335
336 data->npages = nfs_direct_count_pages(user_addr, bytes); 301 data->npages = nfs_direct_count_pages(user_addr, bytes);
337 down_read(&current->mm->mmap_sem); 302 down_read(&current->mm->mmap_sem);
338 result = get_user_pages(current, current->mm, user_addr, 303 result = get_user_pages(current, current->mm, user_addr,
339 data->npages, 1, 0, data->pagevec, NULL); 304 data->npages, 1, 0, data->pagevec, NULL);
340 up_read(&current->mm->mmap_sem); 305 up_read(&current->mm->mmap_sem);
341 if (unlikely(result < data->npages)) 306 if (unlikely(result < data->npages)) {
342 goto out_err; 307 if (result > 0)
308 nfs_direct_release_pages(data->pagevec, result);
309 nfs_readdata_release(data);
310 break;
311 }
343 312
313 get_dreq(dreq);
314
315 data->req = (struct nfs_page *) dreq;
344 data->inode = inode; 316 data->inode = inode;
345 data->cred = ctx->cred; 317 data->cred = ctx->cred;
346 data->args.fh = NFS_FH(inode); 318 data->args.fh = NFS_FH(inode);
@@ -378,21 +350,9 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
378 350
379 count -= bytes; 351 count -= bytes;
380 } while (count != 0); 352 } while (count != 0);
381 BUG_ON(!list_empty(list));
382 return 0;
383 353
384out_err: 354 if (put_dreq(dreq))
385 if (result > 0) 355 nfs_direct_complete(dreq);
386 nfs_direct_release_pages(data->pagevec, result);
387
388 list_add(&data->pages, list);
389 while (!list_empty(list)) {
390 data = list_entry(list->next, struct nfs_read_data, pages);
391 list_del(&data->pages);
392 nfs_readdata_free(data);
393 if (put_dreq(dreq))
394 nfs_direct_complete(dreq);
395 }
396 356
397 if (started) 357 if (started)
398 return 0; 358 return 0;
@@ -401,13 +361,13 @@ out_err:
401 361
402static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) 362static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
403{ 363{
404 ssize_t result; 364 ssize_t result = 0;
405 sigset_t oldset; 365 sigset_t oldset;
406 struct inode *inode = iocb->ki_filp->f_mapping->host; 366 struct inode *inode = iocb->ki_filp->f_mapping->host;
407 struct rpc_clnt *clnt = NFS_CLIENT(inode); 367 struct rpc_clnt *clnt = NFS_CLIENT(inode);
408 struct nfs_direct_req *dreq; 368 struct nfs_direct_req *dreq;
409 369
410 dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize); 370 dreq = nfs_direct_req_alloc();
411 if (!dreq) 371 if (!dreq)
412 return -ENOMEM; 372 return -ENOMEM;
413 373
@@ -428,9 +388,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
428 388
429static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) 389static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
430{ 390{
431 list_splice_init(&dreq->rewrite_list, &dreq->list); 391 while (!list_empty(&dreq->rewrite_list)) {
432 while (!list_empty(&dreq->list)) { 392 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
433 struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
434 list_del(&data->pages); 393 list_del(&data->pages);
435 nfs_direct_release_pages(data->pagevec, data->npages); 394 nfs_direct_release_pages(data->pagevec, data->npages);
436 nfs_writedata_release(data); 395 nfs_writedata_release(data);
@@ -584,47 +543,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
584} 543}
585#endif 544#endif
586 545
587static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
588{
589 struct list_head *list;
590 struct nfs_direct_req *dreq;
591 unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
592
593 dreq = nfs_direct_req_alloc();
594 if (!dreq)
595 return NULL;
596
597 list = &dreq->list;
598 for(;;) {
599 struct nfs_write_data *data = nfs_writedata_alloc(wpages);
600
601 if (unlikely(!data)) {
602 while (!list_empty(list)) {
603 data = list_entry(list->next,
604 struct nfs_write_data, pages);
605 list_del(&data->pages);
606 nfs_writedata_free(data);
607 }
608 kref_put(&dreq->kref, nfs_direct_req_release);
609 return NULL;
610 }
611
612 INIT_LIST_HEAD(&data->pages);
613 list_add(&data->pages, list);
614
615 data->req = (struct nfs_page *) dreq;
616 get_dreq(dreq);
617 if (nbytes <= wsize)
618 break;
619 nbytes -= wsize;
620 }
621
622 nfs_alloc_commit_data(dreq);
623
624 kref_get(&dreq->kref);
625 return dreq;
626}
627
628static void nfs_direct_write_result(struct rpc_task *task, void *calldata) 546static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
629{ 547{
630 struct nfs_write_data *data = calldata; 548 struct nfs_write_data *data = calldata;
@@ -677,43 +595,55 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
677}; 595};
678 596
679/* 597/*
680 * For each nfs_write_data struct that was allocated on the list, dispatch 598 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
681 * an NFS WRITE operation. If get_user_pages() fails, we stop sending writes. 599 * operation. If nfs_writedata_alloc() or get_user_pages() fails,
682 * Write length accounting is handled by nfs_direct_write_result(). 600 * bail and stop sending more writes. Write length accounting is
683 * Otherwise, if no requests have been sent, just return an error. 601 * handled automatically by nfs_direct_write_result(). Otherwise, if
602 * no requests have been sent, just return an error.
684 */ 603 */
685static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) 604static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
686{ 605{
687 struct nfs_open_context *ctx = dreq->ctx; 606 struct nfs_open_context *ctx = dreq->ctx;
688 struct inode *inode = ctx->dentry->d_inode; 607 struct inode *inode = ctx->dentry->d_inode;
689 struct list_head *list = &dreq->list;
690 size_t wsize = NFS_SERVER(inode)->wsize; 608 size_t wsize = NFS_SERVER(inode)->wsize;
609 unsigned int wpages = nfs_max_pages(wsize);
691 unsigned int pgbase; 610 unsigned int pgbase;
692 int result; 611 int result;
693 ssize_t started = 0; 612 ssize_t started = 0;
694 struct nfs_write_data *data; 613
614 get_dreq(dreq);
695 615
696 pgbase = user_addr & ~PAGE_MASK; 616 pgbase = user_addr & ~PAGE_MASK;
697 do { 617 do {
618 struct nfs_write_data *data;
698 size_t bytes; 619 size_t bytes;
699 620
621 result = -ENOMEM;
622 data = nfs_writedata_alloc(wpages);
623 if (unlikely(!data))
624 break;
625
700 bytes = wsize; 626 bytes = wsize;
701 if (count < wsize) 627 if (count < wsize)
702 bytes = count; 628 bytes = count;
703 629
704 BUG_ON(list_empty(list));
705 data = list_entry(list->next, struct nfs_write_data, pages);
706
707 data->npages = nfs_direct_count_pages(user_addr, bytes); 630 data->npages = nfs_direct_count_pages(user_addr, bytes);
708 down_read(&current->mm->mmap_sem); 631 down_read(&current->mm->mmap_sem);
709 result = get_user_pages(current, current->mm, user_addr, 632 result = get_user_pages(current, current->mm, user_addr,
710 data->npages, 0, 0, data->pagevec, NULL); 633 data->npages, 0, 0, data->pagevec, NULL);
711 up_read(&current->mm->mmap_sem); 634 up_read(&current->mm->mmap_sem);
712 if (unlikely(result < data->npages)) 635 if (unlikely(result < data->npages)) {
713 goto out_err; 636 if (result > 0)
637 nfs_direct_release_pages(data->pagevec, result);
638 nfs_writedata_release(data);
639 break;
640 }
641
642 get_dreq(dreq);
714 643
715 list_move_tail(&data->pages, &dreq->rewrite_list); 644 list_move_tail(&data->pages, &dreq->rewrite_list);
716 645
646 data->req = (struct nfs_page *) dreq;
717 data->inode = inode; 647 data->inode = inode;
718 data->cred = ctx->cred; 648 data->cred = ctx->cred;
719 data->args.fh = NFS_FH(inode); 649 data->args.fh = NFS_FH(inode);
@@ -752,21 +682,9 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
752 682
753 count -= bytes; 683 count -= bytes;
754 } while (count != 0); 684 } while (count != 0);
755 BUG_ON(!list_empty(list));
756 return 0;
757
758out_err:
759 if (result > 0)
760 nfs_direct_release_pages(data->pagevec, result);
761 685
762 list_add(&data->pages, list); 686 if (put_dreq(dreq))
763 while (!list_empty(list)) { 687 nfs_direct_write_complete(dreq, inode);
764 data = list_entry(list->next, struct nfs_write_data, pages);
765 list_del(&data->pages);
766 nfs_writedata_free(data);
767 if (put_dreq(dreq))
768 nfs_direct_write_complete(dreq, inode);
769 }
770 688
771 if (started) 689 if (started)
772 return 0; 690 return 0;
@@ -775,7 +693,7 @@ out_err:
775 693
776static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) 694static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
777{ 695{
778 ssize_t result; 696 ssize_t result = 0;
779 sigset_t oldset; 697 sigset_t oldset;
780 struct inode *inode = iocb->ki_filp->f_mapping->host; 698 struct inode *inode = iocb->ki_filp->f_mapping->host;
781 struct rpc_clnt *clnt = NFS_CLIENT(inode); 699 struct rpc_clnt *clnt = NFS_CLIENT(inode);
@@ -783,9 +701,11 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
783 size_t wsize = NFS_SERVER(inode)->wsize; 701 size_t wsize = NFS_SERVER(inode)->wsize;
784 int sync = 0; 702 int sync = 0;
785 703
786 dreq = nfs_direct_write_alloc(count, wsize); 704 dreq = nfs_direct_req_alloc();
787 if (!dreq) 705 if (!dreq)
788 return -ENOMEM; 706 return -ENOMEM;
707 nfs_alloc_commit_data(dreq);
708
789 if (dreq->commit_data == NULL || count < wsize) 709 if (dreq->commit_data == NULL || count < wsize)
790 sync = FLUSH_STABLE; 710 sync = FLUSH_STABLE;
791 711