diff options
-rw-r--r-- | fs/nfs/direct.c | 54 |
1 files changed, 31 insertions, 23 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index df86e526702f..bcbc213b4033 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -7,11 +7,11 @@ | |||
7 | * | 7 | * |
8 | * There are important applications whose performance or correctness | 8 | * There are important applications whose performance or correctness |
9 | * depends on uncached access to file data. Database clusters | 9 | * depends on uncached access to file data. Database clusters |
10 | * (multiple copies of the same instance running on separate hosts) | 10 | * (multiple copies of the same instance running on separate hosts) |
11 | * implement their own cache coherency protocol that subsumes file | 11 | * implement their own cache coherency protocol that subsumes file |
12 | * system cache protocols. Applications that process datasets | 12 | * system cache protocols. Applications that process datasets |
13 | * considerably larger than the client's memory do not always benefit | 13 | * considerably larger than the client's memory do not always benefit |
14 | * from a local cache. A streaming video server, for instance, has no | 14 | * from a local cache. A streaming video server, for instance, has no |
15 | * need to cache the contents of a file. | 15 | * need to cache the contents of a file. |
16 | * | 16 | * |
17 | * When an application requests uncached I/O, all read and write requests | 17 | * When an application requests uncached I/O, all read and write requests |
@@ -34,6 +34,7 @@ | |||
34 | * 08 Jun 2003 Port to 2.5 APIs --cel | 34 | * 08 Jun 2003 Port to 2.5 APIs --cel |
35 | * 31 Mar 2004 Handle direct I/O without VFS support --cel | 35 | * 31 Mar 2004 Handle direct I/O without VFS support --cel |
36 | * 15 Sep 2004 Parallel async reads --cel | 36 | * 15 Sep 2004 Parallel async reads --cel |
37 | * 04 May 2005 support O_DIRECT with aio --cel | ||
37 | * | 38 | * |
38 | */ | 39 | */ |
39 | 40 | ||
@@ -67,11 +68,11 @@ static kmem_cache_t *nfs_direct_cachep; | |||
67 | */ | 68 | */ |
68 | struct nfs_direct_req { | 69 | struct nfs_direct_req { |
69 | struct kref kref; /* release manager */ | 70 | struct kref kref; /* release manager */ |
70 | struct list_head list; /* nfs_read_data structs */ | 71 | struct list_head list; /* nfs_read/write_data structs */ |
71 | struct file * filp; /* file descriptor */ | 72 | struct file * filp; /* file descriptor */ |
72 | struct kiocb * iocb; /* controlling i/o request */ | 73 | struct kiocb * iocb; /* controlling i/o request */ |
73 | wait_queue_head_t wait; /* wait for i/o completion */ | 74 | wait_queue_head_t wait; /* wait for i/o completion */ |
74 | struct inode * inode; /* target file of I/O */ | 75 | struct inode * inode; /* target file of i/o */ |
75 | struct page ** pages; /* pages in our buffer */ | 76 | struct page ** pages; /* pages in our buffer */ |
76 | unsigned int npages; /* count of pages */ | 77 | unsigned int npages; /* count of pages */ |
77 | atomic_t complete, /* i/os we're waiting for */ | 78 | atomic_t complete, /* i/os we're waiting for */ |
@@ -110,7 +111,6 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz | |||
110 | size_t array_size; | 111 | size_t array_size; |
111 | 112 | ||
112 | /* set an arbitrary limit to prevent type overflow */ | 113 | /* set an arbitrary limit to prevent type overflow */ |
113 | /* XXX: this can probably be as large as INT_MAX */ | ||
114 | if (size > MAX_DIRECTIO_SIZE) { | 114 | if (size > MAX_DIRECTIO_SIZE) { |
115 | *pages = NULL; | 115 | *pages = NULL; |
116 | return -EFBIG; | 116 | return -EFBIG; |
@@ -294,7 +294,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = { | |||
294 | * For each nfs_read_data struct that was allocated on the list, dispatch | 294 | * For each nfs_read_data struct that was allocated on the list, dispatch |
295 | * an NFS READ operation | 295 | * an NFS READ operation |
296 | */ | 296 | */ |
297 | static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset) | 297 | static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) |
298 | { | 298 | { |
299 | struct file *file = dreq->filp; | 299 | struct file *file = dreq->filp; |
300 | struct inode *inode = file->f_mapping->host; | 300 | struct inode *inode = file->f_mapping->host; |
@@ -322,7 +322,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long | |||
322 | data->cred = ctx->cred; | 322 | data->cred = ctx->cred; |
323 | data->args.fh = NFS_FH(inode); | 323 | data->args.fh = NFS_FH(inode); |
324 | data->args.context = ctx; | 324 | data->args.context = ctx; |
325 | data->args.offset = file_offset; | 325 | data->args.offset = pos; |
326 | data->args.pgbase = pgbase; | 326 | data->args.pgbase = pgbase; |
327 | data->args.pages = &pages[curpage]; | 327 | data->args.pages = &pages[curpage]; |
328 | data->args.count = bytes; | 328 | data->args.count = bytes; |
@@ -347,7 +347,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long | |||
347 | bytes, | 347 | bytes, |
348 | (unsigned long long)data->args.offset); | 348 | (unsigned long long)data->args.offset); |
349 | 349 | ||
350 | file_offset += bytes; | 350 | pos += bytes; |
351 | pgbase += bytes; | 351 | pgbase += bytes; |
352 | curpage += pgbase >> PAGE_SHIFT; | 352 | curpage += pgbase >> PAGE_SHIFT; |
353 | pgbase &= ~PAGE_MASK; | 353 | pgbase &= ~PAGE_MASK; |
@@ -356,7 +356,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long | |||
356 | } while (count != 0); | 356 | } while (count != 0); |
357 | } | 357 | } |
358 | 358 | ||
359 | static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) | 359 | static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) |
360 | { | 360 | { |
361 | ssize_t result; | 361 | ssize_t result; |
362 | sigset_t oldset; | 362 | sigset_t oldset; |
@@ -377,7 +377,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size | |||
377 | 377 | ||
378 | nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); | 378 | nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); |
379 | rpc_clnt_sigmask(clnt, &oldset); | 379 | rpc_clnt_sigmask(clnt, &oldset); |
380 | nfs_direct_read_schedule(dreq, user_addr, count, file_offset); | 380 | nfs_direct_read_schedule(dreq, user_addr, count, pos); |
381 | result = nfs_direct_wait(dreq); | 381 | result = nfs_direct_wait(dreq); |
382 | rpc_clnt_sigunmask(clnt, &oldset); | 382 | rpc_clnt_sigunmask(clnt, &oldset); |
383 | 383 | ||
@@ -459,7 +459,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = { | |||
459 | * XXX: For now, support only FILE_SYNC writes. Later we may add | 459 | * XXX: For now, support only FILE_SYNC writes. Later we may add |
460 | * support for UNSTABLE + COMMIT. | 460 | * support for UNSTABLE + COMMIT. |
461 | */ | 461 | */ |
462 | static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset) | 462 | static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) |
463 | { | 463 | { |
464 | struct file *file = dreq->filp; | 464 | struct file *file = dreq->filp; |
465 | struct inode *inode = file->f_mapping->host; | 465 | struct inode *inode = file->f_mapping->host; |
@@ -487,7 +487,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long | |||
487 | data->cred = ctx->cred; | 487 | data->cred = ctx->cred; |
488 | data->args.fh = NFS_FH(inode); | 488 | data->args.fh = NFS_FH(inode); |
489 | data->args.context = ctx; | 489 | data->args.context = ctx; |
490 | data->args.offset = file_offset; | 490 | data->args.offset = pos; |
491 | data->args.pgbase = pgbase; | 491 | data->args.pgbase = pgbase; |
492 | data->args.pages = &pages[curpage]; | 492 | data->args.pages = &pages[curpage]; |
493 | data->args.count = bytes; | 493 | data->args.count = bytes; |
@@ -513,7 +513,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long | |||
513 | bytes, | 513 | bytes, |
514 | (unsigned long long)data->args.offset); | 514 | (unsigned long long)data->args.offset); |
515 | 515 | ||
516 | file_offset += bytes; | 516 | pos += bytes; |
517 | pgbase += bytes; | 517 | pgbase += bytes; |
518 | curpage += pgbase >> PAGE_SHIFT; | 518 | curpage += pgbase >> PAGE_SHIFT; |
519 | pgbase &= ~PAGE_MASK; | 519 | pgbase &= ~PAGE_MASK; |
@@ -522,7 +522,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long | |||
522 | } while (count != 0); | 522 | } while (count != 0); |
523 | } | 523 | } |
524 | 524 | ||
525 | static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) | 525 | static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) |
526 | { | 526 | { |
527 | ssize_t result; | 527 | ssize_t result; |
528 | sigset_t oldset; | 528 | sigset_t oldset; |
@@ -546,7 +546,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz | |||
546 | nfs_begin_data_update(inode); | 546 | nfs_begin_data_update(inode); |
547 | 547 | ||
548 | rpc_clnt_sigmask(clnt, &oldset); | 548 | rpc_clnt_sigmask(clnt, &oldset); |
549 | nfs_direct_write_schedule(dreq, user_addr, count, file_offset); | 549 | nfs_direct_write_schedule(dreq, user_addr, count, pos); |
550 | result = nfs_direct_wait(dreq); | 550 | result = nfs_direct_wait(dreq); |
551 | rpc_clnt_sigunmask(clnt, &oldset); | 551 | rpc_clnt_sigunmask(clnt, &oldset); |
552 | 552 | ||
@@ -557,18 +557,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz | |||
557 | * nfs_file_direct_read - file direct read operation for NFS files | 557 | * nfs_file_direct_read - file direct read operation for NFS files |
558 | * @iocb: target I/O control block | 558 | * @iocb: target I/O control block |
559 | * @buf: user's buffer into which to read data | 559 | * @buf: user's buffer into which to read data |
560 | * count: number of bytes to read | 560 | * @count: number of bytes to read |
561 | * pos: byte offset in file where reading starts | 561 | * @pos: byte offset in file where reading starts |
562 | * | 562 | * |
563 | * We use this function for direct reads instead of calling | 563 | * We use this function for direct reads instead of calling |
564 | * generic_file_aio_read() in order to avoid gfar's check to see if | 564 | * generic_file_aio_read() in order to avoid gfar's check to see if |
565 | * the request starts before the end of the file. For that check | 565 | * the request starts before the end of the file. For that check |
566 | * to work, we must generate a GETATTR before each direct read, and | 566 | * to work, we must generate a GETATTR before each direct read, and |
567 | * even then there is a window between the GETATTR and the subsequent | 567 | * even then there is a window between the GETATTR and the subsequent |
568 | * READ where the file size could change. So our preference is simply | 568 | * READ where the file size could change. Our preference is simply |
569 | * to do all reads the application wants, and the server will take | 569 | * to do all reads the application wants, and the server will take |
570 | * care of managing the end of file boundary. | 570 | * care of managing the end of file boundary. |
571 | * | 571 | * |
572 | * This function also eliminates unnecessarily updating the file's | 572 | * This function also eliminates unnecessarily updating the file's |
573 | * atime locally, as the NFS server sets the file's atime, and this | 573 | * atime locally, as the NFS server sets the file's atime, and this |
574 | * client must read the updated atime from the server back into its | 574 | * client must read the updated atime from the server back into its |
@@ -621,8 +621,8 @@ out: | |||
621 | * nfs_file_direct_write - file direct write operation for NFS files | 621 | * nfs_file_direct_write - file direct write operation for NFS files |
622 | * @iocb: target I/O control block | 622 | * @iocb: target I/O control block |
623 | * @buf: user's buffer from which to write data | 623 | * @buf: user's buffer from which to write data |
624 | * count: number of bytes to write | 624 | * @count: number of bytes to write |
625 | * pos: byte offset in file where writing starts | 625 | * @pos: byte offset in file where writing starts |
626 | * | 626 | * |
627 | * We use this function for direct writes instead of calling | 627 | * We use this function for direct writes instead of calling |
628 | * generic_file_aio_write() in order to avoid taking the inode | 628 | * generic_file_aio_write() in order to avoid taking the inode |
@@ -703,6 +703,10 @@ out: | |||
703 | return retval; | 703 | return retval; |
704 | } | 704 | } |
705 | 705 | ||
706 | /** | ||
707 | * nfs_init_directcache - create a slab cache for nfs_direct_req structures | ||
708 | * | ||
709 | */ | ||
706 | int nfs_init_directcache(void) | 710 | int nfs_init_directcache(void) |
707 | { | 711 | { |
708 | nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", | 712 | nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", |
@@ -715,6 +719,10 @@ int nfs_init_directcache(void) | |||
715 | return 0; | 719 | return 0; |
716 | } | 720 | } |
717 | 721 | ||
722 | /** | ||
723 | * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures | |
724 | * | ||
725 | */ | ||
718 | void nfs_destroy_directcache(void) | 726 | void nfs_destroy_directcache(void) |
719 | { | 727 | { |
720 | if (kmem_cache_destroy(nfs_direct_cachep)) | 728 | if (kmem_cache_destroy(nfs_direct_cachep)) |