diff options
author | Chuck Lever <cel@netapp.com> | 2005-11-30 18:09:02 -0500 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-01-06 14:58:49 -0500 |
commit | 40859d7ee64ed6bfad8a4e93f9bb5c1074afadff (patch) | |
tree | ed4069423c3d6551035d5b6116f50452cdac4103 /include | |
parent | 325cfed9ae901320e9234b18c21434b783dbe342 (diff) |
NFS: support large reads and writes on the wire
Most NFS server implementations allow reads and writes of up to 64KB on the
wire, and some allow more: the Solaris NFS server, for instance, allows up to a megabyte.
Now the Linux NFS client supports transfer sizes up to 1MB, too. This will
help reduce protocol and context switch overhead on read/write intensive NFS
workloads, and support larger atomic read and write operations on servers
that support them.
Test-plan:
Connectathon and iozone on mount point with wsize=rsize>32768 over TCP.
Tests with NFS over UDP to verify the maximum RPC payload size cap.
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/nfs_fs.h | 41 | ||||
-rw-r--r-- | include/linux/nfs_xdr.h | 29 | ||||
-rw-r--r-- | include/linux/sunrpc/xdr.h | 5 |
3 files changed, 51 insertions, 24 deletions
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4dff705d2ff2..d38010ba6477 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -38,9 +38,6 @@ | |||
38 | # define NFS_DEBUG | 38 | # define NFS_DEBUG |
39 | #endif | 39 | #endif |
40 | 40 | ||
41 | #define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 | ||
42 | #define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 | ||
43 | |||
44 | /* Default timeout values */ | 41 | /* Default timeout values */ |
45 | #define NFS_MAX_UDP_TIMEOUT (60*HZ) | 42 | #define NFS_MAX_UDP_TIMEOUT (60*HZ) |
46 | #define NFS_MAX_TCP_TIMEOUT (600*HZ) | 43 | #define NFS_MAX_TCP_TIMEOUT (600*HZ) |
@@ -462,18 +459,33 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page) | |||
462 | */ | 459 | */ |
463 | extern mempool_t *nfs_wdata_mempool; | 460 | extern mempool_t *nfs_wdata_mempool; |
464 | 461 | ||
465 | static inline struct nfs_write_data *nfs_writedata_alloc(void) | 462 | static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) |
466 | { | 463 | { |
467 | struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); | 464 | struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); |
465 | |||
468 | if (p) { | 466 | if (p) { |
469 | memset(p, 0, sizeof(*p)); | 467 | memset(p, 0, sizeof(*p)); |
470 | INIT_LIST_HEAD(&p->pages); | 468 | INIT_LIST_HEAD(&p->pages); |
469 | if (pagecount < NFS_PAGEVEC_SIZE) | ||
470 | p->pagevec = &p->page_array[0]; | ||
471 | else { | ||
472 | size_t size = ++pagecount * sizeof(struct page *); | ||
473 | p->pagevec = kmalloc(size, GFP_NOFS); | ||
474 | if (p->pagevec) { | ||
475 | memset(p->pagevec, 0, size); | ||
476 | } else { | ||
477 | mempool_free(p, nfs_wdata_mempool); | ||
478 | p = NULL; | ||
479 | } | ||
480 | } | ||
471 | } | 481 | } |
472 | return p; | 482 | return p; |
473 | } | 483 | } |
474 | 484 | ||
475 | static inline void nfs_writedata_free(struct nfs_write_data *p) | 485 | static inline void nfs_writedata_free(struct nfs_write_data *p) |
476 | { | 486 | { |
487 | if (p && (p->pagevec != &p->page_array[0])) | ||
488 | kfree(p->pagevec); | ||
477 | mempool_free(p, nfs_wdata_mempool); | 489 | mempool_free(p, nfs_wdata_mempool); |
478 | } | 490 | } |
479 | 491 | ||
@@ -492,16 +504,33 @@ extern void nfs_readdata_release(void *data); | |||
492 | */ | 504 | */ |
493 | extern mempool_t *nfs_rdata_mempool; | 505 | extern mempool_t *nfs_rdata_mempool; |
494 | 506 | ||
495 | static inline struct nfs_read_data *nfs_readdata_alloc(void) | 507 | static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) |
496 | { | 508 | { |
497 | struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); | 509 | struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); |
498 | if (p) | 510 | |
511 | if (p) { | ||
499 | memset(p, 0, sizeof(*p)); | 512 | memset(p, 0, sizeof(*p)); |
513 | INIT_LIST_HEAD(&p->pages); | ||
514 | if (pagecount < NFS_PAGEVEC_SIZE) | ||
515 | p->pagevec = &p->page_array[0]; | ||
516 | else { | ||
517 | size_t size = ++pagecount * sizeof(struct page *); | ||
518 | p->pagevec = kmalloc(size, GFP_NOFS); | ||
519 | if (p->pagevec) { | ||
520 | memset(p->pagevec, 0, size); | ||
521 | } else { | ||
522 | mempool_free(p, nfs_rdata_mempool); | ||
523 | p = NULL; | ||
524 | } | ||
525 | } | ||
526 | } | ||
500 | return p; | 527 | return p; |
501 | } | 528 | } |
502 | 529 | ||
503 | static inline void nfs_readdata_free(struct nfs_read_data *p) | 530 | static inline void nfs_readdata_free(struct nfs_read_data *p) |
504 | { | 531 | { |
532 | if (p && (p->pagevec != &p->page_array[0])) | ||
533 | kfree(p->pagevec); | ||
505 | mempool_free(p, nfs_rdata_mempool); | 534 | mempool_free(p, nfs_rdata_mempool); |
506 | } | 535 | } |
507 | 536 | ||
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b8b0eed98ec9..9f422fd87673 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
@@ -4,6 +4,16 @@ | |||
4 | #include <linux/sunrpc/xprt.h> | 4 | #include <linux/sunrpc/xprt.h> |
5 | #include <linux/nfsacl.h> | 5 | #include <linux/nfsacl.h> |
6 | 6 | ||
7 | /* | ||
8 | * To change the maximum rsize and wsize supported by the NFS client, adjust | ||
9 | * NFS_MAX_FILE_IO_SIZE. 64KB is a typical maximum, but some servers can | ||
10 | * support a megabyte or more. The default is left at 4096 bytes, which is | ||
11 | * reasonable for NFS over UDP. | ||
12 | */ | ||
13 | #define NFS_MAX_FILE_IO_SIZE (1048576U) | ||
14 | #define NFS_DEF_FILE_IO_SIZE (4096U) | ||
15 | #define NFS_MIN_FILE_IO_SIZE (1024U) | ||
16 | |||
7 | struct nfs4_fsid { | 17 | struct nfs4_fsid { |
8 | __u64 major; | 18 | __u64 major; |
9 | __u64 minor; | 19 | __u64 minor; |
@@ -215,12 +225,6 @@ struct nfs4_delegreturnargs { | |||
215 | /* | 225 | /* |
216 | * Arguments to the read call. | 226 | * Arguments to the read call. |
217 | */ | 227 | */ |
218 | |||
219 | #define NFS_READ_MAXIOV (9U) | ||
220 | #if (NFS_READ_MAXIOV > (MAX_IOVEC -2)) | ||
221 | #error "NFS_READ_MAXIOV is too large" | ||
222 | #endif | ||
223 | |||
224 | struct nfs_readargs { | 228 | struct nfs_readargs { |
225 | struct nfs_fh * fh; | 229 | struct nfs_fh * fh; |
226 | struct nfs_open_context *context; | 230 | struct nfs_open_context *context; |
@@ -239,11 +243,6 @@ struct nfs_readres { | |||
239 | /* | 243 | /* |
240 | * Arguments to the write call. | 244 | * Arguments to the write call. |
241 | */ | 245 | */ |
242 | #define NFS_WRITE_MAXIOV (9U) | ||
243 | #if (NFS_WRITE_MAXIOV > (MAX_IOVEC -2)) | ||
244 | #error "NFS_WRITE_MAXIOV is too large" | ||
245 | #endif | ||
246 | |||
247 | struct nfs_writeargs { | 246 | struct nfs_writeargs { |
248 | struct nfs_fh * fh; | 247 | struct nfs_fh * fh; |
249 | struct nfs_open_context *context; | 248 | struct nfs_open_context *context; |
@@ -674,6 +673,8 @@ struct nfs4_server_caps_res { | |||
674 | 673 | ||
675 | struct nfs_page; | 674 | struct nfs_page; |
676 | 675 | ||
676 | #define NFS_PAGEVEC_SIZE (8U) | ||
677 | |||
677 | struct nfs_read_data { | 678 | struct nfs_read_data { |
678 | int flags; | 679 | int flags; |
679 | struct rpc_task task; | 680 | struct rpc_task task; |
@@ -682,13 +683,14 @@ struct nfs_read_data { | |||
682 | struct nfs_fattr fattr; /* fattr storage */ | 683 | struct nfs_fattr fattr; /* fattr storage */ |
683 | struct list_head pages; /* Coalesced read requests */ | 684 | struct list_head pages; /* Coalesced read requests */ |
684 | struct nfs_page *req; /* multi ops per nfs_page */ | 685 | struct nfs_page *req; /* multi ops per nfs_page */ |
685 | struct page *pagevec[NFS_READ_MAXIOV]; | 686 | struct page **pagevec; |
686 | struct nfs_readargs args; | 687 | struct nfs_readargs args; |
687 | struct nfs_readres res; | 688 | struct nfs_readres res; |
688 | #ifdef CONFIG_NFS_V4 | 689 | #ifdef CONFIG_NFS_V4 |
689 | unsigned long timestamp; /* For lease renewal */ | 690 | unsigned long timestamp; /* For lease renewal */ |
690 | #endif | 691 | #endif |
691 | void (*complete) (struct nfs_read_data *, int); | 692 | void (*complete) (struct nfs_read_data *, int); |
693 | struct page *page_array[NFS_PAGEVEC_SIZE + 1]; | ||
692 | }; | 694 | }; |
693 | 695 | ||
694 | struct nfs_write_data { | 696 | struct nfs_write_data { |
@@ -700,13 +702,14 @@ struct nfs_write_data { | |||
700 | struct nfs_writeverf verf; | 702 | struct nfs_writeverf verf; |
701 | struct list_head pages; /* Coalesced requests we wish to flush */ | 703 | struct list_head pages; /* Coalesced requests we wish to flush */ |
702 | struct nfs_page *req; /* multi ops per nfs_page */ | 704 | struct nfs_page *req; /* multi ops per nfs_page */ |
703 | struct page *pagevec[NFS_WRITE_MAXIOV]; | 705 | struct page **pagevec; |
704 | struct nfs_writeargs args; /* argument struct */ | 706 | struct nfs_writeargs args; /* argument struct */ |
705 | struct nfs_writeres res; /* result struct */ | 707 | struct nfs_writeres res; /* result struct */ |
706 | #ifdef CONFIG_NFS_V4 | 708 | #ifdef CONFIG_NFS_V4 |
707 | unsigned long timestamp; /* For lease renewal */ | 709 | unsigned long timestamp; /* For lease renewal */ |
708 | #endif | 710 | #endif |
709 | void (*complete) (struct nfs_write_data *, int); | 711 | void (*complete) (struct nfs_write_data *, int); |
712 | struct page *page_array[NFS_PAGEVEC_SIZE + 1]; | ||
710 | }; | 713 | }; |
711 | 714 | ||
712 | struct nfs_access_entry; | 715 | struct nfs_access_entry; |
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5da968729cf8..5676794ee34f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h | |||
@@ -135,11 +135,6 @@ xdr_adjust_iovec(struct kvec *iov, u32 *p) | |||
135 | } | 135 | } |
136 | 136 | ||
137 | /* | 137 | /* |
138 | * Maximum number of iov's we use. | ||
139 | */ | ||
140 | #define MAX_IOVEC (12) | ||
141 | |||
142 | /* | ||
143 | * XDR buffer helper functions | 138 | * XDR buffer helper functions |
144 | */ | 139 | */ |
145 | extern void xdr_shift_buf(struct xdr_buf *, size_t); | 140 | extern void xdr_shift_buf(struct xdr_buf *, size_t); |