diff options
| author | Chuck Lever <cel@netapp.com> | 2005-11-30 18:09:02 -0500 |
|---|---|---|
| committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-01-06 14:58:49 -0500 |
| commit | 40859d7ee64ed6bfad8a4e93f9bb5c1074afadff (patch) | |
| tree | ed4069423c3d6551035d5b6116f50452cdac4103 /include/linux | |
| parent | 325cfed9ae901320e9234b18c21434b783dbe342 (diff) | |
NFS: support large reads and writes on the wire
Most NFS server implementations allow reads and writes of up to 64KB on the
wire, and some allow more: the Solaris NFS server, for instance, allows up to
a megabyte.
Now the Linux NFS client supports transfer sizes up to 1MB, too. This will
help reduce protocol and context switch overhead on read/write intensive NFS
workloads, and support larger atomic read and write operations on servers
that support them.
Test-plan:
Connectathon and iozone on mount point with wsize=rsize>32768 over TCP.
Tests with NFS over UDP to verify the maximum RPC payload size cap.
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/nfs_fs.h | 41 | ||||
| -rw-r--r-- | include/linux/nfs_xdr.h | 29 | ||||
| -rw-r--r-- | include/linux/sunrpc/xdr.h | 5 |
3 files changed, 51 insertions, 24 deletions
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4dff705d2ff2..d38010ba6477 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
| @@ -38,9 +38,6 @@ | |||
| 38 | # define NFS_DEBUG | 38 | # define NFS_DEBUG |
| 39 | #endif | 39 | #endif |
| 40 | 40 | ||
| 41 | #define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 | ||
| 42 | #define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 | ||
| 43 | |||
| 44 | /* Default timeout values */ | 41 | /* Default timeout values */ |
| 45 | #define NFS_MAX_UDP_TIMEOUT (60*HZ) | 42 | #define NFS_MAX_UDP_TIMEOUT (60*HZ) |
| 46 | #define NFS_MAX_TCP_TIMEOUT (600*HZ) | 43 | #define NFS_MAX_TCP_TIMEOUT (600*HZ) |
| @@ -462,18 +459,33 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page) | |||
| 462 | */ | 459 | */ |
| 463 | extern mempool_t *nfs_wdata_mempool; | 460 | extern mempool_t *nfs_wdata_mempool; |
| 464 | 461 | ||
| 465 | static inline struct nfs_write_data *nfs_writedata_alloc(void) | 462 | static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) |
| 466 | { | 463 | { |
| 467 | struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); | 464 | struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); |
| 465 | |||
| 468 | if (p) { | 466 | if (p) { |
| 469 | memset(p, 0, sizeof(*p)); | 467 | memset(p, 0, sizeof(*p)); |
| 470 | INIT_LIST_HEAD(&p->pages); | 468 | INIT_LIST_HEAD(&p->pages); |
| 469 | if (pagecount < NFS_PAGEVEC_SIZE) | ||
| 470 | p->pagevec = &p->page_array[0]; | ||
| 471 | else { | ||
| 472 | size_t size = ++pagecount * sizeof(struct page *); | ||
| 473 | p->pagevec = kmalloc(size, GFP_NOFS); | ||
| 474 | if (p->pagevec) { | ||
| 475 | memset(p->pagevec, 0, size); | ||
| 476 | } else { | ||
| 477 | mempool_free(p, nfs_wdata_mempool); | ||
| 478 | p = NULL; | ||
| 479 | } | ||
| 480 | } | ||
| 471 | } | 481 | } |
| 472 | return p; | 482 | return p; |
| 473 | } | 483 | } |
| 474 | 484 | ||
| 475 | static inline void nfs_writedata_free(struct nfs_write_data *p) | 485 | static inline void nfs_writedata_free(struct nfs_write_data *p) |
| 476 | { | 486 | { |
| 487 | if (p && (p->pagevec != &p->page_array[0])) | ||
| 488 | kfree(p->pagevec); | ||
| 477 | mempool_free(p, nfs_wdata_mempool); | 489 | mempool_free(p, nfs_wdata_mempool); |
| 478 | } | 490 | } |
| 479 | 491 | ||
| @@ -492,16 +504,33 @@ extern void nfs_readdata_release(void *data); | |||
| 492 | */ | 504 | */ |
| 493 | extern mempool_t *nfs_rdata_mempool; | 505 | extern mempool_t *nfs_rdata_mempool; |
| 494 | 506 | ||
| 495 | static inline struct nfs_read_data *nfs_readdata_alloc(void) | 507 | static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) |
| 496 | { | 508 | { |
| 497 | struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); | 509 | struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); |
| 498 | if (p) | 510 | |
| 511 | if (p) { | ||
| 499 | memset(p, 0, sizeof(*p)); | 512 | memset(p, 0, sizeof(*p)); |
| 513 | INIT_LIST_HEAD(&p->pages); | ||
| 514 | if (pagecount < NFS_PAGEVEC_SIZE) | ||
| 515 | p->pagevec = &p->page_array[0]; | ||
| 516 | else { | ||
| 517 | size_t size = ++pagecount * sizeof(struct page *); | ||
| 518 | p->pagevec = kmalloc(size, GFP_NOFS); | ||
| 519 | if (p->pagevec) { | ||
| 520 | memset(p->pagevec, 0, size); | ||
| 521 | } else { | ||
| 522 | mempool_free(p, nfs_rdata_mempool); | ||
| 523 | p = NULL; | ||
| 524 | } | ||
| 525 | } | ||
| 526 | } | ||
| 500 | return p; | 527 | return p; |
| 501 | } | 528 | } |
| 502 | 529 | ||
| 503 | static inline void nfs_readdata_free(struct nfs_read_data *p) | 530 | static inline void nfs_readdata_free(struct nfs_read_data *p) |
| 504 | { | 531 | { |
| 532 | if (p && (p->pagevec != &p->page_array[0])) | ||
| 533 | kfree(p->pagevec); | ||
| 505 | mempool_free(p, nfs_rdata_mempool); | 534 | mempool_free(p, nfs_rdata_mempool); |
| 506 | } | 535 | } |
| 507 | 536 | ||
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b8b0eed98ec9..9f422fd87673 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
| @@ -4,6 +4,16 @@ | |||
| 4 | #include <linux/sunrpc/xprt.h> | 4 | #include <linux/sunrpc/xprt.h> |
| 5 | #include <linux/nfsacl.h> | 5 | #include <linux/nfsacl.h> |
| 6 | 6 | ||
| 7 | /* | ||
| 8 | * To change the maximum rsize and wsize supported by the NFS client, adjust | ||
| 9 | * NFS_MAX_FILE_IO_SIZE. 64KB is a typical maximum, but some servers can | ||
| 10 | * support a megabyte or more. The default is left at 4096 bytes, which is | ||
| 11 | * reasonable for NFS over UDP. | ||
| 12 | */ | ||
| 13 | #define NFS_MAX_FILE_IO_SIZE (1048576U) | ||
| 14 | #define NFS_DEF_FILE_IO_SIZE (4096U) | ||
| 15 | #define NFS_MIN_FILE_IO_SIZE (1024U) | ||
| 16 | |||
| 7 | struct nfs4_fsid { | 17 | struct nfs4_fsid { |
| 8 | __u64 major; | 18 | __u64 major; |
| 9 | __u64 minor; | 19 | __u64 minor; |
| @@ -215,12 +225,6 @@ struct nfs4_delegreturnargs { | |||
| 215 | /* | 225 | /* |
| 216 | * Arguments to the read call. | 226 | * Arguments to the read call. |
| 217 | */ | 227 | */ |
| 218 | |||
| 219 | #define NFS_READ_MAXIOV (9U) | ||
| 220 | #if (NFS_READ_MAXIOV > (MAX_IOVEC -2)) | ||
| 221 | #error "NFS_READ_MAXIOV is too large" | ||
| 222 | #endif | ||
| 223 | |||
| 224 | struct nfs_readargs { | 228 | struct nfs_readargs { |
| 225 | struct nfs_fh * fh; | 229 | struct nfs_fh * fh; |
| 226 | struct nfs_open_context *context; | 230 | struct nfs_open_context *context; |
| @@ -239,11 +243,6 @@ struct nfs_readres { | |||
| 239 | /* | 243 | /* |
| 240 | * Arguments to the write call. | 244 | * Arguments to the write call. |
| 241 | */ | 245 | */ |
| 242 | #define NFS_WRITE_MAXIOV (9U) | ||
| 243 | #if (NFS_WRITE_MAXIOV > (MAX_IOVEC -2)) | ||
| 244 | #error "NFS_WRITE_MAXIOV is too large" | ||
| 245 | #endif | ||
| 246 | |||
| 247 | struct nfs_writeargs { | 246 | struct nfs_writeargs { |
| 248 | struct nfs_fh * fh; | 247 | struct nfs_fh * fh; |
| 249 | struct nfs_open_context *context; | 248 | struct nfs_open_context *context; |
| @@ -674,6 +673,8 @@ struct nfs4_server_caps_res { | |||
| 674 | 673 | ||
| 675 | struct nfs_page; | 674 | struct nfs_page; |
| 676 | 675 | ||
| 676 | #define NFS_PAGEVEC_SIZE (8U) | ||
| 677 | |||
| 677 | struct nfs_read_data { | 678 | struct nfs_read_data { |
| 678 | int flags; | 679 | int flags; |
| 679 | struct rpc_task task; | 680 | struct rpc_task task; |
| @@ -682,13 +683,14 @@ struct nfs_read_data { | |||
| 682 | struct nfs_fattr fattr; /* fattr storage */ | 683 | struct nfs_fattr fattr; /* fattr storage */ |
| 683 | struct list_head pages; /* Coalesced read requests */ | 684 | struct list_head pages; /* Coalesced read requests */ |
| 684 | struct nfs_page *req; /* multi ops per nfs_page */ | 685 | struct nfs_page *req; /* multi ops per nfs_page */ |
| 685 | struct page *pagevec[NFS_READ_MAXIOV]; | 686 | struct page **pagevec; |
| 686 | struct nfs_readargs args; | 687 | struct nfs_readargs args; |
| 687 | struct nfs_readres res; | 688 | struct nfs_readres res; |
| 688 | #ifdef CONFIG_NFS_V4 | 689 | #ifdef CONFIG_NFS_V4 |
| 689 | unsigned long timestamp; /* For lease renewal */ | 690 | unsigned long timestamp; /* For lease renewal */ |
| 690 | #endif | 691 | #endif |
| 691 | void (*complete) (struct nfs_read_data *, int); | 692 | void (*complete) (struct nfs_read_data *, int); |
| 693 | struct page *page_array[NFS_PAGEVEC_SIZE + 1]; | ||
| 692 | }; | 694 | }; |
| 693 | 695 | ||
| 694 | struct nfs_write_data { | 696 | struct nfs_write_data { |
| @@ -700,13 +702,14 @@ struct nfs_write_data { | |||
| 700 | struct nfs_writeverf verf; | 702 | struct nfs_writeverf verf; |
| 701 | struct list_head pages; /* Coalesced requests we wish to flush */ | 703 | struct list_head pages; /* Coalesced requests we wish to flush */ |
| 702 | struct nfs_page *req; /* multi ops per nfs_page */ | 704 | struct nfs_page *req; /* multi ops per nfs_page */ |
| 703 | struct page *pagevec[NFS_WRITE_MAXIOV]; | 705 | struct page **pagevec; |
| 704 | struct nfs_writeargs args; /* argument struct */ | 706 | struct nfs_writeargs args; /* argument struct */ |
| 705 | struct nfs_writeres res; /* result struct */ | 707 | struct nfs_writeres res; /* result struct */ |
| 706 | #ifdef CONFIG_NFS_V4 | 708 | #ifdef CONFIG_NFS_V4 |
| 707 | unsigned long timestamp; /* For lease renewal */ | 709 | unsigned long timestamp; /* For lease renewal */ |
| 708 | #endif | 710 | #endif |
| 709 | void (*complete) (struct nfs_write_data *, int); | 711 | void (*complete) (struct nfs_write_data *, int); |
| 712 | struct page *page_array[NFS_PAGEVEC_SIZE + 1]; | ||
| 710 | }; | 713 | }; |
| 711 | 714 | ||
| 712 | struct nfs_access_entry; | 715 | struct nfs_access_entry; |
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5da968729cf8..5676794ee34f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h | |||
| @@ -135,11 +135,6 @@ xdr_adjust_iovec(struct kvec *iov, u32 *p) | |||
| 135 | } | 135 | } |
| 136 | 136 | ||
| 137 | /* | 137 | /* |
| 138 | * Maximum number of iov's we use. | ||
| 139 | */ | ||
| 140 | #define MAX_IOVEC (12) | ||
| 141 | |||
| 142 | /* | ||
| 143 | * XDR buffer helper functions | 138 | * XDR buffer helper functions |
| 144 | */ | 139 | */ |
| 145 | extern void xdr_shift_buf(struct xdr_buf *, size_t); | 140 | extern void xdr_shift_buf(struct xdr_buf *, size_t); |
