diff options
author | Chuck Lever <cel@netapp.com> | 2005-11-30 18:09:02 -0500 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-01-06 14:58:49 -0500 |
commit | 40859d7ee64ed6bfad8a4e93f9bb5c1074afadff (patch) | |
tree | ed4069423c3d6551035d5b6116f50452cdac4103 /fs | |
parent | 325cfed9ae901320e9234b18c21434b783dbe342 (diff) |
NFS: support large reads and writes on the wire
Most NFS server implementations allow up to 64KB reads and writes on the
wire, and some allow more: the Solaris NFS server, for instance, allows up to a megabyte.
Now the Linux NFS client supports transfer sizes up to 1MB, too. This will
help reduce protocol and context-switch overhead on read/write-intensive NFS
workloads, and enable larger atomic read and write operations on servers
that support them.
Test-plan:
Connectathon and iozone on mount point with wsize=rsize>32768 over TCP.
Tests with NFS over UDP to verify the maximum RPC payload size cap.
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/nfs/direct.c | 5 | ||||
-rw-r--r-- | fs/nfs/inode.c | 25 | ||||
-rw-r--r-- | fs/nfs/nfsroot.c | 4 | ||||
-rw-r--r-- | fs/nfs/read.c | 6 | ||||
-rw-r--r-- | fs/nfs/write.c | 29 |
5 files changed, 40 insertions, 29 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f69d95aa78b2..fd7ac5e841c1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -154,6 +154,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int | |||
154 | struct list_head *list; | 154 | struct list_head *list; |
155 | struct nfs_direct_req *dreq; | 155 | struct nfs_direct_req *dreq; |
156 | unsigned int reads = 0; | 156 | unsigned int reads = 0; |
157 | unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
157 | 158 | ||
158 | dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); | 159 | dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); |
159 | if (!dreq) | 160 | if (!dreq) |
@@ -167,7 +168,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int | |||
167 | 168 | ||
168 | list = &dreq->list; | 169 | list = &dreq->list; |
169 | for(;;) { | 170 | for(;;) { |
170 | struct nfs_read_data *data = nfs_readdata_alloc(); | 171 | struct nfs_read_data *data = nfs_readdata_alloc(rpages); |
171 | 172 | ||
172 | if (unlikely(!data)) { | 173 | if (unlikely(!data)) { |
173 | while (!list_empty(list)) { | 174 | while (!list_empty(list)) { |
@@ -431,7 +432,7 @@ static ssize_t nfs_direct_write_seg(struct inode *inode, | |||
431 | struct nfs_writeverf first_verf; | 432 | struct nfs_writeverf first_verf; |
432 | struct nfs_write_data *wdata; | 433 | struct nfs_write_data *wdata; |
433 | 434 | ||
434 | wdata = nfs_writedata_alloc(); | 435 | wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); |
435 | if (!wdata) | 436 | if (!wdata) |
436 | return -ENOMEM; | 437 | return -ENOMEM; |
437 | 438 | ||
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4e6558df54b8..acde2c5725bf 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -221,10 +221,10 @@ nfs_calc_block_size(u64 tsize) | |||
221 | static inline unsigned long | 221 | static inline unsigned long |
222 | nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) | 222 | nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) |
223 | { | 223 | { |
224 | if (bsize < 1024) | 224 | if (bsize < NFS_MIN_FILE_IO_SIZE) |
225 | bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; | 225 | bsize = NFS_DEF_FILE_IO_SIZE; |
226 | else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) | 226 | else if (bsize >= NFS_MAX_FILE_IO_SIZE) |
227 | bsize = NFS_MAX_FILE_IO_BUFFER_SIZE; | 227 | bsize = NFS_MAX_FILE_IO_SIZE; |
228 | 228 | ||
229 | return nfs_block_bits(bsize, nrbitsp); | 229 | return nfs_block_bits(bsize, nrbitsp); |
230 | } | 230 | } |
@@ -307,20 +307,15 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) | |||
307 | max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); | 307 | max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); |
308 | if (server->rsize > max_rpc_payload) | 308 | if (server->rsize > max_rpc_payload) |
309 | server->rsize = max_rpc_payload; | 309 | server->rsize = max_rpc_payload; |
310 | if (server->wsize > max_rpc_payload) | 310 | if (server->rsize > NFS_MAX_FILE_IO_SIZE) |
311 | server->wsize = max_rpc_payload; | 311 | server->rsize = NFS_MAX_FILE_IO_SIZE; |
312 | |||
313 | server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 312 | server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
314 | if (server->rpages > NFS_READ_MAXIOV) { | ||
315 | server->rpages = NFS_READ_MAXIOV; | ||
316 | server->rsize = server->rpages << PAGE_CACHE_SHIFT; | ||
317 | } | ||
318 | 313 | ||
314 | if (server->wsize > max_rpc_payload) | ||
315 | server->wsize = max_rpc_payload; | ||
316 | if (server->wsize > NFS_MAX_FILE_IO_SIZE) | ||
317 | server->wsize = NFS_MAX_FILE_IO_SIZE; | ||
319 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 318 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
320 | if (server->wpages > NFS_WRITE_MAXIOV) { | ||
321 | server->wpages = NFS_WRITE_MAXIOV; | ||
322 | server->wsize = server->wpages << PAGE_CACHE_SHIFT; | ||
323 | } | ||
324 | 319 | ||
325 | if (sb->s_blocksize == 0) | 320 | if (sb->s_blocksize == 0) |
326 | sb->s_blocksize = nfs_block_bits(server->wsize, | 321 | sb->s_blocksize = nfs_block_bits(server->wsize, |
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 1b272a135a31..985cc53b8dd5 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c | |||
@@ -296,8 +296,8 @@ static int __init root_nfs_name(char *name) | |||
296 | nfs_port = -1; | 296 | nfs_port = -1; |
297 | nfs_data.version = NFS_MOUNT_VERSION; | 297 | nfs_data.version = NFS_MOUNT_VERSION; |
298 | nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ | 298 | nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ |
299 | nfs_data.rsize = NFS_DEF_FILE_IO_BUFFER_SIZE; | 299 | nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; |
300 | nfs_data.wsize = NFS_DEF_FILE_IO_BUFFER_SIZE; | 300 | nfs_data.wsize = NFS_DEF_FILE_IO_SIZE; |
301 | nfs_data.acregmin = 3; | 301 | nfs_data.acregmin = 3; |
302 | nfs_data.acregmax = 60; | 302 | nfs_data.acregmax = 60; |
303 | nfs_data.acdirmin = 30; | 303 | nfs_data.acdirmin = 30; |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 21486242c3d3..05eb43fadf8e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -83,7 +83,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, | |||
83 | int result; | 83 | int result; |
84 | struct nfs_read_data *rdata; | 84 | struct nfs_read_data *rdata; |
85 | 85 | ||
86 | rdata = nfs_readdata_alloc(); | 86 | rdata = nfs_readdata_alloc(1); |
87 | if (!rdata) | 87 | if (!rdata) |
88 | return -ENOMEM; | 88 | return -ENOMEM; |
89 | 89 | ||
@@ -283,7 +283,7 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) | |||
283 | 283 | ||
284 | nbytes = req->wb_bytes; | 284 | nbytes = req->wb_bytes; |
285 | for(;;) { | 285 | for(;;) { |
286 | data = nfs_readdata_alloc(); | 286 | data = nfs_readdata_alloc(1); |
287 | if (!data) | 287 | if (!data) |
288 | goto out_bad; | 288 | goto out_bad; |
289 | INIT_LIST_HEAD(&data->pages); | 289 | INIT_LIST_HEAD(&data->pages); |
@@ -339,7 +339,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) | |||
339 | if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) | 339 | if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) |
340 | return nfs_pagein_multi(head, inode); | 340 | return nfs_pagein_multi(head, inode); |
341 | 341 | ||
342 | data = nfs_readdata_alloc(); | 342 | data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages); |
343 | if (!data) | 343 | if (!data) |
344 | goto out_bad; | 344 | goto out_bad; |
345 | 345 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 80bc4ea1b824..1ce0c200df16 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -89,18 +89,33 @@ static mempool_t *nfs_commit_mempool; | |||
89 | 89 | ||
90 | static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); | 90 | static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); |
91 | 91 | ||
92 | static inline struct nfs_write_data *nfs_commit_alloc(void) | 92 | static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) |
93 | { | 93 | { |
94 | struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); | 94 | struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); |
95 | |||
95 | if (p) { | 96 | if (p) { |
96 | memset(p, 0, sizeof(*p)); | 97 | memset(p, 0, sizeof(*p)); |
97 | INIT_LIST_HEAD(&p->pages); | 98 | INIT_LIST_HEAD(&p->pages); |
99 | if (pagecount < NFS_PAGEVEC_SIZE) | ||
100 | p->pagevec = &p->page_array[0]; | ||
101 | else { | ||
102 | size_t size = ++pagecount * sizeof(struct page *); | ||
103 | p->pagevec = kmalloc(size, GFP_NOFS); | ||
104 | if (p->pagevec) { | ||
105 | memset(p->pagevec, 0, size); | ||
106 | } else { | ||
107 | mempool_free(p, nfs_commit_mempool); | ||
108 | p = NULL; | ||
109 | } | ||
110 | } | ||
98 | } | 111 | } |
99 | return p; | 112 | return p; |
100 | } | 113 | } |
101 | 114 | ||
102 | static inline void nfs_commit_free(struct nfs_write_data *p) | 115 | static inline void nfs_commit_free(struct nfs_write_data *p) |
103 | { | 116 | { |
117 | if (p && (p->pagevec != &p->page_array[0])) | ||
118 | kfree(p->pagevec); | ||
104 | mempool_free(p, nfs_commit_mempool); | 119 | mempool_free(p, nfs_commit_mempool); |
105 | } | 120 | } |
106 | 121 | ||
@@ -167,7 +182,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, | |||
167 | int result, written = 0; | 182 | int result, written = 0; |
168 | struct nfs_write_data *wdata; | 183 | struct nfs_write_data *wdata; |
169 | 184 | ||
170 | wdata = nfs_writedata_alloc(); | 185 | wdata = nfs_writedata_alloc(1); |
171 | if (!wdata) | 186 | if (!wdata) |
172 | return -ENOMEM; | 187 | return -ENOMEM; |
173 | 188 | ||
@@ -909,7 +924,7 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) | |||
909 | 924 | ||
910 | nbytes = req->wb_bytes; | 925 | nbytes = req->wb_bytes; |
911 | for (;;) { | 926 | for (;;) { |
912 | data = nfs_writedata_alloc(); | 927 | data = nfs_writedata_alloc(1); |
913 | if (!data) | 928 | if (!data) |
914 | goto out_bad; | 929 | goto out_bad; |
915 | list_add(&data->pages, &list); | 930 | list_add(&data->pages, &list); |
@@ -973,7 +988,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) | |||
973 | if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) | 988 | if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) |
974 | return nfs_flush_multi(head, inode, how); | 989 | return nfs_flush_multi(head, inode, how); |
975 | 990 | ||
976 | data = nfs_writedata_alloc(); | 991 | data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); |
977 | if (!data) | 992 | if (!data) |
978 | goto out_bad; | 993 | goto out_bad; |
979 | 994 | ||
@@ -1241,12 +1256,12 @@ static void nfs_commit_rpcsetup(struct list_head *head, | |||
1241 | * Commit dirty pages | 1256 | * Commit dirty pages |
1242 | */ | 1257 | */ |
1243 | static int | 1258 | static int |
1244 | nfs_commit_list(struct list_head *head, int how) | 1259 | nfs_commit_list(struct inode *inode, struct list_head *head, int how) |
1245 | { | 1260 | { |
1246 | struct nfs_write_data *data; | 1261 | struct nfs_write_data *data; |
1247 | struct nfs_page *req; | 1262 | struct nfs_page *req; |
1248 | 1263 | ||
1249 | data = nfs_commit_alloc(); | 1264 | data = nfs_commit_alloc(NFS_SERVER(inode)->wpages); |
1250 | 1265 | ||
1251 | if (!data) | 1266 | if (!data) |
1252 | goto out_bad; | 1267 | goto out_bad; |
@@ -1351,7 +1366,7 @@ int nfs_commit_inode(struct inode *inode, int how) | |||
1351 | res = nfs_scan_commit(inode, &head, 0, 0); | 1366 | res = nfs_scan_commit(inode, &head, 0, 0); |
1352 | spin_unlock(&nfsi->req_lock); | 1367 | spin_unlock(&nfsi->req_lock); |
1353 | if (res) { | 1368 | if (res) { |
1354 | error = nfs_commit_list(&head, how); | 1369 | error = nfs_commit_list(inode, &head, how); |
1355 | if (error < 0) | 1370 | if (error < 0) |
1356 | return error; | 1371 | return error; |
1357 | } | 1372 | } |