aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Chuck Lever <cel@netapp.com> 2005-11-30 18:09:02 -0500
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2006-01-06 14:58:49 -0500
commit 40859d7ee64ed6bfad8a4e93f9bb5c1074afadff (patch)
tree ed4069423c3d6551035d5b6116f50452cdac4103 /fs
parent 325cfed9ae901320e9234b18c21434b783dbe342 (diff)
NFS: support large reads and writes on the wire
Most NFS server implementations allow up to 64KB reads and writes on the wire. The Solaris NFS server allows up to a megabyte, for instance.

Now the Linux NFS client supports transfer sizes up to 1MB, too. This will help reduce protocol and context switch overhead on read/write intensive NFS workloads, and support larger atomic read and write operations on servers that support them.

Test-plan:
Connectathon and iozone on mount point with wsize=rsize>32768 over TCP. Tests with NFS over UDP to verify the maximum RPC payload size cap.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/nfs/direct.c 5
-rw-r--r-- fs/nfs/inode.c 25
-rw-r--r-- fs/nfs/nfsroot.c 4
-rw-r--r-- fs/nfs/read.c 6
-rw-r--r-- fs/nfs/write.c 29
5 files changed, 40 insertions, 29 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f69d95aa78b..fd7ac5e841c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -154,6 +154,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
154 struct list_head *list; 154 struct list_head *list;
155 struct nfs_direct_req *dreq; 155 struct nfs_direct_req *dreq;
156 unsigned int reads = 0; 156 unsigned int reads = 0;
157 unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
157 158
158 dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); 159 dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
159 if (!dreq) 160 if (!dreq)
@@ -167,7 +168,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
167 168
168 list = &dreq->list; 169 list = &dreq->list;
169 for(;;) { 170 for(;;) {
170 struct nfs_read_data *data = nfs_readdata_alloc(); 171 struct nfs_read_data *data = nfs_readdata_alloc(rpages);
171 172
172 if (unlikely(!data)) { 173 if (unlikely(!data)) {
173 while (!list_empty(list)) { 174 while (!list_empty(list)) {
@@ -431,7 +432,7 @@ static ssize_t nfs_direct_write_seg(struct inode *inode,
431 struct nfs_writeverf first_verf; 432 struct nfs_writeverf first_verf;
432 struct nfs_write_data *wdata; 433 struct nfs_write_data *wdata;
433 434
434 wdata = nfs_writedata_alloc(); 435 wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
435 if (!wdata) 436 if (!wdata)
436 return -ENOMEM; 437 return -ENOMEM;
437 438
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4e6558df54b..acde2c5725b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -221,10 +221,10 @@ nfs_calc_block_size(u64 tsize)
221static inline unsigned long 221static inline unsigned long
222nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) 222nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
223{ 223{
224 if (bsize < 1024) 224 if (bsize < NFS_MIN_FILE_IO_SIZE)
225 bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; 225 bsize = NFS_DEF_FILE_IO_SIZE;
226 else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) 226 else if (bsize >= NFS_MAX_FILE_IO_SIZE)
227 bsize = NFS_MAX_FILE_IO_BUFFER_SIZE; 227 bsize = NFS_MAX_FILE_IO_SIZE;
228 228
229 return nfs_block_bits(bsize, nrbitsp); 229 return nfs_block_bits(bsize, nrbitsp);
230} 230}
@@ -307,20 +307,15 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
307 max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); 307 max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
308 if (server->rsize > max_rpc_payload) 308 if (server->rsize > max_rpc_payload)
309 server->rsize = max_rpc_payload; 309 server->rsize = max_rpc_payload;
310 if (server->wsize > max_rpc_payload) 310 if (server->rsize > NFS_MAX_FILE_IO_SIZE)
311 server->wsize = max_rpc_payload; 311 server->rsize = NFS_MAX_FILE_IO_SIZE;
312
313 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 312 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
314 if (server->rpages > NFS_READ_MAXIOV) {
315 server->rpages = NFS_READ_MAXIOV;
316 server->rsize = server->rpages << PAGE_CACHE_SHIFT;
317 }
318 313
314 if (server->wsize > max_rpc_payload)
315 server->wsize = max_rpc_payload;
316 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
317 server->wsize = NFS_MAX_FILE_IO_SIZE;
319 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 318 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
320 if (server->wpages > NFS_WRITE_MAXIOV) {
321 server->wpages = NFS_WRITE_MAXIOV;
322 server->wsize = server->wpages << PAGE_CACHE_SHIFT;
323 }
324 319
325 if (sb->s_blocksize == 0) 320 if (sb->s_blocksize == 0)
326 sb->s_blocksize = nfs_block_bits(server->wsize, 321 sb->s_blocksize = nfs_block_bits(server->wsize,
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 1b272a135a3..985cc53b8dd 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -296,8 +296,8 @@ static int __init root_nfs_name(char *name)
296 nfs_port = -1; 296 nfs_port = -1;
297 nfs_data.version = NFS_MOUNT_VERSION; 297 nfs_data.version = NFS_MOUNT_VERSION;
298 nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ 298 nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */
299 nfs_data.rsize = NFS_DEF_FILE_IO_BUFFER_SIZE; 299 nfs_data.rsize = NFS_DEF_FILE_IO_SIZE;
300 nfs_data.wsize = NFS_DEF_FILE_IO_BUFFER_SIZE; 300 nfs_data.wsize = NFS_DEF_FILE_IO_SIZE;
301 nfs_data.acregmin = 3; 301 nfs_data.acregmin = 3;
302 nfs_data.acregmax = 60; 302 nfs_data.acregmax = 60;
303 nfs_data.acdirmin = 30; 303 nfs_data.acdirmin = 30;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 21486242c3d..05eb43fadf8 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -83,7 +83,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
83 int result; 83 int result;
84 struct nfs_read_data *rdata; 84 struct nfs_read_data *rdata;
85 85
86 rdata = nfs_readdata_alloc(); 86 rdata = nfs_readdata_alloc(1);
87 if (!rdata) 87 if (!rdata)
88 return -ENOMEM; 88 return -ENOMEM;
89 89
@@ -283,7 +283,7 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
283 283
284 nbytes = req->wb_bytes; 284 nbytes = req->wb_bytes;
285 for(;;) { 285 for(;;) {
286 data = nfs_readdata_alloc(); 286 data = nfs_readdata_alloc(1);
287 if (!data) 287 if (!data)
288 goto out_bad; 288 goto out_bad;
289 INIT_LIST_HEAD(&data->pages); 289 INIT_LIST_HEAD(&data->pages);
@@ -339,7 +339,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
339 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 339 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
340 return nfs_pagein_multi(head, inode); 340 return nfs_pagein_multi(head, inode);
341 341
342 data = nfs_readdata_alloc(); 342 data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
343 if (!data) 343 if (!data)
344 goto out_bad; 344 goto out_bad;
345 345
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 80bc4ea1b82..1ce0c200df1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -89,18 +89,33 @@ static mempool_t *nfs_commit_mempool;
89 89
90static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); 90static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
91 91
92static inline struct nfs_write_data *nfs_commit_alloc(void) 92static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
93{ 93{
94 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); 94 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
95
95 if (p) { 96 if (p) {
96 memset(p, 0, sizeof(*p)); 97 memset(p, 0, sizeof(*p));
97 INIT_LIST_HEAD(&p->pages); 98 INIT_LIST_HEAD(&p->pages);
99 if (pagecount < NFS_PAGEVEC_SIZE)
100 p->pagevec = &p->page_array[0];
101 else {
102 size_t size = ++pagecount * sizeof(struct page *);
103 p->pagevec = kmalloc(size, GFP_NOFS);
104 if (p->pagevec) {
105 memset(p->pagevec, 0, size);
106 } else {
107 mempool_free(p, nfs_commit_mempool);
108 p = NULL;
109 }
110 }
98 } 111 }
99 return p; 112 return p;
100} 113}
101 114
102static inline void nfs_commit_free(struct nfs_write_data *p) 115static inline void nfs_commit_free(struct nfs_write_data *p)
103{ 116{
117 if (p && (p->pagevec != &p->page_array[0]))
118 kfree(p->pagevec);
104 mempool_free(p, nfs_commit_mempool); 119 mempool_free(p, nfs_commit_mempool);
105} 120}
106 121
@@ -167,7 +182,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
167 int result, written = 0; 182 int result, written = 0;
168 struct nfs_write_data *wdata; 183 struct nfs_write_data *wdata;
169 184
170 wdata = nfs_writedata_alloc(); 185 wdata = nfs_writedata_alloc(1);
171 if (!wdata) 186 if (!wdata)
172 return -ENOMEM; 187 return -ENOMEM;
173 188
@@ -909,7 +924,7 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
909 924
910 nbytes = req->wb_bytes; 925 nbytes = req->wb_bytes;
911 for (;;) { 926 for (;;) {
912 data = nfs_writedata_alloc(); 927 data = nfs_writedata_alloc(1);
913 if (!data) 928 if (!data)
914 goto out_bad; 929 goto out_bad;
915 list_add(&data->pages, &list); 930 list_add(&data->pages, &list);
@@ -973,7 +988,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
973 if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) 988 if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
974 return nfs_flush_multi(head, inode, how); 989 return nfs_flush_multi(head, inode, how);
975 990
976 data = nfs_writedata_alloc(); 991 data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
977 if (!data) 992 if (!data)
978 goto out_bad; 993 goto out_bad;
979 994
@@ -1241,12 +1256,12 @@ static void nfs_commit_rpcsetup(struct list_head *head,
1241 * Commit dirty pages 1256 * Commit dirty pages
1242 */ 1257 */
1243static int 1258static int
1244nfs_commit_list(struct list_head *head, int how) 1259nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1245{ 1260{
1246 struct nfs_write_data *data; 1261 struct nfs_write_data *data;
1247 struct nfs_page *req; 1262 struct nfs_page *req;
1248 1263
1249 data = nfs_commit_alloc(); 1264 data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);
1250 1265
1251 if (!data) 1266 if (!data)
1252 goto out_bad; 1267 goto out_bad;
@@ -1351,7 +1366,7 @@ int nfs_commit_inode(struct inode *inode, int how)
1351 res = nfs_scan_commit(inode, &head, 0, 0); 1366 res = nfs_scan_commit(inode, &head, 0, 0);
1352 spin_unlock(&nfsi->req_lock); 1367 spin_unlock(&nfsi->req_lock);
1353 if (res) { 1368 if (res) {
1354 error = nfs_commit_list(&head, how); 1369 error = nfs_commit_list(inode, &head, how);
1355 if (error < 0) 1370 if (error < 0)
1356 return error; 1371 return error;
1357 } 1372 }