about | summary | refs | log | tree | commit | diff | stats
path: root/fs/nfs
diff options
context:
space:
mode:
author Trond Myklebust <Trond.Myklebust@netapp.com> 2006-09-08 12:48:54 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-09-08 13:22:51 -0400
commit e9f7bee1df223dcf83743b46cb06c08d95497ec0 (patch)
tree 67beae4733ef0286645112a52623c81c8f8a19a9 /fs/nfs
parent 016eb4a0ed06a3677d67a584da901f0e9a63c666 (diff)
[PATCH] NFS: large non-page-aligned direct I/O clobbers memory
The logic in nfs_direct_read_schedule and nfs_direct_write_schedule can allow data->npages to be one larger than rpages. This causes a page pointer to be written beyond the end of the pagevec in nfs_read_data (or nfs_write_data).

Fix this by making nfs_(read|write)_alloc() calculate the size of the pagevec array, and initialise data->npages.

Also get rid of the redundant argument to nfs_commit_alloc().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs/nfs')
-rw-r--r-- fs/nfs/direct.c 50
-rw-r--r-- fs/nfs/read.c 24
-rw-r--r-- fs/nfs/write.c 37
3 files changed, 42 insertions, 69 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fecd3b095de..76ca1cbc38f 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -100,25 +100,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
100 return atomic_dec_and_test(&dreq->io_count); 100 return atomic_dec_and_test(&dreq->io_count);
101} 101}
102 102
103/*
104 * "size" is never larger than rsize or wsize.
105 */
106static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
107{
108 int page_count;
109
110 page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
111 page_count -= user_addr >> PAGE_SHIFT;
112 BUG_ON(page_count < 0);
113
114 return page_count;
115}
116
117static inline unsigned int nfs_max_pages(unsigned int size)
118{
119 return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
120}
121
122/** 103/**
123 * nfs_direct_IO - NFS address space operation for direct I/O 104 * nfs_direct_IO - NFS address space operation for direct I/O
124 * @rw: direction (read or write) 105 * @rw: direction (read or write)
@@ -276,28 +257,24 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
276 struct nfs_open_context *ctx = dreq->ctx; 257 struct nfs_open_context *ctx = dreq->ctx;
277 struct inode *inode = ctx->dentry->d_inode; 258 struct inode *inode = ctx->dentry->d_inode;
278 size_t rsize = NFS_SERVER(inode)->rsize; 259 size_t rsize = NFS_SERVER(inode)->rsize;
279 unsigned int rpages = nfs_max_pages(rsize);
280 unsigned int pgbase; 260 unsigned int pgbase;
281 int result; 261 int result;
282 ssize_t started = 0; 262 ssize_t started = 0;
283 263
284 get_dreq(dreq); 264 get_dreq(dreq);
285 265
286 pgbase = user_addr & ~PAGE_MASK;
287 do { 266 do {
288 struct nfs_read_data *data; 267 struct nfs_read_data *data;
289 size_t bytes; 268 size_t bytes;
290 269
270 pgbase = user_addr & ~PAGE_MASK;
271 bytes = min(rsize,count);
272
291 result = -ENOMEM; 273 result = -ENOMEM;
292 data = nfs_readdata_alloc(rpages); 274 data = nfs_readdata_alloc(pgbase + bytes);
293 if (unlikely(!data)) 275 if (unlikely(!data))
294 break; 276 break;
295 277
296 bytes = rsize;
297 if (count < rsize)
298 bytes = count;
299
300 data->npages = nfs_direct_count_pages(user_addr, bytes);
301 down_read(&current->mm->mmap_sem); 278 down_read(&current->mm->mmap_sem);
302 result = get_user_pages(current, current->mm, user_addr, 279 result = get_user_pages(current, current->mm, user_addr,
303 data->npages, 1, 0, data->pagevec, NULL); 280 data->npages, 1, 0, data->pagevec, NULL);
@@ -344,8 +321,10 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
344 started += bytes; 321 started += bytes;
345 user_addr += bytes; 322 user_addr += bytes;
346 pos += bytes; 323 pos += bytes;
324 /* FIXME: Remove this unnecessary math from final patch */
347 pgbase += bytes; 325 pgbase += bytes;
348 pgbase &= ~PAGE_MASK; 326 pgbase &= ~PAGE_MASK;
327 BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
349 328
350 count -= bytes; 329 count -= bytes;
351 } while (count != 0); 330 } while (count != 0);
@@ -524,7 +503,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
524 503
525static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 504static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
526{ 505{
527 dreq->commit_data = nfs_commit_alloc(0); 506 dreq->commit_data = nfs_commit_alloc();
528 if (dreq->commit_data != NULL) 507 if (dreq->commit_data != NULL)
529 dreq->commit_data->req = (struct nfs_page *) dreq; 508 dreq->commit_data->req = (struct nfs_page *) dreq;
530} 509}
@@ -605,28 +584,24 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
605 struct nfs_open_context *ctx = dreq->ctx; 584 struct nfs_open_context *ctx = dreq->ctx;
606 struct inode *inode = ctx->dentry->d_inode; 585 struct inode *inode = ctx->dentry->d_inode;
607 size_t wsize = NFS_SERVER(inode)->wsize; 586 size_t wsize = NFS_SERVER(inode)->wsize;
608 unsigned int wpages = nfs_max_pages(wsize);
609 unsigned int pgbase; 587 unsigned int pgbase;
610 int result; 588 int result;
611 ssize_t started = 0; 589 ssize_t started = 0;
612 590
613 get_dreq(dreq); 591 get_dreq(dreq);
614 592
615 pgbase = user_addr & ~PAGE_MASK;
616 do { 593 do {
617 struct nfs_write_data *data; 594 struct nfs_write_data *data;
618 size_t bytes; 595 size_t bytes;
619 596
597 pgbase = user_addr & ~PAGE_MASK;
598 bytes = min(wsize,count);
599
620 result = -ENOMEM; 600 result = -ENOMEM;
621 data = nfs_writedata_alloc(wpages); 601 data = nfs_writedata_alloc(pgbase + bytes);
622 if (unlikely(!data)) 602 if (unlikely(!data))
623 break; 603 break;
624 604
625 bytes = wsize;
626 if (count < wsize)
627 bytes = count;
628
629 data->npages = nfs_direct_count_pages(user_addr, bytes);
630 down_read(&current->mm->mmap_sem); 605 down_read(&current->mm->mmap_sem);
631 result = get_user_pages(current, current->mm, user_addr, 606 result = get_user_pages(current, current->mm, user_addr,
632 data->npages, 0, 0, data->pagevec, NULL); 607 data->npages, 0, 0, data->pagevec, NULL);
@@ -676,8 +651,11 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
676 started += bytes; 651 started += bytes;
677 user_addr += bytes; 652 user_addr += bytes;
678 pos += bytes; 653 pos += bytes;
654
655 /* FIXME: Remove this useless math from the final patch */
679 pgbase += bytes; 656 pgbase += bytes;
680 pgbase &= ~PAGE_MASK; 657 pgbase &= ~PAGE_MASK;
658 BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
681 659
682 count -= bytes; 660 count -= bytes;
683 } while (count != 0); 661 } while (count != 0);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index da9cf11c326..7a9ee00e0c6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -43,13 +43,15 @@ static mempool_t *nfs_rdata_mempool;
43 43
44#define MIN_POOL_READ (32) 44#define MIN_POOL_READ (32)
45 45
46struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 46struct nfs_read_data *nfs_readdata_alloc(size_t len)
47{ 47{
48 unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
48 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); 49 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
49 50
50 if (p) { 51 if (p) {
51 memset(p, 0, sizeof(*p)); 52 memset(p, 0, sizeof(*p));
52 INIT_LIST_HEAD(&p->pages); 53 INIT_LIST_HEAD(&p->pages);
54 p->npages = pagecount;
53 if (pagecount <= ARRAY_SIZE(p->page_array)) 55 if (pagecount <= ARRAY_SIZE(p->page_array))
54 p->pagevec = p->page_array; 56 p->pagevec = p->page_array;
55 else { 57 else {
@@ -140,7 +142,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
140 int result; 142 int result;
141 struct nfs_read_data *rdata; 143 struct nfs_read_data *rdata;
142 144
143 rdata = nfs_readdata_alloc(1); 145 rdata = nfs_readdata_alloc(count);
144 if (!rdata) 146 if (!rdata)
145 return -ENOMEM; 147 return -ENOMEM;
146 148
@@ -336,25 +338,25 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
336 struct nfs_page *req = nfs_list_entry(head->next); 338 struct nfs_page *req = nfs_list_entry(head->next);
337 struct page *page = req->wb_page; 339 struct page *page = req->wb_page;
338 struct nfs_read_data *data; 340 struct nfs_read_data *data;
339 unsigned int rsize = NFS_SERVER(inode)->rsize; 341 size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
340 unsigned int nbytes, offset; 342 unsigned int offset;
341 int requests = 0; 343 int requests = 0;
342 LIST_HEAD(list); 344 LIST_HEAD(list);
343 345
344 nfs_list_remove_request(req); 346 nfs_list_remove_request(req);
345 347
346 nbytes = req->wb_bytes; 348 nbytes = req->wb_bytes;
347 for(;;) { 349 do {
348 data = nfs_readdata_alloc(1); 350 size_t len = min(nbytes,rsize);
351
352 data = nfs_readdata_alloc(len);
349 if (!data) 353 if (!data)
350 goto out_bad; 354 goto out_bad;
351 INIT_LIST_HEAD(&data->pages); 355 INIT_LIST_HEAD(&data->pages);
352 list_add(&data->pages, &list); 356 list_add(&data->pages, &list);
353 requests++; 357 requests++;
354 if (nbytes <= rsize) 358 nbytes -= len;
355 break; 359 } while(nbytes != 0);
356 nbytes -= rsize;
357 }
358 atomic_set(&req->wb_complete, requests); 360 atomic_set(&req->wb_complete, requests);
359 361
360 ClearPageError(page); 362 ClearPageError(page);
@@ -402,7 +404,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
402 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 404 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
403 return nfs_pagein_multi(head, inode); 405 return nfs_pagein_multi(head, inode);
404 406
405 data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages); 407 data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
406 if (!data) 408 if (!data)
407 goto out_bad; 409 goto out_bad;
408 410
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 50774991f8d..8ab3cf10d79 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -90,22 +90,13 @@ static mempool_t *nfs_commit_mempool;
90 90
91static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); 91static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
92 92
93struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) 93struct nfs_write_data *nfs_commit_alloc(void)
94{ 94{
95 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); 95 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
96 96
97 if (p) { 97 if (p) {
98 memset(p, 0, sizeof(*p)); 98 memset(p, 0, sizeof(*p));
99 INIT_LIST_HEAD(&p->pages); 99 INIT_LIST_HEAD(&p->pages);
100 if (pagecount <= ARRAY_SIZE(p->page_array))
101 p->pagevec = p->page_array;
102 else {
103 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
104 if (!p->pagevec) {
105 mempool_free(p, nfs_commit_mempool);
106 p = NULL;
107 }
108 }
109 } 100 }
110 return p; 101 return p;
111} 102}
@@ -117,13 +108,15 @@ void nfs_commit_free(struct nfs_write_data *p)
117 mempool_free(p, nfs_commit_mempool); 108 mempool_free(p, nfs_commit_mempool);
118} 109}
119 110
120struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 111struct nfs_write_data *nfs_writedata_alloc(size_t len)
121{ 112{
113 unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
122 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); 114 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
123 115
124 if (p) { 116 if (p) {
125 memset(p, 0, sizeof(*p)); 117 memset(p, 0, sizeof(*p));
126 INIT_LIST_HEAD(&p->pages); 118 INIT_LIST_HEAD(&p->pages);
119 p->npages = pagecount;
127 if (pagecount <= ARRAY_SIZE(p->page_array)) 120 if (pagecount <= ARRAY_SIZE(p->page_array))
128 p->pagevec = p->page_array; 121 p->pagevec = p->page_array;
129 else { 122 else {
@@ -208,7 +201,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
208 int result, written = 0; 201 int result, written = 0;
209 struct nfs_write_data *wdata; 202 struct nfs_write_data *wdata;
210 203
211 wdata = nfs_writedata_alloc(1); 204 wdata = nfs_writedata_alloc(wsize);
212 if (!wdata) 205 if (!wdata)
213 return -ENOMEM; 206 return -ENOMEM;
214 207
@@ -999,24 +992,24 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
999 struct nfs_page *req = nfs_list_entry(head->next); 992 struct nfs_page *req = nfs_list_entry(head->next);
1000 struct page *page = req->wb_page; 993 struct page *page = req->wb_page;
1001 struct nfs_write_data *data; 994 struct nfs_write_data *data;
1002 unsigned int wsize = NFS_SERVER(inode)->wsize; 995 size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
1003 unsigned int nbytes, offset; 996 unsigned int offset;
1004 int requests = 0; 997 int requests = 0;
1005 LIST_HEAD(list); 998 LIST_HEAD(list);
1006 999
1007 nfs_list_remove_request(req); 1000 nfs_list_remove_request(req);
1008 1001
1009 nbytes = req->wb_bytes; 1002 nbytes = req->wb_bytes;
1010 for (;;) { 1003 do {
1011 data = nfs_writedata_alloc(1); 1004 size_t len = min(nbytes, wsize);
1005
1006 data = nfs_writedata_alloc(len);
1012 if (!data) 1007 if (!data)
1013 goto out_bad; 1008 goto out_bad;
1014 list_add(&data->pages, &list); 1009 list_add(&data->pages, &list);
1015 requests++; 1010 requests++;
1016 if (nbytes <= wsize) 1011 nbytes -= len;
1017 break; 1012 } while (nbytes != 0);
1018 nbytes -= wsize;
1019 }
1020 atomic_set(&req->wb_complete, requests); 1013 atomic_set(&req->wb_complete, requests);
1021 1014
1022 ClearPageError(page); 1015 ClearPageError(page);
@@ -1070,7 +1063,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
1070 struct nfs_write_data *data; 1063 struct nfs_write_data *data;
1071 unsigned int count; 1064 unsigned int count;
1072 1065
1073 data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); 1066 data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize);
1074 if (!data) 1067 if (!data)
1075 goto out_bad; 1068 goto out_bad;
1076 1069
@@ -1378,7 +1371,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1378 struct nfs_write_data *data; 1371 struct nfs_write_data *data;
1379 struct nfs_page *req; 1372 struct nfs_page *req;
1380 1373
1381 data = nfs_commit_alloc(NFS_SERVER(inode)->wpages); 1374 data = nfs_commit_alloc();
1382 1375
1383 if (!data) 1376 if (!data)
1384 goto out_bad; 1377 goto out_bad;