aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2008-05-19 02:34:42 -0400
committerLachlan McIlroy <lachlan@redback.melbourne.sgi.com>2008-05-23 04:12:49 -0400
commit6ab455eeaff6893cd06da33843e840d888cdc04a (patch)
treee7744d1580647ca3b08e829bcf976f2f60c49986
parentc8f5f12e46f079a954d4f7163ba59dadee08ca26 (diff)
[XFS] Fix memory corruption with small buffer reads
When we have multiple buffers in a single page for a blocksize == pagesize filesystem we might overwrite the page contents if two callers hit it shortly after each other. To prevent that we need to keep the page locked until I/O is completed and the page marked uptodate. Thanks to Eric Sandeen for triaging this bug and finding a reproducible testcase and Dave Chinner for additional advice. This should fix kernel.org bz #10421. Tested-by: Eric Sandeen <sandeen@sandeen.net> SGI-PV: 981813 SGI-Modid: xfs-linux-melb:xfs-kern:31173a Signed-off-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c24
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h19
2 files changed, 39 insertions, 4 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5105015a75ad..98e0e86093b4 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -387,6 +387,8 @@ _xfs_buf_lookup_pages(
387 if (unlikely(page == NULL)) { 387 if (unlikely(page == NULL)) {
388 if (flags & XBF_READ_AHEAD) { 388 if (flags & XBF_READ_AHEAD) {
389 bp->b_page_count = i; 389 bp->b_page_count = i;
390 for (i = 0; i < bp->b_page_count; i++)
391 unlock_page(bp->b_pages[i]);
390 return -ENOMEM; 392 return -ENOMEM;
391 } 393 }
392 394
@@ -416,17 +418,24 @@ _xfs_buf_lookup_pages(
416 ASSERT(!PagePrivate(page)); 418 ASSERT(!PagePrivate(page));
417 if (!PageUptodate(page)) { 419 if (!PageUptodate(page)) {
418 page_count--; 420 page_count--;
419 if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) { 421 if (blocksize >= PAGE_CACHE_SIZE) {
422 if (flags & XBF_READ)
423 bp->b_flags |= _XBF_PAGE_LOCKED;
424 } else if (!PagePrivate(page)) {
420 if (test_page_region(page, offset, nbytes)) 425 if (test_page_region(page, offset, nbytes))
421 page_count++; 426 page_count++;
422 } 427 }
423 } 428 }
424 429
425 unlock_page(page);
426 bp->b_pages[i] = page; 430 bp->b_pages[i] = page;
427 offset = 0; 431 offset = 0;
428 } 432 }
429 433
434 if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
435 for (i = 0; i < bp->b_page_count; i++)
436 unlock_page(bp->b_pages[i]);
437 }
438
430 if (page_count == bp->b_page_count) 439 if (page_count == bp->b_page_count)
431 bp->b_flags |= XBF_DONE; 440 bp->b_flags |= XBF_DONE;
432 441
@@ -746,6 +755,7 @@ xfs_buf_associate_memory(
746 bp->b_count_desired = len; 755 bp->b_count_desired = len;
747 bp->b_buffer_length = buflen; 756 bp->b_buffer_length = buflen;
748 bp->b_flags |= XBF_MAPPED; 757 bp->b_flags |= XBF_MAPPED;
758 bp->b_flags &= ~_XBF_PAGE_LOCKED;
749 759
750 return 0; 760 return 0;
751} 761}
@@ -1093,8 +1103,10 @@ _xfs_buf_ioend(
1093 xfs_buf_t *bp, 1103 xfs_buf_t *bp,
1094 int schedule) 1104 int schedule)
1095{ 1105{
1096 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) 1106 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
1107 bp->b_flags &= ~_XBF_PAGE_LOCKED;
1097 xfs_buf_ioend(bp, schedule); 1108 xfs_buf_ioend(bp, schedule);
1109 }
1098} 1110}
1099 1111
1100STATIC void 1112STATIC void
@@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io(
1125 1137
1126 if (--bvec >= bio->bi_io_vec) 1138 if (--bvec >= bio->bi_io_vec)
1127 prefetchw(&bvec->bv_page->flags); 1139 prefetchw(&bvec->bv_page->flags);
1140
1141 if (bp->b_flags & _XBF_PAGE_LOCKED)
1142 unlock_page(page);
1128 } while (bvec >= bio->bi_io_vec); 1143 } while (bvec >= bio->bi_io_vec);
1129 1144
1130 _xfs_buf_ioend(bp, 1); 1145 _xfs_buf_ioend(bp, 1);
@@ -1163,7 +1178,8 @@ _xfs_buf_ioapply(
1163 * filesystem block size is not smaller than the page size. 1178 * filesystem block size is not smaller than the page size.
1164 */ 1179 */
1165 if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && 1180 if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
1166 (bp->b_flags & XBF_READ) && 1181 ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
1182 (XBF_READ|_XBF_PAGE_LOCKED)) &&
1167 (blocksize >= PAGE_CACHE_SIZE)) { 1183 (blocksize >= PAGE_CACHE_SIZE)) {
1168 bio = bio_alloc(GFP_NOIO, 1); 1184 bio = bio_alloc(GFP_NOIO, 1);
1169 1185
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 841d7883528d..f948ec7ba9a4 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -66,6 +66,25 @@ typedef enum {
66 _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ 66 _XBF_PAGES = (1 << 18), /* backed by refcounted pages */
67 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ 67 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
68 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ 68 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
69
70 /*
71 * Special flag for supporting metadata blocks smaller than a FSB.
72 *
73 * In this case we can have multiple xfs_buf_t on a single page and
74 * need to lock out concurrent xfs_buf_t readers as they only
75 * serialise access to the buffer.
76 *
77 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
78 * between reads of the page. Hence we can have one thread read the
79 * page and modify it, but then race with another thread that thinks
80 * the page is not up-to-date and hence reads it again.
81 *
82 * The result is that the first modifcation to the page is lost.
83 * This sort of AGF/AGI reading race can happen when unlinking inodes
84 * that require truncation and results in the AGI unlinked list
85 * modifications being lost.
86 */
87 _XBF_PAGE_LOCKED = (1 << 22),
69} xfs_buf_flags_t; 88} xfs_buf_flags_t;
70 89
71typedef enum { 90typedef enum {