diff options
author | Christoph Hellwig <hch@infradead.org> | 2008-05-19 02:34:42 -0400 |
---|---|---|
committer | Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> | 2008-05-23 04:12:49 -0400 |
commit | 6ab455eeaff6893cd06da33843e840d888cdc04a (patch) | |
tree | e7744d1580647ca3b08e829bcf976f2f60c49986 /fs/xfs | |
parent | c8f5f12e46f079a954d4f7163ba59dadee08ca26 (diff) |
[XFS] Fix memory corruption with small buffer reads
When we have multiple buffers in a single page for a blocksize == pagesize
filesystem we might overwrite the page contents if two callers hit it
shortly after each other. To prevent that we need to keep the page locked
until I/O is completed and the page marked uptodate.
Thanks to Eric Sandeen for triaging this bug and finding a reproducible
testcase and Dave Chinner for additional advice.
This should fix kernel.org bz #10421.
Tested-by: Eric Sandeen <sandeen@sandeen.net>
SGI-PV: 981813
SGI-Modid: xfs-linux-melb:xfs-kern:31173a
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 24 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.h | 19 |
2 files changed, 39 insertions, 4 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 5105015a75ad..98e0e86093b4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -387,6 +387,8 @@ _xfs_buf_lookup_pages( | |||
387 | if (unlikely(page == NULL)) { | 387 | if (unlikely(page == NULL)) { |
388 | if (flags & XBF_READ_AHEAD) { | 388 | if (flags & XBF_READ_AHEAD) { |
389 | bp->b_page_count = i; | 389 | bp->b_page_count = i; |
390 | for (i = 0; i < bp->b_page_count; i++) | ||
391 | unlock_page(bp->b_pages[i]); | ||
390 | return -ENOMEM; | 392 | return -ENOMEM; |
391 | } | 393 | } |
392 | 394 | ||
@@ -416,17 +418,24 @@ _xfs_buf_lookup_pages( | |||
416 | ASSERT(!PagePrivate(page)); | 418 | ASSERT(!PagePrivate(page)); |
417 | if (!PageUptodate(page)) { | 419 | if (!PageUptodate(page)) { |
418 | page_count--; | 420 | page_count--; |
419 | if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) { | 421 | if (blocksize >= PAGE_CACHE_SIZE) { |
422 | if (flags & XBF_READ) | ||
423 | bp->b_flags |= _XBF_PAGE_LOCKED; | ||
424 | } else if (!PagePrivate(page)) { | ||
420 | if (test_page_region(page, offset, nbytes)) | 425 | if (test_page_region(page, offset, nbytes)) |
421 | page_count++; | 426 | page_count++; |
422 | } | 427 | } |
423 | } | 428 | } |
424 | 429 | ||
425 | unlock_page(page); | ||
426 | bp->b_pages[i] = page; | 430 | bp->b_pages[i] = page; |
427 | offset = 0; | 431 | offset = 0; |
428 | } | 432 | } |
429 | 433 | ||
434 | if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { | ||
435 | for (i = 0; i < bp->b_page_count; i++) | ||
436 | unlock_page(bp->b_pages[i]); | ||
437 | } | ||
438 | |||
430 | if (page_count == bp->b_page_count) | 439 | if (page_count == bp->b_page_count) |
431 | bp->b_flags |= XBF_DONE; | 440 | bp->b_flags |= XBF_DONE; |
432 | 441 | ||
@@ -746,6 +755,7 @@ xfs_buf_associate_memory( | |||
746 | bp->b_count_desired = len; | 755 | bp->b_count_desired = len; |
747 | bp->b_buffer_length = buflen; | 756 | bp->b_buffer_length = buflen; |
748 | bp->b_flags |= XBF_MAPPED; | 757 | bp->b_flags |= XBF_MAPPED; |
758 | bp->b_flags &= ~_XBF_PAGE_LOCKED; | ||
749 | 759 | ||
750 | return 0; | 760 | return 0; |
751 | } | 761 | } |
@@ -1093,8 +1103,10 @@ _xfs_buf_ioend( | |||
1093 | xfs_buf_t *bp, | 1103 | xfs_buf_t *bp, |
1094 | int schedule) | 1104 | int schedule) |
1095 | { | 1105 | { |
1096 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) | 1106 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { |
1107 | bp->b_flags &= ~_XBF_PAGE_LOCKED; | ||
1097 | xfs_buf_ioend(bp, schedule); | 1108 | xfs_buf_ioend(bp, schedule); |
1109 | } | ||
1098 | } | 1110 | } |
1099 | 1111 | ||
1100 | STATIC void | 1112 | STATIC void |
@@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io( | |||
1125 | 1137 | ||
1126 | if (--bvec >= bio->bi_io_vec) | 1138 | if (--bvec >= bio->bi_io_vec) |
1127 | prefetchw(&bvec->bv_page->flags); | 1139 | prefetchw(&bvec->bv_page->flags); |
1140 | |||
1141 | if (bp->b_flags & _XBF_PAGE_LOCKED) | ||
1142 | unlock_page(page); | ||
1128 | } while (bvec >= bio->bi_io_vec); | 1143 | } while (bvec >= bio->bi_io_vec); |
1129 | 1144 | ||
1130 | _xfs_buf_ioend(bp, 1); | 1145 | _xfs_buf_ioend(bp, 1); |
@@ -1163,7 +1178,8 @@ _xfs_buf_ioapply( | |||
1163 | * filesystem block size is not smaller than the page size. | 1178 | * filesystem block size is not smaller than the page size. |
1164 | */ | 1179 | */ |
1165 | if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && | 1180 | if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && |
1166 | (bp->b_flags & XBF_READ) && | 1181 | ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == |
1182 | (XBF_READ|_XBF_PAGE_LOCKED)) && | ||
1167 | (blocksize >= PAGE_CACHE_SIZE)) { | 1183 | (blocksize >= PAGE_CACHE_SIZE)) { |
1168 | bio = bio_alloc(GFP_NOIO, 1); | 1184 | bio = bio_alloc(GFP_NOIO, 1); |
1169 | 1185 | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 841d7883528d..f948ec7ba9a4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -66,6 +66,25 @@ typedef enum { | |||
66 | _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ | 66 | _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ |
67 | _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ | 67 | _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ |
68 | _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ | 68 | _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ |
69 | |||
70 | /* | ||
71 | * Special flag for supporting metadata blocks smaller than a FSB. | ||
72 | * | ||
73 | * In this case we can have multiple xfs_buf_t on a single page and | ||
74 | * need to lock out concurrent xfs_buf_t readers as they only | ||
75 | * serialise access to the buffer. | ||
76 | * | ||
77 | * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation | ||
78 | * between reads of the page. Hence we can have one thread read the | ||
79 | * page and modify it, but then race with another thread that thinks | ||
80 | * the page is not up-to-date and hence reads it again. | ||
81 | * | ||
82 | * The result is that the first modifcation to the page is lost. | ||
83 | * This sort of AGF/AGI reading race can happen when unlinking inodes | ||
84 | * that require truncation and results in the AGI unlinked list | ||
85 | * modifications being lost. | ||
86 | */ | ||
87 | _XBF_PAGE_LOCKED = (1 << 22), | ||
69 | } xfs_buf_flags_t; | 88 | } xfs_buf_flags_t; |
70 | 89 | ||
71 | typedef enum { | 90 | typedef enum { |