aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Curt Wohlgemuth <curtw@google.com> 2011-02-07 12:46:14 -0500
committer: Theodore Ts'o <tytso@mit.edu> 2011-02-07 12:46:14 -0500
commit: d50bdd5aa55127635fd8a5c74bd2abb256bd34e3 (patch)
tree: 3dbe0f5d06502e144c0398c71e8fe479a855d6f4
parent: dd68314ccf3fb918c1fb6471817edbc60ece4b52 (diff)
ext4: Fix data corruption with multi-block writepages support
This fixes a corruption problem with the multi-block writepages submittal change for ext4, from commit bd2d0210cf22f2bd0cef72eb97cf94fc7d31d8cc ("ext4: use bio layer instead of buffer layer in mpage_da_submit_io").

(Note that this corruption is not present in 2.6.37 on ext4, because the corruption was detected after the feature was merged in 2.6.37-rc1, and so it was turned off by adding a non-default mount option, mblk_io_submit. With this commit, which hopefully fixes the last of the bugs with this feature, we'll be able to turn on this performance feature by default in 2.6.38, and remove the mblk_io_submit option.)

The ext4 code path to bundle multiple pages for writeback in ext4_bio_write_page() had a bug: we should be clearing buffer head dirty flags *before* we submit the bio, not in the completion routine.

The patch below was tested on 2.6.37 under KVM with the postgresql script which was submitted by Jon Nelson as documented in commit 1449032be1. Without the patch, I'd hit the corruption problem about 50-70% of the time. With the patch, I executed the script > 100 times with no corruption seen.

I also fixed a bug to make sure ext4_end_bio() doesn't dereference the bio after the bio_put() call.

Reported-by: Jon Nelson <jnelson@jamponi.net>
Reported-by: Matthias Bayer <jackdachef@gmail.com>
Signed-off-by: Curt Wohlgemuth <curtw@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
-rw-r--r-- fs/ext4/page-io.c 11
1 files changed, 6 insertions, 5 deletions
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7270dcfca92..4e9b0a242f4 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -190,6 +190,7 @@ static void ext4_end_bio(struct bio *bio, int error)
190 struct inode *inode; 190 struct inode *inode;
191 unsigned long flags; 191 unsigned long flags;
192 int i; 192 int i;
193 sector_t bi_sector = bio->bi_sector;
193 194
194 BUG_ON(!io_end); 195 BUG_ON(!io_end);
195 bio->bi_private = NULL; 196 bio->bi_private = NULL;
@@ -207,9 +208,7 @@ static void ext4_end_bio(struct bio *bio, int error)
207 if (error) 208 if (error)
208 SetPageError(page); 209 SetPageError(page);
209 BUG_ON(!head); 210 BUG_ON(!head);
210 if (head->b_size == PAGE_CACHE_SIZE) 211 if (head->b_size != PAGE_CACHE_SIZE) {
211 clear_buffer_dirty(head);
212 else {
213 loff_t offset; 212 loff_t offset;
214 loff_t io_end_offset = io_end->offset + io_end->size; 213 loff_t io_end_offset = io_end->offset + io_end->size;
215 214
@@ -221,7 +220,6 @@ static void ext4_end_bio(struct bio *bio, int error)
221 if (error) 220 if (error)
222 buffer_io_error(bh); 221 buffer_io_error(bh);
223 222
224 clear_buffer_dirty(bh);
225 } 223 }
226 if (buffer_delay(bh)) 224 if (buffer_delay(bh))
227 partial_write = 1; 225 partial_write = 1;
@@ -257,7 +255,7 @@ static void ext4_end_bio(struct bio *bio, int error)
257 (unsigned long long) io_end->offset, 255 (unsigned long long) io_end->offset,
258 (long) io_end->size, 256 (long) io_end->size,
259 (unsigned long long) 257 (unsigned long long)
260 bio->bi_sector >> (inode->i_blkbits - 9)); 258 bi_sector >> (inode->i_blkbits - 9));
261 } 259 }
262 260
263 /* Add the io_end to per-inode completed io list*/ 261 /* Add the io_end to per-inode completed io list*/
@@ -380,6 +378,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
380 378
381 blocksize = 1 << inode->i_blkbits; 379 blocksize = 1 << inode->i_blkbits;
382 380
381 BUG_ON(!PageLocked(page));
383 BUG_ON(PageWriteback(page)); 382 BUG_ON(PageWriteback(page));
384 set_page_writeback(page); 383 set_page_writeback(page);
385 ClearPageError(page); 384 ClearPageError(page);
@@ -397,12 +396,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
397 for (bh = head = page_buffers(page), block_start = 0; 396 for (bh = head = page_buffers(page), block_start = 0;
398 bh != head || !block_start; 397 bh != head || !block_start;
399 block_start = block_end, bh = bh->b_this_page) { 398 block_start = block_end, bh = bh->b_this_page) {
399
400 block_end = block_start + blocksize; 400 block_end = block_start + blocksize;
401 if (block_start >= len) { 401 if (block_start >= len) {
402 clear_buffer_dirty(bh); 402 clear_buffer_dirty(bh);
403 set_buffer_uptodate(bh); 403 set_buffer_uptodate(bh);
404 continue; 404 continue;
405 } 405 }
406 clear_buffer_dirty(bh);
406 ret = io_submit_add_bh(io, io_page, inode, wbc, bh); 407 ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
407 if (ret) { 408 if (ret) {
408 /* 409 /*