path: root/fs/btrfs/ordered-data.c
author	Chris Mason <chris.mason@oracle.com>	2008-07-22 11:18:09 -0400
committer	Chris Mason <chris.mason@oracle.com>	2008-09-25 11:04:05 -0400
commit	f421950f86bf96a11fef932e167ab2e70d4c43a0 (patch)
tree	a2b62b942b023e37b6aae39891c2b314d8d8a3fb /fs/btrfs/ordered-data.c
parent	a61e6f29dc7c9d56a776a518eed92bbc61848263 (diff)
Btrfs: Fix some data=ordered related data corruptions
Stress testing was showing data checksum errors, most of which were caused by a lookup bug in the extent_map tree. The tree was caching the last pointer returned, and searches would check the last pointer first.

But search callers also expect the search to return the very first matching extent in the range, which wasn't always true with the last-pointer usage.

For now, the code to cache the last return value is just removed. It is easy to fix, but I think lookups are rare enough that it isn't required anymore.

This commit also replaces do_sync_mapping_range with a local copy of the related functions.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
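To make the lookup bug concrete, here is a minimal user-space sketch. It is not the actual extent_map code; the structures, ranges, and the lookup() helper below are all hypothetical. It shows how consulting a cached last pointer first can return a matching extent that is not the first one in the searched range:

/*
 * Hypothetical sketch of the cached-last-pointer bug class described
 * above; none of these names exist in btrfs.
 */
#include <stdio.h>
#include <stddef.h>

struct extent { unsigned long start, end; };

/* Two adjacent extents covering [0,4095] and [4096,8191]. */
static struct extent extents[] = {
	{ 0, 4095 },
	{ 4096, 8191 },
};
static struct extent *cached;	/* last extent returned by lookup() */

static struct extent *lookup(unsigned long start, unsigned long end)
{
	size_t i;

	/* Buggy fast path: any overlap with the cached entry wins,
	 * even when an earlier extent also overlaps the range. */
	if (cached && cached->start <= end && cached->end >= start)
		return cached;

	for (i = 0; i < sizeof(extents) / sizeof(extents[0]); i++) {
		if (extents[i].start <= end && extents[i].end >= start)
			return cached = &extents[i];
	}
	return NULL;
}

int main(void)
{
	/* Warm the cache with the second extent... */
	lookup(5000, 5000);

	/* ...then search a range whose *first* match is extents[0]. */
	struct extent *em = lookup(0, 8191);

	/* Prints 4096, not the expected 0: a caller assuming the first
	 * matching extent would operate on the wrong data. */
	printf("got extent starting at %lu\n", em->start);
	return 0;
}

Dropping the cached-pointer fast path, as this commit does for the extent_map tree, restores the first-match guarantee at the cost of a full tree search on every lookup.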
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r--	fs/btrfs/ordered-data.c	115
1 file changed, 97 insertions(+), 18 deletions(-)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 0d87795fdd8f..830dbaea6853 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -19,6 +19,8 @@
 #include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
 #include "ctree.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
@@ -307,12 +309,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
 	 * start IO on any dirty ones so the wait doesn't stall waiting
 	 * for pdflush to find them
 	 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
-	do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
-#else
-	do_sync_mapping_range(inode->i_mapping, start, end,
-			      SYNC_FILE_RANGE_WRITE);
-#endif
+	btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE);
 	if (wait)
 		wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
 						 &entry->flags));
@@ -327,28 +324,26 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 	u64 orig_end;
 	u64 wait_end;
 	struct btrfs_ordered_extent *ordered;
-	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
 
 	if (start + len < start) {
-		wait_end = (inode->i_size + mask) & ~mask;
-		orig_end = (u64)-1;
+		orig_end = INT_LIMIT(loff_t);
 	} else {
 		orig_end = start + len - 1;
-		wait_end = orig_end;
+		if (orig_end > INT_LIMIT(loff_t))
+			orig_end = INT_LIMIT(loff_t);
 	}
+	wait_end = orig_end;
 again:
 	/* start IO across the range first to instantiate any delalloc
 	 * extents
 	 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
-	do_sync_file_range(file, start, wait_end, SYNC_FILE_RANGE_WRITE);
-#else
-	do_sync_mapping_range(inode->i_mapping, start, wait_end,
-			      SYNC_FILE_RANGE_WRITE);
-#endif
-	end = orig_end;
-	wait_on_extent_writeback(&BTRFS_I(inode)->io_tree, start, orig_end);
+	btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE);
+
+	btrfs_wait_on_page_writeback_range(inode->i_mapping,
+					   start >> PAGE_CACHE_SHIFT,
+					   orig_end >> PAGE_CACHE_SHIFT);
 
+	end = orig_end;
 	while(1) {
 		ordered = btrfs_lookup_first_ordered_extent(inode, end);
 		if (!ordered) {
@@ -565,3 +560,87 @@ out:
 	return ret;
 }
 
+
+/**
+ * taken from mm/filemap.c because it isn't exported
+ *
+ * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
+ * @mapping:	address space structure to write
+ * @start:	offset in bytes where the range starts
+ * @end:	offset in bytes where the range ends (inclusive)
+ * @sync_mode:	enable synchronous operation
+ *
+ * Start writeback against all of a mapping's dirty pages that lie
+ * within the byte offsets <start, end> inclusive.
+ *
+ * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
+ * opposed to a regular memory cleansing writeback.  The difference between
+ * these two operations is that if a dirty page/buffer is encountered, it must
+ * be waited upon, and not just skipped over.
+ */
+int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
+			   loff_t end, int sync_mode)
+{
+	struct writeback_control wbc = {
+		.sync_mode = sync_mode,
+		.nr_to_write = mapping->nrpages * 2,
+		.range_start = start,
+		.range_end = end,
+		.for_writepages = 1,
+	};
+	return btrfs_writepages(mapping, &wbc);
+}
+
+/**
+ * taken from mm/filemap.c because it isn't exported
+ *
+ * wait_on_page_writeback_range - wait for writeback to complete
+ * @mapping:	target address_space
+ * @start:	beginning page index
+ * @end:	ending page index
+ *
+ * Wait for writeback to complete against pages indexed by start->end
+ * inclusive
+ */
+int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
+				       pgoff_t start, pgoff_t end)
+{
+	struct pagevec pvec;
+	int nr_pages;
+	int ret = 0;
+	pgoff_t index;
+
+	if (end < start)
+		return 0;
+
+	pagevec_init(&pvec, 0);
+	index = start;
+	while ((index <= end) &&
+	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+			PAGECACHE_TAG_WRITEBACK,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
+		unsigned i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			/* until radix tree lookup accepts end_index */
+			if (page->index > end)
+				continue;
+
+			wait_on_page_writeback(page);
+			if (PageError(page))
+				ret = -EIO;
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+
+	/* Check for outstanding write errors */
+	if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
+		ret = -ENOSPC;
+	if (test_and_clear_bit(AS_EIO, &mapping->flags))
+		ret = -EIO;
+
+	return ret;
+}
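Taken together, the two copied helpers reproduce the flush-then-wait behavior that the removed do_sync_mapping_range(SYNC_FILE_RANGE_WRITE) call provided. A sketch of the pairing, mirroring the btrfs_wait_ordered_range() hunk above; the wrapper name is hypothetical and not part of this patch:

/*
 * Sketch only: how the helpers above pair up.  The function name is
 * invented for illustration; the patch calls the two helpers inline.
 */
static int btrfs_flush_and_wait_range(struct address_space *mapping,
				      u64 start, u64 end)
{
	int ret;

	/* Start non-blocking writeback across the byte range so any
	 * delalloc extents are instantiated and pages enter the
	 * writeback state. */
	ret = btrfs_fdatawrite_range(mapping, start, end, WB_SYNC_NONE);
	if (ret)
		return ret;

	/* Block until every page in the range leaves writeback.  The
	 * wait helper takes page indexes, not byte offsets, hence the
	 * PAGE_CACHE_SHIFT conversions. */
	return btrfs_wait_on_page_writeback_range(mapping,
						  start >> PAGE_CACHE_SHIFT,
						  end >> PAGE_CACHE_SHIFT);
}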