about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-07-17 12:53:51 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:04 -0400
commit 247e743cbe6e655768c3679f84821e03c1577902 (patch)
tree edc2b27284365f019859a936885bd100960eb659 /fs/btrfs/inode.c
parent e6dcd2dc9c489108648e2ed543315dd134d50a9a (diff)
Btrfs: Use async helpers to deal with pages that have been improperly dirtied
Higher layers sometimes call set_page_dirty without asking the filesystem to help. This causes many problems for the data=ordered and cow code. This commit detects pages that haven't been properly setup for IO and kicks off an async helper to deal with them. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c | 93
1 file changed, 84 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c5a62f0b9595..47a008c19308 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -403,6 +403,87 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
403 return 0; 403 return 0;
404} 404}
405 405
406struct btrfs_writepage_fixup {
407 struct page *page;
408 struct btrfs_work work;
409};
410
411/* see btrfs_writepage_start_hook for details on why this is required */
412void btrfs_writepage_fixup_worker(struct btrfs_work *work)
413{
414 struct btrfs_writepage_fixup *fixup;
415 struct btrfs_ordered_extent *ordered;
416 struct page *page;
417 struct inode *inode;
418 u64 page_start;
419 u64 page_end;
420
421 fixup = container_of(work, struct btrfs_writepage_fixup, work);
422 page = fixup->page;
423
424 lock_page(page);
425 if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
426 ClearPageChecked(page);
427 goto out_page;
428 }
429
430 inode = page->mapping->host;
431 page_start = page_offset(page);
432 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
433
434 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
435 ordered = btrfs_lookup_ordered_extent(inode, page_start);
436 if (ordered)
437 goto out;
438
439 set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end,
440 GFP_NOFS);
441 ClearPageChecked(page);
442out:
443 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
444out_page:
445 unlock_page(page);
446 page_cache_release(page);
447}
448
449/*
450 * There are a few paths in the higher layers of the kernel that directly
451 * set the page dirty bit without asking the filesystem if it is a
452 * good idea. This causes problems because we want to make sure COW
453 * properly happens and the data=ordered rules are followed.
454 *
455 * In our case any range that doesn't have the EXTENT_ORDERED bit set
456 * hasn't been properly setup for IO. We kick off an async process
457 * to fix it up. The async helper will wait for ordered extents, set
458 * the delalloc bit and make it safe to write the page.
459 */
460int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
461{
462 struct inode *inode = page->mapping->host;
463 struct btrfs_writepage_fixup *fixup;
464 struct btrfs_root *root = BTRFS_I(inode)->root;
465 int ret;
466
467 ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
468 EXTENT_ORDERED, 0);
469 if (ret)
470 return 0;
471
472 if (PageChecked(page))
473 return -EAGAIN;
474
475 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
476 if (!fixup)
477 return -EAGAIN;
478printk("queueing worker to fixup page %lu %Lu\n", inode->i_ino, page_offset(page));
479 SetPageChecked(page);
480 page_cache_get(page);
481 fixup->work.func = btrfs_writepage_fixup_worker;
482 fixup->page = page;
483 btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
484 return -EAGAIN;
485}
486
406int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, 487int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
407 struct extent_state *state, int uptodate) 488 struct extent_state *state, int uptodate)
408{ 489{
@@ -1263,6 +1344,7 @@ again:
1263 flush_dcache_page(page); 1344 flush_dcache_page(page);
1264 kunmap(page); 1345 kunmap(page);
1265 } 1346 }
1347 ClearPageChecked(page);
1266 set_page_dirty(page); 1348 set_page_dirty(page);
1267 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 1349 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1268 1350
@@ -2658,6 +2740,7 @@ again:
2658 flush_dcache_page(page); 2740 flush_dcache_page(page);
2659 kunmap(page); 2741 kunmap(page);
2660 } 2742 }
2743 ClearPageChecked(page);
2661 set_page_dirty(page); 2744 set_page_dirty(page);
2662 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 2745 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2663 2746
@@ -3039,15 +3122,6 @@ out_fail:
3039 3122
3040static int btrfs_set_page_dirty(struct page *page) 3123static int btrfs_set_page_dirty(struct page *page)
3041{ 3124{
3042 struct inode *inode = page->mapping->host;
3043 u64 page_start = page_offset(page);
3044 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
3045
3046 if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
3047 EXTENT_DELALLOC, 0)) {
3048printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page));
3049WARN_ON(1);
3050 }
3051 return __set_page_dirty_nobuffers(page); 3125 return __set_page_dirty_nobuffers(page);
3052} 3126}
3053 3127
@@ -3098,6 +3172,7 @@ static struct extent_io_ops btrfs_extent_io_ops = {
3098 .readpage_io_hook = btrfs_readpage_io_hook, 3172 .readpage_io_hook = btrfs_readpage_io_hook,
3099 .readpage_end_io_hook = btrfs_readpage_end_io_hook, 3173 .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3100 .writepage_end_io_hook = btrfs_writepage_end_io_hook, 3174 .writepage_end_io_hook = btrfs_writepage_end_io_hook,
3175 .writepage_start_hook = btrfs_writepage_start_hook,
3101 .readpage_io_failed_hook = btrfs_io_failed_hook, 3176 .readpage_io_failed_hook = btrfs_io_failed_hook,
3102 .set_bit_hook = btrfs_set_bit_hook, 3177 .set_bit_hook = btrfs_set_bit_hook,
3103 .clear_bit_hook = btrfs_clear_bit_hook, 3178 .clear_bit_hook = btrfs_clear_bit_hook,