diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-07-17 12:53:51 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:04 -0400 |
commit | 247e743cbe6e655768c3679f84821e03c1577902 (patch) | |
tree | edc2b27284365f019859a936885bd100960eb659 | |
parent | e6dcd2dc9c489108648e2ed543315dd134d50a9a (diff) |
Btrfs: Use async helpers to deal with pages that have been improperly dirtied
Higher layers sometimes call set_page_dirty without asking the filesystem
to help. This causes many problems for the data=ordered and cow code.
This commit detects pages that haven't been properly set up for IO and
kicks off an async helper to deal with them.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/ctree.h | 6 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 4 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 10 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 1 | ||||
-rw-r--r-- | fs/btrfs/file.c | 1 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 93 |
6 files changed, 106 insertions, 9 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ceebc052ddcb..4ddc8a8f82cd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -546,6 +546,12 @@ struct btrfs_fs_info { | |||
546 | struct btrfs_workers endio_workers; | 546 | struct btrfs_workers endio_workers; |
547 | struct btrfs_workers endio_write_workers; | 547 | struct btrfs_workers endio_write_workers; |
548 | struct btrfs_workers submit_workers; | 548 | struct btrfs_workers submit_workers; |
549 | /* | ||
550 | * fixup workers take dirty pages that didn't properly go through | ||
551 | * the COW mechanism and make them safe to write. This happens | ||
552 | * in the sys_munmap function call path | ||
553 | */ | ||
554 | struct btrfs_workers fixup_workers; | ||
549 | struct task_struct *transaction_kthread; | 555 | struct task_struct *transaction_kthread; |
550 | struct task_struct *cleaner_kthread; | 556 | struct task_struct *cleaner_kthread; |
551 | int thread_pool_size; | 557 | int thread_pool_size; |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4a5ebafb935a..66466d125c05 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1329,11 +1329,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1329 | */ | 1329 | */ |
1330 | btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); | 1330 | btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); |
1331 | btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); | 1331 | btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); |
1332 | btrfs_init_workers(&fs_info->fixup_workers, 1); | ||
1332 | btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); | 1333 | btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); |
1333 | btrfs_init_workers(&fs_info->endio_write_workers, | 1334 | btrfs_init_workers(&fs_info->endio_write_workers, |
1334 | fs_info->thread_pool_size); | 1335 | fs_info->thread_pool_size); |
1335 | btrfs_start_workers(&fs_info->workers, 1); | 1336 | btrfs_start_workers(&fs_info->workers, 1); |
1336 | btrfs_start_workers(&fs_info->submit_workers, 1); | 1337 | btrfs_start_workers(&fs_info->submit_workers, 1); |
1338 | btrfs_start_workers(&fs_info->fixup_workers, 1); | ||
1337 | btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); | 1339 | btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); |
1338 | btrfs_start_workers(&fs_info->endio_write_workers, | 1340 | btrfs_start_workers(&fs_info->endio_write_workers, |
1339 | fs_info->thread_pool_size); | 1341 | fs_info->thread_pool_size); |
@@ -1454,6 +1456,7 @@ fail_tree_root: | |||
1454 | fail_sys_array: | 1456 | fail_sys_array: |
1455 | fail_sb_buffer: | 1457 | fail_sb_buffer: |
1456 | extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); | 1458 | extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); |
1459 | btrfs_stop_workers(&fs_info->fixup_workers); | ||
1457 | btrfs_stop_workers(&fs_info->workers); | 1460 | btrfs_stop_workers(&fs_info->workers); |
1458 | btrfs_stop_workers(&fs_info->endio_workers); | 1461 | btrfs_stop_workers(&fs_info->endio_workers); |
1459 | btrfs_stop_workers(&fs_info->endio_write_workers); | 1462 | btrfs_stop_workers(&fs_info->endio_write_workers); |
@@ -1710,6 +1713,7 @@ int close_ctree(struct btrfs_root *root) | |||
1710 | 1713 | ||
1711 | truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); | 1714 | truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); |
1712 | 1715 | ||
1716 | btrfs_stop_workers(&fs_info->fixup_workers); | ||
1713 | btrfs_stop_workers(&fs_info->workers); | 1717 | btrfs_stop_workers(&fs_info->workers); |
1714 | btrfs_stop_workers(&fs_info->endio_workers); | 1718 | btrfs_stop_workers(&fs_info->endio_workers); |
1715 | btrfs_stop_workers(&fs_info->endio_write_workers); | 1719 | btrfs_stop_workers(&fs_info->endio_write_workers); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3f82a6e9ca4f..feff16cb9b40 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2050,6 +2050,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2050 | lock_extent(tree, start, page_end, GFP_NOFS); | 2050 | lock_extent(tree, start, page_end, GFP_NOFS); |
2051 | unlock_start = start; | 2051 | unlock_start = start; |
2052 | 2052 | ||
2053 | if (tree->ops && tree->ops->writepage_start_hook) { | ||
2054 | ret = tree->ops->writepage_start_hook(page, start, page_end); | ||
2055 | if (ret == -EAGAIN) { | ||
2056 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
2057 | redirty_page_for_writepage(wbc, page); | ||
2058 | unlock_page(page); | ||
2059 | return 0; | ||
2060 | } | ||
2061 | } | ||
2062 | |||
2053 | end = page_end; | 2063 | end = page_end; |
2054 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { | 2064 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { |
2055 | printk("found delalloc bits after lock_extent\n"); | 2065 | printk("found delalloc bits after lock_extent\n"); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2268a7995896..23affd27af5e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -30,6 +30,7 @@ typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | |||
30 | struct bio *bio, int mirror_num); | 30 | struct bio *bio, int mirror_num); |
31 | struct extent_io_ops { | 31 | struct extent_io_ops { |
32 | int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); | 32 | int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); |
33 | int (*writepage_start_hook)(struct page *page, u64 start, u64 end); | ||
33 | int (*writepage_io_hook)(struct page *page, u64 start, u64 end); | 34 | int (*writepage_io_hook)(struct page *page, u64 start, u64 end); |
34 | extent_submit_bio_hook_t *submit_bio_hook; | 35 | extent_submit_bio_hook_t *submit_bio_hook; |
35 | int (*merge_bio_hook)(struct page *page, unsigned long offset, | 36 | int (*merge_bio_hook)(struct page *page, unsigned long offset, |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 12e765f7e0d4..20928639d173 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -313,6 +313,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
313 | for (i = 0; i < num_pages; i++) { | 313 | for (i = 0; i < num_pages; i++) { |
314 | struct page *p = pages[i]; | 314 | struct page *p = pages[i]; |
315 | SetPageUptodate(p); | 315 | SetPageUptodate(p); |
316 | ClearPageChecked(p); | ||
316 | set_page_dirty(p); | 317 | set_page_dirty(p); |
317 | } | 318 | } |
318 | } else { | 319 | } else { |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c5a62f0b9595..47a008c19308 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -403,6 +403,87 @@ static int add_pending_csums(struct btrfs_trans_handle *trans, | |||
403 | return 0; | 403 | return 0; |
404 | } | 404 | } |
405 | 405 | ||
406 | struct btrfs_writepage_fixup { | ||
407 | struct page *page; | ||
408 | struct btrfs_work work; | ||
409 | }; | ||
410 | |||
411 | /* see btrfs_writepage_start_hook for details on why this is required */ | ||
412 | void btrfs_writepage_fixup_worker(struct btrfs_work *work) | ||
413 | { | ||
414 | struct btrfs_writepage_fixup *fixup; | ||
415 | struct btrfs_ordered_extent *ordered; | ||
416 | struct page *page; | ||
417 | struct inode *inode; | ||
418 | u64 page_start; | ||
419 | u64 page_end; | ||
420 | |||
421 | fixup = container_of(work, struct btrfs_writepage_fixup, work); | ||
422 | page = fixup->page; | ||
423 | |||
424 | lock_page(page); | ||
425 | if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { | ||
426 | ClearPageChecked(page); | ||
427 | goto out_page; | ||
428 | } | ||
429 | |||
430 | inode = page->mapping->host; | ||
431 | page_start = page_offset(page); | ||
432 | page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; | ||
433 | |||
434 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | ||
435 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
436 | if (ordered) | ||
437 | goto out; | ||
438 | |||
439 | set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, | ||
440 | GFP_NOFS); | ||
441 | ClearPageChecked(page); | ||
442 | out: | ||
443 | unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | ||
444 | out_page: | ||
445 | unlock_page(page); | ||
446 | page_cache_release(page); | ||
447 | } | ||
448 | |||
449 | /* | ||
450 | * There are a few paths in the higher layers of the kernel that directly | ||
451 | * set the page dirty bit without asking the filesystem if it is a | ||
452 | * good idea. This causes problems because we want to make sure COW | ||
453 | * properly happens and the data=ordered rules are followed. | ||
454 | * | ||
455 | * In our case any range that doesn't have the EXTENT_ORDERED bit set | ||
456 | * hasn't been properly set up for IO. We kick off an async process | ||
457 | * to fix it up. The async helper will wait for ordered extents, set | ||
458 | * the delalloc bit and make it safe to write the page. | ||
459 | */ | ||
460 | int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | ||
461 | { | ||
462 | struct inode *inode = page->mapping->host; | ||
463 | struct btrfs_writepage_fixup *fixup; | ||
464 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
465 | int ret; | ||
466 | |||
467 | ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, | ||
468 | EXTENT_ORDERED, 0); | ||
469 | if (ret) | ||
470 | return 0; | ||
471 | |||
472 | if (PageChecked(page)) | ||
473 | return -EAGAIN; | ||
474 | |||
475 | fixup = kzalloc(sizeof(*fixup), GFP_NOFS); | ||
476 | if (!fixup) | ||
477 | return -EAGAIN; | ||
478 | printk("queueing worker to fixup page %lu %Lu\n", inode->i_ino, page_offset(page)); | ||
479 | SetPageChecked(page); | ||
480 | page_cache_get(page); | ||
481 | fixup->work.func = btrfs_writepage_fixup_worker; | ||
482 | fixup->page = page; | ||
483 | btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); | ||
484 | return -EAGAIN; | ||
485 | } | ||
486 | |||
406 | int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 487 | int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
407 | struct extent_state *state, int uptodate) | 488 | struct extent_state *state, int uptodate) |
408 | { | 489 | { |
@@ -1263,6 +1344,7 @@ again: | |||
1263 | flush_dcache_page(page); | 1344 | flush_dcache_page(page); |
1264 | kunmap(page); | 1345 | kunmap(page); |
1265 | } | 1346 | } |
1347 | ClearPageChecked(page); | ||
1266 | set_page_dirty(page); | 1348 | set_page_dirty(page); |
1267 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 1349 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
1268 | 1350 | ||
@@ -2658,6 +2740,7 @@ again: | |||
2658 | flush_dcache_page(page); | 2740 | flush_dcache_page(page); |
2659 | kunmap(page); | 2741 | kunmap(page); |
2660 | } | 2742 | } |
2743 | ClearPageChecked(page); | ||
2661 | set_page_dirty(page); | 2744 | set_page_dirty(page); |
2662 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2745 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
2663 | 2746 | ||
@@ -3039,15 +3122,6 @@ out_fail: | |||
3039 | 3122 | ||
3040 | static int btrfs_set_page_dirty(struct page *page) | 3123 | static int btrfs_set_page_dirty(struct page *page) |
3041 | { | 3124 | { |
3042 | struct inode *inode = page->mapping->host; | ||
3043 | u64 page_start = page_offset(page); | ||
3044 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
3045 | |||
3046 | if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | ||
3047 | EXTENT_DELALLOC, 0)) { | ||
3048 | printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page)); | ||
3049 | WARN_ON(1); | ||
3050 | } | ||
3051 | return __set_page_dirty_nobuffers(page); | 3125 | return __set_page_dirty_nobuffers(page); |
3052 | } | 3126 | } |
3053 | 3127 | ||
@@ -3098,6 +3172,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
3098 | .readpage_io_hook = btrfs_readpage_io_hook, | 3172 | .readpage_io_hook = btrfs_readpage_io_hook, |
3099 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, | 3173 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, |
3100 | .writepage_end_io_hook = btrfs_writepage_end_io_hook, | 3174 | .writepage_end_io_hook = btrfs_writepage_end_io_hook, |
3175 | .writepage_start_hook = btrfs_writepage_start_hook, | ||
3101 | .readpage_io_failed_hook = btrfs_io_failed_hook, | 3176 | .readpage_io_failed_hook = btrfs_io_failed_hook, |
3102 | .set_bit_hook = btrfs_set_bit_hook, | 3177 | .set_bit_hook = btrfs_set_bit_hook, |
3103 | .clear_bit_hook = btrfs_clear_bit_hook, | 3178 | .clear_bit_hook = btrfs_clear_bit_hook, |