aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-07-17 12:53:51 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:04 -0400
commit: 247e743cbe6e655768c3679f84821e03c1577902 (patch)
tree: edc2b27284365f019859a936885bd100960eb659
parent: e6dcd2dc9c489108648e2ed543315dd134d50a9a (diff)
Btrfs: Use async helpers to deal with pages that have been improperly dirtied
Higher layers sometimes call set_page_dirty without asking the filesystem to help. This causes many problems for the data=ordered and COW code. This commit detects pages that haven't been properly set up for IO and kicks off an async helper to deal with them. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/ctree.h 6
-rw-r--r-- fs/btrfs/disk-io.c 4
-rw-r--r-- fs/btrfs/extent_io.c 10
-rw-r--r-- fs/btrfs/extent_io.h 1
-rw-r--r-- fs/btrfs/file.c 1
-rw-r--r-- fs/btrfs/inode.c 93
6 files changed, 106 insertions, 9 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ceebc052ddcb..4ddc8a8f82cd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -546,6 +546,12 @@ struct btrfs_fs_info {
546 struct btrfs_workers endio_workers; 546 struct btrfs_workers endio_workers;
547 struct btrfs_workers endio_write_workers; 547 struct btrfs_workers endio_write_workers;
548 struct btrfs_workers submit_workers; 548 struct btrfs_workers submit_workers;
549 /*
550 * fixup workers take dirty pages that didn't properly go through
551 * the COW mechanism and make them safe to write. This happens
552 * on the sys_munmap function call path.
553 */
554 struct btrfs_workers fixup_workers;
549 struct task_struct *transaction_kthread; 555 struct task_struct *transaction_kthread;
550 struct task_struct *cleaner_kthread; 556 struct task_struct *cleaner_kthread;
551 int thread_pool_size; 557 int thread_pool_size;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4a5ebafb935a..66466d125c05 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1329,11 +1329,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1329 */ 1329 */
1330 btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); 1330 btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
1331 btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); 1331 btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
1332 btrfs_init_workers(&fs_info->fixup_workers, 1);
1332 btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); 1333 btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
1333 btrfs_init_workers(&fs_info->endio_write_workers, 1334 btrfs_init_workers(&fs_info->endio_write_workers,
1334 fs_info->thread_pool_size); 1335 fs_info->thread_pool_size);
1335 btrfs_start_workers(&fs_info->workers, 1); 1336 btrfs_start_workers(&fs_info->workers, 1);
1336 btrfs_start_workers(&fs_info->submit_workers, 1); 1337 btrfs_start_workers(&fs_info->submit_workers, 1);
1338 btrfs_start_workers(&fs_info->fixup_workers, 1);
1337 btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); 1339 btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
1338 btrfs_start_workers(&fs_info->endio_write_workers, 1340 btrfs_start_workers(&fs_info->endio_write_workers,
1339 fs_info->thread_pool_size); 1341 fs_info->thread_pool_size);
@@ -1454,6 +1456,7 @@ fail_tree_root:
1454fail_sys_array: 1456fail_sys_array:
1455fail_sb_buffer: 1457fail_sb_buffer:
1456 extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); 1458 extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
1459 btrfs_stop_workers(&fs_info->fixup_workers);
1457 btrfs_stop_workers(&fs_info->workers); 1460 btrfs_stop_workers(&fs_info->workers);
1458 btrfs_stop_workers(&fs_info->endio_workers); 1461 btrfs_stop_workers(&fs_info->endio_workers);
1459 btrfs_stop_workers(&fs_info->endio_write_workers); 1462 btrfs_stop_workers(&fs_info->endio_write_workers);
@@ -1710,6 +1713,7 @@ int close_ctree(struct btrfs_root *root)
1710 1713
1711 truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); 1714 truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
1712 1715
1716 btrfs_stop_workers(&fs_info->fixup_workers);
1713 btrfs_stop_workers(&fs_info->workers); 1717 btrfs_stop_workers(&fs_info->workers);
1714 btrfs_stop_workers(&fs_info->endio_workers); 1718 btrfs_stop_workers(&fs_info->endio_workers);
1715 btrfs_stop_workers(&fs_info->endio_write_workers); 1719 btrfs_stop_workers(&fs_info->endio_write_workers);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3f82a6e9ca4f..feff16cb9b40 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2050,6 +2050,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2050 lock_extent(tree, start, page_end, GFP_NOFS); 2050 lock_extent(tree, start, page_end, GFP_NOFS);
2051 unlock_start = start; 2051 unlock_start = start;
2052 2052
2053 if (tree->ops && tree->ops->writepage_start_hook) {
2054 ret = tree->ops->writepage_start_hook(page, start, page_end);
2055 if (ret == -EAGAIN) {
2056 unlock_extent(tree, start, page_end, GFP_NOFS);
2057 redirty_page_for_writepage(wbc, page);
2058 unlock_page(page);
2059 return 0;
2060 }
2061 }
2062
2053 end = page_end; 2063 end = page_end;
2054 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { 2064 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
2055 printk("found delalloc bits after lock_extent\n"); 2065 printk("found delalloc bits after lock_extent\n");
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 2268a7995896..23affd27af5e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -30,6 +30,7 @@ typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
30 struct bio *bio, int mirror_num); 30 struct bio *bio, int mirror_num);
31struct extent_io_ops { 31struct extent_io_ops {
32 int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); 32 int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
33 int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
33 int (*writepage_io_hook)(struct page *page, u64 start, u64 end); 34 int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
34 extent_submit_bio_hook_t *submit_bio_hook; 35 extent_submit_bio_hook_t *submit_bio_hook;
35 int (*merge_bio_hook)(struct page *page, unsigned long offset, 36 int (*merge_bio_hook)(struct page *page, unsigned long offset,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 12e765f7e0d4..20928639d173 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -313,6 +313,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
313 for (i = 0; i < num_pages; i++) { 313 for (i = 0; i < num_pages; i++) {
314 struct page *p = pages[i]; 314 struct page *p = pages[i];
315 SetPageUptodate(p); 315 SetPageUptodate(p);
316 ClearPageChecked(p);
316 set_page_dirty(p); 317 set_page_dirty(p);
317 } 318 }
318 } else { 319 } else {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c5a62f0b9595..47a008c19308 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -403,6 +403,87 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
403 return 0; 403 return 0;
404} 404}
405 405
406struct btrfs_writepage_fixup {
407 struct page *page;
408 struct btrfs_work work;
409};
410
411/* see btrfs_writepage_start_hook for details on why this is required */
412void btrfs_writepage_fixup_worker(struct btrfs_work *work)
413{
414 struct btrfs_writepage_fixup *fixup;
415 struct btrfs_ordered_extent *ordered;
416 struct page *page;
417 struct inode *inode;
418 u64 page_start;
419 u64 page_end;
420
421 fixup = container_of(work, struct btrfs_writepage_fixup, work);
422 page = fixup->page;
423
424 lock_page(page);
425 if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
426 ClearPageChecked(page);
427 goto out_page;
428 }
429
430 inode = page->mapping->host;
431 page_start = page_offset(page);
432 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
433
434 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
435 ordered = btrfs_lookup_ordered_extent(inode, page_start);
436 if (ordered)
437 goto out;
438
439 set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end,
440 GFP_NOFS);
441 ClearPageChecked(page);
442out:
443 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
444out_page:
445 unlock_page(page);
446 page_cache_release(page);
447}
448
449/*
450 * There are a few paths in the higher layers of the kernel that directly
451 * set the page dirty bit without asking the filesystem if it is a
452 * good idea. This causes problems because we want to make sure COW
453 * properly happens and the data=ordered rules are followed.
454 *
455 * In our case any range that doesn't have the EXTENT_ORDERED bit set
456 * hasn't been properly set up for IO. We kick off an async process
457 * to fix it up. The async helper will wait for ordered extents, set
458 * the delalloc bit and make it safe to write the page.
459 */
460int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
461{
462 struct inode *inode = page->mapping->host;
463 struct btrfs_writepage_fixup *fixup;
464 struct btrfs_root *root = BTRFS_I(inode)->root;
465 int ret;
466
467 ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
468 EXTENT_ORDERED, 0);
469 if (ret)
470 return 0;
471
472 if (PageChecked(page))
473 return -EAGAIN;
474
475 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
476 if (!fixup)
477 return -EAGAIN;
478printk("queueing worker to fixup page %lu %Lu\n", inode->i_ino, page_offset(page));
479 SetPageChecked(page);
480 page_cache_get(page);
481 fixup->work.func = btrfs_writepage_fixup_worker;
482 fixup->page = page;
483 btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
484 return -EAGAIN;
485}
486
406int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, 487int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
407 struct extent_state *state, int uptodate) 488 struct extent_state *state, int uptodate)
408{ 489{
@@ -1263,6 +1344,7 @@ again:
1263 flush_dcache_page(page); 1344 flush_dcache_page(page);
1264 kunmap(page); 1345 kunmap(page);
1265 } 1346 }
1347 ClearPageChecked(page);
1266 set_page_dirty(page); 1348 set_page_dirty(page);
1267 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 1349 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1268 1350
@@ -2658,6 +2740,7 @@ again:
2658 flush_dcache_page(page); 2740 flush_dcache_page(page);
2659 kunmap(page); 2741 kunmap(page);
2660 } 2742 }
2743 ClearPageChecked(page);
2661 set_page_dirty(page); 2744 set_page_dirty(page);
2662 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 2745 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2663 2746
@@ -3039,15 +3122,6 @@ out_fail:
3039 3122
3040static int btrfs_set_page_dirty(struct page *page) 3123static int btrfs_set_page_dirty(struct page *page)
3041{ 3124{
3042 struct inode *inode = page->mapping->host;
3043 u64 page_start = page_offset(page);
3044 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
3045
3046 if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
3047 EXTENT_DELALLOC, 0)) {
3048printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page));
3049WARN_ON(1);
3050 }
3051 return __set_page_dirty_nobuffers(page); 3125 return __set_page_dirty_nobuffers(page);
3052} 3126}
3053 3127
@@ -3098,6 +3172,7 @@ static struct extent_io_ops btrfs_extent_io_ops = {
3098 .readpage_io_hook = btrfs_readpage_io_hook, 3172 .readpage_io_hook = btrfs_readpage_io_hook,
3099 .readpage_end_io_hook = btrfs_readpage_end_io_hook, 3173 .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3100 .writepage_end_io_hook = btrfs_writepage_end_io_hook, 3174 .writepage_end_io_hook = btrfs_writepage_end_io_hook,
3175 .writepage_start_hook = btrfs_writepage_start_hook,
3101 .readpage_io_failed_hook = btrfs_io_failed_hook, 3176 .readpage_io_failed_hook = btrfs_io_failed_hook,
3102 .set_bit_hook = btrfs_set_bit_hook, 3177 .set_bit_hook = btrfs_set_bit_hook,
3103 .clear_bit_hook = btrfs_clear_bit_hook, 3178 .clear_bit_hook = btrfs_clear_bit_hook,