author    Chris Mason <chris.mason@oracle.com>  2009-09-02 16:53:46 -0400
committer Chris Mason <chris.mason@oracle.com>  2009-09-11 13:31:07 -0400
commit    8b62b72b26bcd72082c4a69d179dd906bcc22200 (patch)
tree      ceee20dfebe45654cb3a25d8916c195836cdbabf /fs/btrfs/inode.c
parent    9655d2982b53fdb38a9e0f2f11315b99b92d66e2 (diff)
Btrfs: Use PagePrivate2 to track pages in the data=ordered code.
Btrfs writes go through delalloc to the data=ordered code. This makes sure that all of the data is on disk before the metadata that references it. The tracking means that we have to make sure each page in an extent is fully written before we add that extent into the on-disk btree.

This was done in the past by setting the EXTENT_ORDERED bit for the range of an extent when it was added to the data=ordered code, and then clearing the EXTENT_ORDERED bit in the extent state tree as each page finished IO.

One of the reasons we had to do this was because sometimes pages are magically dirtied without page_mkwrite being called. The EXTENT_ORDERED bit is checked at writepage time, and if it isn't there, our page became dirty without going through the proper path. These bit operations make for a number of rbtree searches for each page, and can cause considerable lock contention.

This commit switches from the EXTENT_ORDERED bit to use PagePrivate2. As pages go into the ordered code, PagePrivate2 is set on each one. This is a cheap operation because we already have all the pages locked and ready to go. As IO finishes, the PagePrivate2 bit is cleared and the ordered accounting is updated for each page. At writepage time, if the PagePrivate2 bit is missing, we go into the writepage fixup code to handle improperly dirtied pages.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
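To make the switch concrete, here is a minimal sketch of the hot-path difference. It is not part of the patch: the helper names are hypothetical, the calls inside them are copied from the diff below, and the btrfs-internal headers of this tree are assumed, so it is only meant to be read alongside the diff.

/*
 * Sketch only: contrasts the old extent-state lookup with the new
 * per-page flag check used at writepage time.
 */
#include <linux/fs.h>
#include <linux/page-flags.h>
#include "btrfs_inode.h"	/* BTRFS_I() */
#include "extent_io.h"		/* test_range_bit(), EXTENT_ORDERED */

/* Old scheme: every check pays for an rbtree search (under the tree
 * lock) in the inode's io_tree.
 */
static int page_is_ordered_old(struct inode *inode, u64 start, u64 end)
{
	return test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
			      EXTENT_ORDERED, 0, NULL);
}

/* New scheme: a single atomic bit on the page itself.  The
 * test-and-clear flavor also consumes the marker, so the caller that
 * sees it set owns the ordered accounting for this page.
 */
static int page_is_ordered_new(struct page *page)
{
	return TestClearPagePrivate2(page);
}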
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--  fs/btrfs/inode.c | 47
1 file changed, 30 insertions(+), 17 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3f8e93de2989..739a245e25d6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -426,7 +426,7 @@ again:
 		extent_clear_unlock_delalloc(inode,
 					     &BTRFS_I(inode)->io_tree,
 					     start, end, NULL, 1, 0,
-					     0, 1, 1, 1);
+					     0, 1, 1, 1, 0);
 		ret = 0;
 		goto free_pages_out;
 	}
@@ -641,7 +641,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
 					async_extent->start,
 					async_extent->start +
 					async_extent->ram_size - 1,
-					NULL, 1, 1, 0, 1, 1, 0);
+					NULL, 1, 1, 0, 1, 1, 0, 0);
 
 		ret = btrfs_submit_compressed_write(inode,
 					async_extent->start,
@@ -714,7 +714,7 @@ static noinline int cow_file_range(struct inode *inode,
 			extent_clear_unlock_delalloc(inode,
 						     &BTRFS_I(inode)->io_tree,
 						     start, end, NULL, 1, 1,
-						     1, 1, 1, 1);
+						     1, 1, 1, 1, 0);
 			*nr_written = *nr_written +
 			     (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
 			*page_started = 1;
@@ -777,11 +777,14 @@ static noinline int cow_file_range(struct inode *inode,
 		/* we're not doing compressed IO, don't unlock the first
 		 * page (which the caller expects to stay locked), don't
 		 * clear any dirty bits and don't set any writeback bits
+		 *
+		 * Do set the Private2 bit so we know this page was properly
+		 * setup for writepage
 		 */
 		extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
 					     start, start + ram_size - 1,
 					     locked_page, unlock, 1,
-					     1, 0, 0, 0);
+					     1, 0, 0, 0, 1);
 		disk_num_bytes -= cur_alloc_size;
 		num_bytes -= cur_alloc_size;
 		alloc_hint = ins.objectid + ins.offset;
@@ -1102,7 +1105,7 @@ out_check:
 
 		extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
 					cur_offset, cur_offset + num_bytes - 1,
-					locked_page, 1, 1, 1, 0, 0, 0);
+					locked_page, 1, 1, 1, 0, 0, 0, 1);
 		cur_offset = extent_end;
 		if (cur_offset > end)
 			break;
@@ -1375,10 +1378,8 @@ again:
 	lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
 
 	/* already ordered? We're done */
-	if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
-			   EXTENT_ORDERED, 0, NULL)) {
+	if (PagePrivate2(page))
 		goto out;
-	}
 
 	ordered = btrfs_lookup_ordered_extent(inode, page_start);
 	if (ordered) {
@@ -1414,11 +1415,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
 	struct inode *inode = page->mapping->host;
 	struct btrfs_writepage_fixup *fixup;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	int ret;
 
-	ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
-			     EXTENT_ORDERED, 0, NULL);
-	if (ret)
+	/* this page is properly in the ordered list */
+	if (TestClearPagePrivate2(page))
 		return 0;
 
 	if (PageChecked(page))
@@ -1624,6 +1623,7 @@ nocow:
 static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
 				struct extent_state *state, int uptodate)
 {
+	ClearPagePrivate2(page);
 	return btrfs_finish_ordered_io(page->mapping->host, start, end);
 }
 
@@ -4403,13 +4403,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
 	u64 page_start = page_offset(page);
 	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
 
+
+	/*
+	 * we have the page locked, so new writeback can't start,
+	 * and the dirty bit won't be cleared while we are here.
+	 *
+	 * Wait for IO on this page so that we can safely clear
+	 * the PagePrivate2 bit and do ordered accounting
+	 */
 	wait_on_page_writeback(page);
+
 	tree = &BTRFS_I(page->mapping->host)->io_tree;
 	if (offset) {
 		btrfs_releasepage(page, GFP_NOFS);
 		return;
 	}
-
 	lock_extent(tree, page_start, page_end, GFP_NOFS);
 	ordered = btrfs_lookup_ordered_extent(page->mapping->host,
 					   page_offset(page));
@@ -4421,14 +4429,19 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
 		clear_extent_bit(tree, page_start, page_end,
 				 EXTENT_DIRTY | EXTENT_DELALLOC |
 				 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
-		btrfs_finish_ordered_io(page->mapping->host,
-					page_start, page_end);
+		/*
+		 * whoever cleared the private bit is responsible
+		 * for the finish_ordered_io
+		 */
+		if (TestClearPagePrivate2(page)) {
+			btrfs_finish_ordered_io(page->mapping->host,
+						page_start, page_end);
+		}
 		btrfs_put_ordered_extent(ordered);
 		lock_extent(tree, page_start, page_end, GFP_NOFS);
 	}
 	clear_extent_bit(tree, page_start, page_end,
-		 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
-		 EXTENT_ORDERED,
+		 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
 		 1, 1, NULL, GFP_NOFS);
 	__btrfs_releasepage(page, GFP_NOFS);
 
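The comment added in the final hunk ("whoever cleared the private bit is responsible for the finish_ordered_io") states the invariant the patch relies on: TestClearPagePrivate2() is an atomic test-and-clear, so of the paths that may tear a page down, only the one that actually flips the bit from set to clear takes on btrfs_finish_ordered_io() for that range, and the ordered accounting runs exactly once per page. A minimal sketch of that hand-off follows; the wrapper name is hypothetical, and btrfs_finish_ordered_io() is assumed to keep the prototype used at the call sites above.

#include <linux/fs.h>
#include <linux/page-flags.h>
#include "ordered-data.h"	/* btrfs_finish_ordered_io() */

/* Hypothetical wrapper, not a function in the btrfs tree. */
static void finish_ordered_range_once(struct page *page, u64 start, u64 end)
{
	/*
	 * Atomic test-and-clear: if two teardown paths race on this page,
	 * exactly one of them gets a non-zero return here and performs
	 * the ordered accounting; the other simply skips it.
	 */
	if (TestClearPagePrivate2(page))
		btrfs_finish_ordered_io(page->mapping->host, start, end);
}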