diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-07-21 10:29:44 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:05 -0400 |
commit | 4a09675279674041862d2210635b0cc1f60be28e (patch) | |
tree | 19e4736c062f87729dcdc1bd57f4919b3227ec32 | |
parent | e5a2217ef6ff088d08a27208929a6f9c635d672c (diff) |
Btrfs: Data ordered fixes
* In btrfs_delete_inode, wait for ordered extents after calling
truncate_inode_pages. This is much faster, and more correct.
* Properly clear out the PageChecked bit everywhere we redirty the page.
* Change the writepage fixup handler to lock the page range and check to
see if an ordered extent had been inserted since the improperly dirtied
page was discovered.
* Wait for ordered extents outside the transaction. This isn't required
for locking rules but does improve transaction latencies.
* Reduce contention on the alloc_mutex by dropping it while incrementing
refs on a node/leaf and while dropping refs on a leaf.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/extent-tree.c | 18 | ||||
-rw-r--r-- | fs/btrfs/file.c | 1 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 23 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 11 |
4 files changed, 43 insertions, 10 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index be2aef1cb7ae..ccd49322f793 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -934,7 +934,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
934 | if (!root->ref_cows) | 934 | if (!root->ref_cows) |
935 | return 0; | 935 | return 0; |
936 | 936 | ||
937 | mutex_lock(&root->fs_info->alloc_mutex); | ||
938 | level = btrfs_header_level(buf); | 937 | level = btrfs_header_level(buf); |
939 | nritems = btrfs_header_nritems(buf); | 938 | nritems = btrfs_header_nritems(buf); |
940 | for (i = 0; i < nritems; i++) { | 939 | for (i = 0; i < nritems; i++) { |
@@ -951,29 +950,36 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
951 | disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); | 950 | disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); |
952 | if (disk_bytenr == 0) | 951 | if (disk_bytenr == 0) |
953 | continue; | 952 | continue; |
953 | |||
954 | mutex_lock(&root->fs_info->alloc_mutex); | ||
954 | ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, | 955 | ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, |
955 | btrfs_file_extent_disk_num_bytes(buf, fi), | 956 | btrfs_file_extent_disk_num_bytes(buf, fi), |
956 | root->root_key.objectid, trans->transid, | 957 | root->root_key.objectid, trans->transid, |
957 | key.objectid, key.offset); | 958 | key.objectid, key.offset); |
959 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
958 | if (ret) { | 960 | if (ret) { |
959 | faili = i; | 961 | faili = i; |
962 | WARN_ON(1); | ||
960 | goto fail; | 963 | goto fail; |
961 | } | 964 | } |
962 | } else { | 965 | } else { |
963 | bytenr = btrfs_node_blockptr(buf, i); | 966 | bytenr = btrfs_node_blockptr(buf, i); |
964 | btrfs_node_key_to_cpu(buf, &key, i); | 967 | btrfs_node_key_to_cpu(buf, &key, i); |
968 | |||
969 | mutex_lock(&root->fs_info->alloc_mutex); | ||
965 | ret = __btrfs_inc_extent_ref(trans, root, bytenr, | 970 | ret = __btrfs_inc_extent_ref(trans, root, bytenr, |
966 | btrfs_level_size(root, level - 1), | 971 | btrfs_level_size(root, level - 1), |
967 | root->root_key.objectid, | 972 | root->root_key.objectid, |
968 | trans->transid, | 973 | trans->transid, |
969 | level - 1, key.objectid); | 974 | level - 1, key.objectid); |
975 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
970 | if (ret) { | 976 | if (ret) { |
971 | faili = i; | 977 | faili = i; |
978 | WARN_ON(1); | ||
972 | goto fail; | 979 | goto fail; |
973 | } | 980 | } |
974 | } | 981 | } |
975 | } | 982 | } |
976 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
977 | return 0; | 983 | return 0; |
978 | fail: | 984 | fail: |
979 | WARN_ON(1); | 985 | WARN_ON(1); |
@@ -1004,7 +1010,6 @@ fail: | |||
1004 | } | 1010 | } |
1005 | } | 1011 | } |
1006 | #endif | 1012 | #endif |
1007 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
1008 | return ret; | 1013 | return ret; |
1009 | } | 1014 | } |
1010 | 1015 | ||
@@ -2180,6 +2185,8 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
2180 | leaf_owner = btrfs_header_owner(leaf); | 2185 | leaf_owner = btrfs_header_owner(leaf); |
2181 | leaf_generation = btrfs_header_generation(leaf); | 2186 | leaf_generation = btrfs_header_generation(leaf); |
2182 | 2187 | ||
2188 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
2189 | |||
2183 | for (i = 0; i < nritems; i++) { | 2190 | for (i = 0; i < nritems; i++) { |
2184 | u64 disk_bytenr; | 2191 | u64 disk_bytenr; |
2185 | 2192 | ||
@@ -2197,12 +2204,17 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
2197 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | 2204 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
2198 | if (disk_bytenr == 0) | 2205 | if (disk_bytenr == 0) |
2199 | continue; | 2206 | continue; |
2207 | |||
2208 | mutex_lock(&root->fs_info->alloc_mutex); | ||
2200 | ret = __btrfs_free_extent(trans, root, disk_bytenr, | 2209 | ret = __btrfs_free_extent(trans, root, disk_bytenr, |
2201 | btrfs_file_extent_disk_num_bytes(leaf, fi), | 2210 | btrfs_file_extent_disk_num_bytes(leaf, fi), |
2202 | leaf_owner, leaf_generation, | 2211 | leaf_owner, leaf_generation, |
2203 | key.objectid, key.offset, 0); | 2212 | key.objectid, key.offset, 0); |
2213 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
2204 | BUG_ON(ret); | 2214 | BUG_ON(ret); |
2205 | } | 2215 | } |
2216 | |||
2217 | mutex_lock(&root->fs_info->alloc_mutex); | ||
2206 | return 0; | 2218 | return 0; |
2207 | } | 2219 | } |
2208 | 2220 | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eccdb9562ba8..591a30208acd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -75,6 +75,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
75 | for (i = 0; i < num_pages; i++) { | 75 | for (i = 0; i < num_pages; i++) { |
76 | if (!pages[i]) | 76 | if (!pages[i]) |
77 | break; | 77 | break; |
78 | ClearPageChecked(pages[i]); | ||
78 | unlock_page(pages[i]); | 79 | unlock_page(pages[i]); |
79 | mark_page_accessed(pages[i]); | 80 | mark_page_accessed(pages[i]); |
80 | page_cache_release(pages[i]); | 81 | page_cache_release(pages[i]); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 50ee4befac8e..8fb6dc25e7a5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -418,7 +418,7 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work) | |||
418 | 418 | ||
419 | fixup = container_of(work, struct btrfs_writepage_fixup, work); | 419 | fixup = container_of(work, struct btrfs_writepage_fixup, work); |
420 | page = fixup->page; | 420 | page = fixup->page; |
421 | 421 | again: | |
422 | lock_page(page); | 422 | lock_page(page); |
423 | if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { | 423 | if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { |
424 | ClearPageChecked(page); | 424 | ClearPageChecked(page); |
@@ -430,9 +430,21 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work) | |||
430 | page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; | 430 | page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; |
431 | 431 | ||
432 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | 432 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); |
433 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 433 | |
434 | if (ordered) | 434 | /* already ordered? We're done */ |
435 | if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | ||
436 | EXTENT_ORDERED, 0)) { | ||
435 | goto out; | 437 | goto out; |
438 | } | ||
439 | |||
440 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
441 | if (ordered) { | ||
442 | unlock_extent(&BTRFS_I(inode)->io_tree, page_start, | ||
443 | page_end, GFP_NOFS); | ||
444 | unlock_page(page); | ||
445 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
446 | goto again; | ||
447 | } | ||
436 | 448 | ||
437 | set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, | 449 | set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, |
438 | GFP_NOFS); | 450 | GFP_NOFS); |
@@ -1465,11 +1477,11 @@ void btrfs_delete_inode(struct inode *inode) | |||
1465 | unsigned long nr; | 1477 | unsigned long nr; |
1466 | int ret; | 1478 | int ret; |
1467 | 1479 | ||
1468 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
1469 | truncate_inode_pages(&inode->i_data, 0); | 1480 | truncate_inode_pages(&inode->i_data, 0); |
1470 | if (is_bad_inode(inode)) { | 1481 | if (is_bad_inode(inode)) { |
1471 | goto no_delete; | 1482 | goto no_delete; |
1472 | } | 1483 | } |
1484 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
1473 | 1485 | ||
1474 | btrfs_i_size_write(inode, 0); | 1486 | btrfs_i_size_write(inode, 0); |
1475 | trans = btrfs_start_transaction(root, 1); | 1487 | trans = btrfs_start_transaction(root, 1); |
@@ -2707,6 +2719,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
2707 | 1, 1, GFP_NOFS); | 2719 | 1, 1, GFP_NOFS); |
2708 | __btrfs_releasepage(page, GFP_NOFS); | 2720 | __btrfs_releasepage(page, GFP_NOFS); |
2709 | 2721 | ||
2722 | ClearPageChecked(page); | ||
2710 | if (PagePrivate(page)) { | 2723 | if (PagePrivate(page)) { |
2711 | invalidate_extent_lru(tree, page_offset(page), | 2724 | invalidate_extent_lru(tree, page_offset(page), |
2712 | PAGE_CACHE_SIZE); | 2725 | PAGE_CACHE_SIZE); |
@@ -2818,10 +2831,10 @@ static void btrfs_truncate(struct inode *inode) | |||
2818 | return; | 2831 | return; |
2819 | 2832 | ||
2820 | btrfs_truncate_page(inode->i_mapping, inode->i_size); | 2833 | btrfs_truncate_page(inode->i_mapping, inode->i_size); |
2834 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | ||
2821 | 2835 | ||
2822 | trans = btrfs_start_transaction(root, 1); | 2836 | trans = btrfs_start_transaction(root, 1); |
2823 | btrfs_set_trans_block_group(trans, inode); | 2837 | btrfs_set_trans_block_group(trans, inode); |
2824 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | ||
2825 | btrfs_i_size_write(inode, inode->i_size); | 2838 | btrfs_i_size_write(inode, inode->i_size); |
2826 | 2839 | ||
2827 | /* FIXME, add redo link to tree so we don't leak on crash */ | 2840 | /* FIXME, add redo link to tree so we don't leak on crash */ |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index c2b4a9c4ddb6..0d87795fdd8f 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -336,7 +336,7 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
336 | orig_end = start + len - 1; | 336 | orig_end = start + len - 1; |
337 | wait_end = orig_end; | 337 | wait_end = orig_end; |
338 | } | 338 | } |
339 | 339 | again: | |
340 | /* start IO across the range first to instantiate any delalloc | 340 | /* start IO across the range first to instantiate any delalloc |
341 | * extents | 341 | * extents |
342 | */ | 342 | */ |
@@ -369,6 +369,14 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
369 | break; | 369 | break; |
370 | end--; | 370 | end--; |
371 | } | 371 | } |
372 | if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | ||
373 | EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { | ||
374 | printk("inode %lu still ordered or delalloc after wait " | ||
375 | "%llu %llu\n", inode->i_ino, | ||
376 | (unsigned long long)start, | ||
377 | (unsigned long long)orig_end); | ||
378 | goto again; | ||
379 | } | ||
372 | } | 380 | } |
373 | 381 | ||
374 | /* | 382 | /* |
@@ -545,7 +553,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) | |||
545 | sector_sums = &ordered_sum->sums; | 553 | sector_sums = &ordered_sum->sums; |
546 | for (i = 0; i < num_sectors; i++) { | 554 | for (i = 0; i < num_sectors; i++) { |
547 | if (sector_sums[i].offset == offset) { | 555 | if (sector_sums[i].offset == offset) { |
548 | printk("find ordered sum inode %lu offset %Lu\n", inode->i_ino, offset); | ||
549 | *sum = sector_sums[i].sum; | 556 | *sum = sector_sums[i].sum; |
550 | ret = 0; | 557 | ret = 0; |
551 | goto out; | 558 | goto out; |