aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-07-21 10:29:44 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:05 -0400
commit: 4a09675279674041862d2210635b0cc1f60be28e (patch)
tree: 19e4736c062f87729dcdc1bd57f4919b3227ec32 /fs/btrfs
parent: e5a2217ef6ff088d08a27208929a6f9c635d672c (diff)
Btrfs: Data ordered fixes
* In btrfs_delete_inode, wait for ordered extents after calling truncate_inode_pages. This is much faster, and more correct.
* Properly clear out the PageChecked bit everywhere we redirty the page.
* Change the writepage fixup handler to lock the page range and check to see if an ordered extent had been inserted since the improperly dirtied page was discovered.
* Wait for ordered extents outside the transaction. This isn't required for locking rules but does improve transaction latencies.
* Reduce contention on the alloc_mutex by dropping it while incrementing refs on a node/leaf and while dropping refs on a leaf.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/extent-tree.c 18
-rw-r--r-- fs/btrfs/file.c 1
-rw-r--r-- fs/btrfs/inode.c 23
-rw-r--r-- fs/btrfs/ordered-data.c 11
4 files changed, 43 insertions, 10 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index be2aef1cb7ae..ccd49322f793 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -934,7 +934,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
934 if (!root->ref_cows) 934 if (!root->ref_cows)
935 return 0; 935 return 0;
936 936
937 mutex_lock(&root->fs_info->alloc_mutex);
938 level = btrfs_header_level(buf); 937 level = btrfs_header_level(buf);
939 nritems = btrfs_header_nritems(buf); 938 nritems = btrfs_header_nritems(buf);
940 for (i = 0; i < nritems; i++) { 939 for (i = 0; i < nritems; i++) {
@@ -951,29 +950,36 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
951 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 950 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
952 if (disk_bytenr == 0) 951 if (disk_bytenr == 0)
953 continue; 952 continue;
953
954 mutex_lock(&root->fs_info->alloc_mutex);
954 ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, 955 ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
955 btrfs_file_extent_disk_num_bytes(buf, fi), 956 btrfs_file_extent_disk_num_bytes(buf, fi),
956 root->root_key.objectid, trans->transid, 957 root->root_key.objectid, trans->transid,
957 key.objectid, key.offset); 958 key.objectid, key.offset);
959 mutex_unlock(&root->fs_info->alloc_mutex);
958 if (ret) { 960 if (ret) {
959 faili = i; 961 faili = i;
962 WARN_ON(1);
960 goto fail; 963 goto fail;
961 } 964 }
962 } else { 965 } else {
963 bytenr = btrfs_node_blockptr(buf, i); 966 bytenr = btrfs_node_blockptr(buf, i);
964 btrfs_node_key_to_cpu(buf, &key, i); 967 btrfs_node_key_to_cpu(buf, &key, i);
968
969 mutex_lock(&root->fs_info->alloc_mutex);
965 ret = __btrfs_inc_extent_ref(trans, root, bytenr, 970 ret = __btrfs_inc_extent_ref(trans, root, bytenr,
966 btrfs_level_size(root, level - 1), 971 btrfs_level_size(root, level - 1),
967 root->root_key.objectid, 972 root->root_key.objectid,
968 trans->transid, 973 trans->transid,
969 level - 1, key.objectid); 974 level - 1, key.objectid);
975 mutex_unlock(&root->fs_info->alloc_mutex);
970 if (ret) { 976 if (ret) {
971 faili = i; 977 faili = i;
978 WARN_ON(1);
972 goto fail; 979 goto fail;
973 } 980 }
974 } 981 }
975 } 982 }
976 mutex_unlock(&root->fs_info->alloc_mutex);
977 return 0; 983 return 0;
978fail: 984fail:
979 WARN_ON(1); 985 WARN_ON(1);
@@ -1004,7 +1010,6 @@ fail:
1004 } 1010 }
1005 } 1011 }
1006#endif 1012#endif
1007 mutex_unlock(&root->fs_info->alloc_mutex);
1008 return ret; 1013 return ret;
1009} 1014}
1010 1015
@@ -2180,6 +2185,8 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2180 leaf_owner = btrfs_header_owner(leaf); 2185 leaf_owner = btrfs_header_owner(leaf);
2181 leaf_generation = btrfs_header_generation(leaf); 2186 leaf_generation = btrfs_header_generation(leaf);
2182 2187
2188 mutex_unlock(&root->fs_info->alloc_mutex);
2189
2183 for (i = 0; i < nritems; i++) { 2190 for (i = 0; i < nritems; i++) {
2184 u64 disk_bytenr; 2191 u64 disk_bytenr;
2185 2192
@@ -2197,12 +2204,17 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2197 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 2204 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2198 if (disk_bytenr == 0) 2205 if (disk_bytenr == 0)
2199 continue; 2206 continue;
2207
2208 mutex_lock(&root->fs_info->alloc_mutex);
2200 ret = __btrfs_free_extent(trans, root, disk_bytenr, 2209 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2201 btrfs_file_extent_disk_num_bytes(leaf, fi), 2210 btrfs_file_extent_disk_num_bytes(leaf, fi),
2202 leaf_owner, leaf_generation, 2211 leaf_owner, leaf_generation,
2203 key.objectid, key.offset, 0); 2212 key.objectid, key.offset, 0);
2213 mutex_unlock(&root->fs_info->alloc_mutex);
2204 BUG_ON(ret); 2214 BUG_ON(ret);
2205 } 2215 }
2216
2217 mutex_lock(&root->fs_info->alloc_mutex);
2206 return 0; 2218 return 0;
2207} 2219}
2208 2220
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index eccdb9562ba8..591a30208acd 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -75,6 +75,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
75 for (i = 0; i < num_pages; i++) { 75 for (i = 0; i < num_pages; i++) {
76 if (!pages[i]) 76 if (!pages[i])
77 break; 77 break;
78 ClearPageChecked(pages[i]);
78 unlock_page(pages[i]); 79 unlock_page(pages[i]);
79 mark_page_accessed(pages[i]); 80 mark_page_accessed(pages[i]);
80 page_cache_release(pages[i]); 81 page_cache_release(pages[i]);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 50ee4befac8e..8fb6dc25e7a5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -418,7 +418,7 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work)
418 418
419 fixup = container_of(work, struct btrfs_writepage_fixup, work); 419 fixup = container_of(work, struct btrfs_writepage_fixup, work);
420 page = fixup->page; 420 page = fixup->page;
421 421again:
422 lock_page(page); 422 lock_page(page);
423 if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { 423 if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
424 ClearPageChecked(page); 424 ClearPageChecked(page);
@@ -430,9 +430,21 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work)
430 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; 430 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
431 431
432 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 432 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
433 ordered = btrfs_lookup_ordered_extent(inode, page_start); 433
434 if (ordered) 434 /* already ordered? We're done */
435 if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
436 EXTENT_ORDERED, 0)) {
435 goto out; 437 goto out;
438 }
439
440 ordered = btrfs_lookup_ordered_extent(inode, page_start);
441 if (ordered) {
442 unlock_extent(&BTRFS_I(inode)->io_tree, page_start,
443 page_end, GFP_NOFS);
444 unlock_page(page);
445 btrfs_start_ordered_extent(inode, ordered, 1);
446 goto again;
447 }
436 448
437 set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, 449 set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end,
438 GFP_NOFS); 450 GFP_NOFS);
@@ -1465,11 +1477,11 @@ void btrfs_delete_inode(struct inode *inode)
1465 unsigned long nr; 1477 unsigned long nr;
1466 int ret; 1478 int ret;
1467 1479
1468 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1469 truncate_inode_pages(&inode->i_data, 0); 1480 truncate_inode_pages(&inode->i_data, 0);
1470 if (is_bad_inode(inode)) { 1481 if (is_bad_inode(inode)) {
1471 goto no_delete; 1482 goto no_delete;
1472 } 1483 }
1484 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1473 1485
1474 btrfs_i_size_write(inode, 0); 1486 btrfs_i_size_write(inode, 0);
1475 trans = btrfs_start_transaction(root, 1); 1487 trans = btrfs_start_transaction(root, 1);
@@ -2707,6 +2719,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2707 1, 1, GFP_NOFS); 2719 1, 1, GFP_NOFS);
2708 __btrfs_releasepage(page, GFP_NOFS); 2720 __btrfs_releasepage(page, GFP_NOFS);
2709 2721
2722 ClearPageChecked(page);
2710 if (PagePrivate(page)) { 2723 if (PagePrivate(page)) {
2711 invalidate_extent_lru(tree, page_offset(page), 2724 invalidate_extent_lru(tree, page_offset(page),
2712 PAGE_CACHE_SIZE); 2725 PAGE_CACHE_SIZE);
@@ -2818,10 +2831,10 @@ static void btrfs_truncate(struct inode *inode)
2818 return; 2831 return;
2819 2832
2820 btrfs_truncate_page(inode->i_mapping, inode->i_size); 2833 btrfs_truncate_page(inode->i_mapping, inode->i_size);
2834 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
2821 2835
2822 trans = btrfs_start_transaction(root, 1); 2836 trans = btrfs_start_transaction(root, 1);
2823 btrfs_set_trans_block_group(trans, inode); 2837 btrfs_set_trans_block_group(trans, inode);
2824 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
2825 btrfs_i_size_write(inode, inode->i_size); 2838 btrfs_i_size_write(inode, inode->i_size);
2826 2839
2827 /* FIXME, add redo link to tree so we don't leak on crash */ 2840 /* FIXME, add redo link to tree so we don't leak on crash */
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index c2b4a9c4ddb6..0d87795fdd8f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -336,7 +336,7 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
336 orig_end = start + len - 1; 336 orig_end = start + len - 1;
337 wait_end = orig_end; 337 wait_end = orig_end;
338 } 338 }
339 339again:
340 /* start IO across the range first to instantiate any delalloc 340 /* start IO across the range first to instantiate any delalloc
341 * extents 341 * extents
342 */ 342 */
@@ -369,6 +369,14 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
369 break; 369 break;
370 end--; 370 end--;
371 } 371 }
372 if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
373 EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
374 printk("inode %lu still ordered or delalloc after wait "
375 "%llu %llu\n", inode->i_ino,
376 (unsigned long long)start,
377 (unsigned long long)orig_end);
378 goto again;
379 }
372} 380}
373 381
374/* 382/*
@@ -545,7 +553,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
545 sector_sums = &ordered_sum->sums; 553 sector_sums = &ordered_sum->sums;
546 for (i = 0; i < num_sectors; i++) { 554 for (i = 0; i < num_sectors; i++) {
547 if (sector_sums[i].offset == offset) { 555 if (sector_sums[i].offset == offset) {
548printk("find ordered sum inode %lu offset %Lu\n", inode->i_ino, offset);
549 *sum = sector_sums[i].sum; 556 *sum = sector_sums[i].sum;
550 ret = 0; 557 ret = 0;
551 goto out; 558 goto out;