diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-09-05 16:13:11 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:07 -0400 |
commit | e02119d5a7b4396c5a872582fddc8bd6d305a70a (patch) | |
tree | 825efe2a79dbca8d61256183f3526a5b5dc40dc6 /fs/btrfs/extent-tree.c | |
parent | a1b32a5932cfac7c38b442582285f3da2a09dfd8 (diff) |
Btrfs: Add a write ahead tree log to optimize synchronous operations
File syncs and directory syncs are optimized by copying their
items into a special (copy-on-write) log tree. There is one log tree per
subvolume and the btrfs super block points to a tree of log tree roots.
After a crash, items are copied out of the log tree and back into the
subvolume. See tree-log.c for all the details.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 93 |
1 file changed, 75 insertions, 18 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e63b3b4bed7c..646b9148ca21 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -496,6 +496,23 @@ static int match_extent_ref(struct extent_buffer *leaf, | |||
496 | return ret == 0; | 496 | return ret == 0; |
497 | } | 497 | } |
498 | 498 | ||
499 | /* simple helper to search for an existing extent at a given offset */ | ||
500 | int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, | ||
501 | u64 start, u64 len) | ||
502 | { | ||
503 | int ret; | ||
504 | struct btrfs_key key; | ||
505 | |||
506 | maybe_lock_mutex(root); | ||
507 | key.objectid = start; | ||
508 | key.offset = len; | ||
509 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | ||
510 | ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, | ||
511 | 0, 0); | ||
512 | maybe_unlock_mutex(root); | ||
513 | return ret; | ||
514 | } | ||
515 | |||
499 | static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, | 516 | static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, |
500 | struct btrfs_root *root, | 517 | struct btrfs_root *root, |
501 | struct btrfs_path *path, u64 bytenr, | 518 | struct btrfs_path *path, u64 bytenr, |
@@ -1409,7 +1426,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
1409 | } | 1426 | } |
1410 | 1427 | ||
1411 | 1428 | ||
1412 | static int update_pinned_extents(struct btrfs_root *root, | 1429 | int btrfs_update_pinned_extents(struct btrfs_root *root, |
1413 | u64 bytenr, u64 num, int pin) | 1430 | u64 bytenr, u64 num, int pin) |
1414 | { | 1431 | { |
1415 | u64 len; | 1432 | u64 len; |
@@ -1492,7 +1509,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
1492 | EXTENT_DIRTY); | 1509 | EXTENT_DIRTY); |
1493 | if (ret) | 1510 | if (ret) |
1494 | break; | 1511 | break; |
1495 | update_pinned_extents(root, start, end + 1 - start, 0); | 1512 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); |
1496 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 1513 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
1497 | set_extent_dirty(free_space_cache, start, end, GFP_NOFS); | 1514 | set_extent_dirty(free_space_cache, start, end, GFP_NOFS); |
1498 | if (need_resched()) { | 1515 | if (need_resched()) { |
@@ -1538,14 +1555,11 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, | |||
1538 | clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, | 1555 | clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, |
1539 | GFP_NOFS); | 1556 | GFP_NOFS); |
1540 | 1557 | ||
1541 | eb = btrfs_find_tree_block(extent_root, ins.objectid, | 1558 | eb = btrfs_find_create_tree_block(extent_root, ins.objectid, |
1542 | ins.offset); | 1559 | ins.offset); |
1543 | 1560 | ||
1544 | if (!btrfs_buffer_uptodate(eb, trans->transid)) { | 1561 | if (!btrfs_buffer_uptodate(eb, trans->transid)) |
1545 | mutex_unlock(&extent_root->fs_info->alloc_mutex); | ||
1546 | btrfs_read_buffer(eb, trans->transid); | 1562 | btrfs_read_buffer(eb, trans->transid); |
1547 | mutex_lock(&extent_root->fs_info->alloc_mutex); | ||
1548 | } | ||
1549 | 1563 | ||
1550 | btrfs_tree_lock(eb); | 1564 | btrfs_tree_lock(eb); |
1551 | level = btrfs_header_level(eb); | 1565 | level = btrfs_header_level(eb); |
@@ -1585,13 +1599,20 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, | |||
1585 | struct extent_buffer *buf; | 1599 | struct extent_buffer *buf; |
1586 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | 1600 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); |
1587 | if (buf) { | 1601 | if (buf) { |
1602 | /* we can reuse a block if it hasn't been written | ||
1603 | * and it is from this transaction. We can't | ||
1604 | * reuse anything from the tree log root because | ||
1605 | * it has tiny sub-transactions. | ||
1606 | */ | ||
1588 | if (btrfs_buffer_uptodate(buf, 0) && | 1607 | if (btrfs_buffer_uptodate(buf, 0) && |
1589 | btrfs_try_tree_lock(buf)) { | 1608 | btrfs_try_tree_lock(buf)) { |
1590 | u64 transid = | 1609 | u64 transid = |
1591 | root->fs_info->running_transaction->transid; | 1610 | root->fs_info->running_transaction->transid; |
1592 | u64 header_transid = | 1611 | u64 header_transid = |
1593 | btrfs_header_generation(buf); | 1612 | btrfs_header_generation(buf); |
1594 | if (header_transid == transid && | 1613 | if (btrfs_header_owner(buf) != |
1614 | BTRFS_TREE_LOG_OBJECTID && | ||
1615 | header_transid == transid && | ||
1595 | !btrfs_header_flag(buf, | 1616 | !btrfs_header_flag(buf, |
1596 | BTRFS_HEADER_FLAG_WRITTEN)) { | 1617 | BTRFS_HEADER_FLAG_WRITTEN)) { |
1597 | clean_tree_block(NULL, root, buf); | 1618 | clean_tree_block(NULL, root, buf); |
@@ -1603,7 +1624,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, | |||
1603 | } | 1624 | } |
1604 | free_extent_buffer(buf); | 1625 | free_extent_buffer(buf); |
1605 | } | 1626 | } |
1606 | update_pinned_extents(root, bytenr, num_bytes, 1); | 1627 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); |
1607 | } else { | 1628 | } else { |
1608 | set_extent_bits(&root->fs_info->pending_del, | 1629 | set_extent_bits(&root->fs_info->pending_del, |
1609 | bytenr, bytenr + num_bytes - 1, | 1630 | bytenr, bytenr + num_bytes - 1, |
@@ -1801,7 +1822,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct | |||
1801 | GFP_NOFS); | 1822 | GFP_NOFS); |
1802 | if (!test_range_bit(&extent_root->fs_info->extent_ins, | 1823 | if (!test_range_bit(&extent_root->fs_info->extent_ins, |
1803 | start, end, EXTENT_LOCKED, 0)) { | 1824 | start, end, EXTENT_LOCKED, 0)) { |
1804 | update_pinned_extents(extent_root, start, | 1825 | btrfs_update_pinned_extents(extent_root, start, |
1805 | end + 1 - start, 1); | 1826 | end + 1 - start, 1); |
1806 | ret = __free_extent(trans, extent_root, | 1827 | ret = __free_extent(trans, extent_root, |
1807 | start, end + 1 - start, | 1828 | start, end + 1 - start, |
@@ -1919,6 +1940,12 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, | |||
1919 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { | 1940 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { |
1920 | last_ptr = &root->fs_info->last_data_alloc; | 1941 | last_ptr = &root->fs_info->last_data_alloc; |
1921 | } | 1942 | } |
1943 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | ||
1944 | last_ptr = &root->fs_info->last_log_alloc; | ||
1945 | if (!last_ptr == 0 && root->fs_info->last_alloc) { | ||
1946 | *last_ptr = root->fs_info->last_alloc + empty_cluster; | ||
1947 | } | ||
1948 | } | ||
1922 | 1949 | ||
1923 | if (last_ptr) { | 1950 | if (last_ptr) { |
1924 | if (*last_ptr) | 1951 | if (*last_ptr) |
@@ -2268,6 +2295,35 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | |||
2268 | maybe_unlock_mutex(root); | 2295 | maybe_unlock_mutex(root); |
2269 | return ret; | 2296 | return ret; |
2270 | } | 2297 | } |
2298 | |||
2299 | /* | ||
2300 | * this is used by the tree logging recovery code. It records that | ||
2301 | * an extent has been allocated and makes sure to clear the free | ||
2302 | * space cache bits as well | ||
2303 | */ | ||
2304 | int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, | ||
2305 | struct btrfs_root *root, | ||
2306 | u64 root_objectid, u64 ref_generation, | ||
2307 | u64 owner, u64 owner_offset, | ||
2308 | struct btrfs_key *ins) | ||
2309 | { | ||
2310 | int ret; | ||
2311 | struct btrfs_block_group_cache *block_group; | ||
2312 | |||
2313 | maybe_lock_mutex(root); | ||
2314 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | ||
2315 | cache_block_group(root, block_group); | ||
2316 | |||
2317 | clear_extent_dirty(&root->fs_info->free_space_cache, | ||
2318 | ins->objectid, ins->objectid + ins->offset - 1, | ||
2319 | GFP_NOFS); | ||
2320 | ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, | ||
2321 | ref_generation, owner, | ||
2322 | owner_offset, ins); | ||
2323 | maybe_unlock_mutex(root); | ||
2324 | return ret; | ||
2325 | } | ||
2326 | |||
2271 | /* | 2327 | /* |
2272 | * finds a free extent and does all the dirty work required for allocation | 2328 | * finds a free extent and does all the dirty work required for allocation |
2273 | * returns the key for the extent through ins, and a tree buffer for | 2329 | * returns the key for the extent through ins, and a tree buffer for |
@@ -2350,9 +2406,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
2350 | return buf; | 2406 | return buf; |
2351 | } | 2407 | } |
2352 | 2408 | ||
2353 | static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, | 2409 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
2354 | struct btrfs_root *root, | 2410 | struct btrfs_root *root, struct extent_buffer *leaf) |
2355 | struct extent_buffer *leaf) | ||
2356 | { | 2411 | { |
2357 | u64 leaf_owner; | 2412 | u64 leaf_owner; |
2358 | u64 leaf_generation; | 2413 | u64 leaf_generation; |
@@ -2402,9 +2457,9 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, | |||
2402 | return 0; | 2457 | return 0; |
2403 | } | 2458 | } |
2404 | 2459 | ||
2405 | static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, | 2460 | static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, |
2406 | struct btrfs_root *root, | 2461 | struct btrfs_root *root, |
2407 | struct btrfs_leaf_ref *ref) | 2462 | struct btrfs_leaf_ref *ref) |
2408 | { | 2463 | { |
2409 | int i; | 2464 | int i; |
2410 | int ret; | 2465 | int ret; |
@@ -2512,7 +2567,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, | |||
2512 | btrfs_header_nritems(cur)) | 2567 | btrfs_header_nritems(cur)) |
2513 | break; | 2568 | break; |
2514 | if (*level == 0) { | 2569 | if (*level == 0) { |
2515 | ret = drop_leaf_ref_no_cache(trans, root, cur); | 2570 | ret = btrfs_drop_leaf_ref(trans, root, cur); |
2516 | BUG_ON(ret); | 2571 | BUG_ON(ret); |
2517 | break; | 2572 | break; |
2518 | } | 2573 | } |
@@ -2552,7 +2607,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, | |||
2552 | btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); | 2607 | btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); |
2553 | ref = btrfs_lookup_leaf_ref(root, bytenr); | 2608 | ref = btrfs_lookup_leaf_ref(root, bytenr); |
2554 | if (ref) { | 2609 | if (ref) { |
2555 | ret = drop_leaf_ref(trans, root, ref); | 2610 | ret = cache_drop_leaf_ref(trans, root, ref); |
2556 | BUG_ON(ret); | 2611 | BUG_ON(ret); |
2557 | btrfs_remove_leaf_ref(root, ref); | 2612 | btrfs_remove_leaf_ref(root, ref); |
2558 | btrfs_free_leaf_ref(root, ref); | 2613 | btrfs_free_leaf_ref(root, ref); |
@@ -3628,6 +3683,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
3628 | extent_root = root->fs_info->extent_root; | 3683 | extent_root = root->fs_info->extent_root; |
3629 | block_group_cache = &root->fs_info->block_group_cache; | 3684 | block_group_cache = &root->fs_info->block_group_cache; |
3630 | 3685 | ||
3686 | root->fs_info->last_trans_new_blockgroup = trans->transid; | ||
3687 | |||
3631 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 3688 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
3632 | BUG_ON(!cache); | 3689 | BUG_ON(!cache); |
3633 | cache->key.objectid = chunk_offset; | 3690 | cache->key.objectid = chunk_offset; |