aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c483
1 files changed, 281 insertions, 202 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b0292b3ead54..6b092a1c4e37 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1192,6 +1192,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1192 root->objectid = objectid; 1192 root->objectid = objectid;
1193 root->last_trans = 0; 1193 root->last_trans = 0;
1194 root->highest_objectid = 0; 1194 root->highest_objectid = 0;
1195 root->nr_delalloc_inodes = 0;
1196 root->nr_ordered_extents = 0;
1195 root->name = NULL; 1197 root->name = NULL;
1196 root->inode_tree = RB_ROOT; 1198 root->inode_tree = RB_ROOT;
1197 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); 1199 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
@@ -1200,10 +1202,16 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1200 1202
1201 INIT_LIST_HEAD(&root->dirty_list); 1203 INIT_LIST_HEAD(&root->dirty_list);
1202 INIT_LIST_HEAD(&root->root_list); 1204 INIT_LIST_HEAD(&root->root_list);
1205 INIT_LIST_HEAD(&root->delalloc_inodes);
1206 INIT_LIST_HEAD(&root->delalloc_root);
1207 INIT_LIST_HEAD(&root->ordered_extents);
1208 INIT_LIST_HEAD(&root->ordered_root);
1203 INIT_LIST_HEAD(&root->logged_list[0]); 1209 INIT_LIST_HEAD(&root->logged_list[0]);
1204 INIT_LIST_HEAD(&root->logged_list[1]); 1210 INIT_LIST_HEAD(&root->logged_list[1]);
1205 spin_lock_init(&root->orphan_lock); 1211 spin_lock_init(&root->orphan_lock);
1206 spin_lock_init(&root->inode_lock); 1212 spin_lock_init(&root->inode_lock);
1213 spin_lock_init(&root->delalloc_lock);
1214 spin_lock_init(&root->ordered_extent_lock);
1207 spin_lock_init(&root->accounting_lock); 1215 spin_lock_init(&root->accounting_lock);
1208 spin_lock_init(&root->log_extents_lock[0]); 1216 spin_lock_init(&root->log_extents_lock[0]);
1209 spin_lock_init(&root->log_extents_lock[1]); 1217 spin_lock_init(&root->log_extents_lock[1]);
@@ -1217,6 +1225,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1217 atomic_set(&root->log_writers, 0); 1225 atomic_set(&root->log_writers, 0);
1218 atomic_set(&root->log_batch, 0); 1226 atomic_set(&root->log_batch, 0);
1219 atomic_set(&root->orphan_inodes, 0); 1227 atomic_set(&root->orphan_inodes, 0);
1228 atomic_set(&root->refs, 1);
1220 root->log_transid = 0; 1229 root->log_transid = 0;
1221 root->last_log_commit = 0; 1230 root->last_log_commit = 0;
1222 extent_io_tree_init(&root->dirty_log_pages, 1231 extent_io_tree_init(&root->dirty_log_pages,
@@ -1235,39 +1244,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1235 spin_lock_init(&root->root_item_lock); 1244 spin_lock_init(&root->root_item_lock);
1236} 1245}
1237 1246
1238static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
1239 struct btrfs_fs_info *fs_info,
1240 u64 objectid,
1241 struct btrfs_root *root)
1242{
1243 int ret;
1244 u32 blocksize;
1245 u64 generation;
1246
1247 __setup_root(tree_root->nodesize, tree_root->leafsize,
1248 tree_root->sectorsize, tree_root->stripesize,
1249 root, fs_info, objectid);
1250 ret = btrfs_find_last_root(tree_root, objectid,
1251 &root->root_item, &root->root_key);
1252 if (ret > 0)
1253 return -ENOENT;
1254 else if (ret < 0)
1255 return ret;
1256
1257 generation = btrfs_root_generation(&root->root_item);
1258 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1259 root->commit_root = NULL;
1260 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1261 blocksize, generation);
1262 if (!root->node || !btrfs_buffer_uptodate(root->node, generation, 0)) {
1263 free_extent_buffer(root->node);
1264 root->node = NULL;
1265 return -EIO;
1266 }
1267 root->commit_root = btrfs_root_node(root);
1268 return 0;
1269}
1270
1271static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) 1247static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
1272{ 1248{
1273 struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); 1249 struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS);
@@ -1452,70 +1428,73 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1452 return 0; 1428 return 0;
1453} 1429}
1454 1430
1455struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, 1431struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
1456 struct btrfs_key *location) 1432 struct btrfs_key *key)
1457{ 1433{
1458 struct btrfs_root *root; 1434 struct btrfs_root *root;
1459 struct btrfs_fs_info *fs_info = tree_root->fs_info; 1435 struct btrfs_fs_info *fs_info = tree_root->fs_info;
1460 struct btrfs_path *path; 1436 struct btrfs_path *path;
1461 struct extent_buffer *l;
1462 u64 generation; 1437 u64 generation;
1463 u32 blocksize; 1438 u32 blocksize;
1464 int ret = 0; 1439 int ret;
1465 int slot;
1466 1440
1467 root = btrfs_alloc_root(fs_info); 1441 path = btrfs_alloc_path();
1468 if (!root) 1442 if (!path)
1469 return ERR_PTR(-ENOMEM); 1443 return ERR_PTR(-ENOMEM);
1470 if (location->offset == (u64)-1) { 1444
1471 ret = find_and_setup_root(tree_root, fs_info, 1445 root = btrfs_alloc_root(fs_info);
1472 location->objectid, root); 1446 if (!root) {
1473 if (ret) { 1447 ret = -ENOMEM;
1474 kfree(root); 1448 goto alloc_fail;
1475 return ERR_PTR(ret);
1476 }
1477 goto out;
1478 } 1449 }
1479 1450
1480 __setup_root(tree_root->nodesize, tree_root->leafsize, 1451 __setup_root(tree_root->nodesize, tree_root->leafsize,
1481 tree_root->sectorsize, tree_root->stripesize, 1452 tree_root->sectorsize, tree_root->stripesize,
1482 root, fs_info, location->objectid); 1453 root, fs_info, key->objectid);
1483 1454
1484 path = btrfs_alloc_path(); 1455 ret = btrfs_find_root(tree_root, key, path,
1485 if (!path) { 1456 &root->root_item, &root->root_key);
1486 kfree(root);
1487 return ERR_PTR(-ENOMEM);
1488 }
1489 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
1490 if (ret == 0) {
1491 l = path->nodes[0];
1492 slot = path->slots[0];
1493 btrfs_read_root_item(l, slot, &root->root_item);
1494 memcpy(&root->root_key, location, sizeof(*location));
1495 }
1496 btrfs_free_path(path);
1497 if (ret) { 1457 if (ret) {
1498 kfree(root);
1499 if (ret > 0) 1458 if (ret > 0)
1500 ret = -ENOENT; 1459 ret = -ENOENT;
1501 return ERR_PTR(ret); 1460 goto find_fail;
1502 } 1461 }
1503 1462
1504 generation = btrfs_root_generation(&root->root_item); 1463 generation = btrfs_root_generation(&root->root_item);
1505 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1464 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1506 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1465 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1507 blocksize, generation); 1466 blocksize, generation);
1508 if (!root->node || !extent_buffer_uptodate(root->node)) { 1467 if (!root->node) {
1509 ret = (!root->node) ? -ENOMEM : -EIO; 1468 ret = -ENOMEM;
1510 1469 goto find_fail;
1511 free_extent_buffer(root->node); 1470 } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
1512 kfree(root); 1471 ret = -EIO;
1513 return ERR_PTR(ret); 1472 goto read_fail;
1514 } 1473 }
1515
1516 root->commit_root = btrfs_root_node(root); 1474 root->commit_root = btrfs_root_node(root);
1517out: 1475out:
1518 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1476 btrfs_free_path(path);
1477 return root;
1478
1479read_fail:
1480 free_extent_buffer(root->node);
1481find_fail:
1482 kfree(root);
1483alloc_fail:
1484 root = ERR_PTR(ret);
1485 goto out;
1486}
1487
1488struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
1489 struct btrfs_key *location)
1490{
1491 struct btrfs_root *root;
1492
1493 root = btrfs_read_tree_root(tree_root, location);
1494 if (IS_ERR(root))
1495 return root;
1496
1497 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
1519 root->ref_cows = 1; 1498 root->ref_cows = 1;
1520 btrfs_check_and_init_root_item(&root->root_item); 1499 btrfs_check_and_init_root_item(&root->root_item);
1521 } 1500 }
@@ -1523,6 +1502,66 @@ out:
1523 return root; 1502 return root;
1524} 1503}
1525 1504
1505int btrfs_init_fs_root(struct btrfs_root *root)
1506{
1507 int ret;
1508
1509 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
1510 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
1511 GFP_NOFS);
1512 if (!root->free_ino_pinned || !root->free_ino_ctl) {
1513 ret = -ENOMEM;
1514 goto fail;
1515 }
1516
1517 btrfs_init_free_ino_ctl(root);
1518 mutex_init(&root->fs_commit_mutex);
1519 spin_lock_init(&root->cache_lock);
1520 init_waitqueue_head(&root->cache_wait);
1521
1522 ret = get_anon_bdev(&root->anon_dev);
1523 if (ret)
1524 goto fail;
1525 return 0;
1526fail:
1527 kfree(root->free_ino_ctl);
1528 kfree(root->free_ino_pinned);
1529 return ret;
1530}
1531
1532struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
1533 u64 root_id)
1534{
1535 struct btrfs_root *root;
1536
1537 spin_lock(&fs_info->fs_roots_radix_lock);
1538 root = radix_tree_lookup(&fs_info->fs_roots_radix,
1539 (unsigned long)root_id);
1540 spin_unlock(&fs_info->fs_roots_radix_lock);
1541 return root;
1542}
1543
1544int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
1545 struct btrfs_root *root)
1546{
1547 int ret;
1548
1549 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
1550 if (ret)
1551 return ret;
1552
1553 spin_lock(&fs_info->fs_roots_radix_lock);
1554 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1555 (unsigned long)root->root_key.objectid,
1556 root);
1557 if (ret == 0)
1558 root->in_radix = 1;
1559 spin_unlock(&fs_info->fs_roots_radix_lock);
1560 radix_tree_preload_end();
1561
1562 return ret;
1563}
1564
1526struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 1565struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1527 struct btrfs_key *location) 1566 struct btrfs_key *location)
1528{ 1567{
@@ -1543,58 +1582,30 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1543 return fs_info->quota_root ? fs_info->quota_root : 1582 return fs_info->quota_root ? fs_info->quota_root :
1544 ERR_PTR(-ENOENT); 1583 ERR_PTR(-ENOENT);
1545again: 1584again:
1546 spin_lock(&fs_info->fs_roots_radix_lock); 1585 root = btrfs_lookup_fs_root(fs_info, location->objectid);
1547 root = radix_tree_lookup(&fs_info->fs_roots_radix,
1548 (unsigned long)location->objectid);
1549 spin_unlock(&fs_info->fs_roots_radix_lock);
1550 if (root) 1586 if (root)
1551 return root; 1587 return root;
1552 1588
1553 root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); 1589 root = btrfs_read_fs_root(fs_info->tree_root, location);
1554 if (IS_ERR(root)) 1590 if (IS_ERR(root))
1555 return root; 1591 return root;
1556 1592
1557 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); 1593 if (btrfs_root_refs(&root->root_item) == 0) {
1558 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), 1594 ret = -ENOENT;
1559 GFP_NOFS);
1560 if (!root->free_ino_pinned || !root->free_ino_ctl) {
1561 ret = -ENOMEM;
1562 goto fail; 1595 goto fail;
1563 } 1596 }
1564 1597
1565 btrfs_init_free_ino_ctl(root); 1598 ret = btrfs_init_fs_root(root);
1566 mutex_init(&root->fs_commit_mutex);
1567 spin_lock_init(&root->cache_lock);
1568 init_waitqueue_head(&root->cache_wait);
1569
1570 ret = get_anon_bdev(&root->anon_dev);
1571 if (ret) 1599 if (ret)
1572 goto fail; 1600 goto fail;
1573 1601
1574 if (btrfs_root_refs(&root->root_item) == 0) {
1575 ret = -ENOENT;
1576 goto fail;
1577 }
1578
1579 ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); 1602 ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
1580 if (ret < 0) 1603 if (ret < 0)
1581 goto fail; 1604 goto fail;
1582 if (ret == 0) 1605 if (ret == 0)
1583 root->orphan_item_inserted = 1; 1606 root->orphan_item_inserted = 1;
1584 1607
1585 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); 1608 ret = btrfs_insert_fs_root(fs_info, root);
1586 if (ret)
1587 goto fail;
1588
1589 spin_lock(&fs_info->fs_roots_radix_lock);
1590 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1591 (unsigned long)root->root_key.objectid,
1592 root);
1593 if (ret == 0)
1594 root->in_radix = 1;
1595
1596 spin_unlock(&fs_info->fs_roots_radix_lock);
1597 radix_tree_preload_end();
1598 if (ret) { 1609 if (ret) {
1599 if (ret == -EEXIST) { 1610 if (ret == -EEXIST) {
1600 free_fs_root(root); 1611 free_fs_root(root);
@@ -1602,10 +1613,6 @@ again:
1602 } 1613 }
1603 goto fail; 1614 goto fail;
1604 } 1615 }
1605
1606 ret = btrfs_find_dead_roots(fs_info->tree_root,
1607 root->root_key.objectid);
1608 WARN_ON(ret);
1609 return root; 1616 return root;
1610fail: 1617fail:
1611 free_fs_root(root); 1618 free_fs_root(root);
@@ -1677,21 +1684,37 @@ static void end_workqueue_fn(struct btrfs_work *work)
1677static int cleaner_kthread(void *arg) 1684static int cleaner_kthread(void *arg)
1678{ 1685{
1679 struct btrfs_root *root = arg; 1686 struct btrfs_root *root = arg;
1687 int again;
1680 1688
1681 do { 1689 do {
1682 int again = 0; 1690 again = 0;
1683 1691
1684 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1692 /* Make the cleaner go to sleep early. */
1685 down_read_trylock(&root->fs_info->sb->s_umount)) { 1693 if (btrfs_need_cleaner_sleep(root))
1686 if (mutex_trylock(&root->fs_info->cleaner_mutex)) { 1694 goto sleep;
1687 btrfs_run_delayed_iputs(root); 1695
1688 again = btrfs_clean_one_deleted_snapshot(root); 1696 if (!mutex_trylock(&root->fs_info->cleaner_mutex))
1689 mutex_unlock(&root->fs_info->cleaner_mutex); 1697 goto sleep;
1690 } 1698
1691 btrfs_run_defrag_inodes(root->fs_info); 1699 /*
1692 up_read(&root->fs_info->sb->s_umount); 1700 * Avoid the problem that we change the status of the fs
1701 * during the above check and trylock.
1702 */
1703 if (btrfs_need_cleaner_sleep(root)) {
1704 mutex_unlock(&root->fs_info->cleaner_mutex);
1705 goto sleep;
1693 } 1706 }
1694 1707
1708 btrfs_run_delayed_iputs(root);
1709 again = btrfs_clean_one_deleted_snapshot(root);
1710 mutex_unlock(&root->fs_info->cleaner_mutex);
1711
1712 /*
1713 * The defragger has dealt with the R/O remount and umount,
1714 * needn't do anything special here.
1715 */
1716 btrfs_run_defrag_inodes(root->fs_info);
1717sleep:
1695 if (!try_to_freeze() && !again) { 1718 if (!try_to_freeze() && !again) {
1696 set_current_state(TASK_INTERRUPTIBLE); 1719 set_current_state(TASK_INTERRUPTIBLE);
1697 if (!kthread_should_stop()) 1720 if (!kthread_should_stop())
@@ -1725,7 +1748,7 @@ static int transaction_kthread(void *arg)
1725 } 1748 }
1726 1749
1727 now = get_seconds(); 1750 now = get_seconds();
1728 if (!cur->blocked && 1751 if (cur->state < TRANS_STATE_BLOCKED &&
1729 (now < cur->start_time || now - cur->start_time < 30)) { 1752 (now < cur->start_time || now - cur->start_time < 30)) {
1730 spin_unlock(&root->fs_info->trans_lock); 1753 spin_unlock(&root->fs_info->trans_lock);
1731 delay = HZ * 5; 1754 delay = HZ * 5;
@@ -2035,11 +2058,11 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
2035 list_del(&gang[0]->root_list); 2058 list_del(&gang[0]->root_list);
2036 2059
2037 if (gang[0]->in_radix) { 2060 if (gang[0]->in_radix) {
2038 btrfs_free_fs_root(fs_info, gang[0]); 2061 btrfs_drop_and_free_fs_root(fs_info, gang[0]);
2039 } else { 2062 } else {
2040 free_extent_buffer(gang[0]->node); 2063 free_extent_buffer(gang[0]->node);
2041 free_extent_buffer(gang[0]->commit_root); 2064 free_extent_buffer(gang[0]->commit_root);
2042 kfree(gang[0]); 2065 btrfs_put_fs_root(gang[0]);
2043 } 2066 }
2044 } 2067 }
2045 2068
@@ -2050,7 +2073,7 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
2050 if (!ret) 2073 if (!ret)
2051 break; 2074 break;
2052 for (i = 0; i < ret; i++) 2075 for (i = 0; i < ret; i++)
2053 btrfs_free_fs_root(fs_info, gang[i]); 2076 btrfs_drop_and_free_fs_root(fs_info, gang[i]);
2054 } 2077 }
2055} 2078}
2056 2079
@@ -2082,14 +2105,8 @@ int open_ctree(struct super_block *sb,
2082 int backup_index = 0; 2105 int backup_index = 0;
2083 2106
2084 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); 2107 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
2085 extent_root = fs_info->extent_root = btrfs_alloc_root(fs_info);
2086 csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info);
2087 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); 2108 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
2088 dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); 2109 if (!tree_root || !chunk_root) {
2089 quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info);
2090
2091 if (!tree_root || !extent_root || !csum_root ||
2092 !chunk_root || !dev_root || !quota_root) {
2093 err = -ENOMEM; 2110 err = -ENOMEM;
2094 goto fail; 2111 goto fail;
2095 } 2112 }
@@ -2132,9 +2149,9 @@ int open_ctree(struct super_block *sb,
2132 INIT_LIST_HEAD(&fs_info->trans_list); 2149 INIT_LIST_HEAD(&fs_info->trans_list);
2133 INIT_LIST_HEAD(&fs_info->dead_roots); 2150 INIT_LIST_HEAD(&fs_info->dead_roots);
2134 INIT_LIST_HEAD(&fs_info->delayed_iputs); 2151 INIT_LIST_HEAD(&fs_info->delayed_iputs);
2135 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 2152 INIT_LIST_HEAD(&fs_info->delalloc_roots);
2136 INIT_LIST_HEAD(&fs_info->caching_block_groups); 2153 INIT_LIST_HEAD(&fs_info->caching_block_groups);
2137 spin_lock_init(&fs_info->delalloc_lock); 2154 spin_lock_init(&fs_info->delalloc_root_lock);
2138 spin_lock_init(&fs_info->trans_lock); 2155 spin_lock_init(&fs_info->trans_lock);
2139 spin_lock_init(&fs_info->fs_roots_radix_lock); 2156 spin_lock_init(&fs_info->fs_roots_radix_lock);
2140 spin_lock_init(&fs_info->delayed_iput_lock); 2157 spin_lock_init(&fs_info->delayed_iput_lock);
@@ -2170,7 +2187,6 @@ int open_ctree(struct super_block *sb,
2170 fs_info->max_inline = 8192 * 1024; 2187 fs_info->max_inline = 8192 * 1024;
2171 fs_info->metadata_ratio = 0; 2188 fs_info->metadata_ratio = 0;
2172 fs_info->defrag_inodes = RB_ROOT; 2189 fs_info->defrag_inodes = RB_ROOT;
2173 fs_info->trans_no_join = 0;
2174 fs_info->free_chunk_space = 0; 2190 fs_info->free_chunk_space = 0;
2175 fs_info->tree_mod_log = RB_ROOT; 2191 fs_info->tree_mod_log = RB_ROOT;
2176 2192
@@ -2181,8 +2197,8 @@ int open_ctree(struct super_block *sb,
2181 fs_info->thread_pool_size = min_t(unsigned long, 2197 fs_info->thread_pool_size = min_t(unsigned long,
2182 num_online_cpus() + 2, 8); 2198 num_online_cpus() + 2, 8);
2183 2199
2184 INIT_LIST_HEAD(&fs_info->ordered_extents); 2200 INIT_LIST_HEAD(&fs_info->ordered_roots);
2185 spin_lock_init(&fs_info->ordered_extent_lock); 2201 spin_lock_init(&fs_info->ordered_root_lock);
2186 fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), 2202 fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
2187 GFP_NOFS); 2203 GFP_NOFS);
2188 if (!fs_info->delayed_root) { 2204 if (!fs_info->delayed_root) {
@@ -2275,6 +2291,7 @@ int open_ctree(struct super_block *sb,
2275 fs_info->qgroup_seq = 1; 2291 fs_info->qgroup_seq = 1;
2276 fs_info->quota_enabled = 0; 2292 fs_info->quota_enabled = 0;
2277 fs_info->pending_quota_state = 0; 2293 fs_info->pending_quota_state = 0;
2294 fs_info->qgroup_ulist = NULL;
2278 mutex_init(&fs_info->qgroup_rescan_lock); 2295 mutex_init(&fs_info->qgroup_rescan_lock);
2279 2296
2280 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 2297 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@ -2639,33 +2656,44 @@ retry_root_backup:
2639 btrfs_set_root_node(&tree_root->root_item, tree_root->node); 2656 btrfs_set_root_node(&tree_root->root_item, tree_root->node);
2640 tree_root->commit_root = btrfs_root_node(tree_root); 2657 tree_root->commit_root = btrfs_root_node(tree_root);
2641 2658
2642 ret = find_and_setup_root(tree_root, fs_info, 2659 location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
2643 BTRFS_EXTENT_TREE_OBJECTID, extent_root); 2660 location.type = BTRFS_ROOT_ITEM_KEY;
2644 if (ret) 2661 location.offset = 0;
2662
2663 extent_root = btrfs_read_tree_root(tree_root, &location);
2664 if (IS_ERR(extent_root)) {
2665 ret = PTR_ERR(extent_root);
2645 goto recovery_tree_root; 2666 goto recovery_tree_root;
2667 }
2646 extent_root->track_dirty = 1; 2668 extent_root->track_dirty = 1;
2669 fs_info->extent_root = extent_root;
2647 2670
2648 ret = find_and_setup_root(tree_root, fs_info, 2671 location.objectid = BTRFS_DEV_TREE_OBJECTID;
2649 BTRFS_DEV_TREE_OBJECTID, dev_root); 2672 dev_root = btrfs_read_tree_root(tree_root, &location);
2650 if (ret) 2673 if (IS_ERR(dev_root)) {
2674 ret = PTR_ERR(dev_root);
2651 goto recovery_tree_root; 2675 goto recovery_tree_root;
2676 }
2652 dev_root->track_dirty = 1; 2677 dev_root->track_dirty = 1;
2678 fs_info->dev_root = dev_root;
2679 btrfs_init_devices_late(fs_info);
2653 2680
2654 ret = find_and_setup_root(tree_root, fs_info, 2681 location.objectid = BTRFS_CSUM_TREE_OBJECTID;
2655 BTRFS_CSUM_TREE_OBJECTID, csum_root); 2682 csum_root = btrfs_read_tree_root(tree_root, &location);
2656 if (ret) 2683 if (IS_ERR(csum_root)) {
2684 ret = PTR_ERR(csum_root);
2657 goto recovery_tree_root; 2685 goto recovery_tree_root;
2686 }
2658 csum_root->track_dirty = 1; 2687 csum_root->track_dirty = 1;
2688 fs_info->csum_root = csum_root;
2659 2689
2660 ret = find_and_setup_root(tree_root, fs_info, 2690 location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
2661 BTRFS_QUOTA_TREE_OBJECTID, quota_root); 2691 quota_root = btrfs_read_tree_root(tree_root, &location);
2662 if (ret) { 2692 if (!IS_ERR(quota_root)) {
2663 kfree(quota_root);
2664 quota_root = fs_info->quota_root = NULL;
2665 } else {
2666 quota_root->track_dirty = 1; 2693 quota_root->track_dirty = 1;
2667 fs_info->quota_enabled = 1; 2694 fs_info->quota_enabled = 1;
2668 fs_info->pending_quota_state = 1; 2695 fs_info->pending_quota_state = 1;
2696 fs_info->quota_root = quota_root;
2669 } 2697 }
2670 2698
2671 fs_info->generation = generation; 2699 fs_info->generation = generation;
@@ -2818,11 +2846,9 @@ retry_root_backup:
2818 2846
2819 location.objectid = BTRFS_FS_TREE_OBJECTID; 2847 location.objectid = BTRFS_FS_TREE_OBJECTID;
2820 location.type = BTRFS_ROOT_ITEM_KEY; 2848 location.type = BTRFS_ROOT_ITEM_KEY;
2821 location.offset = (u64)-1; 2849 location.offset = 0;
2822 2850
2823 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); 2851 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
2824 if (!fs_info->fs_root)
2825 goto fail_qgroup;
2826 if (IS_ERR(fs_info->fs_root)) { 2852 if (IS_ERR(fs_info->fs_root)) {
2827 err = PTR_ERR(fs_info->fs_root); 2853 err = PTR_ERR(fs_info->fs_root);
2828 goto fail_qgroup; 2854 goto fail_qgroup;
@@ -2854,6 +2880,8 @@ retry_root_backup:
2854 return ret; 2880 return ret;
2855 } 2881 }
2856 2882
2883 btrfs_qgroup_rescan_resume(fs_info);
2884
2857 return 0; 2885 return 0;
2858 2886
2859fail_qgroup: 2887fail_qgroup:
@@ -3259,7 +3287,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
3259 BTRFS_BLOCK_GROUP_RAID10)) { 3287 BTRFS_BLOCK_GROUP_RAID10)) {
3260 num_tolerated_disk_barrier_failures = 1; 3288 num_tolerated_disk_barrier_failures = 1;
3261 } else if (flags & 3289 } else if (flags &
3262 BTRFS_BLOCK_GROUP_RAID5) { 3290 BTRFS_BLOCK_GROUP_RAID6) {
3263 num_tolerated_disk_barrier_failures = 2; 3291 num_tolerated_disk_barrier_failures = 2;
3264 } 3292 }
3265 } 3293 }
@@ -3367,7 +3395,9 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
3367 return ret; 3395 return ret;
3368} 3396}
3369 3397
3370void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3398/* Drop a fs root from the radix tree and free it. */
3399void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
3400 struct btrfs_root *root)
3371{ 3401{
3372 spin_lock(&fs_info->fs_roots_radix_lock); 3402 spin_lock(&fs_info->fs_roots_radix_lock);
3373 radix_tree_delete(&fs_info->fs_roots_radix, 3403 radix_tree_delete(&fs_info->fs_roots_radix,
@@ -3398,7 +3428,12 @@ static void free_fs_root(struct btrfs_root *root)
3398 kfree(root->free_ino_ctl); 3428 kfree(root->free_ino_ctl);
3399 kfree(root->free_ino_pinned); 3429 kfree(root->free_ino_pinned);
3400 kfree(root->name); 3430 kfree(root->name);
3401 kfree(root); 3431 btrfs_put_fs_root(root);
3432}
3433
3434void btrfs_free_fs_root(struct btrfs_root *root)
3435{
3436 free_fs_root(root);
3402} 3437}
3403 3438
3404int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) 3439int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
@@ -3654,7 +3689,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
3654 INIT_LIST_HEAD(&splice); 3689 INIT_LIST_HEAD(&splice);
3655 3690
3656 mutex_lock(&root->fs_info->ordered_operations_mutex); 3691 mutex_lock(&root->fs_info->ordered_operations_mutex);
3657 spin_lock(&root->fs_info->ordered_extent_lock); 3692 spin_lock(&root->fs_info->ordered_root_lock);
3658 3693
3659 list_splice_init(&t->ordered_operations, &splice); 3694 list_splice_init(&t->ordered_operations, &splice);
3660 while (!list_empty(&splice)) { 3695 while (!list_empty(&splice)) {
@@ -3662,14 +3697,14 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
3662 ordered_operations); 3697 ordered_operations);
3663 3698
3664 list_del_init(&btrfs_inode->ordered_operations); 3699 list_del_init(&btrfs_inode->ordered_operations);
3665 spin_unlock(&root->fs_info->ordered_extent_lock); 3700 spin_unlock(&root->fs_info->ordered_root_lock);
3666 3701
3667 btrfs_invalidate_inodes(btrfs_inode->root); 3702 btrfs_invalidate_inodes(btrfs_inode->root);
3668 3703
3669 spin_lock(&root->fs_info->ordered_extent_lock); 3704 spin_lock(&root->fs_info->ordered_root_lock);
3670 } 3705 }
3671 3706
3672 spin_unlock(&root->fs_info->ordered_extent_lock); 3707 spin_unlock(&root->fs_info->ordered_root_lock);
3673 mutex_unlock(&root->fs_info->ordered_operations_mutex); 3708 mutex_unlock(&root->fs_info->ordered_operations_mutex);
3674} 3709}
3675 3710
@@ -3677,15 +3712,36 @@ static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
3677{ 3712{
3678 struct btrfs_ordered_extent *ordered; 3713 struct btrfs_ordered_extent *ordered;
3679 3714
3680 spin_lock(&root->fs_info->ordered_extent_lock); 3715 spin_lock(&root->ordered_extent_lock);
3681 /* 3716 /*
3682 * This will just short circuit the ordered completion stuff which will 3717 * This will just short circuit the ordered completion stuff which will
3683 * make sure the ordered extent gets properly cleaned up. 3718 * make sure the ordered extent gets properly cleaned up.
3684 */ 3719 */
3685 list_for_each_entry(ordered, &root->fs_info->ordered_extents, 3720 list_for_each_entry(ordered, &root->ordered_extents,
3686 root_extent_list) 3721 root_extent_list)
3687 set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); 3722 set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
3688 spin_unlock(&root->fs_info->ordered_extent_lock); 3723 spin_unlock(&root->ordered_extent_lock);
3724}
3725
3726static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
3727{
3728 struct btrfs_root *root;
3729 struct list_head splice;
3730
3731 INIT_LIST_HEAD(&splice);
3732
3733 spin_lock(&fs_info->ordered_root_lock);
3734 list_splice_init(&fs_info->ordered_roots, &splice);
3735 while (!list_empty(&splice)) {
3736 root = list_first_entry(&splice, struct btrfs_root,
3737 ordered_root);
3738 list_del_init(&root->ordered_root);
3739
3740 btrfs_destroy_ordered_extents(root);
3741
3742 cond_resched_lock(&fs_info->ordered_root_lock);
3743 }
3744 spin_unlock(&fs_info->ordered_root_lock);
3689} 3745}
3690 3746
3691int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 3747int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
@@ -3707,6 +3763,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3707 3763
3708 while ((node = rb_first(&delayed_refs->root)) != NULL) { 3764 while ((node = rb_first(&delayed_refs->root)) != NULL) {
3709 struct btrfs_delayed_ref_head *head = NULL; 3765 struct btrfs_delayed_ref_head *head = NULL;
3766 bool pin_bytes = false;
3710 3767
3711 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3768 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
3712 atomic_set(&ref->refs, 1); 3769 atomic_set(&ref->refs, 1);
@@ -3727,8 +3784,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3727 } 3784 }
3728 3785
3729 if (head->must_insert_reserved) 3786 if (head->must_insert_reserved)
3730 btrfs_pin_extent(root, ref->bytenr, 3787 pin_bytes = true;
3731 ref->num_bytes, 1);
3732 btrfs_free_delayed_extent_op(head->extent_op); 3788 btrfs_free_delayed_extent_op(head->extent_op);
3733 delayed_refs->num_heads--; 3789 delayed_refs->num_heads--;
3734 if (list_empty(&head->cluster)) 3790 if (list_empty(&head->cluster))
@@ -3739,9 +3795,13 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3739 ref->in_tree = 0; 3795 ref->in_tree = 0;
3740 rb_erase(&ref->rb_node, &delayed_refs->root); 3796 rb_erase(&ref->rb_node, &delayed_refs->root);
3741 delayed_refs->num_entries--; 3797 delayed_refs->num_entries--;
3742 if (head)
3743 mutex_unlock(&head->mutex);
3744 spin_unlock(&delayed_refs->lock); 3798 spin_unlock(&delayed_refs->lock);
3799 if (head) {
3800 if (pin_bytes)
3801 btrfs_pin_extent(root, ref->bytenr,
3802 ref->num_bytes, 1);
3803 mutex_unlock(&head->mutex);
3804 }
3745 btrfs_put_delayed_ref(ref); 3805 btrfs_put_delayed_ref(ref);
3746 3806
3747 cond_resched(); 3807 cond_resched();
@@ -3778,24 +3838,49 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
3778 3838
3779 INIT_LIST_HEAD(&splice); 3839 INIT_LIST_HEAD(&splice);
3780 3840
3781 spin_lock(&root->fs_info->delalloc_lock); 3841 spin_lock(&root->delalloc_lock);
3782 list_splice_init(&root->fs_info->delalloc_inodes, &splice); 3842 list_splice_init(&root->delalloc_inodes, &splice);
3783 3843
3784 while (!list_empty(&splice)) { 3844 while (!list_empty(&splice)) {
3785 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 3845 btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
3786 delalloc_inodes); 3846 delalloc_inodes);
3787 3847
3788 list_del_init(&btrfs_inode->delalloc_inodes); 3848 list_del_init(&btrfs_inode->delalloc_inodes);
3789 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, 3849 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
3790 &btrfs_inode->runtime_flags); 3850 &btrfs_inode->runtime_flags);
3791 spin_unlock(&root->fs_info->delalloc_lock); 3851 spin_unlock(&root->delalloc_lock);
3792 3852
3793 btrfs_invalidate_inodes(btrfs_inode->root); 3853 btrfs_invalidate_inodes(btrfs_inode->root);
3794 3854
3795 spin_lock(&root->fs_info->delalloc_lock); 3855 spin_lock(&root->delalloc_lock);
3796 } 3856 }
3797 3857
3798 spin_unlock(&root->fs_info->delalloc_lock); 3858 spin_unlock(&root->delalloc_lock);
3859}
3860
3861static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
3862{
3863 struct btrfs_root *root;
3864 struct list_head splice;
3865
3866 INIT_LIST_HEAD(&splice);
3867
3868 spin_lock(&fs_info->delalloc_root_lock);
3869 list_splice_init(&fs_info->delalloc_roots, &splice);
3870 while (!list_empty(&splice)) {
3871 root = list_first_entry(&splice, struct btrfs_root,
3872 delalloc_root);
3873 list_del_init(&root->delalloc_root);
3874 root = btrfs_grab_fs_root(root);
3875 BUG_ON(!root);
3876 spin_unlock(&fs_info->delalloc_root_lock);
3877
3878 btrfs_destroy_delalloc_inodes(root);
3879 btrfs_put_fs_root(root);
3880
3881 spin_lock(&fs_info->delalloc_root_lock);
3882 }
3883 spin_unlock(&fs_info->delalloc_root_lock);
3799} 3884}
3800 3885
3801static int btrfs_destroy_marked_extents(struct btrfs_root *root, 3886static int btrfs_destroy_marked_extents(struct btrfs_root *root,
@@ -3879,19 +3964,14 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
3879 btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, 3964 btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
3880 cur_trans->dirty_pages.dirty_bytes); 3965 cur_trans->dirty_pages.dirty_bytes);
3881 3966
3882 /* FIXME: cleanup wait for commit */ 3967 cur_trans->state = TRANS_STATE_COMMIT_START;
3883 cur_trans->in_commit = 1;
3884 cur_trans->blocked = 1;
3885 wake_up(&root->fs_info->transaction_blocked_wait); 3968 wake_up(&root->fs_info->transaction_blocked_wait);
3886 3969
3887 btrfs_evict_pending_snapshots(cur_trans); 3970 btrfs_evict_pending_snapshots(cur_trans);
3888 3971
3889 cur_trans->blocked = 0; 3972 cur_trans->state = TRANS_STATE_UNBLOCKED;
3890 wake_up(&root->fs_info->transaction_wait); 3973 wake_up(&root->fs_info->transaction_wait);
3891 3974
3892 cur_trans->commit_done = 1;
3893 wake_up(&cur_trans->commit_wait);
3894
3895 btrfs_destroy_delayed_inodes(root); 3975 btrfs_destroy_delayed_inodes(root);
3896 btrfs_assert_delayed_root_empty(root); 3976 btrfs_assert_delayed_root_empty(root);
3897 3977
@@ -3900,6 +3980,9 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
3900 btrfs_destroy_pinned_extent(root, 3980 btrfs_destroy_pinned_extent(root,
3901 root->fs_info->pinned_extents); 3981 root->fs_info->pinned_extents);
3902 3982
3983 cur_trans->state =TRANS_STATE_COMPLETED;
3984 wake_up(&cur_trans->commit_wait);
3985
3903 /* 3986 /*
3904 memset(cur_trans, 0, sizeof(*cur_trans)); 3987 memset(cur_trans, 0, sizeof(*cur_trans));
3905 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 3988 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
@@ -3915,7 +3998,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3915 3998
3916 spin_lock(&root->fs_info->trans_lock); 3999 spin_lock(&root->fs_info->trans_lock);
3917 list_splice_init(&root->fs_info->trans_list, &list); 4000 list_splice_init(&root->fs_info->trans_list, &list);
3918 root->fs_info->trans_no_join = 1; 4001 root->fs_info->running_transaction = NULL;
3919 spin_unlock(&root->fs_info->trans_lock); 4002 spin_unlock(&root->fs_info->trans_lock);
3920 4003
3921 while (!list_empty(&list)) { 4004 while (!list_empty(&list)) {
@@ -3923,37 +4006,31 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3923 4006
3924 btrfs_destroy_ordered_operations(t, root); 4007 btrfs_destroy_ordered_operations(t, root);
3925 4008
3926 btrfs_destroy_ordered_extents(root); 4009 btrfs_destroy_all_ordered_extents(root->fs_info);
3927 4010
3928 btrfs_destroy_delayed_refs(t, root); 4011 btrfs_destroy_delayed_refs(t, root);
3929 4012
3930 /* FIXME: cleanup wait for commit */ 4013 /*
3931 t->in_commit = 1; 4014 * FIXME: cleanup wait for commit
3932 t->blocked = 1; 4015 * We needn't acquire the lock here: this runs during
4016 * umount, so there is no other task which will change it.
4017 */
4018 t->state = TRANS_STATE_COMMIT_START;
3933 smp_mb(); 4019 smp_mb();
3934 if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) 4020 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
3935 wake_up(&root->fs_info->transaction_blocked_wait); 4021 wake_up(&root->fs_info->transaction_blocked_wait);
3936 4022
3937 btrfs_evict_pending_snapshots(t); 4023 btrfs_evict_pending_snapshots(t);
3938 4024
3939 t->blocked = 0; 4025 t->state = TRANS_STATE_UNBLOCKED;
3940 smp_mb(); 4026 smp_mb();
3941 if (waitqueue_active(&root->fs_info->transaction_wait)) 4027 if (waitqueue_active(&root->fs_info->transaction_wait))
3942 wake_up(&root->fs_info->transaction_wait); 4028 wake_up(&root->fs_info->transaction_wait);
3943 4029
3944 t->commit_done = 1;
3945 smp_mb();
3946 if (waitqueue_active(&t->commit_wait))
3947 wake_up(&t->commit_wait);
3948
3949 btrfs_destroy_delayed_inodes(root); 4030 btrfs_destroy_delayed_inodes(root);
3950 btrfs_assert_delayed_root_empty(root); 4031 btrfs_assert_delayed_root_empty(root);
3951 4032
3952 btrfs_destroy_delalloc_inodes(root); 4033 btrfs_destroy_all_delalloc_inodes(root->fs_info);
3953
3954 spin_lock(&root->fs_info->trans_lock);
3955 root->fs_info->running_transaction = NULL;
3956 spin_unlock(&root->fs_info->trans_lock);
3957 4034
3958 btrfs_destroy_marked_extents(root, &t->dirty_pages, 4035 btrfs_destroy_marked_extents(root, &t->dirty_pages,
3959 EXTENT_DIRTY); 4036 EXTENT_DIRTY);
@@ -3961,15 +4038,17 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3961 btrfs_destroy_pinned_extent(root, 4038 btrfs_destroy_pinned_extent(root,
3962 root->fs_info->pinned_extents); 4039 root->fs_info->pinned_extents);
3963 4040
4041 t->state = TRANS_STATE_COMPLETED;
4042 smp_mb();
4043 if (waitqueue_active(&t->commit_wait))
4044 wake_up(&t->commit_wait);
4045
3964 atomic_set(&t->use_count, 0); 4046 atomic_set(&t->use_count, 0);
3965 list_del_init(&t->list); 4047 list_del_init(&t->list);
3966 memset(t, 0, sizeof(*t)); 4048 memset(t, 0, sizeof(*t));
3967 kmem_cache_free(btrfs_transaction_cachep, t); 4049 kmem_cache_free(btrfs_transaction_cachep, t);
3968 } 4050 }
3969 4051
3970 spin_lock(&root->fs_info->trans_lock);
3971 root->fs_info->trans_no_join = 0;
3972 spin_unlock(&root->fs_info->trans_lock);
3973 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 4052 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
3974 4053
3975 return 0; 4054 return 0;