about summary refs log tree commit diff stats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- fs/btrfs/tree-log.c 132
1 files changed, 91 insertions, 41 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7827841b55cb..1255fcc8ade5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -137,11 +137,20 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
137 137
138 mutex_lock(&root->log_mutex); 138 mutex_lock(&root->log_mutex);
139 if (root->log_root) { 139 if (root->log_root) {
140 if (!root->log_start_pid) {
141 root->log_start_pid = current->pid;
142 root->log_multiple_pids = false;
143 } else if (root->log_start_pid != current->pid) {
144 root->log_multiple_pids = true;
145 }
146
140 root->log_batch++; 147 root->log_batch++;
141 atomic_inc(&root->log_writers); 148 atomic_inc(&root->log_writers);
142 mutex_unlock(&root->log_mutex); 149 mutex_unlock(&root->log_mutex);
143 return 0; 150 return 0;
144 } 151 }
152 root->log_multiple_pids = false;
153 root->log_start_pid = current->pid;
145 mutex_lock(&root->fs_info->tree_log_mutex); 154 mutex_lock(&root->fs_info->tree_log_mutex);
146 if (!root->fs_info->log_root_tree) { 155 if (!root->fs_info->log_root_tree) {
147 ret = btrfs_init_log_root_tree(trans, root->fs_info); 156 ret = btrfs_init_log_root_tree(trans, root->fs_info);
@@ -436,7 +445,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
436 key.objectid = objectid; 445 key.objectid = objectid;
437 key.type = BTRFS_INODE_ITEM_KEY; 446 key.type = BTRFS_INODE_ITEM_KEY;
438 key.offset = 0; 447 key.offset = 0;
439 inode = btrfs_iget(root->fs_info->sb, &key, root); 448 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
440 if (IS_ERR(inode)) { 449 if (IS_ERR(inode)) {
441 inode = NULL; 450 inode = NULL;
442 } else if (is_bad_inode(inode)) { 451 } else if (is_bad_inode(inode)) {
@@ -533,8 +542,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
533 542
534 saved_nbytes = inode_get_bytes(inode); 543 saved_nbytes = inode_get_bytes(inode);
535 /* drop any overlapping extents */ 544 /* drop any overlapping extents */
536 ret = btrfs_drop_extents(trans, root, inode, 545 ret = btrfs_drop_extents(trans, inode, start, extent_end,
537 start, extent_end, extent_end, start, &alloc_hint, 1); 546 &alloc_hint, 1);
538 BUG_ON(ret); 547 BUG_ON(ret);
539 548
540 if (found_type == BTRFS_FILE_EXTENT_REG || 549 if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -921,6 +930,17 @@ out_nowrite:
921 return 0; 930 return 0;
922} 931}
923 932
933static int insert_orphan_item(struct btrfs_trans_handle *trans,
934 struct btrfs_root *root, u64 offset)
935{
936 int ret;
937 ret = btrfs_find_orphan_item(root, offset);
938 if (ret > 0)
939 ret = btrfs_insert_orphan_item(trans, root, offset);
940 return ret;
941}
942
943
924/* 944/*
925 * There are a few corners where the link count of the file can't 945 * There are a few corners where the link count of the file can't
926 * be properly maintained during replay. So, instead of adding 946 * be properly maintained during replay. So, instead of adding
@@ -988,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
988 } 1008 }
989 BTRFS_I(inode)->index_cnt = (u64)-1; 1009 BTRFS_I(inode)->index_cnt = (u64)-1;
990 1010
991 if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { 1011 if (inode->i_nlink == 0) {
992 ret = replay_dir_deletes(trans, root, NULL, path, 1012 if (S_ISDIR(inode->i_mode)) {
993 inode->i_ino, 1); 1013 ret = replay_dir_deletes(trans, root, NULL, path,
1014 inode->i_ino, 1);
1015 BUG_ON(ret);
1016 }
1017 ret = insert_orphan_item(trans, root, inode->i_ino);
994 BUG_ON(ret); 1018 BUG_ON(ret);
995 } 1019 }
996 btrfs_free_path(path); 1020 btrfs_free_path(path);
@@ -1578,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1578 /* inode keys are done during the first stage */ 1602 /* inode keys are done during the first stage */
1579 if (key.type == BTRFS_INODE_ITEM_KEY && 1603 if (key.type == BTRFS_INODE_ITEM_KEY &&
1580 wc->stage == LOG_WALK_REPLAY_INODES) { 1604 wc->stage == LOG_WALK_REPLAY_INODES) {
1581 struct inode *inode;
1582 struct btrfs_inode_item *inode_item; 1605 struct btrfs_inode_item *inode_item;
1583 u32 mode; 1606 u32 mode;
1584 1607
@@ -1594,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1594 eb, i, &key); 1617 eb, i, &key);
1595 BUG_ON(ret); 1618 BUG_ON(ret);
1596 1619
1597 /* for regular files, truncate away 1620 /* for regular files, make sure corresponding
1598 * extents past the new EOF 1621 * orphan item exists. extents past the new EOF
1622 * will be truncated later by orphan cleanup.
1599 */ 1623 */
1600 if (S_ISREG(mode)) { 1624 if (S_ISREG(mode)) {
1601 inode = read_one_inode(root, 1625 ret = insert_orphan_item(wc->trans, root,
1602 key.objectid); 1626 key.objectid);
1603 BUG_ON(!inode);
1604
1605 ret = btrfs_truncate_inode_items(wc->trans,
1606 root, inode, inode->i_size,
1607 BTRFS_EXTENT_DATA_KEY);
1608 BUG_ON(ret); 1627 BUG_ON(ret);
1609
1610 /* if the nlink count is zero here, the iput
1611 * will free the inode. We bump it to make
1612 * sure it doesn't get freed until the link
1613 * count fixup is done
1614 */
1615 if (inode->i_nlink == 0) {
1616 btrfs_inc_nlink(inode);
1617 btrfs_update_inode(wc->trans,
1618 root, inode);
1619 }
1620 iput(inode);
1621 } 1628 }
1629
1622 ret = link_to_fixup_dir(wc->trans, root, 1630 ret = link_to_fixup_dir(wc->trans, root,
1623 path, key.objectid); 1631 path, key.objectid);
1624 BUG_ON(ret); 1632 BUG_ON(ret);
@@ -1968,9 +1976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1968{ 1976{
1969 int index1; 1977 int index1;
1970 int index2; 1978 int index2;
1979 int mark;
1971 int ret; 1980 int ret;
1972 struct btrfs_root *log = root->log_root; 1981 struct btrfs_root *log = root->log_root;
1973 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
1983 unsigned long log_transid = 0;
1974 1984
1975 mutex_lock(&root->log_mutex); 1985 mutex_lock(&root->log_mutex);
1976 index1 = root->log_transid % 2; 1986 index1 = root->log_transid % 2;
@@ -1987,10 +1997,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1987 1997
1988 while (1) { 1998 while (1) {
1989 unsigned long batch = root->log_batch; 1999 unsigned long batch = root->log_batch;
1990 mutex_unlock(&root->log_mutex); 2000 if (root->log_multiple_pids) {
1991 schedule_timeout_uninterruptible(1); 2001 mutex_unlock(&root->log_mutex);
1992 mutex_lock(&root->log_mutex); 2002 schedule_timeout_uninterruptible(1);
1993 2003 mutex_lock(&root->log_mutex);
2004 }
1994 wait_for_writer(trans, root); 2005 wait_for_writer(trans, root);
1995 if (batch == root->log_batch) 2006 if (batch == root->log_batch)
1996 break; 2007 break;
@@ -2003,7 +2014,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2003 goto out; 2014 goto out;
2004 } 2015 }
2005 2016
2006 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 2017 log_transid = root->log_transid;
2018 if (log_transid % 2 == 0)
2019 mark = EXTENT_DIRTY;
2020 else
2021 mark = EXTENT_NEW;
2022
2023 /* we start IO on all the marked extents here, but we don't actually
2024 * wait for them until later.
2025 */
2026 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2007 BUG_ON(ret); 2027 BUG_ON(ret);
2008 2028
2009 btrfs_set_root_node(&log->root_item, log->node); 2029 btrfs_set_root_node(&log->root_item, log->node);
@@ -2011,11 +2031,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2011 root->log_batch = 0; 2031 root->log_batch = 0;
2012 root->log_transid++; 2032 root->log_transid++;
2013 log->log_transid = root->log_transid; 2033 log->log_transid = root->log_transid;
2034 root->log_start_pid = 0;
2014 smp_mb(); 2035 smp_mb();
2015 /* 2036 /*
2016 * log tree has been flushed to disk, new modifications of 2037 * IO has been started, blocks of the log tree have WRITTEN flag set
2017 * the log will be written to new positions. so it's safe to 2038 * in their headers. new modifications of the log will be written to
2018 * allow log writers to go in. 2039 * new positions. so it's safe to allow log writers to go in.
2019 */ 2040 */
2020 mutex_unlock(&root->log_mutex); 2041 mutex_unlock(&root->log_mutex);
2021 2042
@@ -2036,6 +2057,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2036 2057
2037 index2 = log_root_tree->log_transid % 2; 2058 index2 = log_root_tree->log_transid % 2;
2038 if (atomic_read(&log_root_tree->log_commit[index2])) { 2059 if (atomic_read(&log_root_tree->log_commit[index2])) {
2060 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2039 wait_log_commit(trans, log_root_tree, 2061 wait_log_commit(trans, log_root_tree,
2040 log_root_tree->log_transid); 2062 log_root_tree->log_transid);
2041 mutex_unlock(&log_root_tree->log_mutex); 2063 mutex_unlock(&log_root_tree->log_mutex);
@@ -2055,14 +2077,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2055 * check the full commit flag again 2077 * check the full commit flag again
2056 */ 2078 */
2057 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2079 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2080 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2058 mutex_unlock(&log_root_tree->log_mutex); 2081 mutex_unlock(&log_root_tree->log_mutex);
2059 ret = -EAGAIN; 2082 ret = -EAGAIN;
2060 goto out_wake_log_root; 2083 goto out_wake_log_root;
2061 } 2084 }
2062 2085
2063 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2086 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
2064 &log_root_tree->dirty_log_pages); 2087 &log_root_tree->dirty_log_pages,
2088 EXTENT_DIRTY | EXTENT_NEW);
2065 BUG_ON(ret); 2089 BUG_ON(ret);
2090 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2066 2091
2067 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 2092 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
2068 log_root_tree->node->start); 2093 log_root_tree->node->start);
@@ -2082,9 +2107,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2082 * the running transaction open, so a full commit can't hop 2107 * the running transaction open, so a full commit can't hop
2083 * in and cause problems either. 2108 * in and cause problems either.
2084 */ 2109 */
2085 write_ctree_super(trans, root->fs_info->tree_root, 2); 2110 write_ctree_super(trans, root->fs_info->tree_root, 1);
2086 ret = 0; 2111 ret = 0;
2087 2112
2113 mutex_lock(&root->log_mutex);
2114 if (root->last_log_commit < log_transid)
2115 root->last_log_commit = log_transid;
2116 mutex_unlock(&root->log_mutex);
2117
2088out_wake_log_root: 2118out_wake_log_root:
2089 atomic_set(&log_root_tree->log_commit[index2], 0); 2119 atomic_set(&log_root_tree->log_commit[index2], 0);
2090 smp_mb(); 2120 smp_mb();
@@ -2123,12 +2153,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2123 2153
2124 while (1) { 2154 while (1) {
2125 ret = find_first_extent_bit(&log->dirty_log_pages, 2155 ret = find_first_extent_bit(&log->dirty_log_pages,
2126 0, &start, &end, EXTENT_DIRTY); 2156 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
2127 if (ret) 2157 if (ret)
2128 break; 2158 break;
2129 2159
2130 clear_extent_dirty(&log->dirty_log_pages, 2160 clear_extent_bits(&log->dirty_log_pages, start, end,
2131 start, end, GFP_NOFS); 2161 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2132 } 2162 }
2133 2163
2134 if (log->log_transid > 0) { 2164 if (log->log_transid > 0) {
@@ -2852,6 +2882,21 @@ out:
2852 return ret; 2882 return ret;
2853} 2883}
2854 2884
2885static int inode_in_log(struct btrfs_trans_handle *trans,
2886 struct inode *inode)
2887{
2888 struct btrfs_root *root = BTRFS_I(inode)->root;
2889 int ret = 0;
2890
2891 mutex_lock(&root->log_mutex);
2892 if (BTRFS_I(inode)->logged_trans == trans->transid &&
2893 BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
2894 ret = 1;
2895 mutex_unlock(&root->log_mutex);
2896 return ret;
2897}
2898
2899
2855/* 2900/*
2856 * helper function around btrfs_log_inode to make sure newly created 2901 * helper function around btrfs_log_inode to make sure newly created
2857 * parent directories also end up in the log. A minimal inode and backref 2902 * parent directories also end up in the log. A minimal inode and backref
@@ -2891,6 +2936,11 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
2891 if (ret) 2936 if (ret)
2892 goto end_no_trans; 2937 goto end_no_trans;
2893 2938
2939 if (inode_in_log(trans, inode)) {
2940 ret = BTRFS_NO_LOG_SYNC;
2941 goto end_no_trans;
2942 }
2943
2894 start_log_trans(trans, root); 2944 start_log_trans(trans, root);
2895 2945
2896 ret = btrfs_log_inode(trans, root, inode, inode_only); 2946 ret = btrfs_log_inode(trans, root, inode, inode_only);