aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
author Josef Bacik <josef@redhat.com> 2012-05-23 14:26:42 -0400
committer Josef Bacik <josef@redhat.com> 2012-05-30 10:23:37 -0400
commit 8a35d95ff4680a456d3ce47df9638f33d4f54f20 (patch)
tree 8adb116747b84209406d29767b56e2a6ade2f2f4 /fs/btrfs/inode.c
parent 72ac3c0d7921f943d92d1ef42a549fb52e56817d (diff)
Btrfs: fix how we deal with the orphan block rsv
Ceph was hitting this race where we would remove an inode from the per-root orphan list before we would release the space we had reserved for the inode. We actually don't need a list or anything, we just need to make sure the root doesn't try to free up the orphan reserve until after the inodes have released their reservations. So use an atomic counter instead of a list on the root and only decrement the counter after we've released our reservation. I've tested this as well as several others and we no longer see the warnings that you would see while running ceph. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 38
1 files changed, 21 insertions, 17 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 91ad6390175..029892887fc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2104,12 +2104,12 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
2104 struct btrfs_block_rsv *block_rsv; 2104 struct btrfs_block_rsv *block_rsv;
2105 int ret; 2105 int ret;
2106 2106
2107 if (!list_empty(&root->orphan_list) || 2107 if (atomic_read(&root->orphan_inodes) ||
2108 root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) 2108 root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
2109 return; 2109 return;
2110 2110
2111 spin_lock(&root->orphan_lock); 2111 spin_lock(&root->orphan_lock);
2112 if (!list_empty(&root->orphan_list)) { 2112 if (atomic_read(&root->orphan_inodes)) {
2113 spin_unlock(&root->orphan_lock); 2113 spin_unlock(&root->orphan_lock);
2114 return; 2114 return;
2115 } 2115 }
@@ -2166,8 +2166,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2166 block_rsv = NULL; 2166 block_rsv = NULL;
2167 } 2167 }
2168 2168
2169 if (list_empty(&BTRFS_I(inode)->i_orphan)) { 2169 if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2170 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); 2170 &BTRFS_I(inode)->runtime_flags)) {
2171#if 0 2171#if 0
2172 /* 2172 /*
2173 * For proper ENOSPC handling, we should do orphan 2173 * For proper ENOSPC handling, we should do orphan
@@ -2180,6 +2180,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2180 insert = 1; 2180 insert = 1;
2181#endif 2181#endif
2182 insert = 1; 2182 insert = 1;
2183 atomic_dec(&root->orphan_inodes);
2183 } 2184 }
2184 2185
2185 if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, 2186 if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
@@ -2197,6 +2198,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2197 if (insert >= 1) { 2198 if (insert >= 1) {
2198 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2199 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2199 if (ret && ret != -EEXIST) { 2200 if (ret && ret != -EEXIST) {
2201 clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2202 &BTRFS_I(inode)->runtime_flags);
2200 btrfs_abort_transaction(trans, root, ret); 2203 btrfs_abort_transaction(trans, root, ret);
2201 return ret; 2204 return ret;
2202 } 2205 }
@@ -2227,10 +2230,9 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2227 int ret = 0; 2230 int ret = 0;
2228 2231
2229 spin_lock(&root->orphan_lock); 2232 spin_lock(&root->orphan_lock);
2230 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 2233 if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2231 list_del_init(&BTRFS_I(inode)->i_orphan); 2234 &BTRFS_I(inode)->runtime_flags))
2232 delete_item = 1; 2235 delete_item = 1;
2233 }
2234 2236
2235 if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, 2237 if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
2236 &BTRFS_I(inode)->runtime_flags)) 2238 &BTRFS_I(inode)->runtime_flags))
@@ -2242,8 +2244,10 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2242 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ 2244 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
2243 } 2245 }
2244 2246
2245 if (release_rsv) 2247 if (release_rsv) {
2246 btrfs_orphan_release_metadata(inode); 2248 btrfs_orphan_release_metadata(inode);
2249 atomic_dec(&root->orphan_inodes);
2250 }
2247 2251
2248 return 0; 2252 return 0;
2249} 2253}
@@ -2371,6 +2375,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2371 ret = PTR_ERR(trans); 2375 ret = PTR_ERR(trans);
2372 goto out; 2376 goto out;
2373 } 2377 }
2378 printk(KERN_ERR "auto deleting %Lu\n",
2379 found_key.objectid);
2374 ret = btrfs_del_orphan_item(trans, root, 2380 ret = btrfs_del_orphan_item(trans, root,
2375 found_key.objectid); 2381 found_key.objectid);
2376 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ 2382 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
@@ -2382,9 +2388,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2382 * add this inode to the orphan list so btrfs_orphan_del does 2388 * add this inode to the orphan list so btrfs_orphan_del does
2383 * the proper thing when we hit it 2389 * the proper thing when we hit it
2384 */ 2390 */
2385 spin_lock(&root->orphan_lock); 2391 set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2386 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); 2392 &BTRFS_I(inode)->runtime_flags);
2387 spin_unlock(&root->orphan_lock);
2388 2393
2389 /* if we have links, this was a truncate, lets do that */ 2394 /* if we have links, this was a truncate, lets do that */
2390 if (inode->i_nlink) { 2395 if (inode->i_nlink) {
@@ -3706,7 +3711,8 @@ void btrfs_evict_inode(struct inode *inode)
3706 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3711 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3707 3712
3708 if (root->fs_info->log_root_recovering) { 3713 if (root->fs_info->log_root_recovering) {
3709 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); 3714 BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3715 &BTRFS_I(inode)->runtime_flags));
3710 goto no_delete; 3716 goto no_delete;
3711 } 3717 }
3712 3718
@@ -6903,7 +6909,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6903 mutex_init(&ei->log_mutex); 6909 mutex_init(&ei->log_mutex);
6904 mutex_init(&ei->delalloc_mutex); 6910 mutex_init(&ei->delalloc_mutex);
6905 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6911 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
6906 INIT_LIST_HEAD(&ei->i_orphan);
6907 INIT_LIST_HEAD(&ei->delalloc_inodes); 6912 INIT_LIST_HEAD(&ei->delalloc_inodes);
6908 INIT_LIST_HEAD(&ei->ordered_operations); 6913 INIT_LIST_HEAD(&ei->ordered_operations);
6909 RB_CLEAR_NODE(&ei->rb_node); 6914 RB_CLEAR_NODE(&ei->rb_node);
@@ -6948,13 +6953,12 @@ void btrfs_destroy_inode(struct inode *inode)
6948 spin_unlock(&root->fs_info->ordered_extent_lock); 6953 spin_unlock(&root->fs_info->ordered_extent_lock);
6949 } 6954 }
6950 6955
6951 spin_lock(&root->orphan_lock); 6956 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
6952 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6957 &BTRFS_I(inode)->runtime_flags)) {
6953 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", 6958 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
6954 (unsigned long long)btrfs_ino(inode)); 6959 (unsigned long long)btrfs_ino(inode));
6955 list_del_init(&BTRFS_I(inode)->i_orphan); 6960 atomic_dec(&root->orphan_inodes);
6956 } 6961 }
6957 spin_unlock(&root->orphan_lock);
6958 6962
6959 while (1) { 6963 while (1) {
6960 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); 6964 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);