aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jeff Mahoney <jeffm@suse.com> 2017-01-25 09:50:33 -0500
committer: David Sterba <dsterba@suse.com> 2017-02-14 09:50:59 -0500
commit: 003d7c59e8afc9b2c6b0d163e8e115406c4faecc (patch)
tree: 5e3f48d58117b4c784c403a4751b3c76cb836180
parent: 9a9239acb465df1f6aab379c77befd5cde98c9df (diff)
btrfs: allow unlink to exceed subvolume quota
Once a qgroup limit is exceeded, it's impossible to restore normal operation to the subvolume without modifying the limit or removing the subvolume. This is a surprising situation for many users used to the typical workflow with quotas on other file systems where it's possible to remove files until the used space is back under the limit.

When we go to unlink a file and start the transaction, we'll hit the qgroup limit while trying to reserve space for the items we'll modify while removing the file. We discussed last month how best to handle this situation and agreed that there is no perfect solution. The best principle-of-least-surprise solution is to handle it similarly to how we already handle ENOSPC when unlinking, which is to allow the operation to succeed with the expectation that it will ultimately release space under most circumstances.

This patch modifies the transaction start path to select whether to honor the qgroup limits. btrfs_start_transaction_fallback_global_rsv is the only caller that skips enforcement. The reservation and tracking still happen normally -- that caller just skips the enforcement step.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- fs/btrfs/extent-tree.c | 4
-rw-r--r-- fs/btrfs/qgroup.c | 33
-rw-r--r-- fs/btrfs/qgroup.h | 3
-rw-r--r-- fs/btrfs/transaction.c | 34
4 files changed, 45 insertions, 29 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9fde23475387..7dd71fcc1051 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5799,7 +5799,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
5799 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { 5799 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
5800 /* One for parent inode, two for dir entries */ 5800 /* One for parent inode, two for dir entries */
5801 num_bytes = 3 * fs_info->nodesize; 5801 num_bytes = 3 * fs_info->nodesize;
5802 ret = btrfs_qgroup_reserve_meta(root, num_bytes); 5802 ret = btrfs_qgroup_reserve_meta(root, num_bytes, true);
5803 if (ret) 5803 if (ret)
5804 return ret; 5804 return ret;
5805 } else { 5805 } else {
@@ -5975,7 +5975,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5975 5975
5976 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { 5976 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
5977 ret = btrfs_qgroup_reserve_meta(root, 5977 ret = btrfs_qgroup_reserve_meta(root,
5978 nr_extents * fs_info->nodesize); 5978 nr_extents * fs_info->nodesize, true);
5979 if (ret) 5979 if (ret)
5980 goto out_fail; 5980 goto out_fail;
5981 } 5981 }
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1c555f1e49ba..8496dbf3f38b 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2324,7 +2324,20 @@ out:
2324 return ret; 2324 return ret;
2325} 2325}
2326 2326
2327static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes) 2327static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
2328{
2329 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
2330 qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
2331 return false;
2332
2333 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
2334 qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
2335 return false;
2336
2337 return true;
2338}
2339
2340static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
2328{ 2341{
2329 struct btrfs_root *quota_root; 2342 struct btrfs_root *quota_root;
2330 struct btrfs_qgroup *qgroup; 2343 struct btrfs_qgroup *qgroup;
@@ -2365,16 +2378,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
2365 2378
2366 qg = unode_aux_to_qgroup(unode); 2379 qg = unode_aux_to_qgroup(unode);
2367 2380
2368 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2381 if (enforce && !qgroup_check_limits(qg, num_bytes)) {
2369 qg->reserved + (s64)qg->rfer + num_bytes >
2370 qg->max_rfer) {
2371 ret = -EDQUOT;
2372 goto out;
2373 }
2374
2375 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
2376 qg->reserved + (s64)qg->excl + num_bytes >
2377 qg->max_excl) {
2378 ret = -EDQUOT; 2382 ret = -EDQUOT;
2379 goto out; 2383 goto out;
2380 } 2384 }
@@ -2832,7 +2836,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
2832 QGROUP_RESERVE); 2836 QGROUP_RESERVE);
2833 if (ret < 0) 2837 if (ret < 0)
2834 goto cleanup; 2838 goto cleanup;
2835 ret = qgroup_reserve(root, changeset.bytes_changed); 2839 ret = qgroup_reserve(root, changeset.bytes_changed, true);
2836 if (ret < 0) 2840 if (ret < 0)
2837 goto cleanup; 2841 goto cleanup;
2838 2842
@@ -2913,7 +2917,8 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
2913 return __btrfs_qgroup_release_data(inode, start, len, 0); 2917 return __btrfs_qgroup_release_data(inode, start, len, 0);
2914} 2918}
2915 2919
2916int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes) 2920int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
2921 bool enforce)
2917{ 2922{
2918 struct btrfs_fs_info *fs_info = root->fs_info; 2923 struct btrfs_fs_info *fs_info = root->fs_info;
2919 int ret; 2924 int ret;
@@ -2923,7 +2928,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes)
2923 return 0; 2928 return 0;
2924 2929
2925 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 2930 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
2926 ret = qgroup_reserve(root, num_bytes); 2931 ret = qgroup_reserve(root, num_bytes, enforce);
2927 if (ret < 0) 2932 if (ret < 0)
2928 return ret; 2933 return ret;
2929 atomic_add(num_bytes, &root->qgroup_meta_rsv); 2934 atomic_add(num_bytes, &root->qgroup_meta_rsv);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 416ae8e1d23c..ee95f456a61f 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -181,7 +181,8 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len);
181int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len); 181int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
182int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len); 182int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len);
183 183
184int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes); 184int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
185 bool enforce);
185void btrfs_qgroup_free_meta_all(struct btrfs_root *root); 186void btrfs_qgroup_free_meta_all(struct btrfs_root *root);
186void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes); 187void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes);
187void btrfs_qgroup_check_reserved_leak(struct inode *inode); 188void btrfs_qgroup_check_reserved_leak(struct inode *inode);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 90e73f65dccf..48aabb367f73 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -474,7 +474,8 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root)
474 474
475static struct btrfs_trans_handle * 475static struct btrfs_trans_handle *
476start_transaction(struct btrfs_root *root, unsigned int num_items, 476start_transaction(struct btrfs_root *root, unsigned int num_items,
477 unsigned int type, enum btrfs_reserve_flush_enum flush) 477 unsigned int type, enum btrfs_reserve_flush_enum flush,
478 bool enforce_qgroups)
478{ 479{
479 struct btrfs_fs_info *fs_info = root->fs_info; 480 struct btrfs_fs_info *fs_info = root->fs_info;
480 481
@@ -505,9 +506,10 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
505 * Do the reservation before we join the transaction so we can do all 506 * Do the reservation before we join the transaction so we can do all
506 * the appropriate flushing if need be. 507 * the appropriate flushing if need be.
507 */ 508 */
508 if (num_items > 0 && root != fs_info->chunk_root) { 509 if (num_items && root != fs_info->chunk_root) {
509 qgroup_reserved = num_items * fs_info->nodesize; 510 qgroup_reserved = num_items * fs_info->nodesize;
510 ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved); 511 ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved,
512 enforce_qgroups);
511 if (ret) 513 if (ret)
512 return ERR_PTR(ret); 514 return ERR_PTR(ret);
513 515
@@ -613,8 +615,9 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
613 unsigned int num_items) 615 unsigned int num_items)
614{ 616{
615 return start_transaction(root, num_items, TRANS_START, 617 return start_transaction(root, num_items, TRANS_START,
616 BTRFS_RESERVE_FLUSH_ALL); 618 BTRFS_RESERVE_FLUSH_ALL, true);
617} 619}
620
618struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( 621struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
619 struct btrfs_root *root, 622 struct btrfs_root *root,
620 unsigned int num_items, 623 unsigned int num_items,
@@ -625,7 +628,14 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
625 u64 num_bytes; 628 u64 num_bytes;
626 int ret; 629 int ret;
627 630
628 trans = btrfs_start_transaction(root, num_items); 631 /*
632 * We have two callers: unlink and block group removal. The
633 * former should succeed even if we will temporarily exceed
634 * quota and the latter operates on the extent root so
635 * qgroup enforcement is ignored anyway.
636 */
637 trans = start_transaction(root, num_items, TRANS_START,
638 BTRFS_RESERVE_FLUSH_ALL, false);
629 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 639 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
630 return trans; 640 return trans;
631 641
@@ -654,25 +664,25 @@ struct btrfs_trans_handle *btrfs_start_transaction_lflush(
654 unsigned int num_items) 664 unsigned int num_items)
655{ 665{
656 return start_transaction(root, num_items, TRANS_START, 666 return start_transaction(root, num_items, TRANS_START,
657 BTRFS_RESERVE_FLUSH_LIMIT); 667 BTRFS_RESERVE_FLUSH_LIMIT, true);
658} 668}
659 669
660struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) 670struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
661{ 671{
662 return start_transaction(root, 0, TRANS_JOIN, 672 return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH,
663 BTRFS_RESERVE_NO_FLUSH); 673 true);
664} 674}
665 675
666struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) 676struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
667{ 677{
668 return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 678 return start_transaction(root, 0, TRANS_JOIN_NOLOCK,
669 BTRFS_RESERVE_NO_FLUSH); 679 BTRFS_RESERVE_NO_FLUSH, true);
670} 680}
671 681
672struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) 682struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
673{ 683{
674 return start_transaction(root, 0, TRANS_USERSPACE, 684 return start_transaction(root, 0, TRANS_USERSPACE,
675 BTRFS_RESERVE_NO_FLUSH); 685 BTRFS_RESERVE_NO_FLUSH, true);
676} 686}
677 687
678/* 688/*
@@ -691,7 +701,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
691struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) 701struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
692{ 702{
693 return start_transaction(root, 0, TRANS_ATTACH, 703 return start_transaction(root, 0, TRANS_ATTACH,
694 BTRFS_RESERVE_NO_FLUSH); 704 BTRFS_RESERVE_NO_FLUSH, true);
695} 705}
696 706
697/* 707/*
@@ -707,7 +717,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
707 struct btrfs_trans_handle *trans; 717 struct btrfs_trans_handle *trans;
708 718
709 trans = start_transaction(root, 0, TRANS_ATTACH, 719 trans = start_transaction(root, 0, TRANS_ATTACH,
710 BTRFS_RESERVE_NO_FLUSH); 720 BTRFS_RESERVE_NO_FLUSH, true);
711 if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT) 721 if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
712 btrfs_wait_for_commit(root->fs_info, 0); 722 btrfs_wait_for_commit(root->fs_info, 0);
713 723