diff options
author | Dongsheng Yang <yangds.fnst@cn.fujitsu.com> | 2014-12-12 03:44:35 -0500 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2015-04-13 10:52:47 -0400 |
commit | 31193213f1f9c13f6485007ef1e233b119e46910 (patch) | |
tree | 755184ecff35a6387944eeefa4d2448917a43bbb /fs | |
parent | 804ca127fb93988c6a9d5f2bf4a8f1a780c9a2d0 (diff) |
Btrfs: qgroup: Introduce a may_use to account space_info->bytes_may_use.
Currently, for pre_alloc or delay_alloc, the bytes are accounted
in space_info by the following three counters:
space_info->bytes_may_use --- space_info->reserved --- space_info->used.
But on the other hand, in qgroup, there are only two counters to account the
bytes, qgroup->reserved and qgroup->excl. And qg->reserved accounts
bytes in space_info->bytes_may_use and qg->excl accounts bytes in
space_info->used. So the bytes in space_info->reserved are not accounted
in qgroup. As a result, there is a window in which we can exceed the quota
limit while bytes are in space_info->reserved.
Example:
# btrfs quota enable /mnt
# btrfs qgroup limit -e 10M /mnt
# for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done
# sync
# btrfs qgroup show -pcre /mnt
qgroupid rfer excl max_rfer max_excl parent child
-------- ---- ---- -------- -------- ------ -----
0/5 20987904 20987904 0 10485760 --- ---
qg->excl is 20987904, which is larger than max_excl 10485760.
This patch introduces a new counter named may_use in qgroup, so that
there are three counters in qgroup to account for the bytes in space_info,
as shown below.
space_info->bytes_may_use --- space_info->reserved --- space_info->used.
qgroup->may_use --- qgroup->reserved --- qgroup->excl
With this patch applied:
# btrfs quota enable /mnt
# btrfs qgroup limit -e 10M /mnt
# for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done
fallocate: /mnt/data9: fallocate failed: Disk quota exceeded
fallocate: /mnt/data10: fallocate failed: Disk quota exceeded
fallocate: /mnt/data11: fallocate failed: Disk quota exceeded
fallocate: /mnt/data12: fallocate failed: Disk quota exceeded
fallocate: /mnt/data13: fallocate failed: Disk quota exceeded
fallocate: /mnt/data14: fallocate failed: Disk quota exceeded
fallocate: /mnt/data15: fallocate failed: Disk quota exceeded
fallocate: /mnt/data16: fallocate failed: Disk quota exceeded
fallocate: /mnt/data17: fallocate failed: Disk quota exceeded
fallocate: /mnt/data18: fallocate failed: Disk quota exceeded
fallocate: /mnt/data19: fallocate failed: Disk quota exceeded
# sync
# btrfs qgroup show -pcre /mnt
qgroupid rfer excl max_rfer max_excl parent child
-------- ---- ---- -------- -------- ------ -----
0/5 9453568 9453568 0 10485760 --- ---
Reported-by: Cyril SCETBON <cyril.scetbon@free.fr>
Signed-off-by: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/extent-tree.c | 20 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 18 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 68 | ||||
-rw-r--r-- | fs/btrfs/qgroup.h | 4 |
4 files changed, 104 insertions, 6 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 695d5110e020..3113e0b79b99 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -5725,8 +5725,12 @@ static int pin_down_extent(struct btrfs_root *root, | |||
5725 | 5725 | ||
5726 | set_extent_dirty(root->fs_info->pinned_extents, bytenr, | 5726 | set_extent_dirty(root->fs_info->pinned_extents, bytenr, |
5727 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); | 5727 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); |
5728 | if (reserved) | 5728 | if (reserved) { |
5729 | btrfs_qgroup_update_reserved_bytes(root->fs_info, | ||
5730 | root->root_key.objectid, | ||
5731 | num_bytes, -1); | ||
5729 | trace_btrfs_reserved_extent_free(root, bytenr, num_bytes); | 5732 | trace_btrfs_reserved_extent_free(root, bytenr, num_bytes); |
5733 | } | ||
5730 | return 0; | 5734 | return 0; |
5731 | } | 5735 | } |
5732 | 5736 | ||
@@ -6464,6 +6468,9 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
6464 | btrfs_put_block_group(cache); | 6468 | btrfs_put_block_group(cache); |
6465 | trace_btrfs_reserved_extent_free(root, buf->start, buf->len); | 6469 | trace_btrfs_reserved_extent_free(root, buf->start, buf->len); |
6466 | pin = 0; | 6470 | pin = 0; |
6471 | btrfs_qgroup_update_reserved_bytes(root->fs_info, | ||
6472 | root->root_key.objectid, | ||
6473 | buf->len, -1); | ||
6467 | } | 6474 | } |
6468 | out: | 6475 | out: |
6469 | if (pin) | 6476 | if (pin) |
@@ -7196,7 +7203,11 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, | |||
7196 | ret = btrfs_discard_extent(root, start, len, NULL); | 7203 | ret = btrfs_discard_extent(root, start, len, NULL); |
7197 | btrfs_add_free_space(cache, start, len); | 7204 | btrfs_add_free_space(cache, start, len); |
7198 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); | 7205 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); |
7206 | btrfs_qgroup_update_reserved_bytes(root->fs_info, | ||
7207 | root->root_key.objectid, | ||
7208 | len, -1); | ||
7199 | } | 7209 | } |
7210 | |||
7200 | btrfs_put_block_group(cache); | 7211 | btrfs_put_block_group(cache); |
7201 | 7212 | ||
7202 | trace_btrfs_reserved_extent_free(root, start, len); | 7213 | trace_btrfs_reserved_extent_free(root, start, len); |
@@ -7433,6 +7444,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
7433 | BUG_ON(ret); /* logic error */ | 7444 | BUG_ON(ret); /* logic error */ |
7434 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 7445 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
7435 | 0, owner, offset, ins, 1); | 7446 | 0, owner, offset, ins, 1); |
7447 | btrfs_qgroup_update_reserved_bytes(root->fs_info, | ||
7448 | root->root_key.objectid, | ||
7449 | ins->offset, 1); | ||
7436 | btrfs_put_block_group(block_group); | 7450 | btrfs_put_block_group(block_group); |
7437 | return ret; | 7451 | return ret; |
7438 | } | 7452 | } |
@@ -7579,6 +7593,10 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, | |||
7579 | return ERR_PTR(ret); | 7593 | return ERR_PTR(ret); |
7580 | } | 7594 | } |
7581 | 7595 | ||
7596 | btrfs_qgroup_update_reserved_bytes(root->fs_info, | ||
7597 | root_objectid, | ||
7598 | ins.offset, 1); | ||
7599 | |||
7582 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); | 7600 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); |
7583 | BUG_ON(IS_ERR(buf)); /* -ENOMEM */ | 7601 | BUG_ON(IS_ERR(buf)); /* -ENOMEM */ |
7584 | 7602 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6ef97c184c7b..a9f69a0d4b08 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include "backref.h" | 60 | #include "backref.h" |
61 | #include "hash.h" | 61 | #include "hash.h" |
62 | #include "props.h" | 62 | #include "props.h" |
63 | #include "qgroup.h" | ||
63 | 64 | ||
64 | struct btrfs_iget_args { | 65 | struct btrfs_iget_args { |
65 | struct btrfs_key *location; | 66 | struct btrfs_key *location; |
@@ -753,7 +754,9 @@ retry: | |||
753 | } | 754 | } |
754 | goto out_free; | 755 | goto out_free; |
755 | } | 756 | } |
756 | 757 | btrfs_qgroup_update_reserved_bytes(root->fs_info, | |
758 | root->root_key.objectid, | ||
759 | ins.offset, 1); | ||
757 | /* | 760 | /* |
758 | * here we're doing allocation and writeback of the | 761 | * here we're doing allocation and writeback of the |
759 | * compressed pages | 762 | * compressed pages |
@@ -978,6 +981,10 @@ static noinline int cow_file_range(struct inode *inode, | |||
978 | if (ret < 0) | 981 | if (ret < 0) |
979 | goto out_unlock; | 982 | goto out_unlock; |
980 | 983 | ||
984 | btrfs_qgroup_update_reserved_bytes(root->fs_info, | ||
985 | root->root_key.objectid, | ||
986 | ins.offset, 1); | ||
987 | |||
981 | em = alloc_extent_map(); | 988 | em = alloc_extent_map(); |
982 | if (!em) { | 989 | if (!em) { |
983 | ret = -ENOMEM; | 990 | ret = -ENOMEM; |
@@ -7030,6 +7037,10 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
7030 | return ERR_PTR(ret); | 7037 | return ERR_PTR(ret); |
7031 | } | 7038 | } |
7032 | 7039 | ||
7040 | btrfs_qgroup_update_reserved_bytes(root->fs_info, | ||
7041 | root->root_key.objectid, | ||
7042 | ins.offset, 1); | ||
7043 | |||
7033 | return em; | 7044 | return em; |
7034 | } | 7045 | } |
7035 | 7046 | ||
@@ -9583,6 +9594,11 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
9583 | btrfs_end_transaction(trans, root); | 9594 | btrfs_end_transaction(trans, root); |
9584 | break; | 9595 | break; |
9585 | } | 9596 | } |
9597 | |||
9598 | btrfs_qgroup_update_reserved_bytes(root->fs_info, | ||
9599 | root->root_key.objectid, | ||
9600 | ins.offset, 1); | ||
9601 | |||
9586 | btrfs_drop_extent_cache(inode, cur_offset, | 9602 | btrfs_drop_extent_cache(inode, cur_offset, |
9587 | cur_offset + ins.offset -1, 0); | 9603 | cur_offset + ins.offset -1, 0); |
9588 | 9604 | ||
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index de321c90130c..cd291733dc3e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -72,6 +72,7 @@ struct btrfs_qgroup { | |||
72 | /* | 72 | /* |
73 | * reservation tracking | 73 | * reservation tracking |
74 | */ | 74 | */ |
75 | u64 may_use; | ||
75 | u64 reserved; | 76 | u64 reserved; |
76 | 77 | ||
77 | /* | 78 | /* |
@@ -1417,6 +1418,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, | |||
1417 | WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); | 1418 | WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); |
1418 | qgroup->excl += sign * oper->num_bytes; | 1419 | qgroup->excl += sign * oper->num_bytes; |
1419 | qgroup->excl_cmpr += sign * oper->num_bytes; | 1420 | qgroup->excl_cmpr += sign * oper->num_bytes; |
1421 | if (sign > 0) | ||
1422 | qgroup->reserved -= oper->num_bytes; | ||
1420 | 1423 | ||
1421 | qgroup_dirty(fs_info, qgroup); | 1424 | qgroup_dirty(fs_info, qgroup); |
1422 | 1425 | ||
@@ -1436,6 +1439,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, | |||
1436 | qgroup->rfer_cmpr += sign * oper->num_bytes; | 1439 | qgroup->rfer_cmpr += sign * oper->num_bytes; |
1437 | WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); | 1440 | WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); |
1438 | qgroup->excl += sign * oper->num_bytes; | 1441 | qgroup->excl += sign * oper->num_bytes; |
1442 | if (sign > 0) | ||
1443 | qgroup->reserved -= oper->num_bytes; | ||
1439 | qgroup->excl_cmpr += sign * oper->num_bytes; | 1444 | qgroup->excl_cmpr += sign * oper->num_bytes; |
1440 | qgroup_dirty(fs_info, qgroup); | 1445 | qgroup_dirty(fs_info, qgroup); |
1441 | 1446 | ||
@@ -2378,6 +2383,61 @@ out: | |||
2378 | return ret; | 2383 | return ret; |
2379 | } | 2384 | } |
2380 | 2385 | ||
2386 | int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info, | ||
2387 | u64 ref_root, | ||
2388 | u64 num_bytes, | ||
2389 | int sign) | ||
2390 | { | ||
2391 | struct btrfs_root *quota_root; | ||
2392 | struct btrfs_qgroup *qgroup; | ||
2393 | int ret = 0; | ||
2394 | struct ulist_node *unode; | ||
2395 | struct ulist_iterator uiter; | ||
2396 | |||
2397 | if (!is_fstree(ref_root) || !fs_info->quota_enabled) | ||
2398 | return 0; | ||
2399 | |||
2400 | if (num_bytes == 0) | ||
2401 | return 0; | ||
2402 | |||
2403 | spin_lock(&fs_info->qgroup_lock); | ||
2404 | quota_root = fs_info->quota_root; | ||
2405 | if (!quota_root) | ||
2406 | goto out; | ||
2407 | |||
2408 | qgroup = find_qgroup_rb(fs_info, ref_root); | ||
2409 | if (!qgroup) | ||
2410 | goto out; | ||
2411 | |||
2412 | ulist_reinit(fs_info->qgroup_ulist); | ||
2413 | ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, | ||
2414 | (uintptr_t)qgroup, GFP_ATOMIC); | ||
2415 | if (ret < 0) | ||
2416 | goto out; | ||
2417 | |||
2418 | ULIST_ITER_INIT(&uiter); | ||
2419 | while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { | ||
2420 | struct btrfs_qgroup *qg; | ||
2421 | struct btrfs_qgroup_list *glist; | ||
2422 | |||
2423 | qg = u64_to_ptr(unode->aux); | ||
2424 | |||
2425 | qg->reserved += sign * num_bytes; | ||
2426 | |||
2427 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
2428 | ret = ulist_add(fs_info->qgroup_ulist, | ||
2429 | glist->group->qgroupid, | ||
2430 | (uintptr_t)glist->group, GFP_ATOMIC); | ||
2431 | if (ret < 0) | ||
2432 | goto out; | ||
2433 | } | ||
2434 | } | ||
2435 | |||
2436 | out: | ||
2437 | spin_unlock(&fs_info->qgroup_lock); | ||
2438 | return ret; | ||
2439 | } | ||
2440 | |||
2381 | /* | 2441 | /* |
2382 | * reserve some space for a qgroup and all its parents. The reservation takes | 2442 | * reserve some space for a qgroup and all its parents. The reservation takes |
2383 | * place with start_transaction or dealloc_reserve, similar to ENOSPC | 2443 | * place with start_transaction or dealloc_reserve, similar to ENOSPC |
@@ -2426,14 +2486,14 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
2426 | qg = u64_to_ptr(unode->aux); | 2486 | qg = u64_to_ptr(unode->aux); |
2427 | 2487 | ||
2428 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && | 2488 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && |
2429 | qg->reserved + (s64)qg->rfer + num_bytes > | 2489 | qg->reserved + qg->may_use + (s64)qg->rfer + num_bytes > |
2430 | qg->max_rfer) { | 2490 | qg->max_rfer) { |
2431 | ret = -EDQUOT; | 2491 | ret = -EDQUOT; |
2432 | goto out; | 2492 | goto out; |
2433 | } | 2493 | } |
2434 | 2494 | ||
2435 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && | 2495 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && |
2436 | qg->reserved + (s64)qg->excl + num_bytes > | 2496 | qg->reserved + qg->may_use + (s64)qg->excl + num_bytes > |
2437 | qg->max_excl) { | 2497 | qg->max_excl) { |
2438 | ret = -EDQUOT; | 2498 | ret = -EDQUOT; |
2439 | goto out; | 2499 | goto out; |
@@ -2457,7 +2517,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
2457 | 2517 | ||
2458 | qg = u64_to_ptr(unode->aux); | 2518 | qg = u64_to_ptr(unode->aux); |
2459 | 2519 | ||
2460 | qg->reserved += num_bytes; | 2520 | qg->may_use += num_bytes; |
2461 | } | 2521 | } |
2462 | 2522 | ||
2463 | out: | 2523 | out: |
@@ -2503,7 +2563,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
2503 | 2563 | ||
2504 | qg = u64_to_ptr(unode->aux); | 2564 | qg = u64_to_ptr(unode->aux); |
2505 | 2565 | ||
2506 | qg->reserved -= num_bytes; | 2566 | qg->may_use -= num_bytes; |
2507 | 2567 | ||
2508 | list_for_each_entry(glist, &qg->groups, next_group) { | 2568 | list_for_each_entry(glist, &qg->groups, next_group) { |
2509 | ret = ulist_add(fs_info->qgroup_ulist, | 2569 | ret = ulist_add(fs_info->qgroup_ulist, |
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index c5242aa9a4b2..64d49b8482b3 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h | |||
@@ -94,6 +94,10 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | |||
94 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | 94 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, |
95 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, | 95 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, |
96 | struct btrfs_qgroup_inherit *inherit); | 96 | struct btrfs_qgroup_inherit *inherit); |
97 | int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info, | ||
98 | u64 ref_root, | ||
99 | u64 num_bytes, | ||
100 | int sign); | ||
97 | int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); | 101 | int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); |
98 | void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); | 102 | void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); |
99 | 103 | ||