aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-22 15:09:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-22 15:09:27 -0400
commitd54b5c1315b25c7baea2fa6017a9e5d9d326742a (patch)
treee7feabc729cc94abc548f0238fa65a7212327439
parent37a535edd72cacf73b456d4e9272ff2ee4bf7405 (diff)
parentc0872323746e11fc79344e3738b283a8cda86654 (diff)
Merge tag 'for-4.17-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba: "This contains a few fixups to the qgroup patches that were merged this dev cycle, unaligned access fix, blockgroup removal corner case fix and a small debugging output tweak" * tag 'for-4.17-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: print-tree: debugging output enhancement btrfs: Fix race condition between delayed refs and blockgroup removal btrfs: fix unaligned access in readdir btrfs: Fix wrong btrfs_delalloc_release_extents parameter btrfs: delayed-inode: Remove wrong qgroup meta reservation calls btrfs: qgroup: Use independent and accurate per inode qgroup rsv btrfs: qgroup: Commit transaction in advance to reduce early EDQUOT
-rw-r--r--fs/btrfs/ctree.h25
-rw-r--r--fs/btrfs/delayed-inode.c20
-rw-r--r--fs/btrfs/delayed-ref.c19
-rw-r--r--fs/btrfs/delayed-ref.h1
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/btrfs/extent-tree.c73
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c20
-rw-r--r--fs/btrfs/print-tree.c25
-rw-r--r--fs/btrfs/print-tree.h2
-rw-r--r--fs/btrfs/qgroup.c43
-rw-r--r--fs/btrfs/transaction.c1
-rw-r--r--fs/btrfs/transaction.h14
13 files changed, 199 insertions, 47 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5474ef14d6e6..2771cc56a622 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -459,6 +459,25 @@ struct btrfs_block_rsv {
459 unsigned short full; 459 unsigned short full;
460 unsigned short type; 460 unsigned short type;
461 unsigned short failfast; 461 unsigned short failfast;
462
463 /*
464 * Qgroup equivalent for @size @reserved
465 *
466 * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
467 * about things like csum size nor how many tree blocks it will need to
468 * reserve.
469 *
470 * Qgroup cares more about net change of the extent usage.
471 *
 472 * So for one newly inserted file extent, even in the worst case where
 473 * it causes a leaf split and a level increase, one nodesize per file
 474 * extent is already more than enough.
475 *
476 * In short, qgroup_size/reserved is the upper limit of possible needed
477 * qgroup metadata reservation.
478 */
479 u64 qgroup_rsv_size;
480 u64 qgroup_rsv_reserved;
462}; 481};
463 482
464/* 483/*
@@ -714,6 +733,12 @@ struct btrfs_delayed_root;
714 */ 733 */
715#define BTRFS_FS_EXCL_OP 16 734#define BTRFS_FS_EXCL_OP 16
716 735
736/*
 737 * To inform transaction_kthread that we need an immediate commit, so it
 738 * doesn't need to wait for commit_interval
739 */
740#define BTRFS_FS_NEED_ASYNC_COMMIT 17
741
717struct btrfs_fs_info { 742struct btrfs_fs_info {
718 u8 fsid[BTRFS_FSID_SIZE]; 743 u8 fsid[BTRFS_FSID_SIZE];
719 u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; 744 u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 06ec8ab6d9ba..a8d492dbd3e7 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -556,6 +556,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
556 dst_rsv = &fs_info->delayed_block_rsv; 556 dst_rsv = &fs_info->delayed_block_rsv;
557 557
558 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); 558 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
559
560 /*
561 * Here we migrate space rsv from transaction rsv, since have already
562 * reserved space when starting a transaction. So no need to reserve
563 * qgroup space here.
564 */
559 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); 565 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
560 if (!ret) { 566 if (!ret) {
561 trace_btrfs_space_reservation(fs_info, "delayed_item", 567 trace_btrfs_space_reservation(fs_info, "delayed_item",
@@ -577,7 +583,10 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
577 return; 583 return;
578 584
579 rsv = &fs_info->delayed_block_rsv; 585 rsv = &fs_info->delayed_block_rsv;
580 btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved); 586 /*
587 * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
588 * to release/reserve qgroup space.
589 */
581 trace_btrfs_space_reservation(fs_info, "delayed_item", 590 trace_btrfs_space_reservation(fs_info, "delayed_item",
582 item->key.objectid, item->bytes_reserved, 591 item->key.objectid, item->bytes_reserved,
583 0); 592 0);
@@ -602,9 +611,6 @@ static int btrfs_delayed_inode_reserve_metadata(
602 611
603 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); 612 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
604 613
605 ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
606 if (ret < 0)
607 return ret;
608 /* 614 /*
609 * btrfs_dirty_inode will update the inode under btrfs_join_transaction 615 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
610 * which doesn't reserve space for speed. This is a problem since we 616 * which doesn't reserve space for speed. This is a problem since we
@@ -616,6 +622,10 @@ static int btrfs_delayed_inode_reserve_metadata(
616 */ 622 */
617 if (!src_rsv || (!trans->bytes_reserved && 623 if (!src_rsv || (!trans->bytes_reserved &&
618 src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { 624 src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
625 ret = btrfs_qgroup_reserve_meta_prealloc(root,
626 fs_info->nodesize, true);
627 if (ret < 0)
628 return ret;
619 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, 629 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
620 BTRFS_RESERVE_NO_FLUSH); 630 BTRFS_RESERVE_NO_FLUSH);
621 /* 631 /*
@@ -634,6 +644,8 @@ static int btrfs_delayed_inode_reserve_metadata(
634 "delayed_inode", 644 "delayed_inode",
635 btrfs_ino(inode), 645 btrfs_ino(inode),
636 num_bytes, 1); 646 num_bytes, 1);
647 } else {
648 btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
637 } 649 }
638 return ret; 650 return ret;
639 } 651 }
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 9e98295de7ce..e1b0651686f7 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -540,8 +540,10 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
540 struct btrfs_delayed_ref_head *head_ref, 540 struct btrfs_delayed_ref_head *head_ref,
541 struct btrfs_qgroup_extent_record *qrecord, 541 struct btrfs_qgroup_extent_record *qrecord,
542 u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved, 542 u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
543 int action, int is_data, int *qrecord_inserted_ret, 543 int action, int is_data, int is_system,
544 int *qrecord_inserted_ret,
544 int *old_ref_mod, int *new_ref_mod) 545 int *old_ref_mod, int *new_ref_mod)
546
545{ 547{
546 struct btrfs_delayed_ref_head *existing; 548 struct btrfs_delayed_ref_head *existing;
547 struct btrfs_delayed_ref_root *delayed_refs; 549 struct btrfs_delayed_ref_root *delayed_refs;
@@ -585,6 +587,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
585 head_ref->ref_mod = count_mod; 587 head_ref->ref_mod = count_mod;
586 head_ref->must_insert_reserved = must_insert_reserved; 588 head_ref->must_insert_reserved = must_insert_reserved;
587 head_ref->is_data = is_data; 589 head_ref->is_data = is_data;
590 head_ref->is_system = is_system;
588 head_ref->ref_tree = RB_ROOT; 591 head_ref->ref_tree = RB_ROOT;
589 INIT_LIST_HEAD(&head_ref->ref_add_list); 592 INIT_LIST_HEAD(&head_ref->ref_add_list);
590 RB_CLEAR_NODE(&head_ref->href_node); 593 RB_CLEAR_NODE(&head_ref->href_node);
@@ -772,6 +775,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
772 struct btrfs_delayed_ref_root *delayed_refs; 775 struct btrfs_delayed_ref_root *delayed_refs;
773 struct btrfs_qgroup_extent_record *record = NULL; 776 struct btrfs_qgroup_extent_record *record = NULL;
774 int qrecord_inserted; 777 int qrecord_inserted;
778 int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
775 779
776 BUG_ON(extent_op && extent_op->is_data); 780 BUG_ON(extent_op && extent_op->is_data);
777 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); 781 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@@ -800,8 +804,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
800 */ 804 */
801 head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, 805 head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
802 bytenr, num_bytes, 0, 0, action, 0, 806 bytenr, num_bytes, 0, 0, action, 0,
803 &qrecord_inserted, old_ref_mod, 807 is_system, &qrecord_inserted,
804 new_ref_mod); 808 old_ref_mod, new_ref_mod);
805 809
806 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, 810 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
807 num_bytes, parent, ref_root, level, action); 811 num_bytes, parent, ref_root, level, action);
@@ -868,7 +872,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
868 */ 872 */
869 head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, 873 head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
870 bytenr, num_bytes, ref_root, reserved, 874 bytenr, num_bytes, ref_root, reserved,
871 action, 1, &qrecord_inserted, 875 action, 1, 0, &qrecord_inserted,
872 old_ref_mod, new_ref_mod); 876 old_ref_mod, new_ref_mod);
873 877
874 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, 878 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -898,9 +902,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
898 delayed_refs = &trans->transaction->delayed_refs; 902 delayed_refs = &trans->transaction->delayed_refs;
899 spin_lock(&delayed_refs->lock); 903 spin_lock(&delayed_refs->lock);
900 904
905 /*
906 * extent_ops just modify the flags of an extent and they don't result
907 * in ref count changes, hence it's safe to pass false/0 for is_system
908 * argument
909 */
901 add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr, 910 add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
902 num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD, 911 num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
903 extent_op->is_data, NULL, NULL, NULL); 912 extent_op->is_data, 0, NULL, NULL, NULL);
904 913
905 spin_unlock(&delayed_refs->lock); 914 spin_unlock(&delayed_refs->lock);
906 return 0; 915 return 0;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 741869dbc316..7f00db50bd24 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -127,6 +127,7 @@ struct btrfs_delayed_ref_head {
127 */ 127 */
128 unsigned int must_insert_reserved:1; 128 unsigned int must_insert_reserved:1;
129 unsigned int is_data:1; 129 unsigned int is_data:1;
130 unsigned int is_system:1;
130 unsigned int processing:1; 131 unsigned int processing:1;
131}; 132};
132 133
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4ac8b1d21baf..60caa68c3618 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1824,6 +1824,7 @@ static int transaction_kthread(void *arg)
1824 1824
1825 now = get_seconds(); 1825 now = get_seconds();
1826 if (cur->state < TRANS_STATE_BLOCKED && 1826 if (cur->state < TRANS_STATE_BLOCKED &&
1827 !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
1827 (now < cur->start_time || 1828 (now < cur->start_time ||
1828 now - cur->start_time < fs_info->commit_interval)) { 1829 now - cur->start_time < fs_info->commit_interval)) {
1829 spin_unlock(&fs_info->trans_lock); 1830 spin_unlock(&fs_info->trans_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 75cfb80d2551..e2f16b68fcbf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2601,13 +2601,19 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
2601 trace_run_delayed_ref_head(fs_info, head, 0); 2601 trace_run_delayed_ref_head(fs_info, head, 0);
2602 2602
2603 if (head->total_ref_mod < 0) { 2603 if (head->total_ref_mod < 0) {
2604 struct btrfs_block_group_cache *cache; 2604 struct btrfs_space_info *space_info;
2605 u64 flags;
2605 2606
2606 cache = btrfs_lookup_block_group(fs_info, head->bytenr); 2607 if (head->is_data)
2607 ASSERT(cache); 2608 flags = BTRFS_BLOCK_GROUP_DATA;
2608 percpu_counter_add(&cache->space_info->total_bytes_pinned, 2609 else if (head->is_system)
2610 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2611 else
2612 flags = BTRFS_BLOCK_GROUP_METADATA;
2613 space_info = __find_space_info(fs_info, flags);
2614 ASSERT(space_info);
2615 percpu_counter_add(&space_info->total_bytes_pinned,
2609 -head->num_bytes); 2616 -head->num_bytes);
2610 btrfs_put_block_group(cache);
2611 2617
2612 if (head->is_data) { 2618 if (head->is_data) {
2613 spin_lock(&delayed_refs->lock); 2619 spin_lock(&delayed_refs->lock);
@@ -5559,14 +5565,18 @@ again:
5559 5565
5560static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, 5566static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5561 struct btrfs_block_rsv *block_rsv, 5567 struct btrfs_block_rsv *block_rsv,
5562 struct btrfs_block_rsv *dest, u64 num_bytes) 5568 struct btrfs_block_rsv *dest, u64 num_bytes,
5569 u64 *qgroup_to_release_ret)
5563{ 5570{
5564 struct btrfs_space_info *space_info = block_rsv->space_info; 5571 struct btrfs_space_info *space_info = block_rsv->space_info;
5572 u64 qgroup_to_release = 0;
5565 u64 ret; 5573 u64 ret;
5566 5574
5567 spin_lock(&block_rsv->lock); 5575 spin_lock(&block_rsv->lock);
5568 if (num_bytes == (u64)-1) 5576 if (num_bytes == (u64)-1) {
5569 num_bytes = block_rsv->size; 5577 num_bytes = block_rsv->size;
5578 qgroup_to_release = block_rsv->qgroup_rsv_size;
5579 }
5570 block_rsv->size -= num_bytes; 5580 block_rsv->size -= num_bytes;
5571 if (block_rsv->reserved >= block_rsv->size) { 5581 if (block_rsv->reserved >= block_rsv->size) {
5572 num_bytes = block_rsv->reserved - block_rsv->size; 5582 num_bytes = block_rsv->reserved - block_rsv->size;
@@ -5575,6 +5585,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5575 } else { 5585 } else {
5576 num_bytes = 0; 5586 num_bytes = 0;
5577 } 5587 }
5588 if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
5589 qgroup_to_release = block_rsv->qgroup_rsv_reserved -
5590 block_rsv->qgroup_rsv_size;
5591 block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
5592 } else {
5593 qgroup_to_release = 0;
5594 }
5578 spin_unlock(&block_rsv->lock); 5595 spin_unlock(&block_rsv->lock);
5579 5596
5580 ret = num_bytes; 5597 ret = num_bytes;
@@ -5597,6 +5614,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5597 space_info_add_old_bytes(fs_info, space_info, 5614 space_info_add_old_bytes(fs_info, space_info,
5598 num_bytes); 5615 num_bytes);
5599 } 5616 }
5617 if (qgroup_to_release_ret)
5618 *qgroup_to_release_ret = qgroup_to_release;
5600 return ret; 5619 return ret;
5601} 5620}
5602 5621
@@ -5738,17 +5757,21 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
5738 struct btrfs_root *root = inode->root; 5757 struct btrfs_root *root = inode->root;
5739 struct btrfs_block_rsv *block_rsv = &inode->block_rsv; 5758 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5740 u64 num_bytes = 0; 5759 u64 num_bytes = 0;
5760 u64 qgroup_num_bytes = 0;
5741 int ret = -ENOSPC; 5761 int ret = -ENOSPC;
5742 5762
5743 spin_lock(&block_rsv->lock); 5763 spin_lock(&block_rsv->lock);
5744 if (block_rsv->reserved < block_rsv->size) 5764 if (block_rsv->reserved < block_rsv->size)
5745 num_bytes = block_rsv->size - block_rsv->reserved; 5765 num_bytes = block_rsv->size - block_rsv->reserved;
5766 if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
5767 qgroup_num_bytes = block_rsv->qgroup_rsv_size -
5768 block_rsv->qgroup_rsv_reserved;
5746 spin_unlock(&block_rsv->lock); 5769 spin_unlock(&block_rsv->lock);
5747 5770
5748 if (num_bytes == 0) 5771 if (num_bytes == 0)
5749 return 0; 5772 return 0;
5750 5773
5751 ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); 5774 ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
5752 if (ret) 5775 if (ret)
5753 return ret; 5776 return ret;
5754 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); 5777 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
@@ -5756,7 +5779,13 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
5756 block_rsv_add_bytes(block_rsv, num_bytes, 0); 5779 block_rsv_add_bytes(block_rsv, num_bytes, 0);
5757 trace_btrfs_space_reservation(root->fs_info, "delalloc", 5780 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5758 btrfs_ino(inode), num_bytes, 1); 5781 btrfs_ino(inode), num_bytes, 1);
5759 } 5782
5783 /* Don't forget to increase qgroup_rsv_reserved */
5784 spin_lock(&block_rsv->lock);
5785 block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
5786 spin_unlock(&block_rsv->lock);
5787 } else
5788 btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
5760 return ret; 5789 return ret;
5761} 5790}
5762 5791
@@ -5777,20 +5806,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
5777 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; 5806 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5778 struct btrfs_block_rsv *block_rsv = &inode->block_rsv; 5807 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5779 u64 released = 0; 5808 u64 released = 0;
5809 u64 qgroup_to_release = 0;
5780 5810
5781 /* 5811 /*
5782 * Since we statically set the block_rsv->size we just want to say we 5812 * Since we statically set the block_rsv->size we just want to say we
5783 * are releasing 0 bytes, and then we'll just get the reservation over 5813 * are releasing 0 bytes, and then we'll just get the reservation over
5784 * the size free'd. 5814 * the size free'd.
5785 */ 5815 */
5786 released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0); 5816 released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
5817 &qgroup_to_release);
5787 if (released > 0) 5818 if (released > 0)
5788 trace_btrfs_space_reservation(fs_info, "delalloc", 5819 trace_btrfs_space_reservation(fs_info, "delalloc",
5789 btrfs_ino(inode), released, 0); 5820 btrfs_ino(inode), released, 0);
5790 if (qgroup_free) 5821 if (qgroup_free)
5791 btrfs_qgroup_free_meta_prealloc(inode->root, released); 5822 btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
5792 else 5823 else
5793 btrfs_qgroup_convert_reserved_meta(inode->root, released); 5824 btrfs_qgroup_convert_reserved_meta(inode->root,
5825 qgroup_to_release);
5794} 5826}
5795 5827
5796void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, 5828void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
@@ -5802,7 +5834,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5802 if (global_rsv == block_rsv || 5834 if (global_rsv == block_rsv ||
5803 block_rsv->space_info != global_rsv->space_info) 5835 block_rsv->space_info != global_rsv->space_info)
5804 global_rsv = NULL; 5836 global_rsv = NULL;
5805 block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes); 5837 block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
5806} 5838}
5807 5839
5808static void update_global_block_rsv(struct btrfs_fs_info *fs_info) 5840static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
@@ -5882,7 +5914,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
5882static void release_global_block_rsv(struct btrfs_fs_info *fs_info) 5914static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
5883{ 5915{
5884 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, 5916 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
5885 (u64)-1); 5917 (u64)-1, NULL);
5886 WARN_ON(fs_info->trans_block_rsv.size > 0); 5918 WARN_ON(fs_info->trans_block_rsv.size > 0);
5887 WARN_ON(fs_info->trans_block_rsv.reserved > 0); 5919 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
5888 WARN_ON(fs_info->chunk_block_rsv.size > 0); 5920 WARN_ON(fs_info->chunk_block_rsv.size > 0);
@@ -5906,7 +5938,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
5906 WARN_ON_ONCE(!list_empty(&trans->new_bgs)); 5938 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
5907 5939
5908 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL, 5940 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
5909 trans->chunk_bytes_reserved); 5941 trans->chunk_bytes_reserved, NULL);
5910 trans->chunk_bytes_reserved = 0; 5942 trans->chunk_bytes_reserved = 0;
5911} 5943}
5912 5944
@@ -6011,6 +6043,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
6011{ 6043{
6012 struct btrfs_block_rsv *block_rsv = &inode->block_rsv; 6044 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
6013 u64 reserve_size = 0; 6045 u64 reserve_size = 0;
6046 u64 qgroup_rsv_size = 0;
6014 u64 csum_leaves; 6047 u64 csum_leaves;
6015 unsigned outstanding_extents; 6048 unsigned outstanding_extents;
6016 6049
@@ -6023,9 +6056,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
6023 inode->csum_bytes); 6056 inode->csum_bytes);
6024 reserve_size += btrfs_calc_trans_metadata_size(fs_info, 6057 reserve_size += btrfs_calc_trans_metadata_size(fs_info,
6025 csum_leaves); 6058 csum_leaves);
6059 /*
6060 * For qgroup rsv, the calculation is very simple:
6061 * account one nodesize for each outstanding extent
6062 *
6063 * This is overestimating in most cases.
6064 */
6065 qgroup_rsv_size = outstanding_extents * fs_info->nodesize;
6026 6066
6027 spin_lock(&block_rsv->lock); 6067 spin_lock(&block_rsv->lock);
6028 block_rsv->size = reserve_size; 6068 block_rsv->size = reserve_size;
6069 block_rsv->qgroup_rsv_size = qgroup_rsv_size;
6029 spin_unlock(&block_rsv->lock); 6070 spin_unlock(&block_rsv->lock);
6030} 6071}
6031 6072
@@ -8403,7 +8444,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
8403 struct btrfs_block_rsv *block_rsv, u32 blocksize) 8444 struct btrfs_block_rsv *block_rsv, u32 blocksize)
8404{ 8445{
8405 block_rsv_add_bytes(block_rsv, blocksize, 0); 8446 block_rsv_add_bytes(block_rsv, blocksize, 0);
8406 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0); 8447 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
8407} 8448}
8408 8449
8409/* 8450/*
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0167a9c97c9c..f660ba1e5e58 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1748,7 +1748,7 @@ again:
1748 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 1748 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1749 lockstart, lockend, &cached_state); 1749 lockstart, lockend, &cached_state);
1750 btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, 1750 btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
1751 (ret != 0)); 1751 true);
1752 if (ret) { 1752 if (ret) {
1753 btrfs_drop_pages(pages, num_pages); 1753 btrfs_drop_pages(pages, num_pages);
1754 break; 1754 break;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e064c49c9a9a..d241285a0d2a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -31,6 +31,7 @@
31#include <linux/uio.h> 31#include <linux/uio.h>
32#include <linux/magic.h> 32#include <linux/magic.h>
33#include <linux/iversion.h> 33#include <linux/iversion.h>
34#include <asm/unaligned.h>
34#include "ctree.h" 35#include "ctree.h"
35#include "disk-io.h" 36#include "disk-io.h"
36#include "transaction.h" 37#include "transaction.h"
@@ -5905,11 +5906,13 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
5905 struct dir_entry *entry = addr; 5906 struct dir_entry *entry = addr;
5906 char *name = (char *)(entry + 1); 5907 char *name = (char *)(entry + 1);
5907 5908
5908 ctx->pos = entry->offset; 5909 ctx->pos = get_unaligned(&entry->offset);
5909 if (!dir_emit(ctx, name, entry->name_len, entry->ino, 5910 if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
5910 entry->type)) 5911 get_unaligned(&entry->ino),
5912 get_unaligned(&entry->type)))
5911 return 1; 5913 return 1;
5912 addr += sizeof(struct dir_entry) + entry->name_len; 5914 addr += sizeof(struct dir_entry) +
5915 get_unaligned(&entry->name_len);
5913 ctx->pos++; 5916 ctx->pos++;
5914 } 5917 }
5915 return 0; 5918 return 0;
@@ -5999,14 +6002,15 @@ again:
5999 } 6002 }
6000 6003
6001 entry = addr; 6004 entry = addr;
6002 entry->name_len = name_len; 6005 put_unaligned(name_len, &entry->name_len);
6003 name_ptr = (char *)(entry + 1); 6006 name_ptr = (char *)(entry + 1);
6004 read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), 6007 read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
6005 name_len); 6008 name_len);
6006 entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; 6009 put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)],
6010 &entry->type);
6007 btrfs_dir_item_key_to_cpu(leaf, di, &location); 6011 btrfs_dir_item_key_to_cpu(leaf, di, &location);
6008 entry->ino = location.objectid; 6012 put_unaligned(location.objectid, &entry->ino);
6009 entry->offset = found_key.offset; 6013 put_unaligned(found_key.offset, &entry->offset);
6010 entries++; 6014 entries++;
6011 addr += sizeof(struct dir_entry) + name_len; 6015 addr += sizeof(struct dir_entry) + name_len;
6012 total_len += sizeof(struct dir_entry) + name_len; 6016 total_len += sizeof(struct dir_entry) + name_len;
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 124276bba8cf..21a831d3d087 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -189,9 +189,10 @@ void btrfs_print_leaf(struct extent_buffer *l)
189 fs_info = l->fs_info; 189 fs_info = l->fs_info;
190 nr = btrfs_header_nritems(l); 190 nr = btrfs_header_nritems(l);
191 191
192 btrfs_info(fs_info, "leaf %llu total ptrs %d free space %d", 192 btrfs_info(fs_info,
193 btrfs_header_bytenr(l), nr, 193 "leaf %llu gen %llu total ptrs %d free space %d owner %llu",
194 btrfs_leaf_free_space(fs_info, l)); 194 btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
195 btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l));
195 for (i = 0 ; i < nr ; i++) { 196 for (i = 0 ; i < nr ; i++) {
196 item = btrfs_item_nr(i); 197 item = btrfs_item_nr(i);
197 btrfs_item_key_to_cpu(l, &key, i); 198 btrfs_item_key_to_cpu(l, &key, i);
@@ -325,7 +326,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
325 } 326 }
326} 327}
327 328
328void btrfs_print_tree(struct extent_buffer *c) 329void btrfs_print_tree(struct extent_buffer *c, bool follow)
329{ 330{
330 struct btrfs_fs_info *fs_info; 331 struct btrfs_fs_info *fs_info;
331 int i; u32 nr; 332 int i; u32 nr;
@@ -342,15 +343,19 @@ void btrfs_print_tree(struct extent_buffer *c)
342 return; 343 return;
343 } 344 }
344 btrfs_info(fs_info, 345 btrfs_info(fs_info,
345 "node %llu level %d total ptrs %d free spc %u", 346 "node %llu level %d gen %llu total ptrs %d free spc %u owner %llu",
346 btrfs_header_bytenr(c), level, nr, 347 btrfs_header_bytenr(c), level, btrfs_header_generation(c),
347 (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr); 348 nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr,
349 btrfs_header_owner(c));
348 for (i = 0; i < nr; i++) { 350 for (i = 0; i < nr; i++) {
349 btrfs_node_key_to_cpu(c, &key, i); 351 btrfs_node_key_to_cpu(c, &key, i);
350 pr_info("\tkey %d (%llu %u %llu) block %llu\n", 352 pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n",
351 i, key.objectid, key.type, key.offset, 353 i, key.objectid, key.type, key.offset,
352 btrfs_node_blockptr(c, i)); 354 btrfs_node_blockptr(c, i),
355 btrfs_node_ptr_generation(c, i));
353 } 356 }
357 if (!follow)
358 return;
354 for (i = 0; i < nr; i++) { 359 for (i = 0; i < nr; i++) {
355 struct btrfs_key first_key; 360 struct btrfs_key first_key;
356 struct extent_buffer *next; 361 struct extent_buffer *next;
@@ -372,7 +377,7 @@ void btrfs_print_tree(struct extent_buffer *c)
372 if (btrfs_header_level(next) != 377 if (btrfs_header_level(next) !=
373 level - 1) 378 level - 1)
374 BUG(); 379 BUG();
375 btrfs_print_tree(next); 380 btrfs_print_tree(next, follow);
376 free_extent_buffer(next); 381 free_extent_buffer(next);
377 } 382 }
378} 383}
diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h
index 4a98481688f4..e6bb38fd75ad 100644
--- a/fs/btrfs/print-tree.h
+++ b/fs/btrfs/print-tree.h
@@ -7,6 +7,6 @@
7#define BTRFS_PRINT_TREE_H 7#define BTRFS_PRINT_TREE_H
8 8
9void btrfs_print_leaf(struct extent_buffer *l); 9void btrfs_print_leaf(struct extent_buffer *l);
10void btrfs_print_tree(struct extent_buffer *c); 10void btrfs_print_tree(struct extent_buffer *c, bool follow);
11 11
12#endif 12#endif
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 09c7e4fd550f..9fb758d5077a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -11,6 +11,7 @@
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/workqueue.h> 12#include <linux/workqueue.h>
13#include <linux/btrfs.h> 13#include <linux/btrfs.h>
14#include <linux/sizes.h>
14 15
15#include "ctree.h" 16#include "ctree.h"
16#include "transaction.h" 17#include "transaction.h"
@@ -2375,8 +2376,21 @@ out:
2375 return ret; 2376 return ret;
2376} 2377}
2377 2378
2378static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) 2379/*
2380 * Two limits to commit transaction in advance.
2381 *
2382 * For RATIO, it will be 1/RATIO of the remaining limit
2383 * (excluding data and prealloc meta) as threshold.
2384 * For SIZE, it will be in byte unit as threshold.
2385 */
2386#define QGROUP_PERTRANS_RATIO 32
2387#define QGROUP_PERTRANS_SIZE SZ_32M
2388static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
2389 const struct btrfs_qgroup *qg, u64 num_bytes)
2379{ 2390{
2391 u64 limit;
2392 u64 threshold;
2393
2380 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2394 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
2381 qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer) 2395 qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
2382 return false; 2396 return false;
@@ -2385,6 +2399,31 @@ static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
2385 qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl) 2399 qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
2386 return false; 2400 return false;
2387 2401
2402 /*
2403 * Even if we passed the check, it's better to check if reservation
2404 * for meta_pertrans is pushing us near limit.
2405 * If there is too much pertrans reservation or it's near the limit,
2406 * let's try commit transaction to free some, using transaction_kthread
2407 */
2408 if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER |
2409 BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
2410 if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
2411 limit = qg->max_excl;
2412 else
2413 limit = qg->max_rfer;
2414 threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] -
2415 qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) /
2416 QGROUP_PERTRANS_RATIO;
2417 threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE);
2418
2419 /*
 2420 * Use transaction_kthread to commit the transaction, so we no
 2421 * longer need to worry about nested transactions or lock context.
2422 */
2423 if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold)
2424 btrfs_commit_transaction_locksafe(fs_info);
2425 }
2426
2388 return true; 2427 return true;
2389} 2428}
2390 2429
@@ -2434,7 +2473,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
2434 2473
2435 qg = unode_aux_to_qgroup(unode); 2474 qg = unode_aux_to_qgroup(unode);
2436 2475
2437 if (enforce && !qgroup_check_limits(qg, num_bytes)) { 2476 if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) {
2438 ret = -EDQUOT; 2477 ret = -EDQUOT;
2439 goto out; 2478 goto out;
2440 } 2479 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 63fdcab64b01..c944b4769e3c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2267,6 +2267,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
2267 */ 2267 */
2268 cur_trans->state = TRANS_STATE_COMPLETED; 2268 cur_trans->state = TRANS_STATE_COMPLETED;
2269 wake_up(&cur_trans->commit_wait); 2269 wake_up(&cur_trans->commit_wait);
2270 clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
2270 2271
2271 spin_lock(&fs_info->trans_lock); 2272 spin_lock(&fs_info->trans_lock);
2272 list_del_init(&cur_trans->list); 2273 list_del_init(&cur_trans->list);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index c88fccd80bc5..d8c0826bc2c7 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -199,6 +199,20 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
199int btrfs_commit_transaction(struct btrfs_trans_handle *trans); 199int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
200int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, 200int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
201 int wait_for_unblock); 201 int wait_for_unblock);
202
203/*
204 * Try to commit transaction asynchronously, so this is safe to call
205 * even holding a spinlock.
206 *
207 * It's done by informing transaction_kthread to commit transaction without
208 * waiting for commit interval.
209 */
210static inline void btrfs_commit_transaction_locksafe(
211 struct btrfs_fs_info *fs_info)
212{
213 set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
214 wake_up_process(fs_info->transaction_kthread);
215}
202int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans); 216int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
203int btrfs_should_end_transaction(struct btrfs_trans_handle *trans); 217int btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
204void btrfs_throttle(struct btrfs_fs_info *fs_info); 218void btrfs_throttle(struct btrfs_fs_info *fs_info);