aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
author Josef Bacik <jbacik@fb.com> 2015-02-11 15:08:59 -0500
committer Chris Mason <clm@fb.com> 2015-02-14 11:22:48 -0500
commit dcab6a3b2ae657a2017637083c28ee303b6b1b8e (patch)
tree 4aa9951e7ab997702f90aec9882f305e1885fc40 /fs/btrfs
parent 3266789f9d08b27275bae5ab1dcd27d1bbf15e79 (diff)
Btrfs: account for large extents with enospc
On our gluster boxes we stream large tarballs of backups onto our filesystems. With 160GB of RAM this means we get really large contiguous ranges of dirty data, but the way our ENOSPC accounting works is that as long as a range is contiguous we only hold a metadata reservation for one extent. The problem is that we limit our extents to 128MB, so we'll end up with at least 800 extents, and our ENOSPC accounting is quite a bit lower than what we need. To keep track of this, make sure we increase outstanding_extents for every multiple of the max extent size so we can be sure to have enough reserved metadata space. Thanks,
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/ctree.h 2
-rw-r--r-- fs/btrfs/extent-tree.c 16
-rw-r--r-- fs/btrfs/extent_io.c 2
-rw-r--r-- fs/btrfs/inode.c 63
4 files changed, 76 insertions, 7 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d3562dd43c66..b3dd55f52f71 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -198,6 +198,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
198 198
199#define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024) 199#define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024)
200 200
201#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024)
202
201/* 203/*
202 * The key defines the order in the tree, and so it also defines (optimal) 204 * The key defines the order in the tree, and so it also defines (optimal)
203 * block layout. 205 * block layout.
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 50de1fa6fc9e..0f6737063142 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4963,19 +4963,25 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
4963/** 4963/**
4964 * drop_outstanding_extent - drop an outstanding extent 4964 * drop_outstanding_extent - drop an outstanding extent
4965 * @inode: the inode we're dropping the extent for 4965 * @inode: the inode we're dropping the extent for
4966 * @num_bytes: the number of bytes we're releasing.
4966 * 4967 *
4967 * This is called when we are freeing up an outstanding extent, either called 4968 * This is called when we are freeing up an outstanding extent, either called
4968 * after an error or after an extent is written. This will return the number of 4969 * after an error or after an extent is written. This will return the number of
4969 * reserved extents that need to be freed. This must be called with 4970 * reserved extents that need to be freed. This must be called with
4970 * BTRFS_I(inode)->lock held. 4971 * BTRFS_I(inode)->lock held.
4971 */ 4972 */
4972static unsigned drop_outstanding_extent(struct inode *inode) 4973static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
4973{ 4974{
4974 unsigned drop_inode_space = 0; 4975 unsigned drop_inode_space = 0;
4975 unsigned dropped_extents = 0; 4976 unsigned dropped_extents = 0;
4977 unsigned num_extents = 0;
4976 4978
4977 BUG_ON(!BTRFS_I(inode)->outstanding_extents); 4979 num_extents = (unsigned)div64_u64(num_bytes +
4978 BTRFS_I(inode)->outstanding_extents--; 4980 BTRFS_MAX_EXTENT_SIZE - 1,
4981 BTRFS_MAX_EXTENT_SIZE);
4982 ASSERT(num_extents);
4983 ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
4984 BTRFS_I(inode)->outstanding_extents -= num_extents;
4979 4985
4980 if (BTRFS_I(inode)->outstanding_extents == 0 && 4986 if (BTRFS_I(inode)->outstanding_extents == 0 &&
4981 test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, 4987 test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
@@ -5146,7 +5152,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5146 5152
5147out_fail: 5153out_fail:
5148 spin_lock(&BTRFS_I(inode)->lock); 5154 spin_lock(&BTRFS_I(inode)->lock);
5149 dropped = drop_outstanding_extent(inode); 5155 dropped = drop_outstanding_extent(inode, num_bytes);
5150 /* 5156 /*
5151 * If the inodes csum_bytes is the same as the original 5157 * If the inodes csum_bytes is the same as the original
5152 * csum_bytes then we know we haven't raced with any free()ers 5158 * csum_bytes then we know we haven't raced with any free()ers
@@ -5225,7 +5231,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5225 5231
5226 num_bytes = ALIGN(num_bytes, root->sectorsize); 5232 num_bytes = ALIGN(num_bytes, root->sectorsize);
5227 spin_lock(&BTRFS_I(inode)->lock); 5233 spin_lock(&BTRFS_I(inode)->lock);
5228 dropped = drop_outstanding_extent(inode); 5234 dropped = drop_outstanding_extent(inode, num_bytes);
5229 5235
5230 if (num_bytes) 5236 if (num_bytes)
5231 to_free = calc_csum_metadata_size(inode, num_bytes, 0); 5237 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a7f66009519a..29850d4a3827 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3242,7 +3242,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
3242 page, 3242 page,
3243 &delalloc_start, 3243 &delalloc_start,
3244 &delalloc_end, 3244 &delalloc_end,
3245 128 * 1024 * 1024); 3245 BTRFS_MAX_EXTENT_SIZE);
3246 if (nr_delalloc == 0) { 3246 if (nr_delalloc == 0) {
3247 delalloc_start = delalloc_end + 1; 3247 delalloc_start = delalloc_end + 1;
3248 continue; 3248 continue;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3b957921ba59..8564d8ce03de 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1530,10 +1530,45 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1530static void btrfs_split_extent_hook(struct inode *inode, 1530static void btrfs_split_extent_hook(struct inode *inode,
1531 struct extent_state *orig, u64 split) 1531 struct extent_state *orig, u64 split)
1532{ 1532{
1533 u64 size;
1534
1533 /* not delalloc, ignore it */ 1535 /* not delalloc, ignore it */
1534 if (!(orig->state & EXTENT_DELALLOC)) 1536 if (!(orig->state & EXTENT_DELALLOC))
1535 return; 1537 return;
1536 1538
1539 size = orig->end - orig->start + 1;
1540 if (size > BTRFS_MAX_EXTENT_SIZE) {
1541 u64 num_extents;
1542 u64 new_size;
1543
1544 /*
1545 * We need the largest size of the remaining extent to see if we
1546 * need to add a new outstanding extent. Think of the following
1547 * case
1548 *
1549 * [MAX_EXTENT_SIZEx2 - 4k][4k]
1550 *
1551 * The new_size would just be 4k and we'd think we had enough
1552 * outstanding extents for this if we only took one side of the
1553 * split, same goes for the other direction. We need to see if
1554 * the larger size still is the same amount of extents as the
1555 * original size, because if it is we need to add a new
1556 * outstanding extent. But if we split up and the larger size
1557 * is less than the original then we are good to go since we've
1558 * already accounted for the extra extent in our original
1559 * accounting.
1560 */
1561 new_size = orig->end - split + 1;
1562 if ((split - orig->start) > new_size)
1563 new_size = split - orig->start;
1564
1565 num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1566 BTRFS_MAX_EXTENT_SIZE);
1567 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1568 BTRFS_MAX_EXTENT_SIZE) < num_extents)
1569 return;
1570 }
1571
1537 spin_lock(&BTRFS_I(inode)->lock); 1572 spin_lock(&BTRFS_I(inode)->lock);
1538 BTRFS_I(inode)->outstanding_extents++; 1573 BTRFS_I(inode)->outstanding_extents++;
1539 spin_unlock(&BTRFS_I(inode)->lock); 1574 spin_unlock(&BTRFS_I(inode)->lock);
@@ -1549,10 +1584,34 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1549 struct extent_state *new, 1584 struct extent_state *new,
1550 struct extent_state *other) 1585 struct extent_state *other)
1551{ 1586{
1587 u64 new_size, old_size;
1588 u64 num_extents;
1589
1552 /* not delalloc, ignore it */ 1590 /* not delalloc, ignore it */
1553 if (!(other->state & EXTENT_DELALLOC)) 1591 if (!(other->state & EXTENT_DELALLOC))
1554 return; 1592 return;
1555 1593
1594 old_size = other->end - other->start + 1;
1595 new_size = old_size + (new->end - new->start + 1);
1596
1597 /* we're not bigger than the max, unreserve the space and go */
1598 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
1599 spin_lock(&BTRFS_I(inode)->lock);
1600 BTRFS_I(inode)->outstanding_extents--;
1601 spin_unlock(&BTRFS_I(inode)->lock);
1602 return;
1603 }
1604
1605 /*
1606 * If we grew by another max_extent, just return, we want to keep that
1607 * reserved amount.
1608 */
1609 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1610 BTRFS_MAX_EXTENT_SIZE);
1611 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1612 BTRFS_MAX_EXTENT_SIZE) > num_extents)
1613 return;
1614
1556 spin_lock(&BTRFS_I(inode)->lock); 1615 spin_lock(&BTRFS_I(inode)->lock);
1557 BTRFS_I(inode)->outstanding_extents--; 1616 BTRFS_I(inode)->outstanding_extents--;
1558 spin_unlock(&BTRFS_I(inode)->lock); 1617 spin_unlock(&BTRFS_I(inode)->lock);
@@ -1648,6 +1707,8 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1648 unsigned *bits) 1707 unsigned *bits)
1649{ 1708{
1650 u64 len = state->end + 1 - state->start; 1709 u64 len = state->end + 1 - state->start;
1710 u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
1711 BTRFS_MAX_EXTENT_SIZE);
1651 1712
1652 spin_lock(&BTRFS_I(inode)->lock); 1713 spin_lock(&BTRFS_I(inode)->lock);
1653 if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) 1714 if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@@ -1667,7 +1728,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1667 *bits &= ~EXTENT_FIRST_DELALLOC; 1728 *bits &= ~EXTENT_FIRST_DELALLOC;
1668 } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { 1729 } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
1669 spin_lock(&BTRFS_I(inode)->lock); 1730 spin_lock(&BTRFS_I(inode)->lock);
1670 BTRFS_I(inode)->outstanding_extents--; 1731 BTRFS_I(inode)->outstanding_extents -= num_extents;
1671 spin_unlock(&BTRFS_I(inode)->lock); 1732 spin_unlock(&BTRFS_I(inode)->lock);
1672 } 1733 }
1673 1734