aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2008-10-09 10:56:23 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-10-09 10:56:23 -0400
commita30d542a0035b886ffaafd0057ced0a2b28c3a4f (patch)
tree9e677eaf4eb1611701828554b27a1b0b0ae523fd
parentc4a0c46ec92c194c873232b88debce4e1a448483 (diff)
ext4: Make sure all the block allocation paths reserve blocks
With delayed allocation we need to make sure blocks are reserved before we attempt to allocate them. Otherwise we get a block allocation failure (ENOSPC) during writepages, which cannot be handled. This would mean silent data loss (we do a printk stating that data will be lost). This patch updates the DIO and fallocate code paths to do block reservation before block allocation. This is needed to make sure parallel DIO and fallocate requests don't take blocks out of the delayed reserve space. When the free blocks count goes below a threshold we switch to a slow path which looks at other CPUs' accumulated percpu counter values. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r--fs/ext4/balloc.c58
-rw-r--r--fs/ext4/ext4.h13
-rw-r--r--fs/ext4/inode.c5
-rw-r--r--fs/ext4/mballoc.c23
4 files changed, 69 insertions, 30 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 58005c01abb8..1707850301d6 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1602,6 +1602,32 @@ out:
1602 return ret; 1602 return ret;
1603} 1603}
1604 1604
1605int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
1606 ext4_fsblk_t nblocks)
1607{
1608 s64 free_blocks;
1609 ext4_fsblk_t root_blocks = 0;
1610 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
1611
1612 free_blocks = percpu_counter_read(fbc);
1613
1614 if (!capable(CAP_SYS_RESOURCE) &&
1615 sbi->s_resuid != current->fsuid &&
1616 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
1617 root_blocks = ext4_r_blocks_count(sbi->s_es);
1618
1619 if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
1620 free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter);
1621
1622 if (free_blocks < (root_blocks + nblocks))
1623 /* we don't have free space */
1624 return -ENOSPC;
1625
1626 /* reduce fs free blocks counter */
1627 percpu_counter_sub(fbc, nblocks);
1628 return 0;
1629}
1630
1605/** 1631/**
1606 * ext4_has_free_blocks() 1632 * ext4_has_free_blocks()
1607 * @sbi: in-core super block structure. 1633 * @sbi: in-core super block structure.
@@ -1623,18 +1649,17 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
1623 sbi->s_resuid != current->fsuid && 1649 sbi->s_resuid != current->fsuid &&
1624 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) 1650 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
1625 root_blocks = ext4_r_blocks_count(sbi->s_es); 1651 root_blocks = ext4_r_blocks_count(sbi->s_es);
1626#ifdef CONFIG_SMP 1652
1627 if (free_blocks - root_blocks < FBC_BATCH) 1653 if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
1628 free_blocks = 1654 free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
1629 percpu_counter_sum(&sbi->s_freeblocks_counter); 1655
1630#endif
1631 if (free_blocks <= root_blocks) 1656 if (free_blocks <= root_blocks)
1632 /* we don't have free space */ 1657 /* we don't have free space */
1633 return 0; 1658 return 0;
1634 if (free_blocks - root_blocks < nblocks) 1659 if (free_blocks - root_blocks < nblocks)
1635 return free_blocks - root_blocks; 1660 return free_blocks - root_blocks;
1636 return nblocks; 1661 return nblocks;
1637 } 1662}
1638 1663
1639 1664
1640/** 1665/**
@@ -1713,14 +1738,11 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
1713 /* 1738 /*
1714 * With delalloc we already reserved the blocks 1739 * With delalloc we already reserved the blocks
1715 */ 1740 */
1716 *count = ext4_has_free_blocks(sbi, *count); 1741 if (ext4_claim_free_blocks(sbi, *count)) {
1717 } 1742 *errp = -ENOSPC;
1718 if (*count == 0) { 1743 return 0; /*return with ENOSPC error */
1719 *errp = -ENOSPC; 1744 }
1720 return 0; /*return with ENOSPC error */
1721 } 1745 }
1722 num = *count;
1723
1724 /* 1746 /*
1725 * Check quota for allocation of this block. 1747 * Check quota for allocation of this block.
1726 */ 1748 */
@@ -1915,9 +1937,13 @@ allocated:
1915 le16_add_cpu(&gdp->bg_free_blocks_count, -num); 1937 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1916 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); 1938 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1917 spin_unlock(sb_bgl_lock(sbi, group_no)); 1939 spin_unlock(sb_bgl_lock(sbi, group_no));
1918 if (!EXT4_I(inode)->i_delalloc_reserved_flag) 1940 if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) {
1919 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1941 /*
1920 1942 * we allocated less blocks than we
1943 * claimed. Add the difference back.
1944 */
1945 percpu_counter_add(&sbi->s_freeblocks_counter, *count - num);
1946 }
1921 if (sbi->s_log_groups_per_flex) { 1947 if (sbi->s_log_groups_per_flex) {
1922 ext4_group_t flex_group = ext4_flex_group(sbi, group_no); 1948 ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
1923 spin_lock(sb_bgl_lock(sbi, flex_group)); 1949 spin_lock(sb_bgl_lock(sbi, flex_group));
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8c701318844d..0154c2d0b242 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -983,6 +983,8 @@ extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
983 unsigned long *count, int *errp); 983 unsigned long *count, int *errp);
984extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, 984extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
985 ext4_fsblk_t goal, unsigned long *count, int *errp); 985 ext4_fsblk_t goal, unsigned long *count, int *errp);
986extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
987 ext4_fsblk_t nblocks);
986extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, 988extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
987 ext4_fsblk_t nblocks); 989 ext4_fsblk_t nblocks);
988extern void ext4_free_blocks(handle_t *handle, struct inode *inode, 990extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
@@ -1207,6 +1209,17 @@ do { \
1207 __ext4_std_error((sb), __func__, (errno)); \ 1209 __ext4_std_error((sb), __func__, (errno)); \
1208} while (0) 1210} while (0)
1209 1211
1212#ifdef CONFIG_SMP
1213/* Each CPU can accumulate FBC_BATCH blocks in their local
1214 * counters. So we need to make sure we have free blocks more
1215 * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
1216 */
1217#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
1218#else
1219#define EXT4_FREEBLOCKS_WATERMARK 0
1220#endif
1221
1222
1210/* 1223/*
1211 * Inodes and files operations 1224 * Inodes and files operations
1212 */ 1225 */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b6fa0c4087e9..b778d5a33ea7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1537,13 +1537,10 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1537 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; 1537 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1538 total = md_needed + nrblocks; 1538 total = md_needed + nrblocks;
1539 1539
1540 if (ext4_has_free_blocks(sbi, total) < total) { 1540 if (ext4_claim_free_blocks(sbi, total)) {
1541 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1541 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1542 return -ENOSPC; 1542 return -ENOSPC;
1543 } 1543 }
1544 /* reduce fs free blocks counter */
1545 percpu_counter_sub(&sbi->s_freeblocks_counter, total);
1546
1547 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1544 EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
1548 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; 1545 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
1549 1546
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0db2ccfa0dad..2c10b5058a8d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2975,9 +2975,15 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2975 * at write_begin() time for delayed allocation 2975 * at write_begin() time for delayed allocation
2976 * do not double accounting 2976 * do not double accounting
2977 */ 2977 */
2978 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2978 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) &&
2979 percpu_counter_sub(&sbi->s_freeblocks_counter, 2979 ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) {
2980 ac->ac_b_ex.fe_len); 2980 /*
2981 * we allocated less blocks than we claimed
2982 * Add the difference back
2983 */
2984 percpu_counter_add(&sbi->s_freeblocks_counter,
2985 ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len);
2986 }
2981 2987
2982 if (sbi->s_log_groups_per_flex) { 2988 if (sbi->s_log_groups_per_flex) {
2983 ext4_group_t flex_group = ext4_flex_group(sbi, 2989 ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -4389,14 +4395,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4389 /* 4395 /*
4390 * With delalloc we already reserved the blocks 4396 * With delalloc we already reserved the blocks
4391 */ 4397 */
4392 ar->len = ext4_has_free_blocks(sbi, ar->len); 4398 if (ext4_claim_free_blocks(sbi, ar->len)) {
4393 } 4399 *errp = -ENOSPC;
4394 4400 return 0;
4395 if (ar->len == 0) { 4401 }
4396 *errp = -ENOSPC;
4397 return 0;
4398 } 4402 }
4399
4400 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { 4403 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
4401 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4404 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4402 ar->len--; 4405 ar->len--;