diff options
author | Mingming Cao <cmm@us.ibm.com> | 2009-01-22 12:13:05 -0500 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2009-03-25 21:18:34 -0400 |
commit | 60e58e0f30e723464c2a7d34b71b8675566c572d (patch) | |
tree | 2797ae1bccdad12d53c989a6dccea1c8ffd2bce3 /fs | |
parent | 643d00ccc311664188c8209bf8b596a30e139c3a (diff) |
ext4: quota reservation for delayed allocation
Uses quota reservation/claim/release to handle quota properly for delayed
allocation in three steps: 1) quotas are reserved when data is being copied
to cache and block allocation is deferred, 2) when new blocks are allocated,
the reserved quotas are converted to the real allocated quota, and 3) over-booked
quotas for metadata blocks are released back.
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/ext4.h | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 36 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 44 | ||||
-rw-r--r-- | fs/ext4/super.c | 4 |
4 files changed, 65 insertions, 21 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b0c87dce66a3..6083bb38057b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/blkdev.h> | 20 | #include <linux/blkdev.h> |
21 | #include <linux/magic.h> | 21 | #include <linux/magic.h> |
22 | #include <linux/jbd2.h> | 22 | #include <linux/jbd2.h> |
23 | #include <linux/quota.h> | ||
23 | #include "ext4_i.h" | 24 | #include "ext4_i.h" |
24 | 25 | ||
25 | /* | 26 | /* |
@@ -1098,6 +1099,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | |||
1098 | extern int ext4_block_truncate_page(handle_t *handle, | 1099 | extern int ext4_block_truncate_page(handle_t *handle, |
1099 | struct address_space *mapping, loff_t from); | 1100 | struct address_space *mapping, loff_t from); |
1100 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | 1101 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); |
1102 | extern qsize_t ext4_get_reserved_space(struct inode *inode); | ||
1101 | 1103 | ||
1102 | /* ioctl.c */ | 1104 | /* ioctl.c */ |
1103 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 1105 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c7fed5b18745..8290cfbd9fa7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -975,6 +975,17 @@ out: | |||
975 | return err; | 975 | return err; |
976 | } | 976 | } |
977 | 977 | ||
978 | qsize_t ext4_get_reserved_space(struct inode *inode) | ||
979 | { | ||
980 | unsigned long long total; | ||
981 | |||
982 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
983 | total = EXT4_I(inode)->i_reserved_data_blocks + | ||
984 | EXT4_I(inode)->i_reserved_meta_blocks; | ||
985 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
986 | |||
987 | return total; | ||
988 | } | ||
978 | /* | 989 | /* |
979 | * Calculate the number of metadata blocks need to reserve | 990 | * Calculate the number of metadata blocks need to reserve |
980 | * to allocate @blocks for non extent file based file | 991 | * to allocate @blocks for non extent file based file |
@@ -1036,8 +1047,14 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1036 | /* update per-inode reservations */ | 1047 | /* update per-inode reservations */ |
1037 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); | 1048 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); |
1038 | EXT4_I(inode)->i_reserved_data_blocks -= used; | 1049 | EXT4_I(inode)->i_reserved_data_blocks -= used; |
1039 | |||
1040 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1050 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1051 | |||
1052 | /* | ||
1053 | * free those over-booking quota for metadata blocks | ||
1054 | */ | ||
1055 | |||
1056 | if (mdb_free) | ||
1057 | vfs_dq_release_reservation_block(inode, mdb_free); | ||
1041 | } | 1058 | } |
1042 | 1059 | ||
1043 | /* | 1060 | /* |
@@ -1553,8 +1570,8 @@ static int ext4_journalled_write_end(struct file *file, | |||
1553 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | 1570 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) |
1554 | { | 1571 | { |
1555 | int retries = 0; | 1572 | int retries = 0; |
1556 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1573 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1557 | unsigned long md_needed, mdblocks, total = 0; | 1574 | unsigned long md_needed, mdblocks, total = 0; |
1558 | 1575 | ||
1559 | /* | 1576 | /* |
1560 | * recalculate the amount of metadata blocks to reserve | 1577 | * recalculate the amount of metadata blocks to reserve |
@@ -1570,12 +1587,23 @@ repeat: | |||
1570 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; | 1587 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; |
1571 | total = md_needed + nrblocks; | 1588 | total = md_needed + nrblocks; |
1572 | 1589 | ||
1590 | /* | ||
1591 | * Make quota reservation here to prevent quota overflow | ||
1592 | * later. Real quota accounting is done at pages writeout | ||
1593 | * time. | ||
1594 | */ | ||
1595 | if (vfs_dq_reserve_block(inode, total)) { | ||
1596 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
1597 | return -EDQUOT; | ||
1598 | } | ||
1599 | |||
1573 | if (ext4_claim_free_blocks(sbi, total)) { | 1600 | if (ext4_claim_free_blocks(sbi, total)) { |
1574 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1601 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1575 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1602 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1576 | yield(); | 1603 | yield(); |
1577 | goto repeat; | 1604 | goto repeat; |
1578 | } | 1605 | } |
1606 | vfs_dq_release_reservation_block(inode, total); | ||
1579 | return -ENOSPC; | 1607 | return -ENOSPC; |
1580 | } | 1608 | } |
1581 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; | 1609 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; |
@@ -1629,6 +1657,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1629 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1657 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); |
1630 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | 1658 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; |
1631 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1659 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1660 | |||
1661 | vfs_dq_release_reservation_block(inode, release); | ||
1632 | } | 1662 | } |
1633 | 1663 | ||
1634 | static void ext4_da_page_release_reservation(struct page *page, | 1664 | static void ext4_da_page_release_reservation(struct page *page, |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9f61e62f435f..4de42090c41f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3086,9 +3086,12 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
3086 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | 3086 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) |
3087 | /* release all the reserved blocks if non delalloc */ | 3087 | /* release all the reserved blocks if non delalloc */ |
3088 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); | 3088 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); |
3089 | else | 3089 | else { |
3090 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 3090 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
3091 | ac->ac_b_ex.fe_len); | 3091 | ac->ac_b_ex.fe_len); |
3092 | /* convert reserved quota blocks to real quota blocks */ | ||
3093 | vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len); | ||
3094 | } | ||
3092 | 3095 | ||
3093 | if (sbi->s_log_groups_per_flex) { | 3096 | if (sbi->s_log_groups_per_flex) { |
3094 | ext4_group_t flex_group = ext4_flex_group(sbi, | 3097 | ext4_group_t flex_group = ext4_flex_group(sbi, |
@@ -4544,7 +4547,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4544 | struct ext4_sb_info *sbi; | 4547 | struct ext4_sb_info *sbi; |
4545 | struct super_block *sb; | 4548 | struct super_block *sb; |
4546 | ext4_fsblk_t block = 0; | 4549 | ext4_fsblk_t block = 0; |
4547 | unsigned int inquota; | 4550 | unsigned int inquota = 0; |
4548 | unsigned int reserv_blks = 0; | 4551 | unsigned int reserv_blks = 0; |
4549 | 4552 | ||
4550 | sb = ar->inode->i_sb; | 4553 | sb = ar->inode->i_sb; |
@@ -4562,9 +4565,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4562 | (unsigned long long) ar->pleft, | 4565 | (unsigned long long) ar->pleft, |
4563 | (unsigned long long) ar->pright); | 4566 | (unsigned long long) ar->pright); |
4564 | 4567 | ||
4565 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { | 4568 | /* |
4566 | /* | 4569 | * For delayed allocation, we could skip the ENOSPC and |
4567 | * With delalloc we already reserved the blocks | 4570 | * EDQUOT check, as blocks and quotas have been already |
4571 | * reserved when data being copied into pagecache. | ||
4572 | */ | ||
4573 | if (EXT4_I(ar->inode)->i_delalloc_reserved_flag) | ||
4574 | ar->flags |= EXT4_MB_DELALLOC_RESERVED; | ||
4575 | else { | ||
4576 | /* Without delayed allocation we need to verify | ||
4577 | * there is enough free blocks to do block allocation | ||
4578 | * and verify allocation doesn't exceed the quota limits. | ||
4568 | */ | 4579 | */ |
4569 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { | 4580 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { |
4570 | /* let others to free the space */ | 4581 | /* let others to free the space */ |
@@ -4576,19 +4587,16 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4576 | return 0; | 4587 | return 0; |
4577 | } | 4588 | } |
4578 | reserv_blks = ar->len; | 4589 | reserv_blks = ar->len; |
4590 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { | ||
4591 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | ||
4592 | ar->len--; | ||
4593 | } | ||
4594 | inquota = ar->len; | ||
4595 | if (ar->len == 0) { | ||
4596 | *errp = -EDQUOT; | ||
4597 | goto out3; | ||
4598 | } | ||
4579 | } | 4599 | } |
4580 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { | ||
4581 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | ||
4582 | ar->len--; | ||
4583 | } | ||
4584 | if (ar->len == 0) { | ||
4585 | *errp = -EDQUOT; | ||
4586 | goto out3; | ||
4587 | } | ||
4588 | inquota = ar->len; | ||
4589 | |||
4590 | if (EXT4_I(ar->inode)->i_delalloc_reserved_flag) | ||
4591 | ar->flags |= EXT4_MB_DELALLOC_RESERVED; | ||
4592 | 4600 | ||
4593 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4601 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
4594 | if (!ac) { | 4602 | if (!ac) { |
@@ -4654,7 +4662,7 @@ repeat: | |||
4654 | out2: | 4662 | out2: |
4655 | kmem_cache_free(ext4_ac_cachep, ac); | 4663 | kmem_cache_free(ext4_ac_cachep, ac); |
4656 | out1: | 4664 | out1: |
4657 | if (ar->len < inquota) | 4665 | if (inquota && ar->len < inquota) |
4658 | DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); | 4666 | DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); |
4659 | out3: | 4667 | out3: |
4660 | if (!ar->len) { | 4668 | if (!ar->len) { |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 41f879497f91..5a238e9c71ce 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -943,6 +943,10 @@ static struct dquot_operations ext4_quota_operations = { | |||
943 | .initialize = dquot_initialize, | 943 | .initialize = dquot_initialize, |
944 | .drop = dquot_drop, | 944 | .drop = dquot_drop, |
945 | .alloc_space = dquot_alloc_space, | 945 | .alloc_space = dquot_alloc_space, |
946 | .reserve_space = dquot_reserve_space, | ||
947 | .claim_space = dquot_claim_space, | ||
948 | .release_rsv = dquot_release_reserved_space, | ||
949 | .get_reserved_space = ext4_get_reserved_space, | ||
946 | .alloc_inode = dquot_alloc_inode, | 950 | .alloc_inode = dquot_alloc_inode, |
947 | .free_space = dquot_free_space, | 951 | .free_space = dquot_free_space, |
948 | .free_inode = dquot_free_inode, | 952 | .free_inode = dquot_free_inode, |