Diffstat (limited to 'fs/ext4')
 fs/ext4/Kconfig          |   1
 fs/ext4/block_validity.c |   1
 fs/ext4/ext4.h           |   2
 fs/ext4/ext4_extents.h   |   3
 fs/ext4/extents.c        |  77
 fs/ext4/fsync.c          |  16
 fs/ext4/inode.c          | 225
 fs/ext4/mballoc.h        |   1
 fs/ext4/super.c          |   7
 fs/ext4/xattr.c          |   2
 10 files changed, 211 insertions, 124 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9acf7e808139..9ed1bb1f319f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -28,6 +28,7 @@ config EXT4_FS
 
 config EXT4_USE_FOR_EXT23
 	bool "Use ext4 for ext2/ext3 file systems"
+	depends on EXT4_FS
 	depends on EXT3_FS=n || EXT2_FS=n
 	default y
 	help
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4df8621ec31c..a60ab9aad57d 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
 #include <linux/module.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
-#include <linux/version.h>
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
 #include "ext4.h"
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 56f9271ee8cc..af7b62699ea9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -699,6 +699,8 @@ struct ext4_inode_info {
 	unsigned int i_reserved_meta_blocks;
 	unsigned int i_allocated_meta_blocks;
 	unsigned short i_delalloc_reserved_flag;
+	sector_t i_da_metadata_calc_last_lblock;
+	int i_da_metadata_calc_len;
 
 	/* on-disk additional length */
 	__u16 i_extra_isize;
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e87..bdb6ce7e2eb4 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
 	ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
 }
 
-extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
+extern int ext4_ext_calc_metadata_amount(struct inode *inode,
+					 sector_t lblocks);
 extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a7928f825e4..7d7b74e94687 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
  * to allocate @blocks
  * Worse case is one block per extent
  */
-int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-	int lcap, icap, rcap, leafs, idxs, num;
-	int newextents = blocks;
-
-	rcap = ext4_ext_space_root_idx(inode, 0);
-	lcap = ext4_ext_space_block(inode, 0);
-	icap = ext4_ext_space_block_idx(inode, 0);
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	int idxs, num = 0;
 
-	/* number of new leaf blocks needed */
-	num = leafs = (newextents + lcap - 1) / lcap;
+	idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
+		/ sizeof(struct ext4_extent_idx));
 
 	/*
-	 * Worse case, we need separate index block(s)
-	 * to link all new leaf blocks
+	 * If the new delayed allocation block is contiguous with the
+	 * previous da block, it can share index blocks with the
+	 * previous block, so we only need to allocate a new index
+	 * block every idxs leaf blocks.  At ldxs**2 blocks, we need
+	 * an additional index block, and at ldxs**3 blocks, yet
+	 * another index blocks.
 	 */
-	idxs = (leafs + icap - 1) / icap;
-	do {
-		num += idxs;
-		idxs = (idxs + icap - 1) / icap;
-	} while (idxs > rcap);
+	if (ei->i_da_metadata_calc_len &&
+	    ei->i_da_metadata_calc_last_lblock+1 == lblock) {
+		if ((ei->i_da_metadata_calc_len % idxs) == 0)
+			num++;
+		if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
+			num++;
+		if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
+			num++;
+			ei->i_da_metadata_calc_len = 0;
+		} else
+			ei->i_da_metadata_calc_len++;
+		ei->i_da_metadata_calc_last_lblock++;
+		return num;
+	}
 
-	return num;
+	/*
+	 * In the worst case we need a new set of index blocks at
+	 * every level of the inode's extent tree.
+	 */
+	ei->i_da_metadata_calc_len = 1;
+	ei->i_da_metadata_calc_last_lblock = lblock;
+	return ext_depth(inode) + 1;
 }
 
 static int
@@ -3023,6 +3038,14 @@ out:
 	return err;
 }
 
+static void unmap_underlying_metadata_blocks(struct block_device *bdev,
+			sector_t block, int count)
+{
+	int i;
+	for (i = 0; i < count; i++)
+		unmap_underlying_metadata(bdev, block + i);
+}
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 					ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3098,6 +3121,18 @@ out:
 	} else
 		allocated = ret;
 	set_buffer_new(bh_result);
+	/*
+	 * if we allocated more blocks than requested
+	 * we need to make sure we unmap the extra block
+	 * allocated. The actual needed block will get
+	 * unmapped later when we find the buffer_head marked
+	 * new.
+	 */
+	if (allocated > max_blocks) {
+		unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
+					newblock + max_blocks,
+					allocated - max_blocks);
+	}
 map_out:
 	set_buffer_mapped(bh_result);
 out1:
@@ -3190,7 +3225,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	 * this situation is possible, though, _during_ tree modification;
 	 * this is why assert can't be put in ext4_ext_find_extent()
 	 */
-	BUG_ON(path[depth].p_ext == NULL && depth != 0);
+	if (path[depth].p_ext == NULL && depth != 0) {
+		ext4_error(inode->i_sb, __func__, "bad extent address "
+			   "inode: %lu, iblock: %d, depth: %d",
+			   inode->i_ino, iblock, depth);
+		err = -EIO;
+		goto out2;
+	}
 	eh = path[depth].p_hdr;
 
 	ex = path[depth].p_ext;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 		return ext4_force_commit(inode->i_sb);
 
 	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
-	if (jbd2_log_start_commit(journal, commit_tid))
+	if (jbd2_log_start_commit(journal, commit_tid)) {
+		/*
+		 * When the journal is on a different device than the
+		 * fs data disk, we need to issue the barrier in
+		 * writeback mode.  (In ordered mode, the jbd2 layer
+		 * will take care of issuing the barrier.  In
+		 * data=journal, all of the data blocks are written to
+		 * the journal device.)
+		 */
+		if (ext4_should_writeback_data(inode) &&
+		    (journal->j_fs_dev != journal->j_dev) &&
+		    (journal->j_flags & JBD2_BARRIER))
+			blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 		jbd2_log_wait_commit(journal, commit_tid);
-	else if (journal->j_flags & JBD2_BARRIER)
+	} else if (journal->j_flags & JBD2_BARRIER)
 		blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 	return ret;
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab807963a614..c818972c8302 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1009,77 +1009,88 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
 	return &EXT4_I(inode)->i_reserved_quota;
 }
 #endif
+
 /*
  * Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
+ * to allocate a new block at @lblocks for non extent file based file
  */
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_indirect_calc_metadata_amount(struct inode *inode,
+					      sector_t lblock)
 {
-	int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
-	int ind_blks, dind_blks, tind_blks;
-
-	/* number of new indirect blocks needed */
-	ind_blks = (blocks + icap - 1) / icap;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
+	int blk_bits;
 
-	dind_blks = (ind_blks + icap - 1) / icap;
+	if (lblock < EXT4_NDIR_BLOCKS)
+		return 0;
 
-	tind_blks = 1;
+	lblock -= EXT4_NDIR_BLOCKS;
 
-	return ind_blks + dind_blks + tind_blks;
+	if (ei->i_da_metadata_calc_len &&
+	    (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
+		ei->i_da_metadata_calc_len++;
+		return 0;
+	}
+	ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
+	ei->i_da_metadata_calc_len = 1;
+	blk_bits = roundup_pow_of_two(lblock + 1);
+	return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
 }
 
 /*
  * Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
+ * to allocate a block located at @lblock
  */
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-	if (!blocks)
-		return 0;
-
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
-		return ext4_ext_calc_metadata_amount(inode, blocks);
+		return ext4_ext_calc_metadata_amount(inode, lblock);
 
-	return ext4_indirect_calc_metadata_amount(inode, blocks);
+	return ext4_indirect_calc_metadata_amount(inode, lblock);
 }
 
+/*
+ * Called with i_data_sem down, which is important since we can call
+ * ext4_discard_preallocations() from here.
+ */
 static void ext4_da_update_reserve_space(struct inode *inode, int used)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	int total, mdb, mdb_free, mdb_claim = 0;
-
-	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-	/* recalculate the number of metablocks still need to be reserved */
-	total = EXT4_I(inode)->i_reserved_data_blocks - used;
-	mdb = ext4_calc_metadata_amount(inode, total);
-
-	/* figure out how many metablocks to release */
-	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-	mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
-	if (mdb_free) {
-		/* Account for allocated meta_blocks */
-		mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks;
-		BUG_ON(mdb_free < mdb_claim);
-		mdb_free -= mdb_claim;
-
-		/* update fs dirty blocks counter */
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	int mdb_free = 0;
+
+	spin_lock(&ei->i_block_reservation_lock);
+	if (unlikely(used > ei->i_reserved_data_blocks)) {
+		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
+			 "with only %d reserved data blocks\n",
+			 __func__, inode->i_ino, used,
+			 ei->i_reserved_data_blocks);
+		WARN_ON(1);
+		used = ei->i_reserved_data_blocks;
+	}
+
+	/* Update per-inode reservations */
+	ei->i_reserved_data_blocks -= used;
+	used += ei->i_allocated_meta_blocks;
+	ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
+	ei->i_allocated_meta_blocks = 0;
+	percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
+
+	if (ei->i_reserved_data_blocks == 0) {
+		/*
+		 * We can release all of the reserved metadata blocks
+		 * only when we have written all of the delayed
+		 * allocation blocks.
+		 */
+		mdb_free = ei->i_reserved_meta_blocks;
+		ei->i_reserved_meta_blocks = 0;
+		ei->i_da_metadata_calc_len = 0;
 		percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
-		EXT4_I(inode)->i_allocated_meta_blocks = 0;
-		EXT4_I(inode)->i_reserved_meta_blocks = mdb;
 	}
-
-	/* update per-inode reservations */
-	BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
-	EXT4_I(inode)->i_reserved_data_blocks -= used;
-	percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	vfs_dq_claim_block(inode, used + mdb_claim);
-
-	/*
-	 * free those over-booking quota for metadata blocks
-	 */
+	/* Update quota subsystem */
+	vfs_dq_claim_block(inode, used);
 	if (mdb_free)
 		vfs_dq_release_reservation_block(inode, mdb_free);
 
@@ -1088,7 +1099,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 	 * there aren't any writers on the inode, we can discard the
 	 * inode's preallocations.
 	 */
-	if (!total && (atomic_read(&inode->i_writecount) == 0))
+	if ((ei->i_reserved_data_blocks == 0) &&
+	    (atomic_read(&inode->i_writecount) == 0))
 		ext4_discard_preallocations(inode);
 }
 
@@ -1797,11 +1809,15 @@ static int ext4_journalled_write_end(struct file *file,
 	return ret ? ret : copied;
 }
 
-static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+/*
+ * Reserve a single block located at lblock
+ */
+static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
 {
 	int retries = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	unsigned long md_needed, mdblocks, total = 0;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned long md_needed, md_reserved;
 
 	/*
 	 * recalculate the amount of metadata blocks to reserve
@@ -1809,35 +1825,43 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
 	 * worse case is one extent per block
 	 */
 repeat:
-	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-	total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
-	mdblocks = ext4_calc_metadata_amount(inode, total);
-	BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
-
-	md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
-	total = md_needed + nrblocks;
-	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	spin_lock(&ei->i_block_reservation_lock);
+	md_reserved = ei->i_reserved_meta_blocks;
+	md_needed = ext4_calc_metadata_amount(inode, lblock);
+	spin_unlock(&ei->i_block_reservation_lock);
 
 	/*
 	 * Make quota reservation here to prevent quota overflow
 	 * later. Real quota accounting is done at pages writeout
 	 * time.
 	 */
-	if (vfs_dq_reserve_block(inode, total))
+	if (vfs_dq_reserve_block(inode, md_needed + 1)) {
+		/*
+		 * We tend to badly over-estimate the amount of
+		 * metadata blocks which are needed, so if we have
+		 * reserved any metadata blocks, try to force out the
+		 * inode and see if we have any better luck.
+		 */
+		if (md_reserved && retries++ <= 3)
+			goto retry;
 		return -EDQUOT;
+	}
 
-	if (ext4_claim_free_blocks(sbi, total)) {
-		vfs_dq_release_reservation_block(inode, total);
+	if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
+		vfs_dq_release_reservation_block(inode, md_needed + 1);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+		retry:
+			if (md_reserved)
+				write_inode_now(inode, (retries == 3));
 			yield();
 			goto repeat;
 		}
 		return -ENOSPC;
 	}
-	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-	EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
-	EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
-	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	spin_lock(&ei->i_block_reservation_lock);
+	ei->i_reserved_data_blocks++;
+	ei->i_reserved_meta_blocks += md_needed;
+	spin_unlock(&ei->i_block_reservation_lock);
 
 	return 0;       /* success */
 }
@@ -1845,49 +1869,46 @@ repeat:
 static void ext4_da_release_space(struct inode *inode, int to_free)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	int total, mdb, mdb_free, release;
+	struct ext4_inode_info *ei = EXT4_I(inode);
 
 	if (!to_free)
 		return;		/* Nothing to release, exit */
 
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	if (!EXT4_I(inode)->i_reserved_data_blocks) {
+	if (unlikely(to_free > ei->i_reserved_data_blocks)) {
 		/*
-		 * if there is no reserved blocks, but we try to free some
-		 * then the counter is messed up somewhere.
-		 * but since this function is called from invalidate
-		 * page, it's harmless to return without any action
+		 * if there aren't enough reserved blocks, then the
+		 * counter is messed up somewhere.  Since this
+		 * function is called from invalidate page, it's
+		 * harmless to return without any action.
 		 */
-		printk(KERN_INFO "ext4 delalloc try to release %d reserved "
-			"blocks for inode %lu, but there is no reserved "
-			"data blocks\n", to_free, inode->i_ino);
-		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-		return;
+		ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
+			 "ino %lu, to_free %d with only %d reserved "
+			 "data blocks\n", inode->i_ino, to_free,
+			 ei->i_reserved_data_blocks);
+		WARN_ON(1);
+		to_free = ei->i_reserved_data_blocks;
 	}
+	ei->i_reserved_data_blocks -= to_free;
 
-	/* recalculate the number of metablocks still need to be reserved */
-	total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
-	mdb = ext4_calc_metadata_amount(inode, total);
-
-	/* figure out how many metablocks to release */
-	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-	mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
-	release = to_free + mdb_free;
-
-	/* update fs dirty blocks counter for truncate case */
-	percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
+	if (ei->i_reserved_data_blocks == 0) {
+		/*
+		 * We can release all of the reserved metadata blocks
+		 * only when we have written all of the delayed
+		 * allocation blocks.
+		 */
+		to_free += ei->i_reserved_meta_blocks;
+		ei->i_reserved_meta_blocks = 0;
+		ei->i_da_metadata_calc_len = 0;
+	}
 
-	/* update per-inode reservations */
-	BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
-	EXT4_I(inode)->i_reserved_data_blocks -= to_free;
+	/* update fs dirty blocks counter */
+	percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
 
-	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-	EXT4_I(inode)->i_reserved_meta_blocks = mdb;
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	vfs_dq_release_reservation_block(inode, release);
+	vfs_dq_release_reservation_block(inode, to_free);
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
@@ -2493,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 		 * XXX: __block_prepare_write() unmaps passed block,
 		 * is it OK?
 		 */
-		ret = ext4_da_reserve_space(inode, 1);
+		ret = ext4_da_reserve_space(inode, iblock);
 		if (ret)
 			/* not enough space to reserve */
 			return ret;
@@ -2967,8 +2988,7 @@ retry:
 out_writepages:
 	if (!no_nrwrite_index_update)
 		wbc->no_nrwrite_index_update = 0;
-	if (wbc->nr_to_write > nr_to_writebump)
-		wbc->nr_to_write -= nr_to_writebump;
+	wbc->nr_to_write -= nr_to_writebump;
 	wbc->range_start = range_start;
 	trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
 	return ret;
@@ -2993,11 +3013,18 @@ static int ext4_nonda_switch(struct super_block *sb)
 	if (2 * free_blocks < 3 * dirty_blocks ||
 		free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
 		/*
-		 * free block count is less that 150% of dirty blocks
-		 * or free blocks is less that watermark
+		 * free block count is less than 150% of dirty blocks
+		 * or free blocks is less than watermark
 		 */
 		return 1;
 	}
+	/*
+	 * Even if we don't switch but are nearing capacity,
+	 * start pushing delalloc when 1/2 of free blocks are dirty.
+	 */
+	if (free_blocks < 2 * dirty_blocks)
+		writeback_inodes_sb_if_idle(sb);
+
 	return 0;
 }
 
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc7..436521cae456 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
 #include <linux/proc_fs.h>
 #include <linux/pagemap.h>
 #include <linux/seq_file.h>
-#include <linux/version.h>
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
 #include "ext4_jbd2.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6ed9aa91f27d..735c20d5fd56 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ei->i_reserved_data_blocks = 0;
 	ei->i_reserved_meta_blocks = 0;
 	ei->i_allocated_meta_blocks = 0;
+	ei->i_da_metadata_calc_len = 0;
 	ei->i_delalloc_reserved_flag = 0;
 	spin_lock_init(&(ei->i_block_reservation_lock));
 #ifdef CONFIG_QUOTA
@@ -2174,9 +2175,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
 	struct super_block *sb = sbi->s_buddy_cache->i_sb;
 
 	return snprintf(buf, PAGE_SIZE, "%llu\n",
-			sbi->s_kbytes_written +
+			(unsigned long long)(sbi->s_kbytes_written +
 			((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
-			  EXT4_SB(sb)->s_sectors_written_start) >> 1));
+			  EXT4_SB(sb)->s_sectors_written_start) >> 1)));
 }
 
 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -4005,6 +4006,7 @@ static inline void unregister_as_ext2(void)
 {
 	unregister_filesystem(&ext2_fs_type);
 }
+MODULE_ALIAS("ext2");
 #else
 static inline void register_as_ext2(void) { }
 static inline void unregister_as_ext2(void) { }
@@ -4031,6 +4033,7 @@ static inline void unregister_as_ext3(void)
 {
 	unregister_filesystem(&ext3_fs_type);
 }
+MODULE_ALIAS("ext3");
 #else
 static inline void register_as_ext3(void) { }
 static inline void unregister_as_ext3(void) { }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 83218bebbc7c..f3a2f7ed45aa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1332,6 +1332,8 @@ retry:
 			goto cleanup;
 		kfree(b_entry_name);
 		kfree(buffer);
+		b_entry_name = NULL;
+		buffer = NULL;
 		brelse(is->iloc.bh);
 		kfree(is);
 		kfree(bs);