aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/ext4_extents.h3
-rw-r--r--fs/ext4/extents.c77
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/inode.c225
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/super.c7
-rw-r--r--fs/ext4/xattr.c2
10 files changed, 211 insertions, 124 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9acf7e808139..9ed1bb1f319f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -28,6 +28,7 @@ config EXT4_FS
28 28
29config EXT4_USE_FOR_EXT23 29config EXT4_USE_FOR_EXT23
30 bool "Use ext4 for ext2/ext3 file systems" 30 bool "Use ext4 for ext2/ext3 file systems"
31 depends on EXT4_FS
31 depends on EXT3_FS=n || EXT2_FS=n 32 depends on EXT3_FS=n || EXT2_FS=n
32 default y 33 default y
33 help 34 help
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4df8621ec31c..a60ab9aad57d 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h> 19#include <linux/blkdev.h>
21#include <linux/mutex.h> 20#include <linux/mutex.h>
22#include "ext4.h" 21#include "ext4.h"
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 56f9271ee8cc..af7b62699ea9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -699,6 +699,8 @@ struct ext4_inode_info {
699 unsigned int i_reserved_meta_blocks; 699 unsigned int i_reserved_meta_blocks;
700 unsigned int i_allocated_meta_blocks; 700 unsigned int i_allocated_meta_blocks;
701 unsigned short i_delalloc_reserved_flag; 701 unsigned short i_delalloc_reserved_flag;
702 sector_t i_da_metadata_calc_last_lblock;
703 int i_da_metadata_calc_len;
702 704
703 /* on-disk additional length */ 705 /* on-disk additional length */
704 __u16 i_extra_isize; 706 __u16 i_extra_isize;
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e87..bdb6ce7e2eb4 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); 228extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks);
229extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); 230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
230extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
231extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a7928f825e4..7d7b74e94687 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
296 * to allocate @blocks 296 * to allocate @blocks
297 * Worse case is one block per extent 297 * Worse case is one block per extent
298 */ 298 */
299int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) 299int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
300{ 300{
301 int lcap, icap, rcap, leafs, idxs, num; 301 struct ext4_inode_info *ei = EXT4_I(inode);
302 int newextents = blocks; 302 int idxs, num = 0;
303
304 rcap = ext4_ext_space_root_idx(inode, 0);
305 lcap = ext4_ext_space_block(inode, 0);
306 icap = ext4_ext_space_block_idx(inode, 0);
307 303
308 /* number of new leaf blocks needed */ 304 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
309 num = leafs = (newextents + lcap - 1) / lcap; 305 / sizeof(struct ext4_extent_idx));
310 306
311 /* 307 /*
312 * Worse case, we need separate index block(s) 308 * If the new delayed allocation block is contiguous with the
313 * to link all new leaf blocks 309 * previous da block, it can share index blocks with the
310 * previous block, so we only need to allocate a new index
311 * block every idxs leaf blocks. At idxs**2 blocks, we need
312 * an additional index block, and at idxs**3 blocks, yet
313 * another index block.
314 */ 314 */
315 idxs = (leafs + icap - 1) / icap; 315 if (ei->i_da_metadata_calc_len &&
316 do { 316 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
317 num += idxs; 317 if ((ei->i_da_metadata_calc_len % idxs) == 0)
318 idxs = (idxs + icap - 1) / icap; 318 num++;
319 } while (idxs > rcap); 319 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
320 num++;
321 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
322 num++;
323 ei->i_da_metadata_calc_len = 0;
324 } else
325 ei->i_da_metadata_calc_len++;
326 ei->i_da_metadata_calc_last_lblock++;
327 return num;
328 }
320 329
321 return num; 330 /*
331 * In the worst case we need a new set of index blocks at
332 * every level of the inode's extent tree.
333 */
334 ei->i_da_metadata_calc_len = 1;
335 ei->i_da_metadata_calc_last_lblock = lblock;
336 return ext_depth(inode) + 1;
322} 337}
323 338
324static int 339static int
@@ -3023,6 +3038,14 @@ out:
3023 return err; 3038 return err;
3024} 3039}
3025 3040
3041static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3042 sector_t block, int count)
3043{
3044 int i;
3045 for (i = 0; i < count; i++)
3046 unmap_underlying_metadata(bdev, block + i);
3047}
3048
3026static int 3049static int
3027ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3050ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3028 ext4_lblk_t iblock, unsigned int max_blocks, 3051 ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3098,6 +3121,18 @@ out:
3098 } else 3121 } else
3099 allocated = ret; 3122 allocated = ret;
3100 set_buffer_new(bh_result); 3123 set_buffer_new(bh_result);
3124 /*
3125 * if we allocated more blocks than requested
3126 * we need to make sure we unmap the extra block
3127 * allocated. The actual needed block will get
3128 * unmapped later when we find the buffer_head marked
3129 * new.
3130 */
3131 if (allocated > max_blocks) {
3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
3133 newblock + max_blocks,
3134 allocated - max_blocks);
3135 }
3101map_out: 3136map_out:
3102 set_buffer_mapped(bh_result); 3137 set_buffer_mapped(bh_result);
3103out1: 3138out1:
@@ -3190,7 +3225,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3190 * this situation is possible, though, _during_ tree modification; 3225 * this situation is possible, though, _during_ tree modification;
3191 * this is why assert can't be put in ext4_ext_find_extent() 3226 * this is why assert can't be put in ext4_ext_find_extent()
3192 */ 3227 */
3193 BUG_ON(path[depth].p_ext == NULL && depth != 0); 3228 if (path[depth].p_ext == NULL && depth != 0) {
3229 ext4_error(inode->i_sb, __func__, "bad extent address "
3230 "inode: %lu, iblock: %d, depth: %d",
3231 inode->i_ino, iblock, depth);
3232 err = -EIO;
3233 goto out2;
3234 }
3194 eh = path[depth].p_hdr; 3235 eh = path[depth].p_hdr;
3195 3236
3196 ex = path[depth].p_ext; 3237 ex = path[depth].p_ext;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
88 return ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
89 89
90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
91 if (jbd2_log_start_commit(journal, commit_tid)) 91 if (jbd2_log_start_commit(journal, commit_tid)) {
92 /*
93 * When the journal is on a different device than the
94 * fs data disk, we need to issue the barrier in
95 * writeback mode. (In ordered mode, the jbd2 layer
96 * will take care of issuing the barrier. In
97 * data=journal, all of the data blocks are written to
98 * the journal device.)
99 */
100 if (ext4_should_writeback_data(inode) &&
101 (journal->j_fs_dev != journal->j_dev) &&
102 (journal->j_flags & JBD2_BARRIER))
103 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
92 jbd2_log_wait_commit(journal, commit_tid); 104 jbd2_log_wait_commit(journal, commit_tid);
93 else if (journal->j_flags & JBD2_BARRIER) 105 } else if (journal->j_flags & JBD2_BARRIER)
94 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 106 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
95 return ret; 107 return ret;
96} 108}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab807963a614..c818972c8302 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1009,77 +1009,88 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
1009 return &EXT4_I(inode)->i_reserved_quota; 1009 return &EXT4_I(inode)->i_reserved_quota;
1010} 1010}
1011#endif 1011#endif
1012
1012/* 1013/*
1013 * Calculate the number of metadata blocks need to reserve 1014 * Calculate the number of metadata blocks need to reserve
1014 * to allocate @blocks for non extent file based file 1015 * to allocate a new block at @lblocks for non extent file based file
1015 */ 1016 */
1016static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) 1017static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1018 sector_t lblock)
1017{ 1019{
1018 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1020 struct ext4_inode_info *ei = EXT4_I(inode);
1019 int ind_blks, dind_blks, tind_blks; 1021 int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
1020 1022 int blk_bits;
1021 /* number of new indirect blocks needed */
1022 ind_blks = (blocks + icap - 1) / icap;
1023 1023
1024 dind_blks = (ind_blks + icap - 1) / icap; 1024 if (lblock < EXT4_NDIR_BLOCKS)
1025 return 0;
1025 1026
1026 tind_blks = 1; 1027 lblock -= EXT4_NDIR_BLOCKS;
1027 1028
1028 return ind_blks + dind_blks + tind_blks; 1029 if (ei->i_da_metadata_calc_len &&
1030 (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
1031 ei->i_da_metadata_calc_len++;
1032 return 0;
1033 }
1034 ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
1035 ei->i_da_metadata_calc_len = 1;
1036 blk_bits = roundup_pow_of_two(lblock + 1);
1037 return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
1029} 1038}
1030 1039
1031/* 1040/*
1032 * Calculate the number of metadata blocks need to reserve 1041 * Calculate the number of metadata blocks need to reserve
1033 * to allocate given number of blocks 1042 * to allocate a block located at @lblock
1034 */ 1043 */
1035static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1044static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
1036{ 1045{
1037 if (!blocks)
1038 return 0;
1039
1040 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1046 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1041 return ext4_ext_calc_metadata_amount(inode, blocks); 1047 return ext4_ext_calc_metadata_amount(inode, lblock);
1042 1048
1043 return ext4_indirect_calc_metadata_amount(inode, blocks); 1049 return ext4_indirect_calc_metadata_amount(inode, lblock);
1044} 1050}
1045 1051
1052/*
1053 * Called with i_data_sem down, which is important since we can call
1054 * ext4_discard_preallocations() from here.
1055 */
1046static void ext4_da_update_reserve_space(struct inode *inode, int used) 1056static void ext4_da_update_reserve_space(struct inode *inode, int used)
1047{ 1057{
1048 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1058 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1049 int total, mdb, mdb_free, mdb_claim = 0; 1059 struct ext4_inode_info *ei = EXT4_I(inode);
1050 1060 int mdb_free = 0;
1051 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1061
1052 /* recalculate the number of metablocks still need to be reserved */ 1062 spin_lock(&ei->i_block_reservation_lock);
1053 total = EXT4_I(inode)->i_reserved_data_blocks - used; 1063 if (unlikely(used > ei->i_reserved_data_blocks)) {
1054 mdb = ext4_calc_metadata_amount(inode, total); 1064 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1055 1065 "with only %d reserved data blocks\n",
1056 /* figure out how many metablocks to release */ 1066 __func__, inode->i_ino, used,
1057 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1067 ei->i_reserved_data_blocks);
1058 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1068 WARN_ON(1);
1059 1069 used = ei->i_reserved_data_blocks;
1060 if (mdb_free) { 1070 }
1061 /* Account for allocated meta_blocks */ 1071
1062 mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks; 1072 /* Update per-inode reservations */
1063 BUG_ON(mdb_free < mdb_claim); 1073 ei->i_reserved_data_blocks -= used;
1064 mdb_free -= mdb_claim; 1074 used += ei->i_allocated_meta_blocks;
1065 1075 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
1066 /* update fs dirty blocks counter */ 1076 ei->i_allocated_meta_blocks = 0;
1077 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
1078
1079 if (ei->i_reserved_data_blocks == 0) {
1080 /*
1081 * We can release all of the reserved metadata blocks
1082 * only when we have written all of the delayed
1083 * allocation blocks.
1084 */
1085 mdb_free = ei->i_reserved_meta_blocks;
1086 ei->i_reserved_meta_blocks = 0;
1087 ei->i_da_metadata_calc_len = 0;
1067 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1088 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1068 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1069 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1070 } 1089 }
1071
1072 /* update per-inode reservations */
1073 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1074 EXT4_I(inode)->i_reserved_data_blocks -= used;
1075 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
1076 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1090 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1077 1091
1078 vfs_dq_claim_block(inode, used + mdb_claim); 1092 /* Update quota subsystem */
1079 1093 vfs_dq_claim_block(inode, used);
1080 /*
1081 * free those over-booking quota for metadata blocks
1082 */
1083 if (mdb_free) 1094 if (mdb_free)
1084 vfs_dq_release_reservation_block(inode, mdb_free); 1095 vfs_dq_release_reservation_block(inode, mdb_free);
1085 1096
@@ -1088,7 +1099,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1088 * there aren't any writers on the inode, we can discard the 1099 * there aren't any writers on the inode, we can discard the
1089 * inode's preallocations. 1100 * inode's preallocations.
1090 */ 1101 */
1091 if (!total && (atomic_read(&inode->i_writecount) == 0)) 1102 if ((ei->i_reserved_data_blocks == 0) &&
1103 (atomic_read(&inode->i_writecount) == 0))
1092 ext4_discard_preallocations(inode); 1104 ext4_discard_preallocations(inode);
1093} 1105}
1094 1106
@@ -1797,11 +1809,15 @@ static int ext4_journalled_write_end(struct file *file,
1797 return ret ? ret : copied; 1809 return ret ? ret : copied;
1798} 1810}
1799 1811
1800static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1812/*
1813 * Reserve a single block located at lblock
1814 */
1815static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
1801{ 1816{
1802 int retries = 0; 1817 int retries = 0;
1803 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1818 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1804 unsigned long md_needed, mdblocks, total = 0; 1819 struct ext4_inode_info *ei = EXT4_I(inode);
1820 unsigned long md_needed, md_reserved;
1805 1821
1806 /* 1822 /*
1807 * recalculate the amount of metadata blocks to reserve 1823 * recalculate the amount of metadata blocks to reserve
@@ -1809,35 +1825,43 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1809 * worse case is one extent per block 1825 * worse case is one extent per block
1810 */ 1826 */
1811repeat: 1827repeat:
1812 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1828 spin_lock(&ei->i_block_reservation_lock);
1813 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; 1829 md_reserved = ei->i_reserved_meta_blocks;
1814 mdblocks = ext4_calc_metadata_amount(inode, total); 1830 md_needed = ext4_calc_metadata_amount(inode, lblock);
1815 BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); 1831 spin_unlock(&ei->i_block_reservation_lock);
1816
1817 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1818 total = md_needed + nrblocks;
1819 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1820 1832
1821 /* 1833 /*
1822 * Make quota reservation here to prevent quota overflow 1834 * Make quota reservation here to prevent quota overflow
1823 * later. Real quota accounting is done at pages writeout 1835 * later. Real quota accounting is done at pages writeout
1824 * time. 1836 * time.
1825 */ 1837 */
1826 if (vfs_dq_reserve_block(inode, total)) 1838 if (vfs_dq_reserve_block(inode, md_needed + 1)) {
1839 /*
1840 * We tend to badly over-estimate the amount of
1841 * metadata blocks which are needed, so if we have
1842 * reserved any metadata blocks, try to force out the
1843 * inode and see if we have any better luck.
1844 */
1845 if (md_reserved && retries++ <= 3)
1846 goto retry;
1827 return -EDQUOT; 1847 return -EDQUOT;
1848 }
1828 1849
1829 if (ext4_claim_free_blocks(sbi, total)) { 1850 if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
1830 vfs_dq_release_reservation_block(inode, total); 1851 vfs_dq_release_reservation_block(inode, md_needed + 1);
1831 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1852 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1853 retry:
1854 if (md_reserved)
1855 write_inode_now(inode, (retries == 3));
1832 yield(); 1856 yield();
1833 goto repeat; 1857 goto repeat;
1834 } 1858 }
1835 return -ENOSPC; 1859 return -ENOSPC;
1836 } 1860 }
1837 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1861 spin_lock(&ei->i_block_reservation_lock);
1838 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1862 ei->i_reserved_data_blocks++;
1839 EXT4_I(inode)->i_reserved_meta_blocks += md_needed; 1863 ei->i_reserved_meta_blocks += md_needed;
1840 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1864 spin_unlock(&ei->i_block_reservation_lock);
1841 1865
1842 return 0; /* success */ 1866 return 0; /* success */
1843} 1867}
@@ -1845,49 +1869,46 @@ repeat:
1845static void ext4_da_release_space(struct inode *inode, int to_free) 1869static void ext4_da_release_space(struct inode *inode, int to_free)
1846{ 1870{
1847 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1871 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1848 int total, mdb, mdb_free, release; 1872 struct ext4_inode_info *ei = EXT4_I(inode);
1849 1873
1850 if (!to_free) 1874 if (!to_free)
1851 return; /* Nothing to release, exit */ 1875 return; /* Nothing to release, exit */
1852 1876
1853 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1877 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1854 1878
1855 if (!EXT4_I(inode)->i_reserved_data_blocks) { 1879 if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1856 /* 1880 /*
1857 * if there is no reserved blocks, but we try to free some 1881 * if there aren't enough reserved blocks, then the
1858 * then the counter is messed up somewhere. 1882 * counter is messed up somewhere. Since this
1859 * but since this function is called from invalidate 1883 * function is called from invalidate page, it's
1860 * page, it's harmless to return without any action 1884 * harmless to return without any action.
1861 */ 1885 */
1862 printk(KERN_INFO "ext4 delalloc try to release %d reserved " 1886 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
1863 "blocks for inode %lu, but there is no reserved " 1887 "ino %lu, to_free %d with only %d reserved "
1864 "data blocks\n", to_free, inode->i_ino); 1888 "data blocks\n", inode->i_ino, to_free,
1865 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1889 ei->i_reserved_data_blocks);
1866 return; 1890 WARN_ON(1);
1891 to_free = ei->i_reserved_data_blocks;
1867 } 1892 }
1893 ei->i_reserved_data_blocks -= to_free;
1868 1894
1869 /* recalculate the number of metablocks still need to be reserved */ 1895 if (ei->i_reserved_data_blocks == 0) {
1870 total = EXT4_I(inode)->i_reserved_data_blocks - to_free; 1896 /*
1871 mdb = ext4_calc_metadata_amount(inode, total); 1897 * We can release all of the reserved metadata blocks
1872 1898 * only when we have written all of the delayed
1873 /* figure out how many metablocks to release */ 1899 * allocation blocks.
1874 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1900 */
1875 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1901 to_free += ei->i_reserved_meta_blocks;
1876 1902 ei->i_reserved_meta_blocks = 0;
1877 release = to_free + mdb_free; 1903 ei->i_da_metadata_calc_len = 0;
1878 1904 }
1879 /* update fs dirty blocks counter for truncate case */
1880 percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
1881 1905
1882 /* update per-inode reservations */ 1906 /* update fs dirty blocks counter */
1883 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); 1907 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
1884 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1885 1908
1886 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1887 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1888 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1909 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1889 1910
1890 vfs_dq_release_reservation_block(inode, release); 1911 vfs_dq_release_reservation_block(inode, to_free);
1891} 1912}
1892 1913
1893static void ext4_da_page_release_reservation(struct page *page, 1914static void ext4_da_page_release_reservation(struct page *page,
@@ -2493,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2493 * XXX: __block_prepare_write() unmaps passed block, 2514 * XXX: __block_prepare_write() unmaps passed block,
2494 * is it OK? 2515 * is it OK?
2495 */ 2516 */
2496 ret = ext4_da_reserve_space(inode, 1); 2517 ret = ext4_da_reserve_space(inode, iblock);
2497 if (ret) 2518 if (ret)
2498 /* not enough space to reserve */ 2519 /* not enough space to reserve */
2499 return ret; 2520 return ret;
@@ -2967,8 +2988,7 @@ retry:
2967out_writepages: 2988out_writepages:
2968 if (!no_nrwrite_index_update) 2989 if (!no_nrwrite_index_update)
2969 wbc->no_nrwrite_index_update = 0; 2990 wbc->no_nrwrite_index_update = 0;
2970 if (wbc->nr_to_write > nr_to_writebump) 2991 wbc->nr_to_write -= nr_to_writebump;
2971 wbc->nr_to_write -= nr_to_writebump;
2972 wbc->range_start = range_start; 2992 wbc->range_start = range_start;
2973 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 2993 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2974 return ret; 2994 return ret;
@@ -2993,11 +3013,18 @@ static int ext4_nonda_switch(struct super_block *sb)
2993 if (2 * free_blocks < 3 * dirty_blocks || 3013 if (2 * free_blocks < 3 * dirty_blocks ||
2994 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 3014 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
2995 /* 3015 /*
2996 * free block count is less that 150% of dirty blocks 3016 * free block count is less than 150% of dirty blocks
2997 * or free blocks is less that watermark 3017 * or free blocks is less than watermark
2998 */ 3018 */
2999 return 1; 3019 return 1;
3000 } 3020 }
3021 /*
3022 * Even if we don't switch but are nearing capacity,
3023 * start pushing delalloc when 1/2 of free blocks are dirty.
3024 */
3025 if (free_blocks < 2 * dirty_blocks)
3026 writeback_inodes_sb_if_idle(sb);
3027
3001 return 0; 3028 return 0;
3002} 3029}
3003 3030
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc7..436521cae456 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/mutex.h> 21#include <linux/mutex.h>
23#include "ext4_jbd2.h" 22#include "ext4_jbd2.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6ed9aa91f27d..735c20d5fd56 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
702 ei->i_reserved_data_blocks = 0; 702 ei->i_reserved_data_blocks = 0;
703 ei->i_reserved_meta_blocks = 0; 703 ei->i_reserved_meta_blocks = 0;
704 ei->i_allocated_meta_blocks = 0; 704 ei->i_allocated_meta_blocks = 0;
705 ei->i_da_metadata_calc_len = 0;
705 ei->i_delalloc_reserved_flag = 0; 706 ei->i_delalloc_reserved_flag = 0;
706 spin_lock_init(&(ei->i_block_reservation_lock)); 707 spin_lock_init(&(ei->i_block_reservation_lock));
707#ifdef CONFIG_QUOTA 708#ifdef CONFIG_QUOTA
@@ -2174,9 +2175,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2174 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2175 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2175 2176
2176 return snprintf(buf, PAGE_SIZE, "%llu\n", 2177 return snprintf(buf, PAGE_SIZE, "%llu\n",
2177 sbi->s_kbytes_written + 2178 (unsigned long long)(sbi->s_kbytes_written +
2178 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2179 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2179 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 2180 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2180} 2181}
2181 2182
2182static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2183static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -4005,6 +4006,7 @@ static inline void unregister_as_ext2(void)
4005{ 4006{
4006 unregister_filesystem(&ext2_fs_type); 4007 unregister_filesystem(&ext2_fs_type);
4007} 4008}
4009MODULE_ALIAS("ext2");
4008#else 4010#else
4009static inline void register_as_ext2(void) { } 4011static inline void register_as_ext2(void) { }
4010static inline void unregister_as_ext2(void) { } 4012static inline void unregister_as_ext2(void) { }
@@ -4031,6 +4033,7 @@ static inline void unregister_as_ext3(void)
4031{ 4033{
4032 unregister_filesystem(&ext3_fs_type); 4034 unregister_filesystem(&ext3_fs_type);
4033} 4035}
4036MODULE_ALIAS("ext3");
4034#else 4037#else
4035static inline void register_as_ext3(void) { } 4038static inline void register_as_ext3(void) { }
4036static inline void unregister_as_ext3(void) { } 4039static inline void unregister_as_ext3(void) { }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 83218bebbc7c..f3a2f7ed45aa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1332,6 +1332,8 @@ retry:
1332 goto cleanup; 1332 goto cleanup;
1333 kfree(b_entry_name); 1333 kfree(b_entry_name);
1334 kfree(buffer); 1334 kfree(buffer);
1335 b_entry_name = NULL;
1336 buffer = NULL;
1335 brelse(is->iloc.bh); 1337 brelse(is->iloc.bh);
1336 kfree(is); 1338 kfree(is);
1337 kfree(bs); 1339 kfree(bs);