about summary refs log tree commit diff stats
path: root/fs/ext4
diff options
context:
space:
mode:
author H. Peter Anvin <hpa@zytor.com> 2010-02-22 19:20:34 -0500
committer H. Peter Anvin <hpa@zytor.com> 2010-02-22 19:20:34 -0500
commit d02e30c31c57683a66ed68a1bcff900ca78f6d56 (patch)
tree c3ce99a00061bcc1199b50fa838147d876c56717 /fs/ext4
parent 0fdc7a8022c3eaff6b5ee27ffb9e913e5e58d8e9 (diff)
parent aef55d4922e62a0d887e60d87319f3718aec6ced (diff)
Merge branch 'x86/irq' into x86/apic
Merge reason: Conflicts in arch/x86/kernel/apic/io_apic.c

Resolved Conflicts:
	arch/x86/kernel/apic/io_apic.c

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/Kconfig 1
-rw-r--r-- fs/ext4/block_validity.c 1
-rw-r--r-- fs/ext4/ext4.h 11
-rw-r--r-- fs/ext4/ext4_extents.h 3
-rw-r--r-- fs/ext4/extents.c 98
-rw-r--r-- fs/ext4/fsync.c 16
-rw-r--r-- fs/ext4/inode.c 277
-rw-r--r-- fs/ext4/mballoc.h 1
-rw-r--r-- fs/ext4/super.c 7
-rw-r--r-- fs/ext4/xattr.c 2
10 files changed, 273 insertions, 144 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9acf7e808139..9ed1bb1f319f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -28,6 +28,7 @@ config EXT4_FS
28 28
29config EXT4_USE_FOR_EXT23 29config EXT4_USE_FOR_EXT23
30 bool "Use ext4 for ext2/ext3 file systems" 30 bool "Use ext4 for ext2/ext3 file systems"
31 depends on EXT4_FS
31 depends on EXT3_FS=n || EXT2_FS=n 32 depends on EXT3_FS=n || EXT2_FS=n
32 default y 33 default y
33 help 34 help
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4df8621ec31c..a60ab9aad57d 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h> 19#include <linux/blkdev.h>
21#include <linux/mutex.h> 20#include <linux/mutex.h>
22#include "ext4.h" 21#include "ext4.h"
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 56f9271ee8cc..874d169a193e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -361,14 +361,11 @@ struct ext4_new_group_data {
361 so set the magic i_delalloc_reserve_flag after taking the 361 so set the magic i_delalloc_reserve_flag after taking the
362 inode allocation semaphore for */ 362 inode allocation semaphore for */
363#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 363#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
364 /* Call ext4_da_update_reserve_space() after successfully
365 allocating the blocks */
366#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008
367 /* caller is from the direct IO path, request to creation of an 364 /* caller is from the direct IO path, request to creation of an
368 unitialized extents if not allocated, split the uninitialized 365 unitialized extents if not allocated, split the uninitialized
369 extent if blocks has been preallocated already*/ 366 extent if blocks has been preallocated already*/
370#define EXT4_GET_BLOCKS_DIO 0x0010 367#define EXT4_GET_BLOCKS_DIO 0x0008
371#define EXT4_GET_BLOCKS_CONVERT 0x0020 368#define EXT4_GET_BLOCKS_CONVERT 0x0010
372#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ 369#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\
373 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) 370 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
374 /* Convert extent to initialized after direct IO complete */ 371 /* Convert extent to initialized after direct IO complete */
@@ -699,6 +696,8 @@ struct ext4_inode_info {
699 unsigned int i_reserved_meta_blocks; 696 unsigned int i_reserved_meta_blocks;
700 unsigned int i_allocated_meta_blocks; 697 unsigned int i_allocated_meta_blocks;
701 unsigned short i_delalloc_reserved_flag; 698 unsigned short i_delalloc_reserved_flag;
699 sector_t i_da_metadata_calc_last_lblock;
700 int i_da_metadata_calc_len;
702 701
703 /* on-disk additional length */ 702 /* on-disk additional length */
704 __u16 i_extra_isize; 703 __u16 i_extra_isize;
@@ -1441,6 +1440,8 @@ extern int ext4_block_truncate_page(handle_t *handle,
1441extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1440extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1442extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1441extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1443extern int flush_aio_dio_completed_IO(struct inode *inode); 1442extern int flush_aio_dio_completed_IO(struct inode *inode);
1443extern void ext4_da_update_reserve_space(struct inode *inode,
1444 int used, int quota_claim);
1444/* ioctl.c */ 1445/* ioctl.c */
1445extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1446extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1446extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); 1447extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e87..bdb6ce7e2eb4 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); 228extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks);
229extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); 230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
230extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
231extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a7928f825e4..765a4826b118 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
296 * to allocate @blocks 296 * to allocate @blocks
297 * Worse case is one block per extent 297 * Worse case is one block per extent
298 */ 298 */
299int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) 299int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
300{ 300{
301 int lcap, icap, rcap, leafs, idxs, num; 301 struct ext4_inode_info *ei = EXT4_I(inode);
302 int newextents = blocks; 302 int idxs, num = 0;
303
304 rcap = ext4_ext_space_root_idx(inode, 0);
305 lcap = ext4_ext_space_block(inode, 0);
306 icap = ext4_ext_space_block_idx(inode, 0);
307 303
308 /* number of new leaf blocks needed */ 304 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
309 num = leafs = (newextents + lcap - 1) / lcap; 305 / sizeof(struct ext4_extent_idx));
310 306
311 /* 307 /*
312 * Worse case, we need separate index block(s) 308 * If the new delayed allocation block is contiguous with the
313 * to link all new leaf blocks 309 * previous da block, it can share index blocks with the
310 * previous block, so we only need to allocate a new index
311 * block every idxs leaf blocks. At ldxs**2 blocks, we need
312 * an additional index block, and at ldxs**3 blocks, yet
313 * another index blocks.
314 */ 314 */
315 idxs = (leafs + icap - 1) / icap; 315 if (ei->i_da_metadata_calc_len &&
316 do { 316 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
317 num += idxs; 317 if ((ei->i_da_metadata_calc_len % idxs) == 0)
318 idxs = (idxs + icap - 1) / icap; 318 num++;
319 } while (idxs > rcap); 319 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
320 num++;
321 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
322 num++;
323 ei->i_da_metadata_calc_len = 0;
324 } else
325 ei->i_da_metadata_calc_len++;
326 ei->i_da_metadata_calc_last_lblock++;
327 return num;
328 }
320 329
321 return num; 330 /*
331 * In the worst case we need a new set of index blocks at
332 * every level of the inode's extent tree.
333 */
334 ei->i_da_metadata_calc_len = 1;
335 ei->i_da_metadata_calc_last_lblock = lblock;
336 return ext_depth(inode) + 1;
322} 337}
323 338
324static int 339static int
@@ -3023,6 +3038,14 @@ out:
3023 return err; 3038 return err;
3024} 3039}
3025 3040
3041static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3042 sector_t block, int count)
3043{
3044 int i;
3045 for (i = 0; i < count; i++)
3046 unmap_underlying_metadata(bdev, block + i);
3047}
3048
3026static int 3049static int
3027ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3050ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3028 ext4_lblk_t iblock, unsigned int max_blocks, 3051 ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3098,6 +3121,30 @@ out:
3098 } else 3121 } else
3099 allocated = ret; 3122 allocated = ret;
3100 set_buffer_new(bh_result); 3123 set_buffer_new(bh_result);
3124 /*
3125 * if we allocated more blocks than requested
3126 * we need to make sure we unmap the extra block
3127 * allocated. The actual needed block will get
3128 * unmapped later when we find the buffer_head marked
3129 * new.
3130 */
3131 if (allocated > max_blocks) {
3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
3133 newblock + max_blocks,
3134 allocated - max_blocks);
3135 allocated = max_blocks;
3136 }
3137
3138 /*
3139 * If we have done fallocate with the offset that is already
3140 * delayed allocated, we would have block reservation
3141 * and quota reservation done in the delayed write path.
3142 * But fallocate would have already updated quota and block
3143 * count for this offset. So cancel these reservation
3144 */
3145 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
3146 ext4_da_update_reserve_space(inode, allocated, 0);
3147
3101map_out: 3148map_out:
3102 set_buffer_mapped(bh_result); 3149 set_buffer_mapped(bh_result);
3103out1: 3150out1:
@@ -3190,7 +3237,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3190 * this situation is possible, though, _during_ tree modification; 3237 * this situation is possible, though, _during_ tree modification;
3191 * this is why assert can't be put in ext4_ext_find_extent() 3238 * this is why assert can't be put in ext4_ext_find_extent()
3192 */ 3239 */
3193 BUG_ON(path[depth].p_ext == NULL && depth != 0); 3240 if (path[depth].p_ext == NULL && depth != 0) {
3241 ext4_error(inode->i_sb, __func__, "bad extent address "
3242 "inode: %lu, iblock: %d, depth: %d",
3243 inode->i_ino, iblock, depth);
3244 err = -EIO;
3245 goto out2;
3246 }
3194 eh = path[depth].p_hdr; 3247 eh = path[depth].p_hdr;
3195 3248
3196 ex = path[depth].p_ext; 3249 ex = path[depth].p_ext;
@@ -3327,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3327 /* previous routine could use block we allocated */ 3380 /* previous routine could use block we allocated */
3328 newblock = ext_pblock(&newex); 3381 newblock = ext_pblock(&newex);
3329 allocated = ext4_ext_get_actual_len(&newex); 3382 allocated = ext4_ext_get_actual_len(&newex);
3383 if (allocated > max_blocks)
3384 allocated = max_blocks;
3330 set_buffer_new(bh_result); 3385 set_buffer_new(bh_result);
3331 3386
3332 /* 3387 /*
3388 * Update reserved blocks/metadata blocks after successful
3389 * block allocation which had been deferred till now.
3390 */
3391 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
3392 ext4_da_update_reserve_space(inode, allocated, 1);
3393
3394 /*
3333 * Cache the extent and update transaction to commit on fdatasync only 3395 * Cache the extent and update transaction to commit on fdatasync only
3334 * when it is _not_ an uninitialized extent. 3396 * when it is _not_ an uninitialized extent.
3335 */ 3397 */
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
88 return ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
89 89
90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
91 if (jbd2_log_start_commit(journal, commit_tid)) 91 if (jbd2_log_start_commit(journal, commit_tid)) {
92 /*
93 * When the journal is on a different device than the
94 * fs data disk, we need to issue the barrier in
95 * writeback mode. (In ordered mode, the jbd2 layer
96 * will take care of issuing the barrier. In
97 * data=journal, all of the data blocks are written to
98 * the journal device.)
99 */
100 if (ext4_should_writeback_data(inode) &&
101 (journal->j_fs_dev != journal->j_dev) &&
102 (journal->j_flags & JBD2_BARRIER))
103 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
92 jbd2_log_wait_commit(journal, commit_tid); 104 jbd2_log_wait_commit(journal, commit_tid);
93 else if (journal->j_flags & JBD2_BARRIER) 105 } else if (journal->j_flags & JBD2_BARRIER)
94 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 106 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
95 return ret; 107 return ret;
96} 108}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab807963a614..e11952404e02 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1009,86 +1009,114 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
1009 return &EXT4_I(inode)->i_reserved_quota; 1009 return &EXT4_I(inode)->i_reserved_quota;
1010} 1010}
1011#endif 1011#endif
1012
1012/* 1013/*
1013 * Calculate the number of metadata blocks need to reserve 1014 * Calculate the number of metadata blocks need to reserve
1014 * to allocate @blocks for non extent file based file 1015 * to allocate a new block at @lblocks for non extent file based file
1015 */ 1016 */
1016static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) 1017static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1018 sector_t lblock)
1017{ 1019{
1018 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1020 struct ext4_inode_info *ei = EXT4_I(inode);
1019 int ind_blks, dind_blks, tind_blks; 1021 int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
1020 1022 int blk_bits;
1021 /* number of new indirect blocks needed */
1022 ind_blks = (blocks + icap - 1) / icap;
1023 1023
1024 dind_blks = (ind_blks + icap - 1) / icap; 1024 if (lblock < EXT4_NDIR_BLOCKS)
1025 return 0;
1025 1026
1026 tind_blks = 1; 1027 lblock -= EXT4_NDIR_BLOCKS;
1027 1028
1028 return ind_blks + dind_blks + tind_blks; 1029 if (ei->i_da_metadata_calc_len &&
1030 (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
1031 ei->i_da_metadata_calc_len++;
1032 return 0;
1033 }
1034 ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
1035 ei->i_da_metadata_calc_len = 1;
1036 blk_bits = roundup_pow_of_two(lblock + 1);
1037 return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
1029} 1038}
1030 1039
1031/* 1040/*
1032 * Calculate the number of metadata blocks need to reserve 1041 * Calculate the number of metadata blocks need to reserve
1033 * to allocate given number of blocks 1042 * to allocate a block located at @lblock
1034 */ 1043 */
1035static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1044static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
1036{ 1045{
1037 if (!blocks)
1038 return 0;
1039
1040 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1046 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1041 return ext4_ext_calc_metadata_amount(inode, blocks); 1047 return ext4_ext_calc_metadata_amount(inode, lblock);
1042 1048
1043 return ext4_indirect_calc_metadata_amount(inode, blocks); 1049 return ext4_indirect_calc_metadata_amount(inode, lblock);
1044} 1050}
1045 1051
1046static void ext4_da_update_reserve_space(struct inode *inode, int used) 1052/*
1053 * Called with i_data_sem down, which is important since we can call
1054 * ext4_discard_preallocations() from here.
1055 */
1056void ext4_da_update_reserve_space(struct inode *inode,
1057 int used, int quota_claim)
1047{ 1058{
1048 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1059 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1049 int total, mdb, mdb_free, mdb_claim = 0; 1060 struct ext4_inode_info *ei = EXT4_I(inode);
1050 1061 int mdb_free = 0, allocated_meta_blocks = 0;
1051 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1062
1052 /* recalculate the number of metablocks still need to be reserved */ 1063 spin_lock(&ei->i_block_reservation_lock);
1053 total = EXT4_I(inode)->i_reserved_data_blocks - used; 1064 if (unlikely(used > ei->i_reserved_data_blocks)) {
1054 mdb = ext4_calc_metadata_amount(inode, total); 1065 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1055 1066 "with only %d reserved data blocks\n",
1056 /* figure out how many metablocks to release */ 1067 __func__, inode->i_ino, used,
1057 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1068 ei->i_reserved_data_blocks);
1058 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1069 WARN_ON(1);
1059 1070 used = ei->i_reserved_data_blocks;
1060 if (mdb_free) { 1071 }
1061 /* Account for allocated meta_blocks */ 1072
1062 mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks; 1073 /* Update per-inode reservations */
1063 BUG_ON(mdb_free < mdb_claim); 1074 ei->i_reserved_data_blocks -= used;
1064 mdb_free -= mdb_claim; 1075 used += ei->i_allocated_meta_blocks;
1065 1076 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
1066 /* update fs dirty blocks counter */ 1077 allocated_meta_blocks = ei->i_allocated_meta_blocks;
1078 ei->i_allocated_meta_blocks = 0;
1079 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
1080
1081 if (ei->i_reserved_data_blocks == 0) {
1082 /*
1083 * We can release all of the reserved metadata blocks
1084 * only when we have written all of the delayed
1085 * allocation blocks.
1086 */
1087 mdb_free = ei->i_reserved_meta_blocks;
1088 ei->i_reserved_meta_blocks = 0;
1089 ei->i_da_metadata_calc_len = 0;
1067 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1090 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1068 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1069 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1070 } 1091 }
1071
1072 /* update per-inode reservations */
1073 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1074 EXT4_I(inode)->i_reserved_data_blocks -= used;
1075 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
1076 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1092 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1077 1093
1078 vfs_dq_claim_block(inode, used + mdb_claim); 1094 /* Update quota subsystem */
1079 1095 if (quota_claim) {
1080 /* 1096 vfs_dq_claim_block(inode, used);
1081 * free those over-booking quota for metadata blocks 1097 if (mdb_free)
1082 */ 1098 vfs_dq_release_reservation_block(inode, mdb_free);
1083 if (mdb_free) 1099 } else {
1084 vfs_dq_release_reservation_block(inode, mdb_free); 1100 /*
1101 * We did fallocate with an offset that is already delayed
1102 * allocated. So on delayed allocated writeback we should
1103 * not update the quota for allocated blocks. But then
1104 * converting an fallocate region to initialized region would
1105 * have caused a metadata allocation. So claim quota for
1106 * that
1107 */
1108 if (allocated_meta_blocks)
1109 vfs_dq_claim_block(inode, allocated_meta_blocks);
1110 vfs_dq_release_reservation_block(inode, mdb_free + used);
1111 }
1085 1112
1086 /* 1113 /*
1087 * If we have done all the pending block allocations and if 1114 * If we have done all the pending block allocations and if
1088 * there aren't any writers on the inode, we can discard the 1115 * there aren't any writers on the inode, we can discard the
1089 * inode's preallocations. 1116 * inode's preallocations.
1090 */ 1117 */
1091 if (!total && (atomic_read(&inode->i_writecount) == 0)) 1118 if ((ei->i_reserved_data_blocks == 0) &&
1119 (atomic_read(&inode->i_writecount) == 0))
1092 ext4_discard_preallocations(inode); 1120 ext4_discard_preallocations(inode);
1093} 1121}
1094 1122
@@ -1280,18 +1308,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1280 */ 1308 */
1281 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 1309 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
1282 } 1310 }
1283 }
1284 1311
1312 /*
1313 * Update reserved blocks/metadata blocks after successful
1314 * block allocation which had been deferred till now. We don't
1315 * support fallocate for non extent files. So we can update
1316 * reserve space here.
1317 */
1318 if ((retval > 0) &&
1319 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
1320 ext4_da_update_reserve_space(inode, retval, 1);
1321 }
1285 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1322 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1286 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1323 EXT4_I(inode)->i_delalloc_reserved_flag = 0;
1287 1324
1288 /*
1289 * Update reserved blocks/metadata blocks after successful
1290 * block allocation which had been deferred till now.
1291 */
1292 if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
1293 ext4_da_update_reserve_space(inode, retval);
1294
1295 up_write((&EXT4_I(inode)->i_data_sem)); 1325 up_write((&EXT4_I(inode)->i_data_sem));
1296 if (retval > 0 && buffer_mapped(bh)) { 1326 if (retval > 0 && buffer_mapped(bh)) {
1297 int ret = check_block_validity(inode, "file system " 1327 int ret = check_block_validity(inode, "file system "
@@ -1797,11 +1827,15 @@ static int ext4_journalled_write_end(struct file *file,
1797 return ret ? ret : copied; 1827 return ret ? ret : copied;
1798} 1828}
1799 1829
1800static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1830/*
1831 * Reserve a single block located at lblock
1832 */
1833static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
1801{ 1834{
1802 int retries = 0; 1835 int retries = 0;
1803 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1836 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1804 unsigned long md_needed, mdblocks, total = 0; 1837 struct ext4_inode_info *ei = EXT4_I(inode);
1838 unsigned long md_needed, md_reserved;
1805 1839
1806 /* 1840 /*
1807 * recalculate the amount of metadata blocks to reserve 1841 * recalculate the amount of metadata blocks to reserve
@@ -1809,35 +1843,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1809 * worse case is one extent per block 1843 * worse case is one extent per block
1810 */ 1844 */
1811repeat: 1845repeat:
1812 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1846 spin_lock(&ei->i_block_reservation_lock);
1813 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; 1847 md_reserved = ei->i_reserved_meta_blocks;
1814 mdblocks = ext4_calc_metadata_amount(inode, total); 1848 md_needed = ext4_calc_metadata_amount(inode, lblock);
1815 BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); 1849 spin_unlock(&ei->i_block_reservation_lock);
1816
1817 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1818 total = md_needed + nrblocks;
1819 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1820 1850
1821 /* 1851 /*
1822 * Make quota reservation here to prevent quota overflow 1852 * Make quota reservation here to prevent quota overflow
1823 * later. Real quota accounting is done at pages writeout 1853 * later. Real quota accounting is done at pages writeout
1824 * time. 1854 * time.
1825 */ 1855 */
1826 if (vfs_dq_reserve_block(inode, total)) 1856 if (vfs_dq_reserve_block(inode, md_needed + 1))
1827 return -EDQUOT; 1857 return -EDQUOT;
1828 1858
1829 if (ext4_claim_free_blocks(sbi, total)) { 1859 if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
1830 vfs_dq_release_reservation_block(inode, total); 1860 vfs_dq_release_reservation_block(inode, md_needed + 1);
1831 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1861 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1832 yield(); 1862 yield();
1833 goto repeat; 1863 goto repeat;
1834 } 1864 }
1835 return -ENOSPC; 1865 return -ENOSPC;
1836 } 1866 }
1837 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1867 spin_lock(&ei->i_block_reservation_lock);
1838 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1868 ei->i_reserved_data_blocks++;
1839 EXT4_I(inode)->i_reserved_meta_blocks += md_needed; 1869 ei->i_reserved_meta_blocks += md_needed;
1840 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1870 spin_unlock(&ei->i_block_reservation_lock);
1841 1871
1842 return 0; /* success */ 1872 return 0; /* success */
1843} 1873}
@@ -1845,49 +1875,46 @@ repeat:
1845static void ext4_da_release_space(struct inode *inode, int to_free) 1875static void ext4_da_release_space(struct inode *inode, int to_free)
1846{ 1876{
1847 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1877 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1848 int total, mdb, mdb_free, release; 1878 struct ext4_inode_info *ei = EXT4_I(inode);
1849 1879
1850 if (!to_free) 1880 if (!to_free)
1851 return; /* Nothing to release, exit */ 1881 return; /* Nothing to release, exit */
1852 1882
1853 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1883 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1854 1884
1855 if (!EXT4_I(inode)->i_reserved_data_blocks) { 1885 if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1856 /* 1886 /*
1857 * if there is no reserved blocks, but we try to free some 1887 * if there aren't enough reserved blocks, then the
1858 * then the counter is messed up somewhere. 1888 * counter is messed up somewhere. Since this
1859 * but since this function is called from invalidate 1889 * function is called from invalidate page, it's
1860 * page, it's harmless to return without any action 1890 * harmless to return without any action.
1861 */ 1891 */
1862 printk(KERN_INFO "ext4 delalloc try to release %d reserved " 1892 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
1863 "blocks for inode %lu, but there is no reserved " 1893 "ino %lu, to_free %d with only %d reserved "
1864 "data blocks\n", to_free, inode->i_ino); 1894 "data blocks\n", inode->i_ino, to_free,
1865 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1895 ei->i_reserved_data_blocks);
1866 return; 1896 WARN_ON(1);
1897 to_free = ei->i_reserved_data_blocks;
1867 } 1898 }
1899 ei->i_reserved_data_blocks -= to_free;
1868 1900
1869 /* recalculate the number of metablocks still need to be reserved */ 1901 if (ei->i_reserved_data_blocks == 0) {
1870 total = EXT4_I(inode)->i_reserved_data_blocks - to_free; 1902 /*
1871 mdb = ext4_calc_metadata_amount(inode, total); 1903 * We can release all of the reserved metadata blocks
1872 1904 * only when we have written all of the delayed
1873 /* figure out how many metablocks to release */ 1905 * allocation blocks.
1874 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1906 */
1875 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1907 to_free += ei->i_reserved_meta_blocks;
1876 1908 ei->i_reserved_meta_blocks = 0;
1877 release = to_free + mdb_free; 1909 ei->i_da_metadata_calc_len = 0;
1878 1910 }
1879 /* update fs dirty blocks counter for truncate case */
1880 percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
1881 1911
1882 /* update per-inode reservations */ 1912 /* update fs dirty blocks counter */
1883 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); 1913 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
1884 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1885 1914
1886 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1887 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1888 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1915 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1889 1916
1890 vfs_dq_release_reservation_block(inode, release); 1917 vfs_dq_release_reservation_block(inode, to_free);
1891} 1918}
1892 1919
1893static void ext4_da_page_release_reservation(struct page *page, 1920static void ext4_da_page_release_reservation(struct page *page,
@@ -2192,10 +2219,10 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2192 * variables are updated after the blocks have been allocated. 2219 * variables are updated after the blocks have been allocated.
2193 */ 2220 */
2194 new.b_state = 0; 2221 new.b_state = 0;
2195 get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | 2222 get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
2196 EXT4_GET_BLOCKS_DELALLOC_RESERVE);
2197 if (mpd->b_state & (1 << BH_Delay)) 2223 if (mpd->b_state & (1 << BH_Delay))
2198 get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; 2224 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
2225
2199 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, 2226 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
2200 &new, get_blocks_flags); 2227 &new, get_blocks_flags);
2201 if (blks < 0) { 2228 if (blks < 0) {
@@ -2493,7 +2520,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2493 * XXX: __block_prepare_write() unmaps passed block, 2520 * XXX: __block_prepare_write() unmaps passed block,
2494 * is it OK? 2521 * is it OK?
2495 */ 2522 */
2496 ret = ext4_da_reserve_space(inode, 1); 2523 ret = ext4_da_reserve_space(inode, iblock);
2497 if (ret) 2524 if (ret)
2498 /* not enough space to reserve */ 2525 /* not enough space to reserve */
2499 return ret; 2526 return ret;
@@ -2967,8 +2994,7 @@ retry:
2967out_writepages: 2994out_writepages:
2968 if (!no_nrwrite_index_update) 2995 if (!no_nrwrite_index_update)
2969 wbc->no_nrwrite_index_update = 0; 2996 wbc->no_nrwrite_index_update = 0;
2970 if (wbc->nr_to_write > nr_to_writebump) 2997 wbc->nr_to_write -= nr_to_writebump;
2971 wbc->nr_to_write -= nr_to_writebump;
2972 wbc->range_start = range_start; 2998 wbc->range_start = range_start;
2973 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 2999 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2974 return ret; 3000 return ret;
@@ -2993,11 +3019,18 @@ static int ext4_nonda_switch(struct super_block *sb)
2993 if (2 * free_blocks < 3 * dirty_blocks || 3019 if (2 * free_blocks < 3 * dirty_blocks ||
2994 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 3020 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
2995 /* 3021 /*
2996 * free block count is less that 150% of dirty blocks 3022 * free block count is less than 150% of dirty blocks
2997 * or free blocks is less that watermark 3023 * or free blocks is less than watermark
2998 */ 3024 */
2999 return 1; 3025 return 1;
3000 } 3026 }
3027 /*
3028 * Even if we don't switch but are nearing capacity,
3029 * start pushing delalloc when 1/2 of free blocks are dirty.
3030 */
3031 if (free_blocks < 2 * dirty_blocks)
3032 writeback_inodes_sb_if_idle(sb);
3033
3001 return 0; 3034 return 0;
3002} 3035}
3003 3036
@@ -3005,7 +3038,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
3005 loff_t pos, unsigned len, unsigned flags, 3038 loff_t pos, unsigned len, unsigned flags,
3006 struct page **pagep, void **fsdata) 3039 struct page **pagep, void **fsdata)
3007{ 3040{
3008 int ret, retries = 0; 3041 int ret, retries = 0, quota_retries = 0;
3009 struct page *page; 3042 struct page *page;
3010 pgoff_t index; 3043 pgoff_t index;
3011 unsigned from, to; 3044 unsigned from, to;
@@ -3064,6 +3097,22 @@ retry:
3064 3097
3065 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3098 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
3066 goto retry; 3099 goto retry;
3100
3101 if ((ret == -EDQUOT) &&
3102 EXT4_I(inode)->i_reserved_meta_blocks &&
3103 (quota_retries++ < 3)) {
3104 /*
3105 * Since we often over-estimate the number of meta
3106 * data blocks required, we may sometimes get a
3107 * spurios out of quota error even though there would
3108 * be enough space once we write the data blocks and
3109 * find out how many meta data blocks were _really_
3110 * required. So try forcing the inode write to see if
3111 * that helps.
3112 */
3113 write_inode_now(inode, (quota_retries == 3));
3114 goto retry;
3115 }
3067out: 3116out:
3068 return ret; 3117 return ret;
3069} 3118}
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc7..436521cae456 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/mutex.h> 21#include <linux/mutex.h>
23#include "ext4_jbd2.h" 22#include "ext4_jbd2.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6ed9aa91f27d..735c20d5fd56 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
702 ei->i_reserved_data_blocks = 0; 702 ei->i_reserved_data_blocks = 0;
703 ei->i_reserved_meta_blocks = 0; 703 ei->i_reserved_meta_blocks = 0;
704 ei->i_allocated_meta_blocks = 0; 704 ei->i_allocated_meta_blocks = 0;
705 ei->i_da_metadata_calc_len = 0;
705 ei->i_delalloc_reserved_flag = 0; 706 ei->i_delalloc_reserved_flag = 0;
706 spin_lock_init(&(ei->i_block_reservation_lock)); 707 spin_lock_init(&(ei->i_block_reservation_lock));
707#ifdef CONFIG_QUOTA 708#ifdef CONFIG_QUOTA
@@ -2174,9 +2175,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2174 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2175 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2175 2176
2176 return snprintf(buf, PAGE_SIZE, "%llu\n", 2177 return snprintf(buf, PAGE_SIZE, "%llu\n",
2177 sbi->s_kbytes_written + 2178 (unsigned long long)(sbi->s_kbytes_written +
2178 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2179 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2179 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 2180 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2180} 2181}
2181 2182
2182static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2183static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -4005,6 +4006,7 @@ static inline void unregister_as_ext2(void)
4005{ 4006{
4006 unregister_filesystem(&ext2_fs_type); 4007 unregister_filesystem(&ext2_fs_type);
4007} 4008}
4009MODULE_ALIAS("ext2");
4008#else 4010#else
4009static inline void register_as_ext2(void) { } 4011static inline void register_as_ext2(void) { }
4010static inline void unregister_as_ext2(void) { } 4012static inline void unregister_as_ext2(void) { }
@@ -4031,6 +4033,7 @@ static inline void unregister_as_ext3(void)
4031{ 4033{
4032 unregister_filesystem(&ext3_fs_type); 4034 unregister_filesystem(&ext3_fs_type);
4033} 4035}
4036MODULE_ALIAS("ext3");
4034#else 4037#else
4035static inline void register_as_ext3(void) { } 4038static inline void register_as_ext3(void) { }
4036static inline void unregister_as_ext3(void) { } 4039static inline void unregister_as_ext3(void) { }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 83218bebbc7c..f3a2f7ed45aa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1332,6 +1332,8 @@ retry:
1332 goto cleanup; 1332 goto cleanup;
1333 kfree(b_entry_name); 1333 kfree(b_entry_name);
1334 kfree(buffer); 1334 kfree(buffer);
1335 b_entry_name = NULL;
1336 buffer = NULL;
1335 brelse(is->iloc.bh); 1337 brelse(is->iloc.bh);
1336 kfree(is); 1338 kfree(is);
1337 kfree(bs); 1339 kfree(bs);