about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/ext4.txt7
-rw-r--r--fs/ext3/dir.c2
-rw-r--r--fs/ext4/balloc.c24
-rw-r--r--fs/ext4/dir.c2
-rw-r--r--fs/ext4/ext4.h58
-rw-r--r--fs/ext4/ext4_extents.h6
-rw-r--r--fs/ext4/ext4_jbd2.h2
-rw-r--r--fs/ext4/extents.c296
-rw-r--r--fs/ext4/extents_status.c125
-rw-r--r--fs/ext4/extents_status.h51
-rw-r--r--fs/ext4/ialloc.c90
-rw-r--r--fs/ext4/indirect.c1
-rw-r--r--fs/ext4/inode.c293
-rw-r--r--fs/ext4/ioctl.c4
-rw-r--r--fs/ext4/mballoc.c49
-rw-r--r--fs/ext4/migrate.c4
-rw-r--r--fs/ext4/move_extent.c2
-rw-r--r--fs/ext4/namei.c35
-rw-r--r--fs/ext4/super.c47
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/journal.c5
-rw-r--r--fs/jbd2/recovery.c24
-rw-r--r--fs/quota/dquot.c46
-rw-r--r--fs/stat.c11
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/quotaops.h15
-rw-r--r--include/trace/events/ext4.h29
-rw-r--r--include/uapi/linux/fiemap.h1
28 files changed, 857 insertions, 379 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index f7cbf574a875..b91cfaaf6a0f 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -144,11 +144,12 @@ journal_async_commit Commit block can be written to disk without waiting
144 mount the device. This will enable 'journal_checksum' 144 mount the device. This will enable 'journal_checksum'
145 internally. 145 internally.
146 146
147journal_path=path
147journal_dev=devnum When the external journal device's major/minor numbers 148journal_dev=devnum When the external journal device's major/minor numbers
148 have changed, this option allows the user to specify 149 have changed, these options allow the user to specify
149 the new journal location. The journal device is 150 the new journal location. The journal device is
150 identified through its new major/minor numbers encoded 151 identified through either its new major/minor numbers
151 in devnum. 152 encoded in devnum, or via a path to the device.
152 153
153norecovery Don't load the journal on mounting. Note that 154norecovery Don't load the journal on mounting. Note that
154noload if the filesystem was not unmounted cleanly, 155noload if the filesystem was not unmounted cleanly,
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index f522425aaa24..bafdd48eefde 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -41,7 +41,7 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
41 41
42/** 42/**
43 * Check if the given dir-inode refers to an htree-indexed directory 43 * Check if the given dir-inode refers to an htree-indexed directory
44 * (or a directory which chould potentially get coverted to use htree 44 * (or a directory which could potentially get converted to use htree
45 * indexing). 45 * indexing).
46 * 46 *
47 * Return 1 if it is a dx dir, 0 if not 47 * Return 1 if it is a dx dir, 0 if not
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index ddd715e42a5c..dc5d572ebd6a 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -184,6 +184,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
184 struct ext4_sb_info *sbi = EXT4_SB(sb); 184 struct ext4_sb_info *sbi = EXT4_SB(sb);
185 ext4_fsblk_t start, tmp; 185 ext4_fsblk_t start, tmp;
186 int flex_bg = 0; 186 int flex_bg = 0;
187 struct ext4_group_info *grp;
187 188
188 J_ASSERT_BH(bh, buffer_locked(bh)); 189 J_ASSERT_BH(bh, buffer_locked(bh));
189 190
@@ -191,11 +192,9 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
191 * essentially implementing a per-group read-only flag. */ 192 * essentially implementing a per-group read-only flag. */
192 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 193 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
193 ext4_error(sb, "Checksum bad for group %u", block_group); 194 ext4_error(sb, "Checksum bad for group %u", block_group);
194 ext4_free_group_clusters_set(sb, gdp, 0); 195 grp = ext4_get_group_info(sb, block_group);
195 ext4_free_inodes_set(sb, gdp, 0); 196 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
196 ext4_itable_unused_set(sb, gdp, 0); 197 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
197 memset(bh->b_data, 0xff, sb->s_blocksize);
198 ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
199 return; 198 return;
200 } 199 }
201 memset(bh->b_data, 0, sb->s_blocksize); 200 memset(bh->b_data, 0, sb->s_blocksize);
@@ -305,7 +304,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
305 */ 304 */
306static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, 305static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
307 struct ext4_group_desc *desc, 306 struct ext4_group_desc *desc,
308 unsigned int block_group, 307 ext4_group_t block_group,
309 struct buffer_head *bh) 308 struct buffer_head *bh)
310{ 309{
311 ext4_grpblk_t offset; 310 ext4_grpblk_t offset;
@@ -352,10 +351,11 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
352 351
353void ext4_validate_block_bitmap(struct super_block *sb, 352void ext4_validate_block_bitmap(struct super_block *sb,
354 struct ext4_group_desc *desc, 353 struct ext4_group_desc *desc,
355 unsigned int block_group, 354 ext4_group_t block_group,
356 struct buffer_head *bh) 355 struct buffer_head *bh)
357{ 356{
358 ext4_fsblk_t blk; 357 ext4_fsblk_t blk;
358 struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
359 359
360 if (buffer_verified(bh)) 360 if (buffer_verified(bh))
361 return; 361 return;
@@ -366,12 +366,14 @@ void ext4_validate_block_bitmap(struct super_block *sb,
366 ext4_unlock_group(sb, block_group); 366 ext4_unlock_group(sb, block_group);
367 ext4_error(sb, "bg %u: block %llu: invalid block bitmap", 367 ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
368 block_group, blk); 368 block_group, blk);
369 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
369 return; 370 return;
370 } 371 }
371 if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, 372 if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
372 desc, bh))) { 373 desc, bh))) {
373 ext4_unlock_group(sb, block_group); 374 ext4_unlock_group(sb, block_group);
374 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); 375 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
376 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
375 return; 377 return;
376 } 378 }
377 set_buffer_verified(bh); 379 set_buffer_verified(bh);
@@ -445,7 +447,10 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
445 return bh; 447 return bh;
446verify: 448verify:
447 ext4_validate_block_bitmap(sb, desc, block_group, bh); 449 ext4_validate_block_bitmap(sb, desc, block_group, bh);
448 return bh; 450 if (buffer_verified(bh))
451 return bh;
452 put_bh(bh);
453 return NULL;
449} 454}
450 455
451/* Returns 0 on success, 1 on error */ 456/* Returns 0 on success, 1 on error */
@@ -469,7 +474,8 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
469 clear_buffer_new(bh); 474 clear_buffer_new(bh);
470 /* Panic or remount fs read-only if block bitmap is invalid */ 475 /* Panic or remount fs read-only if block bitmap is invalid */
471 ext4_validate_block_bitmap(sb, desc, block_group, bh); 476 ext4_validate_block_bitmap(sb, desc, block_group, bh);
472 return 0; 477 /* ...but check for error just in case errors=continue. */
478 return !buffer_verified(bh);
473} 479}
474 480
475struct buffer_head * 481struct buffer_head *
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3c7d288ae94c..680bb3388919 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -33,7 +33,7 @@ static int ext4_dx_readdir(struct file *, struct dir_context *);
33 33
34/** 34/**
35 * Check if the given dir-inode refers to an htree-indexed directory 35 * Check if the given dir-inode refers to an htree-indexed directory
36 * (or a directory which chould potentially get coverted to use htree 36 * (or a directory which could potentially get converted to use htree
37 * indexing). 37 * indexing).
38 * 38 *
39 * Return 1 if it is a dx dir, 0 if not 39 * Return 1 if it is a dx dir, 0 if not
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0ab26fbf3380..06b488dca666 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -561,6 +561,18 @@ enum {
561#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 561#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
562 562
563/* 563/*
564 * The bit position of these flags must not overlap with any of the
565 * EXT4_GET_BLOCKS_*. They are used by ext4_ext_find_extent(),
566 * read_extent_tree_block(), ext4_split_extent_at(),
567 * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
568 * EXT4_EX_NOCACHE is used to indicate that we shouldn't be
569 * caching the extents when reading from the extent tree while a
570 * truncate or punch hole operation is in progress.
571 */
572#define EXT4_EX_NOCACHE 0x0400
573#define EXT4_EX_FORCE_CACHE 0x0800
574
575/*
564 * Flags used by ext4_free_blocks 576 * Flags used by ext4_free_blocks
565 */ 577 */
566#define EXT4_FREE_BLOCKS_METADATA 0x0001 578#define EXT4_FREE_BLOCKS_METADATA 0x0001
@@ -569,6 +581,7 @@ enum {
569#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 581#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
570#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 582#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
571#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 583#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
584#define EXT4_FREE_BLOCKS_RESERVE 0x0040
572 585
573/* 586/*
574 * ioctl commands 587 * ioctl commands
@@ -590,6 +603,7 @@ enum {
590#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) 603#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
591#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) 604#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
592#define EXT4_IOC_SWAP_BOOT _IO('f', 17) 605#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
606#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
593 607
594#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 608#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
595/* 609/*
@@ -1375,6 +1389,7 @@ enum {
1375 nolocking */ 1389 nolocking */
1376 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ 1390 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
1377 EXT4_STATE_ORDERED_MODE, /* data=ordered mode */ 1391 EXT4_STATE_ORDERED_MODE, /* data=ordered mode */
1392 EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
1378}; 1393};
1379 1394
1380#define EXT4_INODE_BIT_FNS(name, field, offset) \ 1395#define EXT4_INODE_BIT_FNS(name, field, offset) \
@@ -1915,7 +1930,7 @@ extern ext4_group_t ext4_get_group_number(struct super_block *sb,
1915 1930
1916extern void ext4_validate_block_bitmap(struct super_block *sb, 1931extern void ext4_validate_block_bitmap(struct super_block *sb,
1917 struct ext4_group_desc *desc, 1932 struct ext4_group_desc *desc,
1918 unsigned int block_group, 1933 ext4_group_t block_group,
1919 struct buffer_head *bh); 1934 struct buffer_head *bh);
1920extern unsigned int ext4_block_group(struct super_block *sb, 1935extern unsigned int ext4_block_group(struct super_block *sb,
1921 ext4_fsblk_t blocknr); 1936 ext4_fsblk_t blocknr);
@@ -2417,16 +2432,32 @@ do { \
2417#define EXT4_FREECLUSTERS_WATERMARK 0 2432#define EXT4_FREECLUSTERS_WATERMARK 0
2418#endif 2433#endif
2419 2434
2435/* Update i_disksize. Requires i_mutex to avoid races with truncate */
2420static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) 2436static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
2421{ 2437{
2422 /* 2438 WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
2423 * XXX: replace with spinlock if seen contended -bzzz 2439 !mutex_is_locked(&inode->i_mutex));
2424 */
2425 down_write(&EXT4_I(inode)->i_data_sem); 2440 down_write(&EXT4_I(inode)->i_data_sem);
2426 if (newsize > EXT4_I(inode)->i_disksize) 2441 if (newsize > EXT4_I(inode)->i_disksize)
2427 EXT4_I(inode)->i_disksize = newsize; 2442 EXT4_I(inode)->i_disksize = newsize;
2428 up_write(&EXT4_I(inode)->i_data_sem); 2443 up_write(&EXT4_I(inode)->i_data_sem);
2429 return ; 2444}
2445
2446/*
2447 * Update i_disksize after writeback has been started. Races with truncate
2448 * are avoided by checking i_size under i_data_sem.
2449 */
2450static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
2451{
2452 loff_t i_size;
2453
2454 down_write(&EXT4_I(inode)->i_data_sem);
2455 i_size = i_size_read(inode);
2456 if (newsize > i_size)
2457 newsize = i_size;
2458 if (newsize > EXT4_I(inode)->i_disksize)
2459 EXT4_I(inode)->i_disksize = newsize;
2460 up_write(&EXT4_I(inode)->i_data_sem);
2430} 2461}
2431 2462
2432struct ext4_group_info { 2463struct ext4_group_info {
@@ -2449,9 +2480,15 @@ struct ext4_group_info {
2449 2480
2450#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 2481#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
2451#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1 2482#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1
2483#define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT 2
2484#define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT 3
2452 2485
2453#define EXT4_MB_GRP_NEED_INIT(grp) \ 2486#define EXT4_MB_GRP_NEED_INIT(grp) \
2454 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) 2487 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
2488#define EXT4_MB_GRP_BBITMAP_CORRUPT(grp) \
2489 (test_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &((grp)->bb_state)))
2490#define EXT4_MB_GRP_IBITMAP_CORRUPT(grp) \
2491 (test_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &((grp)->bb_state)))
2455 2492
2456#define EXT4_MB_GRP_WAS_TRIMMED(grp) \ 2493#define EXT4_MB_GRP_WAS_TRIMMED(grp) \
2457 (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) 2494 (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
@@ -2655,6 +2692,12 @@ extern int ext4_check_blockref(const char *, unsigned int,
2655struct ext4_ext_path; 2692struct ext4_ext_path;
2656struct ext4_extent; 2693struct ext4_extent;
2657 2694
2695/*
2696 * Maximum number of logical blocks in a file; ext4_extent's ee_block is
2697 * __le32.
2698 */
2699#define EXT_MAX_BLOCKS 0xffffffff
2700
2658extern int ext4_ext_tree_init(handle_t *handle, struct inode *); 2701extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
2659extern int ext4_ext_writepage_trans_blocks(struct inode *, int); 2702extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
2660extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); 2703extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
@@ -2684,7 +2727,8 @@ extern int ext4_ext_insert_extent(handle_t *, struct inode *,
2684 struct ext4_ext_path *, 2727 struct ext4_ext_path *,
2685 struct ext4_extent *, int); 2728 struct ext4_extent *, int);
2686extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, 2729extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
2687 struct ext4_ext_path *); 2730 struct ext4_ext_path *,
2731 int flags);
2688extern void ext4_ext_drop_refs(struct ext4_ext_path *); 2732extern void ext4_ext_drop_refs(struct ext4_ext_path *);
2689extern int ext4_ext_check_inode(struct inode *inode); 2733extern int ext4_ext_check_inode(struct inode *inode);
2690extern int ext4_find_delalloc_range(struct inode *inode, 2734extern int ext4_find_delalloc_range(struct inode *inode,
@@ -2693,7 +2737,7 @@ extern int ext4_find_delalloc_range(struct inode *inode,
2693extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); 2737extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
2694extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2738extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2695 __u64 start, __u64 len); 2739 __u64 start, __u64 len);
2696 2740extern int ext4_ext_precache(struct inode *inode);
2697 2741
2698/* move_extent.c */ 2742/* move_extent.c */
2699extern void ext4_double_down_write_data_sem(struct inode *first, 2743extern void ext4_double_down_write_data_sem(struct inode *first,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 51bc821ade90..5074fe23f19e 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -134,12 +134,6 @@ struct ext4_ext_path {
134 */ 134 */
135 135
136/* 136/*
137 * Maximum number of logical blocks in a file; ext4_extent's ee_block is
138 * __le32.
139 */
140#define EXT_MAX_BLOCKS 0xffffffff
141
142/*
143 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an 137 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
144 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the 138 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
145 * MSB of ee_len field in the extent datastructure to signify if this 139 * MSB of ee_len field in the extent datastructure to signify if this
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 2877258d9497..81cfefa9dc0c 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -197,7 +197,7 @@ static inline void ext4_journal_callback_add(handle_t *handle,
197 * ext4_journal_callback_del: delete a registered callback 197 * ext4_journal_callback_del: delete a registered callback
198 * @handle: active journal transaction handle on which callback was registered 198 * @handle: active journal transaction handle on which callback was registered
199 * @jce: registered journal callback entry to unregister 199 * @jce: registered journal callback entry to unregister
200 * Return true if object was sucessfully removed 200 * Return true if object was successfully removed
201 */ 201 */
202static inline bool ext4_journal_callback_try_del(handle_t *handle, 202static inline bool ext4_journal_callback_try_del(handle_t *handle,
203 struct ext4_journal_cb_entry *jce) 203 struct ext4_journal_cb_entry *jce)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 72ba4705d4fa..54d52afcdb19 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -407,7 +407,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
407 407
408static int __ext4_ext_check(const char *function, unsigned int line, 408static int __ext4_ext_check(const char *function, unsigned int line,
409 struct inode *inode, struct ext4_extent_header *eh, 409 struct inode *inode, struct ext4_extent_header *eh,
410 int depth) 410 int depth, ext4_fsblk_t pblk)
411{ 411{
412 const char *error_msg; 412 const char *error_msg;
413 int max = 0; 413 int max = 0;
@@ -447,42 +447,149 @@ static int __ext4_ext_check(const char *function, unsigned int line,
447 447
448corrupted: 448corrupted:
449 ext4_error_inode(inode, function, line, 0, 449 ext4_error_inode(inode, function, line, 0,
450 "bad header/extent: %s - magic %x, " 450 "pblk %llu bad header/extent: %s - magic %x, "
451 "entries %u, max %u(%u), depth %u(%u)", 451 "entries %u, max %u(%u), depth %u(%u)",
452 error_msg, le16_to_cpu(eh->eh_magic), 452 (unsigned long long) pblk, error_msg,
453 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), 453 le16_to_cpu(eh->eh_magic),
454 max, le16_to_cpu(eh->eh_depth), depth); 454 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
455 455 max, le16_to_cpu(eh->eh_depth), depth);
456 return -EIO; 456 return -EIO;
457} 457}
458 458
459#define ext4_ext_check(inode, eh, depth) \ 459#define ext4_ext_check(inode, eh, depth, pblk) \
460 __ext4_ext_check(__func__, __LINE__, inode, eh, depth) 460 __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk))
461 461
462int ext4_ext_check_inode(struct inode *inode) 462int ext4_ext_check_inode(struct inode *inode)
463{ 463{
464 return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode)); 464 return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
465} 465}
466 466
467static int __ext4_ext_check_block(const char *function, unsigned int line, 467static struct buffer_head *
468 struct inode *inode, 468__read_extent_tree_block(const char *function, unsigned int line,
469 struct ext4_extent_header *eh, 469 struct inode *inode, ext4_fsblk_t pblk, int depth,
470 int depth, 470 int flags)
471 struct buffer_head *bh)
472{ 471{
473 int ret; 472 struct buffer_head *bh;
473 int err;
474 474
475 if (buffer_verified(bh)) 475 bh = sb_getblk(inode->i_sb, pblk);
476 return 0; 476 if (unlikely(!bh))
477 ret = ext4_ext_check(inode, eh, depth); 477 return ERR_PTR(-ENOMEM);
478 if (ret) 478
479 return ret; 479 if (!bh_uptodate_or_lock(bh)) {
480 trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
481 err = bh_submit_read(bh);
482 if (err < 0)
483 goto errout;
484 }
485 if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
486 return bh;
487 err = __ext4_ext_check(function, line, inode,
488 ext_block_hdr(bh), depth, pblk);
489 if (err)
490 goto errout;
480 set_buffer_verified(bh); 491 set_buffer_verified(bh);
481 return ret; 492 /*
493 * If this is a leaf block, cache all of its entries
494 */
495 if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
496 struct ext4_extent_header *eh = ext_block_hdr(bh);
497 struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
498 ext4_lblk_t prev = 0;
499 int i;
500
501 for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
502 unsigned int status = EXTENT_STATUS_WRITTEN;
503 ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
504 int len = ext4_ext_get_actual_len(ex);
505
506 if (prev && (prev != lblk))
507 ext4_es_cache_extent(inode, prev,
508 lblk - prev, ~0,
509 EXTENT_STATUS_HOLE);
510
511 if (ext4_ext_is_uninitialized(ex))
512 status = EXTENT_STATUS_UNWRITTEN;
513 ext4_es_cache_extent(inode, lblk, len,
514 ext4_ext_pblock(ex), status);
515 prev = lblk + len;
516 }
517 }
518 return bh;
519errout:
520 put_bh(bh);
521 return ERR_PTR(err);
522
482} 523}
483 524
484#define ext4_ext_check_block(inode, eh, depth, bh) \ 525#define read_extent_tree_block(inode, pblk, depth, flags) \
485 __ext4_ext_check_block(__func__, __LINE__, inode, eh, depth, bh) 526 __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \
527 (depth), (flags))
528
529/*
530 * This function is called to cache a file's extent information in the
531 * extent status tree
532 */
533int ext4_ext_precache(struct inode *inode)
534{
535 struct ext4_inode_info *ei = EXT4_I(inode);
536 struct ext4_ext_path *path = NULL;
537 struct buffer_head *bh;
538 int i = 0, depth, ret = 0;
539
540 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
541 return 0; /* not an extent-mapped inode */
542
543 down_read(&ei->i_data_sem);
544 depth = ext_depth(inode);
545
546 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1),
547 GFP_NOFS);
548 if (path == NULL) {
549 up_read(&ei->i_data_sem);
550 return -ENOMEM;
551 }
552
553 /* Don't cache anything if there are no external extent blocks */
554 if (depth == 0)
555 goto out;
556 path[0].p_hdr = ext_inode_hdr(inode);
557 ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
558 if (ret)
559 goto out;
560 path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr);
561 while (i >= 0) {
562 /*
563 * If this is a leaf block or we've reached the end of
564 * the index block, go up
565 */
566 if ((i == depth) ||
567 path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
568 brelse(path[i].p_bh);
569 path[i].p_bh = NULL;
570 i--;
571 continue;
572 }
573 bh = read_extent_tree_block(inode,
574 ext4_idx_pblock(path[i].p_idx++),
575 depth - i - 1,
576 EXT4_EX_FORCE_CACHE);
577 if (IS_ERR(bh)) {
578 ret = PTR_ERR(bh);
579 break;
580 }
581 i++;
582 path[i].p_bh = bh;
583 path[i].p_hdr = ext_block_hdr(bh);
584 path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
585 }
586 ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
587out:
588 up_read(&ei->i_data_sem);
589 ext4_ext_drop_refs(path);
590 kfree(path);
591 return ret;
592}
486 593
487#ifdef EXT_DEBUG 594#ifdef EXT_DEBUG
488static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 595static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -716,7 +823,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
716 823
717struct ext4_ext_path * 824struct ext4_ext_path *
718ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, 825ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
719 struct ext4_ext_path *path) 826 struct ext4_ext_path *path, int flags)
720{ 827{
721 struct ext4_extent_header *eh; 828 struct ext4_extent_header *eh;
722 struct buffer_head *bh; 829 struct buffer_head *bh;
@@ -748,20 +855,13 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
748 path[ppos].p_depth = i; 855 path[ppos].p_depth = i;
749 path[ppos].p_ext = NULL; 856 path[ppos].p_ext = NULL;
750 857
751 bh = sb_getblk(inode->i_sb, path[ppos].p_block); 858 bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
752 if (unlikely(!bh)) { 859 flags);
753 ret = -ENOMEM; 860 if (IS_ERR(bh)) {
861 ret = PTR_ERR(bh);
754 goto err; 862 goto err;
755 } 863 }
756 if (!bh_uptodate_or_lock(bh)) { 864
757 trace_ext4_ext_load_extent(inode, block,
758 path[ppos].p_block);
759 ret = bh_submit_read(bh);
760 if (ret < 0) {
761 put_bh(bh);
762 goto err;
763 }
764 }
765 eh = ext_block_hdr(bh); 865 eh = ext_block_hdr(bh);
766 ppos++; 866 ppos++;
767 if (unlikely(ppos > depth)) { 867 if (unlikely(ppos > depth)) {
@@ -773,11 +873,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
773 } 873 }
774 path[ppos].p_bh = bh; 874 path[ppos].p_bh = bh;
775 path[ppos].p_hdr = eh; 875 path[ppos].p_hdr = eh;
776 i--;
777
778 ret = ext4_ext_check_block(inode, eh, i, bh);
779 if (ret < 0)
780 goto err;
781 } 876 }
782 877
783 path[ppos].p_depth = i; 878 path[ppos].p_depth = i;
@@ -1198,7 +1293,8 @@ out:
1198 * if no free index is found, then it requests in-depth growing. 1293 * if no free index is found, then it requests in-depth growing.
1199 */ 1294 */
1200static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, 1295static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
1201 unsigned int flags, 1296 unsigned int mb_flags,
1297 unsigned int gb_flags,
1202 struct ext4_ext_path *path, 1298 struct ext4_ext_path *path,
1203 struct ext4_extent *newext) 1299 struct ext4_extent *newext)
1204{ 1300{
@@ -1220,7 +1316,7 @@ repeat:
1220 if (EXT_HAS_FREE_INDEX(curp)) { 1316 if (EXT_HAS_FREE_INDEX(curp)) {
1221 /* if we found index with free entry, then use that 1317 /* if we found index with free entry, then use that
1222 * entry: create all needed subtree and add new leaf */ 1318 * entry: create all needed subtree and add new leaf */
1223 err = ext4_ext_split(handle, inode, flags, path, newext, i); 1319 err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
1224 if (err) 1320 if (err)
1225 goto out; 1321 goto out;
1226 1322
@@ -1228,12 +1324,12 @@ repeat:
1228 ext4_ext_drop_refs(path); 1324 ext4_ext_drop_refs(path);
1229 path = ext4_ext_find_extent(inode, 1325 path = ext4_ext_find_extent(inode,
1230 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1326 (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1231 path); 1327 path, gb_flags);
1232 if (IS_ERR(path)) 1328 if (IS_ERR(path))
1233 err = PTR_ERR(path); 1329 err = PTR_ERR(path);
1234 } else { 1330 } else {
1235 /* tree is full, time to grow in depth */ 1331 /* tree is full, time to grow in depth */
1236 err = ext4_ext_grow_indepth(handle, inode, flags, newext); 1332 err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext);
1237 if (err) 1333 if (err)
1238 goto out; 1334 goto out;
1239 1335
@@ -1241,7 +1337,7 @@ repeat:
1241 ext4_ext_drop_refs(path); 1337 ext4_ext_drop_refs(path);
1242 path = ext4_ext_find_extent(inode, 1338 path = ext4_ext_find_extent(inode,
1243 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1339 (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1244 path); 1340 path, gb_flags);
1245 if (IS_ERR(path)) { 1341 if (IS_ERR(path)) {
1246 err = PTR_ERR(path); 1342 err = PTR_ERR(path);
1247 goto out; 1343 goto out;
@@ -1412,29 +1508,21 @@ got_index:
1412 ix++; 1508 ix++;
1413 block = ext4_idx_pblock(ix); 1509 block = ext4_idx_pblock(ix);
1414 while (++depth < path->p_depth) { 1510 while (++depth < path->p_depth) {
1415 bh = sb_bread(inode->i_sb, block);
1416 if (bh == NULL)
1417 return -EIO;
1418 eh = ext_block_hdr(bh);
1419 /* subtract from p_depth to get proper eh_depth */ 1511 /* subtract from p_depth to get proper eh_depth */
1420 if (ext4_ext_check_block(inode, eh, 1512 bh = read_extent_tree_block(inode, block,
1421 path->p_depth - depth, bh)) { 1513 path->p_depth - depth, 0);
1422 put_bh(bh); 1514 if (IS_ERR(bh))
1423 return -EIO; 1515 return PTR_ERR(bh);
1424 } 1516 eh = ext_block_hdr(bh);
1425 ix = EXT_FIRST_INDEX(eh); 1517 ix = EXT_FIRST_INDEX(eh);
1426 block = ext4_idx_pblock(ix); 1518 block = ext4_idx_pblock(ix);
1427 put_bh(bh); 1519 put_bh(bh);
1428 } 1520 }
1429 1521
1430 bh = sb_bread(inode->i_sb, block); 1522 bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0);
1431 if (bh == NULL) 1523 if (IS_ERR(bh))
1432 return -EIO; 1524 return PTR_ERR(bh);
1433 eh = ext_block_hdr(bh); 1525 eh = ext_block_hdr(bh);
1434 if (ext4_ext_check_block(inode, eh, path->p_depth - depth, bh)) {
1435 put_bh(bh);
1436 return -EIO;
1437 }
1438 ex = EXT_FIRST_EXTENT(eh); 1526 ex = EXT_FIRST_EXTENT(eh);
1439found_extent: 1527found_extent:
1440 *logical = le32_to_cpu(ex->ee_block); 1528 *logical = le32_to_cpu(ex->ee_block);
@@ -1705,7 +1793,8 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
1705 1793
1706 brelse(path[1].p_bh); 1794 brelse(path[1].p_bh);
1707 ext4_free_blocks(handle, inode, NULL, blk, 1, 1795 ext4_free_blocks(handle, inode, NULL, blk, 1,
1708 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 1796 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET |
1797 EXT4_FREE_BLOCKS_RESERVE);
1709} 1798}
1710 1799
1711/* 1800/*
@@ -1793,7 +1882,7 @@ out:
1793 */ 1882 */
1794int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, 1883int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1795 struct ext4_ext_path *path, 1884 struct ext4_ext_path *path,
1796 struct ext4_extent *newext, int flag) 1885 struct ext4_extent *newext, int gb_flags)
1797{ 1886{
1798 struct ext4_extent_header *eh; 1887 struct ext4_extent_header *eh;
1799 struct ext4_extent *ex, *fex; 1888 struct ext4_extent *ex, *fex;
@@ -1802,7 +1891,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1802 int depth, len, err; 1891 int depth, len, err;
1803 ext4_lblk_t next; 1892 ext4_lblk_t next;
1804 unsigned uninitialized = 0; 1893 unsigned uninitialized = 0;
1805 int flags = 0; 1894 int mb_flags = 0;
1806 1895
1807 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { 1896 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1808 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); 1897 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
@@ -1817,7 +1906,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1817 } 1906 }
1818 1907
1819 /* try to insert block into found extent and return */ 1908 /* try to insert block into found extent and return */
1820 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)) { 1909 if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
1821 1910
1822 /* 1911 /*
1823 * Try to see whether we should rather test the extent on 1912 * Try to see whether we should rather test the extent on
@@ -1920,7 +2009,7 @@ prepend:
1920 if (next != EXT_MAX_BLOCKS) { 2009 if (next != EXT_MAX_BLOCKS) {
1921 ext_debug("next leaf block - %u\n", next); 2010 ext_debug("next leaf block - %u\n", next);
1922 BUG_ON(npath != NULL); 2011 BUG_ON(npath != NULL);
1923 npath = ext4_ext_find_extent(inode, next, NULL); 2012 npath = ext4_ext_find_extent(inode, next, NULL, 0);
1924 if (IS_ERR(npath)) 2013 if (IS_ERR(npath))
1925 return PTR_ERR(npath); 2014 return PTR_ERR(npath);
1926 BUG_ON(npath->p_depth != path->p_depth); 2015 BUG_ON(npath->p_depth != path->p_depth);
@@ -1939,9 +2028,10 @@ prepend:
1939 * There is no free space in the found leaf. 2028 * There is no free space in the found leaf.
1940 * We're gonna add a new leaf in the tree. 2029 * We're gonna add a new leaf in the tree.
1941 */ 2030 */
1942 if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL) 2031 if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
1943 flags = EXT4_MB_USE_RESERVED; 2032 mb_flags = EXT4_MB_USE_RESERVED;
1944 err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext); 2033 err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
2034 path, newext);
1945 if (err) 2035 if (err)
1946 goto cleanup; 2036 goto cleanup;
1947 depth = ext_depth(inode); 2037 depth = ext_depth(inode);
@@ -2007,7 +2097,7 @@ has_space:
2007 2097
2008merge: 2098merge:
2009 /* try to merge extents */ 2099 /* try to merge extents */
2010 if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) 2100 if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
2011 ext4_ext_try_to_merge(handle, inode, path, nearex); 2101 ext4_ext_try_to_merge(handle, inode, path, nearex);
2012 2102
2013 2103
@@ -2050,7 +2140,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2050 path = NULL; 2140 path = NULL;
2051 } 2141 }
2052 2142
2053 path = ext4_ext_find_extent(inode, block, path); 2143 path = ext4_ext_find_extent(inode, block, path, 0);
2054 if (IS_ERR(path)) { 2144 if (IS_ERR(path)) {
2055 up_read(&EXT4_I(inode)->i_data_sem); 2145 up_read(&EXT4_I(inode)->i_data_sem);
2056 err = PTR_ERR(path); 2146 err = PTR_ERR(path);
@@ -2195,8 +2285,8 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2195 ext4_lblk_t block) 2285 ext4_lblk_t block)
2196{ 2286{
2197 int depth = ext_depth(inode); 2287 int depth = ext_depth(inode);
2198 unsigned long len; 2288 unsigned long len = 0;
2199 ext4_lblk_t lblock; 2289 ext4_lblk_t lblock = 0;
2200 struct ext4_extent *ex; 2290 struct ext4_extent *ex;
2201 2291
2202 ex = path[depth].p_ext; 2292 ex = path[depth].p_ext;
@@ -2233,7 +2323,6 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2233 ext4_es_insert_extent(inode, lblock, len, ~0, 2323 ext4_es_insert_extent(inode, lblock, len, ~0,
2234 EXTENT_STATUS_HOLE); 2324 EXTENT_STATUS_HOLE);
2235 } else { 2325 } else {
2236 lblock = len = 0;
2237 BUG(); 2326 BUG();
2238 } 2327 }
2239 2328
@@ -2712,7 +2801,7 @@ again:
2712 ext4_lblk_t ee_block; 2801 ext4_lblk_t ee_block;
2713 2802
2714 /* find extent for this block */ 2803 /* find extent for this block */
2715 path = ext4_ext_find_extent(inode, end, NULL); 2804 path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
2716 if (IS_ERR(path)) { 2805 if (IS_ERR(path)) {
2717 ext4_journal_stop(handle); 2806 ext4_journal_stop(handle);
2718 return PTR_ERR(path); 2807 return PTR_ERR(path);
@@ -2754,6 +2843,7 @@ again:
2754 */ 2843 */
2755 err = ext4_split_extent_at(handle, inode, path, 2844 err = ext4_split_extent_at(handle, inode, path,
2756 end + 1, split_flag, 2845 end + 1, split_flag,
2846 EXT4_EX_NOCACHE |
2757 EXT4_GET_BLOCKS_PRE_IO | 2847 EXT4_GET_BLOCKS_PRE_IO |
2758 EXT4_GET_BLOCKS_METADATA_NOFAIL); 2848 EXT4_GET_BLOCKS_METADATA_NOFAIL);
2759 2849
@@ -2782,7 +2872,7 @@ again:
2782 path[0].p_hdr = ext_inode_hdr(inode); 2872 path[0].p_hdr = ext_inode_hdr(inode);
2783 i = 0; 2873 i = 0;
2784 2874
2785 if (ext4_ext_check(inode, path[0].p_hdr, depth)) { 2875 if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
2786 err = -EIO; 2876 err = -EIO;
2787 goto out; 2877 goto out;
2788 } 2878 }
@@ -2829,10 +2919,12 @@ again:
2829 ext_debug("move to level %d (block %llu)\n", 2919 ext_debug("move to level %d (block %llu)\n",
2830 i + 1, ext4_idx_pblock(path[i].p_idx)); 2920 i + 1, ext4_idx_pblock(path[i].p_idx));
2831 memset(path + i + 1, 0, sizeof(*path)); 2921 memset(path + i + 1, 0, sizeof(*path));
2832 bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx)); 2922 bh = read_extent_tree_block(inode,
2833 if (!bh) { 2923 ext4_idx_pblock(path[i].p_idx), depth - i - 1,
2924 EXT4_EX_NOCACHE);
2925 if (IS_ERR(bh)) {
2834 /* should we reset i_size? */ 2926 /* should we reset i_size? */
2835 err = -EIO; 2927 err = PTR_ERR(bh);
2836 break; 2928 break;
2837 } 2929 }
2838 /* Yield here to deal with large extent trees. 2930 /* Yield here to deal with large extent trees.
@@ -2842,11 +2934,6 @@ again:
2842 err = -EIO; 2934 err = -EIO;
2843 break; 2935 break;
2844 } 2936 }
2845 if (ext4_ext_check_block(inode, ext_block_hdr(bh),
2846 depth - i - 1, bh)) {
2847 err = -EIO;
2848 break;
2849 }
2850 path[i + 1].p_bh = bh; 2937 path[i + 1].p_bh = bh;
2851 2938
2852 /* save actual number of indexes since this 2939 /* save actual number of indexes since this
@@ -2961,6 +3048,23 @@ void ext4_ext_release(struct super_block *sb)
2961#endif 3048#endif
2962} 3049}
2963 3050
3051static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
3052{
3053 ext4_lblk_t ee_block;
3054 ext4_fsblk_t ee_pblock;
3055 unsigned int ee_len;
3056
3057 ee_block = le32_to_cpu(ex->ee_block);
3058 ee_len = ext4_ext_get_actual_len(ex);
3059 ee_pblock = ext4_ext_pblock(ex);
3060
3061 if (ee_len == 0)
3062 return 0;
3063
3064 return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
3065 EXTENT_STATUS_WRITTEN);
3066}
3067
2964/* FIXME!! we need to try to merge to left or right after zero-out */ 3068/* FIXME!! we need to try to merge to left or right after zero-out */
2965static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) 3069static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2966{ 3070{
@@ -3113,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle,
3113 goto fix_extent_len; 3217 goto fix_extent_len;
3114 3218
3115 /* update extent status tree */ 3219 /* update extent status tree */
3116 err = ext4_es_zeroout(inode, &zero_ex); 3220 err = ext4_zeroout_es(inode, &zero_ex);
3117 3221
3118 goto out; 3222 goto out;
3119 } else if (err) 3223 } else if (err)
@@ -3133,7 +3237,7 @@ fix_extent_len:
3133 * ext4_split_extents() splits an extent and mark extent which is covered 3237 * ext4_split_extents() splits an extent and mark extent which is covered
3134 * by @map as split_flags indicates 3238 * by @map as split_flags indicates
3135 * 3239 *
3136 * It may result in splitting the extent into multiple extents (upto three) 3240 * It may result in splitting the extent into multiple extents (up to three)
3137 * There are three possibilities: 3241 * There are three possibilities:
3138 * a> There is no split required 3242 * a> There is no split required
3139 * b> Splits in two extents: Split is happening at either end of the extent 3243 * b> Splits in two extents: Split is happening at either end of the extent
@@ -3181,7 +3285,7 @@ static int ext4_split_extent(handle_t *handle,
3181 * result in split of original leaf or extent zeroout. 3285 * result in split of original leaf or extent zeroout.
3182 */ 3286 */
3183 ext4_ext_drop_refs(path); 3287 ext4_ext_drop_refs(path);
3184 path = ext4_ext_find_extent(inode, map->m_lblk, path); 3288 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
3185 if (IS_ERR(path)) 3289 if (IS_ERR(path))
3186 return PTR_ERR(path); 3290 return PTR_ERR(path);
3187 depth = ext_depth(inode); 3291 depth = ext_depth(inode);
@@ -3464,7 +3568,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3464out: 3568out:
3465 /* If we have gotten a failure, don't zero out status tree */ 3569 /* If we have gotten a failure, don't zero out status tree */
3466 if (!err) 3570 if (!err)
3467 err = ext4_es_zeroout(inode, &zero_ex); 3571 err = ext4_zeroout_es(inode, &zero_ex);
3468 return err ? err : allocated; 3572 return err ? err : allocated;
3469} 3573}
3470 3574
@@ -3565,7 +3669,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3565 if (err < 0) 3669 if (err < 0)
3566 goto out; 3670 goto out;
3567 ext4_ext_drop_refs(path); 3671 ext4_ext_drop_refs(path);
3568 path = ext4_ext_find_extent(inode, map->m_lblk, path); 3672 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
3569 if (IS_ERR(path)) { 3673 if (IS_ERR(path)) {
3570 err = PTR_ERR(path); 3674 err = PTR_ERR(path);
3571 goto out; 3675 goto out;
@@ -4052,7 +4156,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4052 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); 4156 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
4053 4157
4054 /* find extent for this block */ 4158 /* find extent for this block */
4055 path = ext4_ext_find_extent(inode, map->m_lblk, NULL); 4159 path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0);
4056 if (IS_ERR(path)) { 4160 if (IS_ERR(path)) {
4057 err = PTR_ERR(path); 4161 err = PTR_ERR(path);
4058 path = NULL; 4162 path = NULL;
@@ -4744,6 +4848,12 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4744 return error; 4848 return error;
4745 } 4849 }
4746 4850
4851 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4852 error = ext4_ext_precache(inode);
4853 if (error)
4854 return error;
4855 }
4856
4747 /* fallback to generic here if not in extents fmt */ 4857 /* fallback to generic here if not in extents fmt */
4748 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 4858 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4749 return generic_block_fiemap(inode, fieinfo, start, len, 4859 return generic_block_fiemap(inode, fieinfo, start, len,
@@ -4771,6 +4881,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4771 error = ext4_fill_fiemap_extents(inode, start_blk, 4881 error = ext4_fill_fiemap_extents(inode, start_blk,
4772 len_blks, fieinfo); 4882 len_blks, fieinfo);
4773 } 4883 }
4774 4884 ext4_es_lru_add(inode);
4775 return error; 4885 return error;
4776} 4886}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 91cb110da1b4..2d1bdbe78c04 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -13,7 +13,6 @@
13#include <linux/list_sort.h> 13#include <linux/list_sort.h>
14#include "ext4.h" 14#include "ext4.h"
15#include "extents_status.h" 15#include "extents_status.h"
16#include "ext4_extents.h"
17 16
18#include <trace/events/ext4.h> 17#include <trace/events/ext4.h>
19 18
@@ -263,7 +262,7 @@ void ext4_es_find_delayed_extent_range(struct inode *inode,
263 if (tree->cache_es) { 262 if (tree->cache_es) {
264 es1 = tree->cache_es; 263 es1 = tree->cache_es;
265 if (in_range(lblk, es1->es_lblk, es1->es_len)) { 264 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
266 es_debug("%u cached by [%u/%u) %llu %llx\n", 265 es_debug("%u cached by [%u/%u) %llu %x\n",
267 lblk, es1->es_lblk, es1->es_len, 266 lblk, es1->es_lblk, es1->es_len,
268 ext4_es_pblock(es1), ext4_es_status(es1)); 267 ext4_es_pblock(es1), ext4_es_status(es1));
269 goto out; 268 goto out;
@@ -409,6 +408,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
409} 408}
410 409
411#ifdef ES_AGGRESSIVE_TEST 410#ifdef ES_AGGRESSIVE_TEST
411#include "ext4_extents.h" /* Needed when ES_AGGRESSIVE_TEST is defined */
412
412static void ext4_es_insert_extent_ext_check(struct inode *inode, 413static void ext4_es_insert_extent_ext_check(struct inode *inode,
413 struct extent_status *es) 414 struct extent_status *es)
414{ 415{
@@ -419,7 +420,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
419 unsigned short ee_len; 420 unsigned short ee_len;
420 int depth, ee_status, es_status; 421 int depth, ee_status, es_status;
421 422
422 path = ext4_ext_find_extent(inode, es->es_lblk, NULL); 423 path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
423 if (IS_ERR(path)) 424 if (IS_ERR(path))
424 return; 425 return;
425 426
@@ -641,13 +642,13 @@ out:
641 */ 642 */
642int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, 643int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
643 ext4_lblk_t len, ext4_fsblk_t pblk, 644 ext4_lblk_t len, ext4_fsblk_t pblk,
644 unsigned long long status) 645 unsigned int status)
645{ 646{
646 struct extent_status newes; 647 struct extent_status newes;
647 ext4_lblk_t end = lblk + len - 1; 648 ext4_lblk_t end = lblk + len - 1;
648 int err = 0; 649 int err = 0;
649 650
650 es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n", 651 es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
651 lblk, len, pblk, status, inode->i_ino); 652 lblk, len, pblk, status, inode->i_ino);
652 653
653 if (!len) 654 if (!len)
@@ -684,6 +685,38 @@ error:
684} 685}
685 686
686/* 687/*
688 * ext4_es_cache_extent() inserts information into the extent status
689 * tree if and only if there isn't information about the range in
690 * question already.
691 */
692void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
693 ext4_lblk_t len, ext4_fsblk_t pblk,
694 unsigned int status)
695{
696 struct extent_status *es;
697 struct extent_status newes;
698 ext4_lblk_t end = lblk + len - 1;
699
700 newes.es_lblk = lblk;
701 newes.es_len = len;
702 ext4_es_store_pblock(&newes, pblk);
703 ext4_es_store_status(&newes, status);
704 trace_ext4_es_cache_extent(inode, &newes);
705
706 if (!len)
707 return;
708
709 BUG_ON(end < lblk);
710
711 write_lock(&EXT4_I(inode)->i_es_lock);
712
713 es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
714 if (!es || es->es_lblk > end)
715 __es_insert_extent(inode, &newes);
716 write_unlock(&EXT4_I(inode)->i_es_lock);
717}
718
719/*
687 * ext4_es_lookup_extent() looks up an extent in extent status tree. 720 * ext4_es_lookup_extent() looks up an extent in extent status tree.
688 * 721 *
689 * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks. 722 * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
@@ -871,23 +904,6 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
871 return err; 904 return err;
872} 905}
873 906
874int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
875{
876 ext4_lblk_t ee_block;
877 ext4_fsblk_t ee_pblock;
878 unsigned int ee_len;
879
880 ee_block = le32_to_cpu(ex->ee_block);
881 ee_len = ext4_ext_get_actual_len(ex);
882 ee_pblock = ext4_ext_pblock(ex);
883
884 if (ee_len == 0)
885 return 0;
886
887 return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
888 EXTENT_STATUS_WRITTEN);
889}
890
891static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, 907static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
892 struct list_head *b) 908 struct list_head *b)
893{ 909{
@@ -895,6 +911,12 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
895 eia = list_entry(a, struct ext4_inode_info, i_es_lru); 911 eia = list_entry(a, struct ext4_inode_info, i_es_lru);
896 eib = list_entry(b, struct ext4_inode_info, i_es_lru); 912 eib = list_entry(b, struct ext4_inode_info, i_es_lru);
897 913
914 if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
915 !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
916 return 1;
917 if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
918 ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
919 return -1;
898 if (eia->i_touch_when == eib->i_touch_when) 920 if (eia->i_touch_when == eib->i_touch_when)
899 return 0; 921 return 0;
900 if (time_after(eia->i_touch_when, eib->i_touch_when)) 922 if (time_after(eia->i_touch_when, eib->i_touch_when))
@@ -908,21 +930,13 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
908{ 930{
909 struct ext4_inode_info *ei; 931 struct ext4_inode_info *ei;
910 struct list_head *cur, *tmp; 932 struct list_head *cur, *tmp;
911 LIST_HEAD(skiped); 933 LIST_HEAD(skipped);
912 int ret, nr_shrunk = 0; 934 int ret, nr_shrunk = 0;
935 int retried = 0, skip_precached = 1, nr_skipped = 0;
913 936
914 spin_lock(&sbi->s_es_lru_lock); 937 spin_lock(&sbi->s_es_lru_lock);
915 938
916 /* 939retry:
917 * If the inode that is at the head of LRU list is newer than
918 * last_sorted time, that means that we need to sort this list.
919 */
920 ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru);
921 if (sbi->s_es_last_sorted < ei->i_touch_when) {
922 list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
923 sbi->s_es_last_sorted = jiffies;
924 }
925
926 list_for_each_safe(cur, tmp, &sbi->s_es_lru) { 940 list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
927 /* 941 /*
928 * If we have already reclaimed all extents from extent 942 * If we have already reclaimed all extents from extent
@@ -933,9 +947,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
933 947
934 ei = list_entry(cur, struct ext4_inode_info, i_es_lru); 948 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
935 949
936 /* Skip the inode that is newer than the last_sorted time */ 950 /*
937 if (sbi->s_es_last_sorted < ei->i_touch_when) { 951 * Skip the inode that is newer than the last_sorted
938 list_move_tail(cur, &skiped); 952 * time. Normally we try hard to avoid shrinking
953 * precached inodes, but we will as a last resort.
954 */
955 if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
956 (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
957 EXT4_STATE_EXT_PRECACHED))) {
958 nr_skipped++;
959 list_move_tail(cur, &skipped);
939 continue; 960 continue;
940 } 961 }
941 962
@@ -955,11 +976,33 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
955 } 976 }
956 977
957 /* Move the newer inodes into the tail of the LRU list. */ 978 /* Move the newer inodes into the tail of the LRU list. */
958 list_splice_tail(&skiped, &sbi->s_es_lru); 979 list_splice_tail(&skipped, &sbi->s_es_lru);
980 INIT_LIST_HEAD(&skipped);
981
982 /*
983 * If we skipped any inodes, and we weren't able to make any
984 * forward progress, sort the list and try again.
985 */
986 if ((nr_shrunk == 0) && nr_skipped && !retried) {
987 retried++;
988 list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
989 sbi->s_es_last_sorted = jiffies;
990 ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
991 i_es_lru);
992 /*
993 * If there are no non-precached inodes left on the
994 * list, start releasing precached extents.
995 */
996 if (ext4_test_inode_state(&ei->vfs_inode,
997 EXT4_STATE_EXT_PRECACHED))
998 skip_precached = 0;
999 goto retry;
1000 }
1001
959 spin_unlock(&sbi->s_es_lru_lock); 1002 spin_unlock(&sbi->s_es_lru_lock);
960 1003
961 if (locked_ei && nr_shrunk == 0) 1004 if (locked_ei && nr_shrunk == 0)
962 nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); 1005 nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
963 1006
964 return nr_shrunk; 1007 return nr_shrunk;
965} 1008}
@@ -1034,10 +1077,16 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
1034 struct rb_node *node; 1077 struct rb_node *node;
1035 struct extent_status *es; 1078 struct extent_status *es;
1036 int nr_shrunk = 0; 1079 int nr_shrunk = 0;
1080 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
1081 DEFAULT_RATELIMIT_BURST);
1037 1082
1038 if (ei->i_es_lru_nr == 0) 1083 if (ei->i_es_lru_nr == 0)
1039 return 0; 1084 return 0;
1040 1085
1086 if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
1087 __ratelimit(&_rs))
1088 ext4_warning(inode->i_sb, "forced shrink of precached extents");
1089
1041 node = rb_first(&tree->root); 1090 node = rb_first(&tree->root);
1042 while (node != NULL) { 1091 while (node != NULL) {
1043 es = rb_entry(node, struct extent_status, rb_node); 1092 es = rb_entry(node, struct extent_status, rb_node);
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index e936730cc5b0..167f4ab8ecc3 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -29,16 +29,26 @@
29/* 29/*
30 * These flags live in the high bits of extent_status.es_pblk 30 * These flags live in the high bits of extent_status.es_pblk
31 */ 31 */
32#define EXTENT_STATUS_WRITTEN (1ULL << 63) 32#define ES_SHIFT 60
33#define EXTENT_STATUS_UNWRITTEN (1ULL << 62) 33
34#define EXTENT_STATUS_DELAYED (1ULL << 61) 34#define EXTENT_STATUS_WRITTEN (1 << 3)
35#define EXTENT_STATUS_HOLE (1ULL << 60) 35#define EXTENT_STATUS_UNWRITTEN (1 << 2)
36#define EXTENT_STATUS_DELAYED (1 << 1)
37#define EXTENT_STATUS_HOLE (1 << 0)
36 38
37#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \ 39#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
38 EXTENT_STATUS_UNWRITTEN | \ 40 EXTENT_STATUS_UNWRITTEN | \
39 EXTENT_STATUS_DELAYED | \ 41 EXTENT_STATUS_DELAYED | \
40 EXTENT_STATUS_HOLE) 42 EXTENT_STATUS_HOLE)
41 43
44#define ES_WRITTEN (1ULL << 63)
45#define ES_UNWRITTEN (1ULL << 62)
46#define ES_DELAYED (1ULL << 61)
47#define ES_HOLE (1ULL << 60)
48
49#define ES_MASK (ES_WRITTEN | ES_UNWRITTEN | \
50 ES_DELAYED | ES_HOLE)
51
42struct ext4_sb_info; 52struct ext4_sb_info;
43struct ext4_extent; 53struct ext4_extent;
44 54
@@ -60,7 +70,10 @@ extern void ext4_es_init_tree(struct ext4_es_tree *tree);
60 70
61extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, 71extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
62 ext4_lblk_t len, ext4_fsblk_t pblk, 72 ext4_lblk_t len, ext4_fsblk_t pblk,
63 unsigned long long status); 73 unsigned int status);
74extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
75 ext4_lblk_t len, ext4_fsblk_t pblk,
76 unsigned int status);
64extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, 77extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
65 ext4_lblk_t len); 78 ext4_lblk_t len);
66extern void ext4_es_find_delayed_extent_range(struct inode *inode, 79extern void ext4_es_find_delayed_extent_range(struct inode *inode,
@@ -68,36 +81,35 @@ extern void ext4_es_find_delayed_extent_range(struct inode *inode,
68 struct extent_status *es); 81 struct extent_status *es);
69extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, 82extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
70 struct extent_status *es); 83 struct extent_status *es);
71extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
72 84
73static inline int ext4_es_is_written(struct extent_status *es) 85static inline int ext4_es_is_written(struct extent_status *es)
74{ 86{
75 return (es->es_pblk & EXTENT_STATUS_WRITTEN) != 0; 87 return (es->es_pblk & ES_WRITTEN) != 0;
76} 88}
77 89
78static inline int ext4_es_is_unwritten(struct extent_status *es) 90static inline int ext4_es_is_unwritten(struct extent_status *es)
79{ 91{
80 return (es->es_pblk & EXTENT_STATUS_UNWRITTEN) != 0; 92 return (es->es_pblk & ES_UNWRITTEN) != 0;
81} 93}
82 94
83static inline int ext4_es_is_delayed(struct extent_status *es) 95static inline int ext4_es_is_delayed(struct extent_status *es)
84{ 96{
85 return (es->es_pblk & EXTENT_STATUS_DELAYED) != 0; 97 return (es->es_pblk & ES_DELAYED) != 0;
86} 98}
87 99
88static inline int ext4_es_is_hole(struct extent_status *es) 100static inline int ext4_es_is_hole(struct extent_status *es)
89{ 101{
90 return (es->es_pblk & EXTENT_STATUS_HOLE) != 0; 102 return (es->es_pblk & ES_HOLE) != 0;
91} 103}
92 104
93static inline ext4_fsblk_t ext4_es_status(struct extent_status *es) 105static inline unsigned int ext4_es_status(struct extent_status *es)
94{ 106{
95 return (es->es_pblk & EXTENT_STATUS_FLAGS); 107 return es->es_pblk >> ES_SHIFT;
96} 108}
97 109
98static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es) 110static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
99{ 111{
100 return (es->es_pblk & ~EXTENT_STATUS_FLAGS); 112 return es->es_pblk & ~ES_MASK;
101} 113}
102 114
103static inline void ext4_es_store_pblock(struct extent_status *es, 115static inline void ext4_es_store_pblock(struct extent_status *es,
@@ -105,19 +117,16 @@ static inline void ext4_es_store_pblock(struct extent_status *es,
105{ 117{
106 ext4_fsblk_t block; 118 ext4_fsblk_t block;
107 119
108 block = (pb & ~EXTENT_STATUS_FLAGS) | 120 block = (pb & ~ES_MASK) | (es->es_pblk & ES_MASK);
109 (es->es_pblk & EXTENT_STATUS_FLAGS);
110 es->es_pblk = block; 121 es->es_pblk = block;
111} 122}
112 123
113static inline void ext4_es_store_status(struct extent_status *es, 124static inline void ext4_es_store_status(struct extent_status *es,
114 unsigned long long status) 125 unsigned int status)
115{ 126{
116 ext4_fsblk_t block; 127 es->es_pblk = (((ext4_fsblk_t)
117 128 (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
118 block = (status & EXTENT_STATUS_FLAGS) | 129 (es->es_pblk & ~ES_MASK));
119 (es->es_pblk & ~EXTENT_STATUS_FLAGS);
120 es->es_pblk = block;
121} 130}
122 131
123extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); 132extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 8bf5999875ee..137193ff389b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -70,18 +70,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
70 ext4_group_t block_group, 70 ext4_group_t block_group,
71 struct ext4_group_desc *gdp) 71 struct ext4_group_desc *gdp)
72{ 72{
73 struct ext4_group_info *grp;
73 J_ASSERT_BH(bh, buffer_locked(bh)); 74 J_ASSERT_BH(bh, buffer_locked(bh));
74 75
75 /* If checksum is bad mark all blocks and inodes use to prevent 76 /* If checksum is bad mark all blocks and inodes use to prevent
76 * allocation, essentially implementing a per-group read-only flag. */ 77 * allocation, essentially implementing a per-group read-only flag. */
77 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 78 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
78 ext4_error(sb, "Checksum bad for group %u", block_group); 79 ext4_error(sb, "Checksum bad for group %u", block_group);
79 ext4_free_group_clusters_set(sb, gdp, 0); 80 grp = ext4_get_group_info(sb, block_group);
80 ext4_free_inodes_set(sb, gdp, 0); 81 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
81 ext4_itable_unused_set(sb, gdp, 0); 82 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
82 memset(bh->b_data, 0xff, sb->s_blocksize);
83 ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
84 EXT4_INODES_PER_GROUP(sb) / 8);
85 return 0; 83 return 0;
86 } 84 }
87 85
@@ -117,6 +115,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
117 struct ext4_group_desc *desc; 115 struct ext4_group_desc *desc;
118 struct buffer_head *bh = NULL; 116 struct buffer_head *bh = NULL;
119 ext4_fsblk_t bitmap_blk; 117 ext4_fsblk_t bitmap_blk;
118 struct ext4_group_info *grp;
120 119
121 desc = ext4_get_group_desc(sb, block_group, NULL); 120 desc = ext4_get_group_desc(sb, block_group, NULL);
122 if (!desc) 121 if (!desc)
@@ -185,6 +184,8 @@ verify:
185 put_bh(bh); 184 put_bh(bh);
186 ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " 185 ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
187 "inode_bitmap = %llu", block_group, bitmap_blk); 186 "inode_bitmap = %llu", block_group, bitmap_blk);
187 grp = ext4_get_group_info(sb, block_group);
188 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
188 return NULL; 189 return NULL;
189 } 190 }
190 ext4_unlock_group(sb, block_group); 191 ext4_unlock_group(sb, block_group);
@@ -221,6 +222,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
221 struct ext4_super_block *es; 222 struct ext4_super_block *es;
222 struct ext4_sb_info *sbi; 223 struct ext4_sb_info *sbi;
223 int fatal = 0, err, count, cleared; 224 int fatal = 0, err, count, cleared;
225 struct ext4_group_info *grp;
224 226
225 if (!sb) { 227 if (!sb) {
226 printk(KERN_ERR "EXT4-fs: %s:%d: inode on " 228 printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
@@ -266,7 +268,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
266 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 268 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
267 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 269 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
268 bitmap_bh = ext4_read_inode_bitmap(sb, block_group); 270 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
269 if (!bitmap_bh) 271 /* Don't bother if the inode bitmap is corrupt. */
272 grp = ext4_get_group_info(sb, block_group);
273 if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || !bitmap_bh)
270 goto error_return; 274 goto error_return;
271 275
272 BUFFER_TRACE(bitmap_bh, "get_write_access"); 276 BUFFER_TRACE(bitmap_bh, "get_write_access");
@@ -315,8 +319,10 @@ out:
315 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 319 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
316 if (!fatal) 320 if (!fatal)
317 fatal = err; 321 fatal = err;
318 } else 322 } else {
319 ext4_error(sb, "bit already cleared for inode %lu", ino); 323 ext4_error(sb, "bit already cleared for inode %lu", ino);
324 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
325 }
320 326
321error_return: 327error_return:
322 brelse(bitmap_bh); 328 brelse(bitmap_bh);
@@ -625,6 +631,51 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
625} 631}
626 632
627/* 633/*
634 * In no journal mode, if an inode has recently been deleted, we want
635 * to avoid reusing it until we're reasonably sure the inode table
636 * block has been written back to disk. (Yes, these values are
637 * somewhat arbitrary...)
638 */
639#define RECENTCY_MIN 5
640#define RECENTCY_DIRTY 30
641
642static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
643{
644 struct ext4_group_desc *gdp;
645 struct ext4_inode *raw_inode;
646 struct buffer_head *bh;
647 unsigned long dtime, now;
648 int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
649 int offset, ret = 0, recentcy = RECENTCY_MIN;
650
651 gdp = ext4_get_group_desc(sb, group, NULL);
652 if (unlikely(!gdp))
653 return 0;
654
655 bh = sb_getblk(sb, ext4_inode_table(sb, gdp) +
656 (ino / inodes_per_block));
657 if (unlikely(!bh) || !buffer_uptodate(bh))
658 /*
659 * If the block is not in the buffer cache, then it
660 * must have been written out.
661 */
662 goto out;
663
664 offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
665 raw_inode = (struct ext4_inode *) (bh->b_data + offset);
666 dtime = le32_to_cpu(raw_inode->i_dtime);
667 now = get_seconds();
668 if (buffer_dirty(bh))
669 recentcy += RECENTCY_DIRTY;
670
671 if (dtime && (dtime < now) && (now < dtime + recentcy))
672 ret = 1;
673out:
674 brelse(bh);
675 return ret;
676}
677
678/*
628 * There are two policies for allocating an inode. If the new inode is 679 * There are two policies for allocating an inode. If the new inode is
629 * a directory, then a forward search is made for a block group with both 680 * a directory, then a forward search is made for a block group with both
630 * free space and a low directory-to-inode ratio; if that fails, then of 681 * free space and a low directory-to-inode ratio; if that fails, then of
@@ -652,6 +703,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
652 struct inode *ret; 703 struct inode *ret;
653 ext4_group_t i; 704 ext4_group_t i;
654 ext4_group_t flex_group; 705 ext4_group_t flex_group;
706 struct ext4_group_info *grp;
655 707
656 /* Cannot create files in a deleted directory */ 708 /* Cannot create files in a deleted directory */
657 if (!dir || !dir->i_nlink) 709 if (!dir || !dir->i_nlink)
@@ -725,10 +777,22 @@ got_group:
725 continue; 777 continue;
726 } 778 }
727 779
780 grp = ext4_get_group_info(sb, group);
781 /* Skip groups with already-known suspicious inode tables */
782 if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
783 if (++group == ngroups)
784 group = 0;
785 continue;
786 }
787
728 brelse(inode_bitmap_bh); 788 brelse(inode_bitmap_bh);
729 inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); 789 inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
730 if (!inode_bitmap_bh) 790 /* Skip groups with suspicious inode tables */
731 goto out; 791 if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || !inode_bitmap_bh) {
792 if (++group == ngroups)
793 group = 0;
794 continue;
795 }
732 796
733repeat_in_this_group: 797repeat_in_this_group:
734 ino = ext4_find_next_zero_bit((unsigned long *) 798 ino = ext4_find_next_zero_bit((unsigned long *)
@@ -741,6 +805,11 @@ repeat_in_this_group:
741 "inode=%lu", ino + 1); 805 "inode=%lu", ino + 1);
742 continue; 806 continue;
743 } 807 }
808 if ((EXT4_SB(sb)->s_journal == NULL) &&
809 recently_deleted(sb, group, ino)) {
810 ino++;
811 goto next_inode;
812 }
744 if (!handle) { 813 if (!handle) {
745 BUG_ON(nblocks <= 0); 814 BUG_ON(nblocks <= 0);
746 handle = __ext4_journal_start_sb(dir->i_sb, line_no, 815 handle = __ext4_journal_start_sb(dir->i_sb, line_no,
@@ -764,6 +833,7 @@ repeat_in_this_group:
764 ino++; /* the inode bitmap is zero-based */ 833 ino++; /* the inode bitmap is zero-based */
765 if (!ret2) 834 if (!ret2)
766 goto got; /* we grabbed the inode! */ 835 goto got; /* we grabbed the inode! */
836next_inode:
767 if (ino < EXT4_INODES_PER_GROUP(sb)) 837 if (ino < EXT4_INODES_PER_GROUP(sb))
768 goto repeat_in_this_group; 838 goto repeat_in_this_group;
769next_group: 839next_group:
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 87b30cd357e7..594009f5f523 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -23,7 +23,6 @@
23#include <linux/aio.h> 23#include <linux/aio.h>
24#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
25#include "truncate.h" 25#include "truncate.h"
26#include "ext4_extents.h" /* Needed for EXT_MAX_BLOCKS */
27 26
28#include <trace/events/ext4.h> 27#include <trace/events/ext4.h>
29 28
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c2ca04e67a4f..9115f2807515 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -553,7 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
553 } 553 }
554 if (retval > 0) { 554 if (retval > 0) {
555 int ret; 555 int ret;
556 unsigned long long status; 556 unsigned int status;
557 557
558 if (unlikely(retval != map->m_len)) { 558 if (unlikely(retval != map->m_len)) {
559 ext4_warning(inode->i_sb, 559 ext4_warning(inode->i_sb,
@@ -653,7 +653,7 @@ found:
653 653
654 if (retval > 0) { 654 if (retval > 0) {
655 int ret; 655 int ret;
656 unsigned long long status; 656 unsigned int status;
657 657
658 if (unlikely(retval != map->m_len)) { 658 if (unlikely(retval != map->m_len)) {
659 ext4_warning(inode->i_sb, 659 ext4_warning(inode->i_sb,
@@ -969,7 +969,8 @@ retry_journal:
969 ext4_journal_stop(handle); 969 ext4_journal_stop(handle);
970 goto retry_grab; 970 goto retry_grab;
971 } 971 }
972 wait_on_page_writeback(page); 972 /* In case writeback began while the page was unlocked */
973 wait_for_stable_page(page);
973 974
974 if (ext4_should_dioread_nolock(inode)) 975 if (ext4_should_dioread_nolock(inode))
975 ret = __block_write_begin(page, pos, len, ext4_get_block_write); 976 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
@@ -1633,7 +1634,7 @@ add_delayed:
1633 set_buffer_delay(bh); 1634 set_buffer_delay(bh);
1634 } else if (retval > 0) { 1635 } else if (retval > 0) {
1635 int ret; 1636 int ret;
1636 unsigned long long status; 1637 unsigned int status;
1637 1638
1638 if (unlikely(retval != map->m_len)) { 1639 if (unlikely(retval != map->m_len)) {
1639 ext4_warning(inode->i_sb, 1640 ext4_warning(inode->i_sb,
@@ -1890,12 +1891,32 @@ static int ext4_writepage(struct page *page,
1890 return ret; 1891 return ret;
1891} 1892}
1892 1893
1894static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
1895{
1896 int len;
1897 loff_t size = i_size_read(mpd->inode);
1898 int err;
1899
1900 BUG_ON(page->index != mpd->first_page);
1901 if (page->index == size >> PAGE_CACHE_SHIFT)
1902 len = size & ~PAGE_CACHE_MASK;
1903 else
1904 len = PAGE_CACHE_SIZE;
1905 clear_page_dirty_for_io(page);
1906 err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
1907 if (!err)
1908 mpd->wbc->nr_to_write--;
1909 mpd->first_page++;
1910
1911 return err;
1912}
1913
1893#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) 1914#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
1894 1915
1895/* 1916/*
1896 * mballoc gives us at most this number of blocks... 1917 * mballoc gives us at most this number of blocks...
1897 * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). 1918 * XXX: That seems to be only a limitation of ext4_mb_normalize_request().
1898 * The rest of mballoc seems to handle chunks upto full group size. 1919 * The rest of mballoc seems to handle chunks up to full group size.
1899 */ 1920 */
1900#define MAX_WRITEPAGES_EXTENT_LEN 2048 1921#define MAX_WRITEPAGES_EXTENT_LEN 2048
1901 1922
@@ -1904,82 +1925,94 @@ static int ext4_writepage(struct page *page,
1904 * 1925 *
1905 * @mpd - extent of blocks 1926 * @mpd - extent of blocks
1906 * @lblk - logical number of the block in the file 1927 * @lblk - logical number of the block in the file
1907 * @b_state - b_state of the buffer head added 1928 * @bh - buffer head we want to add to the extent
1908 * 1929 *
1909 * the function is used to collect contig. blocks in same state 1930 * The function is used to collect contig. blocks in the same state. If the
1931 * buffer doesn't require mapping for writeback and we haven't started the
1932 * extent of buffers to map yet, the function returns 'true' immediately - the
1933 * caller can write the buffer right away. Otherwise the function returns true
1934 * if the block has been added to the extent, false if the block couldn't be
1935 * added.
1910 */ 1936 */
1911static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, 1937static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
1912 unsigned long b_state) 1938 struct buffer_head *bh)
1913{ 1939{
1914 struct ext4_map_blocks *map = &mpd->map; 1940 struct ext4_map_blocks *map = &mpd->map;
1915 1941
1916 /* Don't go larger than mballoc is willing to allocate */ 1942 /* Buffer that doesn't need mapping for writeback? */
1917 if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) 1943 if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
1918 return 0; 1944 (!buffer_delay(bh) && !buffer_unwritten(bh))) {
1945 /* So far no extent to map => we write the buffer right away */
1946 if (map->m_len == 0)
1947 return true;
1948 return false;
1949 }
1919 1950
1920 /* First block in the extent? */ 1951 /* First block in the extent? */
1921 if (map->m_len == 0) { 1952 if (map->m_len == 0) {
1922 map->m_lblk = lblk; 1953 map->m_lblk = lblk;
1923 map->m_len = 1; 1954 map->m_len = 1;
1924 map->m_flags = b_state & BH_FLAGS; 1955 map->m_flags = bh->b_state & BH_FLAGS;
1925 return 1; 1956 return true;
1926 } 1957 }
1927 1958
1959 /* Don't go larger than mballoc is willing to allocate */
1960 if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
1961 return false;
1962
1928 /* Can we merge the block to our big extent? */ 1963 /* Can we merge the block to our big extent? */
1929 if (lblk == map->m_lblk + map->m_len && 1964 if (lblk == map->m_lblk + map->m_len &&
1930 (b_state & BH_FLAGS) == map->m_flags) { 1965 (bh->b_state & BH_FLAGS) == map->m_flags) {
1931 map->m_len++; 1966 map->m_len++;
1932 return 1; 1967 return true;
1933 } 1968 }
1934 return 0; 1969 return false;
1935} 1970}
1936 1971
1937static bool add_page_bufs_to_extent(struct mpage_da_data *mpd, 1972/*
1938 struct buffer_head *head, 1973 * mpage_process_page_bufs - submit page buffers for IO or add them to extent
1939 struct buffer_head *bh, 1974 *
1940 ext4_lblk_t lblk) 1975 * @mpd - extent of blocks for mapping
1976 * @head - the first buffer in the page
1977 * @bh - buffer we should start processing from
1978 * @lblk - logical number of the block in the file corresponding to @bh
1979 *
1980 * Walk through page buffers from @bh upto @head (exclusive) and either submit
1981 * the page for IO if all buffers in this page were mapped and there's no
1982 * accumulated extent of buffers to map or add buffers in the page to the
1983 * extent of buffers to map. The function returns 1 if the caller can continue
1984 * by processing the next page, 0 if it should stop adding buffers to the
1985 * extent to map because we cannot extend it anymore. It can also return value
1986 * < 0 in case of error during IO submission.
1987 */
1988static int mpage_process_page_bufs(struct mpage_da_data *mpd,
1989 struct buffer_head *head,
1990 struct buffer_head *bh,
1991 ext4_lblk_t lblk)
1941{ 1992{
1942 struct inode *inode = mpd->inode; 1993 struct inode *inode = mpd->inode;
1994 int err;
1943 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) 1995 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
1944 >> inode->i_blkbits; 1996 >> inode->i_blkbits;
1945 1997
1946 do { 1998 do {
1947 BUG_ON(buffer_locked(bh)); 1999 BUG_ON(buffer_locked(bh));
1948 2000
1949 if (!buffer_dirty(bh) || !buffer_mapped(bh) || 2001 if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
1950 (!buffer_delay(bh) && !buffer_unwritten(bh)) ||
1951 lblk >= blocks) {
1952 /* Found extent to map? */ 2002 /* Found extent to map? */
1953 if (mpd->map.m_len) 2003 if (mpd->map.m_len)
1954 return false; 2004 return 0;
1955 if (lblk >= blocks) 2005 /* Everything mapped so far and we hit EOF */
1956 return true; 2006 break;
1957 continue;
1958 } 2007 }
1959 if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state))
1960 return false;
1961 } while (lblk++, (bh = bh->b_this_page) != head); 2008 } while (lblk++, (bh = bh->b_this_page) != head);
1962 return true; 2009 /* So far everything mapped? Submit the page for IO. */
1963} 2010 if (mpd->map.m_len == 0) {
1964 2011 err = mpage_submit_page(mpd, head->b_page);
1965static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) 2012 if (err < 0)
1966{ 2013 return err;
1967 int len; 2014 }
1968 loff_t size = i_size_read(mpd->inode); 2015 return lblk < blocks;
1969 int err;
1970
1971 BUG_ON(page->index != mpd->first_page);
1972 if (page->index == size >> PAGE_CACHE_SHIFT)
1973 len = size & ~PAGE_CACHE_MASK;
1974 else
1975 len = PAGE_CACHE_SIZE;
1976 clear_page_dirty_for_io(page);
1977 err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
1978 if (!err)
1979 mpd->wbc->nr_to_write--;
1980 mpd->first_page++;
1981
1982 return err;
1983} 2016}
1984 2017
1985/* 2018/*
@@ -2003,8 +2036,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2003 struct inode *inode = mpd->inode; 2036 struct inode *inode = mpd->inode;
2004 struct buffer_head *head, *bh; 2037 struct buffer_head *head, *bh;
2005 int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; 2038 int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
2006 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
2007 >> inode->i_blkbits;
2008 pgoff_t start, end; 2039 pgoff_t start, end;
2009 ext4_lblk_t lblk; 2040 ext4_lblk_t lblk;
2010 sector_t pblock; 2041 sector_t pblock;
@@ -2026,7 +2057,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2026 2057
2027 if (page->index > end) 2058 if (page->index > end)
2028 break; 2059 break;
2029 /* Upto 'end' pages must be contiguous */ 2060 /* Up to 'end' pages must be contiguous */
2030 BUG_ON(page->index != start); 2061 BUG_ON(page->index != start);
2031 bh = head = page_buffers(page); 2062 bh = head = page_buffers(page);
2032 do { 2063 do {
@@ -2039,18 +2070,26 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2039 */ 2070 */
2040 mpd->map.m_len = 0; 2071 mpd->map.m_len = 0;
2041 mpd->map.m_flags = 0; 2072 mpd->map.m_flags = 0;
2042 add_page_bufs_to_extent(mpd, head, bh, 2073 /*
2043 lblk); 2074 * FIXME: If dioread_nolock supports
2075 * blocksize < pagesize, we need to make
2076 * sure we add size mapped so far to
2077 * io_end->size as the following call
2078 * can submit the page for IO.
2079 */
2080 err = mpage_process_page_bufs(mpd, head,
2081 bh, lblk);
2044 pagevec_release(&pvec); 2082 pagevec_release(&pvec);
2045 return 0; 2083 if (err > 0)
2084 err = 0;
2085 return err;
2046 } 2086 }
2047 if (buffer_delay(bh)) { 2087 if (buffer_delay(bh)) {
2048 clear_buffer_delay(bh); 2088 clear_buffer_delay(bh);
2049 bh->b_blocknr = pblock++; 2089 bh->b_blocknr = pblock++;
2050 } 2090 }
2051 clear_buffer_unwritten(bh); 2091 clear_buffer_unwritten(bh);
2052 } while (++lblk < blocks && 2092 } while (lblk++, (bh = bh->b_this_page) != head);
2053 (bh = bh->b_this_page) != head);
2054 2093
2055 /* 2094 /*
2056 * FIXME: This is going to break if dioread_nolock 2095 * FIXME: This is going to break if dioread_nolock
@@ -2199,12 +2238,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2199 2238
2200 /* Update on-disk size after IO is submitted */ 2239 /* Update on-disk size after IO is submitted */
2201 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; 2240 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
2202 if (disksize > i_size_read(inode))
2203 disksize = i_size_read(inode);
2204 if (disksize > EXT4_I(inode)->i_disksize) { 2241 if (disksize > EXT4_I(inode)->i_disksize) {
2205 int err2; 2242 int err2;
2206 2243
2207 ext4_update_i_disksize(inode, disksize); 2244 ext4_wb_update_i_disksize(inode, disksize);
2208 err2 = ext4_mark_inode_dirty(handle, inode); 2245 err2 = ext4_mark_inode_dirty(handle, inode);
2209 if (err2) 2246 if (err2)
2210 ext4_error(inode->i_sb, 2247 ext4_error(inode->i_sb,
@@ -2219,7 +2256,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2219/* 2256/*
2220 * Calculate the total number of credits to reserve for one writepages 2257 * Calculate the total number of credits to reserve for one writepages
2221 * iteration. This is called from ext4_writepages(). We map an extent of 2258 * iteration. This is called from ext4_writepages(). We map an extent of
2222 * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping 2259 * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping
2223 * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + 2260 * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN +
2224 * bpp - 1 blocks in bpp different extents. 2261 * bpp - 1 blocks in bpp different extents.
2225 */ 2262 */
@@ -2319,14 +2356,10 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
2319 lblk = ((ext4_lblk_t)page->index) << 2356 lblk = ((ext4_lblk_t)page->index) <<
2320 (PAGE_CACHE_SHIFT - blkbits); 2357 (PAGE_CACHE_SHIFT - blkbits);
2321 head = page_buffers(page); 2358 head = page_buffers(page);
2322 if (!add_page_bufs_to_extent(mpd, head, head, lblk)) 2359 err = mpage_process_page_bufs(mpd, head, head, lblk);
2360 if (err <= 0)
2323 goto out; 2361 goto out;
2324 /* So far everything mapped? Submit the page for IO. */ 2362 err = 0;
2325 if (mpd->map.m_len == 0) {
2326 err = mpage_submit_page(mpd, page);
2327 if (err < 0)
2328 goto out;
2329 }
2330 2363
2331 /* 2364 /*
2332 * Accumulated enough dirty pages? This doesn't apply 2365 * Accumulated enough dirty pages? This doesn't apply
@@ -2410,7 +2443,7 @@ static int ext4_writepages(struct address_space *mapping,
2410 2443
2411 if (ext4_should_dioread_nolock(inode)) { 2444 if (ext4_should_dioread_nolock(inode)) {
2412 /* 2445 /*
2413 * We may need to convert upto one extent per block in 2446 * We may need to convert up to one extent per block in
2414 * the page and we may dirty the inode. 2447 * the page and we may dirty the inode.
2415 */ 2448 */
2416 rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); 2449 rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
@@ -2646,7 +2679,7 @@ retry_journal:
2646 goto retry_grab; 2679 goto retry_grab;
2647 } 2680 }
2648 /* In case writeback began while the page was unlocked */ 2681 /* In case writeback began while the page was unlocked */
2649 wait_on_page_writeback(page); 2682 wait_for_stable_page(page);
2650 2683
2651 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); 2684 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
2652 if (ret < 0) { 2685 if (ret < 0) {
@@ -4566,7 +4599,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4566 ext4_journal_stop(handle); 4599 ext4_journal_stop(handle);
4567 } 4600 }
4568 4601
4569 if (attr->ia_valid & ATTR_SIZE) { 4602 if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
4603 handle_t *handle;
4604 loff_t oldsize = inode->i_size;
4570 4605
4571 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 4606 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4572 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 4607 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -4574,73 +4609,69 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4574 if (attr->ia_size > sbi->s_bitmap_maxbytes) 4609 if (attr->ia_size > sbi->s_bitmap_maxbytes)
4575 return -EFBIG; 4610 return -EFBIG;
4576 } 4611 }
4577 } 4612 if (S_ISREG(inode->i_mode) &&
4578 4613 (attr->ia_size < inode->i_size)) {
4579 if (S_ISREG(inode->i_mode) && 4614 if (ext4_should_order_data(inode)) {
4580 attr->ia_valid & ATTR_SIZE && 4615 error = ext4_begin_ordered_truncate(inode,
4581 (attr->ia_size < inode->i_size)) {
4582 handle_t *handle;
4583
4584 handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
4585 if (IS_ERR(handle)) {
4586 error = PTR_ERR(handle);
4587 goto err_out;
4588 }
4589 if (ext4_handle_valid(handle)) {
4590 error = ext4_orphan_add(handle, inode);
4591 orphan = 1;
4592 }
4593 EXT4_I(inode)->i_disksize = attr->ia_size;
4594 rc = ext4_mark_inode_dirty(handle, inode);
4595 if (!error)
4596 error = rc;
4597 ext4_journal_stop(handle);
4598
4599 if (ext4_should_order_data(inode)) {
4600 error = ext4_begin_ordered_truncate(inode,
4601 attr->ia_size); 4616 attr->ia_size);
4602 if (error) { 4617 if (error)
4603 /* Do as much error cleanup as possible */
4604 handle = ext4_journal_start(inode,
4605 EXT4_HT_INODE, 3);
4606 if (IS_ERR(handle)) {
4607 ext4_orphan_del(NULL, inode);
4608 goto err_out; 4618 goto err_out;
4609 } 4619 }
4610 ext4_orphan_del(handle, inode); 4620 handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
4611 orphan = 0; 4621 if (IS_ERR(handle)) {
4612 ext4_journal_stop(handle); 4622 error = PTR_ERR(handle);
4613 goto err_out; 4623 goto err_out;
4614 } 4624 }
4615 } 4625 if (ext4_handle_valid(handle)) {
4616 } 4626 error = ext4_orphan_add(handle, inode);
4617 4627 orphan = 1;
4618 if (attr->ia_valid & ATTR_SIZE) {
4619 if (attr->ia_size != inode->i_size) {
4620 loff_t oldsize = inode->i_size;
4621
4622 i_size_write(inode, attr->ia_size);
4623 /*
4624 * Blocks are going to be removed from the inode. Wait
4625 * for dio in flight. Temporarily disable
4626 * dioread_nolock to prevent livelock.
4627 */
4628 if (orphan) {
4629 if (!ext4_should_journal_data(inode)) {
4630 ext4_inode_block_unlocked_dio(inode);
4631 inode_dio_wait(inode);
4632 ext4_inode_resume_unlocked_dio(inode);
4633 } else
4634 ext4_wait_for_tail_page_commit(inode);
4635 } 4628 }
4629 down_write(&EXT4_I(inode)->i_data_sem);
4630 EXT4_I(inode)->i_disksize = attr->ia_size;
4631 rc = ext4_mark_inode_dirty(handle, inode);
4632 if (!error)
4633 error = rc;
4636 /* 4634 /*
4637 * Truncate pagecache after we've waited for commit 4635 * We have to update i_size under i_data_sem together
4638 * in data=journal mode to make pages freeable. 4636 * with i_disksize to avoid races with writeback code
4637 * running ext4_wb_update_i_disksize().
4639 */ 4638 */
4640 truncate_pagecache(inode, oldsize, inode->i_size); 4639 if (!error)
4640 i_size_write(inode, attr->ia_size);
4641 up_write(&EXT4_I(inode)->i_data_sem);
4642 ext4_journal_stop(handle);
4643 if (error) {
4644 ext4_orphan_del(NULL, inode);
4645 goto err_out;
4646 }
4647 } else
4648 i_size_write(inode, attr->ia_size);
4649
4650 /*
4651 * Blocks are going to be removed from the inode. Wait
4652 * for dio in flight. Temporarily disable
4653 * dioread_nolock to prevent livelock.
4654 */
4655 if (orphan) {
4656 if (!ext4_should_journal_data(inode)) {
4657 ext4_inode_block_unlocked_dio(inode);
4658 inode_dio_wait(inode);
4659 ext4_inode_resume_unlocked_dio(inode);
4660 } else
4661 ext4_wait_for_tail_page_commit(inode);
4641 } 4662 }
4642 ext4_truncate(inode); 4663 /*
4664 * Truncate pagecache after we've waited for commit
4665 * in data=journal mode to make pages freeable.
4666 */
4667 truncate_pagecache(inode, oldsize, inode->i_size);
4643 } 4668 }
4669 /*
4670 * We want to call ext4_truncate() even if attr->ia_size ==
4671 * inode->i_size for cases like truncation of fallocated space
4672 */
4673 if (attr->ia_valid & ATTR_SIZE)
4674 ext4_truncate(inode);
4644 4675
4645 if (!rc) { 4676 if (!rc) {
4646 setattr_copy(inode, attr); 4677 setattr_copy(inode, attr);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index c0427e2f6648..a569d335f804 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -17,7 +17,6 @@
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include "ext4_jbd2.h" 18#include "ext4_jbd2.h"
19#include "ext4.h" 19#include "ext4.h"
20#include "ext4_extents.h"
21 20
22#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1) 21#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
23 22
@@ -624,6 +623,8 @@ resizefs_out:
624 623
625 return 0; 624 return 0;
626 } 625 }
626 case EXT4_IOC_PRECACHE_EXTENTS:
627 return ext4_ext_precache(inode);
627 628
628 default: 629 default:
629 return -ENOTTY; 630 return -ENOTTY;
@@ -688,6 +689,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
688 case EXT4_IOC_MOVE_EXT: 689 case EXT4_IOC_MOVE_EXT:
689 case FITRIM: 690 case FITRIM:
690 case EXT4_IOC_RESIZE_FS: 691 case EXT4_IOC_RESIZE_FS:
692 case EXT4_IOC_PRECACHE_EXTENTS:
691 break; 693 break;
692 default: 694 default:
693 return -ENOIOCTLCMD; 695 return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4bbbf13bd743..a41e3ba8cfaa 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -751,13 +751,15 @@ void ext4_mb_generate_buddy(struct super_block *sb,
751 751
752 if (free != grp->bb_free) { 752 if (free != grp->bb_free) {
753 ext4_grp_locked_error(sb, group, 0, 0, 753 ext4_grp_locked_error(sb, group, 0, 0,
754 "%u clusters in bitmap, %u in gd", 754 "%u clusters in bitmap, %u in gd; "
755 "block bitmap corrupt.",
755 free, grp->bb_free); 756 free, grp->bb_free);
756 /* 757 /*
757 * If we intent to continue, we consider group descritor 758 * If we intend to continue, we consider group descriptor
758 * corrupt and update bb_free using bitmap value 759 * corrupt and update bb_free using bitmap value
759 */ 760 */
760 grp->bb_free = free; 761 grp->bb_free = free;
762 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
761 } 763 }
762 mb_set_largest_free_order(sb, grp); 764 mb_set_largest_free_order(sb, grp);
763 765
@@ -1398,6 +1400,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1398 1400
1399 BUG_ON(last >= (sb->s_blocksize << 3)); 1401 BUG_ON(last >= (sb->s_blocksize << 3));
1400 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); 1402 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1403 /* Don't bother if the block group is corrupt. */
1404 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
1405 return;
1406
1401 mb_check_buddy(e4b); 1407 mb_check_buddy(e4b);
1402 mb_free_blocks_double(inode, e4b, first, count); 1408 mb_free_blocks_double(inode, e4b, first, count);
1403 1409
@@ -1423,7 +1429,11 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1423 inode ? inode->i_ino : 0, 1429 inode ? inode->i_ino : 0,
1424 blocknr, 1430 blocknr,
1425 "freeing already freed block " 1431 "freeing already freed block "
1426 "(bit %u)", block); 1432 "(bit %u); block bitmap corrupt.",
1433 block);
1434 /* Mark the block group as corrupt. */
1435 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
1436 &e4b->bd_info->bb_state);
1427 mb_regenerate_buddy(e4b); 1437 mb_regenerate_buddy(e4b);
1428 goto done; 1438 goto done;
1429 } 1439 }
@@ -1790,6 +1800,11 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1790 if (err) 1800 if (err)
1791 return err; 1801 return err;
1792 1802
1803 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
1804 ext4_mb_unload_buddy(e4b);
1805 return 0;
1806 }
1807
1793 ext4_lock_group(ac->ac_sb, group); 1808 ext4_lock_group(ac->ac_sb, group);
1794 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, 1809 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1795 ac->ac_g_ex.fe_len, &ex); 1810 ac->ac_g_ex.fe_len, &ex);
@@ -1987,6 +2002,9 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1987 if (cr <= 2 && free < ac->ac_g_ex.fe_len) 2002 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
1988 return 0; 2003 return 0;
1989 2004
2005 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
2006 return 0;
2007
1990 /* We only do this if the grp has never been initialized */ 2008 /* We only do this if the grp has never been initialized */
1991 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { 2009 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1992 int ret = ext4_mb_init_group(ac->ac_sb, group); 2010 int ret = ext4_mb_init_group(ac->ac_sb, group);
@@ -4585,6 +4603,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4585 struct buffer_head *gd_bh; 4603 struct buffer_head *gd_bh;
4586 ext4_group_t block_group; 4604 ext4_group_t block_group;
4587 struct ext4_sb_info *sbi; 4605 struct ext4_sb_info *sbi;
4606 struct ext4_inode_info *ei = EXT4_I(inode);
4588 struct ext4_buddy e4b; 4607 struct ext4_buddy e4b;
4589 unsigned int count_clusters; 4608 unsigned int count_clusters;
4590 int err = 0; 4609 int err = 0;
@@ -4673,6 +4692,10 @@ do_more:
4673 overflow = 0; 4692 overflow = 0;
4674 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 4693 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4675 4694
4695 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
4696 ext4_get_group_info(sb, block_group))))
4697 return;
4698
4676 /* 4699 /*
4677 * Check to see if we are freeing blocks across a group 4700 * Check to see if we are freeing blocks across a group
4678 * boundary. 4701 * boundary.
@@ -4784,7 +4807,6 @@ do_more:
4784 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh); 4807 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
4785 ext4_group_desc_csum_set(sb, block_group, gdp); 4808 ext4_group_desc_csum_set(sb, block_group, gdp);
4786 ext4_unlock_group(sb, block_group); 4809 ext4_unlock_group(sb, block_group);
4787 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4788 4810
4789 if (sbi->s_log_groups_per_flex) { 4811 if (sbi->s_log_groups_per_flex) {
4790 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4812 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
@@ -4792,10 +4814,23 @@ do_more:
4792 &sbi->s_flex_groups[flex_group].free_clusters); 4814 &sbi->s_flex_groups[flex_group].free_clusters);
4793 } 4815 }
4794 4816
4795 ext4_mb_unload_buddy(&e4b); 4817 if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) {
4796 4818 percpu_counter_add(&sbi->s_dirtyclusters_counter,
4797 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) 4819 count_clusters);
4820 spin_lock(&ei->i_block_reservation_lock);
4821 if (flags & EXT4_FREE_BLOCKS_METADATA)
4822 ei->i_reserved_meta_blocks += count_clusters;
4823 else
4824 ei->i_reserved_data_blocks += count_clusters;
4825 spin_unlock(&ei->i_block_reservation_lock);
4826 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4827 dquot_reclaim_block(inode,
4828 EXT4_C2B(sbi, count_clusters));
4829 } else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4798 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); 4830 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4831 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4832
4833 ext4_mb_unload_buddy(&e4b);
4799 4834
4800 /* We dirtied the bitmap block */ 4835 /* We dirtied the bitmap block */
4801 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4836 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 49e8bdff9163..2ae73a80c19b 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -39,7 +39,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
39 newext.ee_block = cpu_to_le32(lb->first_block); 39 newext.ee_block = cpu_to_le32(lb->first_block);
40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); 40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
41 ext4_ext_store_pblock(&newext, lb->first_pblock); 41 ext4_ext_store_pblock(&newext, lb->first_pblock);
42 path = ext4_ext_find_extent(inode, lb->first_block, NULL); 42 path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
43 43
44 if (IS_ERR(path)) { 44 if (IS_ERR(path)) {
45 retval = PTR_ERR(path); 45 retval = PTR_ERR(path);
@@ -494,7 +494,7 @@ int ext4_ext_migrate(struct inode *inode)
494 * superblock modification. 494 * superblock modification.
495 * 495 *
496 * For the tmp_inode we already have committed the 496 * For the tmp_inode we already have committed the
497 * trascation that created the inode. Later as and 497 * transaction that created the inode. Later as and
498 * when we add extents we extent the journal 498 * when we add extents we extent the journal
499 */ 499 */
500 /* 500 /*
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index e86dddbd8296..7fa4d855dbd5 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -37,7 +37,7 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
37 int ret = 0; 37 int ret = 0;
38 struct ext4_ext_path *path; 38 struct ext4_ext_path *path;
39 39
40 path = ext4_ext_find_extent(inode, lblock, *orig_path); 40 path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE);
41 if (IS_ERR(path)) 41 if (IS_ERR(path))
42 ret = PTR_ERR(path); 42 ret = PTR_ERR(path);
43 else if (path[ext_depth(inode)].p_ext == NULL) 43 else if (path[ext_depth(inode)].p_ext == NULL)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 35f55a0dbc4b..1bec5a5c1e45 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3005,15 +3005,19 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
3005/* 3005/*
3006 * Anybody can rename anything with this: the permission checks are left to the 3006 * Anybody can rename anything with this: the permission checks are left to the
3007 * higher-level routines. 3007 * higher-level routines.
3008 *
3009 * n.b. old_{dentry,inode) refers to the source dentry/inode
3010 * while new_{dentry,inode) refers to the destination dentry/inode
3011 * This comes from rename(const char *oldpath, const char *newpath)
3008 */ 3012 */
3009static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, 3013static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3010 struct inode *new_dir, struct dentry *new_dentry) 3014 struct inode *new_dir, struct dentry *new_dentry)
3011{ 3015{
3012 handle_t *handle; 3016 handle_t *handle = NULL;
3013 struct inode *old_inode, *new_inode; 3017 struct inode *old_inode, *new_inode;
3014 struct buffer_head *old_bh, *new_bh, *dir_bh; 3018 struct buffer_head *old_bh, *new_bh, *dir_bh;
3015 struct ext4_dir_entry_2 *old_de, *new_de; 3019 struct ext4_dir_entry_2 *old_de, *new_de;
3016 int retval, force_da_alloc = 0; 3020 int retval;
3017 int inlined = 0, new_inlined = 0; 3021 int inlined = 0, new_inlined = 0;
3018 struct ext4_dir_entry_2 *parent_de; 3022 struct ext4_dir_entry_2 *parent_de;
3019 3023
@@ -3026,14 +3030,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3026 * in separate transaction */ 3030 * in separate transaction */
3027 if (new_dentry->d_inode) 3031 if (new_dentry->d_inode)
3028 dquot_initialize(new_dentry->d_inode); 3032 dquot_initialize(new_dentry->d_inode);
3029 handle = ext4_journal_start(old_dir, EXT4_HT_DIR,
3030 (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
3031 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3032 if (IS_ERR(handle))
3033 return PTR_ERR(handle);
3034
3035 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
3036 ext4_handle_sync(handle);
3037 3033
3038 old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL); 3034 old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL);
3039 /* 3035 /*
@@ -3056,6 +3052,18 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3056 new_bh = NULL; 3052 new_bh = NULL;
3057 } 3053 }
3058 } 3054 }
3055 if (new_inode && !test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
3056 ext4_alloc_da_blocks(old_inode);
3057
3058 handle = ext4_journal_start(old_dir, EXT4_HT_DIR,
3059 (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
3060 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3061 if (IS_ERR(handle))
3062 return PTR_ERR(handle);
3063
3064 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
3065 ext4_handle_sync(handle);
3066
3059 if (S_ISDIR(old_inode->i_mode)) { 3067 if (S_ISDIR(old_inode->i_mode)) {
3060 if (new_inode) { 3068 if (new_inode) {
3061 retval = -ENOTEMPTY; 3069 retval = -ENOTEMPTY;
@@ -3186,8 +3194,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3186 ext4_mark_inode_dirty(handle, new_inode); 3194 ext4_mark_inode_dirty(handle, new_inode);
3187 if (!new_inode->i_nlink) 3195 if (!new_inode->i_nlink)
3188 ext4_orphan_add(handle, new_inode); 3196 ext4_orphan_add(handle, new_inode);
3189 if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
3190 force_da_alloc = 1;
3191 } 3197 }
3192 retval = 0; 3198 retval = 0;
3193 3199
@@ -3195,9 +3201,8 @@ end_rename:
3195 brelse(dir_bh); 3201 brelse(dir_bh);
3196 brelse(old_bh); 3202 brelse(old_bh);
3197 brelse(new_bh); 3203 brelse(new_bh);
3198 ext4_journal_stop(handle); 3204 if (handle)
3199 if (retval == 0 && force_da_alloc) 3205 ext4_journal_stop(handle);
3200 ext4_alloc_da_blocks(old_inode);
3201 return retval; 3206 return retval;
3202} 3207}
3203 3208
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b59373b625e9..42337141e79f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1134,8 +1134,8 @@ enum {
1134 Opt_nouid32, Opt_debug, Opt_removed, 1134 Opt_nouid32, Opt_debug, Opt_removed,
1135 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1135 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
1136 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, 1136 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
1137 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1137 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
1138 Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, 1138 Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
1139 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1139 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1140 Opt_data_err_abort, Opt_data_err_ignore, 1140 Opt_data_err_abort, Opt_data_err_ignore,
1141 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1141 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -1179,6 +1179,7 @@ static const match_table_t tokens = {
1179 {Opt_min_batch_time, "min_batch_time=%u"}, 1179 {Opt_min_batch_time, "min_batch_time=%u"},
1180 {Opt_max_batch_time, "max_batch_time=%u"}, 1180 {Opt_max_batch_time, "max_batch_time=%u"},
1181 {Opt_journal_dev, "journal_dev=%u"}, 1181 {Opt_journal_dev, "journal_dev=%u"},
1182 {Opt_journal_path, "journal_path=%s"},
1182 {Opt_journal_checksum, "journal_checksum"}, 1183 {Opt_journal_checksum, "journal_checksum"},
1183 {Opt_journal_async_commit, "journal_async_commit"}, 1184 {Opt_journal_async_commit, "journal_async_commit"},
1184 {Opt_abort, "abort"}, 1185 {Opt_abort, "abort"},
@@ -1338,6 +1339,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
1338#define MOPT_NO_EXT2 0x0100 1339#define MOPT_NO_EXT2 0x0100
1339#define MOPT_NO_EXT3 0x0200 1340#define MOPT_NO_EXT3 0x0200
1340#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) 1341#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
1342#define MOPT_STRING 0x0400
1341 1343
1342static const struct mount_opts { 1344static const struct mount_opts {
1343 int token; 1345 int token;
@@ -1387,6 +1389,7 @@ static const struct mount_opts {
1387 {Opt_resuid, 0, MOPT_GTE0}, 1389 {Opt_resuid, 0, MOPT_GTE0},
1388 {Opt_resgid, 0, MOPT_GTE0}, 1390 {Opt_resgid, 0, MOPT_GTE0},
1389 {Opt_journal_dev, 0, MOPT_GTE0}, 1391 {Opt_journal_dev, 0, MOPT_GTE0},
1392 {Opt_journal_path, 0, MOPT_STRING},
1390 {Opt_journal_ioprio, 0, MOPT_GTE0}, 1393 {Opt_journal_ioprio, 0, MOPT_GTE0},
1391 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, 1394 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1392 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, 1395 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
@@ -1480,7 +1483,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1480 return -1; 1483 return -1;
1481 } 1484 }
1482 1485
1483 if (args->from && match_int(args, &arg)) 1486 if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
1484 return -1; 1487 return -1;
1485 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) 1488 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1486 return -1; 1489 return -1;
@@ -1544,6 +1547,44 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1544 return -1; 1547 return -1;
1545 } 1548 }
1546 *journal_devnum = arg; 1549 *journal_devnum = arg;
1550 } else if (token == Opt_journal_path) {
1551 char *journal_path;
1552 struct inode *journal_inode;
1553 struct path path;
1554 int error;
1555
1556 if (is_remount) {
1557 ext4_msg(sb, KERN_ERR,
1558 "Cannot specify journal on remount");
1559 return -1;
1560 }
1561 journal_path = match_strdup(&args[0]);
1562 if (!journal_path) {
1563 ext4_msg(sb, KERN_ERR, "error: could not dup "
1564 "journal device string");
1565 return -1;
1566 }
1567
1568 error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
1569 if (error) {
1570 ext4_msg(sb, KERN_ERR, "error: could not find "
1571 "journal device path: error %d", error);
1572 kfree(journal_path);
1573 return -1;
1574 }
1575
1576 journal_inode = path.dentry->d_inode;
1577 if (!S_ISBLK(journal_inode->i_mode)) {
1578 ext4_msg(sb, KERN_ERR, "error: journal path %s "
1579 "is not a block device", journal_path);
1580 path_put(&path);
1581 kfree(journal_path);
1582 return -1;
1583 }
1584
1585 *journal_devnum = new_encode_dev(journal_inode->i_rdev);
1586 path_put(&path);
1587 kfree(journal_path);
1547 } else if (token == Opt_journal_ioprio) { 1588 } else if (token == Opt_journal_ioprio) {
1548 if (arg > 7) { 1589 if (arg > 7) {
1549 ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" 1590 ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 559bec1a37b4..cf2fc0594063 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -343,14 +343,14 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
343 struct page *page = bh->b_page; 343 struct page *page = bh->b_page;
344 __u8 *addr; 344 __u8 *addr;
345 __u32 csum32; 345 __u32 csum32;
346 __be32 seq;
346 347
347 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 348 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
348 return; 349 return;
349 350
350 sequence = cpu_to_be32(sequence); 351 seq = cpu_to_be32(sequence);
351 addr = kmap_atomic(page); 352 addr = kmap_atomic(page);
352 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, 353 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
353 sizeof(sequence));
354 csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data), 354 csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data),
355 bh->b_size); 355 bh->b_size);
356 kunmap_atomic(addr); 356 kunmap_atomic(addr);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 02c7ad9d7a41..52032647dd4a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -130,9 +130,10 @@ int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; 130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
131} 131}
132 132
133static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) 133static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
134{ 134{
135 __u32 csum, old_csum; 135 __u32 csum;
136 __be32 old_csum;
136 137
137 old_csum = sb->s_checksum; 138 old_csum = sb->s_checksum;
138 sb->s_checksum = 0; 139 sb->s_checksum = 0;
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index d4851464b57e..3929c50428b1 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -178,7 +178,8 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
178 void *buf) 178 void *buf)
179{ 179{
180 struct jbd2_journal_block_tail *tail; 180 struct jbd2_journal_block_tail *tail;
181 __u32 provided, calculated; 181 __be32 provided;
182 __u32 calculated;
182 183
183 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 184 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
184 return 1; 185 return 1;
@@ -190,8 +191,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
190 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 191 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
191 tail->t_checksum = provided; 192 tail->t_checksum = provided;
192 193
193 provided = be32_to_cpu(provided); 194 return provided == cpu_to_be32(calculated);
194 return provided == calculated;
195} 195}
196 196
197/* 197/*
@@ -381,7 +381,8 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
381static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 381static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
382{ 382{
383 struct commit_header *h; 383 struct commit_header *h;
384 __u32 provided, calculated; 384 __be32 provided;
385 __u32 calculated;
385 386
386 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 387 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
387 return 1; 388 return 1;
@@ -392,21 +393,20 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
392 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 393 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
393 h->h_chksum[0] = provided; 394 h->h_chksum[0] = provided;
394 395
395 provided = be32_to_cpu(provided); 396 return provided == cpu_to_be32(calculated);
396 return provided == calculated;
397} 397}
398 398
399static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 399static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
400 void *buf, __u32 sequence) 400 void *buf, __u32 sequence)
401{ 401{
402 __u32 csum32; 402 __u32 csum32;
403 __be32 seq;
403 404
404 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 405 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
405 return 1; 406 return 1;
406 407
407 sequence = cpu_to_be32(sequence); 408 seq = cpu_to_be32(sequence);
408 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, 409 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
409 sizeof(sequence));
410 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 410 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
411 411
412 return tag->t_checksum == cpu_to_be16(csum32); 412 return tag->t_checksum == cpu_to_be16(csum32);
@@ -808,7 +808,8 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
808 void *buf) 808 void *buf)
809{ 809{
810 struct jbd2_journal_revoke_tail *tail; 810 struct jbd2_journal_revoke_tail *tail;
811 __u32 provided, calculated; 811 __be32 provided;
812 __u32 calculated;
812 813
813 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 814 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
814 return 1; 815 return 1;
@@ -820,8 +821,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
820 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 821 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
821 tail->r_checksum = provided; 822 tail->r_checksum = provided;
822 823
823 provided = be32_to_cpu(provided); 824 return provided == cpu_to_be32(calculated);
824 return provided == calculated;
825} 825}
826 826
827/* Scan a revoke record, marking all blocks mentioned as revoked. */ 827/* Scan a revoke record, marking all blocks mentioned as revoked. */
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index fbad622841f9..9a702e193538 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1094,6 +1094,14 @@ static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number)
1094 dquot->dq_dqb.dqb_rsvspace -= number; 1094 dquot->dq_dqb.dqb_rsvspace -= number;
1095} 1095}
1096 1096
1097static void dquot_reclaim_reserved_space(struct dquot *dquot, qsize_t number)
1098{
1099 if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number))
1100 number = dquot->dq_dqb.dqb_curspace;
1101 dquot->dq_dqb.dqb_rsvspace += number;
1102 dquot->dq_dqb.dqb_curspace -= number;
1103}
1104
1097static inline 1105static inline
1098void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) 1106void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
1099{ 1107{
@@ -1528,6 +1536,15 @@ void inode_claim_rsv_space(struct inode *inode, qsize_t number)
1528} 1536}
1529EXPORT_SYMBOL(inode_claim_rsv_space); 1537EXPORT_SYMBOL(inode_claim_rsv_space);
1530 1538
1539void inode_reclaim_rsv_space(struct inode *inode, qsize_t number)
1540{
1541 spin_lock(&inode->i_lock);
1542 *inode_reserved_space(inode) += number;
1543 __inode_sub_bytes(inode, number);
1544 spin_unlock(&inode->i_lock);
1545}
1546EXPORT_SYMBOL(inode_reclaim_rsv_space);
1547
1531void inode_sub_rsv_space(struct inode *inode, qsize_t number) 1548void inode_sub_rsv_space(struct inode *inode, qsize_t number)
1532{ 1549{
1533 spin_lock(&inode->i_lock); 1550 spin_lock(&inode->i_lock);
@@ -1702,6 +1719,35 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
1702EXPORT_SYMBOL(dquot_claim_space_nodirty); 1719EXPORT_SYMBOL(dquot_claim_space_nodirty);
1703 1720
1704/* 1721/*
1722 * Convert allocated space back to in-memory reserved quotas
1723 */
1724void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
1725{
1726 int cnt;
1727
1728 if (!dquot_active(inode)) {
1729 inode_reclaim_rsv_space(inode, number);
1730 return;
1731 }
1732
1733 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1734 spin_lock(&dq_data_lock);
1735 /* Reclaim allocated quotas back to reserved quotas */
1736 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1737 if (inode->i_dquot[cnt])
1738 dquot_reclaim_reserved_space(inode->i_dquot[cnt],
1739 number);
1740 }
1741 /* Update inode bytes */
1742 inode_reclaim_rsv_space(inode, number);
1743 spin_unlock(&dq_data_lock);
1744 mark_all_dquot_dirty(inode->i_dquot);
1745 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1746 return;
1747}
1748EXPORT_SYMBOL(dquot_reclaim_space_nodirty);
1749
1750/*
1705 * This operation can block, but only after everything is updated 1751 * This operation can block, but only after everything is updated
1706 */ 1752 */
1707void __dquot_free_space(struct inode *inode, qsize_t number, int flags) 1753void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
diff --git a/fs/stat.c b/fs/stat.c
index 04ce1ac20d20..d0ea7ef75e26 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -447,9 +447,8 @@ void inode_add_bytes(struct inode *inode, loff_t bytes)
447 447
448EXPORT_SYMBOL(inode_add_bytes); 448EXPORT_SYMBOL(inode_add_bytes);
449 449
450void inode_sub_bytes(struct inode *inode, loff_t bytes) 450void __inode_sub_bytes(struct inode *inode, loff_t bytes)
451{ 451{
452 spin_lock(&inode->i_lock);
453 inode->i_blocks -= bytes >> 9; 452 inode->i_blocks -= bytes >> 9;
454 bytes &= 511; 453 bytes &= 511;
455 if (inode->i_bytes < bytes) { 454 if (inode->i_bytes < bytes) {
@@ -457,6 +456,14 @@ void inode_sub_bytes(struct inode *inode, loff_t bytes)
457 inode->i_bytes += 512; 456 inode->i_bytes += 512;
458 } 457 }
459 inode->i_bytes -= bytes; 458 inode->i_bytes -= bytes;
459}
460
461EXPORT_SYMBOL(__inode_sub_bytes);
462
463void inode_sub_bytes(struct inode *inode, loff_t bytes)
464{
465 spin_lock(&inode->i_lock);
466 __inode_sub_bytes(inode, bytes);
460 spin_unlock(&inode->i_lock); 467 spin_unlock(&inode->i_lock);
461} 468}
462 469
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 981874773e85..e7893523f81f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2503,6 +2503,7 @@ extern void generic_fillattr(struct inode *, struct kstat *);
2503extern int vfs_getattr(struct path *, struct kstat *); 2503extern int vfs_getattr(struct path *, struct kstat *);
2504void __inode_add_bytes(struct inode *inode, loff_t bytes); 2504void __inode_add_bytes(struct inode *inode, loff_t bytes);
2505void inode_add_bytes(struct inode *inode, loff_t bytes); 2505void inode_add_bytes(struct inode *inode, loff_t bytes);
2506void __inode_sub_bytes(struct inode *inode, loff_t bytes);
2506void inode_sub_bytes(struct inode *inode, loff_t bytes); 2507void inode_sub_bytes(struct inode *inode, loff_t bytes);
2507loff_t inode_get_bytes(struct inode *inode); 2508loff_t inode_get_bytes(struct inode *inode);
2508void inode_set_bytes(struct inode *inode, loff_t bytes); 2509void inode_set_bytes(struct inode *inode, loff_t bytes);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 1c50093ae656..6965fe394c3b 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -41,6 +41,7 @@ void __quota_error(struct super_block *sb, const char *func,
41void inode_add_rsv_space(struct inode *inode, qsize_t number); 41void inode_add_rsv_space(struct inode *inode, qsize_t number);
42void inode_claim_rsv_space(struct inode *inode, qsize_t number); 42void inode_claim_rsv_space(struct inode *inode, qsize_t number);
43void inode_sub_rsv_space(struct inode *inode, qsize_t number); 43void inode_sub_rsv_space(struct inode *inode, qsize_t number);
44void inode_reclaim_rsv_space(struct inode *inode, qsize_t number);
44 45
45void dquot_initialize(struct inode *inode); 46void dquot_initialize(struct inode *inode);
46void dquot_drop(struct inode *inode); 47void dquot_drop(struct inode *inode);
@@ -59,6 +60,7 @@ int dquot_alloc_inode(const struct inode *inode);
59 60
60int dquot_claim_space_nodirty(struct inode *inode, qsize_t number); 61int dquot_claim_space_nodirty(struct inode *inode, qsize_t number);
61void dquot_free_inode(const struct inode *inode); 62void dquot_free_inode(const struct inode *inode);
63void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number);
62 64
63int dquot_disable(struct super_block *sb, int type, unsigned int flags); 65int dquot_disable(struct super_block *sb, int type, unsigned int flags);
64/* Suspend quotas on remount RO */ 66/* Suspend quotas on remount RO */
@@ -238,6 +240,13 @@ static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
238 return 0; 240 return 0;
239} 241}
240 242
243static inline int dquot_reclaim_space_nodirty(struct inode *inode,
244 qsize_t number)
245{
246 inode_sub_bytes(inode, number);
247 return 0;
248}
249
241static inline int dquot_disable(struct super_block *sb, int type, 250static inline int dquot_disable(struct super_block *sb, int type,
242 unsigned int flags) 251 unsigned int flags)
243{ 252{
@@ -336,6 +345,12 @@ static inline int dquot_claim_block(struct inode *inode, qsize_t nr)
336 return ret; 345 return ret;
337} 346}
338 347
348static inline void dquot_reclaim_block(struct inode *inode, qsize_t nr)
349{
350 dquot_reclaim_space_nodirty(inode, nr << inode->i_blkbits);
351 mark_inode_dirty_sync(inode);
352}
353
339static inline void dquot_free_space_nodirty(struct inode *inode, qsize_t nr) 354static inline void dquot_free_space_nodirty(struct inode *inode, qsize_t nr)
340{ 355{
341 __dquot_free_space(inode, nr, 0); 356 __dquot_free_space(inode, nr, 0);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 2068db241f22..197d3125df2a 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -14,7 +14,6 @@ struct ext4_prealloc_space;
14struct ext4_inode_info; 14struct ext4_inode_info;
15struct mpage_da_data; 15struct mpage_da_data;
16struct ext4_map_blocks; 16struct ext4_map_blocks;
17struct ext4_extent;
18struct extent_status; 17struct extent_status;
19 18
20#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) 19#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
@@ -64,10 +63,10 @@ struct extent_status;
64 { EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" }) 63 { EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" })
65 64
66#define show_extent_status(status) __print_flags(status, "", \ 65#define show_extent_status(status) __print_flags(status, "", \
67 { (1 << 3), "W" }, \ 66 { EXTENT_STATUS_WRITTEN, "W" }, \
68 { (1 << 2), "U" }, \ 67 { EXTENT_STATUS_UNWRITTEN, "U" }, \
69 { (1 << 1), "D" }, \ 68 { EXTENT_STATUS_DELAYED, "D" }, \
70 { (1 << 0), "H" }) 69 { EXTENT_STATUS_HOLE, "H" })
71 70
72 71
73TRACE_EVENT(ext4_free_inode, 72TRACE_EVENT(ext4_free_inode,
@@ -2192,7 +2191,7 @@ TRACE_EVENT(ext4_ext_remove_space_done,
2192 (unsigned short) __entry->eh_entries) 2191 (unsigned short) __entry->eh_entries)
2193); 2192);
2194 2193
2195TRACE_EVENT(ext4_es_insert_extent, 2194DECLARE_EVENT_CLASS(ext4__es_extent,
2196 TP_PROTO(struct inode *inode, struct extent_status *es), 2195 TP_PROTO(struct inode *inode, struct extent_status *es),
2197 2196
2198 TP_ARGS(inode, es), 2197 TP_ARGS(inode, es),
@@ -2212,7 +2211,7 @@ TRACE_EVENT(ext4_es_insert_extent,
2212 __entry->lblk = es->es_lblk; 2211 __entry->lblk = es->es_lblk;
2213 __entry->len = es->es_len; 2212 __entry->len = es->es_len;
2214 __entry->pblk = ext4_es_pblock(es); 2213 __entry->pblk = ext4_es_pblock(es);
2215 __entry->status = ext4_es_status(es) >> 60; 2214 __entry->status = ext4_es_status(es);
2216 ), 2215 ),
2217 2216
2218 TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", 2217 TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s",
@@ -2222,6 +2221,18 @@ TRACE_EVENT(ext4_es_insert_extent,
2222 __entry->pblk, show_extent_status(__entry->status)) 2221 __entry->pblk, show_extent_status(__entry->status))
2223); 2222);
2224 2223
2224DEFINE_EVENT(ext4__es_extent, ext4_es_insert_extent,
2225 TP_PROTO(struct inode *inode, struct extent_status *es),
2226
2227 TP_ARGS(inode, es)
2228);
2229
2230DEFINE_EVENT(ext4__es_extent, ext4_es_cache_extent,
2231 TP_PROTO(struct inode *inode, struct extent_status *es),
2232
2233 TP_ARGS(inode, es)
2234);
2235
2225TRACE_EVENT(ext4_es_remove_extent, 2236TRACE_EVENT(ext4_es_remove_extent,
2226 TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), 2237 TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len),
2227 2238
@@ -2289,7 +2300,7 @@ TRACE_EVENT(ext4_es_find_delayed_extent_range_exit,
2289 __entry->lblk = es->es_lblk; 2300 __entry->lblk = es->es_lblk;
2290 __entry->len = es->es_len; 2301 __entry->len = es->es_len;
2291 __entry->pblk = ext4_es_pblock(es); 2302 __entry->pblk = ext4_es_pblock(es);
2292 __entry->status = ext4_es_status(es) >> 60; 2303 __entry->status = ext4_es_status(es);
2293 ), 2304 ),
2294 2305
2295 TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", 2306 TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s",
@@ -2343,7 +2354,7 @@ TRACE_EVENT(ext4_es_lookup_extent_exit,
2343 __entry->lblk = es->es_lblk; 2354 __entry->lblk = es->es_lblk;
2344 __entry->len = es->es_len; 2355 __entry->len = es->es_len;
2345 __entry->pblk = ext4_es_pblock(es); 2356 __entry->pblk = ext4_es_pblock(es);
2346 __entry->status = ext4_es_status(es) >> 60; 2357 __entry->status = ext4_es_status(es);
2347 __entry->found = found; 2358 __entry->found = found;
2348 ), 2359 ),
2349 2360
diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h
index d830747f5c0b..0c51d617dae9 100644
--- a/include/uapi/linux/fiemap.h
+++ b/include/uapi/linux/fiemap.h
@@ -40,6 +40,7 @@ struct fiemap {
40 40
41#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */ 41#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
42#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */ 42#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */
43#define FIEMAP_FLAG_CACHE 0x00000004 /* request caching of the extents */
43 44
44#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) 45#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
45 46