aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/ext3/dir.c2
-rw-r--r--fs/ext4/balloc.c24
-rw-r--r--fs/ext4/dir.c2
-rw-r--r--fs/ext4/ext4.h58
-rw-r--r--fs/ext4/ext4_extents.h6
-rw-r--r--fs/ext4/ext4_jbd2.h2
-rw-r--r--fs/ext4/extents.c296
-rw-r--r--fs/ext4/extents_status.c125
-rw-r--r--fs/ext4/extents_status.h51
-rw-r--r--fs/ext4/ialloc.c90
-rw-r--r--fs/ext4/indirect.c1
-rw-r--r--fs/ext4/inode.c293
-rw-r--r--fs/ext4/ioctl.c4
-rw-r--r--fs/ext4/mballoc.c49
-rw-r--r--fs/ext4/migrate.c4
-rw-r--r--fs/ext4/move_extent.c2
-rw-r--r--fs/ext4/namei.c35
-rw-r--r--fs/ext4/super.c47
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/journal.c5
-rw-r--r--fs/jbd2/recovery.c24
-rw-r--r--fs/quota/dquot.c46
-rw-r--r--fs/stat.c11
23 files changed, 816 insertions, 367 deletions
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index f522425aaa24..bafdd48eefde 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -41,7 +41,7 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
41 41
42/** 42/**
43 * Check if the given dir-inode refers to an htree-indexed directory 43 * Check if the given dir-inode refers to an htree-indexed directory
44 * (or a directory which chould potentially get coverted to use htree 44 * (or a directory which could potentially get converted to use htree
45 * indexing). 45 * indexing).
46 * 46 *
47 * Return 1 if it is a dx dir, 0 if not 47 * Return 1 if it is a dx dir, 0 if not
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index ddd715e42a5c..dc5d572ebd6a 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -184,6 +184,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
184 struct ext4_sb_info *sbi = EXT4_SB(sb); 184 struct ext4_sb_info *sbi = EXT4_SB(sb);
185 ext4_fsblk_t start, tmp; 185 ext4_fsblk_t start, tmp;
186 int flex_bg = 0; 186 int flex_bg = 0;
187 struct ext4_group_info *grp;
187 188
188 J_ASSERT_BH(bh, buffer_locked(bh)); 189 J_ASSERT_BH(bh, buffer_locked(bh));
189 190
@@ -191,11 +192,9 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
191 * essentially implementing a per-group read-only flag. */ 192 * essentially implementing a per-group read-only flag. */
192 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 193 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
193 ext4_error(sb, "Checksum bad for group %u", block_group); 194 ext4_error(sb, "Checksum bad for group %u", block_group);
194 ext4_free_group_clusters_set(sb, gdp, 0); 195 grp = ext4_get_group_info(sb, block_group);
195 ext4_free_inodes_set(sb, gdp, 0); 196 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
196 ext4_itable_unused_set(sb, gdp, 0); 197 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
197 memset(bh->b_data, 0xff, sb->s_blocksize);
198 ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
199 return; 198 return;
200 } 199 }
201 memset(bh->b_data, 0, sb->s_blocksize); 200 memset(bh->b_data, 0, sb->s_blocksize);
@@ -305,7 +304,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
305 */ 304 */
306static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, 305static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
307 struct ext4_group_desc *desc, 306 struct ext4_group_desc *desc,
308 unsigned int block_group, 307 ext4_group_t block_group,
309 struct buffer_head *bh) 308 struct buffer_head *bh)
310{ 309{
311 ext4_grpblk_t offset; 310 ext4_grpblk_t offset;
@@ -352,10 +351,11 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
352 351
353void ext4_validate_block_bitmap(struct super_block *sb, 352void ext4_validate_block_bitmap(struct super_block *sb,
354 struct ext4_group_desc *desc, 353 struct ext4_group_desc *desc,
355 unsigned int block_group, 354 ext4_group_t block_group,
356 struct buffer_head *bh) 355 struct buffer_head *bh)
357{ 356{
358 ext4_fsblk_t blk; 357 ext4_fsblk_t blk;
358 struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
359 359
360 if (buffer_verified(bh)) 360 if (buffer_verified(bh))
361 return; 361 return;
@@ -366,12 +366,14 @@ void ext4_validate_block_bitmap(struct super_block *sb,
366 ext4_unlock_group(sb, block_group); 366 ext4_unlock_group(sb, block_group);
367 ext4_error(sb, "bg %u: block %llu: invalid block bitmap", 367 ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
368 block_group, blk); 368 block_group, blk);
369 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
369 return; 370 return;
370 } 371 }
371 if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, 372 if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
372 desc, bh))) { 373 desc, bh))) {
373 ext4_unlock_group(sb, block_group); 374 ext4_unlock_group(sb, block_group);
374 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); 375 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
376 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
375 return; 377 return;
376 } 378 }
377 set_buffer_verified(bh); 379 set_buffer_verified(bh);
@@ -445,7 +447,10 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
445 return bh; 447 return bh;
446verify: 448verify:
447 ext4_validate_block_bitmap(sb, desc, block_group, bh); 449 ext4_validate_block_bitmap(sb, desc, block_group, bh);
448 return bh; 450 if (buffer_verified(bh))
451 return bh;
452 put_bh(bh);
453 return NULL;
449} 454}
450 455
451/* Returns 0 on success, 1 on error */ 456/* Returns 0 on success, 1 on error */
@@ -469,7 +474,8 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
469 clear_buffer_new(bh); 474 clear_buffer_new(bh);
470 /* Panic or remount fs read-only if block bitmap is invalid */ 475 /* Panic or remount fs read-only if block bitmap is invalid */
471 ext4_validate_block_bitmap(sb, desc, block_group, bh); 476 ext4_validate_block_bitmap(sb, desc, block_group, bh);
472 return 0; 477 /* ...but check for error just in case errors=continue. */
478 return !buffer_verified(bh);
473} 479}
474 480
475struct buffer_head * 481struct buffer_head *
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3c7d288ae94c..680bb3388919 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -33,7 +33,7 @@ static int ext4_dx_readdir(struct file *, struct dir_context *);
33 33
34/** 34/**
35 * Check if the given dir-inode refers to an htree-indexed directory 35 * Check if the given dir-inode refers to an htree-indexed directory
36 * (or a directory which chould potentially get coverted to use htree 36 * (or a directory which could potentially get converted to use htree
37 * indexing). 37 * indexing).
38 * 38 *
39 * Return 1 if it is a dx dir, 0 if not 39 * Return 1 if it is a dx dir, 0 if not
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0ab26fbf3380..06b488dca666 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -561,6 +561,18 @@ enum {
561#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 561#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
562 562
563/* 563/*
564 * The bit position of these flags must not overlap with any of the
565 * EXT4_GET_BLOCKS_*. They are used by ext4_ext_find_extent(),
566 * read_extent_tree_block(), ext4_split_extent_at(),
567 * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
568 * EXT4_EX_NOCACHE is used to indicate that we shouldn't be
569 * caching the extents when reading from the extent tree while a
570 * truncate or punch hole operation is in progress.
571 */
572#define EXT4_EX_NOCACHE 0x0400
573#define EXT4_EX_FORCE_CACHE 0x0800
574
575/*
564 * Flags used by ext4_free_blocks 576 * Flags used by ext4_free_blocks
565 */ 577 */
566#define EXT4_FREE_BLOCKS_METADATA 0x0001 578#define EXT4_FREE_BLOCKS_METADATA 0x0001
@@ -569,6 +581,7 @@ enum {
569#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 581#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
570#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 582#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
571#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 583#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
584#define EXT4_FREE_BLOCKS_RESERVE 0x0040
572 585
573/* 586/*
574 * ioctl commands 587 * ioctl commands
@@ -590,6 +603,7 @@ enum {
590#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) 603#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
591#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) 604#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
592#define EXT4_IOC_SWAP_BOOT _IO('f', 17) 605#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
606#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
593 607
594#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 608#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
595/* 609/*
@@ -1375,6 +1389,7 @@ enum {
1375 nolocking */ 1389 nolocking */
1376 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ 1390 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
1377 EXT4_STATE_ORDERED_MODE, /* data=ordered mode */ 1391 EXT4_STATE_ORDERED_MODE, /* data=ordered mode */
1392 EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
1378}; 1393};
1379 1394
1380#define EXT4_INODE_BIT_FNS(name, field, offset) \ 1395#define EXT4_INODE_BIT_FNS(name, field, offset) \
@@ -1915,7 +1930,7 @@ extern ext4_group_t ext4_get_group_number(struct super_block *sb,
1915 1930
1916extern void ext4_validate_block_bitmap(struct super_block *sb, 1931extern void ext4_validate_block_bitmap(struct super_block *sb,
1917 struct ext4_group_desc *desc, 1932 struct ext4_group_desc *desc,
1918 unsigned int block_group, 1933 ext4_group_t block_group,
1919 struct buffer_head *bh); 1934 struct buffer_head *bh);
1920extern unsigned int ext4_block_group(struct super_block *sb, 1935extern unsigned int ext4_block_group(struct super_block *sb,
1921 ext4_fsblk_t blocknr); 1936 ext4_fsblk_t blocknr);
@@ -2417,16 +2432,32 @@ do { \
2417#define EXT4_FREECLUSTERS_WATERMARK 0 2432#define EXT4_FREECLUSTERS_WATERMARK 0
2418#endif 2433#endif
2419 2434
2435/* Update i_disksize. Requires i_mutex to avoid races with truncate */
2420static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) 2436static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
2421{ 2437{
2422 /* 2438 WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
2423 * XXX: replace with spinlock if seen contended -bzzz 2439 !mutex_is_locked(&inode->i_mutex));
2424 */
2425 down_write(&EXT4_I(inode)->i_data_sem); 2440 down_write(&EXT4_I(inode)->i_data_sem);
2426 if (newsize > EXT4_I(inode)->i_disksize) 2441 if (newsize > EXT4_I(inode)->i_disksize)
2427 EXT4_I(inode)->i_disksize = newsize; 2442 EXT4_I(inode)->i_disksize = newsize;
2428 up_write(&EXT4_I(inode)->i_data_sem); 2443 up_write(&EXT4_I(inode)->i_data_sem);
2429 return ; 2444}
2445
2446/*
2447 * Update i_disksize after writeback has been started. Races with truncate
2448 * are avoided by checking i_size under i_data_sem.
2449 */
2450static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
2451{
2452 loff_t i_size;
2453
2454 down_write(&EXT4_I(inode)->i_data_sem);
2455 i_size = i_size_read(inode);
2456 if (newsize > i_size)
2457 newsize = i_size;
2458 if (newsize > EXT4_I(inode)->i_disksize)
2459 EXT4_I(inode)->i_disksize = newsize;
2460 up_write(&EXT4_I(inode)->i_data_sem);
2430} 2461}
2431 2462
2432struct ext4_group_info { 2463struct ext4_group_info {
@@ -2449,9 +2480,15 @@ struct ext4_group_info {
2449 2480
2450#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 2481#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
2451#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1 2482#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1
2483#define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT 2
2484#define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT 3
2452 2485
2453#define EXT4_MB_GRP_NEED_INIT(grp) \ 2486#define EXT4_MB_GRP_NEED_INIT(grp) \
2454 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) 2487 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
2488#define EXT4_MB_GRP_BBITMAP_CORRUPT(grp) \
2489 (test_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &((grp)->bb_state)))
2490#define EXT4_MB_GRP_IBITMAP_CORRUPT(grp) \
2491 (test_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &((grp)->bb_state)))
2455 2492
2456#define EXT4_MB_GRP_WAS_TRIMMED(grp) \ 2493#define EXT4_MB_GRP_WAS_TRIMMED(grp) \
2457 (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) 2494 (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
@@ -2655,6 +2692,12 @@ extern int ext4_check_blockref(const char *, unsigned int,
2655struct ext4_ext_path; 2692struct ext4_ext_path;
2656struct ext4_extent; 2693struct ext4_extent;
2657 2694
2695/*
2696 * Maximum number of logical blocks in a file; ext4_extent's ee_block is
2697 * __le32.
2698 */
2699#define EXT_MAX_BLOCKS 0xffffffff
2700
2658extern int ext4_ext_tree_init(handle_t *handle, struct inode *); 2701extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
2659extern int ext4_ext_writepage_trans_blocks(struct inode *, int); 2702extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
2660extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); 2703extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
@@ -2684,7 +2727,8 @@ extern int ext4_ext_insert_extent(handle_t *, struct inode *,
2684 struct ext4_ext_path *, 2727 struct ext4_ext_path *,
2685 struct ext4_extent *, int); 2728 struct ext4_extent *, int);
2686extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, 2729extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
2687 struct ext4_ext_path *); 2730 struct ext4_ext_path *,
2731 int flags);
2688extern void ext4_ext_drop_refs(struct ext4_ext_path *); 2732extern void ext4_ext_drop_refs(struct ext4_ext_path *);
2689extern int ext4_ext_check_inode(struct inode *inode); 2733extern int ext4_ext_check_inode(struct inode *inode);
2690extern int ext4_find_delalloc_range(struct inode *inode, 2734extern int ext4_find_delalloc_range(struct inode *inode,
@@ -2693,7 +2737,7 @@ extern int ext4_find_delalloc_range(struct inode *inode,
2693extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); 2737extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
2694extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2738extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2695 __u64 start, __u64 len); 2739 __u64 start, __u64 len);
2696 2740extern int ext4_ext_precache(struct inode *inode);
2697 2741
2698/* move_extent.c */ 2742/* move_extent.c */
2699extern void ext4_double_down_write_data_sem(struct inode *first, 2743extern void ext4_double_down_write_data_sem(struct inode *first,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 51bc821ade90..5074fe23f19e 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -134,12 +134,6 @@ struct ext4_ext_path {
134 */ 134 */
135 135
136/* 136/*
137 * Maximum number of logical blocks in a file; ext4_extent's ee_block is
138 * __le32.
139 */
140#define EXT_MAX_BLOCKS 0xffffffff
141
142/*
143 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an 137 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
144 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the 138 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
145 * MSB of ee_len field in the extent datastructure to signify if this 139 * MSB of ee_len field in the extent datastructure to signify if this
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 2877258d9497..81cfefa9dc0c 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -197,7 +197,7 @@ static inline void ext4_journal_callback_add(handle_t *handle,
197 * ext4_journal_callback_del: delete a registered callback 197 * ext4_journal_callback_del: delete a registered callback
198 * @handle: active journal transaction handle on which callback was registered 198 * @handle: active journal transaction handle on which callback was registered
199 * @jce: registered journal callback entry to unregister 199 * @jce: registered journal callback entry to unregister
200 * Return true if object was sucessfully removed 200 * Return true if object was successfully removed
201 */ 201 */
202static inline bool ext4_journal_callback_try_del(handle_t *handle, 202static inline bool ext4_journal_callback_try_del(handle_t *handle,
203 struct ext4_journal_cb_entry *jce) 203 struct ext4_journal_cb_entry *jce)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 72ba4705d4fa..54d52afcdb19 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -407,7 +407,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
407 407
408static int __ext4_ext_check(const char *function, unsigned int line, 408static int __ext4_ext_check(const char *function, unsigned int line,
409 struct inode *inode, struct ext4_extent_header *eh, 409 struct inode *inode, struct ext4_extent_header *eh,
410 int depth) 410 int depth, ext4_fsblk_t pblk)
411{ 411{
412 const char *error_msg; 412 const char *error_msg;
413 int max = 0; 413 int max = 0;
@@ -447,42 +447,149 @@ static int __ext4_ext_check(const char *function, unsigned int line,
447 447
448corrupted: 448corrupted:
449 ext4_error_inode(inode, function, line, 0, 449 ext4_error_inode(inode, function, line, 0,
450 "bad header/extent: %s - magic %x, " 450 "pblk %llu bad header/extent: %s - magic %x, "
451 "entries %u, max %u(%u), depth %u(%u)", 451 "entries %u, max %u(%u), depth %u(%u)",
452 error_msg, le16_to_cpu(eh->eh_magic), 452 (unsigned long long) pblk, error_msg,
453 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), 453 le16_to_cpu(eh->eh_magic),
454 max, le16_to_cpu(eh->eh_depth), depth); 454 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
455 455 max, le16_to_cpu(eh->eh_depth), depth);
456 return -EIO; 456 return -EIO;
457} 457}
458 458
459#define ext4_ext_check(inode, eh, depth) \ 459#define ext4_ext_check(inode, eh, depth, pblk) \
460 __ext4_ext_check(__func__, __LINE__, inode, eh, depth) 460 __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk))
461 461
462int ext4_ext_check_inode(struct inode *inode) 462int ext4_ext_check_inode(struct inode *inode)
463{ 463{
464 return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode)); 464 return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
465} 465}
466 466
467static int __ext4_ext_check_block(const char *function, unsigned int line, 467static struct buffer_head *
468 struct inode *inode, 468__read_extent_tree_block(const char *function, unsigned int line,
469 struct ext4_extent_header *eh, 469 struct inode *inode, ext4_fsblk_t pblk, int depth,
470 int depth, 470 int flags)
471 struct buffer_head *bh)
472{ 471{
473 int ret; 472 struct buffer_head *bh;
473 int err;
474 474
475 if (buffer_verified(bh)) 475 bh = sb_getblk(inode->i_sb, pblk);
476 return 0; 476 if (unlikely(!bh))
477 ret = ext4_ext_check(inode, eh, depth); 477 return ERR_PTR(-ENOMEM);
478 if (ret) 478
479 return ret; 479 if (!bh_uptodate_or_lock(bh)) {
480 trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
481 err = bh_submit_read(bh);
482 if (err < 0)
483 goto errout;
484 }
485 if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
486 return bh;
487 err = __ext4_ext_check(function, line, inode,
488 ext_block_hdr(bh), depth, pblk);
489 if (err)
490 goto errout;
480 set_buffer_verified(bh); 491 set_buffer_verified(bh);
481 return ret; 492 /*
493 * If this is a leaf block, cache all of its entries
494 */
495 if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
496 struct ext4_extent_header *eh = ext_block_hdr(bh);
497 struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
498 ext4_lblk_t prev = 0;
499 int i;
500
501 for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
502 unsigned int status = EXTENT_STATUS_WRITTEN;
503 ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
504 int len = ext4_ext_get_actual_len(ex);
505
506 if (prev && (prev != lblk))
507 ext4_es_cache_extent(inode, prev,
508 lblk - prev, ~0,
509 EXTENT_STATUS_HOLE);
510
511 if (ext4_ext_is_uninitialized(ex))
512 status = EXTENT_STATUS_UNWRITTEN;
513 ext4_es_cache_extent(inode, lblk, len,
514 ext4_ext_pblock(ex), status);
515 prev = lblk + len;
516 }
517 }
518 return bh;
519errout:
520 put_bh(bh);
521 return ERR_PTR(err);
522
482} 523}
483 524
484#define ext4_ext_check_block(inode, eh, depth, bh) \ 525#define read_extent_tree_block(inode, pblk, depth, flags) \
485 __ext4_ext_check_block(__func__, __LINE__, inode, eh, depth, bh) 526 __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \
527 (depth), (flags))
528
529/*
530 * This function is called to cache a file's extent information in the
531 * extent status tree
532 */
533int ext4_ext_precache(struct inode *inode)
534{
535 struct ext4_inode_info *ei = EXT4_I(inode);
536 struct ext4_ext_path *path = NULL;
537 struct buffer_head *bh;
538 int i = 0, depth, ret = 0;
539
540 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
541 return 0; /* not an extent-mapped inode */
542
543 down_read(&ei->i_data_sem);
544 depth = ext_depth(inode);
545
546 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1),
547 GFP_NOFS);
548 if (path == NULL) {
549 up_read(&ei->i_data_sem);
550 return -ENOMEM;
551 }
552
553 /* Don't cache anything if there are no external extent blocks */
554 if (depth == 0)
555 goto out;
556 path[0].p_hdr = ext_inode_hdr(inode);
557 ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
558 if (ret)
559 goto out;
560 path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr);
561 while (i >= 0) {
562 /*
563 * If this is a leaf block or we've reached the end of
564 * the index block, go up
565 */
566 if ((i == depth) ||
567 path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
568 brelse(path[i].p_bh);
569 path[i].p_bh = NULL;
570 i--;
571 continue;
572 }
573 bh = read_extent_tree_block(inode,
574 ext4_idx_pblock(path[i].p_idx++),
575 depth - i - 1,
576 EXT4_EX_FORCE_CACHE);
577 if (IS_ERR(bh)) {
578 ret = PTR_ERR(bh);
579 break;
580 }
581 i++;
582 path[i].p_bh = bh;
583 path[i].p_hdr = ext_block_hdr(bh);
584 path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
585 }
586 ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
587out:
588 up_read(&ei->i_data_sem);
589 ext4_ext_drop_refs(path);
590 kfree(path);
591 return ret;
592}
486 593
487#ifdef EXT_DEBUG 594#ifdef EXT_DEBUG
488static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 595static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -716,7 +823,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
716 823
717struct ext4_ext_path * 824struct ext4_ext_path *
718ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, 825ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
719 struct ext4_ext_path *path) 826 struct ext4_ext_path *path, int flags)
720{ 827{
721 struct ext4_extent_header *eh; 828 struct ext4_extent_header *eh;
722 struct buffer_head *bh; 829 struct buffer_head *bh;
@@ -748,20 +855,13 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
748 path[ppos].p_depth = i; 855 path[ppos].p_depth = i;
749 path[ppos].p_ext = NULL; 856 path[ppos].p_ext = NULL;
750 857
751 bh = sb_getblk(inode->i_sb, path[ppos].p_block); 858 bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
752 if (unlikely(!bh)) { 859 flags);
753 ret = -ENOMEM; 860 if (IS_ERR(bh)) {
861 ret = PTR_ERR(bh);
754 goto err; 862 goto err;
755 } 863 }
756 if (!bh_uptodate_or_lock(bh)) { 864
757 trace_ext4_ext_load_extent(inode, block,
758 path[ppos].p_block);
759 ret = bh_submit_read(bh);
760 if (ret < 0) {
761 put_bh(bh);
762 goto err;
763 }
764 }
765 eh = ext_block_hdr(bh); 865 eh = ext_block_hdr(bh);
766 ppos++; 866 ppos++;
767 if (unlikely(ppos > depth)) { 867 if (unlikely(ppos > depth)) {
@@ -773,11 +873,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
773 } 873 }
774 path[ppos].p_bh = bh; 874 path[ppos].p_bh = bh;
775 path[ppos].p_hdr = eh; 875 path[ppos].p_hdr = eh;
776 i--;
777
778 ret = ext4_ext_check_block(inode, eh, i, bh);
779 if (ret < 0)
780 goto err;
781 } 876 }
782 877
783 path[ppos].p_depth = i; 878 path[ppos].p_depth = i;
@@ -1198,7 +1293,8 @@ out:
1198 * if no free index is found, then it requests in-depth growing. 1293 * if no free index is found, then it requests in-depth growing.
1199 */ 1294 */
1200static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, 1295static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
1201 unsigned int flags, 1296 unsigned int mb_flags,
1297 unsigned int gb_flags,
1202 struct ext4_ext_path *path, 1298 struct ext4_ext_path *path,
1203 struct ext4_extent *newext) 1299 struct ext4_extent *newext)
1204{ 1300{
@@ -1220,7 +1316,7 @@ repeat:
1220 if (EXT_HAS_FREE_INDEX(curp)) { 1316 if (EXT_HAS_FREE_INDEX(curp)) {
1221 /* if we found index with free entry, then use that 1317 /* if we found index with free entry, then use that
1222 * entry: create all needed subtree and add new leaf */ 1318 * entry: create all needed subtree and add new leaf */
1223 err = ext4_ext_split(handle, inode, flags, path, newext, i); 1319 err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
1224 if (err) 1320 if (err)
1225 goto out; 1321 goto out;
1226 1322
@@ -1228,12 +1324,12 @@ repeat:
1228 ext4_ext_drop_refs(path); 1324 ext4_ext_drop_refs(path);
1229 path = ext4_ext_find_extent(inode, 1325 path = ext4_ext_find_extent(inode,
1230 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1326 (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1231 path); 1327 path, gb_flags);
1232 if (IS_ERR(path)) 1328 if (IS_ERR(path))
1233 err = PTR_ERR(path); 1329 err = PTR_ERR(path);
1234 } else { 1330 } else {
1235 /* tree is full, time to grow in depth */ 1331 /* tree is full, time to grow in depth */
1236 err = ext4_ext_grow_indepth(handle, inode, flags, newext); 1332 err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext);
1237 if (err) 1333 if (err)
1238 goto out; 1334 goto out;
1239 1335
@@ -1241,7 +1337,7 @@ repeat:
1241 ext4_ext_drop_refs(path); 1337 ext4_ext_drop_refs(path);
1242 path = ext4_ext_find_extent(inode, 1338 path = ext4_ext_find_extent(inode,
1243 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1339 (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1244 path); 1340 path, gb_flags);
1245 if (IS_ERR(path)) { 1341 if (IS_ERR(path)) {
1246 err = PTR_ERR(path); 1342 err = PTR_ERR(path);
1247 goto out; 1343 goto out;
@@ -1412,29 +1508,21 @@ got_index:
1412 ix++; 1508 ix++;
1413 block = ext4_idx_pblock(ix); 1509 block = ext4_idx_pblock(ix);
1414 while (++depth < path->p_depth) { 1510 while (++depth < path->p_depth) {
1415 bh = sb_bread(inode->i_sb, block);
1416 if (bh == NULL)
1417 return -EIO;
1418 eh = ext_block_hdr(bh);
1419 /* subtract from p_depth to get proper eh_depth */ 1511 /* subtract from p_depth to get proper eh_depth */
1420 if (ext4_ext_check_block(inode, eh, 1512 bh = read_extent_tree_block(inode, block,
1421 path->p_depth - depth, bh)) { 1513 path->p_depth - depth, 0);
1422 put_bh(bh); 1514 if (IS_ERR(bh))
1423 return -EIO; 1515 return PTR_ERR(bh);
1424 } 1516 eh = ext_block_hdr(bh);
1425 ix = EXT_FIRST_INDEX(eh); 1517 ix = EXT_FIRST_INDEX(eh);
1426 block = ext4_idx_pblock(ix); 1518 block = ext4_idx_pblock(ix);
1427 put_bh(bh); 1519 put_bh(bh);
1428 } 1520 }
1429 1521
1430 bh = sb_bread(inode->i_sb, block); 1522 bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0);
1431 if (bh == NULL) 1523 if (IS_ERR(bh))
1432 return -EIO; 1524 return PTR_ERR(bh);
1433 eh = ext_block_hdr(bh); 1525 eh = ext_block_hdr(bh);
1434 if (ext4_ext_check_block(inode, eh, path->p_depth - depth, bh)) {
1435 put_bh(bh);
1436 return -EIO;
1437 }
1438 ex = EXT_FIRST_EXTENT(eh); 1526 ex = EXT_FIRST_EXTENT(eh);
1439found_extent: 1527found_extent:
1440 *logical = le32_to_cpu(ex->ee_block); 1528 *logical = le32_to_cpu(ex->ee_block);
@@ -1705,7 +1793,8 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
1705 1793
1706 brelse(path[1].p_bh); 1794 brelse(path[1].p_bh);
1707 ext4_free_blocks(handle, inode, NULL, blk, 1, 1795 ext4_free_blocks(handle, inode, NULL, blk, 1,
1708 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 1796 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET |
1797 EXT4_FREE_BLOCKS_RESERVE);
1709} 1798}
1710 1799
1711/* 1800/*
@@ -1793,7 +1882,7 @@ out:
1793 */ 1882 */
1794int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, 1883int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1795 struct ext4_ext_path *path, 1884 struct ext4_ext_path *path,
1796 struct ext4_extent *newext, int flag) 1885 struct ext4_extent *newext, int gb_flags)
1797{ 1886{
1798 struct ext4_extent_header *eh; 1887 struct ext4_extent_header *eh;
1799 struct ext4_extent *ex, *fex; 1888 struct ext4_extent *ex, *fex;
@@ -1802,7 +1891,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1802 int depth, len, err; 1891 int depth, len, err;
1803 ext4_lblk_t next; 1892 ext4_lblk_t next;
1804 unsigned uninitialized = 0; 1893 unsigned uninitialized = 0;
1805 int flags = 0; 1894 int mb_flags = 0;
1806 1895
1807 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { 1896 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1808 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); 1897 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
@@ -1817,7 +1906,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1817 } 1906 }
1818 1907
1819 /* try to insert block into found extent and return */ 1908 /* try to insert block into found extent and return */
1820 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)) { 1909 if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
1821 1910
1822 /* 1911 /*
1823 * Try to see whether we should rather test the extent on 1912 * Try to see whether we should rather test the extent on
@@ -1920,7 +2009,7 @@ prepend:
1920 if (next != EXT_MAX_BLOCKS) { 2009 if (next != EXT_MAX_BLOCKS) {
1921 ext_debug("next leaf block - %u\n", next); 2010 ext_debug("next leaf block - %u\n", next);
1922 BUG_ON(npath != NULL); 2011 BUG_ON(npath != NULL);
1923 npath = ext4_ext_find_extent(inode, next, NULL); 2012 npath = ext4_ext_find_extent(inode, next, NULL, 0);
1924 if (IS_ERR(npath)) 2013 if (IS_ERR(npath))
1925 return PTR_ERR(npath); 2014 return PTR_ERR(npath);
1926 BUG_ON(npath->p_depth != path->p_depth); 2015 BUG_ON(npath->p_depth != path->p_depth);
@@ -1939,9 +2028,10 @@ prepend:
1939 * There is no free space in the found leaf. 2028 * There is no free space in the found leaf.
1940 * We're gonna add a new leaf in the tree. 2029 * We're gonna add a new leaf in the tree.
1941 */ 2030 */
1942 if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL) 2031 if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
1943 flags = EXT4_MB_USE_RESERVED; 2032 mb_flags = EXT4_MB_USE_RESERVED;
1944 err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext); 2033 err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
2034 path, newext);
1945 if (err) 2035 if (err)
1946 goto cleanup; 2036 goto cleanup;
1947 depth = ext_depth(inode); 2037 depth = ext_depth(inode);
@@ -2007,7 +2097,7 @@ has_space:
2007 2097
2008merge: 2098merge:
2009 /* try to merge extents */ 2099 /* try to merge extents */
2010 if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) 2100 if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
2011 ext4_ext_try_to_merge(handle, inode, path, nearex); 2101 ext4_ext_try_to_merge(handle, inode, path, nearex);
2012 2102
2013 2103
@@ -2050,7 +2140,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2050 path = NULL; 2140 path = NULL;
2051 } 2141 }
2052 2142
2053 path = ext4_ext_find_extent(inode, block, path); 2143 path = ext4_ext_find_extent(inode, block, path, 0);
2054 if (IS_ERR(path)) { 2144 if (IS_ERR(path)) {
2055 up_read(&EXT4_I(inode)->i_data_sem); 2145 up_read(&EXT4_I(inode)->i_data_sem);
2056 err = PTR_ERR(path); 2146 err = PTR_ERR(path);
@@ -2195,8 +2285,8 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2195 ext4_lblk_t block) 2285 ext4_lblk_t block)
2196{ 2286{
2197 int depth = ext_depth(inode); 2287 int depth = ext_depth(inode);
2198 unsigned long len; 2288 unsigned long len = 0;
2199 ext4_lblk_t lblock; 2289 ext4_lblk_t lblock = 0;
2200 struct ext4_extent *ex; 2290 struct ext4_extent *ex;
2201 2291
2202 ex = path[depth].p_ext; 2292 ex = path[depth].p_ext;
@@ -2233,7 +2323,6 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2233 ext4_es_insert_extent(inode, lblock, len, ~0, 2323 ext4_es_insert_extent(inode, lblock, len, ~0,
2234 EXTENT_STATUS_HOLE); 2324 EXTENT_STATUS_HOLE);
2235 } else { 2325 } else {
2236 lblock = len = 0;
2237 BUG(); 2326 BUG();
2238 } 2327 }
2239 2328
@@ -2712,7 +2801,7 @@ again:
2712 ext4_lblk_t ee_block; 2801 ext4_lblk_t ee_block;
2713 2802
2714 /* find extent for this block */ 2803 /* find extent for this block */
2715 path = ext4_ext_find_extent(inode, end, NULL); 2804 path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
2716 if (IS_ERR(path)) { 2805 if (IS_ERR(path)) {
2717 ext4_journal_stop(handle); 2806 ext4_journal_stop(handle);
2718 return PTR_ERR(path); 2807 return PTR_ERR(path);
@@ -2754,6 +2843,7 @@ again:
2754 */ 2843 */
2755 err = ext4_split_extent_at(handle, inode, path, 2844 err = ext4_split_extent_at(handle, inode, path,
2756 end + 1, split_flag, 2845 end + 1, split_flag,
2846 EXT4_EX_NOCACHE |
2757 EXT4_GET_BLOCKS_PRE_IO | 2847 EXT4_GET_BLOCKS_PRE_IO |
2758 EXT4_GET_BLOCKS_METADATA_NOFAIL); 2848 EXT4_GET_BLOCKS_METADATA_NOFAIL);
2759 2849
@@ -2782,7 +2872,7 @@ again:
2782 path[0].p_hdr = ext_inode_hdr(inode); 2872 path[0].p_hdr = ext_inode_hdr(inode);
2783 i = 0; 2873 i = 0;
2784 2874
2785 if (ext4_ext_check(inode, path[0].p_hdr, depth)) { 2875 if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
2786 err = -EIO; 2876 err = -EIO;
2787 goto out; 2877 goto out;
2788 } 2878 }
@@ -2829,10 +2919,12 @@ again:
2829 ext_debug("move to level %d (block %llu)\n", 2919 ext_debug("move to level %d (block %llu)\n",
2830 i + 1, ext4_idx_pblock(path[i].p_idx)); 2920 i + 1, ext4_idx_pblock(path[i].p_idx));
2831 memset(path + i + 1, 0, sizeof(*path)); 2921 memset(path + i + 1, 0, sizeof(*path));
2832 bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx)); 2922 bh = read_extent_tree_block(inode,
2833 if (!bh) { 2923 ext4_idx_pblock(path[i].p_idx), depth - i - 1,
2924 EXT4_EX_NOCACHE);
2925 if (IS_ERR(bh)) {
2834 /* should we reset i_size? */ 2926 /* should we reset i_size? */
2835 err = -EIO; 2927 err = PTR_ERR(bh);
2836 break; 2928 break;
2837 } 2929 }
2838 /* Yield here to deal with large extent trees. 2930 /* Yield here to deal with large extent trees.
@@ -2842,11 +2934,6 @@ again:
2842 err = -EIO; 2934 err = -EIO;
2843 break; 2935 break;
2844 } 2936 }
2845 if (ext4_ext_check_block(inode, ext_block_hdr(bh),
2846 depth - i - 1, bh)) {
2847 err = -EIO;
2848 break;
2849 }
2850 path[i + 1].p_bh = bh; 2937 path[i + 1].p_bh = bh;
2851 2938
2852 /* save actual number of indexes since this 2939 /* save actual number of indexes since this
@@ -2961,6 +3048,23 @@ void ext4_ext_release(struct super_block *sb)
2961#endif 3048#endif
2962} 3049}
2963 3050
3051static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
3052{
3053 ext4_lblk_t ee_block;
3054 ext4_fsblk_t ee_pblock;
3055 unsigned int ee_len;
3056
3057 ee_block = le32_to_cpu(ex->ee_block);
3058 ee_len = ext4_ext_get_actual_len(ex);
3059 ee_pblock = ext4_ext_pblock(ex);
3060
3061 if (ee_len == 0)
3062 return 0;
3063
3064 return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
3065 EXTENT_STATUS_WRITTEN);
3066}
3067
2964/* FIXME!! we need to try to merge to left or right after zero-out */ 3068/* FIXME!! we need to try to merge to left or right after zero-out */
2965static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) 3069static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2966{ 3070{
@@ -3113,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle,
3113 goto fix_extent_len; 3217 goto fix_extent_len;
3114 3218
3115 /* update extent status tree */ 3219 /* update extent status tree */
3116 err = ext4_es_zeroout(inode, &zero_ex); 3220 err = ext4_zeroout_es(inode, &zero_ex);
3117 3221
3118 goto out; 3222 goto out;
3119 } else if (err) 3223 } else if (err)
@@ -3133,7 +3237,7 @@ fix_extent_len:
3133 * ext4_split_extents() splits an extent and mark extent which is covered 3237 * ext4_split_extents() splits an extent and mark extent which is covered
3134 * by @map as split_flags indicates 3238 * by @map as split_flags indicates
3135 * 3239 *
3136 * It may result in splitting the extent into multiple extents (upto three) 3240 * It may result in splitting the extent into multiple extents (up to three)
3137 * There are three possibilities: 3241 * There are three possibilities:
3138 * a> There is no split required 3242 * a> There is no split required
3139 * b> Splits in two extents: Split is happening at either end of the extent 3243 * b> Splits in two extents: Split is happening at either end of the extent
@@ -3181,7 +3285,7 @@ static int ext4_split_extent(handle_t *handle,
3181 * result in split of original leaf or extent zeroout. 3285 * result in split of original leaf or extent zeroout.
3182 */ 3286 */
3183 ext4_ext_drop_refs(path); 3287 ext4_ext_drop_refs(path);
3184 path = ext4_ext_find_extent(inode, map->m_lblk, path); 3288 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
3185 if (IS_ERR(path)) 3289 if (IS_ERR(path))
3186 return PTR_ERR(path); 3290 return PTR_ERR(path);
3187 depth = ext_depth(inode); 3291 depth = ext_depth(inode);
@@ -3464,7 +3568,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3464out: 3568out:
3465 /* If we have gotten a failure, don't zero out status tree */ 3569 /* If we have gotten a failure, don't zero out status tree */
3466 if (!err) 3570 if (!err)
3467 err = ext4_es_zeroout(inode, &zero_ex); 3571 err = ext4_zeroout_es(inode, &zero_ex);
3468 return err ? err : allocated; 3572 return err ? err : allocated;
3469} 3573}
3470 3574
@@ -3565,7 +3669,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3565 if (err < 0) 3669 if (err < 0)
3566 goto out; 3670 goto out;
3567 ext4_ext_drop_refs(path); 3671 ext4_ext_drop_refs(path);
3568 path = ext4_ext_find_extent(inode, map->m_lblk, path); 3672 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
3569 if (IS_ERR(path)) { 3673 if (IS_ERR(path)) {
3570 err = PTR_ERR(path); 3674 err = PTR_ERR(path);
3571 goto out; 3675 goto out;
@@ -4052,7 +4156,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4052 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); 4156 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
4053 4157
4054 /* find extent for this block */ 4158 /* find extent for this block */
4055 path = ext4_ext_find_extent(inode, map->m_lblk, NULL); 4159 path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0);
4056 if (IS_ERR(path)) { 4160 if (IS_ERR(path)) {
4057 err = PTR_ERR(path); 4161 err = PTR_ERR(path);
4058 path = NULL; 4162 path = NULL;
@@ -4744,6 +4848,12 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4744 return error; 4848 return error;
4745 } 4849 }
4746 4850
4851 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4852 error = ext4_ext_precache(inode);
4853 if (error)
4854 return error;
4855 }
4856
4747 /* fallback to generic here if not in extents fmt */ 4857 /* fallback to generic here if not in extents fmt */
4748 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 4858 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4749 return generic_block_fiemap(inode, fieinfo, start, len, 4859 return generic_block_fiemap(inode, fieinfo, start, len,
@@ -4771,6 +4881,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4771 error = ext4_fill_fiemap_extents(inode, start_blk, 4881 error = ext4_fill_fiemap_extents(inode, start_blk,
4772 len_blks, fieinfo); 4882 len_blks, fieinfo);
4773 } 4883 }
4774 4884 ext4_es_lru_add(inode);
4775 return error; 4885 return error;
4776} 4886}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 91cb110da1b4..2d1bdbe78c04 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -13,7 +13,6 @@
13#include <linux/list_sort.h> 13#include <linux/list_sort.h>
14#include "ext4.h" 14#include "ext4.h"
15#include "extents_status.h" 15#include "extents_status.h"
16#include "ext4_extents.h"
17 16
18#include <trace/events/ext4.h> 17#include <trace/events/ext4.h>
19 18
@@ -263,7 +262,7 @@ void ext4_es_find_delayed_extent_range(struct inode *inode,
263 if (tree->cache_es) { 262 if (tree->cache_es) {
264 es1 = tree->cache_es; 263 es1 = tree->cache_es;
265 if (in_range(lblk, es1->es_lblk, es1->es_len)) { 264 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
266 es_debug("%u cached by [%u/%u) %llu %llx\n", 265 es_debug("%u cached by [%u/%u) %llu %x\n",
267 lblk, es1->es_lblk, es1->es_len, 266 lblk, es1->es_lblk, es1->es_len,
268 ext4_es_pblock(es1), ext4_es_status(es1)); 267 ext4_es_pblock(es1), ext4_es_status(es1));
269 goto out; 268 goto out;
@@ -409,6 +408,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
409} 408}
410 409
411#ifdef ES_AGGRESSIVE_TEST 410#ifdef ES_AGGRESSIVE_TEST
411#include "ext4_extents.h" /* Needed when ES_AGGRESSIVE_TEST is defined */
412
412static void ext4_es_insert_extent_ext_check(struct inode *inode, 413static void ext4_es_insert_extent_ext_check(struct inode *inode,
413 struct extent_status *es) 414 struct extent_status *es)
414{ 415{
@@ -419,7 +420,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
419 unsigned short ee_len; 420 unsigned short ee_len;
420 int depth, ee_status, es_status; 421 int depth, ee_status, es_status;
421 422
422 path = ext4_ext_find_extent(inode, es->es_lblk, NULL); 423 path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
423 if (IS_ERR(path)) 424 if (IS_ERR(path))
424 return; 425 return;
425 426
@@ -641,13 +642,13 @@ out:
641 */ 642 */
642int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, 643int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
643 ext4_lblk_t len, ext4_fsblk_t pblk, 644 ext4_lblk_t len, ext4_fsblk_t pblk,
644 unsigned long long status) 645 unsigned int status)
645{ 646{
646 struct extent_status newes; 647 struct extent_status newes;
647 ext4_lblk_t end = lblk + len - 1; 648 ext4_lblk_t end = lblk + len - 1;
648 int err = 0; 649 int err = 0;
649 650
650 es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n", 651 es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
651 lblk, len, pblk, status, inode->i_ino); 652 lblk, len, pblk, status, inode->i_ino);
652 653
653 if (!len) 654 if (!len)
@@ -684,6 +685,38 @@ error:
684} 685}
685 686
686/* 687/*
688 * ext4_es_cache_extent() inserts information into the extent status
689 * tree if and only if there isn't information about the range in
690 * question already.
691 */
692void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
693 ext4_lblk_t len, ext4_fsblk_t pblk,
694 unsigned int status)
695{
696 struct extent_status *es;
697 struct extent_status newes;
698 ext4_lblk_t end = lblk + len - 1;
699
700 newes.es_lblk = lblk;
701 newes.es_len = len;
702 ext4_es_store_pblock(&newes, pblk);
703 ext4_es_store_status(&newes, status);
704 trace_ext4_es_cache_extent(inode, &newes);
705
706 if (!len)
707 return;
708
709 BUG_ON(end < lblk);
710
711 write_lock(&EXT4_I(inode)->i_es_lock);
712
713 es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
714 if (!es || es->es_lblk > end)
715 __es_insert_extent(inode, &newes);
716 write_unlock(&EXT4_I(inode)->i_es_lock);
717}
718
719/*
687 * ext4_es_lookup_extent() looks up an extent in extent status tree. 720 * ext4_es_lookup_extent() looks up an extent in extent status tree.
688 * 721 *
689 * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks. 722 * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
@@ -871,23 +904,6 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
871 return err; 904 return err;
872} 905}
873 906
874int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
875{
876 ext4_lblk_t ee_block;
877 ext4_fsblk_t ee_pblock;
878 unsigned int ee_len;
879
880 ee_block = le32_to_cpu(ex->ee_block);
881 ee_len = ext4_ext_get_actual_len(ex);
882 ee_pblock = ext4_ext_pblock(ex);
883
884 if (ee_len == 0)
885 return 0;
886
887 return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
888 EXTENT_STATUS_WRITTEN);
889}
890
891static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, 907static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
892 struct list_head *b) 908 struct list_head *b)
893{ 909{
@@ -895,6 +911,12 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
895 eia = list_entry(a, struct ext4_inode_info, i_es_lru); 911 eia = list_entry(a, struct ext4_inode_info, i_es_lru);
896 eib = list_entry(b, struct ext4_inode_info, i_es_lru); 912 eib = list_entry(b, struct ext4_inode_info, i_es_lru);
897 913
914 if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
915 !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
916 return 1;
917 if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
918 ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
919 return -1;
898 if (eia->i_touch_when == eib->i_touch_when) 920 if (eia->i_touch_when == eib->i_touch_when)
899 return 0; 921 return 0;
900 if (time_after(eia->i_touch_when, eib->i_touch_when)) 922 if (time_after(eia->i_touch_when, eib->i_touch_when))
@@ -908,21 +930,13 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
908{ 930{
909 struct ext4_inode_info *ei; 931 struct ext4_inode_info *ei;
910 struct list_head *cur, *tmp; 932 struct list_head *cur, *tmp;
911 LIST_HEAD(skiped); 933 LIST_HEAD(skipped);
912 int ret, nr_shrunk = 0; 934 int ret, nr_shrunk = 0;
935 int retried = 0, skip_precached = 1, nr_skipped = 0;
913 936
914 spin_lock(&sbi->s_es_lru_lock); 937 spin_lock(&sbi->s_es_lru_lock);
915 938
916 /* 939retry:
917 * If the inode that is at the head of LRU list is newer than
918 * last_sorted time, that means that we need to sort this list.
919 */
920 ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru);
921 if (sbi->s_es_last_sorted < ei->i_touch_when) {
922 list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
923 sbi->s_es_last_sorted = jiffies;
924 }
925
926 list_for_each_safe(cur, tmp, &sbi->s_es_lru) { 940 list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
927 /* 941 /*
928 * If we have already reclaimed all extents from extent 942 * If we have already reclaimed all extents from extent
@@ -933,9 +947,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
933 947
934 ei = list_entry(cur, struct ext4_inode_info, i_es_lru); 948 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
935 949
936 /* Skip the inode that is newer than the last_sorted time */ 950 /*
937 if (sbi->s_es_last_sorted < ei->i_touch_when) { 951 * Skip the inode that is newer than the last_sorted
938 list_move_tail(cur, &skiped); 952 * time. Normally we try hard to avoid shrinking
953 * precached inodes, but we will as a last resort.
954 */
955 if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
956 (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
957 EXT4_STATE_EXT_PRECACHED))) {
958 nr_skipped++;
959 list_move_tail(cur, &skipped);
939 continue; 960 continue;
940 } 961 }
941 962
@@ -955,11 +976,33 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
955 } 976 }
956 977
957 /* Move the newer inodes into the tail of the LRU list. */ 978 /* Move the newer inodes into the tail of the LRU list. */
958 list_splice_tail(&skiped, &sbi->s_es_lru); 979 list_splice_tail(&skipped, &sbi->s_es_lru);
980 INIT_LIST_HEAD(&skipped);
981
982 /*
983 * If we skipped any inodes, and we weren't able to make any
984 * forward progress, sort the list and try again.
985 */
986 if ((nr_shrunk == 0) && nr_skipped && !retried) {
987 retried++;
988 list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
989 sbi->s_es_last_sorted = jiffies;
990 ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
991 i_es_lru);
992 /*
993 * If there are no non-precached inodes left on the
994 * list, start releasing precached extents.
995 */
996 if (ext4_test_inode_state(&ei->vfs_inode,
997 EXT4_STATE_EXT_PRECACHED))
998 skip_precached = 0;
999 goto retry;
1000 }
1001
959 spin_unlock(&sbi->s_es_lru_lock); 1002 spin_unlock(&sbi->s_es_lru_lock);
960 1003
961 if (locked_ei && nr_shrunk == 0) 1004 if (locked_ei && nr_shrunk == 0)
962 nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); 1005 nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
963 1006
964 return nr_shrunk; 1007 return nr_shrunk;
965} 1008}
@@ -1034,10 +1077,16 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
1034 struct rb_node *node; 1077 struct rb_node *node;
1035 struct extent_status *es; 1078 struct extent_status *es;
1036 int nr_shrunk = 0; 1079 int nr_shrunk = 0;
1080 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
1081 DEFAULT_RATELIMIT_BURST);
1037 1082
1038 if (ei->i_es_lru_nr == 0) 1083 if (ei->i_es_lru_nr == 0)
1039 return 0; 1084 return 0;
1040 1085
1086 if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
1087 __ratelimit(&_rs))
1088 ext4_warning(inode->i_sb, "forced shrink of precached extents");
1089
1041 node = rb_first(&tree->root); 1090 node = rb_first(&tree->root);
1042 while (node != NULL) { 1091 while (node != NULL) {
1043 es = rb_entry(node, struct extent_status, rb_node); 1092 es = rb_entry(node, struct extent_status, rb_node);
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index e936730cc5b0..167f4ab8ecc3 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -29,16 +29,26 @@
29/* 29/*
30 * These flags live in the high bits of extent_status.es_pblk 30 * These flags live in the high bits of extent_status.es_pblk
31 */ 31 */
32#define EXTENT_STATUS_WRITTEN (1ULL << 63) 32#define ES_SHIFT 60
33#define EXTENT_STATUS_UNWRITTEN (1ULL << 62) 33
34#define EXTENT_STATUS_DELAYED (1ULL << 61) 34#define EXTENT_STATUS_WRITTEN (1 << 3)
35#define EXTENT_STATUS_HOLE (1ULL << 60) 35#define EXTENT_STATUS_UNWRITTEN (1 << 2)
36#define EXTENT_STATUS_DELAYED (1 << 1)
37#define EXTENT_STATUS_HOLE (1 << 0)
36 38
37#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \ 39#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
38 EXTENT_STATUS_UNWRITTEN | \ 40 EXTENT_STATUS_UNWRITTEN | \
39 EXTENT_STATUS_DELAYED | \ 41 EXTENT_STATUS_DELAYED | \
40 EXTENT_STATUS_HOLE) 42 EXTENT_STATUS_HOLE)
41 43
44#define ES_WRITTEN (1ULL << 63)
45#define ES_UNWRITTEN (1ULL << 62)
46#define ES_DELAYED (1ULL << 61)
47#define ES_HOLE (1ULL << 60)
48
49#define ES_MASK (ES_WRITTEN | ES_UNWRITTEN | \
50 ES_DELAYED | ES_HOLE)
51
42struct ext4_sb_info; 52struct ext4_sb_info;
43struct ext4_extent; 53struct ext4_extent;
44 54
@@ -60,7 +70,10 @@ extern void ext4_es_init_tree(struct ext4_es_tree *tree);
60 70
61extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, 71extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
62 ext4_lblk_t len, ext4_fsblk_t pblk, 72 ext4_lblk_t len, ext4_fsblk_t pblk,
63 unsigned long long status); 73 unsigned int status);
74extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
75 ext4_lblk_t len, ext4_fsblk_t pblk,
76 unsigned int status);
64extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, 77extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
65 ext4_lblk_t len); 78 ext4_lblk_t len);
66extern void ext4_es_find_delayed_extent_range(struct inode *inode, 79extern void ext4_es_find_delayed_extent_range(struct inode *inode,
@@ -68,36 +81,35 @@ extern void ext4_es_find_delayed_extent_range(struct inode *inode,
68 struct extent_status *es); 81 struct extent_status *es);
69extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, 82extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
70 struct extent_status *es); 83 struct extent_status *es);
71extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
72 84
73static inline int ext4_es_is_written(struct extent_status *es) 85static inline int ext4_es_is_written(struct extent_status *es)
74{ 86{
75 return (es->es_pblk & EXTENT_STATUS_WRITTEN) != 0; 87 return (es->es_pblk & ES_WRITTEN) != 0;
76} 88}
77 89
78static inline int ext4_es_is_unwritten(struct extent_status *es) 90static inline int ext4_es_is_unwritten(struct extent_status *es)
79{ 91{
80 return (es->es_pblk & EXTENT_STATUS_UNWRITTEN) != 0; 92 return (es->es_pblk & ES_UNWRITTEN) != 0;
81} 93}
82 94
83static inline int ext4_es_is_delayed(struct extent_status *es) 95static inline int ext4_es_is_delayed(struct extent_status *es)
84{ 96{
85 return (es->es_pblk & EXTENT_STATUS_DELAYED) != 0; 97 return (es->es_pblk & ES_DELAYED) != 0;
86} 98}
87 99
88static inline int ext4_es_is_hole(struct extent_status *es) 100static inline int ext4_es_is_hole(struct extent_status *es)
89{ 101{
90 return (es->es_pblk & EXTENT_STATUS_HOLE) != 0; 102 return (es->es_pblk & ES_HOLE) != 0;
91} 103}
92 104
93static inline ext4_fsblk_t ext4_es_status(struct extent_status *es) 105static inline unsigned int ext4_es_status(struct extent_status *es)
94{ 106{
95 return (es->es_pblk & EXTENT_STATUS_FLAGS); 107 return es->es_pblk >> ES_SHIFT;
96} 108}
97 109
98static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es) 110static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
99{ 111{
100 return (es->es_pblk & ~EXTENT_STATUS_FLAGS); 112 return es->es_pblk & ~ES_MASK;
101} 113}
102 114
103static inline void ext4_es_store_pblock(struct extent_status *es, 115static inline void ext4_es_store_pblock(struct extent_status *es,
@@ -105,19 +117,16 @@ static inline void ext4_es_store_pblock(struct extent_status *es,
105{ 117{
106 ext4_fsblk_t block; 118 ext4_fsblk_t block;
107 119
108 block = (pb & ~EXTENT_STATUS_FLAGS) | 120 block = (pb & ~ES_MASK) | (es->es_pblk & ES_MASK);
109 (es->es_pblk & EXTENT_STATUS_FLAGS);
110 es->es_pblk = block; 121 es->es_pblk = block;
111} 122}
112 123
113static inline void ext4_es_store_status(struct extent_status *es, 124static inline void ext4_es_store_status(struct extent_status *es,
114 unsigned long long status) 125 unsigned int status)
115{ 126{
116 ext4_fsblk_t block; 127 es->es_pblk = (((ext4_fsblk_t)
117 128 (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
118 block = (status & EXTENT_STATUS_FLAGS) | 129 (es->es_pblk & ~ES_MASK));
119 (es->es_pblk & ~EXTENT_STATUS_FLAGS);
120 es->es_pblk = block;
121} 130}
122 131
123extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); 132extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 8bf5999875ee..137193ff389b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -70,18 +70,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
70 ext4_group_t block_group, 70 ext4_group_t block_group,
71 struct ext4_group_desc *gdp) 71 struct ext4_group_desc *gdp)
72{ 72{
73 struct ext4_group_info *grp;
73 J_ASSERT_BH(bh, buffer_locked(bh)); 74 J_ASSERT_BH(bh, buffer_locked(bh));
74 75
75 /* If checksum is bad mark all blocks and inodes use to prevent 76 /* If checksum is bad mark all blocks and inodes use to prevent
76 * allocation, essentially implementing a per-group read-only flag. */ 77 * allocation, essentially implementing a per-group read-only flag. */
77 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 78 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
78 ext4_error(sb, "Checksum bad for group %u", block_group); 79 ext4_error(sb, "Checksum bad for group %u", block_group);
79 ext4_free_group_clusters_set(sb, gdp, 0); 80 grp = ext4_get_group_info(sb, block_group);
80 ext4_free_inodes_set(sb, gdp, 0); 81 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
81 ext4_itable_unused_set(sb, gdp, 0); 82 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
82 memset(bh->b_data, 0xff, sb->s_blocksize);
83 ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
84 EXT4_INODES_PER_GROUP(sb) / 8);
85 return 0; 83 return 0;
86 } 84 }
87 85
@@ -117,6 +115,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
117 struct ext4_group_desc *desc; 115 struct ext4_group_desc *desc;
118 struct buffer_head *bh = NULL; 116 struct buffer_head *bh = NULL;
119 ext4_fsblk_t bitmap_blk; 117 ext4_fsblk_t bitmap_blk;
118 struct ext4_group_info *grp;
120 119
121 desc = ext4_get_group_desc(sb, block_group, NULL); 120 desc = ext4_get_group_desc(sb, block_group, NULL);
122 if (!desc) 121 if (!desc)
@@ -185,6 +184,8 @@ verify:
185 put_bh(bh); 184 put_bh(bh);
186 ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " 185 ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
187 "inode_bitmap = %llu", block_group, bitmap_blk); 186 "inode_bitmap = %llu", block_group, bitmap_blk);
187 grp = ext4_get_group_info(sb, block_group);
188 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
188 return NULL; 189 return NULL;
189 } 190 }
190 ext4_unlock_group(sb, block_group); 191 ext4_unlock_group(sb, block_group);
@@ -221,6 +222,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
221 struct ext4_super_block *es; 222 struct ext4_super_block *es;
222 struct ext4_sb_info *sbi; 223 struct ext4_sb_info *sbi;
223 int fatal = 0, err, count, cleared; 224 int fatal = 0, err, count, cleared;
225 struct ext4_group_info *grp;
224 226
225 if (!sb) { 227 if (!sb) {
226 printk(KERN_ERR "EXT4-fs: %s:%d: inode on " 228 printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
@@ -266,7 +268,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
266 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 268 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
267 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 269 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
268 bitmap_bh = ext4_read_inode_bitmap(sb, block_group); 270 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
269 if (!bitmap_bh) 271 /* Don't bother if the inode bitmap is corrupt. */
272 grp = ext4_get_group_info(sb, block_group);
273 if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || !bitmap_bh)
270 goto error_return; 274 goto error_return;
271 275
272 BUFFER_TRACE(bitmap_bh, "get_write_access"); 276 BUFFER_TRACE(bitmap_bh, "get_write_access");
@@ -315,8 +319,10 @@ out:
315 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 319 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
316 if (!fatal) 320 if (!fatal)
317 fatal = err; 321 fatal = err;
318 } else 322 } else {
319 ext4_error(sb, "bit already cleared for inode %lu", ino); 323 ext4_error(sb, "bit already cleared for inode %lu", ino);
324 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
325 }
320 326
321error_return: 327error_return:
322 brelse(bitmap_bh); 328 brelse(bitmap_bh);
@@ -625,6 +631,51 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
625} 631}
626 632
627/* 633/*
634 * In no journal mode, if an inode has recently been deleted, we want
635 * to avoid reusing it until we're reasonably sure the inode table
636 * block has been written back to disk. (Yes, these values are
637 * somewhat arbitrary...)
638 */
639#define RECENTCY_MIN 5
640#define RECENTCY_DIRTY 30
641
642static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
643{
644 struct ext4_group_desc *gdp;
645 struct ext4_inode *raw_inode;
646 struct buffer_head *bh;
647 unsigned long dtime, now;
648 int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
649 int offset, ret = 0, recentcy = RECENTCY_MIN;
650
651 gdp = ext4_get_group_desc(sb, group, NULL);
652 if (unlikely(!gdp))
653 return 0;
654
655 bh = sb_getblk(sb, ext4_inode_table(sb, gdp) +
656 (ino / inodes_per_block));
657 if (unlikely(!bh) || !buffer_uptodate(bh))
658 /*
659 * If the block is not in the buffer cache, then it
660 * must have been written out.
661 */
662 goto out;
663
664 offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
665 raw_inode = (struct ext4_inode *) (bh->b_data + offset);
666 dtime = le32_to_cpu(raw_inode->i_dtime);
667 now = get_seconds();
668 if (buffer_dirty(bh))
669 recentcy += RECENTCY_DIRTY;
670
671 if (dtime && (dtime < now) && (now < dtime + recentcy))
672 ret = 1;
673out:
674 brelse(bh);
675 return ret;
676}
677
678/*
628 * There are two policies for allocating an inode. If the new inode is 679 * There are two policies for allocating an inode. If the new inode is
629 * a directory, then a forward search is made for a block group with both 680 * a directory, then a forward search is made for a block group with both
630 * free space and a low directory-to-inode ratio; if that fails, then of 681 * free space and a low directory-to-inode ratio; if that fails, then of
@@ -652,6 +703,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
652 struct inode *ret; 703 struct inode *ret;
653 ext4_group_t i; 704 ext4_group_t i;
654 ext4_group_t flex_group; 705 ext4_group_t flex_group;
706 struct ext4_group_info *grp;
655 707
656 /* Cannot create files in a deleted directory */ 708 /* Cannot create files in a deleted directory */
657 if (!dir || !dir->i_nlink) 709 if (!dir || !dir->i_nlink)
@@ -725,10 +777,22 @@ got_group:
725 continue; 777 continue;
726 } 778 }
727 779
780 grp = ext4_get_group_info(sb, group);
781 /* Skip groups with already-known suspicious inode tables */
782 if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
783 if (++group == ngroups)
784 group = 0;
785 continue;
786 }
787
728 brelse(inode_bitmap_bh); 788 brelse(inode_bitmap_bh);
729 inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); 789 inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
730 if (!inode_bitmap_bh) 790 /* Skip groups with suspicious inode tables */
731 goto out; 791 if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || !inode_bitmap_bh) {
792 if (++group == ngroups)
793 group = 0;
794 continue;
795 }
732 796
733repeat_in_this_group: 797repeat_in_this_group:
734 ino = ext4_find_next_zero_bit((unsigned long *) 798 ino = ext4_find_next_zero_bit((unsigned long *)
@@ -741,6 +805,11 @@ repeat_in_this_group:
741 "inode=%lu", ino + 1); 805 "inode=%lu", ino + 1);
742 continue; 806 continue;
743 } 807 }
808 if ((EXT4_SB(sb)->s_journal == NULL) &&
809 recently_deleted(sb, group, ino)) {
810 ino++;
811 goto next_inode;
812 }
744 if (!handle) { 813 if (!handle) {
745 BUG_ON(nblocks <= 0); 814 BUG_ON(nblocks <= 0);
746 handle = __ext4_journal_start_sb(dir->i_sb, line_no, 815 handle = __ext4_journal_start_sb(dir->i_sb, line_no,
@@ -764,6 +833,7 @@ repeat_in_this_group:
764 ino++; /* the inode bitmap is zero-based */ 833 ino++; /* the inode bitmap is zero-based */
765 if (!ret2) 834 if (!ret2)
766 goto got; /* we grabbed the inode! */ 835 goto got; /* we grabbed the inode! */
836next_inode:
767 if (ino < EXT4_INODES_PER_GROUP(sb)) 837 if (ino < EXT4_INODES_PER_GROUP(sb))
768 goto repeat_in_this_group; 838 goto repeat_in_this_group;
769next_group: 839next_group:
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 87b30cd357e7..594009f5f523 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -23,7 +23,6 @@
23#include <linux/aio.h> 23#include <linux/aio.h>
24#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
25#include "truncate.h" 25#include "truncate.h"
26#include "ext4_extents.h" /* Needed for EXT_MAX_BLOCKS */
27 26
28#include <trace/events/ext4.h> 27#include <trace/events/ext4.h>
29 28
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c2ca04e67a4f..9115f2807515 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -553,7 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
553 } 553 }
554 if (retval > 0) { 554 if (retval > 0) {
555 int ret; 555 int ret;
556 unsigned long long status; 556 unsigned int status;
557 557
558 if (unlikely(retval != map->m_len)) { 558 if (unlikely(retval != map->m_len)) {
559 ext4_warning(inode->i_sb, 559 ext4_warning(inode->i_sb,
@@ -653,7 +653,7 @@ found:
653 653
654 if (retval > 0) { 654 if (retval > 0) {
655 int ret; 655 int ret;
656 unsigned long long status; 656 unsigned int status;
657 657
658 if (unlikely(retval != map->m_len)) { 658 if (unlikely(retval != map->m_len)) {
659 ext4_warning(inode->i_sb, 659 ext4_warning(inode->i_sb,
@@ -969,7 +969,8 @@ retry_journal:
969 ext4_journal_stop(handle); 969 ext4_journal_stop(handle);
970 goto retry_grab; 970 goto retry_grab;
971 } 971 }
972 wait_on_page_writeback(page); 972 /* In case writeback began while the page was unlocked */
973 wait_for_stable_page(page);
973 974
974 if (ext4_should_dioread_nolock(inode)) 975 if (ext4_should_dioread_nolock(inode))
975 ret = __block_write_begin(page, pos, len, ext4_get_block_write); 976 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
@@ -1633,7 +1634,7 @@ add_delayed:
1633 set_buffer_delay(bh); 1634 set_buffer_delay(bh);
1634 } else if (retval > 0) { 1635 } else if (retval > 0) {
1635 int ret; 1636 int ret;
1636 unsigned long long status; 1637 unsigned int status;
1637 1638
1638 if (unlikely(retval != map->m_len)) { 1639 if (unlikely(retval != map->m_len)) {
1639 ext4_warning(inode->i_sb, 1640 ext4_warning(inode->i_sb,
@@ -1890,12 +1891,32 @@ static int ext4_writepage(struct page *page,
1890 return ret; 1891 return ret;
1891} 1892}
1892 1893
1894static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
1895{
1896 int len;
1897 loff_t size = i_size_read(mpd->inode);
1898 int err;
1899
1900 BUG_ON(page->index != mpd->first_page);
1901 if (page->index == size >> PAGE_CACHE_SHIFT)
1902 len = size & ~PAGE_CACHE_MASK;
1903 else
1904 len = PAGE_CACHE_SIZE;
1905 clear_page_dirty_for_io(page);
1906 err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
1907 if (!err)
1908 mpd->wbc->nr_to_write--;
1909 mpd->first_page++;
1910
1911 return err;
1912}
1913
1893#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) 1914#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
1894 1915
1895/* 1916/*
1896 * mballoc gives us at most this number of blocks... 1917 * mballoc gives us at most this number of blocks...
1897 * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). 1918 * XXX: That seems to be only a limitation of ext4_mb_normalize_request().
1898 * The rest of mballoc seems to handle chunks upto full group size. 1919 * The rest of mballoc seems to handle chunks up to full group size.
1899 */ 1920 */
1900#define MAX_WRITEPAGES_EXTENT_LEN 2048 1921#define MAX_WRITEPAGES_EXTENT_LEN 2048
1901 1922
@@ -1904,82 +1925,94 @@ static int ext4_writepage(struct page *page,
1904 * 1925 *
1905 * @mpd - extent of blocks 1926 * @mpd - extent of blocks
1906 * @lblk - logical number of the block in the file 1927 * @lblk - logical number of the block in the file
1907 * @b_state - b_state of the buffer head added 1928 * @bh - buffer head we want to add to the extent
1908 * 1929 *
1909 * the function is used to collect contig. blocks in same state 1930 * The function is used to collect contig. blocks in the same state. If the
1931 * buffer doesn't require mapping for writeback and we haven't started the
1932 * extent of buffers to map yet, the function returns 'true' immediately - the
1933 * caller can write the buffer right away. Otherwise the function returns true
1934 * if the block has been added to the extent, false if the block couldn't be
1935 * added.
1910 */ 1936 */
1911static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, 1937static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
1912 unsigned long b_state) 1938 struct buffer_head *bh)
1913{ 1939{
1914 struct ext4_map_blocks *map = &mpd->map; 1940 struct ext4_map_blocks *map = &mpd->map;
1915 1941
1916 /* Don't go larger than mballoc is willing to allocate */ 1942 /* Buffer that doesn't need mapping for writeback? */
1917 if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) 1943 if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
1918 return 0; 1944 (!buffer_delay(bh) && !buffer_unwritten(bh))) {
1945 /* So far no extent to map => we write the buffer right away */
1946 if (map->m_len == 0)
1947 return true;
1948 return false;
1949 }
1919 1950
1920 /* First block in the extent? */ 1951 /* First block in the extent? */
1921 if (map->m_len == 0) { 1952 if (map->m_len == 0) {
1922 map->m_lblk = lblk; 1953 map->m_lblk = lblk;
1923 map->m_len = 1; 1954 map->m_len = 1;
1924 map->m_flags = b_state & BH_FLAGS; 1955 map->m_flags = bh->b_state & BH_FLAGS;
1925 return 1; 1956 return true;
1926 } 1957 }
1927 1958
1959 /* Don't go larger than mballoc is willing to allocate */
1960 if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
1961 return false;
1962
1928 /* Can we merge the block to our big extent? */ 1963 /* Can we merge the block to our big extent? */
1929 if (lblk == map->m_lblk + map->m_len && 1964 if (lblk == map->m_lblk + map->m_len &&
1930 (b_state & BH_FLAGS) == map->m_flags) { 1965 (bh->b_state & BH_FLAGS) == map->m_flags) {
1931 map->m_len++; 1966 map->m_len++;
1932 return 1; 1967 return true;
1933 } 1968 }
1934 return 0; 1969 return false;
1935} 1970}
1936 1971
1937static bool add_page_bufs_to_extent(struct mpage_da_data *mpd, 1972/*
1938 struct buffer_head *head, 1973 * mpage_process_page_bufs - submit page buffers for IO or add them to extent
1939 struct buffer_head *bh, 1974 *
1940 ext4_lblk_t lblk) 1975 * @mpd - extent of blocks for mapping
1976 * @head - the first buffer in the page
1977 * @bh - buffer we should start processing from
1978 * @lblk - logical number of the block in the file corresponding to @bh
1979 *
1980 * Walk through page buffers from @bh upto @head (exclusive) and either submit
1981 * the page for IO if all buffers in this page were mapped and there's no
1982 * accumulated extent of buffers to map or add buffers in the page to the
1983 * extent of buffers to map. The function returns 1 if the caller can continue
1984 * by processing the next page, 0 if it should stop adding buffers to the
1985 * extent to map because we cannot extend it anymore. It can also return value
1986 * < 0 in case of error during IO submission.
1987 */
1988static int mpage_process_page_bufs(struct mpage_da_data *mpd,
1989 struct buffer_head *head,
1990 struct buffer_head *bh,
1991 ext4_lblk_t lblk)
1941{ 1992{
1942 struct inode *inode = mpd->inode; 1993 struct inode *inode = mpd->inode;
1994 int err;
1943 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) 1995 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
1944 >> inode->i_blkbits; 1996 >> inode->i_blkbits;
1945 1997
1946 do { 1998 do {
1947 BUG_ON(buffer_locked(bh)); 1999 BUG_ON(buffer_locked(bh));
1948 2000
1949 if (!buffer_dirty(bh) || !buffer_mapped(bh) || 2001 if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
1950 (!buffer_delay(bh) && !buffer_unwritten(bh)) ||
1951 lblk >= blocks) {
1952 /* Found extent to map? */ 2002 /* Found extent to map? */
1953 if (mpd->map.m_len) 2003 if (mpd->map.m_len)
1954 return false; 2004 return 0;
1955 if (lblk >= blocks) 2005 /* Everything mapped so far and we hit EOF */
1956 return true; 2006 break;
1957 continue;
1958 } 2007 }
1959 if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state))
1960 return false;
1961 } while (lblk++, (bh = bh->b_this_page) != head); 2008 } while (lblk++, (bh = bh->b_this_page) != head);
1962 return true; 2009 /* So far everything mapped? Submit the page for IO. */
1963} 2010 if (mpd->map.m_len == 0) {
1964 2011 err = mpage_submit_page(mpd, head->b_page);
1965static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) 2012 if (err < 0)
1966{ 2013 return err;
1967 int len; 2014 }
1968 loff_t size = i_size_read(mpd->inode); 2015 return lblk < blocks;
1969 int err;
1970
1971 BUG_ON(page->index != mpd->first_page);
1972 if (page->index == size >> PAGE_CACHE_SHIFT)
1973 len = size & ~PAGE_CACHE_MASK;
1974 else
1975 len = PAGE_CACHE_SIZE;
1976 clear_page_dirty_for_io(page);
1977 err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
1978 if (!err)
1979 mpd->wbc->nr_to_write--;
1980 mpd->first_page++;
1981
1982 return err;
1983} 2016}
1984 2017
1985/* 2018/*
@@ -2003,8 +2036,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2003 struct inode *inode = mpd->inode; 2036 struct inode *inode = mpd->inode;
2004 struct buffer_head *head, *bh; 2037 struct buffer_head *head, *bh;
2005 int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; 2038 int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
2006 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
2007 >> inode->i_blkbits;
2008 pgoff_t start, end; 2039 pgoff_t start, end;
2009 ext4_lblk_t lblk; 2040 ext4_lblk_t lblk;
2010 sector_t pblock; 2041 sector_t pblock;
@@ -2026,7 +2057,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2026 2057
2027 if (page->index > end) 2058 if (page->index > end)
2028 break; 2059 break;
2029 /* Upto 'end' pages must be contiguous */ 2060 /* Up to 'end' pages must be contiguous */
2030 BUG_ON(page->index != start); 2061 BUG_ON(page->index != start);
2031 bh = head = page_buffers(page); 2062 bh = head = page_buffers(page);
2032 do { 2063 do {
@@ -2039,18 +2070,26 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2039 */ 2070 */
2040 mpd->map.m_len = 0; 2071 mpd->map.m_len = 0;
2041 mpd->map.m_flags = 0; 2072 mpd->map.m_flags = 0;
2042 add_page_bufs_to_extent(mpd, head, bh, 2073 /*
2043 lblk); 2074 * FIXME: If dioread_nolock supports
2075 * blocksize < pagesize, we need to make
2076 * sure we add size mapped so far to
2077 * io_end->size as the following call
2078 * can submit the page for IO.
2079 */
2080 err = mpage_process_page_bufs(mpd, head,
2081 bh, lblk);
2044 pagevec_release(&pvec); 2082 pagevec_release(&pvec);
2045 return 0; 2083 if (err > 0)
2084 err = 0;
2085 return err;
2046 } 2086 }
2047 if (buffer_delay(bh)) { 2087 if (buffer_delay(bh)) {
2048 clear_buffer_delay(bh); 2088 clear_buffer_delay(bh);
2049 bh->b_blocknr = pblock++; 2089 bh->b_blocknr = pblock++;
2050 } 2090 }
2051 clear_buffer_unwritten(bh); 2091 clear_buffer_unwritten(bh);
2052 } while (++lblk < blocks && 2092 } while (lblk++, (bh = bh->b_this_page) != head);
2053 (bh = bh->b_this_page) != head);
2054 2093
2055 /* 2094 /*
2056 * FIXME: This is going to break if dioread_nolock 2095 * FIXME: This is going to break if dioread_nolock
@@ -2199,12 +2238,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2199 2238
2200 /* Update on-disk size after IO is submitted */ 2239 /* Update on-disk size after IO is submitted */
2201 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; 2240 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
2202 if (disksize > i_size_read(inode))
2203 disksize = i_size_read(inode);
2204 if (disksize > EXT4_I(inode)->i_disksize) { 2241 if (disksize > EXT4_I(inode)->i_disksize) {
2205 int err2; 2242 int err2;
2206 2243
2207 ext4_update_i_disksize(inode, disksize); 2244 ext4_wb_update_i_disksize(inode, disksize);
2208 err2 = ext4_mark_inode_dirty(handle, inode); 2245 err2 = ext4_mark_inode_dirty(handle, inode);
2209 if (err2) 2246 if (err2)
2210 ext4_error(inode->i_sb, 2247 ext4_error(inode->i_sb,
@@ -2219,7 +2256,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2219/* 2256/*
2220 * Calculate the total number of credits to reserve for one writepages 2257 * Calculate the total number of credits to reserve for one writepages
2221 * iteration. This is called from ext4_writepages(). We map an extent of 2258 * iteration. This is called from ext4_writepages(). We map an extent of
2222 * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping 2259 * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping
2223 * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + 2260 * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN +
2224 * bpp - 1 blocks in bpp different extents. 2261 * bpp - 1 blocks in bpp different extents.
2225 */ 2262 */
@@ -2319,14 +2356,10 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
2319 lblk = ((ext4_lblk_t)page->index) << 2356 lblk = ((ext4_lblk_t)page->index) <<
2320 (PAGE_CACHE_SHIFT - blkbits); 2357 (PAGE_CACHE_SHIFT - blkbits);
2321 head = page_buffers(page); 2358 head = page_buffers(page);
2322 if (!add_page_bufs_to_extent(mpd, head, head, lblk)) 2359 err = mpage_process_page_bufs(mpd, head, head, lblk);
2360 if (err <= 0)
2323 goto out; 2361 goto out;
2324 /* So far everything mapped? Submit the page for IO. */ 2362 err = 0;
2325 if (mpd->map.m_len == 0) {
2326 err = mpage_submit_page(mpd, page);
2327 if (err < 0)
2328 goto out;
2329 }
2330 2363
2331 /* 2364 /*
2332 * Accumulated enough dirty pages? This doesn't apply 2365 * Accumulated enough dirty pages? This doesn't apply
@@ -2410,7 +2443,7 @@ static int ext4_writepages(struct address_space *mapping,
2410 2443
2411 if (ext4_should_dioread_nolock(inode)) { 2444 if (ext4_should_dioread_nolock(inode)) {
2412 /* 2445 /*
2413 * We may need to convert upto one extent per block in 2446 * We may need to convert up to one extent per block in
2414 * the page and we may dirty the inode. 2447 * the page and we may dirty the inode.
2415 */ 2448 */
2416 rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); 2449 rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
@@ -2646,7 +2679,7 @@ retry_journal:
2646 goto retry_grab; 2679 goto retry_grab;
2647 } 2680 }
2648 /* In case writeback began while the page was unlocked */ 2681 /* In case writeback began while the page was unlocked */
2649 wait_on_page_writeback(page); 2682 wait_for_stable_page(page);
2650 2683
2651 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); 2684 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
2652 if (ret < 0) { 2685 if (ret < 0) {
@@ -4566,7 +4599,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4566 ext4_journal_stop(handle); 4599 ext4_journal_stop(handle);
4567 } 4600 }
4568 4601
4569 if (attr->ia_valid & ATTR_SIZE) { 4602 if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
4603 handle_t *handle;
4604 loff_t oldsize = inode->i_size;
4570 4605
4571 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 4606 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4572 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 4607 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -4574,73 +4609,69 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4574 if (attr->ia_size > sbi->s_bitmap_maxbytes) 4609 if (attr->ia_size > sbi->s_bitmap_maxbytes)
4575 return -EFBIG; 4610 return -EFBIG;
4576 } 4611 }
4577 } 4612 if (S_ISREG(inode->i_mode) &&
4578 4613 (attr->ia_size < inode->i_size)) {
4579 if (S_ISREG(inode->i_mode) && 4614 if (ext4_should_order_data(inode)) {
4580 attr->ia_valid & ATTR_SIZE && 4615 error = ext4_begin_ordered_truncate(inode,
4581 (attr->ia_size < inode->i_size)) {
4582 handle_t *handle;
4583
4584 handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
4585 if (IS_ERR(handle)) {
4586 error = PTR_ERR(handle);
4587 goto err_out;
4588 }
4589 if (ext4_handle_valid(handle)) {
4590 error = ext4_orphan_add(handle, inode);
4591 orphan = 1;
4592 }
4593 EXT4_I(inode)->i_disksize = attr->ia_size;
4594 rc = ext4_mark_inode_dirty(handle, inode);
4595 if (!error)
4596 error = rc;
4597 ext4_journal_stop(handle);
4598
4599 if (ext4_should_order_data(inode)) {
4600 error = ext4_begin_ordered_truncate(inode,
4601 attr->ia_size); 4616 attr->ia_size);
4602 if (error) { 4617 if (error)
4603 /* Do as much error cleanup as possible */
4604 handle = ext4_journal_start(inode,
4605 EXT4_HT_INODE, 3);
4606 if (IS_ERR(handle)) {
4607 ext4_orphan_del(NULL, inode);
4608 goto err_out; 4618 goto err_out;
4609 } 4619 }
4610 ext4_orphan_del(handle, inode); 4620 handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
4611 orphan = 0; 4621 if (IS_ERR(handle)) {
4612 ext4_journal_stop(handle); 4622 error = PTR_ERR(handle);
4613 goto err_out; 4623 goto err_out;
4614 } 4624 }
4615 } 4625 if (ext4_handle_valid(handle)) {
4616 } 4626 error = ext4_orphan_add(handle, inode);
4617 4627 orphan = 1;
4618 if (attr->ia_valid & ATTR_SIZE) {
4619 if (attr->ia_size != inode->i_size) {
4620 loff_t oldsize = inode->i_size;
4621
4622 i_size_write(inode, attr->ia_size);
4623 /*
4624 * Blocks are going to be removed from the inode. Wait
4625 * for dio in flight. Temporarily disable
4626 * dioread_nolock to prevent livelock.
4627 */
4628 if (orphan) {
4629 if (!ext4_should_journal_data(inode)) {
4630 ext4_inode_block_unlocked_dio(inode);
4631 inode_dio_wait(inode);
4632 ext4_inode_resume_unlocked_dio(inode);
4633 } else
4634 ext4_wait_for_tail_page_commit(inode);
4635 } 4628 }
4629 down_write(&EXT4_I(inode)->i_data_sem);
4630 EXT4_I(inode)->i_disksize = attr->ia_size;
4631 rc = ext4_mark_inode_dirty(handle, inode);
4632 if (!error)
4633 error = rc;
4636 /* 4634 /*
4637 * Truncate pagecache after we've waited for commit 4635 * We have to update i_size under i_data_sem together
4638 * in data=journal mode to make pages freeable. 4636 * with i_disksize to avoid races with writeback code
4637 * running ext4_wb_update_i_disksize().
4639 */ 4638 */
4640 truncate_pagecache(inode, oldsize, inode->i_size); 4639 if (!error)
4640 i_size_write(inode, attr->ia_size);
4641 up_write(&EXT4_I(inode)->i_data_sem);
4642 ext4_journal_stop(handle);
4643 if (error) {
4644 ext4_orphan_del(NULL, inode);
4645 goto err_out;
4646 }
4647 } else
4648 i_size_write(inode, attr->ia_size);
4649
4650 /*
4651 * Blocks are going to be removed from the inode. Wait
4652 * for dio in flight. Temporarily disable
4653 * dioread_nolock to prevent livelock.
4654 */
4655 if (orphan) {
4656 if (!ext4_should_journal_data(inode)) {
4657 ext4_inode_block_unlocked_dio(inode);
4658 inode_dio_wait(inode);
4659 ext4_inode_resume_unlocked_dio(inode);
4660 } else
4661 ext4_wait_for_tail_page_commit(inode);
4641 } 4662 }
4642 ext4_truncate(inode); 4663 /*
4664 * Truncate pagecache after we've waited for commit
4665 * in data=journal mode to make pages freeable.
4666 */
4667 truncate_pagecache(inode, oldsize, inode->i_size);
4643 } 4668 }
4669 /*
4670 * We want to call ext4_truncate() even if attr->ia_size ==
4671 * inode->i_size for cases like truncation of fallocated space
4672 */
4673 if (attr->ia_valid & ATTR_SIZE)
4674 ext4_truncate(inode);
4644 4675
4645 if (!rc) { 4676 if (!rc) {
4646 setattr_copy(inode, attr); 4677 setattr_copy(inode, attr);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index c0427e2f6648..a569d335f804 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -17,7 +17,6 @@
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include "ext4_jbd2.h" 18#include "ext4_jbd2.h"
19#include "ext4.h" 19#include "ext4.h"
20#include "ext4_extents.h"
21 20
22#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1) 21#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
23 22
@@ -624,6 +623,8 @@ resizefs_out:
624 623
625 return 0; 624 return 0;
626 } 625 }
626 case EXT4_IOC_PRECACHE_EXTENTS:
627 return ext4_ext_precache(inode);
627 628
628 default: 629 default:
629 return -ENOTTY; 630 return -ENOTTY;
@@ -688,6 +689,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
688 case EXT4_IOC_MOVE_EXT: 689 case EXT4_IOC_MOVE_EXT:
689 case FITRIM: 690 case FITRIM:
690 case EXT4_IOC_RESIZE_FS: 691 case EXT4_IOC_RESIZE_FS:
692 case EXT4_IOC_PRECACHE_EXTENTS:
691 break; 693 break;
692 default: 694 default:
693 return -ENOIOCTLCMD; 695 return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4bbbf13bd743..a41e3ba8cfaa 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -751,13 +751,15 @@ void ext4_mb_generate_buddy(struct super_block *sb,
751 751
752 if (free != grp->bb_free) { 752 if (free != grp->bb_free) {
753 ext4_grp_locked_error(sb, group, 0, 0, 753 ext4_grp_locked_error(sb, group, 0, 0,
754 "%u clusters in bitmap, %u in gd", 754 "%u clusters in bitmap, %u in gd; "
755 "block bitmap corrupt.",
755 free, grp->bb_free); 756 free, grp->bb_free);
756 /* 757 /*
757 * If we intent to continue, we consider group descritor 758 * If we intend to continue, we consider group descriptor
758 * corrupt and update bb_free using bitmap value 759 * corrupt and update bb_free using bitmap value
759 */ 760 */
760 grp->bb_free = free; 761 grp->bb_free = free;
762 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
761 } 763 }
762 mb_set_largest_free_order(sb, grp); 764 mb_set_largest_free_order(sb, grp);
763 765
@@ -1398,6 +1400,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1398 1400
1399 BUG_ON(last >= (sb->s_blocksize << 3)); 1401 BUG_ON(last >= (sb->s_blocksize << 3));
1400 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); 1402 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1403 /* Don't bother if the block group is corrupt. */
1404 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
1405 return;
1406
1401 mb_check_buddy(e4b); 1407 mb_check_buddy(e4b);
1402 mb_free_blocks_double(inode, e4b, first, count); 1408 mb_free_blocks_double(inode, e4b, first, count);
1403 1409
@@ -1423,7 +1429,11 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1423 inode ? inode->i_ino : 0, 1429 inode ? inode->i_ino : 0,
1424 blocknr, 1430 blocknr,
1425 "freeing already freed block " 1431 "freeing already freed block "
1426 "(bit %u)", block); 1432 "(bit %u); block bitmap corrupt.",
1433 block);
1434 /* Mark the block group as corrupt. */
1435 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
1436 &e4b->bd_info->bb_state);
1427 mb_regenerate_buddy(e4b); 1437 mb_regenerate_buddy(e4b);
1428 goto done; 1438 goto done;
1429 } 1439 }
@@ -1790,6 +1800,11 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1790 if (err) 1800 if (err)
1791 return err; 1801 return err;
1792 1802
1803 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
1804 ext4_mb_unload_buddy(e4b);
1805 return 0;
1806 }
1807
1793 ext4_lock_group(ac->ac_sb, group); 1808 ext4_lock_group(ac->ac_sb, group);
1794 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, 1809 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1795 ac->ac_g_ex.fe_len, &ex); 1810 ac->ac_g_ex.fe_len, &ex);
@@ -1987,6 +2002,9 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1987 if (cr <= 2 && free < ac->ac_g_ex.fe_len) 2002 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
1988 return 0; 2003 return 0;
1989 2004
2005 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
2006 return 0;
2007
1990 /* We only do this if the grp has never been initialized */ 2008 /* We only do this if the grp has never been initialized */
1991 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { 2009 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1992 int ret = ext4_mb_init_group(ac->ac_sb, group); 2010 int ret = ext4_mb_init_group(ac->ac_sb, group);
@@ -4585,6 +4603,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4585 struct buffer_head *gd_bh; 4603 struct buffer_head *gd_bh;
4586 ext4_group_t block_group; 4604 ext4_group_t block_group;
4587 struct ext4_sb_info *sbi; 4605 struct ext4_sb_info *sbi;
4606 struct ext4_inode_info *ei = EXT4_I(inode);
4588 struct ext4_buddy e4b; 4607 struct ext4_buddy e4b;
4589 unsigned int count_clusters; 4608 unsigned int count_clusters;
4590 int err = 0; 4609 int err = 0;
@@ -4673,6 +4692,10 @@ do_more:
4673 overflow = 0; 4692 overflow = 0;
4674 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 4693 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4675 4694
4695 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
4696 ext4_get_group_info(sb, block_group))))
4697 return;
4698
4676 /* 4699 /*
4677 * Check to see if we are freeing blocks across a group 4700 * Check to see if we are freeing blocks across a group
4678 * boundary. 4701 * boundary.
@@ -4784,7 +4807,6 @@ do_more:
4784 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh); 4807 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
4785 ext4_group_desc_csum_set(sb, block_group, gdp); 4808 ext4_group_desc_csum_set(sb, block_group, gdp);
4786 ext4_unlock_group(sb, block_group); 4809 ext4_unlock_group(sb, block_group);
4787 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4788 4810
4789 if (sbi->s_log_groups_per_flex) { 4811 if (sbi->s_log_groups_per_flex) {
4790 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4812 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
@@ -4792,10 +4814,23 @@ do_more:
4792 &sbi->s_flex_groups[flex_group].free_clusters); 4814 &sbi->s_flex_groups[flex_group].free_clusters);
4793 } 4815 }
4794 4816
4795 ext4_mb_unload_buddy(&e4b); 4817 if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) {
4796 4818 percpu_counter_add(&sbi->s_dirtyclusters_counter,
4797 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) 4819 count_clusters);
4820 spin_lock(&ei->i_block_reservation_lock);
4821 if (flags & EXT4_FREE_BLOCKS_METADATA)
4822 ei->i_reserved_meta_blocks += count_clusters;
4823 else
4824 ei->i_reserved_data_blocks += count_clusters;
4825 spin_unlock(&ei->i_block_reservation_lock);
4826 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4827 dquot_reclaim_block(inode,
4828 EXT4_C2B(sbi, count_clusters));
4829 } else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4798 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); 4830 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4831 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4832
4833 ext4_mb_unload_buddy(&e4b);
4799 4834
4800 /* We dirtied the bitmap block */ 4835 /* We dirtied the bitmap block */
4801 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4836 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 49e8bdff9163..2ae73a80c19b 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -39,7 +39,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
39 newext.ee_block = cpu_to_le32(lb->first_block); 39 newext.ee_block = cpu_to_le32(lb->first_block);
40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); 40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
41 ext4_ext_store_pblock(&newext, lb->first_pblock); 41 ext4_ext_store_pblock(&newext, lb->first_pblock);
42 path = ext4_ext_find_extent(inode, lb->first_block, NULL); 42 path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
43 43
44 if (IS_ERR(path)) { 44 if (IS_ERR(path)) {
45 retval = PTR_ERR(path); 45 retval = PTR_ERR(path);
@@ -494,7 +494,7 @@ int ext4_ext_migrate(struct inode *inode)
494 * superblock modification. 494 * superblock modification.
495 * 495 *
496 * For the tmp_inode we already have committed the 496 * For the tmp_inode we already have committed the
497 * trascation that created the inode. Later as and 497 * transaction that created the inode. Later as and
498 * when we add extents we extent the journal 498 * when we add extents we extent the journal
499 */ 499 */
500 /* 500 /*
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index e86dddbd8296..7fa4d855dbd5 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -37,7 +37,7 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
37 int ret = 0; 37 int ret = 0;
38 struct ext4_ext_path *path; 38 struct ext4_ext_path *path;
39 39
40 path = ext4_ext_find_extent(inode, lblock, *orig_path); 40 path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE);
41 if (IS_ERR(path)) 41 if (IS_ERR(path))
42 ret = PTR_ERR(path); 42 ret = PTR_ERR(path);
43 else if (path[ext_depth(inode)].p_ext == NULL) 43 else if (path[ext_depth(inode)].p_ext == NULL)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 35f55a0dbc4b..1bec5a5c1e45 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3005,15 +3005,19 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
3005/* 3005/*
3006 * Anybody can rename anything with this: the permission checks are left to the 3006 * Anybody can rename anything with this: the permission checks are left to the
3007 * higher-level routines. 3007 * higher-level routines.
3008 *
3009 * n.b. old_{dentry,inode) refers to the source dentry/inode
3010 * while new_{dentry,inode) refers to the destination dentry/inode
3011 * This comes from rename(const char *oldpath, const char *newpath)
3008 */ 3012 */
3009static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, 3013static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3010 struct inode *new_dir, struct dentry *new_dentry) 3014 struct inode *new_dir, struct dentry *new_dentry)
3011{ 3015{
3012 handle_t *handle; 3016 handle_t *handle = NULL;
3013 struct inode *old_inode, *new_inode; 3017 struct inode *old_inode, *new_inode;
3014 struct buffer_head *old_bh, *new_bh, *dir_bh; 3018 struct buffer_head *old_bh, *new_bh, *dir_bh;
3015 struct ext4_dir_entry_2 *old_de, *new_de; 3019 struct ext4_dir_entry_2 *old_de, *new_de;
3016 int retval, force_da_alloc = 0; 3020 int retval;
3017 int inlined = 0, new_inlined = 0; 3021 int inlined = 0, new_inlined = 0;
3018 struct ext4_dir_entry_2 *parent_de; 3022 struct ext4_dir_entry_2 *parent_de;
3019 3023
@@ -3026,14 +3030,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3026 * in separate transaction */ 3030 * in separate transaction */
3027 if (new_dentry->d_inode) 3031 if (new_dentry->d_inode)
3028 dquot_initialize(new_dentry->d_inode); 3032 dquot_initialize(new_dentry->d_inode);
3029 handle = ext4_journal_start(old_dir, EXT4_HT_DIR,
3030 (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
3031 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3032 if (IS_ERR(handle))
3033 return PTR_ERR(handle);
3034
3035 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
3036 ext4_handle_sync(handle);
3037 3033
3038 old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL); 3034 old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL);
3039 /* 3035 /*
@@ -3056,6 +3052,18 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3056 new_bh = NULL; 3052 new_bh = NULL;
3057 } 3053 }
3058 } 3054 }
3055 if (new_inode && !test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
3056 ext4_alloc_da_blocks(old_inode);
3057
3058 handle = ext4_journal_start(old_dir, EXT4_HT_DIR,
3059 (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
3060 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3061 if (IS_ERR(handle))
3062 return PTR_ERR(handle);
3063
3064 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
3065 ext4_handle_sync(handle);
3066
3059 if (S_ISDIR(old_inode->i_mode)) { 3067 if (S_ISDIR(old_inode->i_mode)) {
3060 if (new_inode) { 3068 if (new_inode) {
3061 retval = -ENOTEMPTY; 3069 retval = -ENOTEMPTY;
@@ -3186,8 +3194,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3186 ext4_mark_inode_dirty(handle, new_inode); 3194 ext4_mark_inode_dirty(handle, new_inode);
3187 if (!new_inode->i_nlink) 3195 if (!new_inode->i_nlink)
3188 ext4_orphan_add(handle, new_inode); 3196 ext4_orphan_add(handle, new_inode);
3189 if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
3190 force_da_alloc = 1;
3191 } 3197 }
3192 retval = 0; 3198 retval = 0;
3193 3199
@@ -3195,9 +3201,8 @@ end_rename:
3195 brelse(dir_bh); 3201 brelse(dir_bh);
3196 brelse(old_bh); 3202 brelse(old_bh);
3197 brelse(new_bh); 3203 brelse(new_bh);
3198 ext4_journal_stop(handle); 3204 if (handle)
3199 if (retval == 0 && force_da_alloc) 3205 ext4_journal_stop(handle);
3200 ext4_alloc_da_blocks(old_inode);
3201 return retval; 3206 return retval;
3202} 3207}
3203 3208
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b59373b625e9..42337141e79f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1134,8 +1134,8 @@ enum {
1134 Opt_nouid32, Opt_debug, Opt_removed, 1134 Opt_nouid32, Opt_debug, Opt_removed,
1135 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1135 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
1136 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, 1136 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
1137 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1137 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
1138 Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, 1138 Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
1139 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1139 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1140 Opt_data_err_abort, Opt_data_err_ignore, 1140 Opt_data_err_abort, Opt_data_err_ignore,
1141 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1141 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -1179,6 +1179,7 @@ static const match_table_t tokens = {
1179 {Opt_min_batch_time, "min_batch_time=%u"}, 1179 {Opt_min_batch_time, "min_batch_time=%u"},
1180 {Opt_max_batch_time, "max_batch_time=%u"}, 1180 {Opt_max_batch_time, "max_batch_time=%u"},
1181 {Opt_journal_dev, "journal_dev=%u"}, 1181 {Opt_journal_dev, "journal_dev=%u"},
1182 {Opt_journal_path, "journal_path=%s"},
1182 {Opt_journal_checksum, "journal_checksum"}, 1183 {Opt_journal_checksum, "journal_checksum"},
1183 {Opt_journal_async_commit, "journal_async_commit"}, 1184 {Opt_journal_async_commit, "journal_async_commit"},
1184 {Opt_abort, "abort"}, 1185 {Opt_abort, "abort"},
@@ -1338,6 +1339,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
1338#define MOPT_NO_EXT2 0x0100 1339#define MOPT_NO_EXT2 0x0100
1339#define MOPT_NO_EXT3 0x0200 1340#define MOPT_NO_EXT3 0x0200
1340#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) 1341#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
1342#define MOPT_STRING 0x0400
1341 1343
1342static const struct mount_opts { 1344static const struct mount_opts {
1343 int token; 1345 int token;
@@ -1387,6 +1389,7 @@ static const struct mount_opts {
1387 {Opt_resuid, 0, MOPT_GTE0}, 1389 {Opt_resuid, 0, MOPT_GTE0},
1388 {Opt_resgid, 0, MOPT_GTE0}, 1390 {Opt_resgid, 0, MOPT_GTE0},
1389 {Opt_journal_dev, 0, MOPT_GTE0}, 1391 {Opt_journal_dev, 0, MOPT_GTE0},
1392 {Opt_journal_path, 0, MOPT_STRING},
1390 {Opt_journal_ioprio, 0, MOPT_GTE0}, 1393 {Opt_journal_ioprio, 0, MOPT_GTE0},
1391 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, 1394 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1392 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, 1395 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
@@ -1480,7 +1483,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1480 return -1; 1483 return -1;
1481 } 1484 }
1482 1485
1483 if (args->from && match_int(args, &arg)) 1486 if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
1484 return -1; 1487 return -1;
1485 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) 1488 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1486 return -1; 1489 return -1;
@@ -1544,6 +1547,44 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1544 return -1; 1547 return -1;
1545 } 1548 }
1546 *journal_devnum = arg; 1549 *journal_devnum = arg;
1550 } else if (token == Opt_journal_path) {
1551 char *journal_path;
1552 struct inode *journal_inode;
1553 struct path path;
1554 int error;
1555
1556 if (is_remount) {
1557 ext4_msg(sb, KERN_ERR,
1558 "Cannot specify journal on remount");
1559 return -1;
1560 }
1561 journal_path = match_strdup(&args[0]);
1562 if (!journal_path) {
1563 ext4_msg(sb, KERN_ERR, "error: could not dup "
1564 "journal device string");
1565 return -1;
1566 }
1567
1568 error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
1569 if (error) {
1570 ext4_msg(sb, KERN_ERR, "error: could not find "
1571 "journal device path: error %d", error);
1572 kfree(journal_path);
1573 return -1;
1574 }
1575
1576 journal_inode = path.dentry->d_inode;
1577 if (!S_ISBLK(journal_inode->i_mode)) {
1578 ext4_msg(sb, KERN_ERR, "error: journal path %s "
1579 "is not a block device", journal_path);
1580 path_put(&path);
1581 kfree(journal_path);
1582 return -1;
1583 }
1584
1585 *journal_devnum = new_encode_dev(journal_inode->i_rdev);
1586 path_put(&path);
1587 kfree(journal_path);
1547 } else if (token == Opt_journal_ioprio) { 1588 } else if (token == Opt_journal_ioprio) {
1548 if (arg > 7) { 1589 if (arg > 7) {
1549 ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" 1590 ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 559bec1a37b4..cf2fc0594063 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -343,14 +343,14 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
343 struct page *page = bh->b_page; 343 struct page *page = bh->b_page;
344 __u8 *addr; 344 __u8 *addr;
345 __u32 csum32; 345 __u32 csum32;
346 __be32 seq;
346 347
347 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 348 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
348 return; 349 return;
349 350
350 sequence = cpu_to_be32(sequence); 351 seq = cpu_to_be32(sequence);
351 addr = kmap_atomic(page); 352 addr = kmap_atomic(page);
352 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, 353 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
353 sizeof(sequence));
354 csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data), 354 csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data),
355 bh->b_size); 355 bh->b_size);
356 kunmap_atomic(addr); 356 kunmap_atomic(addr);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 02c7ad9d7a41..52032647dd4a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -130,9 +130,10 @@ int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; 130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
131} 131}
132 132
133static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) 133static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
134{ 134{
135 __u32 csum, old_csum; 135 __u32 csum;
136 __be32 old_csum;
136 137
137 old_csum = sb->s_checksum; 138 old_csum = sb->s_checksum;
138 sb->s_checksum = 0; 139 sb->s_checksum = 0;
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index d4851464b57e..3929c50428b1 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -178,7 +178,8 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
178 void *buf) 178 void *buf)
179{ 179{
180 struct jbd2_journal_block_tail *tail; 180 struct jbd2_journal_block_tail *tail;
181 __u32 provided, calculated; 181 __be32 provided;
182 __u32 calculated;
182 183
183 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 184 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
184 return 1; 185 return 1;
@@ -190,8 +191,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
190 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 191 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
191 tail->t_checksum = provided; 192 tail->t_checksum = provided;
192 193
193 provided = be32_to_cpu(provided); 194 return provided == cpu_to_be32(calculated);
194 return provided == calculated;
195} 195}
196 196
197/* 197/*
@@ -381,7 +381,8 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
381static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 381static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
382{ 382{
383 struct commit_header *h; 383 struct commit_header *h;
384 __u32 provided, calculated; 384 __be32 provided;
385 __u32 calculated;
385 386
386 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 387 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
387 return 1; 388 return 1;
@@ -392,21 +393,20 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
392 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 393 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
393 h->h_chksum[0] = provided; 394 h->h_chksum[0] = provided;
394 395
395 provided = be32_to_cpu(provided); 396 return provided == cpu_to_be32(calculated);
396 return provided == calculated;
397} 397}
398 398
399static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 399static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
400 void *buf, __u32 sequence) 400 void *buf, __u32 sequence)
401{ 401{
402 __u32 csum32; 402 __u32 csum32;
403 __be32 seq;
403 404
404 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 405 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
405 return 1; 406 return 1;
406 407
407 sequence = cpu_to_be32(sequence); 408 seq = cpu_to_be32(sequence);
408 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, 409 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
409 sizeof(sequence));
410 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 410 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
411 411
412 return tag->t_checksum == cpu_to_be16(csum32); 412 return tag->t_checksum == cpu_to_be16(csum32);
@@ -808,7 +808,8 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
808 void *buf) 808 void *buf)
809{ 809{
810 struct jbd2_journal_revoke_tail *tail; 810 struct jbd2_journal_revoke_tail *tail;
811 __u32 provided, calculated; 811 __be32 provided;
812 __u32 calculated;
812 813
813 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 814 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
814 return 1; 815 return 1;
@@ -820,8 +821,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
820 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 821 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
821 tail->r_checksum = provided; 822 tail->r_checksum = provided;
822 823
823 provided = be32_to_cpu(provided); 824 return provided == cpu_to_be32(calculated);
824 return provided == calculated;
825} 825}
826 826
827/* Scan a revoke record, marking all blocks mentioned as revoked. */ 827/* Scan a revoke record, marking all blocks mentioned as revoked. */
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index fbad622841f9..9a702e193538 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1094,6 +1094,14 @@ static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number)
1094 dquot->dq_dqb.dqb_rsvspace -= number; 1094 dquot->dq_dqb.dqb_rsvspace -= number;
1095} 1095}
1096 1096
/*
 * Move @number from a dquot's current-space counter (dqb_curspace) back
 * into its reserved-space counter (dqb_rsvspace).
 *
 * @dquot:  dquot whose dq_dqb counters are adjusted
 * @number: amount to move; clamped to dqb_curspace when it is larger
 *
 * NOTE(review): no lock is taken here; the caller visible in this file,
 * dquot_reclaim_space_nodirty(), invokes it with dq_data_lock held.
 */
1097static void dquot_reclaim_reserved_space(struct dquot *dquot, qsize_t number)
1098{
	/*
	 * Never move more than is currently accounted: flag the mismatch via
	 * WARN_ON_ONCE and clamp, so the subtraction below cannot take
	 * dqb_curspace below zero.
	 */
1099 if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number))
1100 number = dquot->dq_dqb.dqb_curspace;
1101 dquot->dq_dqb.dqb_rsvspace += number;
1102 dquot->dq_dqb.dqb_curspace -= number;
1103}
1104
1097static inline 1105static inline
1098void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) 1106void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
1099{ 1107{
@@ -1528,6 +1536,15 @@ void inode_claim_rsv_space(struct inode *inode, qsize_t number)
1528} 1536}
1529EXPORT_SYMBOL(inode_claim_rsv_space); 1537EXPORT_SYMBOL(inode_claim_rsv_space);
1530 1538
/*
 * Move @number from an inode's accounted bytes back into its reserved-space
 * counter. Both updates happen under inode->i_lock.
 *
 * @inode:  inode whose counters are adjusted
 * @number: amount added to the reserved-space counter and subtracted from
 *          the inode's byte accounting
 */
1539void inode_reclaim_rsv_space(struct inode *inode, qsize_t number)
1540{
1541 spin_lock(&inode->i_lock);
	/* Grow the reservation first ... */
1542 *inode_reserved_space(inode) += number;
	/*
	 * ... then drop the same amount from the inode's byte accounting.
	 * __inode_sub_bytes() is called while i_lock is already held here.
	 */
1543 __inode_sub_bytes(inode, number);
1544 spin_unlock(&inode->i_lock);
1545}
1546EXPORT_SYMBOL(inode_reclaim_rsv_space);
1547
1531void inode_sub_rsv_space(struct inode *inode, qsize_t number) 1548void inode_sub_rsv_space(struct inode *inode, qsize_t number)
1532{ 1549{
1533 spin_lock(&inode->i_lock); 1550 spin_lock(&inode->i_lock);
@@ -1702,6 +1719,35 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
1702EXPORT_SYMBOL(dquot_claim_space_nodirty); 1719EXPORT_SYMBOL(dquot_claim_space_nodirty);
1703 1720
1704/* 1721/*
1722 * Convert allocated space back to in-memory reserved quotas
1723 */
1724void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
1725{
1726 int cnt;
1727
	/*
	 * dquot_active() is false: only the inode's own counters are
	 * adjusted and no dquot is touched.
	 */
1728 if (!dquot_active(inode)) {
1729 inode_reclaim_rsv_space(inode, number);
1730 return;
1731 }
1732
	/*
	 * dqptr_sem is held for read across the whole update; dq_data_lock
	 * covers the per-dquot counter changes and the inode update.
	 */
1733 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1734 spin_lock(&dq_data_lock);
1735 /* Move allocated quota back to reserved quota for every attached dquot */
1736 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1737 if (inode->i_dquot[cnt])
1738 dquot_reclaim_reserved_space(inode->i_dquot[cnt],
1739 number);
1740 }
1741 /* Update inode bytes: grow the reservation, shrink the accounted bytes */
1742 inode_reclaim_rsv_space(inode, number);
1743 spin_unlock(&dq_data_lock);
	/* The dquots are marked dirty only after dq_data_lock is released. */
1744 mark_all_dquot_dirty(inode->i_dquot);
1745 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1746 return;
1747}
1748EXPORT_SYMBOL(dquot_reclaim_space_nodirty);
1749
1750/*
1705 * This operation can block, but only after everything is updated 1751 * This operation can block, but only after everything is updated
1706 */ 1752 */
1707void __dquot_free_space(struct inode *inode, qsize_t number, int flags) 1753void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
diff --git a/fs/stat.c b/fs/stat.c
index 04ce1ac20d20..d0ea7ef75e26 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -447,9 +447,8 @@ void inode_add_bytes(struct inode *inode, loff_t bytes)
447 447
448EXPORT_SYMBOL(inode_add_bytes); 448EXPORT_SYMBOL(inode_add_bytes);
449 449
450void inode_sub_bytes(struct inode *inode, loff_t bytes) 450void __inode_sub_bytes(struct inode *inode, loff_t bytes)
451{ 451{
452 spin_lock(&inode->i_lock);
453 inode->i_blocks -= bytes >> 9; 452 inode->i_blocks -= bytes >> 9;
454 bytes &= 511; 453 bytes &= 511;
455 if (inode->i_bytes < bytes) { 454 if (inode->i_bytes < bytes) {
@@ -457,6 +456,14 @@ void inode_sub_bytes(struct inode *inode, loff_t bytes)
457 inode->i_bytes += 512; 456 inode->i_bytes += 512;
458 } 457 }
459 inode->i_bytes -= bytes; 458 inode->i_bytes -= bytes;
459}
460
461EXPORT_SYMBOL(__inode_sub_bytes);
462
463void inode_sub_bytes(struct inode *inode, loff_t bytes)
464{
465 spin_lock(&inode->i_lock);
466 __inode_sub_bytes(inode, bytes);
460 spin_unlock(&inode->i_lock); 467 spin_unlock(&inode->i_lock);
461} 468}
462 469