diff options
-rw-r--r-- | Documentation/filesystems/ext4.txt | 7 | ||||
-rw-r--r-- | fs/ext3/dir.c | 2 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 24 | ||||
-rw-r--r-- | fs/ext4/dir.c | 2 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 58 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 6 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 2 | ||||
-rw-r--r-- | fs/ext4/extents.c | 296 | ||||
-rw-r--r-- | fs/ext4/extents_status.c | 125 | ||||
-rw-r--r-- | fs/ext4/extents_status.h | 51 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 90 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 1 | ||||
-rw-r--r-- | fs/ext4/inode.c | 293 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 4 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 49 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 4 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 2 | ||||
-rw-r--r-- | fs/ext4/namei.c | 35 | ||||
-rw-r--r-- | fs/ext4/super.c | 47 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 6 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 5 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 24 | ||||
-rw-r--r-- | fs/quota/dquot.c | 46 | ||||
-rw-r--r-- | fs/stat.c | 11 | ||||
-rw-r--r-- | include/linux/fs.h | 1 | ||||
-rw-r--r-- | include/linux/quotaops.h | 15 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 29 | ||||
-rw-r--r-- | include/uapi/linux/fiemap.h | 1 |
28 files changed, 857 insertions, 379 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index f7cbf574a875..b91cfaaf6a0f 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -144,11 +144,12 @@ journal_async_commit Commit block can be written to disk without waiting | |||
144 | mount the device. This will enable 'journal_checksum' | 144 | mount the device. This will enable 'journal_checksum' |
145 | internally. | 145 | internally. |
146 | 146 | ||
147 | journal_path=path | ||
147 | journal_dev=devnum When the external journal device's major/minor numbers | 148 | journal_dev=devnum When the external journal device's major/minor numbers |
148 | have changed, this option allows the user to specify | 149 | have changed, these options allow the user to specify |
149 | the new journal location. The journal device is | 150 | the new journal location. The journal device is |
150 | identified through its new major/minor numbers encoded | 151 | identified through either its new major/minor numbers |
151 | in devnum. | 152 | encoded in devnum, or via a path to the device. |
152 | 153 | ||
153 | norecovery Don't load the journal on mounting. Note that | 154 | norecovery Don't load the journal on mounting. Note that |
154 | noload if the filesystem was not unmounted cleanly, | 155 | noload if the filesystem was not unmounted cleanly, |
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index f522425aaa24..bafdd48eefde 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
@@ -41,7 +41,7 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
41 | 41 | ||
42 | /** | 42 | /** |
43 | * Check if the given dir-inode refers to an htree-indexed directory | 43 | * Check if the given dir-inode refers to an htree-indexed directory |
44 | * (or a directory which chould potentially get coverted to use htree | 44 | * (or a directory which could potentially get converted to use htree |
45 | * indexing). | 45 | * indexing). |
46 | * | 46 | * |
47 | * Return 1 if it is a dx dir, 0 if not | 47 | * Return 1 if it is a dx dir, 0 if not |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index ddd715e42a5c..dc5d572ebd6a 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -184,6 +184,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
184 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 184 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
185 | ext4_fsblk_t start, tmp; | 185 | ext4_fsblk_t start, tmp; |
186 | int flex_bg = 0; | 186 | int flex_bg = 0; |
187 | struct ext4_group_info *grp; | ||
187 | 188 | ||
188 | J_ASSERT_BH(bh, buffer_locked(bh)); | 189 | J_ASSERT_BH(bh, buffer_locked(bh)); |
189 | 190 | ||
@@ -191,11 +192,9 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
191 | * essentially implementing a per-group read-only flag. */ | 192 | * essentially implementing a per-group read-only flag. */ |
192 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { | 193 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { |
193 | ext4_error(sb, "Checksum bad for group %u", block_group); | 194 | ext4_error(sb, "Checksum bad for group %u", block_group); |
194 | ext4_free_group_clusters_set(sb, gdp, 0); | 195 | grp = ext4_get_group_info(sb, block_group); |
195 | ext4_free_inodes_set(sb, gdp, 0); | 196 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); |
196 | ext4_itable_unused_set(sb, gdp, 0); | 197 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); |
197 | memset(bh->b_data, 0xff, sb->s_blocksize); | ||
198 | ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); | ||
199 | return; | 198 | return; |
200 | } | 199 | } |
201 | memset(bh->b_data, 0, sb->s_blocksize); | 200 | memset(bh->b_data, 0, sb->s_blocksize); |
@@ -305,7 +304,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, | |||
305 | */ | 304 | */ |
306 | static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, | 305 | static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, |
307 | struct ext4_group_desc *desc, | 306 | struct ext4_group_desc *desc, |
308 | unsigned int block_group, | 307 | ext4_group_t block_group, |
309 | struct buffer_head *bh) | 308 | struct buffer_head *bh) |
310 | { | 309 | { |
311 | ext4_grpblk_t offset; | 310 | ext4_grpblk_t offset; |
@@ -352,10 +351,11 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, | |||
352 | 351 | ||
353 | void ext4_validate_block_bitmap(struct super_block *sb, | 352 | void ext4_validate_block_bitmap(struct super_block *sb, |
354 | struct ext4_group_desc *desc, | 353 | struct ext4_group_desc *desc, |
355 | unsigned int block_group, | 354 | ext4_group_t block_group, |
356 | struct buffer_head *bh) | 355 | struct buffer_head *bh) |
357 | { | 356 | { |
358 | ext4_fsblk_t blk; | 357 | ext4_fsblk_t blk; |
358 | struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); | ||
359 | 359 | ||
360 | if (buffer_verified(bh)) | 360 | if (buffer_verified(bh)) |
361 | return; | 361 | return; |
@@ -366,12 +366,14 @@ void ext4_validate_block_bitmap(struct super_block *sb, | |||
366 | ext4_unlock_group(sb, block_group); | 366 | ext4_unlock_group(sb, block_group); |
367 | ext4_error(sb, "bg %u: block %llu: invalid block bitmap", | 367 | ext4_error(sb, "bg %u: block %llu: invalid block bitmap", |
368 | block_group, blk); | 368 | block_group, blk); |
369 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); | ||
369 | return; | 370 | return; |
370 | } | 371 | } |
371 | if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, | 372 | if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, |
372 | desc, bh))) { | 373 | desc, bh))) { |
373 | ext4_unlock_group(sb, block_group); | 374 | ext4_unlock_group(sb, block_group); |
374 | ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); | 375 | ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); |
376 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); | ||
375 | return; | 377 | return; |
376 | } | 378 | } |
377 | set_buffer_verified(bh); | 379 | set_buffer_verified(bh); |
@@ -445,7 +447,10 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) | |||
445 | return bh; | 447 | return bh; |
446 | verify: | 448 | verify: |
447 | ext4_validate_block_bitmap(sb, desc, block_group, bh); | 449 | ext4_validate_block_bitmap(sb, desc, block_group, bh); |
448 | return bh; | 450 | if (buffer_verified(bh)) |
451 | return bh; | ||
452 | put_bh(bh); | ||
453 | return NULL; | ||
449 | } | 454 | } |
450 | 455 | ||
451 | /* Returns 0 on success, 1 on error */ | 456 | /* Returns 0 on success, 1 on error */ |
@@ -469,7 +474,8 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, | |||
469 | clear_buffer_new(bh); | 474 | clear_buffer_new(bh); |
470 | /* Panic or remount fs read-only if block bitmap is invalid */ | 475 | /* Panic or remount fs read-only if block bitmap is invalid */ |
471 | ext4_validate_block_bitmap(sb, desc, block_group, bh); | 476 | ext4_validate_block_bitmap(sb, desc, block_group, bh); |
472 | return 0; | 477 | /* ...but check for error just in case errors=continue. */ |
478 | return !buffer_verified(bh); | ||
473 | } | 479 | } |
474 | 480 | ||
475 | struct buffer_head * | 481 | struct buffer_head * |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 3c7d288ae94c..680bb3388919 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -33,7 +33,7 @@ static int ext4_dx_readdir(struct file *, struct dir_context *); | |||
33 | 33 | ||
34 | /** | 34 | /** |
35 | * Check if the given dir-inode refers to an htree-indexed directory | 35 | * Check if the given dir-inode refers to an htree-indexed directory |
36 | * (or a directory which chould potentially get coverted to use htree | 36 | * (or a directory which could potentially get converted to use htree |
37 | * indexing). | 37 | * indexing). |
38 | * | 38 | * |
39 | * Return 1 if it is a dx dir, 0 if not | 39 | * Return 1 if it is a dx dir, 0 if not |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0ab26fbf3380..06b488dca666 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -561,6 +561,18 @@ enum { | |||
561 | #define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 | 561 | #define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 |
562 | 562 | ||
563 | /* | 563 | /* |
564 | * The bit position of these flags must not overlap with any of the | ||
565 | * EXT4_GET_BLOCKS_*. They are used by ext4_ext_find_extent(), | ||
566 | * read_extent_tree_block(), ext4_split_extent_at(), | ||
567 | * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). | ||
568 | * EXT4_EX_NOCACHE is used to indicate that we shouldn't be | ||
569 | * caching the extents when reading from the extent tree while a | ||
570 | * truncate or punch hole operation is in progress. | ||
571 | */ | ||
572 | #define EXT4_EX_NOCACHE 0x0400 | ||
573 | #define EXT4_EX_FORCE_CACHE 0x0800 | ||
574 | |||
575 | /* | ||
564 | * Flags used by ext4_free_blocks | 576 | * Flags used by ext4_free_blocks |
565 | */ | 577 | */ |
566 | #define EXT4_FREE_BLOCKS_METADATA 0x0001 | 578 | #define EXT4_FREE_BLOCKS_METADATA 0x0001 |
@@ -569,6 +581,7 @@ enum { | |||
569 | #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 | 581 | #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 |
570 | #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 | 582 | #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 |
571 | #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 | 583 | #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 |
584 | #define EXT4_FREE_BLOCKS_RESERVE 0x0040 | ||
572 | 585 | ||
573 | /* | 586 | /* |
574 | * ioctl commands | 587 | * ioctl commands |
@@ -590,6 +603,7 @@ enum { | |||
590 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) | 603 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) |
591 | #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) | 604 | #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) |
592 | #define EXT4_IOC_SWAP_BOOT _IO('f', 17) | 605 | #define EXT4_IOC_SWAP_BOOT _IO('f', 17) |
606 | #define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) | ||
593 | 607 | ||
594 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 608 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
595 | /* | 609 | /* |
@@ -1375,6 +1389,7 @@ enum { | |||
1375 | nolocking */ | 1389 | nolocking */ |
1376 | EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ | 1390 | EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ |
1377 | EXT4_STATE_ORDERED_MODE, /* data=ordered mode */ | 1391 | EXT4_STATE_ORDERED_MODE, /* data=ordered mode */ |
1392 | EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ | ||
1378 | }; | 1393 | }; |
1379 | 1394 | ||
1380 | #define EXT4_INODE_BIT_FNS(name, field, offset) \ | 1395 | #define EXT4_INODE_BIT_FNS(name, field, offset) \ |
@@ -1915,7 +1930,7 @@ extern ext4_group_t ext4_get_group_number(struct super_block *sb, | |||
1915 | 1930 | ||
1916 | extern void ext4_validate_block_bitmap(struct super_block *sb, | 1931 | extern void ext4_validate_block_bitmap(struct super_block *sb, |
1917 | struct ext4_group_desc *desc, | 1932 | struct ext4_group_desc *desc, |
1918 | unsigned int block_group, | 1933 | ext4_group_t block_group, |
1919 | struct buffer_head *bh); | 1934 | struct buffer_head *bh); |
1920 | extern unsigned int ext4_block_group(struct super_block *sb, | 1935 | extern unsigned int ext4_block_group(struct super_block *sb, |
1921 | ext4_fsblk_t blocknr); | 1936 | ext4_fsblk_t blocknr); |
@@ -2417,16 +2432,32 @@ do { \ | |||
2417 | #define EXT4_FREECLUSTERS_WATERMARK 0 | 2432 | #define EXT4_FREECLUSTERS_WATERMARK 0 |
2418 | #endif | 2433 | #endif |
2419 | 2434 | ||
2435 | /* Update i_disksize. Requires i_mutex to avoid races with truncate */ | ||
2420 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | 2436 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) |
2421 | { | 2437 | { |
2422 | /* | 2438 | WARN_ON_ONCE(S_ISREG(inode->i_mode) && |
2423 | * XXX: replace with spinlock if seen contended -bzzz | 2439 | !mutex_is_locked(&inode->i_mutex)); |
2424 | */ | ||
2425 | down_write(&EXT4_I(inode)->i_data_sem); | 2440 | down_write(&EXT4_I(inode)->i_data_sem); |
2426 | if (newsize > EXT4_I(inode)->i_disksize) | 2441 | if (newsize > EXT4_I(inode)->i_disksize) |
2427 | EXT4_I(inode)->i_disksize = newsize; | 2442 | EXT4_I(inode)->i_disksize = newsize; |
2428 | up_write(&EXT4_I(inode)->i_data_sem); | 2443 | up_write(&EXT4_I(inode)->i_data_sem); |
2429 | return ; | 2444 | } |
2445 | |||
2446 | /* | ||
2447 | * Update i_disksize after writeback has been started. Races with truncate | ||
2448 | * are avoided by checking i_size under i_data_sem. | ||
2449 | */ | ||
2450 | static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize) | ||
2451 | { | ||
2452 | loff_t i_size; | ||
2453 | |||
2454 | down_write(&EXT4_I(inode)->i_data_sem); | ||
2455 | i_size = i_size_read(inode); | ||
2456 | if (newsize > i_size) | ||
2457 | newsize = i_size; | ||
2458 | if (newsize > EXT4_I(inode)->i_disksize) | ||
2459 | EXT4_I(inode)->i_disksize = newsize; | ||
2460 | up_write(&EXT4_I(inode)->i_data_sem); | ||
2430 | } | 2461 | } |
2431 | 2462 | ||
2432 | struct ext4_group_info { | 2463 | struct ext4_group_info { |
@@ -2449,9 +2480,15 @@ struct ext4_group_info { | |||
2449 | 2480 | ||
2450 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | 2481 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 |
2451 | #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1 | 2482 | #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1 |
2483 | #define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT 2 | ||
2484 | #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT 3 | ||
2452 | 2485 | ||
2453 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | 2486 | #define EXT4_MB_GRP_NEED_INIT(grp) \ |
2454 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | 2487 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) |
2488 | #define EXT4_MB_GRP_BBITMAP_CORRUPT(grp) \ | ||
2489 | (test_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &((grp)->bb_state))) | ||
2490 | #define EXT4_MB_GRP_IBITMAP_CORRUPT(grp) \ | ||
2491 | (test_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &((grp)->bb_state))) | ||
2455 | 2492 | ||
2456 | #define EXT4_MB_GRP_WAS_TRIMMED(grp) \ | 2493 | #define EXT4_MB_GRP_WAS_TRIMMED(grp) \ |
2457 | (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) | 2494 | (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) |
@@ -2655,6 +2692,12 @@ extern int ext4_check_blockref(const char *, unsigned int, | |||
2655 | struct ext4_ext_path; | 2692 | struct ext4_ext_path; |
2656 | struct ext4_extent; | 2693 | struct ext4_extent; |
2657 | 2694 | ||
2695 | /* | ||
2696 | * Maximum number of logical blocks in a file; ext4_extent's ee_block is | ||
2697 | * __le32. | ||
2698 | */ | ||
2699 | #define EXT_MAX_BLOCKS 0xffffffff | ||
2700 | |||
2658 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 2701 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
2659 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 2702 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
2660 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); | 2703 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); |
@@ -2684,7 +2727,8 @@ extern int ext4_ext_insert_extent(handle_t *, struct inode *, | |||
2684 | struct ext4_ext_path *, | 2727 | struct ext4_ext_path *, |
2685 | struct ext4_extent *, int); | 2728 | struct ext4_extent *, int); |
2686 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 2729 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, |
2687 | struct ext4_ext_path *); | 2730 | struct ext4_ext_path *, |
2731 | int flags); | ||
2688 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); | 2732 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); |
2689 | extern int ext4_ext_check_inode(struct inode *inode); | 2733 | extern int ext4_ext_check_inode(struct inode *inode); |
2690 | extern int ext4_find_delalloc_range(struct inode *inode, | 2734 | extern int ext4_find_delalloc_range(struct inode *inode, |
@@ -2693,7 +2737,7 @@ extern int ext4_find_delalloc_range(struct inode *inode, | |||
2693 | extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); | 2737 | extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); |
2694 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2738 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2695 | __u64 start, __u64 len); | 2739 | __u64 start, __u64 len); |
2696 | 2740 | extern int ext4_ext_precache(struct inode *inode); | |
2697 | 2741 | ||
2698 | /* move_extent.c */ | 2742 | /* move_extent.c */ |
2699 | extern void ext4_double_down_write_data_sem(struct inode *first, | 2743 | extern void ext4_double_down_write_data_sem(struct inode *first, |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 51bc821ade90..5074fe23f19e 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -134,12 +134,6 @@ struct ext4_ext_path { | |||
134 | */ | 134 | */ |
135 | 135 | ||
136 | /* | 136 | /* |
137 | * Maximum number of logical blocks in a file; ext4_extent's ee_block is | ||
138 | * __le32. | ||
139 | */ | ||
140 | #define EXT_MAX_BLOCKS 0xffffffff | ||
141 | |||
142 | /* | ||
143 | * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an | 137 | * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an |
144 | * initialized extent. This is 2^15 and not (2^16 - 1), since we use the | 138 | * initialized extent. This is 2^15 and not (2^16 - 1), since we use the |
145 | * MSB of ee_len field in the extent datastructure to signify if this | 139 | * MSB of ee_len field in the extent datastructure to signify if this |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 2877258d9497..81cfefa9dc0c 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -197,7 +197,7 @@ static inline void ext4_journal_callback_add(handle_t *handle, | |||
197 | * ext4_journal_callback_del: delete a registered callback | 197 | * ext4_journal_callback_del: delete a registered callback |
198 | * @handle: active journal transaction handle on which callback was registered | 198 | * @handle: active journal transaction handle on which callback was registered |
199 | * @jce: registered journal callback entry to unregister | 199 | * @jce: registered journal callback entry to unregister |
200 | * Return true if object was sucessfully removed | 200 | * Return true if object was successfully removed |
201 | */ | 201 | */ |
202 | static inline bool ext4_journal_callback_try_del(handle_t *handle, | 202 | static inline bool ext4_journal_callback_try_del(handle_t *handle, |
203 | struct ext4_journal_cb_entry *jce) | 203 | struct ext4_journal_cb_entry *jce) |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 72ba4705d4fa..54d52afcdb19 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -407,7 +407,7 @@ static int ext4_valid_extent_entries(struct inode *inode, | |||
407 | 407 | ||
408 | static int __ext4_ext_check(const char *function, unsigned int line, | 408 | static int __ext4_ext_check(const char *function, unsigned int line, |
409 | struct inode *inode, struct ext4_extent_header *eh, | 409 | struct inode *inode, struct ext4_extent_header *eh, |
410 | int depth) | 410 | int depth, ext4_fsblk_t pblk) |
411 | { | 411 | { |
412 | const char *error_msg; | 412 | const char *error_msg; |
413 | int max = 0; | 413 | int max = 0; |
@@ -447,42 +447,149 @@ static int __ext4_ext_check(const char *function, unsigned int line, | |||
447 | 447 | ||
448 | corrupted: | 448 | corrupted: |
449 | ext4_error_inode(inode, function, line, 0, | 449 | ext4_error_inode(inode, function, line, 0, |
450 | "bad header/extent: %s - magic %x, " | 450 | "pblk %llu bad header/extent: %s - magic %x, " |
451 | "entries %u, max %u(%u), depth %u(%u)", | 451 | "entries %u, max %u(%u), depth %u(%u)", |
452 | error_msg, le16_to_cpu(eh->eh_magic), | 452 | (unsigned long long) pblk, error_msg, |
453 | le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), | 453 | le16_to_cpu(eh->eh_magic), |
454 | max, le16_to_cpu(eh->eh_depth), depth); | 454 | le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), |
455 | 455 | max, le16_to_cpu(eh->eh_depth), depth); | |
456 | return -EIO; | 456 | return -EIO; |
457 | } | 457 | } |
458 | 458 | ||
459 | #define ext4_ext_check(inode, eh, depth) \ | 459 | #define ext4_ext_check(inode, eh, depth, pblk) \ |
460 | __ext4_ext_check(__func__, __LINE__, inode, eh, depth) | 460 | __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk)) |
461 | 461 | ||
462 | int ext4_ext_check_inode(struct inode *inode) | 462 | int ext4_ext_check_inode(struct inode *inode) |
463 | { | 463 | { |
464 | return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode)); | 464 | return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0); |
465 | } | 465 | } |
466 | 466 | ||
467 | static int __ext4_ext_check_block(const char *function, unsigned int line, | 467 | static struct buffer_head * |
468 | struct inode *inode, | 468 | __read_extent_tree_block(const char *function, unsigned int line, |
469 | struct ext4_extent_header *eh, | 469 | struct inode *inode, ext4_fsblk_t pblk, int depth, |
470 | int depth, | 470 | int flags) |
471 | struct buffer_head *bh) | ||
472 | { | 471 | { |
473 | int ret; | 472 | struct buffer_head *bh; |
473 | int err; | ||
474 | 474 | ||
475 | if (buffer_verified(bh)) | 475 | bh = sb_getblk(inode->i_sb, pblk); |
476 | return 0; | 476 | if (unlikely(!bh)) |
477 | ret = ext4_ext_check(inode, eh, depth); | 477 | return ERR_PTR(-ENOMEM); |
478 | if (ret) | 478 | |
479 | return ret; | 479 | if (!bh_uptodate_or_lock(bh)) { |
480 | trace_ext4_ext_load_extent(inode, pblk, _RET_IP_); | ||
481 | err = bh_submit_read(bh); | ||
482 | if (err < 0) | ||
483 | goto errout; | ||
484 | } | ||
485 | if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) | ||
486 | return bh; | ||
487 | err = __ext4_ext_check(function, line, inode, | ||
488 | ext_block_hdr(bh), depth, pblk); | ||
489 | if (err) | ||
490 | goto errout; | ||
480 | set_buffer_verified(bh); | 491 | set_buffer_verified(bh); |
481 | return ret; | 492 | /* |
493 | * If this is a leaf block, cache all of its entries | ||
494 | */ | ||
495 | if (!(flags & EXT4_EX_NOCACHE) && depth == 0) { | ||
496 | struct ext4_extent_header *eh = ext_block_hdr(bh); | ||
497 | struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); | ||
498 | ext4_lblk_t prev = 0; | ||
499 | int i; | ||
500 | |||
501 | for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { | ||
502 | unsigned int status = EXTENT_STATUS_WRITTEN; | ||
503 | ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); | ||
504 | int len = ext4_ext_get_actual_len(ex); | ||
505 | |||
506 | if (prev && (prev != lblk)) | ||
507 | ext4_es_cache_extent(inode, prev, | ||
508 | lblk - prev, ~0, | ||
509 | EXTENT_STATUS_HOLE); | ||
510 | |||
511 | if (ext4_ext_is_uninitialized(ex)) | ||
512 | status = EXTENT_STATUS_UNWRITTEN; | ||
513 | ext4_es_cache_extent(inode, lblk, len, | ||
514 | ext4_ext_pblock(ex), status); | ||
515 | prev = lblk + len; | ||
516 | } | ||
517 | } | ||
518 | return bh; | ||
519 | errout: | ||
520 | put_bh(bh); | ||
521 | return ERR_PTR(err); | ||
522 | |||
482 | } | 523 | } |
483 | 524 | ||
484 | #define ext4_ext_check_block(inode, eh, depth, bh) \ | 525 | #define read_extent_tree_block(inode, pblk, depth, flags) \ |
485 | __ext4_ext_check_block(__func__, __LINE__, inode, eh, depth, bh) | 526 | __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \ |
527 | (depth), (flags)) | ||
528 | |||
529 | /* | ||
530 | * This function is called to cache a file's extent information in the | ||
531 | * extent status tree | ||
532 | */ | ||
533 | int ext4_ext_precache(struct inode *inode) | ||
534 | { | ||
535 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
536 | struct ext4_ext_path *path = NULL; | ||
537 | struct buffer_head *bh; | ||
538 | int i = 0, depth, ret = 0; | ||
539 | |||
540 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | ||
541 | return 0; /* not an extent-mapped inode */ | ||
542 | |||
543 | down_read(&ei->i_data_sem); | ||
544 | depth = ext_depth(inode); | ||
545 | |||
546 | path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), | ||
547 | GFP_NOFS); | ||
548 | if (path == NULL) { | ||
549 | up_read(&ei->i_data_sem); | ||
550 | return -ENOMEM; | ||
551 | } | ||
552 | |||
553 | /* Don't cache anything if there are no external extent blocks */ | ||
554 | if (depth == 0) | ||
555 | goto out; | ||
556 | path[0].p_hdr = ext_inode_hdr(inode); | ||
557 | ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0); | ||
558 | if (ret) | ||
559 | goto out; | ||
560 | path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr); | ||
561 | while (i >= 0) { | ||
562 | /* | ||
563 | * If this is a leaf block or we've reached the end of | ||
564 | * the index block, go up | ||
565 | */ | ||
566 | if ((i == depth) || | ||
567 | path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) { | ||
568 | brelse(path[i].p_bh); | ||
569 | path[i].p_bh = NULL; | ||
570 | i--; | ||
571 | continue; | ||
572 | } | ||
573 | bh = read_extent_tree_block(inode, | ||
574 | ext4_idx_pblock(path[i].p_idx++), | ||
575 | depth - i - 1, | ||
576 | EXT4_EX_FORCE_CACHE); | ||
577 | if (IS_ERR(bh)) { | ||
578 | ret = PTR_ERR(bh); | ||
579 | break; | ||
580 | } | ||
581 | i++; | ||
582 | path[i].p_bh = bh; | ||
583 | path[i].p_hdr = ext_block_hdr(bh); | ||
584 | path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr); | ||
585 | } | ||
586 | ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED); | ||
587 | out: | ||
588 | up_read(&ei->i_data_sem); | ||
589 | ext4_ext_drop_refs(path); | ||
590 | kfree(path); | ||
591 | return ret; | ||
592 | } | ||
486 | 593 | ||
487 | #ifdef EXT_DEBUG | 594 | #ifdef EXT_DEBUG |
488 | static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) | 595 | static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) |
@@ -716,7 +823,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) | |||
716 | 823 | ||
717 | struct ext4_ext_path * | 824 | struct ext4_ext_path * |
718 | ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | 825 | ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, |
719 | struct ext4_ext_path *path) | 826 | struct ext4_ext_path *path, int flags) |
720 | { | 827 | { |
721 | struct ext4_extent_header *eh; | 828 | struct ext4_extent_header *eh; |
722 | struct buffer_head *bh; | 829 | struct buffer_head *bh; |
@@ -748,20 +855,13 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
748 | path[ppos].p_depth = i; | 855 | path[ppos].p_depth = i; |
749 | path[ppos].p_ext = NULL; | 856 | path[ppos].p_ext = NULL; |
750 | 857 | ||
751 | bh = sb_getblk(inode->i_sb, path[ppos].p_block); | 858 | bh = read_extent_tree_block(inode, path[ppos].p_block, --i, |
752 | if (unlikely(!bh)) { | 859 | flags); |
753 | ret = -ENOMEM; | 860 | if (IS_ERR(bh)) { |
861 | ret = PTR_ERR(bh); | ||
754 | goto err; | 862 | goto err; |
755 | } | 863 | } |
756 | if (!bh_uptodate_or_lock(bh)) { | 864 | |
757 | trace_ext4_ext_load_extent(inode, block, | ||
758 | path[ppos].p_block); | ||
759 | ret = bh_submit_read(bh); | ||
760 | if (ret < 0) { | ||
761 | put_bh(bh); | ||
762 | goto err; | ||
763 | } | ||
764 | } | ||
765 | eh = ext_block_hdr(bh); | 865 | eh = ext_block_hdr(bh); |
766 | ppos++; | 866 | ppos++; |
767 | if (unlikely(ppos > depth)) { | 867 | if (unlikely(ppos > depth)) { |
@@ -773,11 +873,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
773 | } | 873 | } |
774 | path[ppos].p_bh = bh; | 874 | path[ppos].p_bh = bh; |
775 | path[ppos].p_hdr = eh; | 875 | path[ppos].p_hdr = eh; |
776 | i--; | ||
777 | |||
778 | ret = ext4_ext_check_block(inode, eh, i, bh); | ||
779 | if (ret < 0) | ||
780 | goto err; | ||
781 | } | 876 | } |
782 | 877 | ||
783 | path[ppos].p_depth = i; | 878 | path[ppos].p_depth = i; |
@@ -1198,7 +1293,8 @@ out: | |||
1198 | * if no free index is found, then it requests in-depth growing. | 1293 | * if no free index is found, then it requests in-depth growing. |
1199 | */ | 1294 | */ |
1200 | static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, | 1295 | static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, |
1201 | unsigned int flags, | 1296 | unsigned int mb_flags, |
1297 | unsigned int gb_flags, | ||
1202 | struct ext4_ext_path *path, | 1298 | struct ext4_ext_path *path, |
1203 | struct ext4_extent *newext) | 1299 | struct ext4_extent *newext) |
1204 | { | 1300 | { |
@@ -1220,7 +1316,7 @@ repeat: | |||
1220 | if (EXT_HAS_FREE_INDEX(curp)) { | 1316 | if (EXT_HAS_FREE_INDEX(curp)) { |
1221 | /* if we found index with free entry, then use that | 1317 | /* if we found index with free entry, then use that |
1222 | * entry: create all needed subtree and add new leaf */ | 1318 | * entry: create all needed subtree and add new leaf */ |
1223 | err = ext4_ext_split(handle, inode, flags, path, newext, i); | 1319 | err = ext4_ext_split(handle, inode, mb_flags, path, newext, i); |
1224 | if (err) | 1320 | if (err) |
1225 | goto out; | 1321 | goto out; |
1226 | 1322 | ||
@@ -1228,12 +1324,12 @@ repeat: | |||
1228 | ext4_ext_drop_refs(path); | 1324 | ext4_ext_drop_refs(path); |
1229 | path = ext4_ext_find_extent(inode, | 1325 | path = ext4_ext_find_extent(inode, |
1230 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), | 1326 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), |
1231 | path); | 1327 | path, gb_flags); |
1232 | if (IS_ERR(path)) | 1328 | if (IS_ERR(path)) |
1233 | err = PTR_ERR(path); | 1329 | err = PTR_ERR(path); |
1234 | } else { | 1330 | } else { |
1235 | /* tree is full, time to grow in depth */ | 1331 | /* tree is full, time to grow in depth */ |
1236 | err = ext4_ext_grow_indepth(handle, inode, flags, newext); | 1332 | err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext); |
1237 | if (err) | 1333 | if (err) |
1238 | goto out; | 1334 | goto out; |
1239 | 1335 | ||
@@ -1241,7 +1337,7 @@ repeat: | |||
1241 | ext4_ext_drop_refs(path); | 1337 | ext4_ext_drop_refs(path); |
1242 | path = ext4_ext_find_extent(inode, | 1338 | path = ext4_ext_find_extent(inode, |
1243 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), | 1339 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), |
1244 | path); | 1340 | path, gb_flags); |
1245 | if (IS_ERR(path)) { | 1341 | if (IS_ERR(path)) { |
1246 | err = PTR_ERR(path); | 1342 | err = PTR_ERR(path); |
1247 | goto out; | 1343 | goto out; |
@@ -1412,29 +1508,21 @@ got_index: | |||
1412 | ix++; | 1508 | ix++; |
1413 | block = ext4_idx_pblock(ix); | 1509 | block = ext4_idx_pblock(ix); |
1414 | while (++depth < path->p_depth) { | 1510 | while (++depth < path->p_depth) { |
1415 | bh = sb_bread(inode->i_sb, block); | ||
1416 | if (bh == NULL) | ||
1417 | return -EIO; | ||
1418 | eh = ext_block_hdr(bh); | ||
1419 | /* subtract from p_depth to get proper eh_depth */ | 1511 | /* subtract from p_depth to get proper eh_depth */ |
1420 | if (ext4_ext_check_block(inode, eh, | 1512 | bh = read_extent_tree_block(inode, block, |
1421 | path->p_depth - depth, bh)) { | 1513 | path->p_depth - depth, 0); |
1422 | put_bh(bh); | 1514 | if (IS_ERR(bh)) |
1423 | return -EIO; | 1515 | return PTR_ERR(bh); |
1424 | } | 1516 | eh = ext_block_hdr(bh); |
1425 | ix = EXT_FIRST_INDEX(eh); | 1517 | ix = EXT_FIRST_INDEX(eh); |
1426 | block = ext4_idx_pblock(ix); | 1518 | block = ext4_idx_pblock(ix); |
1427 | put_bh(bh); | 1519 | put_bh(bh); |
1428 | } | 1520 | } |
1429 | 1521 | ||
1430 | bh = sb_bread(inode->i_sb, block); | 1522 | bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); |
1431 | if (bh == NULL) | 1523 | if (IS_ERR(bh)) |
1432 | return -EIO; | 1524 | return PTR_ERR(bh); |
1433 | eh = ext_block_hdr(bh); | 1525 | eh = ext_block_hdr(bh); |
1434 | if (ext4_ext_check_block(inode, eh, path->p_depth - depth, bh)) { | ||
1435 | put_bh(bh); | ||
1436 | return -EIO; | ||
1437 | } | ||
1438 | ex = EXT_FIRST_EXTENT(eh); | 1526 | ex = EXT_FIRST_EXTENT(eh); |
1439 | found_extent: | 1527 | found_extent: |
1440 | *logical = le32_to_cpu(ex->ee_block); | 1528 | *logical = le32_to_cpu(ex->ee_block); |
@@ -1705,7 +1793,8 @@ static void ext4_ext_try_to_merge_up(handle_t *handle, | |||
1705 | 1793 | ||
1706 | brelse(path[1].p_bh); | 1794 | brelse(path[1].p_bh); |
1707 | ext4_free_blocks(handle, inode, NULL, blk, 1, | 1795 | ext4_free_blocks(handle, inode, NULL, blk, 1, |
1708 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | 1796 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET | |
1797 | EXT4_FREE_BLOCKS_RESERVE); | ||
1709 | } | 1798 | } |
1710 | 1799 | ||
1711 | /* | 1800 | /* |
@@ -1793,7 +1882,7 @@ out: | |||
1793 | */ | 1882 | */ |
1794 | int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | 1883 | int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, |
1795 | struct ext4_ext_path *path, | 1884 | struct ext4_ext_path *path, |
1796 | struct ext4_extent *newext, int flag) | 1885 | struct ext4_extent *newext, int gb_flags) |
1797 | { | 1886 | { |
1798 | struct ext4_extent_header *eh; | 1887 | struct ext4_extent_header *eh; |
1799 | struct ext4_extent *ex, *fex; | 1888 | struct ext4_extent *ex, *fex; |
@@ -1802,7 +1891,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1802 | int depth, len, err; | 1891 | int depth, len, err; |
1803 | ext4_lblk_t next; | 1892 | ext4_lblk_t next; |
1804 | unsigned uninitialized = 0; | 1893 | unsigned uninitialized = 0; |
1805 | int flags = 0; | 1894 | int mb_flags = 0; |
1806 | 1895 | ||
1807 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { | 1896 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { |
1808 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); | 1897 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); |
@@ -1817,7 +1906,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1817 | } | 1906 | } |
1818 | 1907 | ||
1819 | /* try to insert block into found extent and return */ | 1908 | /* try to insert block into found extent and return */ |
1820 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)) { | 1909 | if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) { |
1821 | 1910 | ||
1822 | /* | 1911 | /* |
1823 | * Try to see whether we should rather test the extent on | 1912 | * Try to see whether we should rather test the extent on |
@@ -1920,7 +2009,7 @@ prepend: | |||
1920 | if (next != EXT_MAX_BLOCKS) { | 2009 | if (next != EXT_MAX_BLOCKS) { |
1921 | ext_debug("next leaf block - %u\n", next); | 2010 | ext_debug("next leaf block - %u\n", next); |
1922 | BUG_ON(npath != NULL); | 2011 | BUG_ON(npath != NULL); |
1923 | npath = ext4_ext_find_extent(inode, next, NULL); | 2012 | npath = ext4_ext_find_extent(inode, next, NULL, 0); |
1924 | if (IS_ERR(npath)) | 2013 | if (IS_ERR(npath)) |
1925 | return PTR_ERR(npath); | 2014 | return PTR_ERR(npath); |
1926 | BUG_ON(npath->p_depth != path->p_depth); | 2015 | BUG_ON(npath->p_depth != path->p_depth); |
@@ -1939,9 +2028,10 @@ prepend: | |||
1939 | * There is no free space in the found leaf. | 2028 | * There is no free space in the found leaf. |
1940 | * We're gonna add a new leaf in the tree. | 2029 | * We're gonna add a new leaf in the tree. |
1941 | */ | 2030 | */ |
1942 | if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL) | 2031 | if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) |
1943 | flags = EXT4_MB_USE_RESERVED; | 2032 | mb_flags = EXT4_MB_USE_RESERVED; |
1944 | err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext); | 2033 | err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, |
2034 | path, newext); | ||
1945 | if (err) | 2035 | if (err) |
1946 | goto cleanup; | 2036 | goto cleanup; |
1947 | depth = ext_depth(inode); | 2037 | depth = ext_depth(inode); |
@@ -2007,7 +2097,7 @@ has_space: | |||
2007 | 2097 | ||
2008 | merge: | 2098 | merge: |
2009 | /* try to merge extents */ | 2099 | /* try to merge extents */ |
2010 | if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) | 2100 | if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) |
2011 | ext4_ext_try_to_merge(handle, inode, path, nearex); | 2101 | ext4_ext_try_to_merge(handle, inode, path, nearex); |
2012 | 2102 | ||
2013 | 2103 | ||
@@ -2050,7 +2140,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, | |||
2050 | path = NULL; | 2140 | path = NULL; |
2051 | } | 2141 | } |
2052 | 2142 | ||
2053 | path = ext4_ext_find_extent(inode, block, path); | 2143 | path = ext4_ext_find_extent(inode, block, path, 0); |
2054 | if (IS_ERR(path)) { | 2144 | if (IS_ERR(path)) { |
2055 | up_read(&EXT4_I(inode)->i_data_sem); | 2145 | up_read(&EXT4_I(inode)->i_data_sem); |
2056 | err = PTR_ERR(path); | 2146 | err = PTR_ERR(path); |
@@ -2195,8 +2285,8 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2195 | ext4_lblk_t block) | 2285 | ext4_lblk_t block) |
2196 | { | 2286 | { |
2197 | int depth = ext_depth(inode); | 2287 | int depth = ext_depth(inode); |
2198 | unsigned long len; | 2288 | unsigned long len = 0; |
2199 | ext4_lblk_t lblock; | 2289 | ext4_lblk_t lblock = 0; |
2200 | struct ext4_extent *ex; | 2290 | struct ext4_extent *ex; |
2201 | 2291 | ||
2202 | ex = path[depth].p_ext; | 2292 | ex = path[depth].p_ext; |
@@ -2233,7 +2323,6 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2233 | ext4_es_insert_extent(inode, lblock, len, ~0, | 2323 | ext4_es_insert_extent(inode, lblock, len, ~0, |
2234 | EXTENT_STATUS_HOLE); | 2324 | EXTENT_STATUS_HOLE); |
2235 | } else { | 2325 | } else { |
2236 | lblock = len = 0; | ||
2237 | BUG(); | 2326 | BUG(); |
2238 | } | 2327 | } |
2239 | 2328 | ||
@@ -2712,7 +2801,7 @@ again: | |||
2712 | ext4_lblk_t ee_block; | 2801 | ext4_lblk_t ee_block; |
2713 | 2802 | ||
2714 | /* find extent for this block */ | 2803 | /* find extent for this block */ |
2715 | path = ext4_ext_find_extent(inode, end, NULL); | 2804 | path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); |
2716 | if (IS_ERR(path)) { | 2805 | if (IS_ERR(path)) { |
2717 | ext4_journal_stop(handle); | 2806 | ext4_journal_stop(handle); |
2718 | return PTR_ERR(path); | 2807 | return PTR_ERR(path); |
@@ -2754,6 +2843,7 @@ again: | |||
2754 | */ | 2843 | */ |
2755 | err = ext4_split_extent_at(handle, inode, path, | 2844 | err = ext4_split_extent_at(handle, inode, path, |
2756 | end + 1, split_flag, | 2845 | end + 1, split_flag, |
2846 | EXT4_EX_NOCACHE | | ||
2757 | EXT4_GET_BLOCKS_PRE_IO | | 2847 | EXT4_GET_BLOCKS_PRE_IO | |
2758 | EXT4_GET_BLOCKS_METADATA_NOFAIL); | 2848 | EXT4_GET_BLOCKS_METADATA_NOFAIL); |
2759 | 2849 | ||
@@ -2782,7 +2872,7 @@ again: | |||
2782 | path[0].p_hdr = ext_inode_hdr(inode); | 2872 | path[0].p_hdr = ext_inode_hdr(inode); |
2783 | i = 0; | 2873 | i = 0; |
2784 | 2874 | ||
2785 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { | 2875 | if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) { |
2786 | err = -EIO; | 2876 | err = -EIO; |
2787 | goto out; | 2877 | goto out; |
2788 | } | 2878 | } |
@@ -2829,10 +2919,12 @@ again: | |||
2829 | ext_debug("move to level %d (block %llu)\n", | 2919 | ext_debug("move to level %d (block %llu)\n", |
2830 | i + 1, ext4_idx_pblock(path[i].p_idx)); | 2920 | i + 1, ext4_idx_pblock(path[i].p_idx)); |
2831 | memset(path + i + 1, 0, sizeof(*path)); | 2921 | memset(path + i + 1, 0, sizeof(*path)); |
2832 | bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx)); | 2922 | bh = read_extent_tree_block(inode, |
2833 | if (!bh) { | 2923 | ext4_idx_pblock(path[i].p_idx), depth - i - 1, |
2924 | EXT4_EX_NOCACHE); | ||
2925 | if (IS_ERR(bh)) { | ||
2834 | /* should we reset i_size? */ | 2926 | /* should we reset i_size? */ |
2835 | err = -EIO; | 2927 | err = PTR_ERR(bh); |
2836 | break; | 2928 | break; |
2837 | } | 2929 | } |
2838 | /* Yield here to deal with large extent trees. | 2930 | /* Yield here to deal with large extent trees. |
@@ -2842,11 +2934,6 @@ again: | |||
2842 | err = -EIO; | 2934 | err = -EIO; |
2843 | break; | 2935 | break; |
2844 | } | 2936 | } |
2845 | if (ext4_ext_check_block(inode, ext_block_hdr(bh), | ||
2846 | depth - i - 1, bh)) { | ||
2847 | err = -EIO; | ||
2848 | break; | ||
2849 | } | ||
2850 | path[i + 1].p_bh = bh; | 2937 | path[i + 1].p_bh = bh; |
2851 | 2938 | ||
2852 | /* save actual number of indexes since this | 2939 | /* save actual number of indexes since this |
@@ -2961,6 +3048,23 @@ void ext4_ext_release(struct super_block *sb) | |||
2961 | #endif | 3048 | #endif |
2962 | } | 3049 | } |
2963 | 3050 | ||
3051 | static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) | ||
3052 | { | ||
3053 | ext4_lblk_t ee_block; | ||
3054 | ext4_fsblk_t ee_pblock; | ||
3055 | unsigned int ee_len; | ||
3056 | |||
3057 | ee_block = le32_to_cpu(ex->ee_block); | ||
3058 | ee_len = ext4_ext_get_actual_len(ex); | ||
3059 | ee_pblock = ext4_ext_pblock(ex); | ||
3060 | |||
3061 | if (ee_len == 0) | ||
3062 | return 0; | ||
3063 | |||
3064 | return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, | ||
3065 | EXTENT_STATUS_WRITTEN); | ||
3066 | } | ||
3067 | |||
2964 | /* FIXME!! we need to try to merge to left or right after zero-out */ | 3068 | /* FIXME!! we need to try to merge to left or right after zero-out */ |
2965 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | 3069 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) |
2966 | { | 3070 | { |
@@ -3113,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle, | |||
3113 | goto fix_extent_len; | 3217 | goto fix_extent_len; |
3114 | 3218 | ||
3115 | /* update extent status tree */ | 3219 | /* update extent status tree */ |
3116 | err = ext4_es_zeroout(inode, &zero_ex); | 3220 | err = ext4_zeroout_es(inode, &zero_ex); |
3117 | 3221 | ||
3118 | goto out; | 3222 | goto out; |
3119 | } else if (err) | 3223 | } else if (err) |
@@ -3133,7 +3237,7 @@ fix_extent_len: | |||
3133 | * ext4_split_extents() splits an extent and mark extent which is covered | 3237 | * ext4_split_extents() splits an extent and mark extent which is covered |
3134 | * by @map as split_flags indicates | 3238 | * by @map as split_flags indicates |
3135 | * | 3239 | * |
3136 | * It may result in splitting the extent into multiple extents (upto three) | 3240 | * It may result in splitting the extent into multiple extents (up to three) |
3137 | * There are three possibilities: | 3241 | * There are three possibilities: |
3138 | * a> There is no split required | 3242 | * a> There is no split required |
3139 | * b> Splits in two extents: Split is happening at either end of the extent | 3243 | * b> Splits in two extents: Split is happening at either end of the extent |
@@ -3181,7 +3285,7 @@ static int ext4_split_extent(handle_t *handle, | |||
3181 | * result in split of original leaf or extent zeroout. | 3285 | * result in split of original leaf or extent zeroout. |
3182 | */ | 3286 | */ |
3183 | ext4_ext_drop_refs(path); | 3287 | ext4_ext_drop_refs(path); |
3184 | path = ext4_ext_find_extent(inode, map->m_lblk, path); | 3288 | path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); |
3185 | if (IS_ERR(path)) | 3289 | if (IS_ERR(path)) |
3186 | return PTR_ERR(path); | 3290 | return PTR_ERR(path); |
3187 | depth = ext_depth(inode); | 3291 | depth = ext_depth(inode); |
@@ -3464,7 +3568,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3464 | out: | 3568 | out: |
3465 | /* If we have gotten a failure, don't zero out status tree */ | 3569 | /* If we have gotten a failure, don't zero out status tree */ |
3466 | if (!err) | 3570 | if (!err) |
3467 | err = ext4_es_zeroout(inode, &zero_ex); | 3571 | err = ext4_zeroout_es(inode, &zero_ex); |
3468 | return err ? err : allocated; | 3572 | return err ? err : allocated; |
3469 | } | 3573 | } |
3470 | 3574 | ||
@@ -3565,7 +3669,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3565 | if (err < 0) | 3669 | if (err < 0) |
3566 | goto out; | 3670 | goto out; |
3567 | ext4_ext_drop_refs(path); | 3671 | ext4_ext_drop_refs(path); |
3568 | path = ext4_ext_find_extent(inode, map->m_lblk, path); | 3672 | path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); |
3569 | if (IS_ERR(path)) { | 3673 | if (IS_ERR(path)) { |
3570 | err = PTR_ERR(path); | 3674 | err = PTR_ERR(path); |
3571 | goto out; | 3675 | goto out; |
@@ -4052,7 +4156,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4052 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | 4156 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); |
4053 | 4157 | ||
4054 | /* find extent for this block */ | 4158 | /* find extent for this block */ |
4055 | path = ext4_ext_find_extent(inode, map->m_lblk, NULL); | 4159 | path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0); |
4056 | if (IS_ERR(path)) { | 4160 | if (IS_ERR(path)) { |
4057 | err = PTR_ERR(path); | 4161 | err = PTR_ERR(path); |
4058 | path = NULL; | 4162 | path = NULL; |
@@ -4744,6 +4848,12 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4744 | return error; | 4848 | return error; |
4745 | } | 4849 | } |
4746 | 4850 | ||
4851 | if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { | ||
4852 | error = ext4_ext_precache(inode); | ||
4853 | if (error) | ||
4854 | return error; | ||
4855 | } | ||
4856 | |||
4747 | /* fallback to generic here if not in extents fmt */ | 4857 | /* fallback to generic here if not in extents fmt */ |
4748 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 4858 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
4749 | return generic_block_fiemap(inode, fieinfo, start, len, | 4859 | return generic_block_fiemap(inode, fieinfo, start, len, |
@@ -4771,6 +4881,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4771 | error = ext4_fill_fiemap_extents(inode, start_blk, | 4881 | error = ext4_fill_fiemap_extents(inode, start_blk, |
4772 | len_blks, fieinfo); | 4882 | len_blks, fieinfo); |
4773 | } | 4883 | } |
4774 | 4884 | ext4_es_lru_add(inode); | |
4775 | return error; | 4885 | return error; |
4776 | } | 4886 | } |
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 91cb110da1b4..2d1bdbe78c04 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/list_sort.h> | 13 | #include <linux/list_sort.h> |
14 | #include "ext4.h" | 14 | #include "ext4.h" |
15 | #include "extents_status.h" | 15 | #include "extents_status.h" |
16 | #include "ext4_extents.h" | ||
17 | 16 | ||
18 | #include <trace/events/ext4.h> | 17 | #include <trace/events/ext4.h> |
19 | 18 | ||
@@ -263,7 +262,7 @@ void ext4_es_find_delayed_extent_range(struct inode *inode, | |||
263 | if (tree->cache_es) { | 262 | if (tree->cache_es) { |
264 | es1 = tree->cache_es; | 263 | es1 = tree->cache_es; |
265 | if (in_range(lblk, es1->es_lblk, es1->es_len)) { | 264 | if (in_range(lblk, es1->es_lblk, es1->es_len)) { |
266 | es_debug("%u cached by [%u/%u) %llu %llx\n", | 265 | es_debug("%u cached by [%u/%u) %llu %x\n", |
267 | lblk, es1->es_lblk, es1->es_len, | 266 | lblk, es1->es_lblk, es1->es_len, |
268 | ext4_es_pblock(es1), ext4_es_status(es1)); | 267 | ext4_es_pblock(es1), ext4_es_status(es1)); |
269 | goto out; | 268 | goto out; |
@@ -409,6 +408,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es) | |||
409 | } | 408 | } |
410 | 409 | ||
411 | #ifdef ES_AGGRESSIVE_TEST | 410 | #ifdef ES_AGGRESSIVE_TEST |
411 | #include "ext4_extents.h" /* Needed when ES_AGGRESSIVE_TEST is defined */ | ||
412 | |||
412 | static void ext4_es_insert_extent_ext_check(struct inode *inode, | 413 | static void ext4_es_insert_extent_ext_check(struct inode *inode, |
413 | struct extent_status *es) | 414 | struct extent_status *es) |
414 | { | 415 | { |
@@ -419,7 +420,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
419 | unsigned short ee_len; | 420 | unsigned short ee_len; |
420 | int depth, ee_status, es_status; | 421 | int depth, ee_status, es_status; |
421 | 422 | ||
422 | path = ext4_ext_find_extent(inode, es->es_lblk, NULL); | 423 | path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE); |
423 | if (IS_ERR(path)) | 424 | if (IS_ERR(path)) |
424 | return; | 425 | return; |
425 | 426 | ||
@@ -641,13 +642,13 @@ out: | |||
641 | */ | 642 | */ |
642 | int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | 643 | int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, |
643 | ext4_lblk_t len, ext4_fsblk_t pblk, | 644 | ext4_lblk_t len, ext4_fsblk_t pblk, |
644 | unsigned long long status) | 645 | unsigned int status) |
645 | { | 646 | { |
646 | struct extent_status newes; | 647 | struct extent_status newes; |
647 | ext4_lblk_t end = lblk + len - 1; | 648 | ext4_lblk_t end = lblk + len - 1; |
648 | int err = 0; | 649 | int err = 0; |
649 | 650 | ||
650 | es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n", | 651 | es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n", |
651 | lblk, len, pblk, status, inode->i_ino); | 652 | lblk, len, pblk, status, inode->i_ino); |
652 | 653 | ||
653 | if (!len) | 654 | if (!len) |
@@ -684,6 +685,38 @@ error: | |||
684 | } | 685 | } |
685 | 686 | ||
686 | /* | 687 | /* |
688 | * ext4_es_cache_extent() inserts information into the extent status | ||
689 | * tree if and only if there isn't information about the range in | ||
690 | * question already. | ||
691 | */ | ||
692 | void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, | ||
693 | ext4_lblk_t len, ext4_fsblk_t pblk, | ||
694 | unsigned int status) | ||
695 | { | ||
696 | struct extent_status *es; | ||
697 | struct extent_status newes; | ||
698 | ext4_lblk_t end = lblk + len - 1; | ||
699 | |||
700 | newes.es_lblk = lblk; | ||
701 | newes.es_len = len; | ||
702 | ext4_es_store_pblock(&newes, pblk); | ||
703 | ext4_es_store_status(&newes, status); | ||
704 | trace_ext4_es_cache_extent(inode, &newes); | ||
705 | |||
706 | if (!len) | ||
707 | return; | ||
708 | |||
709 | BUG_ON(end < lblk); | ||
710 | |||
711 | write_lock(&EXT4_I(inode)->i_es_lock); | ||
712 | |||
713 | es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk); | ||
714 | if (!es || es->es_lblk > end) | ||
715 | __es_insert_extent(inode, &newes); | ||
716 | write_unlock(&EXT4_I(inode)->i_es_lock); | ||
717 | } | ||
718 | |||
719 | /* | ||
687 | * ext4_es_lookup_extent() looks up an extent in extent status tree. | 720 | * ext4_es_lookup_extent() looks up an extent in extent status tree. |
688 | * | 721 | * |
689 | * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks. | 722 | * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks. |
@@ -871,23 +904,6 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
871 | return err; | 904 | return err; |
872 | } | 905 | } |
873 | 906 | ||
874 | int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) | ||
875 | { | ||
876 | ext4_lblk_t ee_block; | ||
877 | ext4_fsblk_t ee_pblock; | ||
878 | unsigned int ee_len; | ||
879 | |||
880 | ee_block = le32_to_cpu(ex->ee_block); | ||
881 | ee_len = ext4_ext_get_actual_len(ex); | ||
882 | ee_pblock = ext4_ext_pblock(ex); | ||
883 | |||
884 | if (ee_len == 0) | ||
885 | return 0; | ||
886 | |||
887 | return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, | ||
888 | EXTENT_STATUS_WRITTEN); | ||
889 | } | ||
890 | |||
891 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, | 907 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, |
892 | struct list_head *b) | 908 | struct list_head *b) |
893 | { | 909 | { |
@@ -895,6 +911,12 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, | |||
895 | eia = list_entry(a, struct ext4_inode_info, i_es_lru); | 911 | eia = list_entry(a, struct ext4_inode_info, i_es_lru); |
896 | eib = list_entry(b, struct ext4_inode_info, i_es_lru); | 912 | eib = list_entry(b, struct ext4_inode_info, i_es_lru); |
897 | 913 | ||
914 | if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | ||
915 | !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | ||
916 | return 1; | ||
917 | if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | ||
918 | ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | ||
919 | return -1; | ||
898 | if (eia->i_touch_when == eib->i_touch_when) | 920 | if (eia->i_touch_when == eib->i_touch_when) |
899 | return 0; | 921 | return 0; |
900 | if (time_after(eia->i_touch_when, eib->i_touch_when)) | 922 | if (time_after(eia->i_touch_when, eib->i_touch_when)) |
@@ -908,21 +930,13 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | |||
908 | { | 930 | { |
909 | struct ext4_inode_info *ei; | 931 | struct ext4_inode_info *ei; |
910 | struct list_head *cur, *tmp; | 932 | struct list_head *cur, *tmp; |
911 | LIST_HEAD(skiped); | 933 | LIST_HEAD(skipped); |
912 | int ret, nr_shrunk = 0; | 934 | int ret, nr_shrunk = 0; |
935 | int retried = 0, skip_precached = 1, nr_skipped = 0; | ||
913 | 936 | ||
914 | spin_lock(&sbi->s_es_lru_lock); | 937 | spin_lock(&sbi->s_es_lru_lock); |
915 | 938 | ||
916 | /* | 939 | retry: |
917 | * If the inode that is at the head of LRU list is newer than | ||
918 | * last_sorted time, that means that we need to sort this list. | ||
919 | */ | ||
920 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru); | ||
921 | if (sbi->s_es_last_sorted < ei->i_touch_when) { | ||
922 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | ||
923 | sbi->s_es_last_sorted = jiffies; | ||
924 | } | ||
925 | |||
926 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 940 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { |
927 | /* | 941 | /* |
928 | * If we have already reclaimed all extents from extent | 942 | * If we have already reclaimed all extents from extent |
@@ -933,9 +947,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | |||
933 | 947 | ||
934 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | 948 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); |
935 | 949 | ||
936 | /* Skip the inode that is newer than the last_sorted time */ | 950 | /* |
937 | if (sbi->s_es_last_sorted < ei->i_touch_when) { | 951 | * Skip the inode that is newer than the last_sorted |
938 | list_move_tail(cur, &skiped); | 952 | * time. Normally we try hard to avoid shrinking |
953 | * precached inodes, but we will as a last resort. | ||
954 | */ | ||
955 | if ((sbi->s_es_last_sorted < ei->i_touch_when) || | ||
956 | (skip_precached && ext4_test_inode_state(&ei->vfs_inode, | ||
957 | EXT4_STATE_EXT_PRECACHED))) { | ||
958 | nr_skipped++; | ||
959 | list_move_tail(cur, &skipped); | ||
939 | continue; | 960 | continue; |
940 | } | 961 | } |
941 | 962 | ||
@@ -955,11 +976,33 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | |||
955 | } | 976 | } |
956 | 977 | ||
957 | /* Move the newer inodes into the tail of the LRU list. */ | 978 | /* Move the newer inodes into the tail of the LRU list. */ |
958 | list_splice_tail(&skiped, &sbi->s_es_lru); | 979 | list_splice_tail(&skipped, &sbi->s_es_lru); |
980 | INIT_LIST_HEAD(&skipped); | ||
981 | |||
982 | /* | ||
983 | * If we skipped any inodes, and we weren't able to make any | ||
984 | * forward progress, sort the list and try again. | ||
985 | */ | ||
986 | if ((nr_shrunk == 0) && nr_skipped && !retried) { | ||
987 | retried++; | ||
988 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | ||
989 | sbi->s_es_last_sorted = jiffies; | ||
990 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, | ||
991 | i_es_lru); | ||
992 | /* | ||
993 | * If there are no non-precached inodes left on the | ||
994 | * list, start releasing precached extents. | ||
995 | */ | ||
996 | if (ext4_test_inode_state(&ei->vfs_inode, | ||
997 | EXT4_STATE_EXT_PRECACHED)) | ||
998 | skip_precached = 0; | ||
999 | goto retry; | ||
1000 | } | ||
1001 | |||
959 | spin_unlock(&sbi->s_es_lru_lock); | 1002 | spin_unlock(&sbi->s_es_lru_lock); |
960 | 1003 | ||
961 | if (locked_ei && nr_shrunk == 0) | 1004 | if (locked_ei && nr_shrunk == 0) |
962 | nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); | 1005 | nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); |
963 | 1006 | ||
964 | return nr_shrunk; | 1007 | return nr_shrunk; |
965 | } | 1008 | } |
@@ -1034,10 +1077,16 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | |||
1034 | struct rb_node *node; | 1077 | struct rb_node *node; |
1035 | struct extent_status *es; | 1078 | struct extent_status *es; |
1036 | int nr_shrunk = 0; | 1079 | int nr_shrunk = 0; |
1080 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | ||
1081 | DEFAULT_RATELIMIT_BURST); | ||
1037 | 1082 | ||
1038 | if (ei->i_es_lru_nr == 0) | 1083 | if (ei->i_es_lru_nr == 0) |
1039 | return 0; | 1084 | return 0; |
1040 | 1085 | ||
1086 | if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && | ||
1087 | __ratelimit(&_rs)) | ||
1088 | ext4_warning(inode->i_sb, "forced shrink of precached extents"); | ||
1089 | |||
1041 | node = rb_first(&tree->root); | 1090 | node = rb_first(&tree->root); |
1042 | while (node != NULL) { | 1091 | while (node != NULL) { |
1043 | es = rb_entry(node, struct extent_status, rb_node); | 1092 | es = rb_entry(node, struct extent_status, rb_node); |
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index e936730cc5b0..167f4ab8ecc3 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
@@ -29,16 +29,26 @@ | |||
29 | /* | 29 | /* |
30 | * These flags live in the high bits of extent_status.es_pblk | 30 | * These flags live in the high bits of extent_status.es_pblk |
31 | */ | 31 | */ |
32 | #define EXTENT_STATUS_WRITTEN (1ULL << 63) | 32 | #define ES_SHIFT 60 |
33 | #define EXTENT_STATUS_UNWRITTEN (1ULL << 62) | 33 | |
34 | #define EXTENT_STATUS_DELAYED (1ULL << 61) | 34 | #define EXTENT_STATUS_WRITTEN (1 << 3) |
35 | #define EXTENT_STATUS_HOLE (1ULL << 60) | 35 | #define EXTENT_STATUS_UNWRITTEN (1 << 2) |
36 | #define EXTENT_STATUS_DELAYED (1 << 1) | ||
37 | #define EXTENT_STATUS_HOLE (1 << 0) | ||
36 | 38 | ||
37 | #define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \ | 39 | #define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \ |
38 | EXTENT_STATUS_UNWRITTEN | \ | 40 | EXTENT_STATUS_UNWRITTEN | \ |
39 | EXTENT_STATUS_DELAYED | \ | 41 | EXTENT_STATUS_DELAYED | \ |
40 | EXTENT_STATUS_HOLE) | 42 | EXTENT_STATUS_HOLE) |
41 | 43 | ||
44 | #define ES_WRITTEN (1ULL << 63) | ||
45 | #define ES_UNWRITTEN (1ULL << 62) | ||
46 | #define ES_DELAYED (1ULL << 61) | ||
47 | #define ES_HOLE (1ULL << 60) | ||
48 | |||
49 | #define ES_MASK (ES_WRITTEN | ES_UNWRITTEN | \ | ||
50 | ES_DELAYED | ES_HOLE) | ||
51 | |||
42 | struct ext4_sb_info; | 52 | struct ext4_sb_info; |
43 | struct ext4_extent; | 53 | struct ext4_extent; |
44 | 54 | ||
@@ -60,7 +70,10 @@ extern void ext4_es_init_tree(struct ext4_es_tree *tree); | |||
60 | 70 | ||
61 | extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | 71 | extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, |
62 | ext4_lblk_t len, ext4_fsblk_t pblk, | 72 | ext4_lblk_t len, ext4_fsblk_t pblk, |
63 | unsigned long long status); | 73 | unsigned int status); |
74 | extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, | ||
75 | ext4_lblk_t len, ext4_fsblk_t pblk, | ||
76 | unsigned int status); | ||
64 | extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | 77 | extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, |
65 | ext4_lblk_t len); | 78 | ext4_lblk_t len); |
66 | extern void ext4_es_find_delayed_extent_range(struct inode *inode, | 79 | extern void ext4_es_find_delayed_extent_range(struct inode *inode, |
@@ -68,36 +81,35 @@ extern void ext4_es_find_delayed_extent_range(struct inode *inode, | |||
68 | struct extent_status *es); | 81 | struct extent_status *es); |
69 | extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, | 82 | extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, |
70 | struct extent_status *es); | 83 | struct extent_status *es); |
71 | extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex); | ||
72 | 84 | ||
73 | static inline int ext4_es_is_written(struct extent_status *es) | 85 | static inline int ext4_es_is_written(struct extent_status *es) |
74 | { | 86 | { |
75 | return (es->es_pblk & EXTENT_STATUS_WRITTEN) != 0; | 87 | return (es->es_pblk & ES_WRITTEN) != 0; |
76 | } | 88 | } |
77 | 89 | ||
78 | static inline int ext4_es_is_unwritten(struct extent_status *es) | 90 | static inline int ext4_es_is_unwritten(struct extent_status *es) |
79 | { | 91 | { |
80 | return (es->es_pblk & EXTENT_STATUS_UNWRITTEN) != 0; | 92 | return (es->es_pblk & ES_UNWRITTEN) != 0; |
81 | } | 93 | } |
82 | 94 | ||
83 | static inline int ext4_es_is_delayed(struct extent_status *es) | 95 | static inline int ext4_es_is_delayed(struct extent_status *es) |
84 | { | 96 | { |
85 | return (es->es_pblk & EXTENT_STATUS_DELAYED) != 0; | 97 | return (es->es_pblk & ES_DELAYED) != 0; |
86 | } | 98 | } |
87 | 99 | ||
88 | static inline int ext4_es_is_hole(struct extent_status *es) | 100 | static inline int ext4_es_is_hole(struct extent_status *es) |
89 | { | 101 | { |
90 | return (es->es_pblk & EXTENT_STATUS_HOLE) != 0; | 102 | return (es->es_pblk & ES_HOLE) != 0; |
91 | } | 103 | } |
92 | 104 | ||
93 | static inline ext4_fsblk_t ext4_es_status(struct extent_status *es) | 105 | static inline unsigned int ext4_es_status(struct extent_status *es) |
94 | { | 106 | { |
95 | return (es->es_pblk & EXTENT_STATUS_FLAGS); | 107 | return es->es_pblk >> ES_SHIFT; |
96 | } | 108 | } |
97 | 109 | ||
98 | static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es) | 110 | static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es) |
99 | { | 111 | { |
100 | return (es->es_pblk & ~EXTENT_STATUS_FLAGS); | 112 | return es->es_pblk & ~ES_MASK; |
101 | } | 113 | } |
102 | 114 | ||
103 | static inline void ext4_es_store_pblock(struct extent_status *es, | 115 | static inline void ext4_es_store_pblock(struct extent_status *es, |
@@ -105,19 +117,16 @@ static inline void ext4_es_store_pblock(struct extent_status *es, | |||
105 | { | 117 | { |
106 | ext4_fsblk_t block; | 118 | ext4_fsblk_t block; |
107 | 119 | ||
108 | block = (pb & ~EXTENT_STATUS_FLAGS) | | 120 | block = (pb & ~ES_MASK) | (es->es_pblk & ES_MASK); |
109 | (es->es_pblk & EXTENT_STATUS_FLAGS); | ||
110 | es->es_pblk = block; | 121 | es->es_pblk = block; |
111 | } | 122 | } |
112 | 123 | ||
113 | static inline void ext4_es_store_status(struct extent_status *es, | 124 | static inline void ext4_es_store_status(struct extent_status *es, |
114 | unsigned long long status) | 125 | unsigned int status) |
115 | { | 126 | { |
116 | ext4_fsblk_t block; | 127 | es->es_pblk = (((ext4_fsblk_t) |
117 | 128 | (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) | | |
118 | block = (status & EXTENT_STATUS_FLAGS) | | 129 | (es->es_pblk & ~ES_MASK)); |
119 | (es->es_pblk & ~EXTENT_STATUS_FLAGS); | ||
120 | es->es_pblk = block; | ||
121 | } | 130 | } |
122 | 131 | ||
123 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); | 132 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 8bf5999875ee..137193ff389b 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -70,18 +70,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
70 | ext4_group_t block_group, | 70 | ext4_group_t block_group, |
71 | struct ext4_group_desc *gdp) | 71 | struct ext4_group_desc *gdp) |
72 | { | 72 | { |
73 | struct ext4_group_info *grp; | ||
73 | J_ASSERT_BH(bh, buffer_locked(bh)); | 74 | J_ASSERT_BH(bh, buffer_locked(bh)); |
74 | 75 | ||
75 | /* If checksum is bad mark all blocks and inodes use to prevent | 76 | /* If checksum is bad mark all blocks and inodes use to prevent |
76 | * allocation, essentially implementing a per-group read-only flag. */ | 77 | * allocation, essentially implementing a per-group read-only flag. */ |
77 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { | 78 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { |
78 | ext4_error(sb, "Checksum bad for group %u", block_group); | 79 | ext4_error(sb, "Checksum bad for group %u", block_group); |
79 | ext4_free_group_clusters_set(sb, gdp, 0); | 80 | grp = ext4_get_group_info(sb, block_group); |
80 | ext4_free_inodes_set(sb, gdp, 0); | 81 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); |
81 | ext4_itable_unused_set(sb, gdp, 0); | 82 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); |
82 | memset(bh->b_data, 0xff, sb->s_blocksize); | ||
83 | ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, | ||
84 | EXT4_INODES_PER_GROUP(sb) / 8); | ||
85 | return 0; | 83 | return 0; |
86 | } | 84 | } |
87 | 85 | ||
@@ -117,6 +115,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
117 | struct ext4_group_desc *desc; | 115 | struct ext4_group_desc *desc; |
118 | struct buffer_head *bh = NULL; | 116 | struct buffer_head *bh = NULL; |
119 | ext4_fsblk_t bitmap_blk; | 117 | ext4_fsblk_t bitmap_blk; |
118 | struct ext4_group_info *grp; | ||
120 | 119 | ||
121 | desc = ext4_get_group_desc(sb, block_group, NULL); | 120 | desc = ext4_get_group_desc(sb, block_group, NULL); |
122 | if (!desc) | 121 | if (!desc) |
@@ -185,6 +184,8 @@ verify: | |||
185 | put_bh(bh); | 184 | put_bh(bh); |
186 | ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " | 185 | ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " |
187 | "inode_bitmap = %llu", block_group, bitmap_blk); | 186 | "inode_bitmap = %llu", block_group, bitmap_blk); |
187 | grp = ext4_get_group_info(sb, block_group); | ||
188 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); | ||
188 | return NULL; | 189 | return NULL; |
189 | } | 190 | } |
190 | ext4_unlock_group(sb, block_group); | 191 | ext4_unlock_group(sb, block_group); |
@@ -221,6 +222,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
221 | struct ext4_super_block *es; | 222 | struct ext4_super_block *es; |
222 | struct ext4_sb_info *sbi; | 223 | struct ext4_sb_info *sbi; |
223 | int fatal = 0, err, count, cleared; | 224 | int fatal = 0, err, count, cleared; |
225 | struct ext4_group_info *grp; | ||
224 | 226 | ||
225 | if (!sb) { | 227 | if (!sb) { |
226 | printk(KERN_ERR "EXT4-fs: %s:%d: inode on " | 228 | printk(KERN_ERR "EXT4-fs: %s:%d: inode on " |
@@ -266,7 +268,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
266 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | 268 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); |
267 | bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); | 269 | bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); |
268 | bitmap_bh = ext4_read_inode_bitmap(sb, block_group); | 270 | bitmap_bh = ext4_read_inode_bitmap(sb, block_group); |
269 | if (!bitmap_bh) | 271 | /* Don't bother if the inode bitmap is corrupt. */ |
272 | grp = ext4_get_group_info(sb, block_group); | ||
273 | if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || !bitmap_bh) | ||
270 | goto error_return; | 274 | goto error_return; |
271 | 275 | ||
272 | BUFFER_TRACE(bitmap_bh, "get_write_access"); | 276 | BUFFER_TRACE(bitmap_bh, "get_write_access"); |
@@ -315,8 +319,10 @@ out: | |||
315 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 319 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
316 | if (!fatal) | 320 | if (!fatal) |
317 | fatal = err; | 321 | fatal = err; |
318 | } else | 322 | } else { |
319 | ext4_error(sb, "bit already cleared for inode %lu", ino); | 323 | ext4_error(sb, "bit already cleared for inode %lu", ino); |
324 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); | ||
325 | } | ||
320 | 326 | ||
321 | error_return: | 327 | error_return: |
322 | brelse(bitmap_bh); | 328 | brelse(bitmap_bh); |
@@ -625,6 +631,51 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
625 | } | 631 | } |
626 | 632 | ||
627 | /* | 633 | /* |
634 | * In no journal mode, if an inode has recently been deleted, we want | ||
635 | * to avoid reusing it until we're reasonably sure the inode table | ||
636 | * block has been written back to disk. (Yes, these values are | ||
637 | * somewhat arbitrary...) | ||
638 | */ | ||
639 | #define RECENTCY_MIN 5 | ||
640 | #define RECENTCY_DIRTY 30 | ||
641 | |||
642 | static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) | ||
643 | { | ||
644 | struct ext4_group_desc *gdp; | ||
645 | struct ext4_inode *raw_inode; | ||
646 | struct buffer_head *bh; | ||
647 | unsigned long dtime, now; | ||
648 | int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; | ||
649 | int offset, ret = 0, recentcy = RECENTCY_MIN; | ||
650 | |||
651 | gdp = ext4_get_group_desc(sb, group, NULL); | ||
652 | if (unlikely(!gdp)) | ||
653 | return 0; | ||
654 | |||
655 | bh = sb_getblk(sb, ext4_inode_table(sb, gdp) + | ||
656 | (ino / inodes_per_block)); | ||
657 | if (unlikely(!bh) || !buffer_uptodate(bh)) | ||
658 | /* | ||
659 | * If the block is not in the buffer cache, then it | ||
660 | * must have been written out. | ||
661 | */ | ||
662 | goto out; | ||
663 | |||
664 | offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb); | ||
665 | raw_inode = (struct ext4_inode *) (bh->b_data + offset); | ||
666 | dtime = le32_to_cpu(raw_inode->i_dtime); | ||
667 | now = get_seconds(); | ||
668 | if (buffer_dirty(bh)) | ||
669 | recentcy += RECENTCY_DIRTY; | ||
670 | |||
671 | if (dtime && (dtime < now) && (now < dtime + recentcy)) | ||
672 | ret = 1; | ||
673 | out: | ||
674 | brelse(bh); | ||
675 | return ret; | ||
676 | } | ||
677 | |||
678 | /* | ||
628 | * There are two policies for allocating an inode. If the new inode is | 679 | * There are two policies for allocating an inode. If the new inode is |
629 | * a directory, then a forward search is made for a block group with both | 680 | * a directory, then a forward search is made for a block group with both |
630 | * free space and a low directory-to-inode ratio; if that fails, then of | 681 | * free space and a low directory-to-inode ratio; if that fails, then of |
@@ -652,6 +703,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, | |||
652 | struct inode *ret; | 703 | struct inode *ret; |
653 | ext4_group_t i; | 704 | ext4_group_t i; |
654 | ext4_group_t flex_group; | 705 | ext4_group_t flex_group; |
706 | struct ext4_group_info *grp; | ||
655 | 707 | ||
656 | /* Cannot create files in a deleted directory */ | 708 | /* Cannot create files in a deleted directory */ |
657 | if (!dir || !dir->i_nlink) | 709 | if (!dir || !dir->i_nlink) |
@@ -725,10 +777,22 @@ got_group: | |||
725 | continue; | 777 | continue; |
726 | } | 778 | } |
727 | 779 | ||
780 | grp = ext4_get_group_info(sb, group); | ||
781 | /* Skip groups with already-known suspicious inode tables */ | ||
782 | if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { | ||
783 | if (++group == ngroups) | ||
784 | group = 0; | ||
785 | continue; | ||
786 | } | ||
787 | |||
728 | brelse(inode_bitmap_bh); | 788 | brelse(inode_bitmap_bh); |
729 | inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); | 789 | inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); |
730 | if (!inode_bitmap_bh) | 790 | /* Skip groups with suspicious inode tables */ |
731 | goto out; | 791 | if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || !inode_bitmap_bh) { |
792 | if (++group == ngroups) | ||
793 | group = 0; | ||
794 | continue; | ||
795 | } | ||
732 | 796 | ||
733 | repeat_in_this_group: | 797 | repeat_in_this_group: |
734 | ino = ext4_find_next_zero_bit((unsigned long *) | 798 | ino = ext4_find_next_zero_bit((unsigned long *) |
@@ -741,6 +805,11 @@ repeat_in_this_group: | |||
741 | "inode=%lu", ino + 1); | 805 | "inode=%lu", ino + 1); |
742 | continue; | 806 | continue; |
743 | } | 807 | } |
808 | if ((EXT4_SB(sb)->s_journal == NULL) && | ||
809 | recently_deleted(sb, group, ino)) { | ||
810 | ino++; | ||
811 | goto next_inode; | ||
812 | } | ||
744 | if (!handle) { | 813 | if (!handle) { |
745 | BUG_ON(nblocks <= 0); | 814 | BUG_ON(nblocks <= 0); |
746 | handle = __ext4_journal_start_sb(dir->i_sb, line_no, | 815 | handle = __ext4_journal_start_sb(dir->i_sb, line_no, |
@@ -764,6 +833,7 @@ repeat_in_this_group: | |||
764 | ino++; /* the inode bitmap is zero-based */ | 833 | ino++; /* the inode bitmap is zero-based */ |
765 | if (!ret2) | 834 | if (!ret2) |
766 | goto got; /* we grabbed the inode! */ | 835 | goto got; /* we grabbed the inode! */ |
836 | next_inode: | ||
767 | if (ino < EXT4_INODES_PER_GROUP(sb)) | 837 | if (ino < EXT4_INODES_PER_GROUP(sb)) |
768 | goto repeat_in_this_group; | 838 | goto repeat_in_this_group; |
769 | next_group: | 839 | next_group: |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 87b30cd357e7..594009f5f523 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/aio.h> | 23 | #include <linux/aio.h> |
24 | #include "ext4_jbd2.h" | 24 | #include "ext4_jbd2.h" |
25 | #include "truncate.h" | 25 | #include "truncate.h" |
26 | #include "ext4_extents.h" /* Needed for EXT_MAX_BLOCKS */ | ||
27 | 26 | ||
28 | #include <trace/events/ext4.h> | 27 | #include <trace/events/ext4.h> |
29 | 28 | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c2ca04e67a4f..9115f2807515 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -553,7 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
553 | } | 553 | } |
554 | if (retval > 0) { | 554 | if (retval > 0) { |
555 | int ret; | 555 | int ret; |
556 | unsigned long long status; | 556 | unsigned int status; |
557 | 557 | ||
558 | if (unlikely(retval != map->m_len)) { | 558 | if (unlikely(retval != map->m_len)) { |
559 | ext4_warning(inode->i_sb, | 559 | ext4_warning(inode->i_sb, |
@@ -653,7 +653,7 @@ found: | |||
653 | 653 | ||
654 | if (retval > 0) { | 654 | if (retval > 0) { |
655 | int ret; | 655 | int ret; |
656 | unsigned long long status; | 656 | unsigned int status; |
657 | 657 | ||
658 | if (unlikely(retval != map->m_len)) { | 658 | if (unlikely(retval != map->m_len)) { |
659 | ext4_warning(inode->i_sb, | 659 | ext4_warning(inode->i_sb, |
@@ -969,7 +969,8 @@ retry_journal: | |||
969 | ext4_journal_stop(handle); | 969 | ext4_journal_stop(handle); |
970 | goto retry_grab; | 970 | goto retry_grab; |
971 | } | 971 | } |
972 | wait_on_page_writeback(page); | 972 | /* In case writeback began while the page was unlocked */ |
973 | wait_for_stable_page(page); | ||
973 | 974 | ||
974 | if (ext4_should_dioread_nolock(inode)) | 975 | if (ext4_should_dioread_nolock(inode)) |
975 | ret = __block_write_begin(page, pos, len, ext4_get_block_write); | 976 | ret = __block_write_begin(page, pos, len, ext4_get_block_write); |
@@ -1633,7 +1634,7 @@ add_delayed: | |||
1633 | set_buffer_delay(bh); | 1634 | set_buffer_delay(bh); |
1634 | } else if (retval > 0) { | 1635 | } else if (retval > 0) { |
1635 | int ret; | 1636 | int ret; |
1636 | unsigned long long status; | 1637 | unsigned int status; |
1637 | 1638 | ||
1638 | if (unlikely(retval != map->m_len)) { | 1639 | if (unlikely(retval != map->m_len)) { |
1639 | ext4_warning(inode->i_sb, | 1640 | ext4_warning(inode->i_sb, |
@@ -1890,12 +1891,32 @@ static int ext4_writepage(struct page *page, | |||
1890 | return ret; | 1891 | return ret; |
1891 | } | 1892 | } |
1892 | 1893 | ||
1894 | static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) | ||
1895 | { | ||
1896 | int len; | ||
1897 | loff_t size = i_size_read(mpd->inode); | ||
1898 | int err; | ||
1899 | |||
1900 | BUG_ON(page->index != mpd->first_page); | ||
1901 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
1902 | len = size & ~PAGE_CACHE_MASK; | ||
1903 | else | ||
1904 | len = PAGE_CACHE_SIZE; | ||
1905 | clear_page_dirty_for_io(page); | ||
1906 | err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); | ||
1907 | if (!err) | ||
1908 | mpd->wbc->nr_to_write--; | ||
1909 | mpd->first_page++; | ||
1910 | |||
1911 | return err; | ||
1912 | } | ||
1913 | |||
1893 | #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) | 1914 | #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) |
1894 | 1915 | ||
1895 | /* | 1916 | /* |
1896 | * mballoc gives us at most this number of blocks... | 1917 | * mballoc gives us at most this number of blocks... |
1897 | * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). | 1918 | * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). |
1898 | * The rest of mballoc seems to handle chunks upto full group size. | 1919 | * The rest of mballoc seems to handle chunks up to full group size. |
1899 | */ | 1920 | */ |
1900 | #define MAX_WRITEPAGES_EXTENT_LEN 2048 | 1921 | #define MAX_WRITEPAGES_EXTENT_LEN 2048 |
1901 | 1922 | ||
@@ -1904,82 +1925,94 @@ static int ext4_writepage(struct page *page, | |||
1904 | * | 1925 | * |
1905 | * @mpd - extent of blocks | 1926 | * @mpd - extent of blocks |
1906 | * @lblk - logical number of the block in the file | 1927 | * @lblk - logical number of the block in the file |
1907 | * @b_state - b_state of the buffer head added | 1928 | * @bh - buffer head we want to add to the extent |
1908 | * | 1929 | * |
1909 | * the function is used to collect contig. blocks in same state | 1930 | * The function is used to collect contig. blocks in the same state. If the |
1931 | * buffer doesn't require mapping for writeback and we haven't started the | ||
1932 | * extent of buffers to map yet, the function returns 'true' immediately - the | ||
1933 | * caller can write the buffer right away. Otherwise the function returns true | ||
1934 | * if the block has been added to the extent, false if the block couldn't be | ||
1935 | * added. | ||
1910 | */ | 1936 | */ |
1911 | static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, | 1937 | static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, |
1912 | unsigned long b_state) | 1938 | struct buffer_head *bh) |
1913 | { | 1939 | { |
1914 | struct ext4_map_blocks *map = &mpd->map; | 1940 | struct ext4_map_blocks *map = &mpd->map; |
1915 | 1941 | ||
1916 | /* Don't go larger than mballoc is willing to allocate */ | 1942 | /* Buffer that doesn't need mapping for writeback? */ |
1917 | if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) | 1943 | if (!buffer_dirty(bh) || !buffer_mapped(bh) || |
1918 | return 0; | 1944 | (!buffer_delay(bh) && !buffer_unwritten(bh))) { |
1945 | /* So far no extent to map => we write the buffer right away */ | ||
1946 | if (map->m_len == 0) | ||
1947 | return true; | ||
1948 | return false; | ||
1949 | } | ||
1919 | 1950 | ||
1920 | /* First block in the extent? */ | 1951 | /* First block in the extent? */ |
1921 | if (map->m_len == 0) { | 1952 | if (map->m_len == 0) { |
1922 | map->m_lblk = lblk; | 1953 | map->m_lblk = lblk; |
1923 | map->m_len = 1; | 1954 | map->m_len = 1; |
1924 | map->m_flags = b_state & BH_FLAGS; | 1955 | map->m_flags = bh->b_state & BH_FLAGS; |
1925 | return 1; | 1956 | return true; |
1926 | } | 1957 | } |
1927 | 1958 | ||
1959 | /* Don't go larger than mballoc is willing to allocate */ | ||
1960 | if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) | ||
1961 | return false; | ||
1962 | |||
1928 | /* Can we merge the block to our big extent? */ | 1963 | /* Can we merge the block to our big extent? */ |
1929 | if (lblk == map->m_lblk + map->m_len && | 1964 | if (lblk == map->m_lblk + map->m_len && |
1930 | (b_state & BH_FLAGS) == map->m_flags) { | 1965 | (bh->b_state & BH_FLAGS) == map->m_flags) { |
1931 | map->m_len++; | 1966 | map->m_len++; |
1932 | return 1; | 1967 | return true; |
1933 | } | 1968 | } |
1934 | return 0; | 1969 | return false; |
1935 | } | 1970 | } |
1936 | 1971 | ||
1937 | static bool add_page_bufs_to_extent(struct mpage_da_data *mpd, | 1972 | /* |
1938 | struct buffer_head *head, | 1973 | * mpage_process_page_bufs - submit page buffers for IO or add them to extent |
1939 | struct buffer_head *bh, | 1974 | * |
1940 | ext4_lblk_t lblk) | 1975 | * @mpd - extent of blocks for mapping |
1976 | * @head - the first buffer in the page | ||
1977 | * @bh - buffer we should start processing from | ||
1978 | * @lblk - logical number of the block in the file corresponding to @bh | ||
1979 | * | ||
1980 | * Walk through page buffers from @bh upto @head (exclusive) and either submit | ||
1981 | * the page for IO if all buffers in this page were mapped and there's no | ||
1982 | * accumulated extent of buffers to map or add buffers in the page to the | ||
1983 | * extent of buffers to map. The function returns 1 if the caller can continue | ||
1984 | * by processing the next page, 0 if it should stop adding buffers to the | ||
1985 | * extent to map because we cannot extend it anymore. It can also return value | ||
1986 | * < 0 in case of error during IO submission. | ||
1987 | */ | ||
1988 | static int mpage_process_page_bufs(struct mpage_da_data *mpd, | ||
1989 | struct buffer_head *head, | ||
1990 | struct buffer_head *bh, | ||
1991 | ext4_lblk_t lblk) | ||
1941 | { | 1992 | { |
1942 | struct inode *inode = mpd->inode; | 1993 | struct inode *inode = mpd->inode; |
1994 | int err; | ||
1943 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | 1995 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) |
1944 | >> inode->i_blkbits; | 1996 | >> inode->i_blkbits; |
1945 | 1997 | ||
1946 | do { | 1998 | do { |
1947 | BUG_ON(buffer_locked(bh)); | 1999 | BUG_ON(buffer_locked(bh)); |
1948 | 2000 | ||
1949 | if (!buffer_dirty(bh) || !buffer_mapped(bh) || | 2001 | if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) { |
1950 | (!buffer_delay(bh) && !buffer_unwritten(bh)) || | ||
1951 | lblk >= blocks) { | ||
1952 | /* Found extent to map? */ | 2002 | /* Found extent to map? */ |
1953 | if (mpd->map.m_len) | 2003 | if (mpd->map.m_len) |
1954 | return false; | 2004 | return 0; |
1955 | if (lblk >= blocks) | 2005 | /* Everything mapped so far and we hit EOF */ |
1956 | return true; | 2006 | break; |
1957 | continue; | ||
1958 | } | 2007 | } |
1959 | if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state)) | ||
1960 | return false; | ||
1961 | } while (lblk++, (bh = bh->b_this_page) != head); | 2008 | } while (lblk++, (bh = bh->b_this_page) != head); |
1962 | return true; | 2009 | /* So far everything mapped? Submit the page for IO. */ |
1963 | } | 2010 | if (mpd->map.m_len == 0) { |
1964 | 2011 | err = mpage_submit_page(mpd, head->b_page); | |
1965 | static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) | 2012 | if (err < 0) |
1966 | { | 2013 | return err; |
1967 | int len; | 2014 | } |
1968 | loff_t size = i_size_read(mpd->inode); | 2015 | return lblk < blocks; |
1969 | int err; | ||
1970 | |||
1971 | BUG_ON(page->index != mpd->first_page); | ||
1972 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
1973 | len = size & ~PAGE_CACHE_MASK; | ||
1974 | else | ||
1975 | len = PAGE_CACHE_SIZE; | ||
1976 | clear_page_dirty_for_io(page); | ||
1977 | err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); | ||
1978 | if (!err) | ||
1979 | mpd->wbc->nr_to_write--; | ||
1980 | mpd->first_page++; | ||
1981 | |||
1982 | return err; | ||
1983 | } | 2016 | } |
1984 | 2017 | ||
1985 | /* | 2018 | /* |
@@ -2003,8 +2036,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) | |||
2003 | struct inode *inode = mpd->inode; | 2036 | struct inode *inode = mpd->inode; |
2004 | struct buffer_head *head, *bh; | 2037 | struct buffer_head *head, *bh; |
2005 | int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; | 2038 | int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; |
2006 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | ||
2007 | >> inode->i_blkbits; | ||
2008 | pgoff_t start, end; | 2039 | pgoff_t start, end; |
2009 | ext4_lblk_t lblk; | 2040 | ext4_lblk_t lblk; |
2010 | sector_t pblock; | 2041 | sector_t pblock; |
@@ -2026,7 +2057,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) | |||
2026 | 2057 | ||
2027 | if (page->index > end) | 2058 | if (page->index > end) |
2028 | break; | 2059 | break; |
2029 | /* Upto 'end' pages must be contiguous */ | 2060 | /* Up to 'end' pages must be contiguous */ |
2030 | BUG_ON(page->index != start); | 2061 | BUG_ON(page->index != start); |
2031 | bh = head = page_buffers(page); | 2062 | bh = head = page_buffers(page); |
2032 | do { | 2063 | do { |
@@ -2039,18 +2070,26 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) | |||
2039 | */ | 2070 | */ |
2040 | mpd->map.m_len = 0; | 2071 | mpd->map.m_len = 0; |
2041 | mpd->map.m_flags = 0; | 2072 | mpd->map.m_flags = 0; |
2042 | add_page_bufs_to_extent(mpd, head, bh, | 2073 | /* |
2043 | lblk); | 2074 | * FIXME: If dioread_nolock supports |
2075 | * blocksize < pagesize, we need to make | ||
2076 | * sure we add size mapped so far to | ||
2077 | * io_end->size as the following call | ||
2078 | * can submit the page for IO. | ||
2079 | */ | ||
2080 | err = mpage_process_page_bufs(mpd, head, | ||
2081 | bh, lblk); | ||
2044 | pagevec_release(&pvec); | 2082 | pagevec_release(&pvec); |
2045 | return 0; | 2083 | if (err > 0) |
2084 | err = 0; | ||
2085 | return err; | ||
2046 | } | 2086 | } |
2047 | if (buffer_delay(bh)) { | 2087 | if (buffer_delay(bh)) { |
2048 | clear_buffer_delay(bh); | 2088 | clear_buffer_delay(bh); |
2049 | bh->b_blocknr = pblock++; | 2089 | bh->b_blocknr = pblock++; |
2050 | } | 2090 | } |
2051 | clear_buffer_unwritten(bh); | 2091 | clear_buffer_unwritten(bh); |
2052 | } while (++lblk < blocks && | 2092 | } while (lblk++, (bh = bh->b_this_page) != head); |
2053 | (bh = bh->b_this_page) != head); | ||
2054 | 2093 | ||
2055 | /* | 2094 | /* |
2056 | * FIXME: This is going to break if dioread_nolock | 2095 | * FIXME: This is going to break if dioread_nolock |
@@ -2199,12 +2238,10 @@ static int mpage_map_and_submit_extent(handle_t *handle, | |||
2199 | 2238 | ||
2200 | /* Update on-disk size after IO is submitted */ | 2239 | /* Update on-disk size after IO is submitted */ |
2201 | disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; | 2240 | disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; |
2202 | if (disksize > i_size_read(inode)) | ||
2203 | disksize = i_size_read(inode); | ||
2204 | if (disksize > EXT4_I(inode)->i_disksize) { | 2241 | if (disksize > EXT4_I(inode)->i_disksize) { |
2205 | int err2; | 2242 | int err2; |
2206 | 2243 | ||
2207 | ext4_update_i_disksize(inode, disksize); | 2244 | ext4_wb_update_i_disksize(inode, disksize); |
2208 | err2 = ext4_mark_inode_dirty(handle, inode); | 2245 | err2 = ext4_mark_inode_dirty(handle, inode); |
2209 | if (err2) | 2246 | if (err2) |
2210 | ext4_error(inode->i_sb, | 2247 | ext4_error(inode->i_sb, |
@@ -2219,7 +2256,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, | |||
2219 | /* | 2256 | /* |
2220 | * Calculate the total number of credits to reserve for one writepages | 2257 | * Calculate the total number of credits to reserve for one writepages |
2221 | * iteration. This is called from ext4_writepages(). We map an extent of | 2258 | * iteration. This is called from ext4_writepages(). We map an extent of |
2222 | * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping | 2259 | * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping |
2223 | * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + | 2260 | * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + |
2224 | * bpp - 1 blocks in bpp different extents. | 2261 | * bpp - 1 blocks in bpp different extents. |
2225 | */ | 2262 | */ |
@@ -2319,14 +2356,10 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) | |||
2319 | lblk = ((ext4_lblk_t)page->index) << | 2356 | lblk = ((ext4_lblk_t)page->index) << |
2320 | (PAGE_CACHE_SHIFT - blkbits); | 2357 | (PAGE_CACHE_SHIFT - blkbits); |
2321 | head = page_buffers(page); | 2358 | head = page_buffers(page); |
2322 | if (!add_page_bufs_to_extent(mpd, head, head, lblk)) | 2359 | err = mpage_process_page_bufs(mpd, head, head, lblk); |
2360 | if (err <= 0) | ||
2323 | goto out; | 2361 | goto out; |
2324 | /* So far everything mapped? Submit the page for IO. */ | 2362 | err = 0; |
2325 | if (mpd->map.m_len == 0) { | ||
2326 | err = mpage_submit_page(mpd, page); | ||
2327 | if (err < 0) | ||
2328 | goto out; | ||
2329 | } | ||
2330 | 2363 | ||
2331 | /* | 2364 | /* |
2332 | * Accumulated enough dirty pages? This doesn't apply | 2365 | * Accumulated enough dirty pages? This doesn't apply |
@@ -2410,7 +2443,7 @@ static int ext4_writepages(struct address_space *mapping, | |||
2410 | 2443 | ||
2411 | if (ext4_should_dioread_nolock(inode)) { | 2444 | if (ext4_should_dioread_nolock(inode)) { |
2412 | /* | 2445 | /* |
2413 | * We may need to convert upto one extent per block in | 2446 | * We may need to convert up to one extent per block in |
2414 | * the page and we may dirty the inode. | 2447 | * the page and we may dirty the inode. |
2415 | */ | 2448 | */ |
2416 | rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); | 2449 | rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); |
@@ -2646,7 +2679,7 @@ retry_journal: | |||
2646 | goto retry_grab; | 2679 | goto retry_grab; |
2647 | } | 2680 | } |
2648 | /* In case writeback began while the page was unlocked */ | 2681 | /* In case writeback began while the page was unlocked */ |
2649 | wait_on_page_writeback(page); | 2682 | wait_for_stable_page(page); |
2650 | 2683 | ||
2651 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); | 2684 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); |
2652 | if (ret < 0) { | 2685 | if (ret < 0) { |
@@ -4566,7 +4599,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4566 | ext4_journal_stop(handle); | 4599 | ext4_journal_stop(handle); |
4567 | } | 4600 | } |
4568 | 4601 | ||
4569 | if (attr->ia_valid & ATTR_SIZE) { | 4602 | if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { |
4603 | handle_t *handle; | ||
4604 | loff_t oldsize = inode->i_size; | ||
4570 | 4605 | ||
4571 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | 4606 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
4572 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 4607 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
@@ -4574,73 +4609,69 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4574 | if (attr->ia_size > sbi->s_bitmap_maxbytes) | 4609 | if (attr->ia_size > sbi->s_bitmap_maxbytes) |
4575 | return -EFBIG; | 4610 | return -EFBIG; |
4576 | } | 4611 | } |
4577 | } | 4612 | if (S_ISREG(inode->i_mode) && |
4578 | 4613 | (attr->ia_size < inode->i_size)) { | |
4579 | if (S_ISREG(inode->i_mode) && | 4614 | if (ext4_should_order_data(inode)) { |
4580 | attr->ia_valid & ATTR_SIZE && | 4615 | error = ext4_begin_ordered_truncate(inode, |
4581 | (attr->ia_size < inode->i_size)) { | ||
4582 | handle_t *handle; | ||
4583 | |||
4584 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); | ||
4585 | if (IS_ERR(handle)) { | ||
4586 | error = PTR_ERR(handle); | ||
4587 | goto err_out; | ||
4588 | } | ||
4589 | if (ext4_handle_valid(handle)) { | ||
4590 | error = ext4_orphan_add(handle, inode); | ||
4591 | orphan = 1; | ||
4592 | } | ||
4593 | EXT4_I(inode)->i_disksize = attr->ia_size; | ||
4594 | rc = ext4_mark_inode_dirty(handle, inode); | ||
4595 | if (!error) | ||
4596 | error = rc; | ||
4597 | ext4_journal_stop(handle); | ||
4598 | |||
4599 | if (ext4_should_order_data(inode)) { | ||
4600 | error = ext4_begin_ordered_truncate(inode, | ||
4601 | attr->ia_size); | 4616 | attr->ia_size); |
4602 | if (error) { | 4617 | if (error) |
4603 | /* Do as much error cleanup as possible */ | ||
4604 | handle = ext4_journal_start(inode, | ||
4605 | EXT4_HT_INODE, 3); | ||
4606 | if (IS_ERR(handle)) { | ||
4607 | ext4_orphan_del(NULL, inode); | ||
4608 | goto err_out; | 4618 | goto err_out; |
4609 | } | 4619 | } |
4610 | ext4_orphan_del(handle, inode); | 4620 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); |
4611 | orphan = 0; | 4621 | if (IS_ERR(handle)) { |
4612 | ext4_journal_stop(handle); | 4622 | error = PTR_ERR(handle); |
4613 | goto err_out; | 4623 | goto err_out; |
4614 | } | 4624 | } |
4615 | } | 4625 | if (ext4_handle_valid(handle)) { |
4616 | } | 4626 | error = ext4_orphan_add(handle, inode); |
4617 | 4627 | orphan = 1; | |
4618 | if (attr->ia_valid & ATTR_SIZE) { | ||
4619 | if (attr->ia_size != inode->i_size) { | ||
4620 | loff_t oldsize = inode->i_size; | ||
4621 | |||
4622 | i_size_write(inode, attr->ia_size); | ||
4623 | /* | ||
4624 | * Blocks are going to be removed from the inode. Wait | ||
4625 | * for dio in flight. Temporarily disable | ||
4626 | * dioread_nolock to prevent livelock. | ||
4627 | */ | ||
4628 | if (orphan) { | ||
4629 | if (!ext4_should_journal_data(inode)) { | ||
4630 | ext4_inode_block_unlocked_dio(inode); | ||
4631 | inode_dio_wait(inode); | ||
4632 | ext4_inode_resume_unlocked_dio(inode); | ||
4633 | } else | ||
4634 | ext4_wait_for_tail_page_commit(inode); | ||
4635 | } | 4628 | } |
4629 | down_write(&EXT4_I(inode)->i_data_sem); | ||
4630 | EXT4_I(inode)->i_disksize = attr->ia_size; | ||
4631 | rc = ext4_mark_inode_dirty(handle, inode); | ||
4632 | if (!error) | ||
4633 | error = rc; | ||
4636 | /* | 4634 | /* |
4637 | * Truncate pagecache after we've waited for commit | 4635 | * We have to update i_size under i_data_sem together |
4638 | * in data=journal mode to make pages freeable. | 4636 | * with i_disksize to avoid races with writeback code |
4637 | * running ext4_wb_update_i_disksize(). | ||
4639 | */ | 4638 | */ |
4640 | truncate_pagecache(inode, oldsize, inode->i_size); | 4639 | if (!error) |
4640 | i_size_write(inode, attr->ia_size); | ||
4641 | up_write(&EXT4_I(inode)->i_data_sem); | ||
4642 | ext4_journal_stop(handle); | ||
4643 | if (error) { | ||
4644 | ext4_orphan_del(NULL, inode); | ||
4645 | goto err_out; | ||
4646 | } | ||
4647 | } else | ||
4648 | i_size_write(inode, attr->ia_size); | ||
4649 | |||
4650 | /* | ||
4651 | * Blocks are going to be removed from the inode. Wait | ||
4652 | * for dio in flight. Temporarily disable | ||
4653 | * dioread_nolock to prevent livelock. | ||
4654 | */ | ||
4655 | if (orphan) { | ||
4656 | if (!ext4_should_journal_data(inode)) { | ||
4657 | ext4_inode_block_unlocked_dio(inode); | ||
4658 | inode_dio_wait(inode); | ||
4659 | ext4_inode_resume_unlocked_dio(inode); | ||
4660 | } else | ||
4661 | ext4_wait_for_tail_page_commit(inode); | ||
4641 | } | 4662 | } |
4642 | ext4_truncate(inode); | 4663 | /* |
4664 | * Truncate pagecache after we've waited for commit | ||
4665 | * in data=journal mode to make pages freeable. | ||
4666 | */ | ||
4667 | truncate_pagecache(inode, oldsize, inode->i_size); | ||
4643 | } | 4668 | } |
4669 | /* | ||
4670 | * We want to call ext4_truncate() even if attr->ia_size == | ||
4671 | * inode->i_size for cases like truncation of fallocated space | ||
4672 | */ | ||
4673 | if (attr->ia_valid & ATTR_SIZE) | ||
4674 | ext4_truncate(inode); | ||
4644 | 4675 | ||
4645 | if (!rc) { | 4676 | if (!rc) { |
4646 | setattr_copy(inode, attr); | 4677 | setattr_copy(inode, attr); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c0427e2f6648..a569d335f804 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
18 | #include "ext4_jbd2.h" | 18 | #include "ext4_jbd2.h" |
19 | #include "ext4.h" | 19 | #include "ext4.h" |
20 | #include "ext4_extents.h" | ||
21 | 20 | ||
22 | #define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1) | 21 | #define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1) |
23 | 22 | ||
@@ -624,6 +623,8 @@ resizefs_out: | |||
624 | 623 | ||
625 | return 0; | 624 | return 0; |
626 | } | 625 | } |
626 | case EXT4_IOC_PRECACHE_EXTENTS: | ||
627 | return ext4_ext_precache(inode); | ||
627 | 628 | ||
628 | default: | 629 | default: |
629 | return -ENOTTY; | 630 | return -ENOTTY; |
@@ -688,6 +689,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
688 | case EXT4_IOC_MOVE_EXT: | 689 | case EXT4_IOC_MOVE_EXT: |
689 | case FITRIM: | 690 | case FITRIM: |
690 | case EXT4_IOC_RESIZE_FS: | 691 | case EXT4_IOC_RESIZE_FS: |
692 | case EXT4_IOC_PRECACHE_EXTENTS: | ||
691 | break; | 693 | break; |
692 | default: | 694 | default: |
693 | return -ENOIOCTLCMD; | 695 | return -ENOIOCTLCMD; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4bbbf13bd743..a41e3ba8cfaa 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -751,13 +751,15 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
751 | 751 | ||
752 | if (free != grp->bb_free) { | 752 | if (free != grp->bb_free) { |
753 | ext4_grp_locked_error(sb, group, 0, 0, | 753 | ext4_grp_locked_error(sb, group, 0, 0, |
754 | "%u clusters in bitmap, %u in gd", | 754 | "%u clusters in bitmap, %u in gd; " |
755 | "block bitmap corrupt.", | ||
755 | free, grp->bb_free); | 756 | free, grp->bb_free); |
756 | /* | 757 | /* |
757 | * If we intent to continue, we consider group descritor | 758 | * If we intend to continue, we consider group descriptor |
758 | * corrupt and update bb_free using bitmap value | 759 | * corrupt and update bb_free using bitmap value |
759 | */ | 760 | */ |
760 | grp->bb_free = free; | 761 | grp->bb_free = free; |
762 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); | ||
761 | } | 763 | } |
762 | mb_set_largest_free_order(sb, grp); | 764 | mb_set_largest_free_order(sb, grp); |
763 | 765 | ||
@@ -1398,6 +1400,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1398 | 1400 | ||
1399 | BUG_ON(last >= (sb->s_blocksize << 3)); | 1401 | BUG_ON(last >= (sb->s_blocksize << 3)); |
1400 | assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); | 1402 | assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); |
1403 | /* Don't bother if the block group is corrupt. */ | ||
1404 | if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) | ||
1405 | return; | ||
1406 | |||
1401 | mb_check_buddy(e4b); | 1407 | mb_check_buddy(e4b); |
1402 | mb_free_blocks_double(inode, e4b, first, count); | 1408 | mb_free_blocks_double(inode, e4b, first, count); |
1403 | 1409 | ||
@@ -1423,7 +1429,11 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1423 | inode ? inode->i_ino : 0, | 1429 | inode ? inode->i_ino : 0, |
1424 | blocknr, | 1430 | blocknr, |
1425 | "freeing already freed block " | 1431 | "freeing already freed block " |
1426 | "(bit %u)", block); | 1432 | "(bit %u); block bitmap corrupt.", |
1433 | block); | ||
1434 | /* Mark the block group as corrupt. */ | ||
1435 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, | ||
1436 | &e4b->bd_info->bb_state); | ||
1427 | mb_regenerate_buddy(e4b); | 1437 | mb_regenerate_buddy(e4b); |
1428 | goto done; | 1438 | goto done; |
1429 | } | 1439 | } |
@@ -1790,6 +1800,11 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, | |||
1790 | if (err) | 1800 | if (err) |
1791 | return err; | 1801 | return err; |
1792 | 1802 | ||
1803 | if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) { | ||
1804 | ext4_mb_unload_buddy(e4b); | ||
1805 | return 0; | ||
1806 | } | ||
1807 | |||
1793 | ext4_lock_group(ac->ac_sb, group); | 1808 | ext4_lock_group(ac->ac_sb, group); |
1794 | max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, | 1809 | max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, |
1795 | ac->ac_g_ex.fe_len, &ex); | 1810 | ac->ac_g_ex.fe_len, &ex); |
@@ -1987,6 +2002,9 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1987 | if (cr <= 2 && free < ac->ac_g_ex.fe_len) | 2002 | if (cr <= 2 && free < ac->ac_g_ex.fe_len) |
1988 | return 0; | 2003 | return 0; |
1989 | 2004 | ||
2005 | if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) | ||
2006 | return 0; | ||
2007 | |||
1990 | /* We only do this if the grp has never been initialized */ | 2008 | /* We only do this if the grp has never been initialized */ |
1991 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | 2009 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
1992 | int ret = ext4_mb_init_group(ac->ac_sb, group); | 2010 | int ret = ext4_mb_init_group(ac->ac_sb, group); |
@@ -4585,6 +4603,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4585 | struct buffer_head *gd_bh; | 4603 | struct buffer_head *gd_bh; |
4586 | ext4_group_t block_group; | 4604 | ext4_group_t block_group; |
4587 | struct ext4_sb_info *sbi; | 4605 | struct ext4_sb_info *sbi; |
4606 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
4588 | struct ext4_buddy e4b; | 4607 | struct ext4_buddy e4b; |
4589 | unsigned int count_clusters; | 4608 | unsigned int count_clusters; |
4590 | int err = 0; | 4609 | int err = 0; |
@@ -4673,6 +4692,10 @@ do_more: | |||
4673 | overflow = 0; | 4692 | overflow = 0; |
4674 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 4693 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
4675 | 4694 | ||
4695 | if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT( | ||
4696 | ext4_get_group_info(sb, block_group)))) | ||
4697 | return; | ||
4698 | |||
4676 | /* | 4699 | /* |
4677 | * Check to see if we are freeing blocks across a group | 4700 | * Check to see if we are freeing blocks across a group |
4678 | * boundary. | 4701 | * boundary. |
@@ -4784,7 +4807,6 @@ do_more: | |||
4784 | ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh); | 4807 | ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh); |
4785 | ext4_group_desc_csum_set(sb, block_group, gdp); | 4808 | ext4_group_desc_csum_set(sb, block_group, gdp); |
4786 | ext4_unlock_group(sb, block_group); | 4809 | ext4_unlock_group(sb, block_group); |
4787 | percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); | ||
4788 | 4810 | ||
4789 | if (sbi->s_log_groups_per_flex) { | 4811 | if (sbi->s_log_groups_per_flex) { |
4790 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | 4812 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); |
@@ -4792,10 +4814,23 @@ do_more: | |||
4792 | &sbi->s_flex_groups[flex_group].free_clusters); | 4814 | &sbi->s_flex_groups[flex_group].free_clusters); |
4793 | } | 4815 | } |
4794 | 4816 | ||
4795 | ext4_mb_unload_buddy(&e4b); | 4817 | if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) { |
4796 | 4818 | percpu_counter_add(&sbi->s_dirtyclusters_counter, | |
4797 | if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) | 4819 | count_clusters); |
4820 | spin_lock(&ei->i_block_reservation_lock); | ||
4821 | if (flags & EXT4_FREE_BLOCKS_METADATA) | ||
4822 | ei->i_reserved_meta_blocks += count_clusters; | ||
4823 | else | ||
4824 | ei->i_reserved_data_blocks += count_clusters; | ||
4825 | spin_unlock(&ei->i_block_reservation_lock); | ||
4826 | if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) | ||
4827 | dquot_reclaim_block(inode, | ||
4828 | EXT4_C2B(sbi, count_clusters)); | ||
4829 | } else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) | ||
4798 | dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); | 4830 | dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); |
4831 | percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); | ||
4832 | |||
4833 | ext4_mb_unload_buddy(&e4b); | ||
4799 | 4834 | ||
4800 | /* We dirtied the bitmap block */ | 4835 | /* We dirtied the bitmap block */ |
4801 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | 4836 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 49e8bdff9163..2ae73a80c19b 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -39,7 +39,7 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
39 | newext.ee_block = cpu_to_le32(lb->first_block); | 39 | newext.ee_block = cpu_to_le32(lb->first_block); |
40 | newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); | 40 | newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); |
41 | ext4_ext_store_pblock(&newext, lb->first_pblock); | 41 | ext4_ext_store_pblock(&newext, lb->first_pblock); |
42 | path = ext4_ext_find_extent(inode, lb->first_block, NULL); | 42 | path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0); |
43 | 43 | ||
44 | if (IS_ERR(path)) { | 44 | if (IS_ERR(path)) { |
45 | retval = PTR_ERR(path); | 45 | retval = PTR_ERR(path); |
@@ -494,7 +494,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
494 | * superblock modification. | 494 | * superblock modification. |
495 | * | 495 | * |
496 | * For the tmp_inode we already have committed the | 496 | * For the tmp_inode we already have committed the |
497 | * trascation that created the inode. Later as and | 497 | * transaction that created the inode. Later as and |
498 | * when we add extents we extend the journal | 498 | * when we add extents we extend the journal |
499 | */ | 499 | */ |
500 | /* | 500 | /* |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index e86dddbd8296..7fa4d855dbd5 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -37,7 +37,7 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock, | |||
37 | int ret = 0; | 37 | int ret = 0; |
38 | struct ext4_ext_path *path; | 38 | struct ext4_ext_path *path; |
39 | 39 | ||
40 | path = ext4_ext_find_extent(inode, lblock, *orig_path); | 40 | path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE); |
41 | if (IS_ERR(path)) | 41 | if (IS_ERR(path)) |
42 | ret = PTR_ERR(path); | 42 | ret = PTR_ERR(path); |
43 | else if (path[ext_depth(inode)].p_ext == NULL) | 43 | else if (path[ext_depth(inode)].p_ext == NULL) |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 35f55a0dbc4b..1bec5a5c1e45 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -3005,15 +3005,19 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, | |||
3005 | /* | 3005 | /* |
3006 | * Anybody can rename anything with this: the permission checks are left to the | 3006 | * Anybody can rename anything with this: the permission checks are left to the |
3007 | * higher-level routines. | 3007 | * higher-level routines. |
3008 | * | ||
3009 | * n.b. old_{dentry,inode} refers to the source dentry/inode | ||
3010 | * while new_{dentry,inode} refers to the destination dentry/inode | ||
3011 | * This comes from rename(const char *oldpath, const char *newpath) | ||
3008 | */ | 3012 | */ |
3009 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | 3013 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, |
3010 | struct inode *new_dir, struct dentry *new_dentry) | 3014 | struct inode *new_dir, struct dentry *new_dentry) |
3011 | { | 3015 | { |
3012 | handle_t *handle; | 3016 | handle_t *handle = NULL; |
3013 | struct inode *old_inode, *new_inode; | 3017 | struct inode *old_inode, *new_inode; |
3014 | struct buffer_head *old_bh, *new_bh, *dir_bh; | 3018 | struct buffer_head *old_bh, *new_bh, *dir_bh; |
3015 | struct ext4_dir_entry_2 *old_de, *new_de; | 3019 | struct ext4_dir_entry_2 *old_de, *new_de; |
3016 | int retval, force_da_alloc = 0; | 3020 | int retval; |
3017 | int inlined = 0, new_inlined = 0; | 3021 | int inlined = 0, new_inlined = 0; |
3018 | struct ext4_dir_entry_2 *parent_de; | 3022 | struct ext4_dir_entry_2 *parent_de; |
3019 | 3023 | ||
@@ -3026,14 +3030,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3026 | * in separate transaction */ | 3030 | * in separate transaction */ |
3027 | if (new_dentry->d_inode) | 3031 | if (new_dentry->d_inode) |
3028 | dquot_initialize(new_dentry->d_inode); | 3032 | dquot_initialize(new_dentry->d_inode); |
3029 | handle = ext4_journal_start(old_dir, EXT4_HT_DIR, | ||
3030 | (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + | ||
3031 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); | ||
3032 | if (IS_ERR(handle)) | ||
3033 | return PTR_ERR(handle); | ||
3034 | |||
3035 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) | ||
3036 | ext4_handle_sync(handle); | ||
3037 | 3033 | ||
3038 | old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL); | 3034 | old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL); |
3039 | /* | 3035 | /* |
@@ -3056,6 +3052,18 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3056 | new_bh = NULL; | 3052 | new_bh = NULL; |
3057 | } | 3053 | } |
3058 | } | 3054 | } |
3055 | if (new_inode && !test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC)) | ||
3056 | ext4_alloc_da_blocks(old_inode); | ||
3057 | |||
3058 | handle = ext4_journal_start(old_dir, EXT4_HT_DIR, | ||
3059 | (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + | ||
3060 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); | ||
3061 | if (IS_ERR(handle)) | ||
3062 | return PTR_ERR(handle); | ||
3063 | |||
3064 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) | ||
3065 | ext4_handle_sync(handle); | ||
3066 | |||
3059 | if (S_ISDIR(old_inode->i_mode)) { | 3067 | if (S_ISDIR(old_inode->i_mode)) { |
3060 | if (new_inode) { | 3068 | if (new_inode) { |
3061 | retval = -ENOTEMPTY; | 3069 | retval = -ENOTEMPTY; |
@@ -3186,8 +3194,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3186 | ext4_mark_inode_dirty(handle, new_inode); | 3194 | ext4_mark_inode_dirty(handle, new_inode); |
3187 | if (!new_inode->i_nlink) | 3195 | if (!new_inode->i_nlink) |
3188 | ext4_orphan_add(handle, new_inode); | 3196 | ext4_orphan_add(handle, new_inode); |
3189 | if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC)) | ||
3190 | force_da_alloc = 1; | ||
3191 | } | 3197 | } |
3192 | retval = 0; | 3198 | retval = 0; |
3193 | 3199 | ||
@@ -3195,9 +3201,8 @@ end_rename: | |||
3195 | brelse(dir_bh); | 3201 | brelse(dir_bh); |
3196 | brelse(old_bh); | 3202 | brelse(old_bh); |
3197 | brelse(new_bh); | 3203 | brelse(new_bh); |
3198 | ext4_journal_stop(handle); | 3204 | if (handle) |
3199 | if (retval == 0 && force_da_alloc) | 3205 | ext4_journal_stop(handle); |
3200 | ext4_alloc_da_blocks(old_inode); | ||
3201 | return retval; | 3206 | return retval; |
3202 | } | 3207 | } |
3203 | 3208 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b59373b625e9..42337141e79f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -1134,8 +1134,8 @@ enum { | |||
1134 | Opt_nouid32, Opt_debug, Opt_removed, | 1134 | Opt_nouid32, Opt_debug, Opt_removed, |
1135 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, | 1135 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, |
1136 | Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, | 1136 | Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, |
1137 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, | 1137 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, |
1138 | Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, | 1138 | Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, |
1139 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 1139 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
1140 | Opt_data_err_abort, Opt_data_err_ignore, | 1140 | Opt_data_err_abort, Opt_data_err_ignore, |
1141 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 1141 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
@@ -1179,6 +1179,7 @@ static const match_table_t tokens = { | |||
1179 | {Opt_min_batch_time, "min_batch_time=%u"}, | 1179 | {Opt_min_batch_time, "min_batch_time=%u"}, |
1180 | {Opt_max_batch_time, "max_batch_time=%u"}, | 1180 | {Opt_max_batch_time, "max_batch_time=%u"}, |
1181 | {Opt_journal_dev, "journal_dev=%u"}, | 1181 | {Opt_journal_dev, "journal_dev=%u"}, |
1182 | {Opt_journal_path, "journal_path=%s"}, | ||
1182 | {Opt_journal_checksum, "journal_checksum"}, | 1183 | {Opt_journal_checksum, "journal_checksum"}, |
1183 | {Opt_journal_async_commit, "journal_async_commit"}, | 1184 | {Opt_journal_async_commit, "journal_async_commit"}, |
1184 | {Opt_abort, "abort"}, | 1185 | {Opt_abort, "abort"}, |
@@ -1338,6 +1339,7 @@ static int clear_qf_name(struct super_block *sb, int qtype) | |||
1338 | #define MOPT_NO_EXT2 0x0100 | 1339 | #define MOPT_NO_EXT2 0x0100 |
1339 | #define MOPT_NO_EXT3 0x0200 | 1340 | #define MOPT_NO_EXT3 0x0200 |
1340 | #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) | 1341 | #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) |
1342 | #define MOPT_STRING 0x0400 | ||
1341 | 1343 | ||
1342 | static const struct mount_opts { | 1344 | static const struct mount_opts { |
1343 | int token; | 1345 | int token; |
@@ -1387,6 +1389,7 @@ static const struct mount_opts { | |||
1387 | {Opt_resuid, 0, MOPT_GTE0}, | 1389 | {Opt_resuid, 0, MOPT_GTE0}, |
1388 | {Opt_resgid, 0, MOPT_GTE0}, | 1390 | {Opt_resgid, 0, MOPT_GTE0}, |
1389 | {Opt_journal_dev, 0, MOPT_GTE0}, | 1391 | {Opt_journal_dev, 0, MOPT_GTE0}, |
1392 | {Opt_journal_path, 0, MOPT_STRING}, | ||
1390 | {Opt_journal_ioprio, 0, MOPT_GTE0}, | 1393 | {Opt_journal_ioprio, 0, MOPT_GTE0}, |
1391 | {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, | 1394 | {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, |
1392 | {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, | 1395 | {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, |
@@ -1480,7 +1483,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, | |||
1480 | return -1; | 1483 | return -1; |
1481 | } | 1484 | } |
1482 | 1485 | ||
1483 | if (args->from && match_int(args, &arg)) | 1486 | if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg)) |
1484 | return -1; | 1487 | return -1; |
1485 | if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) | 1488 | if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) |
1486 | return -1; | 1489 | return -1; |
@@ -1544,6 +1547,44 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, | |||
1544 | return -1; | 1547 | return -1; |
1545 | } | 1548 | } |
1546 | *journal_devnum = arg; | 1549 | *journal_devnum = arg; |
1550 | } else if (token == Opt_journal_path) { | ||
1551 | char *journal_path; | ||
1552 | struct inode *journal_inode; | ||
1553 | struct path path; | ||
1554 | int error; | ||
1555 | |||
1556 | if (is_remount) { | ||
1557 | ext4_msg(sb, KERN_ERR, | ||
1558 | "Cannot specify journal on remount"); | ||
1559 | return -1; | ||
1560 | } | ||
1561 | journal_path = match_strdup(&args[0]); | ||
1562 | if (!journal_path) { | ||
1563 | ext4_msg(sb, KERN_ERR, "error: could not dup " | ||
1564 | "journal device string"); | ||
1565 | return -1; | ||
1566 | } | ||
1567 | |||
1568 | error = kern_path(journal_path, LOOKUP_FOLLOW, &path); | ||
1569 | if (error) { | ||
1570 | ext4_msg(sb, KERN_ERR, "error: could not find " | ||
1571 | "journal device path: error %d", error); | ||
1572 | kfree(journal_path); | ||
1573 | return -1; | ||
1574 | } | ||
1575 | |||
1576 | journal_inode = path.dentry->d_inode; | ||
1577 | if (!S_ISBLK(journal_inode->i_mode)) { | ||
1578 | ext4_msg(sb, KERN_ERR, "error: journal path %s " | ||
1579 | "is not a block device", journal_path); | ||
1580 | path_put(&path); | ||
1581 | kfree(journal_path); | ||
1582 | return -1; | ||
1583 | } | ||
1584 | |||
1585 | *journal_devnum = new_encode_dev(journal_inode->i_rdev); | ||
1586 | path_put(&path); | ||
1587 | kfree(journal_path); | ||
1547 | } else if (token == Opt_journal_ioprio) { | 1588 | } else if (token == Opt_journal_ioprio) { |
1548 | if (arg > 7) { | 1589 | if (arg > 7) { |
1549 | ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" | 1590 | ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 559bec1a37b4..cf2fc0594063 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -343,14 +343,14 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, | |||
343 | struct page *page = bh->b_page; | 343 | struct page *page = bh->b_page; |
344 | __u8 *addr; | 344 | __u8 *addr; |
345 | __u32 csum32; | 345 | __u32 csum32; |
346 | __be32 seq; | ||
346 | 347 | ||
347 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 348 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
348 | return; | 349 | return; |
349 | 350 | ||
350 | sequence = cpu_to_be32(sequence); | 351 | seq = cpu_to_be32(sequence); |
351 | addr = kmap_atomic(page); | 352 | addr = kmap_atomic(page); |
352 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, | 353 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); |
353 | sizeof(sequence)); | ||
354 | csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data), | 354 | csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data), |
355 | bh->b_size); | 355 | bh->b_size); |
356 | kunmap_atomic(addr); | 356 | kunmap_atomic(addr); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 02c7ad9d7a41..52032647dd4a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -130,9 +130,10 @@ int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) | |||
130 | return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; | 130 | return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; |
131 | } | 131 | } |
132 | 132 | ||
133 | static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) | 133 | static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) |
134 | { | 134 | { |
135 | __u32 csum, old_csum; | 135 | __u32 csum; |
136 | __be32 old_csum; | ||
136 | 137 | ||
137 | old_csum = sb->s_checksum; | 138 | old_csum = sb->s_checksum; |
138 | sb->s_checksum = 0; | 139 | sb->s_checksum = 0; |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index d4851464b57e..3929c50428b1 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -178,7 +178,8 @@ static int jbd2_descr_block_csum_verify(journal_t *j, | |||
178 | void *buf) | 178 | void *buf) |
179 | { | 179 | { |
180 | struct jbd2_journal_block_tail *tail; | 180 | struct jbd2_journal_block_tail *tail; |
181 | __u32 provided, calculated; | 181 | __be32 provided; |
182 | __u32 calculated; | ||
182 | 183 | ||
183 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 184 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
184 | return 1; | 185 | return 1; |
@@ -190,8 +191,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j, | |||
190 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | 191 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); |
191 | tail->t_checksum = provided; | 192 | tail->t_checksum = provided; |
192 | 193 | ||
193 | provided = be32_to_cpu(provided); | 194 | return provided == cpu_to_be32(calculated); |
194 | return provided == calculated; | ||
195 | } | 195 | } |
196 | 196 | ||
197 | /* | 197 | /* |
@@ -381,7 +381,8 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, | |||
381 | static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) | 381 | static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) |
382 | { | 382 | { |
383 | struct commit_header *h; | 383 | struct commit_header *h; |
384 | __u32 provided, calculated; | 384 | __be32 provided; |
385 | __u32 calculated; | ||
385 | 386 | ||
386 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 387 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
387 | return 1; | 388 | return 1; |
@@ -392,21 +393,20 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) | |||
392 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | 393 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); |
393 | h->h_chksum[0] = provided; | 394 | h->h_chksum[0] = provided; |
394 | 395 | ||
395 | provided = be32_to_cpu(provided); | 396 | return provided == cpu_to_be32(calculated); |
396 | return provided == calculated; | ||
397 | } | 397 | } |
398 | 398 | ||
399 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, | 399 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, |
400 | void *buf, __u32 sequence) | 400 | void *buf, __u32 sequence) |
401 | { | 401 | { |
402 | __u32 csum32; | 402 | __u32 csum32; |
403 | __be32 seq; | ||
403 | 404 | ||
404 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 405 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
405 | return 1; | 406 | return 1; |
406 | 407 | ||
407 | sequence = cpu_to_be32(sequence); | 408 | seq = cpu_to_be32(sequence); |
408 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, | 409 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); |
409 | sizeof(sequence)); | ||
410 | csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); | 410 | csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); |
411 | 411 | ||
412 | return tag->t_checksum == cpu_to_be16(csum32); | 412 | return tag->t_checksum == cpu_to_be16(csum32); |
@@ -808,7 +808,8 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, | |||
808 | void *buf) | 808 | void *buf) |
809 | { | 809 | { |
810 | struct jbd2_journal_revoke_tail *tail; | 810 | struct jbd2_journal_revoke_tail *tail; |
811 | __u32 provided, calculated; | 811 | __be32 provided; |
812 | __u32 calculated; | ||
812 | 813 | ||
813 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 814 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
814 | return 1; | 815 | return 1; |
@@ -820,8 +821,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, | |||
820 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | 821 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); |
821 | tail->r_checksum = provided; | 822 | tail->r_checksum = provided; |
822 | 823 | ||
823 | provided = be32_to_cpu(provided); | 824 | return provided == cpu_to_be32(calculated); |
824 | return provided == calculated; | ||
825 | } | 825 | } |
826 | 826 | ||
827 | /* Scan a revoke record, marking all blocks mentioned as revoked. */ | 827 | /* Scan a revoke record, marking all blocks mentioned as revoked. */ |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index fbad622841f9..9a702e193538 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -1094,6 +1094,14 @@ static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number) | |||
1094 | dquot->dq_dqb.dqb_rsvspace -= number; | 1094 | dquot->dq_dqb.dqb_rsvspace -= number; |
1095 | } | 1095 | } |
1096 | 1096 | ||
1097 | static void dquot_reclaim_reserved_space(struct dquot *dquot, qsize_t number) | ||
1098 | { | ||
1099 | if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) | ||
1100 | number = dquot->dq_dqb.dqb_curspace; | ||
1101 | dquot->dq_dqb.dqb_rsvspace += number; | ||
1102 | dquot->dq_dqb.dqb_curspace -= number; | ||
1103 | } | ||
1104 | |||
1097 | static inline | 1105 | static inline |
1098 | void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) | 1106 | void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) |
1099 | { | 1107 | { |
@@ -1528,6 +1536,15 @@ void inode_claim_rsv_space(struct inode *inode, qsize_t number) | |||
1528 | } | 1536 | } |
1529 | EXPORT_SYMBOL(inode_claim_rsv_space); | 1537 | EXPORT_SYMBOL(inode_claim_rsv_space); |
1530 | 1538 | ||
1539 | void inode_reclaim_rsv_space(struct inode *inode, qsize_t number) | ||
1540 | { | ||
1541 | spin_lock(&inode->i_lock); | ||
1542 | *inode_reserved_space(inode) += number; | ||
1543 | __inode_sub_bytes(inode, number); | ||
1544 | spin_unlock(&inode->i_lock); | ||
1545 | } | ||
1546 | EXPORT_SYMBOL(inode_reclaim_rsv_space); | ||
1547 | |||
1531 | void inode_sub_rsv_space(struct inode *inode, qsize_t number) | 1548 | void inode_sub_rsv_space(struct inode *inode, qsize_t number) |
1532 | { | 1549 | { |
1533 | spin_lock(&inode->i_lock); | 1550 | spin_lock(&inode->i_lock); |
@@ -1702,6 +1719,35 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) | |||
1702 | EXPORT_SYMBOL(dquot_claim_space_nodirty); | 1719 | EXPORT_SYMBOL(dquot_claim_space_nodirty); |
1703 | 1720 | ||
1704 | /* | 1721 | /* |
1722 | * Convert allocated space back to in-memory reserved quotas | ||
1723 | */ | ||
1724 | void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) | ||
1725 | { | ||
1726 | int cnt; | ||
1727 | |||
1728 | if (!dquot_active(inode)) { | ||
1729 | inode_reclaim_rsv_space(inode, number); | ||
1730 | return; | ||
1731 | } | ||
1732 | |||
1733 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||
1734 | spin_lock(&dq_data_lock); | ||
1735 | /* Reclaim allocated quotas back to reserved quotas */ | ||
1736 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | ||
1737 | if (inode->i_dquot[cnt]) | ||
1738 | dquot_reclaim_reserved_space(inode->i_dquot[cnt], | ||
1739 | number); | ||
1740 | } | ||
1741 | /* Update inode bytes */ | ||
1742 | inode_reclaim_rsv_space(inode, number); | ||
1743 | spin_unlock(&dq_data_lock); | ||
1744 | mark_all_dquot_dirty(inode->i_dquot); | ||
1745 | up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||
1746 | return; | ||
1747 | } | ||
1748 | EXPORT_SYMBOL(dquot_reclaim_space_nodirty); | ||
1749 | |||
1750 | /* | ||
1705 | * This operation can block, but only after everything is updated | 1751 | * This operation can block, but only after everything is updated |
1706 | */ | 1752 | */ |
1707 | void __dquot_free_space(struct inode *inode, qsize_t number, int flags) | 1753 | void __dquot_free_space(struct inode *inode, qsize_t number, int flags) |
@@ -447,9 +447,8 @@ void inode_add_bytes(struct inode *inode, loff_t bytes) | |||
447 | 447 | ||
448 | EXPORT_SYMBOL(inode_add_bytes); | 448 | EXPORT_SYMBOL(inode_add_bytes); |
449 | 449 | ||
450 | void inode_sub_bytes(struct inode *inode, loff_t bytes) | 450 | void __inode_sub_bytes(struct inode *inode, loff_t bytes) |
451 | { | 451 | { |
452 | spin_lock(&inode->i_lock); | ||
453 | inode->i_blocks -= bytes >> 9; | 452 | inode->i_blocks -= bytes >> 9; |
454 | bytes &= 511; | 453 | bytes &= 511; |
455 | if (inode->i_bytes < bytes) { | 454 | if (inode->i_bytes < bytes) { |
@@ -457,6 +456,14 @@ void inode_sub_bytes(struct inode *inode, loff_t bytes) | |||
457 | inode->i_bytes += 512; | 456 | inode->i_bytes += 512; |
458 | } | 457 | } |
459 | inode->i_bytes -= bytes; | 458 | inode->i_bytes -= bytes; |
459 | } | ||
460 | |||
461 | EXPORT_SYMBOL(__inode_sub_bytes); | ||
462 | |||
463 | void inode_sub_bytes(struct inode *inode, loff_t bytes) | ||
464 | { | ||
465 | spin_lock(&inode->i_lock); | ||
466 | __inode_sub_bytes(inode, bytes); | ||
460 | spin_unlock(&inode->i_lock); | 467 | spin_unlock(&inode->i_lock); |
461 | } | 468 | } |
462 | 469 | ||
diff --git a/include/linux/fs.h b/include/linux/fs.h index 981874773e85..e7893523f81f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -2503,6 +2503,7 @@ extern void generic_fillattr(struct inode *, struct kstat *); | |||
2503 | extern int vfs_getattr(struct path *, struct kstat *); | 2503 | extern int vfs_getattr(struct path *, struct kstat *); |
2504 | void __inode_add_bytes(struct inode *inode, loff_t bytes); | 2504 | void __inode_add_bytes(struct inode *inode, loff_t bytes); |
2505 | void inode_add_bytes(struct inode *inode, loff_t bytes); | 2505 | void inode_add_bytes(struct inode *inode, loff_t bytes); |
2506 | void __inode_sub_bytes(struct inode *inode, loff_t bytes); | ||
2506 | void inode_sub_bytes(struct inode *inode, loff_t bytes); | 2507 | void inode_sub_bytes(struct inode *inode, loff_t bytes); |
2507 | loff_t inode_get_bytes(struct inode *inode); | 2508 | loff_t inode_get_bytes(struct inode *inode); |
2508 | void inode_set_bytes(struct inode *inode, loff_t bytes); | 2509 | void inode_set_bytes(struct inode *inode, loff_t bytes); |
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 1c50093ae656..6965fe394c3b 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h | |||
@@ -41,6 +41,7 @@ void __quota_error(struct super_block *sb, const char *func, | |||
41 | void inode_add_rsv_space(struct inode *inode, qsize_t number); | 41 | void inode_add_rsv_space(struct inode *inode, qsize_t number); |
42 | void inode_claim_rsv_space(struct inode *inode, qsize_t number); | 42 | void inode_claim_rsv_space(struct inode *inode, qsize_t number); |
43 | void inode_sub_rsv_space(struct inode *inode, qsize_t number); | 43 | void inode_sub_rsv_space(struct inode *inode, qsize_t number); |
44 | void inode_reclaim_rsv_space(struct inode *inode, qsize_t number); | ||
44 | 45 | ||
45 | void dquot_initialize(struct inode *inode); | 46 | void dquot_initialize(struct inode *inode); |
46 | void dquot_drop(struct inode *inode); | 47 | void dquot_drop(struct inode *inode); |
@@ -59,6 +60,7 @@ int dquot_alloc_inode(const struct inode *inode); | |||
59 | 60 | ||
60 | int dquot_claim_space_nodirty(struct inode *inode, qsize_t number); | 61 | int dquot_claim_space_nodirty(struct inode *inode, qsize_t number); |
61 | void dquot_free_inode(const struct inode *inode); | 62 | void dquot_free_inode(const struct inode *inode); |
63 | void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number); | ||
62 | 64 | ||
63 | int dquot_disable(struct super_block *sb, int type, unsigned int flags); | 65 | int dquot_disable(struct super_block *sb, int type, unsigned int flags); |
64 | /* Suspend quotas on remount RO */ | 66 | /* Suspend quotas on remount RO */ |
@@ -238,6 +240,13 @@ static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) | |||
238 | return 0; | 240 | return 0; |
239 | } | 241 | } |
240 | 242 | ||
243 | static inline int dquot_reclaim_space_nodirty(struct inode *inode, | ||
244 | qsize_t number) | ||
245 | { | ||
246 | inode_sub_bytes(inode, number); | ||
247 | return 0; | ||
248 | } | ||
249 | |||
241 | static inline int dquot_disable(struct super_block *sb, int type, | 250 | static inline int dquot_disable(struct super_block *sb, int type, |
242 | unsigned int flags) | 251 | unsigned int flags) |
243 | { | 252 | { |
@@ -336,6 +345,12 @@ static inline int dquot_claim_block(struct inode *inode, qsize_t nr) | |||
336 | return ret; | 345 | return ret; |
337 | } | 346 | } |
338 | 347 | ||
348 | static inline void dquot_reclaim_block(struct inode *inode, qsize_t nr) | ||
349 | { | ||
350 | dquot_reclaim_space_nodirty(inode, nr << inode->i_blkbits); | ||
351 | mark_inode_dirty_sync(inode); | ||
352 | } | ||
353 | |||
339 | static inline void dquot_free_space_nodirty(struct inode *inode, qsize_t nr) | 354 | static inline void dquot_free_space_nodirty(struct inode *inode, qsize_t nr) |
340 | { | 355 | { |
341 | __dquot_free_space(inode, nr, 0); | 356 | __dquot_free_space(inode, nr, 0); |
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 2068db241f22..197d3125df2a 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h | |||
@@ -14,7 +14,6 @@ struct ext4_prealloc_space; | |||
14 | struct ext4_inode_info; | 14 | struct ext4_inode_info; |
15 | struct mpage_da_data; | 15 | struct mpage_da_data; |
16 | struct ext4_map_blocks; | 16 | struct ext4_map_blocks; |
17 | struct ext4_extent; | ||
18 | struct extent_status; | 17 | struct extent_status; |
19 | 18 | ||
20 | #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) | 19 | #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) |
@@ -64,10 +63,10 @@ struct extent_status; | |||
64 | { EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" }) | 63 | { EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" }) |
65 | 64 | ||
66 | #define show_extent_status(status) __print_flags(status, "", \ | 65 | #define show_extent_status(status) __print_flags(status, "", \ |
67 | { (1 << 3), "W" }, \ | 66 | { EXTENT_STATUS_WRITTEN, "W" }, \ |
68 | { (1 << 2), "U" }, \ | 67 | { EXTENT_STATUS_UNWRITTEN, "U" }, \ |
69 | { (1 << 1), "D" }, \ | 68 | { EXTENT_STATUS_DELAYED, "D" }, \ |
70 | { (1 << 0), "H" }) | 69 | { EXTENT_STATUS_HOLE, "H" }) |
71 | 70 | ||
72 | 71 | ||
73 | TRACE_EVENT(ext4_free_inode, | 72 | TRACE_EVENT(ext4_free_inode, |
@@ -2192,7 +2191,7 @@ TRACE_EVENT(ext4_ext_remove_space_done, | |||
2192 | (unsigned short) __entry->eh_entries) | 2191 | (unsigned short) __entry->eh_entries) |
2193 | ); | 2192 | ); |
2194 | 2193 | ||
2195 | TRACE_EVENT(ext4_es_insert_extent, | 2194 | DECLARE_EVENT_CLASS(ext4__es_extent, |
2196 | TP_PROTO(struct inode *inode, struct extent_status *es), | 2195 | TP_PROTO(struct inode *inode, struct extent_status *es), |
2197 | 2196 | ||
2198 | TP_ARGS(inode, es), | 2197 | TP_ARGS(inode, es), |
@@ -2212,7 +2211,7 @@ TRACE_EVENT(ext4_es_insert_extent, | |||
2212 | __entry->lblk = es->es_lblk; | 2211 | __entry->lblk = es->es_lblk; |
2213 | __entry->len = es->es_len; | 2212 | __entry->len = es->es_len; |
2214 | __entry->pblk = ext4_es_pblock(es); | 2213 | __entry->pblk = ext4_es_pblock(es); |
2215 | __entry->status = ext4_es_status(es) >> 60; | 2214 | __entry->status = ext4_es_status(es); |
2216 | ), | 2215 | ), |
2217 | 2216 | ||
2218 | TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", | 2217 | TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", |
@@ -2222,6 +2221,18 @@ TRACE_EVENT(ext4_es_insert_extent, | |||
2222 | __entry->pblk, show_extent_status(__entry->status)) | 2221 | __entry->pblk, show_extent_status(__entry->status)) |
2223 | ); | 2222 | ); |
2224 | 2223 | ||
2224 | DEFINE_EVENT(ext4__es_extent, ext4_es_insert_extent, | ||
2225 | TP_PROTO(struct inode *inode, struct extent_status *es), | ||
2226 | |||
2227 | TP_ARGS(inode, es) | ||
2228 | ); | ||
2229 | |||
2230 | DEFINE_EVENT(ext4__es_extent, ext4_es_cache_extent, | ||
2231 | TP_PROTO(struct inode *inode, struct extent_status *es), | ||
2232 | |||
2233 | TP_ARGS(inode, es) | ||
2234 | ); | ||
2235 | |||
2225 | TRACE_EVENT(ext4_es_remove_extent, | 2236 | TRACE_EVENT(ext4_es_remove_extent, |
2226 | TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), | 2237 | TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), |
2227 | 2238 | ||
@@ -2289,7 +2300,7 @@ TRACE_EVENT(ext4_es_find_delayed_extent_range_exit, | |||
2289 | __entry->lblk = es->es_lblk; | 2300 | __entry->lblk = es->es_lblk; |
2290 | __entry->len = es->es_len; | 2301 | __entry->len = es->es_len; |
2291 | __entry->pblk = ext4_es_pblock(es); | 2302 | __entry->pblk = ext4_es_pblock(es); |
2292 | __entry->status = ext4_es_status(es) >> 60; | 2303 | __entry->status = ext4_es_status(es); |
2293 | ), | 2304 | ), |
2294 | 2305 | ||
2295 | TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", | 2306 | TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s", |
@@ -2343,7 +2354,7 @@ TRACE_EVENT(ext4_es_lookup_extent_exit, | |||
2343 | __entry->lblk = es->es_lblk; | 2354 | __entry->lblk = es->es_lblk; |
2344 | __entry->len = es->es_len; | 2355 | __entry->len = es->es_len; |
2345 | __entry->pblk = ext4_es_pblock(es); | 2356 | __entry->pblk = ext4_es_pblock(es); |
2346 | __entry->status = ext4_es_status(es) >> 60; | 2357 | __entry->status = ext4_es_status(es); |
2347 | __entry->found = found; | 2358 | __entry->found = found; |
2348 | ), | 2359 | ), |
2349 | 2360 | ||
diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h index d830747f5c0b..0c51d617dae9 100644 --- a/include/uapi/linux/fiemap.h +++ b/include/uapi/linux/fiemap.h | |||
@@ -40,6 +40,7 @@ struct fiemap { | |||
40 | 40 | ||
41 | #define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */ | 41 | #define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */ |
42 | #define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */ | 42 | #define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */ |
43 | #define FIEMAP_FLAG_CACHE 0x00000004 /* request caching of the extents */ | ||
43 | 44 | ||
44 | #define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) | 45 | #define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) |
45 | 46 | ||