diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-12 12:28:03 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-12 12:28:03 -0500 |
commit | 9bfccec24e31f4f83445cfe0c1b0a5ef97900628 (patch) | |
tree | cea50a0797abbd27a5a4a47853d1e09b97cd8c83 | |
parent | 2756d373a3f45a3a9ebf4ac389f9e0e02bd35a93 (diff) | |
parent | 50db71abc529c48b21f4c3034d3cff27cfb25795 (diff) |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"Lots of bug fixes, including Zheng and Jan's extent status shrinker
fixes, which should improve CPU utilization and avoid potential soft
lockups under heavy memory pressure, and Eric Whitney's bigalloc fixes"
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (26 commits)
ext4: ext4_da_convert_inline_data_to_extent drop locked page after error
ext4: fix suboptimal seek_{data,hole} extents traversial
ext4: ext4_inline_data_fiemap should respect callers argument
ext4: prevent fsreentrance deadlock for inline_data
ext4: forbid journal_async_commit in data=ordered mode
jbd2: remove unnecessary NULL check before iput()
ext4: Remove an unnecessary check for NULL before iput()
ext4: remove unneeded code in ext4_unlink
ext4: don't count external journal blocks as overhead
ext4: remove never taken branch from ext4_ext_shift_path_extents()
ext4: create nojournal_checksum mount option
ext4: update comments regarding ext4_delete_inode()
ext4: cleanup GFP flags inside resize path
ext4: introduce aging to extent status tree
ext4: cleanup flag definitions for extent status tree
ext4: limit number of scanned extents in status tree shrinker
ext4: move handling of list of shrinkable inodes into extent status code
ext4: change LRU to round-robin in extent status tree shrinker
ext4: cache extent hole in extent status tree for ext4_da_map_blocks()
ext4: fix block reservation for bigalloc filesystems
...
-rw-r--r-- | fs/ext4/ext4.h | 41 | ||||
-rw-r--r-- | fs/ext4/extents.c | 223 | ||||
-rw-r--r-- | fs/ext4/extents_status.c | 321 | ||||
-rw-r--r-- | fs/ext4/extents_status.h | 82 | ||||
-rw-r--r-- | fs/ext4/file.c | 220 | ||||
-rw-r--r-- | fs/ext4/inline.c | 35 | ||||
-rw-r--r-- | fs/ext4/inode.c | 37 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 2 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 15 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 2 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 8 | ||||
-rw-r--r-- | fs/ext4/namei.c | 1 | ||||
-rw-r--r-- | fs/ext4/resize.c | 6 | ||||
-rw-r--r-- | fs/ext4/super.c | 51 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 3 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 17 |
16 files changed, 533 insertions, 531 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index db3f772e57ae..a75fba67bb1f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -158,17 +158,8 @@ struct ext4_allocation_request { | |||
158 | #define EXT4_MAP_MAPPED (1 << BH_Mapped) | 158 | #define EXT4_MAP_MAPPED (1 << BH_Mapped) |
159 | #define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) | 159 | #define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) |
160 | #define EXT4_MAP_BOUNDARY (1 << BH_Boundary) | 160 | #define EXT4_MAP_BOUNDARY (1 << BH_Boundary) |
161 | /* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of | ||
162 | * ext4_map_blocks wants to know whether or not the underlying cluster has | ||
163 | * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that | ||
164 | * the requested mapping was from previously mapped (or delayed allocated) | ||
165 | * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster | ||
166 | * should never appear on buffer_head's state flags. | ||
167 | */ | ||
168 | #define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster) | ||
169 | #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ | 161 | #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ |
170 | EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ | 162 | EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY) |
171 | EXT4_MAP_FROM_CLUSTER) | ||
172 | 163 | ||
173 | struct ext4_map_blocks { | 164 | struct ext4_map_blocks { |
174 | ext4_fsblk_t m_pblk; | 165 | ext4_fsblk_t m_pblk; |
@@ -565,10 +556,8 @@ enum { | |||
565 | #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 | 556 | #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 |
566 | /* Do not take i_data_sem locking in ext4_map_blocks */ | 557 | /* Do not take i_data_sem locking in ext4_map_blocks */ |
567 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 | 558 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 |
568 | /* Do not put hole in extent cache */ | ||
569 | #define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 | ||
570 | /* Convert written extents to unwritten */ | 559 | /* Convert written extents to unwritten */ |
571 | #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0400 | 560 | #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200 |
572 | 561 | ||
573 | /* | 562 | /* |
574 | * The bit position of these flags must not overlap with any of the | 563 | * The bit position of these flags must not overlap with any of the |
@@ -889,10 +878,12 @@ struct ext4_inode_info { | |||
889 | /* extents status tree */ | 878 | /* extents status tree */ |
890 | struct ext4_es_tree i_es_tree; | 879 | struct ext4_es_tree i_es_tree; |
891 | rwlock_t i_es_lock; | 880 | rwlock_t i_es_lock; |
892 | struct list_head i_es_lru; | 881 | struct list_head i_es_list; |
893 | unsigned int i_es_all_nr; /* protected by i_es_lock */ | 882 | unsigned int i_es_all_nr; /* protected by i_es_lock */ |
894 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ | 883 | unsigned int i_es_shk_nr; /* protected by i_es_lock */ |
895 | unsigned long i_touch_when; /* jiffies of last accessing */ | 884 | ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for |
885 | extents to shrink. Protected by | ||
886 | i_es_lock */ | ||
896 | 887 | ||
897 | /* ialloc */ | 888 | /* ialloc */ |
898 | ext4_group_t i_last_alloc_group; | 889 | ext4_group_t i_last_alloc_group; |
@@ -1337,10 +1328,11 @@ struct ext4_sb_info { | |||
1337 | 1328 | ||
1338 | /* Reclaim extents from extent status tree */ | 1329 | /* Reclaim extents from extent status tree */ |
1339 | struct shrinker s_es_shrinker; | 1330 | struct shrinker s_es_shrinker; |
1340 | struct list_head s_es_lru; | 1331 | struct list_head s_es_list; /* List of inodes with reclaimable extents */ |
1332 | long s_es_nr_inode; | ||
1341 | struct ext4_es_stats s_es_stats; | 1333 | struct ext4_es_stats s_es_stats; |
1342 | struct mb_cache *s_mb_cache; | 1334 | struct mb_cache *s_mb_cache; |
1343 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; | 1335 | spinlock_t s_es_lock ____cacheline_aligned_in_smp; |
1344 | 1336 | ||
1345 | /* Ratelimit ext4 messages. */ | 1337 | /* Ratelimit ext4 messages. */ |
1346 | struct ratelimit_state s_err_ratelimit_state; | 1338 | struct ratelimit_state s_err_ratelimit_state; |
@@ -2196,7 +2188,6 @@ extern int ext4_calculate_overhead(struct super_block *sb); | |||
2196 | extern void ext4_superblock_csum_set(struct super_block *sb); | 2188 | extern void ext4_superblock_csum_set(struct super_block *sb); |
2197 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); | 2189 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); |
2198 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); | 2190 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); |
2199 | extern void ext4_kvfree(void *ptr); | ||
2200 | extern int ext4_alloc_flex_bg_array(struct super_block *sb, | 2191 | extern int ext4_alloc_flex_bg_array(struct super_block *sb, |
2201 | ext4_group_t ngroup); | 2192 | ext4_group_t ngroup); |
2202 | extern const char *ext4_decode_error(struct super_block *sb, int errno, | 2193 | extern const char *ext4_decode_error(struct super_block *sb, int errno, |
@@ -2647,7 +2638,7 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, | |||
2647 | int *retval); | 2638 | int *retval); |
2648 | extern int ext4_inline_data_fiemap(struct inode *inode, | 2639 | extern int ext4_inline_data_fiemap(struct inode *inode, |
2649 | struct fiemap_extent_info *fieinfo, | 2640 | struct fiemap_extent_info *fieinfo, |
2650 | int *has_inline); | 2641 | int *has_inline, __u64 start, __u64 len); |
2651 | extern int ext4_try_to_evict_inline_data(handle_t *handle, | 2642 | extern int ext4_try_to_evict_inline_data(handle_t *handle, |
2652 | struct inode *inode, | 2643 | struct inode *inode, |
2653 | int needed); | 2644 | int needed); |
@@ -2795,16 +2786,6 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, | |||
2795 | extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); | 2786 | extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); |
2796 | 2787 | ||
2797 | /* | 2788 | /* |
2798 | * Note that these flags will never ever appear in a buffer_head's state flag. | ||
2799 | * See EXT4_MAP_... to see where this is used. | ||
2800 | */ | ||
2801 | enum ext4_state_bits { | ||
2802 | BH_AllocFromCluster /* allocated blocks were part of already | ||
2803 | * allocated cluster. */ | ||
2804 | = BH_JBDPrivateStart | ||
2805 | }; | ||
2806 | |||
2807 | /* | ||
2808 | * Add new method to test whether block and inode bitmaps are properly | 2789 | * Add new method to test whether block and inode bitmaps are properly |
2809 | * initialized. With uninit_bg reading the block from disk is not enough | 2790 | * initialized. With uninit_bg reading the block from disk is not enough |
2810 | * to mark the bitmap uptodate. We need to also zero-out the bitmap | 2791 | * to mark the bitmap uptodate. We need to also zero-out the bitmap |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0b16fb4c06d3..e5d3eadf47b1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -2306,16 +2306,16 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2306 | ext4_lblk_t block) | 2306 | ext4_lblk_t block) |
2307 | { | 2307 | { |
2308 | int depth = ext_depth(inode); | 2308 | int depth = ext_depth(inode); |
2309 | unsigned long len = 0; | 2309 | ext4_lblk_t len; |
2310 | ext4_lblk_t lblock = 0; | 2310 | ext4_lblk_t lblock; |
2311 | struct ext4_extent *ex; | 2311 | struct ext4_extent *ex; |
2312 | struct extent_status es; | ||
2312 | 2313 | ||
2313 | ex = path[depth].p_ext; | 2314 | ex = path[depth].p_ext; |
2314 | if (ex == NULL) { | 2315 | if (ex == NULL) { |
2315 | /* | 2316 | /* there is no extent yet, so gap is [0;-] */ |
2316 | * there is no extent yet, so gap is [0;-] and we | 2317 | lblock = 0; |
2317 | * don't cache it | 2318 | len = EXT_MAX_BLOCKS; |
2318 | */ | ||
2319 | ext_debug("cache gap(whole file):"); | 2319 | ext_debug("cache gap(whole file):"); |
2320 | } else if (block < le32_to_cpu(ex->ee_block)) { | 2320 | } else if (block < le32_to_cpu(ex->ee_block)) { |
2321 | lblock = block; | 2321 | lblock = block; |
@@ -2324,9 +2324,6 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2324 | block, | 2324 | block, |
2325 | le32_to_cpu(ex->ee_block), | 2325 | le32_to_cpu(ex->ee_block), |
2326 | ext4_ext_get_actual_len(ex)); | 2326 | ext4_ext_get_actual_len(ex)); |
2327 | if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1)) | ||
2328 | ext4_es_insert_extent(inode, lblock, len, ~0, | ||
2329 | EXTENT_STATUS_HOLE); | ||
2330 | } else if (block >= le32_to_cpu(ex->ee_block) | 2327 | } else if (block >= le32_to_cpu(ex->ee_block) |
2331 | + ext4_ext_get_actual_len(ex)) { | 2328 | + ext4_ext_get_actual_len(ex)) { |
2332 | ext4_lblk_t next; | 2329 | ext4_lblk_t next; |
@@ -2340,14 +2337,19 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2340 | block); | 2337 | block); |
2341 | BUG_ON(next == lblock); | 2338 | BUG_ON(next == lblock); |
2342 | len = next - lblock; | 2339 | len = next - lblock; |
2343 | if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1)) | ||
2344 | ext4_es_insert_extent(inode, lblock, len, ~0, | ||
2345 | EXTENT_STATUS_HOLE); | ||
2346 | } else { | 2340 | } else { |
2347 | BUG(); | 2341 | BUG(); |
2348 | } | 2342 | } |
2349 | 2343 | ||
2350 | ext_debug(" -> %u:%lu\n", lblock, len); | 2344 | ext4_es_find_delayed_extent_range(inode, lblock, lblock + len - 1, &es); |
2345 | if (es.es_len) { | ||
2346 | /* There's delayed extent containing lblock? */ | ||
2347 | if (es.es_lblk <= lblock) | ||
2348 | return; | ||
2349 | len = min(es.es_lblk - lblock, len); | ||
2350 | } | ||
2351 | ext_debug(" -> %u:%u\n", lblock, len); | ||
2352 | ext4_es_insert_extent(inode, lblock, len, ~0, EXTENT_STATUS_HOLE); | ||
2351 | } | 2353 | } |
2352 | 2354 | ||
2353 | /* | 2355 | /* |
@@ -2481,7 +2483,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2481 | ext4_lblk_t from, ext4_lblk_t to) | 2483 | ext4_lblk_t from, ext4_lblk_t to) |
2482 | { | 2484 | { |
2483 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2485 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
2484 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 2486 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
2485 | ext4_fsblk_t pblk; | 2487 | ext4_fsblk_t pblk; |
2486 | int flags = get_default_free_blocks_flags(inode); | 2488 | int flags = get_default_free_blocks_flags(inode); |
2487 | 2489 | ||
@@ -2490,7 +2492,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2490 | * at the beginning of the extent. Instead, we make a note | 2492 | * at the beginning of the extent. Instead, we make a note |
2491 | * that we tried freeing the cluster, and check to see if we | 2493 | * that we tried freeing the cluster, and check to see if we |
2492 | * need to free it on a subsequent call to ext4_remove_blocks, | 2494 | * need to free it on a subsequent call to ext4_remove_blocks, |
2493 | * or at the end of the ext4_truncate() operation. | 2495 | * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space. |
2494 | */ | 2496 | */ |
2495 | flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER; | 2497 | flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER; |
2496 | 2498 | ||
@@ -2501,8 +2503,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2501 | * partial cluster here. | 2503 | * partial cluster here. |
2502 | */ | 2504 | */ |
2503 | pblk = ext4_ext_pblock(ex) + ee_len - 1; | 2505 | pblk = ext4_ext_pblock(ex) + ee_len - 1; |
2504 | if ((*partial_cluster > 0) && | 2506 | if (*partial_cluster > 0 && |
2505 | (EXT4_B2C(sbi, pblk) != *partial_cluster)) { | 2507 | *partial_cluster != (long long) EXT4_B2C(sbi, pblk)) { |
2506 | ext4_free_blocks(handle, inode, NULL, | 2508 | ext4_free_blocks(handle, inode, NULL, |
2507 | EXT4_C2B(sbi, *partial_cluster), | 2509 | EXT4_C2B(sbi, *partial_cluster), |
2508 | sbi->s_cluster_ratio, flags); | 2510 | sbi->s_cluster_ratio, flags); |
@@ -2528,7 +2530,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2528 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2530 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { |
2529 | /* tail removal */ | 2531 | /* tail removal */ |
2530 | ext4_lblk_t num; | 2532 | ext4_lblk_t num; |
2531 | unsigned int unaligned; | 2533 | long long first_cluster; |
2532 | 2534 | ||
2533 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2535 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
2534 | pblk = ext4_ext_pblock(ex) + ee_len - num; | 2536 | pblk = ext4_ext_pblock(ex) + ee_len - num; |
@@ -2538,7 +2540,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2538 | * used by any other extent (partial_cluster is negative). | 2540 | * used by any other extent (partial_cluster is negative). |
2539 | */ | 2541 | */ |
2540 | if (*partial_cluster < 0 && | 2542 | if (*partial_cluster < 0 && |
2541 | -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1)) | 2543 | *partial_cluster == -(long long) EXT4_B2C(sbi, pblk+num-1)) |
2542 | flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; | 2544 | flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; |
2543 | 2545 | ||
2544 | ext_debug("free last %u blocks starting %llu partial %lld\n", | 2546 | ext_debug("free last %u blocks starting %llu partial %lld\n", |
@@ -2549,21 +2551,24 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2549 | * beginning of a cluster, and we removed the entire | 2551 | * beginning of a cluster, and we removed the entire |
2550 | * extent and the cluster is not used by any other extent, | 2552 | * extent and the cluster is not used by any other extent, |
2551 | * save the partial cluster here, since we might need to | 2553 | * save the partial cluster here, since we might need to |
2552 | * delete if we determine that the truncate operation has | 2554 | * delete if we determine that the truncate or punch hole |
2553 | * removed all of the blocks in the cluster. | 2555 | * operation has removed all of the blocks in the cluster. |
2556 | * If that cluster is used by another extent, preserve its | ||
2557 | * negative value so it isn't freed later on. | ||
2554 | * | 2558 | * |
2555 | * On the other hand, if we did not manage to free the whole | 2559 | * If the whole extent wasn't freed, we've reached the |
2556 | * extent, we have to mark the cluster as used (store negative | 2560 | * start of the truncated/punched region and have finished |
2557 | * cluster number in partial_cluster). | 2561 | * removing blocks. If there's a partial cluster here it's |
2562 | * shared with the remainder of the extent and is no longer | ||
2563 | * a candidate for removal. | ||
2558 | */ | 2564 | */ |
2559 | unaligned = EXT4_PBLK_COFF(sbi, pblk); | 2565 | if (EXT4_PBLK_COFF(sbi, pblk) && ee_len == num) { |
2560 | if (unaligned && (ee_len == num) && | 2566 | first_cluster = (long long) EXT4_B2C(sbi, pblk); |
2561 | (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) | 2567 | if (first_cluster != -*partial_cluster) |
2562 | *partial_cluster = EXT4_B2C(sbi, pblk); | 2568 | *partial_cluster = first_cluster; |
2563 | else if (unaligned) | 2569 | } else { |
2564 | *partial_cluster = -((long long)EXT4_B2C(sbi, pblk)); | ||
2565 | else if (*partial_cluster > 0) | ||
2566 | *partial_cluster = 0; | 2570 | *partial_cluster = 0; |
2571 | } | ||
2567 | } else | 2572 | } else |
2568 | ext4_error(sbi->s_sb, "strange request: removal(2) " | 2573 | ext4_error(sbi->s_sb, "strange request: removal(2) " |
2569 | "%u-%u from %u:%u\n", | 2574 | "%u-%u from %u:%u\n", |
@@ -2574,15 +2579,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2574 | 2579 | ||
2575 | /* | 2580 | /* |
2576 | * ext4_ext_rm_leaf() Removes the extents associated with the | 2581 | * ext4_ext_rm_leaf() Removes the extents associated with the |
2577 | * blocks appearing between "start" and "end", and splits the extents | 2582 | * blocks appearing between "start" and "end". Both "start" |
2578 | * if "start" and "end" appear in the same extent | 2583 | * and "end" must appear in the same extent or EIO is returned. |
2579 | * | 2584 | * |
2580 | * @handle: The journal handle | 2585 | * @handle: The journal handle |
2581 | * @inode: The files inode | 2586 | * @inode: The files inode |
2582 | * @path: The path to the leaf | 2587 | * @path: The path to the leaf |
2583 | * @partial_cluster: The cluster which we'll have to free if all extents | 2588 | * @partial_cluster: The cluster which we'll have to free if all extents |
2584 | * has been released from it. It gets negative in case | 2589 | * has been released from it. However, if this value is |
2585 | * that the cluster is still used. | 2590 | * negative, it's a cluster just to the right of the |
2591 | * punched region and it must not be freed. | ||
2586 | * @start: The first block to remove | 2592 | * @start: The first block to remove |
2587 | * @end: The last block to remove | 2593 | * @end: The last block to remove |
2588 | */ | 2594 | */ |
@@ -2621,27 +2627,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2621 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2627 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2622 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2628 | ex_ee_len = ext4_ext_get_actual_len(ex); |
2623 | 2629 | ||
2624 | /* | ||
2625 | * If we're starting with an extent other than the last one in the | ||
2626 | * node, we need to see if it shares a cluster with the extent to | ||
2627 | * the right (towards the end of the file). If its leftmost cluster | ||
2628 | * is this extent's rightmost cluster and it is not cluster aligned, | ||
2629 | * we'll mark it as a partial that is not to be deallocated. | ||
2630 | */ | ||
2631 | |||
2632 | if (ex != EXT_LAST_EXTENT(eh)) { | ||
2633 | ext4_fsblk_t current_pblk, right_pblk; | ||
2634 | long long current_cluster, right_cluster; | ||
2635 | |||
2636 | current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; | ||
2637 | current_cluster = (long long)EXT4_B2C(sbi, current_pblk); | ||
2638 | right_pblk = ext4_ext_pblock(ex + 1); | ||
2639 | right_cluster = (long long)EXT4_B2C(sbi, right_pblk); | ||
2640 | if (current_cluster == right_cluster && | ||
2641 | EXT4_PBLK_COFF(sbi, right_pblk)) | ||
2642 | *partial_cluster = -right_cluster; | ||
2643 | } | ||
2644 | |||
2645 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); | 2630 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); |
2646 | 2631 | ||
2647 | while (ex >= EXT_FIRST_EXTENT(eh) && | 2632 | while (ex >= EXT_FIRST_EXTENT(eh) && |
@@ -2666,14 +2651,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2666 | if (end < ex_ee_block) { | 2651 | if (end < ex_ee_block) { |
2667 | /* | 2652 | /* |
2668 | * We're going to skip this extent and move to another, | 2653 | * We're going to skip this extent and move to another, |
2669 | * so if this extent is not cluster aligned we have | 2654 | * so note that its first cluster is in use to avoid |
2670 | * to mark the current cluster as used to avoid | 2655 | * freeing it when removing blocks. Eventually, the |
2671 | * accidentally freeing it later on | 2656 | * right edge of the truncated/punched region will |
2657 | * be just to the left. | ||
2672 | */ | 2658 | */ |
2673 | pblk = ext4_ext_pblock(ex); | 2659 | if (sbi->s_cluster_ratio > 1) { |
2674 | if (EXT4_PBLK_COFF(sbi, pblk)) | 2660 | pblk = ext4_ext_pblock(ex); |
2675 | *partial_cluster = | 2661 | *partial_cluster = |
2676 | -((long long)EXT4_B2C(sbi, pblk)); | 2662 | -(long long) EXT4_B2C(sbi, pblk); |
2663 | } | ||
2677 | ex--; | 2664 | ex--; |
2678 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2665 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2679 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2666 | ex_ee_len = ext4_ext_get_actual_len(ex); |
@@ -2749,8 +2736,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2749 | sizeof(struct ext4_extent)); | 2736 | sizeof(struct ext4_extent)); |
2750 | } | 2737 | } |
2751 | le16_add_cpu(&eh->eh_entries, -1); | 2738 | le16_add_cpu(&eh->eh_entries, -1); |
2752 | } else if (*partial_cluster > 0) | 2739 | } |
2753 | *partial_cluster = 0; | ||
2754 | 2740 | ||
2755 | err = ext4_ext_dirty(handle, inode, path + depth); | 2741 | err = ext4_ext_dirty(handle, inode, path + depth); |
2756 | if (err) | 2742 | if (err) |
@@ -2769,20 +2755,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2769 | /* | 2755 | /* |
2770 | * If there's a partial cluster and at least one extent remains in | 2756 | * If there's a partial cluster and at least one extent remains in |
2771 | * the leaf, free the partial cluster if it isn't shared with the | 2757 | * the leaf, free the partial cluster if it isn't shared with the |
2772 | * current extent. If there's a partial cluster and no extents | 2758 | * current extent. If it is shared with the current extent |
2773 | * remain in the leaf, it can't be freed here. It can only be | 2759 | * we zero partial_cluster because we've reached the start of the |
2774 | * freed when it's possible to determine if it's not shared with | 2760 | * truncated/punched region and we're done removing blocks. |
2775 | * any other extent - when the next leaf is processed or when space | ||
2776 | * removal is complete. | ||
2777 | */ | 2761 | */ |
2778 | if (*partial_cluster > 0 && eh->eh_entries && | 2762 | if (*partial_cluster > 0 && ex >= EXT_FIRST_EXTENT(eh)) { |
2779 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != | 2763 | pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; |
2780 | *partial_cluster)) { | 2764 | if (*partial_cluster != (long long) EXT4_B2C(sbi, pblk)) { |
2781 | int flags = get_default_free_blocks_flags(inode); | 2765 | ext4_free_blocks(handle, inode, NULL, |
2782 | 2766 | EXT4_C2B(sbi, *partial_cluster), | |
2783 | ext4_free_blocks(handle, inode, NULL, | 2767 | sbi->s_cluster_ratio, |
2784 | EXT4_C2B(sbi, *partial_cluster), | 2768 | get_default_free_blocks_flags(inode)); |
2785 | sbi->s_cluster_ratio, flags); | 2769 | } |
2786 | *partial_cluster = 0; | 2770 | *partial_cluster = 0; |
2787 | } | 2771 | } |
2788 | 2772 | ||
@@ -2819,7 +2803,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) | |||
2819 | int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | 2803 | int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, |
2820 | ext4_lblk_t end) | 2804 | ext4_lblk_t end) |
2821 | { | 2805 | { |
2822 | struct super_block *sb = inode->i_sb; | 2806 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
2823 | int depth = ext_depth(inode); | 2807 | int depth = ext_depth(inode); |
2824 | struct ext4_ext_path *path = NULL; | 2808 | struct ext4_ext_path *path = NULL; |
2825 | long long partial_cluster = 0; | 2809 | long long partial_cluster = 0; |
@@ -2845,9 +2829,10 @@ again: | |||
2845 | */ | 2829 | */ |
2846 | if (end < EXT_MAX_BLOCKS - 1) { | 2830 | if (end < EXT_MAX_BLOCKS - 1) { |
2847 | struct ext4_extent *ex; | 2831 | struct ext4_extent *ex; |
2848 | ext4_lblk_t ee_block; | 2832 | ext4_lblk_t ee_block, ex_end, lblk; |
2833 | ext4_fsblk_t pblk; | ||
2849 | 2834 | ||
2850 | /* find extent for this block */ | 2835 | /* find extent for or closest extent to this block */ |
2851 | path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); | 2836 | path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); |
2852 | if (IS_ERR(path)) { | 2837 | if (IS_ERR(path)) { |
2853 | ext4_journal_stop(handle); | 2838 | ext4_journal_stop(handle); |
@@ -2867,6 +2852,7 @@ again: | |||
2867 | } | 2852 | } |
2868 | 2853 | ||
2869 | ee_block = le32_to_cpu(ex->ee_block); | 2854 | ee_block = le32_to_cpu(ex->ee_block); |
2855 | ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1; | ||
2870 | 2856 | ||
2871 | /* | 2857 | /* |
2872 | * See if the last block is inside the extent, if so split | 2858 | * See if the last block is inside the extent, if so split |
@@ -2874,8 +2860,19 @@ again: | |||
2874 | * tail of the first part of the split extent in | 2860 | * tail of the first part of the split extent in |
2875 | * ext4_ext_rm_leaf(). | 2861 | * ext4_ext_rm_leaf(). |
2876 | */ | 2862 | */ |
2877 | if (end >= ee_block && | 2863 | if (end >= ee_block && end < ex_end) { |
2878 | end < ee_block + ext4_ext_get_actual_len(ex) - 1) { | 2864 | |
2865 | /* | ||
2866 | * If we're going to split the extent, note that | ||
2867 | * the cluster containing the block after 'end' is | ||
2868 | * in use to avoid freeing it when removing blocks. | ||
2869 | */ | ||
2870 | if (sbi->s_cluster_ratio > 1) { | ||
2871 | pblk = ext4_ext_pblock(ex) + end - ee_block + 2; | ||
2872 | partial_cluster = | ||
2873 | -(long long) EXT4_B2C(sbi, pblk); | ||
2874 | } | ||
2875 | |||
2879 | /* | 2876 | /* |
2880 | * Split the extent in two so that 'end' is the last | 2877 | * Split the extent in two so that 'end' is the last |
2881 | * block in the first new extent. Also we should not | 2878 | * block in the first new extent. Also we should not |
@@ -2886,6 +2883,24 @@ again: | |||
2886 | end + 1, 1); | 2883 | end + 1, 1); |
2887 | if (err < 0) | 2884 | if (err < 0) |
2888 | goto out; | 2885 | goto out; |
2886 | |||
2887 | } else if (sbi->s_cluster_ratio > 1 && end >= ex_end) { | ||
2888 | /* | ||
2889 | * If there's an extent to the right its first cluster | ||
2890 | * contains the immediate right boundary of the | ||
2891 | * truncated/punched region. Set partial_cluster to | ||
2892 | * its negative value so it won't be freed if shared | ||
2893 | * with the current extent. The end < ee_block case | ||
2894 | * is handled in ext4_ext_rm_leaf(). | ||
2895 | */ | ||
2896 | lblk = ex_end + 1; | ||
2897 | err = ext4_ext_search_right(inode, path, &lblk, &pblk, | ||
2898 | &ex); | ||
2899 | if (err) | ||
2900 | goto out; | ||
2901 | if (pblk) | ||
2902 | partial_cluster = | ||
2903 | -(long long) EXT4_B2C(sbi, pblk); | ||
2889 | } | 2904 | } |
2890 | } | 2905 | } |
2891 | /* | 2906 | /* |
@@ -2996,16 +3011,18 @@ again: | |||
2996 | trace_ext4_ext_remove_space_done(inode, start, end, depth, | 3011 | trace_ext4_ext_remove_space_done(inode, start, end, depth, |
2997 | partial_cluster, path->p_hdr->eh_entries); | 3012 | partial_cluster, path->p_hdr->eh_entries); |
2998 | 3013 | ||
2999 | /* If we still have something in the partial cluster and we have removed | 3014 | /* |
3015 | * If we still have something in the partial cluster and we have removed | ||
3000 | * even the first extent, then we should free the blocks in the partial | 3016 | * even the first extent, then we should free the blocks in the partial |
3001 | * cluster as well. */ | 3017 | * cluster as well. (This code will only run when there are no leaves |
3002 | if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) { | 3018 | * to the immediate left of the truncated/punched region.) |
3003 | int flags = get_default_free_blocks_flags(inode); | 3019 | */ |
3004 | 3020 | if (partial_cluster > 0 && err == 0) { | |
3021 | /* don't zero partial_cluster since it's not used afterwards */ | ||
3005 | ext4_free_blocks(handle, inode, NULL, | 3022 | ext4_free_blocks(handle, inode, NULL, |
3006 | EXT4_C2B(EXT4_SB(sb), partial_cluster), | 3023 | EXT4_C2B(sbi, partial_cluster), |
3007 | EXT4_SB(sb)->s_cluster_ratio, flags); | 3024 | sbi->s_cluster_ratio, |
3008 | partial_cluster = 0; | 3025 | get_default_free_blocks_flags(inode)); |
3009 | } | 3026 | } |
3010 | 3027 | ||
3011 | /* TODO: flexible tree reduction should be here */ | 3028 | /* TODO: flexible tree reduction should be here */ |
@@ -4267,6 +4284,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4267 | ext4_io_end_t *io = ext4_inode_aio(inode); | 4284 | ext4_io_end_t *io = ext4_inode_aio(inode); |
4268 | ext4_lblk_t cluster_offset; | 4285 | ext4_lblk_t cluster_offset; |
4269 | int set_unwritten = 0; | 4286 | int set_unwritten = 0; |
4287 | bool map_from_cluster = false; | ||
4270 | 4288 | ||
4271 | ext_debug("blocks %u/%u requested for inode %lu\n", | 4289 | ext_debug("blocks %u/%u requested for inode %lu\n", |
4272 | map->m_lblk, map->m_len, inode->i_ino); | 4290 | map->m_lblk, map->m_len, inode->i_ino); |
@@ -4343,10 +4361,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4343 | } | 4361 | } |
4344 | } | 4362 | } |
4345 | 4363 | ||
4346 | if ((sbi->s_cluster_ratio > 1) && | ||
4347 | ext4_find_delalloc_cluster(inode, map->m_lblk)) | ||
4348 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
4349 | |||
4350 | /* | 4364 | /* |
4351 | * requested block isn't allocated yet; | 4365 | * requested block isn't allocated yet; |
4352 | * we couldn't try to create block if create flag is zero | 4366 | * we couldn't try to create block if create flag is zero |
@@ -4356,15 +4370,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4356 | * put just found gap into cache to speed up | 4370 | * put just found gap into cache to speed up |
4357 | * subsequent requests | 4371 | * subsequent requests |
4358 | */ | 4372 | */ |
4359 | if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0) | 4373 | ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); |
4360 | ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); | ||
4361 | goto out2; | 4374 | goto out2; |
4362 | } | 4375 | } |
4363 | 4376 | ||
4364 | /* | 4377 | /* |
4365 | * Okay, we need to do block allocation. | 4378 | * Okay, we need to do block allocation. |
4366 | */ | 4379 | */ |
4367 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | ||
4368 | newex.ee_block = cpu_to_le32(map->m_lblk); | 4380 | newex.ee_block = cpu_to_le32(map->m_lblk); |
4369 | cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); | 4381 | cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); |
4370 | 4382 | ||
@@ -4376,7 +4388,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4376 | get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { | 4388 | get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { |
4377 | ar.len = allocated = map->m_len; | 4389 | ar.len = allocated = map->m_len; |
4378 | newblock = map->m_pblk; | 4390 | newblock = map->m_pblk; |
4379 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | 4391 | map_from_cluster = true; |
4380 | goto got_allocated_blocks; | 4392 | goto got_allocated_blocks; |
4381 | } | 4393 | } |
4382 | 4394 | ||
@@ -4397,7 +4409,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4397 | get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) { | 4409 | get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) { |
4398 | ar.len = allocated = map->m_len; | 4410 | ar.len = allocated = map->m_len; |
4399 | newblock = map->m_pblk; | 4411 | newblock = map->m_pblk; |
4400 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | 4412 | map_from_cluster = true; |
4401 | goto got_allocated_blocks; | 4413 | goto got_allocated_blocks; |
4402 | } | 4414 | } |
4403 | 4415 | ||
@@ -4523,7 +4535,7 @@ got_allocated_blocks: | |||
4523 | */ | 4535 | */ |
4524 | reserved_clusters = get_reserved_cluster_alloc(inode, | 4536 | reserved_clusters = get_reserved_cluster_alloc(inode, |
4525 | map->m_lblk, allocated); | 4537 | map->m_lblk, allocated); |
4526 | if (map->m_flags & EXT4_MAP_FROM_CLUSTER) { | 4538 | if (map_from_cluster) { |
4527 | if (reserved_clusters) { | 4539 | if (reserved_clusters) { |
4528 | /* | 4540 | /* |
4529 | * We have clusters reserved for this range. | 4541 | * We have clusters reserved for this range. |
@@ -4620,7 +4632,6 @@ out2: | |||
4620 | 4632 | ||
4621 | trace_ext4_ext_map_blocks_exit(inode, flags, map, | 4633 | trace_ext4_ext_map_blocks_exit(inode, flags, map, |
4622 | err ? err : allocated); | 4634 | err ? err : allocated); |
4623 | ext4_es_lru_add(inode); | ||
4624 | return err ? err : allocated; | 4635 | return err ? err : allocated; |
4625 | } | 4636 | } |
4626 | 4637 | ||
@@ -5140,7 +5151,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
5140 | if (ext4_has_inline_data(inode)) { | 5151 | if (ext4_has_inline_data(inode)) { |
5141 | int has_inline = 1; | 5152 | int has_inline = 1; |
5142 | 5153 | ||
5143 | error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline); | 5154 | error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline, |
5155 | start, len); | ||
5144 | 5156 | ||
5145 | if (has_inline) | 5157 | if (has_inline) |
5146 | return error; | 5158 | return error; |
@@ -5154,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
5154 | 5166 | ||
5155 | /* fallback to generic here if not in extents fmt */ | 5167 | /* fallback to generic here if not in extents fmt */ |
5156 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 5168 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
5157 | return generic_block_fiemap(inode, fieinfo, start, len, | 5169 | return __generic_block_fiemap(inode, fieinfo, start, len, |
5158 | ext4_get_block); | 5170 | ext4_get_block); |
5159 | 5171 | ||
5160 | if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) | 5172 | if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) |
5161 | return -EBADR; | 5173 | return -EBADR; |
@@ -5179,7 +5191,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
5179 | error = ext4_fill_fiemap_extents(inode, start_blk, | 5191 | error = ext4_fill_fiemap_extents(inode, start_blk, |
5180 | len_blks, fieinfo); | 5192 | len_blks, fieinfo); |
5181 | } | 5193 | } |
5182 | ext4_es_lru_add(inode); | ||
5183 | return error; | 5194 | return error; |
5184 | } | 5195 | } |
5185 | 5196 | ||
@@ -5239,8 +5250,6 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, | |||
5239 | return -EIO; | 5250 | return -EIO; |
5240 | 5251 | ||
5241 | ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); | 5252 | ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); |
5242 | if (!ex_last) | ||
5243 | return -EIO; | ||
5244 | 5253 | ||
5245 | err = ext4_access_path(handle, inode, path + depth); | 5254 | err = ext4_access_path(handle, inode, path + depth); |
5246 | if (err) | 5255 | if (err) |
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 94e7855ae71b..e04d45733976 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -147,10 +147,9 @@ static struct kmem_cache *ext4_es_cachep; | |||
147 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes); | 147 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes); |
148 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | 148 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, |
149 | ext4_lblk_t end); | 149 | ext4_lblk_t end); |
150 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 150 | static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan); |
151 | int nr_to_scan); | 151 | static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, |
152 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | 152 | struct ext4_inode_info *locked_ei); |
153 | struct ext4_inode_info *locked_ei); | ||
154 | 153 | ||
155 | int __init ext4_init_es(void) | 154 | int __init ext4_init_es(void) |
156 | { | 155 | { |
@@ -298,6 +297,36 @@ out: | |||
298 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); | 297 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); |
299 | } | 298 | } |
300 | 299 | ||
300 | static void ext4_es_list_add(struct inode *inode) | ||
301 | { | ||
302 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
303 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
304 | |||
305 | if (!list_empty(&ei->i_es_list)) | ||
306 | return; | ||
307 | |||
308 | spin_lock(&sbi->s_es_lock); | ||
309 | if (list_empty(&ei->i_es_list)) { | ||
310 | list_add_tail(&ei->i_es_list, &sbi->s_es_list); | ||
311 | sbi->s_es_nr_inode++; | ||
312 | } | ||
313 | spin_unlock(&sbi->s_es_lock); | ||
314 | } | ||
315 | |||
316 | static void ext4_es_list_del(struct inode *inode) | ||
317 | { | ||
318 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
319 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
320 | |||
321 | spin_lock(&sbi->s_es_lock); | ||
322 | if (!list_empty(&ei->i_es_list)) { | ||
323 | list_del_init(&ei->i_es_list); | ||
324 | sbi->s_es_nr_inode--; | ||
325 | WARN_ON_ONCE(sbi->s_es_nr_inode < 0); | ||
326 | } | ||
327 | spin_unlock(&sbi->s_es_lock); | ||
328 | } | ||
329 | |||
301 | static struct extent_status * | 330 | static struct extent_status * |
302 | ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | 331 | ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, |
303 | ext4_fsblk_t pblk) | 332 | ext4_fsblk_t pblk) |
@@ -314,9 +343,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | |||
314 | * We don't count delayed extent because we never try to reclaim them | 343 | * We don't count delayed extent because we never try to reclaim them |
315 | */ | 344 | */ |
316 | if (!ext4_es_is_delayed(es)) { | 345 | if (!ext4_es_is_delayed(es)) { |
317 | EXT4_I(inode)->i_es_lru_nr++; | 346 | if (!EXT4_I(inode)->i_es_shk_nr++) |
347 | ext4_es_list_add(inode); | ||
318 | percpu_counter_inc(&EXT4_SB(inode->i_sb)-> | 348 | percpu_counter_inc(&EXT4_SB(inode->i_sb)-> |
319 | s_es_stats.es_stats_lru_cnt); | 349 | s_es_stats.es_stats_shk_cnt); |
320 | } | 350 | } |
321 | 351 | ||
322 | EXT4_I(inode)->i_es_all_nr++; | 352 | EXT4_I(inode)->i_es_all_nr++; |
@@ -330,12 +360,13 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) | |||
330 | EXT4_I(inode)->i_es_all_nr--; | 360 | EXT4_I(inode)->i_es_all_nr--; |
331 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); | 361 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); |
332 | 362 | ||
333 | /* Decrease the lru counter when this es is not delayed */ | 363 | /* Decrease the shrink counter when this es is not delayed */ |
334 | if (!ext4_es_is_delayed(es)) { | 364 | if (!ext4_es_is_delayed(es)) { |
335 | BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); | 365 | BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0); |
336 | EXT4_I(inode)->i_es_lru_nr--; | 366 | if (!--EXT4_I(inode)->i_es_shk_nr) |
367 | ext4_es_list_del(inode); | ||
337 | percpu_counter_dec(&EXT4_SB(inode->i_sb)-> | 368 | percpu_counter_dec(&EXT4_SB(inode->i_sb)-> |
338 | s_es_stats.es_stats_lru_cnt); | 369 | s_es_stats.es_stats_shk_cnt); |
339 | } | 370 | } |
340 | 371 | ||
341 | kmem_cache_free(ext4_es_cachep, es); | 372 | kmem_cache_free(ext4_es_cachep, es); |
@@ -351,7 +382,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) | |||
351 | static int ext4_es_can_be_merged(struct extent_status *es1, | 382 | static int ext4_es_can_be_merged(struct extent_status *es1, |
352 | struct extent_status *es2) | 383 | struct extent_status *es2) |
353 | { | 384 | { |
354 | if (ext4_es_status(es1) != ext4_es_status(es2)) | 385 | if (ext4_es_type(es1) != ext4_es_type(es2)) |
355 | return 0; | 386 | return 0; |
356 | 387 | ||
357 | if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) { | 388 | if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) { |
@@ -394,6 +425,8 @@ ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es) | |||
394 | es1 = rb_entry(node, struct extent_status, rb_node); | 425 | es1 = rb_entry(node, struct extent_status, rb_node); |
395 | if (ext4_es_can_be_merged(es1, es)) { | 426 | if (ext4_es_can_be_merged(es1, es)) { |
396 | es1->es_len += es->es_len; | 427 | es1->es_len += es->es_len; |
428 | if (ext4_es_is_referenced(es)) | ||
429 | ext4_es_set_referenced(es1); | ||
397 | rb_erase(&es->rb_node, &tree->root); | 430 | rb_erase(&es->rb_node, &tree->root); |
398 | ext4_es_free_extent(inode, es); | 431 | ext4_es_free_extent(inode, es); |
399 | es = es1; | 432 | es = es1; |
@@ -416,6 +449,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es) | |||
416 | es1 = rb_entry(node, struct extent_status, rb_node); | 449 | es1 = rb_entry(node, struct extent_status, rb_node); |
417 | if (ext4_es_can_be_merged(es, es1)) { | 450 | if (ext4_es_can_be_merged(es, es1)) { |
418 | es->es_len += es1->es_len; | 451 | es->es_len += es1->es_len; |
452 | if (ext4_es_is_referenced(es1)) | ||
453 | ext4_es_set_referenced(es); | ||
419 | rb_erase(node, &tree->root); | 454 | rb_erase(node, &tree->root); |
420 | ext4_es_free_extent(inode, es1); | 455 | ext4_es_free_extent(inode, es1); |
421 | } | 456 | } |
@@ -683,8 +718,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
683 | goto error; | 718 | goto error; |
684 | retry: | 719 | retry: |
685 | err = __es_insert_extent(inode, &newes); | 720 | err = __es_insert_extent(inode, &newes); |
686 | if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | 721 | if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), |
687 | EXT4_I(inode))) | 722 | 128, EXT4_I(inode))) |
688 | goto retry; | 723 | goto retry; |
689 | if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) | 724 | if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) |
690 | err = 0; | 725 | err = 0; |
@@ -782,6 +817,8 @@ out: | |||
782 | es->es_lblk = es1->es_lblk; | 817 | es->es_lblk = es1->es_lblk; |
783 | es->es_len = es1->es_len; | 818 | es->es_len = es1->es_len; |
784 | es->es_pblk = es1->es_pblk; | 819 | es->es_pblk = es1->es_pblk; |
820 | if (!ext4_es_is_referenced(es)) | ||
821 | ext4_es_set_referenced(es); | ||
785 | stats->es_stats_cache_hits++; | 822 | stats->es_stats_cache_hits++; |
786 | } else { | 823 | } else { |
787 | stats->es_stats_cache_misses++; | 824 | stats->es_stats_cache_misses++; |
@@ -841,8 +878,8 @@ retry: | |||
841 | es->es_lblk = orig_es.es_lblk; | 878 | es->es_lblk = orig_es.es_lblk; |
842 | es->es_len = orig_es.es_len; | 879 | es->es_len = orig_es.es_len; |
843 | if ((err == -ENOMEM) && | 880 | if ((err == -ENOMEM) && |
844 | __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | 881 | __es_shrink(EXT4_SB(inode->i_sb), |
845 | EXT4_I(inode))) | 882 | 128, EXT4_I(inode))) |
846 | goto retry; | 883 | goto retry; |
847 | goto out; | 884 | goto out; |
848 | } | 885 | } |
@@ -914,6 +951,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
914 | end = lblk + len - 1; | 951 | end = lblk + len - 1; |
915 | BUG_ON(end < lblk); | 952 | BUG_ON(end < lblk); |
916 | 953 | ||
954 | /* | ||
955 | * ext4_clear_inode() depends on us taking i_es_lock unconditionally | ||
956 | * so that we are sure __es_shrink() is done with the inode before it | ||
957 | * is reclaimed. | ||
958 | */ | ||
917 | write_lock(&EXT4_I(inode)->i_es_lock); | 959 | write_lock(&EXT4_I(inode)->i_es_lock); |
918 | err = __es_remove_extent(inode, lblk, end); | 960 | err = __es_remove_extent(inode, lblk, end); |
919 | write_unlock(&EXT4_I(inode)->i_es_lock); | 961 | write_unlock(&EXT4_I(inode)->i_es_lock); |
@@ -921,114 +963,75 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
921 | return err; | 963 | return err; |
922 | } | 964 | } |
923 | 965 | ||
924 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, | 966 | static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, |
925 | struct list_head *b) | 967 | struct ext4_inode_info *locked_ei) |
926 | { | ||
927 | struct ext4_inode_info *eia, *eib; | ||
928 | eia = list_entry(a, struct ext4_inode_info, i_es_lru); | ||
929 | eib = list_entry(b, struct ext4_inode_info, i_es_lru); | ||
930 | |||
931 | if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | ||
932 | !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | ||
933 | return 1; | ||
934 | if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | ||
935 | ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | ||
936 | return -1; | ||
937 | if (eia->i_touch_when == eib->i_touch_when) | ||
938 | return 0; | ||
939 | if (time_after(eia->i_touch_when, eib->i_touch_when)) | ||
940 | return 1; | ||
941 | else | ||
942 | return -1; | ||
943 | } | ||
944 | |||
945 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | ||
946 | struct ext4_inode_info *locked_ei) | ||
947 | { | 968 | { |
948 | struct ext4_inode_info *ei; | 969 | struct ext4_inode_info *ei; |
949 | struct ext4_es_stats *es_stats; | 970 | struct ext4_es_stats *es_stats; |
950 | struct list_head *cur, *tmp; | ||
951 | LIST_HEAD(skipped); | ||
952 | ktime_t start_time; | 971 | ktime_t start_time; |
953 | u64 scan_time; | 972 | u64 scan_time; |
973 | int nr_to_walk; | ||
954 | int nr_shrunk = 0; | 974 | int nr_shrunk = 0; |
955 | int retried = 0, skip_precached = 1, nr_skipped = 0; | 975 | int retried = 0, nr_skipped = 0; |
956 | 976 | ||
957 | es_stats = &sbi->s_es_stats; | 977 | es_stats = &sbi->s_es_stats; |
958 | start_time = ktime_get(); | 978 | start_time = ktime_get(); |
959 | spin_lock(&sbi->s_es_lru_lock); | ||
960 | 979 | ||
961 | retry: | 980 | retry: |
962 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 981 | spin_lock(&sbi->s_es_lock); |
963 | int shrunk; | 982 | nr_to_walk = sbi->s_es_nr_inode; |
964 | 983 | while (nr_to_walk-- > 0) { | |
965 | /* | 984 | if (list_empty(&sbi->s_es_list)) { |
966 | * If we have already reclaimed all extents from extent | 985 | spin_unlock(&sbi->s_es_lock); |
967 | * status tree, just stop the loop immediately. | 986 | goto out; |
968 | */ | 987 | } |
969 | if (percpu_counter_read_positive( | 988 | ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info, |
970 | &es_stats->es_stats_lru_cnt) == 0) | 989 | i_es_list); |
971 | break; | 990 | /* Move the inode to the tail */ |
972 | 991 | list_move_tail(&ei->i_es_list, &sbi->s_es_list); | |
973 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | ||
974 | 992 | ||
975 | /* | 993 | /* |
976 | * Skip the inode that is newer than the last_sorted | 994 | * Normally we try hard to avoid shrinking precached inodes, |
977 | * time. Normally we try hard to avoid shrinking | 995 | * but we will as a last resort. |
978 | * precached inodes, but we will as a last resort. | ||
979 | */ | 996 | */ |
980 | if ((es_stats->es_stats_last_sorted < ei->i_touch_when) || | 997 | if (!retried && ext4_test_inode_state(&ei->vfs_inode, |
981 | (skip_precached && ext4_test_inode_state(&ei->vfs_inode, | 998 | EXT4_STATE_EXT_PRECACHED)) { |
982 | EXT4_STATE_EXT_PRECACHED))) { | ||
983 | nr_skipped++; | 999 | nr_skipped++; |
984 | list_move_tail(cur, &skipped); | ||
985 | continue; | 1000 | continue; |
986 | } | 1001 | } |
987 | 1002 | ||
988 | if (ei->i_es_lru_nr == 0 || ei == locked_ei || | 1003 | if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) { |
989 | !write_trylock(&ei->i_es_lock)) | 1004 | nr_skipped++; |
990 | continue; | 1005 | continue; |
1006 | } | ||
1007 | /* | ||
1008 | * Now we hold i_es_lock which protects us from inode reclaim | ||
1009 | * freeing inode under us | ||
1010 | */ | ||
1011 | spin_unlock(&sbi->s_es_lock); | ||
991 | 1012 | ||
992 | shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); | 1013 | nr_shrunk += es_reclaim_extents(ei, &nr_to_scan); |
993 | if (ei->i_es_lru_nr == 0) | ||
994 | list_del_init(&ei->i_es_lru); | ||
995 | write_unlock(&ei->i_es_lock); | 1014 | write_unlock(&ei->i_es_lock); |
996 | 1015 | ||
997 | nr_shrunk += shrunk; | 1016 | if (nr_to_scan <= 0) |
998 | nr_to_scan -= shrunk; | 1017 | goto out; |
999 | if (nr_to_scan == 0) | 1018 | spin_lock(&sbi->s_es_lock); |
1000 | break; | ||
1001 | } | 1019 | } |
1002 | 1020 | spin_unlock(&sbi->s_es_lock); | |
1003 | /* Move the newer inodes into the tail of the LRU list. */ | ||
1004 | list_splice_tail(&skipped, &sbi->s_es_lru); | ||
1005 | INIT_LIST_HEAD(&skipped); | ||
1006 | 1021 | ||
1007 | /* | 1022 | /* |
1008 | * If we skipped any inodes, and we weren't able to make any | 1023 | * If we skipped any inodes, and we weren't able to make any |
1009 | * forward progress, sort the list and try again. | 1024 | * forward progress, try again to scan precached inodes. |
1010 | */ | 1025 | */ |
1011 | if ((nr_shrunk == 0) && nr_skipped && !retried) { | 1026 | if ((nr_shrunk == 0) && nr_skipped && !retried) { |
1012 | retried++; | 1027 | retried++; |
1013 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | ||
1014 | es_stats->es_stats_last_sorted = jiffies; | ||
1015 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, | ||
1016 | i_es_lru); | ||
1017 | /* | ||
1018 | * If there are no non-precached inodes left on the | ||
1019 | * list, start releasing precached extents. | ||
1020 | */ | ||
1021 | if (ext4_test_inode_state(&ei->vfs_inode, | ||
1022 | EXT4_STATE_EXT_PRECACHED)) | ||
1023 | skip_precached = 0; | ||
1024 | goto retry; | 1028 | goto retry; |
1025 | } | 1029 | } |
1026 | 1030 | ||
1027 | spin_unlock(&sbi->s_es_lru_lock); | ||
1028 | |||
1029 | if (locked_ei && nr_shrunk == 0) | 1031 | if (locked_ei && nr_shrunk == 0) |
1030 | nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); | 1032 | nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan); |
1031 | 1033 | ||
1034 | out: | ||
1032 | scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); | 1035 | scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); |
1033 | if (likely(es_stats->es_stats_scan_time)) | 1036 | if (likely(es_stats->es_stats_scan_time)) |
1034 | es_stats->es_stats_scan_time = (scan_time + | 1037 | es_stats->es_stats_scan_time = (scan_time + |
@@ -1043,7 +1046,7 @@ retry: | |||
1043 | else | 1046 | else |
1044 | es_stats->es_stats_shrunk = nr_shrunk; | 1047 | es_stats->es_stats_shrunk = nr_shrunk; |
1045 | 1048 | ||
1046 | trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached, | 1049 | trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, |
1047 | nr_skipped, retried); | 1050 | nr_skipped, retried); |
1048 | return nr_shrunk; | 1051 | return nr_shrunk; |
1049 | } | 1052 | } |
@@ -1055,7 +1058,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink, | |||
1055 | struct ext4_sb_info *sbi; | 1058 | struct ext4_sb_info *sbi; |
1056 | 1059 | ||
1057 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); | 1060 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); |
1058 | nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); | 1061 | nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); |
1059 | trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); | 1062 | trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); |
1060 | return nr; | 1063 | return nr; |
1061 | } | 1064 | } |
@@ -1068,13 +1071,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, | |||
1068 | int nr_to_scan = sc->nr_to_scan; | 1071 | int nr_to_scan = sc->nr_to_scan; |
1069 | int ret, nr_shrunk; | 1072 | int ret, nr_shrunk; |
1070 | 1073 | ||
1071 | ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); | 1074 | ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); |
1072 | trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); | 1075 | trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); |
1073 | 1076 | ||
1074 | if (!nr_to_scan) | 1077 | if (!nr_to_scan) |
1075 | return ret; | 1078 | return ret; |
1076 | 1079 | ||
1077 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | 1080 | nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL); |
1078 | 1081 | ||
1079 | trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); | 1082 | trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); |
1080 | return nr_shrunk; | 1083 | return nr_shrunk; |
@@ -1102,28 +1105,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) | |||
1102 | return 0; | 1105 | return 0; |
1103 | 1106 | ||
1104 | /* here we just find an inode that has the max nr. of objects */ | 1107 | /* here we just find an inode that has the max nr. of objects */ |
1105 | spin_lock(&sbi->s_es_lru_lock); | 1108 | spin_lock(&sbi->s_es_lock); |
1106 | list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) { | 1109 | list_for_each_entry(ei, &sbi->s_es_list, i_es_list) { |
1107 | inode_cnt++; | 1110 | inode_cnt++; |
1108 | if (max && max->i_es_all_nr < ei->i_es_all_nr) | 1111 | if (max && max->i_es_all_nr < ei->i_es_all_nr) |
1109 | max = ei; | 1112 | max = ei; |
1110 | else if (!max) | 1113 | else if (!max) |
1111 | max = ei; | 1114 | max = ei; |
1112 | } | 1115 | } |
1113 | spin_unlock(&sbi->s_es_lru_lock); | 1116 | spin_unlock(&sbi->s_es_lock); |
1114 | 1117 | ||
1115 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", | 1118 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", |
1116 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), | 1119 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), |
1117 | percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt)); | 1120 | percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt)); |
1118 | seq_printf(seq, " %lu/%lu cache hits/misses\n", | 1121 | seq_printf(seq, " %lu/%lu cache hits/misses\n", |
1119 | es_stats->es_stats_cache_hits, | 1122 | es_stats->es_stats_cache_hits, |
1120 | es_stats->es_stats_cache_misses); | 1123 | es_stats->es_stats_cache_misses); |
1121 | if (es_stats->es_stats_last_sorted != 0) | ||
1122 | seq_printf(seq, " %u ms last sorted interval\n", | ||
1123 | jiffies_to_msecs(jiffies - | ||
1124 | es_stats->es_stats_last_sorted)); | ||
1125 | if (inode_cnt) | 1124 | if (inode_cnt) |
1126 | seq_printf(seq, " %d inodes on lru list\n", inode_cnt); | 1125 | seq_printf(seq, " %d inodes on list\n", inode_cnt); |
1127 | 1126 | ||
1128 | seq_printf(seq, "average:\n %llu us scan time\n", | 1127 | seq_printf(seq, "average:\n %llu us scan time\n", |
1129 | div_u64(es_stats->es_stats_scan_time, 1000)); | 1128 | div_u64(es_stats->es_stats_scan_time, 1000)); |
@@ -1132,7 +1131,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) | |||
1132 | seq_printf(seq, | 1131 | seq_printf(seq, |
1133 | "maximum:\n %lu inode (%u objects, %u reclaimable)\n" | 1132 | "maximum:\n %lu inode (%u objects, %u reclaimable)\n" |
1134 | " %llu us max scan time\n", | 1133 | " %llu us max scan time\n", |
1135 | max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr, | 1134 | max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr, |
1136 | div_u64(es_stats->es_stats_max_scan_time, 1000)); | 1135 | div_u64(es_stats->es_stats_max_scan_time, 1000)); |
1137 | 1136 | ||
1138 | return 0; | 1137 | return 0; |
@@ -1181,9 +1180,11 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
1181 | { | 1180 | { |
1182 | int err; | 1181 | int err; |
1183 | 1182 | ||
1184 | INIT_LIST_HEAD(&sbi->s_es_lru); | 1183 | /* Make sure we have enough bits for physical block number */ |
1185 | spin_lock_init(&sbi->s_es_lru_lock); | 1184 | BUILD_BUG_ON(ES_SHIFT < 48); |
1186 | sbi->s_es_stats.es_stats_last_sorted = 0; | 1185 | INIT_LIST_HEAD(&sbi->s_es_list); |
1186 | sbi->s_es_nr_inode = 0; | ||
1187 | spin_lock_init(&sbi->s_es_lock); | ||
1187 | sbi->s_es_stats.es_stats_shrunk = 0; | 1188 | sbi->s_es_stats.es_stats_shrunk = 0; |
1188 | sbi->s_es_stats.es_stats_cache_hits = 0; | 1189 | sbi->s_es_stats.es_stats_cache_hits = 0; |
1189 | sbi->s_es_stats.es_stats_cache_misses = 0; | 1190 | sbi->s_es_stats.es_stats_cache_misses = 0; |
@@ -1192,7 +1193,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
1192 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); | 1193 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); |
1193 | if (err) | 1194 | if (err) |
1194 | return err; | 1195 | return err; |
1195 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL); | 1196 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL); |
1196 | if (err) | 1197 | if (err) |
1197 | goto err1; | 1198 | goto err1; |
1198 | 1199 | ||
@@ -1210,7 +1211,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
1210 | return 0; | 1211 | return 0; |
1211 | 1212 | ||
1212 | err2: | 1213 | err2: |
1213 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | 1214 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); |
1214 | err1: | 1215 | err1: |
1215 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | 1216 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); |
1216 | return err; | 1217 | return err; |
@@ -1221,71 +1222,83 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) | |||
1221 | if (sbi->s_proc) | 1222 | if (sbi->s_proc) |
1222 | remove_proc_entry("es_shrinker_info", sbi->s_proc); | 1223 | remove_proc_entry("es_shrinker_info", sbi->s_proc); |
1223 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | 1224 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); |
1224 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | 1225 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); |
1225 | unregister_shrinker(&sbi->s_es_shrinker); | 1226 | unregister_shrinker(&sbi->s_es_shrinker); |
1226 | } | 1227 | } |
1227 | 1228 | ||
1228 | void ext4_es_lru_add(struct inode *inode) | 1229 | /* |
1230 | * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at | ||
1231 | * most *nr_to_scan extents, update *nr_to_scan accordingly. | ||
1232 | * | ||
1233 | * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan. | ||
1234 | * Increment *nr_shrunk by the number of reclaimed extents. Also update | ||
1235 | * ei->i_es_shrink_lblk to where we should continue scanning. | ||
1236 | */ | ||
1237 | static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end, | ||
1238 | int *nr_to_scan, int *nr_shrunk) | ||
1229 | { | 1239 | { |
1230 | struct ext4_inode_info *ei = EXT4_I(inode); | 1240 | struct inode *inode = &ei->vfs_inode; |
1231 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1241 | struct ext4_es_tree *tree = &ei->i_es_tree; |
1232 | 1242 | struct extent_status *es; | |
1233 | ei->i_touch_when = jiffies; | 1243 | struct rb_node *node; |
1234 | |||
1235 | if (!list_empty(&ei->i_es_lru)) | ||
1236 | return; | ||
1237 | 1244 | ||
1238 | spin_lock(&sbi->s_es_lru_lock); | 1245 | es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk); |
1239 | if (list_empty(&ei->i_es_lru)) | 1246 | if (!es) |
1240 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); | 1247 | goto out_wrap; |
1241 | spin_unlock(&sbi->s_es_lru_lock); | 1248 | node = &es->rb_node; |
1242 | } | 1249 | while (*nr_to_scan > 0) { |
1250 | if (es->es_lblk > end) { | ||
1251 | ei->i_es_shrink_lblk = end + 1; | ||
1252 | return 0; | ||
1253 | } | ||
1243 | 1254 | ||
1244 | void ext4_es_lru_del(struct inode *inode) | 1255 | (*nr_to_scan)--; |
1245 | { | 1256 | node = rb_next(&es->rb_node); |
1246 | struct ext4_inode_info *ei = EXT4_I(inode); | 1257 | /* |
1247 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1258 | * We can't reclaim delayed extent from status tree because |
1259 | * fiemap, bigallic, and seek_data/hole need to use it. | ||
1260 | */ | ||
1261 | if (ext4_es_is_delayed(es)) | ||
1262 | goto next; | ||
1263 | if (ext4_es_is_referenced(es)) { | ||
1264 | ext4_es_clear_referenced(es); | ||
1265 | goto next; | ||
1266 | } | ||
1248 | 1267 | ||
1249 | spin_lock(&sbi->s_es_lru_lock); | 1268 | rb_erase(&es->rb_node, &tree->root); |
1250 | if (!list_empty(&ei->i_es_lru)) | 1269 | ext4_es_free_extent(inode, es); |
1251 | list_del_init(&ei->i_es_lru); | 1270 | (*nr_shrunk)++; |
1252 | spin_unlock(&sbi->s_es_lru_lock); | 1271 | next: |
1272 | if (!node) | ||
1273 | goto out_wrap; | ||
1274 | es = rb_entry(node, struct extent_status, rb_node); | ||
1275 | } | ||
1276 | ei->i_es_shrink_lblk = es->es_lblk; | ||
1277 | return 1; | ||
1278 | out_wrap: | ||
1279 | ei->i_es_shrink_lblk = 0; | ||
1280 | return 0; | ||
1253 | } | 1281 | } |
1254 | 1282 | ||
1255 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 1283 | static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan) |
1256 | int nr_to_scan) | ||
1257 | { | 1284 | { |
1258 | struct inode *inode = &ei->vfs_inode; | 1285 | struct inode *inode = &ei->vfs_inode; |
1259 | struct ext4_es_tree *tree = &ei->i_es_tree; | 1286 | int nr_shrunk = 0; |
1260 | struct rb_node *node; | 1287 | ext4_lblk_t start = ei->i_es_shrink_lblk; |
1261 | struct extent_status *es; | ||
1262 | unsigned long nr_shrunk = 0; | ||
1263 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | 1288 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, |
1264 | DEFAULT_RATELIMIT_BURST); | 1289 | DEFAULT_RATELIMIT_BURST); |
1265 | 1290 | ||
1266 | if (ei->i_es_lru_nr == 0) | 1291 | if (ei->i_es_shk_nr == 0) |
1267 | return 0; | 1292 | return 0; |
1268 | 1293 | ||
1269 | if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && | 1294 | if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && |
1270 | __ratelimit(&_rs)) | 1295 | __ratelimit(&_rs)) |
1271 | ext4_warning(inode->i_sb, "forced shrink of precached extents"); | 1296 | ext4_warning(inode->i_sb, "forced shrink of precached extents"); |
1272 | 1297 | ||
1273 | node = rb_first(&tree->root); | 1298 | if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) && |
1274 | while (node != NULL) { | 1299 | start != 0) |
1275 | es = rb_entry(node, struct extent_status, rb_node); | 1300 | es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk); |
1276 | node = rb_next(&es->rb_node); | 1301 | |
1277 | /* | 1302 | ei->i_es_tree.cache_es = NULL; |
1278 | * We can't reclaim delayed extent from status tree because | ||
1279 | * fiemap, bigallic, and seek_data/hole need to use it. | ||
1280 | */ | ||
1281 | if (!ext4_es_is_delayed(es)) { | ||
1282 | rb_erase(&es->rb_node, &tree->root); | ||
1283 | ext4_es_free_extent(inode, es); | ||
1284 | nr_shrunk++; | ||
1285 | if (--nr_to_scan == 0) | ||
1286 | break; | ||
1287 | } | ||
1288 | } | ||
1289 | tree->cache_es = NULL; | ||
1290 | return nr_shrunk; | 1303 | return nr_shrunk; |
1291 | } | 1304 | } |
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index efd5f970b501..691b52613ce4 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
@@ -29,25 +29,28 @@ | |||
29 | /* | 29 | /* |
30 | * These flags live in the high bits of extent_status.es_pblk | 30 | * These flags live in the high bits of extent_status.es_pblk |
31 | */ | 31 | */ |
32 | #define ES_SHIFT 60 | 32 | enum { |
33 | 33 | ES_WRITTEN_B, | |
34 | #define EXTENT_STATUS_WRITTEN (1 << 3) | 34 | ES_UNWRITTEN_B, |
35 | #define EXTENT_STATUS_UNWRITTEN (1 << 2) | 35 | ES_DELAYED_B, |
36 | #define EXTENT_STATUS_DELAYED (1 << 1) | 36 | ES_HOLE_B, |
37 | #define EXTENT_STATUS_HOLE (1 << 0) | 37 | ES_REFERENCED_B, |
38 | ES_FLAGS | ||
39 | }; | ||
38 | 40 | ||
39 | #define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \ | 41 | #define ES_SHIFT (sizeof(ext4_fsblk_t)*8 - ES_FLAGS) |
40 | EXTENT_STATUS_UNWRITTEN | \ | 42 | #define ES_MASK (~((ext4_fsblk_t)0) << ES_SHIFT) |
41 | EXTENT_STATUS_DELAYED | \ | ||
42 | EXTENT_STATUS_HOLE) | ||
43 | 43 | ||
44 | #define ES_WRITTEN (1ULL << 63) | 44 | #define EXTENT_STATUS_WRITTEN (1 << ES_WRITTEN_B) |
45 | #define ES_UNWRITTEN (1ULL << 62) | 45 | #define EXTENT_STATUS_UNWRITTEN (1 << ES_UNWRITTEN_B) |
46 | #define ES_DELAYED (1ULL << 61) | 46 | #define EXTENT_STATUS_DELAYED (1 << ES_DELAYED_B) |
47 | #define ES_HOLE (1ULL << 60) | 47 | #define EXTENT_STATUS_HOLE (1 << ES_HOLE_B) |
48 | #define EXTENT_STATUS_REFERENCED (1 << ES_REFERENCED_B) | ||
48 | 49 | ||
49 | #define ES_MASK (ES_WRITTEN | ES_UNWRITTEN | \ | 50 | #define ES_TYPE_MASK ((ext4_fsblk_t)(EXTENT_STATUS_WRITTEN | \ |
50 | ES_DELAYED | ES_HOLE) | 51 | EXTENT_STATUS_UNWRITTEN | \ |
52 | EXTENT_STATUS_DELAYED | \ | ||
53 | EXTENT_STATUS_HOLE) << ES_SHIFT) | ||
51 | 54 | ||
52 | struct ext4_sb_info; | 55 | struct ext4_sb_info; |
53 | struct ext4_extent; | 56 | struct ext4_extent; |
@@ -65,14 +68,13 @@ struct ext4_es_tree { | |||
65 | }; | 68 | }; |
66 | 69 | ||
67 | struct ext4_es_stats { | 70 | struct ext4_es_stats { |
68 | unsigned long es_stats_last_sorted; | ||
69 | unsigned long es_stats_shrunk; | 71 | unsigned long es_stats_shrunk; |
70 | unsigned long es_stats_cache_hits; | 72 | unsigned long es_stats_cache_hits; |
71 | unsigned long es_stats_cache_misses; | 73 | unsigned long es_stats_cache_misses; |
72 | u64 es_stats_scan_time; | 74 | u64 es_stats_scan_time; |
73 | u64 es_stats_max_scan_time; | 75 | u64 es_stats_max_scan_time; |
74 | struct percpu_counter es_stats_all_cnt; | 76 | struct percpu_counter es_stats_all_cnt; |
75 | struct percpu_counter es_stats_lru_cnt; | 77 | struct percpu_counter es_stats_shk_cnt; |
76 | }; | 78 | }; |
77 | 79 | ||
78 | extern int __init ext4_init_es(void); | 80 | extern int __init ext4_init_es(void); |
@@ -93,29 +95,49 @@ extern void ext4_es_find_delayed_extent_range(struct inode *inode, | |||
93 | extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, | 95 | extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, |
94 | struct extent_status *es); | 96 | struct extent_status *es); |
95 | 97 | ||
98 | static inline unsigned int ext4_es_status(struct extent_status *es) | ||
99 | { | ||
100 | return es->es_pblk >> ES_SHIFT; | ||
101 | } | ||
102 | |||
103 | static inline unsigned int ext4_es_type(struct extent_status *es) | ||
104 | { | ||
105 | return (es->es_pblk & ES_TYPE_MASK) >> ES_SHIFT; | ||
106 | } | ||
107 | |||
96 | static inline int ext4_es_is_written(struct extent_status *es) | 108 | static inline int ext4_es_is_written(struct extent_status *es) |
97 | { | 109 | { |
98 | return (es->es_pblk & ES_WRITTEN) != 0; | 110 | return (ext4_es_type(es) & EXTENT_STATUS_WRITTEN) != 0; |
99 | } | 111 | } |
100 | 112 | ||
101 | static inline int ext4_es_is_unwritten(struct extent_status *es) | 113 | static inline int ext4_es_is_unwritten(struct extent_status *es) |
102 | { | 114 | { |
103 | return (es->es_pblk & ES_UNWRITTEN) != 0; | 115 | return (ext4_es_type(es) & EXTENT_STATUS_UNWRITTEN) != 0; |
104 | } | 116 | } |
105 | 117 | ||
106 | static inline int ext4_es_is_delayed(struct extent_status *es) | 118 | static inline int ext4_es_is_delayed(struct extent_status *es) |
107 | { | 119 | { |
108 | return (es->es_pblk & ES_DELAYED) != 0; | 120 | return (ext4_es_type(es) & EXTENT_STATUS_DELAYED) != 0; |
109 | } | 121 | } |
110 | 122 | ||
111 | static inline int ext4_es_is_hole(struct extent_status *es) | 123 | static inline int ext4_es_is_hole(struct extent_status *es) |
112 | { | 124 | { |
113 | return (es->es_pblk & ES_HOLE) != 0; | 125 | return (ext4_es_type(es) & EXTENT_STATUS_HOLE) != 0; |
114 | } | 126 | } |
115 | 127 | ||
116 | static inline unsigned int ext4_es_status(struct extent_status *es) | 128 | static inline void ext4_es_set_referenced(struct extent_status *es) |
117 | { | 129 | { |
118 | return es->es_pblk >> ES_SHIFT; | 130 | es->es_pblk |= ((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT; |
131 | } | ||
132 | |||
133 | static inline void ext4_es_clear_referenced(struct extent_status *es) | ||
134 | { | ||
135 | es->es_pblk &= ~(((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT); | ||
136 | } | ||
137 | |||
138 | static inline int ext4_es_is_referenced(struct extent_status *es) | ||
139 | { | ||
140 | return (ext4_es_status(es) & EXTENT_STATUS_REFERENCED) != 0; | ||
119 | } | 141 | } |
120 | 142 | ||
121 | static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es) | 143 | static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es) |
@@ -135,23 +157,19 @@ static inline void ext4_es_store_pblock(struct extent_status *es, | |||
135 | static inline void ext4_es_store_status(struct extent_status *es, | 157 | static inline void ext4_es_store_status(struct extent_status *es, |
136 | unsigned int status) | 158 | unsigned int status) |
137 | { | 159 | { |
138 | es->es_pblk = (((ext4_fsblk_t) | 160 | es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) | |
139 | (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) | | 161 | (es->es_pblk & ~ES_MASK); |
140 | (es->es_pblk & ~ES_MASK)); | ||
141 | } | 162 | } |
142 | 163 | ||
143 | static inline void ext4_es_store_pblock_status(struct extent_status *es, | 164 | static inline void ext4_es_store_pblock_status(struct extent_status *es, |
144 | ext4_fsblk_t pb, | 165 | ext4_fsblk_t pb, |
145 | unsigned int status) | 166 | unsigned int status) |
146 | { | 167 | { |
147 | es->es_pblk = (((ext4_fsblk_t) | 168 | es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) | |
148 | (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) | | 169 | (pb & ~ES_MASK); |
149 | (pb & ~ES_MASK)); | ||
150 | } | 170 | } |
151 | 171 | ||
152 | extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); | 172 | extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
153 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); | 173 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
154 | extern void ext4_es_lru_add(struct inode *inode); | ||
155 | extern void ext4_es_lru_del(struct inode *inode); | ||
156 | 174 | ||
157 | #endif /* _EXT4_EXTENTS_STATUS_H */ | 175 | #endif /* _EXT4_EXTENTS_STATUS_H */ |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8131be8c0af3..513c12cf444c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -273,24 +273,19 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
273 | * we determine this extent as a data or a hole according to whether the | 273 | * we determine this extent as a data or a hole according to whether the |
274 | * page cache has data or not. | 274 | * page cache has data or not. |
275 | */ | 275 | */ |
276 | static int ext4_find_unwritten_pgoff(struct inode *inode, | 276 | static int ext4_find_unwritten_pgoff(struct inode *inode, int whence, |
277 | int whence, | 277 | loff_t endoff, loff_t *offset) |
278 | struct ext4_map_blocks *map, | ||
279 | loff_t *offset) | ||
280 | { | 278 | { |
281 | struct pagevec pvec; | 279 | struct pagevec pvec; |
282 | unsigned int blkbits; | ||
283 | pgoff_t index; | 280 | pgoff_t index; |
284 | pgoff_t end; | 281 | pgoff_t end; |
285 | loff_t endoff; | ||
286 | loff_t startoff; | 282 | loff_t startoff; |
287 | loff_t lastoff; | 283 | loff_t lastoff; |
288 | int found = 0; | 284 | int found = 0; |
289 | 285 | ||
290 | blkbits = inode->i_sb->s_blocksize_bits; | ||
291 | startoff = *offset; | 286 | startoff = *offset; |
292 | lastoff = startoff; | 287 | lastoff = startoff; |
293 | endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; | 288 | |
294 | 289 | ||
295 | index = startoff >> PAGE_CACHE_SHIFT; | 290 | index = startoff >> PAGE_CACHE_SHIFT; |
296 | end = endoff >> PAGE_CACHE_SHIFT; | 291 | end = endoff >> PAGE_CACHE_SHIFT; |
@@ -408,147 +403,144 @@ out: | |||
408 | static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | 403 | static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) |
409 | { | 404 | { |
410 | struct inode *inode = file->f_mapping->host; | 405 | struct inode *inode = file->f_mapping->host; |
411 | struct ext4_map_blocks map; | 406 | struct fiemap_extent_info fie; |
412 | struct extent_status es; | 407 | struct fiemap_extent ext[2]; |
413 | ext4_lblk_t start, last, end; | 408 | loff_t next; |
414 | loff_t dataoff, isize; | 409 | int i, ret = 0; |
415 | int blkbits; | ||
416 | int ret = 0; | ||
417 | 410 | ||
418 | mutex_lock(&inode->i_mutex); | 411 | mutex_lock(&inode->i_mutex); |
419 | 412 | if (offset >= inode->i_size) { | |
420 | isize = i_size_read(inode); | ||
421 | if (offset >= isize) { | ||
422 | mutex_unlock(&inode->i_mutex); | 413 | mutex_unlock(&inode->i_mutex); |
423 | return -ENXIO; | 414 | return -ENXIO; |
424 | } | 415 | } |
425 | 416 | fie.fi_flags = 0; | |
426 | blkbits = inode->i_sb->s_blocksize_bits; | 417 | fie.fi_extents_max = 2; |
427 | start = offset >> blkbits; | 418 | fie.fi_extents_start = (struct fiemap_extent __user *) &ext; |
428 | last = start; | 419 | while (1) { |
429 | end = isize >> blkbits; | 420 | mm_segment_t old_fs = get_fs(); |
430 | dataoff = offset; | 421 | |
431 | 422 | fie.fi_extents_mapped = 0; | |
432 | do { | 423 | memset(ext, 0, sizeof(*ext) * fie.fi_extents_max); |
433 | map.m_lblk = last; | 424 | |
434 | map.m_len = end - last + 1; | 425 | set_fs(get_ds()); |
435 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 426 | ret = ext4_fiemap(inode, &fie, offset, maxsize - offset); |
436 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 427 | set_fs(old_fs); |
437 | if (last != start) | 428 | if (ret) |
438 | dataoff = (loff_t)last << blkbits; | ||
439 | break; | 429 | break; |
440 | } | ||
441 | 430 | ||
442 | /* | 431 | /* No extents found, EOF */ |
443 | * If there is a delay extent at this offset, | 432 | if (!fie.fi_extents_mapped) { |
444 | * it will be as a data. | 433 | ret = -ENXIO; |
445 | */ | ||
446 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | ||
447 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | ||
448 | if (last != start) | ||
449 | dataoff = (loff_t)last << blkbits; | ||
450 | break; | 434 | break; |
451 | } | 435 | } |
436 | for (i = 0; i < fie.fi_extents_mapped; i++) { | ||
437 | next = (loff_t)(ext[i].fe_length + ext[i].fe_logical); | ||
452 | 438 | ||
453 | /* | 439 | if (offset < (loff_t)ext[i].fe_logical) |
454 | * If there is a unwritten extent at this offset, | 440 | offset = (loff_t)ext[i].fe_logical; |
455 | * it will be as a data or a hole according to page | 441 | /* |
456 | * cache that has data or not. | 442 | * If extent is not unwritten, then it contains valid |
457 | */ | 443 | * data, mapped or delayed. |
458 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { | 444 | */ |
459 | int unwritten; | 445 | if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) |
460 | unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, | 446 | goto out; |
461 | &map, &dataoff); | ||
462 | if (unwritten) | ||
463 | break; | ||
464 | } | ||
465 | 447 | ||
466 | last++; | 448 | /* |
467 | dataoff = (loff_t)last << blkbits; | 449 | * If there is a unwritten extent at this offset, |
468 | } while (last <= end); | 450 | * it will be as a data or a hole according to page |
451 | * cache that has data or not. | ||
452 | */ | ||
453 | if (ext4_find_unwritten_pgoff(inode, SEEK_DATA, | ||
454 | next, &offset)) | ||
455 | goto out; | ||
469 | 456 | ||
457 | if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) { | ||
458 | ret = -ENXIO; | ||
459 | goto out; | ||
460 | } | ||
461 | offset = next; | ||
462 | } | ||
463 | } | ||
464 | if (offset > inode->i_size) | ||
465 | offset = inode->i_size; | ||
466 | out: | ||
470 | mutex_unlock(&inode->i_mutex); | 467 | mutex_unlock(&inode->i_mutex); |
468 | if (ret) | ||
469 | return ret; | ||
471 | 470 | ||
472 | if (dataoff > isize) | 471 | return vfs_setpos(file, offset, maxsize); |
473 | return -ENXIO; | ||
474 | |||
475 | return vfs_setpos(file, dataoff, maxsize); | ||
476 | } | 472 | } |
477 | 473 | ||
478 | /* | 474 | /* |
479 | * ext4_seek_hole() retrieves the offset for SEEK_HOLE. | 475 | * ext4_seek_hole() retrieves the offset for SEEK_HOLE |
480 | */ | 476 | */ |
481 | static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | 477 | static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) |
482 | { | 478 | { |
483 | struct inode *inode = file->f_mapping->host; | 479 | struct inode *inode = file->f_mapping->host; |
484 | struct ext4_map_blocks map; | 480 | struct fiemap_extent_info fie; |
485 | struct extent_status es; | 481 | struct fiemap_extent ext[2]; |
486 | ext4_lblk_t start, last, end; | 482 | loff_t next; |
487 | loff_t holeoff, isize; | 483 | int i, ret = 0; |
488 | int blkbits; | ||
489 | int ret = 0; | ||
490 | 484 | ||
491 | mutex_lock(&inode->i_mutex); | 485 | mutex_lock(&inode->i_mutex); |
492 | 486 | if (offset >= inode->i_size) { | |
493 | isize = i_size_read(inode); | ||
494 | if (offset >= isize) { | ||
495 | mutex_unlock(&inode->i_mutex); | 487 | mutex_unlock(&inode->i_mutex); |
496 | return -ENXIO; | 488 | return -ENXIO; |
497 | } | 489 | } |
498 | 490 | ||
499 | blkbits = inode->i_sb->s_blocksize_bits; | 491 | fie.fi_flags = 0; |
500 | start = offset >> blkbits; | 492 | fie.fi_extents_max = 2; |
501 | last = start; | 493 | fie.fi_extents_start = (struct fiemap_extent __user *)&ext; |
502 | end = isize >> blkbits; | 494 | while (1) { |
503 | holeoff = offset; | 495 | mm_segment_t old_fs = get_fs(); |
504 | 496 | ||
505 | do { | 497 | fie.fi_extents_mapped = 0; |
506 | map.m_lblk = last; | 498 | memset(ext, 0, sizeof(*ext)); |
507 | map.m_len = end - last + 1; | ||
508 | ret = ext4_map_blocks(NULL, inode, &map, 0); | ||
509 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | ||
510 | last += ret; | ||
511 | holeoff = (loff_t)last << blkbits; | ||
512 | continue; | ||
513 | } | ||
514 | 499 | ||
515 | /* | 500 | set_fs(get_ds()); |
516 | * If there is a delay extent at this offset, | 501 | ret = ext4_fiemap(inode, &fie, offset, maxsize - offset); |
517 | * we will skip this extent. | 502 | set_fs(old_fs); |
518 | */ | 503 | if (ret) |
519 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | 504 | break; |
520 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | ||
521 | last = es.es_lblk + es.es_len; | ||
522 | holeoff = (loff_t)last << blkbits; | ||
523 | continue; | ||
524 | } | ||
525 | 505 | ||
526 | /* | 506 | /* No extents found */ |
527 | * If there is a unwritten extent at this offset, | 507 | if (!fie.fi_extents_mapped) |
528 | * it will be as a data or a hole according to page | 508 | break; |
529 | * cache that has data or not. | 509 | |
530 | */ | 510 | for (i = 0; i < fie.fi_extents_mapped; i++) { |
531 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { | 511 | next = (loff_t)(ext[i].fe_logical + ext[i].fe_length); |
532 | int unwritten; | 512 | /* |
533 | unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, | 513 | * If extent is not unwritten, then it contains valid |
534 | &map, &holeoff); | 514 | * data, mapped or delayed. |
535 | if (!unwritten) { | 515 | */ |
536 | last += ret; | 516 | if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) { |
537 | holeoff = (loff_t)last << blkbits; | 517 | if (offset < (loff_t)ext[i].fe_logical) |
518 | goto out; | ||
519 | offset = next; | ||
538 | continue; | 520 | continue; |
539 | } | 521 | } |
540 | } | 522 | /* |
541 | 523 | * If there is a unwritten extent at this offset, | |
542 | /* find a hole */ | 524 | * it will be as a data or a hole according to page |
543 | break; | 525 | * cache that has data or not. |
544 | } while (last <= end); | 526 | */ |
527 | if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE, | ||
528 | next, &offset)) | ||
529 | goto out; | ||
545 | 530 | ||
531 | offset = next; | ||
532 | if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) | ||
533 | goto out; | ||
534 | } | ||
535 | } | ||
536 | if (offset > inode->i_size) | ||
537 | offset = inode->i_size; | ||
538 | out: | ||
546 | mutex_unlock(&inode->i_mutex); | 539 | mutex_unlock(&inode->i_mutex); |
540 | if (ret) | ||
541 | return ret; | ||
547 | 542 | ||
548 | if (holeoff > isize) | 543 | return vfs_setpos(file, offset, maxsize); |
549 | holeoff = isize; | ||
550 | |||
551 | return vfs_setpos(file, holeoff, maxsize); | ||
552 | } | 544 | } |
553 | 545 | ||
554 | /* | 546 | /* |
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 3ea62695abce..4b143febf21f 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
@@ -811,8 +811,11 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, | |||
811 | ret = __block_write_begin(page, 0, inline_size, | 811 | ret = __block_write_begin(page, 0, inline_size, |
812 | ext4_da_get_block_prep); | 812 | ext4_da_get_block_prep); |
813 | if (ret) { | 813 | if (ret) { |
814 | up_read(&EXT4_I(inode)->xattr_sem); | ||
815 | unlock_page(page); | ||
816 | page_cache_release(page); | ||
814 | ext4_truncate_failed_write(inode); | 817 | ext4_truncate_failed_write(inode); |
815 | goto out; | 818 | return ret; |
816 | } | 819 | } |
817 | 820 | ||
818 | SetPageDirty(page); | 821 | SetPageDirty(page); |
@@ -870,6 +873,12 @@ retry_journal: | |||
870 | goto out_journal; | 873 | goto out_journal; |
871 | } | 874 | } |
872 | 875 | ||
876 | /* | ||
877 | * We cannot recurse into the filesystem as the transaction | ||
878 | * is already started. | ||
879 | */ | ||
880 | flags |= AOP_FLAG_NOFS; | ||
881 | |||
873 | if (ret == -ENOSPC) { | 882 | if (ret == -ENOSPC) { |
874 | ret = ext4_da_convert_inline_data_to_extent(mapping, | 883 | ret = ext4_da_convert_inline_data_to_extent(mapping, |
875 | inode, | 884 | inode, |
@@ -882,11 +891,6 @@ retry_journal: | |||
882 | goto out; | 891 | goto out; |
883 | } | 892 | } |
884 | 893 | ||
885 | /* | ||
886 | * We cannot recurse into the filesystem as the transaction | ||
887 | * is already started. | ||
888 | */ | ||
889 | flags |= AOP_FLAG_NOFS; | ||
890 | 894 | ||
891 | page = grab_cache_page_write_begin(mapping, 0, flags); | 895 | page = grab_cache_page_write_begin(mapping, 0, flags); |
892 | if (!page) { | 896 | if (!page) { |
@@ -1807,11 +1811,12 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) | |||
1807 | 1811 | ||
1808 | int ext4_inline_data_fiemap(struct inode *inode, | 1812 | int ext4_inline_data_fiemap(struct inode *inode, |
1809 | struct fiemap_extent_info *fieinfo, | 1813 | struct fiemap_extent_info *fieinfo, |
1810 | int *has_inline) | 1814 | int *has_inline, __u64 start, __u64 len) |
1811 | { | 1815 | { |
1812 | __u64 physical = 0; | 1816 | __u64 physical = 0; |
1813 | __u64 length; | 1817 | __u64 inline_len; |
1814 | __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST; | 1818 | __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED | |
1819 | FIEMAP_EXTENT_LAST; | ||
1815 | int error = 0; | 1820 | int error = 0; |
1816 | struct ext4_iloc iloc; | 1821 | struct ext4_iloc iloc; |
1817 | 1822 | ||
@@ -1820,6 +1825,13 @@ int ext4_inline_data_fiemap(struct inode *inode, | |||
1820 | *has_inline = 0; | 1825 | *has_inline = 0; |
1821 | goto out; | 1826 | goto out; |
1822 | } | 1827 | } |
1828 | inline_len = min_t(size_t, ext4_get_inline_size(inode), | ||
1829 | i_size_read(inode)); | ||
1830 | if (start >= inline_len) | ||
1831 | goto out; | ||
1832 | if (start + len < inline_len) | ||
1833 | inline_len = start + len; | ||
1834 | inline_len -= start; | ||
1823 | 1835 | ||
1824 | error = ext4_get_inode_loc(inode, &iloc); | 1836 | error = ext4_get_inode_loc(inode, &iloc); |
1825 | if (error) | 1837 | if (error) |
@@ -1828,11 +1840,10 @@ int ext4_inline_data_fiemap(struct inode *inode, | |||
1828 | physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; | 1840 | physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; |
1829 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; | 1841 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; |
1830 | physical += offsetof(struct ext4_inode, i_block); | 1842 | physical += offsetof(struct ext4_inode, i_block); |
1831 | length = i_size_read(inode); | ||
1832 | 1843 | ||
1833 | if (physical) | 1844 | if (physical) |
1834 | error = fiemap_fill_next_extent(fieinfo, 0, physical, | 1845 | error = fiemap_fill_next_extent(fieinfo, start, physical, |
1835 | length, flags); | 1846 | inline_len, flags); |
1836 | brelse(iloc.bh); | 1847 | brelse(iloc.bh); |
1837 | out: | 1848 | out: |
1838 | up_read(&EXT4_I(inode)->xattr_sem); | 1849 | up_read(&EXT4_I(inode)->xattr_sem); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3356ab5395f4..5653fa42930b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -416,11 +416,6 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, | |||
416 | } | 416 | } |
417 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | 417 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) |
418 | up_read((&EXT4_I(inode)->i_data_sem)); | 418 | up_read((&EXT4_I(inode)->i_data_sem)); |
419 | /* | ||
420 | * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag | ||
421 | * because it shouldn't be marked in es_map->m_flags. | ||
422 | */ | ||
423 | map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY); | ||
424 | 419 | ||
425 | /* | 420 | /* |
426 | * We don't check m_len because extent will be collpased in status | 421 | * We don't check m_len because extent will be collpased in status |
@@ -491,7 +486,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
491 | 486 | ||
492 | /* Lookup extent status tree firstly */ | 487 | /* Lookup extent status tree firstly */ |
493 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 488 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
494 | ext4_es_lru_add(inode); | ||
495 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { | 489 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { |
496 | map->m_pblk = ext4_es_pblock(&es) + | 490 | map->m_pblk = ext4_es_pblock(&es) + |
497 | map->m_lblk - es.es_lblk; | 491 | map->m_lblk - es.es_lblk; |
@@ -1393,7 +1387,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
1393 | 1387 | ||
1394 | /* Lookup extent status tree firstly */ | 1388 | /* Lookup extent status tree firstly */ |
1395 | if (ext4_es_lookup_extent(inode, iblock, &es)) { | 1389 | if (ext4_es_lookup_extent(inode, iblock, &es)) { |
1396 | ext4_es_lru_add(inode); | ||
1397 | if (ext4_es_is_hole(&es)) { | 1390 | if (ext4_es_is_hole(&es)) { |
1398 | retval = 0; | 1391 | retval = 0; |
1399 | down_read(&EXT4_I(inode)->i_data_sem); | 1392 | down_read(&EXT4_I(inode)->i_data_sem); |
@@ -1434,24 +1427,12 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
1434 | * file system block. | 1427 | * file system block. |
1435 | */ | 1428 | */ |
1436 | down_read(&EXT4_I(inode)->i_data_sem); | 1429 | down_read(&EXT4_I(inode)->i_data_sem); |
1437 | if (ext4_has_inline_data(inode)) { | 1430 | if (ext4_has_inline_data(inode)) |
1438 | /* | ||
1439 | * We will soon create blocks for this page, and let | ||
1440 | * us pretend as if the blocks aren't allocated yet. | ||
1441 | * In case of clusters, we have to handle the work | ||
1442 | * of mapping from cluster so that the reserved space | ||
1443 | * is calculated properly. | ||
1444 | */ | ||
1445 | if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) && | ||
1446 | ext4_find_delalloc_cluster(inode, map->m_lblk)) | ||
1447 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
1448 | retval = 0; | 1431 | retval = 0; |
1449 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 1432 | else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
1450 | retval = ext4_ext_map_blocks(NULL, inode, map, | 1433 | retval = ext4_ext_map_blocks(NULL, inode, map, 0); |
1451 | EXT4_GET_BLOCKS_NO_PUT_HOLE); | ||
1452 | else | 1434 | else |
1453 | retval = ext4_ind_map_blocks(NULL, inode, map, | 1435 | retval = ext4_ind_map_blocks(NULL, inode, map, 0); |
1454 | EXT4_GET_BLOCKS_NO_PUT_HOLE); | ||
1455 | 1436 | ||
1456 | add_delayed: | 1437 | add_delayed: |
1457 | if (retval == 0) { | 1438 | if (retval == 0) { |
@@ -1465,7 +1446,8 @@ add_delayed: | |||
1465 | * then we don't need to reserve it again. However we still need | 1446 | * then we don't need to reserve it again. However we still need |
1466 | * to reserve metadata for every block we're going to write. | 1447 | * to reserve metadata for every block we're going to write. |
1467 | */ | 1448 | */ |
1468 | if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { | 1449 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 || |
1450 | !ext4_find_delalloc_cluster(inode, map->m_lblk)) { | ||
1469 | ret = ext4_da_reserve_space(inode, iblock); | 1451 | ret = ext4_da_reserve_space(inode, iblock); |
1470 | if (ret) { | 1452 | if (ret) { |
1471 | /* not enough space to reserve */ | 1453 | /* not enough space to reserve */ |
@@ -1481,11 +1463,6 @@ add_delayed: | |||
1481 | goto out_unlock; | 1463 | goto out_unlock; |
1482 | } | 1464 | } |
1483 | 1465 | ||
1484 | /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served | ||
1485 | * and it should not appear on the bh->b_state. | ||
1486 | */ | ||
1487 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | ||
1488 | |||
1489 | map_bh(bh, inode->i_sb, invalid_block); | 1466 | map_bh(bh, inode->i_sb, invalid_block); |
1490 | set_buffer_new(bh); | 1467 | set_buffer_new(bh); |
1491 | set_buffer_delay(bh); | 1468 | set_buffer_delay(bh); |
@@ -3643,7 +3620,7 @@ out_stop: | |||
3643 | * If this was a simple ftruncate() and the file will remain alive, | 3620 | * If this was a simple ftruncate() and the file will remain alive, |
3644 | * then we need to clear up the orphan record which we created above. | 3621 | * then we need to clear up the orphan record which we created above. |
3645 | * However, if this was a real unlink then we were called by | 3622 | * However, if this was a real unlink then we were called by |
3646 | * ext4_delete_inode(), and we allow that function to clean up the | 3623 | * ext4_evict_inode(), and we allow that function to clean up the |
3647 | * orphan info for us. | 3624 | * orphan info for us. |
3648 | */ | 3625 | */ |
3649 | if (inode->i_nlink) | 3626 | if (inode->i_nlink) |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bfda18a15592..f58a0d106726 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -78,8 +78,6 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) | |||
78 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); | 78 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); |
79 | ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); | 79 | ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); |
80 | ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); | 80 | ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); |
81 | ext4_es_lru_del(inode1); | ||
82 | ext4_es_lru_del(inode2); | ||
83 | 81 | ||
84 | isize = i_size_read(inode1); | 82 | isize = i_size_read(inode1); |
85 | i_size_write(inode1, i_size_read(inode2)); | 83 | i_size_write(inode1, i_size_read(inode2)); |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index dbfe15c2533c..8d1e60214ef0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2358,7 +2358,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) | |||
2358 | if (sbi->s_group_info) { | 2358 | if (sbi->s_group_info) { |
2359 | memcpy(new_groupinfo, sbi->s_group_info, | 2359 | memcpy(new_groupinfo, sbi->s_group_info, |
2360 | sbi->s_group_info_size * sizeof(*sbi->s_group_info)); | 2360 | sbi->s_group_info_size * sizeof(*sbi->s_group_info)); |
2361 | ext4_kvfree(sbi->s_group_info); | 2361 | kvfree(sbi->s_group_info); |
2362 | } | 2362 | } |
2363 | sbi->s_group_info = new_groupinfo; | 2363 | sbi->s_group_info = new_groupinfo; |
2364 | sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); | 2364 | sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); |
@@ -2385,7 +2385,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2385 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { | 2385 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { |
2386 | metalen = sizeof(*meta_group_info) << | 2386 | metalen = sizeof(*meta_group_info) << |
2387 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2387 | EXT4_DESC_PER_BLOCK_BITS(sb); |
2388 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | 2388 | meta_group_info = kmalloc(metalen, GFP_NOFS); |
2389 | if (meta_group_info == NULL) { | 2389 | if (meta_group_info == NULL) { |
2390 | ext4_msg(sb, KERN_ERR, "can't allocate mem " | 2390 | ext4_msg(sb, KERN_ERR, "can't allocate mem " |
2391 | "for a buddy group"); | 2391 | "for a buddy group"); |
@@ -2399,7 +2399,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2399 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | 2399 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; |
2400 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 2400 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
2401 | 2401 | ||
2402 | meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL); | 2402 | meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); |
2403 | if (meta_group_info[i] == NULL) { | 2403 | if (meta_group_info[i] == NULL) { |
2404 | ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); | 2404 | ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); |
2405 | goto exit_group_info; | 2405 | goto exit_group_info; |
@@ -2428,7 +2428,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2428 | { | 2428 | { |
2429 | struct buffer_head *bh; | 2429 | struct buffer_head *bh; |
2430 | meta_group_info[i]->bb_bitmap = | 2430 | meta_group_info[i]->bb_bitmap = |
2431 | kmalloc(sb->s_blocksize, GFP_KERNEL); | 2431 | kmalloc(sb->s_blocksize, GFP_NOFS); |
2432 | BUG_ON(meta_group_info[i]->bb_bitmap == NULL); | 2432 | BUG_ON(meta_group_info[i]->bb_bitmap == NULL); |
2433 | bh = ext4_read_block_bitmap(sb, group); | 2433 | bh = ext4_read_block_bitmap(sb, group); |
2434 | BUG_ON(bh == NULL); | 2434 | BUG_ON(bh == NULL); |
@@ -2495,7 +2495,7 @@ err_freebuddy: | |||
2495 | kfree(sbi->s_group_info[i]); | 2495 | kfree(sbi->s_group_info[i]); |
2496 | iput(sbi->s_buddy_cache); | 2496 | iput(sbi->s_buddy_cache); |
2497 | err_freesgi: | 2497 | err_freesgi: |
2498 | ext4_kvfree(sbi->s_group_info); | 2498 | kvfree(sbi->s_group_info); |
2499 | return -ENOMEM; | 2499 | return -ENOMEM; |
2500 | } | 2500 | } |
2501 | 2501 | ||
@@ -2708,12 +2708,11 @@ int ext4_mb_release(struct super_block *sb) | |||
2708 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2708 | EXT4_DESC_PER_BLOCK_BITS(sb); |
2709 | for (i = 0; i < num_meta_group_infos; i++) | 2709 | for (i = 0; i < num_meta_group_infos; i++) |
2710 | kfree(sbi->s_group_info[i]); | 2710 | kfree(sbi->s_group_info[i]); |
2711 | ext4_kvfree(sbi->s_group_info); | 2711 | kvfree(sbi->s_group_info); |
2712 | } | 2712 | } |
2713 | kfree(sbi->s_mb_offsets); | 2713 | kfree(sbi->s_mb_offsets); |
2714 | kfree(sbi->s_mb_maxs); | 2714 | kfree(sbi->s_mb_maxs); |
2715 | if (sbi->s_buddy_cache) | 2715 | iput(sbi->s_buddy_cache); |
2716 | iput(sbi->s_buddy_cache); | ||
2717 | if (sbi->s_mb_stats) { | 2716 | if (sbi->s_mb_stats) { |
2718 | ext4_msg(sb, KERN_INFO, | 2717 | ext4_msg(sb, KERN_INFO, |
2719 | "mballoc: %u blocks %u reqs (%u success)", | 2718 | "mballoc: %u blocks %u reqs (%u success)", |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index a432634f2e6a..3cb267aee802 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -592,7 +592,7 @@ err_out: | |||
592 | 592 | ||
593 | /* | 593 | /* |
594 | * set the i_blocks count to zero | 594 | * set the i_blocks count to zero |
595 | * so that the ext4_delete_inode does the | 595 | * so that the ext4_evict_inode() does the |
596 | * right job | 596 | * right job |
597 | * | 597 | * |
598 | * We don't need to take the i_lock because | 598 | * We don't need to take the i_lock because |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 9f2311bc9c4f..503ea15dc5db 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -273,6 +273,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
273 | int replaced_count = 0; | 273 | int replaced_count = 0; |
274 | int from = data_offset_in_page << orig_inode->i_blkbits; | 274 | int from = data_offset_in_page << orig_inode->i_blkbits; |
275 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 275 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
276 | struct super_block *sb = orig_inode->i_sb; | ||
276 | 277 | ||
277 | /* | 278 | /* |
278 | * It needs twice the amount of ordinary journal buffers because | 279 | * It needs twice the amount of ordinary journal buffers because |
@@ -405,10 +406,13 @@ unlock_pages: | |||
405 | page_cache_release(pagep[1]); | 406 | page_cache_release(pagep[1]); |
406 | stop_journal: | 407 | stop_journal: |
407 | ext4_journal_stop(handle); | 408 | ext4_journal_stop(handle); |
409 | if (*err == -ENOSPC && | ||
410 | ext4_should_retry_alloc(sb, &retries)) | ||
411 | goto again; | ||
408 | /* Buffer was busy because probably is pinned to journal transaction, | 412 | /* Buffer was busy because probably is pinned to journal transaction, |
409 | * force transaction commit may help to free it. */ | 413 | * force transaction commit may help to free it. */ |
410 | if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb, | 414 | if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal && |
411 | &retries)) | 415 | jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal)) |
412 | goto again; | 416 | goto again; |
413 | return replaced_count; | 417 | return replaced_count; |
414 | 418 | ||
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 426211882f72..2291923dae4e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -2814,7 +2814,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2814 | ext4_orphan_add(handle, inode); | 2814 | ext4_orphan_add(handle, inode); |
2815 | inode->i_ctime = ext4_current_time(inode); | 2815 | inode->i_ctime = ext4_current_time(inode); |
2816 | ext4_mark_inode_dirty(handle, inode); | 2816 | ext4_mark_inode_dirty(handle, inode); |
2817 | retval = 0; | ||
2818 | 2817 | ||
2819 | end_unlink: | 2818 | end_unlink: |
2820 | brelse(bh); | 2819 | brelse(bh); |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ca4588388fc3..bf76f405a5f9 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -856,7 +856,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
856 | n_group_desc[gdb_num] = gdb_bh; | 856 | n_group_desc[gdb_num] = gdb_bh; |
857 | EXT4_SB(sb)->s_group_desc = n_group_desc; | 857 | EXT4_SB(sb)->s_group_desc = n_group_desc; |
858 | EXT4_SB(sb)->s_gdb_count++; | 858 | EXT4_SB(sb)->s_gdb_count++; |
859 | ext4_kvfree(o_group_desc); | 859 | kvfree(o_group_desc); |
860 | 860 | ||
861 | le16_add_cpu(&es->s_reserved_gdt_blocks, -1); | 861 | le16_add_cpu(&es->s_reserved_gdt_blocks, -1); |
862 | err = ext4_handle_dirty_super(handle, sb); | 862 | err = ext4_handle_dirty_super(handle, sb); |
@@ -866,7 +866,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
866 | return err; | 866 | return err; |
867 | 867 | ||
868 | exit_inode: | 868 | exit_inode: |
869 | ext4_kvfree(n_group_desc); | 869 | kvfree(n_group_desc); |
870 | brelse(iloc.bh); | 870 | brelse(iloc.bh); |
871 | exit_dind: | 871 | exit_dind: |
872 | brelse(dind); | 872 | brelse(dind); |
@@ -909,7 +909,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb, | |||
909 | n_group_desc[gdb_num] = gdb_bh; | 909 | n_group_desc[gdb_num] = gdb_bh; |
910 | EXT4_SB(sb)->s_group_desc = n_group_desc; | 910 | EXT4_SB(sb)->s_group_desc = n_group_desc; |
911 | EXT4_SB(sb)->s_gdb_count++; | 911 | EXT4_SB(sb)->s_gdb_count++; |
912 | ext4_kvfree(o_group_desc); | 912 | kvfree(o_group_desc); |
913 | BUFFER_TRACE(gdb_bh, "get_write_access"); | 913 | BUFFER_TRACE(gdb_bh, "get_write_access"); |
914 | err = ext4_journal_get_write_access(handle, gdb_bh); | 914 | err = ext4_journal_get_write_access(handle, gdb_bh); |
915 | if (unlikely(err)) | 915 | if (unlikely(err)) |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 63e802b8ec68..43c92b1685cb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -176,15 +176,6 @@ void *ext4_kvzalloc(size_t size, gfp_t flags) | |||
176 | return ret; | 176 | return ret; |
177 | } | 177 | } |
178 | 178 | ||
179 | void ext4_kvfree(void *ptr) | ||
180 | { | ||
181 | if (is_vmalloc_addr(ptr)) | ||
182 | vfree(ptr); | ||
183 | else | ||
184 | kfree(ptr); | ||
185 | |||
186 | } | ||
187 | |||
188 | ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, | 179 | ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, |
189 | struct ext4_group_desc *bg) | 180 | struct ext4_group_desc *bg) |
190 | { | 181 | { |
@@ -811,8 +802,8 @@ static void ext4_put_super(struct super_block *sb) | |||
811 | 802 | ||
812 | for (i = 0; i < sbi->s_gdb_count; i++) | 803 | for (i = 0; i < sbi->s_gdb_count; i++) |
813 | brelse(sbi->s_group_desc[i]); | 804 | brelse(sbi->s_group_desc[i]); |
814 | ext4_kvfree(sbi->s_group_desc); | 805 | kvfree(sbi->s_group_desc); |
815 | ext4_kvfree(sbi->s_flex_groups); | 806 | kvfree(sbi->s_flex_groups); |
816 | percpu_counter_destroy(&sbi->s_freeclusters_counter); | 807 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
817 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 808 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
818 | percpu_counter_destroy(&sbi->s_dirs_counter); | 809 | percpu_counter_destroy(&sbi->s_dirs_counter); |
@@ -880,10 +871,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
880 | spin_lock_init(&ei->i_prealloc_lock); | 871 | spin_lock_init(&ei->i_prealloc_lock); |
881 | ext4_es_init_tree(&ei->i_es_tree); | 872 | ext4_es_init_tree(&ei->i_es_tree); |
882 | rwlock_init(&ei->i_es_lock); | 873 | rwlock_init(&ei->i_es_lock); |
883 | INIT_LIST_HEAD(&ei->i_es_lru); | 874 | INIT_LIST_HEAD(&ei->i_es_list); |
884 | ei->i_es_all_nr = 0; | 875 | ei->i_es_all_nr = 0; |
885 | ei->i_es_lru_nr = 0; | 876 | ei->i_es_shk_nr = 0; |
886 | ei->i_touch_when = 0; | 877 | ei->i_es_shrink_lblk = 0; |
887 | ei->i_reserved_data_blocks = 0; | 878 | ei->i_reserved_data_blocks = 0; |
888 | ei->i_reserved_meta_blocks = 0; | 879 | ei->i_reserved_meta_blocks = 0; |
889 | ei->i_allocated_meta_blocks = 0; | 880 | ei->i_allocated_meta_blocks = 0; |
@@ -973,7 +964,6 @@ void ext4_clear_inode(struct inode *inode) | |||
973 | dquot_drop(inode); | 964 | dquot_drop(inode); |
974 | ext4_discard_preallocations(inode); | 965 | ext4_discard_preallocations(inode); |
975 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); | 966 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); |
976 | ext4_es_lru_del(inode); | ||
977 | if (EXT4_I(inode)->jinode) { | 967 | if (EXT4_I(inode)->jinode) { |
978 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), | 968 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), |
979 | EXT4_I(inode)->jinode); | 969 | EXT4_I(inode)->jinode); |
@@ -1153,7 +1143,7 @@ enum { | |||
1153 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1143 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1154 | Opt_dioread_nolock, Opt_dioread_lock, | 1144 | Opt_dioread_nolock, Opt_dioread_lock, |
1155 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, | 1145 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, |
1156 | Opt_max_dir_size_kb, | 1146 | Opt_max_dir_size_kb, Opt_nojournal_checksum, |
1157 | }; | 1147 | }; |
1158 | 1148 | ||
1159 | static const match_table_t tokens = { | 1149 | static const match_table_t tokens = { |
@@ -1187,6 +1177,7 @@ static const match_table_t tokens = { | |||
1187 | {Opt_journal_dev, "journal_dev=%u"}, | 1177 | {Opt_journal_dev, "journal_dev=%u"}, |
1188 | {Opt_journal_path, "journal_path=%s"}, | 1178 | {Opt_journal_path, "journal_path=%s"}, |
1189 | {Opt_journal_checksum, "journal_checksum"}, | 1179 | {Opt_journal_checksum, "journal_checksum"}, |
1180 | {Opt_nojournal_checksum, "nojournal_checksum"}, | ||
1190 | {Opt_journal_async_commit, "journal_async_commit"}, | 1181 | {Opt_journal_async_commit, "journal_async_commit"}, |
1191 | {Opt_abort, "abort"}, | 1182 | {Opt_abort, "abort"}, |
1192 | {Opt_data_journal, "data=journal"}, | 1183 | {Opt_data_journal, "data=journal"}, |
@@ -1368,6 +1359,8 @@ static const struct mount_opts { | |||
1368 | MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, | 1359 | MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, |
1369 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, | 1360 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, |
1370 | MOPT_EXT4_ONLY | MOPT_CLEAR}, | 1361 | MOPT_EXT4_ONLY | MOPT_CLEAR}, |
1362 | {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, | ||
1363 | MOPT_EXT4_ONLY | MOPT_CLEAR}, | ||
1371 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, | 1364 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, |
1372 | MOPT_EXT4_ONLY | MOPT_SET}, | 1365 | MOPT_EXT4_ONLY | MOPT_SET}, |
1373 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | | 1366 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | |
@@ -1709,6 +1702,12 @@ static int parse_options(char *options, struct super_block *sb, | |||
1709 | return 0; | 1702 | return 0; |
1710 | } | 1703 | } |
1711 | } | 1704 | } |
1705 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && | ||
1706 | test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | ||
1707 | ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit " | ||
1708 | "in data=ordered mode"); | ||
1709 | return 0; | ||
1710 | } | ||
1712 | return 1; | 1711 | return 1; |
1713 | } | 1712 | } |
1714 | 1713 | ||
@@ -1946,7 +1945,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) | |||
1946 | memcpy(new_groups, sbi->s_flex_groups, | 1945 | memcpy(new_groups, sbi->s_flex_groups, |
1947 | (sbi->s_flex_groups_allocated * | 1946 | (sbi->s_flex_groups_allocated * |
1948 | sizeof(struct flex_groups))); | 1947 | sizeof(struct flex_groups))); |
1949 | ext4_kvfree(sbi->s_flex_groups); | 1948 | kvfree(sbi->s_flex_groups); |
1950 | } | 1949 | } |
1951 | sbi->s_flex_groups = new_groups; | 1950 | sbi->s_flex_groups = new_groups; |
1952 | sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); | 1951 | sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); |
@@ -3317,7 +3316,7 @@ int ext4_calculate_overhead(struct super_block *sb) | |||
3317 | struct ext4_super_block *es = sbi->s_es; | 3316 | struct ext4_super_block *es = sbi->s_es; |
3318 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); | 3317 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
3319 | ext4_fsblk_t overhead = 0; | 3318 | ext4_fsblk_t overhead = 0; |
3320 | char *buf = (char *) get_zeroed_page(GFP_KERNEL); | 3319 | char *buf = (char *) get_zeroed_page(GFP_NOFS); |
3321 | 3320 | ||
3322 | if (!buf) | 3321 | if (!buf) |
3323 | return -ENOMEM; | 3322 | return -ENOMEM; |
@@ -3345,8 +3344,8 @@ int ext4_calculate_overhead(struct super_block *sb) | |||
3345 | memset(buf, 0, PAGE_SIZE); | 3344 | memset(buf, 0, PAGE_SIZE); |
3346 | cond_resched(); | 3345 | cond_resched(); |
3347 | } | 3346 | } |
3348 | /* Add the journal blocks as well */ | 3347 | /* Add the internal journal blocks as well */ |
3349 | if (sbi->s_journal) | 3348 | if (sbi->s_journal && !sbi->journal_bdev) |
3350 | overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); | 3349 | overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); |
3351 | 3350 | ||
3352 | sbi->s_overhead = overhead; | 3351 | sbi->s_overhead = overhead; |
@@ -4232,7 +4231,7 @@ failed_mount7: | |||
4232 | failed_mount6: | 4231 | failed_mount6: |
4233 | ext4_mb_release(sb); | 4232 | ext4_mb_release(sb); |
4234 | if (sbi->s_flex_groups) | 4233 | if (sbi->s_flex_groups) |
4235 | ext4_kvfree(sbi->s_flex_groups); | 4234 | kvfree(sbi->s_flex_groups); |
4236 | percpu_counter_destroy(&sbi->s_freeclusters_counter); | 4235 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
4237 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 4236 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
4238 | percpu_counter_destroy(&sbi->s_dirs_counter); | 4237 | percpu_counter_destroy(&sbi->s_dirs_counter); |
@@ -4261,7 +4260,7 @@ failed_mount3: | |||
4261 | failed_mount2: | 4260 | failed_mount2: |
4262 | for (i = 0; i < db_count; i++) | 4261 | for (i = 0; i < db_count; i++) |
4263 | brelse(sbi->s_group_desc[i]); | 4262 | brelse(sbi->s_group_desc[i]); |
4264 | ext4_kvfree(sbi->s_group_desc); | 4263 | kvfree(sbi->s_group_desc); |
4265 | failed_mount: | 4264 | failed_mount: |
4266 | if (sbi->s_chksum_driver) | 4265 | if (sbi->s_chksum_driver) |
4267 | crypto_free_shash(sbi->s_chksum_driver); | 4266 | crypto_free_shash(sbi->s_chksum_driver); |
@@ -4862,6 +4861,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4862 | goto restore_opts; | 4861 | goto restore_opts; |
4863 | } | 4862 | } |
4864 | 4863 | ||
4864 | if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ | ||
4865 | test_opt(sb, JOURNAL_CHECKSUM)) { | ||
4866 | ext4_msg(sb, KERN_ERR, "changing journal_checksum " | ||
4867 | "during remount not supported"); | ||
4868 | err = -EINVAL; | ||
4869 | goto restore_opts; | ||
4870 | } | ||
4871 | |||
4865 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | 4872 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
4866 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { | 4873 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { |
4867 | ext4_msg(sb, KERN_ERR, "can't mount with " | 4874 | ext4_msg(sb, KERN_ERR, "can't mount with " |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 1df94fabe4eb..b96bd8076b70 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1714,8 +1714,7 @@ int jbd2_journal_destroy(journal_t *journal) | |||
1714 | 1714 | ||
1715 | if (journal->j_proc_entry) | 1715 | if (journal->j_proc_entry) |
1716 | jbd2_stats_proc_exit(journal); | 1716 | jbd2_stats_proc_exit(journal); |
1717 | if (journal->j_inode) | 1717 | iput(journal->j_inode); |
1718 | iput(journal->j_inode); | ||
1719 | if (journal->j_revoke) | 1718 | if (journal->j_revoke) |
1720 | jbd2_journal_destroy_revoke(journal); | 1719 | jbd2_journal_destroy_revoke(journal); |
1721 | if (journal->j_chksum_driver) | 1720 | if (journal->j_chksum_driver) |
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index ff4bd1b35246..6cfb841fea7c 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h | |||
@@ -43,15 +43,13 @@ struct extent_status; | |||
43 | { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ | 43 | { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ |
44 | { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ | 44 | { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ |
45 | { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ | 45 | { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ |
46 | { EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }, \ | 46 | { EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }) |
47 | { EXT4_GET_BLOCKS_NO_PUT_HOLE, "NO_PUT_HOLE" }) | ||
48 | 47 | ||
49 | #define show_mflags(flags) __print_flags(flags, "", \ | 48 | #define show_mflags(flags) __print_flags(flags, "", \ |
50 | { EXT4_MAP_NEW, "N" }, \ | 49 | { EXT4_MAP_NEW, "N" }, \ |
51 | { EXT4_MAP_MAPPED, "M" }, \ | 50 | { EXT4_MAP_MAPPED, "M" }, \ |
52 | { EXT4_MAP_UNWRITTEN, "U" }, \ | 51 | { EXT4_MAP_UNWRITTEN, "U" }, \ |
53 | { EXT4_MAP_BOUNDARY, "B" }, \ | 52 | { EXT4_MAP_BOUNDARY, "B" }) |
54 | { EXT4_MAP_FROM_CLUSTER, "C" }) | ||
55 | 53 | ||
56 | #define show_free_flags(flags) __print_flags(flags, "|", \ | 54 | #define show_free_flags(flags) __print_flags(flags, "|", \ |
57 | { EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \ | 55 | { EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \ |
@@ -2452,15 +2450,14 @@ TRACE_EVENT(ext4_collapse_range, | |||
2452 | 2450 | ||
2453 | TRACE_EVENT(ext4_es_shrink, | 2451 | TRACE_EVENT(ext4_es_shrink, |
2454 | TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, | 2452 | TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, |
2455 | int skip_precached, int nr_skipped, int retried), | 2453 | int nr_skipped, int retried), |
2456 | 2454 | ||
2457 | TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried), | 2455 | TP_ARGS(sb, nr_shrunk, scan_time, nr_skipped, retried), |
2458 | 2456 | ||
2459 | TP_STRUCT__entry( | 2457 | TP_STRUCT__entry( |
2460 | __field( dev_t, dev ) | 2458 | __field( dev_t, dev ) |
2461 | __field( int, nr_shrunk ) | 2459 | __field( int, nr_shrunk ) |
2462 | __field( unsigned long long, scan_time ) | 2460 | __field( unsigned long long, scan_time ) |
2463 | __field( int, skip_precached ) | ||
2464 | __field( int, nr_skipped ) | 2461 | __field( int, nr_skipped ) |
2465 | __field( int, retried ) | 2462 | __field( int, retried ) |
2466 | ), | 2463 | ), |
@@ -2469,16 +2466,14 @@ TRACE_EVENT(ext4_es_shrink, | |||
2469 | __entry->dev = sb->s_dev; | 2466 | __entry->dev = sb->s_dev; |
2470 | __entry->nr_shrunk = nr_shrunk; | 2467 | __entry->nr_shrunk = nr_shrunk; |
2471 | __entry->scan_time = div_u64(scan_time, 1000); | 2468 | __entry->scan_time = div_u64(scan_time, 1000); |
2472 | __entry->skip_precached = skip_precached; | ||
2473 | __entry->nr_skipped = nr_skipped; | 2469 | __entry->nr_skipped = nr_skipped; |
2474 | __entry->retried = retried; | 2470 | __entry->retried = retried; |
2475 | ), | 2471 | ), |
2476 | 2472 | ||
2477 | TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d " | 2473 | TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu " |
2478 | "nr_skipped %d retried %d", | 2474 | "nr_skipped %d retried %d", |
2479 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, | 2475 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, |
2480 | __entry->scan_time, __entry->skip_precached, | 2476 | __entry->scan_time, __entry->nr_skipped, __entry->retried) |
2481 | __entry->nr_skipped, __entry->retried) | ||
2482 | ); | 2477 | ); |
2483 | 2478 | ||
2484 | #endif /* _TRACE_EXT4_H */ | 2479 | #endif /* _TRACE_EXT4_H */ |