about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Eric Whitney <enwlinux@gmail.com> 2018-10-01 14:25:08 -0400
committer Theodore Ts'o <tytso@mit.edu> 2018-10-01 14:25:08 -0400
commit 9fe671496b6c286f9033aedfc1718d67721da0ae (patch)
tree 582e6b8c567477053abf885cd91e53e65dc12c18
parent b6bf9171ef5c37b66d446378ba63af5339a56a97 (diff)
ext4: adjust reserved cluster count when removing extents
Modify ext4_ext_remove_space() and the code it calls to correct the reserved cluster count for pending reservations (delayed allocated clusters shared with allocated blocks) when a block range is removed from the extent tree. Pending reservations may be found for the clusters at the ends of written or unwritten extents when a block range is removed. If a physical cluster at the end of an extent is freed, it's necessary to increment the reserved cluster count to maintain correct accounting if the corresponding logical cluster is shared with at least one delayed and unwritten extent as found in the extents status tree.

Add a new function, ext4_rereserve_cluster(), to reapply a reservation on a delayed allocated cluster sharing blocks with a freed allocated cluster. To avoid ENOSPC on reservation, a flag is applied to ext4_free_blocks() to briefly defer updating the freeclusters counter when an allocated cluster is freed. This prevents another thread from allocating the freed block before the reservation can be reapplied.

Redefine the partial cluster object as a struct to carry more state information and to clarify the code using it.

Adjust the conditional code structure in ext4_ext_remove_space() to reduce the indentation level in the main body of the code to improve readability.

Signed-off-by: Eric Whitney <enwlinux@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r-- fs/ext4/ext4.h 1
-rw-r--r-- fs/ext4/ext4_extents.h 13
-rw-r--r-- fs/ext4/extents.c 284
-rw-r--r-- fs/ext4/mballoc.c 14
-rw-r--r-- include/trace/events/ext4.h 60
5 files changed, 238 insertions, 134 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d85fd5c8a2c4..0bdbbd151d2c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -628,6 +628,7 @@ enum {
628#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 628#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
629#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 629#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
630#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 630#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
631#define EXT4_FREE_BLOCKS_RERESERVE_CLUSTER 0x0040
631 632
632/* 633/*
633 * ioctl commands 634 * ioctl commands
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index adf6668b596f..98bd0e9ee7df 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -120,6 +120,19 @@ struct ext4_ext_path {
120}; 120};
121 121
122/* 122/*
123 * Used to record a portion of a cluster found at the beginning or end
124 * of an extent while traversing the extent tree during space removal.
125 * A partial cluster may be removed if it does not contain blocks shared
126 * with extents that aren't being deleted (tofree state). Otherwise,
127 * it cannot be removed (nofree state).
128 */
129struct partial_cluster {
130 ext4_fsblk_t pclu; /* physical cluster number */
131 ext4_lblk_t lblk; /* logical block number within logical cluster */
132 enum {initial, tofree, nofree} state;
133};
134
135/*
123 * structure for external API 136 * structure for external API
124 */ 137 */
125 138
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b52ac813ca20..240b6dea5441 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2490,106 +2490,157 @@ static inline int get_default_free_blocks_flags(struct inode *inode)
2490 return 0; 2490 return 0;
2491} 2491}
2492 2492
2493/*
2494 * ext4_rereserve_cluster - increment the reserved cluster count when
2495 * freeing a cluster with a pending reservation
2496 *
2497 * @inode - file containing the cluster
2498 * @lblk - logical block in cluster to be reserved
2499 *
2500 * Increments the reserved cluster count and adjusts quota in a bigalloc
2501 * file system when freeing a partial cluster containing at least one
2502 * delayed and unwritten block. A partial cluster meeting that
2503 * requirement will have a pending reservation. If so, the
2504 * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
2505 * defer reserved and allocated space accounting to a subsequent call
2506 * to this function.
2507 */
2508static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
2509{
2510 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2511 struct ext4_inode_info *ei = EXT4_I(inode);
2512
2513 dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));
2514
2515 spin_lock(&ei->i_block_reservation_lock);
2516 ei->i_reserved_data_blocks++;
2517 percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
2518 spin_unlock(&ei->i_block_reservation_lock);
2519
2520 percpu_counter_add(&sbi->s_freeclusters_counter, 1);
2521 ext4_remove_pending(inode, lblk);
2522}
2523
2493static int ext4_remove_blocks(handle_t *handle, struct inode *inode, 2524static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2494 struct ext4_extent *ex, 2525 struct ext4_extent *ex,
2495 long long *partial_cluster, 2526 struct partial_cluster *partial,
2496 ext4_lblk_t from, ext4_lblk_t to) 2527 ext4_lblk_t from, ext4_lblk_t to)
2497{ 2528{
2498 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2529 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2499 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2530 unsigned short ee_len = ext4_ext_get_actual_len(ex);
2500 ext4_fsblk_t pblk; 2531 ext4_fsblk_t last_pblk, pblk;
2501 int flags = get_default_free_blocks_flags(inode); 2532 ext4_lblk_t num;
2533 int flags;
2534
2535 /* only extent tail removal is allowed */
2536 if (from < le32_to_cpu(ex->ee_block) ||
2537 to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
2538 ext4_error(sbi->s_sb,
2539 "strange request: removal(2) %u-%u from %u:%u",
2540 from, to, le32_to_cpu(ex->ee_block), ee_len);
2541 return 0;
2542 }
2543
2544#ifdef EXTENTS_STATS
2545 spin_lock(&sbi->s_ext_stats_lock);
2546 sbi->s_ext_blocks += ee_len;
2547 sbi->s_ext_extents++;
2548 if (ee_len < sbi->s_ext_min)
2549 sbi->s_ext_min = ee_len;
2550 if (ee_len > sbi->s_ext_max)
2551 sbi->s_ext_max = ee_len;
2552 if (ext_depth(inode) > sbi->s_depth_max)
2553 sbi->s_depth_max = ext_depth(inode);
2554 spin_unlock(&sbi->s_ext_stats_lock);
2555#endif
2556
2557 trace_ext4_remove_blocks(inode, ex, from, to, partial);
2502 2558
2503 /* 2559 /*
2504 * For bigalloc file systems, we never free a partial cluster 2560 * if we have a partial cluster, and it's different from the
2505 * at the beginning of the extent. Instead, we make a note 2561 * cluster of the last block in the extent, we free it
2506 * that we tried freeing the cluster, and check to see if we
2507 * need to free it on a subsequent call to ext4_remove_blocks,
2508 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
2509 */ 2562 */
2510 flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER; 2563 last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
2564
2565 if (partial->state != initial &&
2566 partial->pclu != EXT4_B2C(sbi, last_pblk)) {
2567 if (partial->state == tofree) {
2568 flags = get_default_free_blocks_flags(inode);
2569 if (ext4_is_pending(inode, partial->lblk))
2570 flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2571 ext4_free_blocks(handle, inode, NULL,
2572 EXT4_C2B(sbi, partial->pclu),
2573 sbi->s_cluster_ratio, flags);
2574 if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2575 ext4_rereserve_cluster(inode, partial->lblk);
2576 }
2577 partial->state = initial;
2578 }
2579
2580 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2581 pblk = ext4_ext_pblock(ex) + ee_len - num;
2511 2582
2512 trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster);
2513 /* 2583 /*
2514 * If we have a partial cluster, and it's different from the 2584 * We free the partial cluster at the end of the extent (if any),
2515 * cluster of the last block, we need to explicitly free the 2585 * unless the cluster is used by another extent (partial_cluster
2516 * partial cluster here. 2586 * state is nofree). If a partial cluster exists here, it must be
2587 * shared with the last block in the extent.
2517 */ 2588 */
2518 pblk = ext4_ext_pblock(ex) + ee_len - 1; 2589 flags = get_default_free_blocks_flags(inode);
2519 if (*partial_cluster > 0 && 2590
2520 *partial_cluster != (long long) EXT4_B2C(sbi, pblk)) { 2591 /* partial, left end cluster aligned, right end unaligned */
2592 if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
2593 (EXT4_LBLK_CMASK(sbi, to) >= from) &&
2594 (partial->state != nofree)) {
2595 if (ext4_is_pending(inode, to))
2596 flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2521 ext4_free_blocks(handle, inode, NULL, 2597 ext4_free_blocks(handle, inode, NULL,
2522 EXT4_C2B(sbi, *partial_cluster), 2598 EXT4_PBLK_CMASK(sbi, last_pblk),
2523 sbi->s_cluster_ratio, flags); 2599 sbi->s_cluster_ratio, flags);
2524 *partial_cluster = 0; 2600 if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2601 ext4_rereserve_cluster(inode, to);
2602 partial->state = initial;
2603 flags = get_default_free_blocks_flags(inode);
2525 } 2604 }
2526 2605
2527#ifdef EXTENTS_STATS 2606 flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
2528 {
2529 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2530 spin_lock(&sbi->s_ext_stats_lock);
2531 sbi->s_ext_blocks += ee_len;
2532 sbi->s_ext_extents++;
2533 if (ee_len < sbi->s_ext_min)
2534 sbi->s_ext_min = ee_len;
2535 if (ee_len > sbi->s_ext_max)
2536 sbi->s_ext_max = ee_len;
2537 if (ext_depth(inode) > sbi->s_depth_max)
2538 sbi->s_depth_max = ext_depth(inode);
2539 spin_unlock(&sbi->s_ext_stats_lock);
2540 }
2541#endif
2542 if (from >= le32_to_cpu(ex->ee_block)
2543 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
2544 /* tail removal */
2545 ext4_lblk_t num;
2546 long long first_cluster;
2547
2548 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2549 pblk = ext4_ext_pblock(ex) + ee_len - num;
2550 /*
2551 * Usually we want to free partial cluster at the end of the
2552 * extent, except for the situation when the cluster is still
2553 * used by any other extent (partial_cluster is negative).
2554 */
2555 if (*partial_cluster < 0 &&
2556 *partial_cluster == -(long long) EXT4_B2C(sbi, pblk+num-1))
2557 flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
2558 2607
2559 ext_debug("free last %u blocks starting %llu partial %lld\n", 2608 /*
2560 num, pblk, *partial_cluster); 2609 * For bigalloc file systems, we never free a partial cluster
2561 ext4_free_blocks(handle, inode, NULL, pblk, num, flags); 2610 * at the beginning of the extent. Instead, we check to see if we
2562 /* 2611 * need to free it on a subsequent call to ext4_remove_blocks,
2563 * If the block range to be freed didn't start at the 2612 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
2564 * beginning of a cluster, and we removed the entire 2613 */
2565 * extent and the cluster is not used by any other extent, 2614 flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
2566 * save the partial cluster here, since we might need to 2615 ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
2567 * delete if we determine that the truncate or punch hole 2616
2568 * operation has removed all of the blocks in the cluster. 2617 /* reset the partial cluster if we've freed past it */
2569 * If that cluster is used by another extent, preserve its 2618 if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
2570 * negative value so it isn't freed later on. 2619 partial->state = initial;
2571 * 2620
2572 * If the whole extent wasn't freed, we've reached the 2621 /*
2573 * start of the truncated/punched region and have finished 2622 * If we've freed the entire extent but the beginning is not left
2574 * removing blocks. If there's a partial cluster here it's 2623 * cluster aligned and is not marked as ineligible for freeing we
2575 * shared with the remainder of the extent and is no longer 2624 * record the partial cluster at the beginning of the extent. It
2576 * a candidate for removal. 2625 * wasn't freed by the preceding ext4_free_blocks() call, and we
2577 */ 2626 * need to look farther to the left to determine if it's to be freed
2578 if (EXT4_PBLK_COFF(sbi, pblk) && ee_len == num) { 2627 * (not shared with another extent). Else, reset the partial
2579 first_cluster = (long long) EXT4_B2C(sbi, pblk); 2628 * cluster - we're either done freeing or the beginning of the
2580 if (first_cluster != -*partial_cluster) 2629 * extent is left cluster aligned.
2581 *partial_cluster = first_cluster; 2630 */
2582 } else { 2631 if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
2583 *partial_cluster = 0; 2632 if (partial->state == initial) {
2633 partial->pclu = EXT4_B2C(sbi, pblk);
2634 partial->lblk = from;
2635 partial->state = tofree;
2584 } 2636 }
2585 } else 2637 } else {
2586 ext4_error(sbi->s_sb, "strange request: removal(2) " 2638 partial->state = initial;
2587 "%u-%u from %u:%u", 2639 }
2588 from, to, le32_to_cpu(ex->ee_block), ee_len); 2640
2589 return 0; 2641 return 0;
2590} 2642}
2591 2643
2592
2593/* 2644/*
2594 * ext4_ext_rm_leaf() Removes the extents associated with the 2645 * ext4_ext_rm_leaf() Removes the extents associated with the
2595 * blocks appearing between "start" and "end". Both "start" 2646 * blocks appearing between "start" and "end". Both "start"
@@ -2608,7 +2659,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2608static int 2659static int
2609ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, 2660ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2610 struct ext4_ext_path *path, 2661 struct ext4_ext_path *path,
2611 long long *partial_cluster, 2662 struct partial_cluster *partial,
2612 ext4_lblk_t start, ext4_lblk_t end) 2663 ext4_lblk_t start, ext4_lblk_t end)
2613{ 2664{
2614 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2665 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2640,7 +2691,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2640 ex_ee_block = le32_to_cpu(ex->ee_block); 2691 ex_ee_block = le32_to_cpu(ex->ee_block);
2641 ex_ee_len = ext4_ext_get_actual_len(ex); 2692 ex_ee_len = ext4_ext_get_actual_len(ex);
2642 2693
2643 trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); 2694 trace_ext4_ext_rm_leaf(inode, start, ex, partial);
2644 2695
2645 while (ex >= EXT_FIRST_EXTENT(eh) && 2696 while (ex >= EXT_FIRST_EXTENT(eh) &&
2646 ex_ee_block + ex_ee_len > start) { 2697 ex_ee_block + ex_ee_len > start) {
@@ -2671,8 +2722,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2671 */ 2722 */
2672 if (sbi->s_cluster_ratio > 1) { 2723 if (sbi->s_cluster_ratio > 1) {
2673 pblk = ext4_ext_pblock(ex); 2724 pblk = ext4_ext_pblock(ex);
2674 *partial_cluster = 2725 partial->pclu = EXT4_B2C(sbi, pblk);
2675 -(long long) EXT4_B2C(sbi, pblk); 2726 partial->state = nofree;
2676 } 2727 }
2677 ex--; 2728 ex--;
2678 ex_ee_block = le32_to_cpu(ex->ee_block); 2729 ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2714,8 +2765,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2714 if (err) 2765 if (err)
2715 goto out; 2766 goto out;
2716 2767
2717 err = ext4_remove_blocks(handle, inode, ex, partial_cluster, 2768 err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
2718 a, b);
2719 if (err) 2769 if (err)
2720 goto out; 2770 goto out;
2721 2771
@@ -2769,18 +2819,23 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2769 * If there's a partial cluster and at least one extent remains in 2819 * If there's a partial cluster and at least one extent remains in
2770 * the leaf, free the partial cluster if it isn't shared with the 2820 * the leaf, free the partial cluster if it isn't shared with the
2771 * current extent. If it is shared with the current extent 2821 * current extent. If it is shared with the current extent
2772 * we zero partial_cluster because we've reached the start of the 2822 * we reset the partial cluster because we've reached the start of the
2773 * truncated/punched region and we're done removing blocks. 2823 * truncated/punched region and we're done removing blocks.
2774 */ 2824 */
2775 if (*partial_cluster > 0 && ex >= EXT_FIRST_EXTENT(eh)) { 2825 if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
2776 pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; 2826 pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
2777 if (*partial_cluster != (long long) EXT4_B2C(sbi, pblk)) { 2827 if (partial->pclu != EXT4_B2C(sbi, pblk)) {
2828 int flags = get_default_free_blocks_flags(inode);
2829
2830 if (ext4_is_pending(inode, partial->lblk))
2831 flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2778 ext4_free_blocks(handle, inode, NULL, 2832 ext4_free_blocks(handle, inode, NULL,
2779 EXT4_C2B(sbi, *partial_cluster), 2833 EXT4_C2B(sbi, partial->pclu),
2780 sbi->s_cluster_ratio, 2834 sbi->s_cluster_ratio, flags);
2781 get_default_free_blocks_flags(inode)); 2835 if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2836 ext4_rereserve_cluster(inode, partial->lblk);
2782 } 2837 }
2783 *partial_cluster = 0; 2838 partial->state = initial;
2784 } 2839 }
2785 2840
2786 /* if this leaf is free, then we should 2841 /* if this leaf is free, then we should
@@ -2819,10 +2874,14 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2819 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2874 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2820 int depth = ext_depth(inode); 2875 int depth = ext_depth(inode);
2821 struct ext4_ext_path *path = NULL; 2876 struct ext4_ext_path *path = NULL;
2822 long long partial_cluster = 0; 2877 struct partial_cluster partial;
2823 handle_t *handle; 2878 handle_t *handle;
2824 int i = 0, err = 0; 2879 int i = 0, err = 0;
2825 2880
2881 partial.pclu = 0;
2882 partial.lblk = 0;
2883 partial.state = initial;
2884
2826 ext_debug("truncate since %u to %u\n", start, end); 2885 ext_debug("truncate since %u to %u\n", start, end);
2827 2886
2828 /* probably first extent we're gonna free will be last in block */ 2887 /* probably first extent we're gonna free will be last in block */
@@ -2882,8 +2941,8 @@ again:
2882 */ 2941 */
2883 if (sbi->s_cluster_ratio > 1) { 2942 if (sbi->s_cluster_ratio > 1) {
2884 pblk = ext4_ext_pblock(ex) + end - ee_block + 2; 2943 pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
2885 partial_cluster = 2944 partial.pclu = EXT4_B2C(sbi, pblk);
2886 -(long long) EXT4_B2C(sbi, pblk); 2945 partial.state = nofree;
2887 } 2946 }
2888 2947
2889 /* 2948 /*
@@ -2911,9 +2970,10 @@ again:
2911 &ex); 2970 &ex);
2912 if (err) 2971 if (err)
2913 goto out; 2972 goto out;
2914 if (pblk) 2973 if (pblk) {
2915 partial_cluster = 2974 partial.pclu = EXT4_B2C(sbi, pblk);
2916 -(long long) EXT4_B2C(sbi, pblk); 2975 partial.state = nofree;
2976 }
2917 } 2977 }
2918 } 2978 }
2919 /* 2979 /*
@@ -2948,8 +3008,7 @@ again:
2948 if (i == depth) { 3008 if (i == depth) {
2949 /* this is leaf block */ 3009 /* this is leaf block */
2950 err = ext4_ext_rm_leaf(handle, inode, path, 3010 err = ext4_ext_rm_leaf(handle, inode, path,
2951 &partial_cluster, start, 3011 &partial, start, end);
2952 end);
2953 /* root level has p_bh == NULL, brelse() eats this */ 3012 /* root level has p_bh == NULL, brelse() eats this */
2954 brelse(path[i].p_bh); 3013 brelse(path[i].p_bh);
2955 path[i].p_bh = NULL; 3014 path[i].p_bh = NULL;
@@ -3021,21 +3080,24 @@ again:
3021 } 3080 }
3022 } 3081 }
3023 3082
3024 trace_ext4_ext_remove_space_done(inode, start, end, depth, 3083 trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
3025 partial_cluster, path->p_hdr->eh_entries); 3084 path->p_hdr->eh_entries);
3026 3085
3027 /* 3086 /*
3028 * If we still have something in the partial cluster and we have removed 3087 * if there's a partial cluster and we have removed the first extent
3029 * even the first extent, then we should free the blocks in the partial 3088 * in the file, then we also free the partial cluster, if any
3030 * cluster as well. (This code will only run when there are no leaves
3031 * to the immediate left of the truncated/punched region.)
3032 */ 3089 */
3033 if (partial_cluster > 0 && err == 0) { 3090 if (partial.state == tofree && err == 0) {
3034 /* don't zero partial_cluster since it's not used afterwards */ 3091 int flags = get_default_free_blocks_flags(inode);
3092
3093 if (ext4_is_pending(inode, partial.lblk))
3094 flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
3035 ext4_free_blocks(handle, inode, NULL, 3095 ext4_free_blocks(handle, inode, NULL,
3036 EXT4_C2B(sbi, partial_cluster), 3096 EXT4_C2B(sbi, partial.pclu),
3037 sbi->s_cluster_ratio, 3097 sbi->s_cluster_ratio, flags);
3038 get_default_free_blocks_flags(inode)); 3098 if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
3099 ext4_rereserve_cluster(inode, partial.lblk);
3100 partial.state = initial;
3039 } 3101 }
3040 3102
3041 /* TODO: flexible tree reduction should be here */ 3103 /* TODO: flexible tree reduction should be here */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e29fce2fbf25..e2248083cdca 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4915,9 +4915,17 @@ do_more:
4915 &sbi->s_flex_groups[flex_group].free_clusters); 4915 &sbi->s_flex_groups[flex_group].free_clusters);
4916 } 4916 }
4917 4917
4918 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) 4918 /*
4919 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); 4919 * on a bigalloc file system, defer the s_freeclusters_counter
4920 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); 4920 * update to the caller (ext4_remove_space and friends) so they
4921 * can determine if a cluster freed here should be rereserved
4922 */
4923 if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
4924 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4925 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4926 percpu_counter_add(&sbi->s_freeclusters_counter,
4927 count_clusters);
4928 }
4921 4929
4922 ext4_mb_unload_buddy(&e4b); 4930 ext4_mb_unload_buddy(&e4b);
4923 4931
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 6d7a943f849c..698e0d8a5ca4 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -17,6 +17,7 @@ struct mpage_da_data;
17struct ext4_map_blocks; 17struct ext4_map_blocks;
18struct extent_status; 18struct extent_status;
19struct ext4_fsmap; 19struct ext4_fsmap;
20struct partial_cluster;
20 21
21#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) 22#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
22 23
@@ -2035,21 +2036,23 @@ TRACE_EVENT(ext4_ext_show_extent,
2035); 2036);
2036 2037
2037TRACE_EVENT(ext4_remove_blocks, 2038TRACE_EVENT(ext4_remove_blocks,
2038 TP_PROTO(struct inode *inode, struct ext4_extent *ex, 2039 TP_PROTO(struct inode *inode, struct ext4_extent *ex,
2039 ext4_lblk_t from, ext4_fsblk_t to, 2040 ext4_lblk_t from, ext4_fsblk_t to,
2040 long long partial_cluster), 2041 struct partial_cluster *pc),
2041 2042
2042 TP_ARGS(inode, ex, from, to, partial_cluster), 2043 TP_ARGS(inode, ex, from, to, pc),
2043 2044
2044 TP_STRUCT__entry( 2045 TP_STRUCT__entry(
2045 __field( dev_t, dev ) 2046 __field( dev_t, dev )
2046 __field( ino_t, ino ) 2047 __field( ino_t, ino )
2047 __field( ext4_lblk_t, from ) 2048 __field( ext4_lblk_t, from )
2048 __field( ext4_lblk_t, to ) 2049 __field( ext4_lblk_t, to )
2049 __field( long long, partial )
2050 __field( ext4_fsblk_t, ee_pblk ) 2050 __field( ext4_fsblk_t, ee_pblk )
2051 __field( ext4_lblk_t, ee_lblk ) 2051 __field( ext4_lblk_t, ee_lblk )
2052 __field( unsigned short, ee_len ) 2052 __field( unsigned short, ee_len )
2053 __field( ext4_fsblk_t, pc_pclu )
2054 __field( ext4_lblk_t, pc_lblk )
2055 __field( int, pc_state)
2053 ), 2056 ),
2054 2057
2055 TP_fast_assign( 2058 TP_fast_assign(
@@ -2057,14 +2060,16 @@ TRACE_EVENT(ext4_remove_blocks,
2057 __entry->ino = inode->i_ino; 2060 __entry->ino = inode->i_ino;
2058 __entry->from = from; 2061 __entry->from = from;
2059 __entry->to = to; 2062 __entry->to = to;
2060 __entry->partial = partial_cluster;
2061 __entry->ee_pblk = ext4_ext_pblock(ex); 2063 __entry->ee_pblk = ext4_ext_pblock(ex);
2062 __entry->ee_lblk = le32_to_cpu(ex->ee_block); 2064 __entry->ee_lblk = le32_to_cpu(ex->ee_block);
2063 __entry->ee_len = ext4_ext_get_actual_len(ex); 2065 __entry->ee_len = ext4_ext_get_actual_len(ex);
2066 __entry->pc_pclu = pc->pclu;
2067 __entry->pc_lblk = pc->lblk;
2068 __entry->pc_state = pc->state;
2064 ), 2069 ),
2065 2070
2066 TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]" 2071 TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
2067 "from %u to %u partial_cluster %lld", 2072 "from %u to %u partial [pclu %lld lblk %u state %d]",
2068 MAJOR(__entry->dev), MINOR(__entry->dev), 2073 MAJOR(__entry->dev), MINOR(__entry->dev),
2069 (unsigned long) __entry->ino, 2074 (unsigned long) __entry->ino,
2070 (unsigned) __entry->ee_lblk, 2075 (unsigned) __entry->ee_lblk,
@@ -2072,45 +2077,53 @@ TRACE_EVENT(ext4_remove_blocks,
2072 (unsigned short) __entry->ee_len, 2077 (unsigned short) __entry->ee_len,
2073 (unsigned) __entry->from, 2078 (unsigned) __entry->from,
2074 (unsigned) __entry->to, 2079 (unsigned) __entry->to,
2075 (long long) __entry->partial) 2080 (long long) __entry->pc_pclu,
2081 (unsigned int) __entry->pc_lblk,
2082 (int) __entry->pc_state)
2076); 2083);
2077 2084
2078TRACE_EVENT(ext4_ext_rm_leaf, 2085TRACE_EVENT(ext4_ext_rm_leaf,
2079 TP_PROTO(struct inode *inode, ext4_lblk_t start, 2086 TP_PROTO(struct inode *inode, ext4_lblk_t start,
2080 struct ext4_extent *ex, 2087 struct ext4_extent *ex,
2081 long long partial_cluster), 2088 struct partial_cluster *pc),
2082 2089
2083 TP_ARGS(inode, start, ex, partial_cluster), 2090 TP_ARGS(inode, start, ex, pc),
2084 2091
2085 TP_STRUCT__entry( 2092 TP_STRUCT__entry(
2086 __field( dev_t, dev ) 2093 __field( dev_t, dev )
2087 __field( ino_t, ino ) 2094 __field( ino_t, ino )
2088 __field( long long, partial )
2089 __field( ext4_lblk_t, start ) 2095 __field( ext4_lblk_t, start )
2090 __field( ext4_lblk_t, ee_lblk ) 2096 __field( ext4_lblk_t, ee_lblk )
2091 __field( ext4_fsblk_t, ee_pblk ) 2097 __field( ext4_fsblk_t, ee_pblk )
2092 __field( short, ee_len ) 2098 __field( short, ee_len )
2099 __field( ext4_fsblk_t, pc_pclu )
2100 __field( ext4_lblk_t, pc_lblk )
2101 __field( int, pc_state)
2093 ), 2102 ),
2094 2103
2095 TP_fast_assign( 2104 TP_fast_assign(
2096 __entry->dev = inode->i_sb->s_dev; 2105 __entry->dev = inode->i_sb->s_dev;
2097 __entry->ino = inode->i_ino; 2106 __entry->ino = inode->i_ino;
2098 __entry->partial = partial_cluster;
2099 __entry->start = start; 2107 __entry->start = start;
2100 __entry->ee_lblk = le32_to_cpu(ex->ee_block); 2108 __entry->ee_lblk = le32_to_cpu(ex->ee_block);
2101 __entry->ee_pblk = ext4_ext_pblock(ex); 2109 __entry->ee_pblk = ext4_ext_pblock(ex);
2102 __entry->ee_len = ext4_ext_get_actual_len(ex); 2110 __entry->ee_len = ext4_ext_get_actual_len(ex);
2111 __entry->pc_pclu = pc->pclu;
2112 __entry->pc_lblk = pc->lblk;
2113 __entry->pc_state = pc->state;
2103 ), 2114 ),
2104 2115
2105 TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]" 2116 TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
2106 "partial_cluster %lld", 2117 "partial [pclu %lld lblk %u state %d]",
2107 MAJOR(__entry->dev), MINOR(__entry->dev), 2118 MAJOR(__entry->dev), MINOR(__entry->dev),
2108 (unsigned long) __entry->ino, 2119 (unsigned long) __entry->ino,
2109 (unsigned) __entry->start, 2120 (unsigned) __entry->start,
2110 (unsigned) __entry->ee_lblk, 2121 (unsigned) __entry->ee_lblk,
2111 (unsigned long long) __entry->ee_pblk, 2122 (unsigned long long) __entry->ee_pblk,
2112 (unsigned short) __entry->ee_len, 2123 (unsigned short) __entry->ee_len,
2113 (long long) __entry->partial) 2124 (long long) __entry->pc_pclu,
2125 (unsigned int) __entry->pc_lblk,
2126 (int) __entry->pc_state)
2114); 2127);
2115 2128
2116TRACE_EVENT(ext4_ext_rm_idx, 2129TRACE_EVENT(ext4_ext_rm_idx,
@@ -2168,9 +2181,9 @@ TRACE_EVENT(ext4_ext_remove_space,
2168 2181
2169TRACE_EVENT(ext4_ext_remove_space_done, 2182TRACE_EVENT(ext4_ext_remove_space_done,
2170 TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end, 2183 TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end,
2171 int depth, long long partial, __le16 eh_entries), 2184 int depth, struct partial_cluster *pc, __le16 eh_entries),
2172 2185
2173 TP_ARGS(inode, start, end, depth, partial, eh_entries), 2186 TP_ARGS(inode, start, end, depth, pc, eh_entries),
2174 2187
2175 TP_STRUCT__entry( 2188 TP_STRUCT__entry(
2176 __field( dev_t, dev ) 2189 __field( dev_t, dev )
@@ -2178,7 +2191,9 @@ TRACE_EVENT(ext4_ext_remove_space_done,
2178 __field( ext4_lblk_t, start ) 2191 __field( ext4_lblk_t, start )
2179 __field( ext4_lblk_t, end ) 2192 __field( ext4_lblk_t, end )
2180 __field( int, depth ) 2193 __field( int, depth )
2181 __field( long long, partial ) 2194 __field( ext4_fsblk_t, pc_pclu )
2195 __field( ext4_lblk_t, pc_lblk )
2196 __field( int, pc_state )
2182 __field( unsigned short, eh_entries ) 2197 __field( unsigned short, eh_entries )
2183 ), 2198 ),
2184 2199
@@ -2188,18 +2203,23 @@ TRACE_EVENT(ext4_ext_remove_space_done,
2188 __entry->start = start; 2203 __entry->start = start;
2189 __entry->end = end; 2204 __entry->end = end;
2190 __entry->depth = depth; 2205 __entry->depth = depth;
2191 __entry->partial = partial; 2206 __entry->pc_pclu = pc->pclu;
2207 __entry->pc_lblk = pc->lblk;
2208 __entry->pc_state = pc->state;
2192 __entry->eh_entries = le16_to_cpu(eh_entries); 2209 __entry->eh_entries = le16_to_cpu(eh_entries);
2193 ), 2210 ),
2194 2211
2195 TP_printk("dev %d,%d ino %lu since %u end %u depth %d partial %lld " 2212 TP_printk("dev %d,%d ino %lu since %u end %u depth %d "
2213 "partial [pclu %lld lblk %u state %d] "
2196 "remaining_entries %u", 2214 "remaining_entries %u",
2197 MAJOR(__entry->dev), MINOR(__entry->dev), 2215 MAJOR(__entry->dev), MINOR(__entry->dev),
2198 (unsigned long) __entry->ino, 2216 (unsigned long) __entry->ino,
2199 (unsigned) __entry->start, 2217 (unsigned) __entry->start,
2200 (unsigned) __entry->end, 2218 (unsigned) __entry->end,
2201 __entry->depth, 2219 __entry->depth,
2202 (long long) __entry->partial, 2220 (long long) __entry->pc_pclu,
2221 (unsigned int) __entry->pc_lblk,
2222 (int) __entry->pc_state,
2203 (unsigned short) __entry->eh_entries) 2223 (unsigned short) __entry->eh_entries)
2204); 2224);
2205 2225