aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLukas Czerner <lczerner@redhat.com>2013-05-27 23:33:35 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-05-27 23:33:35 -0400
commitd23142c6271c499d913d0d5e668b5a4eb6dafcb0 (patch)
tree71cd768dc0d4dfc9034187393f6084042f775b57
parent61801325f790ea15ba0630a7a26bd80a0741813f (diff)
ext4: make punch hole code path work with bigalloc
Currently punch hole is disabled in file systems with the bigalloc feature enabled. However, the recent changes in the punch hole patch should make it easier to support punching holes on bigalloc-enabled file systems.

This commit changes partial_cluster handling in ext4_remove_blocks(), ext4_ext_rm_leaf() and ext4_ext_remove_space(). Currently partial_cluster is of unsigned long long type and it makes sure that we will free the partial cluster if all extents have been released from that cluster. However, it has been specifically designed only for truncate.

With punch hole we can be freeing just some extents in the cluster, leaving the rest untouched. So we have to make sure that we will notice a cluster which still has some extents. To do this I've changed partial_cluster to be of signed long long type. The only scenario where this could be a problem is when cluster size == block size; however, in that case there would not be any partial clusters, so we're safe. For bigger clusters the signed type is enough. Now we use a negative value in partial_cluster to mark such a cluster as used, hence we know that we must not free it even if all other extents have been freed from such a cluster.

This scenario can be described in a simple diagram:

|FFF...FF..FF.UUU|
 ^----------^
  punch hole

. - free space
| - cluster boundary
F - freed extent
U - used extent

Also update the respective tracepoints to use signed long long type for partial_cluster.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r--fs/ext4/extents.c69
-rw-r--r--include/trace/events/ext4.h25
2 files changed, 64 insertions, 30 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index fb9b41483c86..214e68a5e79f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2359,7 +2359,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
2359 2359
2360static int ext4_remove_blocks(handle_t *handle, struct inode *inode, 2360static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2361 struct ext4_extent *ex, 2361 struct ext4_extent *ex,
2362 ext4_fsblk_t *partial_cluster, 2362 long long *partial_cluster,
2363 ext4_lblk_t from, ext4_lblk_t to) 2363 ext4_lblk_t from, ext4_lblk_t to)
2364{ 2364{
2365 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2365 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2388,7 +2388,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2388 * partial cluster here. 2388 * partial cluster here.
2389 */ 2389 */
2390 pblk = ext4_ext_pblock(ex) + ee_len - 1; 2390 pblk = ext4_ext_pblock(ex) + ee_len - 1;
2391 if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) { 2391 if ((*partial_cluster > 0) &&
2392 (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
2392 ext4_free_blocks(handle, inode, NULL, 2393 ext4_free_blocks(handle, inode, NULL,
2393 EXT4_C2B(sbi, *partial_cluster), 2394 EXT4_C2B(sbi, *partial_cluster),
2394 sbi->s_cluster_ratio, flags); 2395 sbi->s_cluster_ratio, flags);
@@ -2414,23 +2415,41 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2414 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { 2415 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
2415 /* tail removal */ 2416 /* tail removal */
2416 ext4_lblk_t num; 2417 ext4_lblk_t num;
2418 unsigned int unaligned;
2417 2419
2418 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2420 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2419 pblk = ext4_ext_pblock(ex) + ee_len - num; 2421 pblk = ext4_ext_pblock(ex) + ee_len - num;
2420 ext_debug("free last %u blocks starting %llu\n", num, pblk); 2422 /*
2423 * Usually we want to free partial cluster at the end of the
2424 * extent, except for the situation when the cluster is still
2425 * used by any other extent (partial_cluster is negative).
2426 */
2427 if (*partial_cluster < 0 &&
2428 -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1))
2429 flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
2430
2431 ext_debug("free last %u blocks starting %llu partial %lld\n",
2432 num, pblk, *partial_cluster);
2421 ext4_free_blocks(handle, inode, NULL, pblk, num, flags); 2433 ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
2422 /* 2434 /*
2423 * If the block range to be freed didn't start at the 2435 * If the block range to be freed didn't start at the
2424 * beginning of a cluster, and we removed the entire 2436 * beginning of a cluster, and we removed the entire
2425 * extent, save the partial cluster here, since we 2437 * extent and the cluster is not used by any other extent,
2426 * might need to delete if we determine that the 2438 * save the partial cluster here, since we might need to
2427 * truncate operation has removed all of the blocks in 2439 * delete if we determine that the truncate operation has
2428 * the cluster. 2440 * removed all of the blocks in the cluster.
2441 *
2442 * On the other hand, if we did not manage to free the whole
2443 * extent, we have to mark the cluster as used (store negative
2444 * cluster number in partial_cluster).
2429 */ 2445 */
2430 if (pblk & (sbi->s_cluster_ratio - 1) && 2446 unaligned = pblk & (sbi->s_cluster_ratio - 1);
2431 (ee_len == num)) 2447 if (unaligned && (ee_len == num) &&
2448 (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
2432 *partial_cluster = EXT4_B2C(sbi, pblk); 2449 *partial_cluster = EXT4_B2C(sbi, pblk);
2433 else 2450 else if (unaligned)
2451 *partial_cluster = -((long long)EXT4_B2C(sbi, pblk));
2452 else if (*partial_cluster > 0)
2434 *partial_cluster = 0; 2453 *partial_cluster = 0;
2435 } else 2454 } else
2436 ext4_error(sbi->s_sb, "strange request: removal(2) " 2455 ext4_error(sbi->s_sb, "strange request: removal(2) "
@@ -2448,12 +2467,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2448 * @handle: The journal handle 2467 * @handle: The journal handle
2449 * @inode: The files inode 2468 * @inode: The files inode
2450 * @path: The path to the leaf 2469 * @path: The path to the leaf
2470 * @partial_cluster: The cluster which we'll have to free if all extents
2471 * has been released from it. It gets negative in case
2472 * that the cluster is still used.
2451 * @start: The first block to remove 2473 * @start: The first block to remove
2452 * @end: The last block to remove 2474 * @end: The last block to remove
2453 */ 2475 */
2454static int 2476static int
2455ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, 2477ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2456 struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster, 2478 struct ext4_ext_path *path,
2479 long long *partial_cluster,
2457 ext4_lblk_t start, ext4_lblk_t end) 2480 ext4_lblk_t start, ext4_lblk_t end)
2458{ 2481{
2459 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2482 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2466,6 +2489,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2466 unsigned short ex_ee_len; 2489 unsigned short ex_ee_len;
2467 unsigned uninitialized = 0; 2490 unsigned uninitialized = 0;
2468 struct ext4_extent *ex; 2491 struct ext4_extent *ex;
2492 ext4_fsblk_t pblk;
2469 2493
2470 /* the header must be checked already in ext4_ext_remove_space() */ 2494 /* the header must be checked already in ext4_ext_remove_space() */
2471 ext_debug("truncate since %u in leaf to %u\n", start, end); 2495 ext_debug("truncate since %u in leaf to %u\n", start, end);
@@ -2504,6 +2528,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2504 2528
2505 /* If this extent is beyond the end of the hole, skip it */ 2529 /* If this extent is beyond the end of the hole, skip it */
2506 if (end < ex_ee_block) { 2530 if (end < ex_ee_block) {
2531 /*
2532 * We're going to skip this extent and move to another,
2533 * so if this extent is not cluster aligned we have
2534 * to mark the current cluster as used to avoid
2535 * accidentally freeing it later on
2536 */
2537 pblk = ext4_ext_pblock(ex);
2538 if (pblk & (sbi->s_cluster_ratio - 1))
2539 *partial_cluster =
2540 -((long long)EXT4_B2C(sbi, pblk));
2507 ex--; 2541 ex--;
2508 ex_ee_block = le32_to_cpu(ex->ee_block); 2542 ex_ee_block = le32_to_cpu(ex->ee_block);
2509 ex_ee_len = ext4_ext_get_actual_len(ex); 2543 ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2579,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2579 sizeof(struct ext4_extent)); 2613 sizeof(struct ext4_extent));
2580 } 2614 }
2581 le16_add_cpu(&eh->eh_entries, -1); 2615 le16_add_cpu(&eh->eh_entries, -1);
2582 } else 2616 } else if (*partial_cluster > 0)
2583 *partial_cluster = 0; 2617 *partial_cluster = 0;
2584 2618
2585 err = ext4_ext_dirty(handle, inode, path + depth); 2619 err = ext4_ext_dirty(handle, inode, path + depth);
@@ -2597,11 +2631,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2597 err = ext4_ext_correct_indexes(handle, inode, path); 2631 err = ext4_ext_correct_indexes(handle, inode, path);
2598 2632
2599 /* 2633 /*
2600 * If there is still a entry in the leaf node, check to see if 2634 * Free the partial cluster only if the current extent does not
2601 * it references the partial cluster. This is the only place 2635 * reference it. Otherwise we might free used cluster.
2602 * where it could; if it doesn't, we can free the cluster.
2603 */ 2636 */
2604 if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) && 2637 if (*partial_cluster > 0 &&
2605 (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != 2638 (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
2606 *partial_cluster)) { 2639 *partial_cluster)) {
2607 int flags = EXT4_FREE_BLOCKS_FORGET; 2640 int flags = EXT4_FREE_BLOCKS_FORGET;
@@ -2651,7 +2684,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2651 struct super_block *sb = inode->i_sb; 2684 struct super_block *sb = inode->i_sb;
2652 int depth = ext_depth(inode); 2685 int depth = ext_depth(inode);
2653 struct ext4_ext_path *path = NULL; 2686 struct ext4_ext_path *path = NULL;
2654 ext4_fsblk_t partial_cluster = 0; 2687 long long partial_cluster = 0;
2655 handle_t *handle; 2688 handle_t *handle;
2656 int i = 0, err = 0; 2689 int i = 0, err = 0;
2657 2690
@@ -2837,7 +2870,7 @@ again:
2837 /* If we still have something in the partial cluster and we have removed 2870 /* If we still have something in the partial cluster and we have removed
2838 * even the first extent, then we should free the blocks in the partial 2871 * even the first extent, then we should free the blocks in the partial
2839 * cluster as well. */ 2872 * cluster as well. */
2840 if (partial_cluster && path->p_hdr->eh_entries == 0) { 2873 if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) {
2841 int flags = EXT4_FREE_BLOCKS_FORGET; 2874 int flags = EXT4_FREE_BLOCKS_FORGET;
2842 2875
2843 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2876 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index bcb5a021945c..e23b2188110a 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -1928,7 +1928,7 @@ TRACE_EVENT(ext4_ext_show_extent,
1928TRACE_EVENT(ext4_remove_blocks, 1928TRACE_EVENT(ext4_remove_blocks,
1929 TP_PROTO(struct inode *inode, struct ext4_extent *ex, 1929 TP_PROTO(struct inode *inode, struct ext4_extent *ex,
1930 ext4_lblk_t from, ext4_fsblk_t to, 1930 ext4_lblk_t from, ext4_fsblk_t to,
1931 ext4_fsblk_t partial_cluster), 1931 long long partial_cluster),
1932 1932
1933 TP_ARGS(inode, ex, from, to, partial_cluster), 1933 TP_ARGS(inode, ex, from, to, partial_cluster),
1934 1934
@@ -1937,7 +1937,7 @@ TRACE_EVENT(ext4_remove_blocks,
1937 __field( ino_t, ino ) 1937 __field( ino_t, ino )
1938 __field( ext4_lblk_t, from ) 1938 __field( ext4_lblk_t, from )
1939 __field( ext4_lblk_t, to ) 1939 __field( ext4_lblk_t, to )
1940 __field( ext4_fsblk_t, partial ) 1940 __field( long long, partial )
1941 __field( ext4_fsblk_t, ee_pblk ) 1941 __field( ext4_fsblk_t, ee_pblk )
1942 __field( ext4_lblk_t, ee_lblk ) 1942 __field( ext4_lblk_t, ee_lblk )
1943 __field( unsigned short, ee_len ) 1943 __field( unsigned short, ee_len )
@@ -1955,7 +1955,7 @@ TRACE_EVENT(ext4_remove_blocks,
1955 ), 1955 ),
1956 1956
1957 TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]" 1957 TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
1958 "from %u to %u partial_cluster %u", 1958 "from %u to %u partial_cluster %lld",
1959 MAJOR(__entry->dev), MINOR(__entry->dev), 1959 MAJOR(__entry->dev), MINOR(__entry->dev),
1960 (unsigned long) __entry->ino, 1960 (unsigned long) __entry->ino,
1961 (unsigned) __entry->ee_lblk, 1961 (unsigned) __entry->ee_lblk,
@@ -1963,19 +1963,20 @@ TRACE_EVENT(ext4_remove_blocks,
1963 (unsigned short) __entry->ee_len, 1963 (unsigned short) __entry->ee_len,
1964 (unsigned) __entry->from, 1964 (unsigned) __entry->from,
1965 (unsigned) __entry->to, 1965 (unsigned) __entry->to,
1966 (unsigned) __entry->partial) 1966 (long long) __entry->partial)
1967); 1967);
1968 1968
1969TRACE_EVENT(ext4_ext_rm_leaf, 1969TRACE_EVENT(ext4_ext_rm_leaf,
1970 TP_PROTO(struct inode *inode, ext4_lblk_t start, 1970 TP_PROTO(struct inode *inode, ext4_lblk_t start,
1971 struct ext4_extent *ex, ext4_fsblk_t partial_cluster), 1971 struct ext4_extent *ex,
1972 long long partial_cluster),
1972 1973
1973 TP_ARGS(inode, start, ex, partial_cluster), 1974 TP_ARGS(inode, start, ex, partial_cluster),
1974 1975
1975 TP_STRUCT__entry( 1976 TP_STRUCT__entry(
1976 __field( dev_t, dev ) 1977 __field( dev_t, dev )
1977 __field( ino_t, ino ) 1978 __field( ino_t, ino )
1978 __field( ext4_fsblk_t, partial ) 1979 __field( long long, partial )
1979 __field( ext4_lblk_t, start ) 1980 __field( ext4_lblk_t, start )
1980 __field( ext4_lblk_t, ee_lblk ) 1981 __field( ext4_lblk_t, ee_lblk )
1981 __field( ext4_fsblk_t, ee_pblk ) 1982 __field( ext4_fsblk_t, ee_pblk )
@@ -1993,14 +1994,14 @@ TRACE_EVENT(ext4_ext_rm_leaf,
1993 ), 1994 ),
1994 1995
1995 TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]" 1996 TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
1996 "partial_cluster %u", 1997 "partial_cluster %lld",
1997 MAJOR(__entry->dev), MINOR(__entry->dev), 1998 MAJOR(__entry->dev), MINOR(__entry->dev),
1998 (unsigned long) __entry->ino, 1999 (unsigned long) __entry->ino,
1999 (unsigned) __entry->start, 2000 (unsigned) __entry->start,
2000 (unsigned) __entry->ee_lblk, 2001 (unsigned) __entry->ee_lblk,
2001 (unsigned long long) __entry->ee_pblk, 2002 (unsigned long long) __entry->ee_pblk,
2002 (unsigned short) __entry->ee_len, 2003 (unsigned short) __entry->ee_len,
2003 (unsigned) __entry->partial) 2004 (long long) __entry->partial)
2004); 2005);
2005 2006
2006TRACE_EVENT(ext4_ext_rm_idx, 2007TRACE_EVENT(ext4_ext_rm_idx,
@@ -2058,7 +2059,7 @@ TRACE_EVENT(ext4_ext_remove_space,
2058 2059
2059TRACE_EVENT(ext4_ext_remove_space_done, 2060TRACE_EVENT(ext4_ext_remove_space_done,
2060 TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end, 2061 TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end,
2061 int depth, ext4_lblk_t partial, __le16 eh_entries), 2062 int depth, long long partial, __le16 eh_entries),
2062 2063
2063 TP_ARGS(inode, start, end, depth, partial, eh_entries), 2064 TP_ARGS(inode, start, end, depth, partial, eh_entries),
2064 2065
@@ -2068,7 +2069,7 @@ TRACE_EVENT(ext4_ext_remove_space_done,
2068 __field( ext4_lblk_t, start ) 2069 __field( ext4_lblk_t, start )
2069 __field( ext4_lblk_t, end ) 2070 __field( ext4_lblk_t, end )
2070 __field( int, depth ) 2071 __field( int, depth )
2071 __field( ext4_lblk_t, partial ) 2072 __field( long long, partial )
2072 __field( unsigned short, eh_entries ) 2073 __field( unsigned short, eh_entries )
2073 ), 2074 ),
2074 2075
@@ -2082,14 +2083,14 @@ TRACE_EVENT(ext4_ext_remove_space_done,
2082 __entry->eh_entries = le16_to_cpu(eh_entries); 2083 __entry->eh_entries = le16_to_cpu(eh_entries);
2083 ), 2084 ),
2084 2085
2085 TP_printk("dev %d,%d ino %lu since %u end %u depth %d partial %u " 2086 TP_printk("dev %d,%d ino %lu since %u end %u depth %d partial %lld "
2086 "remaining_entries %u", 2087 "remaining_entries %u",
2087 MAJOR(__entry->dev), MINOR(__entry->dev), 2088 MAJOR(__entry->dev), MINOR(__entry->dev),
2088 (unsigned long) __entry->ino, 2089 (unsigned long) __entry->ino,
2089 (unsigned) __entry->start, 2090 (unsigned) __entry->start,
2090 (unsigned) __entry->end, 2091 (unsigned) __entry->end,
2091 __entry->depth, 2092 __entry->depth,
2092 (unsigned) __entry->partial, 2093 (long long) __entry->partial,
2093 (unsigned short) __entry->eh_entries) 2094 (unsigned short) __entry->eh_entries)
2094); 2095);
2095 2096