diff options
author | Tao Ma <boyu.mt@taobao.com> | 2011-07-11 00:03:38 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2011-07-11 00:03:38 -0400 |
commit | 3d56b8d2c74cc3f375ce332b3ac3519e009d79ee (patch) | |
tree | 47c13dc80779a3d8d93902897a500b056375689c /fs/ext4 | |
parent | b3d4c2b10b68d205d3eb1b5c17dcb4649a502798 (diff) |
ext4: Speed up FITRIM by recording flags in ext4_group_info
In ext4, every time FITRIM is called, we iterate over all the
groups and trim them one by one. This wastes time if a group
has already been trimmed and nothing has changed since the last
trim.
So this patch adds a new flag in ext4_group_info->bb_state to
indicate that the group has been trimmed; it is cleared when
some blocks are freed (in release_blocks_on_commit). In addition,
a new field, s_last_trim_minblks, is added to ext4_sb_info to record
the last minlen used to trim the volume, so that if the caller provides
a smaller one, we go ahead with the trim regardless of the bb_state.
A simple test with my Intel X25-M SSD:
df -h shows:
/dev/sdb1 40G 21G 17G 56% /mnt/ext4
Block size: 4096
Run FITRIM with the following parameters:
range.start = 0;
range.len = UINT64_MAX;
range.minlen = 1048576;
without the patch:
[root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
real 0m5.505s
user 0m0.000s
sys 0m1.224s
[root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
real 0m5.359s
user 0m0.000s
sys 0m1.178s
[root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
real 0m5.228s
user 0m0.000s
sys 0m1.151s
with the patch:
[root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
real 0m5.625s
user 0m0.000s
sys 0m1.269s
[root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
real 0m0.002s
user 0m0.000s
sys 0m0.001s
[root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
real 0m0.002s
user 0m0.000s
sys 0m0.001s
A big improvement for the 2nd and 3rd runs.
Even after I delete some big image files, it is still much
faster than iterating over the whole disk.
[root@boyu-tm test]# time ./ftrim /mnt/ext4/a
real 0m1.217s
user 0m0.000s
sys 0m0.196s
Cc: Lukas Czerner <lczerner@redhat.com>
Reviewed-by: Andreas Dilger <adilger.kernel@dilger.ca>
Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/ext4.h | 13 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 20 |
2 files changed, 32 insertions, 1 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d13f3b509886..62cee2b6fe79 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1215,6 +1215,9 @@ struct ext4_sb_info { | |||
1215 | 1215 | ||
1216 | /* Kernel thread for multiple mount protection */ | 1216 | /* Kernel thread for multiple mount protection */ |
1217 | struct task_struct *s_mmp_tsk; | 1217 | struct task_struct *s_mmp_tsk; |
1218 | |||
1219 | /* record the last minlen when FITRIM is called. */ | ||
1220 | atomic_t s_last_trim_minblks; | ||
1218 | }; | 1221 | }; |
1219 | 1222 | ||
1220 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1223 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
@@ -2080,11 +2083,19 @@ struct ext4_group_info { | |||
2080 | * 5 free 8-block regions. */ | 2083 | * 5 free 8-block regions. */ |
2081 | }; | 2084 | }; |
2082 | 2085 | ||
2083 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | 2086 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 |
2087 | #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1 | ||
2084 | 2088 | ||
2085 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | 2089 | #define EXT4_MB_GRP_NEED_INIT(grp) \ |
2086 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | 2090 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) |
2087 | 2091 | ||
2092 | #define EXT4_MB_GRP_WAS_TRIMMED(grp) \ | ||
2093 | (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) | ||
2094 | #define EXT4_MB_GRP_SET_TRIMMED(grp) \ | ||
2095 | (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) | ||
2096 | #define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \ | ||
2097 | (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) | ||
2098 | |||
2088 | #define EXT4_MAX_CONTENTION 8 | 2099 | #define EXT4_MAX_CONTENTION 8 |
2089 | #define EXT4_CONTENTION_THRESHOLD 2 | 2100 | #define EXT4_CONTENTION_THRESHOLD 2 |
2090 | 2101 | ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7aa4c16caca1..73c254085a41 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2628,6 +2628,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2628 | rb_erase(&entry->node, &(db->bb_free_root)); | 2628 | rb_erase(&entry->node, &(db->bb_free_root)); |
2629 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); | 2629 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); |
2630 | 2630 | ||
2631 | /* | ||
2632 | * Clear the trimmed flag for the group so that the next | ||
2633 | * ext4_trim_fs can trim it. | ||
2634 | * If the volume is mounted with -o discard, online discard | ||
2635 | * is supported and the free blocks will be trimmed online. | ||
2636 | */ | ||
2637 | if (!test_opt(sb, DISCARD)) | ||
2638 | EXT4_MB_GRP_CLEAR_TRIMMED(db); | ||
2639 | |||
2631 | if (!db->bb_free_root.rb_node) { | 2640 | if (!db->bb_free_root.rb_node) { |
2632 | /* No more items in the per group rb tree | 2641 | /* No more items in the per group rb tree |
2633 | * balance refcounts from ext4_mb_free_metadata() | 2642 | * balance refcounts from ext4_mb_free_metadata() |
@@ -4838,6 +4847,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, | |||
4838 | bitmap = e4b.bd_bitmap; | 4847 | bitmap = e4b.bd_bitmap; |
4839 | 4848 | ||
4840 | ext4_lock_group(sb, group); | 4849 | ext4_lock_group(sb, group); |
4850 | if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) && | ||
4851 | minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) | ||
4852 | goto out; | ||
4853 | |||
4841 | start = (e4b.bd_info->bb_first_free > start) ? | 4854 | start = (e4b.bd_info->bb_first_free > start) ? |
4842 | e4b.bd_info->bb_first_free : start; | 4855 | e4b.bd_info->bb_first_free : start; |
4843 | 4856 | ||
@@ -4869,6 +4882,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, | |||
4869 | if ((e4b.bd_info->bb_free - free_count) < minblocks) | 4882 | if ((e4b.bd_info->bb_free - free_count) < minblocks) |
4870 | break; | 4883 | break; |
4871 | } | 4884 | } |
4885 | |||
4886 | if (!ret) | ||
4887 | EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); | ||
4888 | out: | ||
4872 | ext4_unlock_group(sb, group); | 4889 | ext4_unlock_group(sb, group); |
4873 | ext4_mb_unload_buddy(&e4b); | 4890 | ext4_mb_unload_buddy(&e4b); |
4874 | 4891 | ||
@@ -4957,6 +4974,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4957 | } | 4974 | } |
4958 | range->len = trimmed * sb->s_blocksize; | 4975 | range->len = trimmed * sb->s_blocksize; |
4959 | 4976 | ||
4977 | if (!ret) | ||
4978 | atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); | ||
4979 | |||
4960 | out: | 4980 | out: |
4961 | return ret; | 4981 | return ret; |
4962 | } | 4982 | } |