diff options
author | Theodore Ts'o <tytso@mit.edu> | 2009-01-03 20:27:38 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2009-01-03 20:27:38 -0500 |
commit | 30773840c19cea60dcef39545960d541b1ac1cf8 (patch) | |
tree | f220a2dce451a40dc7264e8fd70c77c5a3908873 | |
parent | d7cfa4684d82f58e5d7cb73b8a3c88c169937f25 (diff) |
ext4: add fsync batch tuning knobs
Add new mount options, min_batch_time and max_batch_time, which
control how long the jbd2 layer should wait for additional filesystem
operations to get batched with a synchronous write transaction.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | Documentation/filesystems/ext4.txt | 29 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 7 | ||||
-rw-r--r-- | fs/ext4/ext4_sb.h | 2 | ||||
-rw-r--r-- | fs/ext4/super.c | 47 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 2 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 4 | ||||
-rw-r--r-- | include/linux/jbd2.h | 8 |
7 files changed, 91 insertions, 8 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index f75ab101c00a..e3fcbea3ec8c 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -283,6 +283,35 @@ delalloc (*) Deferring block allocation until write-out time. | |||
283 | nodelalloc Disable delayed allocation. Blocks are allocated | 283 | nodelalloc Disable delayed allocation. Blocks are allocated |
284 | when data is copied from user to page cache. | 284 | when data is copied from user to page cache. |
285 | 285 | ||
286 | max_batch_time=usec Maximum amount of time ext4 should wait for | ||
287 | additional filesystem operations to be batched | ||
288 | together with a synchronous write operation. | ||
289 | Since a synchronous write operation is going to | ||
290 | force a commit and then wait for the I/O to | ||
291 | complete, it doesn't cost much and can be a | ||
292 | huge throughput win to wait for a small amount | ||
293 | of time to see if any other transactions can | ||
294 | piggyback on the synchronous write. The | ||
295 | algorithm used is designed to automatically tune | ||
296 | for the speed of the disk, by measuring the | ||
297 | amount of time (on average) that it takes to | ||
298 | finish committing a transaction. Call this time | ||
299 | the "commit time". If the time that the | ||
300 | transaction has been running is less than the | ||
301 | commit time, ext4 will try sleeping for the | ||
302 | commit time to see if other operations will join | ||
303 | the transaction. The commit time is capped by | ||
304 | the max_batch_time, which defaults to 15000us | ||
305 | (15ms). This optimization can be turned off | ||
306 | entirely by setting max_batch_time to 0. | ||
307 | |||
308 | min_batch_time=usec This parameter sets the commit time (as | ||
309 | described above) to be at least min_batch_time. | ||
310 | It defaults to zero microseconds. Increasing | ||
311 | this parameter may improve the throughput of | ||
312 | multi-threaded, synchronous workloads on very | ||
313 | fast disks, at the cost of increasing latency. | ||
314 | |||
286 | Data Mode | 315 | Data Mode |
287 | ========= | 316 | ========= |
288 | There are 3 different data modes: | 317 | There are 3 different data modes: |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ac8551e0b70a..9ba9fd6d14da 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -328,6 +328,7 @@ struct ext4_mount_options { | |||
328 | uid_t s_resuid; | 328 | uid_t s_resuid; |
329 | gid_t s_resgid; | 329 | gid_t s_resgid; |
330 | unsigned long s_commit_interval; | 330 | unsigned long s_commit_interval; |
331 | u32 s_min_batch_time, s_max_batch_time; | ||
331 | #ifdef CONFIG_QUOTA | 332 | #ifdef CONFIG_QUOTA |
332 | int s_jquota_fmt; | 333 | int s_jquota_fmt; |
333 | char *s_qf_names[MAXQUOTAS]; | 334 | char *s_qf_names[MAXQUOTAS]; |
@@ -806,6 +807,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
806 | #define EXT4_DEFM_JMODE_WBACK 0x0060 | 807 | #define EXT4_DEFM_JMODE_WBACK 0x0060 |
807 | 808 | ||
808 | /* | 809 | /* |
810 | * Default journal batch times | ||
811 | */ | ||
812 | #define EXT4_DEF_MIN_BATCH_TIME 0 | ||
813 | #define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ | ||
814 | |||
815 | /* | ||
809 | * Structure of a directory entry | 816 | * Structure of a directory entry |
810 | */ | 817 | */ |
811 | #define EXT4_NAME_LEN 255 | 818 | #define EXT4_NAME_LEN 255 |
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 3db800f399a6..039b6ea1a042 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h | |||
@@ -74,6 +74,8 @@ struct ext4_sb_info { | |||
74 | struct journal_s *s_journal; | 74 | struct journal_s *s_journal; |
75 | struct list_head s_orphan; | 75 | struct list_head s_orphan; |
76 | unsigned long s_commit_interval; | 76 | unsigned long s_commit_interval; |
77 | u32 s_max_batch_time; | ||
78 | u32 s_min_batch_time; | ||
77 | struct block_device *journal_bdev; | 79 | struct block_device *journal_bdev; |
78 | #ifdef CONFIG_JBD2_DEBUG | 80 | #ifdef CONFIG_JBD2_DEBUG |
79 | struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ | 81 | struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dc27d4c613c0..da377f9521bb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -705,10 +705,19 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
705 | #endif | 705 | #endif |
706 | if (!test_opt(sb, RESERVATION)) | 706 | if (!test_opt(sb, RESERVATION)) |
707 | seq_puts(seq, ",noreservation"); | 707 | seq_puts(seq, ",noreservation"); |
708 | if (sbi->s_commit_interval) { | 708 | if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { |
709 | seq_printf(seq, ",commit=%u", | 709 | seq_printf(seq, ",commit=%u", |
710 | (unsigned) (sbi->s_commit_interval / HZ)); | 710 | (unsigned) (sbi->s_commit_interval / HZ)); |
711 | } | 711 | } |
712 | if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { | ||
713 | seq_printf(seq, ",min_batch_time=%u", | ||
714 | (unsigned) sbi->s_min_batch_time); | ||
715 | } | ||
716 | if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { | ||
717 | seq_printf(seq, ",max_batch_time=%u", | ||
718 | (unsigned) sbi->s_max_batch_time); | ||
719 | } | ||
720 | |||
712 | /* | 721 | /* |
713 | * We're changing the default of barrier mount option, so | 722 | * We're changing the default of barrier mount option, so |
714 | * let's always display its mount state so it's clear what its | 723 | * let's always display its mount state so it's clear what its |
@@ -874,7 +883,8 @@ enum { | |||
874 | Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, | 883 | Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, |
875 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, | 884 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, |
876 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, | 885 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, |
877 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | 886 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, |
887 | Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | ||
878 | Opt_journal_checksum, Opt_journal_async_commit, | 888 | Opt_journal_checksum, Opt_journal_async_commit, |
879 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 889 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
880 | Opt_data_err_abort, Opt_data_err_ignore, | 890 | Opt_data_err_abort, Opt_data_err_ignore, |
@@ -913,6 +923,8 @@ static const match_table_t tokens = { | |||
913 | {Opt_nobh, "nobh"}, | 923 | {Opt_nobh, "nobh"}, |
914 | {Opt_bh, "bh"}, | 924 | {Opt_bh, "bh"}, |
915 | {Opt_commit, "commit=%u"}, | 925 | {Opt_commit, "commit=%u"}, |
926 | {Opt_min_batch_time, "min_batch_time=%u"}, | ||
927 | {Opt_max_batch_time, "max_batch_time=%u"}, | ||
916 | {Opt_journal_update, "journal=update"}, | 928 | {Opt_journal_update, "journal=update"}, |
917 | {Opt_journal_inum, "journal=%u"}, | 929 | {Opt_journal_inum, "journal=%u"}, |
918 | {Opt_journal_dev, "journal_dev=%u"}, | 930 | {Opt_journal_dev, "journal_dev=%u"}, |
@@ -1131,6 +1143,22 @@ static int parse_options(char *options, struct super_block *sb, | |||
1131 | option = JBD2_DEFAULT_MAX_COMMIT_AGE; | 1143 | option = JBD2_DEFAULT_MAX_COMMIT_AGE; |
1132 | sbi->s_commit_interval = HZ * option; | 1144 | sbi->s_commit_interval = HZ * option; |
1133 | break; | 1145 | break; |
1146 | case Opt_max_batch_time: | ||
1147 | if (match_int(&args[0], &option)) | ||
1148 | return 0; | ||
1149 | if (option < 0) | ||
1150 | return 0; | ||
1151 | if (option == 0) | ||
1152 | option = EXT4_DEF_MAX_BATCH_TIME; | ||
1153 | sbi->s_max_batch_time = option; | ||
1154 | break; | ||
1155 | case Opt_min_batch_time: | ||
1156 | if (match_int(&args[0], &option)) | ||
1157 | return 0; | ||
1158 | if (option < 0) | ||
1159 | return 0; | ||
1160 | sbi->s_min_batch_time = option; | ||
1161 | break; | ||
1134 | case Opt_data_journal: | 1162 | case Opt_data_journal: |
1135 | data_opt = EXT4_MOUNT_JOURNAL_DATA; | 1163 | data_opt = EXT4_MOUNT_JOURNAL_DATA; |
1136 | goto datacheck; | 1164 | goto datacheck; |
@@ -1979,6 +2007,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1979 | 2007 | ||
1980 | sbi->s_resuid = le16_to_cpu(es->s_def_resuid); | 2008 | sbi->s_resuid = le16_to_cpu(es->s_def_resuid); |
1981 | sbi->s_resgid = le16_to_cpu(es->s_def_resgid); | 2009 | sbi->s_resgid = le16_to_cpu(es->s_def_resgid); |
2010 | sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; | ||
2011 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; | ||
2012 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; | ||
1982 | 2013 | ||
1983 | set_opt(sbi->s_mount_opt, RESERVATION); | 2014 | set_opt(sbi->s_mount_opt, RESERVATION); |
1984 | set_opt(sbi->s_mount_opt, BARRIER); | 2015 | set_opt(sbi->s_mount_opt, BARRIER); |
@@ -2524,11 +2555,9 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) | |||
2524 | { | 2555 | { |
2525 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2556 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2526 | 2557 | ||
2527 | if (sbi->s_commit_interval) | 2558 | journal->j_commit_interval = sbi->s_commit_interval; |
2528 | journal->j_commit_interval = sbi->s_commit_interval; | 2559 | journal->j_min_batch_time = sbi->s_min_batch_time; |
2529 | /* We could also set up an ext4-specific default for the commit | 2560 | journal->j_max_batch_time = sbi->s_max_batch_time; |
2530 | * interval here, but for now we'll just fall back to the jbd | ||
2531 | * default. */ | ||
2532 | 2561 | ||
2533 | spin_lock(&journal->j_state_lock); | 2562 | spin_lock(&journal->j_state_lock); |
2534 | if (test_opt(sb, BARRIER)) | 2563 | if (test_opt(sb, BARRIER)) |
@@ -3042,6 +3071,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3042 | old_opts.s_resuid = sbi->s_resuid; | 3071 | old_opts.s_resuid = sbi->s_resuid; |
3043 | old_opts.s_resgid = sbi->s_resgid; | 3072 | old_opts.s_resgid = sbi->s_resgid; |
3044 | old_opts.s_commit_interval = sbi->s_commit_interval; | 3073 | old_opts.s_commit_interval = sbi->s_commit_interval; |
3074 | old_opts.s_min_batch_time = sbi->s_min_batch_time; | ||
3075 | old_opts.s_max_batch_time = sbi->s_max_batch_time; | ||
3045 | #ifdef CONFIG_QUOTA | 3076 | #ifdef CONFIG_QUOTA |
3046 | old_opts.s_jquota_fmt = sbi->s_jquota_fmt; | 3077 | old_opts.s_jquota_fmt = sbi->s_jquota_fmt; |
3047 | for (i = 0; i < MAXQUOTAS; i++) | 3078 | for (i = 0; i < MAXQUOTAS; i++) |
@@ -3178,6 +3209,8 @@ restore_opts: | |||
3178 | sbi->s_resuid = old_opts.s_resuid; | 3209 | sbi->s_resuid = old_opts.s_resuid; |
3179 | sbi->s_resgid = old_opts.s_resgid; | 3210 | sbi->s_resgid = old_opts.s_resgid; |
3180 | sbi->s_commit_interval = old_opts.s_commit_interval; | 3211 | sbi->s_commit_interval = old_opts.s_commit_interval; |
3212 | sbi->s_min_batch_time = old_opts.s_min_batch_time; | ||
3213 | sbi->s_max_batch_time = old_opts.s_max_batch_time; | ||
3181 | #ifdef CONFIG_QUOTA | 3214 | #ifdef CONFIG_QUOTA |
3182 | sbi->s_jquota_fmt = old_opts.s_jquota_fmt; | 3215 | sbi->s_jquota_fmt = old_opts.s_jquota_fmt; |
3183 | for (i = 0; i < MAXQUOTAS; i++) { | 3216 | for (i = 0; i < MAXQUOTAS; i++) { |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 74d87290381c..fd1d7557a098 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -964,6 +964,8 @@ static journal_t * journal_init_common (void) | |||
964 | spin_lock_init(&journal->j_state_lock); | 964 | spin_lock_init(&journal->j_state_lock); |
965 | 965 | ||
966 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); | 966 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); |
967 | journal->j_min_batch_time = 0; | ||
968 | journal->j_max_batch_time = 15000; /* 15ms */ | ||
967 | 969 | ||
968 | /* The journal is marked for error until we succeed with recovery! */ | 970 | /* The journal is marked for error until we succeed with recovery! */ |
969 | journal->j_flags = JBD2_ABORT; | 971 | journal->j_flags = JBD2_ABORT; |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 13dcbc990f41..48c21bac5a56 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -1255,8 +1255,10 @@ int jbd2_journal_stop(handle_t *handle) | |||
1255 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), | 1255 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), |
1256 | transaction->t_start_time)); | 1256 | transaction->t_start_time)); |
1257 | 1257 | ||
1258 | commit_time = max_t(u64, commit_time, | ||
1259 | 1000*journal->j_min_batch_time); | ||
1258 | commit_time = min_t(u64, commit_time, | 1260 | commit_time = min_t(u64, commit_time, |
1259 | 1000*jiffies_to_usecs(1)); | 1261 | 1000*journal->j_max_batch_time); |
1260 | 1262 | ||
1261 | if (trans_time < commit_time) { | 1263 | if (trans_time < commit_time) { |
1262 | ktime_t expires = ktime_add_ns(ktime_get(), | 1264 | ktime_t expires = ktime_add_ns(ktime_get(), |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ab8cef130c28..a3cd647ea1bc 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -956,6 +956,14 @@ struct journal_s | |||
956 | */ | 956 | */ |
957 | u64 j_average_commit_time; | 957 | u64 j_average_commit_time; |
958 | 958 | ||
959 | /* | ||
960 | * minimum and maximum times that we should wait for | ||
961 | * additional filesystem operations to get batched into a | ||
962 | * synchronous handle in microseconds | ||
963 | */ | ||
964 | u32 j_min_batch_time; | ||
965 | u32 j_max_batch_time; | ||
966 | |||
959 | /* This function is called when a transaction is closed */ | 967 | /* This function is called when a transaction is closed */ |
960 | void (*j_commit_callback)(journal_t *, | 968 | void (*j_commit_callback)(journal_t *, |
961 | transaction_t *); | 969 | transaction_t *); |