aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-01-08 20:14:59 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-01-08 20:14:59 -0500
commit: 2150edc6c5cf00f7adb54538b9ea2a3e9cedca3f (patch)
tree: f72a0d85e66f500b4cead348a231e3d3b9f357bc /fs/jbd2
parent: cd764695b67386a81964f68e9c66efd9f13f4d29 (diff)
parent: 4b905671d2ea09fd48fed72c581df17e40823f39 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (57 commits)
  jbd2: Fix oops in jbd2_journal_init_inode() on corrupted fs
  ext4: Remove "extents" mount option
  block: Add Kconfig help which notes that ext4 needs CONFIG_LBD
  ext4: Make printk's consistently prefixed with "EXT4-fs: "
  ext4: Add sanity checks for the superblock before mounting the filesystem
  ext4: Add mount option to set kjournald's I/O priority
  jbd2: Submit writes to the journal using WRITE_SYNC
  jbd2: Add pid and journal device name to the "kjournald2 starting" message
  ext4: Add markers for better debuggability
  ext4: Remove code to create the journal inode
  ext4: provide function to release metadata pages under memory pressure
  ext3: provide function to release metadata pages under memory pressure
  add releasepage hooks to block devices which can be used by file systems
  ext4: Fix s_dirty_blocks_counter if block allocation failed with nodelalloc
  ext4: Init the complete page while building buddy cache
  ext4: Don't allow new groups to be added during block allocation
  ext4: mark the blocks/inode bitmap beyond end of group as used
  ext4: Use new buffer_head flag to check uninit group bitmaps initialization
  ext4: Fix the race between read_inode_bitmap() and ext4_new_inode()
  ext4: code cleanup
  ...
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- fs/jbd2/checkpoint.c 24
-rw-r--r-- fs/jbd2/commit.c 58
-rw-r--r-- fs/jbd2/journal.c 124
-rw-r--r-- fs/jbd2/transaction.c 60
4 files changed, 142 insertions, 124 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 9497718fe920..17159cacbd9e 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -249,16 +249,14 @@ restart:
249 return ret; 249 return ret;
250} 250}
251 251
252#define NR_BATCH 64
253
254static void 252static void
255__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) 253__flush_batch(journal_t *journal, int *batch_count)
256{ 254{
257 int i; 255 int i;
258 256
259 ll_rw_block(SWRITE, *batch_count, bhs); 257 ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs);
260 for (i = 0; i < *batch_count; i++) { 258 for (i = 0; i < *batch_count; i++) {
261 struct buffer_head *bh = bhs[i]; 259 struct buffer_head *bh = journal->j_chkpt_bhs[i];
262 clear_buffer_jwrite(bh); 260 clear_buffer_jwrite(bh);
263 BUFFER_TRACE(bh, "brelse"); 261 BUFFER_TRACE(bh, "brelse");
264 __brelse(bh); 262 __brelse(bh);
@@ -277,8 +275,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
277 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 275 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
278 */ 276 */
279static int __process_buffer(journal_t *journal, struct journal_head *jh, 277static int __process_buffer(journal_t *journal, struct journal_head *jh,
280 struct buffer_head **bhs, int *batch_count, 278 int *batch_count, transaction_t *transaction)
281 transaction_t *transaction)
282{ 279{
283 struct buffer_head *bh = jh2bh(jh); 280 struct buffer_head *bh = jh2bh(jh);
284 int ret = 0; 281 int ret = 0;
@@ -325,14 +322,14 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
325 get_bh(bh); 322 get_bh(bh);
326 J_ASSERT_BH(bh, !buffer_jwrite(bh)); 323 J_ASSERT_BH(bh, !buffer_jwrite(bh));
327 set_buffer_jwrite(bh); 324 set_buffer_jwrite(bh);
328 bhs[*batch_count] = bh; 325 journal->j_chkpt_bhs[*batch_count] = bh;
329 __buffer_relink_io(jh); 326 __buffer_relink_io(jh);
330 jbd_unlock_bh_state(bh); 327 jbd_unlock_bh_state(bh);
331 transaction->t_chp_stats.cs_written++; 328 transaction->t_chp_stats.cs_written++;
332 (*batch_count)++; 329 (*batch_count)++;
333 if (*batch_count == NR_BATCH) { 330 if (*batch_count == JBD2_NR_BATCH) {
334 spin_unlock(&journal->j_list_lock); 331 spin_unlock(&journal->j_list_lock);
335 __flush_batch(journal, bhs, batch_count); 332 __flush_batch(journal, batch_count);
336 ret = 1; 333 ret = 1;
337 } 334 }
338 } 335 }
@@ -388,7 +385,6 @@ restart:
388 if (journal->j_checkpoint_transactions == transaction && 385 if (journal->j_checkpoint_transactions == transaction &&
389 transaction->t_tid == this_tid) { 386 transaction->t_tid == this_tid) {
390 int batch_count = 0; 387 int batch_count = 0;
391 struct buffer_head *bhs[NR_BATCH];
392 struct journal_head *jh; 388 struct journal_head *jh;
393 int retry = 0, err; 389 int retry = 0, err;
394 390
@@ -402,7 +398,7 @@ restart:
402 retry = 1; 398 retry = 1;
403 break; 399 break;
404 } 400 }
405 retry = __process_buffer(journal, jh, bhs, &batch_count, 401 retry = __process_buffer(journal, jh, &batch_count,
406 transaction); 402 transaction);
407 if (retry < 0 && !result) 403 if (retry < 0 && !result)
408 result = retry; 404 result = retry;
@@ -419,7 +415,7 @@ restart:
419 spin_unlock(&journal->j_list_lock); 415 spin_unlock(&journal->j_list_lock);
420 retry = 1; 416 retry = 1;
421 } 417 }
422 __flush_batch(journal, bhs, &batch_count); 418 __flush_batch(journal, &batch_count);
423 } 419 }
424 420
425 if (retry) { 421 if (retry) {
@@ -686,6 +682,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
686 safely remove this transaction from the log */ 682 safely remove this transaction from the log */
687 683
688 __jbd2_journal_drop_transaction(journal, transaction); 684 __jbd2_journal_drop_transaction(journal, transaction);
685 kfree(transaction);
689 686
690 /* Just in case anybody was waiting for more transactions to be 687 /* Just in case anybody was waiting for more transactions to be
691 checkpointed... */ 688 checkpointed... */
@@ -760,5 +757,4 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
760 J_ASSERT(journal->j_running_transaction != transaction); 757 J_ASSERT(journal->j_running_transaction != transaction);
761 758
762 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); 759 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
763 kfree(transaction);
764} 760}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index c8a1bace685a..62804e57a44c 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -25,6 +25,7 @@
25#include <linux/crc32.h> 25#include <linux/crc32.h>
26#include <linux/writeback.h> 26#include <linux/writeback.h>
27#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/bio.h>
28 29
29/* 30/*
30 * Default IO end handler for temporary BJ_IO buffer_heads. 31 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -137,7 +138,7 @@ static int journal_submit_commit_record(journal_t *journal,
137 set_buffer_ordered(bh); 138 set_buffer_ordered(bh);
138 barrier_done = 1; 139 barrier_done = 1;
139 } 140 }
140 ret = submit_bh(WRITE, bh); 141 ret = submit_bh(WRITE_SYNC, bh);
141 if (barrier_done) 142 if (barrier_done)
142 clear_buffer_ordered(bh); 143 clear_buffer_ordered(bh);
143 144
@@ -158,7 +159,7 @@ static int journal_submit_commit_record(journal_t *journal,
158 lock_buffer(bh); 159 lock_buffer(bh);
159 set_buffer_uptodate(bh); 160 set_buffer_uptodate(bh);
160 clear_buffer_dirty(bh); 161 clear_buffer_dirty(bh);
161 ret = submit_bh(WRITE, bh); 162 ret = submit_bh(WRITE_SYNC, bh);
162 } 163 }
163 *cbh = bh; 164 *cbh = bh;
164 return ret; 165 return ret;
@@ -168,12 +169,34 @@ static int journal_submit_commit_record(journal_t *journal,
168 * This function along with journal_submit_commit_record 169 * This function along with journal_submit_commit_record
169 * allows to write the commit record asynchronously. 170 * allows to write the commit record asynchronously.
170 */ 171 */
171static int journal_wait_on_commit_record(struct buffer_head *bh) 172static int journal_wait_on_commit_record(journal_t *journal,
173 struct buffer_head *bh)
172{ 174{
173 int ret = 0; 175 int ret = 0;
174 176
177retry:
175 clear_buffer_dirty(bh); 178 clear_buffer_dirty(bh);
176 wait_on_buffer(bh); 179 wait_on_buffer(bh);
180 if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
181 printk(KERN_WARNING
182 "JBD2: wait_on_commit_record: sync failed on %s - "
183 "disabling barriers\n", journal->j_devname);
184 spin_lock(&journal->j_state_lock);
185 journal->j_flags &= ~JBD2_BARRIER;
186 spin_unlock(&journal->j_state_lock);
187
188 lock_buffer(bh);
189 clear_buffer_dirty(bh);
190 set_buffer_uptodate(bh);
191 bh->b_end_io = journal_end_buffer_io_sync;
192
193 ret = submit_bh(WRITE_SYNC, bh);
194 if (ret) {
195 unlock_buffer(bh);
196 return ret;
197 }
198 goto retry;
199 }
177 200
178 if (unlikely(!buffer_uptodate(bh))) 201 if (unlikely(!buffer_uptodate(bh)))
179 ret = -EIO; 202 ret = -EIO;
@@ -332,13 +355,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
332 int flags; 355 int flags;
333 int err; 356 int err;
334 unsigned long long blocknr; 357 unsigned long long blocknr;
358 ktime_t start_time;
359 u64 commit_time;
335 char *tagp = NULL; 360 char *tagp = NULL;
336 journal_header_t *header; 361 journal_header_t *header;
337 journal_block_tag_t *tag = NULL; 362 journal_block_tag_t *tag = NULL;
338 int space_left = 0; 363 int space_left = 0;
339 int first_tag = 0; 364 int first_tag = 0;
340 int tag_flag; 365 int tag_flag;
341 int i; 366 int i, to_free = 0;
342 int tag_bytes = journal_tag_bytes(journal); 367 int tag_bytes = journal_tag_bytes(journal);
343 struct buffer_head *cbh = NULL; /* For transactional checksums */ 368 struct buffer_head *cbh = NULL; /* For transactional checksums */
344 __u32 crc32_sum = ~0; 369 __u32 crc32_sum = ~0;
@@ -458,6 +483,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
458 commit_transaction->t_state = T_FLUSH; 483 commit_transaction->t_state = T_FLUSH;
459 journal->j_committing_transaction = commit_transaction; 484 journal->j_committing_transaction = commit_transaction;
460 journal->j_running_transaction = NULL; 485 journal->j_running_transaction = NULL;
486 start_time = ktime_get();
461 commit_transaction->t_log_start = journal->j_head; 487 commit_transaction->t_log_start = journal->j_head;
462 wake_up(&journal->j_wait_transaction_locked); 488 wake_up(&journal->j_wait_transaction_locked);
463 spin_unlock(&journal->j_state_lock); 489 spin_unlock(&journal->j_state_lock);
@@ -803,7 +829,7 @@ wait_for_iobuf:
803 __jbd2_journal_abort_hard(journal); 829 __jbd2_journal_abort_hard(journal);
804 } 830 }
805 if (!err && !is_journal_aborted(journal)) 831 if (!err && !is_journal_aborted(journal))
806 err = journal_wait_on_commit_record(cbh); 832 err = journal_wait_on_commit_record(journal, cbh);
807 833
808 if (err) 834 if (err)
809 jbd2_journal_abort(journal, err); 835 jbd2_journal_abort(journal, err);
@@ -981,14 +1007,23 @@ restart_loop:
981 J_ASSERT(commit_transaction == journal->j_committing_transaction); 1007 J_ASSERT(commit_transaction == journal->j_committing_transaction);
982 journal->j_commit_sequence = commit_transaction->t_tid; 1008 journal->j_commit_sequence = commit_transaction->t_tid;
983 journal->j_committing_transaction = NULL; 1009 journal->j_committing_transaction = NULL;
984 spin_unlock(&journal->j_state_lock); 1010 commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
985 1011
986 if (journal->j_commit_callback) 1012 /*
987 journal->j_commit_callback(journal, commit_transaction); 1013 * weight the commit time higher than the average time so we don't
1014 * react too strongly to vast changes in the commit time
1015 */
1016 if (likely(journal->j_average_commit_time))
1017 journal->j_average_commit_time = (commit_time +
1018 journal->j_average_commit_time*3) / 4;
1019 else
1020 journal->j_average_commit_time = commit_time;
1021 spin_unlock(&journal->j_state_lock);
988 1022
989 if (commit_transaction->t_checkpoint_list == NULL && 1023 if (commit_transaction->t_checkpoint_list == NULL &&
990 commit_transaction->t_checkpoint_io_list == NULL) { 1024 commit_transaction->t_checkpoint_io_list == NULL) {
991 __jbd2_journal_drop_transaction(journal, commit_transaction); 1025 __jbd2_journal_drop_transaction(journal, commit_transaction);
1026 to_free = 1;
992 } else { 1027 } else {
993 if (journal->j_checkpoint_transactions == NULL) { 1028 if (journal->j_checkpoint_transactions == NULL) {
994 journal->j_checkpoint_transactions = commit_transaction; 1029 journal->j_checkpoint_transactions = commit_transaction;
@@ -1007,11 +1042,16 @@ restart_loop:
1007 } 1042 }
1008 spin_unlock(&journal->j_list_lock); 1043 spin_unlock(&journal->j_list_lock);
1009 1044
1045 if (journal->j_commit_callback)
1046 journal->j_commit_callback(journal, commit_transaction);
1047
1010 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", 1048 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
1011 journal->j_devname, journal->j_commit_sequence, 1049 journal->j_devname, commit_transaction->t_tid,
1012 journal->j_tail_sequence); 1050 journal->j_tail_sequence);
1013 jbd_debug(1, "JBD: commit %d complete, head %d\n", 1051 jbd_debug(1, "JBD: commit %d complete, head %d\n",
1014 journal->j_commit_sequence, journal->j_tail_sequence); 1052 journal->j_commit_sequence, journal->j_tail_sequence);
1053 if (to_free)
1054 kfree(commit_transaction);
1015 1055
1016 wake_up(&journal->j_wait_done_commit); 1056 wake_up(&journal->j_wait_done_commit);
1017} 1057}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f6bff9d6f8df..56675306ed81 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -40,6 +40,7 @@
40 40
41#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42#include <asm/page.h> 42#include <asm/page.h>
43#include <asm/div64.h>
43 44
44EXPORT_SYMBOL(jbd2_journal_start); 45EXPORT_SYMBOL(jbd2_journal_start);
45EXPORT_SYMBOL(jbd2_journal_restart); 46EXPORT_SYMBOL(jbd2_journal_restart);
@@ -66,7 +67,6 @@ EXPORT_SYMBOL(jbd2_journal_update_format);
66EXPORT_SYMBOL(jbd2_journal_check_used_features); 67EXPORT_SYMBOL(jbd2_journal_check_used_features);
67EXPORT_SYMBOL(jbd2_journal_check_available_features); 68EXPORT_SYMBOL(jbd2_journal_check_available_features);
68EXPORT_SYMBOL(jbd2_journal_set_features); 69EXPORT_SYMBOL(jbd2_journal_set_features);
69EXPORT_SYMBOL(jbd2_journal_create);
70EXPORT_SYMBOL(jbd2_journal_load); 70EXPORT_SYMBOL(jbd2_journal_load);
71EXPORT_SYMBOL(jbd2_journal_destroy); 71EXPORT_SYMBOL(jbd2_journal_destroy);
72EXPORT_SYMBOL(jbd2_journal_abort); 72EXPORT_SYMBOL(jbd2_journal_abort);
@@ -132,8 +132,9 @@ static int kjournald2(void *arg)
132 journal->j_task = current; 132 journal->j_task = current;
133 wake_up(&journal->j_wait_done_commit); 133 wake_up(&journal->j_wait_done_commit);
134 134
135 printk(KERN_INFO "kjournald2 starting. Commit interval %ld seconds\n", 135 printk(KERN_INFO "kjournald2 starting: pid %d, dev %s, "
136 journal->j_commit_interval / HZ); 136 "commit interval %ld seconds\n", current->pid,
137 journal->j_devname, journal->j_commit_interval / HZ);
137 138
138 /* 139 /*
139 * And now, wait forever for commit wakeup events. 140 * And now, wait forever for commit wakeup events.
@@ -650,6 +651,8 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
650 return NULL; 651 return NULL;
651 652
652 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 653 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
654 if (!bh)
655 return NULL;
653 lock_buffer(bh); 656 lock_buffer(bh);
654 memset(bh->b_data, 0, journal->j_blocksize); 657 memset(bh->b_data, 0, journal->j_blocksize);
655 set_buffer_uptodate(bh); 658 set_buffer_uptodate(bh);
@@ -843,6 +846,8 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v)
843 jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid)); 846 jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid));
844 seq_printf(seq, " %ums logging transaction\n", 847 seq_printf(seq, " %ums logging transaction\n",
845 jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid)); 848 jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid));
849 seq_printf(seq, " %luus average transaction commit time\n",
850 do_div(s->journal->j_average_commit_time, 1000));
846 seq_printf(seq, " %lu handles per transaction\n", 851 seq_printf(seq, " %lu handles per transaction\n",
847 s->stats->u.run.rs_handle_count / s->stats->ts_tid); 852 s->stats->u.run.rs_handle_count / s->stats->ts_tid);
848 seq_printf(seq, " %lu blocks per transaction\n", 853 seq_printf(seq, " %lu blocks per transaction\n",
@@ -980,6 +985,8 @@ static journal_t * journal_init_common (void)
980 spin_lock_init(&journal->j_state_lock); 985 spin_lock_init(&journal->j_state_lock);
981 986
982 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); 987 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
988 journal->j_min_batch_time = 0;
989 journal->j_max_batch_time = 15000; /* 15ms */
983 990
984 /* The journal is marked for error until we succeed with recovery! */ 991 /* The journal is marked for error until we succeed with recovery! */
985 journal->j_flags = JBD2_ABORT; 992 journal->j_flags = JBD2_ABORT;
@@ -1035,15 +1042,14 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
1035 1042
1036 /* journal descriptor can store up to n blocks -bzzz */ 1043 /* journal descriptor can store up to n blocks -bzzz */
1037 journal->j_blocksize = blocksize; 1044 journal->j_blocksize = blocksize;
1045 jbd2_stats_proc_init(journal);
1038 n = journal->j_blocksize / sizeof(journal_block_tag_t); 1046 n = journal->j_blocksize / sizeof(journal_block_tag_t);
1039 journal->j_wbufsize = n; 1047 journal->j_wbufsize = n;
1040 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1048 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1041 if (!journal->j_wbuf) { 1049 if (!journal->j_wbuf) {
1042 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1050 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1043 __func__); 1051 __func__);
1044 kfree(journal); 1052 goto out_err;
1045 journal = NULL;
1046 goto out;
1047 } 1053 }
1048 journal->j_dev = bdev; 1054 journal->j_dev = bdev;
1049 journal->j_fs_dev = fs_dev; 1055 journal->j_fs_dev = fs_dev;
@@ -1053,14 +1059,22 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
1053 p = journal->j_devname; 1059 p = journal->j_devname;
1054 while ((p = strchr(p, '/'))) 1060 while ((p = strchr(p, '/')))
1055 *p = '!'; 1061 *p = '!';
1056 jbd2_stats_proc_init(journal);
1057 1062
1058 bh = __getblk(journal->j_dev, start, journal->j_blocksize); 1063 bh = __getblk(journal->j_dev, start, journal->j_blocksize);
1059 J_ASSERT(bh != NULL); 1064 if (!bh) {
1065 printk(KERN_ERR
1066 "%s: Cannot get buffer for journal superblock\n",
1067 __func__);
1068 goto out_err;
1069 }
1060 journal->j_sb_buffer = bh; 1070 journal->j_sb_buffer = bh;
1061 journal->j_superblock = (journal_superblock_t *)bh->b_data; 1071 journal->j_superblock = (journal_superblock_t *)bh->b_data;
1062out: 1072
1063 return journal; 1073 return journal;
1074out_err:
1075 jbd2_stats_proc_exit(journal);
1076 kfree(journal);
1077 return NULL;
1064} 1078}
1065 1079
1066/** 1080/**
@@ -1108,9 +1122,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1108 if (!journal->j_wbuf) { 1122 if (!journal->j_wbuf) {
1109 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1123 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1110 __func__); 1124 __func__);
1111 jbd2_stats_proc_exit(journal); 1125 goto out_err;
1112 kfree(journal);
1113 return NULL;
1114 } 1126 }
1115 1127
1116 err = jbd2_journal_bmap(journal, 0, &blocknr); 1128 err = jbd2_journal_bmap(journal, 0, &blocknr);
@@ -1118,17 +1130,24 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1118 if (err) { 1130 if (err) {
1119 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 1131 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
1120 __func__); 1132 __func__);
1121 jbd2_stats_proc_exit(journal); 1133 goto out_err;
1122 kfree(journal);
1123 return NULL;
1124 } 1134 }
1125 1135
1126 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 1136 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
1127 J_ASSERT(bh != NULL); 1137 if (!bh) {
1138 printk(KERN_ERR
1139 "%s: Cannot get buffer for journal superblock\n",
1140 __func__);
1141 goto out_err;
1142 }
1128 journal->j_sb_buffer = bh; 1143 journal->j_sb_buffer = bh;
1129 journal->j_superblock = (journal_superblock_t *)bh->b_data; 1144 journal->j_superblock = (journal_superblock_t *)bh->b_data;
1130 1145
1131 return journal; 1146 return journal;
1147out_err:
1148 jbd2_stats_proc_exit(journal);
1149 kfree(journal);
1150 return NULL;
1132} 1151}
1133 1152
1134/* 1153/*
@@ -1177,77 +1196,6 @@ static int journal_reset(journal_t *journal)
1177} 1196}
1178 1197
1179/** 1198/**
1180 * int jbd2_journal_create() - Initialise the new journal file
1181 * @journal: Journal to create. This structure must have been initialised
1182 *
1183 * Given a journal_t structure which tells us which disk blocks we can
1184 * use, create a new journal superblock and initialise all of the
1185 * journal fields from scratch.
1186 **/
1187int jbd2_journal_create(journal_t *journal)
1188{
1189 unsigned long long blocknr;
1190 struct buffer_head *bh;
1191 journal_superblock_t *sb;
1192 int i, err;
1193
1194 if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) {
1195 printk (KERN_ERR "Journal length (%d blocks) too short.\n",
1196 journal->j_maxlen);
1197 journal_fail_superblock(journal);
1198 return -EINVAL;
1199 }
1200
1201 if (journal->j_inode == NULL) {
1202 /*
1203 * We don't know what block to start at!
1204 */
1205 printk(KERN_EMERG
1206 "%s: creation of journal on external device!\n",
1207 __func__);
1208 BUG();
1209 }
1210
1211 /* Zero out the entire journal on disk. We cannot afford to
1212 have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */
1213 jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
1214 for (i = 0; i < journal->j_maxlen; i++) {
1215 err = jbd2_journal_bmap(journal, i, &blocknr);
1216 if (err)
1217 return err;
1218 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
1219 lock_buffer(bh);
1220 memset (bh->b_data, 0, journal->j_blocksize);
1221 BUFFER_TRACE(bh, "marking dirty");
1222 mark_buffer_dirty(bh);
1223 BUFFER_TRACE(bh, "marking uptodate");
1224 set_buffer_uptodate(bh);
1225 unlock_buffer(bh);
1226 __brelse(bh);
1227 }
1228
1229 sync_blockdev(journal->j_dev);
1230 jbd_debug(1, "JBD: journal cleared.\n");
1231
1232 /* OK, fill in the initial static fields in the new superblock */
1233 sb = journal->j_superblock;
1234
1235 sb->s_header.h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
1236 sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
1237
1238 sb->s_blocksize = cpu_to_be32(journal->j_blocksize);
1239 sb->s_maxlen = cpu_to_be32(journal->j_maxlen);
1240 sb->s_first = cpu_to_be32(1);
1241
1242 journal->j_transaction_sequence = 1;
1243
1244 journal->j_flags &= ~JBD2_ABORT;
1245 journal->j_format_version = 2;
1246
1247 return journal_reset(journal);
1248}
1249
1250/**
1251 * void jbd2_journal_update_superblock() - Update journal sb on disk. 1199 * void jbd2_journal_update_superblock() - Update journal sb on disk.
1252 * @journal: The journal to update. 1200 * @journal: The journal to update.
1253 * @wait: Set to '0' if you don't want to wait for IO completion. 1201 * @wait: Set to '0' if you don't want to wait for IO completion.
@@ -1491,7 +1439,9 @@ int jbd2_journal_destroy(journal_t *journal)
1491 spin_lock(&journal->j_list_lock); 1439 spin_lock(&journal->j_list_lock);
1492 while (journal->j_checkpoint_transactions != NULL) { 1440 while (journal->j_checkpoint_transactions != NULL) {
1493 spin_unlock(&journal->j_list_lock); 1441 spin_unlock(&journal->j_list_lock);
1442 mutex_lock(&journal->j_checkpoint_mutex);
1494 jbd2_log_do_checkpoint(journal); 1443 jbd2_log_do_checkpoint(journal);
1444 mutex_unlock(&journal->j_checkpoint_mutex);
1495 spin_lock(&journal->j_list_lock); 1445 spin_lock(&journal->j_list_lock);
1496 } 1446 }
1497 1447
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 4f925a4f3d05..46b4e347ed7d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -25,6 +25,7 @@
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h>
28 29
29static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 30static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
30 31
@@ -48,6 +49,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
48{ 49{
49 transaction->t_journal = journal; 50 transaction->t_journal = journal;
50 transaction->t_state = T_RUNNING; 51 transaction->t_state = T_RUNNING;
52 transaction->t_start_time = ktime_get();
51 transaction->t_tid = journal->j_transaction_sequence++; 53 transaction->t_tid = journal->j_transaction_sequence++;
52 transaction->t_expires = jiffies + journal->j_commit_interval; 54 transaction->t_expires = jiffies + journal->j_commit_interval;
53 spin_lock_init(&transaction->t_handle_lock); 55 spin_lock_init(&transaction->t_handle_lock);
@@ -1240,7 +1242,7 @@ int jbd2_journal_stop(handle_t *handle)
1240{ 1242{
1241 transaction_t *transaction = handle->h_transaction; 1243 transaction_t *transaction = handle->h_transaction;
1242 journal_t *journal = transaction->t_journal; 1244 journal_t *journal = transaction->t_journal;
1243 int old_handle_count, err; 1245 int err;
1244 pid_t pid; 1246 pid_t pid;
1245 1247
1246 J_ASSERT(journal_current_handle() == handle); 1248 J_ASSERT(journal_current_handle() == handle);
@@ -1263,24 +1265,54 @@ int jbd2_journal_stop(handle_t *handle)
1263 /* 1265 /*
1264 * Implement synchronous transaction batching. If the handle 1266 * Implement synchronous transaction batching. If the handle
1265 * was synchronous, don't force a commit immediately. Let's 1267 * was synchronous, don't force a commit immediately. Let's
1266 * yield and let another thread piggyback onto this transaction. 1268 * yield and let another thread piggyback onto this
1267 * Keep doing that while new threads continue to arrive. 1269 * transaction. Keep doing that while new threads continue to
1268 * It doesn't cost much - we're about to run a commit and sleep 1270 * arrive. It doesn't cost much - we're about to run a commit
1269 * on IO anyway. Speeds up many-threaded, many-dir operations 1271 * and sleep on IO anyway. Speeds up many-threaded, many-dir
1270 * by 30x or more... 1272 * operations by 30x or more...
1273 *
1274 * We try and optimize the sleep time against what the
1275 * underlying disk can do, instead of having a static sleep
1276 * time. This is useful for the case where our storage is so
1277 * fast that it is more optimal to go ahead and force a flush
1278 * and wait for the transaction to be committed than it is to
1279 * wait for an arbitrary amount of time for new writers to
1280 * join the transaction. We achieve this by measuring how
1281 * long it takes to commit a transaction, and compare it with
1282 * how long this transaction has been running, and if run time
1283 * < commit time then we sleep for the delta and commit. This
1284 * greatly helps super fast disks that would see slowdowns as
1285 * more threads started doing fsyncs.
1271 * 1286 *
1272 * But don't do this if this process was the most recent one to 1287 * But don't do this if this process was the most recent one
1273 * perform a synchronous write. We do this to detect the case where a 1288 * to perform a synchronous write. We do this to detect the
1274 * single process is doing a stream of sync writes. No point in waiting 1289 * case where a single process is doing a stream of sync
1275 * for joiners in that case. 1290 * writes. No point in waiting for joiners in that case.
1276 */ 1291 */
1277 pid = current->pid; 1292 pid = current->pid;
1278 if (handle->h_sync && journal->j_last_sync_writer != pid) { 1293 if (handle->h_sync && journal->j_last_sync_writer != pid) {
1294 u64 commit_time, trans_time;
1295
1279 journal->j_last_sync_writer = pid; 1296 journal->j_last_sync_writer = pid;
1280 do { 1297
1281 old_handle_count = transaction->t_handle_count; 1298 spin_lock(&journal->j_state_lock);
1282 schedule_timeout_uninterruptible(1); 1299 commit_time = journal->j_average_commit_time;
1283 } while (old_handle_count != transaction->t_handle_count); 1300 spin_unlock(&journal->j_state_lock);
1301
1302 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1303 transaction->t_start_time));
1304
1305 commit_time = max_t(u64, commit_time,
1306 1000*journal->j_min_batch_time);
1307 commit_time = min_t(u64, commit_time,
1308 1000*journal->j_max_batch_time);
1309
1310 if (trans_time < commit_time) {
1311 ktime_t expires = ktime_add_ns(ktime_get(),
1312 commit_time);
1313 set_current_state(TASK_UNINTERRUPTIBLE);
1314 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1315 }
1284 } 1316 }
1285 1317
1286 current->journal_info = NULL; 1318 current->journal_info = NULL;