diff options
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/checkpoint.c | 24 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 58 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 124 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 60 |
4 files changed, 142 insertions, 124 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 9497718fe920..17159cacbd9e 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -249,16 +249,14 @@ restart: | |||
249 | return ret; | 249 | return ret; |
250 | } | 250 | } |
251 | 251 | ||
252 | #define NR_BATCH 64 | ||
253 | |||
254 | static void | 252 | static void |
255 | __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | 253 | __flush_batch(journal_t *journal, int *batch_count) |
256 | { | 254 | { |
257 | int i; | 255 | int i; |
258 | 256 | ||
259 | ll_rw_block(SWRITE, *batch_count, bhs); | 257 | ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); |
260 | for (i = 0; i < *batch_count; i++) { | 258 | for (i = 0; i < *batch_count; i++) { |
261 | struct buffer_head *bh = bhs[i]; | 259 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; |
262 | clear_buffer_jwrite(bh); | 260 | clear_buffer_jwrite(bh); |
263 | BUFFER_TRACE(bh, "brelse"); | 261 | BUFFER_TRACE(bh, "brelse"); |
264 | __brelse(bh); | 262 | __brelse(bh); |
@@ -277,8 +275,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | |||
277 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | 275 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it |
278 | */ | 276 | */ |
279 | static int __process_buffer(journal_t *journal, struct journal_head *jh, | 277 | static int __process_buffer(journal_t *journal, struct journal_head *jh, |
280 | struct buffer_head **bhs, int *batch_count, | 278 | int *batch_count, transaction_t *transaction) |
281 | transaction_t *transaction) | ||
282 | { | 279 | { |
283 | struct buffer_head *bh = jh2bh(jh); | 280 | struct buffer_head *bh = jh2bh(jh); |
284 | int ret = 0; | 281 | int ret = 0; |
@@ -325,14 +322,14 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
325 | get_bh(bh); | 322 | get_bh(bh); |
326 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); | 323 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); |
327 | set_buffer_jwrite(bh); | 324 | set_buffer_jwrite(bh); |
328 | bhs[*batch_count] = bh; | 325 | journal->j_chkpt_bhs[*batch_count] = bh; |
329 | __buffer_relink_io(jh); | 326 | __buffer_relink_io(jh); |
330 | jbd_unlock_bh_state(bh); | 327 | jbd_unlock_bh_state(bh); |
331 | transaction->t_chp_stats.cs_written++; | 328 | transaction->t_chp_stats.cs_written++; |
332 | (*batch_count)++; | 329 | (*batch_count)++; |
333 | if (*batch_count == NR_BATCH) { | 330 | if (*batch_count == JBD2_NR_BATCH) { |
334 | spin_unlock(&journal->j_list_lock); | 331 | spin_unlock(&journal->j_list_lock); |
335 | __flush_batch(journal, bhs, batch_count); | 332 | __flush_batch(journal, batch_count); |
336 | ret = 1; | 333 | ret = 1; |
337 | } | 334 | } |
338 | } | 335 | } |
@@ -388,7 +385,6 @@ restart: | |||
388 | if (journal->j_checkpoint_transactions == transaction && | 385 | if (journal->j_checkpoint_transactions == transaction && |
389 | transaction->t_tid == this_tid) { | 386 | transaction->t_tid == this_tid) { |
390 | int batch_count = 0; | 387 | int batch_count = 0; |
391 | struct buffer_head *bhs[NR_BATCH]; | ||
392 | struct journal_head *jh; | 388 | struct journal_head *jh; |
393 | int retry = 0, err; | 389 | int retry = 0, err; |
394 | 390 | ||
@@ -402,7 +398,7 @@ restart: | |||
402 | retry = 1; | 398 | retry = 1; |
403 | break; | 399 | break; |
404 | } | 400 | } |
405 | retry = __process_buffer(journal, jh, bhs, &batch_count, | 401 | retry = __process_buffer(journal, jh, &batch_count, |
406 | transaction); | 402 | transaction); |
407 | if (retry < 0 && !result) | 403 | if (retry < 0 && !result) |
408 | result = retry; | 404 | result = retry; |
@@ -419,7 +415,7 @@ restart: | |||
419 | spin_unlock(&journal->j_list_lock); | 415 | spin_unlock(&journal->j_list_lock); |
420 | retry = 1; | 416 | retry = 1; |
421 | } | 417 | } |
422 | __flush_batch(journal, bhs, &batch_count); | 418 | __flush_batch(journal, &batch_count); |
423 | } | 419 | } |
424 | 420 | ||
425 | if (retry) { | 421 | if (retry) { |
@@ -686,6 +682,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
686 | safely remove this transaction from the log */ | 682 | safely remove this transaction from the log */ |
687 | 683 | ||
688 | __jbd2_journal_drop_transaction(journal, transaction); | 684 | __jbd2_journal_drop_transaction(journal, transaction); |
685 | kfree(transaction); | ||
689 | 686 | ||
690 | /* Just in case anybody was waiting for more transactions to be | 687 | /* Just in case anybody was waiting for more transactions to be |
691 | checkpointed... */ | 688 | checkpointed... */ |
@@ -760,5 +757,4 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
760 | J_ASSERT(journal->j_running_transaction != transaction); | 757 | J_ASSERT(journal->j_running_transaction != transaction); |
761 | 758 | ||
762 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); | 759 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); |
763 | kfree(transaction); | ||
764 | } | 760 | } |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index c8a1bace685a..62804e57a44c 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/crc32.h> | 25 | #include <linux/crc32.h> |
26 | #include <linux/writeback.h> | 26 | #include <linux/writeback.h> |
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/bio.h> | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * Default IO end handler for temporary BJ_IO buffer_heads. | 31 | * Default IO end handler for temporary BJ_IO buffer_heads. |
@@ -137,7 +138,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
137 | set_buffer_ordered(bh); | 138 | set_buffer_ordered(bh); |
138 | barrier_done = 1; | 139 | barrier_done = 1; |
139 | } | 140 | } |
140 | ret = submit_bh(WRITE, bh); | 141 | ret = submit_bh(WRITE_SYNC, bh); |
141 | if (barrier_done) | 142 | if (barrier_done) |
142 | clear_buffer_ordered(bh); | 143 | clear_buffer_ordered(bh); |
143 | 144 | ||
@@ -158,7 +159,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
158 | lock_buffer(bh); | 159 | lock_buffer(bh); |
159 | set_buffer_uptodate(bh); | 160 | set_buffer_uptodate(bh); |
160 | clear_buffer_dirty(bh); | 161 | clear_buffer_dirty(bh); |
161 | ret = submit_bh(WRITE, bh); | 162 | ret = submit_bh(WRITE_SYNC, bh); |
162 | } | 163 | } |
163 | *cbh = bh; | 164 | *cbh = bh; |
164 | return ret; | 165 | return ret; |
@@ -168,12 +169,34 @@ static int journal_submit_commit_record(journal_t *journal, | |||
168 | * This function along with journal_submit_commit_record | 169 | * This function along with journal_submit_commit_record |
169 | * allows to write the commit record asynchronously. | 170 | * allows to write the commit record asynchronously. |
170 | */ | 171 | */ |
171 | static int journal_wait_on_commit_record(struct buffer_head *bh) | 172 | static int journal_wait_on_commit_record(journal_t *journal, |
173 | struct buffer_head *bh) | ||
172 | { | 174 | { |
173 | int ret = 0; | 175 | int ret = 0; |
174 | 176 | ||
177 | retry: | ||
175 | clear_buffer_dirty(bh); | 178 | clear_buffer_dirty(bh); |
176 | wait_on_buffer(bh); | 179 | wait_on_buffer(bh); |
180 | if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { | ||
181 | printk(KERN_WARNING | ||
182 | "JBD2: wait_on_commit_record: sync failed on %s - " | ||
183 | "disabling barriers\n", journal->j_devname); | ||
184 | spin_lock(&journal->j_state_lock); | ||
185 | journal->j_flags &= ~JBD2_BARRIER; | ||
186 | spin_unlock(&journal->j_state_lock); | ||
187 | |||
188 | lock_buffer(bh); | ||
189 | clear_buffer_dirty(bh); | ||
190 | set_buffer_uptodate(bh); | ||
191 | bh->b_end_io = journal_end_buffer_io_sync; | ||
192 | |||
193 | ret = submit_bh(WRITE_SYNC, bh); | ||
194 | if (ret) { | ||
195 | unlock_buffer(bh); | ||
196 | return ret; | ||
197 | } | ||
198 | goto retry; | ||
199 | } | ||
177 | 200 | ||
178 | if (unlikely(!buffer_uptodate(bh))) | 201 | if (unlikely(!buffer_uptodate(bh))) |
179 | ret = -EIO; | 202 | ret = -EIO; |
@@ -332,13 +355,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
332 | int flags; | 355 | int flags; |
333 | int err; | 356 | int err; |
334 | unsigned long long blocknr; | 357 | unsigned long long blocknr; |
358 | ktime_t start_time; | ||
359 | u64 commit_time; | ||
335 | char *tagp = NULL; | 360 | char *tagp = NULL; |
336 | journal_header_t *header; | 361 | journal_header_t *header; |
337 | journal_block_tag_t *tag = NULL; | 362 | journal_block_tag_t *tag = NULL; |
338 | int space_left = 0; | 363 | int space_left = 0; |
339 | int first_tag = 0; | 364 | int first_tag = 0; |
340 | int tag_flag; | 365 | int tag_flag; |
341 | int i; | 366 | int i, to_free = 0; |
342 | int tag_bytes = journal_tag_bytes(journal); | 367 | int tag_bytes = journal_tag_bytes(journal); |
343 | struct buffer_head *cbh = NULL; /* For transactional checksums */ | 368 | struct buffer_head *cbh = NULL; /* For transactional checksums */ |
344 | __u32 crc32_sum = ~0; | 369 | __u32 crc32_sum = ~0; |
@@ -458,6 +483,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
458 | commit_transaction->t_state = T_FLUSH; | 483 | commit_transaction->t_state = T_FLUSH; |
459 | journal->j_committing_transaction = commit_transaction; | 484 | journal->j_committing_transaction = commit_transaction; |
460 | journal->j_running_transaction = NULL; | 485 | journal->j_running_transaction = NULL; |
486 | start_time = ktime_get(); | ||
461 | commit_transaction->t_log_start = journal->j_head; | 487 | commit_transaction->t_log_start = journal->j_head; |
462 | wake_up(&journal->j_wait_transaction_locked); | 488 | wake_up(&journal->j_wait_transaction_locked); |
463 | spin_unlock(&journal->j_state_lock); | 489 | spin_unlock(&journal->j_state_lock); |
@@ -803,7 +829,7 @@ wait_for_iobuf: | |||
803 | __jbd2_journal_abort_hard(journal); | 829 | __jbd2_journal_abort_hard(journal); |
804 | } | 830 | } |
805 | if (!err && !is_journal_aborted(journal)) | 831 | if (!err && !is_journal_aborted(journal)) |
806 | err = journal_wait_on_commit_record(cbh); | 832 | err = journal_wait_on_commit_record(journal, cbh); |
807 | 833 | ||
808 | if (err) | 834 | if (err) |
809 | jbd2_journal_abort(journal, err); | 835 | jbd2_journal_abort(journal, err); |
@@ -981,14 +1007,23 @@ restart_loop: | |||
981 | J_ASSERT(commit_transaction == journal->j_committing_transaction); | 1007 | J_ASSERT(commit_transaction == journal->j_committing_transaction); |
982 | journal->j_commit_sequence = commit_transaction->t_tid; | 1008 | journal->j_commit_sequence = commit_transaction->t_tid; |
983 | journal->j_committing_transaction = NULL; | 1009 | journal->j_committing_transaction = NULL; |
984 | spin_unlock(&journal->j_state_lock); | 1010 | commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); |
985 | 1011 | ||
986 | if (journal->j_commit_callback) | 1012 | /* |
987 | journal->j_commit_callback(journal, commit_transaction); | 1013 | * weight the commit time higher than the average time so we don't |
1014 | * react too strongly to vast changes in the commit time | ||
1015 | */ | ||
1016 | if (likely(journal->j_average_commit_time)) | ||
1017 | journal->j_average_commit_time = (commit_time + | ||
1018 | journal->j_average_commit_time*3) / 4; | ||
1019 | else | ||
1020 | journal->j_average_commit_time = commit_time; | ||
1021 | spin_unlock(&journal->j_state_lock); | ||
988 | 1022 | ||
989 | if (commit_transaction->t_checkpoint_list == NULL && | 1023 | if (commit_transaction->t_checkpoint_list == NULL && |
990 | commit_transaction->t_checkpoint_io_list == NULL) { | 1024 | commit_transaction->t_checkpoint_io_list == NULL) { |
991 | __jbd2_journal_drop_transaction(journal, commit_transaction); | 1025 | __jbd2_journal_drop_transaction(journal, commit_transaction); |
1026 | to_free = 1; | ||
992 | } else { | 1027 | } else { |
993 | if (journal->j_checkpoint_transactions == NULL) { | 1028 | if (journal->j_checkpoint_transactions == NULL) { |
994 | journal->j_checkpoint_transactions = commit_transaction; | 1029 | journal->j_checkpoint_transactions = commit_transaction; |
@@ -1007,11 +1042,16 @@ restart_loop: | |||
1007 | } | 1042 | } |
1008 | spin_unlock(&journal->j_list_lock); | 1043 | spin_unlock(&journal->j_list_lock); |
1009 | 1044 | ||
1045 | if (journal->j_commit_callback) | ||
1046 | journal->j_commit_callback(journal, commit_transaction); | ||
1047 | |||
1010 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", | 1048 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", |
1011 | journal->j_devname, journal->j_commit_sequence, | 1049 | journal->j_devname, commit_transaction->t_tid, |
1012 | journal->j_tail_sequence); | 1050 | journal->j_tail_sequence); |
1013 | jbd_debug(1, "JBD: commit %d complete, head %d\n", | 1051 | jbd_debug(1, "JBD: commit %d complete, head %d\n", |
1014 | journal->j_commit_sequence, journal->j_tail_sequence); | 1052 | journal->j_commit_sequence, journal->j_tail_sequence); |
1053 | if (to_free) | ||
1054 | kfree(commit_transaction); | ||
1015 | 1055 | ||
1016 | wake_up(&journal->j_wait_done_commit); | 1056 | wake_up(&journal->j_wait_done_commit); |
1017 | } | 1057 | } |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index f6bff9d6f8df..56675306ed81 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -40,6 +40,7 @@ | |||
40 | 40 | ||
41 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
42 | #include <asm/page.h> | 42 | #include <asm/page.h> |
43 | #include <asm/div64.h> | ||
43 | 44 | ||
44 | EXPORT_SYMBOL(jbd2_journal_start); | 45 | EXPORT_SYMBOL(jbd2_journal_start); |
45 | EXPORT_SYMBOL(jbd2_journal_restart); | 46 | EXPORT_SYMBOL(jbd2_journal_restart); |
@@ -66,7 +67,6 @@ EXPORT_SYMBOL(jbd2_journal_update_format); | |||
66 | EXPORT_SYMBOL(jbd2_journal_check_used_features); | 67 | EXPORT_SYMBOL(jbd2_journal_check_used_features); |
67 | EXPORT_SYMBOL(jbd2_journal_check_available_features); | 68 | EXPORT_SYMBOL(jbd2_journal_check_available_features); |
68 | EXPORT_SYMBOL(jbd2_journal_set_features); | 69 | EXPORT_SYMBOL(jbd2_journal_set_features); |
69 | EXPORT_SYMBOL(jbd2_journal_create); | ||
70 | EXPORT_SYMBOL(jbd2_journal_load); | 70 | EXPORT_SYMBOL(jbd2_journal_load); |
71 | EXPORT_SYMBOL(jbd2_journal_destroy); | 71 | EXPORT_SYMBOL(jbd2_journal_destroy); |
72 | EXPORT_SYMBOL(jbd2_journal_abort); | 72 | EXPORT_SYMBOL(jbd2_journal_abort); |
@@ -132,8 +132,9 @@ static int kjournald2(void *arg) | |||
132 | journal->j_task = current; | 132 | journal->j_task = current; |
133 | wake_up(&journal->j_wait_done_commit); | 133 | wake_up(&journal->j_wait_done_commit); |
134 | 134 | ||
135 | printk(KERN_INFO "kjournald2 starting. Commit interval %ld seconds\n", | 135 | printk(KERN_INFO "kjournald2 starting: pid %d, dev %s, " |
136 | journal->j_commit_interval / HZ); | 136 | "commit interval %ld seconds\n", current->pid, |
137 | journal->j_devname, journal->j_commit_interval / HZ); | ||
137 | 138 | ||
138 | /* | 139 | /* |
139 | * And now, wait forever for commit wakeup events. | 140 | * And now, wait forever for commit wakeup events. |
@@ -650,6 +651,8 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) | |||
650 | return NULL; | 651 | return NULL; |
651 | 652 | ||
652 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); | 653 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); |
654 | if (!bh) | ||
655 | return NULL; | ||
653 | lock_buffer(bh); | 656 | lock_buffer(bh); |
654 | memset(bh->b_data, 0, journal->j_blocksize); | 657 | memset(bh->b_data, 0, journal->j_blocksize); |
655 | set_buffer_uptodate(bh); | 658 | set_buffer_uptodate(bh); |
@@ -843,6 +846,8 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v) | |||
843 | jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid)); | 846 | jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid)); |
844 | seq_printf(seq, " %ums logging transaction\n", | 847 | seq_printf(seq, " %ums logging transaction\n", |
845 | jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid)); | 848 | jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid)); |
849 | seq_printf(seq, " %luus average transaction commit time\n", | ||
850 | do_div(s->journal->j_average_commit_time, 1000)); | ||
846 | seq_printf(seq, " %lu handles per transaction\n", | 851 | seq_printf(seq, " %lu handles per transaction\n", |
847 | s->stats->u.run.rs_handle_count / s->stats->ts_tid); | 852 | s->stats->u.run.rs_handle_count / s->stats->ts_tid); |
848 | seq_printf(seq, " %lu blocks per transaction\n", | 853 | seq_printf(seq, " %lu blocks per transaction\n", |
@@ -980,6 +985,8 @@ static journal_t * journal_init_common (void) | |||
980 | spin_lock_init(&journal->j_state_lock); | 985 | spin_lock_init(&journal->j_state_lock); |
981 | 986 | ||
982 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); | 987 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); |
988 | journal->j_min_batch_time = 0; | ||
989 | journal->j_max_batch_time = 15000; /* 15ms */ | ||
983 | 990 | ||
984 | /* The journal is marked for error until we succeed with recovery! */ | 991 | /* The journal is marked for error until we succeed with recovery! */ |
985 | journal->j_flags = JBD2_ABORT; | 992 | journal->j_flags = JBD2_ABORT; |
@@ -1035,15 +1042,14 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
1035 | 1042 | ||
1036 | /* journal descriptor can store up to n blocks -bzzz */ | 1043 | /* journal descriptor can store up to n blocks -bzzz */ |
1037 | journal->j_blocksize = blocksize; | 1044 | journal->j_blocksize = blocksize; |
1045 | jbd2_stats_proc_init(journal); | ||
1038 | n = journal->j_blocksize / sizeof(journal_block_tag_t); | 1046 | n = journal->j_blocksize / sizeof(journal_block_tag_t); |
1039 | journal->j_wbufsize = n; | 1047 | journal->j_wbufsize = n; |
1040 | journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); | 1048 | journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); |
1041 | if (!journal->j_wbuf) { | 1049 | if (!journal->j_wbuf) { |
1042 | printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", | 1050 | printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", |
1043 | __func__); | 1051 | __func__); |
1044 | kfree(journal); | 1052 | goto out_err; |
1045 | journal = NULL; | ||
1046 | goto out; | ||
1047 | } | 1053 | } |
1048 | journal->j_dev = bdev; | 1054 | journal->j_dev = bdev; |
1049 | journal->j_fs_dev = fs_dev; | 1055 | journal->j_fs_dev = fs_dev; |
@@ -1053,14 +1059,22 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
1053 | p = journal->j_devname; | 1059 | p = journal->j_devname; |
1054 | while ((p = strchr(p, '/'))) | 1060 | while ((p = strchr(p, '/'))) |
1055 | *p = '!'; | 1061 | *p = '!'; |
1056 | jbd2_stats_proc_init(journal); | ||
1057 | 1062 | ||
1058 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); | 1063 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); |
1059 | J_ASSERT(bh != NULL); | 1064 | if (!bh) { |
1065 | printk(KERN_ERR | ||
1066 | "%s: Cannot get buffer for journal superblock\n", | ||
1067 | __func__); | ||
1068 | goto out_err; | ||
1069 | } | ||
1060 | journal->j_sb_buffer = bh; | 1070 | journal->j_sb_buffer = bh; |
1061 | journal->j_superblock = (journal_superblock_t *)bh->b_data; | 1071 | journal->j_superblock = (journal_superblock_t *)bh->b_data; |
1062 | out: | 1072 | |
1063 | return journal; | 1073 | return journal; |
1074 | out_err: | ||
1075 | jbd2_stats_proc_exit(journal); | ||
1076 | kfree(journal); | ||
1077 | return NULL; | ||
1064 | } | 1078 | } |
1065 | 1079 | ||
1066 | /** | 1080 | /** |
@@ -1108,9 +1122,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
1108 | if (!journal->j_wbuf) { | 1122 | if (!journal->j_wbuf) { |
1109 | printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", | 1123 | printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", |
1110 | __func__); | 1124 | __func__); |
1111 | jbd2_stats_proc_exit(journal); | 1125 | goto out_err; |
1112 | kfree(journal); | ||
1113 | return NULL; | ||
1114 | } | 1126 | } |
1115 | 1127 | ||
1116 | err = jbd2_journal_bmap(journal, 0, &blocknr); | 1128 | err = jbd2_journal_bmap(journal, 0, &blocknr); |
@@ -1118,17 +1130,24 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
1118 | if (err) { | 1130 | if (err) { |
1119 | printk(KERN_ERR "%s: Cannnot locate journal superblock\n", | 1131 | printk(KERN_ERR "%s: Cannnot locate journal superblock\n", |
1120 | __func__); | 1132 | __func__); |
1121 | jbd2_stats_proc_exit(journal); | 1133 | goto out_err; |
1122 | kfree(journal); | ||
1123 | return NULL; | ||
1124 | } | 1134 | } |
1125 | 1135 | ||
1126 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); | 1136 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); |
1127 | J_ASSERT(bh != NULL); | 1137 | if (!bh) { |
1138 | printk(KERN_ERR | ||
1139 | "%s: Cannot get buffer for journal superblock\n", | ||
1140 | __func__); | ||
1141 | goto out_err; | ||
1142 | } | ||
1128 | journal->j_sb_buffer = bh; | 1143 | journal->j_sb_buffer = bh; |
1129 | journal->j_superblock = (journal_superblock_t *)bh->b_data; | 1144 | journal->j_superblock = (journal_superblock_t *)bh->b_data; |
1130 | 1145 | ||
1131 | return journal; | 1146 | return journal; |
1147 | out_err: | ||
1148 | jbd2_stats_proc_exit(journal); | ||
1149 | kfree(journal); | ||
1150 | return NULL; | ||
1132 | } | 1151 | } |
1133 | 1152 | ||
1134 | /* | 1153 | /* |
@@ -1177,77 +1196,6 @@ static int journal_reset(journal_t *journal) | |||
1177 | } | 1196 | } |
1178 | 1197 | ||
1179 | /** | 1198 | /** |
1180 | * int jbd2_journal_create() - Initialise the new journal file | ||
1181 | * @journal: Journal to create. This structure must have been initialised | ||
1182 | * | ||
1183 | * Given a journal_t structure which tells us which disk blocks we can | ||
1184 | * use, create a new journal superblock and initialise all of the | ||
1185 | * journal fields from scratch. | ||
1186 | **/ | ||
1187 | int jbd2_journal_create(journal_t *journal) | ||
1188 | { | ||
1189 | unsigned long long blocknr; | ||
1190 | struct buffer_head *bh; | ||
1191 | journal_superblock_t *sb; | ||
1192 | int i, err; | ||
1193 | |||
1194 | if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) { | ||
1195 | printk (KERN_ERR "Journal length (%d blocks) too short.\n", | ||
1196 | journal->j_maxlen); | ||
1197 | journal_fail_superblock(journal); | ||
1198 | return -EINVAL; | ||
1199 | } | ||
1200 | |||
1201 | if (journal->j_inode == NULL) { | ||
1202 | /* | ||
1203 | * We don't know what block to start at! | ||
1204 | */ | ||
1205 | printk(KERN_EMERG | ||
1206 | "%s: creation of journal on external device!\n", | ||
1207 | __func__); | ||
1208 | BUG(); | ||
1209 | } | ||
1210 | |||
1211 | /* Zero out the entire journal on disk. We cannot afford to | ||
1212 | have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */ | ||
1213 | jbd_debug(1, "JBD: Zeroing out journal blocks...\n"); | ||
1214 | for (i = 0; i < journal->j_maxlen; i++) { | ||
1215 | err = jbd2_journal_bmap(journal, i, &blocknr); | ||
1216 | if (err) | ||
1217 | return err; | ||
1218 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); | ||
1219 | lock_buffer(bh); | ||
1220 | memset (bh->b_data, 0, journal->j_blocksize); | ||
1221 | BUFFER_TRACE(bh, "marking dirty"); | ||
1222 | mark_buffer_dirty(bh); | ||
1223 | BUFFER_TRACE(bh, "marking uptodate"); | ||
1224 | set_buffer_uptodate(bh); | ||
1225 | unlock_buffer(bh); | ||
1226 | __brelse(bh); | ||
1227 | } | ||
1228 | |||
1229 | sync_blockdev(journal->j_dev); | ||
1230 | jbd_debug(1, "JBD: journal cleared.\n"); | ||
1231 | |||
1232 | /* OK, fill in the initial static fields in the new superblock */ | ||
1233 | sb = journal->j_superblock; | ||
1234 | |||
1235 | sb->s_header.h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | ||
1236 | sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); | ||
1237 | |||
1238 | sb->s_blocksize = cpu_to_be32(journal->j_blocksize); | ||
1239 | sb->s_maxlen = cpu_to_be32(journal->j_maxlen); | ||
1240 | sb->s_first = cpu_to_be32(1); | ||
1241 | |||
1242 | journal->j_transaction_sequence = 1; | ||
1243 | |||
1244 | journal->j_flags &= ~JBD2_ABORT; | ||
1245 | journal->j_format_version = 2; | ||
1246 | |||
1247 | return journal_reset(journal); | ||
1248 | } | ||
1249 | |||
1250 | /** | ||
1251 | * void jbd2_journal_update_superblock() - Update journal sb on disk. | 1199 | * void jbd2_journal_update_superblock() - Update journal sb on disk. |
1252 | * @journal: The journal to update. | 1200 | * @journal: The journal to update. |
1253 | * @wait: Set to '0' if you don't want to wait for IO completion. | 1201 | * @wait: Set to '0' if you don't want to wait for IO completion. |
@@ -1491,7 +1439,9 @@ int jbd2_journal_destroy(journal_t *journal) | |||
1491 | spin_lock(&journal->j_list_lock); | 1439 | spin_lock(&journal->j_list_lock); |
1492 | while (journal->j_checkpoint_transactions != NULL) { | 1440 | while (journal->j_checkpoint_transactions != NULL) { |
1493 | spin_unlock(&journal->j_list_lock); | 1441 | spin_unlock(&journal->j_list_lock); |
1442 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1494 | jbd2_log_do_checkpoint(journal); | 1443 | jbd2_log_do_checkpoint(journal); |
1444 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1495 | spin_lock(&journal->j_list_lock); | 1445 | spin_lock(&journal->j_list_lock); |
1496 | } | 1446 | } |
1497 | 1447 | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 4f925a4f3d05..46b4e347ed7d 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/timer.h> | 25 | #include <linux/timer.h> |
26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/hrtimer.h> | ||
28 | 29 | ||
29 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 30 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
30 | 31 | ||
@@ -48,6 +49,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
48 | { | 49 | { |
49 | transaction->t_journal = journal; | 50 | transaction->t_journal = journal; |
50 | transaction->t_state = T_RUNNING; | 51 | transaction->t_state = T_RUNNING; |
52 | transaction->t_start_time = ktime_get(); | ||
51 | transaction->t_tid = journal->j_transaction_sequence++; | 53 | transaction->t_tid = journal->j_transaction_sequence++; |
52 | transaction->t_expires = jiffies + journal->j_commit_interval; | 54 | transaction->t_expires = jiffies + journal->j_commit_interval; |
53 | spin_lock_init(&transaction->t_handle_lock); | 55 | spin_lock_init(&transaction->t_handle_lock); |
@@ -1240,7 +1242,7 @@ int jbd2_journal_stop(handle_t *handle) | |||
1240 | { | 1242 | { |
1241 | transaction_t *transaction = handle->h_transaction; | 1243 | transaction_t *transaction = handle->h_transaction; |
1242 | journal_t *journal = transaction->t_journal; | 1244 | journal_t *journal = transaction->t_journal; |
1243 | int old_handle_count, err; | 1245 | int err; |
1244 | pid_t pid; | 1246 | pid_t pid; |
1245 | 1247 | ||
1246 | J_ASSERT(journal_current_handle() == handle); | 1248 | J_ASSERT(journal_current_handle() == handle); |
@@ -1263,24 +1265,54 @@ int jbd2_journal_stop(handle_t *handle) | |||
1263 | /* | 1265 | /* |
1264 | * Implement synchronous transaction batching. If the handle | 1266 | * Implement synchronous transaction batching. If the handle |
1265 | * was synchronous, don't force a commit immediately. Let's | 1267 | * was synchronous, don't force a commit immediately. Let's |
1266 | * yield and let another thread piggyback onto this transaction. | 1268 | * yield and let another thread piggyback onto this |
1267 | * Keep doing that while new threads continue to arrive. | 1269 | * transaction. Keep doing that while new threads continue to |
1268 | * It doesn't cost much - we're about to run a commit and sleep | 1270 | * arrive. It doesn't cost much - we're about to run a commit |
1269 | * on IO anyway. Speeds up many-threaded, many-dir operations | 1271 | * and sleep on IO anyway. Speeds up many-threaded, many-dir |
1270 | * by 30x or more... | 1272 | * operations by 30x or more... |
1273 | * | ||
1274 | * We try and optimize the sleep time against what the | ||
1275 | * underlying disk can do, instead of having a static sleep | ||
1276 | * time. This is useful for the case where our storage is so | ||
1277 | * fast that it is more optimal to go ahead and force a flush | ||
1278 | * and wait for the transaction to be committed than it is to | ||
1279 | * wait for an arbitrary amount of time for new writers to | ||
1280 | * join the transaction. We achieve this by measuring how | ||
1281 | * long it takes to commit a transaction, and compare it with | ||
1282 | * how long this transaction has been running, and if run time | ||
1283 | * < commit time then we sleep for the delta and commit. This | ||
1284 | * greatly helps super fast disks that would see slowdowns as | ||
1285 | * more threads started doing fsyncs. | ||
1271 | * | 1286 | * |
1272 | * But don't do this if this process was the most recent one to | 1287 | * But don't do this if this process was the most recent one |
1273 | * perform a synchronous write. We do this to detect the case where a | 1288 | * to perform a synchronous write. We do this to detect the |
1274 | * single process is doing a stream of sync writes. No point in waiting | 1289 | * case where a single process is doing a stream of sync |
1275 | * for joiners in that case. | 1290 | * writes. No point in waiting for joiners in that case. |
1276 | */ | 1291 | */ |
1277 | pid = current->pid; | 1292 | pid = current->pid; |
1278 | if (handle->h_sync && journal->j_last_sync_writer != pid) { | 1293 | if (handle->h_sync && journal->j_last_sync_writer != pid) { |
1294 | u64 commit_time, trans_time; | ||
1295 | |||
1279 | journal->j_last_sync_writer = pid; | 1296 | journal->j_last_sync_writer = pid; |
1280 | do { | 1297 | |
1281 | old_handle_count = transaction->t_handle_count; | 1298 | spin_lock(&journal->j_state_lock); |
1282 | schedule_timeout_uninterruptible(1); | 1299 | commit_time = journal->j_average_commit_time; |
1283 | } while (old_handle_count != transaction->t_handle_count); | 1300 | spin_unlock(&journal->j_state_lock); |
1301 | |||
1302 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), | ||
1303 | transaction->t_start_time)); | ||
1304 | |||
1305 | commit_time = max_t(u64, commit_time, | ||
1306 | 1000*journal->j_min_batch_time); | ||
1307 | commit_time = min_t(u64, commit_time, | ||
1308 | 1000*journal->j_max_batch_time); | ||
1309 | |||
1310 | if (trans_time < commit_time) { | ||
1311 | ktime_t expires = ktime_add_ns(ktime_get(), | ||
1312 | commit_time); | ||
1313 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1314 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
1315 | } | ||
1284 | } | 1316 | } |
1285 | 1317 | ||
1286 | current->journal_info = NULL; | 1318 | current->journal_info = NULL; |