aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2008-01-30 19:25:51 -0500
committerPaul Mackerras <paulus@samba.org>2008-01-30 19:25:51 -0500
commitbd45ac0c5daae35e7c71138172e63df5cf644cf6 (patch)
tree5eb5a599bf6a9d7a8a34e802db932aa9e9555de4 /fs/jbd2
parent4eece4ccf997c0e6d8fdad3d842e37b16b8d705f (diff)
parent5bdeae46be6dfe9efa44a548bd622af325f4bdb4 (diff)
Merge branch 'linux-2.6'
Diffstat (limited to 'fs/jbd2')
-rw-r--r--fs/jbd2/checkpoint.c25
-rw-r--r--fs/jbd2/commit.c257
-rw-r--r--fs/jbd2/journal.c368
-rw-r--r--fs/jbd2/recovery.c151
-rw-r--r--fs/jbd2/revoke.c6
-rw-r--r--fs/jbd2/transaction.c34
6 files changed, 759 insertions, 82 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 3fccde7ba008..6914598022ce 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -232,7 +232,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
232 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 232 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
233 */ 233 */
234static int __process_buffer(journal_t *journal, struct journal_head *jh, 234static int __process_buffer(journal_t *journal, struct journal_head *jh,
235 struct buffer_head **bhs, int *batch_count) 235 struct buffer_head **bhs, int *batch_count,
236 transaction_t *transaction)
236{ 237{
237 struct buffer_head *bh = jh2bh(jh); 238 struct buffer_head *bh = jh2bh(jh);
238 int ret = 0; 239 int ret = 0;
@@ -250,6 +251,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
250 transaction_t *t = jh->b_transaction; 251 transaction_t *t = jh->b_transaction;
251 tid_t tid = t->t_tid; 252 tid_t tid = t->t_tid;
252 253
254 transaction->t_chp_stats.cs_forced_to_close++;
253 spin_unlock(&journal->j_list_lock); 255 spin_unlock(&journal->j_list_lock);
254 jbd_unlock_bh_state(bh); 256 jbd_unlock_bh_state(bh);
255 jbd2_log_start_commit(journal, tid); 257 jbd2_log_start_commit(journal, tid);
@@ -279,6 +281,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
279 bhs[*batch_count] = bh; 281 bhs[*batch_count] = bh;
280 __buffer_relink_io(jh); 282 __buffer_relink_io(jh);
281 jbd_unlock_bh_state(bh); 283 jbd_unlock_bh_state(bh);
284 transaction->t_chp_stats.cs_written++;
282 (*batch_count)++; 285 (*batch_count)++;
283 if (*batch_count == NR_BATCH) { 286 if (*batch_count == NR_BATCH) {
284 spin_unlock(&journal->j_list_lock); 287 spin_unlock(&journal->j_list_lock);
@@ -322,6 +325,8 @@ int jbd2_log_do_checkpoint(journal_t *journal)
322 if (!journal->j_checkpoint_transactions) 325 if (!journal->j_checkpoint_transactions)
323 goto out; 326 goto out;
324 transaction = journal->j_checkpoint_transactions; 327 transaction = journal->j_checkpoint_transactions;
328 if (transaction->t_chp_stats.cs_chp_time == 0)
329 transaction->t_chp_stats.cs_chp_time = jiffies;
325 this_tid = transaction->t_tid; 330 this_tid = transaction->t_tid;
326restart: 331restart:
327 /* 332 /*
@@ -346,8 +351,10 @@ restart:
346 retry = 1; 351 retry = 1;
347 break; 352 break;
348 } 353 }
349 retry = __process_buffer(journal, jh, bhs,&batch_count); 354 retry = __process_buffer(journal, jh, bhs, &batch_count,
350 if (!retry && lock_need_resched(&journal->j_list_lock)){ 355 transaction);
356 if (!retry && (need_resched() ||
357 spin_needbreak(&journal->j_list_lock))) {
351 spin_unlock(&journal->j_list_lock); 358 spin_unlock(&journal->j_list_lock);
352 retry = 1; 359 retry = 1;
353 break; 360 break;
@@ -602,15 +609,15 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
602 609
603 /* 610 /*
604 * There is one special case to worry about: if we have just pulled the 611 * There is one special case to worry about: if we have just pulled the
605 * buffer off a committing transaction's forget list, then even if the 612 * buffer off a running or committing transaction's checkpoint list,
606 * checkpoint list is empty, the transaction obviously cannot be 613 * then even if the checkpoint list is empty, the transaction obviously
607 * dropped! 614 * cannot be dropped!
608 * 615 *
609 * The locking here around j_committing_transaction is a bit sleazy. 616 * The locking here around t_state is a bit sleazy.
610 * See the comment at the end of jbd2_journal_commit_transaction(). 617 * See the comment at the end of jbd2_journal_commit_transaction().
611 */ 618 */
612 if (transaction == journal->j_committing_transaction) { 619 if (transaction->t_state != T_FINISHED) {
613 JBUFFER_TRACE(jh, "belongs to committing transaction"); 620 JBUFFER_TRACE(jh, "belongs to running/committing transaction");
614 goto out; 621 goto out;
615 } 622 }
616 623
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6986f334c643..4f302d279279 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -20,6 +20,8 @@
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/jiffies.h>
24#include <linux/crc32.h>
23 25
24/* 26/*
25 * Default IO end handler for temporary BJ_IO buffer_heads. 27 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -92,19 +94,23 @@ static int inverted_lock(journal_t *journal, struct buffer_head *bh)
92 return 1; 94 return 1;
93} 95}
94 96
95/* Done it all: now write the commit record. We should have 97/*
98 * Done it all: now submit the commit record. We should have
96 * cleaned up our previous buffers by now, so if we are in abort 99 * cleaned up our previous buffers by now, so if we are in abort
97 * mode we can now just skip the rest of the journal write 100 * mode we can now just skip the rest of the journal write
98 * entirely. 101 * entirely.
99 * 102 *
100 * Returns 1 if the journal needs to be aborted or 0 on success 103 * Returns 1 if the journal needs to be aborted or 0 on success
101 */ 104 */
102static int journal_write_commit_record(journal_t *journal, 105static int journal_submit_commit_record(journal_t *journal,
103 transaction_t *commit_transaction) 106 transaction_t *commit_transaction,
107 struct buffer_head **cbh,
108 __u32 crc32_sum)
104{ 109{
105 struct journal_head *descriptor; 110 struct journal_head *descriptor;
111 struct commit_header *tmp;
106 struct buffer_head *bh; 112 struct buffer_head *bh;
107 int i, ret; 113 int ret;
108 int barrier_done = 0; 114 int barrier_done = 0;
109 115
110 if (is_journal_aborted(journal)) 116 if (is_journal_aborted(journal))
@@ -116,21 +122,33 @@ static int journal_write_commit_record(journal_t *journal,
116 122
117 bh = jh2bh(descriptor); 123 bh = jh2bh(descriptor);
118 124
119 /* AKPM: buglet - add `i' to tmp! */ 125 tmp = (struct commit_header *)bh->b_data;
120 for (i = 0; i < bh->b_size; i += 512) { 126 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
121 journal_header_t *tmp = (journal_header_t*)bh->b_data; 127 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
122 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 128 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
123 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); 129
124 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); 130 if (JBD2_HAS_COMPAT_FEATURE(journal,
131 JBD2_FEATURE_COMPAT_CHECKSUM)) {
132 tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
133 tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
134 tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
125 } 135 }
126 136
127 JBUFFER_TRACE(descriptor, "write commit block"); 137 JBUFFER_TRACE(descriptor, "submit commit block");
138 lock_buffer(bh);
139
128 set_buffer_dirty(bh); 140 set_buffer_dirty(bh);
129 if (journal->j_flags & JBD2_BARRIER) { 141 set_buffer_uptodate(bh);
142 bh->b_end_io = journal_end_buffer_io_sync;
143
144 if (journal->j_flags & JBD2_BARRIER &&
145 !JBD2_HAS_COMPAT_FEATURE(journal,
146 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
130 set_buffer_ordered(bh); 147 set_buffer_ordered(bh);
131 barrier_done = 1; 148 barrier_done = 1;
132 } 149 }
133 ret = sync_dirty_buffer(bh); 150 ret = submit_bh(WRITE, bh);
151
134 /* is it possible for another commit to fail at roughly 152 /* is it possible for another commit to fail at roughly
135 * the same time as this one? If so, we don't want to 153 * the same time as this one? If so, we don't want to
136 * trust the barrier flag in the super, but instead want 154 * trust the barrier flag in the super, but instead want
@@ -151,14 +169,72 @@ static int journal_write_commit_record(journal_t *journal,
151 clear_buffer_ordered(bh); 169 clear_buffer_ordered(bh);
152 set_buffer_uptodate(bh); 170 set_buffer_uptodate(bh);
153 set_buffer_dirty(bh); 171 set_buffer_dirty(bh);
154 ret = sync_dirty_buffer(bh); 172 ret = submit_bh(WRITE, bh);
155 } 173 }
156 put_bh(bh); /* One for getblk() */ 174 *cbh = bh;
157 jbd2_journal_put_journal_head(descriptor); 175 return ret;
176}
177
178/*
179 * This function along with journal_submit_commit_record
180 * allows to write the commit record asynchronously.
181 */
182static int journal_wait_on_commit_record(struct buffer_head *bh)
183{
184 int ret = 0;
185
186 clear_buffer_dirty(bh);
187 wait_on_buffer(bh);
188
189 if (unlikely(!buffer_uptodate(bh)))
190 ret = -EIO;
191 put_bh(bh); /* One for getblk() */
192 jbd2_journal_put_journal_head(bh2jh(bh));
158 193
159 return (ret == -EIO); 194 return ret;
160} 195}
161 196
197/*
198 * Wait for all submitted IO to complete.
199 */
200static int journal_wait_on_locked_list(journal_t *journal,
201 transaction_t *commit_transaction)
202{
203 int ret = 0;
204 struct journal_head *jh;
205
206 while (commit_transaction->t_locked_list) {
207 struct buffer_head *bh;
208
209 jh = commit_transaction->t_locked_list->b_tprev;
210 bh = jh2bh(jh);
211 get_bh(bh);
212 if (buffer_locked(bh)) {
213 spin_unlock(&journal->j_list_lock);
214 wait_on_buffer(bh);
215 if (unlikely(!buffer_uptodate(bh)))
216 ret = -EIO;
217 spin_lock(&journal->j_list_lock);
218 }
219 if (!inverted_lock(journal, bh)) {
220 put_bh(bh);
221 spin_lock(&journal->j_list_lock);
222 continue;
223 }
224 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
225 __jbd2_journal_unfile_buffer(jh);
226 jbd_unlock_bh_state(bh);
227 jbd2_journal_remove_journal_head(bh);
228 put_bh(bh);
229 } else {
230 jbd_unlock_bh_state(bh);
231 }
232 put_bh(bh);
233 cond_resched_lock(&journal->j_list_lock);
234 }
235 return ret;
236 }
237
162static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) 238static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
163{ 239{
164 int i; 240 int i;
@@ -265,7 +341,7 @@ write_out_data:
265 put_bh(bh); 341 put_bh(bh);
266 } 342 }
267 343
268 if (lock_need_resched(&journal->j_list_lock)) { 344 if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
269 spin_unlock(&journal->j_list_lock); 345 spin_unlock(&journal->j_list_lock);
270 goto write_out_data; 346 goto write_out_data;
271 } 347 }
@@ -274,7 +350,21 @@ write_out_data:
274 journal_do_submit_data(wbuf, bufs); 350 journal_do_submit_data(wbuf, bufs);
275} 351}
276 352
277static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, 353static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
354{
355 struct page *page = bh->b_page;
356 char *addr;
357 __u32 checksum;
358
359 addr = kmap_atomic(page, KM_USER0);
360 checksum = crc32_be(crc32_sum,
361 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
362 kunmap_atomic(addr, KM_USER0);
363
364 return checksum;
365}
366
367static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
278 unsigned long long block) 368 unsigned long long block)
279{ 369{
280 tag->t_blocknr = cpu_to_be32(block & (u32)~0); 370 tag->t_blocknr = cpu_to_be32(block & (u32)~0);
@@ -290,6 +380,7 @@ static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
290 */ 380 */
291void jbd2_journal_commit_transaction(journal_t *journal) 381void jbd2_journal_commit_transaction(journal_t *journal)
292{ 382{
383 struct transaction_stats_s stats;
293 transaction_t *commit_transaction; 384 transaction_t *commit_transaction;
294 struct journal_head *jh, *new_jh, *descriptor; 385 struct journal_head *jh, *new_jh, *descriptor;
295 struct buffer_head **wbuf = journal->j_wbuf; 386 struct buffer_head **wbuf = journal->j_wbuf;
@@ -305,6 +396,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
305 int tag_flag; 396 int tag_flag;
306 int i; 397 int i;
307 int tag_bytes = journal_tag_bytes(journal); 398 int tag_bytes = journal_tag_bytes(journal);
399 struct buffer_head *cbh = NULL; /* For transactional checksums */
400 __u32 crc32_sum = ~0;
308 401
309 /* 402 /*
310 * First job: lock down the current transaction and wait for 403 * First job: lock down the current transaction and wait for
@@ -337,6 +430,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
337 spin_lock(&journal->j_state_lock); 430 spin_lock(&journal->j_state_lock);
338 commit_transaction->t_state = T_LOCKED; 431 commit_transaction->t_state = T_LOCKED;
339 432
433 stats.u.run.rs_wait = commit_transaction->t_max_wait;
434 stats.u.run.rs_locked = jiffies;
435 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
436 stats.u.run.rs_locked);
437
340 spin_lock(&commit_transaction->t_handle_lock); 438 spin_lock(&commit_transaction->t_handle_lock);
341 while (commit_transaction->t_updates) { 439 while (commit_transaction->t_updates) {
342 DEFINE_WAIT(wait); 440 DEFINE_WAIT(wait);
@@ -407,6 +505,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
407 */ 505 */
408 jbd2_journal_switch_revoke_table(journal); 506 jbd2_journal_switch_revoke_table(journal);
409 507
508 stats.u.run.rs_flushing = jiffies;
509 stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
510 stats.u.run.rs_flushing);
511
410 commit_transaction->t_state = T_FLUSH; 512 commit_transaction->t_state = T_FLUSH;
411 journal->j_committing_transaction = commit_transaction; 513 journal->j_committing_transaction = commit_transaction;
412 journal->j_running_transaction = NULL; 514 journal->j_running_transaction = NULL;
@@ -440,38 +542,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
440 journal_submit_data_buffers(journal, commit_transaction); 542 journal_submit_data_buffers(journal, commit_transaction);
441 543
442 /* 544 /*
443 * Wait for all previously submitted IO to complete. 545 * Wait for all previously submitted IO to complete if commit
546 * record is to be written synchronously.
444 */ 547 */
445 spin_lock(&journal->j_list_lock); 548 spin_lock(&journal->j_list_lock);
446 while (commit_transaction->t_locked_list) { 549 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
447 struct buffer_head *bh; 550 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
551 err = journal_wait_on_locked_list(journal,
552 commit_transaction);
448 553
449 jh = commit_transaction->t_locked_list->b_tprev;
450 bh = jh2bh(jh);
451 get_bh(bh);
452 if (buffer_locked(bh)) {
453 spin_unlock(&journal->j_list_lock);
454 wait_on_buffer(bh);
455 if (unlikely(!buffer_uptodate(bh)))
456 err = -EIO;
457 spin_lock(&journal->j_list_lock);
458 }
459 if (!inverted_lock(journal, bh)) {
460 put_bh(bh);
461 spin_lock(&journal->j_list_lock);
462 continue;
463 }
464 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
465 __jbd2_journal_unfile_buffer(jh);
466 jbd_unlock_bh_state(bh);
467 jbd2_journal_remove_journal_head(bh);
468 put_bh(bh);
469 } else {
470 jbd_unlock_bh_state(bh);
471 }
472 put_bh(bh);
473 cond_resched_lock(&journal->j_list_lock);
474 }
475 spin_unlock(&journal->j_list_lock); 554 spin_unlock(&journal->j_list_lock);
476 555
477 if (err) 556 if (err)
@@ -498,6 +577,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
498 */ 577 */
499 commit_transaction->t_state = T_COMMIT; 578 commit_transaction->t_state = T_COMMIT;
500 579
580 stats.u.run.rs_logging = jiffies;
581 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
582 stats.u.run.rs_logging);
583 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
584 stats.u.run.rs_blocks_logged = 0;
585
501 descriptor = NULL; 586 descriptor = NULL;
502 bufs = 0; 587 bufs = 0;
503 while (commit_transaction->t_buffers) { 588 while (commit_transaction->t_buffers) {
@@ -639,6 +724,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
639start_journal_io: 724start_journal_io:
640 for (i = 0; i < bufs; i++) { 725 for (i = 0; i < bufs; i++) {
641 struct buffer_head *bh = wbuf[i]; 726 struct buffer_head *bh = wbuf[i];
727 /*
728 * Compute checksum.
729 */
730 if (JBD2_HAS_COMPAT_FEATURE(journal,
731 JBD2_FEATURE_COMPAT_CHECKSUM)) {
732 crc32_sum =
733 jbd2_checksum_data(crc32_sum, bh);
734 }
735
642 lock_buffer(bh); 736 lock_buffer(bh);
643 clear_buffer_dirty(bh); 737 clear_buffer_dirty(bh);
644 set_buffer_uptodate(bh); 738 set_buffer_uptodate(bh);
@@ -646,6 +740,7 @@ start_journal_io:
646 submit_bh(WRITE, bh); 740 submit_bh(WRITE, bh);
647 } 741 }
648 cond_resched(); 742 cond_resched();
743 stats.u.run.rs_blocks_logged += bufs;
649 744
650 /* Force a new descriptor to be generated next 745 /* Force a new descriptor to be generated next
651 time round the loop. */ 746 time round the loop. */
@@ -654,6 +749,23 @@ start_journal_io:
654 } 749 }
655 } 750 }
656 751
752 /* Done it all: now write the commit record asynchronously. */
753
754 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
755 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
756 err = journal_submit_commit_record(journal, commit_transaction,
757 &cbh, crc32_sum);
758 if (err)
759 __jbd2_journal_abort_hard(journal);
760
761 spin_lock(&journal->j_list_lock);
762 err = journal_wait_on_locked_list(journal,
763 commit_transaction);
764 spin_unlock(&journal->j_list_lock);
765 if (err)
766 __jbd2_journal_abort_hard(journal);
767 }
768
657 /* Lo and behold: we have just managed to send a transaction to 769 /* Lo and behold: we have just managed to send a transaction to
658 the log. Before we can commit it, wait for the IO so far to 770 the log. Before we can commit it, wait for the IO so far to
659 complete. Control buffers being written are on the 771 complete. Control buffers being written are on the
@@ -753,8 +865,14 @@ wait_for_iobuf:
753 865
754 jbd_debug(3, "JBD: commit phase 6\n"); 866 jbd_debug(3, "JBD: commit phase 6\n");
755 867
756 if (journal_write_commit_record(journal, commit_transaction)) 868 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
757 err = -EIO; 869 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
870 err = journal_submit_commit_record(journal, commit_transaction,
871 &cbh, crc32_sum);
872 if (err)
873 __jbd2_journal_abort_hard(journal);
874 }
875 err = journal_wait_on_commit_record(cbh);
758 876
759 if (err) 877 if (err)
760 jbd2_journal_abort(journal, err); 878 jbd2_journal_abort(journal, err);
@@ -816,6 +934,7 @@ restart_loop:
816 cp_transaction = jh->b_cp_transaction; 934 cp_transaction = jh->b_cp_transaction;
817 if (cp_transaction) { 935 if (cp_transaction) {
818 JBUFFER_TRACE(jh, "remove from old cp transaction"); 936 JBUFFER_TRACE(jh, "remove from old cp transaction");
937 cp_transaction->t_chp_stats.cs_dropped++;
819 __jbd2_journal_remove_checkpoint(jh); 938 __jbd2_journal_remove_checkpoint(jh);
820 } 939 }
821 940
@@ -867,10 +986,10 @@ restart_loop:
867 } 986 }
868 spin_unlock(&journal->j_list_lock); 987 spin_unlock(&journal->j_list_lock);
869 /* 988 /*
870 * This is a bit sleazy. We borrow j_list_lock to protect 989 * This is a bit sleazy. We use j_list_lock to protect transition
871 * journal->j_committing_transaction in __jbd2_journal_remove_checkpoint. 990 * of a transaction into T_FINISHED state and calling
872 * Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but 991 * __jbd2_journal_drop_transaction(). Otherwise we could race with
873 * it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint 992 * other checkpointing code processing the transaction...
874 */ 993 */
875 spin_lock(&journal->j_state_lock); 994 spin_lock(&journal->j_state_lock);
876 spin_lock(&journal->j_list_lock); 995 spin_lock(&journal->j_list_lock);
@@ -890,6 +1009,36 @@ restart_loop:
890 1009
891 J_ASSERT(commit_transaction->t_state == T_COMMIT); 1010 J_ASSERT(commit_transaction->t_state == T_COMMIT);
892 1011
1012 commit_transaction->t_start = jiffies;
1013 stats.u.run.rs_logging = jbd2_time_diff(stats.u.run.rs_logging,
1014 commit_transaction->t_start);
1015
1016 /*
1017 * File the transaction for history
1018 */
1019 stats.ts_type = JBD2_STATS_RUN;
1020 stats.ts_tid = commit_transaction->t_tid;
1021 stats.u.run.rs_handle_count = commit_transaction->t_handle_count;
1022 spin_lock(&journal->j_history_lock);
1023 memcpy(journal->j_history + journal->j_history_cur, &stats,
1024 sizeof(stats));
1025 if (++journal->j_history_cur == journal->j_history_max)
1026 journal->j_history_cur = 0;
1027
1028 /*
1029 * Calculate overall stats
1030 */
1031 journal->j_stats.ts_tid++;
1032 journal->j_stats.u.run.rs_wait += stats.u.run.rs_wait;
1033 journal->j_stats.u.run.rs_running += stats.u.run.rs_running;
1034 journal->j_stats.u.run.rs_locked += stats.u.run.rs_locked;
1035 journal->j_stats.u.run.rs_flushing += stats.u.run.rs_flushing;
1036 journal->j_stats.u.run.rs_logging += stats.u.run.rs_logging;
1037 journal->j_stats.u.run.rs_handle_count += stats.u.run.rs_handle_count;
1038 journal->j_stats.u.run.rs_blocks += stats.u.run.rs_blocks;
1039 journal->j_stats.u.run.rs_blocks_logged += stats.u.run.rs_blocks_logged;
1040 spin_unlock(&journal->j_history_lock);
1041
893 commit_transaction->t_state = T_FINISHED; 1042 commit_transaction->t_state = T_FINISHED;
894 J_ASSERT(commit_transaction == journal->j_committing_transaction); 1043 J_ASSERT(commit_transaction == journal->j_committing_transaction);
895 journal->j_commit_sequence = commit_transaction->t_tid; 1044 journal->j_commit_sequence = commit_transaction->t_tid;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 6ddc5531587c..96ba846992e9 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -36,6 +36,7 @@
36#include <linux/poison.h> 36#include <linux/poison.h>
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/seq_file.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/page.h> 42#include <asm/page.h>
@@ -640,6 +641,312 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
640 return jbd2_journal_add_journal_head(bh); 641 return jbd2_journal_add_journal_head(bh);
641} 642}
642 643
644struct jbd2_stats_proc_session {
645 journal_t *journal;
646 struct transaction_stats_s *stats;
647 int start;
648 int max;
649};
650
651static void *jbd2_history_skip_empty(struct jbd2_stats_proc_session *s,
652 struct transaction_stats_s *ts,
653 int first)
654{
655 if (ts == s->stats + s->max)
656 ts = s->stats;
657 if (!first && ts == s->stats + s->start)
658 return NULL;
659 while (ts->ts_type == 0) {
660 ts++;
661 if (ts == s->stats + s->max)
662 ts = s->stats;
663 if (ts == s->stats + s->start)
664 return NULL;
665 }
666 return ts;
667
668}
669
670static void *jbd2_seq_history_start(struct seq_file *seq, loff_t *pos)
671{
672 struct jbd2_stats_proc_session *s = seq->private;
673 struct transaction_stats_s *ts;
674 int l = *pos;
675
676 if (l == 0)
677 return SEQ_START_TOKEN;
678 ts = jbd2_history_skip_empty(s, s->stats + s->start, 1);
679 if (!ts)
680 return NULL;
681 l--;
682 while (l) {
683 ts = jbd2_history_skip_empty(s, ++ts, 0);
684 if (!ts)
685 break;
686 l--;
687 }
688 return ts;
689}
690
691static void *jbd2_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
692{
693 struct jbd2_stats_proc_session *s = seq->private;
694 struct transaction_stats_s *ts = v;
695
696 ++*pos;
697 if (v == SEQ_START_TOKEN)
698 return jbd2_history_skip_empty(s, s->stats + s->start, 1);
699 else
700 return jbd2_history_skip_empty(s, ++ts, 0);
701}
702
703static int jbd2_seq_history_show(struct seq_file *seq, void *v)
704{
705 struct transaction_stats_s *ts = v;
706 if (v == SEQ_START_TOKEN) {
707 seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
708 "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
709 "wait", "run", "lock", "flush", "log", "hndls",
710 "block", "inlog", "ctime", "write", "drop",
711 "close");
712 return 0;
713 }
714 if (ts->ts_type == JBD2_STATS_RUN)
715 seq_printf(seq, "%-4s %-5lu %-5u %-5u %-5u %-5u %-5u "
716 "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid,
717 jiffies_to_msecs(ts->u.run.rs_wait),
718 jiffies_to_msecs(ts->u.run.rs_running),
719 jiffies_to_msecs(ts->u.run.rs_locked),
720 jiffies_to_msecs(ts->u.run.rs_flushing),
721 jiffies_to_msecs(ts->u.run.rs_logging),
722 ts->u.run.rs_handle_count,
723 ts->u.run.rs_blocks,
724 ts->u.run.rs_blocks_logged);
725 else if (ts->ts_type == JBD2_STATS_CHECKPOINT)
726 seq_printf(seq, "%-4s %-5lu %48s %-5u %-5lu %-5lu %-5lu\n",
727 "C", ts->ts_tid, " ",
728 jiffies_to_msecs(ts->u.chp.cs_chp_time),
729 ts->u.chp.cs_written, ts->u.chp.cs_dropped,
730 ts->u.chp.cs_forced_to_close);
731 else
732 J_ASSERT(0);
733 return 0;
734}
735
/* seq_file ->stop for the "history" file. */
static void jbd2_seq_history_stop(struct seq_file *seq, void *v)
{
	/* Nothing to release: the iterator walks a private snapshot. */
}
739
740static struct seq_operations jbd2_seq_history_ops = {
741 .start = jbd2_seq_history_start,
742 .next = jbd2_seq_history_next,
743 .stop = jbd2_seq_history_stop,
744 .show = jbd2_seq_history_show,
745};
746
747static int jbd2_seq_history_open(struct inode *inode, struct file *file)
748{
749 journal_t *journal = PDE(inode)->data;
750 struct jbd2_stats_proc_session *s;
751 int rc, size;
752
753 s = kmalloc(sizeof(*s), GFP_KERNEL);
754 if (s == NULL)
755 return -ENOMEM;
756 size = sizeof(struct transaction_stats_s) * journal->j_history_max;
757 s->stats = kmalloc(size, GFP_KERNEL);
758 if (s->stats == NULL) {
759 kfree(s);
760 return -ENOMEM;
761 }
762 spin_lock(&journal->j_history_lock);
763 memcpy(s->stats, journal->j_history, size);
764 s->max = journal->j_history_max;
765 s->start = journal->j_history_cur % s->max;
766 spin_unlock(&journal->j_history_lock);
767
768 rc = seq_open(file, &jbd2_seq_history_ops);
769 if (rc == 0) {
770 struct seq_file *m = file->private_data;
771 m->private = s;
772 } else {
773 kfree(s->stats);
774 kfree(s);
775 }
776 return rc;
777
778}
779
780static int jbd2_seq_history_release(struct inode *inode, struct file *file)
781{
782 struct seq_file *seq = file->private_data;
783 struct jbd2_stats_proc_session *s = seq->private;
784
785 kfree(s->stats);
786 kfree(s);
787 return seq_release(inode, file);
788}
789
790static struct file_operations jbd2_seq_history_fops = {
791 .owner = THIS_MODULE,
792 .open = jbd2_seq_history_open,
793 .read = seq_read,
794 .llseek = seq_lseek,
795 .release = jbd2_seq_history_release,
796};
797
798static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
799{
800 return *pos ? NULL : SEQ_START_TOKEN;
801}
802
803static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
804{
805 return NULL;
806}
807
808static int jbd2_seq_info_show(struct seq_file *seq, void *v)
809{
810 struct jbd2_stats_proc_session *s = seq->private;
811
812 if (v != SEQ_START_TOKEN)
813 return 0;
814 seq_printf(seq, "%lu transaction, each upto %u blocks\n",
815 s->stats->ts_tid,
816 s->journal->j_max_transaction_buffers);
817 if (s->stats->ts_tid == 0)
818 return 0;
819 seq_printf(seq, "average: \n %ums waiting for transaction\n",
820 jiffies_to_msecs(s->stats->u.run.rs_wait / s->stats->ts_tid));
821 seq_printf(seq, " %ums running transaction\n",
822 jiffies_to_msecs(s->stats->u.run.rs_running / s->stats->ts_tid));
823 seq_printf(seq, " %ums transaction was being locked\n",
824 jiffies_to_msecs(s->stats->u.run.rs_locked / s->stats->ts_tid));
825 seq_printf(seq, " %ums flushing data (in ordered mode)\n",
826 jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid));
827 seq_printf(seq, " %ums logging transaction\n",
828 jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid));
829 seq_printf(seq, " %lu handles per transaction\n",
830 s->stats->u.run.rs_handle_count / s->stats->ts_tid);
831 seq_printf(seq, " %lu blocks per transaction\n",
832 s->stats->u.run.rs_blocks / s->stats->ts_tid);
833 seq_printf(seq, " %lu logged blocks per transaction\n",
834 s->stats->u.run.rs_blocks_logged / s->stats->ts_tid);
835 return 0;
836}
837
/* seq_file ->stop for the "info" file. */
static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
{
	/* Nothing to release: ->start takes no locks or references. */
}
841
842static struct seq_operations jbd2_seq_info_ops = {
843 .start = jbd2_seq_info_start,
844 .next = jbd2_seq_info_next,
845 .stop = jbd2_seq_info_stop,
846 .show = jbd2_seq_info_show,
847};
848
849static int jbd2_seq_info_open(struct inode *inode, struct file *file)
850{
851 journal_t *journal = PDE(inode)->data;
852 struct jbd2_stats_proc_session *s;
853 int rc, size;
854
855 s = kmalloc(sizeof(*s), GFP_KERNEL);
856 if (s == NULL)
857 return -ENOMEM;
858 size = sizeof(struct transaction_stats_s);
859 s->stats = kmalloc(size, GFP_KERNEL);
860 if (s->stats == NULL) {
861 kfree(s);
862 return -ENOMEM;
863 }
864 spin_lock(&journal->j_history_lock);
865 memcpy(s->stats, &journal->j_stats, size);
866 s->journal = journal;
867 spin_unlock(&journal->j_history_lock);
868
869 rc = seq_open(file, &jbd2_seq_info_ops);
870 if (rc == 0) {
871 struct seq_file *m = file->private_data;
872 m->private = s;
873 } else {
874 kfree(s->stats);
875 kfree(s);
876 }
877 return rc;
878
879}
880
881static int jbd2_seq_info_release(struct inode *inode, struct file *file)
882{
883 struct seq_file *seq = file->private_data;
884 struct jbd2_stats_proc_session *s = seq->private;
885 kfree(s->stats);
886 kfree(s);
887 return seq_release(inode, file);
888}
889
890static struct file_operations jbd2_seq_info_fops = {
891 .owner = THIS_MODULE,
892 .open = jbd2_seq_info_open,
893 .read = seq_read,
894 .llseek = seq_lseek,
895 .release = jbd2_seq_info_release,
896};
897
898static struct proc_dir_entry *proc_jbd2_stats;
899
900static void jbd2_stats_proc_init(journal_t *journal)
901{
902 char name[BDEVNAME_SIZE];
903
904 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
905 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
906 if (journal->j_proc_entry) {
907 struct proc_dir_entry *p;
908 p = create_proc_entry("history", S_IRUGO,
909 journal->j_proc_entry);
910 if (p) {
911 p->proc_fops = &jbd2_seq_history_fops;
912 p->data = journal;
913 p = create_proc_entry("info", S_IRUGO,
914 journal->j_proc_entry);
915 if (p) {
916 p->proc_fops = &jbd2_seq_info_fops;
917 p->data = journal;
918 }
919 }
920 }
921}
922
923static void jbd2_stats_proc_exit(journal_t *journal)
924{
925 char name[BDEVNAME_SIZE];
926
927 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
928 remove_proc_entry("info", journal->j_proc_entry);
929 remove_proc_entry("history", journal->j_proc_entry);
930 remove_proc_entry(name, proc_jbd2_stats);
931}
932
933static void journal_init_stats(journal_t *journal)
934{
935 int size;
936
937 if (!proc_jbd2_stats)
938 return;
939
940 journal->j_history_max = 100;
941 size = sizeof(struct transaction_stats_s) * journal->j_history_max;
942 journal->j_history = kzalloc(size, GFP_KERNEL);
943 if (!journal->j_history) {
944 journal->j_history_max = 0;
945 return;
946 }
947 spin_lock_init(&journal->j_history_lock);
948}
949
643/* 950/*
644 * Management for journal control blocks: functions to create and 951 * Management for journal control blocks: functions to create and
645 * destroy journal_t structures, and to initialise and read existing 952 * destroy journal_t structures, and to initialise and read existing
@@ -681,6 +988,9 @@ static journal_t * journal_init_common (void)
681 kfree(journal); 988 kfree(journal);
682 goto fail; 989 goto fail;
683 } 990 }
991
992 journal_init_stats(journal);
993
684 return journal; 994 return journal;
685fail: 995fail:
686 return NULL; 996 return NULL;
@@ -735,6 +1045,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
735 journal->j_fs_dev = fs_dev; 1045 journal->j_fs_dev = fs_dev;
736 journal->j_blk_offset = start; 1046 journal->j_blk_offset = start;
737 journal->j_maxlen = len; 1047 journal->j_maxlen = len;
1048 jbd2_stats_proc_init(journal);
738 1049
739 bh = __getblk(journal->j_dev, start, journal->j_blocksize); 1050 bh = __getblk(journal->j_dev, start, journal->j_blocksize);
740 J_ASSERT(bh != NULL); 1051 J_ASSERT(bh != NULL);
@@ -773,6 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
773 1084
774 journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; 1085 journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
775 journal->j_blocksize = inode->i_sb->s_blocksize; 1086 journal->j_blocksize = inode->i_sb->s_blocksize;
1087 jbd2_stats_proc_init(journal);
776 1088
777 /* journal descriptor can store up to n blocks -bzzz */ 1089 /* journal descriptor can store up to n blocks -bzzz */
778 n = journal->j_blocksize / sizeof(journal_block_tag_t); 1090 n = journal->j_blocksize / sizeof(journal_block_tag_t);
@@ -1153,6 +1465,8 @@ void jbd2_journal_destroy(journal_t *journal)
1153 brelse(journal->j_sb_buffer); 1465 brelse(journal->j_sb_buffer);
1154 } 1466 }
1155 1467
1468 if (journal->j_proc_entry)
1469 jbd2_stats_proc_exit(journal);
1156 if (journal->j_inode) 1470 if (journal->j_inode)
1157 iput(journal->j_inode); 1471 iput(journal->j_inode);
1158 if (journal->j_revoke) 1472 if (journal->j_revoke)
@@ -1264,6 +1578,32 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1264 return 1; 1578 return 1;
1265} 1579}
1266 1580
1581/*
1582 * jbd2_journal_clear_features () - Clear a given journal feature in the
1583 * superblock
1584 * @journal: Journal to act on.
1585 * @compat: bitmask of compatible features
1586 * @ro: bitmask of features that force read-only mount
1587 * @incompat: bitmask of incompatible features
1588 *
1589 * Clear a given journal feature as present on the
1590 * superblock.
1591 */
1592void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
1593 unsigned long ro, unsigned long incompat)
1594{
1595 journal_superblock_t *sb;
1596
1597 jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
1598 compat, ro, incompat);
1599
1600 sb = journal->j_superblock;
1601
1602 sb->s_feature_compat &= ~cpu_to_be32(compat);
1603 sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
1604 sb->s_feature_incompat &= ~cpu_to_be32(incompat);
1605}
1606EXPORT_SYMBOL(jbd2_journal_clear_features);
1267 1607
1268/** 1608/**
1269 * int jbd2_journal_update_format () - Update on-disk journal structure. 1609 * int jbd2_journal_update_format () - Update on-disk journal structure.
@@ -1633,7 +1973,7 @@ static int journal_init_jbd2_journal_head_cache(void)
1633 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", 1973 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
1634 sizeof(struct journal_head), 1974 sizeof(struct journal_head),
1635 0, /* offset */ 1975 0, /* offset */
1636 0, /* flags */ 1976 SLAB_TEMPORARY, /* flags */
1637 NULL); /* ctor */ 1977 NULL); /* ctor */
1638 retval = 0; 1978 retval = 0;
1639 if (jbd2_journal_head_cache == 0) { 1979 if (jbd2_journal_head_cache == 0) {
@@ -1900,6 +2240,28 @@ static void __exit jbd2_remove_debugfs_entry(void)
1900 2240
1901#endif 2241#endif
1902 2242
#ifdef CONFIG_PROC_FS

#define JBD2_STATS_PROC_NAME "fs/jbd2"

/* Create the jbd2 statistics root directory and remember it. */
static void __init jbd2_create_jbd_stats_proc_entry(void)
{
	proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
}

/* Remove the statistics root directory, provided it was created. */
static void __exit jbd2_remove_jbd_stats_proc_entry(void)
{
	if (!proc_jbd2_stats)
		return;

	remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
}

#else /* !CONFIG_PROC_FS */

/* Without procfs there is nothing to create or remove. */
#define jbd2_create_jbd_stats_proc_entry() do {} while (0)
#define jbd2_remove_jbd_stats_proc_entry() do {} while (0)

#endif /* CONFIG_PROC_FS */
2264
1903struct kmem_cache *jbd2_handle_cache; 2265struct kmem_cache *jbd2_handle_cache;
1904 2266
1905static int __init journal_init_handle_cache(void) 2267static int __init journal_init_handle_cache(void)
@@ -1907,7 +2269,7 @@ static int __init journal_init_handle_cache(void)
1907 jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle", 2269 jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle",
1908 sizeof(handle_t), 2270 sizeof(handle_t),
1909 0, /* offset */ 2271 0, /* offset */
1910 0, /* flags */ 2272 SLAB_TEMPORARY, /* flags */
1911 NULL); /* ctor */ 2273 NULL); /* ctor */
1912 if (jbd2_handle_cache == NULL) { 2274 if (jbd2_handle_cache == NULL) {
1913 printk(KERN_EMERG "JBD: failed to create handle cache\n"); 2275 printk(KERN_EMERG "JBD: failed to create handle cache\n");
@@ -1955,6 +2317,7 @@ static int __init journal_init(void)
1955 if (ret != 0) 2317 if (ret != 0)
1956 jbd2_journal_destroy_caches(); 2318 jbd2_journal_destroy_caches();
1957 jbd2_create_debugfs_entry(); 2319 jbd2_create_debugfs_entry();
2320 jbd2_create_jbd_stats_proc_entry();
1958 return ret; 2321 return ret;
1959} 2322}
1960 2323
@@ -1966,6 +2329,7 @@ static void __exit journal_exit(void)
1966 printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); 2329 printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
1967#endif 2330#endif
1968 jbd2_remove_debugfs_entry(); 2331 jbd2_remove_debugfs_entry();
2332 jbd2_remove_jbd_stats_proc_entry();
1969 jbd2_journal_destroy_caches(); 2333 jbd2_journal_destroy_caches();
1970} 2334}
1971 2335
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index d0ce627539ef..921680663fa2 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -21,6 +21,7 @@
21#include <linux/jbd2.h> 21#include <linux/jbd2.h>
22#include <linux/errno.h> 22#include <linux/errno.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/crc32.h>
24#endif 25#endif
25 26
26/* 27/*
@@ -316,6 +317,37 @@ static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag
316 return block; 317 return block;
317} 318}
318 319
320/*
321 * calc_chksums calculates the checksums for the blocks described in the
322 * descriptor block.
323 */
324static int calc_chksums(journal_t *journal, struct buffer_head *bh,
325 unsigned long *next_log_block, __u32 *crc32_sum)
326{
327 int i, num_blks, err;
328 unsigned long io_block;
329 struct buffer_head *obh;
330
331 num_blks = count_tags(journal, bh);
332 /* Calculate checksum of the descriptor block. */
333 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
334
335 for (i = 0; i < num_blks; i++) {
336 io_block = (*next_log_block)++;
337 wrap(journal, *next_log_block);
338 err = jread(&obh, journal, io_block);
339 if (err) {
340 printk(KERN_ERR "JBD: IO error %d recovering block "
341 "%lu in log\n", err, io_block);
342 return 1;
343 } else {
344 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
345 obh->b_size);
346 }
347 }
348 return 0;
349}
350
319static int do_one_pass(journal_t *journal, 351static int do_one_pass(journal_t *journal,
320 struct recovery_info *info, enum passtype pass) 352 struct recovery_info *info, enum passtype pass)
321{ 353{
@@ -328,6 +360,7 @@ static int do_one_pass(journal_t *journal,
328 unsigned int sequence; 360 unsigned int sequence;
329 int blocktype; 361 int blocktype;
330 int tag_bytes = journal_tag_bytes(journal); 362 int tag_bytes = journal_tag_bytes(journal);
363 __u32 crc32_sum = ~0; /* Transactional Checksums */
331 364
332 /* Precompute the maximum metadata descriptors in a descriptor block */ 365 /* Precompute the maximum metadata descriptors in a descriptor block */
333 int MAX_BLOCKS_PER_DESC; 366 int MAX_BLOCKS_PER_DESC;
@@ -419,12 +452,26 @@ static int do_one_pass(journal_t *journal,
419 switch(blocktype) { 452 switch(blocktype) {
420 case JBD2_DESCRIPTOR_BLOCK: 453 case JBD2_DESCRIPTOR_BLOCK:
421 /* If it is a valid descriptor block, replay it 454 /* If it is a valid descriptor block, replay it
422 * in pass REPLAY; otherwise, just skip over the 455 * in pass REPLAY; if journal_checksums enabled, then
423 * blocks it describes. */ 456 * calculate checksums in PASS_SCAN, otherwise,
457 * just skip over the blocks it describes. */
424 if (pass != PASS_REPLAY) { 458 if (pass != PASS_REPLAY) {
459 if (pass == PASS_SCAN &&
460 JBD2_HAS_COMPAT_FEATURE(journal,
461 JBD2_FEATURE_COMPAT_CHECKSUM) &&
462 !info->end_transaction) {
463 if (calc_chksums(journal, bh,
464 &next_log_block,
465 &crc32_sum)) {
466 put_bh(bh);
467 break;
468 }
469 put_bh(bh);
470 continue;
471 }
425 next_log_block += count_tags(journal, bh); 472 next_log_block += count_tags(journal, bh);
426 wrap(journal, next_log_block); 473 wrap(journal, next_log_block);
427 brelse(bh); 474 put_bh(bh);
428 continue; 475 continue;
429 } 476 }
430 477
@@ -516,9 +563,96 @@ static int do_one_pass(journal_t *journal,
516 continue; 563 continue;
517 564
518 case JBD2_COMMIT_BLOCK: 565 case JBD2_COMMIT_BLOCK:
519 /* Found an expected commit block: not much to 566 /* How to differentiate between interrupted commit
520 * do other than move on to the next sequence 567 * and journal corruption ?
568 *
569 * {nth transaction}
570 * Checksum Verification Failed
571 * |
572 * ____________________
573 * | |
574 * async_commit sync_commit
575 * | |
576 * | GO TO NEXT "Journal Corruption"
577 * | TRANSACTION
578 * |
579 * {(n+1)th transaction}
580 * |
581 * _______|______________
582 * | |
583 * Commit block found Commit block not found
584 * | |
585 * "Journal Corruption" |
586 * _____________|_________
587 * | |
588 * nth trans corrupt OR nth trans
589 * and (n+1)th interrupted interrupted
590 * before commit block
591 * could reach the disk.
592 * (Cannot find the difference in above
593 * mentioned conditions. Hence assume
594 * "Interrupted Commit".)
595 */
596
597 /* Found an expected commit block: if checksums
598 * are present verify them in PASS_SCAN; else not
599 * much to do other than move on to the next sequence
521 * number. */ 600 * number. */
601 if (pass == PASS_SCAN &&
602 JBD2_HAS_COMPAT_FEATURE(journal,
603 JBD2_FEATURE_COMPAT_CHECKSUM)) {
604 int chksum_err, chksum_seen;
605 struct commit_header *cbh =
606 (struct commit_header *)bh->b_data;
607 unsigned found_chksum =
608 be32_to_cpu(cbh->h_chksum[0]);
609
610 chksum_err = chksum_seen = 0;
611
612 if (info->end_transaction) {
613 printk(KERN_ERR "JBD: Transaction %u "
614 "found to be corrupt.\n",
615 next_commit_ID - 1);
616 brelse(bh);
617 break;
618 }
619
620 if (crc32_sum == found_chksum &&
621 cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
622 cbh->h_chksum_size ==
623 JBD2_CRC32_CHKSUM_SIZE)
624 chksum_seen = 1;
625 else if (!(cbh->h_chksum_type == 0 &&
626 cbh->h_chksum_size == 0 &&
627 found_chksum == 0 &&
628 !chksum_seen))
629 /*
630 * If fs is mounted using an old kernel and then
631 * kernel with journal_chksum is used then we
632 * get a situation where the journal flag has
633 * checksum flag set but checksums are not
634 * present i.e chksum = 0, in the individual
635 * commit blocks.
636 * Hence to avoid checksum failures, in this
637 * situation, this extra check is added.
638 */
639 chksum_err = 1;
640
641 if (chksum_err) {
642 info->end_transaction = next_commit_ID;
643
644 if (!JBD2_HAS_COMPAT_FEATURE(journal,
645 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
646 printk(KERN_ERR
647 "JBD: Transaction %u "
648 "found to be corrupt.\n",
649 next_commit_ID);
650 brelse(bh);
651 break;
652 }
653 }
654 crc32_sum = ~0;
655 }
522 brelse(bh); 656 brelse(bh);
523 next_commit_ID++; 657 next_commit_ID++;
524 continue; 658 continue;
@@ -554,9 +688,10 @@ static int do_one_pass(journal_t *journal,
554 * transaction marks the end of the valid log. 688 * transaction marks the end of the valid log.
555 */ 689 */
556 690
557 if (pass == PASS_SCAN) 691 if (pass == PASS_SCAN) {
558 info->end_transaction = next_commit_ID; 692 if (!info->end_transaction)
559 else { 693 info->end_transaction = next_commit_ID;
694 } else {
560 /* It's really bad news if different passes end up at 695 /* It's really bad news if different passes end up at
561 * different places (but possible due to IO errors). */ 696 * different places (but possible due to IO errors). */
562 if (info->end_transaction != next_commit_ID) { 697 if (info->end_transaction != next_commit_ID) {
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 3595fd432d5b..df36f42e19e1 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -171,13 +171,15 @@ int __init jbd2_journal_init_revoke_caches(void)
171{ 171{
172 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", 172 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
173 sizeof(struct jbd2_revoke_record_s), 173 sizeof(struct jbd2_revoke_record_s),
174 0, SLAB_HWCACHE_ALIGN, NULL); 174 0,
175 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
176 NULL);
175 if (jbd2_revoke_record_cache == 0) 177 if (jbd2_revoke_record_cache == 0)
176 return -ENOMEM; 178 return -ENOMEM;
177 179
178 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", 180 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
179 sizeof(struct jbd2_revoke_table_s), 181 sizeof(struct jbd2_revoke_table_s),
180 0, 0, NULL); 182 0, SLAB_TEMPORARY, NULL);
181 if (jbd2_revoke_table_cache == 0) { 183 if (jbd2_revoke_table_cache == 0) {
182 kmem_cache_destroy(jbd2_revoke_record_cache); 184 kmem_cache_destroy(jbd2_revoke_record_cache);
183 jbd2_revoke_record_cache = NULL; 185 jbd2_revoke_record_cache = NULL;
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b1fcf2b3dca3..b9b0b6f899b9 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -54,11 +54,13 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
54 spin_lock_init(&transaction->t_handle_lock); 54 spin_lock_init(&transaction->t_handle_lock);
55 55
56 /* Set up the commit timer for the new transaction. */ 56 /* Set up the commit timer for the new transaction. */
57 journal->j_commit_timer.expires = transaction->t_expires; 57 journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
58 add_timer(&journal->j_commit_timer); 58 add_timer(&journal->j_commit_timer);
59 59
60 J_ASSERT(journal->j_running_transaction == NULL); 60 J_ASSERT(journal->j_running_transaction == NULL);
61 journal->j_running_transaction = transaction; 61 journal->j_running_transaction = transaction;
62 transaction->t_max_wait = 0;
63 transaction->t_start = jiffies;
62 64
63 return transaction; 65 return transaction;
64} 66}
@@ -85,6 +87,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
85 int nblocks = handle->h_buffer_credits; 87 int nblocks = handle->h_buffer_credits;
86 transaction_t *new_transaction = NULL; 88 transaction_t *new_transaction = NULL;
87 int ret = 0; 89 int ret = 0;
90 unsigned long ts = jiffies;
88 91
89 if (nblocks > journal->j_max_transaction_buffers) { 92 if (nblocks > journal->j_max_transaction_buffers) {
90 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", 93 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -217,6 +220,12 @@ repeat_locked:
217 /* OK, account for the buffers that this operation expects to 220 /* OK, account for the buffers that this operation expects to
218 * use and add the handle to the running transaction. */ 221 * use and add the handle to the running transaction. */
219 222
223 if (time_after(transaction->t_start, ts)) {
224 ts = jbd2_time_diff(ts, transaction->t_start);
225 if (ts > transaction->t_max_wait)
226 transaction->t_max_wait = ts;
227 }
228
220 handle->h_transaction = transaction; 229 handle->h_transaction = transaction;
221 transaction->t_outstanding_credits += nblocks; 230 transaction->t_outstanding_credits += nblocks;
222 transaction->t_updates++; 231 transaction->t_updates++;
@@ -232,6 +241,8 @@ out:
232 return ret; 241 return ret;
233} 242}
234 243
244static struct lock_class_key jbd2_handle_key;
245
235/* Allocate a new handle. This should probably be in a slab... */ 246/* Allocate a new handle. This should probably be in a slab... */
236static handle_t *new_handle(int nblocks) 247static handle_t *new_handle(int nblocks)
237{ 248{
@@ -242,6 +253,9 @@ static handle_t *new_handle(int nblocks)
242 handle->h_buffer_credits = nblocks; 253 handle->h_buffer_credits = nblocks;
243 handle->h_ref = 1; 254 handle->h_ref = 1;
244 255
256 lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle",
257 &jbd2_handle_key, 0);
258
245 return handle; 259 return handle;
246} 260}
247 261
@@ -284,7 +298,11 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
284 jbd2_free_handle(handle); 298 jbd2_free_handle(handle);
285 current->journal_info = NULL; 299 current->journal_info = NULL;
286 handle = ERR_PTR(err); 300 handle = ERR_PTR(err);
301 goto out;
287 } 302 }
303
304 lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
305out:
288 return handle; 306 return handle;
289} 307}
290 308
@@ -1164,7 +1182,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1164 } 1182 }
1165 1183
1166 /* That test should have eliminated the following case: */ 1184 /* That test should have eliminated the following case: */
1167 J_ASSERT_JH(jh, jh->b_frozen_data == 0); 1185 J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
1168 1186
1169 JBUFFER_TRACE(jh, "file as BJ_Metadata"); 1187 JBUFFER_TRACE(jh, "file as BJ_Metadata");
1170 spin_lock(&journal->j_list_lock); 1188 spin_lock(&journal->j_list_lock);
@@ -1410,6 +1428,8 @@ int jbd2_journal_stop(handle_t *handle)
1410 spin_unlock(&journal->j_state_lock); 1428 spin_unlock(&journal->j_state_lock);
1411 } 1429 }
1412 1430
1431 lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
1432
1413 jbd2_free_handle(handle); 1433 jbd2_free_handle(handle);
1414 return err; 1434 return err;
1415} 1435}
@@ -1512,7 +1532,7 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1512 1532
1513 J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); 1533 J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
1514 if (jh->b_jlist != BJ_None) 1534 if (jh->b_jlist != BJ_None)
1515 J_ASSERT_JH(jh, transaction != 0); 1535 J_ASSERT_JH(jh, transaction != NULL);
1516 1536
1517 switch (jh->b_jlist) { 1537 switch (jh->b_jlist) {
1518 case BJ_None: 1538 case BJ_None:
@@ -1581,11 +1601,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1581 if (buffer_locked(bh) || buffer_dirty(bh)) 1601 if (buffer_locked(bh) || buffer_dirty(bh))
1582 goto out; 1602 goto out;
1583 1603
1584 if (jh->b_next_transaction != 0) 1604 if (jh->b_next_transaction != NULL)
1585 goto out; 1605 goto out;
1586 1606
1587 spin_lock(&journal->j_list_lock); 1607 spin_lock(&journal->j_list_lock);
1588 if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) { 1608 if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) {
1589 if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { 1609 if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
1590 /* A written-back ordered data buffer */ 1610 /* A written-back ordered data buffer */
1591 JBUFFER_TRACE(jh, "release data"); 1611 JBUFFER_TRACE(jh, "release data");
@@ -1593,7 +1613,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1593 jbd2_journal_remove_journal_head(bh); 1613 jbd2_journal_remove_journal_head(bh);
1594 __brelse(bh); 1614 __brelse(bh);
1595 } 1615 }
1596 } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) { 1616 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1597 /* written-back checkpointed metadata buffer */ 1617 /* written-back checkpointed metadata buffer */
1598 if (jh->b_jlist == BJ_None) { 1618 if (jh->b_jlist == BJ_None) {
1599 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1619 JBUFFER_TRACE(jh, "remove from checkpoint list");
@@ -1953,7 +1973,7 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
1953 1973
1954 J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); 1974 J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
1955 J_ASSERT_JH(jh, jh->b_transaction == transaction || 1975 J_ASSERT_JH(jh, jh->b_transaction == transaction ||
1956 jh->b_transaction == 0); 1976 jh->b_transaction == NULL);
1957 1977
1958 if (jh->b_transaction && jh->b_jlist == jlist) 1978 if (jh->b_transaction && jh->b_jlist == jlist)
1959 return; 1979 return;