aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd')
-rw-r--r--fs/jbd/checkpoint.c37
-rw-r--r--fs/jbd/commit.c57
-rw-r--r--fs/jbd/journal.c107
-rw-r--r--fs/jbd/transaction.c83
4 files changed, 141 insertions, 143 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e4b87bc1fa5..f94fc48ff3a 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -22,6 +22,8 @@
22#include <linux/jbd.h> 22#include <linux/jbd.h>
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/blkdev.h>
26#include <trace/events/jbd.h>
25 27
26/* 28/*
27 * Unlink a buffer from a transaction checkpoint list. 29 * Unlink a buffer from a transaction checkpoint list.
@@ -95,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
95 97
96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && 98 if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
97 !buffer_dirty(bh) && !buffer_write_io_error(bh)) { 99 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
100 /*
101 * Get our reference so that bh cannot be freed before
102 * we unlock it
103 */
104 get_bh(bh);
98 JBUFFER_TRACE(jh, "remove from checkpoint list"); 105 JBUFFER_TRACE(jh, "remove from checkpoint list");
99 ret = __journal_remove_checkpoint(jh) + 1; 106 ret = __journal_remove_checkpoint(jh) + 1;
100 jbd_unlock_bh_state(bh); 107 jbd_unlock_bh_state(bh);
101 journal_remove_journal_head(bh);
102 BUFFER_TRACE(bh, "release"); 108 BUFFER_TRACE(bh, "release");
103 __brelse(bh); 109 __brelse(bh);
104 } else { 110 } else {
@@ -220,8 +226,8 @@ restart:
220 spin_lock(&journal->j_list_lock); 226 spin_lock(&journal->j_list_lock);
221 goto restart; 227 goto restart;
222 } 228 }
229 get_bh(bh);
223 if (buffer_locked(bh)) { 230 if (buffer_locked(bh)) {
224 get_bh(bh);
225 spin_unlock(&journal->j_list_lock); 231 spin_unlock(&journal->j_list_lock);
226 jbd_unlock_bh_state(bh); 232 jbd_unlock_bh_state(bh);
227 wait_on_buffer(bh); 233 wait_on_buffer(bh);
@@ -240,7 +246,6 @@ restart:
240 */ 246 */
241 released = __journal_remove_checkpoint(jh); 247 released = __journal_remove_checkpoint(jh);
242 jbd_unlock_bh_state(bh); 248 jbd_unlock_bh_state(bh);
243 journal_remove_journal_head(bh);
244 __brelse(bh); 249 __brelse(bh);
245 } 250 }
246 251
@@ -253,9 +258,12 @@ static void
253__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) 258__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
254{ 259{
255 int i; 260 int i;
261 struct blk_plug plug;
256 262
263 blk_start_plug(&plug);
257 for (i = 0; i < *batch_count; i++) 264 for (i = 0; i < *batch_count; i++)
258 write_dirty_buffer(bhs[i], WRITE); 265 write_dirty_buffer(bhs[i], WRITE_SYNC);
266 blk_finish_plug(&plug);
259 267
260 for (i = 0; i < *batch_count; i++) { 268 for (i = 0; i < *batch_count; i++) {
261 struct buffer_head *bh = bhs[i]; 269 struct buffer_head *bh = bhs[i];
@@ -304,12 +312,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
304 ret = 1; 312 ret = 1;
305 if (unlikely(buffer_write_io_error(bh))) 313 if (unlikely(buffer_write_io_error(bh)))
306 ret = -EIO; 314 ret = -EIO;
315 get_bh(bh);
307 J_ASSERT_JH(jh, !buffer_jbddirty(bh)); 316 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
308 BUFFER_TRACE(bh, "remove from checkpoint"); 317 BUFFER_TRACE(bh, "remove from checkpoint");
309 __journal_remove_checkpoint(jh); 318 __journal_remove_checkpoint(jh);
310 spin_unlock(&journal->j_list_lock); 319 spin_unlock(&journal->j_list_lock);
311 jbd_unlock_bh_state(bh); 320 jbd_unlock_bh_state(bh);
312 journal_remove_journal_head(bh);
313 __brelse(bh); 321 __brelse(bh);
314 } else { 322 } else {
315 /* 323 /*
@@ -358,6 +366,7 @@ int log_do_checkpoint(journal_t *journal)
358 * journal straight away. 366 * journal straight away.
359 */ 367 */
360 result = cleanup_journal_tail(journal); 368 result = cleanup_journal_tail(journal);
369 trace_jbd_checkpoint(journal, result);
361 jbd_debug(1, "cleanup_journal_tail returned %d\n", result); 370 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
362 if (result <= 0) 371 if (result <= 0)
363 return result; 372 return result;
@@ -503,6 +512,7 @@ int cleanup_journal_tail(journal_t *journal)
503 if (blocknr < journal->j_tail) 512 if (blocknr < journal->j_tail)
504 freed = freed + journal->j_last - journal->j_first; 513 freed = freed + journal->j_last - journal->j_first;
505 514
515 trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
506 jbd_debug(1, 516 jbd_debug(1,
507 "Cleaning journal tail from %d to %d (offset %u), " 517 "Cleaning journal tail from %d to %d (offset %u), "
508 "freeing %u\n", 518 "freeing %u\n",
@@ -523,9 +533,9 @@ int cleanup_journal_tail(journal_t *journal)
523/* 533/*
524 * journal_clean_one_cp_list 534 * journal_clean_one_cp_list
525 * 535 *
526 * Find all the written-back checkpoint buffers in the given list and release them. 536 * Find all the written-back checkpoint buffers in the given list and release
537 * them.
527 * 538 *
528 * Called with the journal locked.
529 * Called with j_list_lock held. 539 * Called with j_list_lock held.
530 * Returns number of bufers reaped (for debug) 540 * Returns number of bufers reaped (for debug)
531 */ 541 */
@@ -632,8 +642,8 @@ out:
632 * checkpoint lists. 642 * checkpoint lists.
633 * 643 *
634 * The function returns 1 if it frees the transaction, 0 otherwise. 644 * The function returns 1 if it frees the transaction, 0 otherwise.
645 * The function can free jh and bh.
635 * 646 *
636 * This function is called with the journal locked.
637 * This function is called with j_list_lock held. 647 * This function is called with j_list_lock held.
638 * This function is called with jbd_lock_bh_state(jh2bh(jh)) 648 * This function is called with jbd_lock_bh_state(jh2bh(jh))
639 */ 649 */
@@ -652,13 +662,14 @@ int __journal_remove_checkpoint(struct journal_head *jh)
652 } 662 }
653 journal = transaction->t_journal; 663 journal = transaction->t_journal;
654 664
665 JBUFFER_TRACE(jh, "removing from transaction");
655 __buffer_unlink(jh); 666 __buffer_unlink(jh);
656 jh->b_cp_transaction = NULL; 667 jh->b_cp_transaction = NULL;
668 journal_put_journal_head(jh);
657 669
658 if (transaction->t_checkpoint_list != NULL || 670 if (transaction->t_checkpoint_list != NULL ||
659 transaction->t_checkpoint_io_list != NULL) 671 transaction->t_checkpoint_io_list != NULL)
660 goto out; 672 goto out;
661 JBUFFER_TRACE(jh, "transaction has no more buffers");
662 673
663 /* 674 /*
664 * There is one special case to worry about: if we have just pulled the 675 * There is one special case to worry about: if we have just pulled the
@@ -669,10 +680,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
669 * The locking here around t_state is a bit sleazy. 680 * The locking here around t_state is a bit sleazy.
670 * See the comment at the end of journal_commit_transaction(). 681 * See the comment at the end of journal_commit_transaction().
671 */ 682 */
672 if (transaction->t_state != T_FINISHED) { 683 if (transaction->t_state != T_FINISHED)
673 JBUFFER_TRACE(jh, "belongs to running/committing transaction");
674 goto out; 684 goto out;
675 }
676 685
677 /* OK, that was the last buffer for the transaction: we can now 686 /* OK, that was the last buffer for the transaction: we can now
678 safely remove this transaction from the log */ 687 safely remove this transaction from the log */
@@ -684,7 +693,6 @@ int __journal_remove_checkpoint(struct journal_head *jh)
684 wake_up(&journal->j_wait_logspace); 693 wake_up(&journal->j_wait_logspace);
685 ret = 1; 694 ret = 1;
686out: 695out:
687 JBUFFER_TRACE(jh, "exit");
688 return ret; 696 return ret;
689} 697}
690 698
@@ -703,6 +711,8 @@ void __journal_insert_checkpoint(struct journal_head *jh,
703 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); 711 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
704 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); 712 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
705 713
714 /* Get reference for checkpointing transaction */
715 journal_grab_journal_head(jh2bh(jh));
706 jh->b_cp_transaction = transaction; 716 jh->b_cp_transaction = transaction;
707 717
708 if (!transaction->t_checkpoint_list) { 718 if (!transaction->t_checkpoint_list) {
@@ -752,6 +762,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
752 J_ASSERT(journal->j_committing_transaction != transaction); 762 J_ASSERT(journal->j_committing_transaction != transaction);
753 J_ASSERT(journal->j_running_transaction != transaction); 763 J_ASSERT(journal->j_running_transaction != transaction);
754 764
765 trace_jbd_drop_transaction(journal, transaction);
755 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); 766 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
756 kfree(transaction); 767 kfree(transaction);
757} 768}
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 72ffa974b0b..8799207df05 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -21,6 +21,7 @@
21#include <linux/pagemap.h> 21#include <linux/pagemap.h>
22#include <linux/bio.h> 22#include <linux/bio.h>
23#include <linux/blkdev.h> 23#include <linux/blkdev.h>
24#include <trace/events/jbd.h>
24 25
25/* 26/*
26 * Default IO end handler for temporary BJ_IO buffer_heads. 27 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -204,6 +205,8 @@ write_out_data:
204 if (!trylock_buffer(bh)) { 205 if (!trylock_buffer(bh)) {
205 BUFFER_TRACE(bh, "needs blocking lock"); 206 BUFFER_TRACE(bh, "needs blocking lock");
206 spin_unlock(&journal->j_list_lock); 207 spin_unlock(&journal->j_list_lock);
208 trace_jbd_do_submit_data(journal,
209 commit_transaction);
207 /* Write out all data to prevent deadlocks */ 210 /* Write out all data to prevent deadlocks */
208 journal_do_submit_data(wbuf, bufs, write_op); 211 journal_do_submit_data(wbuf, bufs, write_op);
209 bufs = 0; 212 bufs = 0;
@@ -236,6 +239,8 @@ write_out_data:
236 jbd_unlock_bh_state(bh); 239 jbd_unlock_bh_state(bh);
237 if (bufs == journal->j_wbufsize) { 240 if (bufs == journal->j_wbufsize) {
238 spin_unlock(&journal->j_list_lock); 241 spin_unlock(&journal->j_list_lock);
242 trace_jbd_do_submit_data(journal,
243 commit_transaction);
239 journal_do_submit_data(wbuf, bufs, write_op); 244 journal_do_submit_data(wbuf, bufs, write_op);
240 bufs = 0; 245 bufs = 0;
241 goto write_out_data; 246 goto write_out_data;
@@ -253,10 +258,6 @@ write_out_data:
253 jbd_unlock_bh_state(bh); 258 jbd_unlock_bh_state(bh);
254 if (locked) 259 if (locked)
255 unlock_buffer(bh); 260 unlock_buffer(bh);
256 journal_remove_journal_head(bh);
257 /* One for our safety reference, other for
258 * journal_remove_journal_head() */
259 put_bh(bh);
260 release_data_buffer(bh); 261 release_data_buffer(bh);
261 } 262 }
262 263
@@ -266,6 +267,7 @@ write_out_data:
266 } 267 }
267 } 268 }
268 spin_unlock(&journal->j_list_lock); 269 spin_unlock(&journal->j_list_lock);
270 trace_jbd_do_submit_data(journal, commit_transaction);
269 journal_do_submit_data(wbuf, bufs, write_op); 271 journal_do_submit_data(wbuf, bufs, write_op);
270 272
271 return err; 273 return err;
@@ -316,12 +318,14 @@ void journal_commit_transaction(journal_t *journal)
316 commit_transaction = journal->j_running_transaction; 318 commit_transaction = journal->j_running_transaction;
317 J_ASSERT(commit_transaction->t_state == T_RUNNING); 319 J_ASSERT(commit_transaction->t_state == T_RUNNING);
318 320
321 trace_jbd_start_commit(journal, commit_transaction);
319 jbd_debug(1, "JBD: starting commit of transaction %d\n", 322 jbd_debug(1, "JBD: starting commit of transaction %d\n",
320 commit_transaction->t_tid); 323 commit_transaction->t_tid);
321 324
322 spin_lock(&journal->j_state_lock); 325 spin_lock(&journal->j_state_lock);
323 commit_transaction->t_state = T_LOCKED; 326 commit_transaction->t_state = T_LOCKED;
324 327
328 trace_jbd_commit_locking(journal, commit_transaction);
325 spin_lock(&commit_transaction->t_handle_lock); 329 spin_lock(&commit_transaction->t_handle_lock);
326 while (commit_transaction->t_updates) { 330 while (commit_transaction->t_updates) {
327 DEFINE_WAIT(wait); 331 DEFINE_WAIT(wait);
@@ -392,6 +396,7 @@ void journal_commit_transaction(journal_t *journal)
392 */ 396 */
393 journal_switch_revoke_table(journal); 397 journal_switch_revoke_table(journal);
394 398
399 trace_jbd_commit_flushing(journal, commit_transaction);
395 commit_transaction->t_state = T_FLUSH; 400 commit_transaction->t_state = T_FLUSH;
396 journal->j_committing_transaction = commit_transaction; 401 journal->j_committing_transaction = commit_transaction;
397 journal->j_running_transaction = NULL; 402 journal->j_running_transaction = NULL;
@@ -446,14 +451,9 @@ void journal_commit_transaction(journal_t *journal)
446 } 451 }
447 if (buffer_jbd(bh) && bh2jh(bh) == jh && 452 if (buffer_jbd(bh) && bh2jh(bh) == jh &&
448 jh->b_transaction == commit_transaction && 453 jh->b_transaction == commit_transaction &&
449 jh->b_jlist == BJ_Locked) { 454 jh->b_jlist == BJ_Locked)
450 __journal_unfile_buffer(jh); 455 __journal_unfile_buffer(jh);
451 jbd_unlock_bh_state(bh); 456 jbd_unlock_bh_state(bh);
452 journal_remove_journal_head(bh);
453 put_bh(bh);
454 } else {
455 jbd_unlock_bh_state(bh);
456 }
457 release_data_buffer(bh); 457 release_data_buffer(bh);
458 cond_resched_lock(&journal->j_list_lock); 458 cond_resched_lock(&journal->j_list_lock);
459 } 459 }
@@ -493,6 +493,7 @@ void journal_commit_transaction(journal_t *journal)
493 commit_transaction->t_state = T_COMMIT; 493 commit_transaction->t_state = T_COMMIT;
494 spin_unlock(&journal->j_state_lock); 494 spin_unlock(&journal->j_state_lock);
495 495
496 trace_jbd_commit_logging(journal, commit_transaction);
496 J_ASSERT(commit_transaction->t_nr_buffers <= 497 J_ASSERT(commit_transaction->t_nr_buffers <=
497 commit_transaction->t_outstanding_credits); 498 commit_transaction->t_outstanding_credits);
498 499
@@ -797,10 +798,16 @@ restart_loop:
797 while (commit_transaction->t_forget) { 798 while (commit_transaction->t_forget) {
798 transaction_t *cp_transaction; 799 transaction_t *cp_transaction;
799 struct buffer_head *bh; 800 struct buffer_head *bh;
801 int try_to_free = 0;
800 802
801 jh = commit_transaction->t_forget; 803 jh = commit_transaction->t_forget;
802 spin_unlock(&journal->j_list_lock); 804 spin_unlock(&journal->j_list_lock);
803 bh = jh2bh(jh); 805 bh = jh2bh(jh);
806 /*
807 * Get a reference so that bh cannot be freed before we are
808 * done with it.
809 */
810 get_bh(bh);
804 jbd_lock_bh_state(bh); 811 jbd_lock_bh_state(bh);
805 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || 812 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
806 jh->b_transaction == journal->j_running_transaction); 813 jh->b_transaction == journal->j_running_transaction);
@@ -858,28 +865,27 @@ restart_loop:
858 __journal_insert_checkpoint(jh, commit_transaction); 865 __journal_insert_checkpoint(jh, commit_transaction);
859 if (is_journal_aborted(journal)) 866 if (is_journal_aborted(journal))
860 clear_buffer_jbddirty(bh); 867 clear_buffer_jbddirty(bh);
861 JBUFFER_TRACE(jh, "refile for checkpoint writeback");
862 __journal_refile_buffer(jh);
863 jbd_unlock_bh_state(bh);
864 } else { 868 } else {
865 J_ASSERT_BH(bh, !buffer_dirty(bh)); 869 J_ASSERT_BH(bh, !buffer_dirty(bh));
866 /* The buffer on BJ_Forget list and not jbddirty means 870 /*
871 * The buffer on BJ_Forget list and not jbddirty means
867 * it has been freed by this transaction and hence it 872 * it has been freed by this transaction and hence it
868 * could not have been reallocated until this 873 * could not have been reallocated until this
869 * transaction has committed. *BUT* it could be 874 * transaction has committed. *BUT* it could be
870 * reallocated once we have written all the data to 875 * reallocated once we have written all the data to
871 * disk and before we process the buffer on BJ_Forget 876 * disk and before we process the buffer on BJ_Forget
872 * list. */ 877 * list.
873 JBUFFER_TRACE(jh, "refile or unfile freed buffer"); 878 */
874 __journal_refile_buffer(jh); 879 if (!jh->b_next_transaction)
875 if (!jh->b_transaction) { 880 try_to_free = 1;
876 jbd_unlock_bh_state(bh);
877 /* needs a brelse */
878 journal_remove_journal_head(bh);
879 release_buffer_page(bh);
880 } else
881 jbd_unlock_bh_state(bh);
882 } 881 }
882 JBUFFER_TRACE(jh, "refile or unfile freed buffer");
883 __journal_refile_buffer(jh);
884 jbd_unlock_bh_state(bh);
885 if (try_to_free)
886 release_buffer_page(bh);
887 else
888 __brelse(bh);
883 cond_resched_lock(&journal->j_list_lock); 889 cond_resched_lock(&journal->j_list_lock);
884 } 890 }
885 spin_unlock(&journal->j_list_lock); 891 spin_unlock(&journal->j_list_lock);
@@ -946,6 +952,7 @@ restart_loop:
946 } 952 }
947 spin_unlock(&journal->j_list_lock); 953 spin_unlock(&journal->j_list_lock);
948 954
955 trace_jbd_end_commit(journal, commit_transaction);
949 jbd_debug(1, "JBD: commit %d complete, head %d\n", 956 jbd_debug(1, "JBD: commit %d complete, head %d\n",
950 journal->j_commit_sequence, journal->j_tail_sequence); 957 journal->j_commit_sequence, journal->j_tail_sequence);
951 958
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index e2d4285fbe9..fea8dd661d2 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -38,6 +38,9 @@
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/ratelimit.h> 39#include <linux/ratelimit.h>
40 40
41#define CREATE_TRACE_POINTS
42#include <trace/events/jbd.h>
43
41#include <asm/uaccess.h> 44#include <asm/uaccess.h>
42#include <asm/page.h> 45#include <asm/page.h>
43 46
@@ -1065,6 +1068,7 @@ void journal_update_superblock(journal_t *journal, int wait)
1065 } else 1068 } else
1066 write_dirty_buffer(bh, WRITE); 1069 write_dirty_buffer(bh, WRITE);
1067 1070
1071 trace_jbd_update_superblock_end(journal, wait);
1068out: 1072out:
1069 /* If we have just flushed the log (by marking s_start==0), then 1073 /* If we have just flushed the log (by marking s_start==0), then
1070 * any future commit will have to be careful to update the 1074 * any future commit will have to be careful to update the
@@ -1131,6 +1135,14 @@ static int journal_get_superblock(journal_t *journal)
1131 goto out; 1135 goto out;
1132 } 1136 }
1133 1137
1138 if (be32_to_cpu(sb->s_first) == 0 ||
1139 be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
1140 printk(KERN_WARNING
1141 "JBD: Invalid start block of journal: %u\n",
1142 be32_to_cpu(sb->s_first));
1143 goto out;
1144 }
1145
1134 return 0; 1146 return 0;
1135 1147
1136out: 1148out:
@@ -1799,10 +1811,9 @@ static void journal_free_journal_head(struct journal_head *jh)
1799 * When a buffer has its BH_JBD bit set it is immune from being released by 1811 * When a buffer has its BH_JBD bit set it is immune from being released by
1800 * core kernel code, mainly via ->b_count. 1812 * core kernel code, mainly via ->b_count.
1801 * 1813 *
1802 * A journal_head may be detached from its buffer_head when the journal_head's 1814 * A journal_head is detached from its buffer_head when the journal_head's
1803 * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. 1815 * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
1804 * Various places in JBD call journal_remove_journal_head() to indicate that the 1816 * transaction (b_cp_transaction) hold their references to b_jcount.
1805 * journal_head can be dropped if needed.
1806 * 1817 *
1807 * Various places in the kernel want to attach a journal_head to a buffer_head 1818 * Various places in the kernel want to attach a journal_head to a buffer_head
1808 * _before_ attaching the journal_head to a transaction. To protect the 1819 * _before_ attaching the journal_head to a transaction. To protect the
@@ -1815,17 +1826,16 @@ static void journal_free_journal_head(struct journal_head *jh)
1815 * (Attach a journal_head if needed. Increments b_jcount) 1826 * (Attach a journal_head if needed. Increments b_jcount)
1816 * struct journal_head *jh = journal_add_journal_head(bh); 1827 * struct journal_head *jh = journal_add_journal_head(bh);
1817 * ... 1828 * ...
1818 * jh->b_transaction = xxx; 1829 * (Get another reference for transaction)
1819 * journal_put_journal_head(jh); 1830 * journal_grab_journal_head(bh);
1820 * 1831 * jh->b_transaction = xxx;
1821 * Now, the journal_head's b_jcount is zero, but it is safe from being released 1832 * (Put original reference)
1822 * because it has a non-zero b_transaction. 1833 * journal_put_journal_head(jh);
1823 */ 1834 */
1824 1835
1825/* 1836/*
1826 * Give a buffer_head a journal_head. 1837 * Give a buffer_head a journal_head.
1827 * 1838 *
1828 * Doesn't need the journal lock.
1829 * May sleep. 1839 * May sleep.
1830 */ 1840 */
1831struct journal_head *journal_add_journal_head(struct buffer_head *bh) 1841struct journal_head *journal_add_journal_head(struct buffer_head *bh)
@@ -1889,61 +1899,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
1889 struct journal_head *jh = bh2jh(bh); 1899 struct journal_head *jh = bh2jh(bh);
1890 1900
1891 J_ASSERT_JH(jh, jh->b_jcount >= 0); 1901 J_ASSERT_JH(jh, jh->b_jcount >= 0);
1892 1902 J_ASSERT_JH(jh, jh->b_transaction == NULL);
1893 get_bh(bh); 1903 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
1894 if (jh->b_jcount == 0) { 1904 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
1895 if (jh->b_transaction == NULL && 1905 J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
1896 jh->b_next_transaction == NULL && 1906 J_ASSERT_BH(bh, buffer_jbd(bh));
1897 jh->b_cp_transaction == NULL) { 1907 J_ASSERT_BH(bh, jh2bh(jh) == bh);
1898 J_ASSERT_JH(jh, jh->b_jlist == BJ_None); 1908 BUFFER_TRACE(bh, "remove journal_head");
1899 J_ASSERT_BH(bh, buffer_jbd(bh)); 1909 if (jh->b_frozen_data) {
1900 J_ASSERT_BH(bh, jh2bh(jh) == bh); 1910 printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
1901 BUFFER_TRACE(bh, "remove journal_head"); 1911 jbd_free(jh->b_frozen_data, bh->b_size);
1902 if (jh->b_frozen_data) {
1903 printk(KERN_WARNING "%s: freeing "
1904 "b_frozen_data\n",
1905 __func__);
1906 jbd_free(jh->b_frozen_data, bh->b_size);
1907 }
1908 if (jh->b_committed_data) {
1909 printk(KERN_WARNING "%s: freeing "
1910 "b_committed_data\n",
1911 __func__);
1912 jbd_free(jh->b_committed_data, bh->b_size);
1913 }
1914 bh->b_private = NULL;
1915 jh->b_bh = NULL; /* debug, really */
1916 clear_buffer_jbd(bh);
1917 __brelse(bh);
1918 journal_free_journal_head(jh);
1919 } else {
1920 BUFFER_TRACE(bh, "journal_head was locked");
1921 }
1922 } 1912 }
1913 if (jh->b_committed_data) {
1914 printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
1915 jbd_free(jh->b_committed_data, bh->b_size);
1916 }
1917 bh->b_private = NULL;
1918 jh->b_bh = NULL; /* debug, really */
1919 clear_buffer_jbd(bh);
1920 journal_free_journal_head(jh);
1923} 1921}
1924 1922
1925/* 1923/*
1926 * journal_remove_journal_head(): if the buffer isn't attached to a transaction 1924 * Drop a reference on the passed journal_head. If it fell to zero then
1927 * and has a zero b_jcount then remove and release its journal_head. If we did
1928 * see that the buffer is not used by any transaction we also "logically"
1929 * decrement ->b_count.
1930 *
1931 * We in fact take an additional increment on ->b_count as a convenience,
1932 * because the caller usually wants to do additional things with the bh
1933 * after calling here.
1934 * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
1935 * time. Once the caller has run __brelse(), the buffer is eligible for
1936 * reaping by try_to_free_buffers().
1937 */
1938void journal_remove_journal_head(struct buffer_head *bh)
1939{
1940 jbd_lock_bh_journal_head(bh);
1941 __journal_remove_journal_head(bh);
1942 jbd_unlock_bh_journal_head(bh);
1943}
1944
1945/*
1946 * Drop a reference on the passed journal_head. If it fell to zero then try to
1947 * release the journal_head from the buffer_head. 1925 * release the journal_head from the buffer_head.
1948 */ 1926 */
1949void journal_put_journal_head(struct journal_head *jh) 1927void journal_put_journal_head(struct journal_head *jh)
@@ -1953,11 +1931,12 @@ void journal_put_journal_head(struct journal_head *jh)
1953 jbd_lock_bh_journal_head(bh); 1931 jbd_lock_bh_journal_head(bh);
1954 J_ASSERT_JH(jh, jh->b_jcount > 0); 1932 J_ASSERT_JH(jh, jh->b_jcount > 0);
1955 --jh->b_jcount; 1933 --jh->b_jcount;
1956 if (!jh->b_jcount && !jh->b_transaction) { 1934 if (!jh->b_jcount) {
1957 __journal_remove_journal_head(bh); 1935 __journal_remove_journal_head(bh);
1936 jbd_unlock_bh_journal_head(bh);
1958 __brelse(bh); 1937 __brelse(bh);
1959 } 1938 } else
1960 jbd_unlock_bh_journal_head(bh); 1939 jbd_unlock_bh_journal_head(bh);
1961} 1940}
1962 1941
1963/* 1942/*
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index f7ee81a065d..7e59c6e66f9 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -26,6 +26,7 @@
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/backing-dev.h>
29 30
30static void __journal_temp_unlink_buffer(struct journal_head *jh); 31static void __journal_temp_unlink_buffer(struct journal_head *jh);
31 32
@@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
99 100
100alloc_transaction: 101alloc_transaction:
101 if (!journal->j_running_transaction) { 102 if (!journal->j_running_transaction) {
102 new_transaction = kzalloc(sizeof(*new_transaction), 103 new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS);
103 GFP_NOFS|__GFP_NOFAIL);
104 if (!new_transaction) { 104 if (!new_transaction) {
105 ret = -ENOMEM; 105 congestion_wait(BLK_RW_ASYNC, HZ/50);
106 goto out; 106 goto alloc_transaction;
107 } 107 }
108 } 108 }
109 109
@@ -696,7 +696,6 @@ repeat:
696 if (!jh->b_transaction) { 696 if (!jh->b_transaction) {
697 JBUFFER_TRACE(jh, "no transaction"); 697 JBUFFER_TRACE(jh, "no transaction");
698 J_ASSERT_JH(jh, !jh->b_next_transaction); 698 J_ASSERT_JH(jh, !jh->b_next_transaction);
699 jh->b_transaction = transaction;
700 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 699 JBUFFER_TRACE(jh, "file as BJ_Reserved");
701 spin_lock(&journal->j_list_lock); 700 spin_lock(&journal->j_list_lock);
702 __journal_file_buffer(jh, transaction, BJ_Reserved); 701 __journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
818 * committed and so it's safe to clear the dirty bit. 817 * committed and so it's safe to clear the dirty bit.
819 */ 818 */
820 clear_buffer_dirty(jh2bh(jh)); 819 clear_buffer_dirty(jh2bh(jh));
821 jh->b_transaction = transaction;
822 820
823 /* first access by this transaction */ 821 /* first access by this transaction */
824 jh->b_modified = 0; 822 jh->b_modified = 0;
@@ -844,8 +842,8 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
844 */ 842 */
845 JBUFFER_TRACE(jh, "cancelling revoke"); 843 JBUFFER_TRACE(jh, "cancelling revoke");
846 journal_cancel_revoke(handle, jh); 844 journal_cancel_revoke(handle, jh);
847 journal_put_journal_head(jh);
848out: 845out:
846 journal_put_journal_head(jh);
849 return err; 847 return err;
850} 848}
851 849
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1069 ret = -EIO; 1067 ret = -EIO;
1070 goto no_journal; 1068 goto no_journal;
1071 } 1069 }
1072 1070 /* We might have slept so buffer could be refiled now */
1073 if (jh->b_transaction != NULL) { 1071 if (jh->b_transaction != NULL &&
1072 jh->b_transaction != handle->h_transaction) {
1074 JBUFFER_TRACE(jh, "unfile from commit"); 1073 JBUFFER_TRACE(jh, "unfile from commit");
1075 __journal_temp_unlink_buffer(jh); 1074 __journal_temp_unlink_buffer(jh);
1076 /* It still points to the committing 1075 /* It still points to the committing
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1091 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { 1090 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
1092 JBUFFER_TRACE(jh, "not on correct data list: unfile"); 1091 JBUFFER_TRACE(jh, "not on correct data list: unfile");
1093 J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); 1092 J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
1094 __journal_temp_unlink_buffer(jh);
1095 jh->b_transaction = handle->h_transaction;
1096 JBUFFER_TRACE(jh, "file as data"); 1093 JBUFFER_TRACE(jh, "file as data");
1097 __journal_file_buffer(jh, handle->h_transaction, 1094 __journal_file_buffer(jh, handle->h_transaction,
1098 BJ_SyncData); 1095 BJ_SyncData);
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1300 __journal_file_buffer(jh, transaction, BJ_Forget); 1297 __journal_file_buffer(jh, transaction, BJ_Forget);
1301 } else { 1298 } else {
1302 __journal_unfile_buffer(jh); 1299 __journal_unfile_buffer(jh);
1303 journal_remove_journal_head(bh);
1304 __brelse(bh);
1305 if (!buffer_jbd(bh)) { 1300 if (!buffer_jbd(bh)) {
1306 spin_unlock(&journal->j_list_lock); 1301 spin_unlock(&journal->j_list_lock);
1307 jbd_unlock_bh_state(bh); 1302 jbd_unlock_bh_state(bh);
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
1622 mark_buffer_dirty(bh); /* Expose it to the VM */ 1617 mark_buffer_dirty(bh); /* Expose it to the VM */
1623} 1618}
1624 1619
1620/*
1621 * Remove buffer from all transactions.
1622 *
1623 * Called with bh_state lock and j_list_lock
1624 *
1625 * jh and bh may be already freed when this function returns.
1626 */
1625void __journal_unfile_buffer(struct journal_head *jh) 1627void __journal_unfile_buffer(struct journal_head *jh)
1626{ 1628{
1627 __journal_temp_unlink_buffer(jh); 1629 __journal_temp_unlink_buffer(jh);
1628 jh->b_transaction = NULL; 1630 jh->b_transaction = NULL;
1631 journal_put_journal_head(jh);
1629} 1632}
1630 1633
1631void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) 1634void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
1632{ 1635{
1633 jbd_lock_bh_state(jh2bh(jh)); 1636 struct buffer_head *bh = jh2bh(jh);
1637
1638 /* Get reference so that buffer cannot be freed before we unlock it */
1639 get_bh(bh);
1640 jbd_lock_bh_state(bh);
1634 spin_lock(&journal->j_list_lock); 1641 spin_lock(&journal->j_list_lock);
1635 __journal_unfile_buffer(jh); 1642 __journal_unfile_buffer(jh);
1636 spin_unlock(&journal->j_list_lock); 1643 spin_unlock(&journal->j_list_lock);
1637 jbd_unlock_bh_state(jh2bh(jh)); 1644 jbd_unlock_bh_state(bh);
1645 __brelse(bh);
1638} 1646}
1639 1647
1640/* 1648/*
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1661 /* A written-back ordered data buffer */ 1669 /* A written-back ordered data buffer */
1662 JBUFFER_TRACE(jh, "release data"); 1670 JBUFFER_TRACE(jh, "release data");
1663 __journal_unfile_buffer(jh); 1671 __journal_unfile_buffer(jh);
1664 journal_remove_journal_head(bh);
1665 __brelse(bh);
1666 } 1672 }
1667 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { 1673 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1668 /* written-back checkpointed metadata buffer */ 1674 /* written-back checkpointed metadata buffer */
1669 if (jh->b_jlist == BJ_None) { 1675 if (jh->b_jlist == BJ_None) {
1670 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1676 JBUFFER_TRACE(jh, "remove from checkpoint list");
1671 __journal_remove_checkpoint(jh); 1677 __journal_remove_checkpoint(jh);
1672 journal_remove_journal_head(bh);
1673 __brelse(bh);
1674 } 1678 }
1675 } 1679 }
1676 spin_unlock(&journal->j_list_lock); 1680 spin_unlock(&journal->j_list_lock);
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal,
1733 /* 1737 /*
1734 * We take our own ref against the journal_head here to avoid 1738 * We take our own ref against the journal_head here to avoid
1735 * having to add tons of locking around each instance of 1739 * having to add tons of locking around each instance of
1736 * journal_remove_journal_head() and journal_put_journal_head(). 1740 * journal_put_journal_head().
1737 */ 1741 */
1738 jh = journal_grab_journal_head(bh); 1742 jh = journal_grab_journal_head(bh);
1739 if (!jh) 1743 if (!jh)
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1770 int may_free = 1; 1774 int may_free = 1;
1771 struct buffer_head *bh = jh2bh(jh); 1775 struct buffer_head *bh = jh2bh(jh);
1772 1776
1773 __journal_unfile_buffer(jh);
1774
1775 if (jh->b_cp_transaction) { 1777 if (jh->b_cp_transaction) {
1776 JBUFFER_TRACE(jh, "on running+cp transaction"); 1778 JBUFFER_TRACE(jh, "on running+cp transaction");
1779 __journal_temp_unlink_buffer(jh);
1777 /* 1780 /*
1778 * We don't want to write the buffer anymore, clear the 1781 * We don't want to write the buffer anymore, clear the
1779 * bit so that we don't confuse checks in 1782 * bit so that we don't confuse checks in
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1784 may_free = 0; 1787 may_free = 0;
1785 } else { 1788 } else {
1786 JBUFFER_TRACE(jh, "on running transaction"); 1789 JBUFFER_TRACE(jh, "on running transaction");
1787 journal_remove_journal_head(bh); 1790 __journal_unfile_buffer(jh);
1788 __brelse(bh);
1789 } 1791 }
1790 return may_free; 1792 return may_free;
1791} 1793}
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh,
2070 2072
2071 if (jh->b_transaction) 2073 if (jh->b_transaction)
2072 __journal_temp_unlink_buffer(jh); 2074 __journal_temp_unlink_buffer(jh);
2075 else
2076 journal_grab_journal_head(bh);
2073 jh->b_transaction = transaction; 2077 jh->b_transaction = transaction;
2074 2078
2075 switch (jlist) { 2079 switch (jlist) {
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh,
2127 * already started to be used by a subsequent transaction, refile the 2131 * already started to be used by a subsequent transaction, refile the
2128 * buffer on that transaction's metadata list. 2132 * buffer on that transaction's metadata list.
2129 * 2133 *
2130 * Called under journal->j_list_lock 2134 * Called under j_list_lock
2131 *
2132 * Called under jbd_lock_bh_state(jh2bh(jh)) 2135 * Called under jbd_lock_bh_state(jh2bh(jh))
2136 *
2137 * jh and bh may be already free when this function returns
2133 */ 2138 */
2134void __journal_refile_buffer(struct journal_head *jh) 2139void __journal_refile_buffer(struct journal_head *jh)
2135{ 2140{
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh)
2153 2158
2154 was_dirty = test_clear_buffer_jbddirty(bh); 2159 was_dirty = test_clear_buffer_jbddirty(bh);
2155 __journal_temp_unlink_buffer(jh); 2160 __journal_temp_unlink_buffer(jh);
2161 /*
2162 * We set b_transaction here because b_next_transaction will inherit
2163 * our jh reference and thus __journal_file_buffer() must not take a
2164 * new one.
2165 */
2156 jh->b_transaction = jh->b_next_transaction; 2166 jh->b_transaction = jh->b_next_transaction;
2157 jh->b_next_transaction = NULL; 2167 jh->b_next_transaction = NULL;
2158 if (buffer_freed(bh)) 2168 if (buffer_freed(bh))
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh)
2169} 2179}
2170 2180
2171/* 2181/*
2172 * For the unlocked version of this call, also make sure that any 2182 * __journal_refile_buffer() with necessary locking added. We take our bh
2173 * hanging journal_head is cleaned up if necessary. 2183 * reference so that we can safely unlock bh.
2174 * 2184 *
2175 * __journal_refile_buffer is usually called as part of a single locked 2185 * The jh and bh may be freed by this call.
2176 * operation on a buffer_head, in which the caller is probably going to
2177 * be hooking the journal_head onto other lists. In that case it is up
2178 * to the caller to remove the journal_head if necessary. For the
2179 * unlocked journal_refile_buffer call, the caller isn't going to be
2180 * doing anything else to the buffer so we need to do the cleanup
2181 * ourselves to avoid a jh leak.
2182 *
2183 * *** The journal_head may be freed by this call! ***
2184 */ 2186 */
2185void journal_refile_buffer(journal_t *journal, struct journal_head *jh) 2187void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2186{ 2188{
2187 struct buffer_head *bh = jh2bh(jh); 2189 struct buffer_head *bh = jh2bh(jh);
2188 2190
2191 /* Get reference so that buffer cannot be freed before we unlock it */
2192 get_bh(bh);
2189 jbd_lock_bh_state(bh); 2193 jbd_lock_bh_state(bh);
2190 spin_lock(&journal->j_list_lock); 2194 spin_lock(&journal->j_list_lock);
2191
2192 __journal_refile_buffer(jh); 2195 __journal_refile_buffer(jh);
2193 jbd_unlock_bh_state(bh); 2196 jbd_unlock_bh_state(bh);
2194 journal_remove_journal_head(bh);
2195
2196 spin_unlock(&journal->j_list_lock); 2197 spin_unlock(&journal->j_list_lock);
2197 __brelse(bh); 2198 __brelse(bh);
2198} 2199}