path: root/fs/jbd2
author    Linus Torvalds <torvalds@linux-foundation.org>    2012-03-28 13:02:55 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2012-03-28 13:02:55 -0400
commit    69e1aaddd63104f37021d0b0f6abfd9623c9134c (patch)
tree      14ad49741b428d270b681694bb2df349465455b9 /fs/jbd2
parent    56b59b429b4c26e5e730bc8c3d837de9f7d0a966 (diff)
parent    9d547c35799a4ddd235f1565cec2fff6c9263504 (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates for 3.4 from Ted Ts'o:
 "Ext4 commits for 3.3 merge window; mostly cleanups and bug fixes

  The changes to export dirty_writeback_interval are from Artem's s_dirt
  cleanup patch series.  The same is true of the change to remove the
  s_dirt helper functions which never got used by anyone in-tree.  I've
  run these changes by Al Viro, and am carrying them so that Artem can
  more easily fix up the rest of the file systems during the next merge
  window.  (Originally we had hoped to remove the use of s_dirt from
  ext4 during this merge window, but his patches had some bugs, so I
  ultimately ended up dropping them from the ext4 tree.)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (66 commits)
  vfs: remove unused superblock helpers
  mm: export dirty_writeback_interval
  ext4: remove useless s_dirt assignment
  ext4: write superblock only once on unmount
  ext4: do not mark superblock as dirty unnecessarily
  ext4: correct ext4_punch_hole return codes
  ext4: remove restrictive checks for EOFBLOCKS_FL
  ext4: always set then trimmed blocks count into len
  ext4: fix trimmed block count accounting
  ext4: fix start and len arguments handling in ext4_trim_fs()
  ext4: update s_free_{inodes,blocks}_count during online resize
  ext4: change some printk() calls to use ext4_msg() instead
  ext4: avoid output message interleaving in ext4_error_<foo>()
  ext4: remove trailing newlines from ext4_msg() and ext4_error() messages
  ext4: add no_printk argument validation, fix fallout
  ext4: remove redundant "EXT4-fs: " from uses of ext4_msg
  ext4: give more helpful error message in ext4_ext_rm_leaf()
  ext4: remove unused code from ext4_ext_map_blocks()
  ext4: rewrite punch hole to use ext4_ext_remove_space()
  jbd2: cleanup journal tail after transaction commit
  ...
Diffstat (limited to 'fs/jbd2')
-rw-r--r--  fs/jbd2/checkpoint.c    140
-rw-r--r--  fs/jbd2/commit.c         47
-rw-r--r--  fs/jbd2/journal.c       361
-rw-r--r--  fs/jbd2/recovery.c        5
-rw-r--r--  fs/jbd2/revoke.c         12
-rw-r--r--  fs/jbd2/transaction.c    48
6 files changed, 343 insertions, 270 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index d49d202903fb..c78841ee81cf 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -88,14 +88,13 @@ static inline void __buffer_relink_io(struct journal_head *jh)
88 * whole transaction. 88 * whole transaction.
89 * 89 *
90 * Requires j_list_lock 90 * Requires j_list_lock
91 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
92 */ 91 */
93static int __try_to_free_cp_buf(struct journal_head *jh) 92static int __try_to_free_cp_buf(struct journal_head *jh)
94{ 93{
95 int ret = 0; 94 int ret = 0;
96 struct buffer_head *bh = jh2bh(jh); 95 struct buffer_head *bh = jh2bh(jh);
97 96
98 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && 97 if (jh->b_transaction == NULL && !buffer_locked(bh) &&
99 !buffer_dirty(bh) && !buffer_write_io_error(bh)) { 98 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
100 /* 99 /*
101 * Get our reference so that bh cannot be freed before 100 * Get our reference so that bh cannot be freed before
@@ -104,11 +103,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
104 get_bh(bh); 103 get_bh(bh);
105 JBUFFER_TRACE(jh, "remove from checkpoint list"); 104 JBUFFER_TRACE(jh, "remove from checkpoint list");
106 ret = __jbd2_journal_remove_checkpoint(jh) + 1; 105 ret = __jbd2_journal_remove_checkpoint(jh) + 1;
107 jbd_unlock_bh_state(bh);
108 BUFFER_TRACE(bh, "release"); 106 BUFFER_TRACE(bh, "release");
109 __brelse(bh); 107 __brelse(bh);
110 } else {
111 jbd_unlock_bh_state(bh);
112 } 108 }
113 return ret; 109 return ret;
114} 110}
@@ -180,21 +176,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
180} 176}
181 177
182/* 178/*
183 * We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
184 * The caller must restart a list walk. Wait for someone else to run
185 * jbd_unlock_bh_state().
186 */
187static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
188 __releases(journal->j_list_lock)
189{
190 get_bh(bh);
191 spin_unlock(&journal->j_list_lock);
192 jbd_lock_bh_state(bh);
193 jbd_unlock_bh_state(bh);
194 put_bh(bh);
195}
196
197/*
198 * Clean up transaction's list of buffers submitted for io. 179 * Clean up transaction's list of buffers submitted for io.
199 * We wait for any pending IO to complete and remove any clean 180 * We wait for any pending IO to complete and remove any clean
200 * buffers. Note that we take the buffers in the opposite ordering 181 * buffers. Note that we take the buffers in the opposite ordering
@@ -222,15 +203,9 @@ restart:
222 while (!released && transaction->t_checkpoint_io_list) { 203 while (!released && transaction->t_checkpoint_io_list) {
223 jh = transaction->t_checkpoint_io_list; 204 jh = transaction->t_checkpoint_io_list;
224 bh = jh2bh(jh); 205 bh = jh2bh(jh);
225 if (!jbd_trylock_bh_state(bh)) {
226 jbd_sync_bh(journal, bh);
227 spin_lock(&journal->j_list_lock);
228 goto restart;
229 }
230 get_bh(bh); 206 get_bh(bh);
231 if (buffer_locked(bh)) { 207 if (buffer_locked(bh)) {
232 spin_unlock(&journal->j_list_lock); 208 spin_unlock(&journal->j_list_lock);
233 jbd_unlock_bh_state(bh);
234 wait_on_buffer(bh); 209 wait_on_buffer(bh);
235 /* the journal_head may have gone by now */ 210 /* the journal_head may have gone by now */
236 BUFFER_TRACE(bh, "brelse"); 211 BUFFER_TRACE(bh, "brelse");
@@ -246,7 +221,6 @@ restart:
246 * it has been written out and so we can drop it from the list 221 * it has been written out and so we can drop it from the list
247 */ 222 */
248 released = __jbd2_journal_remove_checkpoint(jh); 223 released = __jbd2_journal_remove_checkpoint(jh);
249 jbd_unlock_bh_state(bh);
250 __brelse(bh); 224 __brelse(bh);
251 } 225 }
252 226
@@ -266,7 +240,6 @@ __flush_batch(journal_t *journal, int *batch_count)
266 240
267 for (i = 0; i < *batch_count; i++) { 241 for (i = 0; i < *batch_count; i++) {
268 struct buffer_head *bh = journal->j_chkpt_bhs[i]; 242 struct buffer_head *bh = journal->j_chkpt_bhs[i];
269 clear_buffer_jwrite(bh);
270 BUFFER_TRACE(bh, "brelse"); 243 BUFFER_TRACE(bh, "brelse");
271 __brelse(bh); 244 __brelse(bh);
272 } 245 }
@@ -281,7 +254,6 @@ __flush_batch(journal_t *journal, int *batch_count)
281 * be written out. 254 * be written out.
282 * 255 *
283 * Called with j_list_lock held and drops it if 1 is returned 256 * Called with j_list_lock held and drops it if 1 is returned
284 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
285 */ 257 */
286static int __process_buffer(journal_t *journal, struct journal_head *jh, 258static int __process_buffer(journal_t *journal, struct journal_head *jh,
287 int *batch_count, transaction_t *transaction) 259 int *batch_count, transaction_t *transaction)
@@ -292,7 +264,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
292 if (buffer_locked(bh)) { 264 if (buffer_locked(bh)) {
293 get_bh(bh); 265 get_bh(bh);
294 spin_unlock(&journal->j_list_lock); 266 spin_unlock(&journal->j_list_lock);
295 jbd_unlock_bh_state(bh);
296 wait_on_buffer(bh); 267 wait_on_buffer(bh);
297 /* the journal_head may have gone by now */ 268 /* the journal_head may have gone by now */
298 BUFFER_TRACE(bh, "brelse"); 269 BUFFER_TRACE(bh, "brelse");
@@ -304,7 +275,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
304 275
305 transaction->t_chp_stats.cs_forced_to_close++; 276 transaction->t_chp_stats.cs_forced_to_close++;
306 spin_unlock(&journal->j_list_lock); 277 spin_unlock(&journal->j_list_lock);
307 jbd_unlock_bh_state(bh);
308 if (unlikely(journal->j_flags & JBD2_UNMOUNT)) 278 if (unlikely(journal->j_flags & JBD2_UNMOUNT))
309 /* 279 /*
310 * The journal thread is dead; so starting and 280 * The journal thread is dead; so starting and
@@ -323,11 +293,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
323 if (unlikely(buffer_write_io_error(bh))) 293 if (unlikely(buffer_write_io_error(bh)))
324 ret = -EIO; 294 ret = -EIO;
325 get_bh(bh); 295 get_bh(bh);
326 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
327 BUFFER_TRACE(bh, "remove from checkpoint"); 296 BUFFER_TRACE(bh, "remove from checkpoint");
328 __jbd2_journal_remove_checkpoint(jh); 297 __jbd2_journal_remove_checkpoint(jh);
329 spin_unlock(&journal->j_list_lock); 298 spin_unlock(&journal->j_list_lock);
330 jbd_unlock_bh_state(bh);
331 __brelse(bh); 299 __brelse(bh);
332 } else { 300 } else {
333 /* 301 /*
@@ -340,10 +308,8 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
340 BUFFER_TRACE(bh, "queue"); 308 BUFFER_TRACE(bh, "queue");
341 get_bh(bh); 309 get_bh(bh);
342 J_ASSERT_BH(bh, !buffer_jwrite(bh)); 310 J_ASSERT_BH(bh, !buffer_jwrite(bh));
343 set_buffer_jwrite(bh);
344 journal->j_chkpt_bhs[*batch_count] = bh; 311 journal->j_chkpt_bhs[*batch_count] = bh;
345 __buffer_relink_io(jh); 312 __buffer_relink_io(jh);
346 jbd_unlock_bh_state(bh);
347 transaction->t_chp_stats.cs_written++; 313 transaction->t_chp_stats.cs_written++;
348 (*batch_count)++; 314 (*batch_count)++;
349 if (*batch_count == JBD2_NR_BATCH) { 315 if (*batch_count == JBD2_NR_BATCH) {
@@ -407,15 +373,7 @@ restart:
407 int retry = 0, err; 373 int retry = 0, err;
408 374
409 while (!retry && transaction->t_checkpoint_list) { 375 while (!retry && transaction->t_checkpoint_list) {
410 struct buffer_head *bh;
411
412 jh = transaction->t_checkpoint_list; 376 jh = transaction->t_checkpoint_list;
413 bh = jh2bh(jh);
414 if (!jbd_trylock_bh_state(bh)) {
415 jbd_sync_bh(journal, bh);
416 retry = 1;
417 break;
418 }
419 retry = __process_buffer(journal, jh, &batch_count, 377 retry = __process_buffer(journal, jh, &batch_count,
420 transaction); 378 transaction);
421 if (retry < 0 && !result) 379 if (retry < 0 && !result)
@@ -478,79 +436,28 @@ out:
478 436
479int jbd2_cleanup_journal_tail(journal_t *journal) 437int jbd2_cleanup_journal_tail(journal_t *journal)
480{ 438{
481 transaction_t * transaction;
482 tid_t first_tid; 439 tid_t first_tid;
483 unsigned long blocknr, freed; 440 unsigned long blocknr;
484 441
485 if (is_journal_aborted(journal)) 442 if (is_journal_aborted(journal))
486 return 1; 443 return 1;
487 444
488 /* OK, work out the oldest transaction remaining in the log, and 445 if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
489 * the log block it starts at.
490 *
491 * If the log is now empty, we need to work out which is the
492 * next transaction ID we will write, and where it will
493 * start. */
494
495 write_lock(&journal->j_state_lock);
496 spin_lock(&journal->j_list_lock);
497 transaction = journal->j_checkpoint_transactions;
498 if (transaction) {
499 first_tid = transaction->t_tid;
500 blocknr = transaction->t_log_start;
501 } else if ((transaction = journal->j_committing_transaction) != NULL) {
502 first_tid = transaction->t_tid;
503 blocknr = transaction->t_log_start;
504 } else if ((transaction = journal->j_running_transaction) != NULL) {
505 first_tid = transaction->t_tid;
506 blocknr = journal->j_head;
507 } else {
508 first_tid = journal->j_transaction_sequence;
509 blocknr = journal->j_head;
510 }
511 spin_unlock(&journal->j_list_lock);
512 J_ASSERT(blocknr != 0);
513
514 /* If the oldest pinned transaction is at the tail of the log
515 already then there's not much we can do right now. */
516 if (journal->j_tail_sequence == first_tid) {
517 write_unlock(&journal->j_state_lock);
518 return 1; 446 return 1;
519 } 447 J_ASSERT(blocknr != 0);
520
521 /* OK, update the superblock to recover the freed space.
522 * Physical blocks come first: have we wrapped beyond the end of
523 * the log? */
524 freed = blocknr - journal->j_tail;
525 if (blocknr < journal->j_tail)
526 freed = freed + journal->j_last - journal->j_first;
527
528 trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
529 jbd_debug(1,
530 "Cleaning journal tail from %d to %d (offset %lu), "
531 "freeing %lu\n",
532 journal->j_tail_sequence, first_tid, blocknr, freed);
533
534 journal->j_free += freed;
535 journal->j_tail_sequence = first_tid;
536 journal->j_tail = blocknr;
537 write_unlock(&journal->j_state_lock);
538 448
539 /* 449 /*
540 * If there is an external journal, we need to make sure that 450 * We need to make sure that any blocks that were recently written out
541 * any data blocks that were recently written out --- perhaps 451 * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
542 * by jbd2_log_do_checkpoint() --- are flushed out before we 452 * we drop the transactions from the journal. It's unlikely this will
543 * drop the transactions from the external journal. It's 453 * be necessary, especially with an appropriately sized journal, but we
544 * unlikely this will be necessary, especially with a 454 * need this to guarantee correctness. Fortunately
545 * appropriately sized journal, but we need this to guarantee 455 * jbd2_cleanup_journal_tail() doesn't get called all that often.
546 * correctness. Fortunately jbd2_cleanup_journal_tail()
547 * doesn't get called all that often.
548 */ 456 */
549 if ((journal->j_fs_dev != journal->j_dev) && 457 if (journal->j_flags & JBD2_BARRIER)
550 (journal->j_flags & JBD2_BARRIER))
551 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 458 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
552 if (!(journal->j_flags & JBD2_ABORT)) 459
553 jbd2_journal_update_superblock(journal, 1); 460 __jbd2_update_log_tail(journal, first_tid, blocknr);
554 return 0; 461 return 0;
555} 462}
556 463
@@ -582,15 +489,12 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
582 do { 489 do {
583 jh = next_jh; 490 jh = next_jh;
584 next_jh = jh->b_cpnext; 491 next_jh = jh->b_cpnext;
585 /* Use trylock because of the ranking */ 492 ret = __try_to_free_cp_buf(jh);
586 if (jbd_trylock_bh_state(jh2bh(jh))) { 493 if (ret) {
587 ret = __try_to_free_cp_buf(jh); 494 freed++;
588 if (ret) { 495 if (ret == 2) {
589 freed++; 496 *released = 1;
590 if (ret == 2) { 497 return freed;
591 *released = 1;
592 return freed;
593 }
594 } 498 }
595 } 499 }
596 /* 500 /*
@@ -673,9 +577,7 @@ out:
673 * The function can free jh and bh. 577 * The function can free jh and bh.
674 * 578 *
675 * This function is called with j_list_lock held. 579 * This function is called with j_list_lock held.
676 * This function is called with jbd_lock_bh_state(jh2bh(jh))
677 */ 580 */
678
679int __jbd2_journal_remove_checkpoint(struct journal_head *jh) 581int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
680{ 582{
681 struct transaction_chp_stats_s *stats; 583 struct transaction_chp_stats_s *stats;
@@ -722,7 +624,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
722 transaction->t_tid, stats); 624 transaction->t_tid, stats);
723 625
724 __jbd2_journal_drop_transaction(journal, transaction); 626 __jbd2_journal_drop_transaction(journal, transaction);
725 kfree(transaction); 627 jbd2_journal_free_transaction(transaction);
726 628
727 /* Just in case anybody was waiting for more transactions to be 629 /* Just in case anybody was waiting for more transactions to be
728 checkpointed... */ 630 checkpointed... */
@@ -797,5 +699,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
797 J_ASSERT(journal->j_committing_transaction != transaction); 699 J_ASSERT(journal->j_committing_transaction != transaction);
798 J_ASSERT(journal->j_running_transaction != transaction); 700 J_ASSERT(journal->j_running_transaction != transaction);
799 701
702 trace_jbd2_drop_transaction(journal, transaction);
703
800 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); 704 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
801} 705}
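
The tail-advance bookkeeping removed from jbd2_cleanup_journal_tail() above reappears in __jbd2_update_log_tail() (see the journal.c hunks below); in both places the journal is treated as a circular run of blocks in [j_first, j_last). Below is a minimal standalone sketch of that wraparound arithmetic; blocks_freed() and the concrete block numbers are purely illustrative and not kernel code.

#include <stdio.h>

/*
 * Blocks freed when the log tail moves from old_tail to new_tail in a
 * circular log occupying blocks [first, last).  Mirrors the
 * "freed = blocknr - j_tail; if wrapped, add j_last - j_first" logic
 * in the hunks above; unsigned arithmetic makes the wrap case work.
 */
static unsigned long blocks_freed(unsigned long first, unsigned long last,
                                  unsigned long old_tail, unsigned long new_tail)
{
        unsigned long freed = new_tail - old_tail;

        if (new_tail < old_tail)        /* the tail wrapped past the end */
                freed += last - first;
        return freed;
}

int main(void)
{
        /* No wrap: tail advances from block 100 to block 160. */
        printf("%lu\n", blocks_freed(1, 1024, 100, 160));      /* 60 */
        /* Wrap: tail advances from block 1000 past the end to block 40. */
        printf("%lu\n", blocks_freed(1, 1024, 1000, 40));      /* 63 */
        return 0;
}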
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index c067a8cae63b..17f557f01cf0 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -331,6 +331,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
331 struct buffer_head *cbh = NULL; /* For transactional checksums */ 331 struct buffer_head *cbh = NULL; /* For transactional checksums */
332 __u32 crc32_sum = ~0; 332 __u32 crc32_sum = ~0;
333 struct blk_plug plug; 333 struct blk_plug plug;
334 /* Tail of the journal */
335 unsigned long first_block;
336 tid_t first_tid;
337 int update_tail;
334 338
335 /* 339 /*
336 * First job: lock down the current transaction and wait for 340 * First job: lock down the current transaction and wait for
@@ -340,7 +344,18 @@ void jbd2_journal_commit_transaction(journal_t *journal)
340 /* Do we need to erase the effects of a prior jbd2_journal_flush? */ 344 /* Do we need to erase the effects of a prior jbd2_journal_flush? */
341 if (journal->j_flags & JBD2_FLUSHED) { 345 if (journal->j_flags & JBD2_FLUSHED) {
342 jbd_debug(3, "super block updated\n"); 346 jbd_debug(3, "super block updated\n");
343 jbd2_journal_update_superblock(journal, 1); 347 mutex_lock(&journal->j_checkpoint_mutex);
348 /*
349 * We hold j_checkpoint_mutex so tail cannot change under us.
350 * We don't need any special data guarantees for writing sb
351 * since journal is empty and it is ok for write to be
352 * flushed only with transaction commit.
353 */
354 jbd2_journal_update_sb_log_tail(journal,
355 journal->j_tail_sequence,
356 journal->j_tail,
357 WRITE_SYNC);
358 mutex_unlock(&journal->j_checkpoint_mutex);
344 } else { 359 } else {
345 jbd_debug(3, "superblock not updated\n"); 360 jbd_debug(3, "superblock not updated\n");
346 } 361 }
@@ -677,10 +692,30 @@ start_journal_io:
677 err = 0; 692 err = 0;
678 } 693 }
679 694
695 /*
696 * Get current oldest transaction in the log before we issue flush
697 * to the filesystem device. After the flush we can be sure that
698 * blocks of all older transactions are checkpointed to persistent
699 * storage and we will be safe to update journal start in the
700 * superblock with the numbers we get here.
701 */
702 update_tail =
703 jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
704
680 write_lock(&journal->j_state_lock); 705 write_lock(&journal->j_state_lock);
706 if (update_tail) {
707 long freed = first_block - journal->j_tail;
708
709 if (first_block < journal->j_tail)
710 freed += journal->j_last - journal->j_first;
711 /* Update tail only if we free significant amount of space */
712 if (freed < journal->j_maxlen / 4)
713 update_tail = 0;
714 }
681 J_ASSERT(commit_transaction->t_state == T_COMMIT); 715 J_ASSERT(commit_transaction->t_state == T_COMMIT);
682 commit_transaction->t_state = T_COMMIT_DFLUSH; 716 commit_transaction->t_state = T_COMMIT_DFLUSH;
683 write_unlock(&journal->j_state_lock); 717 write_unlock(&journal->j_state_lock);
718
684 /* 719 /*
685 * If the journal is not located on the file system device, 720 * If the journal is not located on the file system device,
686 * then we must flush the file system device before we issue 721 * then we must flush the file system device before we issue
@@ -831,6 +866,14 @@ wait_for_iobuf:
831 if (err) 866 if (err)
832 jbd2_journal_abort(journal, err); 867 jbd2_journal_abort(journal, err);
833 868
869 /*
870 * Now disk caches for filesystem device are flushed so we are safe to
871 * erase checkpointed transactions from the log by updating journal
872 * superblock.
873 */
874 if (update_tail)
875 jbd2_update_log_tail(journal, first_tid, first_block);
876
834 /* End of a transaction! Finally, we can do checkpoint 877 /* End of a transaction! Finally, we can do checkpoint
835 processing: any buffers committed as a result of this 878 processing: any buffers committed as a result of this
836 transaction can be removed from any checkpoint list it was on 879 transaction can be removed from any checkpoint list it was on
@@ -1048,7 +1091,7 @@ restart_loop:
1048 jbd_debug(1, "JBD2: commit %d complete, head %d\n", 1091 jbd_debug(1, "JBD2: commit %d complete, head %d\n",
1049 journal->j_commit_sequence, journal->j_tail_sequence); 1092 journal->j_commit_sequence, journal->j_tail_sequence);
1050 if (to_free) 1093 if (to_free)
1051 kfree(commit_transaction); 1094 jbd2_journal_free_transaction(commit_transaction);
1052 1095
1053 wake_up(&journal->j_wait_done_commit); 1096 wake_up(&journal->j_wait_done_commit);
1054} 1097}
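
The commit path above samples the prospective tail with jbd2_journal_get_log_tail() before flushing the filesystem device, and then skips the on-disk tail update entirely unless the move would reclaim at least a quarter of the journal (the j_maxlen / 4 check). A small standalone sketch of that heuristic follows; worth_updating_tail() and the example sizes are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

/* Wraparound freed-block count (as in the checkpoint.c sketch earlier),
 * followed by the "only if we free >= j_maxlen / 4" test from the hunk above. */
static bool worth_updating_tail(unsigned long first, unsigned long last,
                                unsigned long maxlen,
                                unsigned long old_tail, unsigned long new_tail)
{
        unsigned long freed = new_tail - old_tail;

        if (new_tail < old_tail)
                freed += last - first;
        return freed >= maxlen / 4;
}

int main(void)
{
        /* 1024-block journal: freeing 60 blocks is not worth a sb write... */
        printf("%d\n", worth_updating_tail(0, 1024, 1024, 100, 160));   /* 0 */
        /* ...but freeing 600 blocks is. */
        printf("%d\n", worth_updating_tail(0, 1024, 1024, 100, 700));   /* 1 */
        return 0;
}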
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 839377e3d624..98ed6dbfe381 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -71,7 +71,6 @@ EXPORT_SYMBOL(jbd2_journal_revoke);
71 71
72EXPORT_SYMBOL(jbd2_journal_init_dev); 72EXPORT_SYMBOL(jbd2_journal_init_dev);
73EXPORT_SYMBOL(jbd2_journal_init_inode); 73EXPORT_SYMBOL(jbd2_journal_init_inode);
74EXPORT_SYMBOL(jbd2_journal_update_format);
75EXPORT_SYMBOL(jbd2_journal_check_used_features); 74EXPORT_SYMBOL(jbd2_journal_check_used_features);
76EXPORT_SYMBOL(jbd2_journal_check_available_features); 75EXPORT_SYMBOL(jbd2_journal_check_available_features);
77EXPORT_SYMBOL(jbd2_journal_set_features); 76EXPORT_SYMBOL(jbd2_journal_set_features);
@@ -96,7 +95,6 @@ EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
96EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); 95EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
97EXPORT_SYMBOL(jbd2_inode_cache); 96EXPORT_SYMBOL(jbd2_inode_cache);
98 97
99static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
100static void __journal_abort_soft (journal_t *journal, int errno); 98static void __journal_abort_soft (journal_t *journal, int errno);
101static int jbd2_journal_create_slab(size_t slab_size); 99static int jbd2_journal_create_slab(size_t slab_size);
102 100
@@ -746,6 +744,98 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
746 return jbd2_journal_add_journal_head(bh); 744 return jbd2_journal_add_journal_head(bh);
747} 745}
748 746
747/*
748 * Return tid of the oldest transaction in the journal and block in the journal
749 * where the transaction starts.
750 *
751 * If the journal is now empty, return the next transaction ID that we will
752 * write and where that transaction will start.
753 *
754 * The return value is 0 if journal tail cannot be pushed any further, 1 if
755 * it can.
756 */
757int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
758 unsigned long *block)
759{
760 transaction_t *transaction;
761 int ret;
762
763 read_lock(&journal->j_state_lock);
764 spin_lock(&journal->j_list_lock);
765 transaction = journal->j_checkpoint_transactions;
766 if (transaction) {
767 *tid = transaction->t_tid;
768 *block = transaction->t_log_start;
769 } else if ((transaction = journal->j_committing_transaction) != NULL) {
770 *tid = transaction->t_tid;
771 *block = transaction->t_log_start;
772 } else if ((transaction = journal->j_running_transaction) != NULL) {
773 *tid = transaction->t_tid;
774 *block = journal->j_head;
775 } else {
776 *tid = journal->j_transaction_sequence;
777 *block = journal->j_head;
778 }
779 ret = tid_gt(*tid, journal->j_tail_sequence);
780 spin_unlock(&journal->j_list_lock);
781 read_unlock(&journal->j_state_lock);
782
783 return ret;
784}
785
786/*
787 * Update information in journal structure and in on disk journal superblock
788 * about log tail. This function does not check whether information passed in
789 * really pushes the log tail further. It's the responsibility of the caller to make
790 * sure provided log tail information is valid (e.g. by holding
791 * j_checkpoint_mutex all the time between computing log tail and calling this
792 * function as is the case with jbd2_cleanup_journal_tail()).
793 *
794 * Requires j_checkpoint_mutex
795 */
796void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
797{
798 unsigned long freed;
799
800 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
801
802 /*
803 * We cannot afford for write to remain in drive's caches since as
804 * soon as we update j_tail, next transaction can start reusing journal
805 * space and if we lose sb update during power failure we'd replay
806 * old transaction with possibly newly overwritten data.
807 */
808 jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
809 write_lock(&journal->j_state_lock);
810 freed = block - journal->j_tail;
811 if (block < journal->j_tail)
812 freed += journal->j_last - journal->j_first;
813
814 trace_jbd2_update_log_tail(journal, tid, block, freed);
815 jbd_debug(1,
816 "Cleaning journal tail from %d to %d (offset %lu), "
817 "freeing %lu\n",
818 journal->j_tail_sequence, tid, block, freed);
819
820 journal->j_free += freed;
821 journal->j_tail_sequence = tid;
822 journal->j_tail = block;
823 write_unlock(&journal->j_state_lock);
824}
825
826/*
827 * This is a variation of __jbd2_update_log_tail which checks for validity of
828 * provided log tail and locks j_checkpoint_mutex. So it is safe against races
829 * with other threads updating log tail.
830 */
831void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
832{
833 mutex_lock(&journal->j_checkpoint_mutex);
834 if (tid_gt(tid, journal->j_tail_sequence))
835 __jbd2_update_log_tail(journal, tid, block);
836 mutex_unlock(&journal->j_checkpoint_mutex);
837}
838
749struct jbd2_stats_proc_session { 839struct jbd2_stats_proc_session {
750 journal_t *journal; 840 journal_t *journal;
751 struct transaction_stats_s *stats; 841 struct transaction_stats_s *stats;
@@ -1114,40 +1204,45 @@ static int journal_reset(journal_t *journal)
1114 1204
1115 journal->j_max_transaction_buffers = journal->j_maxlen / 4; 1205 journal->j_max_transaction_buffers = journal->j_maxlen / 4;
1116 1206
1117 /* Add the dynamic fields and write it to disk. */
1118 jbd2_journal_update_superblock(journal, 1);
1119 return jbd2_journal_start_thread(journal);
1120}
1121
1122/**
1123 * void jbd2_journal_update_superblock() - Update journal sb on disk.
1124 * @journal: The journal to update.
1125 * @wait: Set to '0' if you don't want to wait for IO completion.
1126 *
1127 * Update a journal's dynamic superblock fields and write it to disk,
1128 * optionally waiting for the IO to complete.
1129 */
1130void jbd2_journal_update_superblock(journal_t *journal, int wait)
1131{
1132 journal_superblock_t *sb = journal->j_superblock;
1133 struct buffer_head *bh = journal->j_sb_buffer;
1134
1135 /* 1207 /*
1136 * As a special case, if the on-disk copy is already marked as needing 1208 * As a special case, if the on-disk copy is already marked as needing
1137 * no recovery (s_start == 0) and there are no outstanding transactions 1209 * no recovery (s_start == 0), then we can safely defer the superblock
1138 * in the filesystem, then we can safely defer the superblock update 1210 * update until the next commit by setting JBD2_FLUSHED. This avoids
1139 * until the next commit by setting JBD2_FLUSHED. This avoids
1140 * attempting a write to a potential-readonly device. 1211 * attempting a write to a potential-readonly device.
1141 */ 1212 */
1142 if (sb->s_start == 0 && journal->j_tail_sequence == 1213 if (sb->s_start == 0) {
1143 journal->j_transaction_sequence) {
1144 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " 1214 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
1145 "(start %ld, seq %d, errno %d)\n", 1215 "(start %ld, seq %d, errno %d)\n",
1146 journal->j_tail, journal->j_tail_sequence, 1216 journal->j_tail, journal->j_tail_sequence,
1147 journal->j_errno); 1217 journal->j_errno);
1148 goto out; 1218 journal->j_flags |= JBD2_FLUSHED;
1219 } else {
1220 /* Lock here to make assertions happy... */
1221 mutex_lock(&journal->j_checkpoint_mutex);
1222 /*
1223 * Update log tail information. We use WRITE_FUA since new
1224 * transaction will start reusing journal space and so we
1225 * must make sure information about current log tail is on
1226 * disk before that.
1227 */
1228 jbd2_journal_update_sb_log_tail(journal,
1229 journal->j_tail_sequence,
1230 journal->j_tail,
1231 WRITE_FUA);
1232 mutex_unlock(&journal->j_checkpoint_mutex);
1149 } 1233 }
1234 return jbd2_journal_start_thread(journal);
1235}
1150 1236
1237static void jbd2_write_superblock(journal_t *journal, int write_op)
1238{
1239 struct buffer_head *bh = journal->j_sb_buffer;
1240 int ret;
1241
1242 trace_jbd2_write_superblock(journal, write_op);
1243 if (!(journal->j_flags & JBD2_BARRIER))
1244 write_op &= ~(REQ_FUA | REQ_FLUSH);
1245 lock_buffer(bh);
1151 if (buffer_write_io_error(bh)) { 1246 if (buffer_write_io_error(bh)) {
1152 /* 1247 /*
1153 * Oh, dear. A previous attempt to write the journal 1248 * Oh, dear. A previous attempt to write the journal
@@ -1163,48 +1258,106 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1163 clear_buffer_write_io_error(bh); 1258 clear_buffer_write_io_error(bh);
1164 set_buffer_uptodate(bh); 1259 set_buffer_uptodate(bh);
1165 } 1260 }
1261 get_bh(bh);
1262 bh->b_end_io = end_buffer_write_sync;
1263 ret = submit_bh(write_op, bh);
1264 wait_on_buffer(bh);
1265 if (buffer_write_io_error(bh)) {
1266 clear_buffer_write_io_error(bh);
1267 set_buffer_uptodate(bh);
1268 ret = -EIO;
1269 }
1270 if (ret) {
1271 printk(KERN_ERR "JBD2: Error %d detected when updating "
1272 "journal superblock for %s.\n", ret,
1273 journal->j_devname);
1274 }
1275}
1276
1277/**
1278 * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
1279 * @journal: The journal to update.
1280 * @tail_tid: TID of the new transaction at the tail of the log
1281 * @tail_block: The first block of the transaction at the tail of the log
1282 * @write_op: With which operation should we write the journal sb
1283 *
1284 * Update a journal's superblock information about log tail and write it to
1285 * disk, waiting for the IO to complete.
1286 */
1287void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
1288 unsigned long tail_block, int write_op)
1289{
1290 journal_superblock_t *sb = journal->j_superblock;
1291
1292 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1293 jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
1294 tail_block, tail_tid);
1295
1296 sb->s_sequence = cpu_to_be32(tail_tid);
1297 sb->s_start = cpu_to_be32(tail_block);
1298
1299 jbd2_write_superblock(journal, write_op);
1300
1301 /* Log is no longer empty */
1302 write_lock(&journal->j_state_lock);
1303 WARN_ON(!sb->s_sequence);
1304 journal->j_flags &= ~JBD2_FLUSHED;
1305 write_unlock(&journal->j_state_lock);
1306}
1307
1308/**
1309 * jbd2_mark_journal_empty() - Mark on disk journal as empty.
1310 * @journal: The journal to update.
1311 *
1312 * Update a journal's dynamic superblock fields to show that journal is empty.
1313 * Write updated superblock to disk waiting for IO to complete.
1314 */
1315static void jbd2_mark_journal_empty(journal_t *journal)
1316{
1317 journal_superblock_t *sb = journal->j_superblock;
1166 1318
1319 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1167 read_lock(&journal->j_state_lock); 1320 read_lock(&journal->j_state_lock);
1168 jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n", 1321 jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
1169 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1322 journal->j_tail_sequence);
1170 1323
1171 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1324 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
1172 sb->s_start = cpu_to_be32(journal->j_tail); 1325 sb->s_start = cpu_to_be32(0);
1173 sb->s_errno = cpu_to_be32(journal->j_errno);
1174 read_unlock(&journal->j_state_lock); 1326 read_unlock(&journal->j_state_lock);
1175 1327
1176 BUFFER_TRACE(bh, "marking dirty"); 1328 jbd2_write_superblock(journal, WRITE_FUA);
1177 mark_buffer_dirty(bh);
1178 if (wait) {
1179 sync_dirty_buffer(bh);
1180 if (buffer_write_io_error(bh)) {
1181 printk(KERN_ERR "JBD2: I/O error detected "
1182 "when updating journal superblock for %s.\n",
1183 journal->j_devname);
1184 clear_buffer_write_io_error(bh);
1185 set_buffer_uptodate(bh);
1186 }
1187 } else
1188 write_dirty_buffer(bh, WRITE);
1189
1190out:
1191 /* If we have just flushed the log (by marking s_start==0), then
1192 * any future commit will have to be careful to update the
1193 * superblock again to re-record the true start of the log. */
1194 1329
1330 /* Log is no longer empty */
1195 write_lock(&journal->j_state_lock); 1331 write_lock(&journal->j_state_lock);
1196 if (sb->s_start) 1332 journal->j_flags |= JBD2_FLUSHED;
1197 journal->j_flags &= ~JBD2_FLUSHED;
1198 else
1199 journal->j_flags |= JBD2_FLUSHED;
1200 write_unlock(&journal->j_state_lock); 1333 write_unlock(&journal->j_state_lock);
1201} 1334}
1202 1335
1336
1337/**
1338 * jbd2_journal_update_sb_errno() - Update error in the journal.
1339 * @journal: The journal to update.
1340 *
1341 * Update a journal's errno. Write updated superblock to disk waiting for IO
1342 * to complete.
1343 */
1344static void jbd2_journal_update_sb_errno(journal_t *journal)
1345{
1346 journal_superblock_t *sb = journal->j_superblock;
1347
1348 read_lock(&journal->j_state_lock);
1349 jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
1350 journal->j_errno);
1351 sb->s_errno = cpu_to_be32(journal->j_errno);
1352 read_unlock(&journal->j_state_lock);
1353
1354 jbd2_write_superblock(journal, WRITE_SYNC);
1355}
1356
1203/* 1357/*
1204 * Read the superblock for a given journal, performing initial 1358 * Read the superblock for a given journal, performing initial
1205 * validation of the format. 1359 * validation of the format.
1206 */ 1360 */
1207
1208static int journal_get_superblock(journal_t *journal) 1361static int journal_get_superblock(journal_t *journal)
1209{ 1362{
1210 struct buffer_head *bh; 1363 struct buffer_head *bh;
@@ -1398,14 +1551,11 @@ int jbd2_journal_destroy(journal_t *journal)
1398 1551
1399 if (journal->j_sb_buffer) { 1552 if (journal->j_sb_buffer) {
1400 if (!is_journal_aborted(journal)) { 1553 if (!is_journal_aborted(journal)) {
1401 /* We can now mark the journal as empty. */ 1554 mutex_lock(&journal->j_checkpoint_mutex);
1402 journal->j_tail = 0; 1555 jbd2_mark_journal_empty(journal);
1403 journal->j_tail_sequence = 1556 mutex_unlock(&journal->j_checkpoint_mutex);
1404 ++journal->j_transaction_sequence; 1557 } else
1405 jbd2_journal_update_superblock(journal, 1);
1406 } else {
1407 err = -EIO; 1558 err = -EIO;
1408 }
1409 brelse(journal->j_sb_buffer); 1559 brelse(journal->j_sb_buffer);
1410 } 1560 }
1411 1561
@@ -1552,61 +1702,6 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
1552EXPORT_SYMBOL(jbd2_journal_clear_features); 1702EXPORT_SYMBOL(jbd2_journal_clear_features);
1553 1703
1554/** 1704/**
1555 * int jbd2_journal_update_format () - Update on-disk journal structure.
1556 * @journal: Journal to act on.
1557 *
1558 * Given an initialised but unloaded journal struct, poke about in the
1559 * on-disk structure to update it to the most recent supported version.
1560 */
1561int jbd2_journal_update_format (journal_t *journal)
1562{
1563 journal_superblock_t *sb;
1564 int err;
1565
1566 err = journal_get_superblock(journal);
1567 if (err)
1568 return err;
1569
1570 sb = journal->j_superblock;
1571
1572 switch (be32_to_cpu(sb->s_header.h_blocktype)) {
1573 case JBD2_SUPERBLOCK_V2:
1574 return 0;
1575 case JBD2_SUPERBLOCK_V1:
1576 return journal_convert_superblock_v1(journal, sb);
1577 default:
1578 break;
1579 }
1580 return -EINVAL;
1581}
1582
1583static int journal_convert_superblock_v1(journal_t *journal,
1584 journal_superblock_t *sb)
1585{
1586 int offset, blocksize;
1587 struct buffer_head *bh;
1588
1589 printk(KERN_WARNING
1590 "JBD2: Converting superblock from version 1 to 2.\n");
1591
1592 /* Pre-initialise new fields to zero */
1593 offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
1594 blocksize = be32_to_cpu(sb->s_blocksize);
1595 memset(&sb->s_feature_compat, 0, blocksize-offset);
1596
1597 sb->s_nr_users = cpu_to_be32(1);
1598 sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
1599 journal->j_format_version = 2;
1600
1601 bh = journal->j_sb_buffer;
1602 BUFFER_TRACE(bh, "marking dirty");
1603 mark_buffer_dirty(bh);
1604 sync_dirty_buffer(bh);
1605 return 0;
1606}
1607
1608
1609/**
1610 * int jbd2_journal_flush () - Flush journal 1705 * int jbd2_journal_flush () - Flush journal
1611 * @journal: Journal to act on. 1706 * @journal: Journal to act on.
1612 * 1707 *
@@ -1619,7 +1714,6 @@ int jbd2_journal_flush(journal_t *journal)
1619{ 1714{
1620 int err = 0; 1715 int err = 0;
1621 transaction_t *transaction = NULL; 1716 transaction_t *transaction = NULL;
1622 unsigned long old_tail;
1623 1717
1624 write_lock(&journal->j_state_lock); 1718 write_lock(&journal->j_state_lock);
1625 1719
@@ -1654,6 +1748,7 @@ int jbd2_journal_flush(journal_t *journal)
1654 if (is_journal_aborted(journal)) 1748 if (is_journal_aborted(journal))
1655 return -EIO; 1749 return -EIO;
1656 1750
1751 mutex_lock(&journal->j_checkpoint_mutex);
1657 jbd2_cleanup_journal_tail(journal); 1752 jbd2_cleanup_journal_tail(journal);
1658 1753
1659 /* Finally, mark the journal as really needing no recovery. 1754 /* Finally, mark the journal as really needing no recovery.
@@ -1661,14 +1756,9 @@ int jbd2_journal_flush(journal_t *journal)
1661 * the magic code for a fully-recovered superblock. Any future 1756 * the magic code for a fully-recovered superblock. Any future
1662 * commits of data to the journal will restore the current 1757 * commits of data to the journal will restore the current
1663 * s_start value. */ 1758 * s_start value. */
1759 jbd2_mark_journal_empty(journal);
1760 mutex_unlock(&journal->j_checkpoint_mutex);
1664 write_lock(&journal->j_state_lock); 1761 write_lock(&journal->j_state_lock);
1665 old_tail = journal->j_tail;
1666 journal->j_tail = 0;
1667 write_unlock(&journal->j_state_lock);
1668 jbd2_journal_update_superblock(journal, 1);
1669 write_lock(&journal->j_state_lock);
1670 journal->j_tail = old_tail;
1671
1672 J_ASSERT(!journal->j_running_transaction); 1762 J_ASSERT(!journal->j_running_transaction);
1673 J_ASSERT(!journal->j_committing_transaction); 1763 J_ASSERT(!journal->j_committing_transaction);
1674 J_ASSERT(!journal->j_checkpoint_transactions); 1764 J_ASSERT(!journal->j_checkpoint_transactions);
@@ -1708,8 +1798,12 @@ int jbd2_journal_wipe(journal_t *journal, int write)
1708 write ? "Clearing" : "Ignoring"); 1798 write ? "Clearing" : "Ignoring");
1709 1799
1710 err = jbd2_journal_skip_recovery(journal); 1800 err = jbd2_journal_skip_recovery(journal);
1711 if (write) 1801 if (write) {
1712 jbd2_journal_update_superblock(journal, 1); 1802 /* Lock to make assertions happy... */
1803 mutex_lock(&journal->j_checkpoint_mutex);
1804 jbd2_mark_journal_empty(journal);
1805 mutex_unlock(&journal->j_checkpoint_mutex);
1806 }
1713 1807
1714 no_recovery: 1808 no_recovery:
1715 return err; 1809 return err;
@@ -1759,7 +1853,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
1759 __jbd2_journal_abort_hard(journal); 1853 __jbd2_journal_abort_hard(journal);
1760 1854
1761 if (errno) 1855 if (errno)
1762 jbd2_journal_update_superblock(journal, 1); 1856 jbd2_journal_update_sb_errno(journal);
1763} 1857}
1764 1858
1765/** 1859/**
@@ -2017,7 +2111,7 @@ static struct kmem_cache *jbd2_journal_head_cache;
2017static atomic_t nr_journal_heads = ATOMIC_INIT(0); 2111static atomic_t nr_journal_heads = ATOMIC_INIT(0);
2018#endif 2112#endif
2019 2113
2020static int journal_init_jbd2_journal_head_cache(void) 2114static int jbd2_journal_init_journal_head_cache(void)
2021{ 2115{
2022 int retval; 2116 int retval;
2023 2117
@@ -2035,7 +2129,7 @@ static int journal_init_jbd2_journal_head_cache(void)
2035 return retval; 2129 return retval;
2036} 2130}
2037 2131
2038static void jbd2_journal_destroy_jbd2_journal_head_cache(void) 2132static void jbd2_journal_destroy_journal_head_cache(void)
2039{ 2133{
2040 if (jbd2_journal_head_cache) { 2134 if (jbd2_journal_head_cache) {
2041 kmem_cache_destroy(jbd2_journal_head_cache); 2135 kmem_cache_destroy(jbd2_journal_head_cache);
@@ -2323,7 +2417,7 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void)
2323 2417
2324struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; 2418struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
2325 2419
2326static int __init journal_init_handle_cache(void) 2420static int __init jbd2_journal_init_handle_cache(void)
2327{ 2421{
2328 jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); 2422 jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
2329 if (jbd2_handle_cache == NULL) { 2423 if (jbd2_handle_cache == NULL) {
@@ -2358,17 +2452,20 @@ static int __init journal_init_caches(void)
2358 2452
2359 ret = jbd2_journal_init_revoke_caches(); 2453 ret = jbd2_journal_init_revoke_caches();
2360 if (ret == 0) 2454 if (ret == 0)
2361 ret = journal_init_jbd2_journal_head_cache(); 2455 ret = jbd2_journal_init_journal_head_cache();
2456 if (ret == 0)
2457 ret = jbd2_journal_init_handle_cache();
2362 if (ret == 0) 2458 if (ret == 0)
2363 ret = journal_init_handle_cache(); 2459 ret = jbd2_journal_init_transaction_cache();
2364 return ret; 2460 return ret;
2365} 2461}
2366 2462
2367static void jbd2_journal_destroy_caches(void) 2463static void jbd2_journal_destroy_caches(void)
2368{ 2464{
2369 jbd2_journal_destroy_revoke_caches(); 2465 jbd2_journal_destroy_revoke_caches();
2370 jbd2_journal_destroy_jbd2_journal_head_cache(); 2466 jbd2_journal_destroy_journal_head_cache();
2371 jbd2_journal_destroy_handle_cache(); 2467 jbd2_journal_destroy_handle_cache();
2468 jbd2_journal_destroy_transaction_cache();
2372 jbd2_journal_destroy_slabs(); 2469 jbd2_journal_destroy_slabs();
2373} 2470}
2374 2471
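
Several of the new helpers above (jbd2_journal_get_log_tail(), jbd2_update_log_tail()) compare transaction IDs with tid_gt(), which performs a wraparound-safe comparison of the 32-bit sequence counters. The standalone sketch below mimics that comparison rather than quoting the kernel header; tid_newer() is an illustrative name.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t tid_t;

/* Wraparound-safe "x is newer than y" for 32-bit transaction IDs, in the
 * spirit of tid_gt() from include/linux/jbd2.h: the difference interpreted
 * as signed is positive when x is ahead of y modulo 2^32. */
static int tid_newer(tid_t x, tid_t y)
{
        int32_t diff = (int32_t)(x - y);

        return diff > 0;
}

int main(void)
{
        printf("%d\n", tid_newer(10, 5));               /* 1: plainly newer      */
        printf("%d\n", tid_newer(5, 0xfffffff0u));      /* 1: newer across wrap  */
        printf("%d\n", tid_newer(0xfffffff0u, 5));      /* 0: older across wrap  */
        return 0;
}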
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index da6d7baf1390..c1a03354a22f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -21,6 +21,7 @@
21#include <linux/jbd2.h> 21#include <linux/jbd2.h>
22#include <linux/errno.h> 22#include <linux/errno.h>
23#include <linux/crc32.h> 23#include <linux/crc32.h>
24#include <linux/blkdev.h>
24#endif 25#endif
25 26
26/* 27/*
@@ -265,7 +266,9 @@ int jbd2_journal_recover(journal_t *journal)
265 err2 = sync_blockdev(journal->j_fs_dev); 266 err2 = sync_blockdev(journal->j_fs_dev);
266 if (!err) 267 if (!err)
267 err = err2; 268 err = err2;
268 269 /* Make sure all replayed data is on permanent storage */
270 if (journal->j_flags & JBD2_BARRIER)
271 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
269 return err; 272 return err;
270} 273}
271 274
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 30b2867d6cc9..6973705d6a3d 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -208,17 +208,13 @@ int __init jbd2_journal_init_revoke_caches(void)
208 J_ASSERT(!jbd2_revoke_record_cache); 208 J_ASSERT(!jbd2_revoke_record_cache);
209 J_ASSERT(!jbd2_revoke_table_cache); 209 J_ASSERT(!jbd2_revoke_table_cache);
210 210
211 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", 211 jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s,
212 sizeof(struct jbd2_revoke_record_s), 212 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY);
213 0,
214 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
215 NULL);
216 if (!jbd2_revoke_record_cache) 213 if (!jbd2_revoke_record_cache)
217 goto record_cache_failure; 214 goto record_cache_failure;
218 215
219 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", 216 jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s,
220 sizeof(struct jbd2_revoke_table_s), 217 SLAB_TEMPORARY);
221 0, SLAB_TEMPORARY, NULL);
222 if (!jbd2_revoke_table_cache) 218 if (!jbd2_revoke_table_cache)
223 goto table_cache_failure; 219 goto table_cache_failure;
224 return 0; 220 return 0;
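
The revoke-cache change above switches from open-coded kmem_cache_create() calls to the KMEM_CACHE() helper from <linux/slab.h>, which names the cache after the structure and derives its size and alignment automatically. Roughly (the exact macro text can vary between kernel versions), the converted call expands as sketched here:

/* Approximate form of the helper in include/linux/slab.h: */
#define KMEM_CACHE(__struct, __flags)                                   \
        kmem_cache_create(#__struct, sizeof(struct __struct),           \
                          __alignof__(struct __struct), (__flags), NULL)

/* So KMEM_CACHE(jbd2_revoke_record_s, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY)
 * preprocesses to approximately: */
jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record_s",
                        sizeof(struct jbd2_revoke_record_s),
                        __alignof__(struct jbd2_revoke_record_s),
                        SLAB_HWCACHE_ALIGN | SLAB_TEMPORARY, NULL);

A visible side effect of the conversion is that the slab name changes from the hand-picked "jbd2_revoke_record" to the stringified struct name, and the cache requests the structure's natural alignment instead of the default implied by the old 0 alignment argument.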
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e5aba56e1fd5..ddcd3549c6c2 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -33,6 +33,35 @@
33static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 33static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
34static void __jbd2_journal_unfile_buffer(struct journal_head *jh); 34static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
35 35
36static struct kmem_cache *transaction_cache;
37int __init jbd2_journal_init_transaction_cache(void)
38{
39 J_ASSERT(!transaction_cache);
40 transaction_cache = kmem_cache_create("jbd2_transaction_s",
41 sizeof(transaction_t),
42 0,
43 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
44 NULL);
45 if (transaction_cache)
46 return 0;
47 return -ENOMEM;
48}
49
50void jbd2_journal_destroy_transaction_cache(void)
51{
52 if (transaction_cache) {
53 kmem_cache_destroy(transaction_cache);
54 transaction_cache = NULL;
55 }
56}
57
58void jbd2_journal_free_transaction(transaction_t *transaction)
59{
60 if (unlikely(ZERO_OR_NULL_PTR(transaction)))
61 return;
62 kmem_cache_free(transaction_cache, transaction);
63}
64
36/* 65/*
37 * jbd2_get_transaction: obtain a new transaction_t object. 66 * jbd2_get_transaction: obtain a new transaction_t object.
38 * 67 *
@@ -133,7 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
133 162
134alloc_transaction: 163alloc_transaction:
135 if (!journal->j_running_transaction) { 164 if (!journal->j_running_transaction) {
136 new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask); 165 new_transaction = kmem_cache_alloc(transaction_cache,
166 gfp_mask | __GFP_ZERO);
137 if (!new_transaction) { 167 if (!new_transaction) {
138 /* 168 /*
139 * If __GFP_FS is not present, then we may be 169 * If __GFP_FS is not present, then we may be
@@ -162,7 +192,7 @@ repeat:
162 if (is_journal_aborted(journal) || 192 if (is_journal_aborted(journal) ||
163 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { 193 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
164 read_unlock(&journal->j_state_lock); 194 read_unlock(&journal->j_state_lock);
165 kfree(new_transaction); 195 jbd2_journal_free_transaction(new_transaction);
166 return -EROFS; 196 return -EROFS;
167 } 197 }
168 198
@@ -284,7 +314,7 @@ repeat:
284 read_unlock(&journal->j_state_lock); 314 read_unlock(&journal->j_state_lock);
285 315
286 lock_map_acquire(&handle->h_lockdep_map); 316 lock_map_acquire(&handle->h_lockdep_map);
287 kfree(new_transaction); 317 jbd2_journal_free_transaction(new_transaction);
288 return 0; 318 return 0;
289} 319}
290 320
@@ -1549,9 +1579,9 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1549 * of these pointers, it could go bad. Generally the caller needs to re-read 1579 * of these pointers, it could go bad. Generally the caller needs to re-read
1550 * the pointer from the transaction_t. 1580 * the pointer from the transaction_t.
1551 * 1581 *
1552 * Called under j_list_lock. The journal may not be locked. 1582 * Called under j_list_lock.
1553 */ 1583 */
1554void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) 1584static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1555{ 1585{
1556 struct journal_head **list = NULL; 1586 struct journal_head **list = NULL;
1557 transaction_t *transaction; 1587 transaction_t *transaction;
@@ -1646,10 +1676,8 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1646 spin_lock(&journal->j_list_lock); 1676 spin_lock(&journal->j_list_lock);
1647 if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { 1677 if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1648 /* written-back checkpointed metadata buffer */ 1678 /* written-back checkpointed metadata buffer */
1649 if (jh->b_jlist == BJ_None) { 1679 JBUFFER_TRACE(jh, "remove from checkpoint list");
1650 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1680 __jbd2_journal_remove_checkpoint(jh);
1651 __jbd2_journal_remove_checkpoint(jh);
1652 }
1653 } 1681 }
1654 spin_unlock(&journal->j_list_lock); 1682 spin_unlock(&journal->j_list_lock);
1655out: 1683out:
@@ -1949,6 +1977,8 @@ zap_buffer_unlocked:
1949 clear_buffer_mapped(bh); 1977 clear_buffer_mapped(bh);
1950 clear_buffer_req(bh); 1978 clear_buffer_req(bh);
1951 clear_buffer_new(bh); 1979 clear_buffer_new(bh);
1980 clear_buffer_delay(bh);
1981 clear_buffer_unwritten(bh);
1952 bh->b_bdev = NULL; 1982 bh->b_bdev = NULL;
1953 return may_free; 1983 return may_free;
1954} 1984}